| 1 |
// Copyright (c) 2005, Google Inc. |
// Copyright (c) 2010, Google Inc. |
| 2 |
// All rights reserved. |
// All rights reserved. |
| 3 |
// |
// |
| 4 |
// Redistribution and use in source and binary forms, with or without |
// Redistribution and use in source and binary forms, with or without |
| 331 |
bool RE::Replace(const StringPiece& rewrite, |
bool RE::Replace(const StringPiece& rewrite, |
| 332 |
string *str) const { |
string *str) const { |
| 333 |
int vec[kVecSize]; |
int vec[kVecSize]; |
| 334 |
int matches = TryMatch(*str, 0, UNANCHORED, vec, kVecSize); |
int matches = TryMatch(*str, 0, UNANCHORED, true, vec, kVecSize); |
| 335 |
if (matches == 0) |
if (matches == 0) |
| 336 |
return false; |
return false; |
| 337 |
|
|
| 384 |
string out; |
string out; |
| 385 |
int start = 0; |
int start = 0; |
| 386 |
int lastend = -1; |
int lastend = -1; |
| 387 |
|
bool last_match_was_empty_string = false; |
| 388 |
|
|
| 389 |
while (start <= static_cast<int>(str->length())) { |
while (start <= static_cast<int>(str->length())) { |
| 390 |
int matches = TryMatch(*str, start, UNANCHORED, vec, kVecSize); |
// If the previous match was for the empty string, we shouldn't |
| 391 |
if (matches <= 0) |
// just match again: we'll match in the same way and get an |
| 392 |
break; |
// infinite loop. Instead, we do the match in a special way: |
| 393 |
int matchstart = vec[0], matchend = vec[1]; |
// anchored -- to force another try at the same position -- |
| 394 |
assert(matchstart >= start); |
// and with a flag saying that this time, ignore empty matches. |
| 395 |
assert(matchend >= matchstart); |
// If this special match returns, that means there's a non-empty |
| 396 |
if (matchstart == matchend && matchstart == lastend) { |
// match at this position as well, and we can continue. If not, |
| 397 |
// advance one character if we matched an empty string at the same |
// we do what perl does, and just advance by one. |
| 398 |
// place as the last match occurred |
// Notice that perl prints '@@@' for this; |
| 399 |
matchend = start + 1; |
// perl -le '$_ = "aa"; s/b*|aa/@/g; print' |
| 400 |
// If the current char is CR and we're in CRLF mode, skip LF too. |
int matches; |
| 401 |
// Note it's better to call pcre_fullinfo() than to examine |
if (last_match_was_empty_string) { |
| 402 |
// all_options(), since options_ could have changed between |
matches = TryMatch(*str, start, ANCHOR_START, false, vec, kVecSize); |
| 403 |
// compile-time and now, but this is simpler and safe enough. |
if (matches <= 0) { |
| 404 |
// Modified by PH to add ANY and ANYCRLF. |
int matchend = start + 1; // advance one character. |
| 405 |
if (start+1 < static_cast<int>(str->length()) && |
// If the current char is CR and we're in CRLF mode, skip LF too. |
| 406 |
(*str)[start] == '\r' && (*str)[start+1] == '\n' && |
// Note it's better to call pcre_fullinfo() than to examine |
| 407 |
(NewlineMode(options_.all_options()) == PCRE_NEWLINE_CRLF || |
// all_options(), since options_ could have changed between |
| 408 |
NewlineMode(options_.all_options()) == PCRE_NEWLINE_ANY || |
// compile-time and now, but this is simpler and safe enough. |
| 409 |
NewlineMode(options_.all_options()) == PCRE_NEWLINE_ANYCRLF) |
// Modified by PH to add ANY and ANYCRLF. |
| 410 |
) { |
if (matchend < static_cast<int>(str->length()) && |
| 411 |
matchend++; |
(*str)[start] == '\r' && (*str)[matchend] == '\n' && |
| 412 |
} |
(NewlineMode(options_.all_options()) == PCRE_NEWLINE_CRLF || |
| 413 |
// We also need to advance more than one char if we're in utf8 mode. |
NewlineMode(options_.all_options()) == PCRE_NEWLINE_ANY || |
| 414 |
#ifdef SUPPORT_UTF8 |
NewlineMode(options_.all_options()) == PCRE_NEWLINE_ANYCRLF)) { |
|
if (options_.utf8()) { |
|
|
while (matchend < static_cast<int>(str->length()) && |
|
|
((*str)[matchend] & 0xc0) == 0x80) |
|
| 415 |
matchend++; |
matchend++; |
| 416 |
} |
} |
| 417 |
|
// We also need to advance more than one char if we're in utf8 mode. |
| 418 |
|
#ifdef SUPPORT_UTF8 |
| 419 |
|
if (options_.utf8()) { |
| 420 |
|
while (matchend < static_cast<int>(str->length()) && |
| 421 |
|
((*str)[matchend] & 0xc0) == 0x80) |
| 422 |
|
matchend++; |
| 423 |
|
} |
| 424 |
#endif |
#endif |
| 425 |
if (matchend <= static_cast<int>(str->length())) |
if (start < static_cast<int>(str->length())) |
| 426 |
out.append(*str, start, matchend - start); |
out.append(*str, start, matchend - start); |
| 427 |
start = matchend; |
start = matchend; |
| 428 |
|
last_match_was_empty_string = false; |
| 429 |
|
continue; |
| 430 |
|
} |
| 431 |
} else { |
} else { |
| 432 |
out.append(*str, start, matchstart - start); |
matches = TryMatch(*str, start, UNANCHORED, true, vec, kVecSize); |
| 433 |
Rewrite(&out, rewrite, *str, vec, matches); |
if (matches <= 0) |
| 434 |
start = matchend; |
break; |
|
lastend = matchend; |
|
|
count++; |
|
| 435 |
} |
} |
| 436 |
|
int matchstart = vec[0], matchend = vec[1]; |
| 437 |
|
assert(matchstart >= start); |
| 438 |
|
assert(matchend >= matchstart); |
| 439 |
|
out.append(*str, start, matchstart - start); |
| 440 |
|
Rewrite(&out, rewrite, *str, vec, matches); |
| 441 |
|
start = matchend; |
| 442 |
|
lastend = matchend; |
| 443 |
|
count++; |
| 444 |
|
last_match_was_empty_string = (matchstart == matchend); |
| 445 |
} |
} |
| 446 |
|
|
| 447 |
if (count == 0) |
if (count == 0) |
| 457 |
const StringPiece& text, |
const StringPiece& text, |
| 458 |
string *out) const { |
string *out) const { |
| 459 |
int vec[kVecSize]; |
int vec[kVecSize]; |
| 460 |
int matches = TryMatch(text, 0, UNANCHORED, vec, kVecSize); |
int matches = TryMatch(text, 0, UNANCHORED, true, vec, kVecSize); |
| 461 |
if (matches == 0) |
if (matches == 0) |
| 462 |
return false; |
return false; |
| 463 |
out->erase(); |
out->erase(); |
| 503 |
int RE::TryMatch(const StringPiece& text, |
int RE::TryMatch(const StringPiece& text, |
| 504 |
int startpos, |
int startpos, |
| 505 |
Anchor anchor, |
Anchor anchor, |
| 506 |
|
bool empty_ok, |
| 507 |
int *vec, |
int *vec, |
| 508 |
int vecsize) const { |
int vecsize) const { |
| 509 |
pcre* re = (anchor == ANCHOR_BOTH) ? re_full_ : re_partial_; |
pcre* re = (anchor == ANCHOR_BOTH) ? re_full_ : re_partial_; |
| 521 |
extra.flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION; |
extra.flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION; |
| 522 |
extra.match_limit_recursion = options_.match_limit_recursion(); |
extra.match_limit_recursion = options_.match_limit_recursion(); |
| 523 |
} |
} |
| 524 |
|
|
| 525 |
|
int options = 0; |
| 526 |
|
if (anchor != UNANCHORED) |
| 527 |
|
options |= PCRE_ANCHORED; |
| 528 |
|
if (!empty_ok) |
| 529 |
|
options |= PCRE_NOTEMPTY; |
| 530 |
|
|
| 531 |
int rc = pcre_exec(re, // The regular expression object |
int rc = pcre_exec(re, // The regular expression object |
| 532 |
&extra, |
&extra, |
| 533 |
(text.data() == NULL) ? "" : text.data(), |
(text.data() == NULL) ? "" : text.data(), |
| 534 |
text.size(), |
text.size(), |
| 535 |
startpos, |
startpos, |
| 536 |
(anchor == UNANCHORED) ? 0 : PCRE_ANCHORED, |
options, |
| 537 |
vec, |
vec, |
| 538 |
vecsize); |
vecsize); |
| 539 |
|
|
| 563 |
int* vec, |
int* vec, |
| 564 |
int vecsize) const { |
int vecsize) const { |
| 565 |
assert((1 + n) * 3 <= vecsize); // results + PCRE workspace |
assert((1 + n) * 3 <= vecsize); // results + PCRE workspace |
| 566 |
int matches = TryMatch(text, 0, anchor, vec, vecsize); |
int matches = TryMatch(text, 0, anchor, true, vec, vecsize); |
| 567 |
assert(matches >= 0); // TryMatch never returns negatives |
assert(matches >= 0); // TryMatch never returns negatives |
| 568 |
if (matches == 0) |
if (matches == 0) |
| 569 |
return false; |
return false; |