/[pcre]/code/trunk/pcrecpp.cc
ViewVC logotype

Diff of /code/trunk/pcrecpp.cc

Parent Directory | Revision Log | View Patch

revision 356 by ph10, Tue Jul 8 14:14:34 2008 UTC revision 474 by ph10, Sat Jan 2 16:30:46 2010 UTC
# Line 1  Line 1 
1  // Copyright (c) 2005, Google Inc.  // Copyright (c) 2010, Google Inc.
2  // All rights reserved.  // All rights reserved.
3  //  //
4  // Redistribution and use in source and binary forms, with or without  // Redistribution and use in source and binary forms, with or without
# Line 331  bool RE::FindAndConsume(StringPiece* inp Line 331  bool RE::FindAndConsume(StringPiece* inp
331  bool RE::Replace(const StringPiece& rewrite,  bool RE::Replace(const StringPiece& rewrite,
332                   string *str) const {                   string *str) const {
333    int vec[kVecSize];    int vec[kVecSize];
334    int matches = TryMatch(*str, 0, UNANCHORED, vec, kVecSize);    int matches = TryMatch(*str, 0, UNANCHORED, true, vec, kVecSize);
335    if (matches == 0)    if (matches == 0)
336      return false;      return false;
337    
# Line 384  int RE::GlobalReplace(const StringPiece& Line 384  int RE::GlobalReplace(const StringPiece&
384    string out;    string out;
385    int start = 0;    int start = 0;
386    int lastend = -1;    int lastend = -1;
387      bool last_match_was_empty_string = false;
388    
389    while (start <= static_cast<int>(str->length())) {    while (start <= static_cast<int>(str->length())) {
390      int matches = TryMatch(*str, start, UNANCHORED, vec, kVecSize);      // If the previous match was for the empty string, we shouldn't
391      if (matches <= 0)      // just match again: we'll match in the same way and get an
392        break;      // infinite loop.  Instead, we do the match in a special way:
393      int matchstart = vec[0], matchend = vec[1];      // anchored -- to force another try at the same position --
394      assert(matchstart >= start);      // and with a flag saying that this time, ignore empty matches.
395      assert(matchend >= matchstart);      // If this special match returns, that means there's a non-empty
396      if (matchstart == matchend && matchstart == lastend) {      // match at this position as well, and we can continue.  If not,
397        // advance one character if we matched an empty string at the same      // we do what perl does, and just advance by one.
398        // place as the last match occurred      // Notice that perl prints '@@@' for this;
399        matchend = start + 1;      //    perl -le '$_ = "aa"; s/b*|aa/@/g; print'
400        // If the current char is CR and we're in CRLF mode, skip LF too.      int matches;
401        // Note it's better to call pcre_fullinfo() than to examine      if (last_match_was_empty_string) {
402        // all_options(), since options_ could have changed between        matches = TryMatch(*str, start, ANCHOR_START, false, vec, kVecSize);
403        // compile-time and now, but this is simpler and safe enough.        if (matches <= 0) {
404        // Modified by PH to add ANY and ANYCRLF.          int matchend = start + 1;     // advance one character.
405        if (start+1 < static_cast<int>(str->length()) &&          // If the current char is CR and we're in CRLF mode, skip LF too.
406            (*str)[start] == '\r' && (*str)[start+1] == '\n' &&          // Note it's better to call pcre_fullinfo() than to examine
407            (NewlineMode(options_.all_options()) == PCRE_NEWLINE_CRLF ||          // all_options(), since options_ could have changed between
408             NewlineMode(options_.all_options()) == PCRE_NEWLINE_ANY ||          // compile-time and now, but this is simpler and safe enough.
409             NewlineMode(options_.all_options()) == PCRE_NEWLINE_ANYCRLF)          // Modified by PH to add ANY and ANYCRLF.
410            ) {          if (matchend < static_cast<int>(str->length()) &&
411          matchend++;              (*str)[start] == '\r' && (*str)[matchend] == '\n' &&
412        }              (NewlineMode(options_.all_options()) == PCRE_NEWLINE_CRLF ||
413        // We also need to advance more than one char if we're in utf8 mode.               NewlineMode(options_.all_options()) == PCRE_NEWLINE_ANY ||
414  #ifdef SUPPORT_UTF8               NewlineMode(options_.all_options()) == PCRE_NEWLINE_ANYCRLF)) {
       if (options_.utf8()) {  
         while (matchend < static_cast<int>(str->length()) &&  
                ((*str)[matchend] & 0xc0) == 0x80)  
415            matchend++;            matchend++;
416        }          }
417            // We also need to advance more than one char if we're in utf8 mode.
418    #ifdef SUPPORT_UTF8
419            if (options_.utf8()) {
420              while (matchend < static_cast<int>(str->length()) &&
421                     ((*str)[matchend] & 0xc0) == 0x80)
422                matchend++;
423            }
424  #endif  #endif
425        if (matchend <= static_cast<int>(str->length()))          if (start < static_cast<int>(str->length()))
426          out.append(*str, start, matchend - start);            out.append(*str, start, matchend - start);
427        start = matchend;          start = matchend;
428            last_match_was_empty_string = false;
429            continue;
430          }
431      } else {      } else {
432        out.append(*str, start, matchstart - start);        matches = TryMatch(*str, start, UNANCHORED, true, vec, kVecSize);
433        Rewrite(&out, rewrite, *str, vec, matches);        if (matches <= 0)
434        start = matchend;          break;
       lastend = matchend;  
       count++;  
435      }      }
436        int matchstart = vec[0], matchend = vec[1];
437        assert(matchstart >= start);
438        assert(matchend >= matchstart);
439        out.append(*str, start, matchstart - start);
440        Rewrite(&out, rewrite, *str, vec, matches);
441        start = matchend;
442        lastend = matchend;
443        count++;
444        last_match_was_empty_string = (matchstart == matchend);
445    }    }
446    
447    if (count == 0)    if (count == 0)
# Line 442  bool RE::Extract(const StringPiece& rewr Line 457  bool RE::Extract(const StringPiece& rewr
457                   const StringPiece& text,                   const StringPiece& text,
458                   string *out) const {                   string *out) const {
459    int vec[kVecSize];    int vec[kVecSize];
460    int matches = TryMatch(text, 0, UNANCHORED, vec, kVecSize);    int matches = TryMatch(text, 0, UNANCHORED, true, vec, kVecSize);
461    if (matches == 0)    if (matches == 0)
462      return false;      return false;
463    out->erase();    out->erase();
# Line 488  bool RE::Extract(const StringPiece& rewr Line 503  bool RE::Extract(const StringPiece& rewr
503  int RE::TryMatch(const StringPiece& text,  int RE::TryMatch(const StringPiece& text,
504                   int startpos,                   int startpos,
505                   Anchor anchor,                   Anchor anchor,
506                     bool empty_ok,
507                   int *vec,                   int *vec,
508                   int vecsize) const {                   int vecsize) const {
509    pcre* re = (anchor == ANCHOR_BOTH) ? re_full_ : re_partial_;    pcre* re = (anchor == ANCHOR_BOTH) ? re_full_ : re_partial_;
# Line 505  int RE::TryMatch(const StringPiece& text Line 521  int RE::TryMatch(const StringPiece& text
521      extra.flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;      extra.flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
522      extra.match_limit_recursion = options_.match_limit_recursion();      extra.match_limit_recursion = options_.match_limit_recursion();
523    }    }
524    
525      int options = 0;
526      if (anchor != UNANCHORED)
527        options |= PCRE_ANCHORED;
528      if (!empty_ok)
529        options |= PCRE_NOTEMPTY;
530    
531    int rc = pcre_exec(re,              // The regular expression object    int rc = pcre_exec(re,              // The regular expression object
532                       &extra,                       &extra,
533                       (text.data() == NULL) ? "" : text.data(),                       (text.data() == NULL) ? "" : text.data(),
534                       text.size(),                       text.size(),
535                       startpos,                       startpos,
536                       (anchor == UNANCHORED) ? 0 : PCRE_ANCHORED,                       options,
537                       vec,                       vec,
538                       vecsize);                       vecsize);
539    
# Line 540  bool RE::DoMatchImpl(const StringPiece& Line 563  bool RE::DoMatchImpl(const StringPiece&
563                       int* vec,                       int* vec,
564                       int vecsize) const {                       int vecsize) const {
565    assert((1 + n) * 3 <= vecsize);  // results + PCRE workspace    assert((1 + n) * 3 <= vecsize);  // results + PCRE workspace
566    int matches = TryMatch(text, 0, anchor, vec, vecsize);    int matches = TryMatch(text, 0, anchor, true, vec, vecsize);
567    assert(matches >= 0);  // TryMatch never returns negatives    assert(matches >= 0);  // TryMatch never returns negatives
568    if (matches == 0)    if (matches == 0)
569      return false;      return false;

Legend:
Removed from v.356  
changed lines
  Added in v.474

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12