/[pcre]/code/branches/pcre16/pcrecpp.cc
ViewVC logotype

Diff of /code/branches/pcre16/pcrecpp.cc

Parent Directory | Revision Log | View Patch

code/trunk/pcrecpp.cc revision 356 by ph10, Tue Jul 8 14:14:34 2008 UTC code/branches/pcre16/pcrecpp.cc revision 755 by ph10, Mon Nov 21 10:41:54 2011 UTC
# Line 1  Line 1 
1  // Copyright (c) 2005, Google Inc.  // Copyright (c) 2010, Google Inc.
2  // All rights reserved.  // All rights reserved.
3  //  //
4  // Redistribution and use in source and binary forms, with or without  // Redistribution and use in source and binary forms, with or without
# Line 37  Line 37 
37  #include <stdio.h>  #include <stdio.h>
38  #include <ctype.h>  #include <ctype.h>
39  #include <limits.h>      /* for SHRT_MIN, USHRT_MAX, etc */  #include <limits.h>      /* for SHRT_MIN, USHRT_MAX, etc */
40    #include <string.h>      /* for memcpy */
41  #include <assert.h>  #include <assert.h>
42  #include <errno.h>  #include <errno.h>
43  #include <string>  #include <string>
# Line 331  bool RE::FindAndConsume(StringPiece* inp Line 332  bool RE::FindAndConsume(StringPiece* inp
332  bool RE::Replace(const StringPiece& rewrite,  bool RE::Replace(const StringPiece& rewrite,
333                   string *str) const {                   string *str) const {
334    int vec[kVecSize];    int vec[kVecSize];
335    int matches = TryMatch(*str, 0, UNANCHORED, vec, kVecSize);    int matches = TryMatch(*str, 0, UNANCHORED, true, vec, kVecSize);
336    if (matches == 0)    if (matches == 0)
337      return false;      return false;
338    
# Line 384  int RE::GlobalReplace(const StringPiece& Line 385  int RE::GlobalReplace(const StringPiece&
385    string out;    string out;
386    int start = 0;    int start = 0;
387    int lastend = -1;    int lastend = -1;
388      bool last_match_was_empty_string = false;
389    
390    while (start <= static_cast<int>(str->length())) {    while (start <= static_cast<int>(str->length())) {
391      int matches = TryMatch(*str, start, UNANCHORED, vec, kVecSize);      // If the previous match was for the empty string, we shouldn't
392      if (matches <= 0)      // just match again: we'll match in the same way and get an
393        break;      // infinite loop.  Instead, we do the match in a special way:
394      int matchstart = vec[0], matchend = vec[1];      // anchored -- to force another try at the same position --
395      assert(matchstart >= start);      // and with a flag saying that this time, ignore empty matches.
396      assert(matchend >= matchstart);      // If this special match returns, that means there's a non-empty
397      if (matchstart == matchend && matchstart == lastend) {      // match at this position as well, and we can continue.  If not,
398        // advance one character if we matched an empty string at the same      // we do what perl does, and just advance by one.
399        // place as the last match occurred      // Notice that perl prints '@@@' for this;
400        matchend = start + 1;      //    perl -le '$_ = "aa"; s/b*|aa/@/g; print'
401        // If the current char is CR and we're in CRLF mode, skip LF too.      int matches;
402        // Note it's better to call pcre_fullinfo() than to examine      if (last_match_was_empty_string) {
403        // all_options(), since options_ could have changed between        matches = TryMatch(*str, start, ANCHOR_START, false, vec, kVecSize);
404        // compile-time and now, but this is simpler and safe enough.        if (matches <= 0) {
405        // Modified by PH to add ANY and ANYCRLF.          int matchend = start + 1;     // advance one character.
406        if (start+1 < static_cast<int>(str->length()) &&          // If the current char is CR and we're in CRLF mode, skip LF too.
407            (*str)[start] == '\r' && (*str)[start+1] == '\n' &&          // Note it's better to call pcre_fullinfo() than to examine
408            (NewlineMode(options_.all_options()) == PCRE_NEWLINE_CRLF ||          // all_options(), since options_ could have changed between
409             NewlineMode(options_.all_options()) == PCRE_NEWLINE_ANY ||          // compile-time and now, but this is simpler and safe enough.
410             NewlineMode(options_.all_options()) == PCRE_NEWLINE_ANYCRLF)          // Modified by PH to add ANY and ANYCRLF.
411            ) {          if (matchend < static_cast<int>(str->length()) &&
412          matchend++;              (*str)[start] == '\r' && (*str)[matchend] == '\n' &&
413        }              (NewlineMode(options_.all_options()) == PCRE_NEWLINE_CRLF ||
414        // We also need to advance more than one char if we're in utf8 mode.               NewlineMode(options_.all_options()) == PCRE_NEWLINE_ANY ||
415  #ifdef SUPPORT_UTF8               NewlineMode(options_.all_options()) == PCRE_NEWLINE_ANYCRLF)) {
       if (options_.utf8()) {  
         while (matchend < static_cast<int>(str->length()) &&  
                ((*str)[matchend] & 0xc0) == 0x80)  
416            matchend++;            matchend++;
417        }          }
418            // We also need to advance more than one char if we're in utf8 mode.
419    #ifdef SUPPORT_UTF8
420            if (options_.utf8()) {
421              while (matchend < static_cast<int>(str->length()) &&
422                     ((*str)[matchend] & 0xc0) == 0x80)
423                matchend++;
424            }
425  #endif  #endif
426        if (matchend <= static_cast<int>(str->length()))          if (start < static_cast<int>(str->length()))
427          out.append(*str, start, matchend - start);            out.append(*str, start, matchend - start);
428        start = matchend;          start = matchend;
429            last_match_was_empty_string = false;
430            continue;
431          }
432      } else {      } else {
433        out.append(*str, start, matchstart - start);        matches = TryMatch(*str, start, UNANCHORED, true, vec, kVecSize);
434        Rewrite(&out, rewrite, *str, vec, matches);        if (matches <= 0)
435        start = matchend;          break;
       lastend = matchend;  
       count++;  
436      }      }
437        int matchstart = vec[0], matchend = vec[1];
438        assert(matchstart >= start);
439        assert(matchend >= matchstart);
440        out.append(*str, start, matchstart - start);
441        Rewrite(&out, rewrite, *str, vec, matches);
442        start = matchend;
443        lastend = matchend;
444        count++;
445        last_match_was_empty_string = (matchstart == matchend);
446    }    }
447    
448    if (count == 0)    if (count == 0)
# Line 442  bool RE::Extract(const StringPiece& rewr Line 458  bool RE::Extract(const StringPiece& rewr
458                   const StringPiece& text,                   const StringPiece& text,
459                   string *out) const {                   string *out) const {
460    int vec[kVecSize];    int vec[kVecSize];
461    int matches = TryMatch(text, 0, UNANCHORED, vec, kVecSize);    int matches = TryMatch(text, 0, UNANCHORED, true, vec, kVecSize);
462    if (matches == 0)    if (matches == 0)
463      return false;      return false;
464    out->erase();    out->erase();
# Line 488  bool RE::Extract(const StringPiece& rewr Line 504  bool RE::Extract(const StringPiece& rewr
504  int RE::TryMatch(const StringPiece& text,  int RE::TryMatch(const StringPiece& text,
505                   int startpos,                   int startpos,
506                   Anchor anchor,                   Anchor anchor,
507                     bool empty_ok,
508                   int *vec,                   int *vec,
509                   int vecsize) const {                   int vecsize) const {
510    pcre* re = (anchor == ANCHOR_BOTH) ? re_full_ : re_partial_;    pcre* re = (anchor == ANCHOR_BOTH) ? re_full_ : re_partial_;
# Line 505  int RE::TryMatch(const StringPiece& text Line 522  int RE::TryMatch(const StringPiece& text
522      extra.flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;      extra.flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
523      extra.match_limit_recursion = options_.match_limit_recursion();      extra.match_limit_recursion = options_.match_limit_recursion();
524    }    }
525    
526      int options = 0;
527      if (anchor != UNANCHORED)
528        options |= PCRE_ANCHORED;
529      if (!empty_ok)
530        options |= PCRE_NOTEMPTY;
531    
532    int rc = pcre_exec(re,              // The regular expression object    int rc = pcre_exec(re,              // The regular expression object
533                       &extra,                       &extra,
534                       (text.data() == NULL) ? "" : text.data(),                       (text.data() == NULL) ? "" : text.data(),
535                       text.size(),                       text.size(),
536                       startpos,                       startpos,
537                       (anchor == UNANCHORED) ? 0 : PCRE_ANCHORED,                       options,
538                       vec,                       vec,
539                       vecsize);                       vecsize);
540    
# Line 540  bool RE::DoMatchImpl(const StringPiece& Line 564  bool RE::DoMatchImpl(const StringPiece&
564                       int* vec,                       int* vec,
565                       int vecsize) const {                       int vecsize) const {
566    assert((1 + n) * 3 <= vecsize);  // results + PCRE workspace    assert((1 + n) * 3 <= vecsize);  // results + PCRE workspace
567    int matches = TryMatch(text, 0, anchor, vec, vecsize);    int matches = TryMatch(text, 0, anchor, true, vec, vecsize);
568    assert(matches >= 0);  // TryMatch never returns negatives    assert(matches >= 0);  // TryMatch never returns negatives
569    if (matches == 0)    if (matches == 0)
570      return false;      return false;
# Line 582  bool RE::DoMatch(const StringPiece& text Line 606  bool RE::DoMatch(const StringPiece& text
606                                         // (as for kVecSize)                                         // (as for kVecSize)
607    int space[21];   // use stack allocation for small vecsize (common case)    int space[21];   // use stack allocation for small vecsize (common case)
608    int* vec = vecsize <= 21 ? space : new int[vecsize];    int* vec = vecsize <= 21 ? space : new int[vecsize];
609    bool retval = DoMatchImpl(text, anchor, consumed, args, n, vec, vecsize);    bool retval = DoMatchImpl(text, anchor, consumed, args, n, vec, (int)vecsize);
610    if (vec != space) delete [] vec;    if (vec != space) delete [] vec;
611    return retval;    return retval;
612  }  }
# Line 798  bool Arg::parse_longlong_radix(const cha Line 822  bool Arg::parse_longlong_radix(const cha
822    long long r = strtoll(str, &end, radix);    long long r = strtoll(str, &end, radix);
823  #elif defined HAVE__STRTOI64  #elif defined HAVE__STRTOI64
824    long long r = _strtoi64(str, &end, radix);    long long r = _strtoi64(str, &end, radix);
825    #elif defined HAVE_STRTOIMAX
826      long long r = strtoimax(str, &end, radix);
827  #else  #else
828  #error parse_longlong_radix: cannot convert input to a long-long  #error parse_longlong_radix: cannot convert input to a long-long
829  #endif  #endif
# Line 828  bool Arg::parse_ulonglong_radix(const ch Line 854  bool Arg::parse_ulonglong_radix(const ch
854    unsigned long long r = strtoull(str, &end, radix);    unsigned long long r = strtoull(str, &end, radix);
855  #elif defined HAVE__STRTOI64  #elif defined HAVE__STRTOI64
856    unsigned long long r = _strtoui64(str, &end, radix);    unsigned long long r = _strtoui64(str, &end, radix);
857    #elif defined HAVE_STRTOIMAX
858      unsigned long long r = strtoumax(str, &end, radix);
859  #else  #else
860  #error parse_ulonglong_radix: cannot convert input to a long-long  #error parse_ulonglong_radix: cannot convert input to a long-long
861  #endif  #endif

Legend:
Removed from v.356  
changed lines
  Added in v.755

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12