/[pcre]/code/trunk/pcrecpp.cc
ViewVC logotype

Diff of /code/trunk/pcrecpp.cc

Parent Directory | Revision Log | View Patch

revision 91 by nigel, Sat Feb 24 21:41:34 2007 UTC | revision 257 by ph10, Wed Sep 19 09:11:19 2007 UTC
# Line 29  Line 29 
29  //  //
30  // Author: Sanjay Ghemawat  // Author: Sanjay Ghemawat
31    
32    #ifdef HAVE_CONFIG_H
33    #include "config.h"
34    #endif
35    
36  #include <stdlib.h>  #include <stdlib.h>
37  #include <stdio.h>  #include <stdio.h>
38  #include <ctype.h>  #include <ctype.h>
# Line 37  Line 41 
41  #include <errno.h>  #include <errno.h>
42  #include <string>  #include <string>
43  #include <algorithm>  #include <algorithm>
44  #include "config.h"  
45  // We need this to compile the proper dll on windows/msys.  This is copied  #include "pcrecpp_internal.h"
 // from pcre_internal.h.  It would probably be better just to include that.  
 #define PCRE_DEFINITION  /* Win32 __declspec(export) trigger for .dll */  
46  #include "pcre.h"  #include "pcre.h"
 #include "pcre_stringpiece.h"  
47  #include "pcrecpp.h"  #include "pcrecpp.h"
48    #include "pcre_stringpiece.h"
49    
50    
51  namespace pcrecpp {  namespace pcrecpp {
# Line 53  static const int kMaxArgs = 16; Line 55  static const int kMaxArgs = 16;
55  static const int kVecSize = (1 + kMaxArgs) * 3;  // results + PCRE workspace  static const int kVecSize = (1 + kMaxArgs) * 3;  // results + PCRE workspace
56    
57  // Special object that stands-in for no argument  // Special object that stands-in for no argument
58  Arg no_arg((void*)NULL);  PCRECPP_EXP_DEFN Arg no_arg((void*)NULL);
59    
60  // If a regular expression has no error, its error_ field points here  // If a regular expression has no error, its error_ field points here
61  static const string empty_string;  static const string empty_string;
# Line 61  static const string empty_string; Line 63  static const string empty_string;
63  // If the user doesn't ask for any options, we just use this one  // If the user doesn't ask for any options, we just use this one
64  static RE_Options default_options;  static RE_Options default_options;
65    
66  void RE::Init(const char* pat, const RE_Options* options) {  void RE::Init(const string& pat, const RE_Options* options) {
67    pattern_ = pat;    pattern_ = pat;
68    if (options == NULL) {    if (options == NULL) {
69      options_ = default_options;      options_ = default_options;
# Line 74  void RE::Init(const char* pat, const RE_ Line 76  void RE::Init(const char* pat, const RE_
76    
77    re_partial_ = Compile(UNANCHORED);    re_partial_ = Compile(UNANCHORED);
78    if (re_partial_ != NULL) {    if (re_partial_ != NULL) {
79      // Check for complicated patterns.  The following change is      re_full_ = Compile(ANCHOR_BOTH);
     // conservative in that it may treat some "simple" patterns  
     // as "complex" (e.g., if the vertical bar is in a character  
     // class or is escaped).  But it seems good enough.  
     if (strchr(pat, '|') == NULL) {  
       // Simple pattern: we can use position-based checks to perform  
       // fully anchored matches  
       re_full_ = re_partial_;  
     } else {  
       // We need a special pattern for anchored matches  
       re_full_ = Compile(ANCHOR_BOTH);  
     }  
80    }    }
81  }  }
82    
83    void RE::Cleanup() {
84      if (re_full_ != NULL)         (*pcre_free)(re_full_);
85      if (re_partial_ != NULL)      (*pcre_free)(re_partial_);
86      if (error_ != &empty_string)  delete error_;
87    }
88    
89    
90  RE::~RE() {  RE::~RE() {
91    if (re_full_ != NULL && re_full_ != re_partial_) (*pcre_free)(re_full_);    Cleanup();
   if (re_partial_ != NULL)                         (*pcre_free)(re_partial_);  
   if (error_ != &empty_string)                     delete error_;  
92  }  }
93    
94    
95  pcre* RE::Compile(Anchor anchor) {  pcre* RE::Compile(Anchor anchor) {
96    // First, convert RE_Options into pcre options    // First, convert RE_Options into pcre options
97    int pcre_options = 0;    int pcre_options = 0;
# Line 334  bool RE::Replace(const StringPiece& rewr Line 331  bool RE::Replace(const StringPiece& rewr
331    
332  // Returns PCRE_NEWLINE_CRLF, PCRE_NEWLINE_CR, or PCRE_NEWLINE_LF.  // Returns PCRE_NEWLINE_CRLF, PCRE_NEWLINE_CR, or PCRE_NEWLINE_LF.
333  // Note that PCRE_NEWLINE_CRLF is defined to be P_N_CR | P_N_LF.  // Note that PCRE_NEWLINE_CRLF is defined to be P_N_CR | P_N_LF.
334    // Modified by PH to add PCRE_NEWLINE_ANY and PCRE_NEWLINE_ANYCRLF.
335    
336  static int NewlineMode(int pcre_options) {  static int NewlineMode(int pcre_options) {
337    // TODO: if we can make it threadsafe, cache this var    // TODO: if we can make it threadsafe, cache this var
338    int newline_mode = 0;    int newline_mode = 0;
339    /* if (newline_mode) return newline_mode; */  // do this once it's cached    /* if (newline_mode) return newline_mode; */  // do this once it's cached
340    if (pcre_options & (PCRE_NEWLINE_CRLF|PCRE_NEWLINE_CR|PCRE_NEWLINE_LF)) {    if (pcre_options & (PCRE_NEWLINE_CRLF|PCRE_NEWLINE_CR|PCRE_NEWLINE_LF|
341                          PCRE_NEWLINE_ANY|PCRE_NEWLINE_ANYCRLF)) {
342      newline_mode = (pcre_options &      newline_mode = (pcre_options &
343                      (PCRE_NEWLINE_CRLF|PCRE_NEWLINE_CR|PCRE_NEWLINE_LF));                      (PCRE_NEWLINE_CRLF|PCRE_NEWLINE_CR|PCRE_NEWLINE_LF|
344                         PCRE_NEWLINE_ANY|PCRE_NEWLINE_ANYCRLF));
345    } else {    } else {
346      int newline;      int newline;
347      pcre_config(PCRE_CONFIG_NEWLINE, &newline);      pcre_config(PCRE_CONFIG_NEWLINE, &newline);
# Line 350  static int NewlineMode(int pcre_options) Line 351  static int NewlineMode(int pcre_options)
351        newline_mode = PCRE_NEWLINE_CR;        newline_mode = PCRE_NEWLINE_CR;
352      else if (newline == 3338)      else if (newline == 3338)
353        newline_mode = PCRE_NEWLINE_CRLF;        newline_mode = PCRE_NEWLINE_CRLF;
354        else if (newline == -1)
355          newline_mode = PCRE_NEWLINE_ANY;
356        else if (newline == -2)
357          newline_mode = PCRE_NEWLINE_ANYCRLF;
358      else      else
359        assert("" == "Unexpected return value from pcre_config(NEWLINE)");        assert("" == "Unexpected return value from pcre_config(NEWLINE)");
360    }    }
# Line 379  int RE::GlobalReplace(const StringPiece& Line 384  int RE::GlobalReplace(const StringPiece&
384        // Note it's better to call pcre_fullinfo() than to examine        // Note it's better to call pcre_fullinfo() than to examine
385        // all_options(), since options_ could have changed between        // all_options(), since options_ could have changed between
386        // compile-time and now, but this is simpler and safe enough.        // compile-time and now, but this is simpler and safe enough.
387          // Modified by PH to add ANY and ANYCRLF.
388        if (start+1 < static_cast<int>(str->length()) &&        if (start+1 < static_cast<int>(str->length()) &&
389            (*str)[start] == '\r' && (*str)[start+1] == '\n' &&            (*str)[start] == '\r' && (*str)[start+1] == '\n' &&
390            NewlineMode(options_.all_options()) == PCRE_NEWLINE_CRLF) {            (NewlineMode(options_.all_options()) == PCRE_NEWLINE_CRLF ||
391               NewlineMode(options_.all_options()) == PCRE_NEWLINE_ANY ||
392               NewlineMode(options_.all_options()) == PCRE_NEWLINE_ANYCRLF)
393              ) {
394          matchend++;          matchend++;
395        }        }
396        // We also need to advance more than one char if we're in utf8 mode.        // We also need to advance more than one char if we're in utf8 mode.
# Line 424  bool RE::Extract(const StringPiece& rewr Line 433  bool RE::Extract(const StringPiece& rewr
433    return Rewrite(out, rewrite, text, vec, matches);    return Rewrite(out, rewrite, text, vec, matches);
434  }  }
435    
436    /*static*/ string RE::QuoteMeta(const StringPiece& unquoted) {
437      string result;
438    
439      // Escape any ascii character not in [A-Za-z_0-9].
440      //
441      // Note that it's legal to escape a character even if it has no
442      // special meaning in a regular expression -- so this function does
443      // that.  (This also makes it identical to the perl function of the
444      // same name; see `perldoc -f quotemeta`.)
445      for (int ii = 0; ii < unquoted.size(); ++ii) {
446        // Note that using 'isalnum' here raises the benchmark time from
447        // 32ns to 58ns:
448        if ((unquoted[ii] < 'a' || unquoted[ii] > 'z') &&
449            (unquoted[ii] < 'A' || unquoted[ii] > 'Z') &&
450            (unquoted[ii] < '0' || unquoted[ii] > '9') &&
451            unquoted[ii] != '_' &&
452            // If this is the part of a UTF8 or Latin1 character, we need
453            // to copy this byte without escaping.  Experimentally this is
454            // what works correctly with the regexp library.
455            !(unquoted[ii] & 128)) {
456          result += '\\';
457        }
458        result += unquoted[ii];
459      }
460    
461      return result;
462    }
463    
464  /***** Actual matching and rewriting code *****/  /***** Actual matching and rewriting code *****/
465    
466  int RE::TryMatch(const StringPiece& text,  int RE::TryMatch(const StringPiece& text,
# Line 437  int RE::TryMatch(const StringPiece& text Line 474  int RE::TryMatch(const StringPiece& text
474      return 0;      return 0;
475    }    }
476    
477    pcre_extra extra = { 0 };    pcre_extra extra = { 0, 0, 0, 0, 0, 0 };
478    if (options_.match_limit() > 0) {    if (options_.match_limit() > 0) {
479      extra.flags |= PCRE_EXTRA_MATCH_LIMIT;      extra.flags |= PCRE_EXTRA_MATCH_LIMIT;
480      extra.match_limit = options_.match_limit();      extra.match_limit = options_.match_limit();
# Line 470  int RE::TryMatch(const StringPiece& text Line 507  int RE::TryMatch(const StringPiece& text
507      rc = vecsize / 2;      rc = vecsize / 2;
508    }    }
509    
   if ((anchor == ANCHOR_BOTH) && (re_full_ == re_partial_)) {  
     // We need an extra check to make sure that the match extended  
     // to the end of the input string  
     assert(vec[0] == 0);                 // PCRE_ANCHORED forces starting match  
     if (vec[1] != text.size()) return 0; // Did not get ending match  
   }  
   
510    return rc;    return rc;
511  }  }
512    
# Line 681  bool Arg::parse_short_radix(const char* Line 711  bool Arg::parse_short_radix(const char*
711    long r;    long r;
712    if (!parse_long_radix(str, n, &r, radix)) return false; // Could not parse    if (!parse_long_radix(str, n, &r, radix)) return false; // Could not parse
713    if (r < SHRT_MIN || r > SHRT_MAX) return false;       // Out of range    if (r < SHRT_MIN || r > SHRT_MAX) return false;       // Out of range
714    *(reinterpret_cast<short*>(dest)) = r;    *(reinterpret_cast<short*>(dest)) = static_cast<short>(r);
715    return true;    return true;
716  }  }
717    
# Line 692  bool Arg::parse_ushort_radix(const char* Line 722  bool Arg::parse_ushort_radix(const char*
722    unsigned long r;    unsigned long r;
723    if (!parse_ulong_radix(str, n, &r, radix)) return false; // Could not parse    if (!parse_ulong_radix(str, n, &r, radix)) return false; // Could not parse
724    if (r > USHRT_MAX) return false;                      // Out of range    if (r > USHRT_MAX) return false;                      // Out of range
725    *(reinterpret_cast<unsigned short*>(dest)) = r;    *(reinterpret_cast<unsigned short*>(dest)) = static_cast<unsigned short>(r);
726    return true;    return true;
727  }  }
728    
# Line 734  bool Arg::parse_longlong_radix(const cha Line 764  bool Arg::parse_longlong_radix(const cha
764    long long r = strtoq(str, &end, radix);    long long r = strtoq(str, &end, radix);
765  #elif defined HAVE_STRTOLL  #elif defined HAVE_STRTOLL
766    long long r = strtoll(str, &end, radix);    long long r = strtoll(str, &end, radix);
767    #elif defined HAVE__STRTOI64
768      long long r = _strtoi64(str, &end, radix);
769  #else  #else
770  #error parse_longlong_radix: cannot convert input to a long-long  #error parse_longlong_radix: cannot convert input to a long-long
771  #endif  #endif
# Line 761  bool Arg::parse_ulonglong_radix(const ch Line 793  bool Arg::parse_ulonglong_radix(const ch
793    unsigned long long r = strtouq(str, &end, radix);    unsigned long long r = strtouq(str, &end, radix);
794  #elif defined HAVE_STRTOLL  #elif defined HAVE_STRTOLL
795    unsigned long long r = strtoull(str, &end, radix);    unsigned long long r = strtoull(str, &end, radix);
796    #elif defined HAVE__STRTOI64
797      unsigned long long r = _strtoui64(str, &end, radix);
798  #else  #else
799  #error parse_ulonglong_radix: cannot convert input to a long-long  #error parse_ulonglong_radix: cannot convert input to a long-long
800  #endif  #endif
# Line 809  bool Arg::parse_float(const char* str, i Line 843  bool Arg::parse_float(const char* str, i
843      return parse_##name##_radix(str, n, dest, 0);                       \      return parse_##name##_radix(str, n, dest, 0);                       \
844    }    }
845    
846  DEFINE_INTEGER_PARSERS(short);  DEFINE_INTEGER_PARSERS(short)      /*                                   */
847  DEFINE_INTEGER_PARSERS(ushort);  DEFINE_INTEGER_PARSERS(ushort)     /*                                   */
848  DEFINE_INTEGER_PARSERS(int);  DEFINE_INTEGER_PARSERS(int)        /* Don't use semicolons after these  */
849  DEFINE_INTEGER_PARSERS(uint);  DEFINE_INTEGER_PARSERS(uint)       /* statements because they can cause */
850  DEFINE_INTEGER_PARSERS(long);  DEFINE_INTEGER_PARSERS(long)       /* compiler warnings if the checking */
851  DEFINE_INTEGER_PARSERS(ulong);  DEFINE_INTEGER_PARSERS(ulong)      /* level is turned up high enough.   */
852  DEFINE_INTEGER_PARSERS(longlong);  DEFINE_INTEGER_PARSERS(longlong)   /*                                   */
853  DEFINE_INTEGER_PARSERS(ulonglong);  DEFINE_INTEGER_PARSERS(ulonglong)  /*                                   */
854    
855  #undef DEFINE_INTEGER_PARSERS  #undef DEFINE_INTEGER_PARSERS
856    

Legend:
Removed from v.91  
changed lines
  Added in v.257

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12