| 30 |
// Author: Sanjay Ghemawat |
// Author: Sanjay Ghemawat |
| 31 |
|
|
| 32 |
#ifdef HAVE_CONFIG_H |
#ifdef HAVE_CONFIG_H |
| 33 |
# include <config.h> |
#include "config.h" |
| 34 |
|
#endif |
| 35 |
|
|
| 36 |
|
#ifdef HAVE_WINDOWS_H |
| 37 |
|
#define HAVE_STRTOQ 1 |
| 38 |
|
#define strtoll _strtoi64 /* signed 64-bit parse: maps to the signed CRT routine */ |
| 39 |
|
#define strtoull _strtoui64 /* unsigned 64-bit parse: maps to the unsigned CRT routine */ |
| 40 |
#endif |
#endif |
| 41 |
|
|
| 42 |
#include <stdlib.h> |
#include <stdlib.h> |
| 47 |
#include <errno.h> |
#include <errno.h> |
| 48 |
#include <string> |
#include <string> |
| 49 |
#include <algorithm> |
#include <algorithm> |
| 50 |
// We need this to compile the proper dll on windows/msys. This is copied |
|
| 51 |
// from pcre_internal.h. It would probably be better just to include that. |
#include "pcrecpp_internal.h" |
| 52 |
#define PCRE_DEFINITION /* Win32 __declspec(export) trigger for .dll */ |
#include "pcre.h" |
|
#include <pcre.h> |
|
|
#include "pcre_stringpiece.h" |
|
| 53 |
#include "pcrecpp.h" |
#include "pcrecpp.h" |
| 54 |
|
#include "pcre_stringpiece.h" |
| 55 |
|
|
| 56 |
|
|
| 57 |
namespace pcrecpp { |
namespace pcrecpp { |
| 61 |
static const int kVecSize = (1 + kMaxArgs) * 3; // results + PCRE workspace |
static const int kVecSize = (1 + kMaxArgs) * 3; // results + PCRE workspace |
| 62 |
|
|
| 63 |
// Special object that stands-in for no argument |
// Special object that stands-in for no argument |
| 64 |
Arg no_arg((void*)NULL); |
PCRECPP_EXP_DEFN Arg no_arg((void*)NULL); |
| 65 |
|
|
| 66 |
// If a regular expression has no error, its error_ field points here |
// If a regular expression has no error, its error_ field points here |
| 67 |
static const string empty_string; |
static const string empty_string; |
| 82 |
|
|
| 83 |
re_partial_ = Compile(UNANCHORED); |
re_partial_ = Compile(UNANCHORED); |
| 84 |
if (re_partial_ != NULL) { |
if (re_partial_ != NULL) { |
| 85 |
// Check for complicated patterns. The following change is |
re_full_ = Compile(ANCHOR_BOTH); |
|
// conservative in that it may treat some "simple" patterns |
|
|
// as "complex" (e.g., if the vertical bar is in a character |
|
|
// class or is escaped). But it seems good enough. |
|
|
if (strchr(pat.c_str(), '|') == NULL) { |
|
|
// Simple pattern: we can use position-based checks to perform |
|
|
// fully anchored matches |
|
|
re_full_ = re_partial_; |
|
|
} else { |
|
|
// We need a special pattern for anchored matches |
|
|
re_full_ = Compile(ANCHOR_BOTH); |
|
|
} |
|
| 86 |
} |
} |
| 87 |
} |
} |
| 88 |
|
|
| 89 |
void RE::Cleanup() { |
void RE::Cleanup() { |
| 90 |
if (re_full_ != NULL && re_full_ != re_partial_) (*pcre_free)(re_full_); |
if (re_full_ != NULL) (*pcre_free)(re_full_); |
| 91 |
if (re_partial_ != NULL) (*pcre_free)(re_partial_); |
if (re_partial_ != NULL) (*pcre_free)(re_partial_); |
| 92 |
if (error_ != &empty_string) delete error_; |
if (error_ != &empty_string) delete error_; |
| 93 |
} |
} |
| 94 |
|
|
| 95 |
|
|
| 337 |
|
|
| 338 |
// Returns PCRE_NEWLINE_CRLF, PCRE_NEWLINE_CR, or PCRE_NEWLINE_LF. |
// Returns PCRE_NEWLINE_CRLF, PCRE_NEWLINE_CR, or PCRE_NEWLINE_LF. |
| 339 |
// Note that PCRE_NEWLINE_CRLF is defined to be P_N_CR | P_N_LF. |
// Note that PCRE_NEWLINE_CRLF is defined to be P_N_CR | P_N_LF. |
| 340 |
|
// Modified by PH to add PCRE_NEWLINE_ANY and PCRE_NEWLINE_ANYCRLF. |
| 341 |
|
|
| 342 |
static int NewlineMode(int pcre_options) { |
static int NewlineMode(int pcre_options) { |
| 343 |
// TODO: if we can make it threadsafe, cache this var |
// TODO: if we can make it threadsafe, cache this var |
| 344 |
int newline_mode = 0; |
int newline_mode = 0; |
| 345 |
/* if (newline_mode) return newline_mode; */ // do this once it's cached |
/* if (newline_mode) return newline_mode; */ // do this once it's cached |
| 346 |
if (pcre_options & (PCRE_NEWLINE_CRLF|PCRE_NEWLINE_CR|PCRE_NEWLINE_LF)) { |
if (pcre_options & (PCRE_NEWLINE_CRLF|PCRE_NEWLINE_CR|PCRE_NEWLINE_LF| |
| 347 |
|
PCRE_NEWLINE_ANY|PCRE_NEWLINE_ANYCRLF)) { |
| 348 |
newline_mode = (pcre_options & |
newline_mode = (pcre_options & |
| 349 |
(PCRE_NEWLINE_CRLF|PCRE_NEWLINE_CR|PCRE_NEWLINE_LF)); |
(PCRE_NEWLINE_CRLF|PCRE_NEWLINE_CR|PCRE_NEWLINE_LF| |
| 350 |
|
PCRE_NEWLINE_ANY|PCRE_NEWLINE_ANYCRLF)); |
| 351 |
} else { |
} else { |
| 352 |
int newline; |
int newline; |
| 353 |
pcre_config(PCRE_CONFIG_NEWLINE, &newline); |
pcre_config(PCRE_CONFIG_NEWLINE, &newline); |
| 357 |
newline_mode = PCRE_NEWLINE_CR; |
newline_mode = PCRE_NEWLINE_CR; |
| 358 |
else if (newline == 3338) |
else if (newline == 3338) |
| 359 |
newline_mode = PCRE_NEWLINE_CRLF; |
newline_mode = PCRE_NEWLINE_CRLF; |
| 360 |
|
else if (newline == -1) |
| 361 |
|
newline_mode = PCRE_NEWLINE_ANY; |
| 362 |
|
else if (newline == -2) |
| 363 |
|
newline_mode = PCRE_NEWLINE_ANYCRLF; |
| 364 |
else |
else |
| 365 |
assert("" == "Unexpected return value from pcre_config(NEWLINE)"); |
assert("" == "Unexpected return value from pcre_config(NEWLINE)"); |
| 366 |
} |
} |
| 390 |
// Note it's better to call pcre_fullinfo() than to examine |
// Note it's better to call pcre_fullinfo() than to examine |
| 391 |
// all_options(), since options_ could have changed between |
// all_options(), since options_ could have changed between |
| 392 |
// compile-time and now, but this is simpler and safe enough. |
// compile-time and now, but this is simpler and safe enough. |
| 393 |
|
// Modified by PH to add ANY and ANYCRLF. |
| 394 |
if (start+1 < static_cast<int>(str->length()) && |
if (start+1 < static_cast<int>(str->length()) && |
| 395 |
(*str)[start] == '\r' && (*str)[start+1] == '\n' && |
(*str)[start] == '\r' && (*str)[start+1] == '\n' && |
| 396 |
NewlineMode(options_.all_options()) == PCRE_NEWLINE_CRLF) { |
(NewlineMode(options_.all_options()) == PCRE_NEWLINE_CRLF || |
| 397 |
|
NewlineMode(options_.all_options()) == PCRE_NEWLINE_ANY || |
| 398 |
|
NewlineMode(options_.all_options()) == PCRE_NEWLINE_ANYCRLF) |
| 399 |
|
) { |
| 400 |
matchend++; |
matchend++; |
| 401 |
} |
} |
| 402 |
// We also need to advance more than one char if we're in utf8 mode. |
// We also need to advance more than one char if we're in utf8 mode. |
| 480 |
return 0; |
return 0; |
| 481 |
} |
} |
| 482 |
|
|
| 483 |
pcre_extra extra = { 0 }; |
pcre_extra extra = { 0, 0, 0, 0, 0, 0 }; |
| 484 |
if (options_.match_limit() > 0) { |
if (options_.match_limit() > 0) { |
| 485 |
extra.flags |= PCRE_EXTRA_MATCH_LIMIT; |
extra.flags |= PCRE_EXTRA_MATCH_LIMIT; |
| 486 |
extra.match_limit = options_.match_limit(); |
extra.match_limit = options_.match_limit(); |
| 513 |
rc = vecsize / 2; |
rc = vecsize / 2; |
| 514 |
} |
} |
| 515 |
|
|
|
if ((anchor == ANCHOR_BOTH) && (re_full_ == re_partial_)) { |
|
|
// We need an extra check to make sure that the match extended |
|
|
// to the end of the input string |
|
|
assert(vec[0] == 0); // PCRE_ANCHORED forces starting match |
|
|
if (vec[1] != text.size()) return 0; // Did not get ending match |
|
|
} |
|
|
|
|
| 516 |
return rc; |
return rc; |
| 517 |
} |
} |
| 518 |
|
|
| 717 |
long r; |
long r; |
| 718 |
if (!parse_long_radix(str, n, &r, radix)) return false; // Could not parse |
if (!parse_long_radix(str, n, &r, radix)) return false; // Could not parse |
| 719 |
if (r < SHRT_MIN || r > SHRT_MAX) return false; // Out of range |
if (r < SHRT_MIN || r > SHRT_MAX) return false; // Out of range |
| 720 |
*(reinterpret_cast<short*>(dest)) = r; |
*(reinterpret_cast<short*>(dest)) = static_cast<short>(r); |
| 721 |
return true; |
return true; |
| 722 |
} |
} |
| 723 |
|
|
| 728 |
unsigned long r; |
unsigned long r; |
| 729 |
if (!parse_ulong_radix(str, n, &r, radix)) return false; // Could not parse |
if (!parse_ulong_radix(str, n, &r, radix)) return false; // Could not parse |
| 730 |
if (r > USHRT_MAX) return false; // Out of range |
if (r > USHRT_MAX) return false; // Out of range |
| 731 |
*(reinterpret_cast<unsigned short*>(dest)) = r; |
*(reinterpret_cast<unsigned short*>(dest)) = static_cast<unsigned short>(r); |
| 732 |
return true; |
return true; |
| 733 |
} |
} |
| 734 |
|
|