| 30 |
// Author: Sanjay Ghemawat |
// Author: Sanjay Ghemawat |
| 31 |
|
|
| 32 |
#ifdef HAVE_CONFIG_H |
#ifdef HAVE_CONFIG_H |
| 33 |
# include <config.h> |
#include "config.h" |
| 34 |
#endif |
#endif |
| 35 |
|
|
| 36 |
#include <stdlib.h> |
#include <stdlib.h> |
| 41 |
#include <errno.h> |
#include <errno.h> |
| 42 |
#include <string> |
#include <string> |
| 43 |
#include <algorithm> |
#include <algorithm> |
| 44 |
// We need this to compile the proper dll on windows/msys. This is copied |
|
| 45 |
// from pcre_internal.h. It would probably be better just to include that. |
#include "pcrecpp_internal.h" |
| 46 |
#define PCRE_DEFINITION /* Win32 __declspec(export) trigger for .dll */ |
#include "pcre.h" |
|
#include <pcre.h> |
|
|
#include "pcre_stringpiece.h" |
|
| 47 |
#include "pcrecpp.h" |
#include "pcrecpp.h" |
| 48 |
|
#include "pcre_stringpiece.h" |
| 49 |
|
|
| 50 |
|
|
| 51 |
namespace pcrecpp { |
namespace pcrecpp { |
| 55 |
static const int kVecSize = (1 + kMaxArgs) * 3; // results + PCRE workspace |
static const int kVecSize = (1 + kMaxArgs) * 3; // results + PCRE workspace |
| 56 |
|
|
| 57 |
// Special object that stands in for no argument |
// Special object that stands in for no argument |
| 58 |
Arg no_arg((void*)NULL); |
Arg RE::no_arg((void*)NULL); |
| 59 |
|
|
| 60 |
|
// This is for ABI compatibility with old versions of pcre (pre-7.6), |
| 61 |
|
// which defined a global no_arg variable instead of putting it in the |
| 62 |
|
// RE class. This works on GCC >= 3, at least. We could probably have |
| 63 |
|
// a more inclusive test if we ever needed it. |
| 64 |
|
#if defined(__GNUC__) && __GNUC__ >= 3 |
| 65 |
|
extern Arg no_arg __attribute__((alias("_ZN7pcrecpp2RE6no_argE"))); |
| 66 |
|
#endif |
| 67 |
|
|
| 68 |
// If a regular expression has no error, its error_ field points here |
// If a regular expression has no error, its error_ field points here |
| 69 |
static const string empty_string; |
static const string empty_string; |
| 339 |
|
|
| 340 |
// Returns PCRE_NEWLINE_CRLF, PCRE_NEWLINE_CR, or PCRE_NEWLINE_LF. |
// Returns PCRE_NEWLINE_CRLF, PCRE_NEWLINE_CR, or PCRE_NEWLINE_LF. |
| 341 |
// Note that PCRE_NEWLINE_CRLF is defined to be P_N_CR | P_N_LF. |
// Note that PCRE_NEWLINE_CRLF is defined to be P_N_CR | P_N_LF. |
| 342 |
|
// Modified by PH to add PCRE_NEWLINE_ANY and PCRE_NEWLINE_ANYCRLF. |
| 343 |
|
|
| 344 |
static int NewlineMode(int pcre_options) { |
static int NewlineMode(int pcre_options) { |
| 345 |
// TODO: if we can make it threadsafe, cache this var |
// TODO: if we can make it threadsafe, cache this var |
| 346 |
int newline_mode = 0; |
int newline_mode = 0; |
| 347 |
/* if (newline_mode) return newline_mode; */ // do this once it's cached |
/* if (newline_mode) return newline_mode; */ // do this once it's cached |
| 348 |
if (pcre_options & (PCRE_NEWLINE_CRLF|PCRE_NEWLINE_CR|PCRE_NEWLINE_LF)) { |
if (pcre_options & (PCRE_NEWLINE_CRLF|PCRE_NEWLINE_CR|PCRE_NEWLINE_LF| |
| 349 |
|
PCRE_NEWLINE_ANY|PCRE_NEWLINE_ANYCRLF)) { |
| 350 |
newline_mode = (pcre_options & |
newline_mode = (pcre_options & |
| 351 |
(PCRE_NEWLINE_CRLF|PCRE_NEWLINE_CR|PCRE_NEWLINE_LF)); |
(PCRE_NEWLINE_CRLF|PCRE_NEWLINE_CR|PCRE_NEWLINE_LF| |
| 352 |
|
PCRE_NEWLINE_ANY|PCRE_NEWLINE_ANYCRLF)); |
| 353 |
} else { |
} else { |
| 354 |
int newline; |
int newline; |
| 355 |
pcre_config(PCRE_CONFIG_NEWLINE, &newline); |
pcre_config(PCRE_CONFIG_NEWLINE, &newline); |
| 359 |
newline_mode = PCRE_NEWLINE_CR; |
newline_mode = PCRE_NEWLINE_CR; |
| 360 |
else if (newline == 3338) |
else if (newline == 3338) |
| 361 |
newline_mode = PCRE_NEWLINE_CRLF; |
newline_mode = PCRE_NEWLINE_CRLF; |
| 362 |
|
else if (newline == -1) |
| 363 |
|
newline_mode = PCRE_NEWLINE_ANY; |
| 364 |
|
else if (newline == -2) |
| 365 |
|
newline_mode = PCRE_NEWLINE_ANYCRLF; |
| 366 |
else |
else |
| 367 |
assert("" == "Unexpected return value from pcre_config(NEWLINE)"); |
assert("" == "Unexpected return value from pcre_config(NEWLINE)"); |
| 368 |
} |
} |
| 377 |
int start = 0; |
int start = 0; |
| 378 |
int lastend = -1; |
int lastend = -1; |
| 379 |
|
|
| 380 |
for (; start <= static_cast<int>(str->length()); count++) { |
while (start <= static_cast<int>(str->length())) { |
| 381 |
int matches = TryMatch(*str, start, UNANCHORED, vec, kVecSize); |
int matches = TryMatch(*str, start, UNANCHORED, vec, kVecSize); |
| 382 |
if (matches <= 0) |
if (matches <= 0) |
| 383 |
break; |
break; |
| 392 |
// Note it's better to call pcre_fullinfo() than to examine |
// Note it's better to call pcre_fullinfo() than to examine |
| 393 |
// all_options(), since options_ could have changed between |
// all_options(), since options_ could have changed between |
| 394 |
// compile-time and now, but this is simpler and safe enough. |
// compile-time and now, but this is simpler and safe enough. |
| 395 |
|
// Modified by PH to add ANY and ANYCRLF. |
| 396 |
if (start+1 < static_cast<int>(str->length()) && |
if (start+1 < static_cast<int>(str->length()) && |
| 397 |
(*str)[start] == '\r' && (*str)[start+1] == '\n' && |
(*str)[start] == '\r' && (*str)[start+1] == '\n' && |
| 398 |
NewlineMode(options_.all_options()) == PCRE_NEWLINE_CRLF) { |
(NewlineMode(options_.all_options()) == PCRE_NEWLINE_CRLF || |
| 399 |
|
NewlineMode(options_.all_options()) == PCRE_NEWLINE_ANY || |
| 400 |
|
NewlineMode(options_.all_options()) == PCRE_NEWLINE_ANYCRLF) |
| 401 |
|
) { |
| 402 |
matchend++; |
matchend++; |
| 403 |
} |
} |
| 404 |
// We also need to advance more than one char if we're in utf8 mode. |
// We also need to advance more than one char if we're in utf8 mode. |
| 482 |
return 0; |
return 0; |
| 483 |
} |
} |
| 484 |
|
|
| 485 |
pcre_extra extra = { 0 }; |
pcre_extra extra = { 0, 0, 0, 0, 0, 0 }; |
| 486 |
if (options_.match_limit() > 0) { |
if (options_.match_limit() > 0) { |
| 487 |
extra.flags |= PCRE_EXTRA_MATCH_LIMIT; |
extra.flags |= PCRE_EXTRA_MATCH_LIMIT; |
| 488 |
extra.match_limit = options_.match_limit(); |
extra.match_limit = options_.match_limit(); |
| 626 |
} |
} |
| 627 |
|
|
| 628 |
bool Arg::parse_string(const char* str, int n, void* dest) { |
bool Arg::parse_string(const char* str, int n, void* dest) { |
| 629 |
|
if (dest == NULL) return true; |
| 630 |
reinterpret_cast<string*>(dest)->assign(str, n); |
reinterpret_cast<string*>(dest)->assign(str, n); |
| 631 |
return true; |
return true; |
| 632 |
} |
} |
| 633 |
|
|
| 634 |
bool Arg::parse_stringpiece(const char* str, int n, void* dest) { |
bool Arg::parse_stringpiece(const char* str, int n, void* dest) { |
| 635 |
|
if (dest == NULL) return true; |
| 636 |
reinterpret_cast<StringPiece*>(dest)->set(str, n); |
reinterpret_cast<StringPiece*>(dest)->set(str, n); |
| 637 |
return true; |
return true; |
| 638 |
} |
} |
| 639 |
|
|
| 640 |
bool Arg::parse_char(const char* str, int n, void* dest) { |
bool Arg::parse_char(const char* str, int n, void* dest) { |
| 641 |
if (n != 1) return false; |
if (n != 1) return false; |
| 642 |
|
if (dest == NULL) return true; |
| 643 |
*(reinterpret_cast<char*>(dest)) = str[0]; |
*(reinterpret_cast<char*>(dest)) = str[0]; |
| 644 |
return true; |
return true; |
| 645 |
} |
} |
| 646 |
|
|
| 647 |
bool Arg::parse_uchar(const char* str, int n, void* dest) { |
bool Arg::parse_uchar(const char* str, int n, void* dest) { |
| 648 |
if (n != 1) return false; |
if (n != 1) return false; |
| 649 |
|
if (dest == NULL) return true; |
| 650 |
*(reinterpret_cast<unsigned char*>(dest)) = str[0]; |
*(reinterpret_cast<unsigned char*>(dest)) = str[0]; |
| 651 |
return true; |
return true; |
| 652 |
} |
} |
| 695 |
long r = strtol(str, &end, radix); |
long r = strtol(str, &end, radix); |
| 696 |
if (end != str + n) return false; // Leftover junk |
if (end != str + n) return false; // Leftover junk |
| 697 |
if (errno) return false; |
if (errno) return false; |
| 698 |
|
if (dest == NULL) return true; |
| 699 |
*(reinterpret_cast<long*>(dest)) = r; |
*(reinterpret_cast<long*>(dest)) = r; |
| 700 |
return true; |
return true; |
| 701 |
} |
} |
| 713 |
unsigned long r = strtoul(str, &end, radix); |
unsigned long r = strtoul(str, &end, radix); |
| 714 |
if (end != str + n) return false; // Leftover junk |
if (end != str + n) return false; // Leftover junk |
| 715 |
if (errno) return false; |
if (errno) return false; |
| 716 |
|
if (dest == NULL) return true; |
| 717 |
*(reinterpret_cast<unsigned long*>(dest)) = r; |
*(reinterpret_cast<unsigned long*>(dest)) = r; |
| 718 |
return true; |
return true; |
| 719 |
} |
} |
| 725 |
long r; |
long r; |
| 726 |
if (!parse_long_radix(str, n, &r, radix)) return false; // Could not parse |
if (!parse_long_radix(str, n, &r, radix)) return false; // Could not parse |
| 727 |
if (r < SHRT_MIN || r > SHRT_MAX) return false; // Out of range |
if (r < SHRT_MIN || r > SHRT_MAX) return false; // Out of range |
| 728 |
*(reinterpret_cast<short*>(dest)) = r; |
if (dest == NULL) return true; |
| 729 |
|
*(reinterpret_cast<short*>(dest)) = static_cast<short>(r); |
| 730 |
return true; |
return true; |
| 731 |
} |
} |
| 732 |
|
|
| 737 |
unsigned long r; |
unsigned long r; |
| 738 |
if (!parse_ulong_radix(str, n, &r, radix)) return false; // Could not parse |
if (!parse_ulong_radix(str, n, &r, radix)) return false; // Could not parse |
| 739 |
if (r > USHRT_MAX) return false; // Out of range |
if (r > USHRT_MAX) return false; // Out of range |
| 740 |
*(reinterpret_cast<unsigned short*>(dest)) = r; |
if (dest == NULL) return true; |
| 741 |
|
*(reinterpret_cast<unsigned short*>(dest)) = static_cast<unsigned short>(r); |
| 742 |
return true; |
return true; |
| 743 |
} |
} |
| 744 |
|
|
| 749 |
long r; |
long r; |
| 750 |
if (!parse_long_radix(str, n, &r, radix)) return false; // Could not parse |
if (!parse_long_radix(str, n, &r, radix)) return false; // Could not parse |
| 751 |
if (r < INT_MIN || r > INT_MAX) return false; // Out of range |
if (r < INT_MIN || r > INT_MAX) return false; // Out of range |
| 752 |
|
if (dest == NULL) return true; |
| 753 |
*(reinterpret_cast<int*>(dest)) = r; |
*(reinterpret_cast<int*>(dest)) = r; |
| 754 |
return true; |
return true; |
| 755 |
} |
} |
| 761 |
unsigned long r; |
unsigned long r; |
| 762 |
if (!parse_ulong_radix(str, n, &r, radix)) return false; // Could not parse |
if (!parse_ulong_radix(str, n, &r, radix)) return false; // Could not parse |
| 763 |
if (r > UINT_MAX) return false; // Out of range |
if (r > UINT_MAX) return false; // Out of range |
| 764 |
|
if (dest == NULL) return true; |
| 765 |
*(reinterpret_cast<unsigned int*>(dest)) = r; |
*(reinterpret_cast<unsigned int*>(dest)) = r; |
| 766 |
return true; |
return true; |
| 767 |
} |
} |
| 782 |
long long r = strtoq(str, &end, radix); |
long long r = strtoq(str, &end, radix); |
| 783 |
#elif defined HAVE_STRTOLL |
#elif defined HAVE_STRTOLL |
| 784 |
long long r = strtoll(str, &end, radix); |
long long r = strtoll(str, &end, radix); |
| 785 |
|
#elif defined HAVE__STRTOI64 |
| 786 |
|
long long r = _strtoi64(str, &end, radix); |
| 787 |
#else |
#else |
| 788 |
#error parse_longlong_radix: cannot convert input to a long-long |
#error parse_longlong_radix: cannot convert input to a long-long |
| 789 |
#endif |
#endif |
| 790 |
if (end != str + n) return false; // Leftover junk |
if (end != str + n) return false; // Leftover junk |
| 791 |
if (errno) return false; |
if (errno) return false; |
| 792 |
|
if (dest == NULL) return true; |
| 793 |
*(reinterpret_cast<long long*>(dest)) = r; |
*(reinterpret_cast<long long*>(dest)) = r; |
| 794 |
return true; |
return true; |
| 795 |
#endif /* HAVE_LONG_LONG */ |
#endif /* HAVE_LONG_LONG */ |
| 812 |
unsigned long long r = strtouq(str, &end, radix); |
unsigned long long r = strtouq(str, &end, radix); |
| 813 |
#elif defined HAVE_STRTOLL |
#elif defined HAVE_STRTOLL |
| 814 |
unsigned long long r = strtoull(str, &end, radix); |
unsigned long long r = strtoull(str, &end, radix); |
| 815 |
|
#elif defined HAVE__STRTOI64 |
| 816 |
|
unsigned long long r = _strtoui64(str, &end, radix); |
| 817 |
#else |
#else |
| 818 |
#error parse_ulonglong_radix: cannot convert input to a long-long |
#error parse_ulonglong_radix: cannot convert input to a long-long |
| 819 |
#endif |
#endif |
| 820 |
if (end != str + n) return false; // Leftover junk |
if (end != str + n) return false; // Leftover junk |
| 821 |
if (errno) return false; |
if (errno) return false; |
| 822 |
|
if (dest == NULL) return true; |
| 823 |
*(reinterpret_cast<unsigned long long*>(dest)) = r; |
*(reinterpret_cast<unsigned long long*>(dest)) = r; |
| 824 |
return true; |
return true; |
| 825 |
#endif /* HAVE_UNSIGNED_LONG_LONG */ |
#endif /* HAVE_UNSIGNED_LONG_LONG */ |
| 837 |
double r = strtod(buf, &end); |
double r = strtod(buf, &end); |
| 838 |
if (end != buf + n) return false; // Leftover junk |
if (end != buf + n) return false; // Leftover junk |
| 839 |
if (errno) return false; |
if (errno) return false; |
| 840 |
|
if (dest == NULL) return true; |
| 841 |
*(reinterpret_cast<double*>(dest)) = r; |
*(reinterpret_cast<double*>(dest)) = r; |
| 842 |
return true; |
return true; |
| 843 |
} |
} |
| 845 |
bool Arg::parse_float(const char* str, int n, void* dest) { |
bool Arg::parse_float(const char* str, int n, void* dest) { |
| 846 |
double r; |
double r; |
| 847 |
if (!parse_double(str, n, &r)) return false; |
if (!parse_double(str, n, &r)) return false; |
| 848 |
|
if (dest == NULL) return true; |
| 849 |
*(reinterpret_cast<float*>(dest)) = static_cast<float>(r); |
*(reinterpret_cast<float*>(dest)) = static_cast<float>(r); |
| 850 |
return true; |
return true; |
| 851 |
} |
} |