| 37 |
#include "config.h" |
#include "config.h" |
| 38 |
#endif |
#endif |
| 39 |
|
|
|
#ifdef HAVE_WINDOWS_H |
|
|
#define snprintf _snprintf |
|
|
#endif |
|
|
|
|
| 40 |
#include <stdio.h> |
#include <stdio.h> |
| 41 |
#include <cassert> |
#include <cassert> |
| 42 |
#include <vector> |
#include <vector> |
| 110 |
initial_size = VirtualProcessSize(); |
initial_size = VirtualProcessSize(); |
| 111 |
printf("Size after 50000: %llu\n", initial_size); |
printf("Size after 50000: %llu\n", initial_size); |
| 112 |
} |
} |
| 113 |
char buf[100]; |
char buf[100]; // definitely big enough |
| 114 |
snprintf(buf, sizeof(buf), "pat%09d", i); |
sprintf(buf, "pat%09d", i); |
| 115 |
RE newre(buf); |
RE newre(buf); |
| 116 |
} |
} |
| 117 |
uint64 final_size = VirtualProcessSize(); |
uint64 final_size = VirtualProcessSize(); |
| 213 |
const char *original; |
const char *original; |
| 214 |
const char *single; |
const char *single; |
| 215 |
const char *global; |
const char *global; |
| 216 |
|
int global_count; // the expected return value from GlobalReplace |
| 217 |
}; |
}; |
| 218 |
static const ReplaceTest tests[] = { |
static const ReplaceTest tests[] = { |
| 219 |
{ "(qu|[b-df-hj-np-tv-z]*)([a-z]+)", |
{ "(qu|[b-df-hj-np-tv-z]*)([a-z]+)", |
| 220 |
"\\2\\1ay", |
"\\2\\1ay", |
| 221 |
"the quick brown fox jumps over the lazy dogs.", |
"the quick brown fox jumps over the lazy dogs.", |
| 222 |
"ethay quick brown fox jumps over the lazy dogs.", |
"ethay quick brown fox jumps over the lazy dogs.", |
| 223 |
"ethay ickquay ownbray oxfay umpsjay overay ethay azylay ogsday." }, |
"ethay ickquay ownbray oxfay umpsjay overay ethay azylay ogsday.", |
| 224 |
|
9 }, |
| 225 |
{ "\\w+", |
{ "\\w+", |
| 226 |
"\\0-NOSPAM", |
"\\0-NOSPAM", |
| 227 |
"paul.haahr@google.com", |
"paul.haahr@google.com", |
| 228 |
"paul-NOSPAM.haahr@google.com", |
"paul-NOSPAM.haahr@google.com", |
| 229 |
"paul-NOSPAM.haahr-NOSPAM@google-NOSPAM.com-NOSPAM" }, |
"paul-NOSPAM.haahr-NOSPAM@google-NOSPAM.com-NOSPAM", |
| 230 |
|
4 }, |
| 231 |
{ "^", |
{ "^", |
| 232 |
"(START)", |
"(START)", |
| 233 |
"foo", |
"foo", |
| 234 |
"(START)foo", |
"(START)foo", |
| 235 |
"(START)foo" }, |
"(START)foo", |
| 236 |
|
1 }, |
| 237 |
{ "^", |
{ "^", |
| 238 |
"(START)", |
"(START)", |
| 239 |
"", |
"", |
| 240 |
"(START)", |
"(START)", |
| 241 |
"(START)" }, |
"(START)", |
| 242 |
|
1 }, |
| 243 |
{ "$", |
{ "$", |
| 244 |
"(END)", |
"(END)", |
| 245 |
"", |
"", |
| 246 |
"(END)", |
"(END)", |
| 247 |
"(END)" }, |
"(END)", |
| 248 |
|
1 }, |
| 249 |
{ "b", |
{ "b", |
| 250 |
"bb", |
"bb", |
| 251 |
"ababababab", |
"ababababab", |
| 252 |
"abbabababab", |
"abbabababab", |
| 253 |
"abbabbabbabbabb" }, |
"abbabbabbabbabb", |
| 254 |
|
5 }, |
| 255 |
{ "b", |
{ "b", |
| 256 |
"bb", |
"bb", |
| 257 |
"bbbbbb", |
"bbbbbb", |
| 258 |
"bbbbbbb", |
"bbbbbbb", |
| 259 |
"bbbbbbbbbbbb" }, |
"bbbbbbbbbbbb", |
| 260 |
|
6 }, |
| 261 |
{ "b+", |
{ "b+", |
| 262 |
"bb", |
"bb", |
| 263 |
"bbbbbb", |
"bbbbbb", |
| 264 |
"bb", |
"bb", |
| 265 |
"bb" }, |
"bb", |
| 266 |
|
1 }, |
| 267 |
{ "b*", |
{ "b*", |
| 268 |
"bb", |
"bb", |
| 269 |
"bbbbbb", |
"bbbbbb", |
| 270 |
"bb", |
"bb", |
| 271 |
"bb" }, |
"bb", |
| 272 |
|
1 }, |
| 273 |
{ "b*", |
{ "b*", |
| 274 |
"bb", |
"bb", |
| 275 |
"aaaaa", |
"aaaaa", |
| 276 |
"bbaaaaa", |
"bbaaaaa", |
| 277 |
"bbabbabbabbabbabb" }, |
"bbabbabbabbabbabb", |
| 278 |
|
6 }, |
| 279 |
{ "b*", |
{ "b*", |
| 280 |
"bb", |
"bb", |
| 281 |
"aa\naa\n", |
"aa\naa\n", |
| 282 |
"bbaa\naa\n", |
"bbaa\naa\n", |
| 283 |
"bbabbabb\nbbabbabb\nbb" }, |
"bbabbabb\nbbabbabb\nbb", |
| 284 |
|
7 }, |
| 285 |
{ "b*", |
{ "b*", |
| 286 |
"bb", |
"bb", |
| 287 |
"aa\raa\r", |
"aa\raa\r", |
| 288 |
"bbaa\raa\r", |
"bbaa\raa\r", |
| 289 |
"bbabbabb\rbbabbabb\rbb" }, |
"bbabbabb\rbbabbabb\rbb", |
| 290 |
|
7 }, |
| 291 |
{ "b*", |
{ "b*", |
| 292 |
"bb", |
"bb", |
| 293 |
"aa\r\naa\r\n", |
"aa\r\naa\r\n", |
| 294 |
"bbaa\r\naa\r\n", |
"bbaa\r\naa\r\n", |
| 295 |
"bbabbabb\r\nbbabbabb\r\nbb" }, |
"bbabbabb\r\nbbabbabb\r\nbb", |
| 296 |
|
7 }, |
| 297 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF8 |
| 298 |
{ "b*", |
{ "b*", |
| 299 |
"bb", |
"bb", |
| 300 |
"\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8", // utf8 |
"\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8", // utf8 |
| 301 |
"bb\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8", |
"bb\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8", |
| 302 |
"bb\xE3\x83\x9B""bb""\xE3\x83\xBC""bb""\xE3\x83\xA0""bb""\xE3\x81\xB8""bb" }, |
"bb\xE3\x83\x9B""bb""\xE3\x83\xBC""bb""\xE3\x83\xA0""bb""\xE3\x81\xB8""bb", |
| 303 |
|
5 }, |
| 304 |
{ "b*", |
{ "b*", |
| 305 |
"bb", |
"bb", |
| 306 |
"\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n", // utf8 |
"\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n", // utf8 |
| 307 |
"bb\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n", |
"bb\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n", |
| 308 |
("bb\xE3\x83\x9B""bb\r\nbb""\xE3\x83\xBC""bb\rbb""\xE3\x83\xA0" |
("bb\xE3\x83\x9B""bb\r\nbb""\xE3\x83\xBC""bb\rbb""\xE3\x83\xA0" |
| 309 |
"bb\nbb""\xE3\x81\xB8""bb\r\nbb") }, |
"bb\nbb""\xE3\x81\xB8""bb\r\nbb"), |
| 310 |
|
9 }, |
| 311 |
#endif |
#endif |
| 312 |
{ "", NULL, NULL, NULL, NULL } |
{ "", NULL, NULL, NULL, NULL, 0 } |
| 313 |
}; |
}; |
| 314 |
|
|
| 315 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF8 |
| 325 |
CHECK(re.Replace(t->rewrite, &one)); |
CHECK(re.Replace(t->rewrite, &one)); |
| 326 |
CHECK_EQ(one, t->single); |
CHECK_EQ(one, t->single); |
| 327 |
string all(t->original); |
string all(t->original); |
| 328 |
CHECK(re.GlobalReplace(t->rewrite, &all) > 0); |
const int replace_count = re.GlobalReplace(t->rewrite, &all); |
| 329 |
CHECK_EQ(all, t->global); |
CHECK_EQ(all, t->global); |
| 330 |
|
CHECK_EQ(replace_count, t->global_count); |
| 331 |
} |
} |
| 332 |
|
|
| 333 |
// One final test: test \r\n replacement when we're not in CRLF mode |
// One final test: test \r\n replacement when we're not in CRLF mode |
| 335 |
RE re("b*", RE_Options(PCRE_NEWLINE_CR).set_utf8(support_utf8)); |
RE re("b*", RE_Options(PCRE_NEWLINE_CR).set_utf8(support_utf8)); |
| 336 |
assert(re.error().empty()); |
assert(re.error().empty()); |
| 337 |
string all("aa\r\naa\r\n"); |
string all("aa\r\naa\r\n"); |
| 338 |
CHECK(re.GlobalReplace("bb", &all) > 0); |
CHECK_EQ(re.GlobalReplace("bb", &all), 9); |
| 339 |
CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb")); |
CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb")); |
| 340 |
} |
} |
| 341 |
{ |
{ |
| 342 |
RE re("b*", RE_Options(PCRE_NEWLINE_LF).set_utf8(support_utf8)); |
RE re("b*", RE_Options(PCRE_NEWLINE_LF).set_utf8(support_utf8)); |
| 343 |
assert(re.error().empty()); |
assert(re.error().empty()); |
| 344 |
string all("aa\r\naa\r\n"); |
string all("aa\r\naa\r\n"); |
| 345 |
CHECK(re.GlobalReplace("bb", &all) > 0); |
CHECK_EQ(re.GlobalReplace("bb", &all), 9); |
| 346 |
CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb")); |
CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb")); |
| 347 |
} |
} |
| 348 |
// TODO: test what happens when no PCRE_NEWLINE_* flag is set. |
// TODO: test what happens when no PCRE_NEWLINE_* flag is set. |
| 874 |
CHECK_EQ(s, string("ruby")); |
CHECK_EQ(s, string("ruby")); |
| 875 |
CHECK_EQ(i, 1234); |
CHECK_EQ(i, 1234); |
| 876 |
|
|
| 877 |
|
// Ignore non-void* NULL arg |
| 878 |
|
CHECK(RE("he(.*)lo").FullMatch("hello", (char*)NULL)); |
| 879 |
|
CHECK(RE("h(.*)o").FullMatch("hello", (string*)NULL)); |
| 880 |
|
CHECK(RE("h(.*)o").FullMatch("hello", (StringPiece*)NULL)); |
| 881 |
|
CHECK(RE("(.*)").FullMatch("1234", (int*)NULL)); |
| 882 |
|
CHECK(RE("(.*)").FullMatch("1234567890123456", (long long*)NULL)); |
| 883 |
|
CHECK(RE("(.*)").FullMatch("123.4567890123456", (double*)NULL)); |
| 884 |
|
CHECK(RE("(.*)").FullMatch("123.4567890123456", (float*)NULL)); |
| 885 |
|
|
| 886 |
|
// Fail on non-void* NULL arg if the match doesn't parse for the given type. |
| 887 |
|
CHECK(!RE("h(.*)lo").FullMatch("hello", &s, (char*)NULL)); |
| 888 |
|
CHECK(!RE("(.*)").FullMatch("hello", (int*)NULL)); |
| 889 |
|
CHECK(!RE("(.*)").FullMatch("1234567890123456", (int*)NULL)); |
| 890 |
|
CHECK(!RE("(.*)").FullMatch("hello", (double*)NULL)); |
| 891 |
|
CHECK(!RE("(.*)").FullMatch("hello", (float*)NULL)); |
| 892 |
|
|
| 893 |
// Ignored arg |
// Ignored arg |
| 894 |
CHECK(RE("(\\w+)(:)(\\d+)").FullMatch("ruby:1234", &s, (void*)NULL, &i)); |
CHECK(RE("(\\w+)(:)(\\d+)").FullMatch("ruby:1234", &s, (void*)NULL, &i)); |
| 895 |
CHECK_EQ(s, string("ruby")); |
CHECK_EQ(s, string("ruby")); |
| 952 |
long long v; |
long long v; |
| 953 |
static const long long max_value = 0x7fffffffffffffffLL; |
static const long long max_value = 0x7fffffffffffffffLL; |
| 954 |
static const long long min_value = -max_value - 1; |
static const long long min_value = -max_value - 1; |
| 955 |
char buf[32]; |
char buf[32]; // definitely big enough for a long long |
| 956 |
|
|
| 957 |
CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100); |
CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100); |
| 958 |
CHECK(RE("(-?\\d+)").FullMatch("-100",&v)); CHECK_EQ(v, -100); |
CHECK(RE("(-?\\d+)").FullMatch("-100",&v)); CHECK_EQ(v, -100); |
| 959 |
|
|
| 960 |
snprintf(buf, sizeof(buf), LLD, max_value); |
sprintf(buf, LLD, max_value); |
| 961 |
CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value); |
CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value); |
| 962 |
|
|
| 963 |
snprintf(buf, sizeof(buf), LLD, min_value); |
sprintf(buf, LLD, min_value); |
| 964 |
CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, min_value); |
CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, min_value); |
| 965 |
|
|
| 966 |
snprintf(buf, sizeof(buf), LLD, max_value); |
sprintf(buf, LLD, max_value); |
| 967 |
assert(buf[strlen(buf)-1] != '9'); |
assert(buf[strlen(buf)-1] != '9'); |
| 968 |
buf[strlen(buf)-1]++; |
buf[strlen(buf)-1]++; |
| 969 |
CHECK(!RE("(-?\\d+)").FullMatch(buf, &v)); |
CHECK(!RE("(-?\\d+)").FullMatch(buf, &v)); |
| 970 |
|
|
| 971 |
snprintf(buf, sizeof(buf), LLD, min_value); |
sprintf(buf, LLD, min_value); |
| 972 |
assert(buf[strlen(buf)-1] != '9'); |
assert(buf[strlen(buf)-1] != '9'); |
| 973 |
buf[strlen(buf)-1]++; |
buf[strlen(buf)-1]++; |
| 974 |
CHECK(!RE("(-?\\d+)").FullMatch(buf, &v)); |
CHECK(!RE("(-?\\d+)").FullMatch(buf, &v)); |
| 979 |
unsigned long long v; |
unsigned long long v; |
| 980 |
long long v2; |
long long v2; |
| 981 |
static const unsigned long long max_value = 0xffffffffffffffffULL; |
static const unsigned long long max_value = 0xffffffffffffffffULL; |
| 982 |
char buf[32]; |
char buf[32]; // definitely big enough for an unsigned long long |
| 983 |
|
|
| 984 |
CHECK(RE("(-?\\d+)").FullMatch("100",&v)); CHECK_EQ(v, 100); |
CHECK(RE("(-?\\d+)").FullMatch("100",&v)); CHECK_EQ(v, 100); |
| 985 |
CHECK(RE("(-?\\d+)").FullMatch("-100",&v2)); CHECK_EQ(v2, -100); |
CHECK(RE("(-?\\d+)").FullMatch("-100",&v2)); CHECK_EQ(v2, -100); |
| 986 |
|
|
| 987 |
snprintf(buf, sizeof(buf), LLU, max_value); |
sprintf(buf, LLU, max_value); |
| 988 |
CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value); |
CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value); |
| 989 |
|
|
| 990 |
assert(buf[strlen(buf)-1] != '9'); |
assert(buf[strlen(buf)-1] != '9'); |