| 1 |
// Copyright (c) 2005, Google Inc. |
// -*- coding: utf-8 -*- |
| 2 |
|
// |
| 3 |
|
// Copyright (c) 2005 - 2006, Google Inc. |
| 4 |
// All rights reserved. |
// All rights reserved. |
| 5 |
// |
// |
| 6 |
// Redistribution and use in source and binary forms, with or without |
// Redistribution and use in source and binary forms, with or without |
| 34 |
// TODO: Test extractions for PartialMatch/Consume |
// TODO: Test extractions for PartialMatch/Consume |
| 35 |
|
|
| 36 |
#include <stdio.h> |
#include <stdio.h> |
| 37 |
|
#include <cassert> |
| 38 |
#include <vector> |
#include <vector> |
| 39 |
#include "config.h" |
#include "config.h" |
| 40 |
#include "pcrecpp.h" |
#include "pcrecpp.h" |
| 262 |
"aaaaa", |
"aaaaa", |
| 263 |
"bbaaaaa", |
"bbaaaaa", |
| 264 |
"bbabbabbabbabbabb" }, |
"bbabbabbabbabbabb" }, |
| 265 |
|
{ "b*", |
| 266 |
|
"bb", |
| 267 |
|
"aa\naa\n", |
| 268 |
|
"bbaa\naa\n", |
| 269 |
|
"bbabbabb\nbbabbabb\nbb" }, |
| 270 |
|
{ "b*", |
| 271 |
|
"bb", |
| 272 |
|
"aa\raa\r", |
| 273 |
|
"bbaa\raa\r", |
| 274 |
|
"bbabbabb\rbbabbabb\rbb" }, |
| 275 |
|
{ "b*", |
| 276 |
|
"bb", |
| 277 |
|
"aa\r\naa\r\n", |
| 278 |
|
"bbaa\r\naa\r\n", |
| 279 |
|
"bbabbabb\r\nbbabbabb\r\nbb" }, |
| 280 |
|
#ifdef SUPPORT_UTF8 |
| 281 |
|
{ "b*", |
| 282 |
|
"bb", |
| 283 |
|
"\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8", // utf8 |
| 284 |
|
"bb\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8", |
| 285 |
|
"bb\xE3\x83\x9B""bb""\xE3\x83\xBC""bb""\xE3\x83\xA0""bb""\xE3\x81\xB8""bb" }, |
| 286 |
|
{ "b*", |
| 287 |
|
"bb", |
| 288 |
|
"\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n", // utf8 |
| 289 |
|
"bb\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n", |
| 290 |
|
("bb\xE3\x83\x9B""bb\r\nbb""\xE3\x83\xBC""bb\rbb""\xE3\x83\xA0" |
| 291 |
|
"bb\nbb""\xE3\x81\xB8""bb\r\nbb") }, |
| 292 |
|
#endif |
| 293 |
{ "", NULL, NULL, NULL, NULL } |
{ "", NULL, NULL, NULL, NULL } |
| 294 |
}; |
}; |
| 295 |
|
|
| 296 |
|
#ifdef SUPPORT_UTF8 |
| 297 |
|
const bool support_utf8 = true; |
| 298 |
|
#else |
| 299 |
|
const bool support_utf8 = false; |
| 300 |
|
#endif |
| 301 |
|
|
| 302 |
for (const ReplaceTest *t = tests; t->original != NULL; ++t) { |
for (const ReplaceTest *t = tests; t->original != NULL; ++t) { |
| 303 |
|
RE re(t->regexp, RE_Options(PCRE_NEWLINE_CRLF).set_utf8(support_utf8)); |
| 304 |
|
assert(re.error().empty()); |
| 305 |
string one(t->original); |
string one(t->original); |
| 306 |
CHECK(RE(t->regexp).Replace(t->rewrite, &one)); |
CHECK(re.Replace(t->rewrite, &one)); |
| 307 |
CHECK_EQ(one, t->single); |
CHECK_EQ(one, t->single); |
| 308 |
string all(t->original); |
string all(t->original); |
| 309 |
CHECK(RE(t->regexp).GlobalReplace(t->rewrite, &all) > 0); |
CHECK(re.GlobalReplace(t->rewrite, &all) > 0); |
| 310 |
CHECK_EQ(all, t->global); |
CHECK_EQ(all, t->global); |
| 311 |
} |
} |
| 312 |
|
|
| 313 |
|
// One final test: test \r\n replacement when we're not in CRLF mode |
| 314 |
|
{ |
| 315 |
|
RE re("b*", RE_Options(PCRE_NEWLINE_CR).set_utf8(support_utf8)); |
| 316 |
|
assert(re.error().empty()); |
| 317 |
|
string all("aa\r\naa\r\n"); |
| 318 |
|
CHECK(re.GlobalReplace("bb", &all) > 0); |
| 319 |
|
CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb")); |
| 320 |
|
} |
| 321 |
|
{ |
| 322 |
|
RE re("b*", RE_Options(PCRE_NEWLINE_LF).set_utf8(support_utf8)); |
| 323 |
|
assert(re.error().empty()); |
| 324 |
|
string all("aa\r\naa\r\n"); |
| 325 |
|
CHECK(re.GlobalReplace("bb", &all) > 0); |
| 326 |
|
CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb")); |
| 327 |
|
} |
| 328 |
|
// TODO: test what happens when no PCRE_NEWLINE_* flag is set. |
| 329 |
|
// Alas, the answer depends on how pcre was compiled. |
| 330 |
} |
} |
| 331 |
|
|
| 332 |
static void TestExtract() { |
static void TestExtract() { |
| 447 |
CHECK(re4.FullMatch(text_bad) == false); |
CHECK(re4.FullMatch(text_bad) == false); |
| 448 |
} |
} |
| 449 |
|
|
| 450 |
|
// A meta-quoted string, interpreted as a pattern, should always match |
| 451 |
|
// the original unquoted string. |
| 452 |
|
static void TestQuoteMeta(string unquoted, RE_Options options = RE_Options()) { |
| 453 |
|
string quoted = RE::QuoteMeta(unquoted); |
| 454 |
|
RE re(quoted, options); |
| 455 |
|
CHECK(re.FullMatch(unquoted)); |
| 456 |
|
} |
| 457 |
|
|
| 458 |
|
// A string containing meaningful regexp characters, which is then meta- |
| 459 |
|
// quoted, should not generally match a string the unquoted string does. |
| 460 |
|
static void NegativeTestQuoteMeta(string unquoted, string should_not_match, |
| 461 |
|
RE_Options options = RE_Options()) { |
| 462 |
|
string quoted = RE::QuoteMeta(unquoted); |
| 463 |
|
RE re(quoted, options); |
| 464 |
|
CHECK(!re.FullMatch(should_not_match)); |
| 465 |
|
} |
| 466 |
|
|
| 467 |
|
// Tests that quoted meta characters match their original strings, |
| 468 |
|
// and that a few things that shouldn't match indeed do not. |
| 469 |
|
static void TestQuotaMetaSimple() { |
| 470 |
|
TestQuoteMeta("foo"); |
| 471 |
|
TestQuoteMeta("foo.bar"); |
| 472 |
|
TestQuoteMeta("foo\\.bar"); |
| 473 |
|
TestQuoteMeta("[1-9]"); |
| 474 |
|
TestQuoteMeta("1.5-2.0?"); |
| 475 |
|
TestQuoteMeta("\\d"); |
| 476 |
|
TestQuoteMeta("Who doesn't like ice cream?"); |
| 477 |
|
TestQuoteMeta("((a|b)c?d*e+[f-h]i)"); |
| 478 |
|
TestQuoteMeta("((?!)xxx).*yyy"); |
| 479 |
|
TestQuoteMeta("(["); |
| 480 |
|
} |
| 481 |
|
|
| 482 |
|
// Negative QuoteMeta cases: for each pair, the meta-quoted form of the first
// string must not fully match the second string (see NegativeTestQuoteMeta).
static void TestQuoteMetaSimpleNegative() {
  struct NegativeCase {
    const char* unquoted;          // text handed to RE::QuoteMeta()
    const char* should_not_match;  // subject that must be rejected
  };

  // Checked in order; the list ends at the entry whose |unquoted| is NULL.
  static const NegativeCase kCases[] = {
    { "foo",       "bar" },
    { "...",       "bar" },
    { "\\.",       "." },
    { "\\.",       ".." },
    { "(a)",       "a" },
    { "(a|b)",     "a" },
    { "(a|b)",     "(a)" },
    { "(a|b)",     "a|b" },
    { "[0-9]",     "0" },
    { "[0-9]",     "0-9" },
    { "[0-9]",     "[9]" },
    { "((?!)xxx)", "xxx" },
    { NULL,        NULL }
  };

  for (const NegativeCase* c = kCases; c->unquoted != NULL; ++c) {
    NegativeTestQuoteMeta(c->unquoted, c->should_not_match);
  }
}
| 496 |
|
|
| 497 |
|
// QuoteMeta round trip for a string containing a byte above 0x7F, using the
// default (non-UTF-8) options.
static void TestQuoteMetaLatin1() {
  // Byte 0xB2 is SUPERSCRIPT TWO in Latin-1, so this reads "3^2 = 9".
  const char* const kLatin1Text = "3\xb2 = 9";
  TestQuoteMeta(kLatin1Text);
}
| 500 |
|
|
| 501 |
|
// QuoteMeta checks on multi-byte UTF-8 input.  The whole body is compiled
// only when SUPPORT_UTF8 is defined; otherwise this function does nothing.
static void TestQuoteMetaUtf8() {
#ifdef SUPPORT_UTF8
  // Options shared by every UTF-8 case below.
  const RE_Options utf8 = pcrecpp::UTF8();

  // "\x63" is a plain 'c'; it is spelled as an escape so that it is not
  // swallowed by the preceding "\xa1" hex escape.
  TestQuoteMeta("Pl\xc3\xa1\x63ido Domingo", utf8);
  TestQuoteMeta("xyz", utf8);                // No fancy utf8
  TestQuoteMeta("\xc2\xb0", utf8);           // 2-byte utf8 (degree symbol)
  TestQuoteMeta("27\xc2\xb0 degrees", utf8); // As a middle character
  TestQuoteMeta("\xe2\x80\xb3", utf8);       // 3-byte utf8 (double prime)
  TestQuoteMeta("\xf0\x9d\x85\x9f", utf8);   // 4-byte utf8 (music note)

  // Same bytes with the default options: interpreted as Latin-1, but the
  // round trip should still work.
  TestQuoteMeta("27\xc2\xb0");

  // The quoted 2-byte character (degree symbol) must not match a subject in
  // which each of its bytes is preceded by a literal backslash.
  NegativeTestQuoteMeta("27\xc2\xb0",
                        "27\\\xc2\\\xb0",
                        utf8);
#endif
}
| 515 |
|
|
| 516 |
|
// Entry point for the QuoteMeta tests: prints a banner to stdout, then runs
// each QuoteMeta test group in a fixed order.
static void TestQuoteMetaAll() {
  fputs("Testing QuoteMeta\n", stdout);

  TestQuotaMetaSimple();          // strings that must match after quoting
  TestQuoteMetaSimpleNegative();  // strings that must not match
  TestQuoteMetaLatin1();          // byte above 0x7F, default options
  TestQuoteMetaUtf8();            // multi-byte cases (only with SUPPORT_UTF8)
}
| 523 |
|
|
| 524 |
// |
// |
| 525 |
// Options tests contributed by |
// Options tests contributed by |
| 526 |
// Giuseppe Maxia, CTO, Stardata s.r.l. |
// Giuseppe Maxia, CTO, Stardata s.r.l. |
| 743 |
Test_all_options(); |
Test_all_options(); |
| 744 |
} |
} |
| 745 |
|
|
| 746 |
|
static void TestConstructors() { |
| 747 |
|
printf("Testing constructors\n"); |
| 748 |
|
|
| 749 |
|
RE_Options options; |
| 750 |
|
options.set_dotall(true); |
| 751 |
|
const char *str = "HELLO\n" "cruel\n" "world"; |
| 752 |
|
|
| 753 |
|
RE orig("HELLO.*world", options); |
| 754 |
|
CHECK(orig.FullMatch(str)); |
| 755 |
|
|
| 756 |
|
RE copy1(orig); |
| 757 |
|
CHECK(copy1.FullMatch(str)); |
| 758 |
|
|
| 759 |
|
RE copy2("not a match"); |
| 760 |
|
CHECK(!copy2.FullMatch(str)); |
| 761 |
|
copy2 = copy1; |
| 762 |
|
CHECK(copy2.FullMatch(str)); |
| 763 |
|
copy2 = orig; |
| 764 |
|
CHECK(copy2.FullMatch(str)); |
| 765 |
|
|
| 766 |
|
// Make sure when we assign to ourselves, nothing bad happens |
| 767 |
|
orig = orig; |
| 768 |
|
copy1 = copy1; |
| 769 |
|
copy2 = copy2; |
| 770 |
|
CHECK(orig.FullMatch(str)); |
| 771 |
|
CHECK(copy1.FullMatch(str)); |
| 772 |
|
CHECK(copy2.FullMatch(str)); |
| 773 |
|
} |
| 774 |
|
|
| 775 |
int main(int argc, char** argv) { |
int main(int argc, char** argv) { |
| 776 |
// Treat any flag as --help |
// Treat any flag as --help |
| 777 |
if (argc > 1 && argv[1][0] == '-') { |
if (argc > 1 && argv[1][0] == '-') { |
| 1090 |
CHECK(RE("h.*o").PartialMatch("hello!")); |
CHECK(RE("h.*o").PartialMatch("hello!")); |
| 1091 |
CHECK(RE("((((((((((((((((((((x))))))))))))))))))))").PartialMatch("x")); |
CHECK(RE("((((((((((((((((((((x))))))))))))))))))))").PartialMatch("x")); |
| 1092 |
|
|
| 1093 |
|
/***** other tests *****/ |
| 1094 |
|
|
| 1095 |
RadixTests(); |
RadixTests(); |
| 1096 |
TestReplace(); |
TestReplace(); |
| 1097 |
TestExtract(); |
TestExtract(); |
| 1098 |
TestConsume(); |
TestConsume(); |
| 1099 |
TestFindAndConsume(); |
TestFindAndConsume(); |
| 1100 |
|
TestQuoteMetaAll(); |
| 1101 |
TestMatchNumberPeculiarity(); |
TestMatchNumberPeculiarity(); |
| 1102 |
|
|
| 1103 |
// Check the pattern() accessor |
// Check the pattern() accessor |
| 1217 |
VERBOSE_TEST = true; |
VERBOSE_TEST = true; |
| 1218 |
TestOptions(); |
TestOptions(); |
| 1219 |
|
|
| 1220 |
|
// Test the constructors |
| 1221 |
|
TestConstructors(); |
| 1222 |
|
|
| 1223 |
// Done |
// Done |
| 1224 |
printf("OK\n"); |
printf("OK\n"); |
| 1225 |
|
|