| 32 |
// TODO: Test extractions for PartialMatch/Consume |
// TODO: Test extractions for PartialMatch/Consume |
| 33 |
|
|
| 34 |
#include <stdio.h> |
#include <stdio.h> |
| 35 |
|
#include <cassert> |
| 36 |
#include <vector> |
#include <vector> |
| 37 |
#include "config.h" |
#include "config.h" |
| 38 |
#include "pcrecpp.h" |
#include "pcrecpp.h" |
| 44 |
using pcrecpp::Octal; |
using pcrecpp::Octal; |
| 45 |
using pcrecpp::CRadix; |
using pcrecpp::CRadix; |
| 46 |
|
|
| 47 |
|
static bool VERBOSE_TEST = false; |
| 48 |
|
|
| 49 |
// CHECK dies with a fatal error if condition is not true. It is *not* |
// CHECK dies with a fatal error if condition is not true. It is *not* |
| 50 |
// controlled by NDEBUG, so the check will be executed regardless of |
// controlled by NDEBUG, so the check will be executed regardless of |
| 51 |
// compilation mode. Therefore, it is safe to do things like: |
// compilation mode. Therefore, it is safe to do things like: |
| 260 |
"aaaaa", |
"aaaaa", |
| 261 |
"bbaaaaa", |
"bbaaaaa", |
| 262 |
"bbabbabbabbabbabb" }, |
"bbabbabbabbabbabb" }, |
| 263 |
|
{ "b*", |
| 264 |
|
"bb", |
| 265 |
|
"aa\naa\n", |
| 266 |
|
"bbaa\naa\n", |
| 267 |
|
"bbabbabb\nbbabbabb\nbb" }, |
| 268 |
|
{ "b*", |
| 269 |
|
"bb", |
| 270 |
|
"aa\raa\r", |
| 271 |
|
"bbaa\raa\r", |
| 272 |
|
"bbabbabb\rbbabbabb\rbb" }, |
| 273 |
|
{ "b*", |
| 274 |
|
"bb", |
| 275 |
|
"aa\r\naa\r\n", |
| 276 |
|
"bbaa\r\naa\r\n", |
| 277 |
|
"bbabbabb\r\nbbabbabb\r\nbb" }, |
| 278 |
|
#ifdef SUPPORT_UTF8 |
| 279 |
|
{ "b*", |
| 280 |
|
"bb", |
| 281 |
|
"\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8", // utf8 |
| 282 |
|
"bb\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8", |
| 283 |
|
"bb\xE3\x83\x9B""bb""\xE3\x83\xBC""bb""\xE3\x83\xA0""bb""\xE3\x81\xB8""bb" }, |
| 284 |
|
{ "b*", |
| 285 |
|
"bb", |
| 286 |
|
"\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n", // utf8 |
| 287 |
|
"bb\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n", |
| 288 |
|
("bb\xE3\x83\x9B""bb\r\nbb""\xE3\x83\xBC""bb\rbb""\xE3\x83\xA0" |
| 289 |
|
"bb\nbb""\xE3\x81\xB8""bb\r\nbb") }, |
| 290 |
|
#endif |
| 291 |
{ "", NULL, NULL, NULL, NULL } |
{ "", NULL, NULL, NULL, NULL } |
| 292 |
}; |
}; |
| 293 |
|
|
| 294 |
|
#ifdef SUPPORT_UTF8 |
| 295 |
|
const bool support_utf8 = true; |
| 296 |
|
#else |
| 297 |
|
const bool support_utf8 = false; |
| 298 |
|
#endif |
| 299 |
|
|
| 300 |
for (const ReplaceTest *t = tests; t->original != NULL; ++t) { |
for (const ReplaceTest *t = tests; t->original != NULL; ++t) { |
| 301 |
|
RE re(t->regexp, RE_Options(PCRE_NEWLINE_CRLF).set_utf8(support_utf8)); |
| 302 |
|
assert(re.error().empty()); |
| 303 |
string one(t->original); |
string one(t->original); |
| 304 |
CHECK(RE(t->regexp).Replace(t->rewrite, &one)); |
CHECK(re.Replace(t->rewrite, &one)); |
| 305 |
CHECK_EQ(one, t->single); |
CHECK_EQ(one, t->single); |
| 306 |
string all(t->original); |
string all(t->original); |
| 307 |
CHECK(RE(t->regexp).GlobalReplace(t->rewrite, &all) > 0); |
CHECK(re.GlobalReplace(t->rewrite, &all) > 0); |
| 308 |
CHECK_EQ(all, t->global); |
CHECK_EQ(all, t->global); |
| 309 |
} |
} |
| 310 |
|
|
| 311 |
|
// One final test: test \r\n replacement when we're not in CRLF mode |
| 312 |
|
{ |
| 313 |
|
RE re("b*", RE_Options(PCRE_NEWLINE_CR).set_utf8(support_utf8)); |
| 314 |
|
assert(re.error().empty()); |
| 315 |
|
string all("aa\r\naa\r\n"); |
| 316 |
|
CHECK(re.GlobalReplace("bb", &all) > 0); |
| 317 |
|
CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb")); |
| 318 |
|
} |
| 319 |
|
{ |
| 320 |
|
RE re("b*", RE_Options(PCRE_NEWLINE_LF).set_utf8(support_utf8)); |
| 321 |
|
assert(re.error().empty()); |
| 322 |
|
string all("aa\r\naa\r\n"); |
| 323 |
|
CHECK(re.GlobalReplace("bb", &all) > 0); |
| 324 |
|
CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb")); |
| 325 |
|
} |
| 326 |
|
// TODO: test what happens when no PCRE_NEWLINE_* flag is set. |
| 327 |
|
// Alas, the answer depends on how pcre was compiled. |
| 328 |
} |
} |
| 329 |
|
|
| 330 |
static void TestExtract() { |
static void TestExtract() { |
| 403 |
CHECK_EQ(a, ""); |
CHECK_EQ(a, ""); |
| 404 |
} |
} |
| 405 |
|
|
| 406 |
// NOTE(review): this span is a side-by-side diff dump, so two revisions of
// TestRecursion are interleaved line by line (separated by "| N |" markers).
//  - Older revision (size, pattern, match_limit): builds `domain` by
//    repeating `pattern` until it is `size` characters long.
//  - Newer revision (no parameters): matches "(\w+)*b" against text_good and
//    text_bad under different match_limit / match_limit_recursion settings
//    and CHECKs which of the matches succeed.
static void TestRecursion(int size, const char *pattern, int match_limit) { |
static void TestRecursion() { |
| 407 |
printf("Testing recursion\n"); |
printf("Testing recursion\n"); |
| 408 |

|
| 409 |
// Fill up a string repeating the pattern given |
// Get one string that passes (sometimes), one that never does. |
| 410 |
string domain; |
string text_good("abcdefghijk"); |
| 411 |
domain.resize(size); |
string text_bad("acdefghijkl"); |
| 412 |
int patlen = strlen(pattern); |
|
| 413 |
for (int i = 0; i < size; ++i) { |
// According to pcretest, matching text_good against (\w+)*b |
| 414 |
domain[i] = pattern[i % patlen]; |
// requires match_limit of at least 8192, and match_recursion_limit |
| 415 |

// of at least 37. |
| 416 |

|
| 417 |

// match_limit of 8192: PartialMatch on text_good is expected to succeed.
RE_Options options_ml; |
| 418 |

options_ml.set_match_limit(8192); |
| 419 |

RE re("(\\w+)*b", options_ml); |
| 420 |

CHECK(re.PartialMatch(text_good) == true); |
| 421 |

CHECK(re.PartialMatch(text_bad) == false); |
| 422 |

CHECK(re.FullMatch(text_good) == false); |
| 423 |

CHECK(re.FullMatch(text_bad) == false); |
| 424 |

|
| 425 |

// match_limit lowered to 1024: the same PartialMatch is now expected to fail.
options_ml.set_match_limit(1024); |
| 426 |

RE re2("(\\w+)*b", options_ml); |
| 427 |

CHECK(re2.PartialMatch(text_good) == false); // because of match_limit |
| 428 |

CHECK(re2.PartialMatch(text_bad) == false); |
| 429 |

CHECK(re2.FullMatch(text_good) == false); |
| 430 |

CHECK(re2.FullMatch(text_bad) == false); |
| 431 |

|
| 432 |

// Same checks for match_limit_recursion: 50 lets text_good match, 10 does not.
RE_Options options_mlr; |
| 433 |

options_mlr.set_match_limit_recursion(50); |
| 434 |

RE re3("(\\w+)*b", options_mlr); |
| 435 |

CHECK(re3.PartialMatch(text_good) == true); |
| 436 |

CHECK(re3.PartialMatch(text_bad) == false); |
| 437 |

CHECK(re3.FullMatch(text_good) == false); |
| 438 |

CHECK(re3.FullMatch(text_bad) == false); |
| 439 |

|
| 440 |

options_mlr.set_match_limit_recursion(10); |
| 441 |

RE re4("(\\w+)*b", options_mlr); |
| 442 |

CHECK(re4.PartialMatch(text_good) == false); |
| 443 |

CHECK(re4.PartialMatch(text_bad) == false); |
| 444 |

CHECK(re4.FullMatch(text_good) == false); |
| 445 |

CHECK(re4.FullMatch(text_bad) == false); |
| 446 |

} |
| 447 |
|
|
| 448 |
|
// |
| 449 |
|
// Options tests contributed by |
| 450 |
|
// Giuseppe Maxia, CTO, Stardata s.r.l. |
| 451 |
|
// July 2005 |
| 452 |
|
// |
| 453 |
|
static void GetOneOptionResult( |
| 454 |
|
const char *option_name, |
| 455 |
|
const char *regex, |
| 456 |
|
const char *str, |
| 457 |
|
RE_Options options, |
| 458 |
|
bool full, |
| 459 |
|
string expected) { |
| 460 |
|
|
| 461 |
|
printf("Testing Option <%s>\n", option_name); |
| 462 |
|
if(VERBOSE_TEST) |
| 463 |
|
printf("/%s/ finds \"%s\" within \"%s\" \n", |
| 464 |
|
regex, |
| 465 |
|
expected.c_str(), |
| 466 |
|
str); |
| 467 |
|
string captured(""); |
| 468 |
|
if (full) |
| 469 |
|
RE(regex,options).FullMatch(str, &captured); |
| 470 |
|
else |
| 471 |
|
RE(regex,options).PartialMatch(str, &captured); |
| 472 |
|
CHECK_EQ(captured, expected); |
| 473 |
|
} |
| 474 |
|
|
| 475 |
|
// Compiles `regex` with `options`, runs it against `str`, and CHECKs the
// outcome.
//   option_name - label printed in the "Testing Option <...>" progress line.
//   full        - use FullMatch when true, PartialMatch otherwise.
//   assertive   - when true (the default) the match must succeed; when false
//                 the match must fail.
// A one-line description of the expectation is also printed when
// VERBOSE_TEST is set.
// NOTE(review): the extra "}" and the "Just make sure it doesn't crash"
// comment just before the final brace look like left-column residue from the
// older TestRecursion in this diff dump -- confirm before relying on them.
static void TestOneOption( |
| 476 |

const char *option_name, |
| 477 |

const char *regex, |
| 478 |

const char *str, |
| 479 |

RE_Options options, |
| 480 |

bool full, |
| 481 |

bool assertive = true) { |
| 482 |

|
| 483 |

printf("Testing Option <%s>\n", option_name); |
| 484 |

if (VERBOSE_TEST) |
| 485 |

printf("'%s' %s /%s/ \n", |
| 486 |

str, |
| 487 |

(assertive? "matches" : "doesn't match"), |
| 488 |

regex); |
| 489 |

if (assertive) { |
| 490 |

if (full) |
| 491 |

CHECK(RE(regex,options).FullMatch(str)); |
| 492 |

else |
| 493 |

CHECK(RE(regex,options).PartialMatch(str)); |
| 494 |

} else { |
| 495 |

if (full) |
| 496 |

CHECK(!RE(regex,options).FullMatch(str)); |
| 497 |

else |
| 498 |

CHECK(!RE(regex,options).PartialMatch(str)); |
| 499 |
} |
} |
| 500 |
// Just make sure it doesn't crash due to too much recursion. |
} |
| 501 |
|
|
| 502 |
|
// Exercises the caseless option three ways -- set on an RE_Options object,
// chained from set_caseless(), and via pcrecpp::CASELESS() -- and then checks
// that "HELLO" does not match "hello" once caseless is switched back off.
// NOTE(review): the duplicate "RE_Options options;", set_match_limit(match_limit),
// the domain-name RE and re.FullMatch(domain) use names not declared in this
// function; they appear to be interleaved left-column lines from the older
// TestRecursion in this diff dump -- confirm.
static void Test_CASELESS() { |
| 503 |
RE_Options options; |
RE_Options options; |
| 504 |
options.set_match_limit(match_limit); |
RE_Options options2; |
| 505 |
RE re("([a-zA-Z0-9]|-)+(\\.([a-zA-Z0-9]|-)+)*(\\.)?", options); |
|
| 506 |
re.FullMatch(domain); |
options.set_caseless(true); |
| 507 |

TestOneOption("CASELESS (class)", "HELLO", "hello", options, false); |
| 508 |

TestOneOption("CASELESS (class2)", "HELLO", "hello", options2.set_caseless(true), false); |
| 509 |

TestOneOption("CASELESS (class)", "^[A-Z]+$", "Hello", options, false); |
| 510 |

|
| 511 |

TestOneOption("CASELESS (function)", "HELLO", "hello", pcrecpp::CASELESS(), false); |
| 512 |

TestOneOption("CASELESS (function)", "^[A-Z]+$", "Hello", pcrecpp::CASELESS(), false); |
| 513 |

options.set_caseless(false); |
| 514 |

TestOneOption("no CASELESS", "HELLO", "hello", options, false, false); |
| 515 |
} |
} |
| 516 |
|
|
| 517 |
|
static void Test_MULTILINE() { |
| 518 |
|
RE_Options options; |
| 519 |
|
RE_Options options2; |
| 520 |
|
const char *str = "HELLO\n" "cruel\n" "world\n"; |
| 521 |
|
|
| 522 |
|
options.set_multiline(true); |
| 523 |
|
TestOneOption("MULTILINE (class)", "^cruel$", str, options, false); |
| 524 |
|
TestOneOption("MULTILINE (class2)", "^cruel$", str, options2.set_multiline(true), false); |
| 525 |
|
TestOneOption("MULTILINE (function)", "^cruel$", str, pcrecpp::MULTILINE(), false); |
| 526 |
|
options.set_multiline(false); |
| 527 |
|
TestOneOption("no MULTILINE", "^cruel$", str, options, false, false); |
| 528 |
|
} |
| 529 |
|
|
| 530 |
|
static void Test_DOTALL() { |
| 531 |
|
RE_Options options; |
| 532 |
|
RE_Options options2; |
| 533 |
|
const char *str = "HELLO\n" "cruel\n" "world"; |
| 534 |
|
|
| 535 |
|
options.set_dotall(true); |
| 536 |
|
TestOneOption("DOTALL (class)", "HELLO.*world", str, options, true); |
| 537 |
|
TestOneOption("DOTALL (class2)", "HELLO.*world", str, options2.set_dotall(true), true); |
| 538 |
|
TestOneOption("DOTALL (function)", "HELLO.*world", str, pcrecpp::DOTALL(), true); |
| 539 |
|
options.set_dotall(false); |
| 540 |
|
TestOneOption("no DOTALL", "HELLO.*world", str, options, true, false); |
| 541 |
|
} |
| 542 |
|
|
| 543 |
|
static void Test_DOLLAR_ENDONLY() { |
| 544 |
|
RE_Options options; |
| 545 |
|
RE_Options options2; |
| 546 |
|
const char *str = "HELLO world\n"; |
| 547 |
|
|
| 548 |
|
TestOneOption("no DOLLAR_ENDONLY", "world$", str, options, false); |
| 549 |
|
options.set_dollar_endonly(true); |
| 550 |
|
TestOneOption("DOLLAR_ENDONLY 1", "world$", str, options, false, false); |
| 551 |
|
TestOneOption("DOLLAR_ENDONLY 2", "world$", str, options2.set_dollar_endonly(true), false, false); |
| 552 |
|
} |
| 553 |
|
|
| 554 |
|
static void Test_EXTRA() { |
| 555 |
|
RE_Options options; |
| 556 |
|
const char *str = "HELLO"; |
| 557 |
|
|
| 558 |
|
options.set_extra(true); |
| 559 |
|
TestOneOption("EXTRA 1", "\\HELL\\O", str, options, true, false ); |
| 560 |
|
TestOneOption("EXTRA 2", "\\HELL\\O", str, RE_Options().set_extra(true), true, false ); |
| 561 |
|
options.set_extra(false); |
| 562 |
|
TestOneOption("no EXTRA", "\\HELL\\O", str, options, true ); |
| 563 |
|
} |
| 564 |
|
|
| 565 |
|
static void Test_EXTENDED() { |
| 566 |
|
RE_Options options; |
| 567 |
|
RE_Options options2; |
| 568 |
|
const char *str = "HELLO world"; |
| 569 |
|
|
| 570 |
|
options.set_extended(true); |
| 571 |
|
TestOneOption("EXTENDED (class)", "HELLO world", str, options, false, false); |
| 572 |
|
TestOneOption("EXTENDED (class2)", "HELLO world", str, options2.set_extended(true), false, false); |
| 573 |
|
TestOneOption("EXTENDED (class)", |
| 574 |
|
"^ HE L{2} O " |
| 575 |
|
"\\s+ " |
| 576 |
|
"\\w+ $ ", |
| 577 |
|
str, |
| 578 |
|
options, |
| 579 |
|
false); |
| 580 |
|
|
| 581 |
|
TestOneOption("EXTENDED (function)", "HELLO world", str, pcrecpp::EXTENDED(), false, false); |
| 582 |
|
TestOneOption("EXTENDED (function)", |
| 583 |
|
"^ HE L{2} O " |
| 584 |
|
"\\s+ " |
| 585 |
|
"\\w+ $ ", |
| 586 |
|
str, |
| 587 |
|
pcrecpp::EXTENDED(), |
| 588 |
|
false); |
| 589 |
|
|
| 590 |
|
options.set_extended(false); |
| 591 |
|
TestOneOption("no EXTENDED", "HELLO world", str, options, false); |
| 592 |
|
} |
| 593 |
|
|
| 594 |
|
static void Test_NO_AUTO_CAPTURE() { |
| 595 |
|
RE_Options options; |
| 596 |
|
const char *str = "HELLO world"; |
| 597 |
|
string captured; |
| 598 |
|
|
| 599 |
|
printf("Testing Option <no NO_AUTO_CAPTURE>\n"); |
| 600 |
|
if (VERBOSE_TEST) |
| 601 |
|
printf("parentheses capture text\n"); |
| 602 |
|
RE re("(world|universe)$", options); |
| 603 |
|
CHECK(re.Extract("\\1", str , &captured)); |
| 604 |
|
CHECK_EQ(captured, "world"); |
| 605 |
|
options.set_no_auto_capture(true); |
| 606 |
|
printf("testing Option <NO_AUTO_CAPTURE>\n"); |
| 607 |
|
if (VERBOSE_TEST) |
| 608 |
|
printf("parentheses do not capture text\n"); |
| 609 |
|
re.Extract("\\1",str, &captured ); |
| 610 |
|
CHECK_EQ(captured, "world"); |
| 611 |
|
} |
| 612 |
|
|
| 613 |
|
static void Test_UNGREEDY() { |
| 614 |
|
RE_Options options; |
| 615 |
|
const char *str = "HELLO, 'this' is the 'world'"; |
| 616 |
|
|
| 617 |
|
options.set_ungreedy(true); |
| 618 |
|
GetOneOptionResult("UNGREEDY 1", "('.*')", str, options, false, "'this'" ); |
| 619 |
|
GetOneOptionResult("UNGREEDY 2", "('.*')", str, RE_Options().set_ungreedy(true), false, "'this'" ); |
| 620 |
|
GetOneOptionResult("UNGREEDY", "('.*?')", str, options, false, "'this' is the 'world'" ); |
| 621 |
|
|
| 622 |
|
options.set_ungreedy(false); |
| 623 |
|
GetOneOptionResult("no UNGREEDY", "('.*')", str, options, false, "'this' is the 'world'" ); |
| 624 |
|
GetOneOptionResult("no UNGREEDY", "('.*?')", str, options, false, "'this'" ); |
| 625 |
|
} |
| 626 |
|
|
| 627 |
|
static void Test_all_options() { |
| 628 |
|
const char *str = "HELLO\n" "cruel\n" "world"; |
| 629 |
|
RE_Options options; |
| 630 |
|
options.set_all_options(PCRE_CASELESS | PCRE_DOTALL); |
| 631 |
|
|
| 632 |
|
TestOneOption("all_options (CASELESS|DOTALL)", "^hello.*WORLD", str , options, false); |
| 633 |
|
options.set_all_options(0); |
| 634 |
|
TestOneOption("all_options (0)", "^hello.*WORLD", str , options, false, false); |
| 635 |
|
options.set_all_options(PCRE_MULTILINE | PCRE_EXTENDED); |
| 636 |
|
|
| 637 |
|
TestOneOption("all_options (MULTILINE|EXTENDED)", " ^ c r u e l $ ", str, options, false); |
| 638 |
|
TestOneOption("all_options (MULTILINE|EXTENDED) with constructor", |
| 639 |
|
" ^ c r u e l $ ", |
| 640 |
|
str, |
| 641 |
|
RE_Options(PCRE_MULTILINE | PCRE_EXTENDED), |
| 642 |
|
false); |
| 643 |
|
|
| 644 |
|
TestOneOption("all_options (MULTILINE|EXTENDED) with concatenation", |
| 645 |
|
" ^ c r u e l $ ", |
| 646 |
|
str, |
| 647 |
|
RE_Options() |
| 648 |
|
.set_multiline(true) |
| 649 |
|
.set_extended(true), |
| 650 |
|
false); |
| 651 |
|
|
| 652 |
|
options.set_all_options(0); |
| 653 |
|
TestOneOption("all_options (0)", "^ c r u e l $", str, options, false, false); |
| 654 |
|
|
| 655 |
|
} |
| 656 |
|
|
| 657 |
|
// Runs every per-option test in a fixed order after printing a banner.
static void TestOptions() {
  typedef void (*OptionTest)();
  // Order is kept as-is so the printed progress lines stay in the same sequence.
  static const OptionTest kOptionTests[] = {
    Test_CASELESS,
    Test_MULTILINE,
    Test_DOTALL,
    Test_DOLLAR_ENDONLY,
    Test_EXTENDED,
    Test_NO_AUTO_CAPTURE,
    Test_UNGREEDY,
    Test_EXTRA,
    Test_all_options,
  };

  printf("Testing Options\n");
  const size_t count = sizeof(kOptionTests) / sizeof(kOptionTests[0]);
  for (size_t i = 0; i < count; ++i) {
    kOptionTests[i]();
  }
}
| 669 |
|
|
| 670 |
int main(int argc, char** argv) { |
int main(int argc, char** argv) { |
| 671 |
// Treat any flag as --help |
// Treat any flag as --help |
| 1101 |
CHECK(!re.error().empty()); |
CHECK(!re.error().empty()); |
| 1102 |
} |
} |
| 1103 |
|
|
| 1104 |
// Test that recursion is stopped: there will be some errors reported |
// Test that recursion is stopped |
| 1105 |
int matchlimit = 5000; |
TestRecursion(); |
| 1106 |
int bytes = 15 * 1024; // enough to crash if there was no match limit |
|
| 1107 |
TestRecursion(bytes, ".", matchlimit); |
// Test Options |
| 1108 |
TestRecursion(bytes, "a", matchlimit); |
if (getenv("VERBOSE_TEST") != NULL) |
| 1109 |
TestRecursion(bytes, "a.", matchlimit); |
VERBOSE_TEST = true; |
| 1110 |
TestRecursion(bytes, "ab.", matchlimit); |
TestOptions(); |
|
TestRecursion(bytes, "abc.", matchlimit); |
|
| 1111 |
|
|
| 1112 |
// Done |
// Done |
| 1113 |
printf("OK\n"); |
printf("OK\n"); |