/[pcre]/code/trunk/pcrecpp_unittest.cc
ViewVC logotype

Diff of /code/trunk/pcrecpp_unittest.cc

Parent Directory | Revision Log | View Patch

revision 77 by nigel, Sat Feb 24 21:40:45 2007 UTC | revision 96 by nigel, Fri Mar 2 13:10:43 2007 UTC
# Line 1  Line 1 
1  // Copyright (c) 2005, Google Inc.  // -*- coding: utf-8 -*-
2    //
3    // Copyright (c) 2005 - 2006, Google Inc.
4  // All rights reserved.  // All rights reserved.
5  //  //
6  // Redistribution and use in source and binary forms, with or without  // Redistribution and use in source and binary forms, with or without
# Line 32  Line 34 
34  // TODO: Test extractions for PartialMatch/Consume  // TODO: Test extractions for PartialMatch/Consume
35    
36  #include <stdio.h>  #include <stdio.h>
37    #include <cassert>
38  #include <vector>  #include <vector>
39  #include "config.h"  #include "config.h"
40  #include "pcrecpp.h"  #include "pcrecpp.h"
# Line 43  using pcrecpp::Hex; Line 46  using pcrecpp::Hex;
46  using pcrecpp::Octal;  using pcrecpp::Octal;
47  using pcrecpp::CRadix;  using pcrecpp::CRadix;
48    
49    static bool VERBOSE_TEST  = false;
50    
51  // CHECK dies with a fatal error if condition is not true.  It is *not*  // CHECK dies with a fatal error if condition is not true.  It is *not*
52  // controlled by NDEBUG, so the check will be executed regardless of  // controlled by NDEBUG, so the check will be executed regardless of
53  // compilation mode.  Therefore, it is safe to do things like:  // compilation mode.  Therefore, it is safe to do things like:
# Line 257  static void TestReplace() { Line 262  static void TestReplace() {
262        "aaaaa",        "aaaaa",
263        "bbaaaaa",        "bbaaaaa",
264        "bbabbabbabbabbabb" },        "bbabbabbabbabbabb" },
265        { "b*",
266          "bb",
267          "aa\naa\n",
268          "bbaa\naa\n",
269          "bbabbabb\nbbabbabb\nbb" },
270        { "b*",
271          "bb",
272          "aa\raa\r",
273          "bbaa\raa\r",
274          "bbabbabb\rbbabbabb\rbb" },
275        { "b*",
276          "bb",
277          "aa\r\naa\r\n",
278          "bbaa\r\naa\r\n",
279          "bbabbabb\r\nbbabbabb\r\nbb" },
280    #ifdef SUPPORT_UTF8
281        { "b*",
282          "bb",
283          "\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8",   // utf8
284          "bb\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8",
285          "bb\xE3\x83\x9B""bb""\xE3\x83\xBC""bb""\xE3\x83\xA0""bb""\xE3\x81\xB8""bb" },
286        { "b*",
287          "bb",
288          "\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n",   // utf8
289          "bb\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n",
290          ("bb\xE3\x83\x9B""bb\r\nbb""\xE3\x83\xBC""bb\rbb""\xE3\x83\xA0"
291           "bb\nbb""\xE3\x81\xB8""bb\r\nbb") },
292    #endif
293      { "", NULL, NULL, NULL, NULL }      { "", NULL, NULL, NULL, NULL }
294    };    };
295    
296    #ifdef SUPPORT_UTF8
297      const bool support_utf8 = true;
298    #else
299      const bool support_utf8 = false;
300    #endif
301    
302    for (const ReplaceTest *t = tests; t->original != NULL; ++t) {    for (const ReplaceTest *t = tests; t->original != NULL; ++t) {
303        RE re(t->regexp, RE_Options(PCRE_NEWLINE_CRLF).set_utf8(support_utf8));
304        assert(re.error().empty());
305      string one(t->original);      string one(t->original);
306      CHECK(RE(t->regexp).Replace(t->rewrite, &one));      CHECK(re.Replace(t->rewrite, &one));
307      CHECK_EQ(one, t->single);      CHECK_EQ(one, t->single);
308      string all(t->original);      string all(t->original);
309      CHECK(RE(t->regexp).GlobalReplace(t->rewrite, &all) > 0);      CHECK(re.GlobalReplace(t->rewrite, &all) > 0);
310      CHECK_EQ(all, t->global);      CHECK_EQ(all, t->global);
311    }    }
312    
313      // One final test: test \r\n replacement when we're not in CRLF mode
314      {
315        RE re("b*", RE_Options(PCRE_NEWLINE_CR).set_utf8(support_utf8));
316        assert(re.error().empty());
317        string all("aa\r\naa\r\n");
318        CHECK(re.GlobalReplace("bb", &all) > 0);
319        CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
320      }
321      {
322        RE re("b*", RE_Options(PCRE_NEWLINE_LF).set_utf8(support_utf8));
323        assert(re.error().empty());
324        string all("aa\r\naa\r\n");
325        CHECK(re.GlobalReplace("bb", &all) > 0);
326        CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
327      }
328      // TODO: test what happens when no PCRE_NEWLINE_* flag is set.
329      //       Alas, the answer depends on how pcre was compiled.
330  }  }
331    
332  static void TestExtract() {  static void TestExtract() {
# Line 346  static void TestMatchNumberPeculiarity() Line 405  static void TestMatchNumberPeculiarity()
405    CHECK_EQ(a, "");    CHECK_EQ(a, "");
406  }  }
407    
408  static void TestRecursion(int size, const char *pattern, int match_limit) {  static void TestRecursion() {
409    printf("Testing recursion\n");    printf("Testing recursion\n");
410    
411    // Fill up a string repeating the pattern given    // Get one string that passes (sometimes), one that never does.
412    string domain;    string text_good("abcdefghijk");
413    domain.resize(size);    string text_bad("acdefghijkl");
414    int patlen = strlen(pattern);  
415    for (int i = 0; i < size; ++i) {    // According to pcretest, matching text_good against (\w+)*b
416      domain[i] = pattern[i % patlen];    // requires match_limit of at least 8192, and match_recursion_limit
417      // of at least 37.
418    
419      RE_Options options_ml;
420      options_ml.set_match_limit(8192);
421      RE re("(\\w+)*b", options_ml);
422      CHECK(re.PartialMatch(text_good) == true);
423      CHECK(re.PartialMatch(text_bad) == false);
424      CHECK(re.FullMatch(text_good) == false);
425      CHECK(re.FullMatch(text_bad) == false);
426    
427      options_ml.set_match_limit(1024);
428      RE re2("(\\w+)*b", options_ml);
429      CHECK(re2.PartialMatch(text_good) == false);   // because of match_limit
430      CHECK(re2.PartialMatch(text_bad) == false);
431      CHECK(re2.FullMatch(text_good) == false);
432      CHECK(re2.FullMatch(text_bad) == false);
433    
434      RE_Options options_mlr;
435      options_mlr.set_match_limit_recursion(50);
436      RE re3("(\\w+)*b", options_mlr);
437      CHECK(re3.PartialMatch(text_good) == true);
438      CHECK(re3.PartialMatch(text_bad) == false);
439      CHECK(re3.FullMatch(text_good) == false);
440      CHECK(re3.FullMatch(text_bad) == false);
441    
442      options_mlr.set_match_limit_recursion(10);
443      RE re4("(\\w+)*b", options_mlr);
444      CHECK(re4.PartialMatch(text_good) == false);
445      CHECK(re4.PartialMatch(text_bad) == false);
446      CHECK(re4.FullMatch(text_good) == false);
447      CHECK(re4.FullMatch(text_bad) == false);
448    }
449    
450    // A meta-quoted string, interpreted as a pattern, should always match
451    // the original unquoted string.
452    static void TestQuoteMeta(string unquoted, RE_Options options = RE_Options()) {
453      string quoted = RE::QuoteMeta(unquoted);
454      RE re(quoted, options);
455      CHECK(re.FullMatch(unquoted));
456    }
457    
458    // A string containing meaningful regexp characters, which is then meta-
459    // quoted, should not generally match a string the unquoted string does.
460    static void NegativeTestQuoteMeta(string unquoted, string should_not_match,
461                                      RE_Options options = RE_Options()) {
462      string quoted = RE::QuoteMeta(unquoted);
463      RE re(quoted, options);
464      CHECK(!re.FullMatch(should_not_match));
465    }
466    
467    // Tests that quoted meta characters match their original strings,
468    // and that a few things that shouldn't match indeed do not.
469    static void TestQuotaMetaSimple() {
470      TestQuoteMeta("foo");
471      TestQuoteMeta("foo.bar");
472      TestQuoteMeta("foo\\.bar");
473      TestQuoteMeta("[1-9]");
474      TestQuoteMeta("1.5-2.0?");
475      TestQuoteMeta("\\d");
476      TestQuoteMeta("Who doesn't like ice cream?");
477      TestQuoteMeta("((a|b)c?d*e+[f-h]i)");
478      TestQuoteMeta("((?!)xxx).*yyy");
479      TestQuoteMeta("([");
480    }
481    
482    static void TestQuoteMetaSimpleNegative() {
483      NegativeTestQuoteMeta("foo", "bar");
484      NegativeTestQuoteMeta("...", "bar");
485      NegativeTestQuoteMeta("\\.", ".");
486      NegativeTestQuoteMeta("\\.", "..");
487      NegativeTestQuoteMeta("(a)", "a");
488      NegativeTestQuoteMeta("(a|b)", "a");
489      NegativeTestQuoteMeta("(a|b)", "(a)");
490      NegativeTestQuoteMeta("(a|b)", "a|b");
491      NegativeTestQuoteMeta("[0-9]", "0");
492      NegativeTestQuoteMeta("[0-9]", "0-9");
493      NegativeTestQuoteMeta("[0-9]", "[9]");
494      NegativeTestQuoteMeta("((?!)xxx)", "xxx");
495    }
496    
497    static void TestQuoteMetaLatin1() {
498      TestQuoteMeta("3\xb2 = 9");
499    }
500    
501    static void TestQuoteMetaUtf8() {
502    #ifdef SUPPORT_UTF8
503      TestQuoteMeta("Pl\xc3\xa1\x63ido Domingo", pcrecpp::UTF8());
504      TestQuoteMeta("xyz", pcrecpp::UTF8());            // No fancy utf8
505      TestQuoteMeta("\xc2\xb0", pcrecpp::UTF8());       // 2-byte utf8 (degree symbol)
506      TestQuoteMeta("27\xc2\xb0 degrees", pcrecpp::UTF8());  // As a middle character
507      TestQuoteMeta("\xe2\x80\xb3", pcrecpp::UTF8());   // 3-byte utf8 (double prime)
508      TestQuoteMeta("\xf0\x9d\x85\x9f", pcrecpp::UTF8()); // 4-byte utf8 (music note)
509      TestQuoteMeta("27\xc2\xb0"); // Interpreted as Latin-1, but should still work
510      NegativeTestQuoteMeta("27\xc2\xb0",               // 2-byte utf (degree symbol)
511                            "27\\\xc2\\\xb0",
512                            pcrecpp::UTF8());
513    #endif
514    }
515    
516    static void TestQuoteMetaAll() {
517      printf("Testing QuoteMeta\n");
518      TestQuotaMetaSimple();
519      TestQuoteMetaSimpleNegative();
520      TestQuoteMetaLatin1();
521      TestQuoteMetaUtf8();
522    }
523    
524    //
525    // Options tests contributed by
526    // Giuseppe Maxia, CTO, Stardata s.r.l.
527    // July 2005
528    //
529    static void GetOneOptionResult(
530                    const char *option_name,
531                    const char *regex,
532                    const char *str,
533                    RE_Options options,
534                    bool full,
535                    string expected) {
536    
537      printf("Testing Option <%s>\n", option_name);
538      if(VERBOSE_TEST)
539        printf("/%s/ finds \"%s\" within \"%s\" \n",
540                        regex,
541                        expected.c_str(),
542                        str);
543      string captured("");
544      if (full)
545        RE(regex,options).FullMatch(str, &captured);
546      else
547        RE(regex,options).PartialMatch(str, &captured);
548      CHECK_EQ(captured, expected);
549    }
550    
551    static void TestOneOption(
552                    const char *option_name,
553                    const char *regex,
554                    const char *str,
555                    RE_Options options,
556                    bool full,
557                    bool assertive = true) {
558    
559      printf("Testing Option <%s>\n", option_name);
560      if (VERBOSE_TEST)
561        printf("'%s' %s /%s/ \n",
562                      str,
563                      (assertive? "matches" : "doesn't match"),
564                      regex);
565      if (assertive) {
566        if (full)
567          CHECK(RE(regex,options).FullMatch(str));
568        else
569          CHECK(RE(regex,options).PartialMatch(str));
570      } else {
571        if (full)
572          CHECK(!RE(regex,options).FullMatch(str));
573        else
574          CHECK(!RE(regex,options).PartialMatch(str));
575    }    }
576    // Just make sure it doesn't crash due to too much recursion.  }
577    
578    static void Test_CASELESS() {
579    RE_Options options;    RE_Options options;
580    options.set_match_limit(match_limit);    RE_Options options2;
581    RE re("([a-zA-Z0-9]|-)+(\\.([a-zA-Z0-9]|-)+)*(\\.)?", options);  
582    re.FullMatch(domain);    options.set_caseless(true);
583      TestOneOption("CASELESS (class)",  "HELLO",    "hello", options, false);
584      TestOneOption("CASELESS (class2)", "HELLO",    "hello", options2.set_caseless(true), false);
585      TestOneOption("CASELESS (class)",  "^[A-Z]+$", "Hello", options, false);
586    
587      TestOneOption("CASELESS (function)", "HELLO",    "hello", pcrecpp::CASELESS(), false);
588      TestOneOption("CASELESS (function)", "^[A-Z]+$", "Hello", pcrecpp::CASELESS(), false);
589      options.set_caseless(false);
590      TestOneOption("no CASELESS", "HELLO",    "hello", options, false, false);
591    }
592    
593    static void Test_MULTILINE() {
594      RE_Options options;
595      RE_Options options2;
596      const char *str = "HELLO\n" "cruel\n" "world\n";
597    
598      options.set_multiline(true);
599      TestOneOption("MULTILINE (class)",    "^cruel$", str, options, false);
600      TestOneOption("MULTILINE (class2)",   "^cruel$", str, options2.set_multiline(true), false);
601      TestOneOption("MULTILINE (function)", "^cruel$", str, pcrecpp::MULTILINE(), false);
602      options.set_multiline(false);
603      TestOneOption("no MULTILINE", "^cruel$", str, options, false, false);
604    }
605    
606    static void Test_DOTALL() {
607      RE_Options options;
608      RE_Options options2;
609      const char *str = "HELLO\n" "cruel\n" "world";
610    
611      options.set_dotall(true);
612      TestOneOption("DOTALL (class)",    "HELLO.*world", str, options, true);
613      TestOneOption("DOTALL (class2)",   "HELLO.*world", str, options2.set_dotall(true), true);
614      TestOneOption("DOTALL (function)",    "HELLO.*world", str, pcrecpp::DOTALL(), true);
615      options.set_dotall(false);
616      TestOneOption("no DOTALL", "HELLO.*world", str, options, true, false);
617    }
618    
619    static void Test_DOLLAR_ENDONLY() {
620      RE_Options options;
621      RE_Options options2;
622      const char *str = "HELLO world\n";
623    
624      TestOneOption("no DOLLAR_ENDONLY", "world$", str, options, false);
625      options.set_dollar_endonly(true);
626      TestOneOption("DOLLAR_ENDONLY 1",    "world$", str, options, false, false);
627      TestOneOption("DOLLAR_ENDONLY 2",    "world$", str, options2.set_dollar_endonly(true), false, false);
628    }
629    
630    static void Test_EXTRA() {
631      RE_Options options;
632      const char *str = "HELLO";
633    
634      options.set_extra(true);
635      TestOneOption("EXTRA 1", "\\HELL\\O", str, options, true, false );
636      TestOneOption("EXTRA 2", "\\HELL\\O", str, RE_Options().set_extra(true), true, false );
637      options.set_extra(false);
638      TestOneOption("no EXTRA", "\\HELL\\O", str, options, true );
639    }
640    
641    static void Test_EXTENDED() {
642      RE_Options options;
643      RE_Options options2;
644      const char *str = "HELLO world";
645    
646      options.set_extended(true);
647      TestOneOption("EXTENDED (class)",    "HELLO world", str, options, false, false);
648      TestOneOption("EXTENDED (class2)",   "HELLO world", str, options2.set_extended(true), false, false);
649      TestOneOption("EXTENDED (class)",
650                        "^ HE L{2} O "
651                        "\\s+        "
652                        "\\w+ $      ",
653                        str,
654                        options,
655                        false);
656    
657      TestOneOption("EXTENDED (function)",    "HELLO world", str, pcrecpp::EXTENDED(), false, false);
658      TestOneOption("EXTENDED (function)",
659                        "^ HE L{2} O "
660                        "\\s+        "
661                        "\\w+ $      ",
662                        str,
663                        pcrecpp::EXTENDED(),
664                        false);
665    
666      options.set_extended(false);
667      TestOneOption("no EXTENDED", "HELLO world", str, options, false);
668  }  }
669    
670    static void Test_NO_AUTO_CAPTURE() {
671      RE_Options options;
672      const char *str = "HELLO world";
673      string captured;
674    
675      printf("Testing Option <no NO_AUTO_CAPTURE>\n");
676      if (VERBOSE_TEST)
677        printf("parentheses capture text\n");
678      RE re("(world|universe)$", options);
679      CHECK(re.Extract("\\1", str , &captured));
680      CHECK_EQ(captured, "world");
681      options.set_no_auto_capture(true);
682      printf("testing Option <NO_AUTO_CAPTURE>\n");
683      if (VERBOSE_TEST)
684        printf("parentheses do not capture text\n");
685      re.Extract("\\1",str, &captured );
686      CHECK_EQ(captured, "world");
687    }
688    
689    static void Test_UNGREEDY() {
690      RE_Options options;
691      const char *str = "HELLO, 'this' is the 'world'";
692    
693      options.set_ungreedy(true);
694      GetOneOptionResult("UNGREEDY 1", "('.*')", str, options, false, "'this'" );
695      GetOneOptionResult("UNGREEDY 2", "('.*')", str, RE_Options().set_ungreedy(true), false, "'this'" );
696      GetOneOptionResult("UNGREEDY", "('.*?')", str, options, false, "'this' is the 'world'" );
697    
698      options.set_ungreedy(false);
699      GetOneOptionResult("no UNGREEDY", "('.*')", str, options, false, "'this' is the 'world'" );
700      GetOneOptionResult("no UNGREEDY", "('.*?')", str, options, false, "'this'" );
701    }
702    
703    static void Test_all_options() {
704      const char *str = "HELLO\n" "cruel\n" "world";
705      RE_Options options;
706      options.set_all_options(PCRE_CASELESS | PCRE_DOTALL);
707    
708      TestOneOption("all_options (CASELESS|DOTALL)", "^hello.*WORLD", str , options, false);
709      options.set_all_options(0);
710      TestOneOption("all_options (0)", "^hello.*WORLD", str , options, false, false);
711      options.set_all_options(PCRE_MULTILINE | PCRE_EXTENDED);
712    
713      TestOneOption("all_options (MULTILINE|EXTENDED)", " ^ c r u e l $ ", str, options, false);
714      TestOneOption("all_options (MULTILINE|EXTENDED) with constructor",
715                      " ^ c r u e l $ ",
716                      str,
717                      RE_Options(PCRE_MULTILINE | PCRE_EXTENDED),
718                      false);
719    
720      TestOneOption("all_options (MULTILINE|EXTENDED) with concatenation",
721                      " ^ c r u e l $ ",
722                      str,
723                      RE_Options()
724                           .set_multiline(true)
725                           .set_extended(true),
726                      false);
727    
728      options.set_all_options(0);
729      TestOneOption("all_options (0)", "^ c r u e l $", str, options, false, false);
730    
731    }
732    
733    static void TestOptions() {
734      printf("Testing Options\n");
735      Test_CASELESS();
736      Test_MULTILINE();
737      Test_DOTALL();
738      Test_DOLLAR_ENDONLY();
739      Test_EXTENDED();
740      Test_NO_AUTO_CAPTURE();
741      Test_UNGREEDY();
742      Test_EXTRA();
743      Test_all_options();
744    }
745    
746    static void TestConstructors() {
747      printf("Testing constructors\n");
748    
749      RE_Options options;
750      options.set_dotall(true);
751      const char *str = "HELLO\n" "cruel\n" "world";
752    
753      RE orig("HELLO.*world", options);
754      CHECK(orig.FullMatch(str));
755    
756      RE copy1(orig);
757      CHECK(copy1.FullMatch(str));
758    
759      RE copy2("not a match");
760      CHECK(!copy2.FullMatch(str));
761      copy2 = copy1;
762      CHECK(copy2.FullMatch(str));
763      copy2 = orig;
764      CHECK(copy2.FullMatch(str));
765    
766      // Make sure when we assign to ourselves, nothing bad happens
767      orig = orig;
768      copy1 = copy1;
769      copy2 = copy2;
770      CHECK(orig.FullMatch(str));
771      CHECK(copy1.FullMatch(str));
772      CHECK(copy2.FullMatch(str));
773    }
774    
775  int main(int argc, char** argv) {  int main(int argc, char** argv) {
776    // Treat any flag as --help    // Treat any flag as --help
# Line 682  int main(int argc, char** argv) { Line 1090  int main(int argc, char** argv) {
1090    CHECK(RE("h.*o").PartialMatch("hello!"));    CHECK(RE("h.*o").PartialMatch("hello!"));
1091    CHECK(RE("((((((((((((((((((((x))))))))))))))))))))").PartialMatch("x"));    CHECK(RE("((((((((((((((((((((x))))))))))))))))))))").PartialMatch("x"));
1092    
1093      /***** other tests *****/
1094    
1095    RadixTests();    RadixTests();
1096    TestReplace();    TestReplace();
1097    TestExtract();    TestExtract();
1098    TestConsume();    TestConsume();
1099    TestFindAndConsume();    TestFindAndConsume();
1100      TestQuoteMetaAll();
1101    TestMatchNumberPeculiarity();    TestMatchNumberPeculiarity();
1102    
1103    // Check the pattern() accessor    // Check the pattern() accessor
# Line 798  int main(int argc, char** argv) { Line 1209  int main(int argc, char** argv) {
1209      CHECK(!re.error().empty());      CHECK(!re.error().empty());
1210    }    }
1211    
1212    // Test that recursion is stopped: there will be some errors reported    // Test that recursion is stopped
1213    int matchlimit = 5000;    TestRecursion();
1214    int bytes = 15 * 1024;  // enough to crash if there was no match limit  
1215    TestRecursion(bytes, ".", matchlimit);    // Test Options
1216    TestRecursion(bytes, "a", matchlimit);    if (getenv("VERBOSE_TEST") != NULL)
1217    TestRecursion(bytes, "a.", matchlimit);      VERBOSE_TEST  = true;
1218    TestRecursion(bytes, "ab.", matchlimit);    TestOptions();
1219    TestRecursion(bytes, "abc.", matchlimit);  
1220      // Test the constructors
1221      TestConstructors();
1222    
1223    // Done    // Done
1224    printf("OK\n");    printf("OK\n");

Legend:
Removed from v.77  
changed lines
  Added in v.96

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12