/[pcre]/code/tags/pcre-6.7/pcrecpp_unittest.cc
ViewVC logotype

Diff of /code/tags/pcre-6.7/pcrecpp_unittest.cc

Parent Directory | Revision Log | View Patch

revision 77 by nigel, Sat Feb 24 21:40:45 2007 UTC | revision 91 by nigel, Sat Feb 24 21:41:34 2007 UTC
# Line 32  Line 32 
32  // TODO: Test extractions for PartialMatch/Consume  // TODO: Test extractions for PartialMatch/Consume
33    
34  #include <stdio.h>  #include <stdio.h>
35    #include <cassert>
36  #include <vector>  #include <vector>
37  #include "config.h"  #include "config.h"
38  #include "pcrecpp.h"  #include "pcrecpp.h"
# Line 43  using pcrecpp::Hex; Line 44  using pcrecpp::Hex;
44  using pcrecpp::Octal;  using pcrecpp::Octal;
45  using pcrecpp::CRadix;  using pcrecpp::CRadix;
46    
47    static bool VERBOSE_TEST  = false;
48    
49  // CHECK dies with a fatal error if condition is not true.  It is *not*  // CHECK dies with a fatal error if condition is not true.  It is *not*
50  // controlled by NDEBUG, so the check will be executed regardless of  // controlled by NDEBUG, so the check will be executed regardless of
51  // compilation mode.  Therefore, it is safe to do things like:  // compilation mode.  Therefore, it is safe to do things like:
# Line 257  static void TestReplace() { Line 260  static void TestReplace() {
260        "aaaaa",        "aaaaa",
261        "bbaaaaa",        "bbaaaaa",
262        "bbabbabbabbabbabb" },        "bbabbabbabbabbabb" },
263        { "b*",
264          "bb",
265          "aa\naa\n",
266          "bbaa\naa\n",
267          "bbabbabb\nbbabbabb\nbb" },
268        { "b*",
269          "bb",
270          "aa\raa\r",
271          "bbaa\raa\r",
272          "bbabbabb\rbbabbabb\rbb" },
273        { "b*",
274          "bb",
275          "aa\r\naa\r\n",
276          "bbaa\r\naa\r\n",
277          "bbabbabb\r\nbbabbabb\r\nbb" },
278    #ifdef SUPPORT_UTF8
279        { "b*",
280          "bb",
281          "\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8",   // utf8
282          "bb\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8",
283          "bb\xE3\x83\x9B""bb""\xE3\x83\xBC""bb""\xE3\x83\xA0""bb""\xE3\x81\xB8""bb" },
284        { "b*",
285          "bb",
286          "\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n",   // utf8
287          "bb\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n",
288          ("bb\xE3\x83\x9B""bb\r\nbb""\xE3\x83\xBC""bb\rbb""\xE3\x83\xA0"
289           "bb\nbb""\xE3\x81\xB8""bb\r\nbb") },
290    #endif
291      { "", NULL, NULL, NULL, NULL }      { "", NULL, NULL, NULL, NULL }
292    };    };
293    
294    #ifdef SUPPORT_UTF8
295      const bool support_utf8 = true;
296    #else
297      const bool support_utf8 = false;
298    #endif
299    
300    for (const ReplaceTest *t = tests; t->original != NULL; ++t) {    for (const ReplaceTest *t = tests; t->original != NULL; ++t) {
301        RE re(t->regexp, RE_Options(PCRE_NEWLINE_CRLF).set_utf8(support_utf8));
302        assert(re.error().empty());
303      string one(t->original);      string one(t->original);
304      CHECK(RE(t->regexp).Replace(t->rewrite, &one));      CHECK(re.Replace(t->rewrite, &one));
305      CHECK_EQ(one, t->single);      CHECK_EQ(one, t->single);
306      string all(t->original);      string all(t->original);
307      CHECK(RE(t->regexp).GlobalReplace(t->rewrite, &all) > 0);      CHECK(re.GlobalReplace(t->rewrite, &all) > 0);
308      CHECK_EQ(all, t->global);      CHECK_EQ(all, t->global);
309    }    }
310    
311      // One final test: test \r\n replacement when we're not in CRLF mode
312      {
313        RE re("b*", RE_Options(PCRE_NEWLINE_CR).set_utf8(support_utf8));
314        assert(re.error().empty());
315        string all("aa\r\naa\r\n");
316        CHECK(re.GlobalReplace("bb", &all) > 0);
317        CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
318      }
319      {
320        RE re("b*", RE_Options(PCRE_NEWLINE_LF).set_utf8(support_utf8));
321        assert(re.error().empty());
322        string all("aa\r\naa\r\n");
323        CHECK(re.GlobalReplace("bb", &all) > 0);
324        CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
325      }
326      // TODO: test what happens when no PCRE_NEWLINE_* flag is set.
327      //       Alas, the answer depends on how pcre was compiled.
328  }  }
329    
330  static void TestExtract() {  static void TestExtract() {
# Line 346  static void TestMatchNumberPeculiarity() Line 403  static void TestMatchNumberPeculiarity()
403    CHECK_EQ(a, "");    CHECK_EQ(a, "");
404  }  }
405    
406  static void TestRecursion(int size, const char *pattern, int match_limit) {  static void TestRecursion() {
407    printf("Testing recursion\n");    printf("Testing recursion\n");
408    
409    // Fill up a string repeating the pattern given    // Get one string that passes (sometimes), one that never does.
410    string domain;    string text_good("abcdefghijk");
411    domain.resize(size);    string text_bad("acdefghijkl");
412    int patlen = strlen(pattern);  
413    for (int i = 0; i < size; ++i) {    // According to pcretest, matching text_good against (\w+)*b
414      domain[i] = pattern[i % patlen];    // requires match_limit of at least 8192, and match_recursion_limit
415      // of at least 37.
416    
417      RE_Options options_ml;
418      options_ml.set_match_limit(8192);
419      RE re("(\\w+)*b", options_ml);
420      CHECK(re.PartialMatch(text_good) == true);
421      CHECK(re.PartialMatch(text_bad) == false);
422      CHECK(re.FullMatch(text_good) == false);
423      CHECK(re.FullMatch(text_bad) == false);
424    
425      options_ml.set_match_limit(1024);
426      RE re2("(\\w+)*b", options_ml);
427      CHECK(re2.PartialMatch(text_good) == false);   // because of match_limit
428      CHECK(re2.PartialMatch(text_bad) == false);
429      CHECK(re2.FullMatch(text_good) == false);
430      CHECK(re2.FullMatch(text_bad) == false);
431    
432      RE_Options options_mlr;
433      options_mlr.set_match_limit_recursion(50);
434      RE re3("(\\w+)*b", options_mlr);
435      CHECK(re3.PartialMatch(text_good) == true);
436      CHECK(re3.PartialMatch(text_bad) == false);
437      CHECK(re3.FullMatch(text_good) == false);
438      CHECK(re3.FullMatch(text_bad) == false);
439    
440      options_mlr.set_match_limit_recursion(10);
441      RE re4("(\\w+)*b", options_mlr);
442      CHECK(re4.PartialMatch(text_good) == false);
443      CHECK(re4.PartialMatch(text_bad) == false);
444      CHECK(re4.FullMatch(text_good) == false);
445      CHECK(re4.FullMatch(text_bad) == false);
446    }
447    
448    //
449    // Options tests contributed by
450    // Giuseppe Maxia, CTO, Stardata s.r.l.
451    // July 2005
452    //
453    static void GetOneOptionResult(
454                    const char *option_name,
455                    const char *regex,
456                    const char *str,
457                    RE_Options options,
458                    bool full,
459                    string expected) {
460    
461      printf("Testing Option <%s>\n", option_name);
462      if(VERBOSE_TEST)
463        printf("/%s/ finds \"%s\" within \"%s\" \n",
464                        regex,
465                        expected.c_str(),
466                        str);
467      string captured("");
468      if (full)
469        RE(regex,options).FullMatch(str, &captured);
470      else
471        RE(regex,options).PartialMatch(str, &captured);
472      CHECK_EQ(captured, expected);
473    }
474    
475    static void TestOneOption(
476                    const char *option_name,
477                    const char *regex,
478                    const char *str,
479                    RE_Options options,
480                    bool full,
481                    bool assertive = true) {
482    
483      printf("Testing Option <%s>\n", option_name);
484      if (VERBOSE_TEST)
485        printf("'%s' %s /%s/ \n",
486                      str,
487                      (assertive? "matches" : "doesn't match"),
488                      regex);
489      if (assertive) {
490        if (full)
491          CHECK(RE(regex,options).FullMatch(str));
492        else
493          CHECK(RE(regex,options).PartialMatch(str));
494      } else {
495        if (full)
496          CHECK(!RE(regex,options).FullMatch(str));
497        else
498          CHECK(!RE(regex,options).PartialMatch(str));
499    }    }
500    // Just make sure it doesn't crash due to too much recursion.  }
501    
502    static void Test_CASELESS() {
503    RE_Options options;    RE_Options options;
504    options.set_match_limit(match_limit);    RE_Options options2;
505    RE re("([a-zA-Z0-9]|-)+(\\.([a-zA-Z0-9]|-)+)*(\\.)?", options);  
506    re.FullMatch(domain);    options.set_caseless(true);
507      TestOneOption("CASELESS (class)",  "HELLO",    "hello", options, false);
508      TestOneOption("CASELESS (class2)", "HELLO",    "hello", options2.set_caseless(true), false);
509      TestOneOption("CASELESS (class)",  "^[A-Z]+$", "Hello", options, false);
510    
511      TestOneOption("CASELESS (function)", "HELLO",    "hello", pcrecpp::CASELESS(), false);
512      TestOneOption("CASELESS (function)", "^[A-Z]+$", "Hello", pcrecpp::CASELESS(), false);
513      options.set_caseless(false);
514      TestOneOption("no CASELESS", "HELLO",    "hello", options, false, false);
515  }  }
516    
517    static void Test_MULTILINE() {
518      RE_Options options;
519      RE_Options options2;
520      const char *str = "HELLO\n" "cruel\n" "world\n";
521    
522      options.set_multiline(true);
523      TestOneOption("MULTILINE (class)",    "^cruel$", str, options, false);
524      TestOneOption("MULTILINE (class2)",   "^cruel$", str, options2.set_multiline(true), false);
525      TestOneOption("MULTILINE (function)", "^cruel$", str, pcrecpp::MULTILINE(), false);
526      options.set_multiline(false);
527      TestOneOption("no MULTILINE", "^cruel$", str, options, false, false);
528    }
529    
530    static void Test_DOTALL() {
531      RE_Options options;
532      RE_Options options2;
533      const char *str = "HELLO\n" "cruel\n" "world";
534    
535      options.set_dotall(true);
536      TestOneOption("DOTALL (class)",    "HELLO.*world", str, options, true);
537      TestOneOption("DOTALL (class2)",   "HELLO.*world", str, options2.set_dotall(true), true);
538      TestOneOption("DOTALL (function)",    "HELLO.*world", str, pcrecpp::DOTALL(), true);
539      options.set_dotall(false);
540      TestOneOption("no DOTALL", "HELLO.*world", str, options, true, false);
541    }
542    
543    static void Test_DOLLAR_ENDONLY() {
544      RE_Options options;
545      RE_Options options2;
546      const char *str = "HELLO world\n";
547    
548      TestOneOption("no DOLLAR_ENDONLY", "world$", str, options, false);
549      options.set_dollar_endonly(true);
550      TestOneOption("DOLLAR_ENDONLY 1",    "world$", str, options, false, false);
551      TestOneOption("DOLLAR_ENDONLY 2",    "world$", str, options2.set_dollar_endonly(true), false, false);
552    }
553    
554    static void Test_EXTRA() {
555      RE_Options options;
556      const char *str = "HELLO";
557    
558      options.set_extra(true);
559      TestOneOption("EXTRA 1", "\\HELL\\O", str, options, true, false );
560      TestOneOption("EXTRA 2", "\\HELL\\O", str, RE_Options().set_extra(true), true, false );
561      options.set_extra(false);
562      TestOneOption("no EXTRA", "\\HELL\\O", str, options, true );
563    }
564    
565    static void Test_EXTENDED() {
566      RE_Options options;
567      RE_Options options2;
568      const char *str = "HELLO world";
569    
570      options.set_extended(true);
571      TestOneOption("EXTENDED (class)",    "HELLO world", str, options, false, false);
572      TestOneOption("EXTENDED (class2)",   "HELLO world", str, options2.set_extended(true), false, false);
573      TestOneOption("EXTENDED (class)",
574                        "^ HE L{2} O "
575                        "\\s+        "
576                        "\\w+ $      ",
577                        str,
578                        options,
579                        false);
580    
581      TestOneOption("EXTENDED (function)",    "HELLO world", str, pcrecpp::EXTENDED(), false, false);
582      TestOneOption("EXTENDED (function)",
583                        "^ HE L{2} O "
584                        "\\s+        "
585                        "\\w+ $      ",
586                        str,
587                        pcrecpp::EXTENDED(),
588                        false);
589    
590      options.set_extended(false);
591      TestOneOption("no EXTENDED", "HELLO world", str, options, false);
592    }
593    
594    static void Test_NO_AUTO_CAPTURE() {
595      RE_Options options;
596      const char *str = "HELLO world";
597      string captured;
598    
599      printf("Testing Option <no NO_AUTO_CAPTURE>\n");
600      if (VERBOSE_TEST)
601        printf("parentheses capture text\n");
602      RE re("(world|universe)$", options);
603      CHECK(re.Extract("\\1", str , &captured));
604      CHECK_EQ(captured, "world");
605      options.set_no_auto_capture(true);
606      printf("testing Option <NO_AUTO_CAPTURE>\n");
607      if (VERBOSE_TEST)
608        printf("parentheses do not capture text\n");
609      re.Extract("\\1",str, &captured );
610      CHECK_EQ(captured, "world");
611    }
612    
613    static void Test_UNGREEDY() {
614      RE_Options options;
615      const char *str = "HELLO, 'this' is the 'world'";
616    
617      options.set_ungreedy(true);
618      GetOneOptionResult("UNGREEDY 1", "('.*')", str, options, false, "'this'" );
619      GetOneOptionResult("UNGREEDY 2", "('.*')", str, RE_Options().set_ungreedy(true), false, "'this'" );
620      GetOneOptionResult("UNGREEDY", "('.*?')", str, options, false, "'this' is the 'world'" );
621    
622      options.set_ungreedy(false);
623      GetOneOptionResult("no UNGREEDY", "('.*')", str, options, false, "'this' is the 'world'" );
624      GetOneOptionResult("no UNGREEDY", "('.*?')", str, options, false, "'this'" );
625    }
626    
627    static void Test_all_options() {
628      const char *str = "HELLO\n" "cruel\n" "world";
629      RE_Options options;
630      options.set_all_options(PCRE_CASELESS | PCRE_DOTALL);
631    
632      TestOneOption("all_options (CASELESS|DOTALL)", "^hello.*WORLD", str , options, false);
633      options.set_all_options(0);
634      TestOneOption("all_options (0)", "^hello.*WORLD", str , options, false, false);
635      options.set_all_options(PCRE_MULTILINE | PCRE_EXTENDED);
636    
637      TestOneOption("all_options (MULTILINE|EXTENDED)", " ^ c r u e l $ ", str, options, false);
638      TestOneOption("all_options (MULTILINE|EXTENDED) with constructor",
639                      " ^ c r u e l $ ",
640                      str,
641                      RE_Options(PCRE_MULTILINE | PCRE_EXTENDED),
642                      false);
643    
644      TestOneOption("all_options (MULTILINE|EXTENDED) with concatenation",
645                      " ^ c r u e l $ ",
646                      str,
647                      RE_Options()
648                           .set_multiline(true)
649                           .set_extended(true),
650                      false);
651    
652      options.set_all_options(0);
653      TestOneOption("all_options (0)", "^ c r u e l $", str, options, false, false);
654    
655    }
656    
657    static void TestOptions() {
658      printf("Testing Options\n");
659      Test_CASELESS();
660      Test_MULTILINE();
661      Test_DOTALL();
662      Test_DOLLAR_ENDONLY();
663      Test_EXTENDED();
664      Test_NO_AUTO_CAPTURE();
665      Test_UNGREEDY();
666      Test_EXTRA();
667      Test_all_options();
668    }
669    
670  int main(int argc, char** argv) {  int main(int argc, char** argv) {
671    // Treat any flag as --help    // Treat any flag as --help
# Line 798  int main(int argc, char** argv) { Line 1101  int main(int argc, char** argv) {
1101      CHECK(!re.error().empty());      CHECK(!re.error().empty());
1102    }    }
1103    
1104    // Test that recursion is stopped: there will be some errors reported    // Test that recursion is stopped
1105    int matchlimit = 5000;    TestRecursion();
1106    int bytes = 15 * 1024;  // enough to crash if there was no match limit  
1107    TestRecursion(bytes, ".", matchlimit);    // Test Options
1108    TestRecursion(bytes, "a", matchlimit);    if (getenv("VERBOSE_TEST") != NULL)
1109    TestRecursion(bytes, "a.", matchlimit);      VERBOSE_TEST  = true;
1110    TestRecursion(bytes, "ab.", matchlimit);    TestOptions();
   TestRecursion(bytes, "abc.", matchlimit);  
1111    
1112    // Done    // Done
1113    printf("OK\n");    printf("OK\n");

Legend:
Removed from v.77  
Changed lines
  Added in v.91

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12