/[pcre]/code/trunk/pcrecpp_unittest.cc
ViewVC logotype

Contents of /code/trunk/pcrecpp_unittest.cc

Parent Directory | Revision Log


Revision 87 - (hide annotations) (download)
Sat Feb 24 21:41:21 2007 UTC (7 years, 7 months ago) by nigel
File size: 31985 byte(s)
Load pcre-6.5 into code/trunk.

1 nigel 77 // Copyright (c) 2005, Google Inc.
2     // All rights reserved.
3     //
4     // Redistribution and use in source and binary forms, with or without
5     // modification, are permitted provided that the following conditions are
6     // met:
7     //
8     // * Redistributions of source code must retain the above copyright
9     // notice, this list of conditions and the following disclaimer.
10     // * Redistributions in binary form must reproduce the above
11     // copyright notice, this list of conditions and the following disclaimer
12     // in the documentation and/or other materials provided with the
13     // distribution.
14     // * Neither the name of Google Inc. nor the names of its
15     // contributors may be used to endorse or promote products derived from
16     // this software without specific prior written permission.
17     //
18     // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19     // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20     // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21     // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22     // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23     // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24     // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25     // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26     // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27     // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28     // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29     //
30     // Author: Sanjay Ghemawat
31     //
32     // TODO: Test extractions for PartialMatch/Consume
33    
34     #include <stdio.h>
35     #include <vector>
36     #include "config.h"
37     #include "pcrecpp.h"
38    
39     using pcrecpp::StringPiece;
40     using pcrecpp::RE;
41     using pcrecpp::RE_Options;
42     using pcrecpp::Hex;
43     using pcrecpp::Octal;
44     using pcrecpp::CRadix;
45    
46 nigel 81 static bool VERBOSE_TEST = false;
47    
// CHECK dies with a fatal error if condition is not true.  It is *not*
// controlled by NDEBUG, so the check will be executed regardless of
// compilation mode.  Therefore, it is safe to do things like:
//    CHECK_EQ(fp->Write(x), 4)
// On failure it prints the file name, line number and the stringified
// condition to stderr, then terminates the process with exit status 1.
#define CHECK(condition) do {                           \
  if (!(condition)) {                                   \
    fprintf(stderr, "%s:%d: Check failed: %s\n",        \
            __FILE__, __LINE__, #condition);            \
    exit(1);                                            \
  }                                                     \
} while (0)

// CHECK_EQ dies unless a == b.  Both operands are parenthesized so that an
// argument containing an operator of lower precedence than `==` (for
// example `x & mask`) is compared as a whole expression.
#define CHECK_EQ(a, b) CHECK((a) == (b))
61    
62     static void Timing1(int num_iters) {
63     // Same pattern lots of times
64     RE pattern("ruby:\\d+");
65     StringPiece p("ruby:1234");
66     for (int j = num_iters; j > 0; j--) {
67     CHECK(pattern.FullMatch(p));
68     }
69     }
70    
71     static void Timing2(int num_iters) {
72     // Same pattern lots of times
73     RE pattern("ruby:(\\d+)");
74     int i;
75     for (int j = num_iters; j > 0; j--) {
76     CHECK(pattern.FullMatch("ruby:1234", &i));
77     CHECK_EQ(i, 1234);
78     }
79     }
80    
81     static void Timing3(int num_iters) {
82     string text_string;
83     for (int j = num_iters; j > 0; j--) {
84     text_string += "this is another line\n";
85     }
86    
87     RE line_matcher(".*\n");
88     string line;
89     StringPiece text(text_string);
90     int counter = 0;
91     while (line_matcher.Consume(&text)) {
92     counter++;
93     }
94     printf("Matched %d lines\n", counter);
95     }
96    
#if 0  // change to "#if 1" if you have a way of defining VirtualProcessSize()

// Checks for memory leaks by constructing 100000 distinct RE objects and
// comparing the process size sampled at iteration 50000 with the size
// after the loop.  Fails if the growth exceeds 2% of the final size.
// NOTE(review): VirtualProcessSize() is not defined in this file; it is
// assumed to return a value convertible to unsigned long long -- confirm.
static void LeakTest() {
  unsigned long long initial_size = 0;
  for (int i = 0; i < 100000; i++) {
    if (i == 50000) {
      initial_size = VirtualProcessSize();
      printf("Size after 50000: %llu\n", initial_size);
    }
    char buf[100];
    snprintf(buf, sizeof(buf), "pat%09d", i);
    RE newre(buf);
  }
  // Same type as initial_size, and the type the "%llu" conversion expects.
  const unsigned long long final_size = VirtualProcessSize();
  printf("Size after 100000: %llu\n", final_size);
  const double growth = double(final_size - initial_size) / final_size;
  printf("Growth: %0.2f%%", growth * 100);
  CHECK(growth < 0.02);    // Allow < 2% growth
}

#endif
119    
120     static void RadixTests() {
121     printf("Testing hex\n");
122    
123     #define CHECK_HEX(type, value) \
124     do { \
125     type v; \
126     CHECK(RE("([0-9a-fA-F]+)[uUlL]*").FullMatch(#value, Hex(&v))); \
127     CHECK_EQ(v, 0x ## value); \
128     CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0x" #value, CRadix(&v))); \
129     CHECK_EQ(v, 0x ## value); \
130     } while(0)
131    
132     CHECK_HEX(short, 2bad);
133     CHECK_HEX(unsigned short, 2badU);
134     CHECK_HEX(int, dead);
135     CHECK_HEX(unsigned int, deadU);
136     CHECK_HEX(long, 7eadbeefL);
137     CHECK_HEX(unsigned long, deadbeefUL);
138     #ifdef HAVE_LONG_LONG
139     CHECK_HEX(long long, 12345678deadbeefLL);
140     #endif
141     #ifdef HAVE_UNSIGNED_LONG_LONG
142     CHECK_HEX(unsigned long long, cafebabedeadbeefULL);
143     #endif
144    
145     #undef CHECK_HEX
146    
147     printf("Testing octal\n");
148    
149     #define CHECK_OCTAL(type, value) \
150     do { \
151     type v; \
152     CHECK(RE("([0-7]+)[uUlL]*").FullMatch(#value, Octal(&v))); \
153     CHECK_EQ(v, 0 ## value); \
154     CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0" #value, CRadix(&v))); \
155     CHECK_EQ(v, 0 ## value); \
156     } while(0)
157    
158     CHECK_OCTAL(short, 77777);
159     CHECK_OCTAL(unsigned short, 177777U);
160     CHECK_OCTAL(int, 17777777777);
161     CHECK_OCTAL(unsigned int, 37777777777U);
162     CHECK_OCTAL(long, 17777777777L);
163     CHECK_OCTAL(unsigned long, 37777777777UL);
164     #ifdef HAVE_LONG_LONG
165     CHECK_OCTAL(long long, 777777777777777777777LL);
166     #endif
167     #ifdef HAVE_UNSIGNED_LONG_LONG
168     CHECK_OCTAL(unsigned long long, 1777777777777777777777ULL);
169     #endif
170    
171     #undef CHECK_OCTAL
172    
173     printf("Testing decimal\n");
174    
175     #define CHECK_DECIMAL(type, value) \
176     do { \
177     type v; \
178     CHECK(RE("(-?[0-9]+)[uUlL]*").FullMatch(#value, &v)); \
179     CHECK_EQ(v, value); \
180     CHECK(RE("(-?[0-9a-fA-FxX]+)[uUlL]*").FullMatch(#value, CRadix(&v))); \
181     CHECK_EQ(v, value); \
182     } while(0)
183    
184     CHECK_DECIMAL(short, -1);
185     CHECK_DECIMAL(unsigned short, 9999);
186     CHECK_DECIMAL(int, -1000);
187     CHECK_DECIMAL(unsigned int, 12345U);
188     CHECK_DECIMAL(long, -10000000L);
189     CHECK_DECIMAL(unsigned long, 3083324652U);
190     #ifdef HAVE_LONG_LONG
191     CHECK_DECIMAL(long long, -100000000000000LL);
192     #endif
193     #ifdef HAVE_UNSIGNED_LONG_LONG
194     CHECK_DECIMAL(unsigned long long, 1234567890987654321ULL);
195     #endif
196    
197     #undef CHECK_DECIMAL
198    
199     }
200    
201     static void TestReplace() {
202     printf("Testing Replace\n");
203    
204     struct ReplaceTest {
205     const char *regexp;
206     const char *rewrite;
207     const char *original;
208     const char *single;
209     const char *global;
210     };
211     static const ReplaceTest tests[] = {
212     { "(qu|[b-df-hj-np-tv-z]*)([a-z]+)",
213     "\\2\\1ay",
214     "the quick brown fox jumps over the lazy dogs.",
215     "ethay quick brown fox jumps over the lazy dogs.",
216     "ethay ickquay ownbray oxfay umpsjay overay ethay azylay ogsday." },
217     { "\\w+",
218     "\\0-NOSPAM",
219     "paul.haahr@google.com",
220     "paul-NOSPAM.haahr@google.com",
221     "paul-NOSPAM.haahr-NOSPAM@google-NOSPAM.com-NOSPAM" },
222     { "^",
223     "(START)",
224     "foo",
225     "(START)foo",
226     "(START)foo" },
227     { "^",
228     "(START)",
229     "",
230     "(START)",
231     "(START)" },
232     { "$",
233     "(END)",
234     "",
235     "(END)",
236     "(END)" },
237     { "b",
238     "bb",
239     "ababababab",
240     "abbabababab",
241     "abbabbabbabbabb" },
242     { "b",
243     "bb",
244     "bbbbbb",
245     "bbbbbbb",
246     "bbbbbbbbbbbb" },
247     { "b+",
248     "bb",
249     "bbbbbb",
250     "bb",
251     "bb" },
252     { "b*",
253     "bb",
254     "bbbbbb",
255     "bb",
256     "bb" },
257     { "b*",
258     "bb",
259     "aaaaa",
260     "bbaaaaa",
261     "bbabbabbabbabbabb" },
262     { "", NULL, NULL, NULL, NULL }
263     };
264    
265     for (const ReplaceTest *t = tests; t->original != NULL; ++t) {
266     string one(t->original);
267     CHECK(RE(t->regexp).Replace(t->rewrite, &one));
268     CHECK_EQ(one, t->single);
269     string all(t->original);
270     CHECK(RE(t->regexp).GlobalReplace(t->rewrite, &all) > 0);
271     CHECK_EQ(all, t->global);
272     }
273     }
274    
275     static void TestExtract() {
276     printf("Testing Extract\n");
277    
278     string s;
279    
280     CHECK(RE("(.*)@([^.]*)").Extract("\\2!\\1", "boris@kremvax.ru", &s));
281     CHECK_EQ(s, "kremvax!boris");
282    
283     // check the RE interface as well
284     CHECK(RE(".*").Extract("'\\0'", "foo", &s));
285     CHECK_EQ(s, "'foo'");
286     CHECK(!RE("bar").Extract("'\\0'", "baz", &s));
287     CHECK_EQ(s, "'foo'");
288     }
289    
290     static void TestConsume() {
291     printf("Testing Consume\n");
292    
293     string word;
294    
295     string s(" aaa b!@#$@#$cccc");
296     StringPiece input(s);
297    
298     RE r("\\s*(\\w+)"); // matches a word, possibly proceeded by whitespace
299     CHECK(r.Consume(&input, &word));
300     CHECK_EQ(word, "aaa");
301     CHECK(r.Consume(&input, &word));
302     CHECK_EQ(word, "b");
303     CHECK(! r.Consume(&input, &word));
304     }
305    
306     static void TestFindAndConsume() {
307     printf("Testing FindAndConsume\n");
308    
309     string word;
310    
311     string s(" aaa b!@#$@#$cccc");
312     StringPiece input(s);
313    
314     RE r("(\\w+)"); // matches a word
315     CHECK(r.FindAndConsume(&input, &word));
316     CHECK_EQ(word, "aaa");
317     CHECK(r.FindAndConsume(&input, &word));
318     CHECK_EQ(word, "b");
319     CHECK(r.FindAndConsume(&input, &word));
320     CHECK_EQ(word, "cccc");
321     CHECK(! r.FindAndConsume(&input, &word));
322     }
323    
324     static void TestMatchNumberPeculiarity() {
325     printf("Testing match-number peculiaraity\n");
326    
327     string word1;
328     string word2;
329     string word3;
330    
331     RE r("(foo)|(bar)|(baz)");
332     CHECK(r.PartialMatch("foo", &word1, &word2, &word3));
333     CHECK_EQ(word1, "foo");
334     CHECK_EQ(word2, "");
335     CHECK_EQ(word3, "");
336     CHECK(r.PartialMatch("bar", &word1, &word2, &word3));
337     CHECK_EQ(word1, "");
338     CHECK_EQ(word2, "bar");
339     CHECK_EQ(word3, "");
340     CHECK(r.PartialMatch("baz", &word1, &word2, &word3));
341     CHECK_EQ(word1, "");
342     CHECK_EQ(word2, "");
343     CHECK_EQ(word3, "baz");
344     CHECK(!r.PartialMatch("f", &word1, &word2, &word3));
345    
346     string a;
347     CHECK(RE("(foo)|hello").FullMatch("hello", &a));
348     CHECK_EQ(a, "");
349     }
350    
351 nigel 87 static void TestRecursion() {
352 nigel 77 printf("Testing recursion\n");
353    
354 nigel 87 // Get one string that passes (sometimes), one that never does.
355     string text_good("abcdefghijk");
356     string text_bad("acdefghijkl");
357    
358     // According to pcretest, matching text_good against (\w+)*b
359     // requires match_limit of at least 8192, and match_recursion_limit
360     // of at least 37.
361    
362     RE_Options options_ml;
363     options_ml.set_match_limit(8192);
364     RE re("(\\w+)*b", options_ml);
365     CHECK(re.PartialMatch(text_good) == true);
366     CHECK(re.PartialMatch(text_bad) == false);
367     CHECK(re.FullMatch(text_good) == false);
368     CHECK(re.FullMatch(text_bad) == false);
369    
370     options_ml.set_match_limit(1024);
371     RE re2("(\\w+)*b", options_ml);
372     CHECK(re2.PartialMatch(text_good) == false); // because of match_limit
373     CHECK(re2.PartialMatch(text_bad) == false);
374     CHECK(re2.FullMatch(text_good) == false);
375     CHECK(re2.FullMatch(text_bad) == false);
376    
377     RE_Options options_mlr;
378     options_mlr.set_match_limit_recursion(50);
379     RE re3("(\\w+)*b", options_mlr);
380     CHECK(re3.PartialMatch(text_good) == true);
381     CHECK(re3.PartialMatch(text_bad) == false);
382     CHECK(re3.FullMatch(text_good) == false);
383     CHECK(re3.FullMatch(text_bad) == false);
384    
385     options_mlr.set_match_limit_recursion(10);
386     RE re4("(\\w+)*b", options_mlr);
387     CHECK(re4.PartialMatch(text_good) == false);
388     CHECK(re4.PartialMatch(text_bad) == false);
389     CHECK(re4.FullMatch(text_good) == false);
390     CHECK(re4.FullMatch(text_bad) == false);
391 nigel 77 }
392    
393 nigel 81 //
394     // Options tests contributed by
395     // Giuseppe Maxia, CTO, Stardata s.r.l.
396     // July 2005
397     //
398     static void GetOneOptionResult(
399     const char *option_name,
400     const char *regex,
401     const char *str,
402     RE_Options options,
403     bool full,
404     string expected) {
405 nigel 77
406 nigel 81 printf("Testing Option <%s>\n", option_name);
407     if(VERBOSE_TEST)
408     printf("/%s/ finds \"%s\" within \"%s\" \n",
409     regex,
410     expected.c_str(),
411     str);
412     string captured("");
413     if (full)
414     RE(regex,options).FullMatch(str, &captured);
415     else
416     RE(regex,options).PartialMatch(str, &captured);
417     CHECK_EQ(captured, expected);
418     }
419    
420     static void TestOneOption(
421     const char *option_name,
422     const char *regex,
423     const char *str,
424     RE_Options options,
425     bool full,
426     bool assertive = true) {
427    
428     printf("Testing Option <%s>\n", option_name);
429     if (VERBOSE_TEST)
430     printf("'%s' %s /%s/ \n",
431     str,
432     (assertive? "matches" : "doesn't match"),
433     regex);
434     if (assertive) {
435     if (full)
436     CHECK(RE(regex,options).FullMatch(str));
437     else
438     CHECK(RE(regex,options).PartialMatch(str));
439     } else {
440     if (full)
441     CHECK(!RE(regex,options).FullMatch(str));
442     else
443     CHECK(!RE(regex,options).PartialMatch(str));
444     }
445     }
446    
447     static void Test_CASELESS() {
448     RE_Options options;
449     RE_Options options2;
450    
451     options.set_caseless(true);
452     TestOneOption("CASELESS (class)", "HELLO", "hello", options, false);
453     TestOneOption("CASELESS (class2)", "HELLO", "hello", options2.set_caseless(true), false);
454     TestOneOption("CASELESS (class)", "^[A-Z]+$", "Hello", options, false);
455    
456     TestOneOption("CASELESS (function)", "HELLO", "hello", pcrecpp::CASELESS(), false);
457     TestOneOption("CASELESS (function)", "^[A-Z]+$", "Hello", pcrecpp::CASELESS(), false);
458     options.set_caseless(false);
459     TestOneOption("no CASELESS", "HELLO", "hello", options, false, false);
460     }
461    
462     static void Test_MULTILINE() {
463     RE_Options options;
464     RE_Options options2;
465     const char *str = "HELLO\n" "cruel\n" "world\n";
466    
467     options.set_multiline(true);
468     TestOneOption("MULTILINE (class)", "^cruel$", str, options, false);
469     TestOneOption("MULTILINE (class2)", "^cruel$", str, options2.set_multiline(true), false);
470     TestOneOption("MULTILINE (function)", "^cruel$", str, pcrecpp::MULTILINE(), false);
471     options.set_multiline(false);
472     TestOneOption("no MULTILINE", "^cruel$", str, options, false, false);
473     }
474    
475     static void Test_DOTALL() {
476     RE_Options options;
477     RE_Options options2;
478     const char *str = "HELLO\n" "cruel\n" "world";
479    
480     options.set_dotall(true);
481     TestOneOption("DOTALL (class)", "HELLO.*world", str, options, true);
482     TestOneOption("DOTALL (class2)", "HELLO.*world", str, options2.set_dotall(true), true);
483     TestOneOption("DOTALL (function)", "HELLO.*world", str, pcrecpp::DOTALL(), true);
484     options.set_dotall(false);
485     TestOneOption("no DOTALL", "HELLO.*world", str, options, true, false);
486     }
487    
488     static void Test_DOLLAR_ENDONLY() {
489     RE_Options options;
490     RE_Options options2;
491     const char *str = "HELLO world\n";
492    
493     TestOneOption("no DOLLAR_ENDONLY", "world$", str, options, false);
494     options.set_dollar_endonly(true);
495     TestOneOption("DOLLAR_ENDONLY 1", "world$", str, options, false, false);
496     TestOneOption("DOLLAR_ENDONLY 2", "world$", str, options2.set_dollar_endonly(true), false, false);
497     }
498    
499     static void Test_EXTRA() {
500     RE_Options options;
501     const char *str = "HELLO";
502    
503     options.set_extra(true);
504     TestOneOption("EXTRA 1", "\\HELL\\O", str, options, true, false );
505     TestOneOption("EXTRA 2", "\\HELL\\O", str, RE_Options().set_extra(true), true, false );
506     options.set_extra(false);
507     TestOneOption("no EXTRA", "\\HELL\\O", str, options, true );
508     }
509    
510     static void Test_EXTENDED() {
511     RE_Options options;
512     RE_Options options2;
513     const char *str = "HELLO world";
514    
515     options.set_extended(true);
516     TestOneOption("EXTENDED (class)", "HELLO world", str, options, false, false);
517     TestOneOption("EXTENDED (class2)", "HELLO world", str, options2.set_extended(true), false, false);
518     TestOneOption("EXTENDED (class)",
519     "^ HE L{2} O "
520     "\\s+ "
521     "\\w+ $ ",
522     str,
523     options,
524     false);
525    
526     TestOneOption("EXTENDED (function)", "HELLO world", str, pcrecpp::EXTENDED(), false, false);
527     TestOneOption("EXTENDED (function)",
528     "^ HE L{2} O "
529     "\\s+ "
530     "\\w+ $ ",
531     str,
532     pcrecpp::EXTENDED(),
533     false);
534    
535     options.set_extended(false);
536     TestOneOption("no EXTENDED", "HELLO world", str, options, false);
537     }
538    
539     static void Test_NO_AUTO_CAPTURE() {
540     RE_Options options;
541     const char *str = "HELLO world";
542     string captured;
543    
544     printf("Testing Option <no NO_AUTO_CAPTURE>\n");
545     if (VERBOSE_TEST)
546     printf("parentheses capture text\n");
547     RE re("(world|universe)$", options);
548     CHECK(re.Extract("\\1", str , &captured));
549     CHECK_EQ(captured, "world");
550     options.set_no_auto_capture(true);
551     printf("testing Option <NO_AUTO_CAPTURE>\n");
552     if (VERBOSE_TEST)
553     printf("parentheses do not capture text\n");
554     re.Extract("\\1",str, &captured );
555     CHECK_EQ(captured, "world");
556     }
557    
558     static void Test_UNGREEDY() {
559     RE_Options options;
560     const char *str = "HELLO, 'this' is the 'world'";
561    
562     options.set_ungreedy(true);
563     GetOneOptionResult("UNGREEDY 1", "('.*')", str, options, false, "'this'" );
564     GetOneOptionResult("UNGREEDY 2", "('.*')", str, RE_Options().set_ungreedy(true), false, "'this'" );
565     GetOneOptionResult("UNGREEDY", "('.*?')", str, options, false, "'this' is the 'world'" );
566    
567     options.set_ungreedy(false);
568     GetOneOptionResult("no UNGREEDY", "('.*')", str, options, false, "'this' is the 'world'" );
569     GetOneOptionResult("no UNGREEDY", "('.*?')", str, options, false, "'this'" );
570     }
571    
572     static void Test_all_options() {
573     const char *str = "HELLO\n" "cruel\n" "world";
574     RE_Options options;
575     options.set_all_options(PCRE_CASELESS | PCRE_DOTALL);
576    
577     TestOneOption("all_options (CASELESS|DOTALL)", "^hello.*WORLD", str , options, false);
578     options.set_all_options(0);
579     TestOneOption("all_options (0)", "^hello.*WORLD", str , options, false, false);
580     options.set_all_options(PCRE_MULTILINE | PCRE_EXTENDED);
581    
582     TestOneOption("all_options (MULTILINE|EXTENDED)", " ^ c r u e l $ ", str, options, false);
583     TestOneOption("all_options (MULTILINE|EXTENDED) with constructor",
584     " ^ c r u e l $ ",
585     str,
586     RE_Options(PCRE_MULTILINE | PCRE_EXTENDED),
587     false);
588    
589     TestOneOption("all_options (MULTILINE|EXTENDED) with concatenation",
590     " ^ c r u e l $ ",
591     str,
592     RE_Options()
593     .set_multiline(true)
594     .set_extended(true),
595     false);
596    
597     options.set_all_options(0);
598     TestOneOption("all_options (0)", "^ c r u e l $", str, options, false, false);
599    
600     }
601    
602     static void TestOptions() {
603     printf("Testing Options\n");
604     Test_CASELESS();
605     Test_MULTILINE();
606     Test_DOTALL();
607     Test_DOLLAR_ENDONLY();
608     Test_EXTENDED();
609     Test_NO_AUTO_CAPTURE();
610     Test_UNGREEDY();
611     Test_EXTRA();
612     Test_all_options();
613     }
614    
615 nigel 77 int main(int argc, char** argv) {
616     // Treat any flag as --help
617     if (argc > 1 && argv[1][0] == '-') {
618     printf("Usage: %s [timing1|timing2|timing3 num-iters]\n"
619     " If 'timingX ###' is specified, run the given timing test\n"
620     " with the given number of iterations, rather than running\n"
621     " the default corectness test.\n", argv[0]);
622     return 0;
623     }
624    
625     if (argc > 1) {
626     if ( argc == 2 || atoi(argv[2]) == 0) {
627     printf("timing mode needs a num-iters argument\n");
628     return 1;
629     }
630     if (!strcmp(argv[1], "timing1"))
631     Timing1(atoi(argv[2]));
632     else if (!strcmp(argv[1], "timing2"))
633     Timing2(atoi(argv[2]));
634     else if (!strcmp(argv[1], "timing3"))
635     Timing3(atoi(argv[2]));
636     else
637     printf("Unknown argument '%s'\n", argv[1]);
638     return 0;
639     }
640    
641     printf("Testing FullMatch\n");
642    
643     int i;
644     string s;
645    
646     /***** FullMatch with no args *****/
647    
648     CHECK(RE("h.*o").FullMatch("hello"));
649     CHECK(!RE("h.*o").FullMatch("othello"));
650     CHECK(!RE("h.*o").FullMatch("hello!"));
651    
652     /***** FullMatch with args *****/
653    
654     // Zero-arg
655     CHECK(RE("\\d+").FullMatch("1001"));
656    
657     // Single-arg
658     CHECK(RE("(\\d+)").FullMatch("1001", &i));
659     CHECK_EQ(i, 1001);
660     CHECK(RE("(-?\\d+)").FullMatch("-123", &i));
661     CHECK_EQ(i, -123);
662     CHECK(!RE("()\\d+").FullMatch("10", &i));
663     CHECK(!RE("(\\d+)").FullMatch("1234567890123456789012345678901234567890",
664     &i));
665    
666     // Digits surrounding integer-arg
667     CHECK(RE("1(\\d*)4").FullMatch("1234", &i));
668     CHECK_EQ(i, 23);
669     CHECK(RE("(\\d)\\d+").FullMatch("1234", &i));
670     CHECK_EQ(i, 1);
671     CHECK(RE("(-\\d)\\d+").FullMatch("-1234", &i));
672     CHECK_EQ(i, -1);
673     CHECK(RE("(\\d)").PartialMatch("1234", &i));
674     CHECK_EQ(i, 1);
675     CHECK(RE("(-\\d)").PartialMatch("-1234", &i));
676     CHECK_EQ(i, -1);
677    
678     // String-arg
679     CHECK(RE("h(.*)o").FullMatch("hello", &s));
680     CHECK_EQ(s, string("ell"));
681    
682     // StringPiece-arg
683     StringPiece sp;
684     CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &sp, &i));
685     CHECK_EQ(sp.size(), 4);
686     CHECK(memcmp(sp.data(), "ruby", 4) == 0);
687     CHECK_EQ(i, 1234);
688    
689     // Multi-arg
690     CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &s, &i));
691     CHECK_EQ(s, string("ruby"));
692     CHECK_EQ(i, 1234);
693    
694     // Ignored arg
695     CHECK(RE("(\\w+)(:)(\\d+)").FullMatch("ruby:1234", &s, (void*)NULL, &i));
696     CHECK_EQ(s, string("ruby"));
697     CHECK_EQ(i, 1234);
698    
699     // Type tests
700     {
701     char c;
702     CHECK(RE("(H)ello").FullMatch("Hello", &c));
703     CHECK_EQ(c, 'H');
704     }
705     {
706     unsigned char c;
707     CHECK(RE("(H)ello").FullMatch("Hello", &c));
708     CHECK_EQ(c, static_cast<unsigned char>('H'));
709     }
710     {
711     short v;
712     CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
713     CHECK(RE("(-?\\d+)").FullMatch("-100", &v)); CHECK_EQ(v, -100);
714     CHECK(RE("(-?\\d+)").FullMatch("32767", &v)); CHECK_EQ(v, 32767);
715     CHECK(RE("(-?\\d+)").FullMatch("-32768", &v)); CHECK_EQ(v, -32768);
716     CHECK(!RE("(-?\\d+)").FullMatch("-32769", &v));
717     CHECK(!RE("(-?\\d+)").FullMatch("32768", &v));
718     }
719     {
720     unsigned short v;
721     CHECK(RE("(\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
722     CHECK(RE("(\\d+)").FullMatch("32767", &v)); CHECK_EQ(v, 32767);
723     CHECK(RE("(\\d+)").FullMatch("65535", &v)); CHECK_EQ(v, 65535);
724     CHECK(!RE("(\\d+)").FullMatch("65536", &v));
725     }
726     {
727     int v;
728     static const int max_value = 0x7fffffff;
729     static const int min_value = -max_value - 1;
730     CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
731     CHECK(RE("(-?\\d+)").FullMatch("-100", &v)); CHECK_EQ(v, -100);
732     CHECK(RE("(-?\\d+)").FullMatch("2147483647", &v)); CHECK_EQ(v, max_value);
733     CHECK(RE("(-?\\d+)").FullMatch("-2147483648", &v)); CHECK_EQ(v, min_value);
734     CHECK(!RE("(-?\\d+)").FullMatch("-2147483649", &v));
735     CHECK(!RE("(-?\\d+)").FullMatch("2147483648", &v));
736     }
737     {
738     unsigned int v;
739     static const unsigned int max_value = 0xfffffffful;
740     CHECK(RE("(\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
741     CHECK(RE("(\\d+)").FullMatch("4294967295", &v)); CHECK_EQ(v, max_value);
742     CHECK(!RE("(\\d+)").FullMatch("4294967296", &v));
743     }
744     #ifdef HAVE_LONG_LONG
745     {
746     long long v;
747     static const long long max_value = 0x7fffffffffffffffLL;
748     static const long long min_value = -max_value - 1;
749     char buf[32];
750    
751     CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
752     CHECK(RE("(-?\\d+)").FullMatch("-100",&v)); CHECK_EQ(v, -100);
753    
754     snprintf(buf, sizeof(buf), "%lld", max_value);
755     CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
756    
757     snprintf(buf, sizeof(buf), "%lld", min_value);
758     CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, min_value);
759    
760     snprintf(buf, sizeof(buf), "%lld", max_value);
761     assert(buf[strlen(buf)-1] != '9');
762     buf[strlen(buf)-1]++;
763     CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
764    
765     snprintf(buf, sizeof(buf), "%lld", min_value);
766     assert(buf[strlen(buf)-1] != '9');
767     buf[strlen(buf)-1]++;
768     CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
769     }
770     #endif
771     #if defined HAVE_UNSIGNED_LONG_LONG && defined HAVE_LONG_LONG
772     {
773     unsigned long long v;
774     long long v2;
775     static const unsigned long long max_value = 0xffffffffffffffffULL;
776     char buf[32];
777    
778     CHECK(RE("(-?\\d+)").FullMatch("100",&v)); CHECK_EQ(v, 100);
779     CHECK(RE("(-?\\d+)").FullMatch("-100",&v2)); CHECK_EQ(v2, -100);
780    
781     snprintf(buf, sizeof(buf), "%llu", max_value);
782     CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
783    
784     assert(buf[strlen(buf)-1] != '9');
785     buf[strlen(buf)-1]++;
786     CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
787     }
788     #endif
789     {
790     float v;
791     CHECK(RE("(.*)").FullMatch("100", &v));
792     CHECK(RE("(.*)").FullMatch("-100.", &v));
793     CHECK(RE("(.*)").FullMatch("1e23", &v));
794     }
795     {
796     double v;
797     CHECK(RE("(.*)").FullMatch("100", &v));
798     CHECK(RE("(.*)").FullMatch("-100.", &v));
799     CHECK(RE("(.*)").FullMatch("1e23", &v));
800     }
801    
802     // Check that matching is fully anchored
803     CHECK(!RE("(\\d+)").FullMatch("x1001", &i));
804     CHECK(!RE("(\\d+)").FullMatch("1001x", &i));
805     CHECK(RE("x(\\d+)").FullMatch("x1001", &i)); CHECK_EQ(i, 1001);
806     CHECK(RE("(\\d+)x").FullMatch("1001x", &i)); CHECK_EQ(i, 1001);
807    
808     // Braces
809     CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcd"));
810     CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcde"));
811     CHECK(!RE("[0-9a-f+.-]{5,}").FullMatch("0abc"));
812    
813     // Complicated RE
814     CHECK(RE("foo|bar|[A-Z]").FullMatch("foo"));
815     CHECK(RE("foo|bar|[A-Z]").FullMatch("bar"));
816     CHECK(RE("foo|bar|[A-Z]").FullMatch("X"));
817     CHECK(!RE("foo|bar|[A-Z]").FullMatch("XY"));
818    
819     // Check full-match handling (needs '$' tacked on internally)
820     CHECK(RE("fo|foo").FullMatch("fo"));
821     CHECK(RE("fo|foo").FullMatch("foo"));
822     CHECK(RE("fo|foo$").FullMatch("fo"));
823     CHECK(RE("fo|foo$").FullMatch("foo"));
824     CHECK(RE("foo$").FullMatch("foo"));
825     CHECK(!RE("foo\\$").FullMatch("foo$bar"));
826     CHECK(!RE("fo|bar").FullMatch("fox"));
827    
828     // Uncomment the following if we change the handling of '$' to
829     // prevent it from matching a trailing newline
830     if (false) {
831     // Check that we don't get bitten by pcre's special handling of a
832     // '\n' at the end of the string matching '$'
833     CHECK(!RE("foo$").PartialMatch("foo\n"));
834     }
835    
836     // Number of args
837     int a[16];
838     CHECK(RE("").FullMatch(""));
839    
840     memset(a, 0, sizeof(0));
841     CHECK(RE("(\\d){1}").FullMatch("1",
842     &a[0]));
843     CHECK_EQ(a[0], 1);
844    
845     memset(a, 0, sizeof(0));
846     CHECK(RE("(\\d)(\\d)").FullMatch("12",
847     &a[0], &a[1]));
848     CHECK_EQ(a[0], 1);
849     CHECK_EQ(a[1], 2);
850    
851     memset(a, 0, sizeof(0));
852     CHECK(RE("(\\d)(\\d)(\\d)").FullMatch("123",
853     &a[0], &a[1], &a[2]));
854     CHECK_EQ(a[0], 1);
855     CHECK_EQ(a[1], 2);
856     CHECK_EQ(a[2], 3);
857    
858     memset(a, 0, sizeof(0));
859     CHECK(RE("(\\d)(\\d)(\\d)(\\d)").FullMatch("1234",
860     &a[0], &a[1], &a[2], &a[3]));
861     CHECK_EQ(a[0], 1);
862     CHECK_EQ(a[1], 2);
863     CHECK_EQ(a[2], 3);
864     CHECK_EQ(a[3], 4);
865    
866     memset(a, 0, sizeof(0));
867     CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("12345",
868     &a[0], &a[1], &a[2],
869     &a[3], &a[4]));
870     CHECK_EQ(a[0], 1);
871     CHECK_EQ(a[1], 2);
872     CHECK_EQ(a[2], 3);
873     CHECK_EQ(a[3], 4);
874     CHECK_EQ(a[4], 5);
875    
876     memset(a, 0, sizeof(0));
877     CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("123456",
878     &a[0], &a[1], &a[2],
879     &a[3], &a[4], &a[5]));
880     CHECK_EQ(a[0], 1);
881     CHECK_EQ(a[1], 2);
882     CHECK_EQ(a[2], 3);
883     CHECK_EQ(a[3], 4);
884     CHECK_EQ(a[4], 5);
885     CHECK_EQ(a[5], 6);
886    
887     memset(a, 0, sizeof(0));
888     CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("1234567",
889     &a[0], &a[1], &a[2], &a[3],
890     &a[4], &a[5], &a[6]));
891     CHECK_EQ(a[0], 1);
892     CHECK_EQ(a[1], 2);
893     CHECK_EQ(a[2], 3);
894     CHECK_EQ(a[3], 4);
895     CHECK_EQ(a[4], 5);
896     CHECK_EQ(a[5], 6);
897     CHECK_EQ(a[6], 7);
898    
899     memset(a, 0, sizeof(0));
900     CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)"
901     "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch(
902     "1234567890123456",
903     &a[0], &a[1], &a[2], &a[3],
904     &a[4], &a[5], &a[6], &a[7],
905     &a[8], &a[9], &a[10], &a[11],
906     &a[12], &a[13], &a[14], &a[15]));
907     CHECK_EQ(a[0], 1);
908     CHECK_EQ(a[1], 2);
909     CHECK_EQ(a[2], 3);
910     CHECK_EQ(a[3], 4);
911     CHECK_EQ(a[4], 5);
912     CHECK_EQ(a[5], 6);
913     CHECK_EQ(a[6], 7);
914     CHECK_EQ(a[7], 8);
915     CHECK_EQ(a[8], 9);
916     CHECK_EQ(a[9], 0);
917     CHECK_EQ(a[10], 1);
918     CHECK_EQ(a[11], 2);
919     CHECK_EQ(a[12], 3);
920     CHECK_EQ(a[13], 4);
921     CHECK_EQ(a[14], 5);
922     CHECK_EQ(a[15], 6);
923    
924     /***** PartialMatch *****/
925    
926     printf("Testing PartialMatch\n");
927    
928     CHECK(RE("h.*o").PartialMatch("hello"));
929     CHECK(RE("h.*o").PartialMatch("othello"));
930     CHECK(RE("h.*o").PartialMatch("hello!"));
931     CHECK(RE("((((((((((((((((((((x))))))))))))))))))))").PartialMatch("x"));
932    
933     RadixTests();
934     TestReplace();
935     TestExtract();
936     TestConsume();
937     TestFindAndConsume();
938     TestMatchNumberPeculiarity();
939    
940     // Check the pattern() accessor
941     {
942     const string kPattern = "http://([^/]+)/.*";
943     const RE re(kPattern);
944     CHECK_EQ(kPattern, re.pattern());
945     }
946    
947     // Check RE error field.
948     {
949     RE re("foo");
950     CHECK(re.error().empty()); // Must have no error
951     }
952    
953     #ifdef SUPPORT_UTF8
954     // Check UTF-8 handling
955     {
956     printf("Testing UTF-8 handling\n");
957    
958     // Three Japanese characters (nihongo)
959     const char utf8_string[] = {
960     0xe6, 0x97, 0xa5, // 65e5
961     0xe6, 0x9c, 0xac, // 672c
962     0xe8, 0xaa, 0x9e, // 8a9e
963     0
964     };
965     const char utf8_pattern[] = {
966     '.',
967     0xe6, 0x9c, 0xac, // 672c
968     '.',
969     0
970     };
971    
972     // Both should match in either mode, bytes or UTF-8
973     RE re_test1(".........");
974     CHECK(re_test1.FullMatch(utf8_string));
975     RE re_test2("...", pcrecpp::UTF8());
976     CHECK(re_test2.FullMatch(utf8_string));
977    
978     // Check that '.' matches one byte or UTF-8 character
979     // according to the mode.
980     string ss;
981     RE re_test3("(.)");
982     CHECK(re_test3.PartialMatch(utf8_string, &ss));
983     CHECK_EQ(ss, string("\xe6"));
984     RE re_test4("(.)", pcrecpp::UTF8());
985     CHECK(re_test4.PartialMatch(utf8_string, &ss));
986     CHECK_EQ(ss, string("\xe6\x97\xa5"));
987    
988     // Check that string matches itself in either mode
989     RE re_test5(utf8_string);
990     CHECK(re_test5.FullMatch(utf8_string));
991     RE re_test6(utf8_string, pcrecpp::UTF8());
992     CHECK(re_test6.FullMatch(utf8_string));
993    
994     // Check that pattern matches string only in UTF8 mode
995     RE re_test7(utf8_pattern);
996     CHECK(!re_test7.FullMatch(utf8_string));
997     RE re_test8(utf8_pattern, pcrecpp::UTF8());
998     CHECK(re_test8.FullMatch(utf8_string));
999     }
1000    
1001     // Check that ungreedy, UTF8 regular expressions don't match when they
1002     // oughtn't -- see bug 82246.
1003     {
1004     // This code always worked.
1005     const char* pattern = "\\w+X";
1006     const string target = "a aX";
1007     RE match_sentence(pattern);
1008     RE match_sentence_re(pattern, pcrecpp::UTF8());
1009    
1010     CHECK(!match_sentence.FullMatch(target));
1011     CHECK(!match_sentence_re.FullMatch(target));
1012     }
1013    
1014     {
1015     const char* pattern = "(?U)\\w+X";
1016     const string target = "a aX";
1017     RE match_sentence(pattern);
1018     RE match_sentence_re(pattern, pcrecpp::UTF8());
1019    
1020     CHECK(!match_sentence.FullMatch(target));
1021     CHECK(!match_sentence_re.FullMatch(target));
1022     }
1023     #endif /* def SUPPORT_UTF8 */
1024    
1025     printf("Testing error reporting\n");
1026    
1027     { RE re("a\\1"); CHECK(!re.error().empty()); }
1028     {
1029     RE re("a[x");
1030     CHECK(!re.error().empty());
1031     }
1032     {
1033     RE re("a[z-a]");
1034     CHECK(!re.error().empty());
1035     }
1036     {
1037     RE re("a[[:foobar:]]");
1038     CHECK(!re.error().empty());
1039     }
1040     {
1041     RE re("a(b");
1042     CHECK(!re.error().empty());
1043     }
1044     {
1045     RE re("a\\");
1046     CHECK(!re.error().empty());
1047     }
1048    
1049 nigel 87 // Test that recursion is stopped
1050     TestRecursion();
1051 nigel 77
1052 nigel 81 // Test Options
1053     if (getenv("VERBOSE_TEST") != NULL)
1054     VERBOSE_TEST = true;
1055     TestOptions();
1056    
1057 nigel 77 // Done
1058     printf("OK\n");
1059    
1060     return 0;
1061     }

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12