/[pcre]/code/trunk/pcrecpp_unittest.cc
ViewVC logotype

Contents of /code/trunk/pcrecpp_unittest.cc

Parent Directory | Revision Log


Revision 91 - (hide annotations) (download)
Sat Feb 24 21:41:34 2007 UTC (7 years, 2 months ago) by nigel
File size: 33739 byte(s)
Load pcre-6.7 into code/trunk.

1 nigel 77 // Copyright (c) 2005, Google Inc.
2     // All rights reserved.
3     //
4     // Redistribution and use in source and binary forms, with or without
5     // modification, are permitted provided that the following conditions are
6     // met:
7     //
8     // * Redistributions of source code must retain the above copyright
9     // notice, this list of conditions and the following disclaimer.
10     // * Redistributions in binary form must reproduce the above
11     // copyright notice, this list of conditions and the following disclaimer
12     // in the documentation and/or other materials provided with the
13     // distribution.
14     // * Neither the name of Google Inc. nor the names of its
15     // contributors may be used to endorse or promote products derived from
16     // this software without specific prior written permission.
17     //
18     // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19     // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20     // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21     // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22     // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23     // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24     // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25     // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26     // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27     // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28     // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29     //
30     // Author: Sanjay Ghemawat
31     //
32     // TODO: Test extractions for PartialMatch/Consume
33    
34     #include <stdio.h>
35 nigel 91 #include <cassert>
36 nigel 77 #include <vector>
37     #include "config.h"
38     #include "pcrecpp.h"
39    
40     using pcrecpp::StringPiece;
41     using pcrecpp::RE;
42     using pcrecpp::RE_Options;
43     using pcrecpp::Hex;
44     using pcrecpp::Octal;
45     using pcrecpp::CRadix;
46    
47 nigel 81 static bool VERBOSE_TEST = false;
48    
// CHECK dies with a fatal error if condition is not true.  It is *not*
// controlled by NDEBUG, so the check will be executed regardless of
// compilation mode.  Therefore, it is safe to do things like:
//    CHECK_EQ(fp->Write(x), 4)
// On failure it prints "<file>:<line>: Check failed: <condition>" to stderr
// and terminates the process with exit status 1.
#define CHECK(condition) do {                           \
  if (!(condition)) {                                   \
    fprintf(stderr, "%s:%d: Check failed: %s\n",        \
            __FILE__, __LINE__, #condition);            \
    exit(1);                                            \
  }                                                     \
} while (0)

// CHECK_EQ(a, b) dies unless a == b.  Both operands are parenthesized so
// that arguments containing operators of lower precedence than == (such as
// &, |, ?:) are compared as written instead of being re-associated.
#define CHECK_EQ(a, b) CHECK((a) == (b))
62    
63     static void Timing1(int num_iters) {
64     // Same pattern lots of times
65     RE pattern("ruby:\\d+");
66     StringPiece p("ruby:1234");
67     for (int j = num_iters; j > 0; j--) {
68     CHECK(pattern.FullMatch(p));
69     }
70     }
71    
72     static void Timing2(int num_iters) {
73     // Same pattern lots of times
74     RE pattern("ruby:(\\d+)");
75     int i;
76     for (int j = num_iters; j > 0; j--) {
77     CHECK(pattern.FullMatch("ruby:1234", &i));
78     CHECK_EQ(i, 1234);
79     }
80     }
81    
82     static void Timing3(int num_iters) {
83     string text_string;
84     for (int j = num_iters; j > 0; j--) {
85     text_string += "this is another line\n";
86     }
87    
88     RE line_matcher(".*\n");
89     string line;
90     StringPiece text(text_string);
91     int counter = 0;
92     while (line_matcher.Consume(&text)) {
93     counter++;
94     }
95     printf("Matched %d lines\n", counter);
96     }
97    
98     #if 0 // uncomment this if you have a way of defining VirtualProcessSize()
99    
100     static void LeakTest() {
101     // Check for memory leaks
102     unsigned long long initial_size = 0;
103     for (int i = 0; i < 100000; i++) {
104     if (i == 50000) {
105     initial_size = VirtualProcessSize();
106     printf("Size after 50000: %llu\n", initial_size);
107     }
108     char buf[100];
109     snprintf(buf, sizeof(buf), "pat%09d", i);
110     RE newre(buf);
111     }
112     uint64 final_size = VirtualProcessSize();
113     printf("Size after 100000: %llu\n", final_size);
114     const double growth = double(final_size - initial_size) / final_size;
115     printf("Growth: %0.2f%%", growth * 100);
116     CHECK(growth < 0.02); // Allow < 2% growth
117     }
118    
119     #endif
120    
121     static void RadixTests() {
122     printf("Testing hex\n");
123    
124     #define CHECK_HEX(type, value) \
125     do { \
126     type v; \
127     CHECK(RE("([0-9a-fA-F]+)[uUlL]*").FullMatch(#value, Hex(&v))); \
128     CHECK_EQ(v, 0x ## value); \
129     CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0x" #value, CRadix(&v))); \
130     CHECK_EQ(v, 0x ## value); \
131     } while(0)
132    
133     CHECK_HEX(short, 2bad);
134     CHECK_HEX(unsigned short, 2badU);
135     CHECK_HEX(int, dead);
136     CHECK_HEX(unsigned int, deadU);
137     CHECK_HEX(long, 7eadbeefL);
138     CHECK_HEX(unsigned long, deadbeefUL);
139     #ifdef HAVE_LONG_LONG
140     CHECK_HEX(long long, 12345678deadbeefLL);
141     #endif
142     #ifdef HAVE_UNSIGNED_LONG_LONG
143     CHECK_HEX(unsigned long long, cafebabedeadbeefULL);
144     #endif
145    
146     #undef CHECK_HEX
147    
148     printf("Testing octal\n");
149    
150     #define CHECK_OCTAL(type, value) \
151     do { \
152     type v; \
153     CHECK(RE("([0-7]+)[uUlL]*").FullMatch(#value, Octal(&v))); \
154     CHECK_EQ(v, 0 ## value); \
155     CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0" #value, CRadix(&v))); \
156     CHECK_EQ(v, 0 ## value); \
157     } while(0)
158    
159     CHECK_OCTAL(short, 77777);
160     CHECK_OCTAL(unsigned short, 177777U);
161     CHECK_OCTAL(int, 17777777777);
162     CHECK_OCTAL(unsigned int, 37777777777U);
163     CHECK_OCTAL(long, 17777777777L);
164     CHECK_OCTAL(unsigned long, 37777777777UL);
165     #ifdef HAVE_LONG_LONG
166     CHECK_OCTAL(long long, 777777777777777777777LL);
167     #endif
168     #ifdef HAVE_UNSIGNED_LONG_LONG
169     CHECK_OCTAL(unsigned long long, 1777777777777777777777ULL);
170     #endif
171    
172     #undef CHECK_OCTAL
173    
174     printf("Testing decimal\n");
175    
176     #define CHECK_DECIMAL(type, value) \
177     do { \
178     type v; \
179     CHECK(RE("(-?[0-9]+)[uUlL]*").FullMatch(#value, &v)); \
180     CHECK_EQ(v, value); \
181     CHECK(RE("(-?[0-9a-fA-FxX]+)[uUlL]*").FullMatch(#value, CRadix(&v))); \
182     CHECK_EQ(v, value); \
183     } while(0)
184    
185     CHECK_DECIMAL(short, -1);
186     CHECK_DECIMAL(unsigned short, 9999);
187     CHECK_DECIMAL(int, -1000);
188     CHECK_DECIMAL(unsigned int, 12345U);
189     CHECK_DECIMAL(long, -10000000L);
190     CHECK_DECIMAL(unsigned long, 3083324652U);
191     #ifdef HAVE_LONG_LONG
192     CHECK_DECIMAL(long long, -100000000000000LL);
193     #endif
194     #ifdef HAVE_UNSIGNED_LONG_LONG
195     CHECK_DECIMAL(unsigned long long, 1234567890987654321ULL);
196     #endif
197    
198     #undef CHECK_DECIMAL
199    
200     }
201    
202     static void TestReplace() {
203     printf("Testing Replace\n");
204    
205     struct ReplaceTest {
206     const char *regexp;
207     const char *rewrite;
208     const char *original;
209     const char *single;
210     const char *global;
211     };
212     static const ReplaceTest tests[] = {
213     { "(qu|[b-df-hj-np-tv-z]*)([a-z]+)",
214     "\\2\\1ay",
215     "the quick brown fox jumps over the lazy dogs.",
216     "ethay quick brown fox jumps over the lazy dogs.",
217     "ethay ickquay ownbray oxfay umpsjay overay ethay azylay ogsday." },
218     { "\\w+",
219     "\\0-NOSPAM",
220     "paul.haahr@google.com",
221     "paul-NOSPAM.haahr@google.com",
222     "paul-NOSPAM.haahr-NOSPAM@google-NOSPAM.com-NOSPAM" },
223     { "^",
224     "(START)",
225     "foo",
226     "(START)foo",
227     "(START)foo" },
228     { "^",
229     "(START)",
230     "",
231     "(START)",
232     "(START)" },
233     { "$",
234     "(END)",
235     "",
236     "(END)",
237     "(END)" },
238     { "b",
239     "bb",
240     "ababababab",
241     "abbabababab",
242     "abbabbabbabbabb" },
243     { "b",
244     "bb",
245     "bbbbbb",
246     "bbbbbbb",
247     "bbbbbbbbbbbb" },
248     { "b+",
249     "bb",
250     "bbbbbb",
251     "bb",
252     "bb" },
253     { "b*",
254     "bb",
255     "bbbbbb",
256     "bb",
257     "bb" },
258     { "b*",
259     "bb",
260     "aaaaa",
261     "bbaaaaa",
262     "bbabbabbabbabbabb" },
263 nigel 91 { "b*",
264     "bb",
265     "aa\naa\n",
266     "bbaa\naa\n",
267     "bbabbabb\nbbabbabb\nbb" },
268     { "b*",
269     "bb",
270     "aa\raa\r",
271     "bbaa\raa\r",
272     "bbabbabb\rbbabbabb\rbb" },
273     { "b*",
274     "bb",
275     "aa\r\naa\r\n",
276     "bbaa\r\naa\r\n",
277     "bbabbabb\r\nbbabbabb\r\nbb" },
278     #ifdef SUPPORT_UTF8
279     { "b*",
280     "bb",
281     "\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8", // utf8
282     "bb\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8",
283     "bb\xE3\x83\x9B""bb""\xE3\x83\xBC""bb""\xE3\x83\xA0""bb""\xE3\x81\xB8""bb" },
284     { "b*",
285     "bb",
286     "\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n", // utf8
287     "bb\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n",
288     ("bb\xE3\x83\x9B""bb\r\nbb""\xE3\x83\xBC""bb\rbb""\xE3\x83\xA0"
289     "bb\nbb""\xE3\x81\xB8""bb\r\nbb") },
290     #endif
291 nigel 77 { "", NULL, NULL, NULL, NULL }
292     };
293    
294 nigel 91 #ifdef SUPPORT_UTF8
295     const bool support_utf8 = true;
296     #else
297     const bool support_utf8 = false;
298     #endif
299    
300 nigel 77 for (const ReplaceTest *t = tests; t->original != NULL; ++t) {
301 nigel 91 RE re(t->regexp, RE_Options(PCRE_NEWLINE_CRLF).set_utf8(support_utf8));
302     assert(re.error().empty());
303 nigel 77 string one(t->original);
304 nigel 91 CHECK(re.Replace(t->rewrite, &one));
305 nigel 77 CHECK_EQ(one, t->single);
306     string all(t->original);
307 nigel 91 CHECK(re.GlobalReplace(t->rewrite, &all) > 0);
308 nigel 77 CHECK_EQ(all, t->global);
309     }
310 nigel 91
311     // One final test: test \r\n replacement when we're not in CRLF mode
312     {
313     RE re("b*", RE_Options(PCRE_NEWLINE_CR).set_utf8(support_utf8));
314     assert(re.error().empty());
315     string all("aa\r\naa\r\n");
316     CHECK(re.GlobalReplace("bb", &all) > 0);
317     CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
318     }
319     {
320     RE re("b*", RE_Options(PCRE_NEWLINE_LF).set_utf8(support_utf8));
321     assert(re.error().empty());
322     string all("aa\r\naa\r\n");
323     CHECK(re.GlobalReplace("bb", &all) > 0);
324     CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
325     }
326     // TODO: test what happens when no PCRE_NEWLINE_* flag is set.
327     // Alas, the answer depends on how pcre was compiled.
328 nigel 77 }
329    
330     static void TestExtract() {
331     printf("Testing Extract\n");
332    
333     string s;
334    
335     CHECK(RE("(.*)@([^.]*)").Extract("\\2!\\1", "boris@kremvax.ru", &s));
336     CHECK_EQ(s, "kremvax!boris");
337    
338     // check the RE interface as well
339     CHECK(RE(".*").Extract("'\\0'", "foo", &s));
340     CHECK_EQ(s, "'foo'");
341     CHECK(!RE("bar").Extract("'\\0'", "baz", &s));
342     CHECK_EQ(s, "'foo'");
343     }
344    
345     static void TestConsume() {
346     printf("Testing Consume\n");
347    
348     string word;
349    
350     string s(" aaa b!@#$@#$cccc");
351     StringPiece input(s);
352    
353     RE r("\\s*(\\w+)"); // matches a word, possibly proceeded by whitespace
354     CHECK(r.Consume(&input, &word));
355     CHECK_EQ(word, "aaa");
356     CHECK(r.Consume(&input, &word));
357     CHECK_EQ(word, "b");
358     CHECK(! r.Consume(&input, &word));
359     }
360    
361     static void TestFindAndConsume() {
362     printf("Testing FindAndConsume\n");
363    
364     string word;
365    
366     string s(" aaa b!@#$@#$cccc");
367     StringPiece input(s);
368    
369     RE r("(\\w+)"); // matches a word
370     CHECK(r.FindAndConsume(&input, &word));
371     CHECK_EQ(word, "aaa");
372     CHECK(r.FindAndConsume(&input, &word));
373     CHECK_EQ(word, "b");
374     CHECK(r.FindAndConsume(&input, &word));
375     CHECK_EQ(word, "cccc");
376     CHECK(! r.FindAndConsume(&input, &word));
377     }
378    
379     static void TestMatchNumberPeculiarity() {
380     printf("Testing match-number peculiaraity\n");
381    
382     string word1;
383     string word2;
384     string word3;
385    
386     RE r("(foo)|(bar)|(baz)");
387     CHECK(r.PartialMatch("foo", &word1, &word2, &word3));
388     CHECK_EQ(word1, "foo");
389     CHECK_EQ(word2, "");
390     CHECK_EQ(word3, "");
391     CHECK(r.PartialMatch("bar", &word1, &word2, &word3));
392     CHECK_EQ(word1, "");
393     CHECK_EQ(word2, "bar");
394     CHECK_EQ(word3, "");
395     CHECK(r.PartialMatch("baz", &word1, &word2, &word3));
396     CHECK_EQ(word1, "");
397     CHECK_EQ(word2, "");
398     CHECK_EQ(word3, "baz");
399     CHECK(!r.PartialMatch("f", &word1, &word2, &word3));
400    
401     string a;
402     CHECK(RE("(foo)|hello").FullMatch("hello", &a));
403     CHECK_EQ(a, "");
404     }
405    
406 nigel 87 static void TestRecursion() {
407 nigel 77 printf("Testing recursion\n");
408    
409 nigel 87 // Get one string that passes (sometimes), one that never does.
410     string text_good("abcdefghijk");
411     string text_bad("acdefghijkl");
412    
413     // According to pcretest, matching text_good against (\w+)*b
414     // requires match_limit of at least 8192, and match_recursion_limit
415     // of at least 37.
416    
417     RE_Options options_ml;
418     options_ml.set_match_limit(8192);
419     RE re("(\\w+)*b", options_ml);
420     CHECK(re.PartialMatch(text_good) == true);
421     CHECK(re.PartialMatch(text_bad) == false);
422     CHECK(re.FullMatch(text_good) == false);
423     CHECK(re.FullMatch(text_bad) == false);
424    
425     options_ml.set_match_limit(1024);
426     RE re2("(\\w+)*b", options_ml);
427     CHECK(re2.PartialMatch(text_good) == false); // because of match_limit
428     CHECK(re2.PartialMatch(text_bad) == false);
429     CHECK(re2.FullMatch(text_good) == false);
430     CHECK(re2.FullMatch(text_bad) == false);
431    
432     RE_Options options_mlr;
433     options_mlr.set_match_limit_recursion(50);
434     RE re3("(\\w+)*b", options_mlr);
435     CHECK(re3.PartialMatch(text_good) == true);
436     CHECK(re3.PartialMatch(text_bad) == false);
437     CHECK(re3.FullMatch(text_good) == false);
438     CHECK(re3.FullMatch(text_bad) == false);
439    
440     options_mlr.set_match_limit_recursion(10);
441     RE re4("(\\w+)*b", options_mlr);
442     CHECK(re4.PartialMatch(text_good) == false);
443     CHECK(re4.PartialMatch(text_bad) == false);
444     CHECK(re4.FullMatch(text_good) == false);
445     CHECK(re4.FullMatch(text_bad) == false);
446 nigel 77 }
447    
448 nigel 81 //
449     // Options tests contributed by
450     // Giuseppe Maxia, CTO, Stardata s.r.l.
451     // July 2005
452     //
453     static void GetOneOptionResult(
454     const char *option_name,
455     const char *regex,
456     const char *str,
457     RE_Options options,
458     bool full,
459     string expected) {
460 nigel 77
461 nigel 81 printf("Testing Option <%s>\n", option_name);
462     if(VERBOSE_TEST)
463     printf("/%s/ finds \"%s\" within \"%s\" \n",
464     regex,
465     expected.c_str(),
466     str);
467     string captured("");
468     if (full)
469     RE(regex,options).FullMatch(str, &captured);
470     else
471     RE(regex,options).PartialMatch(str, &captured);
472     CHECK_EQ(captured, expected);
473     }
474    
475     static void TestOneOption(
476     const char *option_name,
477     const char *regex,
478     const char *str,
479     RE_Options options,
480     bool full,
481     bool assertive = true) {
482    
483     printf("Testing Option <%s>\n", option_name);
484     if (VERBOSE_TEST)
485     printf("'%s' %s /%s/ \n",
486     str,
487     (assertive? "matches" : "doesn't match"),
488     regex);
489     if (assertive) {
490     if (full)
491     CHECK(RE(regex,options).FullMatch(str));
492     else
493     CHECK(RE(regex,options).PartialMatch(str));
494     } else {
495     if (full)
496     CHECK(!RE(regex,options).FullMatch(str));
497     else
498     CHECK(!RE(regex,options).PartialMatch(str));
499     }
500     }
501    
502     static void Test_CASELESS() {
503     RE_Options options;
504     RE_Options options2;
505    
506     options.set_caseless(true);
507     TestOneOption("CASELESS (class)", "HELLO", "hello", options, false);
508     TestOneOption("CASELESS (class2)", "HELLO", "hello", options2.set_caseless(true), false);
509     TestOneOption("CASELESS (class)", "^[A-Z]+$", "Hello", options, false);
510    
511     TestOneOption("CASELESS (function)", "HELLO", "hello", pcrecpp::CASELESS(), false);
512     TestOneOption("CASELESS (function)", "^[A-Z]+$", "Hello", pcrecpp::CASELESS(), false);
513     options.set_caseless(false);
514     TestOneOption("no CASELESS", "HELLO", "hello", options, false, false);
515     }
516    
517     static void Test_MULTILINE() {
518     RE_Options options;
519     RE_Options options2;
520     const char *str = "HELLO\n" "cruel\n" "world\n";
521    
522     options.set_multiline(true);
523     TestOneOption("MULTILINE (class)", "^cruel$", str, options, false);
524     TestOneOption("MULTILINE (class2)", "^cruel$", str, options2.set_multiline(true), false);
525     TestOneOption("MULTILINE (function)", "^cruel$", str, pcrecpp::MULTILINE(), false);
526     options.set_multiline(false);
527     TestOneOption("no MULTILINE", "^cruel$", str, options, false, false);
528     }
529    
530     static void Test_DOTALL() {
531     RE_Options options;
532     RE_Options options2;
533     const char *str = "HELLO\n" "cruel\n" "world";
534    
535     options.set_dotall(true);
536     TestOneOption("DOTALL (class)", "HELLO.*world", str, options, true);
537     TestOneOption("DOTALL (class2)", "HELLO.*world", str, options2.set_dotall(true), true);
538     TestOneOption("DOTALL (function)", "HELLO.*world", str, pcrecpp::DOTALL(), true);
539     options.set_dotall(false);
540     TestOneOption("no DOTALL", "HELLO.*world", str, options, true, false);
541     }
542    
543     static void Test_DOLLAR_ENDONLY() {
544     RE_Options options;
545     RE_Options options2;
546     const char *str = "HELLO world\n";
547    
548     TestOneOption("no DOLLAR_ENDONLY", "world$", str, options, false);
549     options.set_dollar_endonly(true);
550     TestOneOption("DOLLAR_ENDONLY 1", "world$", str, options, false, false);
551     TestOneOption("DOLLAR_ENDONLY 2", "world$", str, options2.set_dollar_endonly(true), false, false);
552     }
553    
554     static void Test_EXTRA() {
555     RE_Options options;
556     const char *str = "HELLO";
557    
558     options.set_extra(true);
559     TestOneOption("EXTRA 1", "\\HELL\\O", str, options, true, false );
560     TestOneOption("EXTRA 2", "\\HELL\\O", str, RE_Options().set_extra(true), true, false );
561     options.set_extra(false);
562     TestOneOption("no EXTRA", "\\HELL\\O", str, options, true );
563     }
564    
565     static void Test_EXTENDED() {
566     RE_Options options;
567     RE_Options options2;
568     const char *str = "HELLO world";
569    
570     options.set_extended(true);
571     TestOneOption("EXTENDED (class)", "HELLO world", str, options, false, false);
572     TestOneOption("EXTENDED (class2)", "HELLO world", str, options2.set_extended(true), false, false);
573     TestOneOption("EXTENDED (class)",
574     "^ HE L{2} O "
575     "\\s+ "
576     "\\w+ $ ",
577     str,
578     options,
579     false);
580    
581     TestOneOption("EXTENDED (function)", "HELLO world", str, pcrecpp::EXTENDED(), false, false);
582     TestOneOption("EXTENDED (function)",
583     "^ HE L{2} O "
584     "\\s+ "
585     "\\w+ $ ",
586     str,
587     pcrecpp::EXTENDED(),
588     false);
589    
590     options.set_extended(false);
591     TestOneOption("no EXTENDED", "HELLO world", str, options, false);
592     }
593    
594     static void Test_NO_AUTO_CAPTURE() {
595     RE_Options options;
596     const char *str = "HELLO world";
597     string captured;
598    
599     printf("Testing Option <no NO_AUTO_CAPTURE>\n");
600     if (VERBOSE_TEST)
601     printf("parentheses capture text\n");
602     RE re("(world|universe)$", options);
603     CHECK(re.Extract("\\1", str , &captured));
604     CHECK_EQ(captured, "world");
605     options.set_no_auto_capture(true);
606     printf("testing Option <NO_AUTO_CAPTURE>\n");
607     if (VERBOSE_TEST)
608     printf("parentheses do not capture text\n");
609     re.Extract("\\1",str, &captured );
610     CHECK_EQ(captured, "world");
611     }
612    
613     static void Test_UNGREEDY() {
614     RE_Options options;
615     const char *str = "HELLO, 'this' is the 'world'";
616    
617     options.set_ungreedy(true);
618     GetOneOptionResult("UNGREEDY 1", "('.*')", str, options, false, "'this'" );
619     GetOneOptionResult("UNGREEDY 2", "('.*')", str, RE_Options().set_ungreedy(true), false, "'this'" );
620     GetOneOptionResult("UNGREEDY", "('.*?')", str, options, false, "'this' is the 'world'" );
621    
622     options.set_ungreedy(false);
623     GetOneOptionResult("no UNGREEDY", "('.*')", str, options, false, "'this' is the 'world'" );
624     GetOneOptionResult("no UNGREEDY", "('.*?')", str, options, false, "'this'" );
625     }
626    
627     static void Test_all_options() {
628     const char *str = "HELLO\n" "cruel\n" "world";
629     RE_Options options;
630     options.set_all_options(PCRE_CASELESS | PCRE_DOTALL);
631    
632     TestOneOption("all_options (CASELESS|DOTALL)", "^hello.*WORLD", str , options, false);
633     options.set_all_options(0);
634     TestOneOption("all_options (0)", "^hello.*WORLD", str , options, false, false);
635     options.set_all_options(PCRE_MULTILINE | PCRE_EXTENDED);
636    
637     TestOneOption("all_options (MULTILINE|EXTENDED)", " ^ c r u e l $ ", str, options, false);
638     TestOneOption("all_options (MULTILINE|EXTENDED) with constructor",
639     " ^ c r u e l $ ",
640     str,
641     RE_Options(PCRE_MULTILINE | PCRE_EXTENDED),
642     false);
643    
644     TestOneOption("all_options (MULTILINE|EXTENDED) with concatenation",
645     " ^ c r u e l $ ",
646     str,
647     RE_Options()
648     .set_multiline(true)
649     .set_extended(true),
650     false);
651    
652     options.set_all_options(0);
653     TestOneOption("all_options (0)", "^ c r u e l $", str, options, false, false);
654    
655     }
656    
657     static void TestOptions() {
658     printf("Testing Options\n");
659     Test_CASELESS();
660     Test_MULTILINE();
661     Test_DOTALL();
662     Test_DOLLAR_ENDONLY();
663     Test_EXTENDED();
664     Test_NO_AUTO_CAPTURE();
665     Test_UNGREEDY();
666     Test_EXTRA();
667     Test_all_options();
668     }
669    
670 nigel 77 int main(int argc, char** argv) {
671     // Treat any flag as --help
672     if (argc > 1 && argv[1][0] == '-') {
673     printf("Usage: %s [timing1|timing2|timing3 num-iters]\n"
674     " If 'timingX ###' is specified, run the given timing test\n"
675     " with the given number of iterations, rather than running\n"
676     " the default corectness test.\n", argv[0]);
677     return 0;
678     }
679    
680     if (argc > 1) {
681     if ( argc == 2 || atoi(argv[2]) == 0) {
682     printf("timing mode needs a num-iters argument\n");
683     return 1;
684     }
685     if (!strcmp(argv[1], "timing1"))
686     Timing1(atoi(argv[2]));
687     else if (!strcmp(argv[1], "timing2"))
688     Timing2(atoi(argv[2]));
689     else if (!strcmp(argv[1], "timing3"))
690     Timing3(atoi(argv[2]));
691     else
692     printf("Unknown argument '%s'\n", argv[1]);
693     return 0;
694     }
695    
696     printf("Testing FullMatch\n");
697    
698     int i;
699     string s;
700    
701     /***** FullMatch with no args *****/
702    
703     CHECK(RE("h.*o").FullMatch("hello"));
704     CHECK(!RE("h.*o").FullMatch("othello"));
705     CHECK(!RE("h.*o").FullMatch("hello!"));
706    
707     /***** FullMatch with args *****/
708    
709     // Zero-arg
710     CHECK(RE("\\d+").FullMatch("1001"));
711    
712     // Single-arg
713     CHECK(RE("(\\d+)").FullMatch("1001", &i));
714     CHECK_EQ(i, 1001);
715     CHECK(RE("(-?\\d+)").FullMatch("-123", &i));
716     CHECK_EQ(i, -123);
717     CHECK(!RE("()\\d+").FullMatch("10", &i));
718     CHECK(!RE("(\\d+)").FullMatch("1234567890123456789012345678901234567890",
719     &i));
720    
721     // Digits surrounding integer-arg
722     CHECK(RE("1(\\d*)4").FullMatch("1234", &i));
723     CHECK_EQ(i, 23);
724     CHECK(RE("(\\d)\\d+").FullMatch("1234", &i));
725     CHECK_EQ(i, 1);
726     CHECK(RE("(-\\d)\\d+").FullMatch("-1234", &i));
727     CHECK_EQ(i, -1);
728     CHECK(RE("(\\d)").PartialMatch("1234", &i));
729     CHECK_EQ(i, 1);
730     CHECK(RE("(-\\d)").PartialMatch("-1234", &i));
731     CHECK_EQ(i, -1);
732    
733     // String-arg
734     CHECK(RE("h(.*)o").FullMatch("hello", &s));
735     CHECK_EQ(s, string("ell"));
736    
737     // StringPiece-arg
738     StringPiece sp;
739     CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &sp, &i));
740     CHECK_EQ(sp.size(), 4);
741     CHECK(memcmp(sp.data(), "ruby", 4) == 0);
742     CHECK_EQ(i, 1234);
743    
744     // Multi-arg
745     CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &s, &i));
746     CHECK_EQ(s, string("ruby"));
747     CHECK_EQ(i, 1234);
748    
749     // Ignored arg
750     CHECK(RE("(\\w+)(:)(\\d+)").FullMatch("ruby:1234", &s, (void*)NULL, &i));
751     CHECK_EQ(s, string("ruby"));
752     CHECK_EQ(i, 1234);
753    
754     // Type tests
755     {
756     char c;
757     CHECK(RE("(H)ello").FullMatch("Hello", &c));
758     CHECK_EQ(c, 'H');
759     }
760     {
761     unsigned char c;
762     CHECK(RE("(H)ello").FullMatch("Hello", &c));
763     CHECK_EQ(c, static_cast<unsigned char>('H'));
764     }
765     {
766     short v;
767     CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
768     CHECK(RE("(-?\\d+)").FullMatch("-100", &v)); CHECK_EQ(v, -100);
769     CHECK(RE("(-?\\d+)").FullMatch("32767", &v)); CHECK_EQ(v, 32767);
770     CHECK(RE("(-?\\d+)").FullMatch("-32768", &v)); CHECK_EQ(v, -32768);
771     CHECK(!RE("(-?\\d+)").FullMatch("-32769", &v));
772     CHECK(!RE("(-?\\d+)").FullMatch("32768", &v));
773     }
774     {
775     unsigned short v;
776     CHECK(RE("(\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
777     CHECK(RE("(\\d+)").FullMatch("32767", &v)); CHECK_EQ(v, 32767);
778     CHECK(RE("(\\d+)").FullMatch("65535", &v)); CHECK_EQ(v, 65535);
779     CHECK(!RE("(\\d+)").FullMatch("65536", &v));
780     }
781     {
782     int v;
783     static const int max_value = 0x7fffffff;
784     static const int min_value = -max_value - 1;
785     CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
786     CHECK(RE("(-?\\d+)").FullMatch("-100", &v)); CHECK_EQ(v, -100);
787     CHECK(RE("(-?\\d+)").FullMatch("2147483647", &v)); CHECK_EQ(v, max_value);
788     CHECK(RE("(-?\\d+)").FullMatch("-2147483648", &v)); CHECK_EQ(v, min_value);
789     CHECK(!RE("(-?\\d+)").FullMatch("-2147483649", &v));
790     CHECK(!RE("(-?\\d+)").FullMatch("2147483648", &v));
791     }
792     {
793     unsigned int v;
794     static const unsigned int max_value = 0xfffffffful;
795     CHECK(RE("(\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
796     CHECK(RE("(\\d+)").FullMatch("4294967295", &v)); CHECK_EQ(v, max_value);
797     CHECK(!RE("(\\d+)").FullMatch("4294967296", &v));
798     }
799     #ifdef HAVE_LONG_LONG
800     {
801     long long v;
802     static const long long max_value = 0x7fffffffffffffffLL;
803     static const long long min_value = -max_value - 1;
804     char buf[32];
805    
806     CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
807     CHECK(RE("(-?\\d+)").FullMatch("-100",&v)); CHECK_EQ(v, -100);
808    
809     snprintf(buf, sizeof(buf), "%lld", max_value);
810     CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
811    
812     snprintf(buf, sizeof(buf), "%lld", min_value);
813     CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, min_value);
814    
815     snprintf(buf, sizeof(buf), "%lld", max_value);
816     assert(buf[strlen(buf)-1] != '9');
817     buf[strlen(buf)-1]++;
818     CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
819    
820     snprintf(buf, sizeof(buf), "%lld", min_value);
821     assert(buf[strlen(buf)-1] != '9');
822     buf[strlen(buf)-1]++;
823     CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
824     }
825     #endif
826     #if defined HAVE_UNSIGNED_LONG_LONG && defined HAVE_LONG_LONG
827     {
828     unsigned long long v;
829     long long v2;
830     static const unsigned long long max_value = 0xffffffffffffffffULL;
831     char buf[32];
832    
833     CHECK(RE("(-?\\d+)").FullMatch("100",&v)); CHECK_EQ(v, 100);
834     CHECK(RE("(-?\\d+)").FullMatch("-100",&v2)); CHECK_EQ(v2, -100);
835    
836     snprintf(buf, sizeof(buf), "%llu", max_value);
837     CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
838    
839     assert(buf[strlen(buf)-1] != '9');
840     buf[strlen(buf)-1]++;
841     CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
842     }
843     #endif
844     {
845     float v;
846     CHECK(RE("(.*)").FullMatch("100", &v));
847     CHECK(RE("(.*)").FullMatch("-100.", &v));
848     CHECK(RE("(.*)").FullMatch("1e23", &v));
849     }
850     {
851     double v;
852     CHECK(RE("(.*)").FullMatch("100", &v));
853     CHECK(RE("(.*)").FullMatch("-100.", &v));
854     CHECK(RE("(.*)").FullMatch("1e23", &v));
855     }
856    
857     // Check that matching is fully anchored
858     CHECK(!RE("(\\d+)").FullMatch("x1001", &i));
859     CHECK(!RE("(\\d+)").FullMatch("1001x", &i));
860     CHECK(RE("x(\\d+)").FullMatch("x1001", &i)); CHECK_EQ(i, 1001);
861     CHECK(RE("(\\d+)x").FullMatch("1001x", &i)); CHECK_EQ(i, 1001);
862    
863     // Braces
864     CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcd"));
865     CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcde"));
866     CHECK(!RE("[0-9a-f+.-]{5,}").FullMatch("0abc"));
867    
868     // Complicated RE
869     CHECK(RE("foo|bar|[A-Z]").FullMatch("foo"));
870     CHECK(RE("foo|bar|[A-Z]").FullMatch("bar"));
871     CHECK(RE("foo|bar|[A-Z]").FullMatch("X"));
872     CHECK(!RE("foo|bar|[A-Z]").FullMatch("XY"));
873    
874     // Check full-match handling (needs '$' tacked on internally)
875     CHECK(RE("fo|foo").FullMatch("fo"));
876     CHECK(RE("fo|foo").FullMatch("foo"));
877     CHECK(RE("fo|foo$").FullMatch("fo"));
878     CHECK(RE("fo|foo$").FullMatch("foo"));
879     CHECK(RE("foo$").FullMatch("foo"));
880     CHECK(!RE("foo\\$").FullMatch("foo$bar"));
881     CHECK(!RE("fo|bar").FullMatch("fox"));
882    
883     // Uncomment the following if we change the handling of '$' to
884     // prevent it from matching a trailing newline
885     if (false) {
886     // Check that we don't get bitten by pcre's special handling of a
887     // '\n' at the end of the string matching '$'
888     CHECK(!RE("foo$").PartialMatch("foo\n"));
889     }
890    
891     // Number of args
892     int a[16];
893     CHECK(RE("").FullMatch(""));
894    
895     memset(a, 0, sizeof(0));
896     CHECK(RE("(\\d){1}").FullMatch("1",
897     &a[0]));
898     CHECK_EQ(a[0], 1);
899    
900     memset(a, 0, sizeof(0));
901     CHECK(RE("(\\d)(\\d)").FullMatch("12",
902     &a[0], &a[1]));
903     CHECK_EQ(a[0], 1);
904     CHECK_EQ(a[1], 2);
905    
906     memset(a, 0, sizeof(0));
907     CHECK(RE("(\\d)(\\d)(\\d)").FullMatch("123",
908     &a[0], &a[1], &a[2]));
909     CHECK_EQ(a[0], 1);
910     CHECK_EQ(a[1], 2);
911     CHECK_EQ(a[2], 3);
912    
913     memset(a, 0, sizeof(0));
914     CHECK(RE("(\\d)(\\d)(\\d)(\\d)").FullMatch("1234",
915     &a[0], &a[1], &a[2], &a[3]));
916     CHECK_EQ(a[0], 1);
917     CHECK_EQ(a[1], 2);
918     CHECK_EQ(a[2], 3);
919     CHECK_EQ(a[3], 4);
920    
921     memset(a, 0, sizeof(0));
922     CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("12345",
923     &a[0], &a[1], &a[2],
924     &a[3], &a[4]));
925     CHECK_EQ(a[0], 1);
926     CHECK_EQ(a[1], 2);
927     CHECK_EQ(a[2], 3);
928     CHECK_EQ(a[3], 4);
929     CHECK_EQ(a[4], 5);
930    
931     memset(a, 0, sizeof(0));
932     CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("123456",
933     &a[0], &a[1], &a[2],
934     &a[3], &a[4], &a[5]));
935     CHECK_EQ(a[0], 1);
936     CHECK_EQ(a[1], 2);
937     CHECK_EQ(a[2], 3);
938     CHECK_EQ(a[3], 4);
939     CHECK_EQ(a[4], 5);
940     CHECK_EQ(a[5], 6);
941    
942     memset(a, 0, sizeof(0));
943     CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("1234567",
944     &a[0], &a[1], &a[2], &a[3],
945     &a[4], &a[5], &a[6]));
946     CHECK_EQ(a[0], 1);
947     CHECK_EQ(a[1], 2);
948     CHECK_EQ(a[2], 3);
949     CHECK_EQ(a[3], 4);
950     CHECK_EQ(a[4], 5);
951     CHECK_EQ(a[5], 6);
952     CHECK_EQ(a[6], 7);
953    
954     memset(a, 0, sizeof(0));
955     CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)"
956     "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch(
957     "1234567890123456",
958     &a[0], &a[1], &a[2], &a[3],
959     &a[4], &a[5], &a[6], &a[7],
960     &a[8], &a[9], &a[10], &a[11],
961     &a[12], &a[13], &a[14], &a[15]));
962     CHECK_EQ(a[0], 1);
963     CHECK_EQ(a[1], 2);
964     CHECK_EQ(a[2], 3);
965     CHECK_EQ(a[3], 4);
966     CHECK_EQ(a[4], 5);
967     CHECK_EQ(a[5], 6);
968     CHECK_EQ(a[6], 7);
969     CHECK_EQ(a[7], 8);
970     CHECK_EQ(a[8], 9);
971     CHECK_EQ(a[9], 0);
972     CHECK_EQ(a[10], 1);
973     CHECK_EQ(a[11], 2);
974     CHECK_EQ(a[12], 3);
975     CHECK_EQ(a[13], 4);
976     CHECK_EQ(a[14], 5);
977     CHECK_EQ(a[15], 6);
978    
979     /***** PartialMatch *****/
980    
981     printf("Testing PartialMatch\n");
982    
983     CHECK(RE("h.*o").PartialMatch("hello"));
984     CHECK(RE("h.*o").PartialMatch("othello"));
985     CHECK(RE("h.*o").PartialMatch("hello!"));
986     CHECK(RE("((((((((((((((((((((x))))))))))))))))))))").PartialMatch("x"));
987    
988     RadixTests();
989     TestReplace();
990     TestExtract();
991     TestConsume();
992     TestFindAndConsume();
993     TestMatchNumberPeculiarity();
994    
995     // Check the pattern() accessor
996     {
997     const string kPattern = "http://([^/]+)/.*";
998     const RE re(kPattern);
999     CHECK_EQ(kPattern, re.pattern());
1000     }
1001    
1002     // Check RE error field.
1003     {
1004     RE re("foo");
1005     CHECK(re.error().empty()); // Must have no error
1006     }
1007    
1008     #ifdef SUPPORT_UTF8
1009     // Check UTF-8 handling
1010     {
1011     printf("Testing UTF-8 handling\n");
1012    
1013     // Three Japanese characters (nihongo)
1014     const char utf8_string[] = {
1015     0xe6, 0x97, 0xa5, // 65e5
1016     0xe6, 0x9c, 0xac, // 672c
1017     0xe8, 0xaa, 0x9e, // 8a9e
1018     0
1019     };
1020     const char utf8_pattern[] = {
1021     '.',
1022     0xe6, 0x9c, 0xac, // 672c
1023     '.',
1024     0
1025     };
1026    
1027     // Both should match in either mode, bytes or UTF-8
1028     RE re_test1(".........");
1029     CHECK(re_test1.FullMatch(utf8_string));
1030     RE re_test2("...", pcrecpp::UTF8());
1031     CHECK(re_test2.FullMatch(utf8_string));
1032    
1033     // Check that '.' matches one byte or UTF-8 character
1034     // according to the mode.
1035     string ss;
1036     RE re_test3("(.)");
1037     CHECK(re_test3.PartialMatch(utf8_string, &ss));
1038     CHECK_EQ(ss, string("\xe6"));
1039     RE re_test4("(.)", pcrecpp::UTF8());
1040     CHECK(re_test4.PartialMatch(utf8_string, &ss));
1041     CHECK_EQ(ss, string("\xe6\x97\xa5"));
1042    
1043     // Check that string matches itself in either mode
1044     RE re_test5(utf8_string);
1045     CHECK(re_test5.FullMatch(utf8_string));
1046     RE re_test6(utf8_string, pcrecpp::UTF8());
1047     CHECK(re_test6.FullMatch(utf8_string));
1048    
1049     // Check that pattern matches string only in UTF8 mode
1050     RE re_test7(utf8_pattern);
1051     CHECK(!re_test7.FullMatch(utf8_string));
1052     RE re_test8(utf8_pattern, pcrecpp::UTF8());
1053     CHECK(re_test8.FullMatch(utf8_string));
1054     }
1055    
1056     // Check that ungreedy, UTF8 regular expressions don't match when they
1057     // oughtn't -- see bug 82246.
1058     {
1059     // This code always worked.
1060     const char* pattern = "\\w+X";
1061     const string target = "a aX";
1062     RE match_sentence(pattern);
1063     RE match_sentence_re(pattern, pcrecpp::UTF8());
1064    
1065     CHECK(!match_sentence.FullMatch(target));
1066     CHECK(!match_sentence_re.FullMatch(target));
1067     }
1068    
1069     {
1070     const char* pattern = "(?U)\\w+X";
1071     const string target = "a aX";
1072     RE match_sentence(pattern);
1073     RE match_sentence_re(pattern, pcrecpp::UTF8());
1074    
1075     CHECK(!match_sentence.FullMatch(target));
1076     CHECK(!match_sentence_re.FullMatch(target));
1077     }
1078     #endif /* def SUPPORT_UTF8 */
1079    
1080     printf("Testing error reporting\n");
1081    
1082     { RE re("a\\1"); CHECK(!re.error().empty()); }
1083     {
1084     RE re("a[x");
1085     CHECK(!re.error().empty());
1086     }
1087     {
1088     RE re("a[z-a]");
1089     CHECK(!re.error().empty());
1090     }
1091     {
1092     RE re("a[[:foobar:]]");
1093     CHECK(!re.error().empty());
1094     }
1095     {
1096     RE re("a(b");
1097     CHECK(!re.error().empty());
1098     }
1099     {
1100     RE re("a\\");
1101     CHECK(!re.error().empty());
1102     }
1103    
1104 nigel 87 // Test that recursion is stopped
1105     TestRecursion();
1106 nigel 77
1107 nigel 81 // Test Options
1108     if (getenv("VERBOSE_TEST") != NULL)
1109     VERBOSE_TEST = true;
1110     TestOptions();
1111    
1112 nigel 77 // Done
1113     printf("OK\n");
1114    
1115     return 0;
1116     }

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12