/[pcre]/code/trunk/pcrecpp_unittest.cc
ViewVC logotype

Contents of /code/trunk/pcrecpp_unittest.cc

Parent Directory | Revision Log


Revision 81 - (hide annotations) (download)
Sat Feb 24 21:40:59 2007 UTC (7 years, 6 months ago) by nigel
File size: 31385 byte(s)
Load pcre-6.2 into code/trunk.

1 nigel 77 // Copyright (c) 2005, Google Inc.
2     // All rights reserved.
3     //
4     // Redistribution and use in source and binary forms, with or without
5     // modification, are permitted provided that the following conditions are
6     // met:
7     //
8     // * Redistributions of source code must retain the above copyright
9     // notice, this list of conditions and the following disclaimer.
10     // * Redistributions in binary form must reproduce the above
11     // copyright notice, this list of conditions and the following disclaimer
12     // in the documentation and/or other materials provided with the
13     // distribution.
14     // * Neither the name of Google Inc. nor the names of its
15     // contributors may be used to endorse or promote products derived from
16     // this software without specific prior written permission.
17     //
18     // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19     // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20     // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21     // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22     // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23     // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24     // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25     // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26     // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27     // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28     // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29     //
30     // Author: Sanjay Ghemawat
31     //
32     // TODO: Test extractions for PartialMatch/Consume
33    
34     #include <stdio.h>
35     #include <vector>
36     #include "config.h"
37     #include "pcrecpp.h"
38    
39     using pcrecpp::StringPiece;
40     using pcrecpp::RE;
41     using pcrecpp::RE_Options;
42     using pcrecpp::Hex;
43     using pcrecpp::Octal;
44     using pcrecpp::CRadix;
45    
// When true, the option tests print one extra line describing the pattern
// and subject being tried. main() sets it when the VERBOSE_TEST environment
// variable is present.
static bool VERBOSE_TEST = false;
47    
// CHECK(condition) prints the file, line and text of `condition` to stderr
// and terminates the program with exit status 1 when the condition is false.
// It is *not* controlled by NDEBUG, so the condition is always evaluated and
// it is safe to put side effects inside it, e.g.:
//    CHECK_EQ(fp->Write(x), 4)
#define CHECK(condition) do { \
    if (!(condition)) { \
      fprintf(stderr, "%s:%d: Check failed: %s\n", \
              __FILE__, __LINE__, #condition); \
      exit(1); \
    } \
  } while (0)

// CHECK_EQ(a, b) is CHECK on `a == b`. Both operands are parenthesized so
// that an argument containing an operator of lower precedence than `==`
// (for example `x & mask`) is compared as a whole.
#define CHECK_EQ(a, b) CHECK((a) == (b))
61    
62     static void Timing1(int num_iters) {
63     // Same pattern lots of times
64     RE pattern("ruby:\\d+");
65     StringPiece p("ruby:1234");
66     for (int j = num_iters; j > 0; j--) {
67     CHECK(pattern.FullMatch(p));
68     }
69     }
70    
71     static void Timing2(int num_iters) {
72     // Same pattern lots of times
73     RE pattern("ruby:(\\d+)");
74     int i;
75     for (int j = num_iters; j > 0; j--) {
76     CHECK(pattern.FullMatch("ruby:1234", &i));
77     CHECK_EQ(i, 1234);
78     }
79     }
80    
81     static void Timing3(int num_iters) {
82     string text_string;
83     for (int j = num_iters; j > 0; j--) {
84     text_string += "this is another line\n";
85     }
86    
87     RE line_matcher(".*\n");
88     string line;
89     StringPiece text(text_string);
90     int counter = 0;
91     while (line_matcher.Consume(&text)) {
92     counter++;
93     }
94     printf("Matched %d lines\n", counter);
95     }
96    
#if 0  // change to "#if 1" if you have a way of defining VirtualProcessSize()

// Checks for memory leaks: constructs 100000 distinct RE objects, samples the
// process size halfway through and at the end, and requires the growth over
// the second half to stay below 2% of the final size.
// NOTE(review): VirtualProcessSize() is not defined in this file; it is
// assumed to return an unsigned 64-bit byte count -- confirm when enabling.
static void LeakTest() {
  unsigned long long initial_size = 0;
  for (int i = 0; i < 100000; i++) {
    if (i == 50000) {
      initial_size = VirtualProcessSize();
      printf("Size after 50000: %llu\n", initial_size);
    }
    char buf[100];
    snprintf(buf, sizeof(buf), "pat%09d", i);
    RE newre(buf);
  }
  // Same type as initial_size, and the type the %llu conversion expects.
  const unsigned long long final_size = VirtualProcessSize();
  printf("Size after 100000: %llu\n", final_size);
  const double growth = double(final_size - initial_size) / final_size;
  printf("Growth: %0.2f%%", growth * 100);
  CHECK(growth < 0.02);  // Allow < 2% growth
}

#endif
119    
120     static void RadixTests() {
121     printf("Testing hex\n");
122    
123     #define CHECK_HEX(type, value) \
124     do { \
125     type v; \
126     CHECK(RE("([0-9a-fA-F]+)[uUlL]*").FullMatch(#value, Hex(&v))); \
127     CHECK_EQ(v, 0x ## value); \
128     CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0x" #value, CRadix(&v))); \
129     CHECK_EQ(v, 0x ## value); \
130     } while(0)
131    
132     CHECK_HEX(short, 2bad);
133     CHECK_HEX(unsigned short, 2badU);
134     CHECK_HEX(int, dead);
135     CHECK_HEX(unsigned int, deadU);
136     CHECK_HEX(long, 7eadbeefL);
137     CHECK_HEX(unsigned long, deadbeefUL);
138     #ifdef HAVE_LONG_LONG
139     CHECK_HEX(long long, 12345678deadbeefLL);
140     #endif
141     #ifdef HAVE_UNSIGNED_LONG_LONG
142     CHECK_HEX(unsigned long long, cafebabedeadbeefULL);
143     #endif
144    
145     #undef CHECK_HEX
146    
147     printf("Testing octal\n");
148    
149     #define CHECK_OCTAL(type, value) \
150     do { \
151     type v; \
152     CHECK(RE("([0-7]+)[uUlL]*").FullMatch(#value, Octal(&v))); \
153     CHECK_EQ(v, 0 ## value); \
154     CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0" #value, CRadix(&v))); \
155     CHECK_EQ(v, 0 ## value); \
156     } while(0)
157    
158     CHECK_OCTAL(short, 77777);
159     CHECK_OCTAL(unsigned short, 177777U);
160     CHECK_OCTAL(int, 17777777777);
161     CHECK_OCTAL(unsigned int, 37777777777U);
162     CHECK_OCTAL(long, 17777777777L);
163     CHECK_OCTAL(unsigned long, 37777777777UL);
164     #ifdef HAVE_LONG_LONG
165     CHECK_OCTAL(long long, 777777777777777777777LL);
166     #endif
167     #ifdef HAVE_UNSIGNED_LONG_LONG
168     CHECK_OCTAL(unsigned long long, 1777777777777777777777ULL);
169     #endif
170    
171     #undef CHECK_OCTAL
172    
173     printf("Testing decimal\n");
174    
175     #define CHECK_DECIMAL(type, value) \
176     do { \
177     type v; \
178     CHECK(RE("(-?[0-9]+)[uUlL]*").FullMatch(#value, &v)); \
179     CHECK_EQ(v, value); \
180     CHECK(RE("(-?[0-9a-fA-FxX]+)[uUlL]*").FullMatch(#value, CRadix(&v))); \
181     CHECK_EQ(v, value); \
182     } while(0)
183    
184     CHECK_DECIMAL(short, -1);
185     CHECK_DECIMAL(unsigned short, 9999);
186     CHECK_DECIMAL(int, -1000);
187     CHECK_DECIMAL(unsigned int, 12345U);
188     CHECK_DECIMAL(long, -10000000L);
189     CHECK_DECIMAL(unsigned long, 3083324652U);
190     #ifdef HAVE_LONG_LONG
191     CHECK_DECIMAL(long long, -100000000000000LL);
192     #endif
193     #ifdef HAVE_UNSIGNED_LONG_LONG
194     CHECK_DECIMAL(unsigned long long, 1234567890987654321ULL);
195     #endif
196    
197     #undef CHECK_DECIMAL
198    
199     }
200    
201     static void TestReplace() {
202     printf("Testing Replace\n");
203    
204     struct ReplaceTest {
205     const char *regexp;
206     const char *rewrite;
207     const char *original;
208     const char *single;
209     const char *global;
210     };
211     static const ReplaceTest tests[] = {
212     { "(qu|[b-df-hj-np-tv-z]*)([a-z]+)",
213     "\\2\\1ay",
214     "the quick brown fox jumps over the lazy dogs.",
215     "ethay quick brown fox jumps over the lazy dogs.",
216     "ethay ickquay ownbray oxfay umpsjay overay ethay azylay ogsday." },
217     { "\\w+",
218     "\\0-NOSPAM",
219     "paul.haahr@google.com",
220     "paul-NOSPAM.haahr@google.com",
221     "paul-NOSPAM.haahr-NOSPAM@google-NOSPAM.com-NOSPAM" },
222     { "^",
223     "(START)",
224     "foo",
225     "(START)foo",
226     "(START)foo" },
227     { "^",
228     "(START)",
229     "",
230     "(START)",
231     "(START)" },
232     { "$",
233     "(END)",
234     "",
235     "(END)",
236     "(END)" },
237     { "b",
238     "bb",
239     "ababababab",
240     "abbabababab",
241     "abbabbabbabbabb" },
242     { "b",
243     "bb",
244     "bbbbbb",
245     "bbbbbbb",
246     "bbbbbbbbbbbb" },
247     { "b+",
248     "bb",
249     "bbbbbb",
250     "bb",
251     "bb" },
252     { "b*",
253     "bb",
254     "bbbbbb",
255     "bb",
256     "bb" },
257     { "b*",
258     "bb",
259     "aaaaa",
260     "bbaaaaa",
261     "bbabbabbabbabbabb" },
262     { "", NULL, NULL, NULL, NULL }
263     };
264    
265     for (const ReplaceTest *t = tests; t->original != NULL; ++t) {
266     string one(t->original);
267     CHECK(RE(t->regexp).Replace(t->rewrite, &one));
268     CHECK_EQ(one, t->single);
269     string all(t->original);
270     CHECK(RE(t->regexp).GlobalReplace(t->rewrite, &all) > 0);
271     CHECK_EQ(all, t->global);
272     }
273     }
274    
275     static void TestExtract() {
276     printf("Testing Extract\n");
277    
278     string s;
279    
280     CHECK(RE("(.*)@([^.]*)").Extract("\\2!\\1", "boris@kremvax.ru", &s));
281     CHECK_EQ(s, "kremvax!boris");
282    
283     // check the RE interface as well
284     CHECK(RE(".*").Extract("'\\0'", "foo", &s));
285     CHECK_EQ(s, "'foo'");
286     CHECK(!RE("bar").Extract("'\\0'", "baz", &s));
287     CHECK_EQ(s, "'foo'");
288     }
289    
290     static void TestConsume() {
291     printf("Testing Consume\n");
292    
293     string word;
294    
295     string s(" aaa b!@#$@#$cccc");
296     StringPiece input(s);
297    
298     RE r("\\s*(\\w+)"); // matches a word, possibly proceeded by whitespace
299     CHECK(r.Consume(&input, &word));
300     CHECK_EQ(word, "aaa");
301     CHECK(r.Consume(&input, &word));
302     CHECK_EQ(word, "b");
303     CHECK(! r.Consume(&input, &word));
304     }
305    
306     static void TestFindAndConsume() {
307     printf("Testing FindAndConsume\n");
308    
309     string word;
310    
311     string s(" aaa b!@#$@#$cccc");
312     StringPiece input(s);
313    
314     RE r("(\\w+)"); // matches a word
315     CHECK(r.FindAndConsume(&input, &word));
316     CHECK_EQ(word, "aaa");
317     CHECK(r.FindAndConsume(&input, &word));
318     CHECK_EQ(word, "b");
319     CHECK(r.FindAndConsume(&input, &word));
320     CHECK_EQ(word, "cccc");
321     CHECK(! r.FindAndConsume(&input, &word));
322     }
323    
324     static void TestMatchNumberPeculiarity() {
325     printf("Testing match-number peculiaraity\n");
326    
327     string word1;
328     string word2;
329     string word3;
330    
331     RE r("(foo)|(bar)|(baz)");
332     CHECK(r.PartialMatch("foo", &word1, &word2, &word3));
333     CHECK_EQ(word1, "foo");
334     CHECK_EQ(word2, "");
335     CHECK_EQ(word3, "");
336     CHECK(r.PartialMatch("bar", &word1, &word2, &word3));
337     CHECK_EQ(word1, "");
338     CHECK_EQ(word2, "bar");
339     CHECK_EQ(word3, "");
340     CHECK(r.PartialMatch("baz", &word1, &word2, &word3));
341     CHECK_EQ(word1, "");
342     CHECK_EQ(word2, "");
343     CHECK_EQ(word3, "baz");
344     CHECK(!r.PartialMatch("f", &word1, &word2, &word3));
345    
346     string a;
347     CHECK(RE("(foo)|hello").FullMatch("hello", &a));
348     CHECK_EQ(a, "");
349     }
350    
351     static void TestRecursion(int size, const char *pattern, int match_limit) {
352     printf("Testing recursion\n");
353    
354     // Fill up a string repeating the pattern given
355     string domain;
356     domain.resize(size);
357     int patlen = strlen(pattern);
358     for (int i = 0; i < size; ++i) {
359     domain[i] = pattern[i % patlen];
360     }
361     // Just make sure it doesn't crash due to too much recursion.
362     RE_Options options;
363     options.set_match_limit(match_limit);
364     RE re("([a-zA-Z0-9]|-)+(\\.([a-zA-Z0-9]|-)+)*(\\.)?", options);
365     re.FullMatch(domain);
366     }
367    
368 nigel 81 //
369     // Options tests contributed by
370     // Giuseppe Maxia, CTO, Stardata s.r.l.
371     // July 2005
372     //
373     static void GetOneOptionResult(
374     const char *option_name,
375     const char *regex,
376     const char *str,
377     RE_Options options,
378     bool full,
379     string expected) {
380 nigel 77
381 nigel 81 printf("Testing Option <%s>\n", option_name);
382     if(VERBOSE_TEST)
383     printf("/%s/ finds \"%s\" within \"%s\" \n",
384     regex,
385     expected.c_str(),
386     str);
387     string captured("");
388     if (full)
389     RE(regex,options).FullMatch(str, &captured);
390     else
391     RE(regex,options).PartialMatch(str, &captured);
392     CHECK_EQ(captured, expected);
393     }
394    
395     static void TestOneOption(
396     const char *option_name,
397     const char *regex,
398     const char *str,
399     RE_Options options,
400     bool full,
401     bool assertive = true) {
402    
403     printf("Testing Option <%s>\n", option_name);
404     if (VERBOSE_TEST)
405     printf("'%s' %s /%s/ \n",
406     str,
407     (assertive? "matches" : "doesn't match"),
408     regex);
409     if (assertive) {
410     if (full)
411     CHECK(RE(regex,options).FullMatch(str));
412     else
413     CHECK(RE(regex,options).PartialMatch(str));
414     } else {
415     if (full)
416     CHECK(!RE(regex,options).FullMatch(str));
417     else
418     CHECK(!RE(regex,options).PartialMatch(str));
419     }
420     }
421    
422     static void Test_CASELESS() {
423     RE_Options options;
424     RE_Options options2;
425    
426     options.set_caseless(true);
427     TestOneOption("CASELESS (class)", "HELLO", "hello", options, false);
428     TestOneOption("CASELESS (class2)", "HELLO", "hello", options2.set_caseless(true), false);
429     TestOneOption("CASELESS (class)", "^[A-Z]+$", "Hello", options, false);
430    
431     TestOneOption("CASELESS (function)", "HELLO", "hello", pcrecpp::CASELESS(), false);
432     TestOneOption("CASELESS (function)", "^[A-Z]+$", "Hello", pcrecpp::CASELESS(), false);
433     options.set_caseless(false);
434     TestOneOption("no CASELESS", "HELLO", "hello", options, false, false);
435     }
436    
437     static void Test_MULTILINE() {
438     RE_Options options;
439     RE_Options options2;
440     const char *str = "HELLO\n" "cruel\n" "world\n";
441    
442     options.set_multiline(true);
443     TestOneOption("MULTILINE (class)", "^cruel$", str, options, false);
444     TestOneOption("MULTILINE (class2)", "^cruel$", str, options2.set_multiline(true), false);
445     TestOneOption("MULTILINE (function)", "^cruel$", str, pcrecpp::MULTILINE(), false);
446     options.set_multiline(false);
447     TestOneOption("no MULTILINE", "^cruel$", str, options, false, false);
448     }
449    
450     static void Test_DOTALL() {
451     RE_Options options;
452     RE_Options options2;
453     const char *str = "HELLO\n" "cruel\n" "world";
454    
455     options.set_dotall(true);
456     TestOneOption("DOTALL (class)", "HELLO.*world", str, options, true);
457     TestOneOption("DOTALL (class2)", "HELLO.*world", str, options2.set_dotall(true), true);
458     TestOneOption("DOTALL (function)", "HELLO.*world", str, pcrecpp::DOTALL(), true);
459     options.set_dotall(false);
460     TestOneOption("no DOTALL", "HELLO.*world", str, options, true, false);
461     }
462    
463     static void Test_DOLLAR_ENDONLY() {
464     RE_Options options;
465     RE_Options options2;
466     const char *str = "HELLO world\n";
467    
468     TestOneOption("no DOLLAR_ENDONLY", "world$", str, options, false);
469     options.set_dollar_endonly(true);
470     TestOneOption("DOLLAR_ENDONLY 1", "world$", str, options, false, false);
471     TestOneOption("DOLLAR_ENDONLY 2", "world$", str, options2.set_dollar_endonly(true), false, false);
472     }
473    
474     static void Test_EXTRA() {
475     RE_Options options;
476     const char *str = "HELLO";
477    
478     options.set_extra(true);
479     TestOneOption("EXTRA 1", "\\HELL\\O", str, options, true, false );
480     TestOneOption("EXTRA 2", "\\HELL\\O", str, RE_Options().set_extra(true), true, false );
481     options.set_extra(false);
482     TestOneOption("no EXTRA", "\\HELL\\O", str, options, true );
483     }
484    
485     static void Test_EXTENDED() {
486     RE_Options options;
487     RE_Options options2;
488     const char *str = "HELLO world";
489    
490     options.set_extended(true);
491     TestOneOption("EXTENDED (class)", "HELLO world", str, options, false, false);
492     TestOneOption("EXTENDED (class2)", "HELLO world", str, options2.set_extended(true), false, false);
493     TestOneOption("EXTENDED (class)",
494     "^ HE L{2} O "
495     "\\s+ "
496     "\\w+ $ ",
497     str,
498     options,
499     false);
500    
501     TestOneOption("EXTENDED (function)", "HELLO world", str, pcrecpp::EXTENDED(), false, false);
502     TestOneOption("EXTENDED (function)",
503     "^ HE L{2} O "
504     "\\s+ "
505     "\\w+ $ ",
506     str,
507     pcrecpp::EXTENDED(),
508     false);
509    
510     options.set_extended(false);
511     TestOneOption("no EXTENDED", "HELLO world", str, options, false);
512     }
513    
514     static void Test_NO_AUTO_CAPTURE() {
515     RE_Options options;
516     const char *str = "HELLO world";
517     string captured;
518    
519     printf("Testing Option <no NO_AUTO_CAPTURE>\n");
520     if (VERBOSE_TEST)
521     printf("parentheses capture text\n");
522     RE re("(world|universe)$", options);
523     CHECK(re.Extract("\\1", str , &captured));
524     CHECK_EQ(captured, "world");
525     options.set_no_auto_capture(true);
526     printf("testing Option <NO_AUTO_CAPTURE>\n");
527     if (VERBOSE_TEST)
528     printf("parentheses do not capture text\n");
529     re.Extract("\\1",str, &captured );
530     CHECK_EQ(captured, "world");
531     }
532    
533     static void Test_UNGREEDY() {
534     RE_Options options;
535     const char *str = "HELLO, 'this' is the 'world'";
536    
537     options.set_ungreedy(true);
538     GetOneOptionResult("UNGREEDY 1", "('.*')", str, options, false, "'this'" );
539     GetOneOptionResult("UNGREEDY 2", "('.*')", str, RE_Options().set_ungreedy(true), false, "'this'" );
540     GetOneOptionResult("UNGREEDY", "('.*?')", str, options, false, "'this' is the 'world'" );
541    
542     options.set_ungreedy(false);
543     GetOneOptionResult("no UNGREEDY", "('.*')", str, options, false, "'this' is the 'world'" );
544     GetOneOptionResult("no UNGREEDY", "('.*?')", str, options, false, "'this'" );
545     }
546    
547     static void Test_all_options() {
548     const char *str = "HELLO\n" "cruel\n" "world";
549     RE_Options options;
550     options.set_all_options(PCRE_CASELESS | PCRE_DOTALL);
551    
552     TestOneOption("all_options (CASELESS|DOTALL)", "^hello.*WORLD", str , options, false);
553     options.set_all_options(0);
554     TestOneOption("all_options (0)", "^hello.*WORLD", str , options, false, false);
555     options.set_all_options(PCRE_MULTILINE | PCRE_EXTENDED);
556    
557     TestOneOption("all_options (MULTILINE|EXTENDED)", " ^ c r u e l $ ", str, options, false);
558     TestOneOption("all_options (MULTILINE|EXTENDED) with constructor",
559     " ^ c r u e l $ ",
560     str,
561     RE_Options(PCRE_MULTILINE | PCRE_EXTENDED),
562     false);
563    
564     TestOneOption("all_options (MULTILINE|EXTENDED) with concatenation",
565     " ^ c r u e l $ ",
566     str,
567     RE_Options()
568     .set_multiline(true)
569     .set_extended(true),
570     false);
571    
572     options.set_all_options(0);
573     TestOneOption("all_options (0)", "^ c r u e l $", str, options, false, false);
574    
575     }
576    
577     static void TestOptions() {
578     printf("Testing Options\n");
579     Test_CASELESS();
580     Test_MULTILINE();
581     Test_DOTALL();
582     Test_DOLLAR_ENDONLY();
583     Test_EXTENDED();
584     Test_NO_AUTO_CAPTURE();
585     Test_UNGREEDY();
586     Test_EXTRA();
587     Test_all_options();
588     }
589    
590 nigel 77 int main(int argc, char** argv) {
591     // Treat any flag as --help
592     if (argc > 1 && argv[1][0] == '-') {
593     printf("Usage: %s [timing1|timing2|timing3 num-iters]\n"
594     " If 'timingX ###' is specified, run the given timing test\n"
595     " with the given number of iterations, rather than running\n"
596     " the default corectness test.\n", argv[0]);
597     return 0;
598     }
599    
600     if (argc > 1) {
601     if ( argc == 2 || atoi(argv[2]) == 0) {
602     printf("timing mode needs a num-iters argument\n");
603     return 1;
604     }
605     if (!strcmp(argv[1], "timing1"))
606     Timing1(atoi(argv[2]));
607     else if (!strcmp(argv[1], "timing2"))
608     Timing2(atoi(argv[2]));
609     else if (!strcmp(argv[1], "timing3"))
610     Timing3(atoi(argv[2]));
611     else
612     printf("Unknown argument '%s'\n", argv[1]);
613     return 0;
614     }
615    
616     printf("Testing FullMatch\n");
617    
618     int i;
619     string s;
620    
621     /***** FullMatch with no args *****/
622    
623     CHECK(RE("h.*o").FullMatch("hello"));
624     CHECK(!RE("h.*o").FullMatch("othello"));
625     CHECK(!RE("h.*o").FullMatch("hello!"));
626    
627     /***** FullMatch with args *****/
628    
629     // Zero-arg
630     CHECK(RE("\\d+").FullMatch("1001"));
631    
632     // Single-arg
633     CHECK(RE("(\\d+)").FullMatch("1001", &i));
634     CHECK_EQ(i, 1001);
635     CHECK(RE("(-?\\d+)").FullMatch("-123", &i));
636     CHECK_EQ(i, -123);
637     CHECK(!RE("()\\d+").FullMatch("10", &i));
638     CHECK(!RE("(\\d+)").FullMatch("1234567890123456789012345678901234567890",
639     &i));
640    
641     // Digits surrounding integer-arg
642     CHECK(RE("1(\\d*)4").FullMatch("1234", &i));
643     CHECK_EQ(i, 23);
644     CHECK(RE("(\\d)\\d+").FullMatch("1234", &i));
645     CHECK_EQ(i, 1);
646     CHECK(RE("(-\\d)\\d+").FullMatch("-1234", &i));
647     CHECK_EQ(i, -1);
648     CHECK(RE("(\\d)").PartialMatch("1234", &i));
649     CHECK_EQ(i, 1);
650     CHECK(RE("(-\\d)").PartialMatch("-1234", &i));
651     CHECK_EQ(i, -1);
652    
653     // String-arg
654     CHECK(RE("h(.*)o").FullMatch("hello", &s));
655     CHECK_EQ(s, string("ell"));
656    
657     // StringPiece-arg
658     StringPiece sp;
659     CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &sp, &i));
660     CHECK_EQ(sp.size(), 4);
661     CHECK(memcmp(sp.data(), "ruby", 4) == 0);
662     CHECK_EQ(i, 1234);
663    
664     // Multi-arg
665     CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &s, &i));
666     CHECK_EQ(s, string("ruby"));
667     CHECK_EQ(i, 1234);
668    
669     // Ignored arg
670     CHECK(RE("(\\w+)(:)(\\d+)").FullMatch("ruby:1234", &s, (void*)NULL, &i));
671     CHECK_EQ(s, string("ruby"));
672     CHECK_EQ(i, 1234);
673    
674     // Type tests
675     {
676     char c;
677     CHECK(RE("(H)ello").FullMatch("Hello", &c));
678     CHECK_EQ(c, 'H');
679     }
680     {
681     unsigned char c;
682     CHECK(RE("(H)ello").FullMatch("Hello", &c));
683     CHECK_EQ(c, static_cast<unsigned char>('H'));
684     }
685     {
686     short v;
687     CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
688     CHECK(RE("(-?\\d+)").FullMatch("-100", &v)); CHECK_EQ(v, -100);
689     CHECK(RE("(-?\\d+)").FullMatch("32767", &v)); CHECK_EQ(v, 32767);
690     CHECK(RE("(-?\\d+)").FullMatch("-32768", &v)); CHECK_EQ(v, -32768);
691     CHECK(!RE("(-?\\d+)").FullMatch("-32769", &v));
692     CHECK(!RE("(-?\\d+)").FullMatch("32768", &v));
693     }
694     {
695     unsigned short v;
696     CHECK(RE("(\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
697     CHECK(RE("(\\d+)").FullMatch("32767", &v)); CHECK_EQ(v, 32767);
698     CHECK(RE("(\\d+)").FullMatch("65535", &v)); CHECK_EQ(v, 65535);
699     CHECK(!RE("(\\d+)").FullMatch("65536", &v));
700     }
701     {
702     int v;
703     static const int max_value = 0x7fffffff;
704     static const int min_value = -max_value - 1;
705     CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
706     CHECK(RE("(-?\\d+)").FullMatch("-100", &v)); CHECK_EQ(v, -100);
707     CHECK(RE("(-?\\d+)").FullMatch("2147483647", &v)); CHECK_EQ(v, max_value);
708     CHECK(RE("(-?\\d+)").FullMatch("-2147483648", &v)); CHECK_EQ(v, min_value);
709     CHECK(!RE("(-?\\d+)").FullMatch("-2147483649", &v));
710     CHECK(!RE("(-?\\d+)").FullMatch("2147483648", &v));
711     }
712     {
713     unsigned int v;
714     static const unsigned int max_value = 0xfffffffful;
715     CHECK(RE("(\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
716     CHECK(RE("(\\d+)").FullMatch("4294967295", &v)); CHECK_EQ(v, max_value);
717     CHECK(!RE("(\\d+)").FullMatch("4294967296", &v));
718     }
719     #ifdef HAVE_LONG_LONG
720     {
721     long long v;
722     static const long long max_value = 0x7fffffffffffffffLL;
723     static const long long min_value = -max_value - 1;
724     char buf[32];
725    
726     CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
727     CHECK(RE("(-?\\d+)").FullMatch("-100",&v)); CHECK_EQ(v, -100);
728    
729     snprintf(buf, sizeof(buf), "%lld", max_value);
730     CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
731    
732     snprintf(buf, sizeof(buf), "%lld", min_value);
733     CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, min_value);
734    
735     snprintf(buf, sizeof(buf), "%lld", max_value);
736     assert(buf[strlen(buf)-1] != '9');
737     buf[strlen(buf)-1]++;
738     CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
739    
740     snprintf(buf, sizeof(buf), "%lld", min_value);
741     assert(buf[strlen(buf)-1] != '9');
742     buf[strlen(buf)-1]++;
743     CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
744     }
745     #endif
746     #if defined HAVE_UNSIGNED_LONG_LONG && defined HAVE_LONG_LONG
747     {
748     unsigned long long v;
749     long long v2;
750     static const unsigned long long max_value = 0xffffffffffffffffULL;
751     char buf[32];
752    
753     CHECK(RE("(-?\\d+)").FullMatch("100",&v)); CHECK_EQ(v, 100);
754     CHECK(RE("(-?\\d+)").FullMatch("-100",&v2)); CHECK_EQ(v2, -100);
755    
756     snprintf(buf, sizeof(buf), "%llu", max_value);
757     CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
758    
759     assert(buf[strlen(buf)-1] != '9');
760     buf[strlen(buf)-1]++;
761     CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
762     }
763     #endif
764     {
765     float v;
766     CHECK(RE("(.*)").FullMatch("100", &v));
767     CHECK(RE("(.*)").FullMatch("-100.", &v));
768     CHECK(RE("(.*)").FullMatch("1e23", &v));
769     }
770     {
771     double v;
772     CHECK(RE("(.*)").FullMatch("100", &v));
773     CHECK(RE("(.*)").FullMatch("-100.", &v));
774     CHECK(RE("(.*)").FullMatch("1e23", &v));
775     }
776    
777     // Check that matching is fully anchored
778     CHECK(!RE("(\\d+)").FullMatch("x1001", &i));
779     CHECK(!RE("(\\d+)").FullMatch("1001x", &i));
780     CHECK(RE("x(\\d+)").FullMatch("x1001", &i)); CHECK_EQ(i, 1001);
781     CHECK(RE("(\\d+)x").FullMatch("1001x", &i)); CHECK_EQ(i, 1001);
782    
783     // Braces
784     CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcd"));
785     CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcde"));
786     CHECK(!RE("[0-9a-f+.-]{5,}").FullMatch("0abc"));
787    
788     // Complicated RE
789     CHECK(RE("foo|bar|[A-Z]").FullMatch("foo"));
790     CHECK(RE("foo|bar|[A-Z]").FullMatch("bar"));
791     CHECK(RE("foo|bar|[A-Z]").FullMatch("X"));
792     CHECK(!RE("foo|bar|[A-Z]").FullMatch("XY"));
793    
794     // Check full-match handling (needs '$' tacked on internally)
795     CHECK(RE("fo|foo").FullMatch("fo"));
796     CHECK(RE("fo|foo").FullMatch("foo"));
797     CHECK(RE("fo|foo$").FullMatch("fo"));
798     CHECK(RE("fo|foo$").FullMatch("foo"));
799     CHECK(RE("foo$").FullMatch("foo"));
800     CHECK(!RE("foo\\$").FullMatch("foo$bar"));
801     CHECK(!RE("fo|bar").FullMatch("fox"));
802    
803     // Uncomment the following if we change the handling of '$' to
804     // prevent it from matching a trailing newline
805     if (false) {
806     // Check that we don't get bitten by pcre's special handling of a
807     // '\n' at the end of the string matching '$'
808     CHECK(!RE("foo$").PartialMatch("foo\n"));
809     }
810    
811     // Number of args
812     int a[16];
813     CHECK(RE("").FullMatch(""));
814    
815     memset(a, 0, sizeof(0));
816     CHECK(RE("(\\d){1}").FullMatch("1",
817     &a[0]));
818     CHECK_EQ(a[0], 1);
819    
820     memset(a, 0, sizeof(0));
821     CHECK(RE("(\\d)(\\d)").FullMatch("12",
822     &a[0], &a[1]));
823     CHECK_EQ(a[0], 1);
824     CHECK_EQ(a[1], 2);
825    
826     memset(a, 0, sizeof(0));
827     CHECK(RE("(\\d)(\\d)(\\d)").FullMatch("123",
828     &a[0], &a[1], &a[2]));
829     CHECK_EQ(a[0], 1);
830     CHECK_EQ(a[1], 2);
831     CHECK_EQ(a[2], 3);
832    
833     memset(a, 0, sizeof(0));
834     CHECK(RE("(\\d)(\\d)(\\d)(\\d)").FullMatch("1234",
835     &a[0], &a[1], &a[2], &a[3]));
836     CHECK_EQ(a[0], 1);
837     CHECK_EQ(a[1], 2);
838     CHECK_EQ(a[2], 3);
839     CHECK_EQ(a[3], 4);
840    
841     memset(a, 0, sizeof(0));
842     CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("12345",
843     &a[0], &a[1], &a[2],
844     &a[3], &a[4]));
845     CHECK_EQ(a[0], 1);
846     CHECK_EQ(a[1], 2);
847     CHECK_EQ(a[2], 3);
848     CHECK_EQ(a[3], 4);
849     CHECK_EQ(a[4], 5);
850    
851     memset(a, 0, sizeof(0));
852     CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("123456",
853     &a[0], &a[1], &a[2],
854     &a[3], &a[4], &a[5]));
855     CHECK_EQ(a[0], 1);
856     CHECK_EQ(a[1], 2);
857     CHECK_EQ(a[2], 3);
858     CHECK_EQ(a[3], 4);
859     CHECK_EQ(a[4], 5);
860     CHECK_EQ(a[5], 6);
861    
862     memset(a, 0, sizeof(0));
863     CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("1234567",
864     &a[0], &a[1], &a[2], &a[3],
865     &a[4], &a[5], &a[6]));
866     CHECK_EQ(a[0], 1);
867     CHECK_EQ(a[1], 2);
868     CHECK_EQ(a[2], 3);
869     CHECK_EQ(a[3], 4);
870     CHECK_EQ(a[4], 5);
871     CHECK_EQ(a[5], 6);
872     CHECK_EQ(a[6], 7);
873    
874     memset(a, 0, sizeof(0));
875     CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)"
876     "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch(
877     "1234567890123456",
878     &a[0], &a[1], &a[2], &a[3],
879     &a[4], &a[5], &a[6], &a[7],
880     &a[8], &a[9], &a[10], &a[11],
881     &a[12], &a[13], &a[14], &a[15]));
882     CHECK_EQ(a[0], 1);
883     CHECK_EQ(a[1], 2);
884     CHECK_EQ(a[2], 3);
885     CHECK_EQ(a[3], 4);
886     CHECK_EQ(a[4], 5);
887     CHECK_EQ(a[5], 6);
888     CHECK_EQ(a[6], 7);
889     CHECK_EQ(a[7], 8);
890     CHECK_EQ(a[8], 9);
891     CHECK_EQ(a[9], 0);
892     CHECK_EQ(a[10], 1);
893     CHECK_EQ(a[11], 2);
894     CHECK_EQ(a[12], 3);
895     CHECK_EQ(a[13], 4);
896     CHECK_EQ(a[14], 5);
897     CHECK_EQ(a[15], 6);
898    
899     /***** PartialMatch *****/
900    
901     printf("Testing PartialMatch\n");
902    
903     CHECK(RE("h.*o").PartialMatch("hello"));
904     CHECK(RE("h.*o").PartialMatch("othello"));
905     CHECK(RE("h.*o").PartialMatch("hello!"));
906     CHECK(RE("((((((((((((((((((((x))))))))))))))))))))").PartialMatch("x"));
907    
908     RadixTests();
909     TestReplace();
910     TestExtract();
911     TestConsume();
912     TestFindAndConsume();
913     TestMatchNumberPeculiarity();
914    
915     // Check the pattern() accessor
916     {
917     const string kPattern = "http://([^/]+)/.*";
918     const RE re(kPattern);
919     CHECK_EQ(kPattern, re.pattern());
920     }
921    
922     // Check RE error field.
923     {
924     RE re("foo");
925     CHECK(re.error().empty()); // Must have no error
926     }
927    
928     #ifdef SUPPORT_UTF8
929     // Check UTF-8 handling
930     {
931     printf("Testing UTF-8 handling\n");
932    
933     // Three Japanese characters (nihongo)
934     const char utf8_string[] = {
935     0xe6, 0x97, 0xa5, // 65e5
936     0xe6, 0x9c, 0xac, // 627c
937     0xe8, 0xaa, 0x9e, // 8a9e
938     0
939     };
940     const char utf8_pattern[] = {
941     '.',
942     0xe6, 0x9c, 0xac, // 627c
943     '.',
944     0
945     };
946    
947     // Both should match in either mode, bytes or UTF-8
948     RE re_test1(".........");
949     CHECK(re_test1.FullMatch(utf8_string));
950     RE re_test2("...", pcrecpp::UTF8());
951     CHECK(re_test2.FullMatch(utf8_string));
952    
953     // Check that '.' matches one byte or UTF-8 character
954     // according to the mode.
955     string ss;
956     RE re_test3("(.)");
957     CHECK(re_test3.PartialMatch(utf8_string, &ss));
958     CHECK_EQ(ss, string("\xe6"));
959     RE re_test4("(.)", pcrecpp::UTF8());
960     CHECK(re_test4.PartialMatch(utf8_string, &ss));
961     CHECK_EQ(ss, string("\xe6\x97\xa5"));
962    
963     // Check that string matches itself in either mode
964     RE re_test5(utf8_string);
965     CHECK(re_test5.FullMatch(utf8_string));
966     RE re_test6(utf8_string, pcrecpp::UTF8());
967     CHECK(re_test6.FullMatch(utf8_string));
968    
969     // Check that pattern matches string only in UTF8 mode
970     RE re_test7(utf8_pattern);
971     CHECK(!re_test7.FullMatch(utf8_string));
972     RE re_test8(utf8_pattern, pcrecpp::UTF8());
973     CHECK(re_test8.FullMatch(utf8_string));
974     }
975    
976     // Check that ungreedy, UTF8 regular expressions don't match when they
977     // oughtn't -- see bug 82246.
978     {
979     // This code always worked.
980     const char* pattern = "\\w+X";
981     const string target = "a aX";
982     RE match_sentence(pattern);
983     RE match_sentence_re(pattern, pcrecpp::UTF8());
984    
985     CHECK(!match_sentence.FullMatch(target));
986     CHECK(!match_sentence_re.FullMatch(target));
987     }
988    
989     {
990     const char* pattern = "(?U)\\w+X";
991     const string target = "a aX";
992     RE match_sentence(pattern);
993     RE match_sentence_re(pattern, pcrecpp::UTF8());
994    
995     CHECK(!match_sentence.FullMatch(target));
996     CHECK(!match_sentence_re.FullMatch(target));
997     }
998     #endif /* def SUPPORT_UTF8 */
999    
1000     printf("Testing error reporting\n");
1001    
1002     { RE re("a\\1"); CHECK(!re.error().empty()); }
1003     {
1004     RE re("a[x");
1005     CHECK(!re.error().empty());
1006     }
1007     {
1008     RE re("a[z-a]");
1009     CHECK(!re.error().empty());
1010     }
1011     {
1012     RE re("a[[:foobar:]]");
1013     CHECK(!re.error().empty());
1014     }
1015     {
1016     RE re("a(b");
1017     CHECK(!re.error().empty());
1018     }
1019     {
1020     RE re("a\\");
1021     CHECK(!re.error().empty());
1022     }
1023    
1024     // Test that recursion is stopped: there will be some errors reported
1025     int matchlimit = 5000;
1026     int bytes = 15 * 1024; // enough to crash if there was no match limit
1027     TestRecursion(bytes, ".", matchlimit);
1028     TestRecursion(bytes, "a", matchlimit);
1029     TestRecursion(bytes, "a.", matchlimit);
1030     TestRecursion(bytes, "ab.", matchlimit);
1031     TestRecursion(bytes, "abc.", matchlimit);
1032    
1033 nigel 81 // Test Options
1034     if (getenv("VERBOSE_TEST") != NULL)
1035     VERBOSE_TEST = true;
1036     TestOptions();
1037    
1038 nigel 77 // Done
1039     printf("OK\n");
1040    
1041     return 0;
1042     }

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12