/[pcre]/code/trunk/pcrecpp_unittest.cc
ViewVC logotype

Contents of /code/trunk/pcrecpp_unittest.cc

Parent Directory | Revision Log


Revision 248 - (hide annotations) (download)
Mon Sep 17 10:09:22 2007 UTC (7 years, 3 months ago) by ph10
File size: 37742 byte(s)
Change condition for the macros for snprintf and strtoll from _WIN32 to 
HAVE_WINDOWS_H because they are needed only when windows.h is present.

1 nigel 93 // -*- coding: utf-8 -*-
2     //
3     // Copyright (c) 2005 - 2006, Google Inc.
4 nigel 77 // All rights reserved.
5     //
6     // Redistribution and use in source and binary forms, with or without
7     // modification, are permitted provided that the following conditions are
8     // met:
9     //
10     // * Redistributions of source code must retain the above copyright
11     // notice, this list of conditions and the following disclaimer.
12     // * Redistributions in binary form must reproduce the above
13     // copyright notice, this list of conditions and the following disclaimer
14     // in the documentation and/or other materials provided with the
15     // distribution.
16     // * Neither the name of Google Inc. nor the names of its
17     // contributors may be used to endorse or promote products derived from
18     // this software without specific prior written permission.
19     //
20     // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21     // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22     // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23     // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24     // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25     // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26     // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27     // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28     // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29     // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30     // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31     //
32     // Author: Sanjay Ghemawat
33     //
34     // TODO: Test extractions for PartialMatch/Consume
35    
36 ph10 200 #ifdef HAVE_CONFIG_H
37 ph10 236 #include "config.h"
38 ph10 200 #endif
39 ph10 199
40 ph10 248 #ifdef HAVE_WINDOWS_H
41 ph10 234 #define snprintf _snprintf
42     #endif
43    
44 nigel 77 #include <stdio.h>
45 nigel 91 #include <cassert>
46 nigel 77 #include <vector>
47     #include "pcrecpp.h"
48    
49     using pcrecpp::StringPiece;
50     using pcrecpp::RE;
51     using pcrecpp::RE_Options;
52     using pcrecpp::Hex;
53     using pcrecpp::Octal;
54     using pcrecpp::CRadix;
55    
// When true, the option tests below also print a description of each case.
static bool VERBOSE_TEST = false;

// CHECK terminates the program with a diagnostic on stderr when `condition`
// is false.  Unlike assert() it is *not* controlled by NDEBUG, so the
// condition is evaluated in every compilation mode.  It is therefore safe
// to put expressions with side effects inside it, e.g.:
//    CHECK_EQ(fp->Write(x), 4)
#define CHECK(condition) do {                                   \
  if (!(condition)) {                                           \
    fprintf(stderr, "%s:%d: Check failed: %s\n",                \
            __FILE__, __LINE__, #condition);                    \
    exit(1);                                                    \
  }                                                             \
} while (0)

// CHECK_EQ(a, b) fails unless a == b.  Both operands are parenthesized so
// that an argument containing an operator of lower precedence than `==`
// (for example `x & mask` or `c ? 1 : 2`) is compared as a whole rather
// than being regrouped around the comparison.
#define CHECK_EQ(a, b) CHECK((a) == (b))
71    
72     static void Timing1(int num_iters) {
73     // Same pattern lots of times
74     RE pattern("ruby:\\d+");
75     StringPiece p("ruby:1234");
76     for (int j = num_iters; j > 0; j--) {
77     CHECK(pattern.FullMatch(p));
78     }
79     }
80    
81     static void Timing2(int num_iters) {
82     // Same pattern lots of times
83     RE pattern("ruby:(\\d+)");
84     int i;
85     for (int j = num_iters; j > 0; j--) {
86     CHECK(pattern.FullMatch("ruby:1234", &i));
87     CHECK_EQ(i, 1234);
88     }
89     }
90    
91     static void Timing3(int num_iters) {
92     string text_string;
93     for (int j = num_iters; j > 0; j--) {
94     text_string += "this is another line\n";
95     }
96    
97     RE line_matcher(".*\n");
98     string line;
99     StringPiece text(text_string);
100     int counter = 0;
101     while (line_matcher.Consume(&text)) {
102     counter++;
103     }
104     printf("Matched %d lines\n", counter);
105     }
106    
#if 0  // change to 1 if you have a way of defining VirtualProcessSize()

// Compiles 100000 distinct patterns and checks that the process size
// reported by VirtualProcessSize() grows by less than 2% over the second
// half of the run (the first 50000 iterations serve as warm-up).
static void LeakTest() {
  // Check for memory leaks
  unsigned long long initial_size = 0;
  for (int i = 0; i < 100000; i++) {
    if (i == 50000) {
      initial_size = VirtualProcessSize();
      printf("Size after 50000: %llu\n", initial_size);
    }
    char buf[100];
    snprintf(buf, sizeof(buf), "pat%09d", i);
    RE newre(buf);
  }
  // Same type as initial_size, so the %llu format below matches the argument.
  const unsigned long long final_size = VirtualProcessSize();
  printf("Size after 100000: %llu\n", final_size);
  // Subtract in floating point: an unsigned subtraction would wrap around to
  // a huge value if the process happened to shrink, failing the check below.
  const double growth =
      (static_cast<double>(final_size) - static_cast<double>(initial_size)) /
      static_cast<double>(final_size);
  printf("Growth: %0.2f%%", growth * 100);
  CHECK(growth < 0.02);    // Allow < 2% growth
}

#endif
129    
130     static void RadixTests() {
131     printf("Testing hex\n");
132    
133     #define CHECK_HEX(type, value) \
134     do { \
135     type v; \
136     CHECK(RE("([0-9a-fA-F]+)[uUlL]*").FullMatch(#value, Hex(&v))); \
137     CHECK_EQ(v, 0x ## value); \
138     CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0x" #value, CRadix(&v))); \
139     CHECK_EQ(v, 0x ## value); \
140     } while(0)
141    
142     CHECK_HEX(short, 2bad);
143     CHECK_HEX(unsigned short, 2badU);
144     CHECK_HEX(int, dead);
145     CHECK_HEX(unsigned int, deadU);
146     CHECK_HEX(long, 7eadbeefL);
147     CHECK_HEX(unsigned long, deadbeefUL);
148     #ifdef HAVE_LONG_LONG
149     CHECK_HEX(long long, 12345678deadbeefLL);
150     #endif
151     #ifdef HAVE_UNSIGNED_LONG_LONG
152     CHECK_HEX(unsigned long long, cafebabedeadbeefULL);
153     #endif
154    
155     #undef CHECK_HEX
156    
157     printf("Testing octal\n");
158    
159     #define CHECK_OCTAL(type, value) \
160     do { \
161     type v; \
162     CHECK(RE("([0-7]+)[uUlL]*").FullMatch(#value, Octal(&v))); \
163     CHECK_EQ(v, 0 ## value); \
164     CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0" #value, CRadix(&v))); \
165     CHECK_EQ(v, 0 ## value); \
166     } while(0)
167    
168     CHECK_OCTAL(short, 77777);
169     CHECK_OCTAL(unsigned short, 177777U);
170     CHECK_OCTAL(int, 17777777777);
171     CHECK_OCTAL(unsigned int, 37777777777U);
172     CHECK_OCTAL(long, 17777777777L);
173     CHECK_OCTAL(unsigned long, 37777777777UL);
174     #ifdef HAVE_LONG_LONG
175     CHECK_OCTAL(long long, 777777777777777777777LL);
176     #endif
177     #ifdef HAVE_UNSIGNED_LONG_LONG
178     CHECK_OCTAL(unsigned long long, 1777777777777777777777ULL);
179     #endif
180    
181     #undef CHECK_OCTAL
182    
183     printf("Testing decimal\n");
184    
185     #define CHECK_DECIMAL(type, value) \
186     do { \
187     type v; \
188     CHECK(RE("(-?[0-9]+)[uUlL]*").FullMatch(#value, &v)); \
189     CHECK_EQ(v, value); \
190     CHECK(RE("(-?[0-9a-fA-FxX]+)[uUlL]*").FullMatch(#value, CRadix(&v))); \
191     CHECK_EQ(v, value); \
192     } while(0)
193    
194     CHECK_DECIMAL(short, -1);
195     CHECK_DECIMAL(unsigned short, 9999);
196     CHECK_DECIMAL(int, -1000);
197     CHECK_DECIMAL(unsigned int, 12345U);
198     CHECK_DECIMAL(long, -10000000L);
199     CHECK_DECIMAL(unsigned long, 3083324652U);
200     #ifdef HAVE_LONG_LONG
201     CHECK_DECIMAL(long long, -100000000000000LL);
202     #endif
203     #ifdef HAVE_UNSIGNED_LONG_LONG
204     CHECK_DECIMAL(unsigned long long, 1234567890987654321ULL);
205     #endif
206    
207     #undef CHECK_DECIMAL
208    
209     }
210    
211     static void TestReplace() {
212     printf("Testing Replace\n");
213    
214     struct ReplaceTest {
215     const char *regexp;
216     const char *rewrite;
217     const char *original;
218     const char *single;
219     const char *global;
220     };
221     static const ReplaceTest tests[] = {
222     { "(qu|[b-df-hj-np-tv-z]*)([a-z]+)",
223     "\\2\\1ay",
224     "the quick brown fox jumps over the lazy dogs.",
225     "ethay quick brown fox jumps over the lazy dogs.",
226     "ethay ickquay ownbray oxfay umpsjay overay ethay azylay ogsday." },
227     { "\\w+",
228     "\\0-NOSPAM",
229     "paul.haahr@google.com",
230     "paul-NOSPAM.haahr@google.com",
231     "paul-NOSPAM.haahr-NOSPAM@google-NOSPAM.com-NOSPAM" },
232     { "^",
233     "(START)",
234     "foo",
235     "(START)foo",
236     "(START)foo" },
237     { "^",
238     "(START)",
239     "",
240     "(START)",
241     "(START)" },
242     { "$",
243     "(END)",
244     "",
245     "(END)",
246     "(END)" },
247     { "b",
248     "bb",
249     "ababababab",
250     "abbabababab",
251     "abbabbabbabbabb" },
252     { "b",
253     "bb",
254     "bbbbbb",
255     "bbbbbbb",
256     "bbbbbbbbbbbb" },
257     { "b+",
258     "bb",
259     "bbbbbb",
260     "bb",
261     "bb" },
262     { "b*",
263     "bb",
264     "bbbbbb",
265     "bb",
266     "bb" },
267     { "b*",
268     "bb",
269     "aaaaa",
270     "bbaaaaa",
271     "bbabbabbabbabbabb" },
272 nigel 91 { "b*",
273     "bb",
274     "aa\naa\n",
275     "bbaa\naa\n",
276     "bbabbabb\nbbabbabb\nbb" },
277     { "b*",
278     "bb",
279     "aa\raa\r",
280     "bbaa\raa\r",
281     "bbabbabb\rbbabbabb\rbb" },
282     { "b*",
283     "bb",
284     "aa\r\naa\r\n",
285     "bbaa\r\naa\r\n",
286     "bbabbabb\r\nbbabbabb\r\nbb" },
287     #ifdef SUPPORT_UTF8
288     { "b*",
289     "bb",
290     "\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8", // utf8
291     "bb\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8",
292     "bb\xE3\x83\x9B""bb""\xE3\x83\xBC""bb""\xE3\x83\xA0""bb""\xE3\x81\xB8""bb" },
293     { "b*",
294     "bb",
295     "\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n", // utf8
296     "bb\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n",
297     ("bb\xE3\x83\x9B""bb\r\nbb""\xE3\x83\xBC""bb\rbb""\xE3\x83\xA0"
298     "bb\nbb""\xE3\x81\xB8""bb\r\nbb") },
299     #endif
300 nigel 77 { "", NULL, NULL, NULL, NULL }
301     };
302    
303 nigel 91 #ifdef SUPPORT_UTF8
304     const bool support_utf8 = true;
305     #else
306     const bool support_utf8 = false;
307     #endif
308    
309 nigel 77 for (const ReplaceTest *t = tests; t->original != NULL; ++t) {
310 nigel 91 RE re(t->regexp, RE_Options(PCRE_NEWLINE_CRLF).set_utf8(support_utf8));
311     assert(re.error().empty());
312 nigel 77 string one(t->original);
313 nigel 91 CHECK(re.Replace(t->rewrite, &one));
314 nigel 77 CHECK_EQ(one, t->single);
315     string all(t->original);
316 nigel 91 CHECK(re.GlobalReplace(t->rewrite, &all) > 0);
317 nigel 77 CHECK_EQ(all, t->global);
318     }
319 nigel 91
320     // One final test: test \r\n replacement when we're not in CRLF mode
321     {
322     RE re("b*", RE_Options(PCRE_NEWLINE_CR).set_utf8(support_utf8));
323     assert(re.error().empty());
324     string all("aa\r\naa\r\n");
325     CHECK(re.GlobalReplace("bb", &all) > 0);
326     CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
327     }
328     {
329     RE re("b*", RE_Options(PCRE_NEWLINE_LF).set_utf8(support_utf8));
330     assert(re.error().empty());
331     string all("aa\r\naa\r\n");
332     CHECK(re.GlobalReplace("bb", &all) > 0);
333     CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
334     }
335     // TODO: test what happens when no PCRE_NEWLINE_* flag is set.
336     // Alas, the answer depends on how pcre was compiled.
337 nigel 77 }
338    
339     static void TestExtract() {
340     printf("Testing Extract\n");
341    
342     string s;
343    
344     CHECK(RE("(.*)@([^.]*)").Extract("\\2!\\1", "boris@kremvax.ru", &s));
345     CHECK_EQ(s, "kremvax!boris");
346    
347     // check the RE interface as well
348     CHECK(RE(".*").Extract("'\\0'", "foo", &s));
349     CHECK_EQ(s, "'foo'");
350     CHECK(!RE("bar").Extract("'\\0'", "baz", &s));
351     CHECK_EQ(s, "'foo'");
352     }
353    
354     static void TestConsume() {
355     printf("Testing Consume\n");
356    
357     string word;
358    
359     string s(" aaa b!@#$@#$cccc");
360     StringPiece input(s);
361    
362     RE r("\\s*(\\w+)"); // matches a word, possibly proceeded by whitespace
363     CHECK(r.Consume(&input, &word));
364     CHECK_EQ(word, "aaa");
365     CHECK(r.Consume(&input, &word));
366     CHECK_EQ(word, "b");
367     CHECK(! r.Consume(&input, &word));
368     }
369    
370     static void TestFindAndConsume() {
371     printf("Testing FindAndConsume\n");
372    
373     string word;
374    
375     string s(" aaa b!@#$@#$cccc");
376     StringPiece input(s);
377    
378     RE r("(\\w+)"); // matches a word
379     CHECK(r.FindAndConsume(&input, &word));
380     CHECK_EQ(word, "aaa");
381     CHECK(r.FindAndConsume(&input, &word));
382     CHECK_EQ(word, "b");
383     CHECK(r.FindAndConsume(&input, &word));
384     CHECK_EQ(word, "cccc");
385     CHECK(! r.FindAndConsume(&input, &word));
386     }
387    
388     static void TestMatchNumberPeculiarity() {
389     printf("Testing match-number peculiaraity\n");
390    
391     string word1;
392     string word2;
393     string word3;
394    
395     RE r("(foo)|(bar)|(baz)");
396     CHECK(r.PartialMatch("foo", &word1, &word2, &word3));
397     CHECK_EQ(word1, "foo");
398     CHECK_EQ(word2, "");
399     CHECK_EQ(word3, "");
400     CHECK(r.PartialMatch("bar", &word1, &word2, &word3));
401     CHECK_EQ(word1, "");
402     CHECK_EQ(word2, "bar");
403     CHECK_EQ(word3, "");
404     CHECK(r.PartialMatch("baz", &word1, &word2, &word3));
405     CHECK_EQ(word1, "");
406     CHECK_EQ(word2, "");
407     CHECK_EQ(word3, "baz");
408     CHECK(!r.PartialMatch("f", &word1, &word2, &word3));
409    
410     string a;
411     CHECK(RE("(foo)|hello").FullMatch("hello", &a));
412     CHECK_EQ(a, "");
413     }
414    
415 nigel 87 static void TestRecursion() {
416 nigel 77 printf("Testing recursion\n");
417    
418 nigel 87 // Get one string that passes (sometimes), one that never does.
419     string text_good("abcdefghijk");
420     string text_bad("acdefghijkl");
421    
422     // According to pcretest, matching text_good against (\w+)*b
423     // requires match_limit of at least 8192, and match_recursion_limit
424     // of at least 37.
425    
426     RE_Options options_ml;
427     options_ml.set_match_limit(8192);
428     RE re("(\\w+)*b", options_ml);
429     CHECK(re.PartialMatch(text_good) == true);
430     CHECK(re.PartialMatch(text_bad) == false);
431     CHECK(re.FullMatch(text_good) == false);
432     CHECK(re.FullMatch(text_bad) == false);
433    
434     options_ml.set_match_limit(1024);
435     RE re2("(\\w+)*b", options_ml);
436     CHECK(re2.PartialMatch(text_good) == false); // because of match_limit
437     CHECK(re2.PartialMatch(text_bad) == false);
438     CHECK(re2.FullMatch(text_good) == false);
439     CHECK(re2.FullMatch(text_bad) == false);
440    
441     RE_Options options_mlr;
442     options_mlr.set_match_limit_recursion(50);
443     RE re3("(\\w+)*b", options_mlr);
444     CHECK(re3.PartialMatch(text_good) == true);
445     CHECK(re3.PartialMatch(text_bad) == false);
446     CHECK(re3.FullMatch(text_good) == false);
447     CHECK(re3.FullMatch(text_bad) == false);
448    
449     options_mlr.set_match_limit_recursion(10);
450     RE re4("(\\w+)*b", options_mlr);
451     CHECK(re4.PartialMatch(text_good) == false);
452     CHECK(re4.PartialMatch(text_bad) == false);
453     CHECK(re4.FullMatch(text_good) == false);
454     CHECK(re4.FullMatch(text_bad) == false);
455 nigel 77 }
456    
457 nigel 93 // A meta-quoted string, interpreted as a pattern, should always match
458     // the original unquoted string.
459     static void TestQuoteMeta(string unquoted, RE_Options options = RE_Options()) {
460     string quoted = RE::QuoteMeta(unquoted);
461     RE re(quoted, options);
462     CHECK(re.FullMatch(unquoted));
463     }
464    
465     // A string containing meaningful regexp characters, which is then meta-
466     // quoted, should not generally match a string the unquoted string does.
467     static void NegativeTestQuoteMeta(string unquoted, string should_not_match,
468     RE_Options options = RE_Options()) {
469     string quoted = RE::QuoteMeta(unquoted);
470     RE re(quoted, options);
471     CHECK(!re.FullMatch(should_not_match));
472     }
473    
474     // Tests that quoted meta characters match their original strings,
475     // and that a few things that shouldn't match indeed do not.
476     static void TestQuotaMetaSimple() {
477     TestQuoteMeta("foo");
478     TestQuoteMeta("foo.bar");
479     TestQuoteMeta("foo\\.bar");
480     TestQuoteMeta("[1-9]");
481     TestQuoteMeta("1.5-2.0?");
482     TestQuoteMeta("\\d");
483     TestQuoteMeta("Who doesn't like ice cream?");
484     TestQuoteMeta("((a|b)c?d*e+[f-h]i)");
485     TestQuoteMeta("((?!)xxx).*yyy");
486     TestQuoteMeta("([");
487     }
488    
489     static void TestQuoteMetaSimpleNegative() {
490     NegativeTestQuoteMeta("foo", "bar");
491     NegativeTestQuoteMeta("...", "bar");
492     NegativeTestQuoteMeta("\\.", ".");
493     NegativeTestQuoteMeta("\\.", "..");
494     NegativeTestQuoteMeta("(a)", "a");
495     NegativeTestQuoteMeta("(a|b)", "a");
496     NegativeTestQuoteMeta("(a|b)", "(a)");
497     NegativeTestQuoteMeta("(a|b)", "a|b");
498     NegativeTestQuoteMeta("[0-9]", "0");
499     NegativeTestQuoteMeta("[0-9]", "0-9");
500     NegativeTestQuoteMeta("[0-9]", "[9]");
501     NegativeTestQuoteMeta("((?!)xxx)", "xxx");
502     }
503    
504     static void TestQuoteMetaLatin1() {
505     TestQuoteMeta("3\xb2 = 9");
506     }
507    
// QuoteMeta() round-trips for UTF-8 text containing 2-, 3- and 4-byte
// sequences, plus one negative case.  Compiled to an empty function when
// PCRE was built without SUPPORT_UTF8.
static void TestQuoteMetaUtf8() {
#ifdef SUPPORT_UTF8
  const RE_Options utf8_mode = pcrecpp::UTF8();
  TestQuoteMeta("Pl\xc3\xa1\x63ido Domingo", utf8_mode);
  TestQuoteMeta("xyz", utf8_mode);                 // No fancy utf8
  TestQuoteMeta("\xc2\xb0", utf8_mode);            // 2-byte utf8 (degree symbol)
  TestQuoteMeta("27\xc2\xb0 degrees", utf8_mode);  // As a middle character
  TestQuoteMeta("\xe2\x80\xb3", utf8_mode);        // 3-byte utf8 (double prime)
  TestQuoteMeta("\xf0\x9d\x85\x9f", utf8_mode);    // 4-byte utf8 (music note)
  TestQuoteMeta("27\xc2\xb0");  // Interpreted as Latin-1, but should still work
  // A subject with a literal backslash before each byte of the degree
  // symbol must not match the quoted pattern.
  NegativeTestQuoteMeta("27\xc2\xb0",              // 2-byte utf (degree symbol)
                        "27\\\xc2\\\xb0",
                        utf8_mode);
#endif
}
522    
523     static void TestQuoteMetaAll() {
524     printf("Testing QuoteMeta\n");
525     TestQuotaMetaSimple();
526     TestQuoteMetaSimpleNegative();
527     TestQuoteMetaLatin1();
528     TestQuoteMetaUtf8();
529     }
530    
531 nigel 81 //
532     // Options tests contributed by
533     // Giuseppe Maxia, CTO, Stardata s.r.l.
534     // July 2005
535     //
536     static void GetOneOptionResult(
537     const char *option_name,
538     const char *regex,
539     const char *str,
540     RE_Options options,
541     bool full,
542     string expected) {
543 nigel 77
544 nigel 81 printf("Testing Option <%s>\n", option_name);
545     if(VERBOSE_TEST)
546     printf("/%s/ finds \"%s\" within \"%s\" \n",
547     regex,
548     expected.c_str(),
549     str);
550     string captured("");
551     if (full)
552     RE(regex,options).FullMatch(str, &captured);
553     else
554     RE(regex,options).PartialMatch(str, &captured);
555     CHECK_EQ(captured, expected);
556     }
557    
558     static void TestOneOption(
559     const char *option_name,
560     const char *regex,
561     const char *str,
562     RE_Options options,
563     bool full,
564     bool assertive = true) {
565    
566     printf("Testing Option <%s>\n", option_name);
567     if (VERBOSE_TEST)
568     printf("'%s' %s /%s/ \n",
569     str,
570     (assertive? "matches" : "doesn't match"),
571     regex);
572     if (assertive) {
573     if (full)
574     CHECK(RE(regex,options).FullMatch(str));
575     else
576     CHECK(RE(regex,options).PartialMatch(str));
577     } else {
578     if (full)
579     CHECK(!RE(regex,options).FullMatch(str));
580     else
581     CHECK(!RE(regex,options).PartialMatch(str));
582     }
583     }
584    
585     static void Test_CASELESS() {
586     RE_Options options;
587     RE_Options options2;
588    
589     options.set_caseless(true);
590     TestOneOption("CASELESS (class)", "HELLO", "hello", options, false);
591     TestOneOption("CASELESS (class2)", "HELLO", "hello", options2.set_caseless(true), false);
592     TestOneOption("CASELESS (class)", "^[A-Z]+$", "Hello", options, false);
593    
594     TestOneOption("CASELESS (function)", "HELLO", "hello", pcrecpp::CASELESS(), false);
595     TestOneOption("CASELESS (function)", "^[A-Z]+$", "Hello", pcrecpp::CASELESS(), false);
596     options.set_caseless(false);
597     TestOneOption("no CASELESS", "HELLO", "hello", options, false, false);
598     }
599    
600     static void Test_MULTILINE() {
601     RE_Options options;
602     RE_Options options2;
603     const char *str = "HELLO\n" "cruel\n" "world\n";
604    
605     options.set_multiline(true);
606     TestOneOption("MULTILINE (class)", "^cruel$", str, options, false);
607     TestOneOption("MULTILINE (class2)", "^cruel$", str, options2.set_multiline(true), false);
608     TestOneOption("MULTILINE (function)", "^cruel$", str, pcrecpp::MULTILINE(), false);
609     options.set_multiline(false);
610     TestOneOption("no MULTILINE", "^cruel$", str, options, false, false);
611     }
612    
613     static void Test_DOTALL() {
614     RE_Options options;
615     RE_Options options2;
616     const char *str = "HELLO\n" "cruel\n" "world";
617    
618     options.set_dotall(true);
619     TestOneOption("DOTALL (class)", "HELLO.*world", str, options, true);
620     TestOneOption("DOTALL (class2)", "HELLO.*world", str, options2.set_dotall(true), true);
621     TestOneOption("DOTALL (function)", "HELLO.*world", str, pcrecpp::DOTALL(), true);
622     options.set_dotall(false);
623     TestOneOption("no DOTALL", "HELLO.*world", str, options, true, false);
624     }
625    
626     static void Test_DOLLAR_ENDONLY() {
627     RE_Options options;
628     RE_Options options2;
629     const char *str = "HELLO world\n";
630    
631     TestOneOption("no DOLLAR_ENDONLY", "world$", str, options, false);
632     options.set_dollar_endonly(true);
633     TestOneOption("DOLLAR_ENDONLY 1", "world$", str, options, false, false);
634     TestOneOption("DOLLAR_ENDONLY 2", "world$", str, options2.set_dollar_endonly(true), false, false);
635     }
636    
637     static void Test_EXTRA() {
638     RE_Options options;
639     const char *str = "HELLO";
640    
641     options.set_extra(true);
642     TestOneOption("EXTRA 1", "\\HELL\\O", str, options, true, false );
643     TestOneOption("EXTRA 2", "\\HELL\\O", str, RE_Options().set_extra(true), true, false );
644     options.set_extra(false);
645     TestOneOption("no EXTRA", "\\HELL\\O", str, options, true );
646     }
647    
648     static void Test_EXTENDED() {
649     RE_Options options;
650     RE_Options options2;
651     const char *str = "HELLO world";
652    
653     options.set_extended(true);
654     TestOneOption("EXTENDED (class)", "HELLO world", str, options, false, false);
655     TestOneOption("EXTENDED (class2)", "HELLO world", str, options2.set_extended(true), false, false);
656     TestOneOption("EXTENDED (class)",
657     "^ HE L{2} O "
658     "\\s+ "
659     "\\w+ $ ",
660     str,
661     options,
662     false);
663    
664     TestOneOption("EXTENDED (function)", "HELLO world", str, pcrecpp::EXTENDED(), false, false);
665     TestOneOption("EXTENDED (function)",
666     "^ HE L{2} O "
667     "\\s+ "
668     "\\w+ $ ",
669     str,
670     pcrecpp::EXTENDED(),
671     false);
672    
673     options.set_extended(false);
674     TestOneOption("no EXTENDED", "HELLO world", str, options, false);
675     }
676    
677     static void Test_NO_AUTO_CAPTURE() {
678     RE_Options options;
679     const char *str = "HELLO world";
680     string captured;
681    
682     printf("Testing Option <no NO_AUTO_CAPTURE>\n");
683     if (VERBOSE_TEST)
684     printf("parentheses capture text\n");
685     RE re("(world|universe)$", options);
686     CHECK(re.Extract("\\1", str , &captured));
687     CHECK_EQ(captured, "world");
688     options.set_no_auto_capture(true);
689     printf("testing Option <NO_AUTO_CAPTURE>\n");
690     if (VERBOSE_TEST)
691     printf("parentheses do not capture text\n");
692     re.Extract("\\1",str, &captured );
693     CHECK_EQ(captured, "world");
694     }
695    
696     static void Test_UNGREEDY() {
697     RE_Options options;
698     const char *str = "HELLO, 'this' is the 'world'";
699    
700     options.set_ungreedy(true);
701     GetOneOptionResult("UNGREEDY 1", "('.*')", str, options, false, "'this'" );
702     GetOneOptionResult("UNGREEDY 2", "('.*')", str, RE_Options().set_ungreedy(true), false, "'this'" );
703     GetOneOptionResult("UNGREEDY", "('.*?')", str, options, false, "'this' is the 'world'" );
704    
705     options.set_ungreedy(false);
706     GetOneOptionResult("no UNGREEDY", "('.*')", str, options, false, "'this' is the 'world'" );
707     GetOneOptionResult("no UNGREEDY", "('.*?')", str, options, false, "'this'" );
708     }
709    
710     static void Test_all_options() {
711     const char *str = "HELLO\n" "cruel\n" "world";
712     RE_Options options;
713     options.set_all_options(PCRE_CASELESS | PCRE_DOTALL);
714    
715     TestOneOption("all_options (CASELESS|DOTALL)", "^hello.*WORLD", str , options, false);
716     options.set_all_options(0);
717     TestOneOption("all_options (0)", "^hello.*WORLD", str , options, false, false);
718     options.set_all_options(PCRE_MULTILINE | PCRE_EXTENDED);
719    
720     TestOneOption("all_options (MULTILINE|EXTENDED)", " ^ c r u e l $ ", str, options, false);
721     TestOneOption("all_options (MULTILINE|EXTENDED) with constructor",
722     " ^ c r u e l $ ",
723     str,
724     RE_Options(PCRE_MULTILINE | PCRE_EXTENDED),
725     false);
726    
727     TestOneOption("all_options (MULTILINE|EXTENDED) with concatenation",
728     " ^ c r u e l $ ",
729     str,
730     RE_Options()
731     .set_multiline(true)
732     .set_extended(true),
733     false);
734    
735     options.set_all_options(0);
736     TestOneOption("all_options (0)", "^ c r u e l $", str, options, false, false);
737    
738     }
739    
740     static void TestOptions() {
741     printf("Testing Options\n");
742     Test_CASELESS();
743     Test_MULTILINE();
744     Test_DOTALL();
745     Test_DOLLAR_ENDONLY();
746     Test_EXTENDED();
747     Test_NO_AUTO_CAPTURE();
748     Test_UNGREEDY();
749     Test_EXTRA();
750     Test_all_options();
751     }
752    
753 nigel 93 static void TestConstructors() {
754     printf("Testing constructors\n");
755    
756     RE_Options options;
757     options.set_dotall(true);
758     const char *str = "HELLO\n" "cruel\n" "world";
759    
760     RE orig("HELLO.*world", options);
761     CHECK(orig.FullMatch(str));
762    
763     RE copy1(orig);
764     CHECK(copy1.FullMatch(str));
765    
766     RE copy2("not a match");
767     CHECK(!copy2.FullMatch(str));
768     copy2 = copy1;
769     CHECK(copy2.FullMatch(str));
770     copy2 = orig;
771     CHECK(copy2.FullMatch(str));
772    
773     // Make sure when we assign to ourselves, nothing bad happens
774     orig = orig;
775     copy1 = copy1;
776     copy2 = copy2;
777     CHECK(orig.FullMatch(str));
778     CHECK(copy1.FullMatch(str));
779     CHECK(copy2.FullMatch(str));
780     }
781    
782 nigel 77 int main(int argc, char** argv) {
783     // Treat any flag as --help
784     if (argc > 1 && argv[1][0] == '-') {
785     printf("Usage: %s [timing1|timing2|timing3 num-iters]\n"
786     " If 'timingX ###' is specified, run the given timing test\n"
787     " with the given number of iterations, rather than running\n"
788     " the default corectness test.\n", argv[0]);
789     return 0;
790     }
791    
792     if (argc > 1) {
793     if ( argc == 2 || atoi(argv[2]) == 0) {
794     printf("timing mode needs a num-iters argument\n");
795     return 1;
796     }
797     if (!strcmp(argv[1], "timing1"))
798     Timing1(atoi(argv[2]));
799     else if (!strcmp(argv[1], "timing2"))
800     Timing2(atoi(argv[2]));
801     else if (!strcmp(argv[1], "timing3"))
802     Timing3(atoi(argv[2]));
803     else
804     printf("Unknown argument '%s'\n", argv[1]);
805     return 0;
806     }
807    
808     printf("Testing FullMatch\n");
809    
810     int i;
811     string s;
812    
813     /***** FullMatch with no args *****/
814    
815     CHECK(RE("h.*o").FullMatch("hello"));
816 ph10 179 CHECK(!RE("h.*o").FullMatch("othello")); // Must be anchored at front
817     CHECK(!RE("h.*o").FullMatch("hello!")); // Must be anchored at end
818     CHECK(RE("a*").FullMatch("aaaa")); // Fullmatch with normal op
819     CHECK(RE("a*?").FullMatch("aaaa")); // Fullmatch with nongreedy op
820     CHECK(RE("a*?\\z").FullMatch("aaaa")); // Two unusual ops
821 nigel 77
822     /***** FullMatch with args *****/
823    
824     // Zero-arg
825     CHECK(RE("\\d+").FullMatch("1001"));
826    
827     // Single-arg
828     CHECK(RE("(\\d+)").FullMatch("1001", &i));
829     CHECK_EQ(i, 1001);
830     CHECK(RE("(-?\\d+)").FullMatch("-123", &i));
831     CHECK_EQ(i, -123);
832     CHECK(!RE("()\\d+").FullMatch("10", &i));
833     CHECK(!RE("(\\d+)").FullMatch("1234567890123456789012345678901234567890",
834     &i));
835    
836     // Digits surrounding integer-arg
837     CHECK(RE("1(\\d*)4").FullMatch("1234", &i));
838     CHECK_EQ(i, 23);
839     CHECK(RE("(\\d)\\d+").FullMatch("1234", &i));
840     CHECK_EQ(i, 1);
841     CHECK(RE("(-\\d)\\d+").FullMatch("-1234", &i));
842     CHECK_EQ(i, -1);
843     CHECK(RE("(\\d)").PartialMatch("1234", &i));
844     CHECK_EQ(i, 1);
845     CHECK(RE("(-\\d)").PartialMatch("-1234", &i));
846     CHECK_EQ(i, -1);
847    
848     // String-arg
849     CHECK(RE("h(.*)o").FullMatch("hello", &s));
850     CHECK_EQ(s, string("ell"));
851    
852     // StringPiece-arg
853     StringPiece sp;
854     CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &sp, &i));
855     CHECK_EQ(sp.size(), 4);
856     CHECK(memcmp(sp.data(), "ruby", 4) == 0);
857     CHECK_EQ(i, 1234);
858    
859     // Multi-arg
860     CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &s, &i));
861     CHECK_EQ(s, string("ruby"));
862     CHECK_EQ(i, 1234);
863    
864     // Ignored arg
865     CHECK(RE("(\\w+)(:)(\\d+)").FullMatch("ruby:1234", &s, (void*)NULL, &i));
866     CHECK_EQ(s, string("ruby"));
867     CHECK_EQ(i, 1234);
868    
869     // Type tests
870     {
871     char c;
872     CHECK(RE("(H)ello").FullMatch("Hello", &c));
873     CHECK_EQ(c, 'H');
874     }
875     {
876     unsigned char c;
877     CHECK(RE("(H)ello").FullMatch("Hello", &c));
878     CHECK_EQ(c, static_cast<unsigned char>('H'));
879     }
880     {
881     short v;
882     CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
883     CHECK(RE("(-?\\d+)").FullMatch("-100", &v)); CHECK_EQ(v, -100);
884     CHECK(RE("(-?\\d+)").FullMatch("32767", &v)); CHECK_EQ(v, 32767);
885     CHECK(RE("(-?\\d+)").FullMatch("-32768", &v)); CHECK_EQ(v, -32768);
886     CHECK(!RE("(-?\\d+)").FullMatch("-32769", &v));
887     CHECK(!RE("(-?\\d+)").FullMatch("32768", &v));
888     }
889     {
890     unsigned short v;
891     CHECK(RE("(\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
892     CHECK(RE("(\\d+)").FullMatch("32767", &v)); CHECK_EQ(v, 32767);
893     CHECK(RE("(\\d+)").FullMatch("65535", &v)); CHECK_EQ(v, 65535);
894     CHECK(!RE("(\\d+)").FullMatch("65536", &v));
895     }
896     {
897     int v;
898     static const int max_value = 0x7fffffff;
899     static const int min_value = -max_value - 1;
900     CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
901     CHECK(RE("(-?\\d+)").FullMatch("-100", &v)); CHECK_EQ(v, -100);
902     CHECK(RE("(-?\\d+)").FullMatch("2147483647", &v)); CHECK_EQ(v, max_value);
903     CHECK(RE("(-?\\d+)").FullMatch("-2147483648", &v)); CHECK_EQ(v, min_value);
904     CHECK(!RE("(-?\\d+)").FullMatch("-2147483649", &v));
905     CHECK(!RE("(-?\\d+)").FullMatch("2147483648", &v));
906     }
907     {
908     unsigned int v;
909     static const unsigned int max_value = 0xfffffffful;
910     CHECK(RE("(\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
911     CHECK(RE("(\\d+)").FullMatch("4294967295", &v)); CHECK_EQ(v, max_value);
912     CHECK(!RE("(\\d+)").FullMatch("4294967296", &v));
913     }
914     #ifdef HAVE_LONG_LONG
915 ph10 193 # if defined(__MINGW__) || defined(__MINGW32__)
916     # define LLD "%I64d"
917 ph10 201 # define LLU "%I64u"
918 ph10 193 # else
919     # define LLD "%lld"
920 ph10 201 # define LLU "%llu"
921 ph10 193 # endif
922 nigel 77 {
923     long long v;
924     static const long long max_value = 0x7fffffffffffffffLL;
925     static const long long min_value = -max_value - 1;
926     char buf[32];
927    
928     CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
929     CHECK(RE("(-?\\d+)").FullMatch("-100",&v)); CHECK_EQ(v, -100);
930    
931 ph10 193 snprintf(buf, sizeof(buf), LLD, max_value);
932 nigel 77 CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
933    
934 ph10 193 snprintf(buf, sizeof(buf), LLD, min_value);
935 nigel 77 CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, min_value);
936    
937 ph10 193 snprintf(buf, sizeof(buf), LLD, max_value);
938 nigel 77 assert(buf[strlen(buf)-1] != '9');
939     buf[strlen(buf)-1]++;
940     CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
941    
942 ph10 193 snprintf(buf, sizeof(buf), LLD, min_value);
943 nigel 77 assert(buf[strlen(buf)-1] != '9');
944     buf[strlen(buf)-1]++;
945     CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
946     }
947     #endif
948     #if defined HAVE_UNSIGNED_LONG_LONG && defined HAVE_LONG_LONG
949     {
950     unsigned long long v;
951     long long v2;
952     static const unsigned long long max_value = 0xffffffffffffffffULL;
953     char buf[32];
954    
955     CHECK(RE("(-?\\d+)").FullMatch("100",&v)); CHECK_EQ(v, 100);
956     CHECK(RE("(-?\\d+)").FullMatch("-100",&v2)); CHECK_EQ(v2, -100);
957    
958 ph10 201 snprintf(buf, sizeof(buf), LLU, max_value);
959 nigel 77 CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
960    
961     assert(buf[strlen(buf)-1] != '9');
962     buf[strlen(buf)-1]++;
963     CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
964     }
965     #endif
966     {
967     float v;
968     CHECK(RE("(.*)").FullMatch("100", &v));
969     CHECK(RE("(.*)").FullMatch("-100.", &v));
970     CHECK(RE("(.*)").FullMatch("1e23", &v));
971     }
972     {
973     double v;
974     CHECK(RE("(.*)").FullMatch("100", &v));
975     CHECK(RE("(.*)").FullMatch("-100.", &v));
976     CHECK(RE("(.*)").FullMatch("1e23", &v));
977     }
978    
979     // Check that matching is fully anchored
980     CHECK(!RE("(\\d+)").FullMatch("x1001", &i));
981     CHECK(!RE("(\\d+)").FullMatch("1001x", &i));
982     CHECK(RE("x(\\d+)").FullMatch("x1001", &i)); CHECK_EQ(i, 1001);
983     CHECK(RE("(\\d+)x").FullMatch("1001x", &i)); CHECK_EQ(i, 1001);
984    
985     // Braces
986     CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcd"));
987     CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcde"));
988     CHECK(!RE("[0-9a-f+.-]{5,}").FullMatch("0abc"));
989    
990     // Complicated RE
991     CHECK(RE("foo|bar|[A-Z]").FullMatch("foo"));
992     CHECK(RE("foo|bar|[A-Z]").FullMatch("bar"));
993     CHECK(RE("foo|bar|[A-Z]").FullMatch("X"));
994     CHECK(!RE("foo|bar|[A-Z]").FullMatch("XY"));
995    
996     // Check full-match handling (needs '$' tacked on internally)
997     CHECK(RE("fo|foo").FullMatch("fo"));
998     CHECK(RE("fo|foo").FullMatch("foo"));
999     CHECK(RE("fo|foo$").FullMatch("fo"));
1000     CHECK(RE("fo|foo$").FullMatch("foo"));
1001     CHECK(RE("foo$").FullMatch("foo"));
1002     CHECK(!RE("foo\\$").FullMatch("foo$bar"));
1003     CHECK(!RE("fo|bar").FullMatch("fox"));
1004    
1005     // Uncomment the following if we change the handling of '$' to
1006     // prevent it from matching a trailing newline
1007     if (false) {
1008     // Check that we don't get bitten by pcre's special handling of a
1009     // '\n' at the end of the string matching '$'
1010     CHECK(!RE("foo$").PartialMatch("foo\n"));
1011     }
1012    
1013     // Number of args
1014     int a[16];
1015     CHECK(RE("").FullMatch(""));
1016    
1017     memset(a, 0, sizeof(0));
1018     CHECK(RE("(\\d){1}").FullMatch("1",
1019     &a[0]));
1020     CHECK_EQ(a[0], 1);
1021    
1022     memset(a, 0, sizeof(0));
1023     CHECK(RE("(\\d)(\\d)").FullMatch("12",
1024     &a[0], &a[1]));
1025     CHECK_EQ(a[0], 1);
1026     CHECK_EQ(a[1], 2);
1027    
1028     memset(a, 0, sizeof(0));
1029     CHECK(RE("(\\d)(\\d)(\\d)").FullMatch("123",
1030     &a[0], &a[1], &a[2]));
1031     CHECK_EQ(a[0], 1);
1032     CHECK_EQ(a[1], 2);
1033     CHECK_EQ(a[2], 3);
1034    
1035     memset(a, 0, sizeof(0));
1036     CHECK(RE("(\\d)(\\d)(\\d)(\\d)").FullMatch("1234",
1037     &a[0], &a[1], &a[2], &a[3]));
1038     CHECK_EQ(a[0], 1);
1039     CHECK_EQ(a[1], 2);
1040     CHECK_EQ(a[2], 3);
1041     CHECK_EQ(a[3], 4);
1042    
1043     memset(a, 0, sizeof(0));
1044     CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("12345",
1045     &a[0], &a[1], &a[2],
1046     &a[3], &a[4]));
1047     CHECK_EQ(a[0], 1);
1048     CHECK_EQ(a[1], 2);
1049     CHECK_EQ(a[2], 3);
1050     CHECK_EQ(a[3], 4);
1051     CHECK_EQ(a[4], 5);
1052    
1053     memset(a, 0, sizeof(0));
1054     CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("123456",
1055     &a[0], &a[1], &a[2],
1056     &a[3], &a[4], &a[5]));
1057     CHECK_EQ(a[0], 1);
1058     CHECK_EQ(a[1], 2);
1059     CHECK_EQ(a[2], 3);
1060     CHECK_EQ(a[3], 4);
1061     CHECK_EQ(a[4], 5);
1062     CHECK_EQ(a[5], 6);
1063    
1064     memset(a, 0, sizeof(0));
1065     CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("1234567",
1066     &a[0], &a[1], &a[2], &a[3],
1067     &a[4], &a[5], &a[6]));
1068     CHECK_EQ(a[0], 1);
1069     CHECK_EQ(a[1], 2);
1070     CHECK_EQ(a[2], 3);
1071     CHECK_EQ(a[3], 4);
1072     CHECK_EQ(a[4], 5);
1073     CHECK_EQ(a[5], 6);
1074     CHECK_EQ(a[6], 7);
1075    
1076     memset(a, 0, sizeof(0));
1077     CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)"
1078     "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch(
1079     "1234567890123456",
1080     &a[0], &a[1], &a[2], &a[3],
1081     &a[4], &a[5], &a[6], &a[7],
1082     &a[8], &a[9], &a[10], &a[11],
1083     &a[12], &a[13], &a[14], &a[15]));
1084     CHECK_EQ(a[0], 1);
1085     CHECK_EQ(a[1], 2);
1086     CHECK_EQ(a[2], 3);
1087     CHECK_EQ(a[3], 4);
1088     CHECK_EQ(a[4], 5);
1089     CHECK_EQ(a[5], 6);
1090     CHECK_EQ(a[6], 7);
1091     CHECK_EQ(a[7], 8);
1092     CHECK_EQ(a[8], 9);
1093     CHECK_EQ(a[9], 0);
1094     CHECK_EQ(a[10], 1);
1095     CHECK_EQ(a[11], 2);
1096     CHECK_EQ(a[12], 3);
1097     CHECK_EQ(a[13], 4);
1098     CHECK_EQ(a[14], 5);
1099     CHECK_EQ(a[15], 6);
1100    
1101     /***** PartialMatch *****/
1102    
1103     printf("Testing PartialMatch\n");
1104    
1105     CHECK(RE("h.*o").PartialMatch("hello"));
1106     CHECK(RE("h.*o").PartialMatch("othello"));
1107     CHECK(RE("h.*o").PartialMatch("hello!"));
1108     CHECK(RE("((((((((((((((((((((x))))))))))))))))))))").PartialMatch("x"));
1109    
1110 nigel 93 /***** other tests *****/
1111    
1112 nigel 77 RadixTests();
1113     TestReplace();
1114     TestExtract();
1115     TestConsume();
1116     TestFindAndConsume();
1117 nigel 93 TestQuoteMetaAll();
1118 nigel 77 TestMatchNumberPeculiarity();
1119    
1120     // Check the pattern() accessor
1121     {
1122     const string kPattern = "http://([^/]+)/.*";
1123     const RE re(kPattern);
1124     CHECK_EQ(kPattern, re.pattern());
1125     }
1126    
1127     // Check RE error field.
1128     {
1129     RE re("foo");
1130     CHECK(re.error().empty()); // Must have no error
1131     }
1132    
1133     #ifdef SUPPORT_UTF8
1134     // Check UTF-8 handling
1135     {
1136     printf("Testing UTF-8 handling\n");
1137    
1138     // Three Japanese characters (nihongo)
1139     const char utf8_string[] = {
1140     0xe6, 0x97, 0xa5, // 65e5
1141     0xe6, 0x9c, 0xac, // 672c
1142     0xe8, 0xaa, 0x9e, // 8a9e
1143     0
1144     };
1145     const char utf8_pattern[] = {
1146     '.',
1147     0xe6, 0x9c, 0xac, // 672c
1148     '.',
1149     0
1150     };
1151    
1152     // Both should match in either mode, bytes or UTF-8
1153     RE re_test1(".........");
1154     CHECK(re_test1.FullMatch(utf8_string));
1155     RE re_test2("...", pcrecpp::UTF8());
1156     CHECK(re_test2.FullMatch(utf8_string));
1157    
1158     // Check that '.' matches one byte or UTF-8 character
1159     // according to the mode.
1160     string ss;
1161     RE re_test3("(.)");
1162     CHECK(re_test3.PartialMatch(utf8_string, &ss));
1163     CHECK_EQ(ss, string("\xe6"));
1164     RE re_test4("(.)", pcrecpp::UTF8());
1165     CHECK(re_test4.PartialMatch(utf8_string, &ss));
1166     CHECK_EQ(ss, string("\xe6\x97\xa5"));
1167    
1168     // Check that string matches itself in either mode
1169     RE re_test5(utf8_string);
1170     CHECK(re_test5.FullMatch(utf8_string));
1171     RE re_test6(utf8_string, pcrecpp::UTF8());
1172     CHECK(re_test6.FullMatch(utf8_string));
1173    
1174     // Check that pattern matches string only in UTF8 mode
1175     RE re_test7(utf8_pattern);
1176     CHECK(!re_test7.FullMatch(utf8_string));
1177     RE re_test8(utf8_pattern, pcrecpp::UTF8());
1178     CHECK(re_test8.FullMatch(utf8_string));
1179     }
1180    
1181     // Check that ungreedy, UTF8 regular expressions don't match when they
1182     // oughtn't -- see bug 82246.
1183     {
1184     // This code always worked.
1185     const char* pattern = "\\w+X";
1186     const string target = "a aX";
1187     RE match_sentence(pattern);
1188     RE match_sentence_re(pattern, pcrecpp::UTF8());
1189    
1190     CHECK(!match_sentence.FullMatch(target));
1191     CHECK(!match_sentence_re.FullMatch(target));
1192     }
1193    
1194     {
1195     const char* pattern = "(?U)\\w+X";
1196     const string target = "a aX";
1197     RE match_sentence(pattern);
1198     RE match_sentence_re(pattern, pcrecpp::UTF8());
1199    
1200     CHECK(!match_sentence.FullMatch(target));
1201     CHECK(!match_sentence_re.FullMatch(target));
1202     }
1203     #endif /* def SUPPORT_UTF8 */
1204    
1205     printf("Testing error reporting\n");
1206    
1207     { RE re("a\\1"); CHECK(!re.error().empty()); }
1208     {
1209     RE re("a[x");
1210     CHECK(!re.error().empty());
1211     }
1212     {
1213     RE re("a[z-a]");
1214     CHECK(!re.error().empty());
1215     }
1216     {
1217     RE re("a[[:foobar:]]");
1218     CHECK(!re.error().empty());
1219     }
1220     {
1221     RE re("a(b");
1222     CHECK(!re.error().empty());
1223     }
1224     {
1225     RE re("a\\");
1226     CHECK(!re.error().empty());
1227     }
1228    
1229 nigel 87 // Test that recursion is stopped
1230     TestRecursion();
1231 nigel 77
1232 nigel 81 // Test Options
1233     if (getenv("VERBOSE_TEST") != NULL)
1234     VERBOSE_TEST = true;
1235     TestOptions();
1236    
1237 nigel 93 // Test the constructors
1238     TestConstructors();
1239    
1240 nigel 77 // Done
1241     printf("OK\n");
1242    
1243     return 0;
1244     }

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12