/[pcre]/code/trunk/pcrecpp_unittest.cc
ViewVC logotype

Contents of /code/trunk/pcrecpp_unittest.cc

Parent Directory Parent Directory | Revision Log Revision Log


Revision 179 - (hide annotations) (download)
Wed Jun 13 08:53:45 2007 UTC (7 years, 4 months ago) by ph10
File size: 37518 byte(s)
Apply C++ patch to fix a bad optimization.

1 nigel 93 // -*- coding: utf-8 -*-
2     //
3     // Copyright (c) 2005 - 2006, Google Inc.
4 nigel 77 // All rights reserved.
5     //
6     // Redistribution and use in source and binary forms, with or without
7     // modification, are permitted provided that the following conditions are
8     // met:
9     //
10     // * Redistributions of source code must retain the above copyright
11     // notice, this list of conditions and the following disclaimer.
12     // * Redistributions in binary form must reproduce the above
13     // copyright notice, this list of conditions and the following disclaimer
14     // in the documentation and/or other materials provided with the
15     // distribution.
16     // * Neither the name of Google Inc. nor the names of its
17     // contributors may be used to endorse or promote products derived from
18     // this software without specific prior written permission.
19     //
20     // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21     // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22     // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23     // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24     // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25     // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26     // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27     // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28     // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29     // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30     // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31     //
32     // Author: Sanjay Ghemawat
33     //
34     // TODO: Test extractions for PartialMatch/Consume
35    
36     #include <stdio.h>
37 nigel 91 #include <cassert>
38 nigel 77 #include <vector>
39     #include "config.h"
40     #include "pcrecpp.h"
41    
42     using pcrecpp::StringPiece;
43     using pcrecpp::RE;
44     using pcrecpp::RE_Options;
45     using pcrecpp::Hex;
46     using pcrecpp::Octal;
47     using pcrecpp::CRadix;
48    
// When true, the option tests below print details about the pattern and
// subject of each case in addition to the name of the option under test.
static bool VERBOSE_TEST(false);
50    
// CHECK dies with a fatal error if condition is not true.  It is *not*
// controlled by NDEBUG, so the check will be executed regardless of
// compilation mode.  Therefore, it is safe to do things like:
//    CHECK_EQ(fp->Write(x), 4)
// On failure it prints the file, line and text of the condition to stderr
// and exits with status 1.
#define CHECK(condition) do {                                   \
  if (!(condition)) {                                           \
    fprintf(stderr, "%s:%d: Check failed: %s\n",                \
            __FILE__, __LINE__, #condition);                    \
    exit(1);                                                    \
  }                                                             \
} while (0)

// CHECK_EQ dies unless a == b.  Both operands are parenthesized so that an
// argument containing an operator of lower precedence than == (for example
// `x & mask` or `c ? a : b`) is compared as a whole expression.
#define CHECK_EQ(a, b) CHECK((a) == (b))
64    
65     static void Timing1(int num_iters) {
66     // Same pattern lots of times
67     RE pattern("ruby:\\d+");
68     StringPiece p("ruby:1234");
69     for (int j = num_iters; j > 0; j--) {
70     CHECK(pattern.FullMatch(p));
71     }
72     }
73    
74     static void Timing2(int num_iters) {
75     // Same pattern lots of times
76     RE pattern("ruby:(\\d+)");
77     int i;
78     for (int j = num_iters; j > 0; j--) {
79     CHECK(pattern.FullMatch("ruby:1234", &i));
80     CHECK_EQ(i, 1234);
81     }
82     }
83    
84     static void Timing3(int num_iters) {
85     string text_string;
86     for (int j = num_iters; j > 0; j--) {
87     text_string += "this is another line\n";
88     }
89    
90     RE line_matcher(".*\n");
91     string line;
92     StringPiece text(text_string);
93     int counter = 0;
94     while (line_matcher.Consume(&text)) {
95     counter++;
96     }
97     printf("Matched %d lines\n", counter);
98     }
99    
100     #if 0 // uncomment this if you have a way of defining VirtualProcessSize()
101    
102     static void LeakTest() {
103     // Check for memory leaks
104     unsigned long long initial_size = 0;
105     for (int i = 0; i < 100000; i++) {
106     if (i == 50000) {
107     initial_size = VirtualProcessSize();
108     printf("Size after 50000: %llu\n", initial_size);
109     }
110     char buf[100];
111     snprintf(buf, sizeof(buf), "pat%09d", i);
112     RE newre(buf);
113     }
114     uint64 final_size = VirtualProcessSize();
115     printf("Size after 100000: %llu\n", final_size);
116     const double growth = double(final_size - initial_size) / final_size;
117     printf("Growth: %0.2f%%", growth * 100);
118     CHECK(growth < 0.02); // Allow < 2% growth
119     }
120    
121     #endif
122    
123     static void RadixTests() {
124     printf("Testing hex\n");
125    
126     #define CHECK_HEX(type, value) \
127     do { \
128     type v; \
129     CHECK(RE("([0-9a-fA-F]+)[uUlL]*").FullMatch(#value, Hex(&v))); \
130     CHECK_EQ(v, 0x ## value); \
131     CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0x" #value, CRadix(&v))); \
132     CHECK_EQ(v, 0x ## value); \
133     } while(0)
134    
135     CHECK_HEX(short, 2bad);
136     CHECK_HEX(unsigned short, 2badU);
137     CHECK_HEX(int, dead);
138     CHECK_HEX(unsigned int, deadU);
139     CHECK_HEX(long, 7eadbeefL);
140     CHECK_HEX(unsigned long, deadbeefUL);
141     #ifdef HAVE_LONG_LONG
142     CHECK_HEX(long long, 12345678deadbeefLL);
143     #endif
144     #ifdef HAVE_UNSIGNED_LONG_LONG
145     CHECK_HEX(unsigned long long, cafebabedeadbeefULL);
146     #endif
147    
148     #undef CHECK_HEX
149    
150     printf("Testing octal\n");
151    
152     #define CHECK_OCTAL(type, value) \
153     do { \
154     type v; \
155     CHECK(RE("([0-7]+)[uUlL]*").FullMatch(#value, Octal(&v))); \
156     CHECK_EQ(v, 0 ## value); \
157     CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0" #value, CRadix(&v))); \
158     CHECK_EQ(v, 0 ## value); \
159     } while(0)
160    
161     CHECK_OCTAL(short, 77777);
162     CHECK_OCTAL(unsigned short, 177777U);
163     CHECK_OCTAL(int, 17777777777);
164     CHECK_OCTAL(unsigned int, 37777777777U);
165     CHECK_OCTAL(long, 17777777777L);
166     CHECK_OCTAL(unsigned long, 37777777777UL);
167     #ifdef HAVE_LONG_LONG
168     CHECK_OCTAL(long long, 777777777777777777777LL);
169     #endif
170     #ifdef HAVE_UNSIGNED_LONG_LONG
171     CHECK_OCTAL(unsigned long long, 1777777777777777777777ULL);
172     #endif
173    
174     #undef CHECK_OCTAL
175    
176     printf("Testing decimal\n");
177    
178     #define CHECK_DECIMAL(type, value) \
179     do { \
180     type v; \
181     CHECK(RE("(-?[0-9]+)[uUlL]*").FullMatch(#value, &v)); \
182     CHECK_EQ(v, value); \
183     CHECK(RE("(-?[0-9a-fA-FxX]+)[uUlL]*").FullMatch(#value, CRadix(&v))); \
184     CHECK_EQ(v, value); \
185     } while(0)
186    
187     CHECK_DECIMAL(short, -1);
188     CHECK_DECIMAL(unsigned short, 9999);
189     CHECK_DECIMAL(int, -1000);
190     CHECK_DECIMAL(unsigned int, 12345U);
191     CHECK_DECIMAL(long, -10000000L);
192     CHECK_DECIMAL(unsigned long, 3083324652U);
193     #ifdef HAVE_LONG_LONG
194     CHECK_DECIMAL(long long, -100000000000000LL);
195     #endif
196     #ifdef HAVE_UNSIGNED_LONG_LONG
197     CHECK_DECIMAL(unsigned long long, 1234567890987654321ULL);
198     #endif
199    
200     #undef CHECK_DECIMAL
201    
202     }
203    
204     static void TestReplace() {
205     printf("Testing Replace\n");
206    
207     struct ReplaceTest {
208     const char *regexp;
209     const char *rewrite;
210     const char *original;
211     const char *single;
212     const char *global;
213     };
214     static const ReplaceTest tests[] = {
215     { "(qu|[b-df-hj-np-tv-z]*)([a-z]+)",
216     "\\2\\1ay",
217     "the quick brown fox jumps over the lazy dogs.",
218     "ethay quick brown fox jumps over the lazy dogs.",
219     "ethay ickquay ownbray oxfay umpsjay overay ethay azylay ogsday." },
220     { "\\w+",
221     "\\0-NOSPAM",
222     "paul.haahr@google.com",
223     "paul-NOSPAM.haahr@google.com",
224     "paul-NOSPAM.haahr-NOSPAM@google-NOSPAM.com-NOSPAM" },
225     { "^",
226     "(START)",
227     "foo",
228     "(START)foo",
229     "(START)foo" },
230     { "^",
231     "(START)",
232     "",
233     "(START)",
234     "(START)" },
235     { "$",
236     "(END)",
237     "",
238     "(END)",
239     "(END)" },
240     { "b",
241     "bb",
242     "ababababab",
243     "abbabababab",
244     "abbabbabbabbabb" },
245     { "b",
246     "bb",
247     "bbbbbb",
248     "bbbbbbb",
249     "bbbbbbbbbbbb" },
250     { "b+",
251     "bb",
252     "bbbbbb",
253     "bb",
254     "bb" },
255     { "b*",
256     "bb",
257     "bbbbbb",
258     "bb",
259     "bb" },
260     { "b*",
261     "bb",
262     "aaaaa",
263     "bbaaaaa",
264     "bbabbabbabbabbabb" },
265 nigel 91 { "b*",
266     "bb",
267     "aa\naa\n",
268     "bbaa\naa\n",
269     "bbabbabb\nbbabbabb\nbb" },
270     { "b*",
271     "bb",
272     "aa\raa\r",
273     "bbaa\raa\r",
274     "bbabbabb\rbbabbabb\rbb" },
275     { "b*",
276     "bb",
277     "aa\r\naa\r\n",
278     "bbaa\r\naa\r\n",
279     "bbabbabb\r\nbbabbabb\r\nbb" },
280     #ifdef SUPPORT_UTF8
281     { "b*",
282     "bb",
283     "\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8", // utf8
284     "bb\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8",
285     "bb\xE3\x83\x9B""bb""\xE3\x83\xBC""bb""\xE3\x83\xA0""bb""\xE3\x81\xB8""bb" },
286     { "b*",
287     "bb",
288     "\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n", // utf8
289     "bb\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n",
290     ("bb\xE3\x83\x9B""bb\r\nbb""\xE3\x83\xBC""bb\rbb""\xE3\x83\xA0"
291     "bb\nbb""\xE3\x81\xB8""bb\r\nbb") },
292     #endif
293 nigel 77 { "", NULL, NULL, NULL, NULL }
294     };
295    
296 nigel 91 #ifdef SUPPORT_UTF8
297     const bool support_utf8 = true;
298     #else
299     const bool support_utf8 = false;
300     #endif
301    
302 nigel 77 for (const ReplaceTest *t = tests; t->original != NULL; ++t) {
303 nigel 91 RE re(t->regexp, RE_Options(PCRE_NEWLINE_CRLF).set_utf8(support_utf8));
304     assert(re.error().empty());
305 nigel 77 string one(t->original);
306 nigel 91 CHECK(re.Replace(t->rewrite, &one));
307 nigel 77 CHECK_EQ(one, t->single);
308     string all(t->original);
309 nigel 91 CHECK(re.GlobalReplace(t->rewrite, &all) > 0);
310 nigel 77 CHECK_EQ(all, t->global);
311     }
312 nigel 91
313     // One final test: test \r\n replacement when we're not in CRLF mode
314     {
315     RE re("b*", RE_Options(PCRE_NEWLINE_CR).set_utf8(support_utf8));
316     assert(re.error().empty());
317     string all("aa\r\naa\r\n");
318     CHECK(re.GlobalReplace("bb", &all) > 0);
319     CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
320     }
321     {
322     RE re("b*", RE_Options(PCRE_NEWLINE_LF).set_utf8(support_utf8));
323     assert(re.error().empty());
324     string all("aa\r\naa\r\n");
325     CHECK(re.GlobalReplace("bb", &all) > 0);
326     CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
327     }
328     // TODO: test what happens when no PCRE_NEWLINE_* flag is set.
329     // Alas, the answer depends on how pcre was compiled.
330 nigel 77 }
331    
332     static void TestExtract() {
333     printf("Testing Extract\n");
334    
335     string s;
336    
337     CHECK(RE("(.*)@([^.]*)").Extract("\\2!\\1", "boris@kremvax.ru", &s));
338     CHECK_EQ(s, "kremvax!boris");
339    
340     // check the RE interface as well
341     CHECK(RE(".*").Extract("'\\0'", "foo", &s));
342     CHECK_EQ(s, "'foo'");
343     CHECK(!RE("bar").Extract("'\\0'", "baz", &s));
344     CHECK_EQ(s, "'foo'");
345     }
346    
347     static void TestConsume() {
348     printf("Testing Consume\n");
349    
350     string word;
351    
352     string s(" aaa b!@#$@#$cccc");
353     StringPiece input(s);
354    
355     RE r("\\s*(\\w+)"); // matches a word, possibly proceeded by whitespace
356     CHECK(r.Consume(&input, &word));
357     CHECK_EQ(word, "aaa");
358     CHECK(r.Consume(&input, &word));
359     CHECK_EQ(word, "b");
360     CHECK(! r.Consume(&input, &word));
361     }
362    
363     static void TestFindAndConsume() {
364     printf("Testing FindAndConsume\n");
365    
366     string word;
367    
368     string s(" aaa b!@#$@#$cccc");
369     StringPiece input(s);
370    
371     RE r("(\\w+)"); // matches a word
372     CHECK(r.FindAndConsume(&input, &word));
373     CHECK_EQ(word, "aaa");
374     CHECK(r.FindAndConsume(&input, &word));
375     CHECK_EQ(word, "b");
376     CHECK(r.FindAndConsume(&input, &word));
377     CHECK_EQ(word, "cccc");
378     CHECK(! r.FindAndConsume(&input, &word));
379     }
380    
381     static void TestMatchNumberPeculiarity() {
382     printf("Testing match-number peculiaraity\n");
383    
384     string word1;
385     string word2;
386     string word3;
387    
388     RE r("(foo)|(bar)|(baz)");
389     CHECK(r.PartialMatch("foo", &word1, &word2, &word3));
390     CHECK_EQ(word1, "foo");
391     CHECK_EQ(word2, "");
392     CHECK_EQ(word3, "");
393     CHECK(r.PartialMatch("bar", &word1, &word2, &word3));
394     CHECK_EQ(word1, "");
395     CHECK_EQ(word2, "bar");
396     CHECK_EQ(word3, "");
397     CHECK(r.PartialMatch("baz", &word1, &word2, &word3));
398     CHECK_EQ(word1, "");
399     CHECK_EQ(word2, "");
400     CHECK_EQ(word3, "baz");
401     CHECK(!r.PartialMatch("f", &word1, &word2, &word3));
402    
403     string a;
404     CHECK(RE("(foo)|hello").FullMatch("hello", &a));
405     CHECK_EQ(a, "");
406     }
407    
408 nigel 87 static void TestRecursion() {
409 nigel 77 printf("Testing recursion\n");
410    
411 nigel 87 // Get one string that passes (sometimes), one that never does.
412     string text_good("abcdefghijk");
413     string text_bad("acdefghijkl");
414    
415     // According to pcretest, matching text_good against (\w+)*b
416     // requires match_limit of at least 8192, and match_recursion_limit
417     // of at least 37.
418    
419     RE_Options options_ml;
420     options_ml.set_match_limit(8192);
421     RE re("(\\w+)*b", options_ml);
422     CHECK(re.PartialMatch(text_good) == true);
423     CHECK(re.PartialMatch(text_bad) == false);
424     CHECK(re.FullMatch(text_good) == false);
425     CHECK(re.FullMatch(text_bad) == false);
426    
427     options_ml.set_match_limit(1024);
428     RE re2("(\\w+)*b", options_ml);
429     CHECK(re2.PartialMatch(text_good) == false); // because of match_limit
430     CHECK(re2.PartialMatch(text_bad) == false);
431     CHECK(re2.FullMatch(text_good) == false);
432     CHECK(re2.FullMatch(text_bad) == false);
433    
434     RE_Options options_mlr;
435     options_mlr.set_match_limit_recursion(50);
436     RE re3("(\\w+)*b", options_mlr);
437     CHECK(re3.PartialMatch(text_good) == true);
438     CHECK(re3.PartialMatch(text_bad) == false);
439     CHECK(re3.FullMatch(text_good) == false);
440     CHECK(re3.FullMatch(text_bad) == false);
441    
442     options_mlr.set_match_limit_recursion(10);
443     RE re4("(\\w+)*b", options_mlr);
444     CHECK(re4.PartialMatch(text_good) == false);
445     CHECK(re4.PartialMatch(text_bad) == false);
446     CHECK(re4.FullMatch(text_good) == false);
447     CHECK(re4.FullMatch(text_bad) == false);
448 nigel 77 }
449    
450 nigel 93 // A meta-quoted string, interpreted as a pattern, should always match
451     // the original unquoted string.
452     static void TestQuoteMeta(string unquoted, RE_Options options = RE_Options()) {
453     string quoted = RE::QuoteMeta(unquoted);
454     RE re(quoted, options);
455     CHECK(re.FullMatch(unquoted));
456     }
457    
458     // A string containing meaningful regexp characters, which is then meta-
459     // quoted, should not generally match a string the unquoted string does.
460     static void NegativeTestQuoteMeta(string unquoted, string should_not_match,
461     RE_Options options = RE_Options()) {
462     string quoted = RE::QuoteMeta(unquoted);
463     RE re(quoted, options);
464     CHECK(!re.FullMatch(should_not_match));
465     }
466    
467     // Tests that quoted meta characters match their original strings,
468     // and that a few things that shouldn't match indeed do not.
469     static void TestQuotaMetaSimple() {
470     TestQuoteMeta("foo");
471     TestQuoteMeta("foo.bar");
472     TestQuoteMeta("foo\\.bar");
473     TestQuoteMeta("[1-9]");
474     TestQuoteMeta("1.5-2.0?");
475     TestQuoteMeta("\\d");
476     TestQuoteMeta("Who doesn't like ice cream?");
477     TestQuoteMeta("((a|b)c?d*e+[f-h]i)");
478     TestQuoteMeta("((?!)xxx).*yyy");
479     TestQuoteMeta("([");
480     }
481    
482     static void TestQuoteMetaSimpleNegative() {
483     NegativeTestQuoteMeta("foo", "bar");
484     NegativeTestQuoteMeta("...", "bar");
485     NegativeTestQuoteMeta("\\.", ".");
486     NegativeTestQuoteMeta("\\.", "..");
487     NegativeTestQuoteMeta("(a)", "a");
488     NegativeTestQuoteMeta("(a|b)", "a");
489     NegativeTestQuoteMeta("(a|b)", "(a)");
490     NegativeTestQuoteMeta("(a|b)", "a|b");
491     NegativeTestQuoteMeta("[0-9]", "0");
492     NegativeTestQuoteMeta("[0-9]", "0-9");
493     NegativeTestQuoteMeta("[0-9]", "[9]");
494     NegativeTestQuoteMeta("((?!)xxx)", "xxx");
495     }
496    
497     static void TestQuoteMetaLatin1() {
498     TestQuoteMeta("3\xb2 = 9");
499     }
500    
// Checks QuoteMeta on strings containing multi-byte UTF-8 sequences.  The
// body is compiled only when SUPPORT_UTF8 is defined; otherwise the function
// does nothing.
static void TestQuoteMetaUtf8() {
#ifdef SUPPORT_UTF8
  const RE_Options utf8_options = pcrecpp::UTF8();

  TestQuoteMeta("Pl\xc3\xa1\x63ido Domingo", utf8_options);
  TestQuoteMeta("xyz", utf8_options);                // No fancy utf8
  TestQuoteMeta("\xc2\xb0", utf8_options);           // 2-byte utf8 (degree symbol)
  TestQuoteMeta("27\xc2\xb0 degrees", utf8_options); // As a middle character
  TestQuoteMeta("\xe2\x80\xb3", utf8_options);       // 3-byte utf8 (double prime)
  TestQuoteMeta("\xf0\x9d\x85\x9f", utf8_options);   // 4-byte utf8 (music note)

  // Interpreted as Latin-1, but should still work
  TestQuoteMeta("27\xc2\xb0");

  // 2-byte utf (degree symbol): the quoted pattern must not match a text in
  // which each of the two bytes is preceded by a backslash.
  NegativeTestQuoteMeta("27\xc2\xb0",
                        "27\\\xc2\\\xb0",
                        utf8_options);
#endif
}
515    
516     static void TestQuoteMetaAll() {
517     printf("Testing QuoteMeta\n");
518     TestQuotaMetaSimple();
519     TestQuoteMetaSimpleNegative();
520     TestQuoteMetaLatin1();
521     TestQuoteMetaUtf8();
522     }
523    
524 nigel 81 //
525     // Options tests contributed by
526     // Giuseppe Maxia, CTO, Stardata s.r.l.
527     // July 2005
528     //
529     static void GetOneOptionResult(
530     const char *option_name,
531     const char *regex,
532     const char *str,
533     RE_Options options,
534     bool full,
535     string expected) {
536 nigel 77
537 nigel 81 printf("Testing Option <%s>\n", option_name);
538     if(VERBOSE_TEST)
539     printf("/%s/ finds \"%s\" within \"%s\" \n",
540     regex,
541     expected.c_str(),
542     str);
543     string captured("");
544     if (full)
545     RE(regex,options).FullMatch(str, &captured);
546     else
547     RE(regex,options).PartialMatch(str, &captured);
548     CHECK_EQ(captured, expected);
549     }
550    
551     static void TestOneOption(
552     const char *option_name,
553     const char *regex,
554     const char *str,
555     RE_Options options,
556     bool full,
557     bool assertive = true) {
558    
559     printf("Testing Option <%s>\n", option_name);
560     if (VERBOSE_TEST)
561     printf("'%s' %s /%s/ \n",
562     str,
563     (assertive? "matches" : "doesn't match"),
564     regex);
565     if (assertive) {
566     if (full)
567     CHECK(RE(regex,options).FullMatch(str));
568     else
569     CHECK(RE(regex,options).PartialMatch(str));
570     } else {
571     if (full)
572     CHECK(!RE(regex,options).FullMatch(str));
573     else
574     CHECK(!RE(regex,options).PartialMatch(str));
575     }
576     }
577    
578     static void Test_CASELESS() {
579     RE_Options options;
580     RE_Options options2;
581    
582     options.set_caseless(true);
583     TestOneOption("CASELESS (class)", "HELLO", "hello", options, false);
584     TestOneOption("CASELESS (class2)", "HELLO", "hello", options2.set_caseless(true), false);
585     TestOneOption("CASELESS (class)", "^[A-Z]+$", "Hello", options, false);
586    
587     TestOneOption("CASELESS (function)", "HELLO", "hello", pcrecpp::CASELESS(), false);
588     TestOneOption("CASELESS (function)", "^[A-Z]+$", "Hello", pcrecpp::CASELESS(), false);
589     options.set_caseless(false);
590     TestOneOption("no CASELESS", "HELLO", "hello", options, false, false);
591     }
592    
593     static void Test_MULTILINE() {
594     RE_Options options;
595     RE_Options options2;
596     const char *str = "HELLO\n" "cruel\n" "world\n";
597    
598     options.set_multiline(true);
599     TestOneOption("MULTILINE (class)", "^cruel$", str, options, false);
600     TestOneOption("MULTILINE (class2)", "^cruel$", str, options2.set_multiline(true), false);
601     TestOneOption("MULTILINE (function)", "^cruel$", str, pcrecpp::MULTILINE(), false);
602     options.set_multiline(false);
603     TestOneOption("no MULTILINE", "^cruel$", str, options, false, false);
604     }
605    
606     static void Test_DOTALL() {
607     RE_Options options;
608     RE_Options options2;
609     const char *str = "HELLO\n" "cruel\n" "world";
610    
611     options.set_dotall(true);
612     TestOneOption("DOTALL (class)", "HELLO.*world", str, options, true);
613     TestOneOption("DOTALL (class2)", "HELLO.*world", str, options2.set_dotall(true), true);
614     TestOneOption("DOTALL (function)", "HELLO.*world", str, pcrecpp::DOTALL(), true);
615     options.set_dotall(false);
616     TestOneOption("no DOTALL", "HELLO.*world", str, options, true, false);
617     }
618    
619     static void Test_DOLLAR_ENDONLY() {
620     RE_Options options;
621     RE_Options options2;
622     const char *str = "HELLO world\n";
623    
624     TestOneOption("no DOLLAR_ENDONLY", "world$", str, options, false);
625     options.set_dollar_endonly(true);
626     TestOneOption("DOLLAR_ENDONLY 1", "world$", str, options, false, false);
627     TestOneOption("DOLLAR_ENDONLY 2", "world$", str, options2.set_dollar_endonly(true), false, false);
628     }
629    
630     static void Test_EXTRA() {
631     RE_Options options;
632     const char *str = "HELLO";
633    
634     options.set_extra(true);
635     TestOneOption("EXTRA 1", "\\HELL\\O", str, options, true, false );
636     TestOneOption("EXTRA 2", "\\HELL\\O", str, RE_Options().set_extra(true), true, false );
637     options.set_extra(false);
638     TestOneOption("no EXTRA", "\\HELL\\O", str, options, true );
639     }
640    
641     static void Test_EXTENDED() {
642     RE_Options options;
643     RE_Options options2;
644     const char *str = "HELLO world";
645    
646     options.set_extended(true);
647     TestOneOption("EXTENDED (class)", "HELLO world", str, options, false, false);
648     TestOneOption("EXTENDED (class2)", "HELLO world", str, options2.set_extended(true), false, false);
649     TestOneOption("EXTENDED (class)",
650     "^ HE L{2} O "
651     "\\s+ "
652     "\\w+ $ ",
653     str,
654     options,
655     false);
656    
657     TestOneOption("EXTENDED (function)", "HELLO world", str, pcrecpp::EXTENDED(), false, false);
658     TestOneOption("EXTENDED (function)",
659     "^ HE L{2} O "
660     "\\s+ "
661     "\\w+ $ ",
662     str,
663     pcrecpp::EXTENDED(),
664     false);
665    
666     options.set_extended(false);
667     TestOneOption("no EXTENDED", "HELLO world", str, options, false);
668     }
669    
670     static void Test_NO_AUTO_CAPTURE() {
671     RE_Options options;
672     const char *str = "HELLO world";
673     string captured;
674    
675     printf("Testing Option <no NO_AUTO_CAPTURE>\n");
676     if (VERBOSE_TEST)
677     printf("parentheses capture text\n");
678     RE re("(world|universe)$", options);
679     CHECK(re.Extract("\\1", str , &captured));
680     CHECK_EQ(captured, "world");
681     options.set_no_auto_capture(true);
682     printf("testing Option <NO_AUTO_CAPTURE>\n");
683     if (VERBOSE_TEST)
684     printf("parentheses do not capture text\n");
685     re.Extract("\\1",str, &captured );
686     CHECK_EQ(captured, "world");
687     }
688    
689     static void Test_UNGREEDY() {
690     RE_Options options;
691     const char *str = "HELLO, 'this' is the 'world'";
692    
693     options.set_ungreedy(true);
694     GetOneOptionResult("UNGREEDY 1", "('.*')", str, options, false, "'this'" );
695     GetOneOptionResult("UNGREEDY 2", "('.*')", str, RE_Options().set_ungreedy(true), false, "'this'" );
696     GetOneOptionResult("UNGREEDY", "('.*?')", str, options, false, "'this' is the 'world'" );
697    
698     options.set_ungreedy(false);
699     GetOneOptionResult("no UNGREEDY", "('.*')", str, options, false, "'this' is the 'world'" );
700     GetOneOptionResult("no UNGREEDY", "('.*?')", str, options, false, "'this'" );
701     }
702    
703     static void Test_all_options() {
704     const char *str = "HELLO\n" "cruel\n" "world";
705     RE_Options options;
706     options.set_all_options(PCRE_CASELESS | PCRE_DOTALL);
707    
708     TestOneOption("all_options (CASELESS|DOTALL)", "^hello.*WORLD", str , options, false);
709     options.set_all_options(0);
710     TestOneOption("all_options (0)", "^hello.*WORLD", str , options, false, false);
711     options.set_all_options(PCRE_MULTILINE | PCRE_EXTENDED);
712    
713     TestOneOption("all_options (MULTILINE|EXTENDED)", " ^ c r u e l $ ", str, options, false);
714     TestOneOption("all_options (MULTILINE|EXTENDED) with constructor",
715     " ^ c r u e l $ ",
716     str,
717     RE_Options(PCRE_MULTILINE | PCRE_EXTENDED),
718     false);
719    
720     TestOneOption("all_options (MULTILINE|EXTENDED) with concatenation",
721     " ^ c r u e l $ ",
722     str,
723     RE_Options()
724     .set_multiline(true)
725     .set_extended(true),
726     false);
727    
728     options.set_all_options(0);
729     TestOneOption("all_options (0)", "^ c r u e l $", str, options, false, false);
730    
731     }
732    
733     static void TestOptions() {
734     printf("Testing Options\n");
735     Test_CASELESS();
736     Test_MULTILINE();
737     Test_DOTALL();
738     Test_DOLLAR_ENDONLY();
739     Test_EXTENDED();
740     Test_NO_AUTO_CAPTURE();
741     Test_UNGREEDY();
742     Test_EXTRA();
743     Test_all_options();
744     }
745    
746 nigel 93 static void TestConstructors() {
747     printf("Testing constructors\n");
748    
749     RE_Options options;
750     options.set_dotall(true);
751     const char *str = "HELLO\n" "cruel\n" "world";
752    
753     RE orig("HELLO.*world", options);
754     CHECK(orig.FullMatch(str));
755    
756     RE copy1(orig);
757     CHECK(copy1.FullMatch(str));
758    
759     RE copy2("not a match");
760     CHECK(!copy2.FullMatch(str));
761     copy2 = copy1;
762     CHECK(copy2.FullMatch(str));
763     copy2 = orig;
764     CHECK(copy2.FullMatch(str));
765    
766     // Make sure when we assign to ourselves, nothing bad happens
767     orig = orig;
768     copy1 = copy1;
769     copy2 = copy2;
770     CHECK(orig.FullMatch(str));
771     CHECK(copy1.FullMatch(str));
772     CHECK(copy2.FullMatch(str));
773     }
774    
775 nigel 77 int main(int argc, char** argv) {
776     // Treat any flag as --help
777     if (argc > 1 && argv[1][0] == '-') {
778     printf("Usage: %s [timing1|timing2|timing3 num-iters]\n"
779     " If 'timingX ###' is specified, run the given timing test\n"
780     " with the given number of iterations, rather than running\n"
781     " the default corectness test.\n", argv[0]);
782     return 0;
783     }
784    
785     if (argc > 1) {
786     if ( argc == 2 || atoi(argv[2]) == 0) {
787     printf("timing mode needs a num-iters argument\n");
788     return 1;
789     }
790     if (!strcmp(argv[1], "timing1"))
791     Timing1(atoi(argv[2]));
792     else if (!strcmp(argv[1], "timing2"))
793     Timing2(atoi(argv[2]));
794     else if (!strcmp(argv[1], "timing3"))
795     Timing3(atoi(argv[2]));
796     else
797     printf("Unknown argument '%s'\n", argv[1]);
798     return 0;
799     }
800    
801     printf("Testing FullMatch\n");
802    
803     int i;
804     string s;
805    
806     /***** FullMatch with no args *****/
807    
808     CHECK(RE("h.*o").FullMatch("hello"));
809 ph10 179 CHECK(!RE("h.*o").FullMatch("othello")); // Must be anchored at front
810     CHECK(!RE("h.*o").FullMatch("hello!")); // Must be anchored at end
811     CHECK(RE("a*").FullMatch("aaaa")); // Fullmatch with normal op
812     CHECK(RE("a*?").FullMatch("aaaa")); // Fullmatch with nongreedy op
813     CHECK(RE("a*?\\z").FullMatch("aaaa")); // Two unusual ops
814 nigel 77
815     /***** FullMatch with args *****/
816    
817     // Zero-arg
818     CHECK(RE("\\d+").FullMatch("1001"));
819    
820     // Single-arg
821     CHECK(RE("(\\d+)").FullMatch("1001", &i));
822     CHECK_EQ(i, 1001);
823     CHECK(RE("(-?\\d+)").FullMatch("-123", &i));
824     CHECK_EQ(i, -123);
825     CHECK(!RE("()\\d+").FullMatch("10", &i));
826     CHECK(!RE("(\\d+)").FullMatch("1234567890123456789012345678901234567890",
827     &i));
828    
829     // Digits surrounding integer-arg
830     CHECK(RE("1(\\d*)4").FullMatch("1234", &i));
831     CHECK_EQ(i, 23);
832     CHECK(RE("(\\d)\\d+").FullMatch("1234", &i));
833     CHECK_EQ(i, 1);
834     CHECK(RE("(-\\d)\\d+").FullMatch("-1234", &i));
835     CHECK_EQ(i, -1);
836     CHECK(RE("(\\d)").PartialMatch("1234", &i));
837     CHECK_EQ(i, 1);
838     CHECK(RE("(-\\d)").PartialMatch("-1234", &i));
839     CHECK_EQ(i, -1);
840    
841     // String-arg
842     CHECK(RE("h(.*)o").FullMatch("hello", &s));
843     CHECK_EQ(s, string("ell"));
844    
845     // StringPiece-arg
846     StringPiece sp;
847     CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &sp, &i));
848     CHECK_EQ(sp.size(), 4);
849     CHECK(memcmp(sp.data(), "ruby", 4) == 0);
850     CHECK_EQ(i, 1234);
851    
852     // Multi-arg
853     CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &s, &i));
854     CHECK_EQ(s, string("ruby"));
855     CHECK_EQ(i, 1234);
856    
857     // Ignored arg
858     CHECK(RE("(\\w+)(:)(\\d+)").FullMatch("ruby:1234", &s, (void*)NULL, &i));
859     CHECK_EQ(s, string("ruby"));
860     CHECK_EQ(i, 1234);
861    
862     // Type tests
863     {
864     char c;
865     CHECK(RE("(H)ello").FullMatch("Hello", &c));
866     CHECK_EQ(c, 'H');
867     }
868     {
869     unsigned char c;
870     CHECK(RE("(H)ello").FullMatch("Hello", &c));
871     CHECK_EQ(c, static_cast<unsigned char>('H'));
872     }
873     {
874     short v;
875     CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
876     CHECK(RE("(-?\\d+)").FullMatch("-100", &v)); CHECK_EQ(v, -100);
877     CHECK(RE("(-?\\d+)").FullMatch("32767", &v)); CHECK_EQ(v, 32767);
878     CHECK(RE("(-?\\d+)").FullMatch("-32768", &v)); CHECK_EQ(v, -32768);
879     CHECK(!RE("(-?\\d+)").FullMatch("-32769", &v));
880     CHECK(!RE("(-?\\d+)").FullMatch("32768", &v));
881     }
882     {
883     unsigned short v;
884     CHECK(RE("(\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
885     CHECK(RE("(\\d+)").FullMatch("32767", &v)); CHECK_EQ(v, 32767);
886     CHECK(RE("(\\d+)").FullMatch("65535", &v)); CHECK_EQ(v, 65535);
887     CHECK(!RE("(\\d+)").FullMatch("65536", &v));
888     }
889     {
890     int v;
891     static const int max_value = 0x7fffffff;
892     static const int min_value = -max_value - 1;
893     CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
894     CHECK(RE("(-?\\d+)").FullMatch("-100", &v)); CHECK_EQ(v, -100);
895     CHECK(RE("(-?\\d+)").FullMatch("2147483647", &v)); CHECK_EQ(v, max_value);
896     CHECK(RE("(-?\\d+)").FullMatch("-2147483648", &v)); CHECK_EQ(v, min_value);
897     CHECK(!RE("(-?\\d+)").FullMatch("-2147483649", &v));
898     CHECK(!RE("(-?\\d+)").FullMatch("2147483648", &v));
899     }
900     {
901     unsigned int v;
902     static const unsigned int max_value = 0xfffffffful;
903     CHECK(RE("(\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
904     CHECK(RE("(\\d+)").FullMatch("4294967295", &v)); CHECK_EQ(v, max_value);
905     CHECK(!RE("(\\d+)").FullMatch("4294967296", &v));
906     }
907     #ifdef HAVE_LONG_LONG
908     {
909     long long v;
910     static const long long max_value = 0x7fffffffffffffffLL;
911     static const long long min_value = -max_value - 1;
912     char buf[32];
913    
914     CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
915     CHECK(RE("(-?\\d+)").FullMatch("-100",&v)); CHECK_EQ(v, -100);
916    
917     snprintf(buf, sizeof(buf), "%lld", max_value);
918     CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
919    
920     snprintf(buf, sizeof(buf), "%lld", min_value);
921     CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, min_value);
922    
923     snprintf(buf, sizeof(buf), "%lld", max_value);
924     assert(buf[strlen(buf)-1] != '9');
925     buf[strlen(buf)-1]++;
926     CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
927    
928     snprintf(buf, sizeof(buf), "%lld", min_value);
929     assert(buf[strlen(buf)-1] != '9');
930     buf[strlen(buf)-1]++;
931     CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
932     }
933     #endif
934     #if defined HAVE_UNSIGNED_LONG_LONG && defined HAVE_LONG_LONG
935     {
936     unsigned long long v;
937     long long v2;
938     static const unsigned long long max_value = 0xffffffffffffffffULL;
939     char buf[32];
940    
941     CHECK(RE("(-?\\d+)").FullMatch("100",&v)); CHECK_EQ(v, 100);
942     CHECK(RE("(-?\\d+)").FullMatch("-100",&v2)); CHECK_EQ(v2, -100);
943    
944     snprintf(buf, sizeof(buf), "%llu", max_value);
945     CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
946    
947     assert(buf[strlen(buf)-1] != '9');
948     buf[strlen(buf)-1]++;
949     CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
950     }
951     #endif
952     {
953     float v;
954     CHECK(RE("(.*)").FullMatch("100", &v));
955     CHECK(RE("(.*)").FullMatch("-100.", &v));
956     CHECK(RE("(.*)").FullMatch("1e23", &v));
957     }
958     {
959     double v;
960     CHECK(RE("(.*)").FullMatch("100", &v));
961     CHECK(RE("(.*)").FullMatch("-100.", &v));
962     CHECK(RE("(.*)").FullMatch("1e23", &v));
963     }
964    
965     // Check that matching is fully anchored
966     CHECK(!RE("(\\d+)").FullMatch("x1001", &i));
967     CHECK(!RE("(\\d+)").FullMatch("1001x", &i));
968     CHECK(RE("x(\\d+)").FullMatch("x1001", &i)); CHECK_EQ(i, 1001);
969     CHECK(RE("(\\d+)x").FullMatch("1001x", &i)); CHECK_EQ(i, 1001);
970    
971     // Braces
972     CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcd"));
973     CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcde"));
974     CHECK(!RE("[0-9a-f+.-]{5,}").FullMatch("0abc"));
975    
976     // Complicated RE
977     CHECK(RE("foo|bar|[A-Z]").FullMatch("foo"));
978     CHECK(RE("foo|bar|[A-Z]").FullMatch("bar"));
979     CHECK(RE("foo|bar|[A-Z]").FullMatch("X"));
980     CHECK(!RE("foo|bar|[A-Z]").FullMatch("XY"));
981    
982     // Check full-match handling (needs '$' tacked on internally)
983     CHECK(RE("fo|foo").FullMatch("fo"));
984     CHECK(RE("fo|foo").FullMatch("foo"));
985     CHECK(RE("fo|foo$").FullMatch("fo"));
986     CHECK(RE("fo|foo$").FullMatch("foo"));
987     CHECK(RE("foo$").FullMatch("foo"));
988     CHECK(!RE("foo\\$").FullMatch("foo$bar"));
989     CHECK(!RE("fo|bar").FullMatch("fox"));
990    
991     // Uncomment the following if we change the handling of '$' to
992     // prevent it from matching a trailing newline
993     if (false) {
994     // Check that we don't get bitten by pcre's special handling of a
995     // '\n' at the end of the string matching '$'
996     CHECK(!RE("foo$").PartialMatch("foo\n"));
997     }
998    
999     // Number of args
1000     int a[16];
1001     CHECK(RE("").FullMatch(""));
1002    
1003     memset(a, 0, sizeof(0));
1004     CHECK(RE("(\\d){1}").FullMatch("1",
1005     &a[0]));
1006     CHECK_EQ(a[0], 1);
1007    
1008     memset(a, 0, sizeof(0));
1009     CHECK(RE("(\\d)(\\d)").FullMatch("12",
1010     &a[0], &a[1]));
1011     CHECK_EQ(a[0], 1);
1012     CHECK_EQ(a[1], 2);
1013    
1014     memset(a, 0, sizeof(0));
1015     CHECK(RE("(\\d)(\\d)(\\d)").FullMatch("123",
1016     &a[0], &a[1], &a[2]));
1017     CHECK_EQ(a[0], 1);
1018     CHECK_EQ(a[1], 2);
1019     CHECK_EQ(a[2], 3);
1020    
1021     memset(a, 0, sizeof(0));
1022     CHECK(RE("(\\d)(\\d)(\\d)(\\d)").FullMatch("1234",
1023     &a[0], &a[1], &a[2], &a[3]));
1024     CHECK_EQ(a[0], 1);
1025     CHECK_EQ(a[1], 2);
1026     CHECK_EQ(a[2], 3);
1027     CHECK_EQ(a[3], 4);
1028    
1029     memset(a, 0, sizeof(0));
1030     CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("12345",
1031     &a[0], &a[1], &a[2],
1032     &a[3], &a[4]));
1033     CHECK_EQ(a[0], 1);
1034     CHECK_EQ(a[1], 2);
1035     CHECK_EQ(a[2], 3);
1036     CHECK_EQ(a[3], 4);
1037     CHECK_EQ(a[4], 5);
1038    
1039     memset(a, 0, sizeof(0));
1040     CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("123456",
1041     &a[0], &a[1], &a[2],
1042     &a[3], &a[4], &a[5]));
1043     CHECK_EQ(a[0], 1);
1044     CHECK_EQ(a[1], 2);
1045     CHECK_EQ(a[2], 3);
1046     CHECK_EQ(a[3], 4);
1047     CHECK_EQ(a[4], 5);
1048     CHECK_EQ(a[5], 6);
1049    
1050     memset(a, 0, sizeof(0));
1051     CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("1234567",
1052     &a[0], &a[1], &a[2], &a[3],
1053     &a[4], &a[5], &a[6]));
1054     CHECK_EQ(a[0], 1);
1055     CHECK_EQ(a[1], 2);
1056     CHECK_EQ(a[2], 3);
1057     CHECK_EQ(a[3], 4);
1058     CHECK_EQ(a[4], 5);
1059     CHECK_EQ(a[5], 6);
1060     CHECK_EQ(a[6], 7);
1061    
1062     memset(a, 0, sizeof(0));
1063     CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)"
1064     "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch(
1065     "1234567890123456",
1066     &a[0], &a[1], &a[2], &a[3],
1067     &a[4], &a[5], &a[6], &a[7],
1068     &a[8], &a[9], &a[10], &a[11],
1069     &a[12], &a[13], &a[14], &a[15]));
1070     CHECK_EQ(a[0], 1);
1071     CHECK_EQ(a[1], 2);
1072     CHECK_EQ(a[2], 3);
1073     CHECK_EQ(a[3], 4);
1074     CHECK_EQ(a[4], 5);
1075     CHECK_EQ(a[5], 6);
1076     CHECK_EQ(a[6], 7);
1077     CHECK_EQ(a[7], 8);
1078     CHECK_EQ(a[8], 9);
1079     CHECK_EQ(a[9], 0);
1080     CHECK_EQ(a[10], 1);
1081     CHECK_EQ(a[11], 2);
1082     CHECK_EQ(a[12], 3);
1083     CHECK_EQ(a[13], 4);
1084     CHECK_EQ(a[14], 5);
1085     CHECK_EQ(a[15], 6);
1086    
1087     /***** PartialMatch *****/
1088    
1089     printf("Testing PartialMatch\n");
1090    
1091     CHECK(RE("h.*o").PartialMatch("hello"));
1092     CHECK(RE("h.*o").PartialMatch("othello"));
1093     CHECK(RE("h.*o").PartialMatch("hello!"));
1094     CHECK(RE("((((((((((((((((((((x))))))))))))))))))))").PartialMatch("x"));
1095    
1096 nigel 93 /***** other tests *****/
1097    
1098 nigel 77 RadixTests();
1099     TestReplace();
1100     TestExtract();
1101     TestConsume();
1102     TestFindAndConsume();
1103 nigel 93 TestQuoteMetaAll();
1104 nigel 77 TestMatchNumberPeculiarity();
1105    
1106     // Check the pattern() accessor
1107     {
1108     const string kPattern = "http://([^/]+)/.*";
1109     const RE re(kPattern);
1110     CHECK_EQ(kPattern, re.pattern());
1111     }
1112    
1113     // Check RE error field.
1114     {
1115     RE re("foo");
1116     CHECK(re.error().empty()); // Must have no error
1117     }
1118    
1119     #ifdef SUPPORT_UTF8
1120     // Check UTF-8 handling
1121     {
1122     printf("Testing UTF-8 handling\n");
1123    
1124     // Three Japanese characters (nihongo)
1125     const char utf8_string[] = {
1126     0xe6, 0x97, 0xa5, // 65e5
1127     0xe6, 0x9c, 0xac, // 672c
1128     0xe8, 0xaa, 0x9e, // 8a9e
1129     0
1130     };
1131     const char utf8_pattern[] = {
1132     '.',
1133     0xe6, 0x9c, 0xac, // 672c
1134     '.',
1135     0
1136     };
1137    
1138     // Both should match in either mode, bytes or UTF-8
1139     RE re_test1(".........");
1140     CHECK(re_test1.FullMatch(utf8_string));
1141     RE re_test2("...", pcrecpp::UTF8());
1142     CHECK(re_test2.FullMatch(utf8_string));
1143    
1144     // Check that '.' matches one byte or UTF-8 character
1145     // according to the mode.
1146     string ss;
1147     RE re_test3("(.)");
1148     CHECK(re_test3.PartialMatch(utf8_string, &ss));
1149     CHECK_EQ(ss, string("\xe6"));
1150     RE re_test4("(.)", pcrecpp::UTF8());
1151     CHECK(re_test4.PartialMatch(utf8_string, &ss));
1152     CHECK_EQ(ss, string("\xe6\x97\xa5"));
1153    
1154     // Check that string matches itself in either mode
1155     RE re_test5(utf8_string);
1156     CHECK(re_test5.FullMatch(utf8_string));
1157     RE re_test6(utf8_string, pcrecpp::UTF8());
1158     CHECK(re_test6.FullMatch(utf8_string));
1159    
1160     // Check that pattern matches string only in UTF8 mode
1161     RE re_test7(utf8_pattern);
1162     CHECK(!re_test7.FullMatch(utf8_string));
1163     RE re_test8(utf8_pattern, pcrecpp::UTF8());
1164     CHECK(re_test8.FullMatch(utf8_string));
1165     }
1166    
1167     // Check that ungreedy, UTF8 regular expressions don't match when they
1168     // oughtn't -- see bug 82246.
1169     {
1170     // This code always worked.
1171     const char* pattern = "\\w+X";
1172     const string target = "a aX";
1173     RE match_sentence(pattern);
1174     RE match_sentence_re(pattern, pcrecpp::UTF8());
1175    
1176     CHECK(!match_sentence.FullMatch(target));
1177     CHECK(!match_sentence_re.FullMatch(target));
1178     }
1179    
1180     {
1181     const char* pattern = "(?U)\\w+X";
1182     const string target = "a aX";
1183     RE match_sentence(pattern);
1184     RE match_sentence_re(pattern, pcrecpp::UTF8());
1185    
1186     CHECK(!match_sentence.FullMatch(target));
1187     CHECK(!match_sentence_re.FullMatch(target));
1188     }
1189     #endif /* def SUPPORT_UTF8 */
1190    
1191     printf("Testing error reporting\n");
1192    
1193     { RE re("a\\1"); CHECK(!re.error().empty()); }
1194     {
1195     RE re("a[x");
1196     CHECK(!re.error().empty());
1197     }
1198     {
1199     RE re("a[z-a]");
1200     CHECK(!re.error().empty());
1201     }
1202     {
1203     RE re("a[[:foobar:]]");
1204     CHECK(!re.error().empty());
1205     }
1206     {
1207     RE re("a(b");
1208     CHECK(!re.error().empty());
1209     }
1210     {
1211     RE re("a\\");
1212     CHECK(!re.error().empty());
1213     }
1214    
1215 nigel 87 // Test that recursion is stopped
1216     TestRecursion();
1217 nigel 77
1218 nigel 81 // Test Options
1219     if (getenv("VERBOSE_TEST") != NULL)
1220     VERBOSE_TEST = true;
1221     TestOptions();
1222    
1223 nigel 93 // Test the constructors
1224     TestConstructors();
1225    
1226 nigel 77 // Done
1227     printf("OK\n");
1228    
1229     return 0;
1230     }

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12