/[pcre]/code/tags/pcre-7.1/pcrecpp_unittest.cc
ViewVC logotype

Contents of /code/tags/pcre-7.1/pcrecpp_unittest.cc

Parent Directory Parent Directory | Revision Log Revision Log


Revision 156 - (hide annotations) (download)
Tue Apr 24 13:49:23 2007 UTC (7 years, 6 months ago) by ph10
File size: 37234 byte(s)
Tag the PCRE 7.1 release.

1 nigel 93 // -*- coding: utf-8 -*-
2     //
3     // Copyright (c) 2005 - 2006, Google Inc.
4 nigel 77 // All rights reserved.
5     //
6     // Redistribution and use in source and binary forms, with or without
7     // modification, are permitted provided that the following conditions are
8     // met:
9     //
10     // * Redistributions of source code must retain the above copyright
11     // notice, this list of conditions and the following disclaimer.
12     // * Redistributions in binary form must reproduce the above
13     // copyright notice, this list of conditions and the following disclaimer
14     // in the documentation and/or other materials provided with the
15     // distribution.
16     // * Neither the name of Google Inc. nor the names of its
17     // contributors may be used to endorse or promote products derived from
18     // this software without specific prior written permission.
19     //
20     // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21     // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22     // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23     // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24     // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25     // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26     // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27     // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28     // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29     // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30     // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31     //
32     // Author: Sanjay Ghemawat
33     //
34     // TODO: Test extractions for PartialMatch/Consume
35    
36     #include <stdio.h>
37 nigel 91 #include <cassert>
38 nigel 77 #include <vector>
39     #include "config.h"
40     #include "pcrecpp.h"
41    
42     using pcrecpp::StringPiece;
43     using pcrecpp::RE;
44     using pcrecpp::RE_Options;
45     using pcrecpp::Hex;
46     using pcrecpp::Octal;
47     using pcrecpp::CRadix;
48    
// When true, the option tests additionally print the pattern and subject
// string they are exercising.
static bool VERBOSE_TEST = false;

// CHECK dies with a fatal error if condition is not true.  It is *not*
// controlled by NDEBUG, so the check will be executed regardless of
// compilation mode.  Therefore, it is safe to do things like:
//    CHECK_EQ(fp->Write(x), 4)
#define CHECK(condition) do {                                   \
  if (!(condition)) {                                           \
    fprintf(stderr, "%s:%d: Check failed: %s\n",                \
            __FILE__, __LINE__, #condition);                    \
    exit(1);                                                    \
  }                                                             \
} while (0)

// Both operands are parenthesized so that arguments containing operators
// that bind less tightly than == (for example `x & mask`) are compared as
// a whole instead of being split by the == in the expansion.
#define CHECK_EQ(a, b) CHECK((a) == (b))
64    
65     static void Timing1(int num_iters) {
66     // Same pattern lots of times
67     RE pattern("ruby:\\d+");
68     StringPiece p("ruby:1234");
69     for (int j = num_iters; j > 0; j--) {
70     CHECK(pattern.FullMatch(p));
71     }
72     }
73    
74     static void Timing2(int num_iters) {
75     // Same pattern lots of times
76     RE pattern("ruby:(\\d+)");
77     int i;
78     for (int j = num_iters; j > 0; j--) {
79     CHECK(pattern.FullMatch("ruby:1234", &i));
80     CHECK_EQ(i, 1234);
81     }
82     }
83    
84     static void Timing3(int num_iters) {
85     string text_string;
86     for (int j = num_iters; j > 0; j--) {
87     text_string += "this is another line\n";
88     }
89    
90     RE line_matcher(".*\n");
91     string line;
92     StringPiece text(text_string);
93     int counter = 0;
94     while (line_matcher.Consume(&text)) {
95     counter++;
96     }
97     printf("Matched %d lines\n", counter);
98     }
99    
#if 0  // uncomment this if you have a way of defining VirtualProcessSize()

// Constructs 100000 distinct RE objects and checks that the process size
// reported by VirtualProcessSize() grows by less than 2% between the
// half-way point and the end.
// NOTE(review): assumes VirtualProcessSize() returns a value convertible
// to unsigned long long -- confirm when enabling this test.
static void LeakTest() {
  // Check for memory leaks
  unsigned long long initial_size = 0;
  for (int i = 0; i < 100000; i++) {
    if (i == 50000) {
      initial_size = VirtualProcessSize();
      printf("Size after 50000: %llu\n", initial_size);
    }
    char buf[100];
    snprintf(buf, sizeof(buf), "pat%09d", i);
    RE newre(buf);
  }
  // Same type as initial_size so that it matches the %llu conversion.
  const unsigned long long final_size = VirtualProcessSize();
  printf("Size after 100000: %llu\n", final_size);
  // Convert before subtracting: an unsigned difference would wrap around
  // to a huge value if the process happened to shrink.
  const double growth =
      (double(final_size) - double(initial_size)) / double(final_size);
  printf("Growth: %0.2f%%", growth * 100);
  CHECK(growth < 0.02);    // Allow < 2% growth
}

#endif
122    
123     static void RadixTests() {
124     printf("Testing hex\n");
125    
126     #define CHECK_HEX(type, value) \
127     do { \
128     type v; \
129     CHECK(RE("([0-9a-fA-F]+)[uUlL]*").FullMatch(#value, Hex(&v))); \
130     CHECK_EQ(v, 0x ## value); \
131     CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0x" #value, CRadix(&v))); \
132     CHECK_EQ(v, 0x ## value); \
133     } while(0)
134    
135     CHECK_HEX(short, 2bad);
136     CHECK_HEX(unsigned short, 2badU);
137     CHECK_HEX(int, dead);
138     CHECK_HEX(unsigned int, deadU);
139     CHECK_HEX(long, 7eadbeefL);
140     CHECK_HEX(unsigned long, deadbeefUL);
141     #ifdef HAVE_LONG_LONG
142     CHECK_HEX(long long, 12345678deadbeefLL);
143     #endif
144     #ifdef HAVE_UNSIGNED_LONG_LONG
145     CHECK_HEX(unsigned long long, cafebabedeadbeefULL);
146     #endif
147    
148     #undef CHECK_HEX
149    
150     printf("Testing octal\n");
151    
152     #define CHECK_OCTAL(type, value) \
153     do { \
154     type v; \
155     CHECK(RE("([0-7]+)[uUlL]*").FullMatch(#value, Octal(&v))); \
156     CHECK_EQ(v, 0 ## value); \
157     CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0" #value, CRadix(&v))); \
158     CHECK_EQ(v, 0 ## value); \
159     } while(0)
160    
161     CHECK_OCTAL(short, 77777);
162     CHECK_OCTAL(unsigned short, 177777U);
163     CHECK_OCTAL(int, 17777777777);
164     CHECK_OCTAL(unsigned int, 37777777777U);
165     CHECK_OCTAL(long, 17777777777L);
166     CHECK_OCTAL(unsigned long, 37777777777UL);
167     #ifdef HAVE_LONG_LONG
168     CHECK_OCTAL(long long, 777777777777777777777LL);
169     #endif
170     #ifdef HAVE_UNSIGNED_LONG_LONG
171     CHECK_OCTAL(unsigned long long, 1777777777777777777777ULL);
172     #endif
173    
174     #undef CHECK_OCTAL
175    
176     printf("Testing decimal\n");
177    
178     #define CHECK_DECIMAL(type, value) \
179     do { \
180     type v; \
181     CHECK(RE("(-?[0-9]+)[uUlL]*").FullMatch(#value, &v)); \
182     CHECK_EQ(v, value); \
183     CHECK(RE("(-?[0-9a-fA-FxX]+)[uUlL]*").FullMatch(#value, CRadix(&v))); \
184     CHECK_EQ(v, value); \
185     } while(0)
186    
187     CHECK_DECIMAL(short, -1);
188     CHECK_DECIMAL(unsigned short, 9999);
189     CHECK_DECIMAL(int, -1000);
190     CHECK_DECIMAL(unsigned int, 12345U);
191     CHECK_DECIMAL(long, -10000000L);
192     CHECK_DECIMAL(unsigned long, 3083324652U);
193     #ifdef HAVE_LONG_LONG
194     CHECK_DECIMAL(long long, -100000000000000LL);
195     #endif
196     #ifdef HAVE_UNSIGNED_LONG_LONG
197     CHECK_DECIMAL(unsigned long long, 1234567890987654321ULL);
198     #endif
199    
200     #undef CHECK_DECIMAL
201    
202     }
203    
204     static void TestReplace() {
205     printf("Testing Replace\n");
206    
207     struct ReplaceTest {
208     const char *regexp;
209     const char *rewrite;
210     const char *original;
211     const char *single;
212     const char *global;
213     };
214     static const ReplaceTest tests[] = {
215     { "(qu|[b-df-hj-np-tv-z]*)([a-z]+)",
216     "\\2\\1ay",
217     "the quick brown fox jumps over the lazy dogs.",
218     "ethay quick brown fox jumps over the lazy dogs.",
219     "ethay ickquay ownbray oxfay umpsjay overay ethay azylay ogsday." },
220     { "\\w+",
221     "\\0-NOSPAM",
222     "paul.haahr@google.com",
223     "paul-NOSPAM.haahr@google.com",
224     "paul-NOSPAM.haahr-NOSPAM@google-NOSPAM.com-NOSPAM" },
225     { "^",
226     "(START)",
227     "foo",
228     "(START)foo",
229     "(START)foo" },
230     { "^",
231     "(START)",
232     "",
233     "(START)",
234     "(START)" },
235     { "$",
236     "(END)",
237     "",
238     "(END)",
239     "(END)" },
240     { "b",
241     "bb",
242     "ababababab",
243     "abbabababab",
244     "abbabbabbabbabb" },
245     { "b",
246     "bb",
247     "bbbbbb",
248     "bbbbbbb",
249     "bbbbbbbbbbbb" },
250     { "b+",
251     "bb",
252     "bbbbbb",
253     "bb",
254     "bb" },
255     { "b*",
256     "bb",
257     "bbbbbb",
258     "bb",
259     "bb" },
260     { "b*",
261     "bb",
262     "aaaaa",
263     "bbaaaaa",
264     "bbabbabbabbabbabb" },
265 nigel 91 { "b*",
266     "bb",
267     "aa\naa\n",
268     "bbaa\naa\n",
269     "bbabbabb\nbbabbabb\nbb" },
270     { "b*",
271     "bb",
272     "aa\raa\r",
273     "bbaa\raa\r",
274     "bbabbabb\rbbabbabb\rbb" },
275     { "b*",
276     "bb",
277     "aa\r\naa\r\n",
278     "bbaa\r\naa\r\n",
279     "bbabbabb\r\nbbabbabb\r\nbb" },
280     #ifdef SUPPORT_UTF8
281     { "b*",
282     "bb",
283     "\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8", // utf8
284     "bb\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8",
285     "bb\xE3\x83\x9B""bb""\xE3\x83\xBC""bb""\xE3\x83\xA0""bb""\xE3\x81\xB8""bb" },
286     { "b*",
287     "bb",
288     "\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n", // utf8
289     "bb\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n",
290     ("bb\xE3\x83\x9B""bb\r\nbb""\xE3\x83\xBC""bb\rbb""\xE3\x83\xA0"
291     "bb\nbb""\xE3\x81\xB8""bb\r\nbb") },
292     #endif
293 nigel 77 { "", NULL, NULL, NULL, NULL }
294     };
295    
296 nigel 91 #ifdef SUPPORT_UTF8
297     const bool support_utf8 = true;
298     #else
299     const bool support_utf8 = false;
300     #endif
301    
302 nigel 77 for (const ReplaceTest *t = tests; t->original != NULL; ++t) {
303 nigel 91 RE re(t->regexp, RE_Options(PCRE_NEWLINE_CRLF).set_utf8(support_utf8));
304     assert(re.error().empty());
305 nigel 77 string one(t->original);
306 nigel 91 CHECK(re.Replace(t->rewrite, &one));
307 nigel 77 CHECK_EQ(one, t->single);
308     string all(t->original);
309 nigel 91 CHECK(re.GlobalReplace(t->rewrite, &all) > 0);
310 nigel 77 CHECK_EQ(all, t->global);
311     }
312 nigel 91
313     // One final test: test \r\n replacement when we're not in CRLF mode
314     {
315     RE re("b*", RE_Options(PCRE_NEWLINE_CR).set_utf8(support_utf8));
316     assert(re.error().empty());
317     string all("aa\r\naa\r\n");
318     CHECK(re.GlobalReplace("bb", &all) > 0);
319     CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
320     }
321     {
322     RE re("b*", RE_Options(PCRE_NEWLINE_LF).set_utf8(support_utf8));
323     assert(re.error().empty());
324     string all("aa\r\naa\r\n");
325     CHECK(re.GlobalReplace("bb", &all) > 0);
326     CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
327     }
328     // TODO: test what happens when no PCRE_NEWLINE_* flag is set.
329     // Alas, the answer depends on how pcre was compiled.
330 nigel 77 }
331    
332     static void TestExtract() {
333     printf("Testing Extract\n");
334    
335     string s;
336    
337     CHECK(RE("(.*)@([^.]*)").Extract("\\2!\\1", "boris@kremvax.ru", &s));
338     CHECK_EQ(s, "kremvax!boris");
339    
340     // check the RE interface as well
341     CHECK(RE(".*").Extract("'\\0'", "foo", &s));
342     CHECK_EQ(s, "'foo'");
343     CHECK(!RE("bar").Extract("'\\0'", "baz", &s));
344     CHECK_EQ(s, "'foo'");
345     }
346    
347     static void TestConsume() {
348     printf("Testing Consume\n");
349    
350     string word;
351    
352     string s(" aaa b!@#$@#$cccc");
353     StringPiece input(s);
354    
355     RE r("\\s*(\\w+)"); // matches a word, possibly proceeded by whitespace
356     CHECK(r.Consume(&input, &word));
357     CHECK_EQ(word, "aaa");
358     CHECK(r.Consume(&input, &word));
359     CHECK_EQ(word, "b");
360     CHECK(! r.Consume(&input, &word));
361     }
362    
363     static void TestFindAndConsume() {
364     printf("Testing FindAndConsume\n");
365    
366     string word;
367    
368     string s(" aaa b!@#$@#$cccc");
369     StringPiece input(s);
370    
371     RE r("(\\w+)"); // matches a word
372     CHECK(r.FindAndConsume(&input, &word));
373     CHECK_EQ(word, "aaa");
374     CHECK(r.FindAndConsume(&input, &word));
375     CHECK_EQ(word, "b");
376     CHECK(r.FindAndConsume(&input, &word));
377     CHECK_EQ(word, "cccc");
378     CHECK(! r.FindAndConsume(&input, &word));
379     }
380    
381     static void TestMatchNumberPeculiarity() {
382     printf("Testing match-number peculiaraity\n");
383    
384     string word1;
385     string word2;
386     string word3;
387    
388     RE r("(foo)|(bar)|(baz)");
389     CHECK(r.PartialMatch("foo", &word1, &word2, &word3));
390     CHECK_EQ(word1, "foo");
391     CHECK_EQ(word2, "");
392     CHECK_EQ(word3, "");
393     CHECK(r.PartialMatch("bar", &word1, &word2, &word3));
394     CHECK_EQ(word1, "");
395     CHECK_EQ(word2, "bar");
396     CHECK_EQ(word3, "");
397     CHECK(r.PartialMatch("baz", &word1, &word2, &word3));
398     CHECK_EQ(word1, "");
399     CHECK_EQ(word2, "");
400     CHECK_EQ(word3, "baz");
401     CHECK(!r.PartialMatch("f", &word1, &word2, &word3));
402    
403     string a;
404     CHECK(RE("(foo)|hello").FullMatch("hello", &a));
405     CHECK_EQ(a, "");
406     }
407    
408 nigel 87 static void TestRecursion() {
409 nigel 77 printf("Testing recursion\n");
410    
411 nigel 87 // Get one string that passes (sometimes), one that never does.
412     string text_good("abcdefghijk");
413     string text_bad("acdefghijkl");
414    
415     // According to pcretest, matching text_good against (\w+)*b
416     // requires match_limit of at least 8192, and match_recursion_limit
417     // of at least 37.
418    
419     RE_Options options_ml;
420     options_ml.set_match_limit(8192);
421     RE re("(\\w+)*b", options_ml);
422     CHECK(re.PartialMatch(text_good) == true);
423     CHECK(re.PartialMatch(text_bad) == false);
424     CHECK(re.FullMatch(text_good) == false);
425     CHECK(re.FullMatch(text_bad) == false);
426    
427     options_ml.set_match_limit(1024);
428     RE re2("(\\w+)*b", options_ml);
429     CHECK(re2.PartialMatch(text_good) == false); // because of match_limit
430     CHECK(re2.PartialMatch(text_bad) == false);
431     CHECK(re2.FullMatch(text_good) == false);
432     CHECK(re2.FullMatch(text_bad) == false);
433    
434     RE_Options options_mlr;
435     options_mlr.set_match_limit_recursion(50);
436     RE re3("(\\w+)*b", options_mlr);
437     CHECK(re3.PartialMatch(text_good) == true);
438     CHECK(re3.PartialMatch(text_bad) == false);
439     CHECK(re3.FullMatch(text_good) == false);
440     CHECK(re3.FullMatch(text_bad) == false);
441    
442     options_mlr.set_match_limit_recursion(10);
443     RE re4("(\\w+)*b", options_mlr);
444     CHECK(re4.PartialMatch(text_good) == false);
445     CHECK(re4.PartialMatch(text_bad) == false);
446     CHECK(re4.FullMatch(text_good) == false);
447     CHECK(re4.FullMatch(text_bad) == false);
448 nigel 77 }
449    
450 nigel 93 // A meta-quoted string, interpreted as a pattern, should always match
451     // the original unquoted string.
452     static void TestQuoteMeta(string unquoted, RE_Options options = RE_Options()) {
453     string quoted = RE::QuoteMeta(unquoted);
454     RE re(quoted, options);
455     CHECK(re.FullMatch(unquoted));
456     }
457    
458     // A string containing meaningful regexp characters, which is then meta-
459     // quoted, should not generally match a string the unquoted string does.
460     static void NegativeTestQuoteMeta(string unquoted, string should_not_match,
461     RE_Options options = RE_Options()) {
462     string quoted = RE::QuoteMeta(unquoted);
463     RE re(quoted, options);
464     CHECK(!re.FullMatch(should_not_match));
465     }
466    
467     // Tests that quoted meta characters match their original strings,
468     // and that a few things that shouldn't match indeed do not.
469     static void TestQuotaMetaSimple() {
470     TestQuoteMeta("foo");
471     TestQuoteMeta("foo.bar");
472     TestQuoteMeta("foo\\.bar");
473     TestQuoteMeta("[1-9]");
474     TestQuoteMeta("1.5-2.0?");
475     TestQuoteMeta("\\d");
476     TestQuoteMeta("Who doesn't like ice cream?");
477     TestQuoteMeta("((a|b)c?d*e+[f-h]i)");
478     TestQuoteMeta("((?!)xxx).*yyy");
479     TestQuoteMeta("([");
480     }
481    
482     static void TestQuoteMetaSimpleNegative() {
483     NegativeTestQuoteMeta("foo", "bar");
484     NegativeTestQuoteMeta("...", "bar");
485     NegativeTestQuoteMeta("\\.", ".");
486     NegativeTestQuoteMeta("\\.", "..");
487     NegativeTestQuoteMeta("(a)", "a");
488     NegativeTestQuoteMeta("(a|b)", "a");
489     NegativeTestQuoteMeta("(a|b)", "(a)");
490     NegativeTestQuoteMeta("(a|b)", "a|b");
491     NegativeTestQuoteMeta("[0-9]", "0");
492     NegativeTestQuoteMeta("[0-9]", "0-9");
493     NegativeTestQuoteMeta("[0-9]", "[9]");
494     NegativeTestQuoteMeta("((?!)xxx)", "xxx");
495     }
496    
497     static void TestQuoteMetaLatin1() {
498     TestQuoteMeta("3\xb2 = 9");
499     }
500    
// QuoteMeta checks on UTF-8 input; compiled to an empty function unless
// SUPPORT_UTF8 is defined.
static void TestQuoteMetaUtf8() {
#ifdef SUPPORT_UTF8
  const RE_Options utf8_mode = pcrecpp::UTF8();

  TestQuoteMeta("Pl\xc3\xa1\x63ido Domingo", utf8_mode);
  TestQuoteMeta("xyz", utf8_mode);                 // No fancy utf8
  TestQuoteMeta("\xc2\xb0", utf8_mode);            // 2-byte utf8 (degree symbol)
  TestQuoteMeta("27\xc2\xb0 degrees", utf8_mode);  // As a middle character
  TestQuoteMeta("\xe2\x80\xb3", utf8_mode);        // 3-byte utf8 (double prime)
  TestQuoteMeta("\xf0\x9d\x85\x9f", utf8_mode);    // 4-byte utf8 (music note)

  // Without the UTF-8 option the bytes are interpreted as Latin-1, but the
  // check should still work.
  TestQuoteMeta("27\xc2\xb0");

  // The quoted 2-byte character (degree symbol) must not match a text in
  // which each of its bytes is preceded by a backslash.
  NegativeTestQuoteMeta("27\xc2\xb0",
                        "27\\\xc2\\\xb0",
                        utf8_mode);
#endif
}
515    
516     static void TestQuoteMetaAll() {
517     printf("Testing QuoteMeta\n");
518     TestQuotaMetaSimple();
519     TestQuoteMetaSimpleNegative();
520     TestQuoteMetaLatin1();
521     TestQuoteMetaUtf8();
522     }
523    
524 nigel 81 //
525     // Options tests contributed by
526     // Giuseppe Maxia, CTO, Stardata s.r.l.
527     // July 2005
528     //
529     static void GetOneOptionResult(
530     const char *option_name,
531     const char *regex,
532     const char *str,
533     RE_Options options,
534     bool full,
535     string expected) {
536 nigel 77
537 nigel 81 printf("Testing Option <%s>\n", option_name);
538     if(VERBOSE_TEST)
539     printf("/%s/ finds \"%s\" within \"%s\" \n",
540     regex,
541     expected.c_str(),
542     str);
543     string captured("");
544     if (full)
545     RE(regex,options).FullMatch(str, &captured);
546     else
547     RE(regex,options).PartialMatch(str, &captured);
548     CHECK_EQ(captured, expected);
549     }
550    
551     static void TestOneOption(
552     const char *option_name,
553     const char *regex,
554     const char *str,
555     RE_Options options,
556     bool full,
557     bool assertive = true) {
558    
559     printf("Testing Option <%s>\n", option_name);
560     if (VERBOSE_TEST)
561     printf("'%s' %s /%s/ \n",
562     str,
563     (assertive? "matches" : "doesn't match"),
564     regex);
565     if (assertive) {
566     if (full)
567     CHECK(RE(regex,options).FullMatch(str));
568     else
569     CHECK(RE(regex,options).PartialMatch(str));
570     } else {
571     if (full)
572     CHECK(!RE(regex,options).FullMatch(str));
573     else
574     CHECK(!RE(regex,options).PartialMatch(str));
575     }
576     }
577    
578     static void Test_CASELESS() {
579     RE_Options options;
580     RE_Options options2;
581    
582     options.set_caseless(true);
583     TestOneOption("CASELESS (class)", "HELLO", "hello", options, false);
584     TestOneOption("CASELESS (class2)", "HELLO", "hello", options2.set_caseless(true), false);
585     TestOneOption("CASELESS (class)", "^[A-Z]+$", "Hello", options, false);
586    
587     TestOneOption("CASELESS (function)", "HELLO", "hello", pcrecpp::CASELESS(), false);
588     TestOneOption("CASELESS (function)", "^[A-Z]+$", "Hello", pcrecpp::CASELESS(), false);
589     options.set_caseless(false);
590     TestOneOption("no CASELESS", "HELLO", "hello", options, false, false);
591     }
592    
593     static void Test_MULTILINE() {
594     RE_Options options;
595     RE_Options options2;
596     const char *str = "HELLO\n" "cruel\n" "world\n";
597    
598     options.set_multiline(true);
599     TestOneOption("MULTILINE (class)", "^cruel$", str, options, false);
600     TestOneOption("MULTILINE (class2)", "^cruel$", str, options2.set_multiline(true), false);
601     TestOneOption("MULTILINE (function)", "^cruel$", str, pcrecpp::MULTILINE(), false);
602     options.set_multiline(false);
603     TestOneOption("no MULTILINE", "^cruel$", str, options, false, false);
604     }
605    
606     static void Test_DOTALL() {
607     RE_Options options;
608     RE_Options options2;
609     const char *str = "HELLO\n" "cruel\n" "world";
610    
611     options.set_dotall(true);
612     TestOneOption("DOTALL (class)", "HELLO.*world", str, options, true);
613     TestOneOption("DOTALL (class2)", "HELLO.*world", str, options2.set_dotall(true), true);
614     TestOneOption("DOTALL (function)", "HELLO.*world", str, pcrecpp::DOTALL(), true);
615     options.set_dotall(false);
616     TestOneOption("no DOTALL", "HELLO.*world", str, options, true, false);
617     }
618    
619     static void Test_DOLLAR_ENDONLY() {
620     RE_Options options;
621     RE_Options options2;
622     const char *str = "HELLO world\n";
623    
624     TestOneOption("no DOLLAR_ENDONLY", "world$", str, options, false);
625     options.set_dollar_endonly(true);
626     TestOneOption("DOLLAR_ENDONLY 1", "world$", str, options, false, false);
627     TestOneOption("DOLLAR_ENDONLY 2", "world$", str, options2.set_dollar_endonly(true), false, false);
628     }
629    
630     static void Test_EXTRA() {
631     RE_Options options;
632     const char *str = "HELLO";
633    
634     options.set_extra(true);
635     TestOneOption("EXTRA 1", "\\HELL\\O", str, options, true, false );
636     TestOneOption("EXTRA 2", "\\HELL\\O", str, RE_Options().set_extra(true), true, false );
637     options.set_extra(false);
638     TestOneOption("no EXTRA", "\\HELL\\O", str, options, true );
639     }
640    
641     static void Test_EXTENDED() {
642     RE_Options options;
643     RE_Options options2;
644     const char *str = "HELLO world";
645    
646     options.set_extended(true);
647     TestOneOption("EXTENDED (class)", "HELLO world", str, options, false, false);
648     TestOneOption("EXTENDED (class2)", "HELLO world", str, options2.set_extended(true), false, false);
649     TestOneOption("EXTENDED (class)",
650     "^ HE L{2} O "
651     "\\s+ "
652     "\\w+ $ ",
653     str,
654     options,
655     false);
656    
657     TestOneOption("EXTENDED (function)", "HELLO world", str, pcrecpp::EXTENDED(), false, false);
658     TestOneOption("EXTENDED (function)",
659     "^ HE L{2} O "
660     "\\s+ "
661     "\\w+ $ ",
662     str,
663     pcrecpp::EXTENDED(),
664     false);
665    
666     options.set_extended(false);
667     TestOneOption("no EXTENDED", "HELLO world", str, options, false);
668     }
669    
670     static void Test_NO_AUTO_CAPTURE() {
671     RE_Options options;
672     const char *str = "HELLO world";
673     string captured;
674    
675     printf("Testing Option <no NO_AUTO_CAPTURE>\n");
676     if (VERBOSE_TEST)
677     printf("parentheses capture text\n");
678     RE re("(world|universe)$", options);
679     CHECK(re.Extract("\\1", str , &captured));
680     CHECK_EQ(captured, "world");
681     options.set_no_auto_capture(true);
682     printf("testing Option <NO_AUTO_CAPTURE>\n");
683     if (VERBOSE_TEST)
684     printf("parentheses do not capture text\n");
685     re.Extract("\\1",str, &captured );
686     CHECK_EQ(captured, "world");
687     }
688    
689     static void Test_UNGREEDY() {
690     RE_Options options;
691     const char *str = "HELLO, 'this' is the 'world'";
692    
693     options.set_ungreedy(true);
694     GetOneOptionResult("UNGREEDY 1", "('.*')", str, options, false, "'this'" );
695     GetOneOptionResult("UNGREEDY 2", "('.*')", str, RE_Options().set_ungreedy(true), false, "'this'" );
696     GetOneOptionResult("UNGREEDY", "('.*?')", str, options, false, "'this' is the 'world'" );
697    
698     options.set_ungreedy(false);
699     GetOneOptionResult("no UNGREEDY", "('.*')", str, options, false, "'this' is the 'world'" );
700     GetOneOptionResult("no UNGREEDY", "('.*?')", str, options, false, "'this'" );
701     }
702    
703     static void Test_all_options() {
704     const char *str = "HELLO\n" "cruel\n" "world";
705     RE_Options options;
706     options.set_all_options(PCRE_CASELESS | PCRE_DOTALL);
707    
708     TestOneOption("all_options (CASELESS|DOTALL)", "^hello.*WORLD", str , options, false);
709     options.set_all_options(0);
710     TestOneOption("all_options (0)", "^hello.*WORLD", str , options, false, false);
711     options.set_all_options(PCRE_MULTILINE | PCRE_EXTENDED);
712    
713     TestOneOption("all_options (MULTILINE|EXTENDED)", " ^ c r u e l $ ", str, options, false);
714     TestOneOption("all_options (MULTILINE|EXTENDED) with constructor",
715     " ^ c r u e l $ ",
716     str,
717     RE_Options(PCRE_MULTILINE | PCRE_EXTENDED),
718     false);
719    
720     TestOneOption("all_options (MULTILINE|EXTENDED) with concatenation",
721     " ^ c r u e l $ ",
722     str,
723     RE_Options()
724     .set_multiline(true)
725     .set_extended(true),
726     false);
727    
728     options.set_all_options(0);
729     TestOneOption("all_options (0)", "^ c r u e l $", str, options, false, false);
730    
731     }
732    
733     static void TestOptions() {
734     printf("Testing Options\n");
735     Test_CASELESS();
736     Test_MULTILINE();
737     Test_DOTALL();
738     Test_DOLLAR_ENDONLY();
739     Test_EXTENDED();
740     Test_NO_AUTO_CAPTURE();
741     Test_UNGREEDY();
742     Test_EXTRA();
743     Test_all_options();
744     }
745    
746 nigel 93 static void TestConstructors() {
747     printf("Testing constructors\n");
748    
749     RE_Options options;
750     options.set_dotall(true);
751     const char *str = "HELLO\n" "cruel\n" "world";
752    
753     RE orig("HELLO.*world", options);
754     CHECK(orig.FullMatch(str));
755    
756     RE copy1(orig);
757     CHECK(copy1.FullMatch(str));
758    
759     RE copy2("not a match");
760     CHECK(!copy2.FullMatch(str));
761     copy2 = copy1;
762     CHECK(copy2.FullMatch(str));
763     copy2 = orig;
764     CHECK(copy2.FullMatch(str));
765    
766     // Make sure when we assign to ourselves, nothing bad happens
767     orig = orig;
768     copy1 = copy1;
769     copy2 = copy2;
770     CHECK(orig.FullMatch(str));
771     CHECK(copy1.FullMatch(str));
772     CHECK(copy2.FullMatch(str));
773     }
774    
775 nigel 77 int main(int argc, char** argv) {
776     // Treat any flag as --help
777     if (argc > 1 && argv[1][0] == '-') {
778     printf("Usage: %s [timing1|timing2|timing3 num-iters]\n"
779     " If 'timingX ###' is specified, run the given timing test\n"
780     " with the given number of iterations, rather than running\n"
781     " the default corectness test.\n", argv[0]);
782     return 0;
783     }
784    
785     if (argc > 1) {
786     if ( argc == 2 || atoi(argv[2]) == 0) {
787     printf("timing mode needs a num-iters argument\n");
788     return 1;
789     }
790     if (!strcmp(argv[1], "timing1"))
791     Timing1(atoi(argv[2]));
792     else if (!strcmp(argv[1], "timing2"))
793     Timing2(atoi(argv[2]));
794     else if (!strcmp(argv[1], "timing3"))
795     Timing3(atoi(argv[2]));
796     else
797     printf("Unknown argument '%s'\n", argv[1]);
798     return 0;
799     }
800    
801     printf("Testing FullMatch\n");
802    
803     int i;
804     string s;
805    
806     /***** FullMatch with no args *****/
807    
808     CHECK(RE("h.*o").FullMatch("hello"));
809     CHECK(!RE("h.*o").FullMatch("othello"));
810     CHECK(!RE("h.*o").FullMatch("hello!"));
811    
812     /***** FullMatch with args *****/
813    
814     // Zero-arg
815     CHECK(RE("\\d+").FullMatch("1001"));
816    
817     // Single-arg
818     CHECK(RE("(\\d+)").FullMatch("1001", &i));
819     CHECK_EQ(i, 1001);
820     CHECK(RE("(-?\\d+)").FullMatch("-123", &i));
821     CHECK_EQ(i, -123);
822     CHECK(!RE("()\\d+").FullMatch("10", &i));
823     CHECK(!RE("(\\d+)").FullMatch("1234567890123456789012345678901234567890",
824     &i));
825    
826     // Digits surrounding integer-arg
827     CHECK(RE("1(\\d*)4").FullMatch("1234", &i));
828     CHECK_EQ(i, 23);
829     CHECK(RE("(\\d)\\d+").FullMatch("1234", &i));
830     CHECK_EQ(i, 1);
831     CHECK(RE("(-\\d)\\d+").FullMatch("-1234", &i));
832     CHECK_EQ(i, -1);
833     CHECK(RE("(\\d)").PartialMatch("1234", &i));
834     CHECK_EQ(i, 1);
835     CHECK(RE("(-\\d)").PartialMatch("-1234", &i));
836     CHECK_EQ(i, -1);
837    
838     // String-arg
839     CHECK(RE("h(.*)o").FullMatch("hello", &s));
840     CHECK_EQ(s, string("ell"));
841    
842     // StringPiece-arg
843     StringPiece sp;
844     CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &sp, &i));
845     CHECK_EQ(sp.size(), 4);
846     CHECK(memcmp(sp.data(), "ruby", 4) == 0);
847     CHECK_EQ(i, 1234);
848    
849     // Multi-arg
850     CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &s, &i));
851     CHECK_EQ(s, string("ruby"));
852     CHECK_EQ(i, 1234);
853    
854     // Ignored arg
855     CHECK(RE("(\\w+)(:)(\\d+)").FullMatch("ruby:1234", &s, (void*)NULL, &i));
856     CHECK_EQ(s, string("ruby"));
857     CHECK_EQ(i, 1234);
858    
859     // Type tests
860     {
861     char c;
862     CHECK(RE("(H)ello").FullMatch("Hello", &c));
863     CHECK_EQ(c, 'H');
864     }
865     {
866     unsigned char c;
867     CHECK(RE("(H)ello").FullMatch("Hello", &c));
868     CHECK_EQ(c, static_cast<unsigned char>('H'));
869     }
870     {
871     short v;
872     CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
873     CHECK(RE("(-?\\d+)").FullMatch("-100", &v)); CHECK_EQ(v, -100);
874     CHECK(RE("(-?\\d+)").FullMatch("32767", &v)); CHECK_EQ(v, 32767);
875     CHECK(RE("(-?\\d+)").FullMatch("-32768", &v)); CHECK_EQ(v, -32768);
876     CHECK(!RE("(-?\\d+)").FullMatch("-32769", &v));
877     CHECK(!RE("(-?\\d+)").FullMatch("32768", &v));
878     }
879     {
880     unsigned short v;
881     CHECK(RE("(\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
882     CHECK(RE("(\\d+)").FullMatch("32767", &v)); CHECK_EQ(v, 32767);
883     CHECK(RE("(\\d+)").FullMatch("65535", &v)); CHECK_EQ(v, 65535);
884     CHECK(!RE("(\\d+)").FullMatch("65536", &v));
885     }
886     {
887     int v;
888     static const int max_value = 0x7fffffff;
889     static const int min_value = -max_value - 1;
890     CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
891     CHECK(RE("(-?\\d+)").FullMatch("-100", &v)); CHECK_EQ(v, -100);
892     CHECK(RE("(-?\\d+)").FullMatch("2147483647", &v)); CHECK_EQ(v, max_value);
893     CHECK(RE("(-?\\d+)").FullMatch("-2147483648", &v)); CHECK_EQ(v, min_value);
894     CHECK(!RE("(-?\\d+)").FullMatch("-2147483649", &v));
895     CHECK(!RE("(-?\\d+)").FullMatch("2147483648", &v));
896     }
897     {
898     unsigned int v;
899     static const unsigned int max_value = 0xfffffffful;
900     CHECK(RE("(\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
901     CHECK(RE("(\\d+)").FullMatch("4294967295", &v)); CHECK_EQ(v, max_value);
902     CHECK(!RE("(\\d+)").FullMatch("4294967296", &v));
903     }
904     #ifdef HAVE_LONG_LONG
905     {
906     long long v;
907     static const long long max_value = 0x7fffffffffffffffLL;
908     static const long long min_value = -max_value - 1;
909     char buf[32];
910    
911     CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
912     CHECK(RE("(-?\\d+)").FullMatch("-100",&v)); CHECK_EQ(v, -100);
913    
914     snprintf(buf, sizeof(buf), "%lld", max_value);
915     CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
916    
917     snprintf(buf, sizeof(buf), "%lld", min_value);
918     CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, min_value);
919    
920     snprintf(buf, sizeof(buf), "%lld", max_value);
921     assert(buf[strlen(buf)-1] != '9');
922     buf[strlen(buf)-1]++;
923     CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
924    
925     snprintf(buf, sizeof(buf), "%lld", min_value);
926     assert(buf[strlen(buf)-1] != '9');
927     buf[strlen(buf)-1]++;
928     CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
929     }
930     #endif
931     #if defined HAVE_UNSIGNED_LONG_LONG && defined HAVE_LONG_LONG
932     {
933     unsigned long long v;
934     long long v2;
935     static const unsigned long long max_value = 0xffffffffffffffffULL;
936     char buf[32];
937    
938     CHECK(RE("(-?\\d+)").FullMatch("100",&v)); CHECK_EQ(v, 100);
939     CHECK(RE("(-?\\d+)").FullMatch("-100",&v2)); CHECK_EQ(v2, -100);
940    
941     snprintf(buf, sizeof(buf), "%llu", max_value);
942     CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
943    
944     assert(buf[strlen(buf)-1] != '9');
945     buf[strlen(buf)-1]++;
946     CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
947     }
948     #endif
949     {
950     float v;
951     CHECK(RE("(.*)").FullMatch("100", &v));
952     CHECK(RE("(.*)").FullMatch("-100.", &v));
953     CHECK(RE("(.*)").FullMatch("1e23", &v));
954     }
955     {
956     double v;
957     CHECK(RE("(.*)").FullMatch("100", &v));
958     CHECK(RE("(.*)").FullMatch("-100.", &v));
959     CHECK(RE("(.*)").FullMatch("1e23", &v));
960     }
961    
962     // Check that matching is fully anchored
963     CHECK(!RE("(\\d+)").FullMatch("x1001", &i));
964     CHECK(!RE("(\\d+)").FullMatch("1001x", &i));
965     CHECK(RE("x(\\d+)").FullMatch("x1001", &i)); CHECK_EQ(i, 1001);
966     CHECK(RE("(\\d+)x").FullMatch("1001x", &i)); CHECK_EQ(i, 1001);
967    
968     // Braces
969     CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcd"));
970     CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcde"));
971     CHECK(!RE("[0-9a-f+.-]{5,}").FullMatch("0abc"));
972    
973     // Complicated RE
974     CHECK(RE("foo|bar|[A-Z]").FullMatch("foo"));
975     CHECK(RE("foo|bar|[A-Z]").FullMatch("bar"));
976     CHECK(RE("foo|bar|[A-Z]").FullMatch("X"));
977     CHECK(!RE("foo|bar|[A-Z]").FullMatch("XY"));
978    
979     // Check full-match handling (needs '$' tacked on internally)
980     CHECK(RE("fo|foo").FullMatch("fo"));
981     CHECK(RE("fo|foo").FullMatch("foo"));
982     CHECK(RE("fo|foo$").FullMatch("fo"));
983     CHECK(RE("fo|foo$").FullMatch("foo"));
984     CHECK(RE("foo$").FullMatch("foo"));
985     CHECK(!RE("foo\\$").FullMatch("foo$bar"));
986     CHECK(!RE("fo|bar").FullMatch("fox"));
987    
988     // Uncomment the following if we change the handling of '$' to
989     // prevent it from matching a trailing newline
990     if (false) {
991     // Check that we don't get bitten by pcre's special handling of a
992     // '\n' at the end of the string matching '$'
993     CHECK(!RE("foo$").PartialMatch("foo\n"));
994     }
995    
996     // Number of args
997     int a[16];
998     CHECK(RE("").FullMatch(""));
999    
1000     memset(a, 0, sizeof(0));
1001     CHECK(RE("(\\d){1}").FullMatch("1",
1002     &a[0]));
1003     CHECK_EQ(a[0], 1);
1004    
1005     memset(a, 0, sizeof(0));
1006     CHECK(RE("(\\d)(\\d)").FullMatch("12",
1007     &a[0], &a[1]));
1008     CHECK_EQ(a[0], 1);
1009     CHECK_EQ(a[1], 2);
1010    
1011     memset(a, 0, sizeof(0));
1012     CHECK(RE("(\\d)(\\d)(\\d)").FullMatch("123",
1013     &a[0], &a[1], &a[2]));
1014     CHECK_EQ(a[0], 1);
1015     CHECK_EQ(a[1], 2);
1016     CHECK_EQ(a[2], 3);
1017    
1018     memset(a, 0, sizeof(0));
1019     CHECK(RE("(\\d)(\\d)(\\d)(\\d)").FullMatch("1234",
1020     &a[0], &a[1], &a[2], &a[3]));
1021     CHECK_EQ(a[0], 1);
1022     CHECK_EQ(a[1], 2);
1023     CHECK_EQ(a[2], 3);
1024     CHECK_EQ(a[3], 4);
1025    
1026     memset(a, 0, sizeof(0));
1027     CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("12345",
1028     &a[0], &a[1], &a[2],
1029     &a[3], &a[4]));
1030     CHECK_EQ(a[0], 1);
1031     CHECK_EQ(a[1], 2);
1032     CHECK_EQ(a[2], 3);
1033     CHECK_EQ(a[3], 4);
1034     CHECK_EQ(a[4], 5);
1035    
1036     memset(a, 0, sizeof(0));
1037     CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("123456",
1038     &a[0], &a[1], &a[2],
1039     &a[3], &a[4], &a[5]));
1040     CHECK_EQ(a[0], 1);
1041     CHECK_EQ(a[1], 2);
1042     CHECK_EQ(a[2], 3);
1043     CHECK_EQ(a[3], 4);
1044     CHECK_EQ(a[4], 5);
1045     CHECK_EQ(a[5], 6);
1046    
1047     memset(a, 0, sizeof(0));
1048     CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("1234567",
1049     &a[0], &a[1], &a[2], &a[3],
1050     &a[4], &a[5], &a[6]));
1051     CHECK_EQ(a[0], 1);
1052     CHECK_EQ(a[1], 2);
1053     CHECK_EQ(a[2], 3);
1054     CHECK_EQ(a[3], 4);
1055     CHECK_EQ(a[4], 5);
1056     CHECK_EQ(a[5], 6);
1057     CHECK_EQ(a[6], 7);
1058    
1059     memset(a, 0, sizeof(0));
1060     CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)"
1061     "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch(
1062     "1234567890123456",
1063     &a[0], &a[1], &a[2], &a[3],
1064     &a[4], &a[5], &a[6], &a[7],
1065     &a[8], &a[9], &a[10], &a[11],
1066     &a[12], &a[13], &a[14], &a[15]));
1067     CHECK_EQ(a[0], 1);
1068     CHECK_EQ(a[1], 2);
1069     CHECK_EQ(a[2], 3);
1070     CHECK_EQ(a[3], 4);
1071     CHECK_EQ(a[4], 5);
1072     CHECK_EQ(a[5], 6);
1073     CHECK_EQ(a[6], 7);
1074     CHECK_EQ(a[7], 8);
1075     CHECK_EQ(a[8], 9);
1076     CHECK_EQ(a[9], 0);
1077     CHECK_EQ(a[10], 1);
1078     CHECK_EQ(a[11], 2);
1079     CHECK_EQ(a[12], 3);
1080     CHECK_EQ(a[13], 4);
1081     CHECK_EQ(a[14], 5);
1082     CHECK_EQ(a[15], 6);
1083    
1084     /***** PartialMatch *****/
1085    
1086     printf("Testing PartialMatch\n");
1087    
1088     CHECK(RE("h.*o").PartialMatch("hello"));
1089     CHECK(RE("h.*o").PartialMatch("othello"));
1090     CHECK(RE("h.*o").PartialMatch("hello!"));
1091     CHECK(RE("((((((((((((((((((((x))))))))))))))))))))").PartialMatch("x"));
1092    
1093 nigel 93 /***** other tests *****/
1094    
1095 nigel 77 RadixTests();
1096     TestReplace();
1097     TestExtract();
1098     TestConsume();
1099     TestFindAndConsume();
1100 nigel 93 TestQuoteMetaAll();
1101 nigel 77 TestMatchNumberPeculiarity();
1102    
1103     // Check the pattern() accessor
1104     {
1105     const string kPattern = "http://([^/]+)/.*";
1106     const RE re(kPattern);
1107     CHECK_EQ(kPattern, re.pattern());
1108     }
1109    
1110     // Check RE error field.
1111     {
1112     RE re("foo");
1113     CHECK(re.error().empty()); // Must have no error
1114     }
1115    
1116     #ifdef SUPPORT_UTF8
1117     // Check UTF-8 handling
1118     {
1119     printf("Testing UTF-8 handling\n");
1120    
1121     // Three Japanese characters (nihongo)
1122     const char utf8_string[] = {
1123     0xe6, 0x97, 0xa5, // 65e5
1124     0xe6, 0x9c, 0xac, // 627c
1125     0xe8, 0xaa, 0x9e, // 8a9e
1126     0
1127     };
1128     const char utf8_pattern[] = {
1129     '.',
1130     0xe6, 0x9c, 0xac, // 627c
1131     '.',
1132     0
1133     };
1134    
1135     // Both should match in either mode, bytes or UTF-8
1136     RE re_test1(".........");
1137     CHECK(re_test1.FullMatch(utf8_string));
1138     RE re_test2("...", pcrecpp::UTF8());
1139     CHECK(re_test2.FullMatch(utf8_string));
1140    
1141     // Check that '.' matches one byte or UTF-8 character
1142     // according to the mode.
1143     string ss;
1144     RE re_test3("(.)");
1145     CHECK(re_test3.PartialMatch(utf8_string, &ss));
1146     CHECK_EQ(ss, string("\xe6"));
1147     RE re_test4("(.)", pcrecpp::UTF8());
1148     CHECK(re_test4.PartialMatch(utf8_string, &ss));
1149     CHECK_EQ(ss, string("\xe6\x97\xa5"));
1150    
1151     // Check that string matches itself in either mode
1152     RE re_test5(utf8_string);
1153     CHECK(re_test5.FullMatch(utf8_string));
1154     RE re_test6(utf8_string, pcrecpp::UTF8());
1155     CHECK(re_test6.FullMatch(utf8_string));
1156    
1157     // Check that pattern matches string only in UTF8 mode
1158     RE re_test7(utf8_pattern);
1159     CHECK(!re_test7.FullMatch(utf8_string));
1160     RE re_test8(utf8_pattern, pcrecpp::UTF8());
1161     CHECK(re_test8.FullMatch(utf8_string));
1162     }
1163    
1164     // Check that ungreedy, UTF8 regular expressions don't match when they
1165     // oughtn't -- see bug 82246.
1166     {
1167     // This code always worked.
1168     const char* pattern = "\\w+X";
1169     const string target = "a aX";
1170     RE match_sentence(pattern);
1171     RE match_sentence_re(pattern, pcrecpp::UTF8());
1172    
1173     CHECK(!match_sentence.FullMatch(target));
1174     CHECK(!match_sentence_re.FullMatch(target));
1175     }
1176    
1177     {
1178     const char* pattern = "(?U)\\w+X";
1179     const string target = "a aX";
1180     RE match_sentence(pattern);
1181     RE match_sentence_re(pattern, pcrecpp::UTF8());
1182    
1183     CHECK(!match_sentence.FullMatch(target));
1184     CHECK(!match_sentence_re.FullMatch(target));
1185     }
1186     #endif /* def SUPPORT_UTF8 */
1187    
1188     printf("Testing error reporting\n");
1189    
1190     { RE re("a\\1"); CHECK(!re.error().empty()); }
1191     {
1192     RE re("a[x");
1193     CHECK(!re.error().empty());
1194     }
1195     {
1196     RE re("a[z-a]");
1197     CHECK(!re.error().empty());
1198     }
1199     {
1200     RE re("a[[:foobar:]]");
1201     CHECK(!re.error().empty());
1202     }
1203     {
1204     RE re("a(b");
1205     CHECK(!re.error().empty());
1206     }
1207     {
1208     RE re("a\\");
1209     CHECK(!re.error().empty());
1210     }
1211    
1212 nigel 87 // Test that recursion is stopped
1213     TestRecursion();
1214 nigel 77
1215 nigel 81 // Test Options
1216     if (getenv("VERBOSE_TEST") != NULL)
1217     VERBOSE_TEST = true;
1218     TestOptions();
1219    
1220 nigel 93 // Test the constructors
1221     TestConstructors();
1222    
1223 nigel 77 // Done
1224     printf("OK\n");
1225    
1226     return 0;
1227     }

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12