/[pcre]/code/trunk/pcrecpp_unittest.cc
ViewVC logotype

Contents of /code/trunk/pcrecpp_unittest.cc

Parent Directory | Revision Log


Revision 193 - (hide annotations) (download)
Mon Jul 30 11:34:18 2007 UTC (7 years, 3 months ago) by ph10
File size: 37614 byte(s)
Arrange to use "%I64d" instead of "%lld" for long printing in the pcrecpp 
unittest when running under MinGW.

1 nigel 93 // -*- coding: utf-8 -*-
2     //
3     // Copyright (c) 2005 - 2006, Google Inc.
4 nigel 77 // All rights reserved.
5     //
6     // Redistribution and use in source and binary forms, with or without
7     // modification, are permitted provided that the following conditions are
8     // met:
9     //
10     // * Redistributions of source code must retain the above copyright
11     // notice, this list of conditions and the following disclaimer.
12     // * Redistributions in binary form must reproduce the above
13     // copyright notice, this list of conditions and the following disclaimer
14     // in the documentation and/or other materials provided with the
15     // distribution.
16     // * Neither the name of Google Inc. nor the names of its
17     // contributors may be used to endorse or promote products derived from
18     // this software without specific prior written permission.
19     //
20     // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21     // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22     // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23     // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24     // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25     // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26     // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27     // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28     // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29     // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30     // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31     //
32     // Author: Sanjay Ghemawat
33     //
34     // TODO: Test extractions for PartialMatch/Consume
35    
#include <stdio.h>
#include <stdlib.h>   // exit, atoi
#include <string.h>   // strcmp, strlen, memcmp
#include <cassert>
#include <vector>
#include "config.h"
#include "pcrecpp.h"
41    
42     using pcrecpp::StringPiece;
43     using pcrecpp::RE;
44     using pcrecpp::RE_Options;
45     using pcrecpp::Hex;
46     using pcrecpp::Octal;
47     using pcrecpp::CRadix;
48    
// When true, the option tests additionally print a description of each case
// they are about to run.
static bool VERBOSE_TEST(false);
50    
// CHECK(condition) prints "<file>:<line>: Check failed: <condition>" to
// stderr and exits with status 1 when `condition` is false.  It is *not*
// controlled by NDEBUG, so the condition is always evaluated regardless of
// compilation mode.  Therefore, it is safe to do things like:
//    CHECK_EQ(fp->Write(x), 4)
#define CHECK(condition) do {                                   \
    if (!(condition)) {                                         \
      fprintf(stderr, "%s:%d: Check failed: %s\n",              \
              __FILE__, __LINE__, #condition);                  \
      exit(1);                                                  \
    }                                                           \
  } while (0)

// CHECK_EQ(a, b) is CHECK that the two operands compare equal.  Both operands
// are parenthesized so that an argument containing an operator that binds
// more loosely than == (for example `x & mask`) is compared as a whole.
#define CHECK_EQ(a, b) CHECK((a) == (b))
64    
65     static void Timing1(int num_iters) {
66     // Same pattern lots of times
67     RE pattern("ruby:\\d+");
68     StringPiece p("ruby:1234");
69     for (int j = num_iters; j > 0; j--) {
70     CHECK(pattern.FullMatch(p));
71     }
72     }
73    
74     static void Timing2(int num_iters) {
75     // Same pattern lots of times
76     RE pattern("ruby:(\\d+)");
77     int i;
78     for (int j = num_iters; j > 0; j--) {
79     CHECK(pattern.FullMatch("ruby:1234", &i));
80     CHECK_EQ(i, 1234);
81     }
82     }
83    
84     static void Timing3(int num_iters) {
85     string text_string;
86     for (int j = num_iters; j > 0; j--) {
87     text_string += "this is another line\n";
88     }
89    
90     RE line_matcher(".*\n");
91     string line;
92     StringPiece text(text_string);
93     int counter = 0;
94     while (line_matcher.Consume(&text)) {
95     counter++;
96     }
97     printf("Matched %d lines\n", counter);
98     }
99    
#if 0  // enable this if you have a way of defining VirtualProcessSize()

// Checks for memory leaks: constructs (and destroys) 100000 RE objects with
// distinct patterns, samples VirtualProcessSize() at iteration 50000 and
// again at the end, and requires the relative growth between the two samples
// to stay below 2%.  VirtualProcessSize() is not defined in this file.
static void LeakTest() {
  unsigned long long initial_size = 0;
  for (int i = 0; i < 100000; i++) {
    if (i == 50000) {
      initial_size = VirtualProcessSize();
      printf("Size after 50000: %llu\n", initial_size);
    }
    char buf[100];
    snprintf(buf, sizeof(buf), "pat%09d", i);
    RE newre(buf);
  }
  // Same type as initial_size, matching the %llu conversions used below.
  const unsigned long long final_size = VirtualProcessSize();
  printf("Size after 100000: %llu\n", final_size);
  // Subtract in floating point: an unsigned subtraction would wrap to a huge
  // value (and fail the check) if the process happened to shrink.
  const double growth =
      (double(final_size) - double(initial_size)) / double(final_size);
  printf("Growth: %0.2f%%\n", growth * 100);
  CHECK(growth < 0.02);    // Allow < 2% growth
}

#endif
122    
123     static void RadixTests() {
124     printf("Testing hex\n");
125    
126     #define CHECK_HEX(type, value) \
127     do { \
128     type v; \
129     CHECK(RE("([0-9a-fA-F]+)[uUlL]*").FullMatch(#value, Hex(&v))); \
130     CHECK_EQ(v, 0x ## value); \
131     CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0x" #value, CRadix(&v))); \
132     CHECK_EQ(v, 0x ## value); \
133     } while(0)
134    
135     CHECK_HEX(short, 2bad);
136     CHECK_HEX(unsigned short, 2badU);
137     CHECK_HEX(int, dead);
138     CHECK_HEX(unsigned int, deadU);
139     CHECK_HEX(long, 7eadbeefL);
140     CHECK_HEX(unsigned long, deadbeefUL);
141     #ifdef HAVE_LONG_LONG
142     CHECK_HEX(long long, 12345678deadbeefLL);
143     #endif
144     #ifdef HAVE_UNSIGNED_LONG_LONG
145     CHECK_HEX(unsigned long long, cafebabedeadbeefULL);
146     #endif
147    
148     #undef CHECK_HEX
149    
150     printf("Testing octal\n");
151    
152     #define CHECK_OCTAL(type, value) \
153     do { \
154     type v; \
155     CHECK(RE("([0-7]+)[uUlL]*").FullMatch(#value, Octal(&v))); \
156     CHECK_EQ(v, 0 ## value); \
157     CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0" #value, CRadix(&v))); \
158     CHECK_EQ(v, 0 ## value); \
159     } while(0)
160    
161     CHECK_OCTAL(short, 77777);
162     CHECK_OCTAL(unsigned short, 177777U);
163     CHECK_OCTAL(int, 17777777777);
164     CHECK_OCTAL(unsigned int, 37777777777U);
165     CHECK_OCTAL(long, 17777777777L);
166     CHECK_OCTAL(unsigned long, 37777777777UL);
167     #ifdef HAVE_LONG_LONG
168     CHECK_OCTAL(long long, 777777777777777777777LL);
169     #endif
170     #ifdef HAVE_UNSIGNED_LONG_LONG
171     CHECK_OCTAL(unsigned long long, 1777777777777777777777ULL);
172     #endif
173    
174     #undef CHECK_OCTAL
175    
176     printf("Testing decimal\n");
177    
178     #define CHECK_DECIMAL(type, value) \
179     do { \
180     type v; \
181     CHECK(RE("(-?[0-9]+)[uUlL]*").FullMatch(#value, &v)); \
182     CHECK_EQ(v, value); \
183     CHECK(RE("(-?[0-9a-fA-FxX]+)[uUlL]*").FullMatch(#value, CRadix(&v))); \
184     CHECK_EQ(v, value); \
185     } while(0)
186    
187     CHECK_DECIMAL(short, -1);
188     CHECK_DECIMAL(unsigned short, 9999);
189     CHECK_DECIMAL(int, -1000);
190     CHECK_DECIMAL(unsigned int, 12345U);
191     CHECK_DECIMAL(long, -10000000L);
192     CHECK_DECIMAL(unsigned long, 3083324652U);
193     #ifdef HAVE_LONG_LONG
194     CHECK_DECIMAL(long long, -100000000000000LL);
195     #endif
196     #ifdef HAVE_UNSIGNED_LONG_LONG
197     CHECK_DECIMAL(unsigned long long, 1234567890987654321ULL);
198     #endif
199    
200     #undef CHECK_DECIMAL
201    
202     }
203    
204     static void TestReplace() {
205     printf("Testing Replace\n");
206    
207     struct ReplaceTest {
208     const char *regexp;
209     const char *rewrite;
210     const char *original;
211     const char *single;
212     const char *global;
213     };
214     static const ReplaceTest tests[] = {
215     { "(qu|[b-df-hj-np-tv-z]*)([a-z]+)",
216     "\\2\\1ay",
217     "the quick brown fox jumps over the lazy dogs.",
218     "ethay quick brown fox jumps over the lazy dogs.",
219     "ethay ickquay ownbray oxfay umpsjay overay ethay azylay ogsday." },
220     { "\\w+",
221     "\\0-NOSPAM",
222     "paul.haahr@google.com",
223     "paul-NOSPAM.haahr@google.com",
224     "paul-NOSPAM.haahr-NOSPAM@google-NOSPAM.com-NOSPAM" },
225     { "^",
226     "(START)",
227     "foo",
228     "(START)foo",
229     "(START)foo" },
230     { "^",
231     "(START)",
232     "",
233     "(START)",
234     "(START)" },
235     { "$",
236     "(END)",
237     "",
238     "(END)",
239     "(END)" },
240     { "b",
241     "bb",
242     "ababababab",
243     "abbabababab",
244     "abbabbabbabbabb" },
245     { "b",
246     "bb",
247     "bbbbbb",
248     "bbbbbbb",
249     "bbbbbbbbbbbb" },
250     { "b+",
251     "bb",
252     "bbbbbb",
253     "bb",
254     "bb" },
255     { "b*",
256     "bb",
257     "bbbbbb",
258     "bb",
259     "bb" },
260     { "b*",
261     "bb",
262     "aaaaa",
263     "bbaaaaa",
264     "bbabbabbabbabbabb" },
265 nigel 91 { "b*",
266     "bb",
267     "aa\naa\n",
268     "bbaa\naa\n",
269     "bbabbabb\nbbabbabb\nbb" },
270     { "b*",
271     "bb",
272     "aa\raa\r",
273     "bbaa\raa\r",
274     "bbabbabb\rbbabbabb\rbb" },
275     { "b*",
276     "bb",
277     "aa\r\naa\r\n",
278     "bbaa\r\naa\r\n",
279     "bbabbabb\r\nbbabbabb\r\nbb" },
280     #ifdef SUPPORT_UTF8
281     { "b*",
282     "bb",
283     "\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8", // utf8
284     "bb\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8",
285     "bb\xE3\x83\x9B""bb""\xE3\x83\xBC""bb""\xE3\x83\xA0""bb""\xE3\x81\xB8""bb" },
286     { "b*",
287     "bb",
288     "\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n", // utf8
289     "bb\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n",
290     ("bb\xE3\x83\x9B""bb\r\nbb""\xE3\x83\xBC""bb\rbb""\xE3\x83\xA0"
291     "bb\nbb""\xE3\x81\xB8""bb\r\nbb") },
292     #endif
293 nigel 77 { "", NULL, NULL, NULL, NULL }
294     };
295    
296 nigel 91 #ifdef SUPPORT_UTF8
297     const bool support_utf8 = true;
298     #else
299     const bool support_utf8 = false;
300     #endif
301    
302 nigel 77 for (const ReplaceTest *t = tests; t->original != NULL; ++t) {
303 nigel 91 RE re(t->regexp, RE_Options(PCRE_NEWLINE_CRLF).set_utf8(support_utf8));
304     assert(re.error().empty());
305 nigel 77 string one(t->original);
306 nigel 91 CHECK(re.Replace(t->rewrite, &one));
307 nigel 77 CHECK_EQ(one, t->single);
308     string all(t->original);
309 nigel 91 CHECK(re.GlobalReplace(t->rewrite, &all) > 0);
310 nigel 77 CHECK_EQ(all, t->global);
311     }
312 nigel 91
313     // One final test: test \r\n replacement when we're not in CRLF mode
314     {
315     RE re("b*", RE_Options(PCRE_NEWLINE_CR).set_utf8(support_utf8));
316     assert(re.error().empty());
317     string all("aa\r\naa\r\n");
318     CHECK(re.GlobalReplace("bb", &all) > 0);
319     CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
320     }
321     {
322     RE re("b*", RE_Options(PCRE_NEWLINE_LF).set_utf8(support_utf8));
323     assert(re.error().empty());
324     string all("aa\r\naa\r\n");
325     CHECK(re.GlobalReplace("bb", &all) > 0);
326     CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
327     }
328     // TODO: test what happens when no PCRE_NEWLINE_* flag is set.
329     // Alas, the answer depends on how pcre was compiled.
330 nigel 77 }
331    
332     static void TestExtract() {
333     printf("Testing Extract\n");
334    
335     string s;
336    
337     CHECK(RE("(.*)@([^.]*)").Extract("\\2!\\1", "boris@kremvax.ru", &s));
338     CHECK_EQ(s, "kremvax!boris");
339    
340     // check the RE interface as well
341     CHECK(RE(".*").Extract("'\\0'", "foo", &s));
342     CHECK_EQ(s, "'foo'");
343     CHECK(!RE("bar").Extract("'\\0'", "baz", &s));
344     CHECK_EQ(s, "'foo'");
345     }
346    
347     static void TestConsume() {
348     printf("Testing Consume\n");
349    
350     string word;
351    
352     string s(" aaa b!@#$@#$cccc");
353     StringPiece input(s);
354    
355     RE r("\\s*(\\w+)"); // matches a word, possibly proceeded by whitespace
356     CHECK(r.Consume(&input, &word));
357     CHECK_EQ(word, "aaa");
358     CHECK(r.Consume(&input, &word));
359     CHECK_EQ(word, "b");
360     CHECK(! r.Consume(&input, &word));
361     }
362    
363     static void TestFindAndConsume() {
364     printf("Testing FindAndConsume\n");
365    
366     string word;
367    
368     string s(" aaa b!@#$@#$cccc");
369     StringPiece input(s);
370    
371     RE r("(\\w+)"); // matches a word
372     CHECK(r.FindAndConsume(&input, &word));
373     CHECK_EQ(word, "aaa");
374     CHECK(r.FindAndConsume(&input, &word));
375     CHECK_EQ(word, "b");
376     CHECK(r.FindAndConsume(&input, &word));
377     CHECK_EQ(word, "cccc");
378     CHECK(! r.FindAndConsume(&input, &word));
379     }
380    
381     static void TestMatchNumberPeculiarity() {
382     printf("Testing match-number peculiaraity\n");
383    
384     string word1;
385     string word2;
386     string word3;
387    
388     RE r("(foo)|(bar)|(baz)");
389     CHECK(r.PartialMatch("foo", &word1, &word2, &word3));
390     CHECK_EQ(word1, "foo");
391     CHECK_EQ(word2, "");
392     CHECK_EQ(word3, "");
393     CHECK(r.PartialMatch("bar", &word1, &word2, &word3));
394     CHECK_EQ(word1, "");
395     CHECK_EQ(word2, "bar");
396     CHECK_EQ(word3, "");
397     CHECK(r.PartialMatch("baz", &word1, &word2, &word3));
398     CHECK_EQ(word1, "");
399     CHECK_EQ(word2, "");
400     CHECK_EQ(word3, "baz");
401     CHECK(!r.PartialMatch("f", &word1, &word2, &word3));
402    
403     string a;
404     CHECK(RE("(foo)|hello").FullMatch("hello", &a));
405     CHECK_EQ(a, "");
406     }
407    
408 nigel 87 static void TestRecursion() {
409 nigel 77 printf("Testing recursion\n");
410    
411 nigel 87 // Get one string that passes (sometimes), one that never does.
412     string text_good("abcdefghijk");
413     string text_bad("acdefghijkl");
414    
415     // According to pcretest, matching text_good against (\w+)*b
416     // requires match_limit of at least 8192, and match_recursion_limit
417     // of at least 37.
418    
419     RE_Options options_ml;
420     options_ml.set_match_limit(8192);
421     RE re("(\\w+)*b", options_ml);
422     CHECK(re.PartialMatch(text_good) == true);
423     CHECK(re.PartialMatch(text_bad) == false);
424     CHECK(re.FullMatch(text_good) == false);
425     CHECK(re.FullMatch(text_bad) == false);
426    
427     options_ml.set_match_limit(1024);
428     RE re2("(\\w+)*b", options_ml);
429     CHECK(re2.PartialMatch(text_good) == false); // because of match_limit
430     CHECK(re2.PartialMatch(text_bad) == false);
431     CHECK(re2.FullMatch(text_good) == false);
432     CHECK(re2.FullMatch(text_bad) == false);
433    
434     RE_Options options_mlr;
435     options_mlr.set_match_limit_recursion(50);
436     RE re3("(\\w+)*b", options_mlr);
437     CHECK(re3.PartialMatch(text_good) == true);
438     CHECK(re3.PartialMatch(text_bad) == false);
439     CHECK(re3.FullMatch(text_good) == false);
440     CHECK(re3.FullMatch(text_bad) == false);
441    
442     options_mlr.set_match_limit_recursion(10);
443     RE re4("(\\w+)*b", options_mlr);
444     CHECK(re4.PartialMatch(text_good) == false);
445     CHECK(re4.PartialMatch(text_bad) == false);
446     CHECK(re4.FullMatch(text_good) == false);
447     CHECK(re4.FullMatch(text_bad) == false);
448 nigel 77 }
449    
450 nigel 93 // A meta-quoted string, interpreted as a pattern, should always match
451     // the original unquoted string.
452     static void TestQuoteMeta(string unquoted, RE_Options options = RE_Options()) {
453     string quoted = RE::QuoteMeta(unquoted);
454     RE re(quoted, options);
455     CHECK(re.FullMatch(unquoted));
456     }
457    
458     // A string containing meaningful regexp characters, which is then meta-
459     // quoted, should not generally match a string the unquoted string does.
460     static void NegativeTestQuoteMeta(string unquoted, string should_not_match,
461     RE_Options options = RE_Options()) {
462     string quoted = RE::QuoteMeta(unquoted);
463     RE re(quoted, options);
464     CHECK(!re.FullMatch(should_not_match));
465     }
466    
467     // Tests that quoted meta characters match their original strings,
468     // and that a few things that shouldn't match indeed do not.
469     static void TestQuotaMetaSimple() {
470     TestQuoteMeta("foo");
471     TestQuoteMeta("foo.bar");
472     TestQuoteMeta("foo\\.bar");
473     TestQuoteMeta("[1-9]");
474     TestQuoteMeta("1.5-2.0?");
475     TestQuoteMeta("\\d");
476     TestQuoteMeta("Who doesn't like ice cream?");
477     TestQuoteMeta("((a|b)c?d*e+[f-h]i)");
478     TestQuoteMeta("((?!)xxx).*yyy");
479     TestQuoteMeta("([");
480     }
481    
482     static void TestQuoteMetaSimpleNegative() {
483     NegativeTestQuoteMeta("foo", "bar");
484     NegativeTestQuoteMeta("...", "bar");
485     NegativeTestQuoteMeta("\\.", ".");
486     NegativeTestQuoteMeta("\\.", "..");
487     NegativeTestQuoteMeta("(a)", "a");
488     NegativeTestQuoteMeta("(a|b)", "a");
489     NegativeTestQuoteMeta("(a|b)", "(a)");
490     NegativeTestQuoteMeta("(a|b)", "a|b");
491     NegativeTestQuoteMeta("[0-9]", "0");
492     NegativeTestQuoteMeta("[0-9]", "0-9");
493     NegativeTestQuoteMeta("[0-9]", "[9]");
494     NegativeTestQuoteMeta("((?!)xxx)", "xxx");
495     }
496    
497     static void TestQuoteMetaLatin1() {
498     TestQuoteMeta("3\xb2 = 9");
499     }
500    
// Runs the QuoteMeta checks on multi-byte UTF-8 samples with the UTF8()
// options.  The whole body is compiled only when SUPPORT_UTF8 is defined.
static void TestQuoteMetaUtf8() {
#ifdef SUPPORT_UTF8
  const RE_Options utf8 = pcrecpp::UTF8();

  TestQuoteMeta("Pl\xc3\xa1\x63ido Domingo", utf8);
  TestQuoteMeta("xyz", utf8);                  // No fancy utf8
  TestQuoteMeta("\xc2\xb0", utf8);             // 2-byte utf8 (degree symbol)
  TestQuoteMeta("27\xc2\xb0 degrees", utf8);   // As a middle character
  TestQuoteMeta("\xe2\x80\xb3", utf8);         // 3-byte utf8 (double prime)
  TestQuoteMeta("\xf0\x9d\x85\x9f", utf8);     // 4-byte utf8 (music note)

  // Default options: interpreted as Latin-1, but should still work.
  TestQuoteMeta("27\xc2\xb0");

  // 2-byte utf8 (degree symbol) against a copy that has a backslash in
  // front of each of its two bytes.
  NegativeTestQuoteMeta("27\xc2\xb0", "27\\\xc2\\\xb0", utf8);
#endif
}
515    
516     static void TestQuoteMetaAll() {
517     printf("Testing QuoteMeta\n");
518     TestQuotaMetaSimple();
519     TestQuoteMetaSimpleNegative();
520     TestQuoteMetaLatin1();
521     TestQuoteMetaUtf8();
522     }
523    
524 nigel 81 //
525     // Options tests contributed by
526     // Giuseppe Maxia, CTO, Stardata s.r.l.
527     // July 2005
528     //
529     static void GetOneOptionResult(
530     const char *option_name,
531     const char *regex,
532     const char *str,
533     RE_Options options,
534     bool full,
535     string expected) {
536 nigel 77
537 nigel 81 printf("Testing Option <%s>\n", option_name);
538     if(VERBOSE_TEST)
539     printf("/%s/ finds \"%s\" within \"%s\" \n",
540     regex,
541     expected.c_str(),
542     str);
543     string captured("");
544     if (full)
545     RE(regex,options).FullMatch(str, &captured);
546     else
547     RE(regex,options).PartialMatch(str, &captured);
548     CHECK_EQ(captured, expected);
549     }
550    
551     static void TestOneOption(
552     const char *option_name,
553     const char *regex,
554     const char *str,
555     RE_Options options,
556     bool full,
557     bool assertive = true) {
558    
559     printf("Testing Option <%s>\n", option_name);
560     if (VERBOSE_TEST)
561     printf("'%s' %s /%s/ \n",
562     str,
563     (assertive? "matches" : "doesn't match"),
564     regex);
565     if (assertive) {
566     if (full)
567     CHECK(RE(regex,options).FullMatch(str));
568     else
569     CHECK(RE(regex,options).PartialMatch(str));
570     } else {
571     if (full)
572     CHECK(!RE(regex,options).FullMatch(str));
573     else
574     CHECK(!RE(regex,options).PartialMatch(str));
575     }
576     }
577    
578     static void Test_CASELESS() {
579     RE_Options options;
580     RE_Options options2;
581    
582     options.set_caseless(true);
583     TestOneOption("CASELESS (class)", "HELLO", "hello", options, false);
584     TestOneOption("CASELESS (class2)", "HELLO", "hello", options2.set_caseless(true), false);
585     TestOneOption("CASELESS (class)", "^[A-Z]+$", "Hello", options, false);
586    
587     TestOneOption("CASELESS (function)", "HELLO", "hello", pcrecpp::CASELESS(), false);
588     TestOneOption("CASELESS (function)", "^[A-Z]+$", "Hello", pcrecpp::CASELESS(), false);
589     options.set_caseless(false);
590     TestOneOption("no CASELESS", "HELLO", "hello", options, false, false);
591     }
592    
593     static void Test_MULTILINE() {
594     RE_Options options;
595     RE_Options options2;
596     const char *str = "HELLO\n" "cruel\n" "world\n";
597    
598     options.set_multiline(true);
599     TestOneOption("MULTILINE (class)", "^cruel$", str, options, false);
600     TestOneOption("MULTILINE (class2)", "^cruel$", str, options2.set_multiline(true), false);
601     TestOneOption("MULTILINE (function)", "^cruel$", str, pcrecpp::MULTILINE(), false);
602     options.set_multiline(false);
603     TestOneOption("no MULTILINE", "^cruel$", str, options, false, false);
604     }
605    
606     static void Test_DOTALL() {
607     RE_Options options;
608     RE_Options options2;
609     const char *str = "HELLO\n" "cruel\n" "world";
610    
611     options.set_dotall(true);
612     TestOneOption("DOTALL (class)", "HELLO.*world", str, options, true);
613     TestOneOption("DOTALL (class2)", "HELLO.*world", str, options2.set_dotall(true), true);
614     TestOneOption("DOTALL (function)", "HELLO.*world", str, pcrecpp::DOTALL(), true);
615     options.set_dotall(false);
616     TestOneOption("no DOTALL", "HELLO.*world", str, options, true, false);
617     }
618    
619     static void Test_DOLLAR_ENDONLY() {
620     RE_Options options;
621     RE_Options options2;
622     const char *str = "HELLO world\n";
623    
624     TestOneOption("no DOLLAR_ENDONLY", "world$", str, options, false);
625     options.set_dollar_endonly(true);
626     TestOneOption("DOLLAR_ENDONLY 1", "world$", str, options, false, false);
627     TestOneOption("DOLLAR_ENDONLY 2", "world$", str, options2.set_dollar_endonly(true), false, false);
628     }
629    
630     static void Test_EXTRA() {
631     RE_Options options;
632     const char *str = "HELLO";
633    
634     options.set_extra(true);
635     TestOneOption("EXTRA 1", "\\HELL\\O", str, options, true, false );
636     TestOneOption("EXTRA 2", "\\HELL\\O", str, RE_Options().set_extra(true), true, false );
637     options.set_extra(false);
638     TestOneOption("no EXTRA", "\\HELL\\O", str, options, true );
639     }
640    
641     static void Test_EXTENDED() {
642     RE_Options options;
643     RE_Options options2;
644     const char *str = "HELLO world";
645    
646     options.set_extended(true);
647     TestOneOption("EXTENDED (class)", "HELLO world", str, options, false, false);
648     TestOneOption("EXTENDED (class2)", "HELLO world", str, options2.set_extended(true), false, false);
649     TestOneOption("EXTENDED (class)",
650     "^ HE L{2} O "
651     "\\s+ "
652     "\\w+ $ ",
653     str,
654     options,
655     false);
656    
657     TestOneOption("EXTENDED (function)", "HELLO world", str, pcrecpp::EXTENDED(), false, false);
658     TestOneOption("EXTENDED (function)",
659     "^ HE L{2} O "
660     "\\s+ "
661     "\\w+ $ ",
662     str,
663     pcrecpp::EXTENDED(),
664     false);
665    
666     options.set_extended(false);
667     TestOneOption("no EXTENDED", "HELLO world", str, options, false);
668     }
669    
670     static void Test_NO_AUTO_CAPTURE() {
671     RE_Options options;
672     const char *str = "HELLO world";
673     string captured;
674    
675     printf("Testing Option <no NO_AUTO_CAPTURE>\n");
676     if (VERBOSE_TEST)
677     printf("parentheses capture text\n");
678     RE re("(world|universe)$", options);
679     CHECK(re.Extract("\\1", str , &captured));
680     CHECK_EQ(captured, "world");
681     options.set_no_auto_capture(true);
682     printf("testing Option <NO_AUTO_CAPTURE>\n");
683     if (VERBOSE_TEST)
684     printf("parentheses do not capture text\n");
685     re.Extract("\\1",str, &captured );
686     CHECK_EQ(captured, "world");
687     }
688    
689     static void Test_UNGREEDY() {
690     RE_Options options;
691     const char *str = "HELLO, 'this' is the 'world'";
692    
693     options.set_ungreedy(true);
694     GetOneOptionResult("UNGREEDY 1", "('.*')", str, options, false, "'this'" );
695     GetOneOptionResult("UNGREEDY 2", "('.*')", str, RE_Options().set_ungreedy(true), false, "'this'" );
696     GetOneOptionResult("UNGREEDY", "('.*?')", str, options, false, "'this' is the 'world'" );
697    
698     options.set_ungreedy(false);
699     GetOneOptionResult("no UNGREEDY", "('.*')", str, options, false, "'this' is the 'world'" );
700     GetOneOptionResult("no UNGREEDY", "('.*?')", str, options, false, "'this'" );
701     }
702    
703     static void Test_all_options() {
704     const char *str = "HELLO\n" "cruel\n" "world";
705     RE_Options options;
706     options.set_all_options(PCRE_CASELESS | PCRE_DOTALL);
707    
708     TestOneOption("all_options (CASELESS|DOTALL)", "^hello.*WORLD", str , options, false);
709     options.set_all_options(0);
710     TestOneOption("all_options (0)", "^hello.*WORLD", str , options, false, false);
711     options.set_all_options(PCRE_MULTILINE | PCRE_EXTENDED);
712    
713     TestOneOption("all_options (MULTILINE|EXTENDED)", " ^ c r u e l $ ", str, options, false);
714     TestOneOption("all_options (MULTILINE|EXTENDED) with constructor",
715     " ^ c r u e l $ ",
716     str,
717     RE_Options(PCRE_MULTILINE | PCRE_EXTENDED),
718     false);
719    
720     TestOneOption("all_options (MULTILINE|EXTENDED) with concatenation",
721     " ^ c r u e l $ ",
722     str,
723     RE_Options()
724     .set_multiline(true)
725     .set_extended(true),
726     false);
727    
728     options.set_all_options(0);
729     TestOneOption("all_options (0)", "^ c r u e l $", str, options, false, false);
730    
731     }
732    
733     static void TestOptions() {
734     printf("Testing Options\n");
735     Test_CASELESS();
736     Test_MULTILINE();
737     Test_DOTALL();
738     Test_DOLLAR_ENDONLY();
739     Test_EXTENDED();
740     Test_NO_AUTO_CAPTURE();
741     Test_UNGREEDY();
742     Test_EXTRA();
743     Test_all_options();
744     }
745    
746 nigel 93 static void TestConstructors() {
747     printf("Testing constructors\n");
748    
749     RE_Options options;
750     options.set_dotall(true);
751     const char *str = "HELLO\n" "cruel\n" "world";
752    
753     RE orig("HELLO.*world", options);
754     CHECK(orig.FullMatch(str));
755    
756     RE copy1(orig);
757     CHECK(copy1.FullMatch(str));
758    
759     RE copy2("not a match");
760     CHECK(!copy2.FullMatch(str));
761     copy2 = copy1;
762     CHECK(copy2.FullMatch(str));
763     copy2 = orig;
764     CHECK(copy2.FullMatch(str));
765    
766     // Make sure when we assign to ourselves, nothing bad happens
767     orig = orig;
768     copy1 = copy1;
769     copy2 = copy2;
770     CHECK(orig.FullMatch(str));
771     CHECK(copy1.FullMatch(str));
772     CHECK(copy2.FullMatch(str));
773     }
774    
775 nigel 77 int main(int argc, char** argv) {
776     // Treat any flag as --help
777     if (argc > 1 && argv[1][0] == '-') {
778     printf("Usage: %s [timing1|timing2|timing3 num-iters]\n"
779     " If 'timingX ###' is specified, run the given timing test\n"
780     " with the given number of iterations, rather than running\n"
781     " the default corectness test.\n", argv[0]);
782     return 0;
783     }
784    
785     if (argc > 1) {
786     if ( argc == 2 || atoi(argv[2]) == 0) {
787     printf("timing mode needs a num-iters argument\n");
788     return 1;
789     }
790     if (!strcmp(argv[1], "timing1"))
791     Timing1(atoi(argv[2]));
792     else if (!strcmp(argv[1], "timing2"))
793     Timing2(atoi(argv[2]));
794     else if (!strcmp(argv[1], "timing3"))
795     Timing3(atoi(argv[2]));
796     else
797     printf("Unknown argument '%s'\n", argv[1]);
798     return 0;
799     }
800    
801     printf("Testing FullMatch\n");
802    
803     int i;
804     string s;
805    
806     /***** FullMatch with no args *****/
807    
808     CHECK(RE("h.*o").FullMatch("hello"));
809 ph10 179 CHECK(!RE("h.*o").FullMatch("othello")); // Must be anchored at front
810     CHECK(!RE("h.*o").FullMatch("hello!")); // Must be anchored at end
811     CHECK(RE("a*").FullMatch("aaaa")); // Fullmatch with normal op
812     CHECK(RE("a*?").FullMatch("aaaa")); // Fullmatch with nongreedy op
813     CHECK(RE("a*?\\z").FullMatch("aaaa")); // Two unusual ops
814 nigel 77
815     /***** FullMatch with args *****/
816    
817     // Zero-arg
818     CHECK(RE("\\d+").FullMatch("1001"));
819    
820     // Single-arg
821     CHECK(RE("(\\d+)").FullMatch("1001", &i));
822     CHECK_EQ(i, 1001);
823     CHECK(RE("(-?\\d+)").FullMatch("-123", &i));
824     CHECK_EQ(i, -123);
825     CHECK(!RE("()\\d+").FullMatch("10", &i));
826     CHECK(!RE("(\\d+)").FullMatch("1234567890123456789012345678901234567890",
827     &i));
828    
829     // Digits surrounding integer-arg
830     CHECK(RE("1(\\d*)4").FullMatch("1234", &i));
831     CHECK_EQ(i, 23);
832     CHECK(RE("(\\d)\\d+").FullMatch("1234", &i));
833     CHECK_EQ(i, 1);
834     CHECK(RE("(-\\d)\\d+").FullMatch("-1234", &i));
835     CHECK_EQ(i, -1);
836     CHECK(RE("(\\d)").PartialMatch("1234", &i));
837     CHECK_EQ(i, 1);
838     CHECK(RE("(-\\d)").PartialMatch("-1234", &i));
839     CHECK_EQ(i, -1);
840    
841     // String-arg
842     CHECK(RE("h(.*)o").FullMatch("hello", &s));
843     CHECK_EQ(s, string("ell"));
844    
845     // StringPiece-arg
846     StringPiece sp;
847     CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &sp, &i));
848     CHECK_EQ(sp.size(), 4);
849     CHECK(memcmp(sp.data(), "ruby", 4) == 0);
850     CHECK_EQ(i, 1234);
851    
852     // Multi-arg
853     CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &s, &i));
854     CHECK_EQ(s, string("ruby"));
855     CHECK_EQ(i, 1234);
856    
857     // Ignored arg
858     CHECK(RE("(\\w+)(:)(\\d+)").FullMatch("ruby:1234", &s, (void*)NULL, &i));
859     CHECK_EQ(s, string("ruby"));
860     CHECK_EQ(i, 1234);
861    
862     // Type tests
863     {
864     char c;
865     CHECK(RE("(H)ello").FullMatch("Hello", &c));
866     CHECK_EQ(c, 'H');
867     }
868     {
869     unsigned char c;
870     CHECK(RE("(H)ello").FullMatch("Hello", &c));
871     CHECK_EQ(c, static_cast<unsigned char>('H'));
872     }
873     {
874     short v;
875     CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
876     CHECK(RE("(-?\\d+)").FullMatch("-100", &v)); CHECK_EQ(v, -100);
877     CHECK(RE("(-?\\d+)").FullMatch("32767", &v)); CHECK_EQ(v, 32767);
878     CHECK(RE("(-?\\d+)").FullMatch("-32768", &v)); CHECK_EQ(v, -32768);
879     CHECK(!RE("(-?\\d+)").FullMatch("-32769", &v));
880     CHECK(!RE("(-?\\d+)").FullMatch("32768", &v));
881     }
882     {
883     unsigned short v;
884     CHECK(RE("(\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
885     CHECK(RE("(\\d+)").FullMatch("32767", &v)); CHECK_EQ(v, 32767);
886     CHECK(RE("(\\d+)").FullMatch("65535", &v)); CHECK_EQ(v, 65535);
887     CHECK(!RE("(\\d+)").FullMatch("65536", &v));
888     }
889     {
890     int v;
891     static const int max_value = 0x7fffffff;
892     static const int min_value = -max_value - 1;
893     CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
894     CHECK(RE("(-?\\d+)").FullMatch("-100", &v)); CHECK_EQ(v, -100);
895     CHECK(RE("(-?\\d+)").FullMatch("2147483647", &v)); CHECK_EQ(v, max_value);
896     CHECK(RE("(-?\\d+)").FullMatch("-2147483648", &v)); CHECK_EQ(v, min_value);
897     CHECK(!RE("(-?\\d+)").FullMatch("-2147483649", &v));
898     CHECK(!RE("(-?\\d+)").FullMatch("2147483648", &v));
899     }
900     {
901     unsigned int v;
902     static const unsigned int max_value = 0xfffffffful;
903     CHECK(RE("(\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
904     CHECK(RE("(\\d+)").FullMatch("4294967295", &v)); CHECK_EQ(v, max_value);
905     CHECK(!RE("(\\d+)").FullMatch("4294967296", &v));
906     }
907     #ifdef HAVE_LONG_LONG
908 ph10 193 # if defined(__MINGW__) || defined(__MINGW32__)
909     # define LLD "%I64d"
910     # else
911     # define LLD "%lld"
912     # endif
913 nigel 77 {
914     long long v;
915     static const long long max_value = 0x7fffffffffffffffLL;
916     static const long long min_value = -max_value - 1;
917     char buf[32];
918    
919     CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
920     CHECK(RE("(-?\\d+)").FullMatch("-100",&v)); CHECK_EQ(v, -100);
921    
922 ph10 193 snprintf(buf, sizeof(buf), LLD, max_value);
923 nigel 77 CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
924    
925 ph10 193 snprintf(buf, sizeof(buf), LLD, min_value);
926 nigel 77 CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, min_value);
927    
928 ph10 193 snprintf(buf, sizeof(buf), LLD, max_value);
929 nigel 77 assert(buf[strlen(buf)-1] != '9');
930     buf[strlen(buf)-1]++;
931     CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
932    
933 ph10 193 snprintf(buf, sizeof(buf), LLD, min_value);
934 nigel 77 assert(buf[strlen(buf)-1] != '9');
935     buf[strlen(buf)-1]++;
936     CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
937     }
938     #endif
939     #if defined HAVE_UNSIGNED_LONG_LONG && defined HAVE_LONG_LONG
940     {
941     unsigned long long v;
942     long long v2;
943     static const unsigned long long max_value = 0xffffffffffffffffULL;
944     char buf[32];
945    
946     CHECK(RE("(-?\\d+)").FullMatch("100",&v)); CHECK_EQ(v, 100);
947     CHECK(RE("(-?\\d+)").FullMatch("-100",&v2)); CHECK_EQ(v2, -100);
948    
949     snprintf(buf, sizeof(buf), "%llu", max_value);
950     CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
951    
952     assert(buf[strlen(buf)-1] != '9');
953     buf[strlen(buf)-1]++;
954     CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
955     }
956     #endif
957     {
958     float v;
959     CHECK(RE("(.*)").FullMatch("100", &v));
960     CHECK(RE("(.*)").FullMatch("-100.", &v));
961     CHECK(RE("(.*)").FullMatch("1e23", &v));
962     }
963     {
964     double v;
965     CHECK(RE("(.*)").FullMatch("100", &v));
966     CHECK(RE("(.*)").FullMatch("-100.", &v));
967     CHECK(RE("(.*)").FullMatch("1e23", &v));
968     }
969    
970     // Check that matching is fully anchored
971     CHECK(!RE("(\\d+)").FullMatch("x1001", &i));
972     CHECK(!RE("(\\d+)").FullMatch("1001x", &i));
973     CHECK(RE("x(\\d+)").FullMatch("x1001", &i)); CHECK_EQ(i, 1001);
974     CHECK(RE("(\\d+)x").FullMatch("1001x", &i)); CHECK_EQ(i, 1001);
975    
976     // Braces
977     CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcd"));
978     CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcde"));
979     CHECK(!RE("[0-9a-f+.-]{5,}").FullMatch("0abc"));
980    
981     // Complicated RE
982     CHECK(RE("foo|bar|[A-Z]").FullMatch("foo"));
983     CHECK(RE("foo|bar|[A-Z]").FullMatch("bar"));
984     CHECK(RE("foo|bar|[A-Z]").FullMatch("X"));
985     CHECK(!RE("foo|bar|[A-Z]").FullMatch("XY"));
986    
987     // Check full-match handling (needs '$' tacked on internally)
988     CHECK(RE("fo|foo").FullMatch("fo"));
989     CHECK(RE("fo|foo").FullMatch("foo"));
990     CHECK(RE("fo|foo$").FullMatch("fo"));
991     CHECK(RE("fo|foo$").FullMatch("foo"));
992     CHECK(RE("foo$").FullMatch("foo"));
993     CHECK(!RE("foo\\$").FullMatch("foo$bar"));
994     CHECK(!RE("fo|bar").FullMatch("fox"));
995    
996     // Uncomment the following if we change the handling of '$' to
997     // prevent it from matching a trailing newline
998     if (false) {
999     // Check that we don't get bitten by pcre's special handling of a
1000     // '\n' at the end of the string matching '$'
1001     CHECK(!RE("foo$").PartialMatch("foo\n"));
1002     }
1003    
1004     // Number of args
1005     int a[16];
1006     CHECK(RE("").FullMatch(""));
1007    
1008     memset(a, 0, sizeof(0));
1009     CHECK(RE("(\\d){1}").FullMatch("1",
1010     &a[0]));
1011     CHECK_EQ(a[0], 1);
1012    
1013     memset(a, 0, sizeof(0));
1014     CHECK(RE("(\\d)(\\d)").FullMatch("12",
1015     &a[0], &a[1]));
1016     CHECK_EQ(a[0], 1);
1017     CHECK_EQ(a[1], 2);
1018    
1019     memset(a, 0, sizeof(0));
1020     CHECK(RE("(\\d)(\\d)(\\d)").FullMatch("123",
1021     &a[0], &a[1], &a[2]));
1022     CHECK_EQ(a[0], 1);
1023     CHECK_EQ(a[1], 2);
1024     CHECK_EQ(a[2], 3);
1025    
1026     memset(a, 0, sizeof(0));
1027     CHECK(RE("(\\d)(\\d)(\\d)(\\d)").FullMatch("1234",
1028     &a[0], &a[1], &a[2], &a[3]));
1029     CHECK_EQ(a[0], 1);
1030     CHECK_EQ(a[1], 2);
1031     CHECK_EQ(a[2], 3);
1032     CHECK_EQ(a[3], 4);
1033    
1034     memset(a, 0, sizeof(0));
1035     CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("12345",
1036     &a[0], &a[1], &a[2],
1037     &a[3], &a[4]));
1038     CHECK_EQ(a[0], 1);
1039     CHECK_EQ(a[1], 2);
1040     CHECK_EQ(a[2], 3);
1041     CHECK_EQ(a[3], 4);
1042     CHECK_EQ(a[4], 5);
1043    
1044     memset(a, 0, sizeof(0));
1045     CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("123456",
1046     &a[0], &a[1], &a[2],
1047     &a[3], &a[4], &a[5]));
1048     CHECK_EQ(a[0], 1);
1049     CHECK_EQ(a[1], 2);
1050     CHECK_EQ(a[2], 3);
1051     CHECK_EQ(a[3], 4);
1052     CHECK_EQ(a[4], 5);
1053     CHECK_EQ(a[5], 6);
1054    
1055     memset(a, 0, sizeof(0));
1056     CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("1234567",
1057     &a[0], &a[1], &a[2], &a[3],
1058     &a[4], &a[5], &a[6]));
1059     CHECK_EQ(a[0], 1);
1060     CHECK_EQ(a[1], 2);
1061     CHECK_EQ(a[2], 3);
1062     CHECK_EQ(a[3], 4);
1063     CHECK_EQ(a[4], 5);
1064     CHECK_EQ(a[5], 6);
1065     CHECK_EQ(a[6], 7);
1066    
1067     memset(a, 0, sizeof(0));
1068     CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)"
1069     "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch(
1070     "1234567890123456",
1071     &a[0], &a[1], &a[2], &a[3],
1072     &a[4], &a[5], &a[6], &a[7],
1073     &a[8], &a[9], &a[10], &a[11],
1074     &a[12], &a[13], &a[14], &a[15]));
1075     CHECK_EQ(a[0], 1);
1076     CHECK_EQ(a[1], 2);
1077     CHECK_EQ(a[2], 3);
1078     CHECK_EQ(a[3], 4);
1079     CHECK_EQ(a[4], 5);
1080     CHECK_EQ(a[5], 6);
1081     CHECK_EQ(a[6], 7);
1082     CHECK_EQ(a[7], 8);
1083     CHECK_EQ(a[8], 9);
1084     CHECK_EQ(a[9], 0);
1085     CHECK_EQ(a[10], 1);
1086     CHECK_EQ(a[11], 2);
1087     CHECK_EQ(a[12], 3);
1088     CHECK_EQ(a[13], 4);
1089     CHECK_EQ(a[14], 5);
1090     CHECK_EQ(a[15], 6);
1091    
1092     /***** PartialMatch *****/
1093    
1094     printf("Testing PartialMatch\n");
1095    
1096     CHECK(RE("h.*o").PartialMatch("hello"));
1097     CHECK(RE("h.*o").PartialMatch("othello"));
1098     CHECK(RE("h.*o").PartialMatch("hello!"));
1099     CHECK(RE("((((((((((((((((((((x))))))))))))))))))))").PartialMatch("x"));
1100    
1101 nigel 93 /***** other tests *****/
1102    
1103 nigel 77 RadixTests();
1104     TestReplace();
1105     TestExtract();
1106     TestConsume();
1107     TestFindAndConsume();
1108 nigel 93 TestQuoteMetaAll();
1109 nigel 77 TestMatchNumberPeculiarity();
1110    
1111     // Check the pattern() accessor
1112     {
1113     const string kPattern = "http://([^/]+)/.*";
1114     const RE re(kPattern);
1115     CHECK_EQ(kPattern, re.pattern());
1116     }
1117    
1118     // Check RE error field.
1119     {
1120     RE re("foo");
1121     CHECK(re.error().empty()); // Must have no error
1122     }
1123    
1124     #ifdef SUPPORT_UTF8
1125     // Check UTF-8 handling
1126     {
1127     printf("Testing UTF-8 handling\n");
1128    
1129     // Three Japanese characters (nihongo)
1130     const char utf8_string[] = {
1131     0xe6, 0x97, 0xa5, // 65e5
1132     0xe6, 0x9c, 0xac, // 672c
1133     0xe8, 0xaa, 0x9e, // 8a9e
1134     0
1135     };
1136     const char utf8_pattern[] = {
1137     '.',
1138     0xe6, 0x9c, 0xac, // 672c
1139     '.',
1140     0
1141     };
1142    
1143     // Both should match in either mode, bytes or UTF-8
1144     RE re_test1(".........");
1145     CHECK(re_test1.FullMatch(utf8_string));
1146     RE re_test2("...", pcrecpp::UTF8());
1147     CHECK(re_test2.FullMatch(utf8_string));
1148    
1149     // Check that '.' matches one byte or UTF-8 character
1150     // according to the mode.
1151     string ss;
1152     RE re_test3("(.)");
1153     CHECK(re_test3.PartialMatch(utf8_string, &ss));
1154     CHECK_EQ(ss, string("\xe6"));
1155     RE re_test4("(.)", pcrecpp::UTF8());
1156     CHECK(re_test4.PartialMatch(utf8_string, &ss));
1157     CHECK_EQ(ss, string("\xe6\x97\xa5"));
1158    
1159     // Check that string matches itself in either mode
1160     RE re_test5(utf8_string);
1161     CHECK(re_test5.FullMatch(utf8_string));
1162     RE re_test6(utf8_string, pcrecpp::UTF8());
1163     CHECK(re_test6.FullMatch(utf8_string));
1164    
1165     // Check that pattern matches string only in UTF8 mode
1166     RE re_test7(utf8_pattern);
1167     CHECK(!re_test7.FullMatch(utf8_string));
1168     RE re_test8(utf8_pattern, pcrecpp::UTF8());
1169     CHECK(re_test8.FullMatch(utf8_string));
1170     }
1171    
1172     // Check that ungreedy, UTF8 regular expressions don't match when they
1173     // oughtn't -- see bug 82246.
1174     {
1175     // This code always worked.
1176     const char* pattern = "\\w+X";
1177     const string target = "a aX";
1178     RE match_sentence(pattern);
1179     RE match_sentence_re(pattern, pcrecpp::UTF8());
1180    
1181     CHECK(!match_sentence.FullMatch(target));
1182     CHECK(!match_sentence_re.FullMatch(target));
1183     }
1184    
1185     {
1186     const char* pattern = "(?U)\\w+X";
1187     const string target = "a aX";
1188     RE match_sentence(pattern);
1189     RE match_sentence_re(pattern, pcrecpp::UTF8());
1190    
1191     CHECK(!match_sentence.FullMatch(target));
1192     CHECK(!match_sentence_re.FullMatch(target));
1193     }
1194     #endif /* def SUPPORT_UTF8 */
1195    
1196     printf("Testing error reporting\n");
1197    
1198     { RE re("a\\1"); CHECK(!re.error().empty()); }
1199     {
1200     RE re("a[x");
1201     CHECK(!re.error().empty());
1202     }
1203     {
1204     RE re("a[z-a]");
1205     CHECK(!re.error().empty());
1206     }
1207     {
1208     RE re("a[[:foobar:]]");
1209     CHECK(!re.error().empty());
1210     }
1211     {
1212     RE re("a(b");
1213     CHECK(!re.error().empty());
1214     }
1215     {
1216     RE re("a\\");
1217     CHECK(!re.error().empty());
1218     }
1219    
1220 nigel 87 // Test that recursion is stopped
1221     TestRecursion();
1222 nigel 77
1223 nigel 81 // Test Options
1224     if (getenv("VERBOSE_TEST") != NULL)
1225     VERBOSE_TEST = true;
1226     TestOptions();
1227    
1228 nigel 93 // Test the constructors
1229     TestConstructors();
1230    
1231 nigel 77 // Done
1232     printf("OK\n");
1233    
1234     return 0;
1235     }

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12