/[pcre]/code/trunk/pcrecpp_unittest.cc
ViewVC logotype

Contents of /code/trunk/pcrecpp_unittest.cc

Parent Directory Parent Directory | Revision Log Revision Log


Revision 200 - (hide annotations) (download)
Wed Aug 1 09:10:40 2007 UTC (6 years, 8 months ago) by ph10
File size: 37643 byte(s)
Correct errors in previous patch; tidy for test release.

1 nigel 93 // -*- coding: utf-8 -*-
2     //
3     // Copyright (c) 2005 - 2006, Google Inc.
4 nigel 77 // All rights reserved.
5     //
6     // Redistribution and use in source and binary forms, with or without
7     // modification, are permitted provided that the following conditions are
8     // met:
9     //
10     // * Redistributions of source code must retain the above copyright
11     // notice, this list of conditions and the following disclaimer.
12     // * Redistributions in binary form must reproduce the above
13     // copyright notice, this list of conditions and the following disclaimer
14     // in the documentation and/or other materials provided with the
15     // distribution.
16     // * Neither the name of Google Inc. nor the names of its
17     // contributors may be used to endorse or promote products derived from
18     // this software without specific prior written permission.
19     //
20     // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21     // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22     // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23     // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24     // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25     // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26     // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27     // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28     // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29     // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30     // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31     //
32     // Author: Sanjay Ghemawat
33     //
34     // TODO: Test extractions for PartialMatch/Consume
35    
36 ph10 200 #ifdef HAVE_CONFIG_H
37 ph10 199 #include <config.h>
38 ph10 200 #endif
39 ph10 199
40 nigel 77 #include <stdio.h>
41 nigel 91 #include <cassert>
42 nigel 77 #include <vector>
43     #include "pcrecpp.h"
44    
45     using pcrecpp::StringPiece;
46     using pcrecpp::RE;
47     using pcrecpp::RE_Options;
48     using pcrecpp::Hex;
49     using pcrecpp::Octal;
50     using pcrecpp::CRadix;
51    
// When true, the option tests print extra detail about the pattern and
// subject being exercised.
static bool VERBOSE_TEST = false;

// CHECK dies with a fatal error if condition is not true.  It is *not*
// controlled by NDEBUG, so the check will be executed regardless of
// compilation mode.  Therefore, it is safe to do things like:
//    CHECK_EQ(fp->Write(x), 4)
#define CHECK(condition) do {                                \
    if (!(condition)) {                                      \
      fprintf(stderr, "%s:%d: Check failed: %s\n",           \
              __FILE__, __LINE__, #condition);               \
      exit(1);                                               \
    }                                                        \
  } while (0)

// CHECK_EQ(a, b) dies unless a == b.  Both operands are parenthesized so
// that an argument containing an operator of lower precedence than ==
// (for example `x & mask` or `c ? p : q`) is compared as a whole instead of
// being re-associated around the ==.
#define CHECK_EQ(a, b) CHECK((a) == (b))
67    
68     static void Timing1(int num_iters) {
69     // Same pattern lots of times
70     RE pattern("ruby:\\d+");
71     StringPiece p("ruby:1234");
72     for (int j = num_iters; j > 0; j--) {
73     CHECK(pattern.FullMatch(p));
74     }
75     }
76    
77     static void Timing2(int num_iters) {
78     // Same pattern lots of times
79     RE pattern("ruby:(\\d+)");
80     int i;
81     for (int j = num_iters; j > 0; j--) {
82     CHECK(pattern.FullMatch("ruby:1234", &i));
83     CHECK_EQ(i, 1234);
84     }
85     }
86    
87     static void Timing3(int num_iters) {
88     string text_string;
89     for (int j = num_iters; j > 0; j--) {
90     text_string += "this is another line\n";
91     }
92    
93     RE line_matcher(".*\n");
94     string line;
95     StringPiece text(text_string);
96     int counter = 0;
97     while (line_matcher.Consume(&text)) {
98     counter++;
99     }
100     printf("Matched %d lines\n", counter);
101     }
102    
#if 0 // uncomment this if you have a way of defining VirtualProcessSize()

// Check for memory leaks: constructs 100000 distinct RE objects, samples the
// process size half-way through and at the end, and requires that the second
// half grew the process by less than 2%.
static void LeakTest() {
  unsigned long long initial_size = 0;
  for (int i = 0; i < 100000; i++) {
    if (i == 50000) {
      initial_size = VirtualProcessSize();
      printf("Size after 50000: %llu\n", initial_size);
    }
    char buf[100];
    snprintf(buf, sizeof(buf), "pat%09d", i);
    RE newre(buf);
  }
  // Same type as initial_size, matching the %llu conversion below.
  unsigned long long final_size = VirtualProcessSize();
  printf("Size after 100000: %llu\n", final_size);
  // Subtract in floating point: if the process shrank, an unsigned
  // subtraction would wrap around and report an enormous "growth".
  const double growth =
      (double(final_size) - double(initial_size)) / double(final_size);
  printf("Growth: %0.2f%%", growth * 100);
  CHECK(growth < 0.02);    // Allow < 2% growth
}

#endif
125    
126     static void RadixTests() {
127     printf("Testing hex\n");
128    
129     #define CHECK_HEX(type, value) \
130     do { \
131     type v; \
132     CHECK(RE("([0-9a-fA-F]+)[uUlL]*").FullMatch(#value, Hex(&v))); \
133     CHECK_EQ(v, 0x ## value); \
134     CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0x" #value, CRadix(&v))); \
135     CHECK_EQ(v, 0x ## value); \
136     } while(0)
137    
138     CHECK_HEX(short, 2bad);
139     CHECK_HEX(unsigned short, 2badU);
140     CHECK_HEX(int, dead);
141     CHECK_HEX(unsigned int, deadU);
142     CHECK_HEX(long, 7eadbeefL);
143     CHECK_HEX(unsigned long, deadbeefUL);
144     #ifdef HAVE_LONG_LONG
145     CHECK_HEX(long long, 12345678deadbeefLL);
146     #endif
147     #ifdef HAVE_UNSIGNED_LONG_LONG
148     CHECK_HEX(unsigned long long, cafebabedeadbeefULL);
149     #endif
150    
151     #undef CHECK_HEX
152    
153     printf("Testing octal\n");
154    
155     #define CHECK_OCTAL(type, value) \
156     do { \
157     type v; \
158     CHECK(RE("([0-7]+)[uUlL]*").FullMatch(#value, Octal(&v))); \
159     CHECK_EQ(v, 0 ## value); \
160     CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0" #value, CRadix(&v))); \
161     CHECK_EQ(v, 0 ## value); \
162     } while(0)
163    
164     CHECK_OCTAL(short, 77777);
165     CHECK_OCTAL(unsigned short, 177777U);
166     CHECK_OCTAL(int, 17777777777);
167     CHECK_OCTAL(unsigned int, 37777777777U);
168     CHECK_OCTAL(long, 17777777777L);
169     CHECK_OCTAL(unsigned long, 37777777777UL);
170     #ifdef HAVE_LONG_LONG
171     CHECK_OCTAL(long long, 777777777777777777777LL);
172     #endif
173     #ifdef HAVE_UNSIGNED_LONG_LONG
174     CHECK_OCTAL(unsigned long long, 1777777777777777777777ULL);
175     #endif
176    
177     #undef CHECK_OCTAL
178    
179     printf("Testing decimal\n");
180    
181     #define CHECK_DECIMAL(type, value) \
182     do { \
183     type v; \
184     CHECK(RE("(-?[0-9]+)[uUlL]*").FullMatch(#value, &v)); \
185     CHECK_EQ(v, value); \
186     CHECK(RE("(-?[0-9a-fA-FxX]+)[uUlL]*").FullMatch(#value, CRadix(&v))); \
187     CHECK_EQ(v, value); \
188     } while(0)
189    
190     CHECK_DECIMAL(short, -1);
191     CHECK_DECIMAL(unsigned short, 9999);
192     CHECK_DECIMAL(int, -1000);
193     CHECK_DECIMAL(unsigned int, 12345U);
194     CHECK_DECIMAL(long, -10000000L);
195     CHECK_DECIMAL(unsigned long, 3083324652U);
196     #ifdef HAVE_LONG_LONG
197     CHECK_DECIMAL(long long, -100000000000000LL);
198     #endif
199     #ifdef HAVE_UNSIGNED_LONG_LONG
200     CHECK_DECIMAL(unsigned long long, 1234567890987654321ULL);
201     #endif
202    
203     #undef CHECK_DECIMAL
204    
205     }
206    
207     static void TestReplace() {
208     printf("Testing Replace\n");
209    
210     struct ReplaceTest {
211     const char *regexp;
212     const char *rewrite;
213     const char *original;
214     const char *single;
215     const char *global;
216     };
217     static const ReplaceTest tests[] = {
218     { "(qu|[b-df-hj-np-tv-z]*)([a-z]+)",
219     "\\2\\1ay",
220     "the quick brown fox jumps over the lazy dogs.",
221     "ethay quick brown fox jumps over the lazy dogs.",
222     "ethay ickquay ownbray oxfay umpsjay overay ethay azylay ogsday." },
223     { "\\w+",
224     "\\0-NOSPAM",
225     "paul.haahr@google.com",
226     "paul-NOSPAM.haahr@google.com",
227     "paul-NOSPAM.haahr-NOSPAM@google-NOSPAM.com-NOSPAM" },
228     { "^",
229     "(START)",
230     "foo",
231     "(START)foo",
232     "(START)foo" },
233     { "^",
234     "(START)",
235     "",
236     "(START)",
237     "(START)" },
238     { "$",
239     "(END)",
240     "",
241     "(END)",
242     "(END)" },
243     { "b",
244     "bb",
245     "ababababab",
246     "abbabababab",
247     "abbabbabbabbabb" },
248     { "b",
249     "bb",
250     "bbbbbb",
251     "bbbbbbb",
252     "bbbbbbbbbbbb" },
253     { "b+",
254     "bb",
255     "bbbbbb",
256     "bb",
257     "bb" },
258     { "b*",
259     "bb",
260     "bbbbbb",
261     "bb",
262     "bb" },
263     { "b*",
264     "bb",
265     "aaaaa",
266     "bbaaaaa",
267     "bbabbabbabbabbabb" },
268 nigel 91 { "b*",
269     "bb",
270     "aa\naa\n",
271     "bbaa\naa\n",
272     "bbabbabb\nbbabbabb\nbb" },
273     { "b*",
274     "bb",
275     "aa\raa\r",
276     "bbaa\raa\r",
277     "bbabbabb\rbbabbabb\rbb" },
278     { "b*",
279     "bb",
280     "aa\r\naa\r\n",
281     "bbaa\r\naa\r\n",
282     "bbabbabb\r\nbbabbabb\r\nbb" },
283     #ifdef SUPPORT_UTF8
284     { "b*",
285     "bb",
286     "\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8", // utf8
287     "bb\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8",
288     "bb\xE3\x83\x9B""bb""\xE3\x83\xBC""bb""\xE3\x83\xA0""bb""\xE3\x81\xB8""bb" },
289     { "b*",
290     "bb",
291     "\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n", // utf8
292     "bb\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n",
293     ("bb\xE3\x83\x9B""bb\r\nbb""\xE3\x83\xBC""bb\rbb""\xE3\x83\xA0"
294     "bb\nbb""\xE3\x81\xB8""bb\r\nbb") },
295     #endif
296 nigel 77 { "", NULL, NULL, NULL, NULL }
297     };
298    
299 nigel 91 #ifdef SUPPORT_UTF8
300     const bool support_utf8 = true;
301     #else
302     const bool support_utf8 = false;
303     #endif
304    
305 nigel 77 for (const ReplaceTest *t = tests; t->original != NULL; ++t) {
306 nigel 91 RE re(t->regexp, RE_Options(PCRE_NEWLINE_CRLF).set_utf8(support_utf8));
307     assert(re.error().empty());
308 nigel 77 string one(t->original);
309 nigel 91 CHECK(re.Replace(t->rewrite, &one));
310 nigel 77 CHECK_EQ(one, t->single);
311     string all(t->original);
312 nigel 91 CHECK(re.GlobalReplace(t->rewrite, &all) > 0);
313 nigel 77 CHECK_EQ(all, t->global);
314     }
315 nigel 91
316     // One final test: test \r\n replacement when we're not in CRLF mode
317     {
318     RE re("b*", RE_Options(PCRE_NEWLINE_CR).set_utf8(support_utf8));
319     assert(re.error().empty());
320     string all("aa\r\naa\r\n");
321     CHECK(re.GlobalReplace("bb", &all) > 0);
322     CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
323     }
324     {
325     RE re("b*", RE_Options(PCRE_NEWLINE_LF).set_utf8(support_utf8));
326     assert(re.error().empty());
327     string all("aa\r\naa\r\n");
328     CHECK(re.GlobalReplace("bb", &all) > 0);
329     CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
330     }
331     // TODO: test what happens when no PCRE_NEWLINE_* flag is set.
332     // Alas, the answer depends on how pcre was compiled.
333 nigel 77 }
334    
335     static void TestExtract() {
336     printf("Testing Extract\n");
337    
338     string s;
339    
340     CHECK(RE("(.*)@([^.]*)").Extract("\\2!\\1", "boris@kremvax.ru", &s));
341     CHECK_EQ(s, "kremvax!boris");
342    
343     // check the RE interface as well
344     CHECK(RE(".*").Extract("'\\0'", "foo", &s));
345     CHECK_EQ(s, "'foo'");
346     CHECK(!RE("bar").Extract("'\\0'", "baz", &s));
347     CHECK_EQ(s, "'foo'");
348     }
349    
350     static void TestConsume() {
351     printf("Testing Consume\n");
352    
353     string word;
354    
355     string s(" aaa b!@#$@#$cccc");
356     StringPiece input(s);
357    
358     RE r("\\s*(\\w+)"); // matches a word, possibly proceeded by whitespace
359     CHECK(r.Consume(&input, &word));
360     CHECK_EQ(word, "aaa");
361     CHECK(r.Consume(&input, &word));
362     CHECK_EQ(word, "b");
363     CHECK(! r.Consume(&input, &word));
364     }
365    
366     static void TestFindAndConsume() {
367     printf("Testing FindAndConsume\n");
368    
369     string word;
370    
371     string s(" aaa b!@#$@#$cccc");
372     StringPiece input(s);
373    
374     RE r("(\\w+)"); // matches a word
375     CHECK(r.FindAndConsume(&input, &word));
376     CHECK_EQ(word, "aaa");
377     CHECK(r.FindAndConsume(&input, &word));
378     CHECK_EQ(word, "b");
379     CHECK(r.FindAndConsume(&input, &word));
380     CHECK_EQ(word, "cccc");
381     CHECK(! r.FindAndConsume(&input, &word));
382     }
383    
384     static void TestMatchNumberPeculiarity() {
385     printf("Testing match-number peculiaraity\n");
386    
387     string word1;
388     string word2;
389     string word3;
390    
391     RE r("(foo)|(bar)|(baz)");
392     CHECK(r.PartialMatch("foo", &word1, &word2, &word3));
393     CHECK_EQ(word1, "foo");
394     CHECK_EQ(word2, "");
395     CHECK_EQ(word3, "");
396     CHECK(r.PartialMatch("bar", &word1, &word2, &word3));
397     CHECK_EQ(word1, "");
398     CHECK_EQ(word2, "bar");
399     CHECK_EQ(word3, "");
400     CHECK(r.PartialMatch("baz", &word1, &word2, &word3));
401     CHECK_EQ(word1, "");
402     CHECK_EQ(word2, "");
403     CHECK_EQ(word3, "baz");
404     CHECK(!r.PartialMatch("f", &word1, &word2, &word3));
405    
406     string a;
407     CHECK(RE("(foo)|hello").FullMatch("hello", &a));
408     CHECK_EQ(a, "");
409     }
410    
411 nigel 87 static void TestRecursion() {
412 nigel 77 printf("Testing recursion\n");
413    
414 nigel 87 // Get one string that passes (sometimes), one that never does.
415     string text_good("abcdefghijk");
416     string text_bad("acdefghijkl");
417    
418     // According to pcretest, matching text_good against (\w+)*b
419     // requires match_limit of at least 8192, and match_recursion_limit
420     // of at least 37.
421    
422     RE_Options options_ml;
423     options_ml.set_match_limit(8192);
424     RE re("(\\w+)*b", options_ml);
425     CHECK(re.PartialMatch(text_good) == true);
426     CHECK(re.PartialMatch(text_bad) == false);
427     CHECK(re.FullMatch(text_good) == false);
428     CHECK(re.FullMatch(text_bad) == false);
429    
430     options_ml.set_match_limit(1024);
431     RE re2("(\\w+)*b", options_ml);
432     CHECK(re2.PartialMatch(text_good) == false); // because of match_limit
433     CHECK(re2.PartialMatch(text_bad) == false);
434     CHECK(re2.FullMatch(text_good) == false);
435     CHECK(re2.FullMatch(text_bad) == false);
436    
437     RE_Options options_mlr;
438     options_mlr.set_match_limit_recursion(50);
439     RE re3("(\\w+)*b", options_mlr);
440     CHECK(re3.PartialMatch(text_good) == true);
441     CHECK(re3.PartialMatch(text_bad) == false);
442     CHECK(re3.FullMatch(text_good) == false);
443     CHECK(re3.FullMatch(text_bad) == false);
444    
445     options_mlr.set_match_limit_recursion(10);
446     RE re4("(\\w+)*b", options_mlr);
447     CHECK(re4.PartialMatch(text_good) == false);
448     CHECK(re4.PartialMatch(text_bad) == false);
449     CHECK(re4.FullMatch(text_good) == false);
450     CHECK(re4.FullMatch(text_bad) == false);
451 nigel 77 }
452    
453 nigel 93 // A meta-quoted string, interpreted as a pattern, should always match
454     // the original unquoted string.
455     static void TestQuoteMeta(string unquoted, RE_Options options = RE_Options()) {
456     string quoted = RE::QuoteMeta(unquoted);
457     RE re(quoted, options);
458     CHECK(re.FullMatch(unquoted));
459     }
460    
461     // A string containing meaningful regexp characters, which is then meta-
462     // quoted, should not generally match a string the unquoted string does.
463     static void NegativeTestQuoteMeta(string unquoted, string should_not_match,
464     RE_Options options = RE_Options()) {
465     string quoted = RE::QuoteMeta(unquoted);
466     RE re(quoted, options);
467     CHECK(!re.FullMatch(should_not_match));
468     }
469    
470     // Tests that quoted meta characters match their original strings,
471     // and that a few things that shouldn't match indeed do not.
472     static void TestQuotaMetaSimple() {
473     TestQuoteMeta("foo");
474     TestQuoteMeta("foo.bar");
475     TestQuoteMeta("foo\\.bar");
476     TestQuoteMeta("[1-9]");
477     TestQuoteMeta("1.5-2.0?");
478     TestQuoteMeta("\\d");
479     TestQuoteMeta("Who doesn't like ice cream?");
480     TestQuoteMeta("((a|b)c?d*e+[f-h]i)");
481     TestQuoteMeta("((?!)xxx).*yyy");
482     TestQuoteMeta("([");
483     }
484    
485     static void TestQuoteMetaSimpleNegative() {
486     NegativeTestQuoteMeta("foo", "bar");
487     NegativeTestQuoteMeta("...", "bar");
488     NegativeTestQuoteMeta("\\.", ".");
489     NegativeTestQuoteMeta("\\.", "..");
490     NegativeTestQuoteMeta("(a)", "a");
491     NegativeTestQuoteMeta("(a|b)", "a");
492     NegativeTestQuoteMeta("(a|b)", "(a)");
493     NegativeTestQuoteMeta("(a|b)", "a|b");
494     NegativeTestQuoteMeta("[0-9]", "0");
495     NegativeTestQuoteMeta("[0-9]", "0-9");
496     NegativeTestQuoteMeta("[0-9]", "[9]");
497     NegativeTestQuoteMeta("((?!)xxx)", "xxx");
498     }
499    
500     static void TestQuoteMetaLatin1() {
501     TestQuoteMeta("3\xb2 = 9");
502     }
503    
// QuoteMeta checks on multi-byte UTF-8 sequences, compiled with the UTF8
// option.  The body is empty unless the build defines SUPPORT_UTF8.
static void TestQuoteMetaUtf8() {
#ifdef SUPPORT_UTF8
  const RE_Options utf8 = pcrecpp::UTF8();

  TestQuoteMeta("Pl\xc3\xa1\x63ido Domingo", utf8);
  TestQuoteMeta("xyz", utf8);                 // No fancy utf8
  TestQuoteMeta("\xc2\xb0", utf8);            // 2-byte utf8 (degree symbol)
  TestQuoteMeta("27\xc2\xb0 degrees", utf8);  // As a middle character
  TestQuoteMeta("\xe2\x80\xb3", utf8);        // 3-byte utf8 (double prime)
  TestQuoteMeta("\xf0\x9d\x85\x9f", utf8);    // 4-byte utf8 (music note)

  // Interpreted as Latin-1, but should still work
  TestQuoteMeta("27\xc2\xb0");

  // 2-byte utf (degree symbol): a subject with a backslash before each
  // byte must not match the quoted pattern.
  NegativeTestQuoteMeta("27\xc2\xb0", "27\\\xc2\\\xb0", utf8);
#endif
}
518    
519     static void TestQuoteMetaAll() {
520     printf("Testing QuoteMeta\n");
521     TestQuotaMetaSimple();
522     TestQuoteMetaSimpleNegative();
523     TestQuoteMetaLatin1();
524     TestQuoteMetaUtf8();
525     }
526    
527 nigel 81 //
528     // Options tests contributed by
529     // Giuseppe Maxia, CTO, Stardata s.r.l.
530     // July 2005
531     //
532     static void GetOneOptionResult(
533     const char *option_name,
534     const char *regex,
535     const char *str,
536     RE_Options options,
537     bool full,
538     string expected) {
539 nigel 77
540 nigel 81 printf("Testing Option <%s>\n", option_name);
541     if(VERBOSE_TEST)
542     printf("/%s/ finds \"%s\" within \"%s\" \n",
543     regex,
544     expected.c_str(),
545     str);
546     string captured("");
547     if (full)
548     RE(regex,options).FullMatch(str, &captured);
549     else
550     RE(regex,options).PartialMatch(str, &captured);
551     CHECK_EQ(captured, expected);
552     }
553    
554     static void TestOneOption(
555     const char *option_name,
556     const char *regex,
557     const char *str,
558     RE_Options options,
559     bool full,
560     bool assertive = true) {
561    
562     printf("Testing Option <%s>\n", option_name);
563     if (VERBOSE_TEST)
564     printf("'%s' %s /%s/ \n",
565     str,
566     (assertive? "matches" : "doesn't match"),
567     regex);
568     if (assertive) {
569     if (full)
570     CHECK(RE(regex,options).FullMatch(str));
571     else
572     CHECK(RE(regex,options).PartialMatch(str));
573     } else {
574     if (full)
575     CHECK(!RE(regex,options).FullMatch(str));
576     else
577     CHECK(!RE(regex,options).PartialMatch(str));
578     }
579     }
580    
581     static void Test_CASELESS() {
582     RE_Options options;
583     RE_Options options2;
584    
585     options.set_caseless(true);
586     TestOneOption("CASELESS (class)", "HELLO", "hello", options, false);
587     TestOneOption("CASELESS (class2)", "HELLO", "hello", options2.set_caseless(true), false);
588     TestOneOption("CASELESS (class)", "^[A-Z]+$", "Hello", options, false);
589    
590     TestOneOption("CASELESS (function)", "HELLO", "hello", pcrecpp::CASELESS(), false);
591     TestOneOption("CASELESS (function)", "^[A-Z]+$", "Hello", pcrecpp::CASELESS(), false);
592     options.set_caseless(false);
593     TestOneOption("no CASELESS", "HELLO", "hello", options, false, false);
594     }
595    
596     static void Test_MULTILINE() {
597     RE_Options options;
598     RE_Options options2;
599     const char *str = "HELLO\n" "cruel\n" "world\n";
600    
601     options.set_multiline(true);
602     TestOneOption("MULTILINE (class)", "^cruel$", str, options, false);
603     TestOneOption("MULTILINE (class2)", "^cruel$", str, options2.set_multiline(true), false);
604     TestOneOption("MULTILINE (function)", "^cruel$", str, pcrecpp::MULTILINE(), false);
605     options.set_multiline(false);
606     TestOneOption("no MULTILINE", "^cruel$", str, options, false, false);
607     }
608    
609     static void Test_DOTALL() {
610     RE_Options options;
611     RE_Options options2;
612     const char *str = "HELLO\n" "cruel\n" "world";
613    
614     options.set_dotall(true);
615     TestOneOption("DOTALL (class)", "HELLO.*world", str, options, true);
616     TestOneOption("DOTALL (class2)", "HELLO.*world", str, options2.set_dotall(true), true);
617     TestOneOption("DOTALL (function)", "HELLO.*world", str, pcrecpp::DOTALL(), true);
618     options.set_dotall(false);
619     TestOneOption("no DOTALL", "HELLO.*world", str, options, true, false);
620     }
621    
622     static void Test_DOLLAR_ENDONLY() {
623     RE_Options options;
624     RE_Options options2;
625     const char *str = "HELLO world\n";
626    
627     TestOneOption("no DOLLAR_ENDONLY", "world$", str, options, false);
628     options.set_dollar_endonly(true);
629     TestOneOption("DOLLAR_ENDONLY 1", "world$", str, options, false, false);
630     TestOneOption("DOLLAR_ENDONLY 2", "world$", str, options2.set_dollar_endonly(true), false, false);
631     }
632    
633     static void Test_EXTRA() {
634     RE_Options options;
635     const char *str = "HELLO";
636    
637     options.set_extra(true);
638     TestOneOption("EXTRA 1", "\\HELL\\O", str, options, true, false );
639     TestOneOption("EXTRA 2", "\\HELL\\O", str, RE_Options().set_extra(true), true, false );
640     options.set_extra(false);
641     TestOneOption("no EXTRA", "\\HELL\\O", str, options, true );
642     }
643    
644     static void Test_EXTENDED() {
645     RE_Options options;
646     RE_Options options2;
647     const char *str = "HELLO world";
648    
649     options.set_extended(true);
650     TestOneOption("EXTENDED (class)", "HELLO world", str, options, false, false);
651     TestOneOption("EXTENDED (class2)", "HELLO world", str, options2.set_extended(true), false, false);
652     TestOneOption("EXTENDED (class)",
653     "^ HE L{2} O "
654     "\\s+ "
655     "\\w+ $ ",
656     str,
657     options,
658     false);
659    
660     TestOneOption("EXTENDED (function)", "HELLO world", str, pcrecpp::EXTENDED(), false, false);
661     TestOneOption("EXTENDED (function)",
662     "^ HE L{2} O "
663     "\\s+ "
664     "\\w+ $ ",
665     str,
666     pcrecpp::EXTENDED(),
667     false);
668    
669     options.set_extended(false);
670     TestOneOption("no EXTENDED", "HELLO world", str, options, false);
671     }
672    
673     static void Test_NO_AUTO_CAPTURE() {
674     RE_Options options;
675     const char *str = "HELLO world";
676     string captured;
677    
678     printf("Testing Option <no NO_AUTO_CAPTURE>\n");
679     if (VERBOSE_TEST)
680     printf("parentheses capture text\n");
681     RE re("(world|universe)$", options);
682     CHECK(re.Extract("\\1", str , &captured));
683     CHECK_EQ(captured, "world");
684     options.set_no_auto_capture(true);
685     printf("testing Option <NO_AUTO_CAPTURE>\n");
686     if (VERBOSE_TEST)
687     printf("parentheses do not capture text\n");
688     re.Extract("\\1",str, &captured );
689     CHECK_EQ(captured, "world");
690     }
691    
692     static void Test_UNGREEDY() {
693     RE_Options options;
694     const char *str = "HELLO, 'this' is the 'world'";
695    
696     options.set_ungreedy(true);
697     GetOneOptionResult("UNGREEDY 1", "('.*')", str, options, false, "'this'" );
698     GetOneOptionResult("UNGREEDY 2", "('.*')", str, RE_Options().set_ungreedy(true), false, "'this'" );
699     GetOneOptionResult("UNGREEDY", "('.*?')", str, options, false, "'this' is the 'world'" );
700    
701     options.set_ungreedy(false);
702     GetOneOptionResult("no UNGREEDY", "('.*')", str, options, false, "'this' is the 'world'" );
703     GetOneOptionResult("no UNGREEDY", "('.*?')", str, options, false, "'this'" );
704     }
705    
706     static void Test_all_options() {
707     const char *str = "HELLO\n" "cruel\n" "world";
708     RE_Options options;
709     options.set_all_options(PCRE_CASELESS | PCRE_DOTALL);
710    
711     TestOneOption("all_options (CASELESS|DOTALL)", "^hello.*WORLD", str , options, false);
712     options.set_all_options(0);
713     TestOneOption("all_options (0)", "^hello.*WORLD", str , options, false, false);
714     options.set_all_options(PCRE_MULTILINE | PCRE_EXTENDED);
715    
716     TestOneOption("all_options (MULTILINE|EXTENDED)", " ^ c r u e l $ ", str, options, false);
717     TestOneOption("all_options (MULTILINE|EXTENDED) with constructor",
718     " ^ c r u e l $ ",
719     str,
720     RE_Options(PCRE_MULTILINE | PCRE_EXTENDED),
721     false);
722    
723     TestOneOption("all_options (MULTILINE|EXTENDED) with concatenation",
724     " ^ c r u e l $ ",
725     str,
726     RE_Options()
727     .set_multiline(true)
728     .set_extended(true),
729     false);
730    
731     options.set_all_options(0);
732     TestOneOption("all_options (0)", "^ c r u e l $", str, options, false, false);
733    
734     }
735    
736     static void TestOptions() {
737     printf("Testing Options\n");
738     Test_CASELESS();
739     Test_MULTILINE();
740     Test_DOTALL();
741     Test_DOLLAR_ENDONLY();
742     Test_EXTENDED();
743     Test_NO_AUTO_CAPTURE();
744     Test_UNGREEDY();
745     Test_EXTRA();
746     Test_all_options();
747     }
748    
749 nigel 93 static void TestConstructors() {
750     printf("Testing constructors\n");
751    
752     RE_Options options;
753     options.set_dotall(true);
754     const char *str = "HELLO\n" "cruel\n" "world";
755    
756     RE orig("HELLO.*world", options);
757     CHECK(orig.FullMatch(str));
758    
759     RE copy1(orig);
760     CHECK(copy1.FullMatch(str));
761    
762     RE copy2("not a match");
763     CHECK(!copy2.FullMatch(str));
764     copy2 = copy1;
765     CHECK(copy2.FullMatch(str));
766     copy2 = orig;
767     CHECK(copy2.FullMatch(str));
768    
769     // Make sure when we assign to ourselves, nothing bad happens
770     orig = orig;
771     copy1 = copy1;
772     copy2 = copy2;
773     CHECK(orig.FullMatch(str));
774     CHECK(copy1.FullMatch(str));
775     CHECK(copy2.FullMatch(str));
776     }
777    
778 nigel 77 int main(int argc, char** argv) {
779     // Treat any flag as --help
780     if (argc > 1 && argv[1][0] == '-') {
781     printf("Usage: %s [timing1|timing2|timing3 num-iters]\n"
782     " If 'timingX ###' is specified, run the given timing test\n"
783     " with the given number of iterations, rather than running\n"
784     " the default corectness test.\n", argv[0]);
785     return 0;
786     }
787    
788     if (argc > 1) {
789     if ( argc == 2 || atoi(argv[2]) == 0) {
790     printf("timing mode needs a num-iters argument\n");
791     return 1;
792     }
793     if (!strcmp(argv[1], "timing1"))
794     Timing1(atoi(argv[2]));
795     else if (!strcmp(argv[1], "timing2"))
796     Timing2(atoi(argv[2]));
797     else if (!strcmp(argv[1], "timing3"))
798     Timing3(atoi(argv[2]));
799     else
800     printf("Unknown argument '%s'\n", argv[1]);
801     return 0;
802     }
803    
804     printf("Testing FullMatch\n");
805    
806     int i;
807     string s;
808    
809     /***** FullMatch with no args *****/
810    
811     CHECK(RE("h.*o").FullMatch("hello"));
812 ph10 179 CHECK(!RE("h.*o").FullMatch("othello")); // Must be anchored at front
813     CHECK(!RE("h.*o").FullMatch("hello!")); // Must be anchored at end
814     CHECK(RE("a*").FullMatch("aaaa")); // Fullmatch with normal op
815     CHECK(RE("a*?").FullMatch("aaaa")); // Fullmatch with nongreedy op
816     CHECK(RE("a*?\\z").FullMatch("aaaa")); // Two unusual ops
817 nigel 77
818     /***** FullMatch with args *****/
819    
820     // Zero-arg
821     CHECK(RE("\\d+").FullMatch("1001"));
822    
823     // Single-arg
824     CHECK(RE("(\\d+)").FullMatch("1001", &i));
825     CHECK_EQ(i, 1001);
826     CHECK(RE("(-?\\d+)").FullMatch("-123", &i));
827     CHECK_EQ(i, -123);
828     CHECK(!RE("()\\d+").FullMatch("10", &i));
829     CHECK(!RE("(\\d+)").FullMatch("1234567890123456789012345678901234567890",
830     &i));
831    
832     // Digits surrounding integer-arg
833     CHECK(RE("1(\\d*)4").FullMatch("1234", &i));
834     CHECK_EQ(i, 23);
835     CHECK(RE("(\\d)\\d+").FullMatch("1234", &i));
836     CHECK_EQ(i, 1);
837     CHECK(RE("(-\\d)\\d+").FullMatch("-1234", &i));
838     CHECK_EQ(i, -1);
839     CHECK(RE("(\\d)").PartialMatch("1234", &i));
840     CHECK_EQ(i, 1);
841     CHECK(RE("(-\\d)").PartialMatch("-1234", &i));
842     CHECK_EQ(i, -1);
843    
844     // String-arg
845     CHECK(RE("h(.*)o").FullMatch("hello", &s));
846     CHECK_EQ(s, string("ell"));
847    
848     // StringPiece-arg
849     StringPiece sp;
850     CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &sp, &i));
851     CHECK_EQ(sp.size(), 4);
852     CHECK(memcmp(sp.data(), "ruby", 4) == 0);
853     CHECK_EQ(i, 1234);
854    
855     // Multi-arg
856     CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &s, &i));
857     CHECK_EQ(s, string("ruby"));
858     CHECK_EQ(i, 1234);
859    
860     // Ignored arg
861     CHECK(RE("(\\w+)(:)(\\d+)").FullMatch("ruby:1234", &s, (void*)NULL, &i));
862     CHECK_EQ(s, string("ruby"));
863     CHECK_EQ(i, 1234);
864    
865     // Type tests
866     {
867     char c;
868     CHECK(RE("(H)ello").FullMatch("Hello", &c));
869     CHECK_EQ(c, 'H');
870     }
871     {
872     unsigned char c;
873     CHECK(RE("(H)ello").FullMatch("Hello", &c));
874     CHECK_EQ(c, static_cast<unsigned char>('H'));
875     }
876     {
877     short v;
878     CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
879     CHECK(RE("(-?\\d+)").FullMatch("-100", &v)); CHECK_EQ(v, -100);
880     CHECK(RE("(-?\\d+)").FullMatch("32767", &v)); CHECK_EQ(v, 32767);
881     CHECK(RE("(-?\\d+)").FullMatch("-32768", &v)); CHECK_EQ(v, -32768);
882     CHECK(!RE("(-?\\d+)").FullMatch("-32769", &v));
883     CHECK(!RE("(-?\\d+)").FullMatch("32768", &v));
884     }
885     {
886     unsigned short v;
887     CHECK(RE("(\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
888     CHECK(RE("(\\d+)").FullMatch("32767", &v)); CHECK_EQ(v, 32767);
889     CHECK(RE("(\\d+)").FullMatch("65535", &v)); CHECK_EQ(v, 65535);
890     CHECK(!RE("(\\d+)").FullMatch("65536", &v));
891     }
892     {
893     int v;
894     static const int max_value = 0x7fffffff;
895     static const int min_value = -max_value - 1;
896     CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
897     CHECK(RE("(-?\\d+)").FullMatch("-100", &v)); CHECK_EQ(v, -100);
898     CHECK(RE("(-?\\d+)").FullMatch("2147483647", &v)); CHECK_EQ(v, max_value);
899     CHECK(RE("(-?\\d+)").FullMatch("-2147483648", &v)); CHECK_EQ(v, min_value);
900     CHECK(!RE("(-?\\d+)").FullMatch("-2147483649", &v));
901     CHECK(!RE("(-?\\d+)").FullMatch("2147483648", &v));
902     }
903     {
904     unsigned int v;
905     static const unsigned int max_value = 0xfffffffful;
906     CHECK(RE("(\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
907     CHECK(RE("(\\d+)").FullMatch("4294967295", &v)); CHECK_EQ(v, max_value);
908     CHECK(!RE("(\\d+)").FullMatch("4294967296", &v));
909     }
910     #ifdef HAVE_LONG_LONG
911 ph10 193 # if defined(__MINGW__) || defined(__MINGW32__)
912     # define LLD "%I64d"
913     # else
914     # define LLD "%lld"
915     # endif
916 nigel 77 {
917     long long v;
918     static const long long max_value = 0x7fffffffffffffffLL;
919     static const long long min_value = -max_value - 1;
920     char buf[32];
921    
922     CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
923     CHECK(RE("(-?\\d+)").FullMatch("-100",&v)); CHECK_EQ(v, -100);
924    
925 ph10 193 snprintf(buf, sizeof(buf), LLD, max_value);
926 nigel 77 CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
927    
928 ph10 193 snprintf(buf, sizeof(buf), LLD, min_value);
929 nigel 77 CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, min_value);
930    
931 ph10 193 snprintf(buf, sizeof(buf), LLD, max_value);
932 nigel 77 assert(buf[strlen(buf)-1] != '9');
933     buf[strlen(buf)-1]++;
934     CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
935    
936 ph10 193 snprintf(buf, sizeof(buf), LLD, min_value);
937 nigel 77 assert(buf[strlen(buf)-1] != '9');
938     buf[strlen(buf)-1]++;
939     CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
940     }
941     #endif
942     #if defined HAVE_UNSIGNED_LONG_LONG && defined HAVE_LONG_LONG
943     {
944     unsigned long long v;
945     long long v2;
946     static const unsigned long long max_value = 0xffffffffffffffffULL;
947     char buf[32];
948    
949     CHECK(RE("(-?\\d+)").FullMatch("100",&v)); CHECK_EQ(v, 100);
950     CHECK(RE("(-?\\d+)").FullMatch("-100",&v2)); CHECK_EQ(v2, -100);
951    
952     snprintf(buf, sizeof(buf), "%llu", max_value);
953     CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
954    
955     assert(buf[strlen(buf)-1] != '9');
956     buf[strlen(buf)-1]++;
957     CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
958     }
959     #endif
960     {
961     float v;
962     CHECK(RE("(.*)").FullMatch("100", &v));
963     CHECK(RE("(.*)").FullMatch("-100.", &v));
964     CHECK(RE("(.*)").FullMatch("1e23", &v));
965     }
966     {
967     double v;
968     CHECK(RE("(.*)").FullMatch("100", &v));
969     CHECK(RE("(.*)").FullMatch("-100.", &v));
970     CHECK(RE("(.*)").FullMatch("1e23", &v));
971     }
972    
973     // Check that matching is fully anchored
974     CHECK(!RE("(\\d+)").FullMatch("x1001", &i));
975     CHECK(!RE("(\\d+)").FullMatch("1001x", &i));
976     CHECK(RE("x(\\d+)").FullMatch("x1001", &i)); CHECK_EQ(i, 1001);
977     CHECK(RE("(\\d+)x").FullMatch("1001x", &i)); CHECK_EQ(i, 1001);
978    
979     // Braces
980     CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcd"));
981     CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcde"));
982     CHECK(!RE("[0-9a-f+.-]{5,}").FullMatch("0abc"));
983    
984     // Complicated RE
985     CHECK(RE("foo|bar|[A-Z]").FullMatch("foo"));
986     CHECK(RE("foo|bar|[A-Z]").FullMatch("bar"));
987     CHECK(RE("foo|bar|[A-Z]").FullMatch("X"));
988     CHECK(!RE("foo|bar|[A-Z]").FullMatch("XY"));
989    
990     // Check full-match handling (needs '$' tacked on internally)
991     CHECK(RE("fo|foo").FullMatch("fo"));
992     CHECK(RE("fo|foo").FullMatch("foo"));
993     CHECK(RE("fo|foo$").FullMatch("fo"));
994     CHECK(RE("fo|foo$").FullMatch("foo"));
995     CHECK(RE("foo$").FullMatch("foo"));
996     CHECK(!RE("foo\\$").FullMatch("foo$bar"));
997     CHECK(!RE("fo|bar").FullMatch("fox"));
998    
999     // Uncomment the following if we change the handling of '$' to
1000     // prevent it from matching a trailing newline
1001     if (false) {
1002     // Check that we don't get bitten by pcre's special handling of a
1003     // '\n' at the end of the string matching '$'
1004     CHECK(!RE("foo$").PartialMatch("foo\n"));
1005     }
1006    
1007     // Number of args
1008     int a[16];
1009     CHECK(RE("").FullMatch(""));
1010    
1011     memset(a, 0, sizeof(0));
1012     CHECK(RE("(\\d){1}").FullMatch("1",
1013     &a[0]));
1014     CHECK_EQ(a[0], 1);
1015    
1016     memset(a, 0, sizeof(0));
1017     CHECK(RE("(\\d)(\\d)").FullMatch("12",
1018     &a[0], &a[1]));
1019     CHECK_EQ(a[0], 1);
1020     CHECK_EQ(a[1], 2);
1021    
1022     memset(a, 0, sizeof(0));
1023     CHECK(RE("(\\d)(\\d)(\\d)").FullMatch("123",
1024     &a[0], &a[1], &a[2]));
1025     CHECK_EQ(a[0], 1);
1026     CHECK_EQ(a[1], 2);
1027     CHECK_EQ(a[2], 3);
1028    
1029     memset(a, 0, sizeof(0));
1030     CHECK(RE("(\\d)(\\d)(\\d)(\\d)").FullMatch("1234",
1031     &a[0], &a[1], &a[2], &a[3]));
1032     CHECK_EQ(a[0], 1);
1033     CHECK_EQ(a[1], 2);
1034     CHECK_EQ(a[2], 3);
1035     CHECK_EQ(a[3], 4);
1036    
1037     memset(a, 0, sizeof(0));
1038     CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("12345",
1039     &a[0], &a[1], &a[2],
1040     &a[3], &a[4]));
1041     CHECK_EQ(a[0], 1);
1042     CHECK_EQ(a[1], 2);
1043     CHECK_EQ(a[2], 3);
1044     CHECK_EQ(a[3], 4);
1045     CHECK_EQ(a[4], 5);
1046    
1047     memset(a, 0, sizeof(0));
1048     CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("123456",
1049     &a[0], &a[1], &a[2],
1050     &a[3], &a[4], &a[5]));
1051     CHECK_EQ(a[0], 1);
1052     CHECK_EQ(a[1], 2);
1053     CHECK_EQ(a[2], 3);
1054     CHECK_EQ(a[3], 4);
1055     CHECK_EQ(a[4], 5);
1056     CHECK_EQ(a[5], 6);
1057    
1058     memset(a, 0, sizeof(0));
1059     CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("1234567",
1060     &a[0], &a[1], &a[2], &a[3],
1061     &a[4], &a[5], &a[6]));
1062     CHECK_EQ(a[0], 1);
1063     CHECK_EQ(a[1], 2);
1064     CHECK_EQ(a[2], 3);
1065     CHECK_EQ(a[3], 4);
1066     CHECK_EQ(a[4], 5);
1067     CHECK_EQ(a[5], 6);
1068     CHECK_EQ(a[6], 7);
1069    
1070     memset(a, 0, sizeof(0));
1071     CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)"
1072     "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch(
1073     "1234567890123456",
1074     &a[0], &a[1], &a[2], &a[3],
1075     &a[4], &a[5], &a[6], &a[7],
1076     &a[8], &a[9], &a[10], &a[11],
1077     &a[12], &a[13], &a[14], &a[15]));
1078     CHECK_EQ(a[0], 1);
1079     CHECK_EQ(a[1], 2);
1080     CHECK_EQ(a[2], 3);
1081     CHECK_EQ(a[3], 4);
1082     CHECK_EQ(a[4], 5);
1083     CHECK_EQ(a[5], 6);
1084     CHECK_EQ(a[6], 7);
1085     CHECK_EQ(a[7], 8);
1086     CHECK_EQ(a[8], 9);
1087     CHECK_EQ(a[9], 0);
1088     CHECK_EQ(a[10], 1);
1089     CHECK_EQ(a[11], 2);
1090     CHECK_EQ(a[12], 3);
1091     CHECK_EQ(a[13], 4);
1092     CHECK_EQ(a[14], 5);
1093     CHECK_EQ(a[15], 6);
1094    
1095     /***** PartialMatch *****/
1096    
1097     printf("Testing PartialMatch\n");
1098    
1099     CHECK(RE("h.*o").PartialMatch("hello"));
1100     CHECK(RE("h.*o").PartialMatch("othello"));
1101     CHECK(RE("h.*o").PartialMatch("hello!"));
1102     CHECK(RE("((((((((((((((((((((x))))))))))))))))))))").PartialMatch("x"));
1103    
1104 nigel 93 /***** other tests *****/
1105    
1106 nigel 77 RadixTests();
1107     TestReplace();
1108     TestExtract();
1109     TestConsume();
1110     TestFindAndConsume();
1111 nigel 93 TestQuoteMetaAll();
1112 nigel 77 TestMatchNumberPeculiarity();
1113    
1114     // Check the pattern() accessor
1115     {
1116     const string kPattern = "http://([^/]+)/.*";
1117     const RE re(kPattern);
1118     CHECK_EQ(kPattern, re.pattern());
1119     }
1120    
1121     // Check RE error field.
1122     {
1123     RE re("foo");
1124     CHECK(re.error().empty()); // Must have no error
1125     }
1126    
1127     #ifdef SUPPORT_UTF8
1128     // Check UTF-8 handling
1129     {
1130     printf("Testing UTF-8 handling\n");
1131    
1132     // Three Japanese characters (nihongo)
1133     const char utf8_string[] = {
1134     0xe6, 0x97, 0xa5, // 65e5
1135     0xe6, 0x9c, 0xac, // 672c
1136     0xe8, 0xaa, 0x9e, // 8a9e
1137     0
1138     };
1139     const char utf8_pattern[] = {
1140     '.',
1141     0xe6, 0x9c, 0xac, // 672c
1142     '.',
1143     0
1144     };
1145    
1146     // Both should match in either mode, bytes or UTF-8
1147     RE re_test1(".........");
1148     CHECK(re_test1.FullMatch(utf8_string));
1149     RE re_test2("...", pcrecpp::UTF8());
1150     CHECK(re_test2.FullMatch(utf8_string));
1151    
1152     // Check that '.' matches one byte or UTF-8 character
1153     // according to the mode.
1154     string ss;
1155     RE re_test3("(.)");
1156     CHECK(re_test3.PartialMatch(utf8_string, &ss));
1157     CHECK_EQ(ss, string("\xe6"));
1158     RE re_test4("(.)", pcrecpp::UTF8());
1159     CHECK(re_test4.PartialMatch(utf8_string, &ss));
1160     CHECK_EQ(ss, string("\xe6\x97\xa5"));
1161    
1162     // Check that string matches itself in either mode
1163     RE re_test5(utf8_string);
1164     CHECK(re_test5.FullMatch(utf8_string));
1165     RE re_test6(utf8_string, pcrecpp::UTF8());
1166     CHECK(re_test6.FullMatch(utf8_string));
1167    
1168     // Check that pattern matches string only in UTF8 mode
1169     RE re_test7(utf8_pattern);
1170     CHECK(!re_test7.FullMatch(utf8_string));
1171     RE re_test8(utf8_pattern, pcrecpp::UTF8());
1172     CHECK(re_test8.FullMatch(utf8_string));
1173     }
1174    
1175     // Check that ungreedy, UTF8 regular expressions don't match when they
1176     // oughtn't -- see bug 82246.
1177     {
1178     // This code always worked.
1179     const char* pattern = "\\w+X";
1180     const string target = "a aX";
1181     RE match_sentence(pattern);
1182     RE match_sentence_re(pattern, pcrecpp::UTF8());
1183    
1184     CHECK(!match_sentence.FullMatch(target));
1185     CHECK(!match_sentence_re.FullMatch(target));
1186     }
1187    
1188     {
1189     const char* pattern = "(?U)\\w+X";
1190     const string target = "a aX";
1191     RE match_sentence(pattern);
1192     RE match_sentence_re(pattern, pcrecpp::UTF8());
1193    
1194     CHECK(!match_sentence.FullMatch(target));
1195     CHECK(!match_sentence_re.FullMatch(target));
1196     }
1197     #endif /* def SUPPORT_UTF8 */
1198    
1199     printf("Testing error reporting\n");
1200    
1201     { RE re("a\\1"); CHECK(!re.error().empty()); }
1202     {
1203     RE re("a[x");
1204     CHECK(!re.error().empty());
1205     }
1206     {
1207     RE re("a[z-a]");
1208     CHECK(!re.error().empty());
1209     }
1210     {
1211     RE re("a[[:foobar:]]");
1212     CHECK(!re.error().empty());
1213     }
1214     {
1215     RE re("a(b");
1216     CHECK(!re.error().empty());
1217     }
1218     {
1219     RE re("a\\");
1220     CHECK(!re.error().empty());
1221     }
1222    
1223 nigel 87 // Test that recursion is stopped
1224     TestRecursion();
1225 nigel 77
1226 nigel 81 // Test Options
1227     if (getenv("VERBOSE_TEST") != NULL)
1228     VERBOSE_TEST = true;
1229     TestOptions();
1230    
1231 nigel 93 // Test the constructors
1232     TestConstructors();
1233    
1234 nigel 77 // Done
1235     printf("OK\n");
1236    
1237     return 0;
1238     }

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12