/[pcre]/code/trunk/pcrecpp_unittest.cc
ViewVC logotype

Contents of /code/trunk/pcrecpp_unittest.cc

Parent Directory | Revision Log


Revision 263 - (hide annotations) (download)
Mon Nov 12 16:53:25 2007 UTC (6 years, 9 months ago) by ph10
File size: 38568 byte(s)
Apply Craig's patch, which makes it possible to "ignore" values in parens 
when parsing an RE using the c++ wrapper.

1 nigel 93 // -*- coding: utf-8 -*-
2     //
3     // Copyright (c) 2005 - 2006, Google Inc.
4 nigel 77 // All rights reserved.
5     //
6     // Redistribution and use in source and binary forms, with or without
7     // modification, are permitted provided that the following conditions are
8     // met:
9     //
10     // * Redistributions of source code must retain the above copyright
11     // notice, this list of conditions and the following disclaimer.
12     // * Redistributions in binary form must reproduce the above
13     // copyright notice, this list of conditions and the following disclaimer
14     // in the documentation and/or other materials provided with the
15     // distribution.
16     // * Neither the name of Google Inc. nor the names of its
17     // contributors may be used to endorse or promote products derived from
18     // this software without specific prior written permission.
19     //
20     // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21     // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22     // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23     // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24     // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25     // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26     // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27     // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28     // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29     // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30     // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31     //
32     // Author: Sanjay Ghemawat
33     //
34     // TODO: Test extractions for PartialMatch/Consume
35    
36 ph10 200 #ifdef HAVE_CONFIG_H
37 ph10 236 #include "config.h"
38 ph10 200 #endif
39 ph10 199
40 nigel 77 #include <stdio.h>
41 nigel 91 #include <cassert>
42 nigel 77 #include <vector>
43     #include "pcrecpp.h"
44    
45     using pcrecpp::StringPiece;
46     using pcrecpp::RE;
47     using pcrecpp::RE_Options;
48     using pcrecpp::Hex;
49     using pcrecpp::Octal;
50     using pcrecpp::CRadix;
51    
52 nigel 81 static bool VERBOSE_TEST = false;
53    
// CHECK dies with a fatal error if condition is not true.  It is *not*
// controlled by NDEBUG, so the check will be executed regardless of
// compilation mode.  Therefore, it is safe to do things like:
//    CHECK_EQ(fp->Write(x), 4)
//
// On failure it prints "<file>:<line>: Check failed: <condition text>" to
// stderr and terminates the process with exit status 1.
#define CHECK(condition) do {                           \
    if (!(condition)) {                                 \
      fprintf(stderr, "%s:%d: Check failed: %s\n",      \
              __FILE__, __LINE__, #condition);          \
      exit(1);                                          \
    }                                                   \
  } while (0)

// CHECK_EQ(a, b) dies unless a == b.  Both operands are parenthesized so
// that arguments containing operators of lower precedence than `==`
// (for example `?:` or `&`) are compared as whole expressions.
#define CHECK_EQ(a, b)  CHECK((a) == (b))
67    
68     static void Timing1(int num_iters) {
69     // Same pattern lots of times
70     RE pattern("ruby:\\d+");
71     StringPiece p("ruby:1234");
72     for (int j = num_iters; j > 0; j--) {
73     CHECK(pattern.FullMatch(p));
74     }
75     }
76    
77     static void Timing2(int num_iters) {
78     // Same pattern lots of times
79     RE pattern("ruby:(\\d+)");
80     int i;
81     for (int j = num_iters; j > 0; j--) {
82     CHECK(pattern.FullMatch("ruby:1234", &i));
83     CHECK_EQ(i, 1234);
84     }
85     }
86    
87     static void Timing3(int num_iters) {
88     string text_string;
89     for (int j = num_iters; j > 0; j--) {
90     text_string += "this is another line\n";
91     }
92    
93     RE line_matcher(".*\n");
94     string line;
95     StringPiece text(text_string);
96     int counter = 0;
97     while (line_matcher.Consume(&text)) {
98     counter++;
99     }
100     printf("Matched %d lines\n", counter);
101     }
102    
103     #if 0 // uncomment this if you have a way of defining VirtualProcessSize()
104    
105     static void LeakTest() {
106     // Check for memory leaks
107     unsigned long long initial_size = 0;
108     for (int i = 0; i < 100000; i++) {
109     if (i == 50000) {
110     initial_size = VirtualProcessSize();
111     printf("Size after 50000: %llu\n", initial_size);
112     }
113 ph10 257 char buf[100]; // definitely big enough
114     sprintf(buf, "pat%09d", i);
115 nigel 77 RE newre(buf);
116     }
117     uint64 final_size = VirtualProcessSize();
118     printf("Size after 100000: %llu\n", final_size);
119     const double growth = double(final_size - initial_size) / final_size;
120     printf("Growth: %0.2f%%", growth * 100);
121     CHECK(growth < 0.02); // Allow < 2% growth
122     }
123    
124     #endif
125    
126     static void RadixTests() {
127     printf("Testing hex\n");
128    
129     #define CHECK_HEX(type, value) \
130     do { \
131     type v; \
132     CHECK(RE("([0-9a-fA-F]+)[uUlL]*").FullMatch(#value, Hex(&v))); \
133     CHECK_EQ(v, 0x ## value); \
134     CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0x" #value, CRadix(&v))); \
135     CHECK_EQ(v, 0x ## value); \
136     } while(0)
137    
138     CHECK_HEX(short, 2bad);
139     CHECK_HEX(unsigned short, 2badU);
140     CHECK_HEX(int, dead);
141     CHECK_HEX(unsigned int, deadU);
142     CHECK_HEX(long, 7eadbeefL);
143     CHECK_HEX(unsigned long, deadbeefUL);
144     #ifdef HAVE_LONG_LONG
145     CHECK_HEX(long long, 12345678deadbeefLL);
146     #endif
147     #ifdef HAVE_UNSIGNED_LONG_LONG
148     CHECK_HEX(unsigned long long, cafebabedeadbeefULL);
149     #endif
150    
151     #undef CHECK_HEX
152    
153     printf("Testing octal\n");
154    
155     #define CHECK_OCTAL(type, value) \
156     do { \
157     type v; \
158     CHECK(RE("([0-7]+)[uUlL]*").FullMatch(#value, Octal(&v))); \
159     CHECK_EQ(v, 0 ## value); \
160     CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0" #value, CRadix(&v))); \
161     CHECK_EQ(v, 0 ## value); \
162     } while(0)
163    
164     CHECK_OCTAL(short, 77777);
165     CHECK_OCTAL(unsigned short, 177777U);
166     CHECK_OCTAL(int, 17777777777);
167     CHECK_OCTAL(unsigned int, 37777777777U);
168     CHECK_OCTAL(long, 17777777777L);
169     CHECK_OCTAL(unsigned long, 37777777777UL);
170     #ifdef HAVE_LONG_LONG
171     CHECK_OCTAL(long long, 777777777777777777777LL);
172     #endif
173     #ifdef HAVE_UNSIGNED_LONG_LONG
174     CHECK_OCTAL(unsigned long long, 1777777777777777777777ULL);
175     #endif
176    
177     #undef CHECK_OCTAL
178    
179     printf("Testing decimal\n");
180    
181     #define CHECK_DECIMAL(type, value) \
182     do { \
183     type v; \
184     CHECK(RE("(-?[0-9]+)[uUlL]*").FullMatch(#value, &v)); \
185     CHECK_EQ(v, value); \
186     CHECK(RE("(-?[0-9a-fA-FxX]+)[uUlL]*").FullMatch(#value, CRadix(&v))); \
187     CHECK_EQ(v, value); \
188     } while(0)
189    
190     CHECK_DECIMAL(short, -1);
191     CHECK_DECIMAL(unsigned short, 9999);
192     CHECK_DECIMAL(int, -1000);
193     CHECK_DECIMAL(unsigned int, 12345U);
194     CHECK_DECIMAL(long, -10000000L);
195     CHECK_DECIMAL(unsigned long, 3083324652U);
196     #ifdef HAVE_LONG_LONG
197     CHECK_DECIMAL(long long, -100000000000000LL);
198     #endif
199     #ifdef HAVE_UNSIGNED_LONG_LONG
200     CHECK_DECIMAL(unsigned long long, 1234567890987654321ULL);
201     #endif
202    
203     #undef CHECK_DECIMAL
204    
205     }
206    
207     static void TestReplace() {
208     printf("Testing Replace\n");
209    
210     struct ReplaceTest {
211     const char *regexp;
212     const char *rewrite;
213     const char *original;
214     const char *single;
215     const char *global;
216     };
217     static const ReplaceTest tests[] = {
218     { "(qu|[b-df-hj-np-tv-z]*)([a-z]+)",
219     "\\2\\1ay",
220     "the quick brown fox jumps over the lazy dogs.",
221     "ethay quick brown fox jumps over the lazy dogs.",
222     "ethay ickquay ownbray oxfay umpsjay overay ethay azylay ogsday." },
223     { "\\w+",
224     "\\0-NOSPAM",
225     "paul.haahr@google.com",
226     "paul-NOSPAM.haahr@google.com",
227     "paul-NOSPAM.haahr-NOSPAM@google-NOSPAM.com-NOSPAM" },
228     { "^",
229     "(START)",
230     "foo",
231     "(START)foo",
232     "(START)foo" },
233     { "^",
234     "(START)",
235     "",
236     "(START)",
237     "(START)" },
238     { "$",
239     "(END)",
240     "",
241     "(END)",
242     "(END)" },
243     { "b",
244     "bb",
245     "ababababab",
246     "abbabababab",
247     "abbabbabbabbabb" },
248     { "b",
249     "bb",
250     "bbbbbb",
251     "bbbbbbb",
252     "bbbbbbbbbbbb" },
253     { "b+",
254     "bb",
255     "bbbbbb",
256     "bb",
257     "bb" },
258     { "b*",
259     "bb",
260     "bbbbbb",
261     "bb",
262     "bb" },
263     { "b*",
264     "bb",
265     "aaaaa",
266     "bbaaaaa",
267     "bbabbabbabbabbabb" },
268 nigel 91 { "b*",
269     "bb",
270     "aa\naa\n",
271     "bbaa\naa\n",
272     "bbabbabb\nbbabbabb\nbb" },
273     { "b*",
274     "bb",
275     "aa\raa\r",
276     "bbaa\raa\r",
277     "bbabbabb\rbbabbabb\rbb" },
278     { "b*",
279     "bb",
280     "aa\r\naa\r\n",
281     "bbaa\r\naa\r\n",
282     "bbabbabb\r\nbbabbabb\r\nbb" },
283     #ifdef SUPPORT_UTF8
284     { "b*",
285     "bb",
286     "\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8", // utf8
287     "bb\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8",
288     "bb\xE3\x83\x9B""bb""\xE3\x83\xBC""bb""\xE3\x83\xA0""bb""\xE3\x81\xB8""bb" },
289     { "b*",
290     "bb",
291     "\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n", // utf8
292     "bb\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n",
293     ("bb\xE3\x83\x9B""bb\r\nbb""\xE3\x83\xBC""bb\rbb""\xE3\x83\xA0"
294     "bb\nbb""\xE3\x81\xB8""bb\r\nbb") },
295     #endif
296 nigel 77 { "", NULL, NULL, NULL, NULL }
297     };
298    
299 nigel 91 #ifdef SUPPORT_UTF8
300     const bool support_utf8 = true;
301     #else
302     const bool support_utf8 = false;
303     #endif
304    
305 nigel 77 for (const ReplaceTest *t = tests; t->original != NULL; ++t) {
306 nigel 91 RE re(t->regexp, RE_Options(PCRE_NEWLINE_CRLF).set_utf8(support_utf8));
307     assert(re.error().empty());
308 nigel 77 string one(t->original);
309 nigel 91 CHECK(re.Replace(t->rewrite, &one));
310 nigel 77 CHECK_EQ(one, t->single);
311     string all(t->original);
312 nigel 91 CHECK(re.GlobalReplace(t->rewrite, &all) > 0);
313 nigel 77 CHECK_EQ(all, t->global);
314     }
315 nigel 91
316     // One final test: test \r\n replacement when we're not in CRLF mode
317     {
318     RE re("b*", RE_Options(PCRE_NEWLINE_CR).set_utf8(support_utf8));
319     assert(re.error().empty());
320     string all("aa\r\naa\r\n");
321     CHECK(re.GlobalReplace("bb", &all) > 0);
322     CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
323     }
324     {
325     RE re("b*", RE_Options(PCRE_NEWLINE_LF).set_utf8(support_utf8));
326     assert(re.error().empty());
327     string all("aa\r\naa\r\n");
328     CHECK(re.GlobalReplace("bb", &all) > 0);
329     CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
330     }
331     // TODO: test what happens when no PCRE_NEWLINE_* flag is set.
332     // Alas, the answer depends on how pcre was compiled.
333 nigel 77 }
334    
335     static void TestExtract() {
336     printf("Testing Extract\n");
337    
338     string s;
339    
340     CHECK(RE("(.*)@([^.]*)").Extract("\\2!\\1", "boris@kremvax.ru", &s));
341     CHECK_EQ(s, "kremvax!boris");
342    
343     // check the RE interface as well
344     CHECK(RE(".*").Extract("'\\0'", "foo", &s));
345     CHECK_EQ(s, "'foo'");
346     CHECK(!RE("bar").Extract("'\\0'", "baz", &s));
347     CHECK_EQ(s, "'foo'");
348     }
349    
350     static void TestConsume() {
351     printf("Testing Consume\n");
352    
353     string word;
354    
355     string s(" aaa b!@#$@#$cccc");
356     StringPiece input(s);
357    
358     RE r("\\s*(\\w+)"); // matches a word, possibly proceeded by whitespace
359     CHECK(r.Consume(&input, &word));
360     CHECK_EQ(word, "aaa");
361     CHECK(r.Consume(&input, &word));
362     CHECK_EQ(word, "b");
363     CHECK(! r.Consume(&input, &word));
364     }
365    
366     static void TestFindAndConsume() {
367     printf("Testing FindAndConsume\n");
368    
369     string word;
370    
371     string s(" aaa b!@#$@#$cccc");
372     StringPiece input(s);
373    
374     RE r("(\\w+)"); // matches a word
375     CHECK(r.FindAndConsume(&input, &word));
376     CHECK_EQ(word, "aaa");
377     CHECK(r.FindAndConsume(&input, &word));
378     CHECK_EQ(word, "b");
379     CHECK(r.FindAndConsume(&input, &word));
380     CHECK_EQ(word, "cccc");
381     CHECK(! r.FindAndConsume(&input, &word));
382     }
383    
384     static void TestMatchNumberPeculiarity() {
385     printf("Testing match-number peculiaraity\n");
386    
387     string word1;
388     string word2;
389     string word3;
390    
391     RE r("(foo)|(bar)|(baz)");
392     CHECK(r.PartialMatch("foo", &word1, &word2, &word3));
393     CHECK_EQ(word1, "foo");
394     CHECK_EQ(word2, "");
395     CHECK_EQ(word3, "");
396     CHECK(r.PartialMatch("bar", &word1, &word2, &word3));
397     CHECK_EQ(word1, "");
398     CHECK_EQ(word2, "bar");
399     CHECK_EQ(word3, "");
400     CHECK(r.PartialMatch("baz", &word1, &word2, &word3));
401     CHECK_EQ(word1, "");
402     CHECK_EQ(word2, "");
403     CHECK_EQ(word3, "baz");
404     CHECK(!r.PartialMatch("f", &word1, &word2, &word3));
405    
406     string a;
407     CHECK(RE("(foo)|hello").FullMatch("hello", &a));
408     CHECK_EQ(a, "");
409     }
410    
411 nigel 87 static void TestRecursion() {
412 nigel 77 printf("Testing recursion\n");
413    
414 nigel 87 // Get one string that passes (sometimes), one that never does.
415     string text_good("abcdefghijk");
416     string text_bad("acdefghijkl");
417    
418     // According to pcretest, matching text_good against (\w+)*b
419     // requires match_limit of at least 8192, and match_recursion_limit
420     // of at least 37.
421    
422     RE_Options options_ml;
423     options_ml.set_match_limit(8192);
424     RE re("(\\w+)*b", options_ml);
425     CHECK(re.PartialMatch(text_good) == true);
426     CHECK(re.PartialMatch(text_bad) == false);
427     CHECK(re.FullMatch(text_good) == false);
428     CHECK(re.FullMatch(text_bad) == false);
429    
430     options_ml.set_match_limit(1024);
431     RE re2("(\\w+)*b", options_ml);
432     CHECK(re2.PartialMatch(text_good) == false); // because of match_limit
433     CHECK(re2.PartialMatch(text_bad) == false);
434     CHECK(re2.FullMatch(text_good) == false);
435     CHECK(re2.FullMatch(text_bad) == false);
436    
437     RE_Options options_mlr;
438     options_mlr.set_match_limit_recursion(50);
439     RE re3("(\\w+)*b", options_mlr);
440     CHECK(re3.PartialMatch(text_good) == true);
441     CHECK(re3.PartialMatch(text_bad) == false);
442     CHECK(re3.FullMatch(text_good) == false);
443     CHECK(re3.FullMatch(text_bad) == false);
444    
445     options_mlr.set_match_limit_recursion(10);
446     RE re4("(\\w+)*b", options_mlr);
447     CHECK(re4.PartialMatch(text_good) == false);
448     CHECK(re4.PartialMatch(text_bad) == false);
449     CHECK(re4.FullMatch(text_good) == false);
450     CHECK(re4.FullMatch(text_bad) == false);
451 nigel 77 }
452    
453 nigel 93 // A meta-quoted string, interpreted as a pattern, should always match
454     // the original unquoted string.
455     static void TestQuoteMeta(string unquoted, RE_Options options = RE_Options()) {
456     string quoted = RE::QuoteMeta(unquoted);
457     RE re(quoted, options);
458     CHECK(re.FullMatch(unquoted));
459     }
460    
461     // A string containing meaningful regexp characters, which is then meta-
462     // quoted, should not generally match a string the unquoted string does.
463     static void NegativeTestQuoteMeta(string unquoted, string should_not_match,
464     RE_Options options = RE_Options()) {
465     string quoted = RE::QuoteMeta(unquoted);
466     RE re(quoted, options);
467     CHECK(!re.FullMatch(should_not_match));
468     }
469    
470     // Tests that quoted meta characters match their original strings,
471     // and that a few things that shouldn't match indeed do not.
472     static void TestQuotaMetaSimple() {
473     TestQuoteMeta("foo");
474     TestQuoteMeta("foo.bar");
475     TestQuoteMeta("foo\\.bar");
476     TestQuoteMeta("[1-9]");
477     TestQuoteMeta("1.5-2.0?");
478     TestQuoteMeta("\\d");
479     TestQuoteMeta("Who doesn't like ice cream?");
480     TestQuoteMeta("((a|b)c?d*e+[f-h]i)");
481     TestQuoteMeta("((?!)xxx).*yyy");
482     TestQuoteMeta("([");
483     }
484    
485     static void TestQuoteMetaSimpleNegative() {
486     NegativeTestQuoteMeta("foo", "bar");
487     NegativeTestQuoteMeta("...", "bar");
488     NegativeTestQuoteMeta("\\.", ".");
489     NegativeTestQuoteMeta("\\.", "..");
490     NegativeTestQuoteMeta("(a)", "a");
491     NegativeTestQuoteMeta("(a|b)", "a");
492     NegativeTestQuoteMeta("(a|b)", "(a)");
493     NegativeTestQuoteMeta("(a|b)", "a|b");
494     NegativeTestQuoteMeta("[0-9]", "0");
495     NegativeTestQuoteMeta("[0-9]", "0-9");
496     NegativeTestQuoteMeta("[0-9]", "[9]");
497     NegativeTestQuoteMeta("((?!)xxx)", "xxx");
498     }
499    
500     static void TestQuoteMetaLatin1() {
501     TestQuoteMeta("3\xb2 = 9");
502     }
503    
// Quoting tests for multi-byte UTF-8 sequences.  The body is compiled only
// when SUPPORT_UTF8 is defined; otherwise the function does nothing.
static void TestQuoteMetaUtf8() {
#ifdef SUPPORT_UTF8
  TestQuoteMeta("Pl\xc3\xa1\x63ido Domingo", pcrecpp::UTF8());
  // No fancy utf8
  TestQuoteMeta("xyz", pcrecpp::UTF8());
  // 2-byte utf8 (degree symbol)
  TestQuoteMeta("\xc2\xb0", pcrecpp::UTF8());
  // As a middle character
  TestQuoteMeta("27\xc2\xb0 degrees", pcrecpp::UTF8());
  // 3-byte utf8 (double prime)
  TestQuoteMeta("\xe2\x80\xb3", pcrecpp::UTF8());
  // 4-byte utf8 (music note)
  TestQuoteMeta("\xf0\x9d\x85\x9f", pcrecpp::UTF8());
  // Interpreted as Latin-1, but should still work
  TestQuoteMeta("27\xc2\xb0");
  // 2-byte utf (degree symbol): the candidate has a backslash before each
  // byte of the sequence and must not be matched.
  NegativeTestQuoteMeta("27\xc2\xb0",
                        "27\\\xc2\\\xb0",
                        pcrecpp::UTF8());
#endif
}
518    
519     static void TestQuoteMetaAll() {
520     printf("Testing QuoteMeta\n");
521     TestQuotaMetaSimple();
522     TestQuoteMetaSimpleNegative();
523     TestQuoteMetaLatin1();
524     TestQuoteMetaUtf8();
525     }
526    
527 nigel 81 //
528     // Options tests contributed by
529     // Giuseppe Maxia, CTO, Stardata s.r.l.
530     // July 2005
531     //
532     static void GetOneOptionResult(
533     const char *option_name,
534     const char *regex,
535     const char *str,
536     RE_Options options,
537     bool full,
538     string expected) {
539 nigel 77
540 nigel 81 printf("Testing Option <%s>\n", option_name);
541     if(VERBOSE_TEST)
542     printf("/%s/ finds \"%s\" within \"%s\" \n",
543     regex,
544     expected.c_str(),
545     str);
546     string captured("");
547     if (full)
548     RE(regex,options).FullMatch(str, &captured);
549     else
550     RE(regex,options).PartialMatch(str, &captured);
551     CHECK_EQ(captured, expected);
552     }
553    
554     static void TestOneOption(
555     const char *option_name,
556     const char *regex,
557     const char *str,
558     RE_Options options,
559     bool full,
560     bool assertive = true) {
561    
562     printf("Testing Option <%s>\n", option_name);
563     if (VERBOSE_TEST)
564     printf("'%s' %s /%s/ \n",
565     str,
566     (assertive? "matches" : "doesn't match"),
567     regex);
568     if (assertive) {
569     if (full)
570     CHECK(RE(regex,options).FullMatch(str));
571     else
572     CHECK(RE(regex,options).PartialMatch(str));
573     } else {
574     if (full)
575     CHECK(!RE(regex,options).FullMatch(str));
576     else
577     CHECK(!RE(regex,options).PartialMatch(str));
578     }
579     }
580    
581     static void Test_CASELESS() {
582     RE_Options options;
583     RE_Options options2;
584    
585     options.set_caseless(true);
586     TestOneOption("CASELESS (class)", "HELLO", "hello", options, false);
587     TestOneOption("CASELESS (class2)", "HELLO", "hello", options2.set_caseless(true), false);
588     TestOneOption("CASELESS (class)", "^[A-Z]+$", "Hello", options, false);
589    
590     TestOneOption("CASELESS (function)", "HELLO", "hello", pcrecpp::CASELESS(), false);
591     TestOneOption("CASELESS (function)", "^[A-Z]+$", "Hello", pcrecpp::CASELESS(), false);
592     options.set_caseless(false);
593     TestOneOption("no CASELESS", "HELLO", "hello", options, false, false);
594     }
595    
596     static void Test_MULTILINE() {
597     RE_Options options;
598     RE_Options options2;
599     const char *str = "HELLO\n" "cruel\n" "world\n";
600    
601     options.set_multiline(true);
602     TestOneOption("MULTILINE (class)", "^cruel$", str, options, false);
603     TestOneOption("MULTILINE (class2)", "^cruel$", str, options2.set_multiline(true), false);
604     TestOneOption("MULTILINE (function)", "^cruel$", str, pcrecpp::MULTILINE(), false);
605     options.set_multiline(false);
606     TestOneOption("no MULTILINE", "^cruel$", str, options, false, false);
607     }
608    
609     static void Test_DOTALL() {
610     RE_Options options;
611     RE_Options options2;
612     const char *str = "HELLO\n" "cruel\n" "world";
613    
614     options.set_dotall(true);
615     TestOneOption("DOTALL (class)", "HELLO.*world", str, options, true);
616     TestOneOption("DOTALL (class2)", "HELLO.*world", str, options2.set_dotall(true), true);
617     TestOneOption("DOTALL (function)", "HELLO.*world", str, pcrecpp::DOTALL(), true);
618     options.set_dotall(false);
619     TestOneOption("no DOTALL", "HELLO.*world", str, options, true, false);
620     }
621    
622     static void Test_DOLLAR_ENDONLY() {
623     RE_Options options;
624     RE_Options options2;
625     const char *str = "HELLO world\n";
626    
627     TestOneOption("no DOLLAR_ENDONLY", "world$", str, options, false);
628     options.set_dollar_endonly(true);
629     TestOneOption("DOLLAR_ENDONLY 1", "world$", str, options, false, false);
630     TestOneOption("DOLLAR_ENDONLY 2", "world$", str, options2.set_dollar_endonly(true), false, false);
631     }
632    
633     static void Test_EXTRA() {
634     RE_Options options;
635     const char *str = "HELLO";
636    
637     options.set_extra(true);
638     TestOneOption("EXTRA 1", "\\HELL\\O", str, options, true, false );
639     TestOneOption("EXTRA 2", "\\HELL\\O", str, RE_Options().set_extra(true), true, false );
640     options.set_extra(false);
641     TestOneOption("no EXTRA", "\\HELL\\O", str, options, true );
642     }
643    
644     static void Test_EXTENDED() {
645     RE_Options options;
646     RE_Options options2;
647     const char *str = "HELLO world";
648    
649     options.set_extended(true);
650     TestOneOption("EXTENDED (class)", "HELLO world", str, options, false, false);
651     TestOneOption("EXTENDED (class2)", "HELLO world", str, options2.set_extended(true), false, false);
652     TestOneOption("EXTENDED (class)",
653     "^ HE L{2} O "
654     "\\s+ "
655     "\\w+ $ ",
656     str,
657     options,
658     false);
659    
660     TestOneOption("EXTENDED (function)", "HELLO world", str, pcrecpp::EXTENDED(), false, false);
661     TestOneOption("EXTENDED (function)",
662     "^ HE L{2} O "
663     "\\s+ "
664     "\\w+ $ ",
665     str,
666     pcrecpp::EXTENDED(),
667     false);
668    
669     options.set_extended(false);
670     TestOneOption("no EXTENDED", "HELLO world", str, options, false);
671     }
672    
673     static void Test_NO_AUTO_CAPTURE() {
674     RE_Options options;
675     const char *str = "HELLO world";
676     string captured;
677    
678     printf("Testing Option <no NO_AUTO_CAPTURE>\n");
679     if (VERBOSE_TEST)
680     printf("parentheses capture text\n");
681     RE re("(world|universe)$", options);
682     CHECK(re.Extract("\\1", str , &captured));
683     CHECK_EQ(captured, "world");
684     options.set_no_auto_capture(true);
685     printf("testing Option <NO_AUTO_CAPTURE>\n");
686     if (VERBOSE_TEST)
687     printf("parentheses do not capture text\n");
688     re.Extract("\\1",str, &captured );
689     CHECK_EQ(captured, "world");
690     }
691    
692     static void Test_UNGREEDY() {
693     RE_Options options;
694     const char *str = "HELLO, 'this' is the 'world'";
695    
696     options.set_ungreedy(true);
697     GetOneOptionResult("UNGREEDY 1", "('.*')", str, options, false, "'this'" );
698     GetOneOptionResult("UNGREEDY 2", "('.*')", str, RE_Options().set_ungreedy(true), false, "'this'" );
699     GetOneOptionResult("UNGREEDY", "('.*?')", str, options, false, "'this' is the 'world'" );
700    
701     options.set_ungreedy(false);
702     GetOneOptionResult("no UNGREEDY", "('.*')", str, options, false, "'this' is the 'world'" );
703     GetOneOptionResult("no UNGREEDY", "('.*?')", str, options, false, "'this'" );
704     }
705    
706     static void Test_all_options() {
707     const char *str = "HELLO\n" "cruel\n" "world";
708     RE_Options options;
709     options.set_all_options(PCRE_CASELESS | PCRE_DOTALL);
710    
711     TestOneOption("all_options (CASELESS|DOTALL)", "^hello.*WORLD", str , options, false);
712     options.set_all_options(0);
713     TestOneOption("all_options (0)", "^hello.*WORLD", str , options, false, false);
714     options.set_all_options(PCRE_MULTILINE | PCRE_EXTENDED);
715    
716     TestOneOption("all_options (MULTILINE|EXTENDED)", " ^ c r u e l $ ", str, options, false);
717     TestOneOption("all_options (MULTILINE|EXTENDED) with constructor",
718     " ^ c r u e l $ ",
719     str,
720     RE_Options(PCRE_MULTILINE | PCRE_EXTENDED),
721     false);
722    
723     TestOneOption("all_options (MULTILINE|EXTENDED) with concatenation",
724     " ^ c r u e l $ ",
725     str,
726     RE_Options()
727     .set_multiline(true)
728     .set_extended(true),
729     false);
730    
731     options.set_all_options(0);
732     TestOneOption("all_options (0)", "^ c r u e l $", str, options, false, false);
733    
734     }
735    
736     static void TestOptions() {
737     printf("Testing Options\n");
738     Test_CASELESS();
739     Test_MULTILINE();
740     Test_DOTALL();
741     Test_DOLLAR_ENDONLY();
742     Test_EXTENDED();
743     Test_NO_AUTO_CAPTURE();
744     Test_UNGREEDY();
745     Test_EXTRA();
746     Test_all_options();
747     }
748    
749 nigel 93 static void TestConstructors() {
750     printf("Testing constructors\n");
751    
752     RE_Options options;
753     options.set_dotall(true);
754     const char *str = "HELLO\n" "cruel\n" "world";
755    
756     RE orig("HELLO.*world", options);
757     CHECK(orig.FullMatch(str));
758    
759     RE copy1(orig);
760     CHECK(copy1.FullMatch(str));
761    
762     RE copy2("not a match");
763     CHECK(!copy2.FullMatch(str));
764     copy2 = copy1;
765     CHECK(copy2.FullMatch(str));
766     copy2 = orig;
767     CHECK(copy2.FullMatch(str));
768    
769     // Make sure when we assign to ourselves, nothing bad happens
770     orig = orig;
771     copy1 = copy1;
772     copy2 = copy2;
773     CHECK(orig.FullMatch(str));
774     CHECK(copy1.FullMatch(str));
775     CHECK(copy2.FullMatch(str));
776     }
777    
778 nigel 77 int main(int argc, char** argv) {
779     // Treat any flag as --help
780     if (argc > 1 && argv[1][0] == '-') {
781     printf("Usage: %s [timing1|timing2|timing3 num-iters]\n"
782     " If 'timingX ###' is specified, run the given timing test\n"
783     " with the given number of iterations, rather than running\n"
784     " the default corectness test.\n", argv[0]);
785     return 0;
786     }
787    
788     if (argc > 1) {
789     if ( argc == 2 || atoi(argv[2]) == 0) {
790     printf("timing mode needs a num-iters argument\n");
791     return 1;
792     }
793     if (!strcmp(argv[1], "timing1"))
794     Timing1(atoi(argv[2]));
795     else if (!strcmp(argv[1], "timing2"))
796     Timing2(atoi(argv[2]));
797     else if (!strcmp(argv[1], "timing3"))
798     Timing3(atoi(argv[2]));
799     else
800     printf("Unknown argument '%s'\n", argv[1]);
801     return 0;
802     }
803    
804     printf("Testing FullMatch\n");
805    
806     int i;
807     string s;
808    
809     /***** FullMatch with no args *****/
810    
811     CHECK(RE("h.*o").FullMatch("hello"));
812 ph10 179 CHECK(!RE("h.*o").FullMatch("othello")); // Must be anchored at front
813     CHECK(!RE("h.*o").FullMatch("hello!")); // Must be anchored at end
814     CHECK(RE("a*").FullMatch("aaaa")); // Fullmatch with normal op
815     CHECK(RE("a*?").FullMatch("aaaa")); // Fullmatch with nongreedy op
816     CHECK(RE("a*?\\z").FullMatch("aaaa")); // Two unusual ops
817 nigel 77
818     /***** FullMatch with args *****/
819    
820     // Zero-arg
821     CHECK(RE("\\d+").FullMatch("1001"));
822    
823     // Single-arg
824     CHECK(RE("(\\d+)").FullMatch("1001", &i));
825     CHECK_EQ(i, 1001);
826     CHECK(RE("(-?\\d+)").FullMatch("-123", &i));
827     CHECK_EQ(i, -123);
828     CHECK(!RE("()\\d+").FullMatch("10", &i));
829     CHECK(!RE("(\\d+)").FullMatch("1234567890123456789012345678901234567890",
830     &i));
831    
832     // Digits surrounding integer-arg
833     CHECK(RE("1(\\d*)4").FullMatch("1234", &i));
834     CHECK_EQ(i, 23);
835     CHECK(RE("(\\d)\\d+").FullMatch("1234", &i));
836     CHECK_EQ(i, 1);
837     CHECK(RE("(-\\d)\\d+").FullMatch("-1234", &i));
838     CHECK_EQ(i, -1);
839     CHECK(RE("(\\d)").PartialMatch("1234", &i));
840     CHECK_EQ(i, 1);
841     CHECK(RE("(-\\d)").PartialMatch("-1234", &i));
842     CHECK_EQ(i, -1);
843    
844     // String-arg
845     CHECK(RE("h(.*)o").FullMatch("hello", &s));
846     CHECK_EQ(s, string("ell"));
847    
848     // StringPiece-arg
849     StringPiece sp;
850     CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &sp, &i));
851     CHECK_EQ(sp.size(), 4);
852     CHECK(memcmp(sp.data(), "ruby", 4) == 0);
853     CHECK_EQ(i, 1234);
854    
855     // Multi-arg
856     CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &s, &i));
857     CHECK_EQ(s, string("ruby"));
858     CHECK_EQ(i, 1234);
859    
860 ph10 263 // Ignore non-void* NULL arg
861     CHECK(RE("he(.*)lo").FullMatch("hello", (char*)NULL));
862     CHECK(RE("h(.*)o").FullMatch("hello", (string*)NULL));
863     CHECK(RE("h(.*)o").FullMatch("hello", (StringPiece*)NULL));
864     CHECK(RE("(.*)").FullMatch("1234", (int*)NULL));
865     CHECK(RE("(.*)").FullMatch("1234567890123456", (long long*)NULL));
866     CHECK(RE("(.*)").FullMatch("123.4567890123456", (double*)NULL));
867     CHECK(RE("(.*)").FullMatch("123.4567890123456", (float*)NULL));
868    
869     // Fail on non-void* NULL arg if the match doesn't parse for the given type.
870     CHECK(!RE("h(.*)lo").FullMatch("hello", &s, (char*)NULL));
871     CHECK(!RE("(.*)").FullMatch("hello", (int*)NULL));
872     CHECK(!RE("(.*)").FullMatch("1234567890123456", (int*)NULL));
873     CHECK(!RE("(.*)").FullMatch("hello", (double*)NULL));
874     CHECK(!RE("(.*)").FullMatch("hello", (float*)NULL));
875    
876 nigel 77 // Ignored arg
877     CHECK(RE("(\\w+)(:)(\\d+)").FullMatch("ruby:1234", &s, (void*)NULL, &i));
878     CHECK_EQ(s, string("ruby"));
879     CHECK_EQ(i, 1234);
880    
881     // Type tests
882     {
883     char c;
884     CHECK(RE("(H)ello").FullMatch("Hello", &c));
885     CHECK_EQ(c, 'H');
886     }
887     {
888     unsigned char c;
889     CHECK(RE("(H)ello").FullMatch("Hello", &c));
890     CHECK_EQ(c, static_cast<unsigned char>('H'));
891     }
892     {
893     short v;
894     CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
895     CHECK(RE("(-?\\d+)").FullMatch("-100", &v)); CHECK_EQ(v, -100);
896     CHECK(RE("(-?\\d+)").FullMatch("32767", &v)); CHECK_EQ(v, 32767);
897     CHECK(RE("(-?\\d+)").FullMatch("-32768", &v)); CHECK_EQ(v, -32768);
898     CHECK(!RE("(-?\\d+)").FullMatch("-32769", &v));
899     CHECK(!RE("(-?\\d+)").FullMatch("32768", &v));
900     }
901     {
902     unsigned short v;
903     CHECK(RE("(\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
904     CHECK(RE("(\\d+)").FullMatch("32767", &v)); CHECK_EQ(v, 32767);
905     CHECK(RE("(\\d+)").FullMatch("65535", &v)); CHECK_EQ(v, 65535);
906     CHECK(!RE("(\\d+)").FullMatch("65536", &v));
907     }
908     {
909     int v;
910     static const int max_value = 0x7fffffff;
911     static const int min_value = -max_value - 1;
912     CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
913     CHECK(RE("(-?\\d+)").FullMatch("-100", &v)); CHECK_EQ(v, -100);
914     CHECK(RE("(-?\\d+)").FullMatch("2147483647", &v)); CHECK_EQ(v, max_value);
915     CHECK(RE("(-?\\d+)").FullMatch("-2147483648", &v)); CHECK_EQ(v, min_value);
916     CHECK(!RE("(-?\\d+)").FullMatch("-2147483649", &v));
917     CHECK(!RE("(-?\\d+)").FullMatch("2147483648", &v));
918     }
919     {
920     unsigned int v;
921     static const unsigned int max_value = 0xfffffffful;
922     CHECK(RE("(\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
923     CHECK(RE("(\\d+)").FullMatch("4294967295", &v)); CHECK_EQ(v, max_value);
924     CHECK(!RE("(\\d+)").FullMatch("4294967296", &v));
925     }
926     #ifdef HAVE_LONG_LONG
927 ph10 193 # if defined(__MINGW__) || defined(__MINGW32__)
928     # define LLD "%I64d"
929 ph10 201 # define LLU "%I64u"
930 ph10 193 # else
931     # define LLD "%lld"
932 ph10 201 # define LLU "%llu"
933 ph10 193 # endif
934 nigel 77 {
935     long long v;
936     static const long long max_value = 0x7fffffffffffffffLL;
937     static const long long min_value = -max_value - 1;
938 ph10 257 char buf[32]; // definitely big enough for a long long
939 nigel 77
940     CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
941     CHECK(RE("(-?\\d+)").FullMatch("-100",&v)); CHECK_EQ(v, -100);
942    
943 ph10 257 sprintf(buf, LLD, max_value);
944 nigel 77 CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
945    
946 ph10 257 sprintf(buf, LLD, min_value);
947 nigel 77 CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, min_value);
948    
949 ph10 257 sprintf(buf, LLD, max_value);
950 nigel 77 assert(buf[strlen(buf)-1] != '9');
951     buf[strlen(buf)-1]++;
952     CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
953    
954 ph10 257 sprintf(buf, LLD, min_value);
955 nigel 77 assert(buf[strlen(buf)-1] != '9');
956     buf[strlen(buf)-1]++;
957     CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
958     }
959     #endif
960     #if defined HAVE_UNSIGNED_LONG_LONG && defined HAVE_LONG_LONG
961     {
962     unsigned long long v;
963     long long v2;
964     static const unsigned long long max_value = 0xffffffffffffffffULL;
965 ph10 257 char buf[32]; // definitely big enough for a unsigned long long
966 nigel 77
967     CHECK(RE("(-?\\d+)").FullMatch("100",&v)); CHECK_EQ(v, 100);
968     CHECK(RE("(-?\\d+)").FullMatch("-100",&v2)); CHECK_EQ(v2, -100);
969    
970 ph10 257 sprintf(buf, LLU, max_value);
971 nigel 77 CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
972    
973     assert(buf[strlen(buf)-1] != '9');
974     buf[strlen(buf)-1]++;
975     CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
976     }
977     #endif
978     {
979     float v;
980     CHECK(RE("(.*)").FullMatch("100", &v));
981     CHECK(RE("(.*)").FullMatch("-100.", &v));
982     CHECK(RE("(.*)").FullMatch("1e23", &v));
983     }
984     {
985     double v;
986     CHECK(RE("(.*)").FullMatch("100", &v));
987     CHECK(RE("(.*)").FullMatch("-100.", &v));
988     CHECK(RE("(.*)").FullMatch("1e23", &v));
989     }
990    
991     // Check that matching is fully anchored
992     CHECK(!RE("(\\d+)").FullMatch("x1001", &i));
993     CHECK(!RE("(\\d+)").FullMatch("1001x", &i));
994     CHECK(RE("x(\\d+)").FullMatch("x1001", &i)); CHECK_EQ(i, 1001);
995     CHECK(RE("(\\d+)x").FullMatch("1001x", &i)); CHECK_EQ(i, 1001);
996    
997     // Braces
998     CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcd"));
999     CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcde"));
1000     CHECK(!RE("[0-9a-f+.-]{5,}").FullMatch("0abc"));
1001    
1002     // Complicated RE
1003     CHECK(RE("foo|bar|[A-Z]").FullMatch("foo"));
1004     CHECK(RE("foo|bar|[A-Z]").FullMatch("bar"));
1005     CHECK(RE("foo|bar|[A-Z]").FullMatch("X"));
1006     CHECK(!RE("foo|bar|[A-Z]").FullMatch("XY"));
1007    
1008     // Check full-match handling (needs '$' tacked on internally)
1009     CHECK(RE("fo|foo").FullMatch("fo"));
1010     CHECK(RE("fo|foo").FullMatch("foo"));
1011     CHECK(RE("fo|foo$").FullMatch("fo"));
1012     CHECK(RE("fo|foo$").FullMatch("foo"));
1013     CHECK(RE("foo$").FullMatch("foo"));
1014     CHECK(!RE("foo\\$").FullMatch("foo$bar"));
1015     CHECK(!RE("fo|bar").FullMatch("fox"));
1016    
1017     // Uncomment the following if we change the handling of '$' to
1018     // prevent it from matching a trailing newline
1019     if (false) {
1020     // Check that we don't get bitten by pcre's special handling of a
1021     // '\n' at the end of the string matching '$'
1022     CHECK(!RE("foo$").PartialMatch("foo\n"));
1023     }
1024    
1025     // Number of args
1026     int a[16];
1027     CHECK(RE("").FullMatch(""));
1028    
1029     memset(a, 0, sizeof(0));
1030     CHECK(RE("(\\d){1}").FullMatch("1",
1031     &a[0]));
1032     CHECK_EQ(a[0], 1);
1033    
1034     memset(a, 0, sizeof(0));
1035     CHECK(RE("(\\d)(\\d)").FullMatch("12",
1036     &a[0], &a[1]));
1037     CHECK_EQ(a[0], 1);
1038     CHECK_EQ(a[1], 2);
1039    
1040     memset(a, 0, sizeof(0));
1041     CHECK(RE("(\\d)(\\d)(\\d)").FullMatch("123",
1042     &a[0], &a[1], &a[2]));
1043     CHECK_EQ(a[0], 1);
1044     CHECK_EQ(a[1], 2);
1045     CHECK_EQ(a[2], 3);
1046    
1047     memset(a, 0, sizeof(0));
1048     CHECK(RE("(\\d)(\\d)(\\d)(\\d)").FullMatch("1234",
1049     &a[0], &a[1], &a[2], &a[3]));
1050     CHECK_EQ(a[0], 1);
1051     CHECK_EQ(a[1], 2);
1052     CHECK_EQ(a[2], 3);
1053     CHECK_EQ(a[3], 4);
1054    
1055     memset(a, 0, sizeof(0));
1056     CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("12345",
1057     &a[0], &a[1], &a[2],
1058     &a[3], &a[4]));
1059     CHECK_EQ(a[0], 1);
1060     CHECK_EQ(a[1], 2);
1061     CHECK_EQ(a[2], 3);
1062     CHECK_EQ(a[3], 4);
1063     CHECK_EQ(a[4], 5);
1064    
1065     memset(a, 0, sizeof(0));
1066     CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("123456",
1067     &a[0], &a[1], &a[2],
1068     &a[3], &a[4], &a[5]));
1069     CHECK_EQ(a[0], 1);
1070     CHECK_EQ(a[1], 2);
1071     CHECK_EQ(a[2], 3);
1072     CHECK_EQ(a[3], 4);
1073     CHECK_EQ(a[4], 5);
1074     CHECK_EQ(a[5], 6);
1075    
1076     memset(a, 0, sizeof(0));
1077     CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("1234567",
1078     &a[0], &a[1], &a[2], &a[3],
1079     &a[4], &a[5], &a[6]));
1080     CHECK_EQ(a[0], 1);
1081     CHECK_EQ(a[1], 2);
1082     CHECK_EQ(a[2], 3);
1083     CHECK_EQ(a[3], 4);
1084     CHECK_EQ(a[4], 5);
1085     CHECK_EQ(a[5], 6);
1086     CHECK_EQ(a[6], 7);
1087    
1088     memset(a, 0, sizeof(0));
1089     CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)"
1090     "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch(
1091     "1234567890123456",
1092     &a[0], &a[1], &a[2], &a[3],
1093     &a[4], &a[5], &a[6], &a[7],
1094     &a[8], &a[9], &a[10], &a[11],
1095     &a[12], &a[13], &a[14], &a[15]));
1096     CHECK_EQ(a[0], 1);
1097     CHECK_EQ(a[1], 2);
1098     CHECK_EQ(a[2], 3);
1099     CHECK_EQ(a[3], 4);
1100     CHECK_EQ(a[4], 5);
1101     CHECK_EQ(a[5], 6);
1102     CHECK_EQ(a[6], 7);
1103     CHECK_EQ(a[7], 8);
1104     CHECK_EQ(a[8], 9);
1105     CHECK_EQ(a[9], 0);
1106     CHECK_EQ(a[10], 1);
1107     CHECK_EQ(a[11], 2);
1108     CHECK_EQ(a[12], 3);
1109     CHECK_EQ(a[13], 4);
1110     CHECK_EQ(a[14], 5);
1111     CHECK_EQ(a[15], 6);
1112    
1113     /***** PartialMatch *****/
1114    
1115     printf("Testing PartialMatch\n");
1116    
1117     CHECK(RE("h.*o").PartialMatch("hello"));
1118     CHECK(RE("h.*o").PartialMatch("othello"));
1119     CHECK(RE("h.*o").PartialMatch("hello!"));
1120     CHECK(RE("((((((((((((((((((((x))))))))))))))))))))").PartialMatch("x"));
1121    
1122 nigel 93 /***** other tests *****/
1123    
1124 nigel 77 RadixTests();
1125     TestReplace();
1126     TestExtract();
1127     TestConsume();
1128     TestFindAndConsume();
1129 nigel 93 TestQuoteMetaAll();
1130 nigel 77 TestMatchNumberPeculiarity();
1131    
1132     // Check the pattern() accessor
1133     {
1134     const string kPattern = "http://([^/]+)/.*";
1135     const RE re(kPattern);
1136     CHECK_EQ(kPattern, re.pattern());
1137     }
1138    
1139     // Check RE error field.
1140     {
1141     RE re("foo");
1142     CHECK(re.error().empty()); // Must have no error
1143     }
1144    
1145     #ifdef SUPPORT_UTF8
1146     // Check UTF-8 handling
1147     {
1148     printf("Testing UTF-8 handling\n");
1149    
1150     // Three Japanese characters (nihongo)
1151 ph10 256 const unsigned char utf8_string[] = {
1152 nigel 77 0xe6, 0x97, 0xa5, // 65e5
1153     0xe6, 0x9c, 0xac, // 627c
1154     0xe8, 0xaa, 0x9e, // 8a9e
1155     0
1156     };
1157 ph10 256 const unsigned char utf8_pattern[] = {
1158 nigel 77 '.',
1159     0xe6, 0x9c, 0xac, // 627c
1160     '.',
1161     0
1162     };
1163    
1164     // Both should match in either mode, bytes or UTF-8
1165     RE re_test1(".........");
1166     CHECK(re_test1.FullMatch(utf8_string));
1167     RE re_test2("...", pcrecpp::UTF8());
1168     CHECK(re_test2.FullMatch(utf8_string));
1169    
1170     // Check that '.' matches one byte or UTF-8 character
1171     // according to the mode.
1172     string ss;
1173     RE re_test3("(.)");
1174     CHECK(re_test3.PartialMatch(utf8_string, &ss));
1175     CHECK_EQ(ss, string("\xe6"));
1176     RE re_test4("(.)", pcrecpp::UTF8());
1177     CHECK(re_test4.PartialMatch(utf8_string, &ss));
1178     CHECK_EQ(ss, string("\xe6\x97\xa5"));
1179    
1180     // Check that string matches itself in either mode
1181     RE re_test5(utf8_string);
1182     CHECK(re_test5.FullMatch(utf8_string));
1183     RE re_test6(utf8_string, pcrecpp::UTF8());
1184     CHECK(re_test6.FullMatch(utf8_string));
1185    
1186     // Check that pattern matches string only in UTF8 mode
1187     RE re_test7(utf8_pattern);
1188     CHECK(!re_test7.FullMatch(utf8_string));
1189     RE re_test8(utf8_pattern, pcrecpp::UTF8());
1190     CHECK(re_test8.FullMatch(utf8_string));
1191     }
1192    
1193     // Check that ungreedy, UTF8 regular expressions don't match when they
1194     // oughtn't -- see bug 82246.
1195     {
1196     // This code always worked.
1197     const char* pattern = "\\w+X";
1198     const string target = "a aX";
1199     RE match_sentence(pattern);
1200     RE match_sentence_re(pattern, pcrecpp::UTF8());
1201    
1202     CHECK(!match_sentence.FullMatch(target));
1203     CHECK(!match_sentence_re.FullMatch(target));
1204     }
1205    
1206     {
1207     const char* pattern = "(?U)\\w+X";
1208     const string target = "a aX";
1209     RE match_sentence(pattern);
1210     RE match_sentence_re(pattern, pcrecpp::UTF8());
1211    
1212     CHECK(!match_sentence.FullMatch(target));
1213     CHECK(!match_sentence_re.FullMatch(target));
1214     }
1215     #endif /* def SUPPORT_UTF8 */
1216    
1217     printf("Testing error reporting\n");
1218    
1219     { RE re("a\\1"); CHECK(!re.error().empty()); }
1220     {
1221     RE re("a[x");
1222     CHECK(!re.error().empty());
1223     }
1224     {
1225     RE re("a[z-a]");
1226     CHECK(!re.error().empty());
1227     }
1228     {
1229     RE re("a[[:foobar:]]");
1230     CHECK(!re.error().empty());
1231     }
1232     {
1233     RE re("a(b");
1234     CHECK(!re.error().empty());
1235     }
1236     {
1237     RE re("a\\");
1238     CHECK(!re.error().empty());
1239     }
1240    
1241 nigel 87 // Test that recursion is stopped
1242     TestRecursion();
1243 nigel 77
1244 nigel 81 // Test Options
1245     if (getenv("VERBOSE_TEST") != NULL)
1246     VERBOSE_TEST = true;
1247     TestOptions();
1248    
1249 nigel 93 // Test the constructors
1250     TestConstructors();
1251    
1252 nigel 77 // Done
1253     printf("OK\n");
1254    
1255     return 0;
1256     }

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12