/[pcre]/code/trunk/pcrecpp_unittest.cc
ViewVC logotype

Contents of /code/trunk/pcrecpp_unittest.cc

Parent Directory Parent Directory | Revision Log Revision Log


Revision 594 - (hide annotations) (download)
Sun May 1 08:22:12 2011 UTC (2 years ago) by ph10
File size: 39168 byte(s)
Added #include <string.h> to some .cc files (needed in some environments).

1 nigel 93 // -*- coding: utf-8 -*-
2     //
3 ph10 474 // Copyright (c) 2005 - 2010, Google Inc.
4 nigel 77 // All rights reserved.
5     //
6     // Redistribution and use in source and binary forms, with or without
7     // modification, are permitted provided that the following conditions are
8     // met:
9     //
10     // * Redistributions of source code must retain the above copyright
11     // notice, this list of conditions and the following disclaimer.
12     // * Redistributions in binary form must reproduce the above
13     // copyright notice, this list of conditions and the following disclaimer
14     // in the documentation and/or other materials provided with the
15     // distribution.
16     // * Neither the name of Google Inc. nor the names of its
17     // contributors may be used to endorse or promote products derived from
18     // this software without specific prior written permission.
19     //
20     // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21     // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22     // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23     // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24     // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25     // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26     // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27     // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28     // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29     // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30     // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31     //
32     // Author: Sanjay Ghemawat
33     //
34     // TODO: Test extractions for PartialMatch/Consume
35    
36 ph10 200 #ifdef HAVE_CONFIG_H
37 ph10 236 #include "config.h"
38 ph10 200 #endif
39 ph10 199
40 nigel 77 #include <stdio.h>
41 ph10 594 #include <string.h> /* for memset and strcmp */
42 nigel 91 #include <cassert>
43 nigel 77 #include <vector>
44     #include "pcrecpp.h"
45    
46     using pcrecpp::StringPiece;
47     using pcrecpp::RE;
48     using pcrecpp::RE_Options;
49     using pcrecpp::Hex;
50     using pcrecpp::Octal;
51     using pcrecpp::CRadix;
52    
53 nigel 81 static bool VERBOSE_TEST = false;
54    
// CHECK prints the file, line and text of a failed condition to stderr and
// terminates the program with exit status 1.  Unlike assert() it is *not*
// controlled by NDEBUG, so the condition is evaluated in every compilation
// mode.  It is therefore safe to put expressions with side effects in it:
//    CHECK_EQ(fp->Write(x), 4)
#define CHECK(condition) do {                              \
    if (!(condition)) {                                    \
      fprintf(stderr, "%s:%d: Check failed: %s\n",         \
              __FILE__, __LINE__, #condition);             \
      exit(1);                                             \
    }                                                      \
  } while (0)

// CHECK_EQ(a, b) checks that a == b.  Both operands are parenthesised so an
// argument containing an operator of lower precedence than == (for example
// `flags & mask`) is compared as the caller wrote it.
#define CHECK_EQ(a, b)  CHECK((a) == (b))
68    
69     static void Timing1(int num_iters) {
70     // Same pattern lots of times
71     RE pattern("ruby:\\d+");
72     StringPiece p("ruby:1234");
73     for (int j = num_iters; j > 0; j--) {
74     CHECK(pattern.FullMatch(p));
75     }
76     }
77    
78     static void Timing2(int num_iters) {
79     // Same pattern lots of times
80     RE pattern("ruby:(\\d+)");
81     int i;
82     for (int j = num_iters; j > 0; j--) {
83     CHECK(pattern.FullMatch("ruby:1234", &i));
84     CHECK_EQ(i, 1234);
85     }
86     }
87    
88     static void Timing3(int num_iters) {
89     string text_string;
90     for (int j = num_iters; j > 0; j--) {
91     text_string += "this is another line\n";
92     }
93    
94     RE line_matcher(".*\n");
95     string line;
96     StringPiece text(text_string);
97     int counter = 0;
98     while (line_matcher.Consume(&text)) {
99     counter++;
100     }
101     printf("Matched %d lines\n", counter);
102     }
103    
#if 0  // change to "#if 1" if you have a way of defining VirtualProcessSize()

// Checks for memory leaks: compiles 100000 distinct patterns, samples the
// process size half way through and at the end, and requires the growth
// over the second half to stay below 2% of the final size.
static void LeakTest() {
  unsigned long long initial_size = 0;
  for (int i = 0; i < 100000; i++) {
    if (i == 50000) {
      initial_size = VirtualProcessSize();
      printf("Size after 50000: %llu\n", initial_size);
    }
    char buf[100];  // definitely big enough
    sprintf(buf, "pat%09d", i);
    RE newre(buf);
  }
  // Same type as initial_size, and matching the %llu conversion below.
  const unsigned long long final_size = VirtualProcessSize();
  printf("Size after 100000: %llu\n", final_size);
  // Subtract in floating point: an unsigned subtraction would wrap around
  // to a huge value if the process got smaller.
  const double growth =
      (double(final_size) - double(initial_size)) / double(final_size);
  printf("Growth: %0.2f%%\n", growth * 100);
  CHECK(growth < 0.02);  // Allow < 2% growth
}

#endif
126    
127     static void RadixTests() {
128     printf("Testing hex\n");
129    
130     #define CHECK_HEX(type, value) \
131     do { \
132     type v; \
133     CHECK(RE("([0-9a-fA-F]+)[uUlL]*").FullMatch(#value, Hex(&v))); \
134     CHECK_EQ(v, 0x ## value); \
135     CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0x" #value, CRadix(&v))); \
136     CHECK_EQ(v, 0x ## value); \
137     } while(0)
138    
139     CHECK_HEX(short, 2bad);
140     CHECK_HEX(unsigned short, 2badU);
141     CHECK_HEX(int, dead);
142     CHECK_HEX(unsigned int, deadU);
143     CHECK_HEX(long, 7eadbeefL);
144     CHECK_HEX(unsigned long, deadbeefUL);
145     #ifdef HAVE_LONG_LONG
146     CHECK_HEX(long long, 12345678deadbeefLL);
147     #endif
148     #ifdef HAVE_UNSIGNED_LONG_LONG
149     CHECK_HEX(unsigned long long, cafebabedeadbeefULL);
150     #endif
151    
152     #undef CHECK_HEX
153    
154     printf("Testing octal\n");
155    
156     #define CHECK_OCTAL(type, value) \
157     do { \
158     type v; \
159     CHECK(RE("([0-7]+)[uUlL]*").FullMatch(#value, Octal(&v))); \
160     CHECK_EQ(v, 0 ## value); \
161     CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0" #value, CRadix(&v))); \
162     CHECK_EQ(v, 0 ## value); \
163     } while(0)
164    
165     CHECK_OCTAL(short, 77777);
166     CHECK_OCTAL(unsigned short, 177777U);
167     CHECK_OCTAL(int, 17777777777);
168     CHECK_OCTAL(unsigned int, 37777777777U);
169     CHECK_OCTAL(long, 17777777777L);
170     CHECK_OCTAL(unsigned long, 37777777777UL);
171     #ifdef HAVE_LONG_LONG
172     CHECK_OCTAL(long long, 777777777777777777777LL);
173     #endif
174     #ifdef HAVE_UNSIGNED_LONG_LONG
175     CHECK_OCTAL(unsigned long long, 1777777777777777777777ULL);
176     #endif
177    
178     #undef CHECK_OCTAL
179    
180     printf("Testing decimal\n");
181    
182     #define CHECK_DECIMAL(type, value) \
183     do { \
184     type v; \
185     CHECK(RE("(-?[0-9]+)[uUlL]*").FullMatch(#value, &v)); \
186     CHECK_EQ(v, value); \
187     CHECK(RE("(-?[0-9a-fA-FxX]+)[uUlL]*").FullMatch(#value, CRadix(&v))); \
188     CHECK_EQ(v, value); \
189     } while(0)
190    
191     CHECK_DECIMAL(short, -1);
192     CHECK_DECIMAL(unsigned short, 9999);
193     CHECK_DECIMAL(int, -1000);
194     CHECK_DECIMAL(unsigned int, 12345U);
195     CHECK_DECIMAL(long, -10000000L);
196     CHECK_DECIMAL(unsigned long, 3083324652U);
197     #ifdef HAVE_LONG_LONG
198     CHECK_DECIMAL(long long, -100000000000000LL);
199     #endif
200     #ifdef HAVE_UNSIGNED_LONG_LONG
201     CHECK_DECIMAL(unsigned long long, 1234567890987654321ULL);
202     #endif
203    
204     #undef CHECK_DECIMAL
205    
206     }
207    
208     static void TestReplace() {
209     printf("Testing Replace\n");
210    
211     struct ReplaceTest {
212     const char *regexp;
213     const char *rewrite;
214     const char *original;
215     const char *single;
216     const char *global;
217 ph10 297 int global_count; // the expected return value from ReplaceAll
218 nigel 77 };
219     static const ReplaceTest tests[] = {
220     { "(qu|[b-df-hj-np-tv-z]*)([a-z]+)",
221     "\\2\\1ay",
222     "the quick brown fox jumps over the lazy dogs.",
223     "ethay quick brown fox jumps over the lazy dogs.",
224 ph10 297 "ethay ickquay ownbray oxfay umpsjay overay ethay azylay ogsday.",
225     9 },
226 nigel 77 { "\\w+",
227     "\\0-NOSPAM",
228     "paul.haahr@google.com",
229     "paul-NOSPAM.haahr@google.com",
230 ph10 297 "paul-NOSPAM.haahr-NOSPAM@google-NOSPAM.com-NOSPAM",
231     4 },
232 nigel 77 { "^",
233     "(START)",
234     "foo",
235     "(START)foo",
236 ph10 297 "(START)foo",
237     1 },
238 nigel 77 { "^",
239     "(START)",
240     "",
241     "(START)",
242 ph10 297 "(START)",
243     1 },
244 nigel 77 { "$",
245     "(END)",
246     "",
247     "(END)",
248 ph10 297 "(END)",
249     1 },
250 nigel 77 { "b",
251     "bb",
252     "ababababab",
253     "abbabababab",
254 ph10 297 "abbabbabbabbabb",
255     5 },
256 nigel 77 { "b",
257     "bb",
258     "bbbbbb",
259     "bbbbbbb",
260 ph10 297 "bbbbbbbbbbbb",
261     6 },
262 nigel 77 { "b+",
263     "bb",
264     "bbbbbb",
265     "bb",
266 ph10 297 "bb",
267     1 },
268 nigel 77 { "b*",
269     "bb",
270     "bbbbbb",
271     "bb",
272 ph10 474 "bbbb",
273     2 },
274 nigel 77 { "b*",
275     "bb",
276     "aaaaa",
277     "bbaaaaa",
278 ph10 297 "bbabbabbabbabbabb",
279     6 },
280 nigel 91 { "b*",
281     "bb",
282     "aa\naa\n",
283     "bbaa\naa\n",
284 ph10 297 "bbabbabb\nbbabbabb\nbb",
285     7 },
286 nigel 91 { "b*",
287     "bb",
288     "aa\raa\r",
289     "bbaa\raa\r",
290 ph10 297 "bbabbabb\rbbabbabb\rbb",
291     7 },
292 nigel 91 { "b*",
293     "bb",
294     "aa\r\naa\r\n",
295     "bbaa\r\naa\r\n",
296 ph10 297 "bbabbabb\r\nbbabbabb\r\nbb",
297     7 },
298 ph10 474 // Check empty-string matching (it's tricky!)
299     { "aa|b*",
300     "@",
301     "aa",
302     "@",
303     "@@",
304     2 },
305     { "b*|aa",
306     "@",
307     "aa",
308     "@aa",
309     "@@@",
310     3 },
311 nigel 91 #ifdef SUPPORT_UTF8
312     { "b*",
313     "bb",
314     "\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8", // utf8
315     "bb\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8",
316 ph10 297 "bb\xE3\x83\x9B""bb""\xE3\x83\xBC""bb""\xE3\x83\xA0""bb""\xE3\x81\xB8""bb",
317     5 },
318 nigel 91 { "b*",
319     "bb",
320     "\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n", // utf8
321     "bb\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n",
322     ("bb\xE3\x83\x9B""bb\r\nbb""\xE3\x83\xBC""bb\rbb""\xE3\x83\xA0"
323 ph10 297 "bb\nbb""\xE3\x81\xB8""bb\r\nbb"),
324     9 },
325 nigel 91 #endif
326 ph10 297 { "", NULL, NULL, NULL, NULL, 0 }
327 nigel 77 };
328    
329 nigel 91 #ifdef SUPPORT_UTF8
330     const bool support_utf8 = true;
331     #else
332     const bool support_utf8 = false;
333     #endif
334    
335 nigel 77 for (const ReplaceTest *t = tests; t->original != NULL; ++t) {
336 nigel 91 RE re(t->regexp, RE_Options(PCRE_NEWLINE_CRLF).set_utf8(support_utf8));
337     assert(re.error().empty());
338 nigel 77 string one(t->original);
339 nigel 91 CHECK(re.Replace(t->rewrite, &one));
340 nigel 77 CHECK_EQ(one, t->single);
341     string all(t->original);
342 ph10 297 const int replace_count = re.GlobalReplace(t->rewrite, &all);
343 nigel 77 CHECK_EQ(all, t->global);
344 ph10 297 CHECK_EQ(replace_count, t->global_count);
345 nigel 77 }
346 nigel 91
347     // One final test: test \r\n replacement when we're not in CRLF mode
348     {
349     RE re("b*", RE_Options(PCRE_NEWLINE_CR).set_utf8(support_utf8));
350     assert(re.error().empty());
351     string all("aa\r\naa\r\n");
352 ph10 297 CHECK_EQ(re.GlobalReplace("bb", &all), 9);
353 nigel 91 CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
354     }
355     {
356     RE re("b*", RE_Options(PCRE_NEWLINE_LF).set_utf8(support_utf8));
357     assert(re.error().empty());
358     string all("aa\r\naa\r\n");
359 ph10 297 CHECK_EQ(re.GlobalReplace("bb", &all), 9);
360 nigel 91 CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
361     }
362     // TODO: test what happens when no PCRE_NEWLINE_* flag is set.
363     // Alas, the answer depends on how pcre was compiled.
364 nigel 77 }
365    
366     static void TestExtract() {
367     printf("Testing Extract\n");
368    
369     string s;
370    
371     CHECK(RE("(.*)@([^.]*)").Extract("\\2!\\1", "boris@kremvax.ru", &s));
372     CHECK_EQ(s, "kremvax!boris");
373    
374     // check the RE interface as well
375     CHECK(RE(".*").Extract("'\\0'", "foo", &s));
376     CHECK_EQ(s, "'foo'");
377     CHECK(!RE("bar").Extract("'\\0'", "baz", &s));
378     CHECK_EQ(s, "'foo'");
379     }
380    
381     static void TestConsume() {
382     printf("Testing Consume\n");
383    
384     string word;
385    
386     string s(" aaa b!@#$@#$cccc");
387     StringPiece input(s);
388    
389     RE r("\\s*(\\w+)"); // matches a word, possibly proceeded by whitespace
390     CHECK(r.Consume(&input, &word));
391     CHECK_EQ(word, "aaa");
392     CHECK(r.Consume(&input, &word));
393     CHECK_EQ(word, "b");
394     CHECK(! r.Consume(&input, &word));
395     }
396    
397     static void TestFindAndConsume() {
398     printf("Testing FindAndConsume\n");
399    
400     string word;
401    
402     string s(" aaa b!@#$@#$cccc");
403     StringPiece input(s);
404    
405     RE r("(\\w+)"); // matches a word
406     CHECK(r.FindAndConsume(&input, &word));
407     CHECK_EQ(word, "aaa");
408     CHECK(r.FindAndConsume(&input, &word));
409     CHECK_EQ(word, "b");
410     CHECK(r.FindAndConsume(&input, &word));
411     CHECK_EQ(word, "cccc");
412     CHECK(! r.FindAndConsume(&input, &word));
413     }
414    
415     static void TestMatchNumberPeculiarity() {
416     printf("Testing match-number peculiaraity\n");
417    
418     string word1;
419     string word2;
420     string word3;
421    
422     RE r("(foo)|(bar)|(baz)");
423     CHECK(r.PartialMatch("foo", &word1, &word2, &word3));
424     CHECK_EQ(word1, "foo");
425     CHECK_EQ(word2, "");
426     CHECK_EQ(word3, "");
427     CHECK(r.PartialMatch("bar", &word1, &word2, &word3));
428     CHECK_EQ(word1, "");
429     CHECK_EQ(word2, "bar");
430     CHECK_EQ(word3, "");
431     CHECK(r.PartialMatch("baz", &word1, &word2, &word3));
432     CHECK_EQ(word1, "");
433     CHECK_EQ(word2, "");
434     CHECK_EQ(word3, "baz");
435     CHECK(!r.PartialMatch("f", &word1, &word2, &word3));
436    
437     string a;
438     CHECK(RE("(foo)|hello").FullMatch("hello", &a));
439     CHECK_EQ(a, "");
440     }
441    
442 nigel 87 static void TestRecursion() {
443 nigel 77 printf("Testing recursion\n");
444    
445 nigel 87 // Get one string that passes (sometimes), one that never does.
446     string text_good("abcdefghijk");
447     string text_bad("acdefghijkl");
448    
449     // According to pcretest, matching text_good against (\w+)*b
450     // requires match_limit of at least 8192, and match_recursion_limit
451     // of at least 37.
452    
453     RE_Options options_ml;
454     options_ml.set_match_limit(8192);
455     RE re("(\\w+)*b", options_ml);
456     CHECK(re.PartialMatch(text_good) == true);
457     CHECK(re.PartialMatch(text_bad) == false);
458     CHECK(re.FullMatch(text_good) == false);
459     CHECK(re.FullMatch(text_bad) == false);
460    
461     options_ml.set_match_limit(1024);
462     RE re2("(\\w+)*b", options_ml);
463     CHECK(re2.PartialMatch(text_good) == false); // because of match_limit
464     CHECK(re2.PartialMatch(text_bad) == false);
465     CHECK(re2.FullMatch(text_good) == false);
466     CHECK(re2.FullMatch(text_bad) == false);
467    
468     RE_Options options_mlr;
469     options_mlr.set_match_limit_recursion(50);
470     RE re3("(\\w+)*b", options_mlr);
471     CHECK(re3.PartialMatch(text_good) == true);
472     CHECK(re3.PartialMatch(text_bad) == false);
473     CHECK(re3.FullMatch(text_good) == false);
474     CHECK(re3.FullMatch(text_bad) == false);
475    
476     options_mlr.set_match_limit_recursion(10);
477     RE re4("(\\w+)*b", options_mlr);
478     CHECK(re4.PartialMatch(text_good) == false);
479     CHECK(re4.PartialMatch(text_bad) == false);
480     CHECK(re4.FullMatch(text_good) == false);
481     CHECK(re4.FullMatch(text_bad) == false);
482 nigel 77 }
483    
484 nigel 93 // A meta-quoted string, interpreted as a pattern, should always match
485     // the original unquoted string.
486     static void TestQuoteMeta(string unquoted, RE_Options options = RE_Options()) {
487     string quoted = RE::QuoteMeta(unquoted);
488     RE re(quoted, options);
489     CHECK(re.FullMatch(unquoted));
490     }
491    
492     // A string containing meaningful regexp characters, which is then meta-
493     // quoted, should not generally match a string the unquoted string does.
494     static void NegativeTestQuoteMeta(string unquoted, string should_not_match,
495     RE_Options options = RE_Options()) {
496     string quoted = RE::QuoteMeta(unquoted);
497     RE re(quoted, options);
498     CHECK(!re.FullMatch(should_not_match));
499     }
500    
501     // Tests that quoted meta characters match their original strings,
502     // and that a few things that shouldn't match indeed do not.
503     static void TestQuotaMetaSimple() {
504     TestQuoteMeta("foo");
505     TestQuoteMeta("foo.bar");
506     TestQuoteMeta("foo\\.bar");
507     TestQuoteMeta("[1-9]");
508     TestQuoteMeta("1.5-2.0?");
509     TestQuoteMeta("\\d");
510     TestQuoteMeta("Who doesn't like ice cream?");
511     TestQuoteMeta("((a|b)c?d*e+[f-h]i)");
512     TestQuoteMeta("((?!)xxx).*yyy");
513     TestQuoteMeta("([");
514 ph10 326 TestQuoteMeta(string("foo\0bar", 7));
515 nigel 93 }
516    
517     static void TestQuoteMetaSimpleNegative() {
518     NegativeTestQuoteMeta("foo", "bar");
519     NegativeTestQuoteMeta("...", "bar");
520     NegativeTestQuoteMeta("\\.", ".");
521     NegativeTestQuoteMeta("\\.", "..");
522     NegativeTestQuoteMeta("(a)", "a");
523     NegativeTestQuoteMeta("(a|b)", "a");
524     NegativeTestQuoteMeta("(a|b)", "(a)");
525     NegativeTestQuoteMeta("(a|b)", "a|b");
526     NegativeTestQuoteMeta("[0-9]", "0");
527     NegativeTestQuoteMeta("[0-9]", "0-9");
528     NegativeTestQuoteMeta("[0-9]", "[9]");
529     NegativeTestQuoteMeta("((?!)xxx)", "xxx");
530     }
531    
532     static void TestQuoteMetaLatin1() {
533     TestQuoteMeta("3\xb2 = 9");
534     }
535    
// QuoteMeta checks on multi-byte UTF-8 sequences.  The body is compiled
// only when SUPPORT_UTF8 is defined; otherwise the function does nothing.
static void TestQuoteMetaUtf8() {
#ifdef SUPPORT_UTF8
  const RE_Options utf8 = pcrecpp::UTF8();

  TestQuoteMeta("Pl\xc3\xa1\x63ido Domingo", utf8);
  TestQuoteMeta("xyz", utf8);                 // No fancy utf8
  TestQuoteMeta("\xc2\xb0", utf8);            // 2-byte utf8 (degree symbol)
  TestQuoteMeta("27\xc2\xb0 degrees", utf8);  // As a middle character
  TestQuoteMeta("\xe2\x80\xb3", utf8);        // 3-byte utf8 (double prime)
  TestQuoteMeta("\xf0\x9d\x85\x9f", utf8);    // 4-byte utf8 (music note)
  TestQuoteMeta("27\xc2\xb0");  // Interpreted as Latin-1, but should still work

  // The individual bytes of a 2-byte character (degree symbol) must not
  // match when each is preceded by a literal backslash.
  NegativeTestQuoteMeta("27\xc2\xb0",
                        "27\\\xc2\\\xb0",
                        utf8);
#endif
}
550    
551     static void TestQuoteMetaAll() {
552     printf("Testing QuoteMeta\n");
553     TestQuotaMetaSimple();
554     TestQuoteMetaSimpleNegative();
555     TestQuoteMetaLatin1();
556     TestQuoteMetaUtf8();
557     }
558    
559 nigel 81 //
560     // Options tests contributed by
561     // Giuseppe Maxia, CTO, Stardata s.r.l.
562     // July 2005
563     //
564     static void GetOneOptionResult(
565     const char *option_name,
566     const char *regex,
567     const char *str,
568     RE_Options options,
569     bool full,
570     string expected) {
571 nigel 77
572 nigel 81 printf("Testing Option <%s>\n", option_name);
573     if(VERBOSE_TEST)
574     printf("/%s/ finds \"%s\" within \"%s\" \n",
575     regex,
576     expected.c_str(),
577     str);
578     string captured("");
579     if (full)
580     RE(regex,options).FullMatch(str, &captured);
581     else
582     RE(regex,options).PartialMatch(str, &captured);
583     CHECK_EQ(captured, expected);
584     }
585    
586     static void TestOneOption(
587     const char *option_name,
588     const char *regex,
589     const char *str,
590     RE_Options options,
591     bool full,
592     bool assertive = true) {
593    
594     printf("Testing Option <%s>\n", option_name);
595     if (VERBOSE_TEST)
596     printf("'%s' %s /%s/ \n",
597     str,
598     (assertive? "matches" : "doesn't match"),
599     regex);
600     if (assertive) {
601     if (full)
602     CHECK(RE(regex,options).FullMatch(str));
603     else
604     CHECK(RE(regex,options).PartialMatch(str));
605     } else {
606     if (full)
607     CHECK(!RE(regex,options).FullMatch(str));
608     else
609     CHECK(!RE(regex,options).PartialMatch(str));
610     }
611     }
612    
613     static void Test_CASELESS() {
614     RE_Options options;
615     RE_Options options2;
616    
617     options.set_caseless(true);
618     TestOneOption("CASELESS (class)", "HELLO", "hello", options, false);
619     TestOneOption("CASELESS (class2)", "HELLO", "hello", options2.set_caseless(true), false);
620     TestOneOption("CASELESS (class)", "^[A-Z]+$", "Hello", options, false);
621    
622     TestOneOption("CASELESS (function)", "HELLO", "hello", pcrecpp::CASELESS(), false);
623     TestOneOption("CASELESS (function)", "^[A-Z]+$", "Hello", pcrecpp::CASELESS(), false);
624     options.set_caseless(false);
625     TestOneOption("no CASELESS", "HELLO", "hello", options, false, false);
626     }
627    
628     static void Test_MULTILINE() {
629     RE_Options options;
630     RE_Options options2;
631     const char *str = "HELLO\n" "cruel\n" "world\n";
632    
633     options.set_multiline(true);
634     TestOneOption("MULTILINE (class)", "^cruel$", str, options, false);
635     TestOneOption("MULTILINE (class2)", "^cruel$", str, options2.set_multiline(true), false);
636     TestOneOption("MULTILINE (function)", "^cruel$", str, pcrecpp::MULTILINE(), false);
637     options.set_multiline(false);
638     TestOneOption("no MULTILINE", "^cruel$", str, options, false, false);
639     }
640    
641     static void Test_DOTALL() {
642     RE_Options options;
643     RE_Options options2;
644     const char *str = "HELLO\n" "cruel\n" "world";
645    
646     options.set_dotall(true);
647     TestOneOption("DOTALL (class)", "HELLO.*world", str, options, true);
648     TestOneOption("DOTALL (class2)", "HELLO.*world", str, options2.set_dotall(true), true);
649     TestOneOption("DOTALL (function)", "HELLO.*world", str, pcrecpp::DOTALL(), true);
650     options.set_dotall(false);
651     TestOneOption("no DOTALL", "HELLO.*world", str, options, true, false);
652     }
653    
654     static void Test_DOLLAR_ENDONLY() {
655     RE_Options options;
656     RE_Options options2;
657     const char *str = "HELLO world\n";
658    
659     TestOneOption("no DOLLAR_ENDONLY", "world$", str, options, false);
660     options.set_dollar_endonly(true);
661     TestOneOption("DOLLAR_ENDONLY 1", "world$", str, options, false, false);
662     TestOneOption("DOLLAR_ENDONLY 2", "world$", str, options2.set_dollar_endonly(true), false, false);
663     }
664    
665     static void Test_EXTRA() {
666     RE_Options options;
667     const char *str = "HELLO";
668    
669     options.set_extra(true);
670     TestOneOption("EXTRA 1", "\\HELL\\O", str, options, true, false );
671     TestOneOption("EXTRA 2", "\\HELL\\O", str, RE_Options().set_extra(true), true, false );
672     options.set_extra(false);
673     TestOneOption("no EXTRA", "\\HELL\\O", str, options, true );
674     }
675    
676     static void Test_EXTENDED() {
677     RE_Options options;
678     RE_Options options2;
679     const char *str = "HELLO world";
680    
681     options.set_extended(true);
682     TestOneOption("EXTENDED (class)", "HELLO world", str, options, false, false);
683     TestOneOption("EXTENDED (class2)", "HELLO world", str, options2.set_extended(true), false, false);
684     TestOneOption("EXTENDED (class)",
685     "^ HE L{2} O "
686     "\\s+ "
687     "\\w+ $ ",
688     str,
689     options,
690     false);
691    
692     TestOneOption("EXTENDED (function)", "HELLO world", str, pcrecpp::EXTENDED(), false, false);
693     TestOneOption("EXTENDED (function)",
694     "^ HE L{2} O "
695     "\\s+ "
696     "\\w+ $ ",
697     str,
698     pcrecpp::EXTENDED(),
699     false);
700    
701     options.set_extended(false);
702     TestOneOption("no EXTENDED", "HELLO world", str, options, false);
703     }
704    
705     static void Test_NO_AUTO_CAPTURE() {
706     RE_Options options;
707     const char *str = "HELLO world";
708     string captured;
709    
710     printf("Testing Option <no NO_AUTO_CAPTURE>\n");
711     if (VERBOSE_TEST)
712     printf("parentheses capture text\n");
713     RE re("(world|universe)$", options);
714     CHECK(re.Extract("\\1", str , &captured));
715     CHECK_EQ(captured, "world");
716     options.set_no_auto_capture(true);
717     printf("testing Option <NO_AUTO_CAPTURE>\n");
718     if (VERBOSE_TEST)
719     printf("parentheses do not capture text\n");
720     re.Extract("\\1",str, &captured );
721     CHECK_EQ(captured, "world");
722     }
723    
724     static void Test_UNGREEDY() {
725     RE_Options options;
726     const char *str = "HELLO, 'this' is the 'world'";
727    
728     options.set_ungreedy(true);
729     GetOneOptionResult("UNGREEDY 1", "('.*')", str, options, false, "'this'" );
730     GetOneOptionResult("UNGREEDY 2", "('.*')", str, RE_Options().set_ungreedy(true), false, "'this'" );
731     GetOneOptionResult("UNGREEDY", "('.*?')", str, options, false, "'this' is the 'world'" );
732    
733     options.set_ungreedy(false);
734     GetOneOptionResult("no UNGREEDY", "('.*')", str, options, false, "'this' is the 'world'" );
735     GetOneOptionResult("no UNGREEDY", "('.*?')", str, options, false, "'this'" );
736     }
737    
738     static void Test_all_options() {
739     const char *str = "HELLO\n" "cruel\n" "world";
740     RE_Options options;
741     options.set_all_options(PCRE_CASELESS | PCRE_DOTALL);
742    
743     TestOneOption("all_options (CASELESS|DOTALL)", "^hello.*WORLD", str , options, false);
744     options.set_all_options(0);
745     TestOneOption("all_options (0)", "^hello.*WORLD", str , options, false, false);
746     options.set_all_options(PCRE_MULTILINE | PCRE_EXTENDED);
747    
748     TestOneOption("all_options (MULTILINE|EXTENDED)", " ^ c r u e l $ ", str, options, false);
749     TestOneOption("all_options (MULTILINE|EXTENDED) with constructor",
750     " ^ c r u e l $ ",
751     str,
752     RE_Options(PCRE_MULTILINE | PCRE_EXTENDED),
753     false);
754    
755     TestOneOption("all_options (MULTILINE|EXTENDED) with concatenation",
756     " ^ c r u e l $ ",
757     str,
758     RE_Options()
759     .set_multiline(true)
760     .set_extended(true),
761     false);
762    
763     options.set_all_options(0);
764     TestOneOption("all_options (0)", "^ c r u e l $", str, options, false, false);
765    
766     }
767    
768     static void TestOptions() {
769     printf("Testing Options\n");
770     Test_CASELESS();
771     Test_MULTILINE();
772     Test_DOTALL();
773     Test_DOLLAR_ENDONLY();
774     Test_EXTENDED();
775     Test_NO_AUTO_CAPTURE();
776     Test_UNGREEDY();
777     Test_EXTRA();
778     Test_all_options();
779     }
780    
781 nigel 93 static void TestConstructors() {
782     printf("Testing constructors\n");
783    
784     RE_Options options;
785     options.set_dotall(true);
786     const char *str = "HELLO\n" "cruel\n" "world";
787    
788     RE orig("HELLO.*world", options);
789     CHECK(orig.FullMatch(str));
790    
791     RE copy1(orig);
792     CHECK(copy1.FullMatch(str));
793    
794     RE copy2("not a match");
795     CHECK(!copy2.FullMatch(str));
796     copy2 = copy1;
797     CHECK(copy2.FullMatch(str));
798     copy2 = orig;
799     CHECK(copy2.FullMatch(str));
800    
801     // Make sure when we assign to ourselves, nothing bad happens
802     orig = orig;
803     copy1 = copy1;
804     copy2 = copy2;
805     CHECK(orig.FullMatch(str));
806     CHECK(copy1.FullMatch(str));
807     CHECK(copy2.FullMatch(str));
808     }
809    
810 nigel 77 int main(int argc, char** argv) {
811     // Treat any flag as --help
812     if (argc > 1 && argv[1][0] == '-') {
813     printf("Usage: %s [timing1|timing2|timing3 num-iters]\n"
814     " If 'timingX ###' is specified, run the given timing test\n"
815     " with the given number of iterations, rather than running\n"
816     " the default corectness test.\n", argv[0]);
817     return 0;
818     }
819    
820     if (argc > 1) {
821     if ( argc == 2 || atoi(argv[2]) == 0) {
822     printf("timing mode needs a num-iters argument\n");
823     return 1;
824     }
825     if (!strcmp(argv[1], "timing1"))
826     Timing1(atoi(argv[2]));
827     else if (!strcmp(argv[1], "timing2"))
828     Timing2(atoi(argv[2]));
829     else if (!strcmp(argv[1], "timing3"))
830     Timing3(atoi(argv[2]));
831     else
832     printf("Unknown argument '%s'\n", argv[1]);
833     return 0;
834     }
835    
836     printf("Testing FullMatch\n");
837    
838     int i;
839     string s;
840    
841     /***** FullMatch with no args *****/
842    
843     CHECK(RE("h.*o").FullMatch("hello"));
844 ph10 179 CHECK(!RE("h.*o").FullMatch("othello")); // Must be anchored at front
845     CHECK(!RE("h.*o").FullMatch("hello!")); // Must be anchored at end
846     CHECK(RE("a*").FullMatch("aaaa")); // Fullmatch with normal op
847     CHECK(RE("a*?").FullMatch("aaaa")); // Fullmatch with nongreedy op
848     CHECK(RE("a*?\\z").FullMatch("aaaa")); // Two unusual ops
849 nigel 77
850     /***** FullMatch with args *****/
851    
852     // Zero-arg
853     CHECK(RE("\\d+").FullMatch("1001"));
854    
855     // Single-arg
856     CHECK(RE("(\\d+)").FullMatch("1001", &i));
857     CHECK_EQ(i, 1001);
858     CHECK(RE("(-?\\d+)").FullMatch("-123", &i));
859     CHECK_EQ(i, -123);
860     CHECK(!RE("()\\d+").FullMatch("10", &i));
861     CHECK(!RE("(\\d+)").FullMatch("1234567890123456789012345678901234567890",
862     &i));
863    
864     // Digits surrounding integer-arg
865     CHECK(RE("1(\\d*)4").FullMatch("1234", &i));
866     CHECK_EQ(i, 23);
867     CHECK(RE("(\\d)\\d+").FullMatch("1234", &i));
868     CHECK_EQ(i, 1);
869     CHECK(RE("(-\\d)\\d+").FullMatch("-1234", &i));
870     CHECK_EQ(i, -1);
871     CHECK(RE("(\\d)").PartialMatch("1234", &i));
872     CHECK_EQ(i, 1);
873     CHECK(RE("(-\\d)").PartialMatch("-1234", &i));
874     CHECK_EQ(i, -1);
875    
876     // String-arg
877     CHECK(RE("h(.*)o").FullMatch("hello", &s));
878     CHECK_EQ(s, string("ell"));
879    
880     // StringPiece-arg
881     StringPiece sp;
882     CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &sp, &i));
883     CHECK_EQ(sp.size(), 4);
884     CHECK(memcmp(sp.data(), "ruby", 4) == 0);
885     CHECK_EQ(i, 1234);
886    
887     // Multi-arg
888     CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &s, &i));
889     CHECK_EQ(s, string("ruby"));
890     CHECK_EQ(i, 1234);
891    
892 ph10 263 // Ignore non-void* NULL arg
893     CHECK(RE("he(.*)lo").FullMatch("hello", (char*)NULL));
894     CHECK(RE("h(.*)o").FullMatch("hello", (string*)NULL));
895     CHECK(RE("h(.*)o").FullMatch("hello", (StringPiece*)NULL));
896     CHECK(RE("(.*)").FullMatch("1234", (int*)NULL));
897 ph10 302 #ifdef HAVE_LONG_LONG
898 ph10 263 CHECK(RE("(.*)").FullMatch("1234567890123456", (long long*)NULL));
899 ph10 302 #endif
900 ph10 263 CHECK(RE("(.*)").FullMatch("123.4567890123456", (double*)NULL));
901     CHECK(RE("(.*)").FullMatch("123.4567890123456", (float*)NULL));
902    
903     // Fail on non-void* NULL arg if the match doesn't parse for the given type.
904     CHECK(!RE("h(.*)lo").FullMatch("hello", &s, (char*)NULL));
905     CHECK(!RE("(.*)").FullMatch("hello", (int*)NULL));
906     CHECK(!RE("(.*)").FullMatch("1234567890123456", (int*)NULL));
907     CHECK(!RE("(.*)").FullMatch("hello", (double*)NULL));
908     CHECK(!RE("(.*)").FullMatch("hello", (float*)NULL));
909    
910 nigel 77 // Ignored arg
911     CHECK(RE("(\\w+)(:)(\\d+)").FullMatch("ruby:1234", &s, (void*)NULL, &i));
912     CHECK_EQ(s, string("ruby"));
913     CHECK_EQ(i, 1234);
914    
915     // Type tests
916     {
917     char c;
918     CHECK(RE("(H)ello").FullMatch("Hello", &c));
919     CHECK_EQ(c, 'H');
920     }
921     {
922     unsigned char c;
923     CHECK(RE("(H)ello").FullMatch("Hello", &c));
924     CHECK_EQ(c, static_cast<unsigned char>('H'));
925     }
926     {
927     short v;
928     CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
929     CHECK(RE("(-?\\d+)").FullMatch("-100", &v)); CHECK_EQ(v, -100);
930     CHECK(RE("(-?\\d+)").FullMatch("32767", &v)); CHECK_EQ(v, 32767);
931     CHECK(RE("(-?\\d+)").FullMatch("-32768", &v)); CHECK_EQ(v, -32768);
932     CHECK(!RE("(-?\\d+)").FullMatch("-32769", &v));
933     CHECK(!RE("(-?\\d+)").FullMatch("32768", &v));
934     }
935     {
936     unsigned short v;
937     CHECK(RE("(\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
938     CHECK(RE("(\\d+)").FullMatch("32767", &v)); CHECK_EQ(v, 32767);
939     CHECK(RE("(\\d+)").FullMatch("65535", &v)); CHECK_EQ(v, 65535);
940     CHECK(!RE("(\\d+)").FullMatch("65536", &v));
941     }
942     {
943     int v;
944     static const int max_value = 0x7fffffff;
945     static const int min_value = -max_value - 1;
946     CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
947     CHECK(RE("(-?\\d+)").FullMatch("-100", &v)); CHECK_EQ(v, -100);
948     CHECK(RE("(-?\\d+)").FullMatch("2147483647", &v)); CHECK_EQ(v, max_value);
949     CHECK(RE("(-?\\d+)").FullMatch("-2147483648", &v)); CHECK_EQ(v, min_value);
950     CHECK(!RE("(-?\\d+)").FullMatch("-2147483649", &v));
951     CHECK(!RE("(-?\\d+)").FullMatch("2147483648", &v));
952     }
953     {
954     unsigned int v;
955     static const unsigned int max_value = 0xfffffffful;
956     CHECK(RE("(\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
957     CHECK(RE("(\\d+)").FullMatch("4294967295", &v)); CHECK_EQ(v, max_value);
958     CHECK(!RE("(\\d+)").FullMatch("4294967296", &v));
959     }
960     #ifdef HAVE_LONG_LONG
961 ph10 193 # if defined(__MINGW__) || defined(__MINGW32__)
962     # define LLD "%I64d"
963 ph10 201 # define LLU "%I64u"
964 ph10 193 # else
965     # define LLD "%lld"
966 ph10 201 # define LLU "%llu"
967 ph10 193 # endif
968 nigel 77 {
969     long long v;
970     static const long long max_value = 0x7fffffffffffffffLL;
971     static const long long min_value = -max_value - 1;
972 ph10 257 char buf[32]; // definitely big enough for a long long
973 nigel 77
974     CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
975     CHECK(RE("(-?\\d+)").FullMatch("-100",&v)); CHECK_EQ(v, -100);
976    
977 ph10 257 sprintf(buf, LLD, max_value);
978 nigel 77 CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
979    
980 ph10 257 sprintf(buf, LLD, min_value);
981 nigel 77 CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, min_value);
982    
983 ph10 257 sprintf(buf, LLD, max_value);
984 nigel 77 assert(buf[strlen(buf)-1] != '9');
985     buf[strlen(buf)-1]++;
986     CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
987    
988 ph10 257 sprintf(buf, LLD, min_value);
989 nigel 77 assert(buf[strlen(buf)-1] != '9');
990     buf[strlen(buf)-1]++;
991     CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
992     }
993     #endif
994     #if defined HAVE_UNSIGNED_LONG_LONG && defined HAVE_LONG_LONG
995     {
996     unsigned long long v;
997     long long v2;
998     static const unsigned long long max_value = 0xffffffffffffffffULL;
999 ph10 257 char buf[32]; // definitely big enough for an unsigned long long
1000 nigel 77
1001     CHECK(RE("(-?\\d+)").FullMatch("100",&v)); CHECK_EQ(v, 100);
1002     CHECK(RE("(-?\\d+)").FullMatch("-100",&v2)); CHECK_EQ(v2, -100);
1003    
1004 ph10 257 sprintf(buf, LLU, max_value);
1005 nigel 77 CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
1006    
1007     assert(buf[strlen(buf)-1] != '9');
1008     buf[strlen(buf)-1]++;
1009     CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
1010     }
1011     #endif
1012     {
1013     float v;
1014     CHECK(RE("(.*)").FullMatch("100", &v));
1015     CHECK(RE("(.*)").FullMatch("-100.", &v));
1016     CHECK(RE("(.*)").FullMatch("1e23", &v));
1017     }
1018     {
1019     double v;
1020     CHECK(RE("(.*)").FullMatch("100", &v));
1021     CHECK(RE("(.*)").FullMatch("-100.", &v));
1022     CHECK(RE("(.*)").FullMatch("1e23", &v));
1023     }
1024    
1025     // Check that matching is fully anchored
1026     CHECK(!RE("(\\d+)").FullMatch("x1001", &i));
1027     CHECK(!RE("(\\d+)").FullMatch("1001x", &i));
1028     CHECK(RE("x(\\d+)").FullMatch("x1001", &i)); CHECK_EQ(i, 1001);
1029     CHECK(RE("(\\d+)x").FullMatch("1001x", &i)); CHECK_EQ(i, 1001);
1030    
1031     // Braces
1032     CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcd"));
1033     CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcde"));
1034     CHECK(!RE("[0-9a-f+.-]{5,}").FullMatch("0abc"));
1035    
1036     // Complicated RE
1037     CHECK(RE("foo|bar|[A-Z]").FullMatch("foo"));
1038     CHECK(RE("foo|bar|[A-Z]").FullMatch("bar"));
1039     CHECK(RE("foo|bar|[A-Z]").FullMatch("X"));
1040     CHECK(!RE("foo|bar|[A-Z]").FullMatch("XY"));
1041    
1042     // Check full-match handling (needs '$' tacked on internally)
1043     CHECK(RE("fo|foo").FullMatch("fo"));
1044     CHECK(RE("fo|foo").FullMatch("foo"));
1045     CHECK(RE("fo|foo$").FullMatch("fo"));
1046     CHECK(RE("fo|foo$").FullMatch("foo"));
1047     CHECK(RE("foo$").FullMatch("foo"));
1048     CHECK(!RE("foo\\$").FullMatch("foo$bar"));
1049     CHECK(!RE("fo|bar").FullMatch("fox"));
1050    
1051     // Enable the following (currently disabled via "if (false)") if we
1052     // change the handling of '$' to prevent it from matching a trailing newline
1053     if (false) {
1054     // Check that we don't get bitten by pcre's special handling of a
1055     // '\n' at the end of the string matching '$'
1056     CHECK(!RE("foo$").PartialMatch("foo\n"));
1057     }
1058    
1059     // Number of args
1060     int a[16];
1061     CHECK(RE("").FullMatch(""));
1062    
1063     memset(a, 0, sizeof(0));
1064     CHECK(RE("(\\d){1}").FullMatch("1",
1065     &a[0]));
1066     CHECK_EQ(a[0], 1);
1067    
1068     memset(a, 0, sizeof(0));
1069     CHECK(RE("(\\d)(\\d)").FullMatch("12",
1070     &a[0], &a[1]));
1071     CHECK_EQ(a[0], 1);
1072     CHECK_EQ(a[1], 2);
1073    
1074     memset(a, 0, sizeof(0));
1075     CHECK(RE("(\\d)(\\d)(\\d)").FullMatch("123",
1076     &a[0], &a[1], &a[2]));
1077     CHECK_EQ(a[0], 1);
1078     CHECK_EQ(a[1], 2);
1079     CHECK_EQ(a[2], 3);
1080    
1081     memset(a, 0, sizeof(0));
1082     CHECK(RE("(\\d)(\\d)(\\d)(\\d)").FullMatch("1234",
1083     &a[0], &a[1], &a[2], &a[3]));
1084     CHECK_EQ(a[0], 1);
1085     CHECK_EQ(a[1], 2);
1086     CHECK_EQ(a[2], 3);
1087     CHECK_EQ(a[3], 4);
1088    
1089     memset(a, 0, sizeof(0));
1090     CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("12345",
1091     &a[0], &a[1], &a[2],
1092     &a[3], &a[4]));
1093     CHECK_EQ(a[0], 1);
1094     CHECK_EQ(a[1], 2);
1095     CHECK_EQ(a[2], 3);
1096     CHECK_EQ(a[3], 4);
1097     CHECK_EQ(a[4], 5);
1098    
1099     memset(a, 0, sizeof(0));
1100     CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("123456",
1101     &a[0], &a[1], &a[2],
1102     &a[3], &a[4], &a[5]));
1103     CHECK_EQ(a[0], 1);
1104     CHECK_EQ(a[1], 2);
1105     CHECK_EQ(a[2], 3);
1106     CHECK_EQ(a[3], 4);
1107     CHECK_EQ(a[4], 5);
1108     CHECK_EQ(a[5], 6);
1109    
1110     memset(a, 0, sizeof(0));
1111     CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("1234567",
1112     &a[0], &a[1], &a[2], &a[3],
1113     &a[4], &a[5], &a[6]));
1114     CHECK_EQ(a[0], 1);
1115     CHECK_EQ(a[1], 2);
1116     CHECK_EQ(a[2], 3);
1117     CHECK_EQ(a[3], 4);
1118     CHECK_EQ(a[4], 5);
1119     CHECK_EQ(a[5], 6);
1120     CHECK_EQ(a[6], 7);
1121    
1122     memset(a, 0, sizeof(0));
1123     CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)"
1124     "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch(
1125     "1234567890123456",
1126     &a[0], &a[1], &a[2], &a[3],
1127     &a[4], &a[5], &a[6], &a[7],
1128     &a[8], &a[9], &a[10], &a[11],
1129     &a[12], &a[13], &a[14], &a[15]));
1130     CHECK_EQ(a[0], 1);
1131     CHECK_EQ(a[1], 2);
1132     CHECK_EQ(a[2], 3);
1133     CHECK_EQ(a[3], 4);
1134     CHECK_EQ(a[4], 5);
1135     CHECK_EQ(a[5], 6);
1136     CHECK_EQ(a[6], 7);
1137     CHECK_EQ(a[7], 8);
1138     CHECK_EQ(a[8], 9);
1139     CHECK_EQ(a[9], 0);
1140     CHECK_EQ(a[10], 1);
1141     CHECK_EQ(a[11], 2);
1142     CHECK_EQ(a[12], 3);
1143     CHECK_EQ(a[13], 4);
1144     CHECK_EQ(a[14], 5);
1145     CHECK_EQ(a[15], 6);
1146    
1147     /***** PartialMatch *****/
1148    
1149     printf("Testing PartialMatch\n");
1150    
1151     CHECK(RE("h.*o").PartialMatch("hello"));
1152     CHECK(RE("h.*o").PartialMatch("othello"));
1153     CHECK(RE("h.*o").PartialMatch("hello!"));
1154     CHECK(RE("((((((((((((((((((((x))))))))))))))))))))").PartialMatch("x"));
1155    
1156 nigel 93 /***** other tests *****/
1157    
1158 nigel 77 RadixTests();
1159     TestReplace();
1160     TestExtract();
1161     TestConsume();
1162     TestFindAndConsume();
1163 nigel 93 TestQuoteMetaAll();
1164 nigel 77 TestMatchNumberPeculiarity();
1165    
1166     // Check the pattern() accessor
1167     {
1168     const string kPattern = "http://([^/]+)/.*";
1169     const RE re(kPattern);
1170     CHECK_EQ(kPattern, re.pattern());
1171     }
1172    
1173     // Check RE error field.
1174     {
1175     RE re("foo");
1176     CHECK(re.error().empty()); // Must have no error
1177     }
1178    
1179     #ifdef SUPPORT_UTF8
1180     // Check UTF-8 handling
1181     {
1182     printf("Testing UTF-8 handling\n");
1183    
1184     // Three Japanese characters (nihongo)
1185 ph10 256 const unsigned char utf8_string[] = {
1186 nigel 77 0xe6, 0x97, 0xa5, // 65e5
1187     0xe6, 0x9c, 0xac, // 672c
1188     0xe8, 0xaa, 0x9e, // 8a9e
1189     0
1190     };
1191 ph10 256 const unsigned char utf8_pattern[] = {
1192 nigel 77 '.',
1193     0xe6, 0x9c, 0xac, // 672c
1194     '.',
1195     0
1196     };
1197    
1198     // Both should match in either mode, bytes or UTF-8
1199     RE re_test1(".........");
1200     CHECK(re_test1.FullMatch(utf8_string));
1201     RE re_test2("...", pcrecpp::UTF8());
1202     CHECK(re_test2.FullMatch(utf8_string));
1203    
1204     // Check that '.' matches one byte or UTF-8 character
1205     // according to the mode.
1206     string ss;
1207     RE re_test3("(.)");
1208     CHECK(re_test3.PartialMatch(utf8_string, &ss));
1209     CHECK_EQ(ss, string("\xe6"));
1210     RE re_test4("(.)", pcrecpp::UTF8());
1211     CHECK(re_test4.PartialMatch(utf8_string, &ss));
1212     CHECK_EQ(ss, string("\xe6\x97\xa5"));
1213    
1214     // Check that string matches itself in either mode
1215     RE re_test5(utf8_string);
1216     CHECK(re_test5.FullMatch(utf8_string));
1217     RE re_test6(utf8_string, pcrecpp::UTF8());
1218     CHECK(re_test6.FullMatch(utf8_string));
1219    
1220     // Check that pattern matches string only in UTF8 mode
1221     RE re_test7(utf8_pattern);
1222     CHECK(!re_test7.FullMatch(utf8_string));
1223     RE re_test8(utf8_pattern, pcrecpp::UTF8());
1224     CHECK(re_test8.FullMatch(utf8_string));
1225     }
1226    
1227     // Check that ungreedy, UTF8 regular expressions don't match when they
1228     // oughtn't -- see bug 82246.
1229     {
1230     // This code always worked.
1231     const char* pattern = "\\w+X";
1232     const string target = "a aX";
1233     RE match_sentence(pattern);
1234     RE match_sentence_re(pattern, pcrecpp::UTF8());
1235    
1236     CHECK(!match_sentence.FullMatch(target));
1237     CHECK(!match_sentence_re.FullMatch(target));
1238     }
1239    
1240     {
1241     const char* pattern = "(?U)\\w+X";
1242     const string target = "a aX";
1243     RE match_sentence(pattern);
1244     RE match_sentence_re(pattern, pcrecpp::UTF8());
1245    
1246     CHECK(!match_sentence.FullMatch(target));
1247     CHECK(!match_sentence_re.FullMatch(target));
1248     }
1249     #endif /* def SUPPORT_UTF8 */
1250    
1251     printf("Testing error reporting\n");
1252    
1253     { RE re("a\\1"); CHECK(!re.error().empty()); }
1254     {
1255     RE re("a[x");
1256     CHECK(!re.error().empty());
1257     }
1258     {
1259     RE re("a[z-a]");
1260     CHECK(!re.error().empty());
1261     }
1262     {
1263     RE re("a[[:foobar:]]");
1264     CHECK(!re.error().empty());
1265     }
1266     {
1267     RE re("a(b");
1268     CHECK(!re.error().empty());
1269     }
1270     {
1271     RE re("a\\");
1272     CHECK(!re.error().empty());
1273     }
1274    
1275 nigel 87 // Test that recursion is stopped
1276     TestRecursion();
1277 nigel 77
1278 nigel 81 // Test Options
1279     if (getenv("VERBOSE_TEST") != NULL)
1280     VERBOSE_TEST = true;
1281     TestOptions();
1282    
1283 nigel 93 // Test the constructors
1284     TestConstructors();
1285    
1286 nigel 77 // Done
1287     printf("OK\n");
1288    
1289     return 0;
1290     }

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12