/[pcre]/code/trunk/pcrecpp_unittest.cc
ViewVC logotype

Contents of /code/trunk/pcrecpp_unittest.cc

Parent Directory Parent Directory | Revision Log Revision Log


Revision 667 - (hide annotations) (download)
Mon Aug 22 14:57:32 2011 UTC (2 years, 7 months ago) by ph10
File size: 39205 byte(s)
Commit all the changes for JIT support, but without any documentation yet.

1 nigel 93 // -*- coding: utf-8 -*-
2     //
3 ph10 474 // Copyright (c) 2005 - 2010, Google Inc.
4 nigel 77 // All rights reserved.
5     //
6     // Redistribution and use in source and binary forms, with or without
7     // modification, are permitted provided that the following conditions are
8     // met:
9     //
10     // * Redistributions of source code must retain the above copyright
11     // notice, this list of conditions and the following disclaimer.
12     // * Redistributions in binary form must reproduce the above
13     // copyright notice, this list of conditions and the following disclaimer
14     // in the documentation and/or other materials provided with the
15     // distribution.
16     // * Neither the name of Google Inc. nor the names of its
17     // contributors may be used to endorse or promote products derived from
18     // this software without specific prior written permission.
19     //
20     // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21     // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22     // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23     // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24     // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25     // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26     // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27     // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28     // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29     // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30     // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31     //
32     // Author: Sanjay Ghemawat
33     //
34     // TODO: Test extractions for PartialMatch/Consume
35    
36 ph10 200 #ifdef HAVE_CONFIG_H
37 ph10 236 #include "config.h"
38 ph10 200 #endif
39 ph10 199
40 nigel 77 #include <stdio.h>
41 ph10 594 #include <string.h> /* for memset and strcmp */
42 nigel 91 #include <cassert>
43 nigel 77 #include <vector>
44     #include "pcrecpp.h"
45    
46     using pcrecpp::StringPiece;
47     using pcrecpp::RE;
48     using pcrecpp::RE_Options;
49     using pcrecpp::Hex;
50     using pcrecpp::Octal;
51     using pcrecpp::CRadix;
52    
// When true, the option tests print a description of the pattern and subject
// being exercised in addition to the option name.
static bool VERBOSE_TEST = false;

// CHECK dies with a fatal error if condition is not true.  It is *not*
// controlled by NDEBUG, so the check will be executed regardless of
// compilation mode.  Therefore, it is safe to do things like:
//    CHECK_EQ(fp->Write(x), 4)
#define CHECK(condition) do {                           \
  if (!(condition)) {                                   \
    fprintf(stderr, "%s:%d: Check failed: %s\n",        \
            __FILE__, __LINE__, #condition);            \
    exit(1);                                            \
  }                                                     \
} while (0)

// CHECK_EQ dies unless the two operands compare equal.  Each operand is
// parenthesized so that an argument containing a lower-precedence operator
// (for example `x & mask`) is compared as a whole instead of being re-parsed
// around the `==`.
#define CHECK_EQ(a, b)  CHECK((a) == (b))
68    
69     static void Timing1(int num_iters) {
70     // Same pattern lots of times
71     RE pattern("ruby:\\d+");
72     StringPiece p("ruby:1234");
73     for (int j = num_iters; j > 0; j--) {
74     CHECK(pattern.FullMatch(p));
75     }
76     }
77    
78     static void Timing2(int num_iters) {
79     // Same pattern lots of times
80     RE pattern("ruby:(\\d+)");
81     int i;
82     for (int j = num_iters; j > 0; j--) {
83     CHECK(pattern.FullMatch("ruby:1234", &i));
84     CHECK_EQ(i, 1234);
85     }
86     }
87    
88     static void Timing3(int num_iters) {
89     string text_string;
90     for (int j = num_iters; j > 0; j--) {
91     text_string += "this is another line\n";
92     }
93    
94     RE line_matcher(".*\n");
95     string line;
96     StringPiece text(text_string);
97     int counter = 0;
98     while (line_matcher.Consume(&text)) {
99     counter++;
100     }
101     printf("Matched %d lines\n", counter);
102     }
103    
#if 0  // change to "#if 1" if you have a way of defining VirtualProcessSize()

// Compiles 100000 distinct patterns and checks that the process size
// reported by VirtualProcessSize() grows by less than 2% between the
// 50000th and the last iteration.
static void LeakTest() {
  // Check for memory leaks
  unsigned long long initial_size = 0;
  for (int i = 0; i < 100000; i++) {
    if (i == 50000) {
      initial_size = VirtualProcessSize();
      printf("Size after 50000: %llu\n", initial_size);
    }
    char buf[100];  // definitely big enough
    sprintf(buf, "pat%09d", i);
    RE newre(buf);
  }
  // Same type as initial_size, matching the %llu conversions below.
  const unsigned long long final_size = VirtualProcessSize();
  printf("Size after 100000: %llu\n", final_size);
  // Convert to double before subtracting: if the process shrank, an unsigned
  // subtraction would wrap around and report an enormous bogus growth.
  const double growth =
      (double(final_size) - double(initial_size)) / double(final_size);
  printf("Growth: %0.2f%%", growth * 100);
  CHECK(growth < 0.02);    // Allow < 2% growth
}

#endif
126    
127     static void RadixTests() {
128     printf("Testing hex\n");
129    
130     #define CHECK_HEX(type, value) \
131     do { \
132     type v; \
133     CHECK(RE("([0-9a-fA-F]+)[uUlL]*").FullMatch(#value, Hex(&v))); \
134     CHECK_EQ(v, 0x ## value); \
135     CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0x" #value, CRadix(&v))); \
136     CHECK_EQ(v, 0x ## value); \
137     } while(0)
138    
139     CHECK_HEX(short, 2bad);
140     CHECK_HEX(unsigned short, 2badU);
141     CHECK_HEX(int, dead);
142     CHECK_HEX(unsigned int, deadU);
143     CHECK_HEX(long, 7eadbeefL);
144     CHECK_HEX(unsigned long, deadbeefUL);
145     #ifdef HAVE_LONG_LONG
146     CHECK_HEX(long long, 12345678deadbeefLL);
147     #endif
148     #ifdef HAVE_UNSIGNED_LONG_LONG
149     CHECK_HEX(unsigned long long, cafebabedeadbeefULL);
150     #endif
151    
152     #undef CHECK_HEX
153    
154     printf("Testing octal\n");
155    
156     #define CHECK_OCTAL(type, value) \
157     do { \
158     type v; \
159     CHECK(RE("([0-7]+)[uUlL]*").FullMatch(#value, Octal(&v))); \
160     CHECK_EQ(v, 0 ## value); \
161     CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0" #value, CRadix(&v))); \
162     CHECK_EQ(v, 0 ## value); \
163     } while(0)
164    
165     CHECK_OCTAL(short, 77777);
166     CHECK_OCTAL(unsigned short, 177777U);
167     CHECK_OCTAL(int, 17777777777);
168     CHECK_OCTAL(unsigned int, 37777777777U);
169     CHECK_OCTAL(long, 17777777777L);
170     CHECK_OCTAL(unsigned long, 37777777777UL);
171     #ifdef HAVE_LONG_LONG
172     CHECK_OCTAL(long long, 777777777777777777777LL);
173     #endif
174     #ifdef HAVE_UNSIGNED_LONG_LONG
175     CHECK_OCTAL(unsigned long long, 1777777777777777777777ULL);
176     #endif
177    
178     #undef CHECK_OCTAL
179    
180     printf("Testing decimal\n");
181    
182     #define CHECK_DECIMAL(type, value) \
183     do { \
184     type v; \
185     CHECK(RE("(-?[0-9]+)[uUlL]*").FullMatch(#value, &v)); \
186     CHECK_EQ(v, value); \
187     CHECK(RE("(-?[0-9a-fA-FxX]+)[uUlL]*").FullMatch(#value, CRadix(&v))); \
188     CHECK_EQ(v, value); \
189     } while(0)
190    
191     CHECK_DECIMAL(short, -1);
192     CHECK_DECIMAL(unsigned short, 9999);
193     CHECK_DECIMAL(int, -1000);
194     CHECK_DECIMAL(unsigned int, 12345U);
195     CHECK_DECIMAL(long, -10000000L);
196     CHECK_DECIMAL(unsigned long, 3083324652U);
197     #ifdef HAVE_LONG_LONG
198     CHECK_DECIMAL(long long, -100000000000000LL);
199     #endif
200     #ifdef HAVE_UNSIGNED_LONG_LONG
201     CHECK_DECIMAL(unsigned long long, 1234567890987654321ULL);
202     #endif
203    
204     #undef CHECK_DECIMAL
205    
206     }
207    
208     static void TestReplace() {
209     printf("Testing Replace\n");
210    
211     struct ReplaceTest {
212     const char *regexp;
213     const char *rewrite;
214     const char *original;
215     const char *single;
216     const char *global;
217 ph10 297 int global_count; // the expected return value from ReplaceAll
218 nigel 77 };
219     static const ReplaceTest tests[] = {
220     { "(qu|[b-df-hj-np-tv-z]*)([a-z]+)",
221     "\\2\\1ay",
222     "the quick brown fox jumps over the lazy dogs.",
223     "ethay quick brown fox jumps over the lazy dogs.",
224 ph10 297 "ethay ickquay ownbray oxfay umpsjay overay ethay azylay ogsday.",
225     9 },
226 nigel 77 { "\\w+",
227     "\\0-NOSPAM",
228     "paul.haahr@google.com",
229     "paul-NOSPAM.haahr@google.com",
230 ph10 297 "paul-NOSPAM.haahr-NOSPAM@google-NOSPAM.com-NOSPAM",
231     4 },
232 nigel 77 { "^",
233     "(START)",
234     "foo",
235     "(START)foo",
236 ph10 297 "(START)foo",
237     1 },
238 nigel 77 { "^",
239     "(START)",
240     "",
241     "(START)",
242 ph10 297 "(START)",
243     1 },
244 nigel 77 { "$",
245     "(END)",
246     "",
247     "(END)",
248 ph10 297 "(END)",
249     1 },
250 nigel 77 { "b",
251     "bb",
252     "ababababab",
253     "abbabababab",
254 ph10 297 "abbabbabbabbabb",
255     5 },
256 nigel 77 { "b",
257     "bb",
258     "bbbbbb",
259     "bbbbbbb",
260 ph10 297 "bbbbbbbbbbbb",
261     6 },
262 nigel 77 { "b+",
263     "bb",
264     "bbbbbb",
265     "bb",
266 ph10 297 "bb",
267     1 },
268 nigel 77 { "b*",
269     "bb",
270     "bbbbbb",
271     "bb",
272 ph10 474 "bbbb",
273     2 },
274 nigel 77 { "b*",
275     "bb",
276     "aaaaa",
277     "bbaaaaa",
278 ph10 297 "bbabbabbabbabbabb",
279     6 },
280 nigel 91 { "b*",
281     "bb",
282     "aa\naa\n",
283     "bbaa\naa\n",
284 ph10 297 "bbabbabb\nbbabbabb\nbb",
285     7 },
286 nigel 91 { "b*",
287     "bb",
288     "aa\raa\r",
289     "bbaa\raa\r",
290 ph10 297 "bbabbabb\rbbabbabb\rbb",
291     7 },
292 nigel 91 { "b*",
293     "bb",
294     "aa\r\naa\r\n",
295     "bbaa\r\naa\r\n",
296 ph10 297 "bbabbabb\r\nbbabbabb\r\nbb",
297     7 },
298 ph10 474 // Check empty-string matching (it's tricky!)
299     { "aa|b*",
300     "@",
301     "aa",
302     "@",
303     "@@",
304     2 },
305     { "b*|aa",
306     "@",
307     "aa",
308     "@aa",
309     "@@@",
310     3 },
311 nigel 91 #ifdef SUPPORT_UTF8
312     { "b*",
313     "bb",
314     "\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8", // utf8
315     "bb\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8",
316 ph10 297 "bb\xE3\x83\x9B""bb""\xE3\x83\xBC""bb""\xE3\x83\xA0""bb""\xE3\x81\xB8""bb",
317     5 },
318 nigel 91 { "b*",
319     "bb",
320     "\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n", // utf8
321     "bb\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n",
322     ("bb\xE3\x83\x9B""bb\r\nbb""\xE3\x83\xBC""bb\rbb""\xE3\x83\xA0"
323 ph10 297 "bb\nbb""\xE3\x81\xB8""bb\r\nbb"),
324     9 },
325 nigel 91 #endif
326 ph10 297 { "", NULL, NULL, NULL, NULL, 0 }
327 nigel 77 };
328    
329 nigel 91 #ifdef SUPPORT_UTF8
330     const bool support_utf8 = true;
331     #else
332     const bool support_utf8 = false;
333     #endif
334    
335 nigel 77 for (const ReplaceTest *t = tests; t->original != NULL; ++t) {
336 nigel 91 RE re(t->regexp, RE_Options(PCRE_NEWLINE_CRLF).set_utf8(support_utf8));
337     assert(re.error().empty());
338 nigel 77 string one(t->original);
339 nigel 91 CHECK(re.Replace(t->rewrite, &one));
340 nigel 77 CHECK_EQ(one, t->single);
341     string all(t->original);
342 ph10 297 const int replace_count = re.GlobalReplace(t->rewrite, &all);
343 nigel 77 CHECK_EQ(all, t->global);
344 ph10 297 CHECK_EQ(replace_count, t->global_count);
345 nigel 77 }
346 nigel 91
347     // One final test: test \r\n replacement when we're not in CRLF mode
348     {
349     RE re("b*", RE_Options(PCRE_NEWLINE_CR).set_utf8(support_utf8));
350     assert(re.error().empty());
351     string all("aa\r\naa\r\n");
352 ph10 297 CHECK_EQ(re.GlobalReplace("bb", &all), 9);
353 nigel 91 CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
354     }
355     {
356     RE re("b*", RE_Options(PCRE_NEWLINE_LF).set_utf8(support_utf8));
357     assert(re.error().empty());
358     string all("aa\r\naa\r\n");
359 ph10 297 CHECK_EQ(re.GlobalReplace("bb", &all), 9);
360 nigel 91 CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
361     }
362     // TODO: test what happens when no PCRE_NEWLINE_* flag is set.
363     // Alas, the answer depends on how pcre was compiled.
364 nigel 77 }
365    
366     static void TestExtract() {
367     printf("Testing Extract\n");
368    
369     string s;
370    
371     CHECK(RE("(.*)@([^.]*)").Extract("\\2!\\1", "boris@kremvax.ru", &s));
372     CHECK_EQ(s, "kremvax!boris");
373    
374     // check the RE interface as well
375     CHECK(RE(".*").Extract("'\\0'", "foo", &s));
376     CHECK_EQ(s, "'foo'");
377     CHECK(!RE("bar").Extract("'\\0'", "baz", &s));
378     CHECK_EQ(s, "'foo'");
379     }
380    
381     static void TestConsume() {
382     printf("Testing Consume\n");
383    
384     string word;
385    
386     string s(" aaa b!@#$@#$cccc");
387     StringPiece input(s);
388    
389     RE r("\\s*(\\w+)"); // matches a word, possibly proceeded by whitespace
390     CHECK(r.Consume(&input, &word));
391     CHECK_EQ(word, "aaa");
392     CHECK(r.Consume(&input, &word));
393     CHECK_EQ(word, "b");
394     CHECK(! r.Consume(&input, &word));
395     }
396    
397     static void TestFindAndConsume() {
398     printf("Testing FindAndConsume\n");
399    
400     string word;
401    
402     string s(" aaa b!@#$@#$cccc");
403     StringPiece input(s);
404    
405     RE r("(\\w+)"); // matches a word
406     CHECK(r.FindAndConsume(&input, &word));
407     CHECK_EQ(word, "aaa");
408     CHECK(r.FindAndConsume(&input, &word));
409     CHECK_EQ(word, "b");
410     CHECK(r.FindAndConsume(&input, &word));
411     CHECK_EQ(word, "cccc");
412     CHECK(! r.FindAndConsume(&input, &word));
413     }
414    
415     static void TestMatchNumberPeculiarity() {
416 ph10 627 printf("Testing match-number peculiarity\n");
417 nigel 77
418     string word1;
419     string word2;
420     string word3;
421    
422     RE r("(foo)|(bar)|(baz)");
423     CHECK(r.PartialMatch("foo", &word1, &word2, &word3));
424     CHECK_EQ(word1, "foo");
425     CHECK_EQ(word2, "");
426     CHECK_EQ(word3, "");
427     CHECK(r.PartialMatch("bar", &word1, &word2, &word3));
428     CHECK_EQ(word1, "");
429     CHECK_EQ(word2, "bar");
430     CHECK_EQ(word3, "");
431     CHECK(r.PartialMatch("baz", &word1, &word2, &word3));
432     CHECK_EQ(word1, "");
433     CHECK_EQ(word2, "");
434     CHECK_EQ(word3, "baz");
435     CHECK(!r.PartialMatch("f", &word1, &word2, &word3));
436    
437     string a;
438     CHECK(RE("(foo)|hello").FullMatch("hello", &a));
439     CHECK_EQ(a, "");
440     }
441    
442 nigel 87 static void TestRecursion() {
443 nigel 77 printf("Testing recursion\n");
444    
445 nigel 87 // Get one string that passes (sometimes), one that never does.
446     string text_good("abcdefghijk");
447     string text_bad("acdefghijkl");
448    
449     // According to pcretest, matching text_good against (\w+)*b
450     // requires match_limit of at least 8192, and match_recursion_limit
451     // of at least 37.
452    
453     RE_Options options_ml;
454     options_ml.set_match_limit(8192);
455     RE re("(\\w+)*b", options_ml);
456     CHECK(re.PartialMatch(text_good) == true);
457     CHECK(re.PartialMatch(text_bad) == false);
458     CHECK(re.FullMatch(text_good) == false);
459     CHECK(re.FullMatch(text_bad) == false);
460    
461     options_ml.set_match_limit(1024);
462     RE re2("(\\w+)*b", options_ml);
463     CHECK(re2.PartialMatch(text_good) == false); // because of match_limit
464     CHECK(re2.PartialMatch(text_bad) == false);
465     CHECK(re2.FullMatch(text_good) == false);
466     CHECK(re2.FullMatch(text_bad) == false);
467    
468     RE_Options options_mlr;
469     options_mlr.set_match_limit_recursion(50);
470     RE re3("(\\w+)*b", options_mlr);
471     CHECK(re3.PartialMatch(text_good) == true);
472     CHECK(re3.PartialMatch(text_bad) == false);
473     CHECK(re3.FullMatch(text_good) == false);
474     CHECK(re3.FullMatch(text_bad) == false);
475    
476     options_mlr.set_match_limit_recursion(10);
477     RE re4("(\\w+)*b", options_mlr);
478     CHECK(re4.PartialMatch(text_good) == false);
479     CHECK(re4.PartialMatch(text_bad) == false);
480     CHECK(re4.FullMatch(text_good) == false);
481     CHECK(re4.FullMatch(text_bad) == false);
482 nigel 77 }
483    
484 nigel 93 // A meta-quoted string, interpreted as a pattern, should always match
485     // the original unquoted string.
486     static void TestQuoteMeta(string unquoted, RE_Options options = RE_Options()) {
487     string quoted = RE::QuoteMeta(unquoted);
488     RE re(quoted, options);
489     CHECK(re.FullMatch(unquoted));
490     }
491    
492     // A string containing meaningful regexp characters, which is then meta-
493     // quoted, should not generally match a string the unquoted string does.
494     static void NegativeTestQuoteMeta(string unquoted, string should_not_match,
495     RE_Options options = RE_Options()) {
496     string quoted = RE::QuoteMeta(unquoted);
497     RE re(quoted, options);
498     CHECK(!re.FullMatch(should_not_match));
499     }
500    
501     // Tests that quoted meta characters match their original strings,
502     // and that a few things that shouldn't match indeed do not.
503     static void TestQuotaMetaSimple() {
504     TestQuoteMeta("foo");
505     TestQuoteMeta("foo.bar");
506     TestQuoteMeta("foo\\.bar");
507     TestQuoteMeta("[1-9]");
508     TestQuoteMeta("1.5-2.0?");
509     TestQuoteMeta("\\d");
510     TestQuoteMeta("Who doesn't like ice cream?");
511     TestQuoteMeta("((a|b)c?d*e+[f-h]i)");
512     TestQuoteMeta("((?!)xxx).*yyy");
513     TestQuoteMeta("([");
514 ph10 326 TestQuoteMeta(string("foo\0bar", 7));
515 nigel 93 }
516    
517     static void TestQuoteMetaSimpleNegative() {
518     NegativeTestQuoteMeta("foo", "bar");
519     NegativeTestQuoteMeta("...", "bar");
520     NegativeTestQuoteMeta("\\.", ".");
521     NegativeTestQuoteMeta("\\.", "..");
522     NegativeTestQuoteMeta("(a)", "a");
523     NegativeTestQuoteMeta("(a|b)", "a");
524     NegativeTestQuoteMeta("(a|b)", "(a)");
525     NegativeTestQuoteMeta("(a|b)", "a|b");
526     NegativeTestQuoteMeta("[0-9]", "0");
527     NegativeTestQuoteMeta("[0-9]", "0-9");
528     NegativeTestQuoteMeta("[0-9]", "[9]");
529     NegativeTestQuoteMeta("((?!)xxx)", "xxx");
530     }
531    
532     static void TestQuoteMetaLatin1() {
533     TestQuoteMeta("3\xb2 = 9");
534     }
535    
// QuoteMeta round-trips for multi-byte UTF-8 sequences.  Compiled to an
// empty function unless SUPPORT_UTF8 is defined.
static void TestQuoteMetaUtf8() {
#ifdef SUPPORT_UTF8
  const RE_Options utf8_options = pcrecpp::UTF8();

  TestQuoteMeta("Pl\xc3\xa1\x63ido Domingo", utf8_options);
  TestQuoteMeta("xyz", utf8_options);                 // No fancy utf8
  TestQuoteMeta("\xc2\xb0", utf8_options);            // 2-byte utf8 (degree symbol)
  TestQuoteMeta("27\xc2\xb0 degrees", utf8_options);  // As a middle character
  TestQuoteMeta("\xe2\x80\xb3", utf8_options);        // 3-byte utf8 (double prime)
  TestQuoteMeta("\xf0\x9d\x85\x9f", utf8_options);    // 4-byte utf8 (music note)
  TestQuoteMeta("27\xc2\xb0");  // Interpreted as Latin-1, but should still work
  NegativeTestQuoteMeta("27\xc2\xb0",                 // 2-byte utf (degree symbol)
                        "27\\\xc2\\\xb0",
                        utf8_options);
#endif
}
550    
551     static void TestQuoteMetaAll() {
552     printf("Testing QuoteMeta\n");
553     TestQuotaMetaSimple();
554     TestQuoteMetaSimpleNegative();
555     TestQuoteMetaLatin1();
556     TestQuoteMetaUtf8();
557     }
558    
559 nigel 81 //
560     // Options tests contributed by
561     // Giuseppe Maxia, CTO, Stardata s.r.l.
562     // July 2005
563     //
564     static void GetOneOptionResult(
565     const char *option_name,
566     const char *regex,
567     const char *str,
568     RE_Options options,
569     bool full,
570     string expected) {
571 nigel 77
572 nigel 81 printf("Testing Option <%s>\n", option_name);
573     if(VERBOSE_TEST)
574     printf("/%s/ finds \"%s\" within \"%s\" \n",
575     regex,
576     expected.c_str(),
577     str);
578     string captured("");
579     if (full)
580     RE(regex,options).FullMatch(str, &captured);
581     else
582     RE(regex,options).PartialMatch(str, &captured);
583     CHECK_EQ(captured, expected);
584     }
585    
586     static void TestOneOption(
587     const char *option_name,
588     const char *regex,
589     const char *str,
590     RE_Options options,
591     bool full,
592     bool assertive = true) {
593    
594     printf("Testing Option <%s>\n", option_name);
595     if (VERBOSE_TEST)
596     printf("'%s' %s /%s/ \n",
597     str,
598     (assertive? "matches" : "doesn't match"),
599     regex);
600     if (assertive) {
601     if (full)
602     CHECK(RE(regex,options).FullMatch(str));
603     else
604     CHECK(RE(regex,options).PartialMatch(str));
605     } else {
606     if (full)
607     CHECK(!RE(regex,options).FullMatch(str));
608     else
609     CHECK(!RE(regex,options).PartialMatch(str));
610     }
611     }
612    
613     static void Test_CASELESS() {
614     RE_Options options;
615     RE_Options options2;
616    
617     options.set_caseless(true);
618     TestOneOption("CASELESS (class)", "HELLO", "hello", options, false);
619     TestOneOption("CASELESS (class2)", "HELLO", "hello", options2.set_caseless(true), false);
620     TestOneOption("CASELESS (class)", "^[A-Z]+$", "Hello", options, false);
621    
622     TestOneOption("CASELESS (function)", "HELLO", "hello", pcrecpp::CASELESS(), false);
623     TestOneOption("CASELESS (function)", "^[A-Z]+$", "Hello", pcrecpp::CASELESS(), false);
624     options.set_caseless(false);
625     TestOneOption("no CASELESS", "HELLO", "hello", options, false, false);
626     }
627    
628     static void Test_MULTILINE() {
629     RE_Options options;
630     RE_Options options2;
631     const char *str = "HELLO\n" "cruel\n" "world\n";
632    
633     options.set_multiline(true);
634     TestOneOption("MULTILINE (class)", "^cruel$", str, options, false);
635     TestOneOption("MULTILINE (class2)", "^cruel$", str, options2.set_multiline(true), false);
636     TestOneOption("MULTILINE (function)", "^cruel$", str, pcrecpp::MULTILINE(), false);
637     options.set_multiline(false);
638     TestOneOption("no MULTILINE", "^cruel$", str, options, false, false);
639     }
640    
641     static void Test_DOTALL() {
642     RE_Options options;
643     RE_Options options2;
644     const char *str = "HELLO\n" "cruel\n" "world";
645    
646     options.set_dotall(true);
647     TestOneOption("DOTALL (class)", "HELLO.*world", str, options, true);
648     TestOneOption("DOTALL (class2)", "HELLO.*world", str, options2.set_dotall(true), true);
649     TestOneOption("DOTALL (function)", "HELLO.*world", str, pcrecpp::DOTALL(), true);
650     options.set_dotall(false);
651     TestOneOption("no DOTALL", "HELLO.*world", str, options, true, false);
652     }
653    
654     static void Test_DOLLAR_ENDONLY() {
655     RE_Options options;
656     RE_Options options2;
657     const char *str = "HELLO world\n";
658    
659     TestOneOption("no DOLLAR_ENDONLY", "world$", str, options, false);
660     options.set_dollar_endonly(true);
661     TestOneOption("DOLLAR_ENDONLY 1", "world$", str, options, false, false);
662     TestOneOption("DOLLAR_ENDONLY 2", "world$", str, options2.set_dollar_endonly(true), false, false);
663     }
664    
665     static void Test_EXTRA() {
666     RE_Options options;
667     const char *str = "HELLO";
668    
669     options.set_extra(true);
670     TestOneOption("EXTRA 1", "\\HELL\\O", str, options, true, false );
671     TestOneOption("EXTRA 2", "\\HELL\\O", str, RE_Options().set_extra(true), true, false );
672     options.set_extra(false);
673     TestOneOption("no EXTRA", "\\HELL\\O", str, options, true );
674     }
675    
676     static void Test_EXTENDED() {
677     RE_Options options;
678     RE_Options options2;
679     const char *str = "HELLO world";
680    
681     options.set_extended(true);
682     TestOneOption("EXTENDED (class)", "HELLO world", str, options, false, false);
683     TestOneOption("EXTENDED (class2)", "HELLO world", str, options2.set_extended(true), false, false);
684     TestOneOption("EXTENDED (class)",
685     "^ HE L{2} O "
686     "\\s+ "
687     "\\w+ $ ",
688     str,
689     options,
690     false);
691    
692     TestOneOption("EXTENDED (function)", "HELLO world", str, pcrecpp::EXTENDED(), false, false);
693     TestOneOption("EXTENDED (function)",
694     "^ HE L{2} O "
695     "\\s+ "
696     "\\w+ $ ",
697     str,
698     pcrecpp::EXTENDED(),
699     false);
700    
701     options.set_extended(false);
702     TestOneOption("no EXTENDED", "HELLO world", str, options, false);
703     }
704    
705     static void Test_NO_AUTO_CAPTURE() {
706     RE_Options options;
707     const char *str = "HELLO world";
708     string captured;
709    
710     printf("Testing Option <no NO_AUTO_CAPTURE>\n");
711     if (VERBOSE_TEST)
712     printf("parentheses capture text\n");
713     RE re("(world|universe)$", options);
714     CHECK(re.Extract("\\1", str , &captured));
715     CHECK_EQ(captured, "world");
716     options.set_no_auto_capture(true);
717     printf("testing Option <NO_AUTO_CAPTURE>\n");
718     if (VERBOSE_TEST)
719     printf("parentheses do not capture text\n");
720     re.Extract("\\1",str, &captured );
721     CHECK_EQ(captured, "world");
722     }
723    
724     static void Test_UNGREEDY() {
725     RE_Options options;
726     const char *str = "HELLO, 'this' is the 'world'";
727    
728     options.set_ungreedy(true);
729     GetOneOptionResult("UNGREEDY 1", "('.*')", str, options, false, "'this'" );
730     GetOneOptionResult("UNGREEDY 2", "('.*')", str, RE_Options().set_ungreedy(true), false, "'this'" );
731     GetOneOptionResult("UNGREEDY", "('.*?')", str, options, false, "'this' is the 'world'" );
732    
733     options.set_ungreedy(false);
734     GetOneOptionResult("no UNGREEDY", "('.*')", str, options, false, "'this' is the 'world'" );
735     GetOneOptionResult("no UNGREEDY", "('.*?')", str, options, false, "'this'" );
736     }
737    
738     static void Test_all_options() {
739     const char *str = "HELLO\n" "cruel\n" "world";
740     RE_Options options;
741     options.set_all_options(PCRE_CASELESS | PCRE_DOTALL);
742    
743     TestOneOption("all_options (CASELESS|DOTALL)", "^hello.*WORLD", str , options, false);
744     options.set_all_options(0);
745     TestOneOption("all_options (0)", "^hello.*WORLD", str , options, false, false);
746     options.set_all_options(PCRE_MULTILINE | PCRE_EXTENDED);
747    
748     TestOneOption("all_options (MULTILINE|EXTENDED)", " ^ c r u e l $ ", str, options, false);
749     TestOneOption("all_options (MULTILINE|EXTENDED) with constructor",
750     " ^ c r u e l $ ",
751     str,
752     RE_Options(PCRE_MULTILINE | PCRE_EXTENDED),
753     false);
754    
755     TestOneOption("all_options (MULTILINE|EXTENDED) with concatenation",
756     " ^ c r u e l $ ",
757     str,
758     RE_Options()
759     .set_multiline(true)
760     .set_extended(true),
761     false);
762    
763     options.set_all_options(0);
764     TestOneOption("all_options (0)", "^ c r u e l $", str, options, false, false);
765    
766     }
767    
768     static void TestOptions() {
769     printf("Testing Options\n");
770     Test_CASELESS();
771     Test_MULTILINE();
772     Test_DOTALL();
773     Test_DOLLAR_ENDONLY();
774     Test_EXTENDED();
775     Test_NO_AUTO_CAPTURE();
776     Test_UNGREEDY();
777     Test_EXTRA();
778     Test_all_options();
779     }
780    
781 nigel 93 static void TestConstructors() {
782     printf("Testing constructors\n");
783    
784     RE_Options options;
785     options.set_dotall(true);
786     const char *str = "HELLO\n" "cruel\n" "world";
787    
788     RE orig("HELLO.*world", options);
789     CHECK(orig.FullMatch(str));
790    
791     RE copy1(orig);
792     CHECK(copy1.FullMatch(str));
793    
794     RE copy2("not a match");
795     CHECK(!copy2.FullMatch(str));
796     copy2 = copy1;
797     CHECK(copy2.FullMatch(str));
798     copy2 = orig;
799     CHECK(copy2.FullMatch(str));
800    
801     // Make sure when we assign to ourselves, nothing bad happens
802     orig = orig;
803     copy1 = copy1;
804     copy2 = copy2;
805     CHECK(orig.FullMatch(str));
806     CHECK(copy1.FullMatch(str));
807     CHECK(copy2.FullMatch(str));
808     }
809    
810 nigel 77 int main(int argc, char** argv) {
811     // Treat any flag as --help
812     if (argc > 1 && argv[1][0] == '-') {
813     printf("Usage: %s [timing1|timing2|timing3 num-iters]\n"
814     " If 'timingX ###' is specified, run the given timing test\n"
815     " with the given number of iterations, rather than running\n"
816     " the default corectness test.\n", argv[0]);
817     return 0;
818     }
819    
820     if (argc > 1) {
821     if ( argc == 2 || atoi(argv[2]) == 0) {
822     printf("timing mode needs a num-iters argument\n");
823     return 1;
824     }
825     if (!strcmp(argv[1], "timing1"))
826     Timing1(atoi(argv[2]));
827     else if (!strcmp(argv[1], "timing2"))
828     Timing2(atoi(argv[2]));
829     else if (!strcmp(argv[1], "timing3"))
830     Timing3(atoi(argv[2]));
831     else
832     printf("Unknown argument '%s'\n", argv[1]);
833     return 0;
834     }
835    
836 ph10 667 printf("PCRE C++ wrapper tests\n");
837 nigel 77 printf("Testing FullMatch\n");
838    
839     int i;
840     string s;
841    
842     /***** FullMatch with no args *****/
843    
844     CHECK(RE("h.*o").FullMatch("hello"));
845 ph10 179 CHECK(!RE("h.*o").FullMatch("othello")); // Must be anchored at front
846     CHECK(!RE("h.*o").FullMatch("hello!")); // Must be anchored at end
847     CHECK(RE("a*").FullMatch("aaaa")); // Fullmatch with normal op
848     CHECK(RE("a*?").FullMatch("aaaa")); // Fullmatch with nongreedy op
849     CHECK(RE("a*?\\z").FullMatch("aaaa")); // Two unusual ops
850 nigel 77
851     /***** FullMatch with args *****/
852    
853     // Zero-arg
854     CHECK(RE("\\d+").FullMatch("1001"));
855    
856     // Single-arg
857     CHECK(RE("(\\d+)").FullMatch("1001", &i));
858     CHECK_EQ(i, 1001);
859     CHECK(RE("(-?\\d+)").FullMatch("-123", &i));
860     CHECK_EQ(i, -123);
861     CHECK(!RE("()\\d+").FullMatch("10", &i));
862     CHECK(!RE("(\\d+)").FullMatch("1234567890123456789012345678901234567890",
863     &i));
864    
865     // Digits surrounding integer-arg
866     CHECK(RE("1(\\d*)4").FullMatch("1234", &i));
867     CHECK_EQ(i, 23);
868     CHECK(RE("(\\d)\\d+").FullMatch("1234", &i));
869     CHECK_EQ(i, 1);
870     CHECK(RE("(-\\d)\\d+").FullMatch("-1234", &i));
871     CHECK_EQ(i, -1);
872     CHECK(RE("(\\d)").PartialMatch("1234", &i));
873     CHECK_EQ(i, 1);
874     CHECK(RE("(-\\d)").PartialMatch("-1234", &i));
875     CHECK_EQ(i, -1);
876    
877     // String-arg
878     CHECK(RE("h(.*)o").FullMatch("hello", &s));
879     CHECK_EQ(s, string("ell"));
880    
881     // StringPiece-arg
882     StringPiece sp;
883     CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &sp, &i));
884     CHECK_EQ(sp.size(), 4);
885     CHECK(memcmp(sp.data(), "ruby", 4) == 0);
886     CHECK_EQ(i, 1234);
887    
888     // Multi-arg
889     CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &s, &i));
890     CHECK_EQ(s, string("ruby"));
891     CHECK_EQ(i, 1234);
892    
893 ph10 263 // Ignore non-void* NULL arg
894     CHECK(RE("he(.*)lo").FullMatch("hello", (char*)NULL));
895     CHECK(RE("h(.*)o").FullMatch("hello", (string*)NULL));
896     CHECK(RE("h(.*)o").FullMatch("hello", (StringPiece*)NULL));
897     CHECK(RE("(.*)").FullMatch("1234", (int*)NULL));
898 ph10 302 #ifdef HAVE_LONG_LONG
899 ph10 263 CHECK(RE("(.*)").FullMatch("1234567890123456", (long long*)NULL));
900 ph10 302 #endif
901 ph10 263 CHECK(RE("(.*)").FullMatch("123.4567890123456", (double*)NULL));
902     CHECK(RE("(.*)").FullMatch("123.4567890123456", (float*)NULL));
903    
904     // Fail on non-void* NULL arg if the match doesn't parse for the given type.
905     CHECK(!RE("h(.*)lo").FullMatch("hello", &s, (char*)NULL));
906     CHECK(!RE("(.*)").FullMatch("hello", (int*)NULL));
907     CHECK(!RE("(.*)").FullMatch("1234567890123456", (int*)NULL));
908     CHECK(!RE("(.*)").FullMatch("hello", (double*)NULL));
909     CHECK(!RE("(.*)").FullMatch("hello", (float*)NULL));
910    
911 nigel 77 // Ignored arg
912     CHECK(RE("(\\w+)(:)(\\d+)").FullMatch("ruby:1234", &s, (void*)NULL, &i));
913     CHECK_EQ(s, string("ruby"));
914     CHECK_EQ(i, 1234);
915    
916     // Type tests
917     {
918     char c;
919     CHECK(RE("(H)ello").FullMatch("Hello", &c));
920     CHECK_EQ(c, 'H');
921     }
922     {
923     unsigned char c;
924     CHECK(RE("(H)ello").FullMatch("Hello", &c));
925     CHECK_EQ(c, static_cast<unsigned char>('H'));
926     }
927     {
928     short v;
929     CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
930     CHECK(RE("(-?\\d+)").FullMatch("-100", &v)); CHECK_EQ(v, -100);
931     CHECK(RE("(-?\\d+)").FullMatch("32767", &v)); CHECK_EQ(v, 32767);
932     CHECK(RE("(-?\\d+)").FullMatch("-32768", &v)); CHECK_EQ(v, -32768);
933     CHECK(!RE("(-?\\d+)").FullMatch("-32769", &v));
934     CHECK(!RE("(-?\\d+)").FullMatch("32768", &v));
935     }
936     {
937     unsigned short v;
938     CHECK(RE("(\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
939     CHECK(RE("(\\d+)").FullMatch("32767", &v)); CHECK_EQ(v, 32767);
940     CHECK(RE("(\\d+)").FullMatch("65535", &v)); CHECK_EQ(v, 65535);
941     CHECK(!RE("(\\d+)").FullMatch("65536", &v));
942     }
943     {
944     int v;
945     static const int max_value = 0x7fffffff;
946     static const int min_value = -max_value - 1;
947     CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
948     CHECK(RE("(-?\\d+)").FullMatch("-100", &v)); CHECK_EQ(v, -100);
949     CHECK(RE("(-?\\d+)").FullMatch("2147483647", &v)); CHECK_EQ(v, max_value);
950     CHECK(RE("(-?\\d+)").FullMatch("-2147483648", &v)); CHECK_EQ(v, min_value);
951     CHECK(!RE("(-?\\d+)").FullMatch("-2147483649", &v));
952     CHECK(!RE("(-?\\d+)").FullMatch("2147483648", &v));
953     }
954     {
955     unsigned int v;
956     static const unsigned int max_value = 0xfffffffful;
957     CHECK(RE("(\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
958     CHECK(RE("(\\d+)").FullMatch("4294967295", &v)); CHECK_EQ(v, max_value);
959     CHECK(!RE("(\\d+)").FullMatch("4294967296", &v));
960     }
961     #ifdef HAVE_LONG_LONG
962 ph10 193 # if defined(__MINGW__) || defined(__MINGW32__)
963     # define LLD "%I64d"
964 ph10 201 # define LLU "%I64u"
965 ph10 193 # else
966     # define LLD "%lld"
967 ph10 201 # define LLU "%llu"
968 ph10 193 # endif
969 nigel 77 {
970     long long v;
971     static const long long max_value = 0x7fffffffffffffffLL;
972     static const long long min_value = -max_value - 1;
973 ph10 257 char buf[32]; // definitely big enough for a long long
974 nigel 77
975     CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
976     CHECK(RE("(-?\\d+)").FullMatch("-100",&v)); CHECK_EQ(v, -100);
977    
978 ph10 257 sprintf(buf, LLD, max_value);
979 nigel 77 CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
980    
981 ph10 257 sprintf(buf, LLD, min_value);
982 nigel 77 CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, min_value);
983    
984 ph10 257 sprintf(buf, LLD, max_value);
985 nigel 77 assert(buf[strlen(buf)-1] != '9');
986     buf[strlen(buf)-1]++;
987     CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
988    
989 ph10 257 sprintf(buf, LLD, min_value);
990 nigel 77 assert(buf[strlen(buf)-1] != '9');
991     buf[strlen(buf)-1]++;
992     CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
993     }
994     #endif
995     #if defined HAVE_UNSIGNED_LONG_LONG && defined HAVE_LONG_LONG
996     {
997     unsigned long long v;
998     long long v2;
999     static const unsigned long long max_value = 0xffffffffffffffffULL;
1000 ph10 257 char buf[32]; // definitely big enough for an unsigned long long
1001 nigel 77
1002     CHECK(RE("(-?\\d+)").FullMatch("100",&v)); CHECK_EQ(v, 100);
1003     CHECK(RE("(-?\\d+)").FullMatch("-100",&v2)); CHECK_EQ(v2, -100);
1004    
1005 ph10 257 sprintf(buf, LLU, max_value);
1006 nigel 77 CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
1007    
1008     assert(buf[strlen(buf)-1] != '9');
1009     buf[strlen(buf)-1]++;
1010     CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
1011     }
1012     #endif
1013     {
1014     float v;
1015     CHECK(RE("(.*)").FullMatch("100", &v));
1016     CHECK(RE("(.*)").FullMatch("-100.", &v));
1017     CHECK(RE("(.*)").FullMatch("1e23", &v));
1018     }
1019     {
1020     double v;
1021     CHECK(RE("(.*)").FullMatch("100", &v));
1022     CHECK(RE("(.*)").FullMatch("-100.", &v));
1023     CHECK(RE("(.*)").FullMatch("1e23", &v));
1024     }
1025    
1026     // Check that matching is fully anchored
1027     CHECK(!RE("(\\d+)").FullMatch("x1001", &i));
1028     CHECK(!RE("(\\d+)").FullMatch("1001x", &i));
1029     CHECK(RE("x(\\d+)").FullMatch("x1001", &i)); CHECK_EQ(i, 1001);
1030     CHECK(RE("(\\d+)x").FullMatch("1001x", &i)); CHECK_EQ(i, 1001);
1031    
1032     // Braces
1033     CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcd"));
1034     CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcde"));
1035     CHECK(!RE("[0-9a-f+.-]{5,}").FullMatch("0abc"));
1036    
1037     // Complicated RE
1038     CHECK(RE("foo|bar|[A-Z]").FullMatch("foo"));
1039     CHECK(RE("foo|bar|[A-Z]").FullMatch("bar"));
1040     CHECK(RE("foo|bar|[A-Z]").FullMatch("X"));
1041     CHECK(!RE("foo|bar|[A-Z]").FullMatch("XY"));
1042    
1043     // Check full-match handling (needs '$' tacked on internally)
1044     CHECK(RE("fo|foo").FullMatch("fo"));
1045     CHECK(RE("fo|foo").FullMatch("foo"));
1046     CHECK(RE("fo|foo$").FullMatch("fo"));
1047     CHECK(RE("fo|foo$").FullMatch("foo"));
1048     CHECK(RE("foo$").FullMatch("foo"));
1049     CHECK(!RE("foo\\$").FullMatch("foo$bar"));
1050     CHECK(!RE("fo|bar").FullMatch("fox"));
1051    
1052     // Enable the following (change 'false' to 'true') if we change the
1053     // handling of '$' to prevent it from matching a trailing newline
1054     if (false) {
1055     // Check that we don't get bitten by pcre's special handling of a
1056     // '\n' at the end of the string matching '$'
1057     CHECK(!RE("foo$").PartialMatch("foo\n"));
1058     }
1059    
1060     // Number of args
1061     int a[16];
1062     CHECK(RE("").FullMatch(""));
1063    
1064     memset(a, 0, sizeof(0));
1065     CHECK(RE("(\\d){1}").FullMatch("1",
1066     &a[0]));
1067     CHECK_EQ(a[0], 1);
1068    
1069     memset(a, 0, sizeof(0));
1070     CHECK(RE("(\\d)(\\d)").FullMatch("12",
1071     &a[0], &a[1]));
1072     CHECK_EQ(a[0], 1);
1073     CHECK_EQ(a[1], 2);
1074    
1075     memset(a, 0, sizeof(0));
1076     CHECK(RE("(\\d)(\\d)(\\d)").FullMatch("123",
1077     &a[0], &a[1], &a[2]));
1078     CHECK_EQ(a[0], 1);
1079     CHECK_EQ(a[1], 2);
1080     CHECK_EQ(a[2], 3);
1081    
1082     memset(a, 0, sizeof(0));
1083     CHECK(RE("(\\d)(\\d)(\\d)(\\d)").FullMatch("1234",
1084     &a[0], &a[1], &a[2], &a[3]));
1085     CHECK_EQ(a[0], 1);
1086     CHECK_EQ(a[1], 2);
1087     CHECK_EQ(a[2], 3);
1088     CHECK_EQ(a[3], 4);
1089    
1090     memset(a, 0, sizeof(0));
1091     CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("12345",
1092     &a[0], &a[1], &a[2],
1093     &a[3], &a[4]));
1094     CHECK_EQ(a[0], 1);
1095     CHECK_EQ(a[1], 2);
1096     CHECK_EQ(a[2], 3);
1097     CHECK_EQ(a[3], 4);
1098     CHECK_EQ(a[4], 5);
1099    
1100     memset(a, 0, sizeof(0));
1101     CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("123456",
1102     &a[0], &a[1], &a[2],
1103     &a[3], &a[4], &a[5]));
1104     CHECK_EQ(a[0], 1);
1105     CHECK_EQ(a[1], 2);
1106     CHECK_EQ(a[2], 3);
1107     CHECK_EQ(a[3], 4);
1108     CHECK_EQ(a[4], 5);
1109     CHECK_EQ(a[5], 6);
1110    
1111     memset(a, 0, sizeof(0));
1112     CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("1234567",
1113     &a[0], &a[1], &a[2], &a[3],
1114     &a[4], &a[5], &a[6]));
1115     CHECK_EQ(a[0], 1);
1116     CHECK_EQ(a[1], 2);
1117     CHECK_EQ(a[2], 3);
1118     CHECK_EQ(a[3], 4);
1119     CHECK_EQ(a[4], 5);
1120     CHECK_EQ(a[5], 6);
1121     CHECK_EQ(a[6], 7);
1122    
1123     memset(a, 0, sizeof(0));
1124     CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)"
1125     "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch(
1126     "1234567890123456",
1127     &a[0], &a[1], &a[2], &a[3],
1128     &a[4], &a[5], &a[6], &a[7],
1129     &a[8], &a[9], &a[10], &a[11],
1130     &a[12], &a[13], &a[14], &a[15]));
1131     CHECK_EQ(a[0], 1);
1132     CHECK_EQ(a[1], 2);
1133     CHECK_EQ(a[2], 3);
1134     CHECK_EQ(a[3], 4);
1135     CHECK_EQ(a[4], 5);
1136     CHECK_EQ(a[5], 6);
1137     CHECK_EQ(a[6], 7);
1138     CHECK_EQ(a[7], 8);
1139     CHECK_EQ(a[8], 9);
1140     CHECK_EQ(a[9], 0);
1141     CHECK_EQ(a[10], 1);
1142     CHECK_EQ(a[11], 2);
1143     CHECK_EQ(a[12], 3);
1144     CHECK_EQ(a[13], 4);
1145     CHECK_EQ(a[14], 5);
1146     CHECK_EQ(a[15], 6);
1147    
1148     /***** PartialMatch *****/
1149    
1150     printf("Testing PartialMatch\n");
1151    
1152     CHECK(RE("h.*o").PartialMatch("hello"));
1153     CHECK(RE("h.*o").PartialMatch("othello"));
1154     CHECK(RE("h.*o").PartialMatch("hello!"));
1155     CHECK(RE("((((((((((((((((((((x))))))))))))))))))))").PartialMatch("x"));
1156    
1157 nigel 93 /***** other tests *****/
1158    
1159 nigel 77 RadixTests();
1160     TestReplace();
1161     TestExtract();
1162     TestConsume();
1163     TestFindAndConsume();
1164 nigel 93 TestQuoteMetaAll();
1165 nigel 77 TestMatchNumberPeculiarity();
1166    
1167     // Check the pattern() accessor
1168     {
1169     const string kPattern = "http://([^/]+)/.*";
1170     const RE re(kPattern);
1171     CHECK_EQ(kPattern, re.pattern());
1172     }
1173    
1174     // Check RE error field.
1175     {
1176     RE re("foo");
1177     CHECK(re.error().empty()); // Must have no error
1178     }
1179    
1180     #ifdef SUPPORT_UTF8
1181     // Check UTF-8 handling
1182     {
1183     printf("Testing UTF-8 handling\n");
1184    
1185     // Three Japanese characters (nihongo)
1186 ph10 256 const unsigned char utf8_string[] = {
1187 nigel 77 0xe6, 0x97, 0xa5, // 65e5
1188     0xe6, 0x9c, 0xac, // 672c
1189     0xe8, 0xaa, 0x9e, // 8a9e
1190     0
1191     };
1192 ph10 256 const unsigned char utf8_pattern[] = {
1193 nigel 77 '.',
1194     0xe6, 0x9c, 0xac, // 672c
1195     '.',
1196     0
1197     };
1198    
1199     // Both should match in either mode, bytes or UTF-8
1200     RE re_test1(".........");
1201     CHECK(re_test1.FullMatch(utf8_string));
1202     RE re_test2("...", pcrecpp::UTF8());
1203     CHECK(re_test2.FullMatch(utf8_string));
1204    
1205     // Check that '.' matches one byte or UTF-8 character
1206     // according to the mode.
1207     string ss;
1208     RE re_test3("(.)");
1209     CHECK(re_test3.PartialMatch(utf8_string, &ss));
1210     CHECK_EQ(ss, string("\xe6"));
1211     RE re_test4("(.)", pcrecpp::UTF8());
1212     CHECK(re_test4.PartialMatch(utf8_string, &ss));
1213     CHECK_EQ(ss, string("\xe6\x97\xa5"));
1214    
1215     // Check that string matches itself in either mode
1216     RE re_test5(utf8_string);
1217     CHECK(re_test5.FullMatch(utf8_string));
1218     RE re_test6(utf8_string, pcrecpp::UTF8());
1219     CHECK(re_test6.FullMatch(utf8_string));
1220    
1221     // Check that pattern matches string only in UTF8 mode
1222     RE re_test7(utf8_pattern);
1223     CHECK(!re_test7.FullMatch(utf8_string));
1224     RE re_test8(utf8_pattern, pcrecpp::UTF8());
1225     CHECK(re_test8.FullMatch(utf8_string));
1226     }
1227    
1228     // Check that ungreedy, UTF8 regular expressions don't match when they
1229     // oughtn't -- see bug 82246.
1230     {
1231     // This code always worked.
1232     const char* pattern = "\\w+X";
1233     const string target = "a aX";
1234     RE match_sentence(pattern);
1235     RE match_sentence_re(pattern, pcrecpp::UTF8());
1236    
1237     CHECK(!match_sentence.FullMatch(target));
1238     CHECK(!match_sentence_re.FullMatch(target));
1239     }
1240    
1241     {
1242     const char* pattern = "(?U)\\w+X";
1243     const string target = "a aX";
1244     RE match_sentence(pattern);
1245     RE match_sentence_re(pattern, pcrecpp::UTF8());
1246    
1247     CHECK(!match_sentence.FullMatch(target));
1248     CHECK(!match_sentence_re.FullMatch(target));
1249     }
1250     #endif /* def SUPPORT_UTF8 */
1251    
1252     printf("Testing error reporting\n");
1253    
1254     { RE re("a\\1"); CHECK(!re.error().empty()); }
1255     {
1256     RE re("a[x");
1257     CHECK(!re.error().empty());
1258     }
1259     {
1260     RE re("a[z-a]");
1261     CHECK(!re.error().empty());
1262     }
1263     {
1264     RE re("a[[:foobar:]]");
1265     CHECK(!re.error().empty());
1266     }
1267     {
1268     RE re("a(b");
1269     CHECK(!re.error().empty());
1270     }
1271     {
1272     RE re("a\\");
1273     CHECK(!re.error().empty());
1274     }
1275    
1276 nigel 87 // Test that recursion is stopped
1277     TestRecursion();
1278 nigel 77
1279 nigel 81 // Test Options
1280     if (getenv("VERBOSE_TEST") != NULL)
1281     VERBOSE_TEST = true;
1282     TestOptions();
1283    
1284 nigel 93 // Test the constructors
1285     TestConstructors();
1286    
1287 nigel 77 // Done
1288     printf("OK\n");
1289    
1290     return 0;
1291     }

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12