/[pcre]/code/trunk/pcrecpp_unittest.cc
ViewVC logotype

Contents of /code/trunk/pcrecpp_unittest.cc

Parent Directory Parent Directory | Revision Log Revision Log


Revision 297 - (hide annotations) (download)
Fri Jan 4 19:44:00 2008 UTC (5 years, 4 months ago) by ph10
File size: 38847 byte(s)
Craig's patch for the double-counting bug in global replace.

1 nigel 93 // -*- coding: utf-8 -*-
2     //
3     // Copyright (c) 2005 - 2006, Google Inc.
4 nigel 77 // All rights reserved.
5     //
6     // Redistribution and use in source and binary forms, with or without
7     // modification, are permitted provided that the following conditions are
8     // met:
9     //
10     // * Redistributions of source code must retain the above copyright
11     // notice, this list of conditions and the following disclaimer.
12     // * Redistributions in binary form must reproduce the above
13     // copyright notice, this list of conditions and the following disclaimer
14     // in the documentation and/or other materials provided with the
15     // distribution.
16     // * Neither the name of Google Inc. nor the names of its
17     // contributors may be used to endorse or promote products derived from
18     // this software without specific prior written permission.
19     //
20     // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21     // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22     // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23     // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24     // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25     // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26     // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27     // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28     // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29     // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30     // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31     //
32     // Author: Sanjay Ghemawat
33     //
34     // TODO: Test extractions for PartialMatch/Consume
35    
36 ph10 200 #ifdef HAVE_CONFIG_H
37 ph10 236 #include "config.h"
38 ph10 200 #endif
39 ph10 199
40 nigel 77 #include <stdio.h>
41 nigel 91 #include <cassert>
42 nigel 77 #include <vector>
43     #include "pcrecpp.h"
44    
45     using pcrecpp::StringPiece;
46     using pcrecpp::RE;
47     using pcrecpp::RE_Options;
48     using pcrecpp::Hex;
49     using pcrecpp::Octal;
50     using pcrecpp::CRadix;
51    
// When true, the option tests below print extra detail (pattern, subject and
// expectation) for each case.  Nothing in the visible part of this file turns
// it on, so it defaults to quiet output.
52 nigel 81 static bool VERBOSE_TEST = false;
53    
// CHECK terminates the test program with a diagnostic on stderr (file, line
// and the text of the condition) when `condition` is false.  It is *not*
// controlled by NDEBUG, so the condition is evaluated regardless of
// compilation mode.  Therefore, it is safe to do things like:
//    CHECK_EQ(fp->Write(x), 4)
#define CHECK(condition) do {                                  \
    if (!(condition)) {                                        \
      fprintf(stderr, "%s:%d: Check failed: %s\n",             \
              __FILE__, __LINE__, #condition);                 \
      exit(1);                                                 \
    }                                                          \
  } while (0)

// CHECK_EQ dies unless a == b.  Both operands are parenthesized so that
// arguments containing operators of lower precedence than == (such as &, |
// or ?:) are compared as whole expressions instead of being re-associated
// by the expansion.
#define CHECK_EQ(a, b) CHECK((a) == (b))
67    
68     static void Timing1(int num_iters) {
69     // Same pattern lots of times
70     RE pattern("ruby:\\d+");
71     StringPiece p("ruby:1234");
72     for (int j = num_iters; j > 0; j--) {
73     CHECK(pattern.FullMatch(p));
74     }
75     }
76    
77     static void Timing2(int num_iters) {
78     // Same pattern lots of times
79     RE pattern("ruby:(\\d+)");
80     int i;
81     for (int j = num_iters; j > 0; j--) {
82     CHECK(pattern.FullMatch("ruby:1234", &i));
83     CHECK_EQ(i, 1234);
84     }
85     }
86    
87     static void Timing3(int num_iters) {
88     string text_string;
89     for (int j = num_iters; j > 0; j--) {
90     text_string += "this is another line\n";
91     }
92    
93     RE line_matcher(".*\n");
94     string line;
95     StringPiece text(text_string);
96     int counter = 0;
97     while (line_matcher.Consume(&text)) {
98     counter++;
99     }
100     printf("Matched %d lines\n", counter);
101     }
102    
#if 0  // enable this if you have a way of defining VirtualProcessSize()

// Checks for memory leaks by compiling 100000 distinct patterns and comparing
// the process size at the halfway point with the size at the end.  The test
// fails if the second half of the run grew the process by 2% or more.
static void LeakTest() {
  unsigned long long initial_size = 0;
  for (int i = 0; i < 100000; i++) {
    if (i == 50000) {
      initial_size = VirtualProcessSize();
      printf("Size after 50000: %llu\n", initial_size);
    }
    char buf[100];  // definitely big enough
    sprintf(buf, "pat%09d", i);
    RE newre(buf);
  }
  // Same type as initial_size so that it matches the %llu conversion below.
  const unsigned long long final_size = VirtualProcessSize();
  printf("Size after 100000: %llu\n", final_size);
  // Convert before subtracting: an unsigned difference would wrap around to
  // a huge value if the process happened to shrink.
  const double growth =
      (double(final_size) - double(initial_size)) / double(final_size);
  printf("Growth: %0.2f%%", growth * 100);
  CHECK(growth < 0.02);  // Allow < 2% growth
}

#endif
125    
126     static void RadixTests() {
127     printf("Testing hex\n");
128    
129     #define CHECK_HEX(type, value) \
130     do { \
131     type v; \
132     CHECK(RE("([0-9a-fA-F]+)[uUlL]*").FullMatch(#value, Hex(&v))); \
133     CHECK_EQ(v, 0x ## value); \
134     CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0x" #value, CRadix(&v))); \
135     CHECK_EQ(v, 0x ## value); \
136     } while(0)
137    
138     CHECK_HEX(short, 2bad);
139     CHECK_HEX(unsigned short, 2badU);
140     CHECK_HEX(int, dead);
141     CHECK_HEX(unsigned int, deadU);
142     CHECK_HEX(long, 7eadbeefL);
143     CHECK_HEX(unsigned long, deadbeefUL);
144     #ifdef HAVE_LONG_LONG
145     CHECK_HEX(long long, 12345678deadbeefLL);
146     #endif
147     #ifdef HAVE_UNSIGNED_LONG_LONG
148     CHECK_HEX(unsigned long long, cafebabedeadbeefULL);
149     #endif
150    
151     #undef CHECK_HEX
152    
153     printf("Testing octal\n");
154    
155     #define CHECK_OCTAL(type, value) \
156     do { \
157     type v; \
158     CHECK(RE("([0-7]+)[uUlL]*").FullMatch(#value, Octal(&v))); \
159     CHECK_EQ(v, 0 ## value); \
160     CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0" #value, CRadix(&v))); \
161     CHECK_EQ(v, 0 ## value); \
162     } while(0)
163    
164     CHECK_OCTAL(short, 77777);
165     CHECK_OCTAL(unsigned short, 177777U);
166     CHECK_OCTAL(int, 17777777777);
167     CHECK_OCTAL(unsigned int, 37777777777U);
168     CHECK_OCTAL(long, 17777777777L);
169     CHECK_OCTAL(unsigned long, 37777777777UL);
170     #ifdef HAVE_LONG_LONG
171     CHECK_OCTAL(long long, 777777777777777777777LL);
172     #endif
173     #ifdef HAVE_UNSIGNED_LONG_LONG
174     CHECK_OCTAL(unsigned long long, 1777777777777777777777ULL);
175     #endif
176    
177     #undef CHECK_OCTAL
178    
179     printf("Testing decimal\n");
180    
181     #define CHECK_DECIMAL(type, value) \
182     do { \
183     type v; \
184     CHECK(RE("(-?[0-9]+)[uUlL]*").FullMatch(#value, &v)); \
185     CHECK_EQ(v, value); \
186     CHECK(RE("(-?[0-9a-fA-FxX]+)[uUlL]*").FullMatch(#value, CRadix(&v))); \
187     CHECK_EQ(v, value); \
188     } while(0)
189    
190     CHECK_DECIMAL(short, -1);
191     CHECK_DECIMAL(unsigned short, 9999);
192     CHECK_DECIMAL(int, -1000);
193     CHECK_DECIMAL(unsigned int, 12345U);
194     CHECK_DECIMAL(long, -10000000L);
195     CHECK_DECIMAL(unsigned long, 3083324652U);
196     #ifdef HAVE_LONG_LONG
197     CHECK_DECIMAL(long long, -100000000000000LL);
198     #endif
199     #ifdef HAVE_UNSIGNED_LONG_LONG
200     CHECK_DECIMAL(unsigned long long, 1234567890987654321ULL);
201     #endif
202    
203     #undef CHECK_DECIMAL
204    
205     }
206    
207     static void TestReplace() {
208     printf("Testing Replace\n");
209    
210     struct ReplaceTest {
211     const char *regexp;
212     const char *rewrite;
213     const char *original;
214     const char *single;
215     const char *global;
216 ph10 297 int global_count; // the expected return value from ReplaceAll
217 nigel 77 };
218     static const ReplaceTest tests[] = {
219     { "(qu|[b-df-hj-np-tv-z]*)([a-z]+)",
220     "\\2\\1ay",
221     "the quick brown fox jumps over the lazy dogs.",
222     "ethay quick brown fox jumps over the lazy dogs.",
223 ph10 297 "ethay ickquay ownbray oxfay umpsjay overay ethay azylay ogsday.",
224     9 },
225 nigel 77 { "\\w+",
226     "\\0-NOSPAM",
227     "paul.haahr@google.com",
228     "paul-NOSPAM.haahr@google.com",
229 ph10 297 "paul-NOSPAM.haahr-NOSPAM@google-NOSPAM.com-NOSPAM",
230     4 },
231 nigel 77 { "^",
232     "(START)",
233     "foo",
234     "(START)foo",
235 ph10 297 "(START)foo",
236     1 },
237 nigel 77 { "^",
238     "(START)",
239     "",
240     "(START)",
241 ph10 297 "(START)",
242     1 },
243 nigel 77 { "$",
244     "(END)",
245     "",
246     "(END)",
247 ph10 297 "(END)",
248     1 },
249 nigel 77 { "b",
250     "bb",
251     "ababababab",
252     "abbabababab",
253 ph10 297 "abbabbabbabbabb",
254     5 },
255 nigel 77 { "b",
256     "bb",
257     "bbbbbb",
258     "bbbbbbb",
259 ph10 297 "bbbbbbbbbbbb",
260     6 },
261 nigel 77 { "b+",
262     "bb",
263     "bbbbbb",
264     "bb",
265 ph10 297 "bb",
266     1 },
267 nigel 77 { "b*",
268     "bb",
269     "bbbbbb",
270     "bb",
271 ph10 297 "bb",
272     1 },
273 nigel 77 { "b*",
274     "bb",
275     "aaaaa",
276     "bbaaaaa",
277 ph10 297 "bbabbabbabbabbabb",
278     6 },
279 nigel 91 { "b*",
280     "bb",
281     "aa\naa\n",
282     "bbaa\naa\n",
283 ph10 297 "bbabbabb\nbbabbabb\nbb",
284     7 },
285 nigel 91 { "b*",
286     "bb",
287     "aa\raa\r",
288     "bbaa\raa\r",
289 ph10 297 "bbabbabb\rbbabbabb\rbb",
290     7 },
291 nigel 91 { "b*",
292     "bb",
293     "aa\r\naa\r\n",
294     "bbaa\r\naa\r\n",
295 ph10 297 "bbabbabb\r\nbbabbabb\r\nbb",
296     7 },
297 nigel 91 #ifdef SUPPORT_UTF8
298     { "b*",
299     "bb",
300     "\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8", // utf8
301     "bb\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8",
302 ph10 297 "bb\xE3\x83\x9B""bb""\xE3\x83\xBC""bb""\xE3\x83\xA0""bb""\xE3\x81\xB8""bb",
303     5 },
304 nigel 91 { "b*",
305     "bb",
306     "\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n", // utf8
307     "bb\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n",
308     ("bb\xE3\x83\x9B""bb\r\nbb""\xE3\x83\xBC""bb\rbb""\xE3\x83\xA0"
309 ph10 297 "bb\nbb""\xE3\x81\xB8""bb\r\nbb"),
310     9 },
311 nigel 91 #endif
312 ph10 297 { "", NULL, NULL, NULL, NULL, 0 }
313 nigel 77 };
314    
315 nigel 91 #ifdef SUPPORT_UTF8
316     const bool support_utf8 = true;
317     #else
318     const bool support_utf8 = false;
319     #endif
320    
321 nigel 77 for (const ReplaceTest *t = tests; t->original != NULL; ++t) {
322 nigel 91 RE re(t->regexp, RE_Options(PCRE_NEWLINE_CRLF).set_utf8(support_utf8));
323     assert(re.error().empty());
324 nigel 77 string one(t->original);
325 nigel 91 CHECK(re.Replace(t->rewrite, &one));
326 nigel 77 CHECK_EQ(one, t->single);
327     string all(t->original);
328 ph10 297 const int replace_count = re.GlobalReplace(t->rewrite, &all);
329 nigel 77 CHECK_EQ(all, t->global);
330 ph10 297 CHECK_EQ(replace_count, t->global_count);
331 nigel 77 }
332 nigel 91
333     // One final test: test \r\n replacement when we're not in CRLF mode
334     {
335     RE re("b*", RE_Options(PCRE_NEWLINE_CR).set_utf8(support_utf8));
336     assert(re.error().empty());
337     string all("aa\r\naa\r\n");
338 ph10 297 CHECK_EQ(re.GlobalReplace("bb", &all), 9);
339 nigel 91 CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
340     }
341     {
342     RE re("b*", RE_Options(PCRE_NEWLINE_LF).set_utf8(support_utf8));
343     assert(re.error().empty());
344     string all("aa\r\naa\r\n");
345 ph10 297 CHECK_EQ(re.GlobalReplace("bb", &all), 9);
346 nigel 91 CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
347     }
348     // TODO: test what happens when no PCRE_NEWLINE_* flag is set.
349     // Alas, the answer depends on how pcre was compiled.
350 nigel 77 }
351    
352     static void TestExtract() {
353     printf("Testing Extract\n");
354    
355     string s;
356    
357     CHECK(RE("(.*)@([^.]*)").Extract("\\2!\\1", "boris@kremvax.ru", &s));
358     CHECK_EQ(s, "kremvax!boris");
359    
360     // check the RE interface as well
361     CHECK(RE(".*").Extract("'\\0'", "foo", &s));
362     CHECK_EQ(s, "'foo'");
363     CHECK(!RE("bar").Extract("'\\0'", "baz", &s));
364     CHECK_EQ(s, "'foo'");
365     }
366    
367     static void TestConsume() {
368     printf("Testing Consume\n");
369    
370     string word;
371    
372     string s(" aaa b!@#$@#$cccc");
373     StringPiece input(s);
374    
375     RE r("\\s*(\\w+)"); // matches a word, possibly proceeded by whitespace
376     CHECK(r.Consume(&input, &word));
377     CHECK_EQ(word, "aaa");
378     CHECK(r.Consume(&input, &word));
379     CHECK_EQ(word, "b");
380     CHECK(! r.Consume(&input, &word));
381     }
382    
383     static void TestFindAndConsume() {
384     printf("Testing FindAndConsume\n");
385    
386     string word;
387    
388     string s(" aaa b!@#$@#$cccc");
389     StringPiece input(s);
390    
391     RE r("(\\w+)"); // matches a word
392     CHECK(r.FindAndConsume(&input, &word));
393     CHECK_EQ(word, "aaa");
394     CHECK(r.FindAndConsume(&input, &word));
395     CHECK_EQ(word, "b");
396     CHECK(r.FindAndConsume(&input, &word));
397     CHECK_EQ(word, "cccc");
398     CHECK(! r.FindAndConsume(&input, &word));
399     }
400    
401     static void TestMatchNumberPeculiarity() {
402     printf("Testing match-number peculiaraity\n");
403    
404     string word1;
405     string word2;
406     string word3;
407    
408     RE r("(foo)|(bar)|(baz)");
409     CHECK(r.PartialMatch("foo", &word1, &word2, &word3));
410     CHECK_EQ(word1, "foo");
411     CHECK_EQ(word2, "");
412     CHECK_EQ(word3, "");
413     CHECK(r.PartialMatch("bar", &word1, &word2, &word3));
414     CHECK_EQ(word1, "");
415     CHECK_EQ(word2, "bar");
416     CHECK_EQ(word3, "");
417     CHECK(r.PartialMatch("baz", &word1, &word2, &word3));
418     CHECK_EQ(word1, "");
419     CHECK_EQ(word2, "");
420     CHECK_EQ(word3, "baz");
421     CHECK(!r.PartialMatch("f", &word1, &word2, &word3));
422    
423     string a;
424     CHECK(RE("(foo)|hello").FullMatch("hello", &a));
425     CHECK_EQ(a, "");
426     }
427    
428 nigel 87 static void TestRecursion() {
429 nigel 77 printf("Testing recursion\n");
430    
431 nigel 87 // Get one string that passes (sometimes), one that never does.
432     string text_good("abcdefghijk");
433     string text_bad("acdefghijkl");
434    
435     // According to pcretest, matching text_good against (\w+)*b
436     // requires match_limit of at least 8192, and match_recursion_limit
437     // of at least 37.
438    
439     RE_Options options_ml;
440     options_ml.set_match_limit(8192);
441     RE re("(\\w+)*b", options_ml);
442     CHECK(re.PartialMatch(text_good) == true);
443     CHECK(re.PartialMatch(text_bad) == false);
444     CHECK(re.FullMatch(text_good) == false);
445     CHECK(re.FullMatch(text_bad) == false);
446    
447     options_ml.set_match_limit(1024);
448     RE re2("(\\w+)*b", options_ml);
449     CHECK(re2.PartialMatch(text_good) == false); // because of match_limit
450     CHECK(re2.PartialMatch(text_bad) == false);
451     CHECK(re2.FullMatch(text_good) == false);
452     CHECK(re2.FullMatch(text_bad) == false);
453    
454     RE_Options options_mlr;
455     options_mlr.set_match_limit_recursion(50);
456     RE re3("(\\w+)*b", options_mlr);
457     CHECK(re3.PartialMatch(text_good) == true);
458     CHECK(re3.PartialMatch(text_bad) == false);
459     CHECK(re3.FullMatch(text_good) == false);
460     CHECK(re3.FullMatch(text_bad) == false);
461    
462     options_mlr.set_match_limit_recursion(10);
463     RE re4("(\\w+)*b", options_mlr);
464     CHECK(re4.PartialMatch(text_good) == false);
465     CHECK(re4.PartialMatch(text_bad) == false);
466     CHECK(re4.FullMatch(text_good) == false);
467     CHECK(re4.FullMatch(text_bad) == false);
468 nigel 77 }
469    
470 nigel 93 // A meta-quoted string, interpreted as a pattern, should always match
471     // the original unquoted string.
472     static void TestQuoteMeta(string unquoted, RE_Options options = RE_Options()) {
473     string quoted = RE::QuoteMeta(unquoted);
474     RE re(quoted, options);
475     CHECK(re.FullMatch(unquoted));
476     }
477    
478     // A string containing meaningful regexp characters, which is then meta-
479     // quoted, should not generally match a string the unquoted string does.
480     static void NegativeTestQuoteMeta(string unquoted, string should_not_match,
481     RE_Options options = RE_Options()) {
482     string quoted = RE::QuoteMeta(unquoted);
483     RE re(quoted, options);
484     CHECK(!re.FullMatch(should_not_match));
485     }
486    
487     // Tests that quoted meta characters match their original strings,
488     // and that a few things that shouldn't match indeed do not.
489     static void TestQuotaMetaSimple() {
490     TestQuoteMeta("foo");
491     TestQuoteMeta("foo.bar");
492     TestQuoteMeta("foo\\.bar");
493     TestQuoteMeta("[1-9]");
494     TestQuoteMeta("1.5-2.0?");
495     TestQuoteMeta("\\d");
496     TestQuoteMeta("Who doesn't like ice cream?");
497     TestQuoteMeta("((a|b)c?d*e+[f-h]i)");
498     TestQuoteMeta("((?!)xxx).*yyy");
499     TestQuoteMeta("([");
500     }
501    
502     static void TestQuoteMetaSimpleNegative() {
503     NegativeTestQuoteMeta("foo", "bar");
504     NegativeTestQuoteMeta("...", "bar");
505     NegativeTestQuoteMeta("\\.", ".");
506     NegativeTestQuoteMeta("\\.", "..");
507     NegativeTestQuoteMeta("(a)", "a");
508     NegativeTestQuoteMeta("(a|b)", "a");
509     NegativeTestQuoteMeta("(a|b)", "(a)");
510     NegativeTestQuoteMeta("(a|b)", "a|b");
511     NegativeTestQuoteMeta("[0-9]", "0");
512     NegativeTestQuoteMeta("[0-9]", "0-9");
513     NegativeTestQuoteMeta("[0-9]", "[9]");
514     NegativeTestQuoteMeta("((?!)xxx)", "xxx");
515     }
516    
517     static void TestQuoteMetaLatin1() {
518     TestQuoteMeta("3\xb2 = 9");
519     }
520    
// QuoteMeta checks on multi-byte UTF-8 sequences.  Compiled to an empty
// function unless SUPPORT_UTF8 is defined.
static void TestQuoteMetaUtf8() {
#ifdef SUPPORT_UTF8
  const RE_Options utf8_opts = pcrecpp::UTF8();

  TestQuoteMeta("Pl\xc3\xa1\x63ido Domingo", utf8_opts);
  TestQuoteMeta("xyz", utf8_opts);                 // No fancy utf8
  TestQuoteMeta("\xc2\xb0", utf8_opts);            // 2-byte utf8 (degree symbol)
  TestQuoteMeta("27\xc2\xb0 degrees", utf8_opts);  // As a middle character
  TestQuoteMeta("\xe2\x80\xb3", utf8_opts);        // 3-byte utf8 (double prime)
  TestQuoteMeta("\xf0\x9d\x85\x9f", utf8_opts);    // 4-byte utf8 (music note)
  TestQuoteMeta("27\xc2\xb0");  // Interpreted as Latin-1, but should still work

  // The quoted pattern must not match a subject in which each byte of the
  // 2-byte sequence (degree symbol) has been backslash-escaped separately.
  NegativeTestQuoteMeta("27\xc2\xb0",
                        "27\\\xc2\\\xb0",
                        utf8_opts);
#endif
}
535    
536     static void TestQuoteMetaAll() {
537     printf("Testing QuoteMeta\n");
538     TestQuotaMetaSimple();
539     TestQuoteMetaSimpleNegative();
540     TestQuoteMetaLatin1();
541     TestQuoteMetaUtf8();
542     }
543    
544 nigel 81 //
545     // Options tests contributed by
546     // Giuseppe Maxia, CTO, Stardata s.r.l.
547     // July 2005
548     //
549     static void GetOneOptionResult(
550     const char *option_name,
551     const char *regex,
552     const char *str,
553     RE_Options options,
554     bool full,
555     string expected) {
556 nigel 77
557 nigel 81 printf("Testing Option <%s>\n", option_name);
558     if(VERBOSE_TEST)
559     printf("/%s/ finds \"%s\" within \"%s\" \n",
560     regex,
561     expected.c_str(),
562     str);
563     string captured("");
564     if (full)
565     RE(regex,options).FullMatch(str, &captured);
566     else
567     RE(regex,options).PartialMatch(str, &captured);
568     CHECK_EQ(captured, expected);
569     }
570    
571     static void TestOneOption(
572     const char *option_name,
573     const char *regex,
574     const char *str,
575     RE_Options options,
576     bool full,
577     bool assertive = true) {
578    
579     printf("Testing Option <%s>\n", option_name);
580     if (VERBOSE_TEST)
581     printf("'%s' %s /%s/ \n",
582     str,
583     (assertive? "matches" : "doesn't match"),
584     regex);
585     if (assertive) {
586     if (full)
587     CHECK(RE(regex,options).FullMatch(str));
588     else
589     CHECK(RE(regex,options).PartialMatch(str));
590     } else {
591     if (full)
592     CHECK(!RE(regex,options).FullMatch(str));
593     else
594     CHECK(!RE(regex,options).PartialMatch(str));
595     }
596     }
597    
598     static void Test_CASELESS() {
599     RE_Options options;
600     RE_Options options2;
601    
602     options.set_caseless(true);
603     TestOneOption("CASELESS (class)", "HELLO", "hello", options, false);
604     TestOneOption("CASELESS (class2)", "HELLO", "hello", options2.set_caseless(true), false);
605     TestOneOption("CASELESS (class)", "^[A-Z]+$", "Hello", options, false);
606    
607     TestOneOption("CASELESS (function)", "HELLO", "hello", pcrecpp::CASELESS(), false);
608     TestOneOption("CASELESS (function)", "^[A-Z]+$", "Hello", pcrecpp::CASELESS(), false);
609     options.set_caseless(false);
610     TestOneOption("no CASELESS", "HELLO", "hello", options, false, false);
611     }
612    
613     static void Test_MULTILINE() {
614     RE_Options options;
615     RE_Options options2;
616     const char *str = "HELLO\n" "cruel\n" "world\n";
617    
618     options.set_multiline(true);
619     TestOneOption("MULTILINE (class)", "^cruel$", str, options, false);
620     TestOneOption("MULTILINE (class2)", "^cruel$", str, options2.set_multiline(true), false);
621     TestOneOption("MULTILINE (function)", "^cruel$", str, pcrecpp::MULTILINE(), false);
622     options.set_multiline(false);
623     TestOneOption("no MULTILINE", "^cruel$", str, options, false, false);
624     }
625    
626     static void Test_DOTALL() {
627     RE_Options options;
628     RE_Options options2;
629     const char *str = "HELLO\n" "cruel\n" "world";
630    
631     options.set_dotall(true);
632     TestOneOption("DOTALL (class)", "HELLO.*world", str, options, true);
633     TestOneOption("DOTALL (class2)", "HELLO.*world", str, options2.set_dotall(true), true);
634     TestOneOption("DOTALL (function)", "HELLO.*world", str, pcrecpp::DOTALL(), true);
635     options.set_dotall(false);
636     TestOneOption("no DOTALL", "HELLO.*world", str, options, true, false);
637     }
638    
639     static void Test_DOLLAR_ENDONLY() {
640     RE_Options options;
641     RE_Options options2;
642     const char *str = "HELLO world\n";
643    
644     TestOneOption("no DOLLAR_ENDONLY", "world$", str, options, false);
645     options.set_dollar_endonly(true);
646     TestOneOption("DOLLAR_ENDONLY 1", "world$", str, options, false, false);
647     TestOneOption("DOLLAR_ENDONLY 2", "world$", str, options2.set_dollar_endonly(true), false, false);
648     }
649    
650     static void Test_EXTRA() {
651     RE_Options options;
652     const char *str = "HELLO";
653    
654     options.set_extra(true);
655     TestOneOption("EXTRA 1", "\\HELL\\O", str, options, true, false );
656     TestOneOption("EXTRA 2", "\\HELL\\O", str, RE_Options().set_extra(true), true, false );
657     options.set_extra(false);
658     TestOneOption("no EXTRA", "\\HELL\\O", str, options, true );
659     }
660    
661     static void Test_EXTENDED() {
662     RE_Options options;
663     RE_Options options2;
664     const char *str = "HELLO world";
665    
666     options.set_extended(true);
667     TestOneOption("EXTENDED (class)", "HELLO world", str, options, false, false);
668     TestOneOption("EXTENDED (class2)", "HELLO world", str, options2.set_extended(true), false, false);
669     TestOneOption("EXTENDED (class)",
670     "^ HE L{2} O "
671     "\\s+ "
672     "\\w+ $ ",
673     str,
674     options,
675     false);
676    
677     TestOneOption("EXTENDED (function)", "HELLO world", str, pcrecpp::EXTENDED(), false, false);
678     TestOneOption("EXTENDED (function)",
679     "^ HE L{2} O "
680     "\\s+ "
681     "\\w+ $ ",
682     str,
683     pcrecpp::EXTENDED(),
684     false);
685    
686     options.set_extended(false);
687     TestOneOption("no EXTENDED", "HELLO world", str, options, false);
688     }
689    
690     static void Test_NO_AUTO_CAPTURE() {
691     RE_Options options;
692     const char *str = "HELLO world";
693     string captured;
694    
695     printf("Testing Option <no NO_AUTO_CAPTURE>\n");
696     if (VERBOSE_TEST)
697     printf("parentheses capture text\n");
698     RE re("(world|universe)$", options);
699     CHECK(re.Extract("\\1", str , &captured));
700     CHECK_EQ(captured, "world");
701     options.set_no_auto_capture(true);
702     printf("testing Option <NO_AUTO_CAPTURE>\n");
703     if (VERBOSE_TEST)
704     printf("parentheses do not capture text\n");
705     re.Extract("\\1",str, &captured );
706     CHECK_EQ(captured, "world");
707     }
708    
709     static void Test_UNGREEDY() {
710     RE_Options options;
711     const char *str = "HELLO, 'this' is the 'world'";
712    
713     options.set_ungreedy(true);
714     GetOneOptionResult("UNGREEDY 1", "('.*')", str, options, false, "'this'" );
715     GetOneOptionResult("UNGREEDY 2", "('.*')", str, RE_Options().set_ungreedy(true), false, "'this'" );
716     GetOneOptionResult("UNGREEDY", "('.*?')", str, options, false, "'this' is the 'world'" );
717    
718     options.set_ungreedy(false);
719     GetOneOptionResult("no UNGREEDY", "('.*')", str, options, false, "'this' is the 'world'" );
720     GetOneOptionResult("no UNGREEDY", "('.*?')", str, options, false, "'this'" );
721     }
722    
723     static void Test_all_options() {
724     const char *str = "HELLO\n" "cruel\n" "world";
725     RE_Options options;
726     options.set_all_options(PCRE_CASELESS | PCRE_DOTALL);
727    
728     TestOneOption("all_options (CASELESS|DOTALL)", "^hello.*WORLD", str , options, false);
729     options.set_all_options(0);
730     TestOneOption("all_options (0)", "^hello.*WORLD", str , options, false, false);
731     options.set_all_options(PCRE_MULTILINE | PCRE_EXTENDED);
732    
733     TestOneOption("all_options (MULTILINE|EXTENDED)", " ^ c r u e l $ ", str, options, false);
734     TestOneOption("all_options (MULTILINE|EXTENDED) with constructor",
735     " ^ c r u e l $ ",
736     str,
737     RE_Options(PCRE_MULTILINE | PCRE_EXTENDED),
738     false);
739    
740     TestOneOption("all_options (MULTILINE|EXTENDED) with concatenation",
741     " ^ c r u e l $ ",
742     str,
743     RE_Options()
744     .set_multiline(true)
745     .set_extended(true),
746     false);
747    
748     options.set_all_options(0);
749     TestOneOption("all_options (0)", "^ c r u e l $", str, options, false, false);
750    
751     }
752    
753     static void TestOptions() {
754     printf("Testing Options\n");
755     Test_CASELESS();
756     Test_MULTILINE();
757     Test_DOTALL();
758     Test_DOLLAR_ENDONLY();
759     Test_EXTENDED();
760     Test_NO_AUTO_CAPTURE();
761     Test_UNGREEDY();
762     Test_EXTRA();
763     Test_all_options();
764     }
765    
766 nigel 93 static void TestConstructors() {
767     printf("Testing constructors\n");
768    
769     RE_Options options;
770     options.set_dotall(true);
771     const char *str = "HELLO\n" "cruel\n" "world";
772    
773     RE orig("HELLO.*world", options);
774     CHECK(orig.FullMatch(str));
775    
776     RE copy1(orig);
777     CHECK(copy1.FullMatch(str));
778    
779     RE copy2("not a match");
780     CHECK(!copy2.FullMatch(str));
781     copy2 = copy1;
782     CHECK(copy2.FullMatch(str));
783     copy2 = orig;
784     CHECK(copy2.FullMatch(str));
785    
786     // Make sure when we assign to ourselves, nothing bad happens
787     orig = orig;
788     copy1 = copy1;
789     copy2 = copy2;
790     CHECK(orig.FullMatch(str));
791     CHECK(copy1.FullMatch(str));
792     CHECK(copy2.FullMatch(str));
793     }
794    
795 nigel 77 int main(int argc, char** argv) {
796     // Treat any flag as --help
797     if (argc > 1 && argv[1][0] == '-') {
798     printf("Usage: %s [timing1|timing2|timing3 num-iters]\n"
799     " If 'timingX ###' is specified, run the given timing test\n"
800     " with the given number of iterations, rather than running\n"
801     " the default corectness test.\n", argv[0]);
802     return 0;
803     }
804    
805     if (argc > 1) {
806     if ( argc == 2 || atoi(argv[2]) == 0) {
807     printf("timing mode needs a num-iters argument\n");
808     return 1;
809     }
810     if (!strcmp(argv[1], "timing1"))
811     Timing1(atoi(argv[2]));
812     else if (!strcmp(argv[1], "timing2"))
813     Timing2(atoi(argv[2]));
814     else if (!strcmp(argv[1], "timing3"))
815     Timing3(atoi(argv[2]));
816     else
817     printf("Unknown argument '%s'\n", argv[1]);
818     return 0;
819     }
820    
821     printf("Testing FullMatch\n");
822    
823     int i;
824     string s;
825    
826     /***** FullMatch with no args *****/
827    
828     CHECK(RE("h.*o").FullMatch("hello"));
829 ph10 179 CHECK(!RE("h.*o").FullMatch("othello")); // Must be anchored at front
830     CHECK(!RE("h.*o").FullMatch("hello!")); // Must be anchored at end
831     CHECK(RE("a*").FullMatch("aaaa")); // Fullmatch with normal op
832     CHECK(RE("a*?").FullMatch("aaaa")); // Fullmatch with nongreedy op
833     CHECK(RE("a*?\\z").FullMatch("aaaa")); // Two unusual ops
834 nigel 77
835     /***** FullMatch with args *****/
836    
837     // Zero-arg
838     CHECK(RE("\\d+").FullMatch("1001"));
839    
840     // Single-arg
841     CHECK(RE("(\\d+)").FullMatch("1001", &i));
842     CHECK_EQ(i, 1001);
843     CHECK(RE("(-?\\d+)").FullMatch("-123", &i));
844     CHECK_EQ(i, -123);
845     CHECK(!RE("()\\d+").FullMatch("10", &i));
846     CHECK(!RE("(\\d+)").FullMatch("1234567890123456789012345678901234567890",
847     &i));
848    
849     // Digits surrounding integer-arg
850     CHECK(RE("1(\\d*)4").FullMatch("1234", &i));
851     CHECK_EQ(i, 23);
852     CHECK(RE("(\\d)\\d+").FullMatch("1234", &i));
853     CHECK_EQ(i, 1);
854     CHECK(RE("(-\\d)\\d+").FullMatch("-1234", &i));
855     CHECK_EQ(i, -1);
856     CHECK(RE("(\\d)").PartialMatch("1234", &i));
857     CHECK_EQ(i, 1);
858     CHECK(RE("(-\\d)").PartialMatch("-1234", &i));
859     CHECK_EQ(i, -1);
860    
861     // String-arg
862     CHECK(RE("h(.*)o").FullMatch("hello", &s));
863     CHECK_EQ(s, string("ell"));
864    
865     // StringPiece-arg
866     StringPiece sp;
867     CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &sp, &i));
868     CHECK_EQ(sp.size(), 4);
869     CHECK(memcmp(sp.data(), "ruby", 4) == 0);
870     CHECK_EQ(i, 1234);
871    
872     // Multi-arg
873     CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &s, &i));
874     CHECK_EQ(s, string("ruby"));
875     CHECK_EQ(i, 1234);
876    
877 ph10 263 // Ignore non-void* NULL arg
878     CHECK(RE("he(.*)lo").FullMatch("hello", (char*)NULL));
879     CHECK(RE("h(.*)o").FullMatch("hello", (string*)NULL));
880     CHECK(RE("h(.*)o").FullMatch("hello", (StringPiece*)NULL));
881     CHECK(RE("(.*)").FullMatch("1234", (int*)NULL));
882     CHECK(RE("(.*)").FullMatch("1234567890123456", (long long*)NULL));
883     CHECK(RE("(.*)").FullMatch("123.4567890123456", (double*)NULL));
884     CHECK(RE("(.*)").FullMatch("123.4567890123456", (float*)NULL));
885    
886     // Fail on non-void* NULL arg if the match doesn't parse for the given type.
887     CHECK(!RE("h(.*)lo").FullMatch("hello", &s, (char*)NULL));
888     CHECK(!RE("(.*)").FullMatch("hello", (int*)NULL));
889     CHECK(!RE("(.*)").FullMatch("1234567890123456", (int*)NULL));
890     CHECK(!RE("(.*)").FullMatch("hello", (double*)NULL));
891     CHECK(!RE("(.*)").FullMatch("hello", (float*)NULL));
892    
893 nigel 77 // Ignored arg
894     CHECK(RE("(\\w+)(:)(\\d+)").FullMatch("ruby:1234", &s, (void*)NULL, &i));
895     CHECK_EQ(s, string("ruby"));
896     CHECK_EQ(i, 1234);
897    
898     // Type tests
899     {
900     char c;
901     CHECK(RE("(H)ello").FullMatch("Hello", &c));
902     CHECK_EQ(c, 'H');
903     }
904     {
905     unsigned char c;
906     CHECK(RE("(H)ello").FullMatch("Hello", &c));
907     CHECK_EQ(c, static_cast<unsigned char>('H'));
908     }
909     {
910     short v;
911     CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
912     CHECK(RE("(-?\\d+)").FullMatch("-100", &v)); CHECK_EQ(v, -100);
913     CHECK(RE("(-?\\d+)").FullMatch("32767", &v)); CHECK_EQ(v, 32767);
914     CHECK(RE("(-?\\d+)").FullMatch("-32768", &v)); CHECK_EQ(v, -32768);
915     CHECK(!RE("(-?\\d+)").FullMatch("-32769", &v));
916     CHECK(!RE("(-?\\d+)").FullMatch("32768", &v));
917     }
918     {
919     unsigned short v;
920     CHECK(RE("(\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
921     CHECK(RE("(\\d+)").FullMatch("32767", &v)); CHECK_EQ(v, 32767);
922     CHECK(RE("(\\d+)").FullMatch("65535", &v)); CHECK_EQ(v, 65535);
923     CHECK(!RE("(\\d+)").FullMatch("65536", &v));
924     }
925     {
926     int v;
927     static const int max_value = 0x7fffffff;
928     static const int min_value = -max_value - 1;
929     CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
930     CHECK(RE("(-?\\d+)").FullMatch("-100", &v)); CHECK_EQ(v, -100);
931     CHECK(RE("(-?\\d+)").FullMatch("2147483647", &v)); CHECK_EQ(v, max_value);
932     CHECK(RE("(-?\\d+)").FullMatch("-2147483648", &v)); CHECK_EQ(v, min_value);
933     CHECK(!RE("(-?\\d+)").FullMatch("-2147483649", &v));
934     CHECK(!RE("(-?\\d+)").FullMatch("2147483648", &v));
935     }
936     {
937     unsigned int v;
938     static const unsigned int max_value = 0xfffffffful;
939     CHECK(RE("(\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
940     CHECK(RE("(\\d+)").FullMatch("4294967295", &v)); CHECK_EQ(v, max_value);
941     CHECK(!RE("(\\d+)").FullMatch("4294967296", &v));
942     }
943     #ifdef HAVE_LONG_LONG
944 ph10 193 # if defined(__MINGW__) || defined(__MINGW32__)
945     # define LLD "%I64d"
946 ph10 201 # define LLU "%I64u"
947 ph10 193 # else
948     # define LLD "%lld"
949 ph10 201 # define LLU "%llu"
950 ph10 193 # endif
951 nigel 77 {
952     long long v;
953     static const long long max_value = 0x7fffffffffffffffLL;
954     static const long long min_value = -max_value - 1;
955 ph10 257 char buf[32]; // definitely big enough for a long long
956 nigel 77
957     CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
958     CHECK(RE("(-?\\d+)").FullMatch("-100",&v)); CHECK_EQ(v, -100);
959    
960 ph10 257 sprintf(buf, LLD, max_value);
961 nigel 77 CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
962    
963 ph10 257 sprintf(buf, LLD, min_value);
964 nigel 77 CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, min_value);
965    
966 ph10 257 sprintf(buf, LLD, max_value);
967 nigel 77 assert(buf[strlen(buf)-1] != '9');
968     buf[strlen(buf)-1]++;
969     CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
970    
971 ph10 257 sprintf(buf, LLD, min_value);
972 nigel 77 assert(buf[strlen(buf)-1] != '9');
973     buf[strlen(buf)-1]++;
974     CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
975     }
976     #endif
977     #if defined HAVE_UNSIGNED_LONG_LONG && defined HAVE_LONG_LONG
978     {
979     unsigned long long v;
980     long long v2;
981     static const unsigned long long max_value = 0xffffffffffffffffULL;
982 ph10 257 char buf[32]; // definitely big enough for an unsigned long long
983 nigel 77
984     CHECK(RE("(-?\\d+)").FullMatch("100",&v)); CHECK_EQ(v, 100);
985     CHECK(RE("(-?\\d+)").FullMatch("-100",&v2)); CHECK_EQ(v2, -100);
986    
987 ph10 257 sprintf(buf, LLU, max_value);
988 nigel 77 CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
989    
990     assert(buf[strlen(buf)-1] != '9');
991     buf[strlen(buf)-1]++;
992     CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
993     }
994     #endif
995     {
996     float v;
997     CHECK(RE("(.*)").FullMatch("100", &v));
998     CHECK(RE("(.*)").FullMatch("-100.", &v));
999     CHECK(RE("(.*)").FullMatch("1e23", &v));
1000     }
1001     {
1002     double v;
1003     CHECK(RE("(.*)").FullMatch("100", &v));
1004     CHECK(RE("(.*)").FullMatch("-100.", &v));
1005     CHECK(RE("(.*)").FullMatch("1e23", &v));
1006     }
1007    
1008     // Check that matching is fully anchored
1009     CHECK(!RE("(\\d+)").FullMatch("x1001", &i));
1010     CHECK(!RE("(\\d+)").FullMatch("1001x", &i));
1011     CHECK(RE("x(\\d+)").FullMatch("x1001", &i)); CHECK_EQ(i, 1001);
1012     CHECK(RE("(\\d+)x").FullMatch("1001x", &i)); CHECK_EQ(i, 1001);
1013    
1014     // Braces
1015     CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcd"));
1016     CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcde"));
1017     CHECK(!RE("[0-9a-f+.-]{5,}").FullMatch("0abc"));
1018    
1019     // Complicated RE
1020     CHECK(RE("foo|bar|[A-Z]").FullMatch("foo"));
1021     CHECK(RE("foo|bar|[A-Z]").FullMatch("bar"));
1022     CHECK(RE("foo|bar|[A-Z]").FullMatch("X"));
1023     CHECK(!RE("foo|bar|[A-Z]").FullMatch("XY"));
1024    
1025     // Check full-match handling (needs '$' tacked on internally)
1026     CHECK(RE("fo|foo").FullMatch("fo"));
1027     CHECK(RE("fo|foo").FullMatch("foo"));
1028     CHECK(RE("fo|foo$").FullMatch("fo"));
1029     CHECK(RE("fo|foo$").FullMatch("foo"));
1030     CHECK(RE("foo$").FullMatch("foo"));
1031     CHECK(!RE("foo\\$").FullMatch("foo$bar"));
1032     CHECK(!RE("fo|bar").FullMatch("fox"));
1033    
1034     // Uncomment the following if we change the handling of '$' to
1035     // prevent it from matching a trailing newline
1036     if (false) {
1037     // Check that we don't get bitten by pcre's special handling of a
1038     // '\n' at the end of the string matching '$'
1039     CHECK(!RE("foo$").PartialMatch("foo\n"));
1040     }
1041    
1042     // Number of args
1043     int a[16];
1044     CHECK(RE("").FullMatch(""));
1045    
1046     memset(a, 0, sizeof(0));
1047     CHECK(RE("(\\d){1}").FullMatch("1",
1048     &a[0]));
1049     CHECK_EQ(a[0], 1);
1050    
1051     memset(a, 0, sizeof(0));
1052     CHECK(RE("(\\d)(\\d)").FullMatch("12",
1053     &a[0], &a[1]));
1054     CHECK_EQ(a[0], 1);
1055     CHECK_EQ(a[1], 2);
1056    
1057     memset(a, 0, sizeof(0));
1058     CHECK(RE("(\\d)(\\d)(\\d)").FullMatch("123",
1059     &a[0], &a[1], &a[2]));
1060     CHECK_EQ(a[0], 1);
1061     CHECK_EQ(a[1], 2);
1062     CHECK_EQ(a[2], 3);
1063    
1064     memset(a, 0, sizeof(0));
1065     CHECK(RE("(\\d)(\\d)(\\d)(\\d)").FullMatch("1234",
1066     &a[0], &a[1], &a[2], &a[3]));
1067     CHECK_EQ(a[0], 1);
1068     CHECK_EQ(a[1], 2);
1069     CHECK_EQ(a[2], 3);
1070     CHECK_EQ(a[3], 4);
1071    
1072     memset(a, 0, sizeof(0));
1073     CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("12345",
1074     &a[0], &a[1], &a[2],
1075     &a[3], &a[4]));
1076     CHECK_EQ(a[0], 1);
1077     CHECK_EQ(a[1], 2);
1078     CHECK_EQ(a[2], 3);
1079     CHECK_EQ(a[3], 4);
1080     CHECK_EQ(a[4], 5);
1081    
1082     memset(a, 0, sizeof(0));
1083     CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("123456",
1084     &a[0], &a[1], &a[2],
1085     &a[3], &a[4], &a[5]));
1086     CHECK_EQ(a[0], 1);
1087     CHECK_EQ(a[1], 2);
1088     CHECK_EQ(a[2], 3);
1089     CHECK_EQ(a[3], 4);
1090     CHECK_EQ(a[4], 5);
1091     CHECK_EQ(a[5], 6);
1092    
1093     memset(a, 0, sizeof(0));
1094     CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("1234567",
1095     &a[0], &a[1], &a[2], &a[3],
1096     &a[4], &a[5], &a[6]));
1097     CHECK_EQ(a[0], 1);
1098     CHECK_EQ(a[1], 2);
1099     CHECK_EQ(a[2], 3);
1100     CHECK_EQ(a[3], 4);
1101     CHECK_EQ(a[4], 5);
1102     CHECK_EQ(a[5], 6);
1103     CHECK_EQ(a[6], 7);
1104    
1105     memset(a, 0, sizeof(0));
1106     CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)"
1107     "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch(
1108     "1234567890123456",
1109     &a[0], &a[1], &a[2], &a[3],
1110     &a[4], &a[5], &a[6], &a[7],
1111     &a[8], &a[9], &a[10], &a[11],
1112     &a[12], &a[13], &a[14], &a[15]));
1113     CHECK_EQ(a[0], 1);
1114     CHECK_EQ(a[1], 2);
1115     CHECK_EQ(a[2], 3);
1116     CHECK_EQ(a[3], 4);
1117     CHECK_EQ(a[4], 5);
1118     CHECK_EQ(a[5], 6);
1119     CHECK_EQ(a[6], 7);
1120     CHECK_EQ(a[7], 8);
1121     CHECK_EQ(a[8], 9);
1122     CHECK_EQ(a[9], 0);
1123     CHECK_EQ(a[10], 1);
1124     CHECK_EQ(a[11], 2);
1125     CHECK_EQ(a[12], 3);
1126     CHECK_EQ(a[13], 4);
1127     CHECK_EQ(a[14], 5);
1128     CHECK_EQ(a[15], 6);
1129    
1130     /***** PartialMatch *****/
1131    
1132     printf("Testing PartialMatch\n");
1133    
1134     CHECK(RE("h.*o").PartialMatch("hello"));
1135     CHECK(RE("h.*o").PartialMatch("othello"));
1136     CHECK(RE("h.*o").PartialMatch("hello!"));
1137     CHECK(RE("((((((((((((((((((((x))))))))))))))))))))").PartialMatch("x"));
1138    
1139 nigel 93 /***** other tests *****/
1140    
1141 nigel 77 RadixTests();
1142     TestReplace();
1143     TestExtract();
1144     TestConsume();
1145     TestFindAndConsume();
1146 nigel 93 TestQuoteMetaAll();
1147 nigel 77 TestMatchNumberPeculiarity();
1148    
1149     // Check the pattern() accessor
1150     {
1151     const string kPattern = "http://([^/]+)/.*";
1152     const RE re(kPattern);
1153     CHECK_EQ(kPattern, re.pattern());
1154     }
1155    
1156     // Check RE error field.
1157     {
1158     RE re("foo");
1159     CHECK(re.error().empty()); // Must have no error
1160     }
1161    
1162     #ifdef SUPPORT_UTF8
1163     // Check UTF-8 handling
1164     {
1165     printf("Testing UTF-8 handling\n");
1166    
1167     // Three Japanese characters (nihongo)
1168 ph10 256 const unsigned char utf8_string[] = {
1169 nigel 77 0xe6, 0x97, 0xa5, // 65e5
1170     0xe6, 0x9c, 0xac, // 672c
1171     0xe8, 0xaa, 0x9e, // 8a9e
1172     0
1173     };
1174 ph10 256 const unsigned char utf8_pattern[] = {
1175 nigel 77 '.',
1176     0xe6, 0x9c, 0xac, // 672c
1177     '.',
1178     0
1179     };
1180    
1181     // Both should match in either mode, bytes or UTF-8
1182     RE re_test1(".........");
1183     CHECK(re_test1.FullMatch(utf8_string));
1184     RE re_test2("...", pcrecpp::UTF8());
1185     CHECK(re_test2.FullMatch(utf8_string));
1186    
1187     // Check that '.' matches one byte or UTF-8 character
1188     // according to the mode.
1189     string ss;
1190     RE re_test3("(.)");
1191     CHECK(re_test3.PartialMatch(utf8_string, &ss));
1192     CHECK_EQ(ss, string("\xe6"));
1193     RE re_test4("(.)", pcrecpp::UTF8());
1194     CHECK(re_test4.PartialMatch(utf8_string, &ss));
1195     CHECK_EQ(ss, string("\xe6\x97\xa5"));
1196    
1197     // Check that string matches itself in either mode
1198     RE re_test5(utf8_string);
1199     CHECK(re_test5.FullMatch(utf8_string));
1200     RE re_test6(utf8_string, pcrecpp::UTF8());
1201     CHECK(re_test6.FullMatch(utf8_string));
1202    
1203     // Check that pattern matches string only in UTF8 mode
1204     RE re_test7(utf8_pattern);
1205     CHECK(!re_test7.FullMatch(utf8_string));
1206     RE re_test8(utf8_pattern, pcrecpp::UTF8());
1207     CHECK(re_test8.FullMatch(utf8_string));
1208     }
1209    
1210     // Check that ungreedy, UTF8 regular expressions don't match when they
1211     // oughtn't -- see bug 82246.
1212     {
1213     // This code always worked.
1214     const char* pattern = "\\w+X";
1215     const string target = "a aX";
1216     RE match_sentence(pattern);
1217     RE match_sentence_re(pattern, pcrecpp::UTF8());
1218    
1219     CHECK(!match_sentence.FullMatch(target));
1220     CHECK(!match_sentence_re.FullMatch(target));
1221     }
1222    
1223     {
1224     const char* pattern = "(?U)\\w+X";
1225     const string target = "a aX";
1226     RE match_sentence(pattern);
1227     RE match_sentence_re(pattern, pcrecpp::UTF8());
1228    
1229     CHECK(!match_sentence.FullMatch(target));
1230     CHECK(!match_sentence_re.FullMatch(target));
1231     }
1232     #endif /* def SUPPORT_UTF8 */
1233    
1234     printf("Testing error reporting\n");
1235    
1236     { RE re("a\\1"); CHECK(!re.error().empty()); }
1237     {
1238     RE re("a[x");
1239     CHECK(!re.error().empty());
1240     }
1241     {
1242     RE re("a[z-a]");
1243     CHECK(!re.error().empty());
1244     }
1245     {
1246     RE re("a[[:foobar:]]");
1247     CHECK(!re.error().empty());
1248     }
1249     {
1250     RE re("a(b");
1251     CHECK(!re.error().empty());
1252     }
1253     {
1254     RE re("a\\");
1255     CHECK(!re.error().empty());
1256     }
1257    
1258 nigel 87 // Test that recursion is stopped
1259     TestRecursion();
1260 nigel 77
1261 nigel 81 // Test Options
1262     if (getenv("VERBOSE_TEST") != NULL)
1263     VERBOSE_TEST = true;
1264     TestOptions();
1265    
1266 nigel 93 // Test the constructors
1267     TestConstructors();
1268    
1269 nigel 77 // Done
1270     printf("OK\n");
1271    
1272     return 0;
1273     }

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12