/[pcre]/code/trunk/pcrecpp_unittest.cc
ViewVC logotype

Contents of /code/trunk/pcrecpp_unittest.cc

Parent Directory Parent Directory | Revision Log Revision Log


Revision 474 - (hide annotations) (download)
Sat Jan 2 16:30:46 2010 UTC (4 years, 8 months ago) by ph10
File size: 39115 byte(s)
Fix C++ wrapper GlobalReplace function for empty matches.

1 nigel 93 // -*- coding: utf-8 -*-
2     //
3 ph10 474 // Copyright (c) 2005 - 2010, Google Inc.
4 nigel 77 // All rights reserved.
5     //
6     // Redistribution and use in source and binary forms, with or without
7     // modification, are permitted provided that the following conditions are
8     // met:
9     //
10     // * Redistributions of source code must retain the above copyright
11     // notice, this list of conditions and the following disclaimer.
12     // * Redistributions in binary form must reproduce the above
13     // copyright notice, this list of conditions and the following disclaimer
14     // in the documentation and/or other materials provided with the
15     // distribution.
16     // * Neither the name of Google Inc. nor the names of its
17     // contributors may be used to endorse or promote products derived from
18     // this software without specific prior written permission.
19     //
20     // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21     // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22     // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23     // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24     // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25     // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26     // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27     // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28     // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29     // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30     // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31     //
32     // Author: Sanjay Ghemawat
33     //
34     // TODO: Test extractions for PartialMatch/Consume
35    
36 ph10 200 #ifdef HAVE_CONFIG_H
37 ph10 236 #include "config.h"
38 ph10 200 #endif
39 ph10 199
40 nigel 77 #include <stdio.h>
41 nigel 91 #include <cassert>
42 nigel 77 #include <vector>
43     #include "pcrecpp.h"
44    
45     using pcrecpp::StringPiece;
46     using pcrecpp::RE;
47     using pcrecpp::RE_Options;
48     using pcrecpp::Hex;
49     using pcrecpp::Octal;
50     using pcrecpp::CRadix;
51    
52 nigel 81 static bool VERBOSE_TEST = false;
53    
// CHECK dies with a fatal error if condition is not true.  It is *not*
// controlled by NDEBUG, so the check will be executed regardless of
// compilation mode.  Therefore, it is safe to do things like:
//    CHECK_EQ(fp->Write(x), 4)
//
// On failure the file name, the line number and the stringified condition
// are written to stderr and the process exits with status 1.
#define CHECK(condition) do { \
  if (!(condition)) { \
    fprintf(stderr, "%s:%d: Check failed: %s\n", \
            __FILE__, __LINE__, #condition); \
    exit(1); \
  } \
} while (0)

// CHECK_EQ(a, b) dies unless a == b.  Both operands are parenthesized so
// that an argument containing an operator of lower precedence than ==
// (for example "flags & mask") is compared as written instead of being
// regrouped by the expansion.
#define CHECK_EQ(a, b) CHECK((a) == (b))
67    
68     static void Timing1(int num_iters) {
69     // Same pattern lots of times
70     RE pattern("ruby:\\d+");
71     StringPiece p("ruby:1234");
72     for (int j = num_iters; j > 0; j--) {
73     CHECK(pattern.FullMatch(p));
74     }
75     }
76    
77     static void Timing2(int num_iters) {
78     // Same pattern lots of times
79     RE pattern("ruby:(\\d+)");
80     int i;
81     for (int j = num_iters; j > 0; j--) {
82     CHECK(pattern.FullMatch("ruby:1234", &i));
83     CHECK_EQ(i, 1234);
84     }
85     }
86    
87     static void Timing3(int num_iters) {
88     string text_string;
89     for (int j = num_iters; j > 0; j--) {
90     text_string += "this is another line\n";
91     }
92    
93     RE line_matcher(".*\n");
94     string line;
95     StringPiece text(text_string);
96     int counter = 0;
97     while (line_matcher.Consume(&text)) {
98     counter++;
99     }
100     printf("Matched %d lines\n", counter);
101     }
102    
#if 0  // change to "#if 1" if you have a way of defining VirtualProcessSize()

// Checks for memory leaks by constructing 100000 distinct RE objects and
// comparing the process size reported after 50000 iterations with the size
// reported at the end; growth must stay below 2%.
// VirtualProcessSize() is not defined in this file.
static void LeakTest() {
  unsigned long long initial_size = 0;
  for (int i = 0; i < 100000; i++) {
    if (i == 50000) {
      initial_size = VirtualProcessSize();
      printf("Size after 50000: %llu\n", initial_size);
    }
    char buf[100];  // definitely big enough
    sprintf(buf, "pat%09d", i);
    RE newre(buf);
  }
  // Same type as initial_size so that it matches the %llu conversion.
  const unsigned long long final_size = VirtualProcessSize();
  printf("Size after 100000: %llu\n", final_size);
  // Subtract in floating point: an unsigned subtraction would wrap around
  // to a huge value if the process happened to shrink.
  const double growth =
      (double(final_size) - double(initial_size)) / double(final_size);
  printf("Growth: %0.2f%%", growth * 100);
  CHECK(growth < 0.02);  // Allow < 2% growth
}

#endif
125    
126     static void RadixTests() {
127     printf("Testing hex\n");
128    
129     #define CHECK_HEX(type, value) \
130     do { \
131     type v; \
132     CHECK(RE("([0-9a-fA-F]+)[uUlL]*").FullMatch(#value, Hex(&v))); \
133     CHECK_EQ(v, 0x ## value); \
134     CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0x" #value, CRadix(&v))); \
135     CHECK_EQ(v, 0x ## value); \
136     } while(0)
137    
138     CHECK_HEX(short, 2bad);
139     CHECK_HEX(unsigned short, 2badU);
140     CHECK_HEX(int, dead);
141     CHECK_HEX(unsigned int, deadU);
142     CHECK_HEX(long, 7eadbeefL);
143     CHECK_HEX(unsigned long, deadbeefUL);
144     #ifdef HAVE_LONG_LONG
145     CHECK_HEX(long long, 12345678deadbeefLL);
146     #endif
147     #ifdef HAVE_UNSIGNED_LONG_LONG
148     CHECK_HEX(unsigned long long, cafebabedeadbeefULL);
149     #endif
150    
151     #undef CHECK_HEX
152    
153     printf("Testing octal\n");
154    
155     #define CHECK_OCTAL(type, value) \
156     do { \
157     type v; \
158     CHECK(RE("([0-7]+)[uUlL]*").FullMatch(#value, Octal(&v))); \
159     CHECK_EQ(v, 0 ## value); \
160     CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0" #value, CRadix(&v))); \
161     CHECK_EQ(v, 0 ## value); \
162     } while(0)
163    
164     CHECK_OCTAL(short, 77777);
165     CHECK_OCTAL(unsigned short, 177777U);
166     CHECK_OCTAL(int, 17777777777);
167     CHECK_OCTAL(unsigned int, 37777777777U);
168     CHECK_OCTAL(long, 17777777777L);
169     CHECK_OCTAL(unsigned long, 37777777777UL);
170     #ifdef HAVE_LONG_LONG
171     CHECK_OCTAL(long long, 777777777777777777777LL);
172     #endif
173     #ifdef HAVE_UNSIGNED_LONG_LONG
174     CHECK_OCTAL(unsigned long long, 1777777777777777777777ULL);
175     #endif
176    
177     #undef CHECK_OCTAL
178    
179     printf("Testing decimal\n");
180    
181     #define CHECK_DECIMAL(type, value) \
182     do { \
183     type v; \
184     CHECK(RE("(-?[0-9]+)[uUlL]*").FullMatch(#value, &v)); \
185     CHECK_EQ(v, value); \
186     CHECK(RE("(-?[0-9a-fA-FxX]+)[uUlL]*").FullMatch(#value, CRadix(&v))); \
187     CHECK_EQ(v, value); \
188     } while(0)
189    
190     CHECK_DECIMAL(short, -1);
191     CHECK_DECIMAL(unsigned short, 9999);
192     CHECK_DECIMAL(int, -1000);
193     CHECK_DECIMAL(unsigned int, 12345U);
194     CHECK_DECIMAL(long, -10000000L);
195     CHECK_DECIMAL(unsigned long, 3083324652U);
196     #ifdef HAVE_LONG_LONG
197     CHECK_DECIMAL(long long, -100000000000000LL);
198     #endif
199     #ifdef HAVE_UNSIGNED_LONG_LONG
200     CHECK_DECIMAL(unsigned long long, 1234567890987654321ULL);
201     #endif
202    
203     #undef CHECK_DECIMAL
204    
205     }
206    
207     static void TestReplace() {
208     printf("Testing Replace\n");
209    
210     struct ReplaceTest {
211     const char *regexp;
212     const char *rewrite;
213     const char *original;
214     const char *single;
215     const char *global;
216 ph10 297 int global_count; // the expected return value from ReplaceAll
217 nigel 77 };
218     static const ReplaceTest tests[] = {
219     { "(qu|[b-df-hj-np-tv-z]*)([a-z]+)",
220     "\\2\\1ay",
221     "the quick brown fox jumps over the lazy dogs.",
222     "ethay quick brown fox jumps over the lazy dogs.",
223 ph10 297 "ethay ickquay ownbray oxfay umpsjay overay ethay azylay ogsday.",
224     9 },
225 nigel 77 { "\\w+",
226     "\\0-NOSPAM",
227     "paul.haahr@google.com",
228     "paul-NOSPAM.haahr@google.com",
229 ph10 297 "paul-NOSPAM.haahr-NOSPAM@google-NOSPAM.com-NOSPAM",
230     4 },
231 nigel 77 { "^",
232     "(START)",
233     "foo",
234     "(START)foo",
235 ph10 297 "(START)foo",
236     1 },
237 nigel 77 { "^",
238     "(START)",
239     "",
240     "(START)",
241 ph10 297 "(START)",
242     1 },
243 nigel 77 { "$",
244     "(END)",
245     "",
246     "(END)",
247 ph10 297 "(END)",
248     1 },
249 nigel 77 { "b",
250     "bb",
251     "ababababab",
252     "abbabababab",
253 ph10 297 "abbabbabbabbabb",
254     5 },
255 nigel 77 { "b",
256     "bb",
257     "bbbbbb",
258     "bbbbbbb",
259 ph10 297 "bbbbbbbbbbbb",
260     6 },
261 nigel 77 { "b+",
262     "bb",
263     "bbbbbb",
264     "bb",
265 ph10 297 "bb",
266     1 },
267 nigel 77 { "b*",
268     "bb",
269     "bbbbbb",
270     "bb",
271 ph10 474 "bbbb",
272     2 },
273 nigel 77 { "b*",
274     "bb",
275     "aaaaa",
276     "bbaaaaa",
277 ph10 297 "bbabbabbabbabbabb",
278     6 },
279 nigel 91 { "b*",
280     "bb",
281     "aa\naa\n",
282     "bbaa\naa\n",
283 ph10 297 "bbabbabb\nbbabbabb\nbb",
284     7 },
285 nigel 91 { "b*",
286     "bb",
287     "aa\raa\r",
288     "bbaa\raa\r",
289 ph10 297 "bbabbabb\rbbabbabb\rbb",
290     7 },
291 nigel 91 { "b*",
292     "bb",
293     "aa\r\naa\r\n",
294     "bbaa\r\naa\r\n",
295 ph10 297 "bbabbabb\r\nbbabbabb\r\nbb",
296     7 },
297 ph10 474 // Check empty-string matching (it's tricky!)
298     { "aa|b*",
299     "@",
300     "aa",
301     "@",
302     "@@",
303     2 },
304     { "b*|aa",
305     "@",
306     "aa",
307     "@aa",
308     "@@@",
309     3 },
310 nigel 91 #ifdef SUPPORT_UTF8
311     { "b*",
312     "bb",
313     "\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8", // utf8
314     "bb\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8",
315 ph10 297 "bb\xE3\x83\x9B""bb""\xE3\x83\xBC""bb""\xE3\x83\xA0""bb""\xE3\x81\xB8""bb",
316     5 },
317 nigel 91 { "b*",
318     "bb",
319     "\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n", // utf8
320     "bb\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n",
321     ("bb\xE3\x83\x9B""bb\r\nbb""\xE3\x83\xBC""bb\rbb""\xE3\x83\xA0"
322 ph10 297 "bb\nbb""\xE3\x81\xB8""bb\r\nbb"),
323     9 },
324 nigel 91 #endif
325 ph10 297 { "", NULL, NULL, NULL, NULL, 0 }
326 nigel 77 };
327    
328 nigel 91 #ifdef SUPPORT_UTF8
329     const bool support_utf8 = true;
330     #else
331     const bool support_utf8 = false;
332     #endif
333    
334 nigel 77 for (const ReplaceTest *t = tests; t->original != NULL; ++t) {
335 nigel 91 RE re(t->regexp, RE_Options(PCRE_NEWLINE_CRLF).set_utf8(support_utf8));
336     assert(re.error().empty());
337 nigel 77 string one(t->original);
338 nigel 91 CHECK(re.Replace(t->rewrite, &one));
339 nigel 77 CHECK_EQ(one, t->single);
340     string all(t->original);
341 ph10 297 const int replace_count = re.GlobalReplace(t->rewrite, &all);
342 nigel 77 CHECK_EQ(all, t->global);
343 ph10 297 CHECK_EQ(replace_count, t->global_count);
344 nigel 77 }
345 nigel 91
346     // One final test: test \r\n replacement when we're not in CRLF mode
347     {
348     RE re("b*", RE_Options(PCRE_NEWLINE_CR).set_utf8(support_utf8));
349     assert(re.error().empty());
350     string all("aa\r\naa\r\n");
351 ph10 297 CHECK_EQ(re.GlobalReplace("bb", &all), 9);
352 nigel 91 CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
353     }
354     {
355     RE re("b*", RE_Options(PCRE_NEWLINE_LF).set_utf8(support_utf8));
356     assert(re.error().empty());
357     string all("aa\r\naa\r\n");
358 ph10 297 CHECK_EQ(re.GlobalReplace("bb", &all), 9);
359 nigel 91 CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
360     }
361     // TODO: test what happens when no PCRE_NEWLINE_* flag is set.
362     // Alas, the answer depends on how pcre was compiled.
363 nigel 77 }
364    
365     static void TestExtract() {
366     printf("Testing Extract\n");
367    
368     string s;
369    
370     CHECK(RE("(.*)@([^.]*)").Extract("\\2!\\1", "boris@kremvax.ru", &s));
371     CHECK_EQ(s, "kremvax!boris");
372    
373     // check the RE interface as well
374     CHECK(RE(".*").Extract("'\\0'", "foo", &s));
375     CHECK_EQ(s, "'foo'");
376     CHECK(!RE("bar").Extract("'\\0'", "baz", &s));
377     CHECK_EQ(s, "'foo'");
378     }
379    
380     static void TestConsume() {
381     printf("Testing Consume\n");
382    
383     string word;
384    
385     string s(" aaa b!@#$@#$cccc");
386     StringPiece input(s);
387    
388     RE r("\\s*(\\w+)"); // matches a word, possibly proceeded by whitespace
389     CHECK(r.Consume(&input, &word));
390     CHECK_EQ(word, "aaa");
391     CHECK(r.Consume(&input, &word));
392     CHECK_EQ(word, "b");
393     CHECK(! r.Consume(&input, &word));
394     }
395    
396     static void TestFindAndConsume() {
397     printf("Testing FindAndConsume\n");
398    
399     string word;
400    
401     string s(" aaa b!@#$@#$cccc");
402     StringPiece input(s);
403    
404     RE r("(\\w+)"); // matches a word
405     CHECK(r.FindAndConsume(&input, &word));
406     CHECK_EQ(word, "aaa");
407     CHECK(r.FindAndConsume(&input, &word));
408     CHECK_EQ(word, "b");
409     CHECK(r.FindAndConsume(&input, &word));
410     CHECK_EQ(word, "cccc");
411     CHECK(! r.FindAndConsume(&input, &word));
412     }
413    
414     static void TestMatchNumberPeculiarity() {
415     printf("Testing match-number peculiaraity\n");
416    
417     string word1;
418     string word2;
419     string word3;
420    
421     RE r("(foo)|(bar)|(baz)");
422     CHECK(r.PartialMatch("foo", &word1, &word2, &word3));
423     CHECK_EQ(word1, "foo");
424     CHECK_EQ(word2, "");
425     CHECK_EQ(word3, "");
426     CHECK(r.PartialMatch("bar", &word1, &word2, &word3));
427     CHECK_EQ(word1, "");
428     CHECK_EQ(word2, "bar");
429     CHECK_EQ(word3, "");
430     CHECK(r.PartialMatch("baz", &word1, &word2, &word3));
431     CHECK_EQ(word1, "");
432     CHECK_EQ(word2, "");
433     CHECK_EQ(word3, "baz");
434     CHECK(!r.PartialMatch("f", &word1, &word2, &word3));
435    
436     string a;
437     CHECK(RE("(foo)|hello").FullMatch("hello", &a));
438     CHECK_EQ(a, "");
439     }
440    
441 nigel 87 static void TestRecursion() {
442 nigel 77 printf("Testing recursion\n");
443    
444 nigel 87 // Get one string that passes (sometimes), one that never does.
445     string text_good("abcdefghijk");
446     string text_bad("acdefghijkl");
447    
448     // According to pcretest, matching text_good against (\w+)*b
449     // requires match_limit of at least 8192, and match_recursion_limit
450     // of at least 37.
451    
452     RE_Options options_ml;
453     options_ml.set_match_limit(8192);
454     RE re("(\\w+)*b", options_ml);
455     CHECK(re.PartialMatch(text_good) == true);
456     CHECK(re.PartialMatch(text_bad) == false);
457     CHECK(re.FullMatch(text_good) == false);
458     CHECK(re.FullMatch(text_bad) == false);
459    
460     options_ml.set_match_limit(1024);
461     RE re2("(\\w+)*b", options_ml);
462     CHECK(re2.PartialMatch(text_good) == false); // because of match_limit
463     CHECK(re2.PartialMatch(text_bad) == false);
464     CHECK(re2.FullMatch(text_good) == false);
465     CHECK(re2.FullMatch(text_bad) == false);
466    
467     RE_Options options_mlr;
468     options_mlr.set_match_limit_recursion(50);
469     RE re3("(\\w+)*b", options_mlr);
470     CHECK(re3.PartialMatch(text_good) == true);
471     CHECK(re3.PartialMatch(text_bad) == false);
472     CHECK(re3.FullMatch(text_good) == false);
473     CHECK(re3.FullMatch(text_bad) == false);
474    
475     options_mlr.set_match_limit_recursion(10);
476     RE re4("(\\w+)*b", options_mlr);
477     CHECK(re4.PartialMatch(text_good) == false);
478     CHECK(re4.PartialMatch(text_bad) == false);
479     CHECK(re4.FullMatch(text_good) == false);
480     CHECK(re4.FullMatch(text_bad) == false);
481 nigel 77 }
482    
483 nigel 93 // A meta-quoted string, interpreted as a pattern, should always match
484     // the original unquoted string.
485     static void TestQuoteMeta(string unquoted, RE_Options options = RE_Options()) {
486     string quoted = RE::QuoteMeta(unquoted);
487     RE re(quoted, options);
488     CHECK(re.FullMatch(unquoted));
489     }
490    
491     // A string containing meaningful regexp characters, which is then meta-
492     // quoted, should not generally match a string the unquoted string does.
493     static void NegativeTestQuoteMeta(string unquoted, string should_not_match,
494     RE_Options options = RE_Options()) {
495     string quoted = RE::QuoteMeta(unquoted);
496     RE re(quoted, options);
497     CHECK(!re.FullMatch(should_not_match));
498     }
499    
500     // Tests that quoted meta characters match their original strings,
501     // and that a few things that shouldn't match indeed do not.
502     static void TestQuotaMetaSimple() {
503     TestQuoteMeta("foo");
504     TestQuoteMeta("foo.bar");
505     TestQuoteMeta("foo\\.bar");
506     TestQuoteMeta("[1-9]");
507     TestQuoteMeta("1.5-2.0?");
508     TestQuoteMeta("\\d");
509     TestQuoteMeta("Who doesn't like ice cream?");
510     TestQuoteMeta("((a|b)c?d*e+[f-h]i)");
511     TestQuoteMeta("((?!)xxx).*yyy");
512     TestQuoteMeta("([");
513 ph10 326 TestQuoteMeta(string("foo\0bar", 7));
514 nigel 93 }
515    
516     static void TestQuoteMetaSimpleNegative() {
517     NegativeTestQuoteMeta("foo", "bar");
518     NegativeTestQuoteMeta("...", "bar");
519     NegativeTestQuoteMeta("\\.", ".");
520     NegativeTestQuoteMeta("\\.", "..");
521     NegativeTestQuoteMeta("(a)", "a");
522     NegativeTestQuoteMeta("(a|b)", "a");
523     NegativeTestQuoteMeta("(a|b)", "(a)");
524     NegativeTestQuoteMeta("(a|b)", "a|b");
525     NegativeTestQuoteMeta("[0-9]", "0");
526     NegativeTestQuoteMeta("[0-9]", "0-9");
527     NegativeTestQuoteMeta("[0-9]", "[9]");
528     NegativeTestQuoteMeta("((?!)xxx)", "xxx");
529     }
530    
531     static void TestQuoteMetaLatin1() {
532     TestQuoteMeta("3\xb2 = 9");
533     }
534    
// QuoteMeta checks on multi-byte UTF-8 sequences.  The body is compiled
// only when SUPPORT_UTF8 is defined; otherwise this function does nothing.
static void TestQuoteMetaUtf8() {
#ifdef SUPPORT_UTF8
  const RE_Options utf8_options = pcrecpp::UTF8();

  TestQuoteMeta("Pl\xc3\xa1\x63ido Domingo", utf8_options);
  TestQuoteMeta("xyz", utf8_options);                 // No fancy utf8
  TestQuoteMeta("\xc2\xb0", utf8_options);            // 2-byte utf8 (degree symbol)
  TestQuoteMeta("27\xc2\xb0 degrees", utf8_options);  // As a middle character
  TestQuoteMeta("\xe2\x80\xb3", utf8_options);        // 3-byte utf8 (double prime)
  TestQuoteMeta("\xf0\x9d\x85\x9f", utf8_options);    // 4-byte utf8 (music note)
  TestQuoteMeta("27\xc2\xb0");  // Interpreted as Latin-1, but should still work

  // A backslash in front of each byte of the 2-byte sequence must not match.
  NegativeTestQuoteMeta("27\xc2\xb0",               // 2-byte utf (degree symbol)
                        "27\\\xc2\\\xb0",
                        utf8_options);
#endif
}
549    
550     static void TestQuoteMetaAll() {
551     printf("Testing QuoteMeta\n");
552     TestQuotaMetaSimple();
553     TestQuoteMetaSimpleNegative();
554     TestQuoteMetaLatin1();
555     TestQuoteMetaUtf8();
556     }
557    
558 nigel 81 //
559     // Options tests contributed by
560     // Giuseppe Maxia, CTO, Stardata s.r.l.
561     // July 2005
562     //
563     static void GetOneOptionResult(
564     const char *option_name,
565     const char *regex,
566     const char *str,
567     RE_Options options,
568     bool full,
569     string expected) {
570 nigel 77
571 nigel 81 printf("Testing Option <%s>\n", option_name);
572     if(VERBOSE_TEST)
573     printf("/%s/ finds \"%s\" within \"%s\" \n",
574     regex,
575     expected.c_str(),
576     str);
577     string captured("");
578     if (full)
579     RE(regex,options).FullMatch(str, &captured);
580     else
581     RE(regex,options).PartialMatch(str, &captured);
582     CHECK_EQ(captured, expected);
583     }
584    
585     static void TestOneOption(
586     const char *option_name,
587     const char *regex,
588     const char *str,
589     RE_Options options,
590     bool full,
591     bool assertive = true) {
592    
593     printf("Testing Option <%s>\n", option_name);
594     if (VERBOSE_TEST)
595     printf("'%s' %s /%s/ \n",
596     str,
597     (assertive? "matches" : "doesn't match"),
598     regex);
599     if (assertive) {
600     if (full)
601     CHECK(RE(regex,options).FullMatch(str));
602     else
603     CHECK(RE(regex,options).PartialMatch(str));
604     } else {
605     if (full)
606     CHECK(!RE(regex,options).FullMatch(str));
607     else
608     CHECK(!RE(regex,options).PartialMatch(str));
609     }
610     }
611    
612     static void Test_CASELESS() {
613     RE_Options options;
614     RE_Options options2;
615    
616     options.set_caseless(true);
617     TestOneOption("CASELESS (class)", "HELLO", "hello", options, false);
618     TestOneOption("CASELESS (class2)", "HELLO", "hello", options2.set_caseless(true), false);
619     TestOneOption("CASELESS (class)", "^[A-Z]+$", "Hello", options, false);
620    
621     TestOneOption("CASELESS (function)", "HELLO", "hello", pcrecpp::CASELESS(), false);
622     TestOneOption("CASELESS (function)", "^[A-Z]+$", "Hello", pcrecpp::CASELESS(), false);
623     options.set_caseless(false);
624     TestOneOption("no CASELESS", "HELLO", "hello", options, false, false);
625     }
626    
627     static void Test_MULTILINE() {
628     RE_Options options;
629     RE_Options options2;
630     const char *str = "HELLO\n" "cruel\n" "world\n";
631    
632     options.set_multiline(true);
633     TestOneOption("MULTILINE (class)", "^cruel$", str, options, false);
634     TestOneOption("MULTILINE (class2)", "^cruel$", str, options2.set_multiline(true), false);
635     TestOneOption("MULTILINE (function)", "^cruel$", str, pcrecpp::MULTILINE(), false);
636     options.set_multiline(false);
637     TestOneOption("no MULTILINE", "^cruel$", str, options, false, false);
638     }
639    
640     static void Test_DOTALL() {
641     RE_Options options;
642     RE_Options options2;
643     const char *str = "HELLO\n" "cruel\n" "world";
644    
645     options.set_dotall(true);
646     TestOneOption("DOTALL (class)", "HELLO.*world", str, options, true);
647     TestOneOption("DOTALL (class2)", "HELLO.*world", str, options2.set_dotall(true), true);
648     TestOneOption("DOTALL (function)", "HELLO.*world", str, pcrecpp::DOTALL(), true);
649     options.set_dotall(false);
650     TestOneOption("no DOTALL", "HELLO.*world", str, options, true, false);
651     }
652    
653     static void Test_DOLLAR_ENDONLY() {
654     RE_Options options;
655     RE_Options options2;
656     const char *str = "HELLO world\n";
657    
658     TestOneOption("no DOLLAR_ENDONLY", "world$", str, options, false);
659     options.set_dollar_endonly(true);
660     TestOneOption("DOLLAR_ENDONLY 1", "world$", str, options, false, false);
661     TestOneOption("DOLLAR_ENDONLY 2", "world$", str, options2.set_dollar_endonly(true), false, false);
662     }
663    
664     static void Test_EXTRA() {
665     RE_Options options;
666     const char *str = "HELLO";
667    
668     options.set_extra(true);
669     TestOneOption("EXTRA 1", "\\HELL\\O", str, options, true, false );
670     TestOneOption("EXTRA 2", "\\HELL\\O", str, RE_Options().set_extra(true), true, false );
671     options.set_extra(false);
672     TestOneOption("no EXTRA", "\\HELL\\O", str, options, true );
673     }
674    
675     static void Test_EXTENDED() {
676     RE_Options options;
677     RE_Options options2;
678     const char *str = "HELLO world";
679    
680     options.set_extended(true);
681     TestOneOption("EXTENDED (class)", "HELLO world", str, options, false, false);
682     TestOneOption("EXTENDED (class2)", "HELLO world", str, options2.set_extended(true), false, false);
683     TestOneOption("EXTENDED (class)",
684     "^ HE L{2} O "
685     "\\s+ "
686     "\\w+ $ ",
687     str,
688     options,
689     false);
690    
691     TestOneOption("EXTENDED (function)", "HELLO world", str, pcrecpp::EXTENDED(), false, false);
692     TestOneOption("EXTENDED (function)",
693     "^ HE L{2} O "
694     "\\s+ "
695     "\\w+ $ ",
696     str,
697     pcrecpp::EXTENDED(),
698     false);
699    
700     options.set_extended(false);
701     TestOneOption("no EXTENDED", "HELLO world", str, options, false);
702     }
703    
704     static void Test_NO_AUTO_CAPTURE() {
705     RE_Options options;
706     const char *str = "HELLO world";
707     string captured;
708    
709     printf("Testing Option <no NO_AUTO_CAPTURE>\n");
710     if (VERBOSE_TEST)
711     printf("parentheses capture text\n");
712     RE re("(world|universe)$", options);
713     CHECK(re.Extract("\\1", str , &captured));
714     CHECK_EQ(captured, "world");
715     options.set_no_auto_capture(true);
716     printf("testing Option <NO_AUTO_CAPTURE>\n");
717     if (VERBOSE_TEST)
718     printf("parentheses do not capture text\n");
719     re.Extract("\\1",str, &captured );
720     CHECK_EQ(captured, "world");
721     }
722    
723     static void Test_UNGREEDY() {
724     RE_Options options;
725     const char *str = "HELLO, 'this' is the 'world'";
726    
727     options.set_ungreedy(true);
728     GetOneOptionResult("UNGREEDY 1", "('.*')", str, options, false, "'this'" );
729     GetOneOptionResult("UNGREEDY 2", "('.*')", str, RE_Options().set_ungreedy(true), false, "'this'" );
730     GetOneOptionResult("UNGREEDY", "('.*?')", str, options, false, "'this' is the 'world'" );
731    
732     options.set_ungreedy(false);
733     GetOneOptionResult("no UNGREEDY", "('.*')", str, options, false, "'this' is the 'world'" );
734     GetOneOptionResult("no UNGREEDY", "('.*?')", str, options, false, "'this'" );
735     }
736    
737     static void Test_all_options() {
738     const char *str = "HELLO\n" "cruel\n" "world";
739     RE_Options options;
740     options.set_all_options(PCRE_CASELESS | PCRE_DOTALL);
741    
742     TestOneOption("all_options (CASELESS|DOTALL)", "^hello.*WORLD", str , options, false);
743     options.set_all_options(0);
744     TestOneOption("all_options (0)", "^hello.*WORLD", str , options, false, false);
745     options.set_all_options(PCRE_MULTILINE | PCRE_EXTENDED);
746    
747     TestOneOption("all_options (MULTILINE|EXTENDED)", " ^ c r u e l $ ", str, options, false);
748     TestOneOption("all_options (MULTILINE|EXTENDED) with constructor",
749     " ^ c r u e l $ ",
750     str,
751     RE_Options(PCRE_MULTILINE | PCRE_EXTENDED),
752     false);
753    
754     TestOneOption("all_options (MULTILINE|EXTENDED) with concatenation",
755     " ^ c r u e l $ ",
756     str,
757     RE_Options()
758     .set_multiline(true)
759     .set_extended(true),
760     false);
761    
762     options.set_all_options(0);
763     TestOneOption("all_options (0)", "^ c r u e l $", str, options, false, false);
764    
765     }
766    
767     static void TestOptions() {
768     printf("Testing Options\n");
769     Test_CASELESS();
770     Test_MULTILINE();
771     Test_DOTALL();
772     Test_DOLLAR_ENDONLY();
773     Test_EXTENDED();
774     Test_NO_AUTO_CAPTURE();
775     Test_UNGREEDY();
776     Test_EXTRA();
777     Test_all_options();
778     }
779    
780 nigel 93 static void TestConstructors() {
781     printf("Testing constructors\n");
782    
783     RE_Options options;
784     options.set_dotall(true);
785     const char *str = "HELLO\n" "cruel\n" "world";
786    
787     RE orig("HELLO.*world", options);
788     CHECK(orig.FullMatch(str));
789    
790     RE copy1(orig);
791     CHECK(copy1.FullMatch(str));
792    
793     RE copy2("not a match");
794     CHECK(!copy2.FullMatch(str));
795     copy2 = copy1;
796     CHECK(copy2.FullMatch(str));
797     copy2 = orig;
798     CHECK(copy2.FullMatch(str));
799    
800     // Make sure when we assign to ourselves, nothing bad happens
801     orig = orig;
802     copy1 = copy1;
803     copy2 = copy2;
804     CHECK(orig.FullMatch(str));
805     CHECK(copy1.FullMatch(str));
806     CHECK(copy2.FullMatch(str));
807     }
808    
809 nigel 77 int main(int argc, char** argv) {
810     // Treat any flag as --help
811     if (argc > 1 && argv[1][0] == '-') {
812     printf("Usage: %s [timing1|timing2|timing3 num-iters]\n"
813     " If 'timingX ###' is specified, run the given timing test\n"
814     " with the given number of iterations, rather than running\n"
815     " the default corectness test.\n", argv[0]);
816     return 0;
817     }
818    
819     if (argc > 1) {
820     if ( argc == 2 || atoi(argv[2]) == 0) {
821     printf("timing mode needs a num-iters argument\n");
822     return 1;
823     }
824     if (!strcmp(argv[1], "timing1"))
825     Timing1(atoi(argv[2]));
826     else if (!strcmp(argv[1], "timing2"))
827     Timing2(atoi(argv[2]));
828     else if (!strcmp(argv[1], "timing3"))
829     Timing3(atoi(argv[2]));
830     else
831     printf("Unknown argument '%s'\n", argv[1]);
832     return 0;
833     }
834    
835     printf("Testing FullMatch\n");
836    
837     int i;
838     string s;
839    
840     /***** FullMatch with no args *****/
841    
842     CHECK(RE("h.*o").FullMatch("hello"));
843 ph10 179 CHECK(!RE("h.*o").FullMatch("othello")); // Must be anchored at front
844     CHECK(!RE("h.*o").FullMatch("hello!")); // Must be anchored at end
845     CHECK(RE("a*").FullMatch("aaaa")); // Fullmatch with normal op
846     CHECK(RE("a*?").FullMatch("aaaa")); // Fullmatch with nongreedy op
847     CHECK(RE("a*?\\z").FullMatch("aaaa")); // Two unusual ops
848 nigel 77
849     /***** FullMatch with args *****/
850    
851     // Zero-arg
852     CHECK(RE("\\d+").FullMatch("1001"));
853    
854     // Single-arg
855     CHECK(RE("(\\d+)").FullMatch("1001", &i));
856     CHECK_EQ(i, 1001);
857     CHECK(RE("(-?\\d+)").FullMatch("-123", &i));
858     CHECK_EQ(i, -123);
859     CHECK(!RE("()\\d+").FullMatch("10", &i));
860     CHECK(!RE("(\\d+)").FullMatch("1234567890123456789012345678901234567890",
861     &i));
862    
863     // Digits surrounding integer-arg
864     CHECK(RE("1(\\d*)4").FullMatch("1234", &i));
865     CHECK_EQ(i, 23);
866     CHECK(RE("(\\d)\\d+").FullMatch("1234", &i));
867     CHECK_EQ(i, 1);
868     CHECK(RE("(-\\d)\\d+").FullMatch("-1234", &i));
869     CHECK_EQ(i, -1);
870     CHECK(RE("(\\d)").PartialMatch("1234", &i));
871     CHECK_EQ(i, 1);
872     CHECK(RE("(-\\d)").PartialMatch("-1234", &i));
873     CHECK_EQ(i, -1);
874    
875     // String-arg
876     CHECK(RE("h(.*)o").FullMatch("hello", &s));
877     CHECK_EQ(s, string("ell"));
878    
879     // StringPiece-arg
880     StringPiece sp;
881     CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &sp, &i));
882     CHECK_EQ(sp.size(), 4);
883     CHECK(memcmp(sp.data(), "ruby", 4) == 0);
884     CHECK_EQ(i, 1234);
885    
886     // Multi-arg
887     CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &s, &i));
888     CHECK_EQ(s, string("ruby"));
889     CHECK_EQ(i, 1234);
890    
891 ph10 263 // Ignore non-void* NULL arg
892     CHECK(RE("he(.*)lo").FullMatch("hello", (char*)NULL));
893     CHECK(RE("h(.*)o").FullMatch("hello", (string*)NULL));
894     CHECK(RE("h(.*)o").FullMatch("hello", (StringPiece*)NULL));
895     CHECK(RE("(.*)").FullMatch("1234", (int*)NULL));
896 ph10 302 #ifdef HAVE_LONG_LONG
897 ph10 263 CHECK(RE("(.*)").FullMatch("1234567890123456", (long long*)NULL));
898 ph10 302 #endif
899 ph10 263 CHECK(RE("(.*)").FullMatch("123.4567890123456", (double*)NULL));
900     CHECK(RE("(.*)").FullMatch("123.4567890123456", (float*)NULL));
901    
902     // Fail on non-void* NULL arg if the match doesn't parse for the given type.
903     CHECK(!RE("h(.*)lo").FullMatch("hello", &s, (char*)NULL));
904     CHECK(!RE("(.*)").FullMatch("hello", (int*)NULL));
905     CHECK(!RE("(.*)").FullMatch("1234567890123456", (int*)NULL));
906     CHECK(!RE("(.*)").FullMatch("hello", (double*)NULL));
907     CHECK(!RE("(.*)").FullMatch("hello", (float*)NULL));
908    
909 nigel 77 // Ignored arg
910     CHECK(RE("(\\w+)(:)(\\d+)").FullMatch("ruby:1234", &s, (void*)NULL, &i));
911     CHECK_EQ(s, string("ruby"));
912     CHECK_EQ(i, 1234);
913    
914     // Type tests
915     {
916     char c;
917     CHECK(RE("(H)ello").FullMatch("Hello", &c));
918     CHECK_EQ(c, 'H');
919     }
920     {
921     unsigned char c;
922     CHECK(RE("(H)ello").FullMatch("Hello", &c));
923     CHECK_EQ(c, static_cast<unsigned char>('H'));
924     }
925     {
926     short v;
927     CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
928     CHECK(RE("(-?\\d+)").FullMatch("-100", &v)); CHECK_EQ(v, -100);
929     CHECK(RE("(-?\\d+)").FullMatch("32767", &v)); CHECK_EQ(v, 32767);
930     CHECK(RE("(-?\\d+)").FullMatch("-32768", &v)); CHECK_EQ(v, -32768);
931     CHECK(!RE("(-?\\d+)").FullMatch("-32769", &v));
932     CHECK(!RE("(-?\\d+)").FullMatch("32768", &v));
933     }
934     {
935     unsigned short v;
936     CHECK(RE("(\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
937     CHECK(RE("(\\d+)").FullMatch("32767", &v)); CHECK_EQ(v, 32767);
938     CHECK(RE("(\\d+)").FullMatch("65535", &v)); CHECK_EQ(v, 65535);
939     CHECK(!RE("(\\d+)").FullMatch("65536", &v));
940     }
941     {
942     int v;
943     static const int max_value = 0x7fffffff;
944     static const int min_value = -max_value - 1;
945     CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
946     CHECK(RE("(-?\\d+)").FullMatch("-100", &v)); CHECK_EQ(v, -100);
947     CHECK(RE("(-?\\d+)").FullMatch("2147483647", &v)); CHECK_EQ(v, max_value);
948     CHECK(RE("(-?\\d+)").FullMatch("-2147483648", &v)); CHECK_EQ(v, min_value);
949     CHECK(!RE("(-?\\d+)").FullMatch("-2147483649", &v));
950     CHECK(!RE("(-?\\d+)").FullMatch("2147483648", &v));
951     }
952     {
953     unsigned int v;
954     static const unsigned int max_value = 0xfffffffful;
955     CHECK(RE("(\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
956     CHECK(RE("(\\d+)").FullMatch("4294967295", &v)); CHECK_EQ(v, max_value);
957     CHECK(!RE("(\\d+)").FullMatch("4294967296", &v));
958     }
959     #ifdef HAVE_LONG_LONG
960 ph10 193 # if defined(__MINGW__) || defined(__MINGW32__)
961     # define LLD "%I64d"
962 ph10 201 # define LLU "%I64u"
963 ph10 193 # else
964     # define LLD "%lld"
965 ph10 201 # define LLU "%llu"
966 ph10 193 # endif
967 nigel 77 {
968     long long v;
969     static const long long max_value = 0x7fffffffffffffffLL;
970     static const long long min_value = -max_value - 1;
971 ph10 257 char buf[32]; // definitely big enough for a long long
972 nigel 77
973     CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
974     CHECK(RE("(-?\\d+)").FullMatch("-100",&v)); CHECK_EQ(v, -100);
975    
976 ph10 257 sprintf(buf, LLD, max_value);
977 nigel 77 CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
978    
979 ph10 257 sprintf(buf, LLD, min_value);
980 nigel 77 CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, min_value);
981    
982 ph10 257 sprintf(buf, LLD, max_value);
983 nigel 77 assert(buf[strlen(buf)-1] != '9');
984     buf[strlen(buf)-1]++;
985     CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
986    
987 ph10 257 sprintf(buf, LLD, min_value);
988 nigel 77 assert(buf[strlen(buf)-1] != '9');
989     buf[strlen(buf)-1]++;
990     CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
991     }
992     #endif
993     #if defined HAVE_UNSIGNED_LONG_LONG && defined HAVE_LONG_LONG
994     {
995     unsigned long long v;
996     long long v2;
997     static const unsigned long long max_value = 0xffffffffffffffffULL;
998 ph10 257 char buf[32]; // definitely big enough for an unsigned long long
999 nigel 77
1000     CHECK(RE("(-?\\d+)").FullMatch("100",&v)); CHECK_EQ(v, 100);
1001     CHECK(RE("(-?\\d+)").FullMatch("-100",&v2)); CHECK_EQ(v2, -100);
1002    
1003 ph10 257 sprintf(buf, LLU, max_value);
1004 nigel 77 CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
1005    
1006     assert(buf[strlen(buf)-1] != '9');
1007     buf[strlen(buf)-1]++;
1008     CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
1009     }
1010     #endif
1011     {
1012     float v;
1013     CHECK(RE("(.*)").FullMatch("100", &v));
1014     CHECK(RE("(.*)").FullMatch("-100.", &v));
1015     CHECK(RE("(.*)").FullMatch("1e23", &v));
1016     }
1017     {
1018     double v;
1019     CHECK(RE("(.*)").FullMatch("100", &v));
1020     CHECK(RE("(.*)").FullMatch("-100.", &v));
1021     CHECK(RE("(.*)").FullMatch("1e23", &v));
1022     }
1023    
1024     // Check that matching is fully anchored
1025     CHECK(!RE("(\\d+)").FullMatch("x1001", &i));
1026     CHECK(!RE("(\\d+)").FullMatch("1001x", &i));
1027     CHECK(RE("x(\\d+)").FullMatch("x1001", &i)); CHECK_EQ(i, 1001);
1028     CHECK(RE("(\\d+)x").FullMatch("1001x", &i)); CHECK_EQ(i, 1001);
1029    
1030     // Braces
1031     CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcd"));
1032     CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcde"));
1033     CHECK(!RE("[0-9a-f+.-]{5,}").FullMatch("0abc"));
1034    
1035     // Complicated RE
1036     CHECK(RE("foo|bar|[A-Z]").FullMatch("foo"));
1037     CHECK(RE("foo|bar|[A-Z]").FullMatch("bar"));
1038     CHECK(RE("foo|bar|[A-Z]").FullMatch("X"));
1039     CHECK(!RE("foo|bar|[A-Z]").FullMatch("XY"));
1040    
1041     // Check full-match handling (needs '$' tacked on internally)
1042     CHECK(RE("fo|foo").FullMatch("fo"));
1043     CHECK(RE("fo|foo").FullMatch("foo"));
1044     CHECK(RE("fo|foo$").FullMatch("fo"));
1045     CHECK(RE("fo|foo$").FullMatch("foo"));
1046     CHECK(RE("foo$").FullMatch("foo"));
1047     CHECK(!RE("foo\\$").FullMatch("foo$bar"));
1048     CHECK(!RE("fo|bar").FullMatch("fox"));
1049    
1050     // Uncomment the following if we change the handling of '$' to
1051     // prevent it from matching a trailing newline
1052     if (false) {
1053     // Check that we don't get bitten by pcre's special handling of a
1054     // '\n' at the end of the string matching '$'
1055     CHECK(!RE("foo$").PartialMatch("foo\n"));
1056     }
1057    
1058     // Number of args
1059     int a[16];
1060     CHECK(RE("").FullMatch(""));
1061    
1062     memset(a, 0, sizeof(0));
1063     CHECK(RE("(\\d){1}").FullMatch("1",
1064     &a[0]));
1065     CHECK_EQ(a[0], 1);
1066    
1067     memset(a, 0, sizeof(0));
1068     CHECK(RE("(\\d)(\\d)").FullMatch("12",
1069     &a[0], &a[1]));
1070     CHECK_EQ(a[0], 1);
1071     CHECK_EQ(a[1], 2);
1072    
1073     memset(a, 0, sizeof(0));
1074     CHECK(RE("(\\d)(\\d)(\\d)").FullMatch("123",
1075     &a[0], &a[1], &a[2]));
1076     CHECK_EQ(a[0], 1);
1077     CHECK_EQ(a[1], 2);
1078     CHECK_EQ(a[2], 3);
1079    
1080     memset(a, 0, sizeof(0));
1081     CHECK(RE("(\\d)(\\d)(\\d)(\\d)").FullMatch("1234",
1082     &a[0], &a[1], &a[2], &a[3]));
1083     CHECK_EQ(a[0], 1);
1084     CHECK_EQ(a[1], 2);
1085     CHECK_EQ(a[2], 3);
1086     CHECK_EQ(a[3], 4);
1087    
1088     memset(a, 0, sizeof(0));
1089     CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("12345",
1090     &a[0], &a[1], &a[2],
1091     &a[3], &a[4]));
1092     CHECK_EQ(a[0], 1);
1093     CHECK_EQ(a[1], 2);
1094     CHECK_EQ(a[2], 3);
1095     CHECK_EQ(a[3], 4);
1096     CHECK_EQ(a[4], 5);
1097    
1098     memset(a, 0, sizeof(0));
1099     CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("123456",
1100     &a[0], &a[1], &a[2],
1101     &a[3], &a[4], &a[5]));
1102     CHECK_EQ(a[0], 1);
1103     CHECK_EQ(a[1], 2);
1104     CHECK_EQ(a[2], 3);
1105     CHECK_EQ(a[3], 4);
1106     CHECK_EQ(a[4], 5);
1107     CHECK_EQ(a[5], 6);
1108    
1109     memset(a, 0, sizeof(0));
1110     CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("1234567",
1111     &a[0], &a[1], &a[2], &a[3],
1112     &a[4], &a[5], &a[6]));
1113     CHECK_EQ(a[0], 1);
1114     CHECK_EQ(a[1], 2);
1115     CHECK_EQ(a[2], 3);
1116     CHECK_EQ(a[3], 4);
1117     CHECK_EQ(a[4], 5);
1118     CHECK_EQ(a[5], 6);
1119     CHECK_EQ(a[6], 7);
1120    
1121     memset(a, 0, sizeof(0));
1122     CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)"
1123     "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch(
1124     "1234567890123456",
1125     &a[0], &a[1], &a[2], &a[3],
1126     &a[4], &a[5], &a[6], &a[7],
1127     &a[8], &a[9], &a[10], &a[11],
1128     &a[12], &a[13], &a[14], &a[15]));
1129     CHECK_EQ(a[0], 1);
1130     CHECK_EQ(a[1], 2);
1131     CHECK_EQ(a[2], 3);
1132     CHECK_EQ(a[3], 4);
1133     CHECK_EQ(a[4], 5);
1134     CHECK_EQ(a[5], 6);
1135     CHECK_EQ(a[6], 7);
1136     CHECK_EQ(a[7], 8);
1137     CHECK_EQ(a[8], 9);
1138     CHECK_EQ(a[9], 0);
1139     CHECK_EQ(a[10], 1);
1140     CHECK_EQ(a[11], 2);
1141     CHECK_EQ(a[12], 3);
1142     CHECK_EQ(a[13], 4);
1143     CHECK_EQ(a[14], 5);
1144     CHECK_EQ(a[15], 6);
1145    
1146     /***** PartialMatch *****/
1147    
1148     printf("Testing PartialMatch\n");
1149    
1150     CHECK(RE("h.*o").PartialMatch("hello"));
1151     CHECK(RE("h.*o").PartialMatch("othello"));
1152     CHECK(RE("h.*o").PartialMatch("hello!"));
1153     CHECK(RE("((((((((((((((((((((x))))))))))))))))))))").PartialMatch("x"));
1154    
1155 nigel 93 /***** other tests *****/
1156    
1157 nigel 77 RadixTests();
1158     TestReplace();
1159     TestExtract();
1160     TestConsume();
1161     TestFindAndConsume();
1162 nigel 93 TestQuoteMetaAll();
1163 nigel 77 TestMatchNumberPeculiarity();
1164    
1165     // Check the pattern() accessor
1166     {
1167     const string kPattern = "http://([^/]+)/.*";
1168     const RE re(kPattern);
1169     CHECK_EQ(kPattern, re.pattern());
1170     }
1171    
1172     // Check RE error field.
1173     {
1174     RE re("foo");
1175     CHECK(re.error().empty()); // Must have no error
1176     }
1177    
1178     #ifdef SUPPORT_UTF8
1179     // Check UTF-8 handling
1180     {
1181     printf("Testing UTF-8 handling\n");
1182    
1183     // Three Japanese characters (nihongo)
1184 ph10 256 const unsigned char utf8_string[] = {
1185 nigel 77 0xe6, 0x97, 0xa5, // 65e5
1186     0xe6, 0x9c, 0xac, // 672c
1187     0xe8, 0xaa, 0x9e, // 8a9e
1188     0
1189     };
1190 ph10 256 const unsigned char utf8_pattern[] = {
1191 nigel 77 '.',
1192     0xe6, 0x9c, 0xac, // 672c
1193     '.',
1194     0
1195     };
1196    
1197     // Both should match in either mode, bytes or UTF-8
1198     RE re_test1(".........");
1199     CHECK(re_test1.FullMatch(utf8_string));
1200     RE re_test2("...", pcrecpp::UTF8());
1201     CHECK(re_test2.FullMatch(utf8_string));
1202    
1203     // Check that '.' matches one byte or UTF-8 character
1204     // according to the mode.
1205     string ss;
1206     RE re_test3("(.)");
1207     CHECK(re_test3.PartialMatch(utf8_string, &ss));
1208     CHECK_EQ(ss, string("\xe6"));
1209     RE re_test4("(.)", pcrecpp::UTF8());
1210     CHECK(re_test4.PartialMatch(utf8_string, &ss));
1211     CHECK_EQ(ss, string("\xe6\x97\xa5"));
1212    
1213     // Check that string matches itself in either mode
1214     RE re_test5(utf8_string);
1215     CHECK(re_test5.FullMatch(utf8_string));
1216     RE re_test6(utf8_string, pcrecpp::UTF8());
1217     CHECK(re_test6.FullMatch(utf8_string));
1218    
1219     // Check that pattern matches string only in UTF8 mode
1220     RE re_test7(utf8_pattern);
1221     CHECK(!re_test7.FullMatch(utf8_string));
1222     RE re_test8(utf8_pattern, pcrecpp::UTF8());
1223     CHECK(re_test8.FullMatch(utf8_string));
1224     }
1225    
1226     // Check that ungreedy, UTF8 regular expressions don't match when they
1227     // oughtn't -- see bug 82246.
1228     {
1229     // This code always worked.
1230     const char* pattern = "\\w+X";
1231     const string target = "a aX";
1232     RE match_sentence(pattern);
1233     RE match_sentence_re(pattern, pcrecpp::UTF8());
1234    
1235     CHECK(!match_sentence.FullMatch(target));
1236     CHECK(!match_sentence_re.FullMatch(target));
1237     }
1238    
1239     {
1240     const char* pattern = "(?U)\\w+X";
1241     const string target = "a aX";
1242     RE match_sentence(pattern);
1243     RE match_sentence_re(pattern, pcrecpp::UTF8());
1244    
1245     CHECK(!match_sentence.FullMatch(target));
1246     CHECK(!match_sentence_re.FullMatch(target));
1247     }
1248     #endif /* def SUPPORT_UTF8 */
1249    
1250     printf("Testing error reporting\n");
1251    
1252     { RE re("a\\1"); CHECK(!re.error().empty()); }
1253     {
1254     RE re("a[x");
1255     CHECK(!re.error().empty());
1256     }
1257     {
1258     RE re("a[z-a]");
1259     CHECK(!re.error().empty());
1260     }
1261     {
1262     RE re("a[[:foobar:]]");
1263     CHECK(!re.error().empty());
1264     }
1265     {
1266     RE re("a(b");
1267     CHECK(!re.error().empty());
1268     }
1269     {
1270     RE re("a\\");
1271     CHECK(!re.error().empty());
1272     }
1273    
1274 nigel 87 // Test that recursion is stopped
1275     TestRecursion();
1276 nigel 77
1277 nigel 81 // Test Options
1278     if (getenv("VERBOSE_TEST") != NULL)
1279     VERBOSE_TEST = true;
1280     TestOptions();
1281    
1282 nigel 93 // Test the constructors
1283     TestConstructors();
1284    
1285 nigel 77 // Done
1286     printf("OK\n");
1287    
1288     return 0;
1289     }

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12