/[pcre]/code/trunk/pcrecpp_unittest.cc
ViewVC logotype

Contents of /code/trunk/pcrecpp_unittest.cc

Parent Directory Parent Directory | Revision Log Revision Log


Revision 263 - (show annotations) (download)
Mon Nov 12 16:53:25 2007 UTC (7 years, 1 month ago) by ph10
File size: 38568 byte(s)
Apply Craig's patch, which makes it possible to "ignore" values in parens 
when parsing an RE using the c++ wrapper.

1 // -*- coding: utf-8 -*-
2 //
3 // Copyright (c) 2005 - 2006, Google Inc.
4 // All rights reserved.
5 //
6 // Redistribution and use in source and binary forms, with or without
7 // modification, are permitted provided that the following conditions are
8 // met:
9 //
10 // * Redistributions of source code must retain the above copyright
11 // notice, this list of conditions and the following disclaimer.
12 // * Redistributions in binary form must reproduce the above
13 // copyright notice, this list of conditions and the following disclaimer
14 // in the documentation and/or other materials provided with the
15 // distribution.
16 // * Neither the name of Google Inc. nor the names of its
17 // contributors may be used to endorse or promote products derived from
18 // this software without specific prior written permission.
19 //
20 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 //
32 // Author: Sanjay Ghemawat
33 //
34 // TODO: Test extractions for PartialMatch/Consume
35
36 #ifdef HAVE_CONFIG_H
37 #include "config.h"
38 #endif
39
40 #include <stdio.h>
41 #include <cassert>
42 #include <vector>
43 #include "pcrecpp.h"
44
45 using pcrecpp::StringPiece;
46 using pcrecpp::RE;
47 using pcrecpp::RE_Options;
48 using pcrecpp::Hex;
49 using pcrecpp::Octal;
50 using pcrecpp::CRadix;
51
// When true, the option-test helpers print an extra line describing each
// case they run. It is initialised to false here.
static bool VERBOSE_TEST = false;
53
// CHECK dies with a fatal error if condition is not true.  It is *not*
// controlled by NDEBUG, so the check will be executed regardless of
// compilation mode.  Therefore, it is safe to do things like:
//    CHECK_EQ(fp->Write(x), 4)
//
// On failure the file, line and the text of the condition are written to
// stderr and the process exits with status 1.
#define CHECK(condition) do {                           \
  if (!(condition)) {                                   \
    fprintf(stderr, "%s:%d: Check failed: %s\n",        \
            __FILE__, __LINE__, #condition);            \
    exit(1);                                            \
  }                                                     \
} while (0)

// CHECK_EQ(a, b) dies unless a == b.  Both arguments are parenthesised so
// that operands containing lower-precedence operators (for example
// CHECK_EQ(flags & mask, mask)) are compared as written instead of being
// regrouped around the ==.
#define CHECK_EQ(a, b) CHECK((a) == (b))
67
68 static void Timing1(int num_iters) {
69 // Same pattern lots of times
70 RE pattern("ruby:\\d+");
71 StringPiece p("ruby:1234");
72 for (int j = num_iters; j > 0; j--) {
73 CHECK(pattern.FullMatch(p));
74 }
75 }
76
77 static void Timing2(int num_iters) {
78 // Same pattern lots of times
79 RE pattern("ruby:(\\d+)");
80 int i;
81 for (int j = num_iters; j > 0; j--) {
82 CHECK(pattern.FullMatch("ruby:1234", &i));
83 CHECK_EQ(i, 1234);
84 }
85 }
86
87 static void Timing3(int num_iters) {
88 string text_string;
89 for (int j = num_iters; j > 0; j--) {
90 text_string += "this is another line\n";
91 }
92
93 RE line_matcher(".*\n");
94 string line;
95 StringPiece text(text_string);
96 int counter = 0;
97 while (line_matcher.Consume(&text)) {
98 counter++;
99 }
100 printf("Matched %d lines\n", counter);
101 }
102
#if 0  // change to "#if 1" if you have a way of defining VirtualProcessSize()

// Checks for memory leaks: constructs 100000 RE objects from distinct
// patterns, samples VirtualProcessSize() at iteration 50000 and again after
// the loop, and requires that the process grew by less than 2% in between.
static void LeakTest() {
  unsigned long long initial_size = 0;
  for (int i = 0; i < 100000; i++) {
    if (i == 50000) {
      initial_size = VirtualProcessSize();
      printf("Size after 50000: %llu\n", initial_size);
    }
    char buf[100];  // definitely big enough
    sprintf(buf, "pat%09d", i);
    RE newre(buf);
  }
  // Same type as initial_size so that it matches the %llu conversion.
  const unsigned long long final_size = VirtualProcessSize();
  printf("Size after 100000: %llu\n", final_size);
  // Subtract as doubles: if the process shrank, an unsigned difference would
  // wrap around to a huge positive value.
  const double growth =
      (double(final_size) - double(initial_size)) / double(final_size);
  printf("Growth: %0.2f%%\n", growth * 100);
  CHECK(growth < 0.02);  // Allow < 2% growth
}

#endif
125
126 static void RadixTests() {
127 printf("Testing hex\n");
128
129 #define CHECK_HEX(type, value) \
130 do { \
131 type v; \
132 CHECK(RE("([0-9a-fA-F]+)[uUlL]*").FullMatch(#value, Hex(&v))); \
133 CHECK_EQ(v, 0x ## value); \
134 CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0x" #value, CRadix(&v))); \
135 CHECK_EQ(v, 0x ## value); \
136 } while(0)
137
138 CHECK_HEX(short, 2bad);
139 CHECK_HEX(unsigned short, 2badU);
140 CHECK_HEX(int, dead);
141 CHECK_HEX(unsigned int, deadU);
142 CHECK_HEX(long, 7eadbeefL);
143 CHECK_HEX(unsigned long, deadbeefUL);
144 #ifdef HAVE_LONG_LONG
145 CHECK_HEX(long long, 12345678deadbeefLL);
146 #endif
147 #ifdef HAVE_UNSIGNED_LONG_LONG
148 CHECK_HEX(unsigned long long, cafebabedeadbeefULL);
149 #endif
150
151 #undef CHECK_HEX
152
153 printf("Testing octal\n");
154
155 #define CHECK_OCTAL(type, value) \
156 do { \
157 type v; \
158 CHECK(RE("([0-7]+)[uUlL]*").FullMatch(#value, Octal(&v))); \
159 CHECK_EQ(v, 0 ## value); \
160 CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0" #value, CRadix(&v))); \
161 CHECK_EQ(v, 0 ## value); \
162 } while(0)
163
164 CHECK_OCTAL(short, 77777);
165 CHECK_OCTAL(unsigned short, 177777U);
166 CHECK_OCTAL(int, 17777777777);
167 CHECK_OCTAL(unsigned int, 37777777777U);
168 CHECK_OCTAL(long, 17777777777L);
169 CHECK_OCTAL(unsigned long, 37777777777UL);
170 #ifdef HAVE_LONG_LONG
171 CHECK_OCTAL(long long, 777777777777777777777LL);
172 #endif
173 #ifdef HAVE_UNSIGNED_LONG_LONG
174 CHECK_OCTAL(unsigned long long, 1777777777777777777777ULL);
175 #endif
176
177 #undef CHECK_OCTAL
178
179 printf("Testing decimal\n");
180
181 #define CHECK_DECIMAL(type, value) \
182 do { \
183 type v; \
184 CHECK(RE("(-?[0-9]+)[uUlL]*").FullMatch(#value, &v)); \
185 CHECK_EQ(v, value); \
186 CHECK(RE("(-?[0-9a-fA-FxX]+)[uUlL]*").FullMatch(#value, CRadix(&v))); \
187 CHECK_EQ(v, value); \
188 } while(0)
189
190 CHECK_DECIMAL(short, -1);
191 CHECK_DECIMAL(unsigned short, 9999);
192 CHECK_DECIMAL(int, -1000);
193 CHECK_DECIMAL(unsigned int, 12345U);
194 CHECK_DECIMAL(long, -10000000L);
195 CHECK_DECIMAL(unsigned long, 3083324652U);
196 #ifdef HAVE_LONG_LONG
197 CHECK_DECIMAL(long long, -100000000000000LL);
198 #endif
199 #ifdef HAVE_UNSIGNED_LONG_LONG
200 CHECK_DECIMAL(unsigned long long, 1234567890987654321ULL);
201 #endif
202
203 #undef CHECK_DECIMAL
204
205 }
206
207 static void TestReplace() {
208 printf("Testing Replace\n");
209
210 struct ReplaceTest {
211 const char *regexp;
212 const char *rewrite;
213 const char *original;
214 const char *single;
215 const char *global;
216 };
217 static const ReplaceTest tests[] = {
218 { "(qu|[b-df-hj-np-tv-z]*)([a-z]+)",
219 "\\2\\1ay",
220 "the quick brown fox jumps over the lazy dogs.",
221 "ethay quick brown fox jumps over the lazy dogs.",
222 "ethay ickquay ownbray oxfay umpsjay overay ethay azylay ogsday." },
223 { "\\w+",
224 "\\0-NOSPAM",
225 "paul.haahr@google.com",
226 "paul-NOSPAM.haahr@google.com",
227 "paul-NOSPAM.haahr-NOSPAM@google-NOSPAM.com-NOSPAM" },
228 { "^",
229 "(START)",
230 "foo",
231 "(START)foo",
232 "(START)foo" },
233 { "^",
234 "(START)",
235 "",
236 "(START)",
237 "(START)" },
238 { "$",
239 "(END)",
240 "",
241 "(END)",
242 "(END)" },
243 { "b",
244 "bb",
245 "ababababab",
246 "abbabababab",
247 "abbabbabbabbabb" },
248 { "b",
249 "bb",
250 "bbbbbb",
251 "bbbbbbb",
252 "bbbbbbbbbbbb" },
253 { "b+",
254 "bb",
255 "bbbbbb",
256 "bb",
257 "bb" },
258 { "b*",
259 "bb",
260 "bbbbbb",
261 "bb",
262 "bb" },
263 { "b*",
264 "bb",
265 "aaaaa",
266 "bbaaaaa",
267 "bbabbabbabbabbabb" },
268 { "b*",
269 "bb",
270 "aa\naa\n",
271 "bbaa\naa\n",
272 "bbabbabb\nbbabbabb\nbb" },
273 { "b*",
274 "bb",
275 "aa\raa\r",
276 "bbaa\raa\r",
277 "bbabbabb\rbbabbabb\rbb" },
278 { "b*",
279 "bb",
280 "aa\r\naa\r\n",
281 "bbaa\r\naa\r\n",
282 "bbabbabb\r\nbbabbabb\r\nbb" },
283 #ifdef SUPPORT_UTF8
284 { "b*",
285 "bb",
286 "\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8", // utf8
287 "bb\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8",
288 "bb\xE3\x83\x9B""bb""\xE3\x83\xBC""bb""\xE3\x83\xA0""bb""\xE3\x81\xB8""bb" },
289 { "b*",
290 "bb",
291 "\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n", // utf8
292 "bb\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n",
293 ("bb\xE3\x83\x9B""bb\r\nbb""\xE3\x83\xBC""bb\rbb""\xE3\x83\xA0"
294 "bb\nbb""\xE3\x81\xB8""bb\r\nbb") },
295 #endif
296 { "", NULL, NULL, NULL, NULL }
297 };
298
299 #ifdef SUPPORT_UTF8
300 const bool support_utf8 = true;
301 #else
302 const bool support_utf8 = false;
303 #endif
304
305 for (const ReplaceTest *t = tests; t->original != NULL; ++t) {
306 RE re(t->regexp, RE_Options(PCRE_NEWLINE_CRLF).set_utf8(support_utf8));
307 assert(re.error().empty());
308 string one(t->original);
309 CHECK(re.Replace(t->rewrite, &one));
310 CHECK_EQ(one, t->single);
311 string all(t->original);
312 CHECK(re.GlobalReplace(t->rewrite, &all) > 0);
313 CHECK_EQ(all, t->global);
314 }
315
316 // One final test: test \r\n replacement when we're not in CRLF mode
317 {
318 RE re("b*", RE_Options(PCRE_NEWLINE_CR).set_utf8(support_utf8));
319 assert(re.error().empty());
320 string all("aa\r\naa\r\n");
321 CHECK(re.GlobalReplace("bb", &all) > 0);
322 CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
323 }
324 {
325 RE re("b*", RE_Options(PCRE_NEWLINE_LF).set_utf8(support_utf8));
326 assert(re.error().empty());
327 string all("aa\r\naa\r\n");
328 CHECK(re.GlobalReplace("bb", &all) > 0);
329 CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
330 }
331 // TODO: test what happens when no PCRE_NEWLINE_* flag is set.
332 // Alas, the answer depends on how pcre was compiled.
333 }
334
335 static void TestExtract() {
336 printf("Testing Extract\n");
337
338 string s;
339
340 CHECK(RE("(.*)@([^.]*)").Extract("\\2!\\1", "boris@kremvax.ru", &s));
341 CHECK_EQ(s, "kremvax!boris");
342
343 // check the RE interface as well
344 CHECK(RE(".*").Extract("'\\0'", "foo", &s));
345 CHECK_EQ(s, "'foo'");
346 CHECK(!RE("bar").Extract("'\\0'", "baz", &s));
347 CHECK_EQ(s, "'foo'");
348 }
349
350 static void TestConsume() {
351 printf("Testing Consume\n");
352
353 string word;
354
355 string s(" aaa b!@#$@#$cccc");
356 StringPiece input(s);
357
358 RE r("\\s*(\\w+)"); // matches a word, possibly proceeded by whitespace
359 CHECK(r.Consume(&input, &word));
360 CHECK_EQ(word, "aaa");
361 CHECK(r.Consume(&input, &word));
362 CHECK_EQ(word, "b");
363 CHECK(! r.Consume(&input, &word));
364 }
365
366 static void TestFindAndConsume() {
367 printf("Testing FindAndConsume\n");
368
369 string word;
370
371 string s(" aaa b!@#$@#$cccc");
372 StringPiece input(s);
373
374 RE r("(\\w+)"); // matches a word
375 CHECK(r.FindAndConsume(&input, &word));
376 CHECK_EQ(word, "aaa");
377 CHECK(r.FindAndConsume(&input, &word));
378 CHECK_EQ(word, "b");
379 CHECK(r.FindAndConsume(&input, &word));
380 CHECK_EQ(word, "cccc");
381 CHECK(! r.FindAndConsume(&input, &word));
382 }
383
384 static void TestMatchNumberPeculiarity() {
385 printf("Testing match-number peculiaraity\n");
386
387 string word1;
388 string word2;
389 string word3;
390
391 RE r("(foo)|(bar)|(baz)");
392 CHECK(r.PartialMatch("foo", &word1, &word2, &word3));
393 CHECK_EQ(word1, "foo");
394 CHECK_EQ(word2, "");
395 CHECK_EQ(word3, "");
396 CHECK(r.PartialMatch("bar", &word1, &word2, &word3));
397 CHECK_EQ(word1, "");
398 CHECK_EQ(word2, "bar");
399 CHECK_EQ(word3, "");
400 CHECK(r.PartialMatch("baz", &word1, &word2, &word3));
401 CHECK_EQ(word1, "");
402 CHECK_EQ(word2, "");
403 CHECK_EQ(word3, "baz");
404 CHECK(!r.PartialMatch("f", &word1, &word2, &word3));
405
406 string a;
407 CHECK(RE("(foo)|hello").FullMatch("hello", &a));
408 CHECK_EQ(a, "");
409 }
410
411 static void TestRecursion() {
412 printf("Testing recursion\n");
413
414 // Get one string that passes (sometimes), one that never does.
415 string text_good("abcdefghijk");
416 string text_bad("acdefghijkl");
417
418 // According to pcretest, matching text_good against (\w+)*b
419 // requires match_limit of at least 8192, and match_recursion_limit
420 // of at least 37.
421
422 RE_Options options_ml;
423 options_ml.set_match_limit(8192);
424 RE re("(\\w+)*b", options_ml);
425 CHECK(re.PartialMatch(text_good) == true);
426 CHECK(re.PartialMatch(text_bad) == false);
427 CHECK(re.FullMatch(text_good) == false);
428 CHECK(re.FullMatch(text_bad) == false);
429
430 options_ml.set_match_limit(1024);
431 RE re2("(\\w+)*b", options_ml);
432 CHECK(re2.PartialMatch(text_good) == false); // because of match_limit
433 CHECK(re2.PartialMatch(text_bad) == false);
434 CHECK(re2.FullMatch(text_good) == false);
435 CHECK(re2.FullMatch(text_bad) == false);
436
437 RE_Options options_mlr;
438 options_mlr.set_match_limit_recursion(50);
439 RE re3("(\\w+)*b", options_mlr);
440 CHECK(re3.PartialMatch(text_good) == true);
441 CHECK(re3.PartialMatch(text_bad) == false);
442 CHECK(re3.FullMatch(text_good) == false);
443 CHECK(re3.FullMatch(text_bad) == false);
444
445 options_mlr.set_match_limit_recursion(10);
446 RE re4("(\\w+)*b", options_mlr);
447 CHECK(re4.PartialMatch(text_good) == false);
448 CHECK(re4.PartialMatch(text_bad) == false);
449 CHECK(re4.FullMatch(text_good) == false);
450 CHECK(re4.FullMatch(text_bad) == false);
451 }
452
453 // A meta-quoted string, interpreted as a pattern, should always match
454 // the original unquoted string.
455 static void TestQuoteMeta(string unquoted, RE_Options options = RE_Options()) {
456 string quoted = RE::QuoteMeta(unquoted);
457 RE re(quoted, options);
458 CHECK(re.FullMatch(unquoted));
459 }
460
461 // A string containing meaningful regexp characters, which is then meta-
462 // quoted, should not generally match a string the unquoted string does.
463 static void NegativeTestQuoteMeta(string unquoted, string should_not_match,
464 RE_Options options = RE_Options()) {
465 string quoted = RE::QuoteMeta(unquoted);
466 RE re(quoted, options);
467 CHECK(!re.FullMatch(should_not_match));
468 }
469
470 // Tests that quoted meta characters match their original strings,
471 // and that a few things that shouldn't match indeed do not.
472 static void TestQuotaMetaSimple() {
473 TestQuoteMeta("foo");
474 TestQuoteMeta("foo.bar");
475 TestQuoteMeta("foo\\.bar");
476 TestQuoteMeta("[1-9]");
477 TestQuoteMeta("1.5-2.0?");
478 TestQuoteMeta("\\d");
479 TestQuoteMeta("Who doesn't like ice cream?");
480 TestQuoteMeta("((a|b)c?d*e+[f-h]i)");
481 TestQuoteMeta("((?!)xxx).*yyy");
482 TestQuoteMeta("([");
483 }
484
485 static void TestQuoteMetaSimpleNegative() {
486 NegativeTestQuoteMeta("foo", "bar");
487 NegativeTestQuoteMeta("...", "bar");
488 NegativeTestQuoteMeta("\\.", ".");
489 NegativeTestQuoteMeta("\\.", "..");
490 NegativeTestQuoteMeta("(a)", "a");
491 NegativeTestQuoteMeta("(a|b)", "a");
492 NegativeTestQuoteMeta("(a|b)", "(a)");
493 NegativeTestQuoteMeta("(a|b)", "a|b");
494 NegativeTestQuoteMeta("[0-9]", "0");
495 NegativeTestQuoteMeta("[0-9]", "0-9");
496 NegativeTestQuoteMeta("[0-9]", "[9]");
497 NegativeTestQuoteMeta("((?!)xxx)", "xxx");
498 }
499
500 static void TestQuoteMetaLatin1() {
501 TestQuoteMeta("3\xb2 = 9");
502 }
503
// QuoteMeta round trips for multi-byte UTF-8 sequences.  The whole body is
// compiled only when SUPPORT_UTF8 is defined.
static void TestQuoteMetaUtf8() {
#ifdef SUPPORT_UTF8
  static const char *const utf8_samples[] = {
    "Pl\xc3\xa1\x63ido Domingo",
    "xyz",                 // No fancy utf8
    "\xc2\xb0",            // 2-byte utf8 (degree symbol)
    "27\xc2\xb0 degrees",  // As a middle character
    "\xe2\x80\xb3",        // 3-byte utf8 (double prime)
    "\xf0\x9d\x85\x9f"     // 4-byte utf8 (music note)
  };
  const unsigned int num_samples =
      sizeof(utf8_samples) / sizeof(utf8_samples[0]);
  for (unsigned int k = 0; k < num_samples; ++k) {
    TestQuoteMeta(utf8_samples[k], pcrecpp::UTF8());
  }

  TestQuoteMeta("27\xc2\xb0");  // Interpreted as Latin-1, but should still work

  NegativeTestQuoteMeta("27\xc2\xb0",  // 2-byte utf (degree symbol)
                        "27\\\xc2\\\xb0",
                        pcrecpp::UTF8());
#endif
}
518
519 static void TestQuoteMetaAll() {
520 printf("Testing QuoteMeta\n");
521 TestQuotaMetaSimple();
522 TestQuoteMetaSimpleNegative();
523 TestQuoteMetaLatin1();
524 TestQuoteMetaUtf8();
525 }
526
527 //
528 // Options tests contributed by
529 // Giuseppe Maxia, CTO, Stardata s.r.l.
530 // July 2005
531 //
532 static void GetOneOptionResult(
533 const char *option_name,
534 const char *regex,
535 const char *str,
536 RE_Options options,
537 bool full,
538 string expected) {
539
540 printf("Testing Option <%s>\n", option_name);
541 if(VERBOSE_TEST)
542 printf("/%s/ finds \"%s\" within \"%s\" \n",
543 regex,
544 expected.c_str(),
545 str);
546 string captured("");
547 if (full)
548 RE(regex,options).FullMatch(str, &captured);
549 else
550 RE(regex,options).PartialMatch(str, &captured);
551 CHECK_EQ(captured, expected);
552 }
553
554 static void TestOneOption(
555 const char *option_name,
556 const char *regex,
557 const char *str,
558 RE_Options options,
559 bool full,
560 bool assertive = true) {
561
562 printf("Testing Option <%s>\n", option_name);
563 if (VERBOSE_TEST)
564 printf("'%s' %s /%s/ \n",
565 str,
566 (assertive? "matches" : "doesn't match"),
567 regex);
568 if (assertive) {
569 if (full)
570 CHECK(RE(regex,options).FullMatch(str));
571 else
572 CHECK(RE(regex,options).PartialMatch(str));
573 } else {
574 if (full)
575 CHECK(!RE(regex,options).FullMatch(str));
576 else
577 CHECK(!RE(regex,options).PartialMatch(str));
578 }
579 }
580
581 static void Test_CASELESS() {
582 RE_Options options;
583 RE_Options options2;
584
585 options.set_caseless(true);
586 TestOneOption("CASELESS (class)", "HELLO", "hello", options, false);
587 TestOneOption("CASELESS (class2)", "HELLO", "hello", options2.set_caseless(true), false);
588 TestOneOption("CASELESS (class)", "^[A-Z]+$", "Hello", options, false);
589
590 TestOneOption("CASELESS (function)", "HELLO", "hello", pcrecpp::CASELESS(), false);
591 TestOneOption("CASELESS (function)", "^[A-Z]+$", "Hello", pcrecpp::CASELESS(), false);
592 options.set_caseless(false);
593 TestOneOption("no CASELESS", "HELLO", "hello", options, false, false);
594 }
595
596 static void Test_MULTILINE() {
597 RE_Options options;
598 RE_Options options2;
599 const char *str = "HELLO\n" "cruel\n" "world\n";
600
601 options.set_multiline(true);
602 TestOneOption("MULTILINE (class)", "^cruel$", str, options, false);
603 TestOneOption("MULTILINE (class2)", "^cruel$", str, options2.set_multiline(true), false);
604 TestOneOption("MULTILINE (function)", "^cruel$", str, pcrecpp::MULTILINE(), false);
605 options.set_multiline(false);
606 TestOneOption("no MULTILINE", "^cruel$", str, options, false, false);
607 }
608
609 static void Test_DOTALL() {
610 RE_Options options;
611 RE_Options options2;
612 const char *str = "HELLO\n" "cruel\n" "world";
613
614 options.set_dotall(true);
615 TestOneOption("DOTALL (class)", "HELLO.*world", str, options, true);
616 TestOneOption("DOTALL (class2)", "HELLO.*world", str, options2.set_dotall(true), true);
617 TestOneOption("DOTALL (function)", "HELLO.*world", str, pcrecpp::DOTALL(), true);
618 options.set_dotall(false);
619 TestOneOption("no DOTALL", "HELLO.*world", str, options, true, false);
620 }
621
622 static void Test_DOLLAR_ENDONLY() {
623 RE_Options options;
624 RE_Options options2;
625 const char *str = "HELLO world\n";
626
627 TestOneOption("no DOLLAR_ENDONLY", "world$", str, options, false);
628 options.set_dollar_endonly(true);
629 TestOneOption("DOLLAR_ENDONLY 1", "world$", str, options, false, false);
630 TestOneOption("DOLLAR_ENDONLY 2", "world$", str, options2.set_dollar_endonly(true), false, false);
631 }
632
633 static void Test_EXTRA() {
634 RE_Options options;
635 const char *str = "HELLO";
636
637 options.set_extra(true);
638 TestOneOption("EXTRA 1", "\\HELL\\O", str, options, true, false );
639 TestOneOption("EXTRA 2", "\\HELL\\O", str, RE_Options().set_extra(true), true, false );
640 options.set_extra(false);
641 TestOneOption("no EXTRA", "\\HELL\\O", str, options, true );
642 }
643
644 static void Test_EXTENDED() {
645 RE_Options options;
646 RE_Options options2;
647 const char *str = "HELLO world";
648
649 options.set_extended(true);
650 TestOneOption("EXTENDED (class)", "HELLO world", str, options, false, false);
651 TestOneOption("EXTENDED (class2)", "HELLO world", str, options2.set_extended(true), false, false);
652 TestOneOption("EXTENDED (class)",
653 "^ HE L{2} O "
654 "\\s+ "
655 "\\w+ $ ",
656 str,
657 options,
658 false);
659
660 TestOneOption("EXTENDED (function)", "HELLO world", str, pcrecpp::EXTENDED(), false, false);
661 TestOneOption("EXTENDED (function)",
662 "^ HE L{2} O "
663 "\\s+ "
664 "\\w+ $ ",
665 str,
666 pcrecpp::EXTENDED(),
667 false);
668
669 options.set_extended(false);
670 TestOneOption("no EXTENDED", "HELLO world", str, options, false);
671 }
672
673 static void Test_NO_AUTO_CAPTURE() {
674 RE_Options options;
675 const char *str = "HELLO world";
676 string captured;
677
678 printf("Testing Option <no NO_AUTO_CAPTURE>\n");
679 if (VERBOSE_TEST)
680 printf("parentheses capture text\n");
681 RE re("(world|universe)$", options);
682 CHECK(re.Extract("\\1", str , &captured));
683 CHECK_EQ(captured, "world");
684 options.set_no_auto_capture(true);
685 printf("testing Option <NO_AUTO_CAPTURE>\n");
686 if (VERBOSE_TEST)
687 printf("parentheses do not capture text\n");
688 re.Extract("\\1",str, &captured );
689 CHECK_EQ(captured, "world");
690 }
691
692 static void Test_UNGREEDY() {
693 RE_Options options;
694 const char *str = "HELLO, 'this' is the 'world'";
695
696 options.set_ungreedy(true);
697 GetOneOptionResult("UNGREEDY 1", "('.*')", str, options, false, "'this'" );
698 GetOneOptionResult("UNGREEDY 2", "('.*')", str, RE_Options().set_ungreedy(true), false, "'this'" );
699 GetOneOptionResult("UNGREEDY", "('.*?')", str, options, false, "'this' is the 'world'" );
700
701 options.set_ungreedy(false);
702 GetOneOptionResult("no UNGREEDY", "('.*')", str, options, false, "'this' is the 'world'" );
703 GetOneOptionResult("no UNGREEDY", "('.*?')", str, options, false, "'this'" );
704 }
705
706 static void Test_all_options() {
707 const char *str = "HELLO\n" "cruel\n" "world";
708 RE_Options options;
709 options.set_all_options(PCRE_CASELESS | PCRE_DOTALL);
710
711 TestOneOption("all_options (CASELESS|DOTALL)", "^hello.*WORLD", str , options, false);
712 options.set_all_options(0);
713 TestOneOption("all_options (0)", "^hello.*WORLD", str , options, false, false);
714 options.set_all_options(PCRE_MULTILINE | PCRE_EXTENDED);
715
716 TestOneOption("all_options (MULTILINE|EXTENDED)", " ^ c r u e l $ ", str, options, false);
717 TestOneOption("all_options (MULTILINE|EXTENDED) with constructor",
718 " ^ c r u e l $ ",
719 str,
720 RE_Options(PCRE_MULTILINE | PCRE_EXTENDED),
721 false);
722
723 TestOneOption("all_options (MULTILINE|EXTENDED) with concatenation",
724 " ^ c r u e l $ ",
725 str,
726 RE_Options()
727 .set_multiline(true)
728 .set_extended(true),
729 false);
730
731 options.set_all_options(0);
732 TestOneOption("all_options (0)", "^ c r u e l $", str, options, false, false);
733
734 }
735
736 static void TestOptions() {
737 printf("Testing Options\n");
738 Test_CASELESS();
739 Test_MULTILINE();
740 Test_DOTALL();
741 Test_DOLLAR_ENDONLY();
742 Test_EXTENDED();
743 Test_NO_AUTO_CAPTURE();
744 Test_UNGREEDY();
745 Test_EXTRA();
746 Test_all_options();
747 }
748
749 static void TestConstructors() {
750 printf("Testing constructors\n");
751
752 RE_Options options;
753 options.set_dotall(true);
754 const char *str = "HELLO\n" "cruel\n" "world";
755
756 RE orig("HELLO.*world", options);
757 CHECK(orig.FullMatch(str));
758
759 RE copy1(orig);
760 CHECK(copy1.FullMatch(str));
761
762 RE copy2("not a match");
763 CHECK(!copy2.FullMatch(str));
764 copy2 = copy1;
765 CHECK(copy2.FullMatch(str));
766 copy2 = orig;
767 CHECK(copy2.FullMatch(str));
768
769 // Make sure when we assign to ourselves, nothing bad happens
770 orig = orig;
771 copy1 = copy1;
772 copy2 = copy2;
773 CHECK(orig.FullMatch(str));
774 CHECK(copy1.FullMatch(str));
775 CHECK(copy2.FullMatch(str));
776 }
777
778 int main(int argc, char** argv) {
779 // Treat any flag as --help
780 if (argc > 1 && argv[1][0] == '-') {
781 printf("Usage: %s [timing1|timing2|timing3 num-iters]\n"
782 " If 'timingX ###' is specified, run the given timing test\n"
783 " with the given number of iterations, rather than running\n"
784 " the default corectness test.\n", argv[0]);
785 return 0;
786 }
787
788 if (argc > 1) {
789 if ( argc == 2 || atoi(argv[2]) == 0) {
790 printf("timing mode needs a num-iters argument\n");
791 return 1;
792 }
793 if (!strcmp(argv[1], "timing1"))
794 Timing1(atoi(argv[2]));
795 else if (!strcmp(argv[1], "timing2"))
796 Timing2(atoi(argv[2]));
797 else if (!strcmp(argv[1], "timing3"))
798 Timing3(atoi(argv[2]));
799 else
800 printf("Unknown argument '%s'\n", argv[1]);
801 return 0;
802 }
803
804 printf("Testing FullMatch\n");
805
806 int i;
807 string s;
808
809 /***** FullMatch with no args *****/
810
811 CHECK(RE("h.*o").FullMatch("hello"));
812 CHECK(!RE("h.*o").FullMatch("othello")); // Must be anchored at front
813 CHECK(!RE("h.*o").FullMatch("hello!")); // Must be anchored at end
814 CHECK(RE("a*").FullMatch("aaaa")); // Fullmatch with normal op
815 CHECK(RE("a*?").FullMatch("aaaa")); // Fullmatch with nongreedy op
816 CHECK(RE("a*?\\z").FullMatch("aaaa")); // Two unusual ops
817
818 /***** FullMatch with args *****/
819
820 // Zero-arg
821 CHECK(RE("\\d+").FullMatch("1001"));
822
823 // Single-arg
824 CHECK(RE("(\\d+)").FullMatch("1001", &i));
825 CHECK_EQ(i, 1001);
826 CHECK(RE("(-?\\d+)").FullMatch("-123", &i));
827 CHECK_EQ(i, -123);
828 CHECK(!RE("()\\d+").FullMatch("10", &i));
829 CHECK(!RE("(\\d+)").FullMatch("1234567890123456789012345678901234567890",
830 &i));
831
832 // Digits surrounding integer-arg
833 CHECK(RE("1(\\d*)4").FullMatch("1234", &i));
834 CHECK_EQ(i, 23);
835 CHECK(RE("(\\d)\\d+").FullMatch("1234", &i));
836 CHECK_EQ(i, 1);
837 CHECK(RE("(-\\d)\\d+").FullMatch("-1234", &i));
838 CHECK_EQ(i, -1);
839 CHECK(RE("(\\d)").PartialMatch("1234", &i));
840 CHECK_EQ(i, 1);
841 CHECK(RE("(-\\d)").PartialMatch("-1234", &i));
842 CHECK_EQ(i, -1);
843
844 // String-arg
845 CHECK(RE("h(.*)o").FullMatch("hello", &s));
846 CHECK_EQ(s, string("ell"));
847
848 // StringPiece-arg
849 StringPiece sp;
850 CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &sp, &i));
851 CHECK_EQ(sp.size(), 4);
852 CHECK(memcmp(sp.data(), "ruby", 4) == 0);
853 CHECK_EQ(i, 1234);
854
855 // Multi-arg
856 CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &s, &i));
857 CHECK_EQ(s, string("ruby"));
858 CHECK_EQ(i, 1234);
859
860 // Ignore non-void* NULL arg
861 CHECK(RE("he(.*)lo").FullMatch("hello", (char*)NULL));
862 CHECK(RE("h(.*)o").FullMatch("hello", (string*)NULL));
863 CHECK(RE("h(.*)o").FullMatch("hello", (StringPiece*)NULL));
864 CHECK(RE("(.*)").FullMatch("1234", (int*)NULL));
865 CHECK(RE("(.*)").FullMatch("1234567890123456", (long long*)NULL));
866 CHECK(RE("(.*)").FullMatch("123.4567890123456", (double*)NULL));
867 CHECK(RE("(.*)").FullMatch("123.4567890123456", (float*)NULL));
868
869 // Fail on non-void* NULL arg if the match doesn't parse for the given type.
870 CHECK(!RE("h(.*)lo").FullMatch("hello", &s, (char*)NULL));
871 CHECK(!RE("(.*)").FullMatch("hello", (int*)NULL));
872 CHECK(!RE("(.*)").FullMatch("1234567890123456", (int*)NULL));
873 CHECK(!RE("(.*)").FullMatch("hello", (double*)NULL));
874 CHECK(!RE("(.*)").FullMatch("hello", (float*)NULL));
875
876 // Ignored arg
877 CHECK(RE("(\\w+)(:)(\\d+)").FullMatch("ruby:1234", &s, (void*)NULL, &i));
878 CHECK_EQ(s, string("ruby"));
879 CHECK_EQ(i, 1234);
880
881 // Type tests
882 {
883 char c;
884 CHECK(RE("(H)ello").FullMatch("Hello", &c));
885 CHECK_EQ(c, 'H');
886 }
887 {
888 unsigned char c;
889 CHECK(RE("(H)ello").FullMatch("Hello", &c));
890 CHECK_EQ(c, static_cast<unsigned char>('H'));
891 }
892 {
893 short v;
894 CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
895 CHECK(RE("(-?\\d+)").FullMatch("-100", &v)); CHECK_EQ(v, -100);
896 CHECK(RE("(-?\\d+)").FullMatch("32767", &v)); CHECK_EQ(v, 32767);
897 CHECK(RE("(-?\\d+)").FullMatch("-32768", &v)); CHECK_EQ(v, -32768);
898 CHECK(!RE("(-?\\d+)").FullMatch("-32769", &v));
899 CHECK(!RE("(-?\\d+)").FullMatch("32768", &v));
900 }
901 {
902 unsigned short v;
903 CHECK(RE("(\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
904 CHECK(RE("(\\d+)").FullMatch("32767", &v)); CHECK_EQ(v, 32767);
905 CHECK(RE("(\\d+)").FullMatch("65535", &v)); CHECK_EQ(v, 65535);
906 CHECK(!RE("(\\d+)").FullMatch("65536", &v));
907 }
908 {
909 int v;
910 static const int max_value = 0x7fffffff;
911 static const int min_value = -max_value - 1;
912 CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
913 CHECK(RE("(-?\\d+)").FullMatch("-100", &v)); CHECK_EQ(v, -100);
914 CHECK(RE("(-?\\d+)").FullMatch("2147483647", &v)); CHECK_EQ(v, max_value);
915 CHECK(RE("(-?\\d+)").FullMatch("-2147483648", &v)); CHECK_EQ(v, min_value);
916 CHECK(!RE("(-?\\d+)").FullMatch("-2147483649", &v));
917 CHECK(!RE("(-?\\d+)").FullMatch("2147483648", &v));
918 }
919 {
920 unsigned int v;
921 static const unsigned int max_value = 0xfffffffful;
922 CHECK(RE("(\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
923 CHECK(RE("(\\d+)").FullMatch("4294967295", &v)); CHECK_EQ(v, max_value);
924 CHECK(!RE("(\\d+)").FullMatch("4294967296", &v));
925 }
926 #ifdef HAVE_LONG_LONG
927 # if defined(__MINGW__) || defined(__MINGW32__)
928 # define LLD "%I64d"
929 # define LLU "%I64u"
930 # else
931 # define LLD "%lld"
932 # define LLU "%llu"
933 # endif
934 {
935 long long v;
936 static const long long max_value = 0x7fffffffffffffffLL;
937 static const long long min_value = -max_value - 1;
938 char buf[32]; // definitely big enough for a long long
939
940 CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
941 CHECK(RE("(-?\\d+)").FullMatch("-100",&v)); CHECK_EQ(v, -100);
942
943 sprintf(buf, LLD, max_value);
944 CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
945
946 sprintf(buf, LLD, min_value);
947 CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, min_value);
948
949 sprintf(buf, LLD, max_value);
950 assert(buf[strlen(buf)-1] != '9');
951 buf[strlen(buf)-1]++;
952 CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
953
954 sprintf(buf, LLD, min_value);
955 assert(buf[strlen(buf)-1] != '9');
956 buf[strlen(buf)-1]++;
957 CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
958 }
959 #endif
960 #if defined HAVE_UNSIGNED_LONG_LONG && defined HAVE_LONG_LONG
961 {
962 unsigned long long v;
963 long long v2;
964 static const unsigned long long max_value = 0xffffffffffffffffULL;
965 char buf[32]; // definitely big enough for a unsigned long long
966
967 CHECK(RE("(-?\\d+)").FullMatch("100",&v)); CHECK_EQ(v, 100);
968 CHECK(RE("(-?\\d+)").FullMatch("-100",&v2)); CHECK_EQ(v2, -100);
969
970 sprintf(buf, LLU, max_value);
971 CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
972
973 assert(buf[strlen(buf)-1] != '9');
974 buf[strlen(buf)-1]++;
975 CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
976 }
977 #endif
978 {
979 float v;
980 CHECK(RE("(.*)").FullMatch("100", &v));
981 CHECK(RE("(.*)").FullMatch("-100.", &v));
982 CHECK(RE("(.*)").FullMatch("1e23", &v));
983 }
984 {
985 double v;
986 CHECK(RE("(.*)").FullMatch("100", &v));
987 CHECK(RE("(.*)").FullMatch("-100.", &v));
988 CHECK(RE("(.*)").FullMatch("1e23", &v));
989 }
990
991 // Check that matching is fully anchored
992 CHECK(!RE("(\\d+)").FullMatch("x1001", &i));
993 CHECK(!RE("(\\d+)").FullMatch("1001x", &i));
994 CHECK(RE("x(\\d+)").FullMatch("x1001", &i)); CHECK_EQ(i, 1001);
995 CHECK(RE("(\\d+)x").FullMatch("1001x", &i)); CHECK_EQ(i, 1001);
996
997 // Braces
998 CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcd"));
999 CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcde"));
1000 CHECK(!RE("[0-9a-f+.-]{5,}").FullMatch("0abc"));
1001
1002 // Complicated RE
1003 CHECK(RE("foo|bar|[A-Z]").FullMatch("foo"));
1004 CHECK(RE("foo|bar|[A-Z]").FullMatch("bar"));
1005 CHECK(RE("foo|bar|[A-Z]").FullMatch("X"));
1006 CHECK(!RE("foo|bar|[A-Z]").FullMatch("XY"));
1007
1008 // Check full-match handling (needs '$' tacked on internally)
1009 CHECK(RE("fo|foo").FullMatch("fo"));
1010 CHECK(RE("fo|foo").FullMatch("foo"));
1011 CHECK(RE("fo|foo$").FullMatch("fo"));
1012 CHECK(RE("fo|foo$").FullMatch("foo"));
1013 CHECK(RE("foo$").FullMatch("foo"));
1014 CHECK(!RE("foo\\$").FullMatch("foo$bar"));
1015 CHECK(!RE("fo|bar").FullMatch("fox"));
1016
1017 // Uncomment the following if we change the handling of '$' to
1018 // prevent it from matching a trailing newline
1019 if (false) {
1020 // Check that we don't get bitten by pcre's special handling of a
1021 // '\n' at the end of the string matching '$'
1022 CHECK(!RE("foo$").PartialMatch("foo\n"));
1023 }
1024
1025 // Number of args
1026 int a[16];
1027 CHECK(RE("").FullMatch(""));
1028
1029 memset(a, 0, sizeof(0));
1030 CHECK(RE("(\\d){1}").FullMatch("1",
1031 &a[0]));
1032 CHECK_EQ(a[0], 1);
1033
1034 memset(a, 0, sizeof(0));
1035 CHECK(RE("(\\d)(\\d)").FullMatch("12",
1036 &a[0], &a[1]));
1037 CHECK_EQ(a[0], 1);
1038 CHECK_EQ(a[1], 2);
1039
1040 memset(a, 0, sizeof(0));
1041 CHECK(RE("(\\d)(\\d)(\\d)").FullMatch("123",
1042 &a[0], &a[1], &a[2]));
1043 CHECK_EQ(a[0], 1);
1044 CHECK_EQ(a[1], 2);
1045 CHECK_EQ(a[2], 3);
1046
1047 memset(a, 0, sizeof(0));
1048 CHECK(RE("(\\d)(\\d)(\\d)(\\d)").FullMatch("1234",
1049 &a[0], &a[1], &a[2], &a[3]));
1050 CHECK_EQ(a[0], 1);
1051 CHECK_EQ(a[1], 2);
1052 CHECK_EQ(a[2], 3);
1053 CHECK_EQ(a[3], 4);
1054
1055 memset(a, 0, sizeof(0));
1056 CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("12345",
1057 &a[0], &a[1], &a[2],
1058 &a[3], &a[4]));
1059 CHECK_EQ(a[0], 1);
1060 CHECK_EQ(a[1], 2);
1061 CHECK_EQ(a[2], 3);
1062 CHECK_EQ(a[3], 4);
1063 CHECK_EQ(a[4], 5);
1064
1065 memset(a, 0, sizeof(0));
1066 CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("123456",
1067 &a[0], &a[1], &a[2],
1068 &a[3], &a[4], &a[5]));
1069 CHECK_EQ(a[0], 1);
1070 CHECK_EQ(a[1], 2);
1071 CHECK_EQ(a[2], 3);
1072 CHECK_EQ(a[3], 4);
1073 CHECK_EQ(a[4], 5);
1074 CHECK_EQ(a[5], 6);
1075
1076 memset(a, 0, sizeof(0));
1077 CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("1234567",
1078 &a[0], &a[1], &a[2], &a[3],
1079 &a[4], &a[5], &a[6]));
1080 CHECK_EQ(a[0], 1);
1081 CHECK_EQ(a[1], 2);
1082 CHECK_EQ(a[2], 3);
1083 CHECK_EQ(a[3], 4);
1084 CHECK_EQ(a[4], 5);
1085 CHECK_EQ(a[5], 6);
1086 CHECK_EQ(a[6], 7);
1087
1088 memset(a, 0, sizeof(0));
1089 CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)"
1090 "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch(
1091 "1234567890123456",
1092 &a[0], &a[1], &a[2], &a[3],
1093 &a[4], &a[5], &a[6], &a[7],
1094 &a[8], &a[9], &a[10], &a[11],
1095 &a[12], &a[13], &a[14], &a[15]));
1096 CHECK_EQ(a[0], 1);
1097 CHECK_EQ(a[1], 2);
1098 CHECK_EQ(a[2], 3);
1099 CHECK_EQ(a[3], 4);
1100 CHECK_EQ(a[4], 5);
1101 CHECK_EQ(a[5], 6);
1102 CHECK_EQ(a[6], 7);
1103 CHECK_EQ(a[7], 8);
1104 CHECK_EQ(a[8], 9);
1105 CHECK_EQ(a[9], 0);
1106 CHECK_EQ(a[10], 1);
1107 CHECK_EQ(a[11], 2);
1108 CHECK_EQ(a[12], 3);
1109 CHECK_EQ(a[13], 4);
1110 CHECK_EQ(a[14], 5);
1111 CHECK_EQ(a[15], 6);
1112
1113 /***** PartialMatch *****/
1114
1115 printf("Testing PartialMatch\n");
1116
1117 CHECK(RE("h.*o").PartialMatch("hello"));
1118 CHECK(RE("h.*o").PartialMatch("othello"));
1119 CHECK(RE("h.*o").PartialMatch("hello!"));
1120 CHECK(RE("((((((((((((((((((((x))))))))))))))))))))").PartialMatch("x"));
1121
1122 /***** other tests *****/
1123
1124 RadixTests();
1125 TestReplace();
1126 TestExtract();
1127 TestConsume();
1128 TestFindAndConsume();
1129 TestQuoteMetaAll();
1130 TestMatchNumberPeculiarity();
1131
1132 // Check the pattern() accessor
1133 {
1134 const string kPattern = "http://([^/]+)/.*";
1135 const RE re(kPattern);
1136 CHECK_EQ(kPattern, re.pattern());
1137 }
1138
1139 // Check RE error field.
1140 {
1141 RE re("foo");
1142 CHECK(re.error().empty()); // Must have no error
1143 }
1144
1145 #ifdef SUPPORT_UTF8
1146 // Check UTF-8 handling
1147 {
1148 printf("Testing UTF-8 handling\n");
1149
1150 // Three Japanese characters (nihongo)
1151 const unsigned char utf8_string[] = {
1152 0xe6, 0x97, 0xa5, // 65e5
1153 0xe6, 0x9c, 0xac, // 672c
1154 0xe8, 0xaa, 0x9e, // 8a9e
1155 0
1156 };
1157 const unsigned char utf8_pattern[] = {
1158 '.',
1159 0xe6, 0x9c, 0xac, // 672c
1160 '.',
1161 0
1162 };
1163
1164 // Both should match in either mode, bytes or UTF-8
1165 RE re_test1(".........");
1166 CHECK(re_test1.FullMatch(utf8_string));
1167 RE re_test2("...", pcrecpp::UTF8());
1168 CHECK(re_test2.FullMatch(utf8_string));
1169
1170 // Check that '.' matches one byte or UTF-8 character
1171 // according to the mode.
1172 string ss;
1173 RE re_test3("(.)");
1174 CHECK(re_test3.PartialMatch(utf8_string, &ss));
1175 CHECK_EQ(ss, string("\xe6"));
1176 RE re_test4("(.)", pcrecpp::UTF8());
1177 CHECK(re_test4.PartialMatch(utf8_string, &ss));
1178 CHECK_EQ(ss, string("\xe6\x97\xa5"));
1179
1180 // Check that string matches itself in either mode
1181 RE re_test5(utf8_string);
1182 CHECK(re_test5.FullMatch(utf8_string));
1183 RE re_test6(utf8_string, pcrecpp::UTF8());
1184 CHECK(re_test6.FullMatch(utf8_string));
1185
1186 // Check that pattern matches string only in UTF8 mode
1187 RE re_test7(utf8_pattern);
1188 CHECK(!re_test7.FullMatch(utf8_string));
1189 RE re_test8(utf8_pattern, pcrecpp::UTF8());
1190 CHECK(re_test8.FullMatch(utf8_string));
1191 }
1192
1193 // Check that ungreedy, UTF8 regular expressions don't match when they
1194 // oughtn't -- see bug 82246.
1195 {
1196 // This code always worked.
1197 const char* pattern = "\\w+X";
1198 const string target = "a aX";
1199 RE match_sentence(pattern);
1200 RE match_sentence_re(pattern, pcrecpp::UTF8());
1201
1202 CHECK(!match_sentence.FullMatch(target));
1203 CHECK(!match_sentence_re.FullMatch(target));
1204 }
1205
1206 {
1207 const char* pattern = "(?U)\\w+X";
1208 const string target = "a aX";
1209 RE match_sentence(pattern);
1210 RE match_sentence_re(pattern, pcrecpp::UTF8());
1211
1212 CHECK(!match_sentence.FullMatch(target));
1213 CHECK(!match_sentence_re.FullMatch(target));
1214 }
1215 #endif /* def SUPPORT_UTF8 */
1216
1217 printf("Testing error reporting\n");
1218
1219 { RE re("a\\1"); CHECK(!re.error().empty()); }
1220 {
1221 RE re("a[x");
1222 CHECK(!re.error().empty());
1223 }
1224 {
1225 RE re("a[z-a]");
1226 CHECK(!re.error().empty());
1227 }
1228 {
1229 RE re("a[[:foobar:]]");
1230 CHECK(!re.error().empty());
1231 }
1232 {
1233 RE re("a(b");
1234 CHECK(!re.error().empty());
1235 }
1236 {
1237 RE re("a\\");
1238 CHECK(!re.error().empty());
1239 }
1240
1241 // Test that recursion is stopped
1242 TestRecursion();
1243
1244 // Test Options
1245 if (getenv("VERBOSE_TEST") != NULL)
1246 VERBOSE_TEST = true;
1247 TestOptions();
1248
1249 // Test the constructors
1250 TestConstructors();
1251
1252 // Done
1253 printf("OK\n");
1254
1255 return 0;
1256 }

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12