/[pcre]/code/trunk/pcrecpp_unittest.cc
ViewVC logotype

Contents of /code/trunk/pcrecpp_unittest.cc

Parent Directory Parent Directory | Revision Log Revision Log


Revision 179 - (show annotations) (download)
Wed Jun 13 08:53:45 2007 UTC (7 years, 3 months ago) by ph10
File size: 37518 byte(s)
Apply C++ patch to fix a bad optimization.

1 // -*- coding: utf-8 -*-
2 //
3 // Copyright (c) 2005 - 2006, Google Inc.
4 // All rights reserved.
5 //
6 // Redistribution and use in source and binary forms, with or without
7 // modification, are permitted provided that the following conditions are
8 // met:
9 //
10 // * Redistributions of source code must retain the above copyright
11 // notice, this list of conditions and the following disclaimer.
12 // * Redistributions in binary form must reproduce the above
13 // copyright notice, this list of conditions and the following disclaimer
14 // in the documentation and/or other materials provided with the
15 // distribution.
16 // * Neither the name of Google Inc. nor the names of its
17 // contributors may be used to endorse or promote products derived from
18 // this software without specific prior written permission.
19 //
20 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 //
32 // Author: Sanjay Ghemawat
33 //
34 // TODO: Test extractions for PartialMatch/Consume
35
36 #include <stdio.h>
37 #include <cassert>
38 #include <vector>
39 #include "config.h"
40 #include "pcrecpp.h"
41
42 using pcrecpp::StringPiece;
43 using pcrecpp::RE;
44 using pcrecpp::RE_Options;
45 using pcrecpp::Hex;
46 using pcrecpp::Octal;
47 using pcrecpp::CRadix;
48
49 static bool VERBOSE_TEST = false;
50
// CHECK dies with a fatal error (message on stderr, exit status 1) if
// condition is not true.  It is *not* controlled by NDEBUG, so the check
// will be executed regardless of compilation mode.  Therefore, it is safe
// to do things like:
//    CHECK_EQ(fp->Write(x), 4)
#define CHECK(condition) do {                           \
  if (!(condition)) {                                   \
    fprintf(stderr, "%s:%d: Check failed: %s\n",        \
            __FILE__, __LINE__, #condition);            \
    exit(1);                                            \
  }                                                     \
} while (0)

// CHECK_EQ dies unless the two expressions compare equal.  Each operand is
// parenthesized so that an argument containing an operator of lower
// precedence than == (for example `x & mask`) is compared as a whole
// instead of being split up by the expansion.
#define CHECK_EQ(a, b)  CHECK((a) == (b))
64
65 static void Timing1(int num_iters) {
66 // Same pattern lots of times
67 RE pattern("ruby:\\d+");
68 StringPiece p("ruby:1234");
69 for (int j = num_iters; j > 0; j--) {
70 CHECK(pattern.FullMatch(p));
71 }
72 }
73
74 static void Timing2(int num_iters) {
75 // Same pattern lots of times
76 RE pattern("ruby:(\\d+)");
77 int i;
78 for (int j = num_iters; j > 0; j--) {
79 CHECK(pattern.FullMatch("ruby:1234", &i));
80 CHECK_EQ(i, 1234);
81 }
82 }
83
84 static void Timing3(int num_iters) {
85 string text_string;
86 for (int j = num_iters; j > 0; j--) {
87 text_string += "this is another line\n";
88 }
89
90 RE line_matcher(".*\n");
91 string line;
92 StringPiece text(text_string);
93 int counter = 0;
94 while (line_matcher.Consume(&text)) {
95 counter++;
96 }
97 printf("Matched %d lines\n", counter);
98 }
99
100 #if 0 // uncomment this if you have a way of defining VirtualProcessSize()
101
102 static void LeakTest() {
103 // Check for memory leaks
104 unsigned long long initial_size = 0;
105 for (int i = 0; i < 100000; i++) {
106 if (i == 50000) {
107 initial_size = VirtualProcessSize();
108 printf("Size after 50000: %llu\n", initial_size);
109 }
110 char buf[100];
111 snprintf(buf, sizeof(buf), "pat%09d", i);
112 RE newre(buf);
113 }
114 uint64 final_size = VirtualProcessSize();
115 printf("Size after 100000: %llu\n", final_size);
116 const double growth = double(final_size - initial_size) / final_size;
117 printf("Growth: %0.2f%%", growth * 100);
118 CHECK(growth < 0.02); // Allow < 2% growth
119 }
120
121 #endif
122
123 static void RadixTests() {
124 printf("Testing hex\n");
125
126 #define CHECK_HEX(type, value) \
127 do { \
128 type v; \
129 CHECK(RE("([0-9a-fA-F]+)[uUlL]*").FullMatch(#value, Hex(&v))); \
130 CHECK_EQ(v, 0x ## value); \
131 CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0x" #value, CRadix(&v))); \
132 CHECK_EQ(v, 0x ## value); \
133 } while(0)
134
135 CHECK_HEX(short, 2bad);
136 CHECK_HEX(unsigned short, 2badU);
137 CHECK_HEX(int, dead);
138 CHECK_HEX(unsigned int, deadU);
139 CHECK_HEX(long, 7eadbeefL);
140 CHECK_HEX(unsigned long, deadbeefUL);
141 #ifdef HAVE_LONG_LONG
142 CHECK_HEX(long long, 12345678deadbeefLL);
143 #endif
144 #ifdef HAVE_UNSIGNED_LONG_LONG
145 CHECK_HEX(unsigned long long, cafebabedeadbeefULL);
146 #endif
147
148 #undef CHECK_HEX
149
150 printf("Testing octal\n");
151
152 #define CHECK_OCTAL(type, value) \
153 do { \
154 type v; \
155 CHECK(RE("([0-7]+)[uUlL]*").FullMatch(#value, Octal(&v))); \
156 CHECK_EQ(v, 0 ## value); \
157 CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0" #value, CRadix(&v))); \
158 CHECK_EQ(v, 0 ## value); \
159 } while(0)
160
161 CHECK_OCTAL(short, 77777);
162 CHECK_OCTAL(unsigned short, 177777U);
163 CHECK_OCTAL(int, 17777777777);
164 CHECK_OCTAL(unsigned int, 37777777777U);
165 CHECK_OCTAL(long, 17777777777L);
166 CHECK_OCTAL(unsigned long, 37777777777UL);
167 #ifdef HAVE_LONG_LONG
168 CHECK_OCTAL(long long, 777777777777777777777LL);
169 #endif
170 #ifdef HAVE_UNSIGNED_LONG_LONG
171 CHECK_OCTAL(unsigned long long, 1777777777777777777777ULL);
172 #endif
173
174 #undef CHECK_OCTAL
175
176 printf("Testing decimal\n");
177
178 #define CHECK_DECIMAL(type, value) \
179 do { \
180 type v; \
181 CHECK(RE("(-?[0-9]+)[uUlL]*").FullMatch(#value, &v)); \
182 CHECK_EQ(v, value); \
183 CHECK(RE("(-?[0-9a-fA-FxX]+)[uUlL]*").FullMatch(#value, CRadix(&v))); \
184 CHECK_EQ(v, value); \
185 } while(0)
186
187 CHECK_DECIMAL(short, -1);
188 CHECK_DECIMAL(unsigned short, 9999);
189 CHECK_DECIMAL(int, -1000);
190 CHECK_DECIMAL(unsigned int, 12345U);
191 CHECK_DECIMAL(long, -10000000L);
192 CHECK_DECIMAL(unsigned long, 3083324652U);
193 #ifdef HAVE_LONG_LONG
194 CHECK_DECIMAL(long long, -100000000000000LL);
195 #endif
196 #ifdef HAVE_UNSIGNED_LONG_LONG
197 CHECK_DECIMAL(unsigned long long, 1234567890987654321ULL);
198 #endif
199
200 #undef CHECK_DECIMAL
201
202 }
203
204 static void TestReplace() {
205 printf("Testing Replace\n");
206
207 struct ReplaceTest {
208 const char *regexp;
209 const char *rewrite;
210 const char *original;
211 const char *single;
212 const char *global;
213 };
214 static const ReplaceTest tests[] = {
215 { "(qu|[b-df-hj-np-tv-z]*)([a-z]+)",
216 "\\2\\1ay",
217 "the quick brown fox jumps over the lazy dogs.",
218 "ethay quick brown fox jumps over the lazy dogs.",
219 "ethay ickquay ownbray oxfay umpsjay overay ethay azylay ogsday." },
220 { "\\w+",
221 "\\0-NOSPAM",
222 "paul.haahr@google.com",
223 "paul-NOSPAM.haahr@google.com",
224 "paul-NOSPAM.haahr-NOSPAM@google-NOSPAM.com-NOSPAM" },
225 { "^",
226 "(START)",
227 "foo",
228 "(START)foo",
229 "(START)foo" },
230 { "^",
231 "(START)",
232 "",
233 "(START)",
234 "(START)" },
235 { "$",
236 "(END)",
237 "",
238 "(END)",
239 "(END)" },
240 { "b",
241 "bb",
242 "ababababab",
243 "abbabababab",
244 "abbabbabbabbabb" },
245 { "b",
246 "bb",
247 "bbbbbb",
248 "bbbbbbb",
249 "bbbbbbbbbbbb" },
250 { "b+",
251 "bb",
252 "bbbbbb",
253 "bb",
254 "bb" },
255 { "b*",
256 "bb",
257 "bbbbbb",
258 "bb",
259 "bb" },
260 { "b*",
261 "bb",
262 "aaaaa",
263 "bbaaaaa",
264 "bbabbabbabbabbabb" },
265 { "b*",
266 "bb",
267 "aa\naa\n",
268 "bbaa\naa\n",
269 "bbabbabb\nbbabbabb\nbb" },
270 { "b*",
271 "bb",
272 "aa\raa\r",
273 "bbaa\raa\r",
274 "bbabbabb\rbbabbabb\rbb" },
275 { "b*",
276 "bb",
277 "aa\r\naa\r\n",
278 "bbaa\r\naa\r\n",
279 "bbabbabb\r\nbbabbabb\r\nbb" },
280 #ifdef SUPPORT_UTF8
281 { "b*",
282 "bb",
283 "\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8", // utf8
284 "bb\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8",
285 "bb\xE3\x83\x9B""bb""\xE3\x83\xBC""bb""\xE3\x83\xA0""bb""\xE3\x81\xB8""bb" },
286 { "b*",
287 "bb",
288 "\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n", // utf8
289 "bb\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n",
290 ("bb\xE3\x83\x9B""bb\r\nbb""\xE3\x83\xBC""bb\rbb""\xE3\x83\xA0"
291 "bb\nbb""\xE3\x81\xB8""bb\r\nbb") },
292 #endif
293 { "", NULL, NULL, NULL, NULL }
294 };
295
296 #ifdef SUPPORT_UTF8
297 const bool support_utf8 = true;
298 #else
299 const bool support_utf8 = false;
300 #endif
301
302 for (const ReplaceTest *t = tests; t->original != NULL; ++t) {
303 RE re(t->regexp, RE_Options(PCRE_NEWLINE_CRLF).set_utf8(support_utf8));
304 assert(re.error().empty());
305 string one(t->original);
306 CHECK(re.Replace(t->rewrite, &one));
307 CHECK_EQ(one, t->single);
308 string all(t->original);
309 CHECK(re.GlobalReplace(t->rewrite, &all) > 0);
310 CHECK_EQ(all, t->global);
311 }
312
313 // One final test: test \r\n replacement when we're not in CRLF mode
314 {
315 RE re("b*", RE_Options(PCRE_NEWLINE_CR).set_utf8(support_utf8));
316 assert(re.error().empty());
317 string all("aa\r\naa\r\n");
318 CHECK(re.GlobalReplace("bb", &all) > 0);
319 CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
320 }
321 {
322 RE re("b*", RE_Options(PCRE_NEWLINE_LF).set_utf8(support_utf8));
323 assert(re.error().empty());
324 string all("aa\r\naa\r\n");
325 CHECK(re.GlobalReplace("bb", &all) > 0);
326 CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
327 }
328 // TODO: test what happens when no PCRE_NEWLINE_* flag is set.
329 // Alas, the answer depends on how pcre was compiled.
330 }
331
332 static void TestExtract() {
333 printf("Testing Extract\n");
334
335 string s;
336
337 CHECK(RE("(.*)@([^.]*)").Extract("\\2!\\1", "boris@kremvax.ru", &s));
338 CHECK_EQ(s, "kremvax!boris");
339
340 // check the RE interface as well
341 CHECK(RE(".*").Extract("'\\0'", "foo", &s));
342 CHECK_EQ(s, "'foo'");
343 CHECK(!RE("bar").Extract("'\\0'", "baz", &s));
344 CHECK_EQ(s, "'foo'");
345 }
346
347 static void TestConsume() {
348 printf("Testing Consume\n");
349
350 string word;
351
352 string s(" aaa b!@#$@#$cccc");
353 StringPiece input(s);
354
355 RE r("\\s*(\\w+)"); // matches a word, possibly proceeded by whitespace
356 CHECK(r.Consume(&input, &word));
357 CHECK_EQ(word, "aaa");
358 CHECK(r.Consume(&input, &word));
359 CHECK_EQ(word, "b");
360 CHECK(! r.Consume(&input, &word));
361 }
362
363 static void TestFindAndConsume() {
364 printf("Testing FindAndConsume\n");
365
366 string word;
367
368 string s(" aaa b!@#$@#$cccc");
369 StringPiece input(s);
370
371 RE r("(\\w+)"); // matches a word
372 CHECK(r.FindAndConsume(&input, &word));
373 CHECK_EQ(word, "aaa");
374 CHECK(r.FindAndConsume(&input, &word));
375 CHECK_EQ(word, "b");
376 CHECK(r.FindAndConsume(&input, &word));
377 CHECK_EQ(word, "cccc");
378 CHECK(! r.FindAndConsume(&input, &word));
379 }
380
381 static void TestMatchNumberPeculiarity() {
382 printf("Testing match-number peculiaraity\n");
383
384 string word1;
385 string word2;
386 string word3;
387
388 RE r("(foo)|(bar)|(baz)");
389 CHECK(r.PartialMatch("foo", &word1, &word2, &word3));
390 CHECK_EQ(word1, "foo");
391 CHECK_EQ(word2, "");
392 CHECK_EQ(word3, "");
393 CHECK(r.PartialMatch("bar", &word1, &word2, &word3));
394 CHECK_EQ(word1, "");
395 CHECK_EQ(word2, "bar");
396 CHECK_EQ(word3, "");
397 CHECK(r.PartialMatch("baz", &word1, &word2, &word3));
398 CHECK_EQ(word1, "");
399 CHECK_EQ(word2, "");
400 CHECK_EQ(word3, "baz");
401 CHECK(!r.PartialMatch("f", &word1, &word2, &word3));
402
403 string a;
404 CHECK(RE("(foo)|hello").FullMatch("hello", &a));
405 CHECK_EQ(a, "");
406 }
407
408 static void TestRecursion() {
409 printf("Testing recursion\n");
410
411 // Get one string that passes (sometimes), one that never does.
412 string text_good("abcdefghijk");
413 string text_bad("acdefghijkl");
414
415 // According to pcretest, matching text_good against (\w+)*b
416 // requires match_limit of at least 8192, and match_recursion_limit
417 // of at least 37.
418
419 RE_Options options_ml;
420 options_ml.set_match_limit(8192);
421 RE re("(\\w+)*b", options_ml);
422 CHECK(re.PartialMatch(text_good) == true);
423 CHECK(re.PartialMatch(text_bad) == false);
424 CHECK(re.FullMatch(text_good) == false);
425 CHECK(re.FullMatch(text_bad) == false);
426
427 options_ml.set_match_limit(1024);
428 RE re2("(\\w+)*b", options_ml);
429 CHECK(re2.PartialMatch(text_good) == false); // because of match_limit
430 CHECK(re2.PartialMatch(text_bad) == false);
431 CHECK(re2.FullMatch(text_good) == false);
432 CHECK(re2.FullMatch(text_bad) == false);
433
434 RE_Options options_mlr;
435 options_mlr.set_match_limit_recursion(50);
436 RE re3("(\\w+)*b", options_mlr);
437 CHECK(re3.PartialMatch(text_good) == true);
438 CHECK(re3.PartialMatch(text_bad) == false);
439 CHECK(re3.FullMatch(text_good) == false);
440 CHECK(re3.FullMatch(text_bad) == false);
441
442 options_mlr.set_match_limit_recursion(10);
443 RE re4("(\\w+)*b", options_mlr);
444 CHECK(re4.PartialMatch(text_good) == false);
445 CHECK(re4.PartialMatch(text_bad) == false);
446 CHECK(re4.FullMatch(text_good) == false);
447 CHECK(re4.FullMatch(text_bad) == false);
448 }
449
450 // A meta-quoted string, interpreted as a pattern, should always match
451 // the original unquoted string.
452 static void TestQuoteMeta(string unquoted, RE_Options options = RE_Options()) {
453 string quoted = RE::QuoteMeta(unquoted);
454 RE re(quoted, options);
455 CHECK(re.FullMatch(unquoted));
456 }
457
458 // A string containing meaningful regexp characters, which is then meta-
459 // quoted, should not generally match a string the unquoted string does.
460 static void NegativeTestQuoteMeta(string unquoted, string should_not_match,
461 RE_Options options = RE_Options()) {
462 string quoted = RE::QuoteMeta(unquoted);
463 RE re(quoted, options);
464 CHECK(!re.FullMatch(should_not_match));
465 }
466
467 // Tests that quoted meta characters match their original strings,
468 // and that a few things that shouldn't match indeed do not.
469 static void TestQuotaMetaSimple() {
470 TestQuoteMeta("foo");
471 TestQuoteMeta("foo.bar");
472 TestQuoteMeta("foo\\.bar");
473 TestQuoteMeta("[1-9]");
474 TestQuoteMeta("1.5-2.0?");
475 TestQuoteMeta("\\d");
476 TestQuoteMeta("Who doesn't like ice cream?");
477 TestQuoteMeta("((a|b)c?d*e+[f-h]i)");
478 TestQuoteMeta("((?!)xxx).*yyy");
479 TestQuoteMeta("([");
480 }
481
482 static void TestQuoteMetaSimpleNegative() {
483 NegativeTestQuoteMeta("foo", "bar");
484 NegativeTestQuoteMeta("...", "bar");
485 NegativeTestQuoteMeta("\\.", ".");
486 NegativeTestQuoteMeta("\\.", "..");
487 NegativeTestQuoteMeta("(a)", "a");
488 NegativeTestQuoteMeta("(a|b)", "a");
489 NegativeTestQuoteMeta("(a|b)", "(a)");
490 NegativeTestQuoteMeta("(a|b)", "a|b");
491 NegativeTestQuoteMeta("[0-9]", "0");
492 NegativeTestQuoteMeta("[0-9]", "0-9");
493 NegativeTestQuoteMeta("[0-9]", "[9]");
494 NegativeTestQuoteMeta("((?!)xxx)", "xxx");
495 }
496
497 static void TestQuoteMetaLatin1() {
498 TestQuoteMeta("3\xb2 = 9");
499 }
500
// QuoteMeta checks on multi-byte UTF-8 text, compiled with the UTF8
// option.  Does nothing when the library was built without SUPPORT_UTF8.
static void TestQuoteMetaUtf8() {
#ifdef SUPPORT_UTF8
  const RE_Options utf8_opts = pcrecpp::UTF8();

  TestQuoteMeta("Pl\xc3\xa1\x63ido Domingo", utf8_opts);
  TestQuoteMeta("xyz", utf8_opts);                 // No fancy utf8
  TestQuoteMeta("\xc2\xb0", utf8_opts);            // 2-byte utf8 (degree symbol)
  TestQuoteMeta("27\xc2\xb0 degrees", utf8_opts);  // As a middle character
  TestQuoteMeta("\xe2\x80\xb3", utf8_opts);        // 3-byte utf8 (double prime)
  TestQuoteMeta("\xf0\x9d\x85\x9f", utf8_opts);    // 4-byte utf8 (music note)

  // Interpreted as Latin-1, but should still work
  TestQuoteMeta("27\xc2\xb0");

  // 2-byte utf (degree symbol)
  NegativeTestQuoteMeta("27\xc2\xb0", "27\\\xc2\\\xb0", utf8_opts);
#endif
}
515
516 static void TestQuoteMetaAll() {
517 printf("Testing QuoteMeta\n");
518 TestQuotaMetaSimple();
519 TestQuoteMetaSimpleNegative();
520 TestQuoteMetaLatin1();
521 TestQuoteMetaUtf8();
522 }
523
524 //
525 // Options tests contributed by
526 // Giuseppe Maxia, CTO, Stardata s.r.l.
527 // July 2005
528 //
529 static void GetOneOptionResult(
530 const char *option_name,
531 const char *regex,
532 const char *str,
533 RE_Options options,
534 bool full,
535 string expected) {
536
537 printf("Testing Option <%s>\n", option_name);
538 if(VERBOSE_TEST)
539 printf("/%s/ finds \"%s\" within \"%s\" \n",
540 regex,
541 expected.c_str(),
542 str);
543 string captured("");
544 if (full)
545 RE(regex,options).FullMatch(str, &captured);
546 else
547 RE(regex,options).PartialMatch(str, &captured);
548 CHECK_EQ(captured, expected);
549 }
550
551 static void TestOneOption(
552 const char *option_name,
553 const char *regex,
554 const char *str,
555 RE_Options options,
556 bool full,
557 bool assertive = true) {
558
559 printf("Testing Option <%s>\n", option_name);
560 if (VERBOSE_TEST)
561 printf("'%s' %s /%s/ \n",
562 str,
563 (assertive? "matches" : "doesn't match"),
564 regex);
565 if (assertive) {
566 if (full)
567 CHECK(RE(regex,options).FullMatch(str));
568 else
569 CHECK(RE(regex,options).PartialMatch(str));
570 } else {
571 if (full)
572 CHECK(!RE(regex,options).FullMatch(str));
573 else
574 CHECK(!RE(regex,options).PartialMatch(str));
575 }
576 }
577
578 static void Test_CASELESS() {
579 RE_Options options;
580 RE_Options options2;
581
582 options.set_caseless(true);
583 TestOneOption("CASELESS (class)", "HELLO", "hello", options, false);
584 TestOneOption("CASELESS (class2)", "HELLO", "hello", options2.set_caseless(true), false);
585 TestOneOption("CASELESS (class)", "^[A-Z]+$", "Hello", options, false);
586
587 TestOneOption("CASELESS (function)", "HELLO", "hello", pcrecpp::CASELESS(), false);
588 TestOneOption("CASELESS (function)", "^[A-Z]+$", "Hello", pcrecpp::CASELESS(), false);
589 options.set_caseless(false);
590 TestOneOption("no CASELESS", "HELLO", "hello", options, false, false);
591 }
592
593 static void Test_MULTILINE() {
594 RE_Options options;
595 RE_Options options2;
596 const char *str = "HELLO\n" "cruel\n" "world\n";
597
598 options.set_multiline(true);
599 TestOneOption("MULTILINE (class)", "^cruel$", str, options, false);
600 TestOneOption("MULTILINE (class2)", "^cruel$", str, options2.set_multiline(true), false);
601 TestOneOption("MULTILINE (function)", "^cruel$", str, pcrecpp::MULTILINE(), false);
602 options.set_multiline(false);
603 TestOneOption("no MULTILINE", "^cruel$", str, options, false, false);
604 }
605
606 static void Test_DOTALL() {
607 RE_Options options;
608 RE_Options options2;
609 const char *str = "HELLO\n" "cruel\n" "world";
610
611 options.set_dotall(true);
612 TestOneOption("DOTALL (class)", "HELLO.*world", str, options, true);
613 TestOneOption("DOTALL (class2)", "HELLO.*world", str, options2.set_dotall(true), true);
614 TestOneOption("DOTALL (function)", "HELLO.*world", str, pcrecpp::DOTALL(), true);
615 options.set_dotall(false);
616 TestOneOption("no DOTALL", "HELLO.*world", str, options, true, false);
617 }
618
619 static void Test_DOLLAR_ENDONLY() {
620 RE_Options options;
621 RE_Options options2;
622 const char *str = "HELLO world\n";
623
624 TestOneOption("no DOLLAR_ENDONLY", "world$", str, options, false);
625 options.set_dollar_endonly(true);
626 TestOneOption("DOLLAR_ENDONLY 1", "world$", str, options, false, false);
627 TestOneOption("DOLLAR_ENDONLY 2", "world$", str, options2.set_dollar_endonly(true), false, false);
628 }
629
630 static void Test_EXTRA() {
631 RE_Options options;
632 const char *str = "HELLO";
633
634 options.set_extra(true);
635 TestOneOption("EXTRA 1", "\\HELL\\O", str, options, true, false );
636 TestOneOption("EXTRA 2", "\\HELL\\O", str, RE_Options().set_extra(true), true, false );
637 options.set_extra(false);
638 TestOneOption("no EXTRA", "\\HELL\\O", str, options, true );
639 }
640
641 static void Test_EXTENDED() {
642 RE_Options options;
643 RE_Options options2;
644 const char *str = "HELLO world";
645
646 options.set_extended(true);
647 TestOneOption("EXTENDED (class)", "HELLO world", str, options, false, false);
648 TestOneOption("EXTENDED (class2)", "HELLO world", str, options2.set_extended(true), false, false);
649 TestOneOption("EXTENDED (class)",
650 "^ HE L{2} O "
651 "\\s+ "
652 "\\w+ $ ",
653 str,
654 options,
655 false);
656
657 TestOneOption("EXTENDED (function)", "HELLO world", str, pcrecpp::EXTENDED(), false, false);
658 TestOneOption("EXTENDED (function)",
659 "^ HE L{2} O "
660 "\\s+ "
661 "\\w+ $ ",
662 str,
663 pcrecpp::EXTENDED(),
664 false);
665
666 options.set_extended(false);
667 TestOneOption("no EXTENDED", "HELLO world", str, options, false);
668 }
669
670 static void Test_NO_AUTO_CAPTURE() {
671 RE_Options options;
672 const char *str = "HELLO world";
673 string captured;
674
675 printf("Testing Option <no NO_AUTO_CAPTURE>\n");
676 if (VERBOSE_TEST)
677 printf("parentheses capture text\n");
678 RE re("(world|universe)$", options);
679 CHECK(re.Extract("\\1", str , &captured));
680 CHECK_EQ(captured, "world");
681 options.set_no_auto_capture(true);
682 printf("testing Option <NO_AUTO_CAPTURE>\n");
683 if (VERBOSE_TEST)
684 printf("parentheses do not capture text\n");
685 re.Extract("\\1",str, &captured );
686 CHECK_EQ(captured, "world");
687 }
688
689 static void Test_UNGREEDY() {
690 RE_Options options;
691 const char *str = "HELLO, 'this' is the 'world'";
692
693 options.set_ungreedy(true);
694 GetOneOptionResult("UNGREEDY 1", "('.*')", str, options, false, "'this'" );
695 GetOneOptionResult("UNGREEDY 2", "('.*')", str, RE_Options().set_ungreedy(true), false, "'this'" );
696 GetOneOptionResult("UNGREEDY", "('.*?')", str, options, false, "'this' is the 'world'" );
697
698 options.set_ungreedy(false);
699 GetOneOptionResult("no UNGREEDY", "('.*')", str, options, false, "'this' is the 'world'" );
700 GetOneOptionResult("no UNGREEDY", "('.*?')", str, options, false, "'this'" );
701 }
702
703 static void Test_all_options() {
704 const char *str = "HELLO\n" "cruel\n" "world";
705 RE_Options options;
706 options.set_all_options(PCRE_CASELESS | PCRE_DOTALL);
707
708 TestOneOption("all_options (CASELESS|DOTALL)", "^hello.*WORLD", str , options, false);
709 options.set_all_options(0);
710 TestOneOption("all_options (0)", "^hello.*WORLD", str , options, false, false);
711 options.set_all_options(PCRE_MULTILINE | PCRE_EXTENDED);
712
713 TestOneOption("all_options (MULTILINE|EXTENDED)", " ^ c r u e l $ ", str, options, false);
714 TestOneOption("all_options (MULTILINE|EXTENDED) with constructor",
715 " ^ c r u e l $ ",
716 str,
717 RE_Options(PCRE_MULTILINE | PCRE_EXTENDED),
718 false);
719
720 TestOneOption("all_options (MULTILINE|EXTENDED) with concatenation",
721 " ^ c r u e l $ ",
722 str,
723 RE_Options()
724 .set_multiline(true)
725 .set_extended(true),
726 false);
727
728 options.set_all_options(0);
729 TestOneOption("all_options (0)", "^ c r u e l $", str, options, false, false);
730
731 }
732
733 static void TestOptions() {
734 printf("Testing Options\n");
735 Test_CASELESS();
736 Test_MULTILINE();
737 Test_DOTALL();
738 Test_DOLLAR_ENDONLY();
739 Test_EXTENDED();
740 Test_NO_AUTO_CAPTURE();
741 Test_UNGREEDY();
742 Test_EXTRA();
743 Test_all_options();
744 }
745
746 static void TestConstructors() {
747 printf("Testing constructors\n");
748
749 RE_Options options;
750 options.set_dotall(true);
751 const char *str = "HELLO\n" "cruel\n" "world";
752
753 RE orig("HELLO.*world", options);
754 CHECK(orig.FullMatch(str));
755
756 RE copy1(orig);
757 CHECK(copy1.FullMatch(str));
758
759 RE copy2("not a match");
760 CHECK(!copy2.FullMatch(str));
761 copy2 = copy1;
762 CHECK(copy2.FullMatch(str));
763 copy2 = orig;
764 CHECK(copy2.FullMatch(str));
765
766 // Make sure when we assign to ourselves, nothing bad happens
767 orig = orig;
768 copy1 = copy1;
769 copy2 = copy2;
770 CHECK(orig.FullMatch(str));
771 CHECK(copy1.FullMatch(str));
772 CHECK(copy2.FullMatch(str));
773 }
774
775 int main(int argc, char** argv) {
776 // Treat any flag as --help
777 if (argc > 1 && argv[1][0] == '-') {
778 printf("Usage: %s [timing1|timing2|timing3 num-iters]\n"
779 " If 'timingX ###' is specified, run the given timing test\n"
780 " with the given number of iterations, rather than running\n"
781 " the default corectness test.\n", argv[0]);
782 return 0;
783 }
784
785 if (argc > 1) {
786 if ( argc == 2 || atoi(argv[2]) == 0) {
787 printf("timing mode needs a num-iters argument\n");
788 return 1;
789 }
790 if (!strcmp(argv[1], "timing1"))
791 Timing1(atoi(argv[2]));
792 else if (!strcmp(argv[1], "timing2"))
793 Timing2(atoi(argv[2]));
794 else if (!strcmp(argv[1], "timing3"))
795 Timing3(atoi(argv[2]));
796 else
797 printf("Unknown argument '%s'\n", argv[1]);
798 return 0;
799 }
800
801 printf("Testing FullMatch\n");
802
803 int i;
804 string s;
805
806 /***** FullMatch with no args *****/
807
808 CHECK(RE("h.*o").FullMatch("hello"));
809 CHECK(!RE("h.*o").FullMatch("othello")); // Must be anchored at front
810 CHECK(!RE("h.*o").FullMatch("hello!")); // Must be anchored at end
811 CHECK(RE("a*").FullMatch("aaaa")); // Fullmatch with normal op
812 CHECK(RE("a*?").FullMatch("aaaa")); // Fullmatch with nongreedy op
813 CHECK(RE("a*?\\z").FullMatch("aaaa")); // Two unusual ops
814
815 /***** FullMatch with args *****/
816
817 // Zero-arg
818 CHECK(RE("\\d+").FullMatch("1001"));
819
820 // Single-arg
821 CHECK(RE("(\\d+)").FullMatch("1001", &i));
822 CHECK_EQ(i, 1001);
823 CHECK(RE("(-?\\d+)").FullMatch("-123", &i));
824 CHECK_EQ(i, -123);
825 CHECK(!RE("()\\d+").FullMatch("10", &i));
826 CHECK(!RE("(\\d+)").FullMatch("1234567890123456789012345678901234567890",
827 &i));
828
829 // Digits surrounding integer-arg
830 CHECK(RE("1(\\d*)4").FullMatch("1234", &i));
831 CHECK_EQ(i, 23);
832 CHECK(RE("(\\d)\\d+").FullMatch("1234", &i));
833 CHECK_EQ(i, 1);
834 CHECK(RE("(-\\d)\\d+").FullMatch("-1234", &i));
835 CHECK_EQ(i, -1);
836 CHECK(RE("(\\d)").PartialMatch("1234", &i));
837 CHECK_EQ(i, 1);
838 CHECK(RE("(-\\d)").PartialMatch("-1234", &i));
839 CHECK_EQ(i, -1);
840
841 // String-arg
842 CHECK(RE("h(.*)o").FullMatch("hello", &s));
843 CHECK_EQ(s, string("ell"));
844
845 // StringPiece-arg
846 StringPiece sp;
847 CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &sp, &i));
848 CHECK_EQ(sp.size(), 4);
849 CHECK(memcmp(sp.data(), "ruby", 4) == 0);
850 CHECK_EQ(i, 1234);
851
852 // Multi-arg
853 CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &s, &i));
854 CHECK_EQ(s, string("ruby"));
855 CHECK_EQ(i, 1234);
856
857 // Ignored arg
858 CHECK(RE("(\\w+)(:)(\\d+)").FullMatch("ruby:1234", &s, (void*)NULL, &i));
859 CHECK_EQ(s, string("ruby"));
860 CHECK_EQ(i, 1234);
861
862 // Type tests
863 {
864 char c;
865 CHECK(RE("(H)ello").FullMatch("Hello", &c));
866 CHECK_EQ(c, 'H');
867 }
868 {
869 unsigned char c;
870 CHECK(RE("(H)ello").FullMatch("Hello", &c));
871 CHECK_EQ(c, static_cast<unsigned char>('H'));
872 }
873 {
874 short v;
875 CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
876 CHECK(RE("(-?\\d+)").FullMatch("-100", &v)); CHECK_EQ(v, -100);
877 CHECK(RE("(-?\\d+)").FullMatch("32767", &v)); CHECK_EQ(v, 32767);
878 CHECK(RE("(-?\\d+)").FullMatch("-32768", &v)); CHECK_EQ(v, -32768);
879 CHECK(!RE("(-?\\d+)").FullMatch("-32769", &v));
880 CHECK(!RE("(-?\\d+)").FullMatch("32768", &v));
881 }
882 {
883 unsigned short v;
884 CHECK(RE("(\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
885 CHECK(RE("(\\d+)").FullMatch("32767", &v)); CHECK_EQ(v, 32767);
886 CHECK(RE("(\\d+)").FullMatch("65535", &v)); CHECK_EQ(v, 65535);
887 CHECK(!RE("(\\d+)").FullMatch("65536", &v));
888 }
889 {
890 int v;
891 static const int max_value = 0x7fffffff;
892 static const int min_value = -max_value - 1;
893 CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
894 CHECK(RE("(-?\\d+)").FullMatch("-100", &v)); CHECK_EQ(v, -100);
895 CHECK(RE("(-?\\d+)").FullMatch("2147483647", &v)); CHECK_EQ(v, max_value);
896 CHECK(RE("(-?\\d+)").FullMatch("-2147483648", &v)); CHECK_EQ(v, min_value);
897 CHECK(!RE("(-?\\d+)").FullMatch("-2147483649", &v));
898 CHECK(!RE("(-?\\d+)").FullMatch("2147483648", &v));
899 }
900 {
901 unsigned int v;
902 static const unsigned int max_value = 0xfffffffful;
903 CHECK(RE("(\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
904 CHECK(RE("(\\d+)").FullMatch("4294967295", &v)); CHECK_EQ(v, max_value);
905 CHECK(!RE("(\\d+)").FullMatch("4294967296", &v));
906 }
907 #ifdef HAVE_LONG_LONG
908 {
909 long long v;
910 static const long long max_value = 0x7fffffffffffffffLL;
911 static const long long min_value = -max_value - 1;
912 char buf[32];
913
914 CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
915 CHECK(RE("(-?\\d+)").FullMatch("-100",&v)); CHECK_EQ(v, -100);
916
917 snprintf(buf, sizeof(buf), "%lld", max_value);
918 CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
919
920 snprintf(buf, sizeof(buf), "%lld", min_value);
921 CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, min_value);
922
923 snprintf(buf, sizeof(buf), "%lld", max_value);
924 assert(buf[strlen(buf)-1] != '9');
925 buf[strlen(buf)-1]++;
926 CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
927
928 snprintf(buf, sizeof(buf), "%lld", min_value);
929 assert(buf[strlen(buf)-1] != '9');
930 buf[strlen(buf)-1]++;
931 CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
932 }
933 #endif
934 #if defined HAVE_UNSIGNED_LONG_LONG && defined HAVE_LONG_LONG
935 {
936 unsigned long long v;
937 long long v2;
938 static const unsigned long long max_value = 0xffffffffffffffffULL;
939 char buf[32];
940
941 CHECK(RE("(-?\\d+)").FullMatch("100",&v)); CHECK_EQ(v, 100);
942 CHECK(RE("(-?\\d+)").FullMatch("-100",&v2)); CHECK_EQ(v2, -100);
943
944 snprintf(buf, sizeof(buf), "%llu", max_value);
945 CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
946
947 assert(buf[strlen(buf)-1] != '9');
948 buf[strlen(buf)-1]++;
949 CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
950 }
951 #endif
952 {
953 float v;
954 CHECK(RE("(.*)").FullMatch("100", &v));
955 CHECK(RE("(.*)").FullMatch("-100.", &v));
956 CHECK(RE("(.*)").FullMatch("1e23", &v));
957 }
958 {
959 double v;
960 CHECK(RE("(.*)").FullMatch("100", &v));
961 CHECK(RE("(.*)").FullMatch("-100.", &v));
962 CHECK(RE("(.*)").FullMatch("1e23", &v));
963 }
964
965 // Check that matching is fully anchored
966 CHECK(!RE("(\\d+)").FullMatch("x1001", &i));
967 CHECK(!RE("(\\d+)").FullMatch("1001x", &i));
968 CHECK(RE("x(\\d+)").FullMatch("x1001", &i)); CHECK_EQ(i, 1001);
969 CHECK(RE("(\\d+)x").FullMatch("1001x", &i)); CHECK_EQ(i, 1001);
970
971 // Braces
972 CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcd"));
973 CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcde"));
974 CHECK(!RE("[0-9a-f+.-]{5,}").FullMatch("0abc"));
975
976 // Complicated RE
977 CHECK(RE("foo|bar|[A-Z]").FullMatch("foo"));
978 CHECK(RE("foo|bar|[A-Z]").FullMatch("bar"));
979 CHECK(RE("foo|bar|[A-Z]").FullMatch("X"));
980 CHECK(!RE("foo|bar|[A-Z]").FullMatch("XY"));
981
982 // Check full-match handling (needs '$' tacked on internally)
983 CHECK(RE("fo|foo").FullMatch("fo"));
984 CHECK(RE("fo|foo").FullMatch("foo"));
985 CHECK(RE("fo|foo$").FullMatch("fo"));
986 CHECK(RE("fo|foo$").FullMatch("foo"));
987 CHECK(RE("foo$").FullMatch("foo"));
988 CHECK(!RE("foo\\$").FullMatch("foo$bar"));
989 CHECK(!RE("fo|bar").FullMatch("fox"));
990
991 // Uncomment the following if we change the handling of '$' to
992 // prevent it from matching a trailing newline
993 if (false) {
994 // Check that we don't get bitten by pcre's special handling of a
995 // '\n' at the end of the string matching '$'
996 CHECK(!RE("foo$").PartialMatch("foo\n"));
997 }
998
999 // Number of args
1000 int a[16];
1001 CHECK(RE("").FullMatch(""));
1002
1003 memset(a, 0, sizeof(0));
1004 CHECK(RE("(\\d){1}").FullMatch("1",
1005 &a[0]));
1006 CHECK_EQ(a[0], 1);
1007
1008 memset(a, 0, sizeof(0));
1009 CHECK(RE("(\\d)(\\d)").FullMatch("12",
1010 &a[0], &a[1]));
1011 CHECK_EQ(a[0], 1);
1012 CHECK_EQ(a[1], 2);
1013
1014 memset(a, 0, sizeof(0));
1015 CHECK(RE("(\\d)(\\d)(\\d)").FullMatch("123",
1016 &a[0], &a[1], &a[2]));
1017 CHECK_EQ(a[0], 1);
1018 CHECK_EQ(a[1], 2);
1019 CHECK_EQ(a[2], 3);
1020
1021 memset(a, 0, sizeof(0));
1022 CHECK(RE("(\\d)(\\d)(\\d)(\\d)").FullMatch("1234",
1023 &a[0], &a[1], &a[2], &a[3]));
1024 CHECK_EQ(a[0], 1);
1025 CHECK_EQ(a[1], 2);
1026 CHECK_EQ(a[2], 3);
1027 CHECK_EQ(a[3], 4);
1028
1029 memset(a, 0, sizeof(0));
1030 CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("12345",
1031 &a[0], &a[1], &a[2],
1032 &a[3], &a[4]));
1033 CHECK_EQ(a[0], 1);
1034 CHECK_EQ(a[1], 2);
1035 CHECK_EQ(a[2], 3);
1036 CHECK_EQ(a[3], 4);
1037 CHECK_EQ(a[4], 5);
1038
1039 memset(a, 0, sizeof(0));
1040 CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("123456",
1041 &a[0], &a[1], &a[2],
1042 &a[3], &a[4], &a[5]));
1043 CHECK_EQ(a[0], 1);
1044 CHECK_EQ(a[1], 2);
1045 CHECK_EQ(a[2], 3);
1046 CHECK_EQ(a[3], 4);
1047 CHECK_EQ(a[4], 5);
1048 CHECK_EQ(a[5], 6);
1049
1050 memset(a, 0, sizeof(0));
1051 CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("1234567",
1052 &a[0], &a[1], &a[2], &a[3],
1053 &a[4], &a[5], &a[6]));
1054 CHECK_EQ(a[0], 1);
1055 CHECK_EQ(a[1], 2);
1056 CHECK_EQ(a[2], 3);
1057 CHECK_EQ(a[3], 4);
1058 CHECK_EQ(a[4], 5);
1059 CHECK_EQ(a[5], 6);
1060 CHECK_EQ(a[6], 7);
1061
1062 memset(a, 0, sizeof(0));
1063 CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)"
1064 "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch(
1065 "1234567890123456",
1066 &a[0], &a[1], &a[2], &a[3],
1067 &a[4], &a[5], &a[6], &a[7],
1068 &a[8], &a[9], &a[10], &a[11],
1069 &a[12], &a[13], &a[14], &a[15]));
1070 CHECK_EQ(a[0], 1);
1071 CHECK_EQ(a[1], 2);
1072 CHECK_EQ(a[2], 3);
1073 CHECK_EQ(a[3], 4);
1074 CHECK_EQ(a[4], 5);
1075 CHECK_EQ(a[5], 6);
1076 CHECK_EQ(a[6], 7);
1077 CHECK_EQ(a[7], 8);
1078 CHECK_EQ(a[8], 9);
1079 CHECK_EQ(a[9], 0);
1080 CHECK_EQ(a[10], 1);
1081 CHECK_EQ(a[11], 2);
1082 CHECK_EQ(a[12], 3);
1083 CHECK_EQ(a[13], 4);
1084 CHECK_EQ(a[14], 5);
1085 CHECK_EQ(a[15], 6);
1086
1087 /***** PartialMatch *****/
1088
1089 printf("Testing PartialMatch\n");
1090
1091 CHECK(RE("h.*o").PartialMatch("hello"));
1092 CHECK(RE("h.*o").PartialMatch("othello"));
1093 CHECK(RE("h.*o").PartialMatch("hello!"));
1094 CHECK(RE("((((((((((((((((((((x))))))))))))))))))))").PartialMatch("x"));
1095
1096 /***** other tests *****/
1097
1098 RadixTests();
1099 TestReplace();
1100 TestExtract();
1101 TestConsume();
1102 TestFindAndConsume();
1103 TestQuoteMetaAll();
1104 TestMatchNumberPeculiarity();
1105
1106 // Check the pattern() accessor
1107 {
1108 const string kPattern = "http://([^/]+)/.*";
1109 const RE re(kPattern);
1110 CHECK_EQ(kPattern, re.pattern());
1111 }
1112
1113 // Check RE error field.
1114 {
1115 RE re("foo");
1116 CHECK(re.error().empty()); // Must have no error
1117 }
1118
1119 #ifdef SUPPORT_UTF8
1120 // Check UTF-8 handling
1121 {
1122 printf("Testing UTF-8 handling\n");
1123
1124 // Three Japanese characters (nihongo)
1125 const char utf8_string[] = {
1126 0xe6, 0x97, 0xa5, // 65e5
1127 0xe6, 0x9c, 0xac, // 672c
1128 0xe8, 0xaa, 0x9e, // 8a9e
1129 0
1130 };
1131 const char utf8_pattern[] = {
1132 '.',
1133 0xe6, 0x9c, 0xac, // 672c
1134 '.',
1135 0
1136 };
1137
1138 // Both should match in either mode, bytes or UTF-8
1139 RE re_test1(".........");
1140 CHECK(re_test1.FullMatch(utf8_string));
1141 RE re_test2("...", pcrecpp::UTF8());
1142 CHECK(re_test2.FullMatch(utf8_string));
1143
1144 // Check that '.' matches one byte or UTF-8 character
1145 // according to the mode.
1146 string ss;
1147 RE re_test3("(.)");
1148 CHECK(re_test3.PartialMatch(utf8_string, &ss));
1149 CHECK_EQ(ss, string("\xe6"));
1150 RE re_test4("(.)", pcrecpp::UTF8());
1151 CHECK(re_test4.PartialMatch(utf8_string, &ss));
1152 CHECK_EQ(ss, string("\xe6\x97\xa5"));
1153
1154 // Check that string matches itself in either mode
1155 RE re_test5(utf8_string);
1156 CHECK(re_test5.FullMatch(utf8_string));
1157 RE re_test6(utf8_string, pcrecpp::UTF8());
1158 CHECK(re_test6.FullMatch(utf8_string));
1159
1160 // Check that pattern matches string only in UTF8 mode
1161 RE re_test7(utf8_pattern);
1162 CHECK(!re_test7.FullMatch(utf8_string));
1163 RE re_test8(utf8_pattern, pcrecpp::UTF8());
1164 CHECK(re_test8.FullMatch(utf8_string));
1165 }
1166
1167 // Check that ungreedy, UTF8 regular expressions don't match when they
1168 // oughtn't -- see bug 82246.
1169 {
1170 // This code always worked.
1171 const char* pattern = "\\w+X";
1172 const string target = "a aX";
1173 RE match_sentence(pattern);
1174 RE match_sentence_re(pattern, pcrecpp::UTF8());
1175
1176 CHECK(!match_sentence.FullMatch(target));
1177 CHECK(!match_sentence_re.FullMatch(target));
1178 }
1179
1180 {
1181 const char* pattern = "(?U)\\w+X";
1182 const string target = "a aX";
1183 RE match_sentence(pattern);
1184 RE match_sentence_re(pattern, pcrecpp::UTF8());
1185
1186 CHECK(!match_sentence.FullMatch(target));
1187 CHECK(!match_sentence_re.FullMatch(target));
1188 }
1189 #endif /* def SUPPORT_UTF8 */
1190
1191 printf("Testing error reporting\n");
1192
1193 { RE re("a\\1"); CHECK(!re.error().empty()); }
1194 {
1195 RE re("a[x");
1196 CHECK(!re.error().empty());
1197 }
1198 {
1199 RE re("a[z-a]");
1200 CHECK(!re.error().empty());
1201 }
1202 {
1203 RE re("a[[:foobar:]]");
1204 CHECK(!re.error().empty());
1205 }
1206 {
1207 RE re("a(b");
1208 CHECK(!re.error().empty());
1209 }
1210 {
1211 RE re("a\\");
1212 CHECK(!re.error().empty());
1213 }
1214
1215 // Test that recursion is stopped
1216 TestRecursion();
1217
1218 // Test Options
1219 if (getenv("VERBOSE_TEST") != NULL)
1220 VERBOSE_TEST = true;
1221 TestOptions();
1222
1223 // Test the constructors
1224 TestConstructors();
1225
1226 // Done
1227 printf("OK\n");
1228
1229 return 0;
1230 }

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12