/[pcre]/code/trunk/pcrecpp_unittest.cc
ViewVC logotype

Contents of /code/trunk/pcrecpp_unittest.cc

Parent Directory Parent Directory | Revision Log Revision Log


Revision 248 - (show annotations) (download)
Mon Sep 17 10:09:22 2007 UTC (6 years, 10 months ago) by ph10
File size: 37742 byte(s)
Change condition for the macros for snprintf and strtoll from _WIN32 to 
HAVE_WINDOWS_H because they are needed only when windows.h is present.

1 // -*- coding: utf-8 -*-
2 //
3 // Copyright (c) 2005 - 2006, Google Inc.
4 // All rights reserved.
5 //
6 // Redistribution and use in source and binary forms, with or without
7 // modification, are permitted provided that the following conditions are
8 // met:
9 //
10 // * Redistributions of source code must retain the above copyright
11 // notice, this list of conditions and the following disclaimer.
12 // * Redistributions in binary form must reproduce the above
13 // copyright notice, this list of conditions and the following disclaimer
14 // in the documentation and/or other materials provided with the
15 // distribution.
16 // * Neither the name of Google Inc. nor the names of its
17 // contributors may be used to endorse or promote products derived from
18 // this software without specific prior written permission.
19 //
20 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 //
32 // Author: Sanjay Ghemawat
33 //
34 // TODO: Test extractions for PartialMatch/Consume
35
36 #ifdef HAVE_CONFIG_H
37 #include "config.h"
38 #endif
39
40 #ifdef HAVE_WINDOWS_H
41 #define snprintf _snprintf
42 #endif
43
44 #include <stdio.h>
45 #include <cassert>
46 #include <vector>
47 #include "pcrecpp.h"
48
49 using pcrecpp::StringPiece;
50 using pcrecpp::RE;
51 using pcrecpp::RE_Options;
52 using pcrecpp::Hex;
53 using pcrecpp::Octal;
54 using pcrecpp::CRadix;
55
// When set, the option tests additionally print a description of the
// pattern/subject pair they are about to check.
static bool VERBOSE_TEST(false);
57
// CHECK(condition) prints "<file>:<line>: Check failed: <condition>" to
// stderr and exits with status 1 when the condition is false.  It is *not*
// controlled by NDEBUG, so the condition is evaluated in every compilation
// mode.  Therefore, it is safe to do things like:
//    CHECK_EQ(fp->Write(x), 4)
#define CHECK(condition) do {                           \
    if (!(condition)) {                                 \
      fprintf(stderr, "%s:%d: Check failed: %s\n",      \
              __FILE__, __LINE__, #condition);          \
      exit(1);                                          \
    }                                                   \
  } while (0)

// CHECK_EQ(a, b) dies unless a == b.  Both arguments are parenthesized so
// that an argument containing an operator that binds less tightly than ==
// (for example `x & mask`) is compared as a whole instead of being
// re-associated by the expansion.
#define CHECK_EQ(a, b) CHECK((a) == (b))
71
72 static void Timing1(int num_iters) {
73 // Same pattern lots of times
74 RE pattern("ruby:\\d+");
75 StringPiece p("ruby:1234");
76 for (int j = num_iters; j > 0; j--) {
77 CHECK(pattern.FullMatch(p));
78 }
79 }
80
81 static void Timing2(int num_iters) {
82 // Same pattern lots of times
83 RE pattern("ruby:(\\d+)");
84 int i;
85 for (int j = num_iters; j > 0; j--) {
86 CHECK(pattern.FullMatch("ruby:1234", &i));
87 CHECK_EQ(i, 1234);
88 }
89 }
90
91 static void Timing3(int num_iters) {
92 string text_string;
93 for (int j = num_iters; j > 0; j--) {
94 text_string += "this is another line\n";
95 }
96
97 RE line_matcher(".*\n");
98 string line;
99 StringPiece text(text_string);
100 int counter = 0;
101 while (line_matcher.Consume(&text)) {
102 counter++;
103 }
104 printf("Matched %d lines\n", counter);
105 }
106
#if 0  // change to "#if 1" if you have a way of defining VirtualProcessSize()

// Leak check: constructs (and destroys) 100000 distinct RE objects and
// requires that the size reported by VirtualProcessSize() grows by less than
// 2% between iteration 50000 and the end of the run.  VirtualProcessSize()
// is not defined in this file, which is why the test is compiled out.
static void LeakTest() {
  unsigned long long initial_size = 0;
  for (int i = 0; i < 100000; i++) {
    if (i == 50000) {
      // Baseline is taken half-way through the run.
      initial_size = VirtualProcessSize();
      printf("Size after 50000: %llu\n", initial_size);
    }
    char buf[100];
    snprintf(buf, sizeof(buf), "pat%09d", i);
    RE newre(buf);
  }
  // Same type as initial_size, and the type the %llu conversion expects.
  const unsigned long long final_size = VirtualProcessSize();
  printf("Size after 100000: %llu\n", final_size);
  // Subtract in floating point: an unsigned subtraction would wrap around to
  // a huge value if the process happened to shrink.
  const double growth =
      (double(final_size) - double(initial_size)) / double(final_size);
  printf("Growth: %0.2f%%\n", growth * 100);
  CHECK(growth < 0.02);  // Allow < 2% growth
}

#endif
129
130 static void RadixTests() {
131 printf("Testing hex\n");
132
133 #define CHECK_HEX(type, value) \
134 do { \
135 type v; \
136 CHECK(RE("([0-9a-fA-F]+)[uUlL]*").FullMatch(#value, Hex(&v))); \
137 CHECK_EQ(v, 0x ## value); \
138 CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0x" #value, CRadix(&v))); \
139 CHECK_EQ(v, 0x ## value); \
140 } while(0)
141
142 CHECK_HEX(short, 2bad);
143 CHECK_HEX(unsigned short, 2badU);
144 CHECK_HEX(int, dead);
145 CHECK_HEX(unsigned int, deadU);
146 CHECK_HEX(long, 7eadbeefL);
147 CHECK_HEX(unsigned long, deadbeefUL);
148 #ifdef HAVE_LONG_LONG
149 CHECK_HEX(long long, 12345678deadbeefLL);
150 #endif
151 #ifdef HAVE_UNSIGNED_LONG_LONG
152 CHECK_HEX(unsigned long long, cafebabedeadbeefULL);
153 #endif
154
155 #undef CHECK_HEX
156
157 printf("Testing octal\n");
158
159 #define CHECK_OCTAL(type, value) \
160 do { \
161 type v; \
162 CHECK(RE("([0-7]+)[uUlL]*").FullMatch(#value, Octal(&v))); \
163 CHECK_EQ(v, 0 ## value); \
164 CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0" #value, CRadix(&v))); \
165 CHECK_EQ(v, 0 ## value); \
166 } while(0)
167
168 CHECK_OCTAL(short, 77777);
169 CHECK_OCTAL(unsigned short, 177777U);
170 CHECK_OCTAL(int, 17777777777);
171 CHECK_OCTAL(unsigned int, 37777777777U);
172 CHECK_OCTAL(long, 17777777777L);
173 CHECK_OCTAL(unsigned long, 37777777777UL);
174 #ifdef HAVE_LONG_LONG
175 CHECK_OCTAL(long long, 777777777777777777777LL);
176 #endif
177 #ifdef HAVE_UNSIGNED_LONG_LONG
178 CHECK_OCTAL(unsigned long long, 1777777777777777777777ULL);
179 #endif
180
181 #undef CHECK_OCTAL
182
183 printf("Testing decimal\n");
184
185 #define CHECK_DECIMAL(type, value) \
186 do { \
187 type v; \
188 CHECK(RE("(-?[0-9]+)[uUlL]*").FullMatch(#value, &v)); \
189 CHECK_EQ(v, value); \
190 CHECK(RE("(-?[0-9a-fA-FxX]+)[uUlL]*").FullMatch(#value, CRadix(&v))); \
191 CHECK_EQ(v, value); \
192 } while(0)
193
194 CHECK_DECIMAL(short, -1);
195 CHECK_DECIMAL(unsigned short, 9999);
196 CHECK_DECIMAL(int, -1000);
197 CHECK_DECIMAL(unsigned int, 12345U);
198 CHECK_DECIMAL(long, -10000000L);
199 CHECK_DECIMAL(unsigned long, 3083324652U);
200 #ifdef HAVE_LONG_LONG
201 CHECK_DECIMAL(long long, -100000000000000LL);
202 #endif
203 #ifdef HAVE_UNSIGNED_LONG_LONG
204 CHECK_DECIMAL(unsigned long long, 1234567890987654321ULL);
205 #endif
206
207 #undef CHECK_DECIMAL
208
209 }
210
211 static void TestReplace() {
212 printf("Testing Replace\n");
213
214 struct ReplaceTest {
215 const char *regexp;
216 const char *rewrite;
217 const char *original;
218 const char *single;
219 const char *global;
220 };
221 static const ReplaceTest tests[] = {
222 { "(qu|[b-df-hj-np-tv-z]*)([a-z]+)",
223 "\\2\\1ay",
224 "the quick brown fox jumps over the lazy dogs.",
225 "ethay quick brown fox jumps over the lazy dogs.",
226 "ethay ickquay ownbray oxfay umpsjay overay ethay azylay ogsday." },
227 { "\\w+",
228 "\\0-NOSPAM",
229 "paul.haahr@google.com",
230 "paul-NOSPAM.haahr@google.com",
231 "paul-NOSPAM.haahr-NOSPAM@google-NOSPAM.com-NOSPAM" },
232 { "^",
233 "(START)",
234 "foo",
235 "(START)foo",
236 "(START)foo" },
237 { "^",
238 "(START)",
239 "",
240 "(START)",
241 "(START)" },
242 { "$",
243 "(END)",
244 "",
245 "(END)",
246 "(END)" },
247 { "b",
248 "bb",
249 "ababababab",
250 "abbabababab",
251 "abbabbabbabbabb" },
252 { "b",
253 "bb",
254 "bbbbbb",
255 "bbbbbbb",
256 "bbbbbbbbbbbb" },
257 { "b+",
258 "bb",
259 "bbbbbb",
260 "bb",
261 "bb" },
262 { "b*",
263 "bb",
264 "bbbbbb",
265 "bb",
266 "bb" },
267 { "b*",
268 "bb",
269 "aaaaa",
270 "bbaaaaa",
271 "bbabbabbabbabbabb" },
272 { "b*",
273 "bb",
274 "aa\naa\n",
275 "bbaa\naa\n",
276 "bbabbabb\nbbabbabb\nbb" },
277 { "b*",
278 "bb",
279 "aa\raa\r",
280 "bbaa\raa\r",
281 "bbabbabb\rbbabbabb\rbb" },
282 { "b*",
283 "bb",
284 "aa\r\naa\r\n",
285 "bbaa\r\naa\r\n",
286 "bbabbabb\r\nbbabbabb\r\nbb" },
287 #ifdef SUPPORT_UTF8
288 { "b*",
289 "bb",
290 "\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8", // utf8
291 "bb\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8",
292 "bb\xE3\x83\x9B""bb""\xE3\x83\xBC""bb""\xE3\x83\xA0""bb""\xE3\x81\xB8""bb" },
293 { "b*",
294 "bb",
295 "\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n", // utf8
296 "bb\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n",
297 ("bb\xE3\x83\x9B""bb\r\nbb""\xE3\x83\xBC""bb\rbb""\xE3\x83\xA0"
298 "bb\nbb""\xE3\x81\xB8""bb\r\nbb") },
299 #endif
300 { "", NULL, NULL, NULL, NULL }
301 };
302
303 #ifdef SUPPORT_UTF8
304 const bool support_utf8 = true;
305 #else
306 const bool support_utf8 = false;
307 #endif
308
309 for (const ReplaceTest *t = tests; t->original != NULL; ++t) {
310 RE re(t->regexp, RE_Options(PCRE_NEWLINE_CRLF).set_utf8(support_utf8));
311 assert(re.error().empty());
312 string one(t->original);
313 CHECK(re.Replace(t->rewrite, &one));
314 CHECK_EQ(one, t->single);
315 string all(t->original);
316 CHECK(re.GlobalReplace(t->rewrite, &all) > 0);
317 CHECK_EQ(all, t->global);
318 }
319
320 // One final test: test \r\n replacement when we're not in CRLF mode
321 {
322 RE re("b*", RE_Options(PCRE_NEWLINE_CR).set_utf8(support_utf8));
323 assert(re.error().empty());
324 string all("aa\r\naa\r\n");
325 CHECK(re.GlobalReplace("bb", &all) > 0);
326 CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
327 }
328 {
329 RE re("b*", RE_Options(PCRE_NEWLINE_LF).set_utf8(support_utf8));
330 assert(re.error().empty());
331 string all("aa\r\naa\r\n");
332 CHECK(re.GlobalReplace("bb", &all) > 0);
333 CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
334 }
335 // TODO: test what happens when no PCRE_NEWLINE_* flag is set.
336 // Alas, the answer depends on how pcre was compiled.
337 }
338
339 static void TestExtract() {
340 printf("Testing Extract\n");
341
342 string s;
343
344 CHECK(RE("(.*)@([^.]*)").Extract("\\2!\\1", "boris@kremvax.ru", &s));
345 CHECK_EQ(s, "kremvax!boris");
346
347 // check the RE interface as well
348 CHECK(RE(".*").Extract("'\\0'", "foo", &s));
349 CHECK_EQ(s, "'foo'");
350 CHECK(!RE("bar").Extract("'\\0'", "baz", &s));
351 CHECK_EQ(s, "'foo'");
352 }
353
354 static void TestConsume() {
355 printf("Testing Consume\n");
356
357 string word;
358
359 string s(" aaa b!@#$@#$cccc");
360 StringPiece input(s);
361
362 RE r("\\s*(\\w+)"); // matches a word, possibly proceeded by whitespace
363 CHECK(r.Consume(&input, &word));
364 CHECK_EQ(word, "aaa");
365 CHECK(r.Consume(&input, &word));
366 CHECK_EQ(word, "b");
367 CHECK(! r.Consume(&input, &word));
368 }
369
370 static void TestFindAndConsume() {
371 printf("Testing FindAndConsume\n");
372
373 string word;
374
375 string s(" aaa b!@#$@#$cccc");
376 StringPiece input(s);
377
378 RE r("(\\w+)"); // matches a word
379 CHECK(r.FindAndConsume(&input, &word));
380 CHECK_EQ(word, "aaa");
381 CHECK(r.FindAndConsume(&input, &word));
382 CHECK_EQ(word, "b");
383 CHECK(r.FindAndConsume(&input, &word));
384 CHECK_EQ(word, "cccc");
385 CHECK(! r.FindAndConsume(&input, &word));
386 }
387
388 static void TestMatchNumberPeculiarity() {
389 printf("Testing match-number peculiaraity\n");
390
391 string word1;
392 string word2;
393 string word3;
394
395 RE r("(foo)|(bar)|(baz)");
396 CHECK(r.PartialMatch("foo", &word1, &word2, &word3));
397 CHECK_EQ(word1, "foo");
398 CHECK_EQ(word2, "");
399 CHECK_EQ(word3, "");
400 CHECK(r.PartialMatch("bar", &word1, &word2, &word3));
401 CHECK_EQ(word1, "");
402 CHECK_EQ(word2, "bar");
403 CHECK_EQ(word3, "");
404 CHECK(r.PartialMatch("baz", &word1, &word2, &word3));
405 CHECK_EQ(word1, "");
406 CHECK_EQ(word2, "");
407 CHECK_EQ(word3, "baz");
408 CHECK(!r.PartialMatch("f", &word1, &word2, &word3));
409
410 string a;
411 CHECK(RE("(foo)|hello").FullMatch("hello", &a));
412 CHECK_EQ(a, "");
413 }
414
415 static void TestRecursion() {
416 printf("Testing recursion\n");
417
418 // Get one string that passes (sometimes), one that never does.
419 string text_good("abcdefghijk");
420 string text_bad("acdefghijkl");
421
422 // According to pcretest, matching text_good against (\w+)*b
423 // requires match_limit of at least 8192, and match_recursion_limit
424 // of at least 37.
425
426 RE_Options options_ml;
427 options_ml.set_match_limit(8192);
428 RE re("(\\w+)*b", options_ml);
429 CHECK(re.PartialMatch(text_good) == true);
430 CHECK(re.PartialMatch(text_bad) == false);
431 CHECK(re.FullMatch(text_good) == false);
432 CHECK(re.FullMatch(text_bad) == false);
433
434 options_ml.set_match_limit(1024);
435 RE re2("(\\w+)*b", options_ml);
436 CHECK(re2.PartialMatch(text_good) == false); // because of match_limit
437 CHECK(re2.PartialMatch(text_bad) == false);
438 CHECK(re2.FullMatch(text_good) == false);
439 CHECK(re2.FullMatch(text_bad) == false);
440
441 RE_Options options_mlr;
442 options_mlr.set_match_limit_recursion(50);
443 RE re3("(\\w+)*b", options_mlr);
444 CHECK(re3.PartialMatch(text_good) == true);
445 CHECK(re3.PartialMatch(text_bad) == false);
446 CHECK(re3.FullMatch(text_good) == false);
447 CHECK(re3.FullMatch(text_bad) == false);
448
449 options_mlr.set_match_limit_recursion(10);
450 RE re4("(\\w+)*b", options_mlr);
451 CHECK(re4.PartialMatch(text_good) == false);
452 CHECK(re4.PartialMatch(text_bad) == false);
453 CHECK(re4.FullMatch(text_good) == false);
454 CHECK(re4.FullMatch(text_bad) == false);
455 }
456
457 // A meta-quoted string, interpreted as a pattern, should always match
458 // the original unquoted string.
459 static void TestQuoteMeta(string unquoted, RE_Options options = RE_Options()) {
460 string quoted = RE::QuoteMeta(unquoted);
461 RE re(quoted, options);
462 CHECK(re.FullMatch(unquoted));
463 }
464
465 // A string containing meaningful regexp characters, which is then meta-
466 // quoted, should not generally match a string the unquoted string does.
467 static void NegativeTestQuoteMeta(string unquoted, string should_not_match,
468 RE_Options options = RE_Options()) {
469 string quoted = RE::QuoteMeta(unquoted);
470 RE re(quoted, options);
471 CHECK(!re.FullMatch(should_not_match));
472 }
473
474 // Tests that quoted meta characters match their original strings,
475 // and that a few things that shouldn't match indeed do not.
476 static void TestQuotaMetaSimple() {
477 TestQuoteMeta("foo");
478 TestQuoteMeta("foo.bar");
479 TestQuoteMeta("foo\\.bar");
480 TestQuoteMeta("[1-9]");
481 TestQuoteMeta("1.5-2.0?");
482 TestQuoteMeta("\\d");
483 TestQuoteMeta("Who doesn't like ice cream?");
484 TestQuoteMeta("((a|b)c?d*e+[f-h]i)");
485 TestQuoteMeta("((?!)xxx).*yyy");
486 TestQuoteMeta("([");
487 }
488
489 static void TestQuoteMetaSimpleNegative() {
490 NegativeTestQuoteMeta("foo", "bar");
491 NegativeTestQuoteMeta("...", "bar");
492 NegativeTestQuoteMeta("\\.", ".");
493 NegativeTestQuoteMeta("\\.", "..");
494 NegativeTestQuoteMeta("(a)", "a");
495 NegativeTestQuoteMeta("(a|b)", "a");
496 NegativeTestQuoteMeta("(a|b)", "(a)");
497 NegativeTestQuoteMeta("(a|b)", "a|b");
498 NegativeTestQuoteMeta("[0-9]", "0");
499 NegativeTestQuoteMeta("[0-9]", "0-9");
500 NegativeTestQuoteMeta("[0-9]", "[9]");
501 NegativeTestQuoteMeta("((?!)xxx)", "xxx");
502 }
503
504 static void TestQuoteMetaLatin1() {
505 TestQuoteMeta("3\xb2 = 9");
506 }
507
// QuoteMeta checks with multi-byte UTF-8 sequences.  Compiled to an empty
// function unless SUPPORT_UTF8 is defined.
static void TestQuoteMetaUtf8() {
#ifdef SUPPORT_UTF8
  const RE_Options utf8_mode = pcrecpp::UTF8();

  TestQuoteMeta("Pl\xc3\xa1\x63ido Domingo", utf8_mode);
  TestQuoteMeta("xyz", utf8_mode);                // No fancy utf8
  TestQuoteMeta("\xc2\xb0", utf8_mode);           // 2-byte utf8 (degree symbol)
  TestQuoteMeta("27\xc2\xb0 degrees", utf8_mode); // As a middle character
  TestQuoteMeta("\xe2\x80\xb3", utf8_mode);       // 3-byte utf8 (double prime)
  TestQuoteMeta("\xf0\x9d\x85\x9f", utf8_mode);   // 4-byte utf8 (music note)

  // Same bytes without the UTF8 option: interpreted as Latin-1, but should
  // still work.
  TestQuoteMeta("27\xc2\xb0");

  // The subject has a backslash in front of each byte of the degree symbol;
  // the quoted pattern must not match it.
  NegativeTestQuoteMeta("27\xc2\xb0",             // 2-byte utf (degree symbol)
                        "27\\\xc2\\\xb0",
                        utf8_mode);
#endif
}
522
523 static void TestQuoteMetaAll() {
524 printf("Testing QuoteMeta\n");
525 TestQuotaMetaSimple();
526 TestQuoteMetaSimpleNegative();
527 TestQuoteMetaLatin1();
528 TestQuoteMetaUtf8();
529 }
530
531 //
532 // Options tests contributed by
533 // Giuseppe Maxia, CTO, Stardata s.r.l.
534 // July 2005
535 //
536 static void GetOneOptionResult(
537 const char *option_name,
538 const char *regex,
539 const char *str,
540 RE_Options options,
541 bool full,
542 string expected) {
543
544 printf("Testing Option <%s>\n", option_name);
545 if(VERBOSE_TEST)
546 printf("/%s/ finds \"%s\" within \"%s\" \n",
547 regex,
548 expected.c_str(),
549 str);
550 string captured("");
551 if (full)
552 RE(regex,options).FullMatch(str, &captured);
553 else
554 RE(regex,options).PartialMatch(str, &captured);
555 CHECK_EQ(captured, expected);
556 }
557
558 static void TestOneOption(
559 const char *option_name,
560 const char *regex,
561 const char *str,
562 RE_Options options,
563 bool full,
564 bool assertive = true) {
565
566 printf("Testing Option <%s>\n", option_name);
567 if (VERBOSE_TEST)
568 printf("'%s' %s /%s/ \n",
569 str,
570 (assertive? "matches" : "doesn't match"),
571 regex);
572 if (assertive) {
573 if (full)
574 CHECK(RE(regex,options).FullMatch(str));
575 else
576 CHECK(RE(regex,options).PartialMatch(str));
577 } else {
578 if (full)
579 CHECK(!RE(regex,options).FullMatch(str));
580 else
581 CHECK(!RE(regex,options).PartialMatch(str));
582 }
583 }
584
585 static void Test_CASELESS() {
586 RE_Options options;
587 RE_Options options2;
588
589 options.set_caseless(true);
590 TestOneOption("CASELESS (class)", "HELLO", "hello", options, false);
591 TestOneOption("CASELESS (class2)", "HELLO", "hello", options2.set_caseless(true), false);
592 TestOneOption("CASELESS (class)", "^[A-Z]+$", "Hello", options, false);
593
594 TestOneOption("CASELESS (function)", "HELLO", "hello", pcrecpp::CASELESS(), false);
595 TestOneOption("CASELESS (function)", "^[A-Z]+$", "Hello", pcrecpp::CASELESS(), false);
596 options.set_caseless(false);
597 TestOneOption("no CASELESS", "HELLO", "hello", options, false, false);
598 }
599
600 static void Test_MULTILINE() {
601 RE_Options options;
602 RE_Options options2;
603 const char *str = "HELLO\n" "cruel\n" "world\n";
604
605 options.set_multiline(true);
606 TestOneOption("MULTILINE (class)", "^cruel$", str, options, false);
607 TestOneOption("MULTILINE (class2)", "^cruel$", str, options2.set_multiline(true), false);
608 TestOneOption("MULTILINE (function)", "^cruel$", str, pcrecpp::MULTILINE(), false);
609 options.set_multiline(false);
610 TestOneOption("no MULTILINE", "^cruel$", str, options, false, false);
611 }
612
613 static void Test_DOTALL() {
614 RE_Options options;
615 RE_Options options2;
616 const char *str = "HELLO\n" "cruel\n" "world";
617
618 options.set_dotall(true);
619 TestOneOption("DOTALL (class)", "HELLO.*world", str, options, true);
620 TestOneOption("DOTALL (class2)", "HELLO.*world", str, options2.set_dotall(true), true);
621 TestOneOption("DOTALL (function)", "HELLO.*world", str, pcrecpp::DOTALL(), true);
622 options.set_dotall(false);
623 TestOneOption("no DOTALL", "HELLO.*world", str, options, true, false);
624 }
625
626 static void Test_DOLLAR_ENDONLY() {
627 RE_Options options;
628 RE_Options options2;
629 const char *str = "HELLO world\n";
630
631 TestOneOption("no DOLLAR_ENDONLY", "world$", str, options, false);
632 options.set_dollar_endonly(true);
633 TestOneOption("DOLLAR_ENDONLY 1", "world$", str, options, false, false);
634 TestOneOption("DOLLAR_ENDONLY 2", "world$", str, options2.set_dollar_endonly(true), false, false);
635 }
636
637 static void Test_EXTRA() {
638 RE_Options options;
639 const char *str = "HELLO";
640
641 options.set_extra(true);
642 TestOneOption("EXTRA 1", "\\HELL\\O", str, options, true, false );
643 TestOneOption("EXTRA 2", "\\HELL\\O", str, RE_Options().set_extra(true), true, false );
644 options.set_extra(false);
645 TestOneOption("no EXTRA", "\\HELL\\O", str, options, true );
646 }
647
648 static void Test_EXTENDED() {
649 RE_Options options;
650 RE_Options options2;
651 const char *str = "HELLO world";
652
653 options.set_extended(true);
654 TestOneOption("EXTENDED (class)", "HELLO world", str, options, false, false);
655 TestOneOption("EXTENDED (class2)", "HELLO world", str, options2.set_extended(true), false, false);
656 TestOneOption("EXTENDED (class)",
657 "^ HE L{2} O "
658 "\\s+ "
659 "\\w+ $ ",
660 str,
661 options,
662 false);
663
664 TestOneOption("EXTENDED (function)", "HELLO world", str, pcrecpp::EXTENDED(), false, false);
665 TestOneOption("EXTENDED (function)",
666 "^ HE L{2} O "
667 "\\s+ "
668 "\\w+ $ ",
669 str,
670 pcrecpp::EXTENDED(),
671 false);
672
673 options.set_extended(false);
674 TestOneOption("no EXTENDED", "HELLO world", str, options, false);
675 }
676
677 static void Test_NO_AUTO_CAPTURE() {
678 RE_Options options;
679 const char *str = "HELLO world";
680 string captured;
681
682 printf("Testing Option <no NO_AUTO_CAPTURE>\n");
683 if (VERBOSE_TEST)
684 printf("parentheses capture text\n");
685 RE re("(world|universe)$", options);
686 CHECK(re.Extract("\\1", str , &captured));
687 CHECK_EQ(captured, "world");
688 options.set_no_auto_capture(true);
689 printf("testing Option <NO_AUTO_CAPTURE>\n");
690 if (VERBOSE_TEST)
691 printf("parentheses do not capture text\n");
692 re.Extract("\\1",str, &captured );
693 CHECK_EQ(captured, "world");
694 }
695
696 static void Test_UNGREEDY() {
697 RE_Options options;
698 const char *str = "HELLO, 'this' is the 'world'";
699
700 options.set_ungreedy(true);
701 GetOneOptionResult("UNGREEDY 1", "('.*')", str, options, false, "'this'" );
702 GetOneOptionResult("UNGREEDY 2", "('.*')", str, RE_Options().set_ungreedy(true), false, "'this'" );
703 GetOneOptionResult("UNGREEDY", "('.*?')", str, options, false, "'this' is the 'world'" );
704
705 options.set_ungreedy(false);
706 GetOneOptionResult("no UNGREEDY", "('.*')", str, options, false, "'this' is the 'world'" );
707 GetOneOptionResult("no UNGREEDY", "('.*?')", str, options, false, "'this'" );
708 }
709
710 static void Test_all_options() {
711 const char *str = "HELLO\n" "cruel\n" "world";
712 RE_Options options;
713 options.set_all_options(PCRE_CASELESS | PCRE_DOTALL);
714
715 TestOneOption("all_options (CASELESS|DOTALL)", "^hello.*WORLD", str , options, false);
716 options.set_all_options(0);
717 TestOneOption("all_options (0)", "^hello.*WORLD", str , options, false, false);
718 options.set_all_options(PCRE_MULTILINE | PCRE_EXTENDED);
719
720 TestOneOption("all_options (MULTILINE|EXTENDED)", " ^ c r u e l $ ", str, options, false);
721 TestOneOption("all_options (MULTILINE|EXTENDED) with constructor",
722 " ^ c r u e l $ ",
723 str,
724 RE_Options(PCRE_MULTILINE | PCRE_EXTENDED),
725 false);
726
727 TestOneOption("all_options (MULTILINE|EXTENDED) with concatenation",
728 " ^ c r u e l $ ",
729 str,
730 RE_Options()
731 .set_multiline(true)
732 .set_extended(true),
733 false);
734
735 options.set_all_options(0);
736 TestOneOption("all_options (0)", "^ c r u e l $", str, options, false, false);
737
738 }
739
740 static void TestOptions() {
741 printf("Testing Options\n");
742 Test_CASELESS();
743 Test_MULTILINE();
744 Test_DOTALL();
745 Test_DOLLAR_ENDONLY();
746 Test_EXTENDED();
747 Test_NO_AUTO_CAPTURE();
748 Test_UNGREEDY();
749 Test_EXTRA();
750 Test_all_options();
751 }
752
753 static void TestConstructors() {
754 printf("Testing constructors\n");
755
756 RE_Options options;
757 options.set_dotall(true);
758 const char *str = "HELLO\n" "cruel\n" "world";
759
760 RE orig("HELLO.*world", options);
761 CHECK(orig.FullMatch(str));
762
763 RE copy1(orig);
764 CHECK(copy1.FullMatch(str));
765
766 RE copy2("not a match");
767 CHECK(!copy2.FullMatch(str));
768 copy2 = copy1;
769 CHECK(copy2.FullMatch(str));
770 copy2 = orig;
771 CHECK(copy2.FullMatch(str));
772
773 // Make sure when we assign to ourselves, nothing bad happens
774 orig = orig;
775 copy1 = copy1;
776 copy2 = copy2;
777 CHECK(orig.FullMatch(str));
778 CHECK(copy1.FullMatch(str));
779 CHECK(copy2.FullMatch(str));
780 }
781
782 int main(int argc, char** argv) {
783 // Treat any flag as --help
784 if (argc > 1 && argv[1][0] == '-') {
785 printf("Usage: %s [timing1|timing2|timing3 num-iters]\n"
786 " If 'timingX ###' is specified, run the given timing test\n"
787 " with the given number of iterations, rather than running\n"
788 " the default corectness test.\n", argv[0]);
789 return 0;
790 }
791
792 if (argc > 1) {
793 if ( argc == 2 || atoi(argv[2]) == 0) {
794 printf("timing mode needs a num-iters argument\n");
795 return 1;
796 }
797 if (!strcmp(argv[1], "timing1"))
798 Timing1(atoi(argv[2]));
799 else if (!strcmp(argv[1], "timing2"))
800 Timing2(atoi(argv[2]));
801 else if (!strcmp(argv[1], "timing3"))
802 Timing3(atoi(argv[2]));
803 else
804 printf("Unknown argument '%s'\n", argv[1]);
805 return 0;
806 }
807
808 printf("Testing FullMatch\n");
809
810 int i;
811 string s;
812
813 /***** FullMatch with no args *****/
814
815 CHECK(RE("h.*o").FullMatch("hello"));
816 CHECK(!RE("h.*o").FullMatch("othello")); // Must be anchored at front
817 CHECK(!RE("h.*o").FullMatch("hello!")); // Must be anchored at end
818 CHECK(RE("a*").FullMatch("aaaa")); // Fullmatch with normal op
819 CHECK(RE("a*?").FullMatch("aaaa")); // Fullmatch with nongreedy op
820 CHECK(RE("a*?\\z").FullMatch("aaaa")); // Two unusual ops
821
822 /***** FullMatch with args *****/
823
824 // Zero-arg
825 CHECK(RE("\\d+").FullMatch("1001"));
826
827 // Single-arg
828 CHECK(RE("(\\d+)").FullMatch("1001", &i));
829 CHECK_EQ(i, 1001);
830 CHECK(RE("(-?\\d+)").FullMatch("-123", &i));
831 CHECK_EQ(i, -123);
832 CHECK(!RE("()\\d+").FullMatch("10", &i));
833 CHECK(!RE("(\\d+)").FullMatch("1234567890123456789012345678901234567890",
834 &i));
835
836 // Digits surrounding integer-arg
837 CHECK(RE("1(\\d*)4").FullMatch("1234", &i));
838 CHECK_EQ(i, 23);
839 CHECK(RE("(\\d)\\d+").FullMatch("1234", &i));
840 CHECK_EQ(i, 1);
841 CHECK(RE("(-\\d)\\d+").FullMatch("-1234", &i));
842 CHECK_EQ(i, -1);
843 CHECK(RE("(\\d)").PartialMatch("1234", &i));
844 CHECK_EQ(i, 1);
845 CHECK(RE("(-\\d)").PartialMatch("-1234", &i));
846 CHECK_EQ(i, -1);
847
848 // String-arg
849 CHECK(RE("h(.*)o").FullMatch("hello", &s));
850 CHECK_EQ(s, string("ell"));
851
852 // StringPiece-arg
853 StringPiece sp;
854 CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &sp, &i));
855 CHECK_EQ(sp.size(), 4);
856 CHECK(memcmp(sp.data(), "ruby", 4) == 0);
857 CHECK_EQ(i, 1234);
858
859 // Multi-arg
860 CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &s, &i));
861 CHECK_EQ(s, string("ruby"));
862 CHECK_EQ(i, 1234);
863
864 // Ignored arg
865 CHECK(RE("(\\w+)(:)(\\d+)").FullMatch("ruby:1234", &s, (void*)NULL, &i));
866 CHECK_EQ(s, string("ruby"));
867 CHECK_EQ(i, 1234);
868
869 // Type tests
870 {
871 char c;
872 CHECK(RE("(H)ello").FullMatch("Hello", &c));
873 CHECK_EQ(c, 'H');
874 }
875 {
876 unsigned char c;
877 CHECK(RE("(H)ello").FullMatch("Hello", &c));
878 CHECK_EQ(c, static_cast<unsigned char>('H'));
879 }
880 {
881 short v;
882 CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
883 CHECK(RE("(-?\\d+)").FullMatch("-100", &v)); CHECK_EQ(v, -100);
884 CHECK(RE("(-?\\d+)").FullMatch("32767", &v)); CHECK_EQ(v, 32767);
885 CHECK(RE("(-?\\d+)").FullMatch("-32768", &v)); CHECK_EQ(v, -32768);
886 CHECK(!RE("(-?\\d+)").FullMatch("-32769", &v));
887 CHECK(!RE("(-?\\d+)").FullMatch("32768", &v));
888 }
889 {
890 unsigned short v;
891 CHECK(RE("(\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
892 CHECK(RE("(\\d+)").FullMatch("32767", &v)); CHECK_EQ(v, 32767);
893 CHECK(RE("(\\d+)").FullMatch("65535", &v)); CHECK_EQ(v, 65535);
894 CHECK(!RE("(\\d+)").FullMatch("65536", &v));
895 }
896 {
897 int v;
898 static const int max_value = 0x7fffffff;
899 static const int min_value = -max_value - 1;
900 CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
901 CHECK(RE("(-?\\d+)").FullMatch("-100", &v)); CHECK_EQ(v, -100);
902 CHECK(RE("(-?\\d+)").FullMatch("2147483647", &v)); CHECK_EQ(v, max_value);
903 CHECK(RE("(-?\\d+)").FullMatch("-2147483648", &v)); CHECK_EQ(v, min_value);
904 CHECK(!RE("(-?\\d+)").FullMatch("-2147483649", &v));
905 CHECK(!RE("(-?\\d+)").FullMatch("2147483648", &v));
906 }
907 {
908 unsigned int v;
909 static const unsigned int max_value = 0xfffffffful;
910 CHECK(RE("(\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
911 CHECK(RE("(\\d+)").FullMatch("4294967295", &v)); CHECK_EQ(v, max_value);
912 CHECK(!RE("(\\d+)").FullMatch("4294967296", &v));
913 }
914 #ifdef HAVE_LONG_LONG
915 # if defined(__MINGW__) || defined(__MINGW32__)
916 # define LLD "%I64d"
917 # define LLU "%I64u"
918 # else
919 # define LLD "%lld"
920 # define LLU "%llu"
921 # endif
922 {
923 long long v;
924 static const long long max_value = 0x7fffffffffffffffLL;
925 static const long long min_value = -max_value - 1;
926 char buf[32];
927
928 CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
929 CHECK(RE("(-?\\d+)").FullMatch("-100",&v)); CHECK_EQ(v, -100);
930
931 snprintf(buf, sizeof(buf), LLD, max_value);
932 CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
933
934 snprintf(buf, sizeof(buf), LLD, min_value);
935 CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, min_value);
936
937 snprintf(buf, sizeof(buf), LLD, max_value);
938 assert(buf[strlen(buf)-1] != '9');
939 buf[strlen(buf)-1]++;
940 CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
941
942 snprintf(buf, sizeof(buf), LLD, min_value);
943 assert(buf[strlen(buf)-1] != '9');
944 buf[strlen(buf)-1]++;
945 CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
946 }
947 #endif
948 #if defined HAVE_UNSIGNED_LONG_LONG && defined HAVE_LONG_LONG
949 {
950 unsigned long long v;
951 long long v2;
952 static const unsigned long long max_value = 0xffffffffffffffffULL;
953 char buf[32];
954
955 CHECK(RE("(-?\\d+)").FullMatch("100",&v)); CHECK_EQ(v, 100);
956 CHECK(RE("(-?\\d+)").FullMatch("-100",&v2)); CHECK_EQ(v2, -100);
957
958 snprintf(buf, sizeof(buf), LLU, max_value);
959 CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
960
961 assert(buf[strlen(buf)-1] != '9');
962 buf[strlen(buf)-1]++;
963 CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
964 }
965 #endif
966 {
967 float v;
968 CHECK(RE("(.*)").FullMatch("100", &v));
969 CHECK(RE("(.*)").FullMatch("-100.", &v));
970 CHECK(RE("(.*)").FullMatch("1e23", &v));
971 }
972 {
973 double v;
974 CHECK(RE("(.*)").FullMatch("100", &v));
975 CHECK(RE("(.*)").FullMatch("-100.", &v));
976 CHECK(RE("(.*)").FullMatch("1e23", &v));
977 }
978
979 // Check that matching is fully anchored
980 CHECK(!RE("(\\d+)").FullMatch("x1001", &i));
981 CHECK(!RE("(\\d+)").FullMatch("1001x", &i));
982 CHECK(RE("x(\\d+)").FullMatch("x1001", &i)); CHECK_EQ(i, 1001);
983 CHECK(RE("(\\d+)x").FullMatch("1001x", &i)); CHECK_EQ(i, 1001);
984
985 // Braces
986 CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcd"));
987 CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcde"));
988 CHECK(!RE("[0-9a-f+.-]{5,}").FullMatch("0abc"));
989
990 // Complicated RE
991 CHECK(RE("foo|bar|[A-Z]").FullMatch("foo"));
992 CHECK(RE("foo|bar|[A-Z]").FullMatch("bar"));
993 CHECK(RE("foo|bar|[A-Z]").FullMatch("X"));
994 CHECK(!RE("foo|bar|[A-Z]").FullMatch("XY"));
995
996 // Check full-match handling (needs '$' tacked on internally)
997 CHECK(RE("fo|foo").FullMatch("fo"));
998 CHECK(RE("fo|foo").FullMatch("foo"));
999 CHECK(RE("fo|foo$").FullMatch("fo"));
1000 CHECK(RE("fo|foo$").FullMatch("foo"));
1001 CHECK(RE("foo$").FullMatch("foo"));
1002 CHECK(!RE("foo\\$").FullMatch("foo$bar"));
1003 CHECK(!RE("fo|bar").FullMatch("fox"));
1004
1005 // Uncomment the following if we change the handling of '$' to
1006 // prevent it from matching a trailing newline
1007 if (false) {
1008 // Check that we don't get bitten by pcre's special handling of a
1009 // '\n' at the end of the string matching '$'
1010 CHECK(!RE("foo$").PartialMatch("foo\n"));
1011 }
1012
1013 // Number of args
1014 int a[16];
1015 CHECK(RE("").FullMatch(""));
1016
1017 memset(a, 0, sizeof(0));
1018 CHECK(RE("(\\d){1}").FullMatch("1",
1019 &a[0]));
1020 CHECK_EQ(a[0], 1);
1021
1022 memset(a, 0, sizeof(0));
1023 CHECK(RE("(\\d)(\\d)").FullMatch("12",
1024 &a[0], &a[1]));
1025 CHECK_EQ(a[0], 1);
1026 CHECK_EQ(a[1], 2);
1027
1028 memset(a, 0, sizeof(0));
1029 CHECK(RE("(\\d)(\\d)(\\d)").FullMatch("123",
1030 &a[0], &a[1], &a[2]));
1031 CHECK_EQ(a[0], 1);
1032 CHECK_EQ(a[1], 2);
1033 CHECK_EQ(a[2], 3);
1034
1035 memset(a, 0, sizeof(0));
1036 CHECK(RE("(\\d)(\\d)(\\d)(\\d)").FullMatch("1234",
1037 &a[0], &a[1], &a[2], &a[3]));
1038 CHECK_EQ(a[0], 1);
1039 CHECK_EQ(a[1], 2);
1040 CHECK_EQ(a[2], 3);
1041 CHECK_EQ(a[3], 4);
1042
1043 memset(a, 0, sizeof(0));
1044 CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("12345",
1045 &a[0], &a[1], &a[2],
1046 &a[3], &a[4]));
1047 CHECK_EQ(a[0], 1);
1048 CHECK_EQ(a[1], 2);
1049 CHECK_EQ(a[2], 3);
1050 CHECK_EQ(a[3], 4);
1051 CHECK_EQ(a[4], 5);
1052
1053 memset(a, 0, sizeof(0));
1054 CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("123456",
1055 &a[0], &a[1], &a[2],
1056 &a[3], &a[4], &a[5]));
1057 CHECK_EQ(a[0], 1);
1058 CHECK_EQ(a[1], 2);
1059 CHECK_EQ(a[2], 3);
1060 CHECK_EQ(a[3], 4);
1061 CHECK_EQ(a[4], 5);
1062 CHECK_EQ(a[5], 6);
1063
1064 memset(a, 0, sizeof(0));
1065 CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("1234567",
1066 &a[0], &a[1], &a[2], &a[3],
1067 &a[4], &a[5], &a[6]));
1068 CHECK_EQ(a[0], 1);
1069 CHECK_EQ(a[1], 2);
1070 CHECK_EQ(a[2], 3);
1071 CHECK_EQ(a[3], 4);
1072 CHECK_EQ(a[4], 5);
1073 CHECK_EQ(a[5], 6);
1074 CHECK_EQ(a[6], 7);
1075
1076 memset(a, 0, sizeof(0));
1077 CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)"
1078 "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch(
1079 "1234567890123456",
1080 &a[0], &a[1], &a[2], &a[3],
1081 &a[4], &a[5], &a[6], &a[7],
1082 &a[8], &a[9], &a[10], &a[11],
1083 &a[12], &a[13], &a[14], &a[15]));
1084 CHECK_EQ(a[0], 1);
1085 CHECK_EQ(a[1], 2);
1086 CHECK_EQ(a[2], 3);
1087 CHECK_EQ(a[3], 4);
1088 CHECK_EQ(a[4], 5);
1089 CHECK_EQ(a[5], 6);
1090 CHECK_EQ(a[6], 7);
1091 CHECK_EQ(a[7], 8);
1092 CHECK_EQ(a[8], 9);
1093 CHECK_EQ(a[9], 0);
1094 CHECK_EQ(a[10], 1);
1095 CHECK_EQ(a[11], 2);
1096 CHECK_EQ(a[12], 3);
1097 CHECK_EQ(a[13], 4);
1098 CHECK_EQ(a[14], 5);
1099 CHECK_EQ(a[15], 6);
1100
1101 /***** PartialMatch *****/
1102
1103 printf("Testing PartialMatch\n");
1104
1105 CHECK(RE("h.*o").PartialMatch("hello"));
1106 CHECK(RE("h.*o").PartialMatch("othello"));
1107 CHECK(RE("h.*o").PartialMatch("hello!"));
1108 CHECK(RE("((((((((((((((((((((x))))))))))))))))))))").PartialMatch("x"));
1109
1110 /***** other tests *****/
1111
1112 RadixTests();
1113 TestReplace();
1114 TestExtract();
1115 TestConsume();
1116 TestFindAndConsume();
1117 TestQuoteMetaAll();
1118 TestMatchNumberPeculiarity();
1119
1120 // Check the pattern() accessor
1121 {
1122 const string kPattern = "http://([^/]+)/.*";
1123 const RE re(kPattern);
1124 CHECK_EQ(kPattern, re.pattern());
1125 }
1126
1127 // Check RE error field.
1128 {
1129 RE re("foo");
1130 CHECK(re.error().empty()); // Must have no error
1131 }
1132
1133 #ifdef SUPPORT_UTF8
1134 // Check UTF-8 handling
1135 {
1136 printf("Testing UTF-8 handling\n");
1137
1138 // Three Japanese characters (nihongo)
1139 const char utf8_string[] = {
1140 0xe6, 0x97, 0xa5, // 65e5
1141 0xe6, 0x9c, 0xac, // 672c
1142 0xe8, 0xaa, 0x9e, // 8a9e
1143 0
1144 };
1145 const char utf8_pattern[] = {
1146 '.',
1147 0xe6, 0x9c, 0xac, // 672c
1148 '.',
1149 0
1150 };
1151
1152 // Both should match in either mode, bytes or UTF-8
1153 RE re_test1(".........");
1154 CHECK(re_test1.FullMatch(utf8_string));
1155 RE re_test2("...", pcrecpp::UTF8());
1156 CHECK(re_test2.FullMatch(utf8_string));
1157
1158 // Check that '.' matches one byte or UTF-8 character
1159 // according to the mode.
1160 string ss;
1161 RE re_test3("(.)");
1162 CHECK(re_test3.PartialMatch(utf8_string, &ss));
1163 CHECK_EQ(ss, string("\xe6"));
1164 RE re_test4("(.)", pcrecpp::UTF8());
1165 CHECK(re_test4.PartialMatch(utf8_string, &ss));
1166 CHECK_EQ(ss, string("\xe6\x97\xa5"));
1167
1168 // Check that string matches itself in either mode
1169 RE re_test5(utf8_string);
1170 CHECK(re_test5.FullMatch(utf8_string));
1171 RE re_test6(utf8_string, pcrecpp::UTF8());
1172 CHECK(re_test6.FullMatch(utf8_string));
1173
1174 // Check that pattern matches string only in UTF8 mode
1175 RE re_test7(utf8_pattern);
1176 CHECK(!re_test7.FullMatch(utf8_string));
1177 RE re_test8(utf8_pattern, pcrecpp::UTF8());
1178 CHECK(re_test8.FullMatch(utf8_string));
1179 }
1180
1181 // Check that ungreedy, UTF8 regular expressions don't match when they
1182 // oughtn't -- see bug 82246.
1183 {
1184 // This code always worked.
1185 const char* pattern = "\\w+X";
1186 const string target = "a aX";
1187 RE match_sentence(pattern);
1188 RE match_sentence_re(pattern, pcrecpp::UTF8());
1189
1190 CHECK(!match_sentence.FullMatch(target));
1191 CHECK(!match_sentence_re.FullMatch(target));
1192 }
1193
1194 {
1195 const char* pattern = "(?U)\\w+X";
1196 const string target = "a aX";
1197 RE match_sentence(pattern);
1198 RE match_sentence_re(pattern, pcrecpp::UTF8());
1199
1200 CHECK(!match_sentence.FullMatch(target));
1201 CHECK(!match_sentence_re.FullMatch(target));
1202 }
1203 #endif /* def SUPPORT_UTF8 */
1204
1205 printf("Testing error reporting\n");
1206
1207 { RE re("a\\1"); CHECK(!re.error().empty()); }
1208 {
1209 RE re("a[x");
1210 CHECK(!re.error().empty());
1211 }
1212 {
1213 RE re("a[z-a]");
1214 CHECK(!re.error().empty());
1215 }
1216 {
1217 RE re("a[[:foobar:]]");
1218 CHECK(!re.error().empty());
1219 }
1220 {
1221 RE re("a(b");
1222 CHECK(!re.error().empty());
1223 }
1224 {
1225 RE re("a\\");
1226 CHECK(!re.error().empty());
1227 }
1228
1229 // Test that recursion is stopped
1230 TestRecursion();
1231
1232 // Test Options
1233 if (getenv("VERBOSE_TEST") != NULL)
1234 VERBOSE_TEST = true;
1235 TestOptions();
1236
1237 // Test the constructors
1238 TestConstructors();
1239
1240 // Done
1241 printf("OK\n");
1242
1243 return 0;
1244 }

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12