/[pcre]/code/trunk/pcrecpp_unittest.cc
ViewVC logotype

Contents of /code/trunk/pcrecpp_unittest.cc

Parent Directory Parent Directory | Revision Log Revision Log


Revision 193 - (show annotations) (download)
Mon Jul 30 11:34:18 2007 UTC (7 years, 4 months ago) by ph10
File size: 37614 byte(s)
Arrange to use "%I64d" instead of "%lld" for long printing in the pcrecpp 
unittest when running under MinGW.

1 // -*- coding: utf-8 -*-
2 //
3 // Copyright (c) 2005 - 2006, Google Inc.
4 // All rights reserved.
5 //
6 // Redistribution and use in source and binary forms, with or without
7 // modification, are permitted provided that the following conditions are
8 // met:
9 //
10 // * Redistributions of source code must retain the above copyright
11 // notice, this list of conditions and the following disclaimer.
12 // * Redistributions in binary form must reproduce the above
13 // copyright notice, this list of conditions and the following disclaimer
14 // in the documentation and/or other materials provided with the
15 // distribution.
16 // * Neither the name of Google Inc. nor the names of its
17 // contributors may be used to endorse or promote products derived from
18 // this software without specific prior written permission.
19 //
20 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 //
32 // Author: Sanjay Ghemawat
33 //
34 // TODO: Test extractions for PartialMatch/Consume
35
36 #include <stdio.h>
37 #include <cassert>
38 #include <vector>
39 #include "config.h"
40 #include "pcrecpp.h"
41
42 using pcrecpp::StringPiece;
43 using pcrecpp::RE;
44 using pcrecpp::RE_Options;
45 using pcrecpp::Hex;
46 using pcrecpp::Octal;
47 using pcrecpp::CRadix;
48
// When true, the option-test helpers in this file print an extra line
// describing each regex/string pair in addition to the option name.
static bool VERBOSE_TEST(false);
50
// CHECK(condition) dies with a fatal error if condition is not true: it
// prints the source file, line and the text of the failing expression to
// stderr and then calls exit(1).  It is *not* controlled by NDEBUG, so the
// expression is evaluated regardless of compilation mode.  Therefore, it is
// safe to do things like:
//    CHECK_EQ(fp->Write(x), 4)
#define CHECK(condition) do {                           \
  if (!(condition)) {                                   \
    fprintf(stderr, "%s:%d: Check failed: %s\n",        \
            __FILE__, __LINE__, #condition);            \
    exit(1);                                            \
  }                                                     \
} while (0)

// CHECK_EQ(a, b) dies unless a == b.  Both operands are parenthesized so
// that an argument containing an operator of lower precedence than `==`
// (for example `x & mask`) is compared as a whole rather than being
// re-associated with the comparison.
#define CHECK_EQ(a, b) CHECK((a) == (b))
64
65 static void Timing1(int num_iters) {
66 // Same pattern lots of times
67 RE pattern("ruby:\\d+");
68 StringPiece p("ruby:1234");
69 for (int j = num_iters; j > 0; j--) {
70 CHECK(pattern.FullMatch(p));
71 }
72 }
73
74 static void Timing2(int num_iters) {
75 // Same pattern lots of times
76 RE pattern("ruby:(\\d+)");
77 int i;
78 for (int j = num_iters; j > 0; j--) {
79 CHECK(pattern.FullMatch("ruby:1234", &i));
80 CHECK_EQ(i, 1234);
81 }
82 }
83
84 static void Timing3(int num_iters) {
85 string text_string;
86 for (int j = num_iters; j > 0; j--) {
87 text_string += "this is another line\n";
88 }
89
90 RE line_matcher(".*\n");
91 string line;
92 StringPiece text(text_string);
93 int counter = 0;
94 while (line_matcher.Consume(&text)) {
95 counter++;
96 }
97 printf("Matched %d lines\n", counter);
98 }
99
100 #if 0 // uncomment this if you have a way of defining VirtualProcessSize()
101
102 static void LeakTest() {
103 // Check for memory leaks
104 unsigned long long initial_size = 0;
105 for (int i = 0; i < 100000; i++) {
106 if (i == 50000) {
107 initial_size = VirtualProcessSize();
108 printf("Size after 50000: %llu\n", initial_size);
109 }
110 char buf[100];
111 snprintf(buf, sizeof(buf), "pat%09d", i);
112 RE newre(buf);
113 }
114 uint64 final_size = VirtualProcessSize();
115 printf("Size after 100000: %llu\n", final_size);
116 const double growth = double(final_size - initial_size) / final_size;
117 printf("Growth: %0.2f%%", growth * 100);
118 CHECK(growth < 0.02); // Allow < 2% growth
119 }
120
121 #endif
122
123 static void RadixTests() {
124 printf("Testing hex\n");
125
126 #define CHECK_HEX(type, value) \
127 do { \
128 type v; \
129 CHECK(RE("([0-9a-fA-F]+)[uUlL]*").FullMatch(#value, Hex(&v))); \
130 CHECK_EQ(v, 0x ## value); \
131 CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0x" #value, CRadix(&v))); \
132 CHECK_EQ(v, 0x ## value); \
133 } while(0)
134
135 CHECK_HEX(short, 2bad);
136 CHECK_HEX(unsigned short, 2badU);
137 CHECK_HEX(int, dead);
138 CHECK_HEX(unsigned int, deadU);
139 CHECK_HEX(long, 7eadbeefL);
140 CHECK_HEX(unsigned long, deadbeefUL);
141 #ifdef HAVE_LONG_LONG
142 CHECK_HEX(long long, 12345678deadbeefLL);
143 #endif
144 #ifdef HAVE_UNSIGNED_LONG_LONG
145 CHECK_HEX(unsigned long long, cafebabedeadbeefULL);
146 #endif
147
148 #undef CHECK_HEX
149
150 printf("Testing octal\n");
151
152 #define CHECK_OCTAL(type, value) \
153 do { \
154 type v; \
155 CHECK(RE("([0-7]+)[uUlL]*").FullMatch(#value, Octal(&v))); \
156 CHECK_EQ(v, 0 ## value); \
157 CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0" #value, CRadix(&v))); \
158 CHECK_EQ(v, 0 ## value); \
159 } while(0)
160
161 CHECK_OCTAL(short, 77777);
162 CHECK_OCTAL(unsigned short, 177777U);
163 CHECK_OCTAL(int, 17777777777);
164 CHECK_OCTAL(unsigned int, 37777777777U);
165 CHECK_OCTAL(long, 17777777777L);
166 CHECK_OCTAL(unsigned long, 37777777777UL);
167 #ifdef HAVE_LONG_LONG
168 CHECK_OCTAL(long long, 777777777777777777777LL);
169 #endif
170 #ifdef HAVE_UNSIGNED_LONG_LONG
171 CHECK_OCTAL(unsigned long long, 1777777777777777777777ULL);
172 #endif
173
174 #undef CHECK_OCTAL
175
176 printf("Testing decimal\n");
177
178 #define CHECK_DECIMAL(type, value) \
179 do { \
180 type v; \
181 CHECK(RE("(-?[0-9]+)[uUlL]*").FullMatch(#value, &v)); \
182 CHECK_EQ(v, value); \
183 CHECK(RE("(-?[0-9a-fA-FxX]+)[uUlL]*").FullMatch(#value, CRadix(&v))); \
184 CHECK_EQ(v, value); \
185 } while(0)
186
187 CHECK_DECIMAL(short, -1);
188 CHECK_DECIMAL(unsigned short, 9999);
189 CHECK_DECIMAL(int, -1000);
190 CHECK_DECIMAL(unsigned int, 12345U);
191 CHECK_DECIMAL(long, -10000000L);
192 CHECK_DECIMAL(unsigned long, 3083324652U);
193 #ifdef HAVE_LONG_LONG
194 CHECK_DECIMAL(long long, -100000000000000LL);
195 #endif
196 #ifdef HAVE_UNSIGNED_LONG_LONG
197 CHECK_DECIMAL(unsigned long long, 1234567890987654321ULL);
198 #endif
199
200 #undef CHECK_DECIMAL
201
202 }
203
204 static void TestReplace() {
205 printf("Testing Replace\n");
206
207 struct ReplaceTest {
208 const char *regexp;
209 const char *rewrite;
210 const char *original;
211 const char *single;
212 const char *global;
213 };
214 static const ReplaceTest tests[] = {
215 { "(qu|[b-df-hj-np-tv-z]*)([a-z]+)",
216 "\\2\\1ay",
217 "the quick brown fox jumps over the lazy dogs.",
218 "ethay quick brown fox jumps over the lazy dogs.",
219 "ethay ickquay ownbray oxfay umpsjay overay ethay azylay ogsday." },
220 { "\\w+",
221 "\\0-NOSPAM",
222 "paul.haahr@google.com",
223 "paul-NOSPAM.haahr@google.com",
224 "paul-NOSPAM.haahr-NOSPAM@google-NOSPAM.com-NOSPAM" },
225 { "^",
226 "(START)",
227 "foo",
228 "(START)foo",
229 "(START)foo" },
230 { "^",
231 "(START)",
232 "",
233 "(START)",
234 "(START)" },
235 { "$",
236 "(END)",
237 "",
238 "(END)",
239 "(END)" },
240 { "b",
241 "bb",
242 "ababababab",
243 "abbabababab",
244 "abbabbabbabbabb" },
245 { "b",
246 "bb",
247 "bbbbbb",
248 "bbbbbbb",
249 "bbbbbbbbbbbb" },
250 { "b+",
251 "bb",
252 "bbbbbb",
253 "bb",
254 "bb" },
255 { "b*",
256 "bb",
257 "bbbbbb",
258 "bb",
259 "bb" },
260 { "b*",
261 "bb",
262 "aaaaa",
263 "bbaaaaa",
264 "bbabbabbabbabbabb" },
265 { "b*",
266 "bb",
267 "aa\naa\n",
268 "bbaa\naa\n",
269 "bbabbabb\nbbabbabb\nbb" },
270 { "b*",
271 "bb",
272 "aa\raa\r",
273 "bbaa\raa\r",
274 "bbabbabb\rbbabbabb\rbb" },
275 { "b*",
276 "bb",
277 "aa\r\naa\r\n",
278 "bbaa\r\naa\r\n",
279 "bbabbabb\r\nbbabbabb\r\nbb" },
280 #ifdef SUPPORT_UTF8
281 { "b*",
282 "bb",
283 "\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8", // utf8
284 "bb\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8",
285 "bb\xE3\x83\x9B""bb""\xE3\x83\xBC""bb""\xE3\x83\xA0""bb""\xE3\x81\xB8""bb" },
286 { "b*",
287 "bb",
288 "\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n", // utf8
289 "bb\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n",
290 ("bb\xE3\x83\x9B""bb\r\nbb""\xE3\x83\xBC""bb\rbb""\xE3\x83\xA0"
291 "bb\nbb""\xE3\x81\xB8""bb\r\nbb") },
292 #endif
293 { "", NULL, NULL, NULL, NULL }
294 };
295
296 #ifdef SUPPORT_UTF8
297 const bool support_utf8 = true;
298 #else
299 const bool support_utf8 = false;
300 #endif
301
302 for (const ReplaceTest *t = tests; t->original != NULL; ++t) {
303 RE re(t->regexp, RE_Options(PCRE_NEWLINE_CRLF).set_utf8(support_utf8));
304 assert(re.error().empty());
305 string one(t->original);
306 CHECK(re.Replace(t->rewrite, &one));
307 CHECK_EQ(one, t->single);
308 string all(t->original);
309 CHECK(re.GlobalReplace(t->rewrite, &all) > 0);
310 CHECK_EQ(all, t->global);
311 }
312
313 // One final test: test \r\n replacement when we're not in CRLF mode
314 {
315 RE re("b*", RE_Options(PCRE_NEWLINE_CR).set_utf8(support_utf8));
316 assert(re.error().empty());
317 string all("aa\r\naa\r\n");
318 CHECK(re.GlobalReplace("bb", &all) > 0);
319 CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
320 }
321 {
322 RE re("b*", RE_Options(PCRE_NEWLINE_LF).set_utf8(support_utf8));
323 assert(re.error().empty());
324 string all("aa\r\naa\r\n");
325 CHECK(re.GlobalReplace("bb", &all) > 0);
326 CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
327 }
328 // TODO: test what happens when no PCRE_NEWLINE_* flag is set.
329 // Alas, the answer depends on how pcre was compiled.
330 }
331
332 static void TestExtract() {
333 printf("Testing Extract\n");
334
335 string s;
336
337 CHECK(RE("(.*)@([^.]*)").Extract("\\2!\\1", "boris@kremvax.ru", &s));
338 CHECK_EQ(s, "kremvax!boris");
339
340 // check the RE interface as well
341 CHECK(RE(".*").Extract("'\\0'", "foo", &s));
342 CHECK_EQ(s, "'foo'");
343 CHECK(!RE("bar").Extract("'\\0'", "baz", &s));
344 CHECK_EQ(s, "'foo'");
345 }
346
347 static void TestConsume() {
348 printf("Testing Consume\n");
349
350 string word;
351
352 string s(" aaa b!@#$@#$cccc");
353 StringPiece input(s);
354
355 RE r("\\s*(\\w+)"); // matches a word, possibly proceeded by whitespace
356 CHECK(r.Consume(&input, &word));
357 CHECK_EQ(word, "aaa");
358 CHECK(r.Consume(&input, &word));
359 CHECK_EQ(word, "b");
360 CHECK(! r.Consume(&input, &word));
361 }
362
363 static void TestFindAndConsume() {
364 printf("Testing FindAndConsume\n");
365
366 string word;
367
368 string s(" aaa b!@#$@#$cccc");
369 StringPiece input(s);
370
371 RE r("(\\w+)"); // matches a word
372 CHECK(r.FindAndConsume(&input, &word));
373 CHECK_EQ(word, "aaa");
374 CHECK(r.FindAndConsume(&input, &word));
375 CHECK_EQ(word, "b");
376 CHECK(r.FindAndConsume(&input, &word));
377 CHECK_EQ(word, "cccc");
378 CHECK(! r.FindAndConsume(&input, &word));
379 }
380
381 static void TestMatchNumberPeculiarity() {
382 printf("Testing match-number peculiaraity\n");
383
384 string word1;
385 string word2;
386 string word3;
387
388 RE r("(foo)|(bar)|(baz)");
389 CHECK(r.PartialMatch("foo", &word1, &word2, &word3));
390 CHECK_EQ(word1, "foo");
391 CHECK_EQ(word2, "");
392 CHECK_EQ(word3, "");
393 CHECK(r.PartialMatch("bar", &word1, &word2, &word3));
394 CHECK_EQ(word1, "");
395 CHECK_EQ(word2, "bar");
396 CHECK_EQ(word3, "");
397 CHECK(r.PartialMatch("baz", &word1, &word2, &word3));
398 CHECK_EQ(word1, "");
399 CHECK_EQ(word2, "");
400 CHECK_EQ(word3, "baz");
401 CHECK(!r.PartialMatch("f", &word1, &word2, &word3));
402
403 string a;
404 CHECK(RE("(foo)|hello").FullMatch("hello", &a));
405 CHECK_EQ(a, "");
406 }
407
408 static void TestRecursion() {
409 printf("Testing recursion\n");
410
411 // Get one string that passes (sometimes), one that never does.
412 string text_good("abcdefghijk");
413 string text_bad("acdefghijkl");
414
415 // According to pcretest, matching text_good against (\w+)*b
416 // requires match_limit of at least 8192, and match_recursion_limit
417 // of at least 37.
418
419 RE_Options options_ml;
420 options_ml.set_match_limit(8192);
421 RE re("(\\w+)*b", options_ml);
422 CHECK(re.PartialMatch(text_good) == true);
423 CHECK(re.PartialMatch(text_bad) == false);
424 CHECK(re.FullMatch(text_good) == false);
425 CHECK(re.FullMatch(text_bad) == false);
426
427 options_ml.set_match_limit(1024);
428 RE re2("(\\w+)*b", options_ml);
429 CHECK(re2.PartialMatch(text_good) == false); // because of match_limit
430 CHECK(re2.PartialMatch(text_bad) == false);
431 CHECK(re2.FullMatch(text_good) == false);
432 CHECK(re2.FullMatch(text_bad) == false);
433
434 RE_Options options_mlr;
435 options_mlr.set_match_limit_recursion(50);
436 RE re3("(\\w+)*b", options_mlr);
437 CHECK(re3.PartialMatch(text_good) == true);
438 CHECK(re3.PartialMatch(text_bad) == false);
439 CHECK(re3.FullMatch(text_good) == false);
440 CHECK(re3.FullMatch(text_bad) == false);
441
442 options_mlr.set_match_limit_recursion(10);
443 RE re4("(\\w+)*b", options_mlr);
444 CHECK(re4.PartialMatch(text_good) == false);
445 CHECK(re4.PartialMatch(text_bad) == false);
446 CHECK(re4.FullMatch(text_good) == false);
447 CHECK(re4.FullMatch(text_bad) == false);
448 }
449
450 // A meta-quoted string, interpreted as a pattern, should always match
451 // the original unquoted string.
452 static void TestQuoteMeta(string unquoted, RE_Options options = RE_Options()) {
453 string quoted = RE::QuoteMeta(unquoted);
454 RE re(quoted, options);
455 CHECK(re.FullMatch(unquoted));
456 }
457
458 // A string containing meaningful regexp characters, which is then meta-
459 // quoted, should not generally match a string the unquoted string does.
460 static void NegativeTestQuoteMeta(string unquoted, string should_not_match,
461 RE_Options options = RE_Options()) {
462 string quoted = RE::QuoteMeta(unquoted);
463 RE re(quoted, options);
464 CHECK(!re.FullMatch(should_not_match));
465 }
466
467 // Tests that quoted meta characters match their original strings,
468 // and that a few things that shouldn't match indeed do not.
469 static void TestQuotaMetaSimple() {
470 TestQuoteMeta("foo");
471 TestQuoteMeta("foo.bar");
472 TestQuoteMeta("foo\\.bar");
473 TestQuoteMeta("[1-9]");
474 TestQuoteMeta("1.5-2.0?");
475 TestQuoteMeta("\\d");
476 TestQuoteMeta("Who doesn't like ice cream?");
477 TestQuoteMeta("((a|b)c?d*e+[f-h]i)");
478 TestQuoteMeta("((?!)xxx).*yyy");
479 TestQuoteMeta("([");
480 }
481
482 static void TestQuoteMetaSimpleNegative() {
483 NegativeTestQuoteMeta("foo", "bar");
484 NegativeTestQuoteMeta("...", "bar");
485 NegativeTestQuoteMeta("\\.", ".");
486 NegativeTestQuoteMeta("\\.", "..");
487 NegativeTestQuoteMeta("(a)", "a");
488 NegativeTestQuoteMeta("(a|b)", "a");
489 NegativeTestQuoteMeta("(a|b)", "(a)");
490 NegativeTestQuoteMeta("(a|b)", "a|b");
491 NegativeTestQuoteMeta("[0-9]", "0");
492 NegativeTestQuoteMeta("[0-9]", "0-9");
493 NegativeTestQuoteMeta("[0-9]", "[9]");
494 NegativeTestQuoteMeta("((?!)xxx)", "xxx");
495 }
496
497 static void TestQuoteMetaLatin1() {
498 TestQuoteMeta("3\xb2 = 9");
499 }
500
// QuoteMeta checks on multi-byte UTF-8 sequences.  Compiled to an empty
// function unless SUPPORT_UTF8 is defined.
static void TestQuoteMetaUtf8() {
#ifdef SUPPORT_UTF8
  const RE_Options utf8_options = pcrecpp::UTF8();

  TestQuoteMeta("Pl\xc3\xa1\x63ido Domingo", utf8_options);
  TestQuoteMeta("xyz", utf8_options);                 // No fancy utf8
  TestQuoteMeta("\xc2\xb0", utf8_options);            // 2-byte utf8 (degree symbol)
  TestQuoteMeta("27\xc2\xb0 degrees", utf8_options);  // As a middle character
  TestQuoteMeta("\xe2\x80\xb3", utf8_options);        // 3-byte utf8 (double prime)
  TestQuoteMeta("\xf0\x9d\x85\x9f", utf8_options);    // 4-byte utf8 (music note)

  // Without the UTF8 option the bytes are interpreted as Latin-1, but the
  // quoted pattern should still match.
  TestQuoteMeta("27\xc2\xb0");

  // The quoted pattern must not match text in which each byte of the
  // 2-byte sequence (degree symbol) is preceded by a literal backslash.
  NegativeTestQuoteMeta("27\xc2\xb0",
                        "27\\\xc2\\\xb0",
                        utf8_options);
#endif
}
515
516 static void TestQuoteMetaAll() {
517 printf("Testing QuoteMeta\n");
518 TestQuotaMetaSimple();
519 TestQuoteMetaSimpleNegative();
520 TestQuoteMetaLatin1();
521 TestQuoteMetaUtf8();
522 }
523
524 //
525 // Options tests contributed by
526 // Giuseppe Maxia, CTO, Stardata s.r.l.
527 // July 2005
528 //
529 static void GetOneOptionResult(
530 const char *option_name,
531 const char *regex,
532 const char *str,
533 RE_Options options,
534 bool full,
535 string expected) {
536
537 printf("Testing Option <%s>\n", option_name);
538 if(VERBOSE_TEST)
539 printf("/%s/ finds \"%s\" within \"%s\" \n",
540 regex,
541 expected.c_str(),
542 str);
543 string captured("");
544 if (full)
545 RE(regex,options).FullMatch(str, &captured);
546 else
547 RE(regex,options).PartialMatch(str, &captured);
548 CHECK_EQ(captured, expected);
549 }
550
551 static void TestOneOption(
552 const char *option_name,
553 const char *regex,
554 const char *str,
555 RE_Options options,
556 bool full,
557 bool assertive = true) {
558
559 printf("Testing Option <%s>\n", option_name);
560 if (VERBOSE_TEST)
561 printf("'%s' %s /%s/ \n",
562 str,
563 (assertive? "matches" : "doesn't match"),
564 regex);
565 if (assertive) {
566 if (full)
567 CHECK(RE(regex,options).FullMatch(str));
568 else
569 CHECK(RE(regex,options).PartialMatch(str));
570 } else {
571 if (full)
572 CHECK(!RE(regex,options).FullMatch(str));
573 else
574 CHECK(!RE(regex,options).PartialMatch(str));
575 }
576 }
577
578 static void Test_CASELESS() {
579 RE_Options options;
580 RE_Options options2;
581
582 options.set_caseless(true);
583 TestOneOption("CASELESS (class)", "HELLO", "hello", options, false);
584 TestOneOption("CASELESS (class2)", "HELLO", "hello", options2.set_caseless(true), false);
585 TestOneOption("CASELESS (class)", "^[A-Z]+$", "Hello", options, false);
586
587 TestOneOption("CASELESS (function)", "HELLO", "hello", pcrecpp::CASELESS(), false);
588 TestOneOption("CASELESS (function)", "^[A-Z]+$", "Hello", pcrecpp::CASELESS(), false);
589 options.set_caseless(false);
590 TestOneOption("no CASELESS", "HELLO", "hello", options, false, false);
591 }
592
593 static void Test_MULTILINE() {
594 RE_Options options;
595 RE_Options options2;
596 const char *str = "HELLO\n" "cruel\n" "world\n";
597
598 options.set_multiline(true);
599 TestOneOption("MULTILINE (class)", "^cruel$", str, options, false);
600 TestOneOption("MULTILINE (class2)", "^cruel$", str, options2.set_multiline(true), false);
601 TestOneOption("MULTILINE (function)", "^cruel$", str, pcrecpp::MULTILINE(), false);
602 options.set_multiline(false);
603 TestOneOption("no MULTILINE", "^cruel$", str, options, false, false);
604 }
605
606 static void Test_DOTALL() {
607 RE_Options options;
608 RE_Options options2;
609 const char *str = "HELLO\n" "cruel\n" "world";
610
611 options.set_dotall(true);
612 TestOneOption("DOTALL (class)", "HELLO.*world", str, options, true);
613 TestOneOption("DOTALL (class2)", "HELLO.*world", str, options2.set_dotall(true), true);
614 TestOneOption("DOTALL (function)", "HELLO.*world", str, pcrecpp::DOTALL(), true);
615 options.set_dotall(false);
616 TestOneOption("no DOTALL", "HELLO.*world", str, options, true, false);
617 }
618
619 static void Test_DOLLAR_ENDONLY() {
620 RE_Options options;
621 RE_Options options2;
622 const char *str = "HELLO world\n";
623
624 TestOneOption("no DOLLAR_ENDONLY", "world$", str, options, false);
625 options.set_dollar_endonly(true);
626 TestOneOption("DOLLAR_ENDONLY 1", "world$", str, options, false, false);
627 TestOneOption("DOLLAR_ENDONLY 2", "world$", str, options2.set_dollar_endonly(true), false, false);
628 }
629
630 static void Test_EXTRA() {
631 RE_Options options;
632 const char *str = "HELLO";
633
634 options.set_extra(true);
635 TestOneOption("EXTRA 1", "\\HELL\\O", str, options, true, false );
636 TestOneOption("EXTRA 2", "\\HELL\\O", str, RE_Options().set_extra(true), true, false );
637 options.set_extra(false);
638 TestOneOption("no EXTRA", "\\HELL\\O", str, options, true );
639 }
640
641 static void Test_EXTENDED() {
642 RE_Options options;
643 RE_Options options2;
644 const char *str = "HELLO world";
645
646 options.set_extended(true);
647 TestOneOption("EXTENDED (class)", "HELLO world", str, options, false, false);
648 TestOneOption("EXTENDED (class2)", "HELLO world", str, options2.set_extended(true), false, false);
649 TestOneOption("EXTENDED (class)",
650 "^ HE L{2} O "
651 "\\s+ "
652 "\\w+ $ ",
653 str,
654 options,
655 false);
656
657 TestOneOption("EXTENDED (function)", "HELLO world", str, pcrecpp::EXTENDED(), false, false);
658 TestOneOption("EXTENDED (function)",
659 "^ HE L{2} O "
660 "\\s+ "
661 "\\w+ $ ",
662 str,
663 pcrecpp::EXTENDED(),
664 false);
665
666 options.set_extended(false);
667 TestOneOption("no EXTENDED", "HELLO world", str, options, false);
668 }
669
670 static void Test_NO_AUTO_CAPTURE() {
671 RE_Options options;
672 const char *str = "HELLO world";
673 string captured;
674
675 printf("Testing Option <no NO_AUTO_CAPTURE>\n");
676 if (VERBOSE_TEST)
677 printf("parentheses capture text\n");
678 RE re("(world|universe)$", options);
679 CHECK(re.Extract("\\1", str , &captured));
680 CHECK_EQ(captured, "world");
681 options.set_no_auto_capture(true);
682 printf("testing Option <NO_AUTO_CAPTURE>\n");
683 if (VERBOSE_TEST)
684 printf("parentheses do not capture text\n");
685 re.Extract("\\1",str, &captured );
686 CHECK_EQ(captured, "world");
687 }
688
689 static void Test_UNGREEDY() {
690 RE_Options options;
691 const char *str = "HELLO, 'this' is the 'world'";
692
693 options.set_ungreedy(true);
694 GetOneOptionResult("UNGREEDY 1", "('.*')", str, options, false, "'this'" );
695 GetOneOptionResult("UNGREEDY 2", "('.*')", str, RE_Options().set_ungreedy(true), false, "'this'" );
696 GetOneOptionResult("UNGREEDY", "('.*?')", str, options, false, "'this' is the 'world'" );
697
698 options.set_ungreedy(false);
699 GetOneOptionResult("no UNGREEDY", "('.*')", str, options, false, "'this' is the 'world'" );
700 GetOneOptionResult("no UNGREEDY", "('.*?')", str, options, false, "'this'" );
701 }
702
703 static void Test_all_options() {
704 const char *str = "HELLO\n" "cruel\n" "world";
705 RE_Options options;
706 options.set_all_options(PCRE_CASELESS | PCRE_DOTALL);
707
708 TestOneOption("all_options (CASELESS|DOTALL)", "^hello.*WORLD", str , options, false);
709 options.set_all_options(0);
710 TestOneOption("all_options (0)", "^hello.*WORLD", str , options, false, false);
711 options.set_all_options(PCRE_MULTILINE | PCRE_EXTENDED);
712
713 TestOneOption("all_options (MULTILINE|EXTENDED)", " ^ c r u e l $ ", str, options, false);
714 TestOneOption("all_options (MULTILINE|EXTENDED) with constructor",
715 " ^ c r u e l $ ",
716 str,
717 RE_Options(PCRE_MULTILINE | PCRE_EXTENDED),
718 false);
719
720 TestOneOption("all_options (MULTILINE|EXTENDED) with concatenation",
721 " ^ c r u e l $ ",
722 str,
723 RE_Options()
724 .set_multiline(true)
725 .set_extended(true),
726 false);
727
728 options.set_all_options(0);
729 TestOneOption("all_options (0)", "^ c r u e l $", str, options, false, false);
730
731 }
732
733 static void TestOptions() {
734 printf("Testing Options\n");
735 Test_CASELESS();
736 Test_MULTILINE();
737 Test_DOTALL();
738 Test_DOLLAR_ENDONLY();
739 Test_EXTENDED();
740 Test_NO_AUTO_CAPTURE();
741 Test_UNGREEDY();
742 Test_EXTRA();
743 Test_all_options();
744 }
745
746 static void TestConstructors() {
747 printf("Testing constructors\n");
748
749 RE_Options options;
750 options.set_dotall(true);
751 const char *str = "HELLO\n" "cruel\n" "world";
752
753 RE orig("HELLO.*world", options);
754 CHECK(orig.FullMatch(str));
755
756 RE copy1(orig);
757 CHECK(copy1.FullMatch(str));
758
759 RE copy2("not a match");
760 CHECK(!copy2.FullMatch(str));
761 copy2 = copy1;
762 CHECK(copy2.FullMatch(str));
763 copy2 = orig;
764 CHECK(copy2.FullMatch(str));
765
766 // Make sure when we assign to ourselves, nothing bad happens
767 orig = orig;
768 copy1 = copy1;
769 copy2 = copy2;
770 CHECK(orig.FullMatch(str));
771 CHECK(copy1.FullMatch(str));
772 CHECK(copy2.FullMatch(str));
773 }
774
775 int main(int argc, char** argv) {
776 // Treat any flag as --help
777 if (argc > 1 && argv[1][0] == '-') {
778 printf("Usage: %s [timing1|timing2|timing3 num-iters]\n"
779 " If 'timingX ###' is specified, run the given timing test\n"
780 " with the given number of iterations, rather than running\n"
781 " the default corectness test.\n", argv[0]);
782 return 0;
783 }
784
785 if (argc > 1) {
786 if ( argc == 2 || atoi(argv[2]) == 0) {
787 printf("timing mode needs a num-iters argument\n");
788 return 1;
789 }
790 if (!strcmp(argv[1], "timing1"))
791 Timing1(atoi(argv[2]));
792 else if (!strcmp(argv[1], "timing2"))
793 Timing2(atoi(argv[2]));
794 else if (!strcmp(argv[1], "timing3"))
795 Timing3(atoi(argv[2]));
796 else
797 printf("Unknown argument '%s'\n", argv[1]);
798 return 0;
799 }
800
801 printf("Testing FullMatch\n");
802
803 int i;
804 string s;
805
806 /***** FullMatch with no args *****/
807
808 CHECK(RE("h.*o").FullMatch("hello"));
809 CHECK(!RE("h.*o").FullMatch("othello")); // Must be anchored at front
810 CHECK(!RE("h.*o").FullMatch("hello!")); // Must be anchored at end
811 CHECK(RE("a*").FullMatch("aaaa")); // Fullmatch with normal op
812 CHECK(RE("a*?").FullMatch("aaaa")); // Fullmatch with nongreedy op
813 CHECK(RE("a*?\\z").FullMatch("aaaa")); // Two unusual ops
814
815 /***** FullMatch with args *****/
816
817 // Zero-arg
818 CHECK(RE("\\d+").FullMatch("1001"));
819
820 // Single-arg
821 CHECK(RE("(\\d+)").FullMatch("1001", &i));
822 CHECK_EQ(i, 1001);
823 CHECK(RE("(-?\\d+)").FullMatch("-123", &i));
824 CHECK_EQ(i, -123);
825 CHECK(!RE("()\\d+").FullMatch("10", &i));
826 CHECK(!RE("(\\d+)").FullMatch("1234567890123456789012345678901234567890",
827 &i));
828
829 // Digits surrounding integer-arg
830 CHECK(RE("1(\\d*)4").FullMatch("1234", &i));
831 CHECK_EQ(i, 23);
832 CHECK(RE("(\\d)\\d+").FullMatch("1234", &i));
833 CHECK_EQ(i, 1);
834 CHECK(RE("(-\\d)\\d+").FullMatch("-1234", &i));
835 CHECK_EQ(i, -1);
836 CHECK(RE("(\\d)").PartialMatch("1234", &i));
837 CHECK_EQ(i, 1);
838 CHECK(RE("(-\\d)").PartialMatch("-1234", &i));
839 CHECK_EQ(i, -1);
840
841 // String-arg
842 CHECK(RE("h(.*)o").FullMatch("hello", &s));
843 CHECK_EQ(s, string("ell"));
844
845 // StringPiece-arg
846 StringPiece sp;
847 CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &sp, &i));
848 CHECK_EQ(sp.size(), 4);
849 CHECK(memcmp(sp.data(), "ruby", 4) == 0);
850 CHECK_EQ(i, 1234);
851
852 // Multi-arg
853 CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &s, &i));
854 CHECK_EQ(s, string("ruby"));
855 CHECK_EQ(i, 1234);
856
857 // Ignored arg
858 CHECK(RE("(\\w+)(:)(\\d+)").FullMatch("ruby:1234", &s, (void*)NULL, &i));
859 CHECK_EQ(s, string("ruby"));
860 CHECK_EQ(i, 1234);
861
862 // Type tests
863 {
864 char c;
865 CHECK(RE("(H)ello").FullMatch("Hello", &c));
866 CHECK_EQ(c, 'H');
867 }
868 {
869 unsigned char c;
870 CHECK(RE("(H)ello").FullMatch("Hello", &c));
871 CHECK_EQ(c, static_cast<unsigned char>('H'));
872 }
873 {
874 short v;
875 CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
876 CHECK(RE("(-?\\d+)").FullMatch("-100", &v)); CHECK_EQ(v, -100);
877 CHECK(RE("(-?\\d+)").FullMatch("32767", &v)); CHECK_EQ(v, 32767);
878 CHECK(RE("(-?\\d+)").FullMatch("-32768", &v)); CHECK_EQ(v, -32768);
879 CHECK(!RE("(-?\\d+)").FullMatch("-32769", &v));
880 CHECK(!RE("(-?\\d+)").FullMatch("32768", &v));
881 }
882 {
883 unsigned short v;
884 CHECK(RE("(\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
885 CHECK(RE("(\\d+)").FullMatch("32767", &v)); CHECK_EQ(v, 32767);
886 CHECK(RE("(\\d+)").FullMatch("65535", &v)); CHECK_EQ(v, 65535);
887 CHECK(!RE("(\\d+)").FullMatch("65536", &v));
888 }
889 {
890 int v;
891 static const int max_value = 0x7fffffff;
892 static const int min_value = -max_value - 1;
893 CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
894 CHECK(RE("(-?\\d+)").FullMatch("-100", &v)); CHECK_EQ(v, -100);
895 CHECK(RE("(-?\\d+)").FullMatch("2147483647", &v)); CHECK_EQ(v, max_value);
896 CHECK(RE("(-?\\d+)").FullMatch("-2147483648", &v)); CHECK_EQ(v, min_value);
897 CHECK(!RE("(-?\\d+)").FullMatch("-2147483649", &v));
898 CHECK(!RE("(-?\\d+)").FullMatch("2147483648", &v));
899 }
900 {
901 unsigned int v;
902 static const unsigned int max_value = 0xfffffffful;
903 CHECK(RE("(\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
904 CHECK(RE("(\\d+)").FullMatch("4294967295", &v)); CHECK_EQ(v, max_value);
905 CHECK(!RE("(\\d+)").FullMatch("4294967296", &v));
906 }
907 #ifdef HAVE_LONG_LONG
908 # if defined(__MINGW__) || defined(__MINGW32__)
909 # define LLD "%I64d"
910 # else
911 # define LLD "%lld"
912 # endif
913 {
914 long long v;
915 static const long long max_value = 0x7fffffffffffffffLL;
916 static const long long min_value = -max_value - 1;
917 char buf[32];
918
919 CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
920 CHECK(RE("(-?\\d+)").FullMatch("-100",&v)); CHECK_EQ(v, -100);
921
922 snprintf(buf, sizeof(buf), LLD, max_value);
923 CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
924
925 snprintf(buf, sizeof(buf), LLD, min_value);
926 CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, min_value);
927
928 snprintf(buf, sizeof(buf), LLD, max_value);
929 assert(buf[strlen(buf)-1] != '9');
930 buf[strlen(buf)-1]++;
931 CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
932
933 snprintf(buf, sizeof(buf), LLD, min_value);
934 assert(buf[strlen(buf)-1] != '9');
935 buf[strlen(buf)-1]++;
936 CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
937 }
938 #endif
939 #if defined HAVE_UNSIGNED_LONG_LONG && defined HAVE_LONG_LONG
940 {
941 unsigned long long v;
942 long long v2;
943 static const unsigned long long max_value = 0xffffffffffffffffULL;
944 char buf[32];
945
946 CHECK(RE("(-?\\d+)").FullMatch("100",&v)); CHECK_EQ(v, 100);
947 CHECK(RE("(-?\\d+)").FullMatch("-100",&v2)); CHECK_EQ(v2, -100);
948
949 snprintf(buf, sizeof(buf), "%llu", max_value);
950 CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
951
952 assert(buf[strlen(buf)-1] != '9');
953 buf[strlen(buf)-1]++;
954 CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
955 }
956 #endif
957 {
958 float v;
959 CHECK(RE("(.*)").FullMatch("100", &v));
960 CHECK(RE("(.*)").FullMatch("-100.", &v));
961 CHECK(RE("(.*)").FullMatch("1e23", &v));
962 }
963 {
964 double v;
965 CHECK(RE("(.*)").FullMatch("100", &v));
966 CHECK(RE("(.*)").FullMatch("-100.", &v));
967 CHECK(RE("(.*)").FullMatch("1e23", &v));
968 }
969
970 // Check that matching is fully anchored
971 CHECK(!RE("(\\d+)").FullMatch("x1001", &i));
972 CHECK(!RE("(\\d+)").FullMatch("1001x", &i));
973 CHECK(RE("x(\\d+)").FullMatch("x1001", &i)); CHECK_EQ(i, 1001);
974 CHECK(RE("(\\d+)x").FullMatch("1001x", &i)); CHECK_EQ(i, 1001);
975
976 // Braces
977 CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcd"));
978 CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcde"));
979 CHECK(!RE("[0-9a-f+.-]{5,}").FullMatch("0abc"));
980
981 // Complicated RE
982 CHECK(RE("foo|bar|[A-Z]").FullMatch("foo"));
983 CHECK(RE("foo|bar|[A-Z]").FullMatch("bar"));
984 CHECK(RE("foo|bar|[A-Z]").FullMatch("X"));
985 CHECK(!RE("foo|bar|[A-Z]").FullMatch("XY"));
986
987 // Check full-match handling (needs '$' tacked on internally)
988 CHECK(RE("fo|foo").FullMatch("fo"));
989 CHECK(RE("fo|foo").FullMatch("foo"));
990 CHECK(RE("fo|foo$").FullMatch("fo"));
991 CHECK(RE("fo|foo$").FullMatch("foo"));
992 CHECK(RE("foo$").FullMatch("foo"));
993 CHECK(!RE("foo\\$").FullMatch("foo$bar"));
994 CHECK(!RE("fo|bar").FullMatch("fox"));
995
996 // Uncomment the following if we change the handling of '$' to
997 // prevent it from matching a trailing newline
998 if (false) {
999 // Check that we don't get bitten by pcre's special handling of a
1000 // '\n' at the end of the string matching '$'
1001 CHECK(!RE("foo$").PartialMatch("foo\n"));
1002 }
1003
1004 // Number of args
1005 int a[16];
1006 CHECK(RE("").FullMatch(""));
1007
1008 memset(a, 0, sizeof(0));
1009 CHECK(RE("(\\d){1}").FullMatch("1",
1010 &a[0]));
1011 CHECK_EQ(a[0], 1);
1012
1013 memset(a, 0, sizeof(0));
1014 CHECK(RE("(\\d)(\\d)").FullMatch("12",
1015 &a[0], &a[1]));
1016 CHECK_EQ(a[0], 1);
1017 CHECK_EQ(a[1], 2);
1018
1019 memset(a, 0, sizeof(0));
1020 CHECK(RE("(\\d)(\\d)(\\d)").FullMatch("123",
1021 &a[0], &a[1], &a[2]));
1022 CHECK_EQ(a[0], 1);
1023 CHECK_EQ(a[1], 2);
1024 CHECK_EQ(a[2], 3);
1025
1026 memset(a, 0, sizeof(0));
1027 CHECK(RE("(\\d)(\\d)(\\d)(\\d)").FullMatch("1234",
1028 &a[0], &a[1], &a[2], &a[3]));
1029 CHECK_EQ(a[0], 1);
1030 CHECK_EQ(a[1], 2);
1031 CHECK_EQ(a[2], 3);
1032 CHECK_EQ(a[3], 4);
1033
1034 memset(a, 0, sizeof(0));
1035 CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("12345",
1036 &a[0], &a[1], &a[2],
1037 &a[3], &a[4]));
1038 CHECK_EQ(a[0], 1);
1039 CHECK_EQ(a[1], 2);
1040 CHECK_EQ(a[2], 3);
1041 CHECK_EQ(a[3], 4);
1042 CHECK_EQ(a[4], 5);
1043
1044 memset(a, 0, sizeof(0));
1045 CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("123456",
1046 &a[0], &a[1], &a[2],
1047 &a[3], &a[4], &a[5]));
1048 CHECK_EQ(a[0], 1);
1049 CHECK_EQ(a[1], 2);
1050 CHECK_EQ(a[2], 3);
1051 CHECK_EQ(a[3], 4);
1052 CHECK_EQ(a[4], 5);
1053 CHECK_EQ(a[5], 6);
1054
1055 memset(a, 0, sizeof(0));
1056 CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("1234567",
1057 &a[0], &a[1], &a[2], &a[3],
1058 &a[4], &a[5], &a[6]));
1059 CHECK_EQ(a[0], 1);
1060 CHECK_EQ(a[1], 2);
1061 CHECK_EQ(a[2], 3);
1062 CHECK_EQ(a[3], 4);
1063 CHECK_EQ(a[4], 5);
1064 CHECK_EQ(a[5], 6);
1065 CHECK_EQ(a[6], 7);
1066
1067 memset(a, 0, sizeof(0));
1068 CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)"
1069 "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch(
1070 "1234567890123456",
1071 &a[0], &a[1], &a[2], &a[3],
1072 &a[4], &a[5], &a[6], &a[7],
1073 &a[8], &a[9], &a[10], &a[11],
1074 &a[12], &a[13], &a[14], &a[15]));
1075 CHECK_EQ(a[0], 1);
1076 CHECK_EQ(a[1], 2);
1077 CHECK_EQ(a[2], 3);
1078 CHECK_EQ(a[3], 4);
1079 CHECK_EQ(a[4], 5);
1080 CHECK_EQ(a[5], 6);
1081 CHECK_EQ(a[6], 7);
1082 CHECK_EQ(a[7], 8);
1083 CHECK_EQ(a[8], 9);
1084 CHECK_EQ(a[9], 0);
1085 CHECK_EQ(a[10], 1);
1086 CHECK_EQ(a[11], 2);
1087 CHECK_EQ(a[12], 3);
1088 CHECK_EQ(a[13], 4);
1089 CHECK_EQ(a[14], 5);
1090 CHECK_EQ(a[15], 6);
1091
1092 /***** PartialMatch *****/
1093
1094 printf("Testing PartialMatch\n");
1095
1096 CHECK(RE("h.*o").PartialMatch("hello"));
1097 CHECK(RE("h.*o").PartialMatch("othello"));
1098 CHECK(RE("h.*o").PartialMatch("hello!"));
1099 CHECK(RE("((((((((((((((((((((x))))))))))))))))))))").PartialMatch("x"));
1100
1101 /***** other tests *****/
1102
1103 RadixTests();
1104 TestReplace();
1105 TestExtract();
1106 TestConsume();
1107 TestFindAndConsume();
1108 TestQuoteMetaAll();
1109 TestMatchNumberPeculiarity();
1110
1111 // Check the pattern() accessor
1112 {
1113 const string kPattern = "http://([^/]+)/.*";
1114 const RE re(kPattern);
1115 CHECK_EQ(kPattern, re.pattern());
1116 }
1117
1118 // Check RE error field.
1119 {
1120 RE re("foo");
1121 CHECK(re.error().empty()); // Must have no error
1122 }
1123
1124 #ifdef SUPPORT_UTF8
1125 // Check UTF-8 handling
1126 {
1127 printf("Testing UTF-8 handling\n");
1128
1129 // Three Japanese characters (nihongo)
1130 const char utf8_string[] = {
1131 0xe6, 0x97, 0xa5, // 65e5
1132 0xe6, 0x9c, 0xac, // 672c
1133 0xe8, 0xaa, 0x9e, // 8a9e
1134 0
1135 };
1136 const char utf8_pattern[] = {
1137 '.',
1138 0xe6, 0x9c, 0xac, // 672c
1139 '.',
1140 0
1141 };
1142
1143 // Both should match in either mode, bytes or UTF-8
1144 RE re_test1(".........");
1145 CHECK(re_test1.FullMatch(utf8_string));
1146 RE re_test2("...", pcrecpp::UTF8());
1147 CHECK(re_test2.FullMatch(utf8_string));
1148
1149 // Check that '.' matches one byte or UTF-8 character
1150 // according to the mode.
1151 string ss;
1152 RE re_test3("(.)");
1153 CHECK(re_test3.PartialMatch(utf8_string, &ss));
1154 CHECK_EQ(ss, string("\xe6"));
1155 RE re_test4("(.)", pcrecpp::UTF8());
1156 CHECK(re_test4.PartialMatch(utf8_string, &ss));
1157 CHECK_EQ(ss, string("\xe6\x97\xa5"));
1158
1159 // Check that string matches itself in either mode
1160 RE re_test5(utf8_string);
1161 CHECK(re_test5.FullMatch(utf8_string));
1162 RE re_test6(utf8_string, pcrecpp::UTF8());
1163 CHECK(re_test6.FullMatch(utf8_string));
1164
1165 // Check that pattern matches string only in UTF8 mode
1166 RE re_test7(utf8_pattern);
1167 CHECK(!re_test7.FullMatch(utf8_string));
1168 RE re_test8(utf8_pattern, pcrecpp::UTF8());
1169 CHECK(re_test8.FullMatch(utf8_string));
1170 }
1171
1172 // Check that ungreedy, UTF8 regular expressions don't match when they
1173 // oughtn't -- see bug 82246.
1174 {
1175 // This code always worked.
1176 const char* pattern = "\\w+X";
1177 const string target = "a aX";
1178 RE match_sentence(pattern);
1179 RE match_sentence_re(pattern, pcrecpp::UTF8());
1180
1181 CHECK(!match_sentence.FullMatch(target));
1182 CHECK(!match_sentence_re.FullMatch(target));
1183 }
1184
1185 {
1186 const char* pattern = "(?U)\\w+X";
1187 const string target = "a aX";
1188 RE match_sentence(pattern);
1189 RE match_sentence_re(pattern, pcrecpp::UTF8());
1190
1191 CHECK(!match_sentence.FullMatch(target));
1192 CHECK(!match_sentence_re.FullMatch(target));
1193 }
1194 #endif /* def SUPPORT_UTF8 */
1195
1196 printf("Testing error reporting\n");
1197
1198 { RE re("a\\1"); CHECK(!re.error().empty()); }
1199 {
1200 RE re("a[x");
1201 CHECK(!re.error().empty());
1202 }
1203 {
1204 RE re("a[z-a]");
1205 CHECK(!re.error().empty());
1206 }
1207 {
1208 RE re("a[[:foobar:]]");
1209 CHECK(!re.error().empty());
1210 }
1211 {
1212 RE re("a(b");
1213 CHECK(!re.error().empty());
1214 }
1215 {
1216 RE re("a\\");
1217 CHECK(!re.error().empty());
1218 }
1219
1220 // Test that recursion is stopped
1221 TestRecursion();
1222
1223 // Test Options
1224 if (getenv("VERBOSE_TEST") != NULL)
1225 VERBOSE_TEST = true;
1226 TestOptions();
1227
1228 // Test the constructors
1229 TestConstructors();
1230
1231 // Done
1232 printf("OK\n");
1233
1234 return 0;
1235 }

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12