/[pcre]/code/trunk/pcrecpp_unittest.cc
ViewVC logotype

Contents of /code/trunk/pcrecpp_unittest.cc

Parent Directory | Revision Log


Revision 257 - (show annotations) (download)
Wed Sep 19 09:11:19 2007 UTC (6 years, 11 months ago) by ph10
File size: 37738 byte(s)
Craig's patch to remove the checks for windows.h and instead check for 
_strtoi64 explicitly, and avoid using snprintf() at all.

1 // -*- coding: utf-8 -*-
2 //
3 // Copyright (c) 2005 - 2006, Google Inc.
4 // All rights reserved.
5 //
6 // Redistribution and use in source and binary forms, with or without
7 // modification, are permitted provided that the following conditions are
8 // met:
9 //
10 // * Redistributions of source code must retain the above copyright
11 // notice, this list of conditions and the following disclaimer.
12 // * Redistributions in binary form must reproduce the above
13 // copyright notice, this list of conditions and the following disclaimer
14 // in the documentation and/or other materials provided with the
15 // distribution.
16 // * Neither the name of Google Inc. nor the names of its
17 // contributors may be used to endorse or promote products derived from
18 // this software without specific prior written permission.
19 //
20 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 //
32 // Author: Sanjay Ghemawat
33 //
34 // TODO: Test extractions for PartialMatch/Consume
35
36 #ifdef HAVE_CONFIG_H
37 #include "config.h"
38 #endif
39
40 #include <stdio.h>
41 #include <cassert>
42 #include <vector>
43 #include "pcrecpp.h"
44
45 using pcrecpp::StringPiece;
46 using pcrecpp::RE;
47 using pcrecpp::RE_Options;
48 using pcrecpp::Hex;
49 using pcrecpp::Octal;
50 using pcrecpp::CRadix;
51
// Gates the extra, more detailed printf() output of the option tests in
// this file.  Nothing in the visible part of the file sets it to true.
static bool VERBOSE_TEST = false;
53
// CHECK(condition) terminates the test program when `condition` is false:
// it prints "<file>:<line>: Check failed: <condition text>" to stderr and
// calls exit(1).  It is *not* controlled by NDEBUG, so the condition is
// evaluated regardless of compilation mode.  Therefore, it is safe to do
// things like:
//    CHECK_EQ(fp->Write(x), 4)
#define CHECK(condition) do {                                   \
  if (!(condition)) {                                           \
    fprintf(stderr, "%s:%d: Check failed: %s\n",                \
            __FILE__, __LINE__, #condition);                    \
    exit(1);                                                    \
  }                                                             \
} while (0)

// CHECK_EQ(a, b) is CHECK applied to the comparison a == b.  Both operands
// are parenthesized so that an argument containing an operator of lower
// precedence than `==` (for example `x & mask` or `c ? y : z`) is compared
// as a whole instead of being regrouped by the macro expansion.
#define CHECK_EQ(a, b) CHECK((a) == (b))
67
68 static void Timing1(int num_iters) {
69 // Same pattern lots of times
70 RE pattern("ruby:\\d+");
71 StringPiece p("ruby:1234");
72 for (int j = num_iters; j > 0; j--) {
73 CHECK(pattern.FullMatch(p));
74 }
75 }
76
77 static void Timing2(int num_iters) {
78 // Same pattern lots of times
79 RE pattern("ruby:(\\d+)");
80 int i;
81 for (int j = num_iters; j > 0; j--) {
82 CHECK(pattern.FullMatch("ruby:1234", &i));
83 CHECK_EQ(i, 1234);
84 }
85 }
86
87 static void Timing3(int num_iters) {
88 string text_string;
89 for (int j = num_iters; j > 0; j--) {
90 text_string += "this is another line\n";
91 }
92
93 RE line_matcher(".*\n");
94 string line;
95 StringPiece text(text_string);
96 int counter = 0;
97 while (line_matcher.Consume(&text)) {
98 counter++;
99 }
100 printf("Matched %d lines\n", counter);
101 }
102
103 #if 0 // uncomment this if you have a way of defining VirtualProcessSize()
104
105 static void LeakTest() {
106 // Check for memory leaks
107 unsigned long long initial_size = 0;
108 for (int i = 0; i < 100000; i++) {
109 if (i == 50000) {
110 initial_size = VirtualProcessSize();
111 printf("Size after 50000: %llu\n", initial_size);
112 }
113 char buf[100]; // definitely big enough
114 sprintf(buf, "pat%09d", i);
115 RE newre(buf);
116 }
117 uint64 final_size = VirtualProcessSize();
118 printf("Size after 100000: %llu\n", final_size);
119 const double growth = double(final_size - initial_size) / final_size;
120 printf("Growth: %0.2f%%", growth * 100);
121 CHECK(growth < 0.02); // Allow < 2% growth
122 }
123
124 #endif
125
126 static void RadixTests() {
127 printf("Testing hex\n");
128
129 #define CHECK_HEX(type, value) \
130 do { \
131 type v; \
132 CHECK(RE("([0-9a-fA-F]+)[uUlL]*").FullMatch(#value, Hex(&v))); \
133 CHECK_EQ(v, 0x ## value); \
134 CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0x" #value, CRadix(&v))); \
135 CHECK_EQ(v, 0x ## value); \
136 } while(0)
137
138 CHECK_HEX(short, 2bad);
139 CHECK_HEX(unsigned short, 2badU);
140 CHECK_HEX(int, dead);
141 CHECK_HEX(unsigned int, deadU);
142 CHECK_HEX(long, 7eadbeefL);
143 CHECK_HEX(unsigned long, deadbeefUL);
144 #ifdef HAVE_LONG_LONG
145 CHECK_HEX(long long, 12345678deadbeefLL);
146 #endif
147 #ifdef HAVE_UNSIGNED_LONG_LONG
148 CHECK_HEX(unsigned long long, cafebabedeadbeefULL);
149 #endif
150
151 #undef CHECK_HEX
152
153 printf("Testing octal\n");
154
155 #define CHECK_OCTAL(type, value) \
156 do { \
157 type v; \
158 CHECK(RE("([0-7]+)[uUlL]*").FullMatch(#value, Octal(&v))); \
159 CHECK_EQ(v, 0 ## value); \
160 CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0" #value, CRadix(&v))); \
161 CHECK_EQ(v, 0 ## value); \
162 } while(0)
163
164 CHECK_OCTAL(short, 77777);
165 CHECK_OCTAL(unsigned short, 177777U);
166 CHECK_OCTAL(int, 17777777777);
167 CHECK_OCTAL(unsigned int, 37777777777U);
168 CHECK_OCTAL(long, 17777777777L);
169 CHECK_OCTAL(unsigned long, 37777777777UL);
170 #ifdef HAVE_LONG_LONG
171 CHECK_OCTAL(long long, 777777777777777777777LL);
172 #endif
173 #ifdef HAVE_UNSIGNED_LONG_LONG
174 CHECK_OCTAL(unsigned long long, 1777777777777777777777ULL);
175 #endif
176
177 #undef CHECK_OCTAL
178
179 printf("Testing decimal\n");
180
181 #define CHECK_DECIMAL(type, value) \
182 do { \
183 type v; \
184 CHECK(RE("(-?[0-9]+)[uUlL]*").FullMatch(#value, &v)); \
185 CHECK_EQ(v, value); \
186 CHECK(RE("(-?[0-9a-fA-FxX]+)[uUlL]*").FullMatch(#value, CRadix(&v))); \
187 CHECK_EQ(v, value); \
188 } while(0)
189
190 CHECK_DECIMAL(short, -1);
191 CHECK_DECIMAL(unsigned short, 9999);
192 CHECK_DECIMAL(int, -1000);
193 CHECK_DECIMAL(unsigned int, 12345U);
194 CHECK_DECIMAL(long, -10000000L);
195 CHECK_DECIMAL(unsigned long, 3083324652U);
196 #ifdef HAVE_LONG_LONG
197 CHECK_DECIMAL(long long, -100000000000000LL);
198 #endif
199 #ifdef HAVE_UNSIGNED_LONG_LONG
200 CHECK_DECIMAL(unsigned long long, 1234567890987654321ULL);
201 #endif
202
203 #undef CHECK_DECIMAL
204
205 }
206
207 static void TestReplace() {
208 printf("Testing Replace\n");
209
210 struct ReplaceTest {
211 const char *regexp;
212 const char *rewrite;
213 const char *original;
214 const char *single;
215 const char *global;
216 };
217 static const ReplaceTest tests[] = {
218 { "(qu|[b-df-hj-np-tv-z]*)([a-z]+)",
219 "\\2\\1ay",
220 "the quick brown fox jumps over the lazy dogs.",
221 "ethay quick brown fox jumps over the lazy dogs.",
222 "ethay ickquay ownbray oxfay umpsjay overay ethay azylay ogsday." },
223 { "\\w+",
224 "\\0-NOSPAM",
225 "paul.haahr@google.com",
226 "paul-NOSPAM.haahr@google.com",
227 "paul-NOSPAM.haahr-NOSPAM@google-NOSPAM.com-NOSPAM" },
228 { "^",
229 "(START)",
230 "foo",
231 "(START)foo",
232 "(START)foo" },
233 { "^",
234 "(START)",
235 "",
236 "(START)",
237 "(START)" },
238 { "$",
239 "(END)",
240 "",
241 "(END)",
242 "(END)" },
243 { "b",
244 "bb",
245 "ababababab",
246 "abbabababab",
247 "abbabbabbabbabb" },
248 { "b",
249 "bb",
250 "bbbbbb",
251 "bbbbbbb",
252 "bbbbbbbbbbbb" },
253 { "b+",
254 "bb",
255 "bbbbbb",
256 "bb",
257 "bb" },
258 { "b*",
259 "bb",
260 "bbbbbb",
261 "bb",
262 "bb" },
263 { "b*",
264 "bb",
265 "aaaaa",
266 "bbaaaaa",
267 "bbabbabbabbabbabb" },
268 { "b*",
269 "bb",
270 "aa\naa\n",
271 "bbaa\naa\n",
272 "bbabbabb\nbbabbabb\nbb" },
273 { "b*",
274 "bb",
275 "aa\raa\r",
276 "bbaa\raa\r",
277 "bbabbabb\rbbabbabb\rbb" },
278 { "b*",
279 "bb",
280 "aa\r\naa\r\n",
281 "bbaa\r\naa\r\n",
282 "bbabbabb\r\nbbabbabb\r\nbb" },
283 #ifdef SUPPORT_UTF8
284 { "b*",
285 "bb",
286 "\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8", // utf8
287 "bb\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8",
288 "bb\xE3\x83\x9B""bb""\xE3\x83\xBC""bb""\xE3\x83\xA0""bb""\xE3\x81\xB8""bb" },
289 { "b*",
290 "bb",
291 "\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n", // utf8
292 "bb\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n",
293 ("bb\xE3\x83\x9B""bb\r\nbb""\xE3\x83\xBC""bb\rbb""\xE3\x83\xA0"
294 "bb\nbb""\xE3\x81\xB8""bb\r\nbb") },
295 #endif
296 { "", NULL, NULL, NULL, NULL }
297 };
298
299 #ifdef SUPPORT_UTF8
300 const bool support_utf8 = true;
301 #else
302 const bool support_utf8 = false;
303 #endif
304
305 for (const ReplaceTest *t = tests; t->original != NULL; ++t) {
306 RE re(t->regexp, RE_Options(PCRE_NEWLINE_CRLF).set_utf8(support_utf8));
307 assert(re.error().empty());
308 string one(t->original);
309 CHECK(re.Replace(t->rewrite, &one));
310 CHECK_EQ(one, t->single);
311 string all(t->original);
312 CHECK(re.GlobalReplace(t->rewrite, &all) > 0);
313 CHECK_EQ(all, t->global);
314 }
315
316 // One final test: test \r\n replacement when we're not in CRLF mode
317 {
318 RE re("b*", RE_Options(PCRE_NEWLINE_CR).set_utf8(support_utf8));
319 assert(re.error().empty());
320 string all("aa\r\naa\r\n");
321 CHECK(re.GlobalReplace("bb", &all) > 0);
322 CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
323 }
324 {
325 RE re("b*", RE_Options(PCRE_NEWLINE_LF).set_utf8(support_utf8));
326 assert(re.error().empty());
327 string all("aa\r\naa\r\n");
328 CHECK(re.GlobalReplace("bb", &all) > 0);
329 CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
330 }
331 // TODO: test what happens when no PCRE_NEWLINE_* flag is set.
332 // Alas, the answer depends on how pcre was compiled.
333 }
334
335 static void TestExtract() {
336 printf("Testing Extract\n");
337
338 string s;
339
340 CHECK(RE("(.*)@([^.]*)").Extract("\\2!\\1", "boris@kremvax.ru", &s));
341 CHECK_EQ(s, "kremvax!boris");
342
343 // check the RE interface as well
344 CHECK(RE(".*").Extract("'\\0'", "foo", &s));
345 CHECK_EQ(s, "'foo'");
346 CHECK(!RE("bar").Extract("'\\0'", "baz", &s));
347 CHECK_EQ(s, "'foo'");
348 }
349
350 static void TestConsume() {
351 printf("Testing Consume\n");
352
353 string word;
354
355 string s(" aaa b!@#$@#$cccc");
356 StringPiece input(s);
357
358 RE r("\\s*(\\w+)"); // matches a word, possibly proceeded by whitespace
359 CHECK(r.Consume(&input, &word));
360 CHECK_EQ(word, "aaa");
361 CHECK(r.Consume(&input, &word));
362 CHECK_EQ(word, "b");
363 CHECK(! r.Consume(&input, &word));
364 }
365
366 static void TestFindAndConsume() {
367 printf("Testing FindAndConsume\n");
368
369 string word;
370
371 string s(" aaa b!@#$@#$cccc");
372 StringPiece input(s);
373
374 RE r("(\\w+)"); // matches a word
375 CHECK(r.FindAndConsume(&input, &word));
376 CHECK_EQ(word, "aaa");
377 CHECK(r.FindAndConsume(&input, &word));
378 CHECK_EQ(word, "b");
379 CHECK(r.FindAndConsume(&input, &word));
380 CHECK_EQ(word, "cccc");
381 CHECK(! r.FindAndConsume(&input, &word));
382 }
383
384 static void TestMatchNumberPeculiarity() {
385 printf("Testing match-number peculiaraity\n");
386
387 string word1;
388 string word2;
389 string word3;
390
391 RE r("(foo)|(bar)|(baz)");
392 CHECK(r.PartialMatch("foo", &word1, &word2, &word3));
393 CHECK_EQ(word1, "foo");
394 CHECK_EQ(word2, "");
395 CHECK_EQ(word3, "");
396 CHECK(r.PartialMatch("bar", &word1, &word2, &word3));
397 CHECK_EQ(word1, "");
398 CHECK_EQ(word2, "bar");
399 CHECK_EQ(word3, "");
400 CHECK(r.PartialMatch("baz", &word1, &word2, &word3));
401 CHECK_EQ(word1, "");
402 CHECK_EQ(word2, "");
403 CHECK_EQ(word3, "baz");
404 CHECK(!r.PartialMatch("f", &word1, &word2, &word3));
405
406 string a;
407 CHECK(RE("(foo)|hello").FullMatch("hello", &a));
408 CHECK_EQ(a, "");
409 }
410
411 static void TestRecursion() {
412 printf("Testing recursion\n");
413
414 // Get one string that passes (sometimes), one that never does.
415 string text_good("abcdefghijk");
416 string text_bad("acdefghijkl");
417
418 // According to pcretest, matching text_good against (\w+)*b
419 // requires match_limit of at least 8192, and match_recursion_limit
420 // of at least 37.
421
422 RE_Options options_ml;
423 options_ml.set_match_limit(8192);
424 RE re("(\\w+)*b", options_ml);
425 CHECK(re.PartialMatch(text_good) == true);
426 CHECK(re.PartialMatch(text_bad) == false);
427 CHECK(re.FullMatch(text_good) == false);
428 CHECK(re.FullMatch(text_bad) == false);
429
430 options_ml.set_match_limit(1024);
431 RE re2("(\\w+)*b", options_ml);
432 CHECK(re2.PartialMatch(text_good) == false); // because of match_limit
433 CHECK(re2.PartialMatch(text_bad) == false);
434 CHECK(re2.FullMatch(text_good) == false);
435 CHECK(re2.FullMatch(text_bad) == false);
436
437 RE_Options options_mlr;
438 options_mlr.set_match_limit_recursion(50);
439 RE re3("(\\w+)*b", options_mlr);
440 CHECK(re3.PartialMatch(text_good) == true);
441 CHECK(re3.PartialMatch(text_bad) == false);
442 CHECK(re3.FullMatch(text_good) == false);
443 CHECK(re3.FullMatch(text_bad) == false);
444
445 options_mlr.set_match_limit_recursion(10);
446 RE re4("(\\w+)*b", options_mlr);
447 CHECK(re4.PartialMatch(text_good) == false);
448 CHECK(re4.PartialMatch(text_bad) == false);
449 CHECK(re4.FullMatch(text_good) == false);
450 CHECK(re4.FullMatch(text_bad) == false);
451 }
452
453 // A meta-quoted string, interpreted as a pattern, should always match
454 // the original unquoted string.
455 static void TestQuoteMeta(string unquoted, RE_Options options = RE_Options()) {
456 string quoted = RE::QuoteMeta(unquoted);
457 RE re(quoted, options);
458 CHECK(re.FullMatch(unquoted));
459 }
460
461 // A string containing meaningful regexp characters, which is then meta-
462 // quoted, should not generally match a string the unquoted string does.
463 static void NegativeTestQuoteMeta(string unquoted, string should_not_match,
464 RE_Options options = RE_Options()) {
465 string quoted = RE::QuoteMeta(unquoted);
466 RE re(quoted, options);
467 CHECK(!re.FullMatch(should_not_match));
468 }
469
470 // Tests that quoted meta characters match their original strings,
471 // and that a few things that shouldn't match indeed do not.
472 static void TestQuotaMetaSimple() {
473 TestQuoteMeta("foo");
474 TestQuoteMeta("foo.bar");
475 TestQuoteMeta("foo\\.bar");
476 TestQuoteMeta("[1-9]");
477 TestQuoteMeta("1.5-2.0?");
478 TestQuoteMeta("\\d");
479 TestQuoteMeta("Who doesn't like ice cream?");
480 TestQuoteMeta("((a|b)c?d*e+[f-h]i)");
481 TestQuoteMeta("((?!)xxx).*yyy");
482 TestQuoteMeta("([");
483 }
484
485 static void TestQuoteMetaSimpleNegative() {
486 NegativeTestQuoteMeta("foo", "bar");
487 NegativeTestQuoteMeta("...", "bar");
488 NegativeTestQuoteMeta("\\.", ".");
489 NegativeTestQuoteMeta("\\.", "..");
490 NegativeTestQuoteMeta("(a)", "a");
491 NegativeTestQuoteMeta("(a|b)", "a");
492 NegativeTestQuoteMeta("(a|b)", "(a)");
493 NegativeTestQuoteMeta("(a|b)", "a|b");
494 NegativeTestQuoteMeta("[0-9]", "0");
495 NegativeTestQuoteMeta("[0-9]", "0-9");
496 NegativeTestQuoteMeta("[0-9]", "[9]");
497 NegativeTestQuoteMeta("((?!)xxx)", "xxx");
498 }
499
500 static void TestQuoteMetaLatin1() {
501 TestQuoteMeta("3\xb2 = 9");
502 }
503
// QuoteMeta checks on UTF-8 input with the UTF8 option enabled.  The body
// is compiled only when SUPPORT_UTF8 is defined; otherwise this is a no-op.
static void TestQuoteMetaUtf8() {
#ifdef SUPPORT_UTF8
  const RE_Options utf8 = pcrecpp::UTF8();

  TestQuoteMeta("Pl\xc3\xa1\x63ido Domingo", utf8);
  TestQuoteMeta("xyz", utf8);                 // No fancy utf8
  TestQuoteMeta("\xc2\xb0", utf8);            // 2-byte utf8 (degree symbol)
  TestQuoteMeta("27\xc2\xb0 degrees", utf8);  // As a middle character
  TestQuoteMeta("\xe2\x80\xb3", utf8);        // 3-byte utf8 (double prime)
  TestQuoteMeta("\xf0\x9d\x85\x9f", utf8);    // 4-byte utf8 (music note)

  // Interpreted as Latin-1, but should still work
  TestQuoteMeta("27\xc2\xb0");

  // 2-byte utf (degree symbol): the quoted pattern must not match text in
  // which each byte of the symbol is preceded by a backslash.
  NegativeTestQuoteMeta("27\xc2\xb0",
                        "27\\\xc2\\\xb0",
                        utf8);
#endif
}
518
519 static void TestQuoteMetaAll() {
520 printf("Testing QuoteMeta\n");
521 TestQuotaMetaSimple();
522 TestQuoteMetaSimpleNegative();
523 TestQuoteMetaLatin1();
524 TestQuoteMetaUtf8();
525 }
526
527 //
528 // Options tests contributed by
529 // Giuseppe Maxia, CTO, Stardata s.r.l.
530 // July 2005
531 //
532 static void GetOneOptionResult(
533 const char *option_name,
534 const char *regex,
535 const char *str,
536 RE_Options options,
537 bool full,
538 string expected) {
539
540 printf("Testing Option <%s>\n", option_name);
541 if(VERBOSE_TEST)
542 printf("/%s/ finds \"%s\" within \"%s\" \n",
543 regex,
544 expected.c_str(),
545 str);
546 string captured("");
547 if (full)
548 RE(regex,options).FullMatch(str, &captured);
549 else
550 RE(regex,options).PartialMatch(str, &captured);
551 CHECK_EQ(captured, expected);
552 }
553
554 static void TestOneOption(
555 const char *option_name,
556 const char *regex,
557 const char *str,
558 RE_Options options,
559 bool full,
560 bool assertive = true) {
561
562 printf("Testing Option <%s>\n", option_name);
563 if (VERBOSE_TEST)
564 printf("'%s' %s /%s/ \n",
565 str,
566 (assertive? "matches" : "doesn't match"),
567 regex);
568 if (assertive) {
569 if (full)
570 CHECK(RE(regex,options).FullMatch(str));
571 else
572 CHECK(RE(regex,options).PartialMatch(str));
573 } else {
574 if (full)
575 CHECK(!RE(regex,options).FullMatch(str));
576 else
577 CHECK(!RE(regex,options).PartialMatch(str));
578 }
579 }
580
581 static void Test_CASELESS() {
582 RE_Options options;
583 RE_Options options2;
584
585 options.set_caseless(true);
586 TestOneOption("CASELESS (class)", "HELLO", "hello", options, false);
587 TestOneOption("CASELESS (class2)", "HELLO", "hello", options2.set_caseless(true), false);
588 TestOneOption("CASELESS (class)", "^[A-Z]+$", "Hello", options, false);
589
590 TestOneOption("CASELESS (function)", "HELLO", "hello", pcrecpp::CASELESS(), false);
591 TestOneOption("CASELESS (function)", "^[A-Z]+$", "Hello", pcrecpp::CASELESS(), false);
592 options.set_caseless(false);
593 TestOneOption("no CASELESS", "HELLO", "hello", options, false, false);
594 }
595
596 static void Test_MULTILINE() {
597 RE_Options options;
598 RE_Options options2;
599 const char *str = "HELLO\n" "cruel\n" "world\n";
600
601 options.set_multiline(true);
602 TestOneOption("MULTILINE (class)", "^cruel$", str, options, false);
603 TestOneOption("MULTILINE (class2)", "^cruel$", str, options2.set_multiline(true), false);
604 TestOneOption("MULTILINE (function)", "^cruel$", str, pcrecpp::MULTILINE(), false);
605 options.set_multiline(false);
606 TestOneOption("no MULTILINE", "^cruel$", str, options, false, false);
607 }
608
609 static void Test_DOTALL() {
610 RE_Options options;
611 RE_Options options2;
612 const char *str = "HELLO\n" "cruel\n" "world";
613
614 options.set_dotall(true);
615 TestOneOption("DOTALL (class)", "HELLO.*world", str, options, true);
616 TestOneOption("DOTALL (class2)", "HELLO.*world", str, options2.set_dotall(true), true);
617 TestOneOption("DOTALL (function)", "HELLO.*world", str, pcrecpp::DOTALL(), true);
618 options.set_dotall(false);
619 TestOneOption("no DOTALL", "HELLO.*world", str, options, true, false);
620 }
621
622 static void Test_DOLLAR_ENDONLY() {
623 RE_Options options;
624 RE_Options options2;
625 const char *str = "HELLO world\n";
626
627 TestOneOption("no DOLLAR_ENDONLY", "world$", str, options, false);
628 options.set_dollar_endonly(true);
629 TestOneOption("DOLLAR_ENDONLY 1", "world$", str, options, false, false);
630 TestOneOption("DOLLAR_ENDONLY 2", "world$", str, options2.set_dollar_endonly(true), false, false);
631 }
632
633 static void Test_EXTRA() {
634 RE_Options options;
635 const char *str = "HELLO";
636
637 options.set_extra(true);
638 TestOneOption("EXTRA 1", "\\HELL\\O", str, options, true, false );
639 TestOneOption("EXTRA 2", "\\HELL\\O", str, RE_Options().set_extra(true), true, false );
640 options.set_extra(false);
641 TestOneOption("no EXTRA", "\\HELL\\O", str, options, true );
642 }
643
644 static void Test_EXTENDED() {
645 RE_Options options;
646 RE_Options options2;
647 const char *str = "HELLO world";
648
649 options.set_extended(true);
650 TestOneOption("EXTENDED (class)", "HELLO world", str, options, false, false);
651 TestOneOption("EXTENDED (class2)", "HELLO world", str, options2.set_extended(true), false, false);
652 TestOneOption("EXTENDED (class)",
653 "^ HE L{2} O "
654 "\\s+ "
655 "\\w+ $ ",
656 str,
657 options,
658 false);
659
660 TestOneOption("EXTENDED (function)", "HELLO world", str, pcrecpp::EXTENDED(), false, false);
661 TestOneOption("EXTENDED (function)",
662 "^ HE L{2} O "
663 "\\s+ "
664 "\\w+ $ ",
665 str,
666 pcrecpp::EXTENDED(),
667 false);
668
669 options.set_extended(false);
670 TestOneOption("no EXTENDED", "HELLO world", str, options, false);
671 }
672
673 static void Test_NO_AUTO_CAPTURE() {
674 RE_Options options;
675 const char *str = "HELLO world";
676 string captured;
677
678 printf("Testing Option <no NO_AUTO_CAPTURE>\n");
679 if (VERBOSE_TEST)
680 printf("parentheses capture text\n");
681 RE re("(world|universe)$", options);
682 CHECK(re.Extract("\\1", str , &captured));
683 CHECK_EQ(captured, "world");
684 options.set_no_auto_capture(true);
685 printf("testing Option <NO_AUTO_CAPTURE>\n");
686 if (VERBOSE_TEST)
687 printf("parentheses do not capture text\n");
688 re.Extract("\\1",str, &captured );
689 CHECK_EQ(captured, "world");
690 }
691
692 static void Test_UNGREEDY() {
693 RE_Options options;
694 const char *str = "HELLO, 'this' is the 'world'";
695
696 options.set_ungreedy(true);
697 GetOneOptionResult("UNGREEDY 1", "('.*')", str, options, false, "'this'" );
698 GetOneOptionResult("UNGREEDY 2", "('.*')", str, RE_Options().set_ungreedy(true), false, "'this'" );
699 GetOneOptionResult("UNGREEDY", "('.*?')", str, options, false, "'this' is the 'world'" );
700
701 options.set_ungreedy(false);
702 GetOneOptionResult("no UNGREEDY", "('.*')", str, options, false, "'this' is the 'world'" );
703 GetOneOptionResult("no UNGREEDY", "('.*?')", str, options, false, "'this'" );
704 }
705
706 static void Test_all_options() {
707 const char *str = "HELLO\n" "cruel\n" "world";
708 RE_Options options;
709 options.set_all_options(PCRE_CASELESS | PCRE_DOTALL);
710
711 TestOneOption("all_options (CASELESS|DOTALL)", "^hello.*WORLD", str , options, false);
712 options.set_all_options(0);
713 TestOneOption("all_options (0)", "^hello.*WORLD", str , options, false, false);
714 options.set_all_options(PCRE_MULTILINE | PCRE_EXTENDED);
715
716 TestOneOption("all_options (MULTILINE|EXTENDED)", " ^ c r u e l $ ", str, options, false);
717 TestOneOption("all_options (MULTILINE|EXTENDED) with constructor",
718 " ^ c r u e l $ ",
719 str,
720 RE_Options(PCRE_MULTILINE | PCRE_EXTENDED),
721 false);
722
723 TestOneOption("all_options (MULTILINE|EXTENDED) with concatenation",
724 " ^ c r u e l $ ",
725 str,
726 RE_Options()
727 .set_multiline(true)
728 .set_extended(true),
729 false);
730
731 options.set_all_options(0);
732 TestOneOption("all_options (0)", "^ c r u e l $", str, options, false, false);
733
734 }
735
736 static void TestOptions() {
737 printf("Testing Options\n");
738 Test_CASELESS();
739 Test_MULTILINE();
740 Test_DOTALL();
741 Test_DOLLAR_ENDONLY();
742 Test_EXTENDED();
743 Test_NO_AUTO_CAPTURE();
744 Test_UNGREEDY();
745 Test_EXTRA();
746 Test_all_options();
747 }
748
749 static void TestConstructors() {
750 printf("Testing constructors\n");
751
752 RE_Options options;
753 options.set_dotall(true);
754 const char *str = "HELLO\n" "cruel\n" "world";
755
756 RE orig("HELLO.*world", options);
757 CHECK(orig.FullMatch(str));
758
759 RE copy1(orig);
760 CHECK(copy1.FullMatch(str));
761
762 RE copy2("not a match");
763 CHECK(!copy2.FullMatch(str));
764 copy2 = copy1;
765 CHECK(copy2.FullMatch(str));
766 copy2 = orig;
767 CHECK(copy2.FullMatch(str));
768
769 // Make sure when we assign to ourselves, nothing bad happens
770 orig = orig;
771 copy1 = copy1;
772 copy2 = copy2;
773 CHECK(orig.FullMatch(str));
774 CHECK(copy1.FullMatch(str));
775 CHECK(copy2.FullMatch(str));
776 }
777
778 int main(int argc, char** argv) {
779 // Treat any flag as --help
780 if (argc > 1 && argv[1][0] == '-') {
781 printf("Usage: %s [timing1|timing2|timing3 num-iters]\n"
782 " If 'timingX ###' is specified, run the given timing test\n"
783 " with the given number of iterations, rather than running\n"
784 " the default corectness test.\n", argv[0]);
785 return 0;
786 }
787
788 if (argc > 1) {
789 if ( argc == 2 || atoi(argv[2]) == 0) {
790 printf("timing mode needs a num-iters argument\n");
791 return 1;
792 }
793 if (!strcmp(argv[1], "timing1"))
794 Timing1(atoi(argv[2]));
795 else if (!strcmp(argv[1], "timing2"))
796 Timing2(atoi(argv[2]));
797 else if (!strcmp(argv[1], "timing3"))
798 Timing3(atoi(argv[2]));
799 else
800 printf("Unknown argument '%s'\n", argv[1]);
801 return 0;
802 }
803
804 printf("Testing FullMatch\n");
805
806 int i;
807 string s;
808
809 /***** FullMatch with no args *****/
810
811 CHECK(RE("h.*o").FullMatch("hello"));
812 CHECK(!RE("h.*o").FullMatch("othello")); // Must be anchored at front
813 CHECK(!RE("h.*o").FullMatch("hello!")); // Must be anchored at end
814 CHECK(RE("a*").FullMatch("aaaa")); // Fullmatch with normal op
815 CHECK(RE("a*?").FullMatch("aaaa")); // Fullmatch with nongreedy op
816 CHECK(RE("a*?\\z").FullMatch("aaaa")); // Two unusual ops
817
818 /***** FullMatch with args *****/
819
820 // Zero-arg
821 CHECK(RE("\\d+").FullMatch("1001"));
822
823 // Single-arg
824 CHECK(RE("(\\d+)").FullMatch("1001", &i));
825 CHECK_EQ(i, 1001);
826 CHECK(RE("(-?\\d+)").FullMatch("-123", &i));
827 CHECK_EQ(i, -123);
828 CHECK(!RE("()\\d+").FullMatch("10", &i));
829 CHECK(!RE("(\\d+)").FullMatch("1234567890123456789012345678901234567890",
830 &i));
831
832 // Digits surrounding integer-arg
833 CHECK(RE("1(\\d*)4").FullMatch("1234", &i));
834 CHECK_EQ(i, 23);
835 CHECK(RE("(\\d)\\d+").FullMatch("1234", &i));
836 CHECK_EQ(i, 1);
837 CHECK(RE("(-\\d)\\d+").FullMatch("-1234", &i));
838 CHECK_EQ(i, -1);
839 CHECK(RE("(\\d)").PartialMatch("1234", &i));
840 CHECK_EQ(i, 1);
841 CHECK(RE("(-\\d)").PartialMatch("-1234", &i));
842 CHECK_EQ(i, -1);
843
844 // String-arg
845 CHECK(RE("h(.*)o").FullMatch("hello", &s));
846 CHECK_EQ(s, string("ell"));
847
848 // StringPiece-arg
849 StringPiece sp;
850 CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &sp, &i));
851 CHECK_EQ(sp.size(), 4);
852 CHECK(memcmp(sp.data(), "ruby", 4) == 0);
853 CHECK_EQ(i, 1234);
854
855 // Multi-arg
856 CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &s, &i));
857 CHECK_EQ(s, string("ruby"));
858 CHECK_EQ(i, 1234);
859
860 // Ignored arg
861 CHECK(RE("(\\w+)(:)(\\d+)").FullMatch("ruby:1234", &s, (void*)NULL, &i));
862 CHECK_EQ(s, string("ruby"));
863 CHECK_EQ(i, 1234);
864
865 // Type tests
866 {
867 char c;
868 CHECK(RE("(H)ello").FullMatch("Hello", &c));
869 CHECK_EQ(c, 'H');
870 }
871 {
872 unsigned char c;
873 CHECK(RE("(H)ello").FullMatch("Hello", &c));
874 CHECK_EQ(c, static_cast<unsigned char>('H'));
875 }
876 {
877 short v;
878 CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
879 CHECK(RE("(-?\\d+)").FullMatch("-100", &v)); CHECK_EQ(v, -100);
880 CHECK(RE("(-?\\d+)").FullMatch("32767", &v)); CHECK_EQ(v, 32767);
881 CHECK(RE("(-?\\d+)").FullMatch("-32768", &v)); CHECK_EQ(v, -32768);
882 CHECK(!RE("(-?\\d+)").FullMatch("-32769", &v));
883 CHECK(!RE("(-?\\d+)").FullMatch("32768", &v));
884 }
885 {
886 unsigned short v;
887 CHECK(RE("(\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
888 CHECK(RE("(\\d+)").FullMatch("32767", &v)); CHECK_EQ(v, 32767);
889 CHECK(RE("(\\d+)").FullMatch("65535", &v)); CHECK_EQ(v, 65535);
890 CHECK(!RE("(\\d+)").FullMatch("65536", &v));
891 }
892 {
893 int v;
894 static const int max_value = 0x7fffffff;
895 static const int min_value = -max_value - 1;
896 CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
897 CHECK(RE("(-?\\d+)").FullMatch("-100", &v)); CHECK_EQ(v, -100);
898 CHECK(RE("(-?\\d+)").FullMatch("2147483647", &v)); CHECK_EQ(v, max_value);
899 CHECK(RE("(-?\\d+)").FullMatch("-2147483648", &v)); CHECK_EQ(v, min_value);
900 CHECK(!RE("(-?\\d+)").FullMatch("-2147483649", &v));
901 CHECK(!RE("(-?\\d+)").FullMatch("2147483648", &v));
902 }
903 {
904 unsigned int v;
905 static const unsigned int max_value = 0xfffffffful;
906 CHECK(RE("(\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
907 CHECK(RE("(\\d+)").FullMatch("4294967295", &v)); CHECK_EQ(v, max_value);
908 CHECK(!RE("(\\d+)").FullMatch("4294967296", &v));
909 }
910 #ifdef HAVE_LONG_LONG
911 # if defined(__MINGW__) || defined(__MINGW32__)
912 # define LLD "%I64d"
913 # define LLU "%I64u"
914 # else
915 # define LLD "%lld"
916 # define LLU "%llu"
917 # endif
918 {
919 long long v;
920 static const long long max_value = 0x7fffffffffffffffLL;
921 static const long long min_value = -max_value - 1;
922 char buf[32]; // definitely big enough for a long long
923
924 CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
925 CHECK(RE("(-?\\d+)").FullMatch("-100",&v)); CHECK_EQ(v, -100);
926
927 sprintf(buf, LLD, max_value);
928 CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
929
930 sprintf(buf, LLD, min_value);
931 CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, min_value);
932
933 sprintf(buf, LLD, max_value);
934 assert(buf[strlen(buf)-1] != '9');
935 buf[strlen(buf)-1]++;
936 CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
937
938 sprintf(buf, LLD, min_value);
939 assert(buf[strlen(buf)-1] != '9');
940 buf[strlen(buf)-1]++;
941 CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
942 }
943 #endif
944 #if defined HAVE_UNSIGNED_LONG_LONG && defined HAVE_LONG_LONG
945 {
946 unsigned long long v;
947 long long v2;
948 static const unsigned long long max_value = 0xffffffffffffffffULL;
949 char buf[32]; // definitely big enough for a unsigned long long
950
951 CHECK(RE("(-?\\d+)").FullMatch("100",&v)); CHECK_EQ(v, 100);
952 CHECK(RE("(-?\\d+)").FullMatch("-100",&v2)); CHECK_EQ(v2, -100);
953
954 sprintf(buf, LLU, max_value);
955 CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
956
957 assert(buf[strlen(buf)-1] != '9');
958 buf[strlen(buf)-1]++;
959 CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
960 }
961 #endif
962 {
963 float v;
964 CHECK(RE("(.*)").FullMatch("100", &v));
965 CHECK(RE("(.*)").FullMatch("-100.", &v));
966 CHECK(RE("(.*)").FullMatch("1e23", &v));
967 }
968 {
969 double v;
970 CHECK(RE("(.*)").FullMatch("100", &v));
971 CHECK(RE("(.*)").FullMatch("-100.", &v));
972 CHECK(RE("(.*)").FullMatch("1e23", &v));
973 }
974
975 // Check that matching is fully anchored
976 CHECK(!RE("(\\d+)").FullMatch("x1001", &i));
977 CHECK(!RE("(\\d+)").FullMatch("1001x", &i));
978 CHECK(RE("x(\\d+)").FullMatch("x1001", &i)); CHECK_EQ(i, 1001);
979 CHECK(RE("(\\d+)x").FullMatch("1001x", &i)); CHECK_EQ(i, 1001);
980
981 // Braces
982 CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcd"));
983 CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcde"));
984 CHECK(!RE("[0-9a-f+.-]{5,}").FullMatch("0abc"));
985
986 // Complicated RE
987 CHECK(RE("foo|bar|[A-Z]").FullMatch("foo"));
988 CHECK(RE("foo|bar|[A-Z]").FullMatch("bar"));
989 CHECK(RE("foo|bar|[A-Z]").FullMatch("X"));
990 CHECK(!RE("foo|bar|[A-Z]").FullMatch("XY"));
991
992 // Check full-match handling (needs '$' tacked on internally)
993 CHECK(RE("fo|foo").FullMatch("fo"));
994 CHECK(RE("fo|foo").FullMatch("foo"));
995 CHECK(RE("fo|foo$").FullMatch("fo"));
996 CHECK(RE("fo|foo$").FullMatch("foo"));
997 CHECK(RE("foo$").FullMatch("foo"));
998 CHECK(!RE("foo\\$").FullMatch("foo$bar"));
999 CHECK(!RE("fo|bar").FullMatch("fox"));
1000
1001 // Enable the following (change 'false' to 'true') if we change the
1002 // handling of '$' to prevent it from matching a trailing newline
1003 if (false) {
1004 // Check that we don't get bitten by pcre's special handling of a
1005 // '\n' at the end of the string matching '$'
1006 CHECK(!RE("foo$").PartialMatch("foo\n"));
1007 }
1008
1009 // Number of args
1010 int a[16];
1011 CHECK(RE("").FullMatch(""));
1012
1013 memset(a, 0, sizeof(0));
1014 CHECK(RE("(\\d){1}").FullMatch("1",
1015 &a[0]));
1016 CHECK_EQ(a[0], 1);
1017
1018 memset(a, 0, sizeof(0));
1019 CHECK(RE("(\\d)(\\d)").FullMatch("12",
1020 &a[0], &a[1]));
1021 CHECK_EQ(a[0], 1);
1022 CHECK_EQ(a[1], 2);
1023
1024 memset(a, 0, sizeof(0));
1025 CHECK(RE("(\\d)(\\d)(\\d)").FullMatch("123",
1026 &a[0], &a[1], &a[2]));
1027 CHECK_EQ(a[0], 1);
1028 CHECK_EQ(a[1], 2);
1029 CHECK_EQ(a[2], 3);
1030
1031 memset(a, 0, sizeof(0));
1032 CHECK(RE("(\\d)(\\d)(\\d)(\\d)").FullMatch("1234",
1033 &a[0], &a[1], &a[2], &a[3]));
1034 CHECK_EQ(a[0], 1);
1035 CHECK_EQ(a[1], 2);
1036 CHECK_EQ(a[2], 3);
1037 CHECK_EQ(a[3], 4);
1038
1039 memset(a, 0, sizeof(0));
1040 CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("12345",
1041 &a[0], &a[1], &a[2],
1042 &a[3], &a[4]));
1043 CHECK_EQ(a[0], 1);
1044 CHECK_EQ(a[1], 2);
1045 CHECK_EQ(a[2], 3);
1046 CHECK_EQ(a[3], 4);
1047 CHECK_EQ(a[4], 5);
1048
1049 memset(a, 0, sizeof(0));
1050 CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("123456",
1051 &a[0], &a[1], &a[2],
1052 &a[3], &a[4], &a[5]));
1053 CHECK_EQ(a[0], 1);
1054 CHECK_EQ(a[1], 2);
1055 CHECK_EQ(a[2], 3);
1056 CHECK_EQ(a[3], 4);
1057 CHECK_EQ(a[4], 5);
1058 CHECK_EQ(a[5], 6);
1059
1060 memset(a, 0, sizeof(0));
1061 CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("1234567",
1062 &a[0], &a[1], &a[2], &a[3],
1063 &a[4], &a[5], &a[6]));
1064 CHECK_EQ(a[0], 1);
1065 CHECK_EQ(a[1], 2);
1066 CHECK_EQ(a[2], 3);
1067 CHECK_EQ(a[3], 4);
1068 CHECK_EQ(a[4], 5);
1069 CHECK_EQ(a[5], 6);
1070 CHECK_EQ(a[6], 7);
1071
1072 memset(a, 0, sizeof(0));
1073 CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)"
1074 "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch(
1075 "1234567890123456",
1076 &a[0], &a[1], &a[2], &a[3],
1077 &a[4], &a[5], &a[6], &a[7],
1078 &a[8], &a[9], &a[10], &a[11],
1079 &a[12], &a[13], &a[14], &a[15]));
1080 CHECK_EQ(a[0], 1);
1081 CHECK_EQ(a[1], 2);
1082 CHECK_EQ(a[2], 3);
1083 CHECK_EQ(a[3], 4);
1084 CHECK_EQ(a[4], 5);
1085 CHECK_EQ(a[5], 6);
1086 CHECK_EQ(a[6], 7);
1087 CHECK_EQ(a[7], 8);
1088 CHECK_EQ(a[8], 9);
1089 CHECK_EQ(a[9], 0);
1090 CHECK_EQ(a[10], 1);
1091 CHECK_EQ(a[11], 2);
1092 CHECK_EQ(a[12], 3);
1093 CHECK_EQ(a[13], 4);
1094 CHECK_EQ(a[14], 5);
1095 CHECK_EQ(a[15], 6);
1096
1097 /***** PartialMatch *****/
1098
1099 printf("Testing PartialMatch\n");
1100
1101 CHECK(RE("h.*o").PartialMatch("hello"));
1102 CHECK(RE("h.*o").PartialMatch("othello"));
1103 CHECK(RE("h.*o").PartialMatch("hello!"));
1104 CHECK(RE("((((((((((((((((((((x))))))))))))))))))))").PartialMatch("x"));
1105
1106 /***** other tests *****/
1107
1108 RadixTests();
1109 TestReplace();
1110 TestExtract();
1111 TestConsume();
1112 TestFindAndConsume();
1113 TestQuoteMetaAll();
1114 TestMatchNumberPeculiarity();
1115
1116 // Check the pattern() accessor
1117 {
1118 const string kPattern = "http://([^/]+)/.*";
1119 const RE re(kPattern);
1120 CHECK_EQ(kPattern, re.pattern());
1121 }
1122
1123 // Check RE error field.
1124 {
1125 RE re("foo");
1126 CHECK(re.error().empty()); // Must have no error
1127 }
1128
1129 #ifdef SUPPORT_UTF8
1130 // Check UTF-8 handling
1131 {
1132 printf("Testing UTF-8 handling\n");
1133
1134 // Three Japanese characters (nihongo)
1135 const unsigned char utf8_string[] = {
1136 0xe6, 0x97, 0xa5, // 65e5
1137 0xe6, 0x9c, 0xac, // 672c
1138 0xe8, 0xaa, 0x9e, // 8a9e
1139 0
1140 };
1141 const unsigned char utf8_pattern[] = {
1142 '.',
1143 0xe6, 0x9c, 0xac, // 672c
1144 '.',
1145 0
1146 };
1147
1148 // Both should match in either mode, bytes or UTF-8
1149 RE re_test1(".........");
1150 CHECK(re_test1.FullMatch(utf8_string));
1151 RE re_test2("...", pcrecpp::UTF8());
1152 CHECK(re_test2.FullMatch(utf8_string));
1153
1154 // Check that '.' matches one byte or UTF-8 character
1155 // according to the mode.
1156 string ss;
1157 RE re_test3("(.)");
1158 CHECK(re_test3.PartialMatch(utf8_string, &ss));
1159 CHECK_EQ(ss, string("\xe6"));
1160 RE re_test4("(.)", pcrecpp::UTF8());
1161 CHECK(re_test4.PartialMatch(utf8_string, &ss));
1162 CHECK_EQ(ss, string("\xe6\x97\xa5"));
1163
1164 // Check that string matches itself in either mode
1165 RE re_test5(utf8_string);
1166 CHECK(re_test5.FullMatch(utf8_string));
1167 RE re_test6(utf8_string, pcrecpp::UTF8());
1168 CHECK(re_test6.FullMatch(utf8_string));
1169
1170 // Check that pattern matches string only in UTF8 mode
1171 RE re_test7(utf8_pattern);
1172 CHECK(!re_test7.FullMatch(utf8_string));
1173 RE re_test8(utf8_pattern, pcrecpp::UTF8());
1174 CHECK(re_test8.FullMatch(utf8_string));
1175 }
1176
1177 // Check that ungreedy, UTF8 regular expressions don't match when they
1178 // oughtn't -- see bug 82246.
1179 {
1180 // This code always worked.
1181 const char* pattern = "\\w+X";
1182 const string target = "a aX";
1183 RE match_sentence(pattern);
1184 RE match_sentence_re(pattern, pcrecpp::UTF8());
1185
1186 CHECK(!match_sentence.FullMatch(target));
1187 CHECK(!match_sentence_re.FullMatch(target));
1188 }
1189
1190 {
1191 const char* pattern = "(?U)\\w+X";
1192 const string target = "a aX";
1193 RE match_sentence(pattern);
1194 RE match_sentence_re(pattern, pcrecpp::UTF8());
1195
1196 CHECK(!match_sentence.FullMatch(target));
1197 CHECK(!match_sentence_re.FullMatch(target));
1198 }
1199 #endif /* def SUPPORT_UTF8 */
1200
1201 printf("Testing error reporting\n");
1202
1203 { RE re("a\\1"); CHECK(!re.error().empty()); }
1204 {
1205 RE re("a[x");
1206 CHECK(!re.error().empty());
1207 }
1208 {
1209 RE re("a[z-a]");
1210 CHECK(!re.error().empty());
1211 }
1212 {
1213 RE re("a[[:foobar:]]");
1214 CHECK(!re.error().empty());
1215 }
1216 {
1217 RE re("a(b");
1218 CHECK(!re.error().empty());
1219 }
1220 {
1221 RE re("a\\");
1222 CHECK(!re.error().empty());
1223 }
1224
1225 // Test that recursion is stopped
1226 TestRecursion();
1227
1228 // Test Options
1229 if (getenv("VERBOSE_TEST") != NULL)
1230 VERBOSE_TEST = true;
1231 TestOptions();
1232
1233 // Test the constructors
1234 TestConstructors();
1235
1236 // Done
1237 printf("OK\n");
1238
1239 return 0;
1240 }

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12