/[pcre]/code/trunk/pcrecpp_unittest.cc
ViewVC logotype

Contents of /code/trunk/pcrecpp_unittest.cc

Parent Directory Parent Directory | Revision Log Revision Log


Revision 93 - (show annotations) (download)
Sat Feb 24 21:41:42 2007 UTC (7 years, 6 months ago) by nigel
File size: 37234 byte(s)
Load pcre-7.0 into code/trunk.

1 // -*- coding: utf-8 -*-
2 //
3 // Copyright (c) 2005 - 2006, Google Inc.
4 // All rights reserved.
5 //
6 // Redistribution and use in source and binary forms, with or without
7 // modification, are permitted provided that the following conditions are
8 // met:
9 //
10 // * Redistributions of source code must retain the above copyright
11 // notice, this list of conditions and the following disclaimer.
12 // * Redistributions in binary form must reproduce the above
13 // copyright notice, this list of conditions and the following disclaimer
14 // in the documentation and/or other materials provided with the
15 // distribution.
16 // * Neither the name of Google Inc. nor the names of its
17 // contributors may be used to endorse or promote products derived from
18 // this software without specific prior written permission.
19 //
20 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 //
32 // Author: Sanjay Ghemawat
33 //
34 // TODO: Test extractions for PartialMatch/Consume
35
36 #include <stdio.h>
37 #include <cassert>
38 #include <vector>
39 #include "config.h"
40 #include "pcrecpp.h"
41
42 using pcrecpp::StringPiece;
43 using pcrecpp::RE;
44 using pcrecpp::RE_Options;
45 using pcrecpp::Hex;
46 using pcrecpp::Octal;
47 using pcrecpp::CRadix;
48
// When true, the option tests print a description of each case in addition
// to the name of the option being tested.
static bool VERBOSE_TEST = false;

// CHECK dies with a fatal error if condition is not true.  It is *not*
// controlled by NDEBUG, so the check will be executed regardless of
// compilation mode.  Therefore, it is safe to do things like:
//    CHECK_EQ(fp->Write(x), 4)
// On failure it prints the file, line and the text of the condition to
// stderr and exits with status 1.
#define CHECK(condition) do {                           \
  if (!(condition)) {                                   \
    fprintf(stderr, "%s:%d: Check failed: %s\n",        \
            __FILE__, __LINE__, #condition);            \
    exit(1);                                            \
  }                                                     \
} while (0)

// CHECK_EQ dies unless a == b.  Both operands are parenthesized so that
// arguments containing operators that bind less tightly than ==
// (for example `x & 4`) are compared as written.
#define CHECK_EQ(a, b)  CHECK((a) == (b))
64
65 static void Timing1(int num_iters) {
66 // Same pattern lots of times
67 RE pattern("ruby:\\d+");
68 StringPiece p("ruby:1234");
69 for (int j = num_iters; j > 0; j--) {
70 CHECK(pattern.FullMatch(p));
71 }
72 }
73
74 static void Timing2(int num_iters) {
75 // Same pattern lots of times
76 RE pattern("ruby:(\\d+)");
77 int i;
78 for (int j = num_iters; j > 0; j--) {
79 CHECK(pattern.FullMatch("ruby:1234", &i));
80 CHECK_EQ(i, 1234);
81 }
82 }
83
84 static void Timing3(int num_iters) {
85 string text_string;
86 for (int j = num_iters; j > 0; j--) {
87 text_string += "this is another line\n";
88 }
89
90 RE line_matcher(".*\n");
91 string line;
92 StringPiece text(text_string);
93 int counter = 0;
94 while (line_matcher.Consume(&text)) {
95 counter++;
96 }
97 printf("Matched %d lines\n", counter);
98 }
99
#if 0  // change to "#if 1" if you have a way of defining VirtualProcessSize()

// Checks for memory leaks: constructs 100000 RE objects from distinct
// patterns, samples the process size at the half-way point and at the end,
// and CHECKs that the growth between the two samples is below 2% of the
// final size.
static void LeakTest() {
  unsigned long long initial_size = 0;
  for (int i = 0; i < 100000; i++) {
    if (i == 50000) {
      initial_size = VirtualProcessSize();
      printf("Size after 50000: %llu\n", initial_size);
    }
    char buf[100];
    snprintf(buf, sizeof(buf), "pat%09d", i);
    RE newre(buf);
  }
  // Same type as initial_size: it matches the %llu conversion below and does
  // not rely on a uint64 typedef, which the visible part of this file does
  // not declare.
  const unsigned long long final_size = VirtualProcessSize();
  printf("Size after 100000: %llu\n", final_size);
  const double growth = double(final_size - initial_size) / final_size;
  printf("Growth: %0.2f%%\n", growth * 100);
  CHECK(growth < 0.02);    // Allow < 2% growth
}

#endif
122
123 static void RadixTests() {
124 printf("Testing hex\n");
125
126 #define CHECK_HEX(type, value) \
127 do { \
128 type v; \
129 CHECK(RE("([0-9a-fA-F]+)[uUlL]*").FullMatch(#value, Hex(&v))); \
130 CHECK_EQ(v, 0x ## value); \
131 CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0x" #value, CRadix(&v))); \
132 CHECK_EQ(v, 0x ## value); \
133 } while(0)
134
135 CHECK_HEX(short, 2bad);
136 CHECK_HEX(unsigned short, 2badU);
137 CHECK_HEX(int, dead);
138 CHECK_HEX(unsigned int, deadU);
139 CHECK_HEX(long, 7eadbeefL);
140 CHECK_HEX(unsigned long, deadbeefUL);
141 #ifdef HAVE_LONG_LONG
142 CHECK_HEX(long long, 12345678deadbeefLL);
143 #endif
144 #ifdef HAVE_UNSIGNED_LONG_LONG
145 CHECK_HEX(unsigned long long, cafebabedeadbeefULL);
146 #endif
147
148 #undef CHECK_HEX
149
150 printf("Testing octal\n");
151
152 #define CHECK_OCTAL(type, value) \
153 do { \
154 type v; \
155 CHECK(RE("([0-7]+)[uUlL]*").FullMatch(#value, Octal(&v))); \
156 CHECK_EQ(v, 0 ## value); \
157 CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0" #value, CRadix(&v))); \
158 CHECK_EQ(v, 0 ## value); \
159 } while(0)
160
161 CHECK_OCTAL(short, 77777);
162 CHECK_OCTAL(unsigned short, 177777U);
163 CHECK_OCTAL(int, 17777777777);
164 CHECK_OCTAL(unsigned int, 37777777777U);
165 CHECK_OCTAL(long, 17777777777L);
166 CHECK_OCTAL(unsigned long, 37777777777UL);
167 #ifdef HAVE_LONG_LONG
168 CHECK_OCTAL(long long, 777777777777777777777LL);
169 #endif
170 #ifdef HAVE_UNSIGNED_LONG_LONG
171 CHECK_OCTAL(unsigned long long, 1777777777777777777777ULL);
172 #endif
173
174 #undef CHECK_OCTAL
175
176 printf("Testing decimal\n");
177
178 #define CHECK_DECIMAL(type, value) \
179 do { \
180 type v; \
181 CHECK(RE("(-?[0-9]+)[uUlL]*").FullMatch(#value, &v)); \
182 CHECK_EQ(v, value); \
183 CHECK(RE("(-?[0-9a-fA-FxX]+)[uUlL]*").FullMatch(#value, CRadix(&v))); \
184 CHECK_EQ(v, value); \
185 } while(0)
186
187 CHECK_DECIMAL(short, -1);
188 CHECK_DECIMAL(unsigned short, 9999);
189 CHECK_DECIMAL(int, -1000);
190 CHECK_DECIMAL(unsigned int, 12345U);
191 CHECK_DECIMAL(long, -10000000L);
192 CHECK_DECIMAL(unsigned long, 3083324652U);
193 #ifdef HAVE_LONG_LONG
194 CHECK_DECIMAL(long long, -100000000000000LL);
195 #endif
196 #ifdef HAVE_UNSIGNED_LONG_LONG
197 CHECK_DECIMAL(unsigned long long, 1234567890987654321ULL);
198 #endif
199
200 #undef CHECK_DECIMAL
201
202 }
203
204 static void TestReplace() {
205 printf("Testing Replace\n");
206
207 struct ReplaceTest {
208 const char *regexp;
209 const char *rewrite;
210 const char *original;
211 const char *single;
212 const char *global;
213 };
214 static const ReplaceTest tests[] = {
215 { "(qu|[b-df-hj-np-tv-z]*)([a-z]+)",
216 "\\2\\1ay",
217 "the quick brown fox jumps over the lazy dogs.",
218 "ethay quick brown fox jumps over the lazy dogs.",
219 "ethay ickquay ownbray oxfay umpsjay overay ethay azylay ogsday." },
220 { "\\w+",
221 "\\0-NOSPAM",
222 "paul.haahr@google.com",
223 "paul-NOSPAM.haahr@google.com",
224 "paul-NOSPAM.haahr-NOSPAM@google-NOSPAM.com-NOSPAM" },
225 { "^",
226 "(START)",
227 "foo",
228 "(START)foo",
229 "(START)foo" },
230 { "^",
231 "(START)",
232 "",
233 "(START)",
234 "(START)" },
235 { "$",
236 "(END)",
237 "",
238 "(END)",
239 "(END)" },
240 { "b",
241 "bb",
242 "ababababab",
243 "abbabababab",
244 "abbabbabbabbabb" },
245 { "b",
246 "bb",
247 "bbbbbb",
248 "bbbbbbb",
249 "bbbbbbbbbbbb" },
250 { "b+",
251 "bb",
252 "bbbbbb",
253 "bb",
254 "bb" },
255 { "b*",
256 "bb",
257 "bbbbbb",
258 "bb",
259 "bb" },
260 { "b*",
261 "bb",
262 "aaaaa",
263 "bbaaaaa",
264 "bbabbabbabbabbabb" },
265 { "b*",
266 "bb",
267 "aa\naa\n",
268 "bbaa\naa\n",
269 "bbabbabb\nbbabbabb\nbb" },
270 { "b*",
271 "bb",
272 "aa\raa\r",
273 "bbaa\raa\r",
274 "bbabbabb\rbbabbabb\rbb" },
275 { "b*",
276 "bb",
277 "aa\r\naa\r\n",
278 "bbaa\r\naa\r\n",
279 "bbabbabb\r\nbbabbabb\r\nbb" },
280 #ifdef SUPPORT_UTF8
281 { "b*",
282 "bb",
283 "\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8", // utf8
284 "bb\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8",
285 "bb\xE3\x83\x9B""bb""\xE3\x83\xBC""bb""\xE3\x83\xA0""bb""\xE3\x81\xB8""bb" },
286 { "b*",
287 "bb",
288 "\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n", // utf8
289 "bb\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n",
290 ("bb\xE3\x83\x9B""bb\r\nbb""\xE3\x83\xBC""bb\rbb""\xE3\x83\xA0"
291 "bb\nbb""\xE3\x81\xB8""bb\r\nbb") },
292 #endif
293 { "", NULL, NULL, NULL, NULL }
294 };
295
296 #ifdef SUPPORT_UTF8
297 const bool support_utf8 = true;
298 #else
299 const bool support_utf8 = false;
300 #endif
301
302 for (const ReplaceTest *t = tests; t->original != NULL; ++t) {
303 RE re(t->regexp, RE_Options(PCRE_NEWLINE_CRLF).set_utf8(support_utf8));
304 assert(re.error().empty());
305 string one(t->original);
306 CHECK(re.Replace(t->rewrite, &one));
307 CHECK_EQ(one, t->single);
308 string all(t->original);
309 CHECK(re.GlobalReplace(t->rewrite, &all) > 0);
310 CHECK_EQ(all, t->global);
311 }
312
313 // One final test: test \r\n replacement when we're not in CRLF mode
314 {
315 RE re("b*", RE_Options(PCRE_NEWLINE_CR).set_utf8(support_utf8));
316 assert(re.error().empty());
317 string all("aa\r\naa\r\n");
318 CHECK(re.GlobalReplace("bb", &all) > 0);
319 CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
320 }
321 {
322 RE re("b*", RE_Options(PCRE_NEWLINE_LF).set_utf8(support_utf8));
323 assert(re.error().empty());
324 string all("aa\r\naa\r\n");
325 CHECK(re.GlobalReplace("bb", &all) > 0);
326 CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
327 }
328 // TODO: test what happens when no PCRE_NEWLINE_* flag is set.
329 // Alas, the answer depends on how pcre was compiled.
330 }
331
332 static void TestExtract() {
333 printf("Testing Extract\n");
334
335 string s;
336
337 CHECK(RE("(.*)@([^.]*)").Extract("\\2!\\1", "boris@kremvax.ru", &s));
338 CHECK_EQ(s, "kremvax!boris");
339
340 // check the RE interface as well
341 CHECK(RE(".*").Extract("'\\0'", "foo", &s));
342 CHECK_EQ(s, "'foo'");
343 CHECK(!RE("bar").Extract("'\\0'", "baz", &s));
344 CHECK_EQ(s, "'foo'");
345 }
346
347 static void TestConsume() {
348 printf("Testing Consume\n");
349
350 string word;
351
352 string s(" aaa b!@#$@#$cccc");
353 StringPiece input(s);
354
355 RE r("\\s*(\\w+)"); // matches a word, possibly proceeded by whitespace
356 CHECK(r.Consume(&input, &word));
357 CHECK_EQ(word, "aaa");
358 CHECK(r.Consume(&input, &word));
359 CHECK_EQ(word, "b");
360 CHECK(! r.Consume(&input, &word));
361 }
362
363 static void TestFindAndConsume() {
364 printf("Testing FindAndConsume\n");
365
366 string word;
367
368 string s(" aaa b!@#$@#$cccc");
369 StringPiece input(s);
370
371 RE r("(\\w+)"); // matches a word
372 CHECK(r.FindAndConsume(&input, &word));
373 CHECK_EQ(word, "aaa");
374 CHECK(r.FindAndConsume(&input, &word));
375 CHECK_EQ(word, "b");
376 CHECK(r.FindAndConsume(&input, &word));
377 CHECK_EQ(word, "cccc");
378 CHECK(! r.FindAndConsume(&input, &word));
379 }
380
381 static void TestMatchNumberPeculiarity() {
382 printf("Testing match-number peculiaraity\n");
383
384 string word1;
385 string word2;
386 string word3;
387
388 RE r("(foo)|(bar)|(baz)");
389 CHECK(r.PartialMatch("foo", &word1, &word2, &word3));
390 CHECK_EQ(word1, "foo");
391 CHECK_EQ(word2, "");
392 CHECK_EQ(word3, "");
393 CHECK(r.PartialMatch("bar", &word1, &word2, &word3));
394 CHECK_EQ(word1, "");
395 CHECK_EQ(word2, "bar");
396 CHECK_EQ(word3, "");
397 CHECK(r.PartialMatch("baz", &word1, &word2, &word3));
398 CHECK_EQ(word1, "");
399 CHECK_EQ(word2, "");
400 CHECK_EQ(word3, "baz");
401 CHECK(!r.PartialMatch("f", &word1, &word2, &word3));
402
403 string a;
404 CHECK(RE("(foo)|hello").FullMatch("hello", &a));
405 CHECK_EQ(a, "");
406 }
407
408 static void TestRecursion() {
409 printf("Testing recursion\n");
410
411 // Get one string that passes (sometimes), one that never does.
412 string text_good("abcdefghijk");
413 string text_bad("acdefghijkl");
414
415 // According to pcretest, matching text_good against (\w+)*b
416 // requires match_limit of at least 8192, and match_recursion_limit
417 // of at least 37.
418
419 RE_Options options_ml;
420 options_ml.set_match_limit(8192);
421 RE re("(\\w+)*b", options_ml);
422 CHECK(re.PartialMatch(text_good) == true);
423 CHECK(re.PartialMatch(text_bad) == false);
424 CHECK(re.FullMatch(text_good) == false);
425 CHECK(re.FullMatch(text_bad) == false);
426
427 options_ml.set_match_limit(1024);
428 RE re2("(\\w+)*b", options_ml);
429 CHECK(re2.PartialMatch(text_good) == false); // because of match_limit
430 CHECK(re2.PartialMatch(text_bad) == false);
431 CHECK(re2.FullMatch(text_good) == false);
432 CHECK(re2.FullMatch(text_bad) == false);
433
434 RE_Options options_mlr;
435 options_mlr.set_match_limit_recursion(50);
436 RE re3("(\\w+)*b", options_mlr);
437 CHECK(re3.PartialMatch(text_good) == true);
438 CHECK(re3.PartialMatch(text_bad) == false);
439 CHECK(re3.FullMatch(text_good) == false);
440 CHECK(re3.FullMatch(text_bad) == false);
441
442 options_mlr.set_match_limit_recursion(10);
443 RE re4("(\\w+)*b", options_mlr);
444 CHECK(re4.PartialMatch(text_good) == false);
445 CHECK(re4.PartialMatch(text_bad) == false);
446 CHECK(re4.FullMatch(text_good) == false);
447 CHECK(re4.FullMatch(text_bad) == false);
448 }
449
450 // A meta-quoted string, interpreted as a pattern, should always match
451 // the original unquoted string.
452 static void TestQuoteMeta(string unquoted, RE_Options options = RE_Options()) {
453 string quoted = RE::QuoteMeta(unquoted);
454 RE re(quoted, options);
455 CHECK(re.FullMatch(unquoted));
456 }
457
458 // A string containing meaningful regexp characters, which is then meta-
459 // quoted, should not generally match a string the unquoted string does.
460 static void NegativeTestQuoteMeta(string unquoted, string should_not_match,
461 RE_Options options = RE_Options()) {
462 string quoted = RE::QuoteMeta(unquoted);
463 RE re(quoted, options);
464 CHECK(!re.FullMatch(should_not_match));
465 }
466
467 // Tests that quoted meta characters match their original strings,
468 // and that a few things that shouldn't match indeed do not.
469 static void TestQuotaMetaSimple() {
470 TestQuoteMeta("foo");
471 TestQuoteMeta("foo.bar");
472 TestQuoteMeta("foo\\.bar");
473 TestQuoteMeta("[1-9]");
474 TestQuoteMeta("1.5-2.0?");
475 TestQuoteMeta("\\d");
476 TestQuoteMeta("Who doesn't like ice cream?");
477 TestQuoteMeta("((a|b)c?d*e+[f-h]i)");
478 TestQuoteMeta("((?!)xxx).*yyy");
479 TestQuoteMeta("([");
480 }
481
482 static void TestQuoteMetaSimpleNegative() {
483 NegativeTestQuoteMeta("foo", "bar");
484 NegativeTestQuoteMeta("...", "bar");
485 NegativeTestQuoteMeta("\\.", ".");
486 NegativeTestQuoteMeta("\\.", "..");
487 NegativeTestQuoteMeta("(a)", "a");
488 NegativeTestQuoteMeta("(a|b)", "a");
489 NegativeTestQuoteMeta("(a|b)", "(a)");
490 NegativeTestQuoteMeta("(a|b)", "a|b");
491 NegativeTestQuoteMeta("[0-9]", "0");
492 NegativeTestQuoteMeta("[0-9]", "0-9");
493 NegativeTestQuoteMeta("[0-9]", "[9]");
494 NegativeTestQuoteMeta("((?!)xxx)", "xxx");
495 }
496
497 static void TestQuoteMetaLatin1() {
498 TestQuoteMeta("3\xb2 = 9");
499 }
500
// QuoteMeta checks on UTF-8 input.  The body is compiled only when
// SUPPORT_UTF8 is defined; otherwise the function does nothing.
static void TestQuoteMetaUtf8() {
#ifdef SUPPORT_UTF8
  RE_Options utf8_options(pcrecpp::UTF8());

  TestQuoteMeta("Pl\xc3\xa1\x63ido Domingo", utf8_options);
  TestQuoteMeta("xyz", utf8_options);                // No fancy utf8
  TestQuoteMeta("\xc2\xb0", utf8_options);           // 2-byte utf8 (degree symbol)
  TestQuoteMeta("27\xc2\xb0 degrees", utf8_options); // As a middle character
  TestQuoteMeta("\xe2\x80\xb3", utf8_options);       // 3-byte utf8 (double prime)
  TestQuoteMeta("\xf0\x9d\x85\x9f", utf8_options);   // 4-byte utf8 (music note)

  // Interpreted as Latin-1, but should still work
  TestQuoteMeta("27\xc2\xb0");

  // 2-byte utf (degree symbol): the text below has a backslash in front of
  // each of the two bytes and must not be matched by the quoted pattern.
  NegativeTestQuoteMeta("27\xc2\xb0",
                        "27\\\xc2\\\xb0",
                        utf8_options);
#endif
}
515
516 static void TestQuoteMetaAll() {
517 printf("Testing QuoteMeta\n");
518 TestQuotaMetaSimple();
519 TestQuoteMetaSimpleNegative();
520 TestQuoteMetaLatin1();
521 TestQuoteMetaUtf8();
522 }
523
524 //
525 // Options tests contributed by
526 // Giuseppe Maxia, CTO, Stardata s.r.l.
527 // July 2005
528 //
529 static void GetOneOptionResult(
530 const char *option_name,
531 const char *regex,
532 const char *str,
533 RE_Options options,
534 bool full,
535 string expected) {
536
537 printf("Testing Option <%s>\n", option_name);
538 if(VERBOSE_TEST)
539 printf("/%s/ finds \"%s\" within \"%s\" \n",
540 regex,
541 expected.c_str(),
542 str);
543 string captured("");
544 if (full)
545 RE(regex,options).FullMatch(str, &captured);
546 else
547 RE(regex,options).PartialMatch(str, &captured);
548 CHECK_EQ(captured, expected);
549 }
550
551 static void TestOneOption(
552 const char *option_name,
553 const char *regex,
554 const char *str,
555 RE_Options options,
556 bool full,
557 bool assertive = true) {
558
559 printf("Testing Option <%s>\n", option_name);
560 if (VERBOSE_TEST)
561 printf("'%s' %s /%s/ \n",
562 str,
563 (assertive? "matches" : "doesn't match"),
564 regex);
565 if (assertive) {
566 if (full)
567 CHECK(RE(regex,options).FullMatch(str));
568 else
569 CHECK(RE(regex,options).PartialMatch(str));
570 } else {
571 if (full)
572 CHECK(!RE(regex,options).FullMatch(str));
573 else
574 CHECK(!RE(regex,options).PartialMatch(str));
575 }
576 }
577
578 static void Test_CASELESS() {
579 RE_Options options;
580 RE_Options options2;
581
582 options.set_caseless(true);
583 TestOneOption("CASELESS (class)", "HELLO", "hello", options, false);
584 TestOneOption("CASELESS (class2)", "HELLO", "hello", options2.set_caseless(true), false);
585 TestOneOption("CASELESS (class)", "^[A-Z]+$", "Hello", options, false);
586
587 TestOneOption("CASELESS (function)", "HELLO", "hello", pcrecpp::CASELESS(), false);
588 TestOneOption("CASELESS (function)", "^[A-Z]+$", "Hello", pcrecpp::CASELESS(), false);
589 options.set_caseless(false);
590 TestOneOption("no CASELESS", "HELLO", "hello", options, false, false);
591 }
592
593 static void Test_MULTILINE() {
594 RE_Options options;
595 RE_Options options2;
596 const char *str = "HELLO\n" "cruel\n" "world\n";
597
598 options.set_multiline(true);
599 TestOneOption("MULTILINE (class)", "^cruel$", str, options, false);
600 TestOneOption("MULTILINE (class2)", "^cruel$", str, options2.set_multiline(true), false);
601 TestOneOption("MULTILINE (function)", "^cruel$", str, pcrecpp::MULTILINE(), false);
602 options.set_multiline(false);
603 TestOneOption("no MULTILINE", "^cruel$", str, options, false, false);
604 }
605
606 static void Test_DOTALL() {
607 RE_Options options;
608 RE_Options options2;
609 const char *str = "HELLO\n" "cruel\n" "world";
610
611 options.set_dotall(true);
612 TestOneOption("DOTALL (class)", "HELLO.*world", str, options, true);
613 TestOneOption("DOTALL (class2)", "HELLO.*world", str, options2.set_dotall(true), true);
614 TestOneOption("DOTALL (function)", "HELLO.*world", str, pcrecpp::DOTALL(), true);
615 options.set_dotall(false);
616 TestOneOption("no DOTALL", "HELLO.*world", str, options, true, false);
617 }
618
619 static void Test_DOLLAR_ENDONLY() {
620 RE_Options options;
621 RE_Options options2;
622 const char *str = "HELLO world\n";
623
624 TestOneOption("no DOLLAR_ENDONLY", "world$", str, options, false);
625 options.set_dollar_endonly(true);
626 TestOneOption("DOLLAR_ENDONLY 1", "world$", str, options, false, false);
627 TestOneOption("DOLLAR_ENDONLY 2", "world$", str, options2.set_dollar_endonly(true), false, false);
628 }
629
630 static void Test_EXTRA() {
631 RE_Options options;
632 const char *str = "HELLO";
633
634 options.set_extra(true);
635 TestOneOption("EXTRA 1", "\\HELL\\O", str, options, true, false );
636 TestOneOption("EXTRA 2", "\\HELL\\O", str, RE_Options().set_extra(true), true, false );
637 options.set_extra(false);
638 TestOneOption("no EXTRA", "\\HELL\\O", str, options, true );
639 }
640
641 static void Test_EXTENDED() {
642 RE_Options options;
643 RE_Options options2;
644 const char *str = "HELLO world";
645
646 options.set_extended(true);
647 TestOneOption("EXTENDED (class)", "HELLO world", str, options, false, false);
648 TestOneOption("EXTENDED (class2)", "HELLO world", str, options2.set_extended(true), false, false);
649 TestOneOption("EXTENDED (class)",
650 "^ HE L{2} O "
651 "\\s+ "
652 "\\w+ $ ",
653 str,
654 options,
655 false);
656
657 TestOneOption("EXTENDED (function)", "HELLO world", str, pcrecpp::EXTENDED(), false, false);
658 TestOneOption("EXTENDED (function)",
659 "^ HE L{2} O "
660 "\\s+ "
661 "\\w+ $ ",
662 str,
663 pcrecpp::EXTENDED(),
664 false);
665
666 options.set_extended(false);
667 TestOneOption("no EXTENDED", "HELLO world", str, options, false);
668 }
669
670 static void Test_NO_AUTO_CAPTURE() {
671 RE_Options options;
672 const char *str = "HELLO world";
673 string captured;
674
675 printf("Testing Option <no NO_AUTO_CAPTURE>\n");
676 if (VERBOSE_TEST)
677 printf("parentheses capture text\n");
678 RE re("(world|universe)$", options);
679 CHECK(re.Extract("\\1", str , &captured));
680 CHECK_EQ(captured, "world");
681 options.set_no_auto_capture(true);
682 printf("testing Option <NO_AUTO_CAPTURE>\n");
683 if (VERBOSE_TEST)
684 printf("parentheses do not capture text\n");
685 re.Extract("\\1",str, &captured );
686 CHECK_EQ(captured, "world");
687 }
688
689 static void Test_UNGREEDY() {
690 RE_Options options;
691 const char *str = "HELLO, 'this' is the 'world'";
692
693 options.set_ungreedy(true);
694 GetOneOptionResult("UNGREEDY 1", "('.*')", str, options, false, "'this'" );
695 GetOneOptionResult("UNGREEDY 2", "('.*')", str, RE_Options().set_ungreedy(true), false, "'this'" );
696 GetOneOptionResult("UNGREEDY", "('.*?')", str, options, false, "'this' is the 'world'" );
697
698 options.set_ungreedy(false);
699 GetOneOptionResult("no UNGREEDY", "('.*')", str, options, false, "'this' is the 'world'" );
700 GetOneOptionResult("no UNGREEDY", "('.*?')", str, options, false, "'this'" );
701 }
702
703 static void Test_all_options() {
704 const char *str = "HELLO\n" "cruel\n" "world";
705 RE_Options options;
706 options.set_all_options(PCRE_CASELESS | PCRE_DOTALL);
707
708 TestOneOption("all_options (CASELESS|DOTALL)", "^hello.*WORLD", str , options, false);
709 options.set_all_options(0);
710 TestOneOption("all_options (0)", "^hello.*WORLD", str , options, false, false);
711 options.set_all_options(PCRE_MULTILINE | PCRE_EXTENDED);
712
713 TestOneOption("all_options (MULTILINE|EXTENDED)", " ^ c r u e l $ ", str, options, false);
714 TestOneOption("all_options (MULTILINE|EXTENDED) with constructor",
715 " ^ c r u e l $ ",
716 str,
717 RE_Options(PCRE_MULTILINE | PCRE_EXTENDED),
718 false);
719
720 TestOneOption("all_options (MULTILINE|EXTENDED) with concatenation",
721 " ^ c r u e l $ ",
722 str,
723 RE_Options()
724 .set_multiline(true)
725 .set_extended(true),
726 false);
727
728 options.set_all_options(0);
729 TestOneOption("all_options (0)", "^ c r u e l $", str, options, false, false);
730
731 }
732
733 static void TestOptions() {
734 printf("Testing Options\n");
735 Test_CASELESS();
736 Test_MULTILINE();
737 Test_DOTALL();
738 Test_DOLLAR_ENDONLY();
739 Test_EXTENDED();
740 Test_NO_AUTO_CAPTURE();
741 Test_UNGREEDY();
742 Test_EXTRA();
743 Test_all_options();
744 }
745
746 static void TestConstructors() {
747 printf("Testing constructors\n");
748
749 RE_Options options;
750 options.set_dotall(true);
751 const char *str = "HELLO\n" "cruel\n" "world";
752
753 RE orig("HELLO.*world", options);
754 CHECK(orig.FullMatch(str));
755
756 RE copy1(orig);
757 CHECK(copy1.FullMatch(str));
758
759 RE copy2("not a match");
760 CHECK(!copy2.FullMatch(str));
761 copy2 = copy1;
762 CHECK(copy2.FullMatch(str));
763 copy2 = orig;
764 CHECK(copy2.FullMatch(str));
765
766 // Make sure when we assign to ourselves, nothing bad happens
767 orig = orig;
768 copy1 = copy1;
769 copy2 = copy2;
770 CHECK(orig.FullMatch(str));
771 CHECK(copy1.FullMatch(str));
772 CHECK(copy2.FullMatch(str));
773 }
774
775 int main(int argc, char** argv) {
776 // Treat any flag as --help
777 if (argc > 1 && argv[1][0] == '-') {
778 printf("Usage: %s [timing1|timing2|timing3 num-iters]\n"
779 " If 'timingX ###' is specified, run the given timing test\n"
780 " with the given number of iterations, rather than running\n"
781 " the default corectness test.\n", argv[0]);
782 return 0;
783 }
784
785 if (argc > 1) {
786 if ( argc == 2 || atoi(argv[2]) == 0) {
787 printf("timing mode needs a num-iters argument\n");
788 return 1;
789 }
790 if (!strcmp(argv[1], "timing1"))
791 Timing1(atoi(argv[2]));
792 else if (!strcmp(argv[1], "timing2"))
793 Timing2(atoi(argv[2]));
794 else if (!strcmp(argv[1], "timing3"))
795 Timing3(atoi(argv[2]));
796 else
797 printf("Unknown argument '%s'\n", argv[1]);
798 return 0;
799 }
800
801 printf("Testing FullMatch\n");
802
803 int i;
804 string s;
805
806 /***** FullMatch with no args *****/
807
808 CHECK(RE("h.*o").FullMatch("hello"));
809 CHECK(!RE("h.*o").FullMatch("othello"));
810 CHECK(!RE("h.*o").FullMatch("hello!"));
811
812 /***** FullMatch with args *****/
813
814 // Zero-arg
815 CHECK(RE("\\d+").FullMatch("1001"));
816
817 // Single-arg
818 CHECK(RE("(\\d+)").FullMatch("1001", &i));
819 CHECK_EQ(i, 1001);
820 CHECK(RE("(-?\\d+)").FullMatch("-123", &i));
821 CHECK_EQ(i, -123);
822 CHECK(!RE("()\\d+").FullMatch("10", &i));
823 CHECK(!RE("(\\d+)").FullMatch("1234567890123456789012345678901234567890",
824 &i));
825
826 // Digits surrounding integer-arg
827 CHECK(RE("1(\\d*)4").FullMatch("1234", &i));
828 CHECK_EQ(i, 23);
829 CHECK(RE("(\\d)\\d+").FullMatch("1234", &i));
830 CHECK_EQ(i, 1);
831 CHECK(RE("(-\\d)\\d+").FullMatch("-1234", &i));
832 CHECK_EQ(i, -1);
833 CHECK(RE("(\\d)").PartialMatch("1234", &i));
834 CHECK_EQ(i, 1);
835 CHECK(RE("(-\\d)").PartialMatch("-1234", &i));
836 CHECK_EQ(i, -1);
837
838 // String-arg
839 CHECK(RE("h(.*)o").FullMatch("hello", &s));
840 CHECK_EQ(s, string("ell"));
841
842 // StringPiece-arg
843 StringPiece sp;
844 CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &sp, &i));
845 CHECK_EQ(sp.size(), 4);
846 CHECK(memcmp(sp.data(), "ruby", 4) == 0);
847 CHECK_EQ(i, 1234);
848
849 // Multi-arg
850 CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &s, &i));
851 CHECK_EQ(s, string("ruby"));
852 CHECK_EQ(i, 1234);
853
854 // Ignored arg
855 CHECK(RE("(\\w+)(:)(\\d+)").FullMatch("ruby:1234", &s, (void*)NULL, &i));
856 CHECK_EQ(s, string("ruby"));
857 CHECK_EQ(i, 1234);
858
859 // Type tests
860 {
861 char c;
862 CHECK(RE("(H)ello").FullMatch("Hello", &c));
863 CHECK_EQ(c, 'H');
864 }
865 {
866 unsigned char c;
867 CHECK(RE("(H)ello").FullMatch("Hello", &c));
868 CHECK_EQ(c, static_cast<unsigned char>('H'));
869 }
870 {
871 short v;
872 CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
873 CHECK(RE("(-?\\d+)").FullMatch("-100", &v)); CHECK_EQ(v, -100);
874 CHECK(RE("(-?\\d+)").FullMatch("32767", &v)); CHECK_EQ(v, 32767);
875 CHECK(RE("(-?\\d+)").FullMatch("-32768", &v)); CHECK_EQ(v, -32768);
876 CHECK(!RE("(-?\\d+)").FullMatch("-32769", &v));
877 CHECK(!RE("(-?\\d+)").FullMatch("32768", &v));
878 }
879 {
880 unsigned short v;
881 CHECK(RE("(\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
882 CHECK(RE("(\\d+)").FullMatch("32767", &v)); CHECK_EQ(v, 32767);
883 CHECK(RE("(\\d+)").FullMatch("65535", &v)); CHECK_EQ(v, 65535);
884 CHECK(!RE("(\\d+)").FullMatch("65536", &v));
885 }
886 {
887 int v;
888 static const int max_value = 0x7fffffff;
889 static const int min_value = -max_value - 1;
890 CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
891 CHECK(RE("(-?\\d+)").FullMatch("-100", &v)); CHECK_EQ(v, -100);
892 CHECK(RE("(-?\\d+)").FullMatch("2147483647", &v)); CHECK_EQ(v, max_value);
893 CHECK(RE("(-?\\d+)").FullMatch("-2147483648", &v)); CHECK_EQ(v, min_value);
894 CHECK(!RE("(-?\\d+)").FullMatch("-2147483649", &v));
895 CHECK(!RE("(-?\\d+)").FullMatch("2147483648", &v));
896 }
897 {
898 unsigned int v;
899 static const unsigned int max_value = 0xfffffffful;
900 CHECK(RE("(\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
901 CHECK(RE("(\\d+)").FullMatch("4294967295", &v)); CHECK_EQ(v, max_value);
902 CHECK(!RE("(\\d+)").FullMatch("4294967296", &v));
903 }
904 #ifdef HAVE_LONG_LONG
905 {
906 long long v;
907 static const long long max_value = 0x7fffffffffffffffLL;
908 static const long long min_value = -max_value - 1;
909 char buf[32];
910
911 CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
912 CHECK(RE("(-?\\d+)").FullMatch("-100",&v)); CHECK_EQ(v, -100);
913
914 snprintf(buf, sizeof(buf), "%lld", max_value);
915 CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
916
917 snprintf(buf, sizeof(buf), "%lld", min_value);
918 CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, min_value);
919
920 snprintf(buf, sizeof(buf), "%lld", max_value);
921 assert(buf[strlen(buf)-1] != '9');
922 buf[strlen(buf)-1]++;
923 CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
924
925 snprintf(buf, sizeof(buf), "%lld", min_value);
926 assert(buf[strlen(buf)-1] != '9');
927 buf[strlen(buf)-1]++;
928 CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
929 }
930 #endif
931 #if defined HAVE_UNSIGNED_LONG_LONG && defined HAVE_LONG_LONG
932 {
933 unsigned long long v;
934 long long v2;
935 static const unsigned long long max_value = 0xffffffffffffffffULL;
936 char buf[32];
937
938 CHECK(RE("(-?\\d+)").FullMatch("100",&v)); CHECK_EQ(v, 100);
939 CHECK(RE("(-?\\d+)").FullMatch("-100",&v2)); CHECK_EQ(v2, -100);
940
941 snprintf(buf, sizeof(buf), "%llu", max_value);
942 CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
943
944 assert(buf[strlen(buf)-1] != '9');
945 buf[strlen(buf)-1]++;
946 CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
947 }
948 #endif
949 {
950 float v;
951 CHECK(RE("(.*)").FullMatch("100", &v));
952 CHECK(RE("(.*)").FullMatch("-100.", &v));
953 CHECK(RE("(.*)").FullMatch("1e23", &v));
954 }
955 {
956 double v;
957 CHECK(RE("(.*)").FullMatch("100", &v));
958 CHECK(RE("(.*)").FullMatch("-100.", &v));
959 CHECK(RE("(.*)").FullMatch("1e23", &v));
960 }
961
962 // Check that matching is fully anchored
963 CHECK(!RE("(\\d+)").FullMatch("x1001", &i));
964 CHECK(!RE("(\\d+)").FullMatch("1001x", &i));
965 CHECK(RE("x(\\d+)").FullMatch("x1001", &i)); CHECK_EQ(i, 1001);
966 CHECK(RE("(\\d+)x").FullMatch("1001x", &i)); CHECK_EQ(i, 1001);
967
968 // Braces
969 CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcd"));
970 CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcde"));
971 CHECK(!RE("[0-9a-f+.-]{5,}").FullMatch("0abc"));
972
973 // Complicated RE
974 CHECK(RE("foo|bar|[A-Z]").FullMatch("foo"));
975 CHECK(RE("foo|bar|[A-Z]").FullMatch("bar"));
976 CHECK(RE("foo|bar|[A-Z]").FullMatch("X"));
977 CHECK(!RE("foo|bar|[A-Z]").FullMatch("XY"));
978
979 // Check full-match handling (needs '$' tacked on internally)
980 CHECK(RE("fo|foo").FullMatch("fo"));
981 CHECK(RE("fo|foo").FullMatch("foo"));
982 CHECK(RE("fo|foo$").FullMatch("fo"));
983 CHECK(RE("fo|foo$").FullMatch("foo"));
984 CHECK(RE("foo$").FullMatch("foo"));
985 CHECK(!RE("foo\\$").FullMatch("foo$bar"));
986 CHECK(!RE("fo|bar").FullMatch("fox"));
987
988 // Uncomment the following if we change the handling of '$' to
989 // prevent it from matching a trailing newline
990 if (false) {
991 // Check that we don't get bitten by pcre's special handling of a
992 // '\n' at the end of the string matching '$'
993 CHECK(!RE("foo$").PartialMatch("foo\n"));
994 }
995
996 // Number of args
997 int a[16];
998 CHECK(RE("").FullMatch(""));
999
1000 memset(a, 0, sizeof(0));
1001 CHECK(RE("(\\d){1}").FullMatch("1",
1002 &a[0]));
1003 CHECK_EQ(a[0], 1);
1004
1005 memset(a, 0, sizeof(0));
1006 CHECK(RE("(\\d)(\\d)").FullMatch("12",
1007 &a[0], &a[1]));
1008 CHECK_EQ(a[0], 1);
1009 CHECK_EQ(a[1], 2);
1010
1011 memset(a, 0, sizeof(0));
1012 CHECK(RE("(\\d)(\\d)(\\d)").FullMatch("123",
1013 &a[0], &a[1], &a[2]));
1014 CHECK_EQ(a[0], 1);
1015 CHECK_EQ(a[1], 2);
1016 CHECK_EQ(a[2], 3);
1017
1018 memset(a, 0, sizeof(0));
1019 CHECK(RE("(\\d)(\\d)(\\d)(\\d)").FullMatch("1234",
1020 &a[0], &a[1], &a[2], &a[3]));
1021 CHECK_EQ(a[0], 1);
1022 CHECK_EQ(a[1], 2);
1023 CHECK_EQ(a[2], 3);
1024 CHECK_EQ(a[3], 4);
1025
1026 memset(a, 0, sizeof(0));
1027 CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("12345",
1028 &a[0], &a[1], &a[2],
1029 &a[3], &a[4]));
1030 CHECK_EQ(a[0], 1);
1031 CHECK_EQ(a[1], 2);
1032 CHECK_EQ(a[2], 3);
1033 CHECK_EQ(a[3], 4);
1034 CHECK_EQ(a[4], 5);
1035
1036 memset(a, 0, sizeof(0));
1037 CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("123456",
1038 &a[0], &a[1], &a[2],
1039 &a[3], &a[4], &a[5]));
1040 CHECK_EQ(a[0], 1);
1041 CHECK_EQ(a[1], 2);
1042 CHECK_EQ(a[2], 3);
1043 CHECK_EQ(a[3], 4);
1044 CHECK_EQ(a[4], 5);
1045 CHECK_EQ(a[5], 6);
1046
1047 memset(a, 0, sizeof(0));
1048 CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("1234567",
1049 &a[0], &a[1], &a[2], &a[3],
1050 &a[4], &a[5], &a[6]));
1051 CHECK_EQ(a[0], 1);
1052 CHECK_EQ(a[1], 2);
1053 CHECK_EQ(a[2], 3);
1054 CHECK_EQ(a[3], 4);
1055 CHECK_EQ(a[4], 5);
1056 CHECK_EQ(a[5], 6);
1057 CHECK_EQ(a[6], 7);
1058
1059 memset(a, 0, sizeof(0));
1060 CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)"
1061 "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch(
1062 "1234567890123456",
1063 &a[0], &a[1], &a[2], &a[3],
1064 &a[4], &a[5], &a[6], &a[7],
1065 &a[8], &a[9], &a[10], &a[11],
1066 &a[12], &a[13], &a[14], &a[15]));
1067 CHECK_EQ(a[0], 1);
1068 CHECK_EQ(a[1], 2);
1069 CHECK_EQ(a[2], 3);
1070 CHECK_EQ(a[3], 4);
1071 CHECK_EQ(a[4], 5);
1072 CHECK_EQ(a[5], 6);
1073 CHECK_EQ(a[6], 7);
1074 CHECK_EQ(a[7], 8);
1075 CHECK_EQ(a[8], 9);
1076 CHECK_EQ(a[9], 0);
1077 CHECK_EQ(a[10], 1);
1078 CHECK_EQ(a[11], 2);
1079 CHECK_EQ(a[12], 3);
1080 CHECK_EQ(a[13], 4);
1081 CHECK_EQ(a[14], 5);
1082 CHECK_EQ(a[15], 6);
1083
1084 /***** PartialMatch *****/
1085
1086 printf("Testing PartialMatch\n");
1087
1088 CHECK(RE("h.*o").PartialMatch("hello"));
1089 CHECK(RE("h.*o").PartialMatch("othello"));
1090 CHECK(RE("h.*o").PartialMatch("hello!"));
1091 CHECK(RE("((((((((((((((((((((x))))))))))))))))))))").PartialMatch("x"));
1092
1093 /***** other tests *****/
1094
1095 RadixTests();
1096 TestReplace();
1097 TestExtract();
1098 TestConsume();
1099 TestFindAndConsume();
1100 TestQuoteMetaAll();
1101 TestMatchNumberPeculiarity();
1102
1103 // Check the pattern() accessor
1104 {
1105 const string kPattern = "http://([^/]+)/.*";
1106 const RE re(kPattern);
1107 CHECK_EQ(kPattern, re.pattern());
1108 }
1109
1110 // Check RE error field.
1111 {
1112 RE re("foo");
1113 CHECK(re.error().empty()); // Must have no error
1114 }
1115
1116 #ifdef SUPPORT_UTF8
1117 // Check UTF-8 handling
1118 {
1119 printf("Testing UTF-8 handling\n");
1120
1121 // Three Japanese characters (nihongo)
1122 const char utf8_string[] = {
1123 0xe6, 0x97, 0xa5, // 65e5
1124 0xe6, 0x9c, 0xac, // 672c
1125 0xe8, 0xaa, 0x9e, // 8a9e
1126 0
1127 };
1128 const char utf8_pattern[] = {
1129 '.',
1130 0xe6, 0x9c, 0xac, // 672c
1131 '.',
1132 0
1133 };
1134
1135 // Both should match in either mode, bytes or UTF-8
1136 RE re_test1(".........");
1137 CHECK(re_test1.FullMatch(utf8_string));
1138 RE re_test2("...", pcrecpp::UTF8());
1139 CHECK(re_test2.FullMatch(utf8_string));
1140
1141 // Check that '.' matches one byte or UTF-8 character
1142 // according to the mode.
1143 string ss;
1144 RE re_test3("(.)");
1145 CHECK(re_test3.PartialMatch(utf8_string, &ss));
1146 CHECK_EQ(ss, string("\xe6"));
1147 RE re_test4("(.)", pcrecpp::UTF8());
1148 CHECK(re_test4.PartialMatch(utf8_string, &ss));
1149 CHECK_EQ(ss, string("\xe6\x97\xa5"));
1150
1151 // Check that string matches itself in either mode
1152 RE re_test5(utf8_string);
1153 CHECK(re_test5.FullMatch(utf8_string));
1154 RE re_test6(utf8_string, pcrecpp::UTF8());
1155 CHECK(re_test6.FullMatch(utf8_string));
1156
1157 // Check that pattern matches string only in UTF8 mode
1158 RE re_test7(utf8_pattern);
1159 CHECK(!re_test7.FullMatch(utf8_string));
1160 RE re_test8(utf8_pattern, pcrecpp::UTF8());
1161 CHECK(re_test8.FullMatch(utf8_string));
1162 }
1163
1164 // Check that ungreedy, UTF8 regular expressions don't match when they
1165 // oughtn't -- see bug 82246.
1166 {
1167 // This code always worked.
1168 const char* pattern = "\\w+X";
1169 const string target = "a aX";
1170 RE match_sentence(pattern);
1171 RE match_sentence_re(pattern, pcrecpp::UTF8());
1172
1173 CHECK(!match_sentence.FullMatch(target));
1174 CHECK(!match_sentence_re.FullMatch(target));
1175 }
1176
1177 {
1178 const char* pattern = "(?U)\\w+X";
1179 const string target = "a aX";
1180 RE match_sentence(pattern);
1181 RE match_sentence_re(pattern, pcrecpp::UTF8());
1182
1183 CHECK(!match_sentence.FullMatch(target));
1184 CHECK(!match_sentence_re.FullMatch(target));
1185 }
1186 #endif /* def SUPPORT_UTF8 */
1187
1188 printf("Testing error reporting\n");
1189
1190 { RE re("a\\1"); CHECK(!re.error().empty()); }
1191 {
1192 RE re("a[x");
1193 CHECK(!re.error().empty());
1194 }
1195 {
1196 RE re("a[z-a]");
1197 CHECK(!re.error().empty());
1198 }
1199 {
1200 RE re("a[[:foobar:]]");
1201 CHECK(!re.error().empty());
1202 }
1203 {
1204 RE re("a(b");
1205 CHECK(!re.error().empty());
1206 }
1207 {
1208 RE re("a\\");
1209 CHECK(!re.error().empty());
1210 }
1211
1212 // Test that recursion is stopped
1213 TestRecursion();
1214
1215 // Test Options
1216 if (getenv("VERBOSE_TEST") != NULL)
1217 VERBOSE_TEST = true;
1218 TestOptions();
1219
1220 // Test the constructors
1221 TestConstructors();
1222
1223 // Done
1224 printf("OK\n");
1225
1226 return 0;
1227 }

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12