/[pcre]/code/trunk/pcrecpp_unittest.cc
ViewVC logotype

Contents of /code/trunk/pcrecpp_unittest.cc

Parent Directory Parent Directory | Revision Log Revision Log


Revision 302 - (show annotations) (download)
Sun Jan 20 19:12:46 2008 UTC (6 years, 8 months ago) by ph10
File size: 38876 byte(s)
Exclude "long long" test in pcrecpp_unittest when HAVE_LONG_LONG is not 
defined.

1 // -*- coding: utf-8 -*-
2 //
3 // Copyright (c) 2005 - 2006, Google Inc.
4 // All rights reserved.
5 //
6 // Redistribution and use in source and binary forms, with or without
7 // modification, are permitted provided that the following conditions are
8 // met:
9 //
10 // * Redistributions of source code must retain the above copyright
11 // notice, this list of conditions and the following disclaimer.
12 // * Redistributions in binary form must reproduce the above
13 // copyright notice, this list of conditions and the following disclaimer
14 // in the documentation and/or other materials provided with the
15 // distribution.
16 // * Neither the name of Google Inc. nor the names of its
17 // contributors may be used to endorse or promote products derived from
18 // this software without specific prior written permission.
19 //
20 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 //
32 // Author: Sanjay Ghemawat
33 //
34 // TODO: Test extractions for PartialMatch/Consume
35
36 #ifdef HAVE_CONFIG_H
37 #include "config.h"
38 #endif
39
40 #include <stdio.h>
41 #include <cassert>
42 #include <vector>
43 #include "pcrecpp.h"
44
45 using pcrecpp::StringPiece;
46 using pcrecpp::RE;
47 using pcrecpp::RE_Options;
48 using pcrecpp::Hex;
49 using pcrecpp::Octal;
50 using pcrecpp::CRadix;
51
// File-wide verbosity switch.  When true, the option-test helpers in this
// file print one extra line describing the regex and subject they are about
// to exercise.  It is initialised to false here.
static bool VERBOSE_TEST = false;
53
// CHECK(condition) terminates the program with exit status 1 when
// `condition` is false, after printing the source file, line and the text of
// the failed condition to stderr.  Unlike assert(), it is *not* controlled by
// NDEBUG, so the condition is evaluated in every compilation mode.
// Therefore, it is safe to do things like:
//    CHECK_EQ(fp->Write(x), 4)
#define CHECK(condition) do {                                   \
    if (!(condition)) {                                         \
      fprintf(stderr, "%s:%d: Check failed: %s\n",              \
              __FILE__, __LINE__, #condition);                  \
      exit(1);                                                  \
    }                                                           \
  } while (0)

// CHECK_EQ(a, b) is CHECK applied to `a == b`.  Both operands are
// parenthesized so that arguments containing operators that bind less
// tightly than `==` (for example `x & mask` or `c ? p : q`) are compared as
// whole expressions rather than being re-associated by the expansion.
#define CHECK_EQ(a, b) CHECK((a) == (b))
67
68 static void Timing1(int num_iters) {
69 // Same pattern lots of times
70 RE pattern("ruby:\\d+");
71 StringPiece p("ruby:1234");
72 for (int j = num_iters; j > 0; j--) {
73 CHECK(pattern.FullMatch(p));
74 }
75 }
76
77 static void Timing2(int num_iters) {
78 // Same pattern lots of times
79 RE pattern("ruby:(\\d+)");
80 int i;
81 for (int j = num_iters; j > 0; j--) {
82 CHECK(pattern.FullMatch("ruby:1234", &i));
83 CHECK_EQ(i, 1234);
84 }
85 }
86
87 static void Timing3(int num_iters) {
88 string text_string;
89 for (int j = num_iters; j > 0; j--) {
90 text_string += "this is another line\n";
91 }
92
93 RE line_matcher(".*\n");
94 string line;
95 StringPiece text(text_string);
96 int counter = 0;
97 while (line_matcher.Consume(&text)) {
98 counter++;
99 }
100 printf("Matched %d lines\n", counter);
101 }
102
#if 0  // enable this if you have a way of defining VirtualProcessSize()

// Memory-growth check: constructs 100000 RE objects from distinct patterns
// ("pat000000000" ... "pat000099999"), samples VirtualProcessSize() after the
// first 50000 and again at the end, and requires the relative growth between
// the two samples to stay below 2%.
static void LeakTest() {
  unsigned long long initial_size = 0;
  for (int i = 0; i < 100000; i++) {
    if (i == 50000) {
      initial_size = VirtualProcessSize();
      printf("Size after 50000: %llu\n", initial_size);
    }
    char buf[100];           // definitely big enough
    sprintf(buf, "pat%09d", i);
    RE newre(buf);
  }
  // Same type as initial_size, and the type that "%llu" expects.
  const unsigned long long final_size = VirtualProcessSize();
  printf("Size after 100000: %llu\n", final_size);
  // Subtract as doubles: an unsigned subtraction would wrap around to a huge
  // value if the process size shrank between the two samples.
  const double growth =
      (double(final_size) - double(initial_size)) / double(final_size);
  printf("Growth: %0.2f%%", growth * 100);
  CHECK(growth < 0.02);    // Allow < 2% growth
}

#endif
125
126 static void RadixTests() {
127 printf("Testing hex\n");
128
129 #define CHECK_HEX(type, value) \
130 do { \
131 type v; \
132 CHECK(RE("([0-9a-fA-F]+)[uUlL]*").FullMatch(#value, Hex(&v))); \
133 CHECK_EQ(v, 0x ## value); \
134 CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0x" #value, CRadix(&v))); \
135 CHECK_EQ(v, 0x ## value); \
136 } while(0)
137
138 CHECK_HEX(short, 2bad);
139 CHECK_HEX(unsigned short, 2badU);
140 CHECK_HEX(int, dead);
141 CHECK_HEX(unsigned int, deadU);
142 CHECK_HEX(long, 7eadbeefL);
143 CHECK_HEX(unsigned long, deadbeefUL);
144 #ifdef HAVE_LONG_LONG
145 CHECK_HEX(long long, 12345678deadbeefLL);
146 #endif
147 #ifdef HAVE_UNSIGNED_LONG_LONG
148 CHECK_HEX(unsigned long long, cafebabedeadbeefULL);
149 #endif
150
151 #undef CHECK_HEX
152
153 printf("Testing octal\n");
154
155 #define CHECK_OCTAL(type, value) \
156 do { \
157 type v; \
158 CHECK(RE("([0-7]+)[uUlL]*").FullMatch(#value, Octal(&v))); \
159 CHECK_EQ(v, 0 ## value); \
160 CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0" #value, CRadix(&v))); \
161 CHECK_EQ(v, 0 ## value); \
162 } while(0)
163
164 CHECK_OCTAL(short, 77777);
165 CHECK_OCTAL(unsigned short, 177777U);
166 CHECK_OCTAL(int, 17777777777);
167 CHECK_OCTAL(unsigned int, 37777777777U);
168 CHECK_OCTAL(long, 17777777777L);
169 CHECK_OCTAL(unsigned long, 37777777777UL);
170 #ifdef HAVE_LONG_LONG
171 CHECK_OCTAL(long long, 777777777777777777777LL);
172 #endif
173 #ifdef HAVE_UNSIGNED_LONG_LONG
174 CHECK_OCTAL(unsigned long long, 1777777777777777777777ULL);
175 #endif
176
177 #undef CHECK_OCTAL
178
179 printf("Testing decimal\n");
180
181 #define CHECK_DECIMAL(type, value) \
182 do { \
183 type v; \
184 CHECK(RE("(-?[0-9]+)[uUlL]*").FullMatch(#value, &v)); \
185 CHECK_EQ(v, value); \
186 CHECK(RE("(-?[0-9a-fA-FxX]+)[uUlL]*").FullMatch(#value, CRadix(&v))); \
187 CHECK_EQ(v, value); \
188 } while(0)
189
190 CHECK_DECIMAL(short, -1);
191 CHECK_DECIMAL(unsigned short, 9999);
192 CHECK_DECIMAL(int, -1000);
193 CHECK_DECIMAL(unsigned int, 12345U);
194 CHECK_DECIMAL(long, -10000000L);
195 CHECK_DECIMAL(unsigned long, 3083324652U);
196 #ifdef HAVE_LONG_LONG
197 CHECK_DECIMAL(long long, -100000000000000LL);
198 #endif
199 #ifdef HAVE_UNSIGNED_LONG_LONG
200 CHECK_DECIMAL(unsigned long long, 1234567890987654321ULL);
201 #endif
202
203 #undef CHECK_DECIMAL
204
205 }
206
207 static void TestReplace() {
208 printf("Testing Replace\n");
209
210 struct ReplaceTest {
211 const char *regexp;
212 const char *rewrite;
213 const char *original;
214 const char *single;
215 const char *global;
216 int global_count; // the expected return value from ReplaceAll
217 };
218 static const ReplaceTest tests[] = {
219 { "(qu|[b-df-hj-np-tv-z]*)([a-z]+)",
220 "\\2\\1ay",
221 "the quick brown fox jumps over the lazy dogs.",
222 "ethay quick brown fox jumps over the lazy dogs.",
223 "ethay ickquay ownbray oxfay umpsjay overay ethay azylay ogsday.",
224 9 },
225 { "\\w+",
226 "\\0-NOSPAM",
227 "paul.haahr@google.com",
228 "paul-NOSPAM.haahr@google.com",
229 "paul-NOSPAM.haahr-NOSPAM@google-NOSPAM.com-NOSPAM",
230 4 },
231 { "^",
232 "(START)",
233 "foo",
234 "(START)foo",
235 "(START)foo",
236 1 },
237 { "^",
238 "(START)",
239 "",
240 "(START)",
241 "(START)",
242 1 },
243 { "$",
244 "(END)",
245 "",
246 "(END)",
247 "(END)",
248 1 },
249 { "b",
250 "bb",
251 "ababababab",
252 "abbabababab",
253 "abbabbabbabbabb",
254 5 },
255 { "b",
256 "bb",
257 "bbbbbb",
258 "bbbbbbb",
259 "bbbbbbbbbbbb",
260 6 },
261 { "b+",
262 "bb",
263 "bbbbbb",
264 "bb",
265 "bb",
266 1 },
267 { "b*",
268 "bb",
269 "bbbbbb",
270 "bb",
271 "bb",
272 1 },
273 { "b*",
274 "bb",
275 "aaaaa",
276 "bbaaaaa",
277 "bbabbabbabbabbabb",
278 6 },
279 { "b*",
280 "bb",
281 "aa\naa\n",
282 "bbaa\naa\n",
283 "bbabbabb\nbbabbabb\nbb",
284 7 },
285 { "b*",
286 "bb",
287 "aa\raa\r",
288 "bbaa\raa\r",
289 "bbabbabb\rbbabbabb\rbb",
290 7 },
291 { "b*",
292 "bb",
293 "aa\r\naa\r\n",
294 "bbaa\r\naa\r\n",
295 "bbabbabb\r\nbbabbabb\r\nbb",
296 7 },
297 #ifdef SUPPORT_UTF8
298 { "b*",
299 "bb",
300 "\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8", // utf8
301 "bb\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8",
302 "bb\xE3\x83\x9B""bb""\xE3\x83\xBC""bb""\xE3\x83\xA0""bb""\xE3\x81\xB8""bb",
303 5 },
304 { "b*",
305 "bb",
306 "\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n", // utf8
307 "bb\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n",
308 ("bb\xE3\x83\x9B""bb\r\nbb""\xE3\x83\xBC""bb\rbb""\xE3\x83\xA0"
309 "bb\nbb""\xE3\x81\xB8""bb\r\nbb"),
310 9 },
311 #endif
312 { "", NULL, NULL, NULL, NULL, 0 }
313 };
314
315 #ifdef SUPPORT_UTF8
316 const bool support_utf8 = true;
317 #else
318 const bool support_utf8 = false;
319 #endif
320
321 for (const ReplaceTest *t = tests; t->original != NULL; ++t) {
322 RE re(t->regexp, RE_Options(PCRE_NEWLINE_CRLF).set_utf8(support_utf8));
323 assert(re.error().empty());
324 string one(t->original);
325 CHECK(re.Replace(t->rewrite, &one));
326 CHECK_EQ(one, t->single);
327 string all(t->original);
328 const int replace_count = re.GlobalReplace(t->rewrite, &all);
329 CHECK_EQ(all, t->global);
330 CHECK_EQ(replace_count, t->global_count);
331 }
332
333 // One final test: test \r\n replacement when we're not in CRLF mode
334 {
335 RE re("b*", RE_Options(PCRE_NEWLINE_CR).set_utf8(support_utf8));
336 assert(re.error().empty());
337 string all("aa\r\naa\r\n");
338 CHECK_EQ(re.GlobalReplace("bb", &all), 9);
339 CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
340 }
341 {
342 RE re("b*", RE_Options(PCRE_NEWLINE_LF).set_utf8(support_utf8));
343 assert(re.error().empty());
344 string all("aa\r\naa\r\n");
345 CHECK_EQ(re.GlobalReplace("bb", &all), 9);
346 CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
347 }
348 // TODO: test what happens when no PCRE_NEWLINE_* flag is set.
349 // Alas, the answer depends on how pcre was compiled.
350 }
351
352 static void TestExtract() {
353 printf("Testing Extract\n");
354
355 string s;
356
357 CHECK(RE("(.*)@([^.]*)").Extract("\\2!\\1", "boris@kremvax.ru", &s));
358 CHECK_EQ(s, "kremvax!boris");
359
360 // check the RE interface as well
361 CHECK(RE(".*").Extract("'\\0'", "foo", &s));
362 CHECK_EQ(s, "'foo'");
363 CHECK(!RE("bar").Extract("'\\0'", "baz", &s));
364 CHECK_EQ(s, "'foo'");
365 }
366
367 static void TestConsume() {
368 printf("Testing Consume\n");
369
370 string word;
371
372 string s(" aaa b!@#$@#$cccc");
373 StringPiece input(s);
374
375 RE r("\\s*(\\w+)"); // matches a word, possibly proceeded by whitespace
376 CHECK(r.Consume(&input, &word));
377 CHECK_EQ(word, "aaa");
378 CHECK(r.Consume(&input, &word));
379 CHECK_EQ(word, "b");
380 CHECK(! r.Consume(&input, &word));
381 }
382
383 static void TestFindAndConsume() {
384 printf("Testing FindAndConsume\n");
385
386 string word;
387
388 string s(" aaa b!@#$@#$cccc");
389 StringPiece input(s);
390
391 RE r("(\\w+)"); // matches a word
392 CHECK(r.FindAndConsume(&input, &word));
393 CHECK_EQ(word, "aaa");
394 CHECK(r.FindAndConsume(&input, &word));
395 CHECK_EQ(word, "b");
396 CHECK(r.FindAndConsume(&input, &word));
397 CHECK_EQ(word, "cccc");
398 CHECK(! r.FindAndConsume(&input, &word));
399 }
400
401 static void TestMatchNumberPeculiarity() {
402 printf("Testing match-number peculiaraity\n");
403
404 string word1;
405 string word2;
406 string word3;
407
408 RE r("(foo)|(bar)|(baz)");
409 CHECK(r.PartialMatch("foo", &word1, &word2, &word3));
410 CHECK_EQ(word1, "foo");
411 CHECK_EQ(word2, "");
412 CHECK_EQ(word3, "");
413 CHECK(r.PartialMatch("bar", &word1, &word2, &word3));
414 CHECK_EQ(word1, "");
415 CHECK_EQ(word2, "bar");
416 CHECK_EQ(word3, "");
417 CHECK(r.PartialMatch("baz", &word1, &word2, &word3));
418 CHECK_EQ(word1, "");
419 CHECK_EQ(word2, "");
420 CHECK_EQ(word3, "baz");
421 CHECK(!r.PartialMatch("f", &word1, &word2, &word3));
422
423 string a;
424 CHECK(RE("(foo)|hello").FullMatch("hello", &a));
425 CHECK_EQ(a, "");
426 }
427
428 static void TestRecursion() {
429 printf("Testing recursion\n");
430
431 // Get one string that passes (sometimes), one that never does.
432 string text_good("abcdefghijk");
433 string text_bad("acdefghijkl");
434
435 // According to pcretest, matching text_good against (\w+)*b
436 // requires match_limit of at least 8192, and match_recursion_limit
437 // of at least 37.
438
439 RE_Options options_ml;
440 options_ml.set_match_limit(8192);
441 RE re("(\\w+)*b", options_ml);
442 CHECK(re.PartialMatch(text_good) == true);
443 CHECK(re.PartialMatch(text_bad) == false);
444 CHECK(re.FullMatch(text_good) == false);
445 CHECK(re.FullMatch(text_bad) == false);
446
447 options_ml.set_match_limit(1024);
448 RE re2("(\\w+)*b", options_ml);
449 CHECK(re2.PartialMatch(text_good) == false); // because of match_limit
450 CHECK(re2.PartialMatch(text_bad) == false);
451 CHECK(re2.FullMatch(text_good) == false);
452 CHECK(re2.FullMatch(text_bad) == false);
453
454 RE_Options options_mlr;
455 options_mlr.set_match_limit_recursion(50);
456 RE re3("(\\w+)*b", options_mlr);
457 CHECK(re3.PartialMatch(text_good) == true);
458 CHECK(re3.PartialMatch(text_bad) == false);
459 CHECK(re3.FullMatch(text_good) == false);
460 CHECK(re3.FullMatch(text_bad) == false);
461
462 options_mlr.set_match_limit_recursion(10);
463 RE re4("(\\w+)*b", options_mlr);
464 CHECK(re4.PartialMatch(text_good) == false);
465 CHECK(re4.PartialMatch(text_bad) == false);
466 CHECK(re4.FullMatch(text_good) == false);
467 CHECK(re4.FullMatch(text_bad) == false);
468 }
469
470 // A meta-quoted string, interpreted as a pattern, should always match
471 // the original unquoted string.
472 static void TestQuoteMeta(string unquoted, RE_Options options = RE_Options()) {
473 string quoted = RE::QuoteMeta(unquoted);
474 RE re(quoted, options);
475 CHECK(re.FullMatch(unquoted));
476 }
477
478 // A string containing meaningful regexp characters, which is then meta-
479 // quoted, should not generally match a string the unquoted string does.
480 static void NegativeTestQuoteMeta(string unquoted, string should_not_match,
481 RE_Options options = RE_Options()) {
482 string quoted = RE::QuoteMeta(unquoted);
483 RE re(quoted, options);
484 CHECK(!re.FullMatch(should_not_match));
485 }
486
487 // Tests that quoted meta characters match their original strings,
488 // and that a few things that shouldn't match indeed do not.
489 static void TestQuotaMetaSimple() {
490 TestQuoteMeta("foo");
491 TestQuoteMeta("foo.bar");
492 TestQuoteMeta("foo\\.bar");
493 TestQuoteMeta("[1-9]");
494 TestQuoteMeta("1.5-2.0?");
495 TestQuoteMeta("\\d");
496 TestQuoteMeta("Who doesn't like ice cream?");
497 TestQuoteMeta("((a|b)c?d*e+[f-h]i)");
498 TestQuoteMeta("((?!)xxx).*yyy");
499 TestQuoteMeta("([");
500 }
501
502 static void TestQuoteMetaSimpleNegative() {
503 NegativeTestQuoteMeta("foo", "bar");
504 NegativeTestQuoteMeta("...", "bar");
505 NegativeTestQuoteMeta("\\.", ".");
506 NegativeTestQuoteMeta("\\.", "..");
507 NegativeTestQuoteMeta("(a)", "a");
508 NegativeTestQuoteMeta("(a|b)", "a");
509 NegativeTestQuoteMeta("(a|b)", "(a)");
510 NegativeTestQuoteMeta("(a|b)", "a|b");
511 NegativeTestQuoteMeta("[0-9]", "0");
512 NegativeTestQuoteMeta("[0-9]", "0-9");
513 NegativeTestQuoteMeta("[0-9]", "[9]");
514 NegativeTestQuoteMeta("((?!)xxx)", "xxx");
515 }
516
517 static void TestQuoteMetaLatin1() {
518 TestQuoteMeta("3\xb2 = 9");
519 }
520
// Runs the QuoteMeta round-trip over strings containing multi-byte UTF-8
// sequences, with the pcrecpp::UTF8() options.  The whole body is compiled
// only when SUPPORT_UTF8 is defined; otherwise the function does nothing.
static void TestQuoteMetaUtf8() {
#ifdef SUPPORT_UTF8
  const RE_Options utf8_opts = pcrecpp::UTF8();

  TestQuoteMeta("Pl\xc3\xa1\x63ido Domingo", utf8_opts);
  TestQuoteMeta("xyz", utf8_opts);                 // No fancy utf8
  TestQuoteMeta("\xc2\xb0", utf8_opts);            // 2-byte utf8 (degree symbol)
  TestQuoteMeta("27\xc2\xb0 degrees", utf8_opts);  // As a middle character
  TestQuoteMeta("\xe2\x80\xb3", utf8_opts);        // 3-byte utf8 (double prime)
  TestQuoteMeta("\xf0\x9d\x85\x9f", utf8_opts);    // 4-byte utf8 (music note)

  // No UTF8 option here: interpreted as Latin-1, but should still work
  TestQuoteMeta("27\xc2\xb0");

  // A subject with a backslash before each byte of the 2-byte sequence must
  // not be matched by the quoted form.
  NegativeTestQuoteMeta("27\xc2\xb0",              // 2-byte utf (degree symbol)
                        "27\\\xc2\\\xb0",
                        utf8_opts);
#endif
}
535
536 static void TestQuoteMetaAll() {
537 printf("Testing QuoteMeta\n");
538 TestQuotaMetaSimple();
539 TestQuoteMetaSimpleNegative();
540 TestQuoteMetaLatin1();
541 TestQuoteMetaUtf8();
542 }
543
544 //
545 // Options tests contributed by
546 // Giuseppe Maxia, CTO, Stardata s.r.l.
547 // July 2005
548 //
549 static void GetOneOptionResult(
550 const char *option_name,
551 const char *regex,
552 const char *str,
553 RE_Options options,
554 bool full,
555 string expected) {
556
557 printf("Testing Option <%s>\n", option_name);
558 if(VERBOSE_TEST)
559 printf("/%s/ finds \"%s\" within \"%s\" \n",
560 regex,
561 expected.c_str(),
562 str);
563 string captured("");
564 if (full)
565 RE(regex,options).FullMatch(str, &captured);
566 else
567 RE(regex,options).PartialMatch(str, &captured);
568 CHECK_EQ(captured, expected);
569 }
570
571 static void TestOneOption(
572 const char *option_name,
573 const char *regex,
574 const char *str,
575 RE_Options options,
576 bool full,
577 bool assertive = true) {
578
579 printf("Testing Option <%s>\n", option_name);
580 if (VERBOSE_TEST)
581 printf("'%s' %s /%s/ \n",
582 str,
583 (assertive? "matches" : "doesn't match"),
584 regex);
585 if (assertive) {
586 if (full)
587 CHECK(RE(regex,options).FullMatch(str));
588 else
589 CHECK(RE(regex,options).PartialMatch(str));
590 } else {
591 if (full)
592 CHECK(!RE(regex,options).FullMatch(str));
593 else
594 CHECK(!RE(regex,options).PartialMatch(str));
595 }
596 }
597
598 static void Test_CASELESS() {
599 RE_Options options;
600 RE_Options options2;
601
602 options.set_caseless(true);
603 TestOneOption("CASELESS (class)", "HELLO", "hello", options, false);
604 TestOneOption("CASELESS (class2)", "HELLO", "hello", options2.set_caseless(true), false);
605 TestOneOption("CASELESS (class)", "^[A-Z]+$", "Hello", options, false);
606
607 TestOneOption("CASELESS (function)", "HELLO", "hello", pcrecpp::CASELESS(), false);
608 TestOneOption("CASELESS (function)", "^[A-Z]+$", "Hello", pcrecpp::CASELESS(), false);
609 options.set_caseless(false);
610 TestOneOption("no CASELESS", "HELLO", "hello", options, false, false);
611 }
612
613 static void Test_MULTILINE() {
614 RE_Options options;
615 RE_Options options2;
616 const char *str = "HELLO\n" "cruel\n" "world\n";
617
618 options.set_multiline(true);
619 TestOneOption("MULTILINE (class)", "^cruel$", str, options, false);
620 TestOneOption("MULTILINE (class2)", "^cruel$", str, options2.set_multiline(true), false);
621 TestOneOption("MULTILINE (function)", "^cruel$", str, pcrecpp::MULTILINE(), false);
622 options.set_multiline(false);
623 TestOneOption("no MULTILINE", "^cruel$", str, options, false, false);
624 }
625
626 static void Test_DOTALL() {
627 RE_Options options;
628 RE_Options options2;
629 const char *str = "HELLO\n" "cruel\n" "world";
630
631 options.set_dotall(true);
632 TestOneOption("DOTALL (class)", "HELLO.*world", str, options, true);
633 TestOneOption("DOTALL (class2)", "HELLO.*world", str, options2.set_dotall(true), true);
634 TestOneOption("DOTALL (function)", "HELLO.*world", str, pcrecpp::DOTALL(), true);
635 options.set_dotall(false);
636 TestOneOption("no DOTALL", "HELLO.*world", str, options, true, false);
637 }
638
639 static void Test_DOLLAR_ENDONLY() {
640 RE_Options options;
641 RE_Options options2;
642 const char *str = "HELLO world\n";
643
644 TestOneOption("no DOLLAR_ENDONLY", "world$", str, options, false);
645 options.set_dollar_endonly(true);
646 TestOneOption("DOLLAR_ENDONLY 1", "world$", str, options, false, false);
647 TestOneOption("DOLLAR_ENDONLY 2", "world$", str, options2.set_dollar_endonly(true), false, false);
648 }
649
650 static void Test_EXTRA() {
651 RE_Options options;
652 const char *str = "HELLO";
653
654 options.set_extra(true);
655 TestOneOption("EXTRA 1", "\\HELL\\O", str, options, true, false );
656 TestOneOption("EXTRA 2", "\\HELL\\O", str, RE_Options().set_extra(true), true, false );
657 options.set_extra(false);
658 TestOneOption("no EXTRA", "\\HELL\\O", str, options, true );
659 }
660
661 static void Test_EXTENDED() {
662 RE_Options options;
663 RE_Options options2;
664 const char *str = "HELLO world";
665
666 options.set_extended(true);
667 TestOneOption("EXTENDED (class)", "HELLO world", str, options, false, false);
668 TestOneOption("EXTENDED (class2)", "HELLO world", str, options2.set_extended(true), false, false);
669 TestOneOption("EXTENDED (class)",
670 "^ HE L{2} O "
671 "\\s+ "
672 "\\w+ $ ",
673 str,
674 options,
675 false);
676
677 TestOneOption("EXTENDED (function)", "HELLO world", str, pcrecpp::EXTENDED(), false, false);
678 TestOneOption("EXTENDED (function)",
679 "^ HE L{2} O "
680 "\\s+ "
681 "\\w+ $ ",
682 str,
683 pcrecpp::EXTENDED(),
684 false);
685
686 options.set_extended(false);
687 TestOneOption("no EXTENDED", "HELLO world", str, options, false);
688 }
689
690 static void Test_NO_AUTO_CAPTURE() {
691 RE_Options options;
692 const char *str = "HELLO world";
693 string captured;
694
695 printf("Testing Option <no NO_AUTO_CAPTURE>\n");
696 if (VERBOSE_TEST)
697 printf("parentheses capture text\n");
698 RE re("(world|universe)$", options);
699 CHECK(re.Extract("\\1", str , &captured));
700 CHECK_EQ(captured, "world");
701 options.set_no_auto_capture(true);
702 printf("testing Option <NO_AUTO_CAPTURE>\n");
703 if (VERBOSE_TEST)
704 printf("parentheses do not capture text\n");
705 re.Extract("\\1",str, &captured );
706 CHECK_EQ(captured, "world");
707 }
708
709 static void Test_UNGREEDY() {
710 RE_Options options;
711 const char *str = "HELLO, 'this' is the 'world'";
712
713 options.set_ungreedy(true);
714 GetOneOptionResult("UNGREEDY 1", "('.*')", str, options, false, "'this'" );
715 GetOneOptionResult("UNGREEDY 2", "('.*')", str, RE_Options().set_ungreedy(true), false, "'this'" );
716 GetOneOptionResult("UNGREEDY", "('.*?')", str, options, false, "'this' is the 'world'" );
717
718 options.set_ungreedy(false);
719 GetOneOptionResult("no UNGREEDY", "('.*')", str, options, false, "'this' is the 'world'" );
720 GetOneOptionResult("no UNGREEDY", "('.*?')", str, options, false, "'this'" );
721 }
722
723 static void Test_all_options() {
724 const char *str = "HELLO\n" "cruel\n" "world";
725 RE_Options options;
726 options.set_all_options(PCRE_CASELESS | PCRE_DOTALL);
727
728 TestOneOption("all_options (CASELESS|DOTALL)", "^hello.*WORLD", str , options, false);
729 options.set_all_options(0);
730 TestOneOption("all_options (0)", "^hello.*WORLD", str , options, false, false);
731 options.set_all_options(PCRE_MULTILINE | PCRE_EXTENDED);
732
733 TestOneOption("all_options (MULTILINE|EXTENDED)", " ^ c r u e l $ ", str, options, false);
734 TestOneOption("all_options (MULTILINE|EXTENDED) with constructor",
735 " ^ c r u e l $ ",
736 str,
737 RE_Options(PCRE_MULTILINE | PCRE_EXTENDED),
738 false);
739
740 TestOneOption("all_options (MULTILINE|EXTENDED) with concatenation",
741 " ^ c r u e l $ ",
742 str,
743 RE_Options()
744 .set_multiline(true)
745 .set_extended(true),
746 false);
747
748 options.set_all_options(0);
749 TestOneOption("all_options (0)", "^ c r u e l $", str, options, false, false);
750
751 }
752
753 static void TestOptions() {
754 printf("Testing Options\n");
755 Test_CASELESS();
756 Test_MULTILINE();
757 Test_DOTALL();
758 Test_DOLLAR_ENDONLY();
759 Test_EXTENDED();
760 Test_NO_AUTO_CAPTURE();
761 Test_UNGREEDY();
762 Test_EXTRA();
763 Test_all_options();
764 }
765
766 static void TestConstructors() {
767 printf("Testing constructors\n");
768
769 RE_Options options;
770 options.set_dotall(true);
771 const char *str = "HELLO\n" "cruel\n" "world";
772
773 RE orig("HELLO.*world", options);
774 CHECK(orig.FullMatch(str));
775
776 RE copy1(orig);
777 CHECK(copy1.FullMatch(str));
778
779 RE copy2("not a match");
780 CHECK(!copy2.FullMatch(str));
781 copy2 = copy1;
782 CHECK(copy2.FullMatch(str));
783 copy2 = orig;
784 CHECK(copy2.FullMatch(str));
785
786 // Make sure when we assign to ourselves, nothing bad happens
787 orig = orig;
788 copy1 = copy1;
789 copy2 = copy2;
790 CHECK(orig.FullMatch(str));
791 CHECK(copy1.FullMatch(str));
792 CHECK(copy2.FullMatch(str));
793 }
794
795 int main(int argc, char** argv) {
796 // Treat any flag as --help
797 if (argc > 1 && argv[1][0] == '-') {
798 printf("Usage: %s [timing1|timing2|timing3 num-iters]\n"
799 " If 'timingX ###' is specified, run the given timing test\n"
800 " with the given number of iterations, rather than running\n"
801 " the default corectness test.\n", argv[0]);
802 return 0;
803 }
804
805 if (argc > 1) {
806 if ( argc == 2 || atoi(argv[2]) == 0) {
807 printf("timing mode needs a num-iters argument\n");
808 return 1;
809 }
810 if (!strcmp(argv[1], "timing1"))
811 Timing1(atoi(argv[2]));
812 else if (!strcmp(argv[1], "timing2"))
813 Timing2(atoi(argv[2]));
814 else if (!strcmp(argv[1], "timing3"))
815 Timing3(atoi(argv[2]));
816 else
817 printf("Unknown argument '%s'\n", argv[1]);
818 return 0;
819 }
820
821 printf("Testing FullMatch\n");
822
823 int i;
824 string s;
825
826 /***** FullMatch with no args *****/
827
828 CHECK(RE("h.*o").FullMatch("hello"));
829 CHECK(!RE("h.*o").FullMatch("othello")); // Must be anchored at front
830 CHECK(!RE("h.*o").FullMatch("hello!")); // Must be anchored at end
831 CHECK(RE("a*").FullMatch("aaaa")); // Fullmatch with normal op
832 CHECK(RE("a*?").FullMatch("aaaa")); // Fullmatch with nongreedy op
833 CHECK(RE("a*?\\z").FullMatch("aaaa")); // Two unusual ops
834
835 /***** FullMatch with args *****/
836
837 // Zero-arg
838 CHECK(RE("\\d+").FullMatch("1001"));
839
840 // Single-arg
841 CHECK(RE("(\\d+)").FullMatch("1001", &i));
842 CHECK_EQ(i, 1001);
843 CHECK(RE("(-?\\d+)").FullMatch("-123", &i));
844 CHECK_EQ(i, -123);
845 CHECK(!RE("()\\d+").FullMatch("10", &i));
846 CHECK(!RE("(\\d+)").FullMatch("1234567890123456789012345678901234567890",
847 &i));
848
849 // Digits surrounding integer-arg
850 CHECK(RE("1(\\d*)4").FullMatch("1234", &i));
851 CHECK_EQ(i, 23);
852 CHECK(RE("(\\d)\\d+").FullMatch("1234", &i));
853 CHECK_EQ(i, 1);
854 CHECK(RE("(-\\d)\\d+").FullMatch("-1234", &i));
855 CHECK_EQ(i, -1);
856 CHECK(RE("(\\d)").PartialMatch("1234", &i));
857 CHECK_EQ(i, 1);
858 CHECK(RE("(-\\d)").PartialMatch("-1234", &i));
859 CHECK_EQ(i, -1);
860
861 // String-arg
862 CHECK(RE("h(.*)o").FullMatch("hello", &s));
863 CHECK_EQ(s, string("ell"));
864
865 // StringPiece-arg
866 StringPiece sp;
867 CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &sp, &i));
868 CHECK_EQ(sp.size(), 4);
869 CHECK(memcmp(sp.data(), "ruby", 4) == 0);
870 CHECK_EQ(i, 1234);
871
872 // Multi-arg
873 CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &s, &i));
874 CHECK_EQ(s, string("ruby"));
875 CHECK_EQ(i, 1234);
876
877 // Ignore non-void* NULL arg
878 CHECK(RE("he(.*)lo").FullMatch("hello", (char*)NULL));
879 CHECK(RE("h(.*)o").FullMatch("hello", (string*)NULL));
880 CHECK(RE("h(.*)o").FullMatch("hello", (StringPiece*)NULL));
881 CHECK(RE("(.*)").FullMatch("1234", (int*)NULL));
882 #ifdef HAVE_LONG_LONG
883 CHECK(RE("(.*)").FullMatch("1234567890123456", (long long*)NULL));
884 #endif
885 CHECK(RE("(.*)").FullMatch("123.4567890123456", (double*)NULL));
886 CHECK(RE("(.*)").FullMatch("123.4567890123456", (float*)NULL));
887
888 // Fail on non-void* NULL arg if the match doesn't parse for the given type.
889 CHECK(!RE("h(.*)lo").FullMatch("hello", &s, (char*)NULL));
890 CHECK(!RE("(.*)").FullMatch("hello", (int*)NULL));
891 CHECK(!RE("(.*)").FullMatch("1234567890123456", (int*)NULL));
892 CHECK(!RE("(.*)").FullMatch("hello", (double*)NULL));
893 CHECK(!RE("(.*)").FullMatch("hello", (float*)NULL));
894
895 // Ignored arg
896 CHECK(RE("(\\w+)(:)(\\d+)").FullMatch("ruby:1234", &s, (void*)NULL, &i));
897 CHECK_EQ(s, string("ruby"));
898 CHECK_EQ(i, 1234);
899
900 // Type tests
901 {
902 char c;
903 CHECK(RE("(H)ello").FullMatch("Hello", &c));
904 CHECK_EQ(c, 'H');
905 }
906 {
907 unsigned char c;
908 CHECK(RE("(H)ello").FullMatch("Hello", &c));
909 CHECK_EQ(c, static_cast<unsigned char>('H'));
910 }
911 {
912 short v;
913 CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
914 CHECK(RE("(-?\\d+)").FullMatch("-100", &v)); CHECK_EQ(v, -100);
915 CHECK(RE("(-?\\d+)").FullMatch("32767", &v)); CHECK_EQ(v, 32767);
916 CHECK(RE("(-?\\d+)").FullMatch("-32768", &v)); CHECK_EQ(v, -32768);
917 CHECK(!RE("(-?\\d+)").FullMatch("-32769", &v));
918 CHECK(!RE("(-?\\d+)").FullMatch("32768", &v));
919 }
920 {
921 unsigned short v;
922 CHECK(RE("(\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
923 CHECK(RE("(\\d+)").FullMatch("32767", &v)); CHECK_EQ(v, 32767);
924 CHECK(RE("(\\d+)").FullMatch("65535", &v)); CHECK_EQ(v, 65535);
925 CHECK(!RE("(\\d+)").FullMatch("65536", &v));
926 }
927 {
928 int v;
929 static const int max_value = 0x7fffffff;
930 static const int min_value = -max_value - 1;
931 CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
932 CHECK(RE("(-?\\d+)").FullMatch("-100", &v)); CHECK_EQ(v, -100);
933 CHECK(RE("(-?\\d+)").FullMatch("2147483647", &v)); CHECK_EQ(v, max_value);
934 CHECK(RE("(-?\\d+)").FullMatch("-2147483648", &v)); CHECK_EQ(v, min_value);
935 CHECK(!RE("(-?\\d+)").FullMatch("-2147483649", &v));
936 CHECK(!RE("(-?\\d+)").FullMatch("2147483648", &v));
937 }
938 {
939 unsigned int v;
940 static const unsigned int max_value = 0xfffffffful;
941 CHECK(RE("(\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
942 CHECK(RE("(\\d+)").FullMatch("4294967295", &v)); CHECK_EQ(v, max_value);
943 CHECK(!RE("(\\d+)").FullMatch("4294967296", &v));
944 }
945 #ifdef HAVE_LONG_LONG
946 # if defined(__MINGW__) || defined(__MINGW32__)
947 # define LLD "%I64d"
948 # define LLU "%I64u"
949 # else
950 # define LLD "%lld"
951 # define LLU "%llu"
952 # endif
953 {
954 long long v;
955 static const long long max_value = 0x7fffffffffffffffLL;
956 static const long long min_value = -max_value - 1;
957 char buf[32]; // definitely big enough for a long long
958
959 CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
960 CHECK(RE("(-?\\d+)").FullMatch("-100",&v)); CHECK_EQ(v, -100);
961
962 sprintf(buf, LLD, max_value);
963 CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
964
965 sprintf(buf, LLD, min_value);
966 CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, min_value);
967
968 sprintf(buf, LLD, max_value);
969 assert(buf[strlen(buf)-1] != '9');
970 buf[strlen(buf)-1]++;
971 CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
972
973 sprintf(buf, LLD, min_value);
974 assert(buf[strlen(buf)-1] != '9');
975 buf[strlen(buf)-1]++;
976 CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
977 }
978 #endif
979 #if defined HAVE_UNSIGNED_LONG_LONG && defined HAVE_LONG_LONG
980 {
981 unsigned long long v;
982 long long v2;
983 static const unsigned long long max_value = 0xffffffffffffffffULL;
984 char buf[32]; // definitely big enough for a unsigned long long
985
986 CHECK(RE("(-?\\d+)").FullMatch("100",&v)); CHECK_EQ(v, 100);
987 CHECK(RE("(-?\\d+)").FullMatch("-100",&v2)); CHECK_EQ(v2, -100);
988
989 sprintf(buf, LLU, max_value);
990 CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
991
992 assert(buf[strlen(buf)-1] != '9');
993 buf[strlen(buf)-1]++;
994 CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
995 }
996 #endif
997 {
998 float v;
999 CHECK(RE("(.*)").FullMatch("100", &v));
1000 CHECK(RE("(.*)").FullMatch("-100.", &v));
1001 CHECK(RE("(.*)").FullMatch("1e23", &v));
1002 }
1003 {
1004 double v;
1005 CHECK(RE("(.*)").FullMatch("100", &v));
1006 CHECK(RE("(.*)").FullMatch("-100.", &v));
1007 CHECK(RE("(.*)").FullMatch("1e23", &v));
1008 }
1009
1010 // Check that matching is fully anchored
1011 CHECK(!RE("(\\d+)").FullMatch("x1001", &i));
1012 CHECK(!RE("(\\d+)").FullMatch("1001x", &i));
1013 CHECK(RE("x(\\d+)").FullMatch("x1001", &i)); CHECK_EQ(i, 1001);
1014 CHECK(RE("(\\d+)x").FullMatch("1001x", &i)); CHECK_EQ(i, 1001);
1015
1016 // Braces
1017 CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcd"));
1018 CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcde"));
1019 CHECK(!RE("[0-9a-f+.-]{5,}").FullMatch("0abc"));
1020
1021 // Complicated RE
1022 CHECK(RE("foo|bar|[A-Z]").FullMatch("foo"));
1023 CHECK(RE("foo|bar|[A-Z]").FullMatch("bar"));
1024 CHECK(RE("foo|bar|[A-Z]").FullMatch("X"));
1025 CHECK(!RE("foo|bar|[A-Z]").FullMatch("XY"));
1026
1027 // Check full-match handling (needs '$' tacked on internally)
1028 CHECK(RE("fo|foo").FullMatch("fo"));
1029 CHECK(RE("fo|foo").FullMatch("foo"));
1030 CHECK(RE("fo|foo$").FullMatch("fo"));
1031 CHECK(RE("fo|foo$").FullMatch("foo"));
1032 CHECK(RE("foo$").FullMatch("foo"));
1033 CHECK(!RE("foo\\$").FullMatch("foo$bar"));
1034 CHECK(!RE("fo|bar").FullMatch("fox"));
1035
1036 // Enable the following check (change the `false` below to `true`) if we
1037 // change the handling of '$' to prevent it from matching a trailing newline
1038 if (false) {
1039 // Check that we don't get bitten by pcre's special handling of a
1040 // '\n' at the end of the string matching '$'
1041 CHECK(!RE("foo$").PartialMatch("foo\n"));
1042 }
1043
1044 // Number of args
1045 int a[16];
1046 CHECK(RE("").FullMatch(""));
1047
1048 memset(a, 0, sizeof(0));
1049 CHECK(RE("(\\d){1}").FullMatch("1",
1050 &a[0]));
1051 CHECK_EQ(a[0], 1);
1052
1053 memset(a, 0, sizeof(0));
1054 CHECK(RE("(\\d)(\\d)").FullMatch("12",
1055 &a[0], &a[1]));
1056 CHECK_EQ(a[0], 1);
1057 CHECK_EQ(a[1], 2);
1058
1059 memset(a, 0, sizeof(0));
1060 CHECK(RE("(\\d)(\\d)(\\d)").FullMatch("123",
1061 &a[0], &a[1], &a[2]));
1062 CHECK_EQ(a[0], 1);
1063 CHECK_EQ(a[1], 2);
1064 CHECK_EQ(a[2], 3);
1065
1066 memset(a, 0, sizeof(0));
1067 CHECK(RE("(\\d)(\\d)(\\d)(\\d)").FullMatch("1234",
1068 &a[0], &a[1], &a[2], &a[3]));
1069 CHECK_EQ(a[0], 1);
1070 CHECK_EQ(a[1], 2);
1071 CHECK_EQ(a[2], 3);
1072 CHECK_EQ(a[3], 4);
1073
1074 memset(a, 0, sizeof(0));
1075 CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("12345",
1076 &a[0], &a[1], &a[2],
1077 &a[3], &a[4]));
1078 CHECK_EQ(a[0], 1);
1079 CHECK_EQ(a[1], 2);
1080 CHECK_EQ(a[2], 3);
1081 CHECK_EQ(a[3], 4);
1082 CHECK_EQ(a[4], 5);
1083
1084 memset(a, 0, sizeof(0));
1085 CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("123456",
1086 &a[0], &a[1], &a[2],
1087 &a[3], &a[4], &a[5]));
1088 CHECK_EQ(a[0], 1);
1089 CHECK_EQ(a[1], 2);
1090 CHECK_EQ(a[2], 3);
1091 CHECK_EQ(a[3], 4);
1092 CHECK_EQ(a[4], 5);
1093 CHECK_EQ(a[5], 6);
1094
1095 memset(a, 0, sizeof(0));
1096 CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("1234567",
1097 &a[0], &a[1], &a[2], &a[3],
1098 &a[4], &a[5], &a[6]));
1099 CHECK_EQ(a[0], 1);
1100 CHECK_EQ(a[1], 2);
1101 CHECK_EQ(a[2], 3);
1102 CHECK_EQ(a[3], 4);
1103 CHECK_EQ(a[4], 5);
1104 CHECK_EQ(a[5], 6);
1105 CHECK_EQ(a[6], 7);
1106
1107 memset(a, 0, sizeof(0));
1108 CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)"
1109 "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch(
1110 "1234567890123456",
1111 &a[0], &a[1], &a[2], &a[3],
1112 &a[4], &a[5], &a[6], &a[7],
1113 &a[8], &a[9], &a[10], &a[11],
1114 &a[12], &a[13], &a[14], &a[15]));
1115 CHECK_EQ(a[0], 1);
1116 CHECK_EQ(a[1], 2);
1117 CHECK_EQ(a[2], 3);
1118 CHECK_EQ(a[3], 4);
1119 CHECK_EQ(a[4], 5);
1120 CHECK_EQ(a[5], 6);
1121 CHECK_EQ(a[6], 7);
1122 CHECK_EQ(a[7], 8);
1123 CHECK_EQ(a[8], 9);
1124 CHECK_EQ(a[9], 0);
1125 CHECK_EQ(a[10], 1);
1126 CHECK_EQ(a[11], 2);
1127 CHECK_EQ(a[12], 3);
1128 CHECK_EQ(a[13], 4);
1129 CHECK_EQ(a[14], 5);
1130 CHECK_EQ(a[15], 6);
1131
1132 /***** PartialMatch *****/
1133
1134 printf("Testing PartialMatch\n");
1135
1136 CHECK(RE("h.*o").PartialMatch("hello"));
1137 CHECK(RE("h.*o").PartialMatch("othello"));
1138 CHECK(RE("h.*o").PartialMatch("hello!"));
1139 CHECK(RE("((((((((((((((((((((x))))))))))))))))))))").PartialMatch("x"));
1140
1141 /***** other tests *****/
1142
1143 RadixTests();
1144 TestReplace();
1145 TestExtract();
1146 TestConsume();
1147 TestFindAndConsume();
1148 TestQuoteMetaAll();
1149 TestMatchNumberPeculiarity();
1150
1151 // Check the pattern() accessor
1152 {
1153 const string kPattern = "http://([^/]+)/.*";
1154 const RE re(kPattern);
1155 CHECK_EQ(kPattern, re.pattern());
1156 }
1157
1158 // Check RE error field.
1159 {
1160 RE re("foo");
1161 CHECK(re.error().empty()); // Must have no error
1162 }
1163
1164 #ifdef SUPPORT_UTF8
1165 // Check UTF-8 handling
1166 {
1167 printf("Testing UTF-8 handling\n");
1168
1169 // Three Japanese characters (nihongo)
1170 const unsigned char utf8_string[] = {
1171 0xe6, 0x97, 0xa5, // 65e5
1172 0xe6, 0x9c, 0xac, // 672c
1173 0xe8, 0xaa, 0x9e, // 8a9e
1174 0
1175 };
1176 const unsigned char utf8_pattern[] = {
1177 '.',
1178 0xe6, 0x9c, 0xac, // 672c
1179 '.',
1180 0
1181 };
1182
1183 // Both should match in either mode, bytes or UTF-8
1184 RE re_test1(".........");
1185 CHECK(re_test1.FullMatch(utf8_string));
1186 RE re_test2("...", pcrecpp::UTF8());
1187 CHECK(re_test2.FullMatch(utf8_string));
1188
1189 // Check that '.' matches one byte or UTF-8 character
1190 // according to the mode.
1191 string ss;
1192 RE re_test3("(.)");
1193 CHECK(re_test3.PartialMatch(utf8_string, &ss));
1194 CHECK_EQ(ss, string("\xe6"));
1195 RE re_test4("(.)", pcrecpp::UTF8());
1196 CHECK(re_test4.PartialMatch(utf8_string, &ss));
1197 CHECK_EQ(ss, string("\xe6\x97\xa5"));
1198
1199 // Check that string matches itself in either mode
1200 RE re_test5(utf8_string);
1201 CHECK(re_test5.FullMatch(utf8_string));
1202 RE re_test6(utf8_string, pcrecpp::UTF8());
1203 CHECK(re_test6.FullMatch(utf8_string));
1204
1205 // Check that pattern matches string only in UTF8 mode
1206 RE re_test7(utf8_pattern);
1207 CHECK(!re_test7.FullMatch(utf8_string));
1208 RE re_test8(utf8_pattern, pcrecpp::UTF8());
1209 CHECK(re_test8.FullMatch(utf8_string));
1210 }
1211
1212 // Check that ungreedy, UTF8 regular expressions don't match when they
1213 // oughtn't -- see bug 82246.
1214 {
1215 // This code always worked.
1216 const char* pattern = "\\w+X";
1217 const string target = "a aX";
1218 RE match_sentence(pattern);
1219 RE match_sentence_re(pattern, pcrecpp::UTF8());
1220
1221 CHECK(!match_sentence.FullMatch(target));
1222 CHECK(!match_sentence_re.FullMatch(target));
1223 }
1224
1225 {
1226 const char* pattern = "(?U)\\w+X";
1227 const string target = "a aX";
1228 RE match_sentence(pattern);
1229 RE match_sentence_re(pattern, pcrecpp::UTF8());
1230
1231 CHECK(!match_sentence.FullMatch(target));
1232 CHECK(!match_sentence_re.FullMatch(target));
1233 }
1234 #endif /* def SUPPORT_UTF8 */
1235
1236 printf("Testing error reporting\n");
1237
1238 { RE re("a\\1"); CHECK(!re.error().empty()); }
1239 {
1240 RE re("a[x");
1241 CHECK(!re.error().empty());
1242 }
1243 {
1244 RE re("a[z-a]");
1245 CHECK(!re.error().empty());
1246 }
1247 {
1248 RE re("a[[:foobar:]]");
1249 CHECK(!re.error().empty());
1250 }
1251 {
1252 RE re("a(b");
1253 CHECK(!re.error().empty());
1254 }
1255 {
1256 RE re("a\\");
1257 CHECK(!re.error().empty());
1258 }
1259
1260 // Test that recursion is stopped
1261 TestRecursion();
1262
1263 // Test Options
1264 if (getenv("VERBOSE_TEST") != NULL)
1265 VERBOSE_TEST = true;
1266 TestOptions();
1267
1268 // Test the constructors
1269 TestConstructors();
1270
1271 // Done
1272 printf("OK\n");
1273
1274 return 0;
1275 }

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12