/[pcre]/code/tags/pcre-6.2/pcrecpp_unittest.cc
ViewVC logotype

Contents of /code/tags/pcre-6.2/pcrecpp_unittest.cc

Parent Directory Parent Directory | Revision Log Revision Log


Revision 82 - (show annotations) (download)
Sat Feb 24 21:41:01 2007 UTC (7 years, 6 months ago) by nigel
File size: 31385 byte(s)
Tag code/trunk as code/tags/pcre-6.2.

1 // Copyright (c) 2005, Google Inc.
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above
11 // copyright notice, this list of conditions and the following disclaimer
12 // in the documentation and/or other materials provided with the
13 // distribution.
14 // * Neither the name of Google Inc. nor the names of its
15 // contributors may be used to endorse or promote products derived from
16 // this software without specific prior written permission.
17 //
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // Author: Sanjay Ghemawat
31 //
32 // TODO: Test extractions for PartialMatch/Consume
33
34 #include <stdio.h>
35 #include <vector>
36 #include "config.h"
37 #include "pcrecpp.h"
38
39 using pcrecpp::StringPiece;
40 using pcrecpp::RE;
41 using pcrecpp::RE_Options;
42 using pcrecpp::Hex;
43 using pcrecpp::Octal;
44 using pcrecpp::CRadix;
45
// When true, the option tests print a description of each case they run.
// main() switches it on when the VERBOSE_TEST environment variable is set.
static bool VERBOSE_TEST = false;
47
// CHECK dies with a fatal error if condition is not true.  It is *not*
// controlled by NDEBUG, so the check will be executed regardless of
// compilation mode.  Therefore, it is safe to do things like:
//    CHECK_EQ(fp->Write(x), 4)
// On failure the file name, line number and the text of the condition are
// written to stderr and the process exits with status 1.
#define CHECK(condition) do { \
  if (!(condition)) { \
    fprintf(stderr, "%s:%d: Check failed: %s\n", \
            __FILE__, __LINE__, #condition); \
    exit(1); \
  } \
} while (0)

// CHECK_EQ dies unless a == b.  Both operands are parenthesized so that an
// argument containing an operator that binds less tightly than == (for
// example `x & mask` or `c ? p : q`) is compared as a whole expression.
#define CHECK_EQ(a, b) CHECK((a) == (b))
61
62 static void Timing1(int num_iters) {
63 // Same pattern lots of times
64 RE pattern("ruby:\\d+");
65 StringPiece p("ruby:1234");
66 for (int j = num_iters; j > 0; j--) {
67 CHECK(pattern.FullMatch(p));
68 }
69 }
70
71 static void Timing2(int num_iters) {
72 // Same pattern lots of times
73 RE pattern("ruby:(\\d+)");
74 int i;
75 for (int j = num_iters; j > 0; j--) {
76 CHECK(pattern.FullMatch("ruby:1234", &i));
77 CHECK_EQ(i, 1234);
78 }
79 }
80
81 static void Timing3(int num_iters) {
82 string text_string;
83 for (int j = num_iters; j > 0; j--) {
84 text_string += "this is another line\n";
85 }
86
87 RE line_matcher(".*\n");
88 string line;
89 StringPiece text(text_string);
90 int counter = 0;
91 while (line_matcher.Consume(&text)) {
92 counter++;
93 }
94 printf("Matched %d lines\n", counter);
95 }
96
97 #if 0 // uncomment this if you have a way of defining VirtualProcessSize()
98
99 static void LeakTest() {
100 // Check for memory leaks
101 unsigned long long initial_size = 0;
102 for (int i = 0; i < 100000; i++) {
103 if (i == 50000) {
104 initial_size = VirtualProcessSize();
105 printf("Size after 50000: %llu\n", initial_size);
106 }
107 char buf[100];
108 snprintf(buf, sizeof(buf), "pat%09d", i);
109 RE newre(buf);
110 }
111 uint64 final_size = VirtualProcessSize();
112 printf("Size after 100000: %llu\n", final_size);
113 const double growth = double(final_size - initial_size) / final_size;
114 printf("Growth: %0.2f%%", growth * 100);
115 CHECK(growth < 0.02); // Allow < 2% growth
116 }
117
118 #endif
119
120 static void RadixTests() {
121 printf("Testing hex\n");
122
123 #define CHECK_HEX(type, value) \
124 do { \
125 type v; \
126 CHECK(RE("([0-9a-fA-F]+)[uUlL]*").FullMatch(#value, Hex(&v))); \
127 CHECK_EQ(v, 0x ## value); \
128 CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0x" #value, CRadix(&v))); \
129 CHECK_EQ(v, 0x ## value); \
130 } while(0)
131
132 CHECK_HEX(short, 2bad);
133 CHECK_HEX(unsigned short, 2badU);
134 CHECK_HEX(int, dead);
135 CHECK_HEX(unsigned int, deadU);
136 CHECK_HEX(long, 7eadbeefL);
137 CHECK_HEX(unsigned long, deadbeefUL);
138 #ifdef HAVE_LONG_LONG
139 CHECK_HEX(long long, 12345678deadbeefLL);
140 #endif
141 #ifdef HAVE_UNSIGNED_LONG_LONG
142 CHECK_HEX(unsigned long long, cafebabedeadbeefULL);
143 #endif
144
145 #undef CHECK_HEX
146
147 printf("Testing octal\n");
148
149 #define CHECK_OCTAL(type, value) \
150 do { \
151 type v; \
152 CHECK(RE("([0-7]+)[uUlL]*").FullMatch(#value, Octal(&v))); \
153 CHECK_EQ(v, 0 ## value); \
154 CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0" #value, CRadix(&v))); \
155 CHECK_EQ(v, 0 ## value); \
156 } while(0)
157
158 CHECK_OCTAL(short, 77777);
159 CHECK_OCTAL(unsigned short, 177777U);
160 CHECK_OCTAL(int, 17777777777);
161 CHECK_OCTAL(unsigned int, 37777777777U);
162 CHECK_OCTAL(long, 17777777777L);
163 CHECK_OCTAL(unsigned long, 37777777777UL);
164 #ifdef HAVE_LONG_LONG
165 CHECK_OCTAL(long long, 777777777777777777777LL);
166 #endif
167 #ifdef HAVE_UNSIGNED_LONG_LONG
168 CHECK_OCTAL(unsigned long long, 1777777777777777777777ULL);
169 #endif
170
171 #undef CHECK_OCTAL
172
173 printf("Testing decimal\n");
174
175 #define CHECK_DECIMAL(type, value) \
176 do { \
177 type v; \
178 CHECK(RE("(-?[0-9]+)[uUlL]*").FullMatch(#value, &v)); \
179 CHECK_EQ(v, value); \
180 CHECK(RE("(-?[0-9a-fA-FxX]+)[uUlL]*").FullMatch(#value, CRadix(&v))); \
181 CHECK_EQ(v, value); \
182 } while(0)
183
184 CHECK_DECIMAL(short, -1);
185 CHECK_DECIMAL(unsigned short, 9999);
186 CHECK_DECIMAL(int, -1000);
187 CHECK_DECIMAL(unsigned int, 12345U);
188 CHECK_DECIMAL(long, -10000000L);
189 CHECK_DECIMAL(unsigned long, 3083324652U);
190 #ifdef HAVE_LONG_LONG
191 CHECK_DECIMAL(long long, -100000000000000LL);
192 #endif
193 #ifdef HAVE_UNSIGNED_LONG_LONG
194 CHECK_DECIMAL(unsigned long long, 1234567890987654321ULL);
195 #endif
196
197 #undef CHECK_DECIMAL
198
199 }
200
201 static void TestReplace() {
202 printf("Testing Replace\n");
203
204 struct ReplaceTest {
205 const char *regexp;
206 const char *rewrite;
207 const char *original;
208 const char *single;
209 const char *global;
210 };
211 static const ReplaceTest tests[] = {
212 { "(qu|[b-df-hj-np-tv-z]*)([a-z]+)",
213 "\\2\\1ay",
214 "the quick brown fox jumps over the lazy dogs.",
215 "ethay quick brown fox jumps over the lazy dogs.",
216 "ethay ickquay ownbray oxfay umpsjay overay ethay azylay ogsday." },
217 { "\\w+",
218 "\\0-NOSPAM",
219 "paul.haahr@google.com",
220 "paul-NOSPAM.haahr@google.com",
221 "paul-NOSPAM.haahr-NOSPAM@google-NOSPAM.com-NOSPAM" },
222 { "^",
223 "(START)",
224 "foo",
225 "(START)foo",
226 "(START)foo" },
227 { "^",
228 "(START)",
229 "",
230 "(START)",
231 "(START)" },
232 { "$",
233 "(END)",
234 "",
235 "(END)",
236 "(END)" },
237 { "b",
238 "bb",
239 "ababababab",
240 "abbabababab",
241 "abbabbabbabbabb" },
242 { "b",
243 "bb",
244 "bbbbbb",
245 "bbbbbbb",
246 "bbbbbbbbbbbb" },
247 { "b+",
248 "bb",
249 "bbbbbb",
250 "bb",
251 "bb" },
252 { "b*",
253 "bb",
254 "bbbbbb",
255 "bb",
256 "bb" },
257 { "b*",
258 "bb",
259 "aaaaa",
260 "bbaaaaa",
261 "bbabbabbabbabbabb" },
262 { "", NULL, NULL, NULL, NULL }
263 };
264
265 for (const ReplaceTest *t = tests; t->original != NULL; ++t) {
266 string one(t->original);
267 CHECK(RE(t->regexp).Replace(t->rewrite, &one));
268 CHECK_EQ(one, t->single);
269 string all(t->original);
270 CHECK(RE(t->regexp).GlobalReplace(t->rewrite, &all) > 0);
271 CHECK_EQ(all, t->global);
272 }
273 }
274
275 static void TestExtract() {
276 printf("Testing Extract\n");
277
278 string s;
279
280 CHECK(RE("(.*)@([^.]*)").Extract("\\2!\\1", "boris@kremvax.ru", &s));
281 CHECK_EQ(s, "kremvax!boris");
282
283 // check the RE interface as well
284 CHECK(RE(".*").Extract("'\\0'", "foo", &s));
285 CHECK_EQ(s, "'foo'");
286 CHECK(!RE("bar").Extract("'\\0'", "baz", &s));
287 CHECK_EQ(s, "'foo'");
288 }
289
290 static void TestConsume() {
291 printf("Testing Consume\n");
292
293 string word;
294
295 string s(" aaa b!@#$@#$cccc");
296 StringPiece input(s);
297
298 RE r("\\s*(\\w+)"); // matches a word, possibly proceeded by whitespace
299 CHECK(r.Consume(&input, &word));
300 CHECK_EQ(word, "aaa");
301 CHECK(r.Consume(&input, &word));
302 CHECK_EQ(word, "b");
303 CHECK(! r.Consume(&input, &word));
304 }
305
306 static void TestFindAndConsume() {
307 printf("Testing FindAndConsume\n");
308
309 string word;
310
311 string s(" aaa b!@#$@#$cccc");
312 StringPiece input(s);
313
314 RE r("(\\w+)"); // matches a word
315 CHECK(r.FindAndConsume(&input, &word));
316 CHECK_EQ(word, "aaa");
317 CHECK(r.FindAndConsume(&input, &word));
318 CHECK_EQ(word, "b");
319 CHECK(r.FindAndConsume(&input, &word));
320 CHECK_EQ(word, "cccc");
321 CHECK(! r.FindAndConsume(&input, &word));
322 }
323
324 static void TestMatchNumberPeculiarity() {
325 printf("Testing match-number peculiaraity\n");
326
327 string word1;
328 string word2;
329 string word3;
330
331 RE r("(foo)|(bar)|(baz)");
332 CHECK(r.PartialMatch("foo", &word1, &word2, &word3));
333 CHECK_EQ(word1, "foo");
334 CHECK_EQ(word2, "");
335 CHECK_EQ(word3, "");
336 CHECK(r.PartialMatch("bar", &word1, &word2, &word3));
337 CHECK_EQ(word1, "");
338 CHECK_EQ(word2, "bar");
339 CHECK_EQ(word3, "");
340 CHECK(r.PartialMatch("baz", &word1, &word2, &word3));
341 CHECK_EQ(word1, "");
342 CHECK_EQ(word2, "");
343 CHECK_EQ(word3, "baz");
344 CHECK(!r.PartialMatch("f", &word1, &word2, &word3));
345
346 string a;
347 CHECK(RE("(foo)|hello").FullMatch("hello", &a));
348 CHECK_EQ(a, "");
349 }
350
351 static void TestRecursion(int size, const char *pattern, int match_limit) {
352 printf("Testing recursion\n");
353
354 // Fill up a string repeating the pattern given
355 string domain;
356 domain.resize(size);
357 int patlen = strlen(pattern);
358 for (int i = 0; i < size; ++i) {
359 domain[i] = pattern[i % patlen];
360 }
361 // Just make sure it doesn't crash due to too much recursion.
362 RE_Options options;
363 options.set_match_limit(match_limit);
364 RE re("([a-zA-Z0-9]|-)+(\\.([a-zA-Z0-9]|-)+)*(\\.)?", options);
365 re.FullMatch(domain);
366 }
367
368 //
369 // Options tests contributed by
370 // Giuseppe Maxia, CTO, Stardata s.r.l.
371 // July 2005
372 //
373 static void GetOneOptionResult(
374 const char *option_name,
375 const char *regex,
376 const char *str,
377 RE_Options options,
378 bool full,
379 string expected) {
380
381 printf("Testing Option <%s>\n", option_name);
382 if(VERBOSE_TEST)
383 printf("/%s/ finds \"%s\" within \"%s\" \n",
384 regex,
385 expected.c_str(),
386 str);
387 string captured("");
388 if (full)
389 RE(regex,options).FullMatch(str, &captured);
390 else
391 RE(regex,options).PartialMatch(str, &captured);
392 CHECK_EQ(captured, expected);
393 }
394
395 static void TestOneOption(
396 const char *option_name,
397 const char *regex,
398 const char *str,
399 RE_Options options,
400 bool full,
401 bool assertive = true) {
402
403 printf("Testing Option <%s>\n", option_name);
404 if (VERBOSE_TEST)
405 printf("'%s' %s /%s/ \n",
406 str,
407 (assertive? "matches" : "doesn't match"),
408 regex);
409 if (assertive) {
410 if (full)
411 CHECK(RE(regex,options).FullMatch(str));
412 else
413 CHECK(RE(regex,options).PartialMatch(str));
414 } else {
415 if (full)
416 CHECK(!RE(regex,options).FullMatch(str));
417 else
418 CHECK(!RE(regex,options).PartialMatch(str));
419 }
420 }
421
422 static void Test_CASELESS() {
423 RE_Options options;
424 RE_Options options2;
425
426 options.set_caseless(true);
427 TestOneOption("CASELESS (class)", "HELLO", "hello", options, false);
428 TestOneOption("CASELESS (class2)", "HELLO", "hello", options2.set_caseless(true), false);
429 TestOneOption("CASELESS (class)", "^[A-Z]+$", "Hello", options, false);
430
431 TestOneOption("CASELESS (function)", "HELLO", "hello", pcrecpp::CASELESS(), false);
432 TestOneOption("CASELESS (function)", "^[A-Z]+$", "Hello", pcrecpp::CASELESS(), false);
433 options.set_caseless(false);
434 TestOneOption("no CASELESS", "HELLO", "hello", options, false, false);
435 }
436
437 static void Test_MULTILINE() {
438 RE_Options options;
439 RE_Options options2;
440 const char *str = "HELLO\n" "cruel\n" "world\n";
441
442 options.set_multiline(true);
443 TestOneOption("MULTILINE (class)", "^cruel$", str, options, false);
444 TestOneOption("MULTILINE (class2)", "^cruel$", str, options2.set_multiline(true), false);
445 TestOneOption("MULTILINE (function)", "^cruel$", str, pcrecpp::MULTILINE(), false);
446 options.set_multiline(false);
447 TestOneOption("no MULTILINE", "^cruel$", str, options, false, false);
448 }
449
450 static void Test_DOTALL() {
451 RE_Options options;
452 RE_Options options2;
453 const char *str = "HELLO\n" "cruel\n" "world";
454
455 options.set_dotall(true);
456 TestOneOption("DOTALL (class)", "HELLO.*world", str, options, true);
457 TestOneOption("DOTALL (class2)", "HELLO.*world", str, options2.set_dotall(true), true);
458 TestOneOption("DOTALL (function)", "HELLO.*world", str, pcrecpp::DOTALL(), true);
459 options.set_dotall(false);
460 TestOneOption("no DOTALL", "HELLO.*world", str, options, true, false);
461 }
462
463 static void Test_DOLLAR_ENDONLY() {
464 RE_Options options;
465 RE_Options options2;
466 const char *str = "HELLO world\n";
467
468 TestOneOption("no DOLLAR_ENDONLY", "world$", str, options, false);
469 options.set_dollar_endonly(true);
470 TestOneOption("DOLLAR_ENDONLY 1", "world$", str, options, false, false);
471 TestOneOption("DOLLAR_ENDONLY 2", "world$", str, options2.set_dollar_endonly(true), false, false);
472 }
473
474 static void Test_EXTRA() {
475 RE_Options options;
476 const char *str = "HELLO";
477
478 options.set_extra(true);
479 TestOneOption("EXTRA 1", "\\HELL\\O", str, options, true, false );
480 TestOneOption("EXTRA 2", "\\HELL\\O", str, RE_Options().set_extra(true), true, false );
481 options.set_extra(false);
482 TestOneOption("no EXTRA", "\\HELL\\O", str, options, true );
483 }
484
485 static void Test_EXTENDED() {
486 RE_Options options;
487 RE_Options options2;
488 const char *str = "HELLO world";
489
490 options.set_extended(true);
491 TestOneOption("EXTENDED (class)", "HELLO world", str, options, false, false);
492 TestOneOption("EXTENDED (class2)", "HELLO world", str, options2.set_extended(true), false, false);
493 TestOneOption("EXTENDED (class)",
494 "^ HE L{2} O "
495 "\\s+ "
496 "\\w+ $ ",
497 str,
498 options,
499 false);
500
501 TestOneOption("EXTENDED (function)", "HELLO world", str, pcrecpp::EXTENDED(), false, false);
502 TestOneOption("EXTENDED (function)",
503 "^ HE L{2} O "
504 "\\s+ "
505 "\\w+ $ ",
506 str,
507 pcrecpp::EXTENDED(),
508 false);
509
510 options.set_extended(false);
511 TestOneOption("no EXTENDED", "HELLO world", str, options, false);
512 }
513
514 static void Test_NO_AUTO_CAPTURE() {
515 RE_Options options;
516 const char *str = "HELLO world";
517 string captured;
518
519 printf("Testing Option <no NO_AUTO_CAPTURE>\n");
520 if (VERBOSE_TEST)
521 printf("parentheses capture text\n");
522 RE re("(world|universe)$", options);
523 CHECK(re.Extract("\\1", str , &captured));
524 CHECK_EQ(captured, "world");
525 options.set_no_auto_capture(true);
526 printf("testing Option <NO_AUTO_CAPTURE>\n");
527 if (VERBOSE_TEST)
528 printf("parentheses do not capture text\n");
529 re.Extract("\\1",str, &captured );
530 CHECK_EQ(captured, "world");
531 }
532
533 static void Test_UNGREEDY() {
534 RE_Options options;
535 const char *str = "HELLO, 'this' is the 'world'";
536
537 options.set_ungreedy(true);
538 GetOneOptionResult("UNGREEDY 1", "('.*')", str, options, false, "'this'" );
539 GetOneOptionResult("UNGREEDY 2", "('.*')", str, RE_Options().set_ungreedy(true), false, "'this'" );
540 GetOneOptionResult("UNGREEDY", "('.*?')", str, options, false, "'this' is the 'world'" );
541
542 options.set_ungreedy(false);
543 GetOneOptionResult("no UNGREEDY", "('.*')", str, options, false, "'this' is the 'world'" );
544 GetOneOptionResult("no UNGREEDY", "('.*?')", str, options, false, "'this'" );
545 }
546
547 static void Test_all_options() {
548 const char *str = "HELLO\n" "cruel\n" "world";
549 RE_Options options;
550 options.set_all_options(PCRE_CASELESS | PCRE_DOTALL);
551
552 TestOneOption("all_options (CASELESS|DOTALL)", "^hello.*WORLD", str , options, false);
553 options.set_all_options(0);
554 TestOneOption("all_options (0)", "^hello.*WORLD", str , options, false, false);
555 options.set_all_options(PCRE_MULTILINE | PCRE_EXTENDED);
556
557 TestOneOption("all_options (MULTILINE|EXTENDED)", " ^ c r u e l $ ", str, options, false);
558 TestOneOption("all_options (MULTILINE|EXTENDED) with constructor",
559 " ^ c r u e l $ ",
560 str,
561 RE_Options(PCRE_MULTILINE | PCRE_EXTENDED),
562 false);
563
564 TestOneOption("all_options (MULTILINE|EXTENDED) with concatenation",
565 " ^ c r u e l $ ",
566 str,
567 RE_Options()
568 .set_multiline(true)
569 .set_extended(true),
570 false);
571
572 options.set_all_options(0);
573 TestOneOption("all_options (0)", "^ c r u e l $", str, options, false, false);
574
575 }
576
577 static void TestOptions() {
578 printf("Testing Options\n");
579 Test_CASELESS();
580 Test_MULTILINE();
581 Test_DOTALL();
582 Test_DOLLAR_ENDONLY();
583 Test_EXTENDED();
584 Test_NO_AUTO_CAPTURE();
585 Test_UNGREEDY();
586 Test_EXTRA();
587 Test_all_options();
588 }
589
590 int main(int argc, char** argv) {
591 // Treat any flag as --help
592 if (argc > 1 && argv[1][0] == '-') {
593 printf("Usage: %s [timing1|timing2|timing3 num-iters]\n"
594 " If 'timingX ###' is specified, run the given timing test\n"
595 " with the given number of iterations, rather than running\n"
596 " the default corectness test.\n", argv[0]);
597 return 0;
598 }
599
600 if (argc > 1) {
601 if ( argc == 2 || atoi(argv[2]) == 0) {
602 printf("timing mode needs a num-iters argument\n");
603 return 1;
604 }
605 if (!strcmp(argv[1], "timing1"))
606 Timing1(atoi(argv[2]));
607 else if (!strcmp(argv[1], "timing2"))
608 Timing2(atoi(argv[2]));
609 else if (!strcmp(argv[1], "timing3"))
610 Timing3(atoi(argv[2]));
611 else
612 printf("Unknown argument '%s'\n", argv[1]);
613 return 0;
614 }
615
616 printf("Testing FullMatch\n");
617
618 int i;
619 string s;
620
621 /***** FullMatch with no args *****/
622
623 CHECK(RE("h.*o").FullMatch("hello"));
624 CHECK(!RE("h.*o").FullMatch("othello"));
625 CHECK(!RE("h.*o").FullMatch("hello!"));
626
627 /***** FullMatch with args *****/
628
629 // Zero-arg
630 CHECK(RE("\\d+").FullMatch("1001"));
631
632 // Single-arg
633 CHECK(RE("(\\d+)").FullMatch("1001", &i));
634 CHECK_EQ(i, 1001);
635 CHECK(RE("(-?\\d+)").FullMatch("-123", &i));
636 CHECK_EQ(i, -123);
637 CHECK(!RE("()\\d+").FullMatch("10", &i));
638 CHECK(!RE("(\\d+)").FullMatch("1234567890123456789012345678901234567890",
639 &i));
640
641 // Digits surrounding integer-arg
642 CHECK(RE("1(\\d*)4").FullMatch("1234", &i));
643 CHECK_EQ(i, 23);
644 CHECK(RE("(\\d)\\d+").FullMatch("1234", &i));
645 CHECK_EQ(i, 1);
646 CHECK(RE("(-\\d)\\d+").FullMatch("-1234", &i));
647 CHECK_EQ(i, -1);
648 CHECK(RE("(\\d)").PartialMatch("1234", &i));
649 CHECK_EQ(i, 1);
650 CHECK(RE("(-\\d)").PartialMatch("-1234", &i));
651 CHECK_EQ(i, -1);
652
653 // String-arg
654 CHECK(RE("h(.*)o").FullMatch("hello", &s));
655 CHECK_EQ(s, string("ell"));
656
657 // StringPiece-arg
658 StringPiece sp;
659 CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &sp, &i));
660 CHECK_EQ(sp.size(), 4);
661 CHECK(memcmp(sp.data(), "ruby", 4) == 0);
662 CHECK_EQ(i, 1234);
663
664 // Multi-arg
665 CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &s, &i));
666 CHECK_EQ(s, string("ruby"));
667 CHECK_EQ(i, 1234);
668
669 // Ignored arg
670 CHECK(RE("(\\w+)(:)(\\d+)").FullMatch("ruby:1234", &s, (void*)NULL, &i));
671 CHECK_EQ(s, string("ruby"));
672 CHECK_EQ(i, 1234);
673
674 // Type tests
675 {
676 char c;
677 CHECK(RE("(H)ello").FullMatch("Hello", &c));
678 CHECK_EQ(c, 'H');
679 }
680 {
681 unsigned char c;
682 CHECK(RE("(H)ello").FullMatch("Hello", &c));
683 CHECK_EQ(c, static_cast<unsigned char>('H'));
684 }
685 {
686 short v;
687 CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
688 CHECK(RE("(-?\\d+)").FullMatch("-100", &v)); CHECK_EQ(v, -100);
689 CHECK(RE("(-?\\d+)").FullMatch("32767", &v)); CHECK_EQ(v, 32767);
690 CHECK(RE("(-?\\d+)").FullMatch("-32768", &v)); CHECK_EQ(v, -32768);
691 CHECK(!RE("(-?\\d+)").FullMatch("-32769", &v));
692 CHECK(!RE("(-?\\d+)").FullMatch("32768", &v));
693 }
694 {
695 unsigned short v;
696 CHECK(RE("(\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
697 CHECK(RE("(\\d+)").FullMatch("32767", &v)); CHECK_EQ(v, 32767);
698 CHECK(RE("(\\d+)").FullMatch("65535", &v)); CHECK_EQ(v, 65535);
699 CHECK(!RE("(\\d+)").FullMatch("65536", &v));
700 }
701 {
702 int v;
703 static const int max_value = 0x7fffffff;
704 static const int min_value = -max_value - 1;
705 CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
706 CHECK(RE("(-?\\d+)").FullMatch("-100", &v)); CHECK_EQ(v, -100);
707 CHECK(RE("(-?\\d+)").FullMatch("2147483647", &v)); CHECK_EQ(v, max_value);
708 CHECK(RE("(-?\\d+)").FullMatch("-2147483648", &v)); CHECK_EQ(v, min_value);
709 CHECK(!RE("(-?\\d+)").FullMatch("-2147483649", &v));
710 CHECK(!RE("(-?\\d+)").FullMatch("2147483648", &v));
711 }
712 {
713 unsigned int v;
714 static const unsigned int max_value = 0xfffffffful;
715 CHECK(RE("(\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
716 CHECK(RE("(\\d+)").FullMatch("4294967295", &v)); CHECK_EQ(v, max_value);
717 CHECK(!RE("(\\d+)").FullMatch("4294967296", &v));
718 }
719 #ifdef HAVE_LONG_LONG
720 {
721 long long v;
722 static const long long max_value = 0x7fffffffffffffffLL;
723 static const long long min_value = -max_value - 1;
724 char buf[32];
725
726 CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
727 CHECK(RE("(-?\\d+)").FullMatch("-100",&v)); CHECK_EQ(v, -100);
728
729 snprintf(buf, sizeof(buf), "%lld", max_value);
730 CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
731
732 snprintf(buf, sizeof(buf), "%lld", min_value);
733 CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, min_value);
734
735 snprintf(buf, sizeof(buf), "%lld", max_value);
736 assert(buf[strlen(buf)-1] != '9');
737 buf[strlen(buf)-1]++;
738 CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
739
740 snprintf(buf, sizeof(buf), "%lld", min_value);
741 assert(buf[strlen(buf)-1] != '9');
742 buf[strlen(buf)-1]++;
743 CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
744 }
745 #endif
746 #if defined HAVE_UNSIGNED_LONG_LONG && defined HAVE_LONG_LONG
747 {
748 unsigned long long v;
749 long long v2;
750 static const unsigned long long max_value = 0xffffffffffffffffULL;
751 char buf[32];
752
753 CHECK(RE("(-?\\d+)").FullMatch("100",&v)); CHECK_EQ(v, 100);
754 CHECK(RE("(-?\\d+)").FullMatch("-100",&v2)); CHECK_EQ(v2, -100);
755
756 snprintf(buf, sizeof(buf), "%llu", max_value);
757 CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
758
759 assert(buf[strlen(buf)-1] != '9');
760 buf[strlen(buf)-1]++;
761 CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
762 }
763 #endif
764 {
765 float v;
766 CHECK(RE("(.*)").FullMatch("100", &v));
767 CHECK(RE("(.*)").FullMatch("-100.", &v));
768 CHECK(RE("(.*)").FullMatch("1e23", &v));
769 }
770 {
771 double v;
772 CHECK(RE("(.*)").FullMatch("100", &v));
773 CHECK(RE("(.*)").FullMatch("-100.", &v));
774 CHECK(RE("(.*)").FullMatch("1e23", &v));
775 }
776
777 // Check that matching is fully anchored
778 CHECK(!RE("(\\d+)").FullMatch("x1001", &i));
779 CHECK(!RE("(\\d+)").FullMatch("1001x", &i));
780 CHECK(RE("x(\\d+)").FullMatch("x1001", &i)); CHECK_EQ(i, 1001);
781 CHECK(RE("(\\d+)x").FullMatch("1001x", &i)); CHECK_EQ(i, 1001);
782
783 // Braces
784 CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcd"));
785 CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcde"));
786 CHECK(!RE("[0-9a-f+.-]{5,}").FullMatch("0abc"));
787
788 // Complicated RE
789 CHECK(RE("foo|bar|[A-Z]").FullMatch("foo"));
790 CHECK(RE("foo|bar|[A-Z]").FullMatch("bar"));
791 CHECK(RE("foo|bar|[A-Z]").FullMatch("X"));
792 CHECK(!RE("foo|bar|[A-Z]").FullMatch("XY"));
793
794 // Check full-match handling (needs '$' tacked on internally)
795 CHECK(RE("fo|foo").FullMatch("fo"));
796 CHECK(RE("fo|foo").FullMatch("foo"));
797 CHECK(RE("fo|foo$").FullMatch("fo"));
798 CHECK(RE("fo|foo$").FullMatch("foo"));
799 CHECK(RE("foo$").FullMatch("foo"));
800 CHECK(!RE("foo\\$").FullMatch("foo$bar"));
801 CHECK(!RE("fo|bar").FullMatch("fox"));
802
803 // Uncomment the following if we change the handling of '$' to
804 // prevent it from matching a trailing newline
805 if (false) {
806 // Check that we don't get bitten by pcre's special handling of a
807 // '\n' at the end of the string matching '$'
808 CHECK(!RE("foo$").PartialMatch("foo\n"));
809 }
810
811 // Number of args
812 int a[16];
813 CHECK(RE("").FullMatch(""));
814
815 memset(a, 0, sizeof(0));
816 CHECK(RE("(\\d){1}").FullMatch("1",
817 &a[0]));
818 CHECK_EQ(a[0], 1);
819
820 memset(a, 0, sizeof(0));
821 CHECK(RE("(\\d)(\\d)").FullMatch("12",
822 &a[0], &a[1]));
823 CHECK_EQ(a[0], 1);
824 CHECK_EQ(a[1], 2);
825
826 memset(a, 0, sizeof(0));
827 CHECK(RE("(\\d)(\\d)(\\d)").FullMatch("123",
828 &a[0], &a[1], &a[2]));
829 CHECK_EQ(a[0], 1);
830 CHECK_EQ(a[1], 2);
831 CHECK_EQ(a[2], 3);
832
833 memset(a, 0, sizeof(0));
834 CHECK(RE("(\\d)(\\d)(\\d)(\\d)").FullMatch("1234",
835 &a[0], &a[1], &a[2], &a[3]));
836 CHECK_EQ(a[0], 1);
837 CHECK_EQ(a[1], 2);
838 CHECK_EQ(a[2], 3);
839 CHECK_EQ(a[3], 4);
840
841 memset(a, 0, sizeof(0));
842 CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("12345",
843 &a[0], &a[1], &a[2],
844 &a[3], &a[4]));
845 CHECK_EQ(a[0], 1);
846 CHECK_EQ(a[1], 2);
847 CHECK_EQ(a[2], 3);
848 CHECK_EQ(a[3], 4);
849 CHECK_EQ(a[4], 5);
850
851 memset(a, 0, sizeof(0));
852 CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("123456",
853 &a[0], &a[1], &a[2],
854 &a[3], &a[4], &a[5]));
855 CHECK_EQ(a[0], 1);
856 CHECK_EQ(a[1], 2);
857 CHECK_EQ(a[2], 3);
858 CHECK_EQ(a[3], 4);
859 CHECK_EQ(a[4], 5);
860 CHECK_EQ(a[5], 6);
861
862 memset(a, 0, sizeof(0));
863 CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("1234567",
864 &a[0], &a[1], &a[2], &a[3],
865 &a[4], &a[5], &a[6]));
866 CHECK_EQ(a[0], 1);
867 CHECK_EQ(a[1], 2);
868 CHECK_EQ(a[2], 3);
869 CHECK_EQ(a[3], 4);
870 CHECK_EQ(a[4], 5);
871 CHECK_EQ(a[5], 6);
872 CHECK_EQ(a[6], 7);
873
874 memset(a, 0, sizeof(0));
875 CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)"
876 "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch(
877 "1234567890123456",
878 &a[0], &a[1], &a[2], &a[3],
879 &a[4], &a[5], &a[6], &a[7],
880 &a[8], &a[9], &a[10], &a[11],
881 &a[12], &a[13], &a[14], &a[15]));
882 CHECK_EQ(a[0], 1);
883 CHECK_EQ(a[1], 2);
884 CHECK_EQ(a[2], 3);
885 CHECK_EQ(a[3], 4);
886 CHECK_EQ(a[4], 5);
887 CHECK_EQ(a[5], 6);
888 CHECK_EQ(a[6], 7);
889 CHECK_EQ(a[7], 8);
890 CHECK_EQ(a[8], 9);
891 CHECK_EQ(a[9], 0);
892 CHECK_EQ(a[10], 1);
893 CHECK_EQ(a[11], 2);
894 CHECK_EQ(a[12], 3);
895 CHECK_EQ(a[13], 4);
896 CHECK_EQ(a[14], 5);
897 CHECK_EQ(a[15], 6);
898
899 /***** PartialMatch *****/
900
901 printf("Testing PartialMatch\n");
902
903 CHECK(RE("h.*o").PartialMatch("hello"));
904 CHECK(RE("h.*o").PartialMatch("othello"));
905 CHECK(RE("h.*o").PartialMatch("hello!"));
906 CHECK(RE("((((((((((((((((((((x))))))))))))))))))))").PartialMatch("x"));
907
908 RadixTests();
909 TestReplace();
910 TestExtract();
911 TestConsume();
912 TestFindAndConsume();
913 TestMatchNumberPeculiarity();
914
915 // Check the pattern() accessor
916 {
917 const string kPattern = "http://([^/]+)/.*";
918 const RE re(kPattern);
919 CHECK_EQ(kPattern, re.pattern());
920 }
921
922 // Check RE error field.
923 {
924 RE re("foo");
925 CHECK(re.error().empty()); // Must have no error
926 }
927
928 #ifdef SUPPORT_UTF8
929 // Check UTF-8 handling
930 {
931 printf("Testing UTF-8 handling\n");
932
933 // Three Japanese characters (nihongo)
934 const char utf8_string[] = {
935 0xe6, 0x97, 0xa5, // 65e5
936 0xe6, 0x9c, 0xac, // 627c
937 0xe8, 0xaa, 0x9e, // 8a9e
938 0
939 };
940 const char utf8_pattern[] = {
941 '.',
942 0xe6, 0x9c, 0xac, // 627c
943 '.',
944 0
945 };
946
947 // Both should match in either mode, bytes or UTF-8
948 RE re_test1(".........");
949 CHECK(re_test1.FullMatch(utf8_string));
950 RE re_test2("...", pcrecpp::UTF8());
951 CHECK(re_test2.FullMatch(utf8_string));
952
953 // Check that '.' matches one byte or UTF-8 character
954 // according to the mode.
955 string ss;
956 RE re_test3("(.)");
957 CHECK(re_test3.PartialMatch(utf8_string, &ss));
958 CHECK_EQ(ss, string("\xe6"));
959 RE re_test4("(.)", pcrecpp::UTF8());
960 CHECK(re_test4.PartialMatch(utf8_string, &ss));
961 CHECK_EQ(ss, string("\xe6\x97\xa5"));
962
963 // Check that string matches itself in either mode
964 RE re_test5(utf8_string);
965 CHECK(re_test5.FullMatch(utf8_string));
966 RE re_test6(utf8_string, pcrecpp::UTF8());
967 CHECK(re_test6.FullMatch(utf8_string));
968
969 // Check that pattern matches string only in UTF8 mode
970 RE re_test7(utf8_pattern);
971 CHECK(!re_test7.FullMatch(utf8_string));
972 RE re_test8(utf8_pattern, pcrecpp::UTF8());
973 CHECK(re_test8.FullMatch(utf8_string));
974 }
975
976 // Check that ungreedy, UTF8 regular expressions don't match when they
977 // oughtn't -- see bug 82246.
978 {
979 // This code always worked.
980 const char* pattern = "\\w+X";
981 const string target = "a aX";
982 RE match_sentence(pattern);
983 RE match_sentence_re(pattern, pcrecpp::UTF8());
984
985 CHECK(!match_sentence.FullMatch(target));
986 CHECK(!match_sentence_re.FullMatch(target));
987 }
988
989 {
990 const char* pattern = "(?U)\\w+X";
991 const string target = "a aX";
992 RE match_sentence(pattern);
993 RE match_sentence_re(pattern, pcrecpp::UTF8());
994
995 CHECK(!match_sentence.FullMatch(target));
996 CHECK(!match_sentence_re.FullMatch(target));
997 }
998 #endif /* def SUPPORT_UTF8 */
999
1000 printf("Testing error reporting\n");
1001
1002 { RE re("a\\1"); CHECK(!re.error().empty()); }
1003 {
1004 RE re("a[x");
1005 CHECK(!re.error().empty());
1006 }
1007 {
1008 RE re("a[z-a]");
1009 CHECK(!re.error().empty());
1010 }
1011 {
1012 RE re("a[[:foobar:]]");
1013 CHECK(!re.error().empty());
1014 }
1015 {
1016 RE re("a(b");
1017 CHECK(!re.error().empty());
1018 }
1019 {
1020 RE re("a\\");
1021 CHECK(!re.error().empty());
1022 }
1023
1024 // Test that recursion is stopped: there will be some errors reported
1025 int matchlimit = 5000;
1026 int bytes = 15 * 1024; // enough to crash if there was no match limit
1027 TestRecursion(bytes, ".", matchlimit);
1028 TestRecursion(bytes, "a", matchlimit);
1029 TestRecursion(bytes, "a.", matchlimit);
1030 TestRecursion(bytes, "ab.", matchlimit);
1031 TestRecursion(bytes, "abc.", matchlimit);
1032
1033 // Test Options
1034 if (getenv("VERBOSE_TEST") != NULL)
1035 VERBOSE_TEST = true;
1036 TestOptions();
1037
1038 // Done
1039 printf("OK\n");
1040
1041 return 0;
1042 }

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12