/[pcre]/code/trunk/pcrecpp_unittest.cc
ViewVC logotype

Contents of /code/trunk/pcrecpp_unittest.cc

Parent Directory Parent Directory | Revision Log Revision Log


Revision 87 - (show annotations) (download)
Sat Feb 24 21:41:21 2007 UTC (7 years, 6 months ago) by nigel
File size: 31985 byte(s)
Load pcre-6.5 into code/trunk.

1 // Copyright (c) 2005, Google Inc.
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above
11 // copyright notice, this list of conditions and the following disclaimer
12 // in the documentation and/or other materials provided with the
13 // distribution.
14 // * Neither the name of Google Inc. nor the names of its
15 // contributors may be used to endorse or promote products derived from
16 // this software without specific prior written permission.
17 //
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // Author: Sanjay Ghemawat
31 //
32 // TODO: Test extractions for PartialMatch/Consume
33
34 #include <stdio.h>
35 #include <vector>
36 #include "config.h"
37 #include "pcrecpp.h"
38
39 using pcrecpp::StringPiece;
40 using pcrecpp::RE;
41 using pcrecpp::RE_Options;
42 using pcrecpp::Hex;
43 using pcrecpp::Octal;
44 using pcrecpp::CRadix;
45
// When true, the option tests print a description of each case they run.
// main() switches it on if the VERBOSE_TEST environment variable is set.
static bool VERBOSE_TEST = false;

// CHECK dies with a fatal error if condition is not true.  It is *not*
// controlled by NDEBUG, so the check will be executed regardless of
// compilation mode.  Therefore, it is safe to do things like:
//    CHECK_EQ(fp->Write(x), 4)
// On failure it prints file, line and the text of the condition to stderr
// and terminates the process with exit status 1.
#define CHECK(condition) do {                                   \
  if (!(condition)) {                                           \
    fprintf(stderr, "%s:%d: Check failed: %s\n",                \
            __FILE__, __LINE__, #condition);                    \
    exit(1);                                                    \
  }                                                             \
} while (0)

// CHECK_EQ(a, b) checks a == b.  Both operands are parenthesised so that
// arguments containing operators of lower precedence than == (for example
// `x & mask`) are compared as whole expressions.
#define CHECK_EQ(a, b) CHECK((a) == (b))
61
62 static void Timing1(int num_iters) {
63 // Same pattern lots of times
64 RE pattern("ruby:\\d+");
65 StringPiece p("ruby:1234");
66 for (int j = num_iters; j > 0; j--) {
67 CHECK(pattern.FullMatch(p));
68 }
69 }
70
71 static void Timing2(int num_iters) {
72 // Same pattern lots of times
73 RE pattern("ruby:(\\d+)");
74 int i;
75 for (int j = num_iters; j > 0; j--) {
76 CHECK(pattern.FullMatch("ruby:1234", &i));
77 CHECK_EQ(i, 1234);
78 }
79 }
80
81 static void Timing3(int num_iters) {
82 string text_string;
83 for (int j = num_iters; j > 0; j--) {
84 text_string += "this is another line\n";
85 }
86
87 RE line_matcher(".*\n");
88 string line;
89 StringPiece text(text_string);
90 int counter = 0;
91 while (line_matcher.Consume(&text)) {
92 counter++;
93 }
94 printf("Matched %d lines\n", counter);
95 }
96
97 #if 0 // uncomment this if you have a way of defining VirtualProcessSize()
98
99 static void LeakTest() {
100 // Check for memory leaks
101 unsigned long long initial_size = 0;
102 for (int i = 0; i < 100000; i++) {
103 if (i == 50000) {
104 initial_size = VirtualProcessSize();
105 printf("Size after 50000: %llu\n", initial_size);
106 }
107 char buf[100];
108 snprintf(buf, sizeof(buf), "pat%09d", i);
109 RE newre(buf);
110 }
111 uint64 final_size = VirtualProcessSize();
112 printf("Size after 100000: %llu\n", final_size);
113 const double growth = double(final_size - initial_size) / final_size;
114 printf("Growth: %0.2f%%", growth * 100);
115 CHECK(growth < 0.02); // Allow < 2% growth
116 }
117
118 #endif
119
120 static void RadixTests() {
121 printf("Testing hex\n");
122
123 #define CHECK_HEX(type, value) \
124 do { \
125 type v; \
126 CHECK(RE("([0-9a-fA-F]+)[uUlL]*").FullMatch(#value, Hex(&v))); \
127 CHECK_EQ(v, 0x ## value); \
128 CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0x" #value, CRadix(&v))); \
129 CHECK_EQ(v, 0x ## value); \
130 } while(0)
131
132 CHECK_HEX(short, 2bad);
133 CHECK_HEX(unsigned short, 2badU);
134 CHECK_HEX(int, dead);
135 CHECK_HEX(unsigned int, deadU);
136 CHECK_HEX(long, 7eadbeefL);
137 CHECK_HEX(unsigned long, deadbeefUL);
138 #ifdef HAVE_LONG_LONG
139 CHECK_HEX(long long, 12345678deadbeefLL);
140 #endif
141 #ifdef HAVE_UNSIGNED_LONG_LONG
142 CHECK_HEX(unsigned long long, cafebabedeadbeefULL);
143 #endif
144
145 #undef CHECK_HEX
146
147 printf("Testing octal\n");
148
149 #define CHECK_OCTAL(type, value) \
150 do { \
151 type v; \
152 CHECK(RE("([0-7]+)[uUlL]*").FullMatch(#value, Octal(&v))); \
153 CHECK_EQ(v, 0 ## value); \
154 CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0" #value, CRadix(&v))); \
155 CHECK_EQ(v, 0 ## value); \
156 } while(0)
157
158 CHECK_OCTAL(short, 77777);
159 CHECK_OCTAL(unsigned short, 177777U);
160 CHECK_OCTAL(int, 17777777777);
161 CHECK_OCTAL(unsigned int, 37777777777U);
162 CHECK_OCTAL(long, 17777777777L);
163 CHECK_OCTAL(unsigned long, 37777777777UL);
164 #ifdef HAVE_LONG_LONG
165 CHECK_OCTAL(long long, 777777777777777777777LL);
166 #endif
167 #ifdef HAVE_UNSIGNED_LONG_LONG
168 CHECK_OCTAL(unsigned long long, 1777777777777777777777ULL);
169 #endif
170
171 #undef CHECK_OCTAL
172
173 printf("Testing decimal\n");
174
175 #define CHECK_DECIMAL(type, value) \
176 do { \
177 type v; \
178 CHECK(RE("(-?[0-9]+)[uUlL]*").FullMatch(#value, &v)); \
179 CHECK_EQ(v, value); \
180 CHECK(RE("(-?[0-9a-fA-FxX]+)[uUlL]*").FullMatch(#value, CRadix(&v))); \
181 CHECK_EQ(v, value); \
182 } while(0)
183
184 CHECK_DECIMAL(short, -1);
185 CHECK_DECIMAL(unsigned short, 9999);
186 CHECK_DECIMAL(int, -1000);
187 CHECK_DECIMAL(unsigned int, 12345U);
188 CHECK_DECIMAL(long, -10000000L);
189 CHECK_DECIMAL(unsigned long, 3083324652U);
190 #ifdef HAVE_LONG_LONG
191 CHECK_DECIMAL(long long, -100000000000000LL);
192 #endif
193 #ifdef HAVE_UNSIGNED_LONG_LONG
194 CHECK_DECIMAL(unsigned long long, 1234567890987654321ULL);
195 #endif
196
197 #undef CHECK_DECIMAL
198
199 }
200
201 static void TestReplace() {
202 printf("Testing Replace\n");
203
204 struct ReplaceTest {
205 const char *regexp;
206 const char *rewrite;
207 const char *original;
208 const char *single;
209 const char *global;
210 };
211 static const ReplaceTest tests[] = {
212 { "(qu|[b-df-hj-np-tv-z]*)([a-z]+)",
213 "\\2\\1ay",
214 "the quick brown fox jumps over the lazy dogs.",
215 "ethay quick brown fox jumps over the lazy dogs.",
216 "ethay ickquay ownbray oxfay umpsjay overay ethay azylay ogsday." },
217 { "\\w+",
218 "\\0-NOSPAM",
219 "paul.haahr@google.com",
220 "paul-NOSPAM.haahr@google.com",
221 "paul-NOSPAM.haahr-NOSPAM@google-NOSPAM.com-NOSPAM" },
222 { "^",
223 "(START)",
224 "foo",
225 "(START)foo",
226 "(START)foo" },
227 { "^",
228 "(START)",
229 "",
230 "(START)",
231 "(START)" },
232 { "$",
233 "(END)",
234 "",
235 "(END)",
236 "(END)" },
237 { "b",
238 "bb",
239 "ababababab",
240 "abbabababab",
241 "abbabbabbabbabb" },
242 { "b",
243 "bb",
244 "bbbbbb",
245 "bbbbbbb",
246 "bbbbbbbbbbbb" },
247 { "b+",
248 "bb",
249 "bbbbbb",
250 "bb",
251 "bb" },
252 { "b*",
253 "bb",
254 "bbbbbb",
255 "bb",
256 "bb" },
257 { "b*",
258 "bb",
259 "aaaaa",
260 "bbaaaaa",
261 "bbabbabbabbabbabb" },
262 { "", NULL, NULL, NULL, NULL }
263 };
264
265 for (const ReplaceTest *t = tests; t->original != NULL; ++t) {
266 string one(t->original);
267 CHECK(RE(t->regexp).Replace(t->rewrite, &one));
268 CHECK_EQ(one, t->single);
269 string all(t->original);
270 CHECK(RE(t->regexp).GlobalReplace(t->rewrite, &all) > 0);
271 CHECK_EQ(all, t->global);
272 }
273 }
274
275 static void TestExtract() {
276 printf("Testing Extract\n");
277
278 string s;
279
280 CHECK(RE("(.*)@([^.]*)").Extract("\\2!\\1", "boris@kremvax.ru", &s));
281 CHECK_EQ(s, "kremvax!boris");
282
283 // check the RE interface as well
284 CHECK(RE(".*").Extract("'\\0'", "foo", &s));
285 CHECK_EQ(s, "'foo'");
286 CHECK(!RE("bar").Extract("'\\0'", "baz", &s));
287 CHECK_EQ(s, "'foo'");
288 }
289
290 static void TestConsume() {
291 printf("Testing Consume\n");
292
293 string word;
294
295 string s(" aaa b!@#$@#$cccc");
296 StringPiece input(s);
297
298 RE r("\\s*(\\w+)"); // matches a word, possibly proceeded by whitespace
299 CHECK(r.Consume(&input, &word));
300 CHECK_EQ(word, "aaa");
301 CHECK(r.Consume(&input, &word));
302 CHECK_EQ(word, "b");
303 CHECK(! r.Consume(&input, &word));
304 }
305
306 static void TestFindAndConsume() {
307 printf("Testing FindAndConsume\n");
308
309 string word;
310
311 string s(" aaa b!@#$@#$cccc");
312 StringPiece input(s);
313
314 RE r("(\\w+)"); // matches a word
315 CHECK(r.FindAndConsume(&input, &word));
316 CHECK_EQ(word, "aaa");
317 CHECK(r.FindAndConsume(&input, &word));
318 CHECK_EQ(word, "b");
319 CHECK(r.FindAndConsume(&input, &word));
320 CHECK_EQ(word, "cccc");
321 CHECK(! r.FindAndConsume(&input, &word));
322 }
323
324 static void TestMatchNumberPeculiarity() {
325 printf("Testing match-number peculiaraity\n");
326
327 string word1;
328 string word2;
329 string word3;
330
331 RE r("(foo)|(bar)|(baz)");
332 CHECK(r.PartialMatch("foo", &word1, &word2, &word3));
333 CHECK_EQ(word1, "foo");
334 CHECK_EQ(word2, "");
335 CHECK_EQ(word3, "");
336 CHECK(r.PartialMatch("bar", &word1, &word2, &word3));
337 CHECK_EQ(word1, "");
338 CHECK_EQ(word2, "bar");
339 CHECK_EQ(word3, "");
340 CHECK(r.PartialMatch("baz", &word1, &word2, &word3));
341 CHECK_EQ(word1, "");
342 CHECK_EQ(word2, "");
343 CHECK_EQ(word3, "baz");
344 CHECK(!r.PartialMatch("f", &word1, &word2, &word3));
345
346 string a;
347 CHECK(RE("(foo)|hello").FullMatch("hello", &a));
348 CHECK_EQ(a, "");
349 }
350
351 static void TestRecursion() {
352 printf("Testing recursion\n");
353
354 // Get one string that passes (sometimes), one that never does.
355 string text_good("abcdefghijk");
356 string text_bad("acdefghijkl");
357
358 // According to pcretest, matching text_good against (\w+)*b
359 // requires match_limit of at least 8192, and match_recursion_limit
360 // of at least 37.
361
362 RE_Options options_ml;
363 options_ml.set_match_limit(8192);
364 RE re("(\\w+)*b", options_ml);
365 CHECK(re.PartialMatch(text_good) == true);
366 CHECK(re.PartialMatch(text_bad) == false);
367 CHECK(re.FullMatch(text_good) == false);
368 CHECK(re.FullMatch(text_bad) == false);
369
370 options_ml.set_match_limit(1024);
371 RE re2("(\\w+)*b", options_ml);
372 CHECK(re2.PartialMatch(text_good) == false); // because of match_limit
373 CHECK(re2.PartialMatch(text_bad) == false);
374 CHECK(re2.FullMatch(text_good) == false);
375 CHECK(re2.FullMatch(text_bad) == false);
376
377 RE_Options options_mlr;
378 options_mlr.set_match_limit_recursion(50);
379 RE re3("(\\w+)*b", options_mlr);
380 CHECK(re3.PartialMatch(text_good) == true);
381 CHECK(re3.PartialMatch(text_bad) == false);
382 CHECK(re3.FullMatch(text_good) == false);
383 CHECK(re3.FullMatch(text_bad) == false);
384
385 options_mlr.set_match_limit_recursion(10);
386 RE re4("(\\w+)*b", options_mlr);
387 CHECK(re4.PartialMatch(text_good) == false);
388 CHECK(re4.PartialMatch(text_bad) == false);
389 CHECK(re4.FullMatch(text_good) == false);
390 CHECK(re4.FullMatch(text_bad) == false);
391 }
392
393 //
394 // Options tests contributed by
395 // Giuseppe Maxia, CTO, Stardata s.r.l.
396 // July 2005
397 //
398 static void GetOneOptionResult(
399 const char *option_name,
400 const char *regex,
401 const char *str,
402 RE_Options options,
403 bool full,
404 string expected) {
405
406 printf("Testing Option <%s>\n", option_name);
407 if(VERBOSE_TEST)
408 printf("/%s/ finds \"%s\" within \"%s\" \n",
409 regex,
410 expected.c_str(),
411 str);
412 string captured("");
413 if (full)
414 RE(regex,options).FullMatch(str, &captured);
415 else
416 RE(regex,options).PartialMatch(str, &captured);
417 CHECK_EQ(captured, expected);
418 }
419
420 static void TestOneOption(
421 const char *option_name,
422 const char *regex,
423 const char *str,
424 RE_Options options,
425 bool full,
426 bool assertive = true) {
427
428 printf("Testing Option <%s>\n", option_name);
429 if (VERBOSE_TEST)
430 printf("'%s' %s /%s/ \n",
431 str,
432 (assertive? "matches" : "doesn't match"),
433 regex);
434 if (assertive) {
435 if (full)
436 CHECK(RE(regex,options).FullMatch(str));
437 else
438 CHECK(RE(regex,options).PartialMatch(str));
439 } else {
440 if (full)
441 CHECK(!RE(regex,options).FullMatch(str));
442 else
443 CHECK(!RE(regex,options).PartialMatch(str));
444 }
445 }
446
447 static void Test_CASELESS() {
448 RE_Options options;
449 RE_Options options2;
450
451 options.set_caseless(true);
452 TestOneOption("CASELESS (class)", "HELLO", "hello", options, false);
453 TestOneOption("CASELESS (class2)", "HELLO", "hello", options2.set_caseless(true), false);
454 TestOneOption("CASELESS (class)", "^[A-Z]+$", "Hello", options, false);
455
456 TestOneOption("CASELESS (function)", "HELLO", "hello", pcrecpp::CASELESS(), false);
457 TestOneOption("CASELESS (function)", "^[A-Z]+$", "Hello", pcrecpp::CASELESS(), false);
458 options.set_caseless(false);
459 TestOneOption("no CASELESS", "HELLO", "hello", options, false, false);
460 }
461
462 static void Test_MULTILINE() {
463 RE_Options options;
464 RE_Options options2;
465 const char *str = "HELLO\n" "cruel\n" "world\n";
466
467 options.set_multiline(true);
468 TestOneOption("MULTILINE (class)", "^cruel$", str, options, false);
469 TestOneOption("MULTILINE (class2)", "^cruel$", str, options2.set_multiline(true), false);
470 TestOneOption("MULTILINE (function)", "^cruel$", str, pcrecpp::MULTILINE(), false);
471 options.set_multiline(false);
472 TestOneOption("no MULTILINE", "^cruel$", str, options, false, false);
473 }
474
475 static void Test_DOTALL() {
476 RE_Options options;
477 RE_Options options2;
478 const char *str = "HELLO\n" "cruel\n" "world";
479
480 options.set_dotall(true);
481 TestOneOption("DOTALL (class)", "HELLO.*world", str, options, true);
482 TestOneOption("DOTALL (class2)", "HELLO.*world", str, options2.set_dotall(true), true);
483 TestOneOption("DOTALL (function)", "HELLO.*world", str, pcrecpp::DOTALL(), true);
484 options.set_dotall(false);
485 TestOneOption("no DOTALL", "HELLO.*world", str, options, true, false);
486 }
487
488 static void Test_DOLLAR_ENDONLY() {
489 RE_Options options;
490 RE_Options options2;
491 const char *str = "HELLO world\n";
492
493 TestOneOption("no DOLLAR_ENDONLY", "world$", str, options, false);
494 options.set_dollar_endonly(true);
495 TestOneOption("DOLLAR_ENDONLY 1", "world$", str, options, false, false);
496 TestOneOption("DOLLAR_ENDONLY 2", "world$", str, options2.set_dollar_endonly(true), false, false);
497 }
498
499 static void Test_EXTRA() {
500 RE_Options options;
501 const char *str = "HELLO";
502
503 options.set_extra(true);
504 TestOneOption("EXTRA 1", "\\HELL\\O", str, options, true, false );
505 TestOneOption("EXTRA 2", "\\HELL\\O", str, RE_Options().set_extra(true), true, false );
506 options.set_extra(false);
507 TestOneOption("no EXTRA", "\\HELL\\O", str, options, true );
508 }
509
510 static void Test_EXTENDED() {
511 RE_Options options;
512 RE_Options options2;
513 const char *str = "HELLO world";
514
515 options.set_extended(true);
516 TestOneOption("EXTENDED (class)", "HELLO world", str, options, false, false);
517 TestOneOption("EXTENDED (class2)", "HELLO world", str, options2.set_extended(true), false, false);
518 TestOneOption("EXTENDED (class)",
519 "^ HE L{2} O "
520 "\\s+ "
521 "\\w+ $ ",
522 str,
523 options,
524 false);
525
526 TestOneOption("EXTENDED (function)", "HELLO world", str, pcrecpp::EXTENDED(), false, false);
527 TestOneOption("EXTENDED (function)",
528 "^ HE L{2} O "
529 "\\s+ "
530 "\\w+ $ ",
531 str,
532 pcrecpp::EXTENDED(),
533 false);
534
535 options.set_extended(false);
536 TestOneOption("no EXTENDED", "HELLO world", str, options, false);
537 }
538
539 static void Test_NO_AUTO_CAPTURE() {
540 RE_Options options;
541 const char *str = "HELLO world";
542 string captured;
543
544 printf("Testing Option <no NO_AUTO_CAPTURE>\n");
545 if (VERBOSE_TEST)
546 printf("parentheses capture text\n");
547 RE re("(world|universe)$", options);
548 CHECK(re.Extract("\\1", str , &captured));
549 CHECK_EQ(captured, "world");
550 options.set_no_auto_capture(true);
551 printf("testing Option <NO_AUTO_CAPTURE>\n");
552 if (VERBOSE_TEST)
553 printf("parentheses do not capture text\n");
554 re.Extract("\\1",str, &captured );
555 CHECK_EQ(captured, "world");
556 }
557
558 static void Test_UNGREEDY() {
559 RE_Options options;
560 const char *str = "HELLO, 'this' is the 'world'";
561
562 options.set_ungreedy(true);
563 GetOneOptionResult("UNGREEDY 1", "('.*')", str, options, false, "'this'" );
564 GetOneOptionResult("UNGREEDY 2", "('.*')", str, RE_Options().set_ungreedy(true), false, "'this'" );
565 GetOneOptionResult("UNGREEDY", "('.*?')", str, options, false, "'this' is the 'world'" );
566
567 options.set_ungreedy(false);
568 GetOneOptionResult("no UNGREEDY", "('.*')", str, options, false, "'this' is the 'world'" );
569 GetOneOptionResult("no UNGREEDY", "('.*?')", str, options, false, "'this'" );
570 }
571
572 static void Test_all_options() {
573 const char *str = "HELLO\n" "cruel\n" "world";
574 RE_Options options;
575 options.set_all_options(PCRE_CASELESS | PCRE_DOTALL);
576
577 TestOneOption("all_options (CASELESS|DOTALL)", "^hello.*WORLD", str , options, false);
578 options.set_all_options(0);
579 TestOneOption("all_options (0)", "^hello.*WORLD", str , options, false, false);
580 options.set_all_options(PCRE_MULTILINE | PCRE_EXTENDED);
581
582 TestOneOption("all_options (MULTILINE|EXTENDED)", " ^ c r u e l $ ", str, options, false);
583 TestOneOption("all_options (MULTILINE|EXTENDED) with constructor",
584 " ^ c r u e l $ ",
585 str,
586 RE_Options(PCRE_MULTILINE | PCRE_EXTENDED),
587 false);
588
589 TestOneOption("all_options (MULTILINE|EXTENDED) with concatenation",
590 " ^ c r u e l $ ",
591 str,
592 RE_Options()
593 .set_multiline(true)
594 .set_extended(true),
595 false);
596
597 options.set_all_options(0);
598 TestOneOption("all_options (0)", "^ c r u e l $", str, options, false, false);
599
600 }
601
602 static void TestOptions() {
603 printf("Testing Options\n");
604 Test_CASELESS();
605 Test_MULTILINE();
606 Test_DOTALL();
607 Test_DOLLAR_ENDONLY();
608 Test_EXTENDED();
609 Test_NO_AUTO_CAPTURE();
610 Test_UNGREEDY();
611 Test_EXTRA();
612 Test_all_options();
613 }
614
615 int main(int argc, char** argv) {
616 // Treat any flag as --help
617 if (argc > 1 && argv[1][0] == '-') {
618 printf("Usage: %s [timing1|timing2|timing3 num-iters]\n"
619 " If 'timingX ###' is specified, run the given timing test\n"
620 " with the given number of iterations, rather than running\n"
621 " the default corectness test.\n", argv[0]);
622 return 0;
623 }
624
625 if (argc > 1) {
626 if ( argc == 2 || atoi(argv[2]) == 0) {
627 printf("timing mode needs a num-iters argument\n");
628 return 1;
629 }
630 if (!strcmp(argv[1], "timing1"))
631 Timing1(atoi(argv[2]));
632 else if (!strcmp(argv[1], "timing2"))
633 Timing2(atoi(argv[2]));
634 else if (!strcmp(argv[1], "timing3"))
635 Timing3(atoi(argv[2]));
636 else
637 printf("Unknown argument '%s'\n", argv[1]);
638 return 0;
639 }
640
641 printf("Testing FullMatch\n");
642
643 int i;
644 string s;
645
646 /***** FullMatch with no args *****/
647
648 CHECK(RE("h.*o").FullMatch("hello"));
649 CHECK(!RE("h.*o").FullMatch("othello"));
650 CHECK(!RE("h.*o").FullMatch("hello!"));
651
652 /***** FullMatch with args *****/
653
654 // Zero-arg
655 CHECK(RE("\\d+").FullMatch("1001"));
656
657 // Single-arg
658 CHECK(RE("(\\d+)").FullMatch("1001", &i));
659 CHECK_EQ(i, 1001);
660 CHECK(RE("(-?\\d+)").FullMatch("-123", &i));
661 CHECK_EQ(i, -123);
662 CHECK(!RE("()\\d+").FullMatch("10", &i));
663 CHECK(!RE("(\\d+)").FullMatch("1234567890123456789012345678901234567890",
664 &i));
665
666 // Digits surrounding integer-arg
667 CHECK(RE("1(\\d*)4").FullMatch("1234", &i));
668 CHECK_EQ(i, 23);
669 CHECK(RE("(\\d)\\d+").FullMatch("1234", &i));
670 CHECK_EQ(i, 1);
671 CHECK(RE("(-\\d)\\d+").FullMatch("-1234", &i));
672 CHECK_EQ(i, -1);
673 CHECK(RE("(\\d)").PartialMatch("1234", &i));
674 CHECK_EQ(i, 1);
675 CHECK(RE("(-\\d)").PartialMatch("-1234", &i));
676 CHECK_EQ(i, -1);
677
678 // String-arg
679 CHECK(RE("h(.*)o").FullMatch("hello", &s));
680 CHECK_EQ(s, string("ell"));
681
682 // StringPiece-arg
683 StringPiece sp;
684 CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &sp, &i));
685 CHECK_EQ(sp.size(), 4);
686 CHECK(memcmp(sp.data(), "ruby", 4) == 0);
687 CHECK_EQ(i, 1234);
688
689 // Multi-arg
690 CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &s, &i));
691 CHECK_EQ(s, string("ruby"));
692 CHECK_EQ(i, 1234);
693
694 // Ignored arg
695 CHECK(RE("(\\w+)(:)(\\d+)").FullMatch("ruby:1234", &s, (void*)NULL, &i));
696 CHECK_EQ(s, string("ruby"));
697 CHECK_EQ(i, 1234);
698
699 // Type tests
700 {
701 char c;
702 CHECK(RE("(H)ello").FullMatch("Hello", &c));
703 CHECK_EQ(c, 'H');
704 }
705 {
706 unsigned char c;
707 CHECK(RE("(H)ello").FullMatch("Hello", &c));
708 CHECK_EQ(c, static_cast<unsigned char>('H'));
709 }
710 {
711 short v;
712 CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
713 CHECK(RE("(-?\\d+)").FullMatch("-100", &v)); CHECK_EQ(v, -100);
714 CHECK(RE("(-?\\d+)").FullMatch("32767", &v)); CHECK_EQ(v, 32767);
715 CHECK(RE("(-?\\d+)").FullMatch("-32768", &v)); CHECK_EQ(v, -32768);
716 CHECK(!RE("(-?\\d+)").FullMatch("-32769", &v));
717 CHECK(!RE("(-?\\d+)").FullMatch("32768", &v));
718 }
719 {
720 unsigned short v;
721 CHECK(RE("(\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
722 CHECK(RE("(\\d+)").FullMatch("32767", &v)); CHECK_EQ(v, 32767);
723 CHECK(RE("(\\d+)").FullMatch("65535", &v)); CHECK_EQ(v, 65535);
724 CHECK(!RE("(\\d+)").FullMatch("65536", &v));
725 }
726 {
727 int v;
728 static const int max_value = 0x7fffffff;
729 static const int min_value = -max_value - 1;
730 CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
731 CHECK(RE("(-?\\d+)").FullMatch("-100", &v)); CHECK_EQ(v, -100);
732 CHECK(RE("(-?\\d+)").FullMatch("2147483647", &v)); CHECK_EQ(v, max_value);
733 CHECK(RE("(-?\\d+)").FullMatch("-2147483648", &v)); CHECK_EQ(v, min_value);
734 CHECK(!RE("(-?\\d+)").FullMatch("-2147483649", &v));
735 CHECK(!RE("(-?\\d+)").FullMatch("2147483648", &v));
736 }
737 {
738 unsigned int v;
739 static const unsigned int max_value = 0xfffffffful;
740 CHECK(RE("(\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
741 CHECK(RE("(\\d+)").FullMatch("4294967295", &v)); CHECK_EQ(v, max_value);
742 CHECK(!RE("(\\d+)").FullMatch("4294967296", &v));
743 }
744 #ifdef HAVE_LONG_LONG
745 {
746 long long v;
747 static const long long max_value = 0x7fffffffffffffffLL;
748 static const long long min_value = -max_value - 1;
749 char buf[32];
750
751 CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
752 CHECK(RE("(-?\\d+)").FullMatch("-100",&v)); CHECK_EQ(v, -100);
753
754 snprintf(buf, sizeof(buf), "%lld", max_value);
755 CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
756
757 snprintf(buf, sizeof(buf), "%lld", min_value);
758 CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, min_value);
759
760 snprintf(buf, sizeof(buf), "%lld", max_value);
761 assert(buf[strlen(buf)-1] != '9');
762 buf[strlen(buf)-1]++;
763 CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
764
765 snprintf(buf, sizeof(buf), "%lld", min_value);
766 assert(buf[strlen(buf)-1] != '9');
767 buf[strlen(buf)-1]++;
768 CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
769 }
770 #endif
771 #if defined HAVE_UNSIGNED_LONG_LONG && defined HAVE_LONG_LONG
772 {
773 unsigned long long v;
774 long long v2;
775 static const unsigned long long max_value = 0xffffffffffffffffULL;
776 char buf[32];
777
778 CHECK(RE("(-?\\d+)").FullMatch("100",&v)); CHECK_EQ(v, 100);
779 CHECK(RE("(-?\\d+)").FullMatch("-100",&v2)); CHECK_EQ(v2, -100);
780
781 snprintf(buf, sizeof(buf), "%llu", max_value);
782 CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
783
784 assert(buf[strlen(buf)-1] != '9');
785 buf[strlen(buf)-1]++;
786 CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
787 }
788 #endif
789 {
790 float v;
791 CHECK(RE("(.*)").FullMatch("100", &v));
792 CHECK(RE("(.*)").FullMatch("-100.", &v));
793 CHECK(RE("(.*)").FullMatch("1e23", &v));
794 }
795 {
796 double v;
797 CHECK(RE("(.*)").FullMatch("100", &v));
798 CHECK(RE("(.*)").FullMatch("-100.", &v));
799 CHECK(RE("(.*)").FullMatch("1e23", &v));
800 }
801
802 // Check that matching is fully anchored
803 CHECK(!RE("(\\d+)").FullMatch("x1001", &i));
804 CHECK(!RE("(\\d+)").FullMatch("1001x", &i));
805 CHECK(RE("x(\\d+)").FullMatch("x1001", &i)); CHECK_EQ(i, 1001);
806 CHECK(RE("(\\d+)x").FullMatch("1001x", &i)); CHECK_EQ(i, 1001);
807
808 // Braces
809 CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcd"));
810 CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcde"));
811 CHECK(!RE("[0-9a-f+.-]{5,}").FullMatch("0abc"));
812
813 // Complicated RE
814 CHECK(RE("foo|bar|[A-Z]").FullMatch("foo"));
815 CHECK(RE("foo|bar|[A-Z]").FullMatch("bar"));
816 CHECK(RE("foo|bar|[A-Z]").FullMatch("X"));
817 CHECK(!RE("foo|bar|[A-Z]").FullMatch("XY"));
818
819 // Check full-match handling (needs '$' tacked on internally)
820 CHECK(RE("fo|foo").FullMatch("fo"));
821 CHECK(RE("fo|foo").FullMatch("foo"));
822 CHECK(RE("fo|foo$").FullMatch("fo"));
823 CHECK(RE("fo|foo$").FullMatch("foo"));
824 CHECK(RE("foo$").FullMatch("foo"));
825 CHECK(!RE("foo\\$").FullMatch("foo$bar"));
826 CHECK(!RE("fo|bar").FullMatch("fox"));
827
828 // Uncomment the following if we change the handling of '$' to
829 // prevent it from matching a trailing newline
830 if (false) {
831 // Check that we don't get bitten by pcre's special handling of a
832 // '\n' at the end of the string matching '$'
833 CHECK(!RE("foo$").PartialMatch("foo\n"));
834 }
835
836 // Number of args
837 int a[16];
838 CHECK(RE("").FullMatch(""));
839
840 memset(a, 0, sizeof(0));
841 CHECK(RE("(\\d){1}").FullMatch("1",
842 &a[0]));
843 CHECK_EQ(a[0], 1);
844
845 memset(a, 0, sizeof(0));
846 CHECK(RE("(\\d)(\\d)").FullMatch("12",
847 &a[0], &a[1]));
848 CHECK_EQ(a[0], 1);
849 CHECK_EQ(a[1], 2);
850
851 memset(a, 0, sizeof(0));
852 CHECK(RE("(\\d)(\\d)(\\d)").FullMatch("123",
853 &a[0], &a[1], &a[2]));
854 CHECK_EQ(a[0], 1);
855 CHECK_EQ(a[1], 2);
856 CHECK_EQ(a[2], 3);
857
858 memset(a, 0, sizeof(0));
859 CHECK(RE("(\\d)(\\d)(\\d)(\\d)").FullMatch("1234",
860 &a[0], &a[1], &a[2], &a[3]));
861 CHECK_EQ(a[0], 1);
862 CHECK_EQ(a[1], 2);
863 CHECK_EQ(a[2], 3);
864 CHECK_EQ(a[3], 4);
865
866 memset(a, 0, sizeof(0));
867 CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("12345",
868 &a[0], &a[1], &a[2],
869 &a[3], &a[4]));
870 CHECK_EQ(a[0], 1);
871 CHECK_EQ(a[1], 2);
872 CHECK_EQ(a[2], 3);
873 CHECK_EQ(a[3], 4);
874 CHECK_EQ(a[4], 5);
875
876 memset(a, 0, sizeof(0));
877 CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("123456",
878 &a[0], &a[1], &a[2],
879 &a[3], &a[4], &a[5]));
880 CHECK_EQ(a[0], 1);
881 CHECK_EQ(a[1], 2);
882 CHECK_EQ(a[2], 3);
883 CHECK_EQ(a[3], 4);
884 CHECK_EQ(a[4], 5);
885 CHECK_EQ(a[5], 6);
886
887 memset(a, 0, sizeof(0));
888 CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("1234567",
889 &a[0], &a[1], &a[2], &a[3],
890 &a[4], &a[5], &a[6]));
891 CHECK_EQ(a[0], 1);
892 CHECK_EQ(a[1], 2);
893 CHECK_EQ(a[2], 3);
894 CHECK_EQ(a[3], 4);
895 CHECK_EQ(a[4], 5);
896 CHECK_EQ(a[5], 6);
897 CHECK_EQ(a[6], 7);
898
899 memset(a, 0, sizeof(0));
900 CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)"
901 "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch(
902 "1234567890123456",
903 &a[0], &a[1], &a[2], &a[3],
904 &a[4], &a[5], &a[6], &a[7],
905 &a[8], &a[9], &a[10], &a[11],
906 &a[12], &a[13], &a[14], &a[15]));
907 CHECK_EQ(a[0], 1);
908 CHECK_EQ(a[1], 2);
909 CHECK_EQ(a[2], 3);
910 CHECK_EQ(a[3], 4);
911 CHECK_EQ(a[4], 5);
912 CHECK_EQ(a[5], 6);
913 CHECK_EQ(a[6], 7);
914 CHECK_EQ(a[7], 8);
915 CHECK_EQ(a[8], 9);
916 CHECK_EQ(a[9], 0);
917 CHECK_EQ(a[10], 1);
918 CHECK_EQ(a[11], 2);
919 CHECK_EQ(a[12], 3);
920 CHECK_EQ(a[13], 4);
921 CHECK_EQ(a[14], 5);
922 CHECK_EQ(a[15], 6);
923
924 /***** PartialMatch *****/
925
926 printf("Testing PartialMatch\n");
927
928 CHECK(RE("h.*o").PartialMatch("hello"));
929 CHECK(RE("h.*o").PartialMatch("othello"));
930 CHECK(RE("h.*o").PartialMatch("hello!"));
931 CHECK(RE("((((((((((((((((((((x))))))))))))))))))))").PartialMatch("x"));
932
933 RadixTests();
934 TestReplace();
935 TestExtract();
936 TestConsume();
937 TestFindAndConsume();
938 TestMatchNumberPeculiarity();
939
940 // Check the pattern() accessor
941 {
942 const string kPattern = "http://([^/]+)/.*";
943 const RE re(kPattern);
944 CHECK_EQ(kPattern, re.pattern());
945 }
946
947 // Check RE error field.
948 {
949 RE re("foo");
950 CHECK(re.error().empty()); // Must have no error
951 }
952
953 #ifdef SUPPORT_UTF8
954 // Check UTF-8 handling
955 {
956 printf("Testing UTF-8 handling\n");
957
958 // Three Japanese characters (nihongo)
959 const char utf8_string[] = {
960 0xe6, 0x97, 0xa5, // 65e5
961 0xe6, 0x9c, 0xac, // 627c
962 0xe8, 0xaa, 0x9e, // 8a9e
963 0
964 };
965 const char utf8_pattern[] = {
966 '.',
967 0xe6, 0x9c, 0xac, // 627c
968 '.',
969 0
970 };
971
972 // Both should match in either mode, bytes or UTF-8
973 RE re_test1(".........");
974 CHECK(re_test1.FullMatch(utf8_string));
975 RE re_test2("...", pcrecpp::UTF8());
976 CHECK(re_test2.FullMatch(utf8_string));
977
978 // Check that '.' matches one byte or UTF-8 character
979 // according to the mode.
980 string ss;
981 RE re_test3("(.)");
982 CHECK(re_test3.PartialMatch(utf8_string, &ss));
983 CHECK_EQ(ss, string("\xe6"));
984 RE re_test4("(.)", pcrecpp::UTF8());
985 CHECK(re_test4.PartialMatch(utf8_string, &ss));
986 CHECK_EQ(ss, string("\xe6\x97\xa5"));
987
988 // Check that string matches itself in either mode
989 RE re_test5(utf8_string);
990 CHECK(re_test5.FullMatch(utf8_string));
991 RE re_test6(utf8_string, pcrecpp::UTF8());
992 CHECK(re_test6.FullMatch(utf8_string));
993
994 // Check that pattern matches string only in UTF8 mode
995 RE re_test7(utf8_pattern);
996 CHECK(!re_test7.FullMatch(utf8_string));
997 RE re_test8(utf8_pattern, pcrecpp::UTF8());
998 CHECK(re_test8.FullMatch(utf8_string));
999 }
1000
1001 // Check that ungreedy, UTF8 regular expressions don't match when they
1002 // oughtn't -- see bug 82246.
1003 {
1004 // This code always worked.
1005 const char* pattern = "\\w+X";
1006 const string target = "a aX";
1007 RE match_sentence(pattern);
1008 RE match_sentence_re(pattern, pcrecpp::UTF8());
1009
1010 CHECK(!match_sentence.FullMatch(target));
1011 CHECK(!match_sentence_re.FullMatch(target));
1012 }
1013
1014 {
1015 const char* pattern = "(?U)\\w+X";
1016 const string target = "a aX";
1017 RE match_sentence(pattern);
1018 RE match_sentence_re(pattern, pcrecpp::UTF8());
1019
1020 CHECK(!match_sentence.FullMatch(target));
1021 CHECK(!match_sentence_re.FullMatch(target));
1022 }
1023 #endif /* def SUPPORT_UTF8 */
1024
1025 printf("Testing error reporting\n");
1026
1027 { RE re("a\\1"); CHECK(!re.error().empty()); }
1028 {
1029 RE re("a[x");
1030 CHECK(!re.error().empty());
1031 }
1032 {
1033 RE re("a[z-a]");
1034 CHECK(!re.error().empty());
1035 }
1036 {
1037 RE re("a[[:foobar:]]");
1038 CHECK(!re.error().empty());
1039 }
1040 {
1041 RE re("a(b");
1042 CHECK(!re.error().empty());
1043 }
1044 {
1045 RE re("a\\");
1046 CHECK(!re.error().empty());
1047 }
1048
1049 // Test that recursion is stopped
1050 TestRecursion();
1051
1052 // Test Options
1053 if (getenv("VERBOSE_TEST") != NULL)
1054 VERBOSE_TEST = true;
1055 TestOptions();
1056
1057 // Done
1058 printf("OK\n");
1059
1060 return 0;
1061 }

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12