/[pcre]/code/branches/pcre16/pcrecpp_unittest.cc
ViewVC logotype

Contents of /code/branches/pcre16/pcrecpp_unittest.cc

Parent Directory Parent Directory | Revision Log Revision Log


Revision 91 - (show annotations) (download)
Sat Feb 24 21:41:34 2007 UTC (7 years, 8 months ago) by nigel
Original Path: code/trunk/pcrecpp_unittest.cc
File size: 33739 byte(s)
Load pcre-6.7 into code/trunk.

1 // Copyright (c) 2005, Google Inc.
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above
11 // copyright notice, this list of conditions and the following disclaimer
12 // in the documentation and/or other materials provided with the
13 // distribution.
14 // * Neither the name of Google Inc. nor the names of its
15 // contributors may be used to endorse or promote products derived from
16 // this software without specific prior written permission.
17 //
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // Author: Sanjay Ghemawat
31 //
32 // TODO: Test extractions for PartialMatch/Consume
33
#include <stdio.h>
#include <stdlib.h>      // for exit, atoi and getenv
#include <string.h>      // for strcmp, strlen, memset and memcmp
#include <cassert>
#include <vector>
#include "config.h"
#include "pcrecpp.h"
39
40 using pcrecpp::StringPiece;
41 using pcrecpp::RE;
42 using pcrecpp::RE_Options;
43 using pcrecpp::Hex;
44 using pcrecpp::Octal;
45 using pcrecpp::CRadix;
46
// When true, the option tests also print the pattern and subject they are
// exercising.  main() switches it on when the VERBOSE_TEST environment
// variable is set.
static bool VERBOSE_TEST = false;

// CHECK dies with a fatal error if condition is not true.  It is *not*
// controlled by NDEBUG, so the check will be executed regardless of
// compilation mode.  Therefore, it is safe to do things like:
//    CHECK_EQ(fp->Write(x), 4)
#define CHECK(condition) do {                                        \
  if (!(condition)) {                                                \
    fprintf(stderr, "%s:%d: Check failed: %s\n",                     \
            __FILE__, __LINE__, #condition);                         \
    exit(1);                                                         \
  }                                                                  \
} while (0)

// CHECK_EQ(a, b) dies unless a == b.  Both operands are parenthesized so
// that an argument containing an operator that binds less tightly than ==
// (for example `x & mask`, or a conditional expression) is compared as a
// whole rather than being split by operator precedence.
#define CHECK_EQ(a, b) CHECK((a) == (b))
62
63 static void Timing1(int num_iters) {
64 // Same pattern lots of times
65 RE pattern("ruby:\\d+");
66 StringPiece p("ruby:1234");
67 for (int j = num_iters; j > 0; j--) {
68 CHECK(pattern.FullMatch(p));
69 }
70 }
71
72 static void Timing2(int num_iters) {
73 // Same pattern lots of times
74 RE pattern("ruby:(\\d+)");
75 int i;
76 for (int j = num_iters; j > 0; j--) {
77 CHECK(pattern.FullMatch("ruby:1234", &i));
78 CHECK_EQ(i, 1234);
79 }
80 }
81
82 static void Timing3(int num_iters) {
83 string text_string;
84 for (int j = num_iters; j > 0; j--) {
85 text_string += "this is another line\n";
86 }
87
88 RE line_matcher(".*\n");
89 string line;
90 StringPiece text(text_string);
91 int counter = 0;
92 while (line_matcher.Consume(&text)) {
93 counter++;
94 }
95 printf("Matched %d lines\n", counter);
96 }
97
#if 0  // enable this if you have a way of defining VirtualProcessSize()

// Checks for memory leaks: compiles 100000 distinct patterns and requires
// that the process size grows by less than 2% over the second half of the
// run.  The size after the first 50000 patterns is taken as the baseline.
static void LeakTest() {
  unsigned long long initial_size = 0;
  for (int i = 0; i < 100000; i++) {
    if (i == 50000) {
      initial_size = VirtualProcessSize();
      printf("Size after 50000: %llu\n", initial_size);
    }
    char buf[100];
    snprintf(buf, sizeof(buf), "pat%09d", i);
    RE newre(buf);
  }
  // Same type as initial_size so that it matches the %llu conversion below.
  const unsigned long long final_size = VirtualProcessSize();
  printf("Size after 100000: %llu\n", final_size);
  // Subtract as doubles: an unsigned difference would wrap around to a huge
  // value if the process happened to shrink.
  const double growth =
      (static_cast<double>(final_size) - static_cast<double>(initial_size)) /
      static_cast<double>(final_size);
  printf("Growth: %0.2f%%", growth * 100);
  CHECK(growth < 0.02);    // Allow < 2% growth
}

#endif
120
121 static void RadixTests() {
122 printf("Testing hex\n");
123
124 #define CHECK_HEX(type, value) \
125 do { \
126 type v; \
127 CHECK(RE("([0-9a-fA-F]+)[uUlL]*").FullMatch(#value, Hex(&v))); \
128 CHECK_EQ(v, 0x ## value); \
129 CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0x" #value, CRadix(&v))); \
130 CHECK_EQ(v, 0x ## value); \
131 } while(0)
132
133 CHECK_HEX(short, 2bad);
134 CHECK_HEX(unsigned short, 2badU);
135 CHECK_HEX(int, dead);
136 CHECK_HEX(unsigned int, deadU);
137 CHECK_HEX(long, 7eadbeefL);
138 CHECK_HEX(unsigned long, deadbeefUL);
139 #ifdef HAVE_LONG_LONG
140 CHECK_HEX(long long, 12345678deadbeefLL);
141 #endif
142 #ifdef HAVE_UNSIGNED_LONG_LONG
143 CHECK_HEX(unsigned long long, cafebabedeadbeefULL);
144 #endif
145
146 #undef CHECK_HEX
147
148 printf("Testing octal\n");
149
150 #define CHECK_OCTAL(type, value) \
151 do { \
152 type v; \
153 CHECK(RE("([0-7]+)[uUlL]*").FullMatch(#value, Octal(&v))); \
154 CHECK_EQ(v, 0 ## value); \
155 CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0" #value, CRadix(&v))); \
156 CHECK_EQ(v, 0 ## value); \
157 } while(0)
158
159 CHECK_OCTAL(short, 77777);
160 CHECK_OCTAL(unsigned short, 177777U);
161 CHECK_OCTAL(int, 17777777777);
162 CHECK_OCTAL(unsigned int, 37777777777U);
163 CHECK_OCTAL(long, 17777777777L);
164 CHECK_OCTAL(unsigned long, 37777777777UL);
165 #ifdef HAVE_LONG_LONG
166 CHECK_OCTAL(long long, 777777777777777777777LL);
167 #endif
168 #ifdef HAVE_UNSIGNED_LONG_LONG
169 CHECK_OCTAL(unsigned long long, 1777777777777777777777ULL);
170 #endif
171
172 #undef CHECK_OCTAL
173
174 printf("Testing decimal\n");
175
176 #define CHECK_DECIMAL(type, value) \
177 do { \
178 type v; \
179 CHECK(RE("(-?[0-9]+)[uUlL]*").FullMatch(#value, &v)); \
180 CHECK_EQ(v, value); \
181 CHECK(RE("(-?[0-9a-fA-FxX]+)[uUlL]*").FullMatch(#value, CRadix(&v))); \
182 CHECK_EQ(v, value); \
183 } while(0)
184
185 CHECK_DECIMAL(short, -1);
186 CHECK_DECIMAL(unsigned short, 9999);
187 CHECK_DECIMAL(int, -1000);
188 CHECK_DECIMAL(unsigned int, 12345U);
189 CHECK_DECIMAL(long, -10000000L);
190 CHECK_DECIMAL(unsigned long, 3083324652U);
191 #ifdef HAVE_LONG_LONG
192 CHECK_DECIMAL(long long, -100000000000000LL);
193 #endif
194 #ifdef HAVE_UNSIGNED_LONG_LONG
195 CHECK_DECIMAL(unsigned long long, 1234567890987654321ULL);
196 #endif
197
198 #undef CHECK_DECIMAL
199
200 }
201
202 static void TestReplace() {
203 printf("Testing Replace\n");
204
205 struct ReplaceTest {
206 const char *regexp;
207 const char *rewrite;
208 const char *original;
209 const char *single;
210 const char *global;
211 };
212 static const ReplaceTest tests[] = {
213 { "(qu|[b-df-hj-np-tv-z]*)([a-z]+)",
214 "\\2\\1ay",
215 "the quick brown fox jumps over the lazy dogs.",
216 "ethay quick brown fox jumps over the lazy dogs.",
217 "ethay ickquay ownbray oxfay umpsjay overay ethay azylay ogsday." },
218 { "\\w+",
219 "\\0-NOSPAM",
220 "paul.haahr@google.com",
221 "paul-NOSPAM.haahr@google.com",
222 "paul-NOSPAM.haahr-NOSPAM@google-NOSPAM.com-NOSPAM" },
223 { "^",
224 "(START)",
225 "foo",
226 "(START)foo",
227 "(START)foo" },
228 { "^",
229 "(START)",
230 "",
231 "(START)",
232 "(START)" },
233 { "$",
234 "(END)",
235 "",
236 "(END)",
237 "(END)" },
238 { "b",
239 "bb",
240 "ababababab",
241 "abbabababab",
242 "abbabbabbabbabb" },
243 { "b",
244 "bb",
245 "bbbbbb",
246 "bbbbbbb",
247 "bbbbbbbbbbbb" },
248 { "b+",
249 "bb",
250 "bbbbbb",
251 "bb",
252 "bb" },
253 { "b*",
254 "bb",
255 "bbbbbb",
256 "bb",
257 "bb" },
258 { "b*",
259 "bb",
260 "aaaaa",
261 "bbaaaaa",
262 "bbabbabbabbabbabb" },
263 { "b*",
264 "bb",
265 "aa\naa\n",
266 "bbaa\naa\n",
267 "bbabbabb\nbbabbabb\nbb" },
268 { "b*",
269 "bb",
270 "aa\raa\r",
271 "bbaa\raa\r",
272 "bbabbabb\rbbabbabb\rbb" },
273 { "b*",
274 "bb",
275 "aa\r\naa\r\n",
276 "bbaa\r\naa\r\n",
277 "bbabbabb\r\nbbabbabb\r\nbb" },
278 #ifdef SUPPORT_UTF8
279 { "b*",
280 "bb",
281 "\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8", // utf8
282 "bb\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8",
283 "bb\xE3\x83\x9B""bb""\xE3\x83\xBC""bb""\xE3\x83\xA0""bb""\xE3\x81\xB8""bb" },
284 { "b*",
285 "bb",
286 "\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n", // utf8
287 "bb\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n",
288 ("bb\xE3\x83\x9B""bb\r\nbb""\xE3\x83\xBC""bb\rbb""\xE3\x83\xA0"
289 "bb\nbb""\xE3\x81\xB8""bb\r\nbb") },
290 #endif
291 { "", NULL, NULL, NULL, NULL }
292 };
293
294 #ifdef SUPPORT_UTF8
295 const bool support_utf8 = true;
296 #else
297 const bool support_utf8 = false;
298 #endif
299
300 for (const ReplaceTest *t = tests; t->original != NULL; ++t) {
301 RE re(t->regexp, RE_Options(PCRE_NEWLINE_CRLF).set_utf8(support_utf8));
302 assert(re.error().empty());
303 string one(t->original);
304 CHECK(re.Replace(t->rewrite, &one));
305 CHECK_EQ(one, t->single);
306 string all(t->original);
307 CHECK(re.GlobalReplace(t->rewrite, &all) > 0);
308 CHECK_EQ(all, t->global);
309 }
310
311 // One final test: test \r\n replacement when we're not in CRLF mode
312 {
313 RE re("b*", RE_Options(PCRE_NEWLINE_CR).set_utf8(support_utf8));
314 assert(re.error().empty());
315 string all("aa\r\naa\r\n");
316 CHECK(re.GlobalReplace("bb", &all) > 0);
317 CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
318 }
319 {
320 RE re("b*", RE_Options(PCRE_NEWLINE_LF).set_utf8(support_utf8));
321 assert(re.error().empty());
322 string all("aa\r\naa\r\n");
323 CHECK(re.GlobalReplace("bb", &all) > 0);
324 CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
325 }
326 // TODO: test what happens when no PCRE_NEWLINE_* flag is set.
327 // Alas, the answer depends on how pcre was compiled.
328 }
329
330 static void TestExtract() {
331 printf("Testing Extract\n");
332
333 string s;
334
335 CHECK(RE("(.*)@([^.]*)").Extract("\\2!\\1", "boris@kremvax.ru", &s));
336 CHECK_EQ(s, "kremvax!boris");
337
338 // check the RE interface as well
339 CHECK(RE(".*").Extract("'\\0'", "foo", &s));
340 CHECK_EQ(s, "'foo'");
341 CHECK(!RE("bar").Extract("'\\0'", "baz", &s));
342 CHECK_EQ(s, "'foo'");
343 }
344
345 static void TestConsume() {
346 printf("Testing Consume\n");
347
348 string word;
349
350 string s(" aaa b!@#$@#$cccc");
351 StringPiece input(s);
352
353 RE r("\\s*(\\w+)"); // matches a word, possibly proceeded by whitespace
354 CHECK(r.Consume(&input, &word));
355 CHECK_EQ(word, "aaa");
356 CHECK(r.Consume(&input, &word));
357 CHECK_EQ(word, "b");
358 CHECK(! r.Consume(&input, &word));
359 }
360
361 static void TestFindAndConsume() {
362 printf("Testing FindAndConsume\n");
363
364 string word;
365
366 string s(" aaa b!@#$@#$cccc");
367 StringPiece input(s);
368
369 RE r("(\\w+)"); // matches a word
370 CHECK(r.FindAndConsume(&input, &word));
371 CHECK_EQ(word, "aaa");
372 CHECK(r.FindAndConsume(&input, &word));
373 CHECK_EQ(word, "b");
374 CHECK(r.FindAndConsume(&input, &word));
375 CHECK_EQ(word, "cccc");
376 CHECK(! r.FindAndConsume(&input, &word));
377 }
378
379 static void TestMatchNumberPeculiarity() {
380 printf("Testing match-number peculiaraity\n");
381
382 string word1;
383 string word2;
384 string word3;
385
386 RE r("(foo)|(bar)|(baz)");
387 CHECK(r.PartialMatch("foo", &word1, &word2, &word3));
388 CHECK_EQ(word1, "foo");
389 CHECK_EQ(word2, "");
390 CHECK_EQ(word3, "");
391 CHECK(r.PartialMatch("bar", &word1, &word2, &word3));
392 CHECK_EQ(word1, "");
393 CHECK_EQ(word2, "bar");
394 CHECK_EQ(word3, "");
395 CHECK(r.PartialMatch("baz", &word1, &word2, &word3));
396 CHECK_EQ(word1, "");
397 CHECK_EQ(word2, "");
398 CHECK_EQ(word3, "baz");
399 CHECK(!r.PartialMatch("f", &word1, &word2, &word3));
400
401 string a;
402 CHECK(RE("(foo)|hello").FullMatch("hello", &a));
403 CHECK_EQ(a, "");
404 }
405
406 static void TestRecursion() {
407 printf("Testing recursion\n");
408
409 // Get one string that passes (sometimes), one that never does.
410 string text_good("abcdefghijk");
411 string text_bad("acdefghijkl");
412
413 // According to pcretest, matching text_good against (\w+)*b
414 // requires match_limit of at least 8192, and match_recursion_limit
415 // of at least 37.
416
417 RE_Options options_ml;
418 options_ml.set_match_limit(8192);
419 RE re("(\\w+)*b", options_ml);
420 CHECK(re.PartialMatch(text_good) == true);
421 CHECK(re.PartialMatch(text_bad) == false);
422 CHECK(re.FullMatch(text_good) == false);
423 CHECK(re.FullMatch(text_bad) == false);
424
425 options_ml.set_match_limit(1024);
426 RE re2("(\\w+)*b", options_ml);
427 CHECK(re2.PartialMatch(text_good) == false); // because of match_limit
428 CHECK(re2.PartialMatch(text_bad) == false);
429 CHECK(re2.FullMatch(text_good) == false);
430 CHECK(re2.FullMatch(text_bad) == false);
431
432 RE_Options options_mlr;
433 options_mlr.set_match_limit_recursion(50);
434 RE re3("(\\w+)*b", options_mlr);
435 CHECK(re3.PartialMatch(text_good) == true);
436 CHECK(re3.PartialMatch(text_bad) == false);
437 CHECK(re3.FullMatch(text_good) == false);
438 CHECK(re3.FullMatch(text_bad) == false);
439
440 options_mlr.set_match_limit_recursion(10);
441 RE re4("(\\w+)*b", options_mlr);
442 CHECK(re4.PartialMatch(text_good) == false);
443 CHECK(re4.PartialMatch(text_bad) == false);
444 CHECK(re4.FullMatch(text_good) == false);
445 CHECK(re4.FullMatch(text_bad) == false);
446 }
447
448 //
449 // Options tests contributed by
450 // Giuseppe Maxia, CTO, Stardata s.r.l.
451 // July 2005
452 //
453 static void GetOneOptionResult(
454 const char *option_name,
455 const char *regex,
456 const char *str,
457 RE_Options options,
458 bool full,
459 string expected) {
460
461 printf("Testing Option <%s>\n", option_name);
462 if(VERBOSE_TEST)
463 printf("/%s/ finds \"%s\" within \"%s\" \n",
464 regex,
465 expected.c_str(),
466 str);
467 string captured("");
468 if (full)
469 RE(regex,options).FullMatch(str, &captured);
470 else
471 RE(regex,options).PartialMatch(str, &captured);
472 CHECK_EQ(captured, expected);
473 }
474
475 static void TestOneOption(
476 const char *option_name,
477 const char *regex,
478 const char *str,
479 RE_Options options,
480 bool full,
481 bool assertive = true) {
482
483 printf("Testing Option <%s>\n", option_name);
484 if (VERBOSE_TEST)
485 printf("'%s' %s /%s/ \n",
486 str,
487 (assertive? "matches" : "doesn't match"),
488 regex);
489 if (assertive) {
490 if (full)
491 CHECK(RE(regex,options).FullMatch(str));
492 else
493 CHECK(RE(regex,options).PartialMatch(str));
494 } else {
495 if (full)
496 CHECK(!RE(regex,options).FullMatch(str));
497 else
498 CHECK(!RE(regex,options).PartialMatch(str));
499 }
500 }
501
502 static void Test_CASELESS() {
503 RE_Options options;
504 RE_Options options2;
505
506 options.set_caseless(true);
507 TestOneOption("CASELESS (class)", "HELLO", "hello", options, false);
508 TestOneOption("CASELESS (class2)", "HELLO", "hello", options2.set_caseless(true), false);
509 TestOneOption("CASELESS (class)", "^[A-Z]+$", "Hello", options, false);
510
511 TestOneOption("CASELESS (function)", "HELLO", "hello", pcrecpp::CASELESS(), false);
512 TestOneOption("CASELESS (function)", "^[A-Z]+$", "Hello", pcrecpp::CASELESS(), false);
513 options.set_caseless(false);
514 TestOneOption("no CASELESS", "HELLO", "hello", options, false, false);
515 }
516
517 static void Test_MULTILINE() {
518 RE_Options options;
519 RE_Options options2;
520 const char *str = "HELLO\n" "cruel\n" "world\n";
521
522 options.set_multiline(true);
523 TestOneOption("MULTILINE (class)", "^cruel$", str, options, false);
524 TestOneOption("MULTILINE (class2)", "^cruel$", str, options2.set_multiline(true), false);
525 TestOneOption("MULTILINE (function)", "^cruel$", str, pcrecpp::MULTILINE(), false);
526 options.set_multiline(false);
527 TestOneOption("no MULTILINE", "^cruel$", str, options, false, false);
528 }
529
530 static void Test_DOTALL() {
531 RE_Options options;
532 RE_Options options2;
533 const char *str = "HELLO\n" "cruel\n" "world";
534
535 options.set_dotall(true);
536 TestOneOption("DOTALL (class)", "HELLO.*world", str, options, true);
537 TestOneOption("DOTALL (class2)", "HELLO.*world", str, options2.set_dotall(true), true);
538 TestOneOption("DOTALL (function)", "HELLO.*world", str, pcrecpp::DOTALL(), true);
539 options.set_dotall(false);
540 TestOneOption("no DOTALL", "HELLO.*world", str, options, true, false);
541 }
542
543 static void Test_DOLLAR_ENDONLY() {
544 RE_Options options;
545 RE_Options options2;
546 const char *str = "HELLO world\n";
547
548 TestOneOption("no DOLLAR_ENDONLY", "world$", str, options, false);
549 options.set_dollar_endonly(true);
550 TestOneOption("DOLLAR_ENDONLY 1", "world$", str, options, false, false);
551 TestOneOption("DOLLAR_ENDONLY 2", "world$", str, options2.set_dollar_endonly(true), false, false);
552 }
553
554 static void Test_EXTRA() {
555 RE_Options options;
556 const char *str = "HELLO";
557
558 options.set_extra(true);
559 TestOneOption("EXTRA 1", "\\HELL\\O", str, options, true, false );
560 TestOneOption("EXTRA 2", "\\HELL\\O", str, RE_Options().set_extra(true), true, false );
561 options.set_extra(false);
562 TestOneOption("no EXTRA", "\\HELL\\O", str, options, true );
563 }
564
565 static void Test_EXTENDED() {
566 RE_Options options;
567 RE_Options options2;
568 const char *str = "HELLO world";
569
570 options.set_extended(true);
571 TestOneOption("EXTENDED (class)", "HELLO world", str, options, false, false);
572 TestOneOption("EXTENDED (class2)", "HELLO world", str, options2.set_extended(true), false, false);
573 TestOneOption("EXTENDED (class)",
574 "^ HE L{2} O "
575 "\\s+ "
576 "\\w+ $ ",
577 str,
578 options,
579 false);
580
581 TestOneOption("EXTENDED (function)", "HELLO world", str, pcrecpp::EXTENDED(), false, false);
582 TestOneOption("EXTENDED (function)",
583 "^ HE L{2} O "
584 "\\s+ "
585 "\\w+ $ ",
586 str,
587 pcrecpp::EXTENDED(),
588 false);
589
590 options.set_extended(false);
591 TestOneOption("no EXTENDED", "HELLO world", str, options, false);
592 }
593
594 static void Test_NO_AUTO_CAPTURE() {
595 RE_Options options;
596 const char *str = "HELLO world";
597 string captured;
598
599 printf("Testing Option <no NO_AUTO_CAPTURE>\n");
600 if (VERBOSE_TEST)
601 printf("parentheses capture text\n");
602 RE re("(world|universe)$", options);
603 CHECK(re.Extract("\\1", str , &captured));
604 CHECK_EQ(captured, "world");
605 options.set_no_auto_capture(true);
606 printf("testing Option <NO_AUTO_CAPTURE>\n");
607 if (VERBOSE_TEST)
608 printf("parentheses do not capture text\n");
609 re.Extract("\\1",str, &captured );
610 CHECK_EQ(captured, "world");
611 }
612
613 static void Test_UNGREEDY() {
614 RE_Options options;
615 const char *str = "HELLO, 'this' is the 'world'";
616
617 options.set_ungreedy(true);
618 GetOneOptionResult("UNGREEDY 1", "('.*')", str, options, false, "'this'" );
619 GetOneOptionResult("UNGREEDY 2", "('.*')", str, RE_Options().set_ungreedy(true), false, "'this'" );
620 GetOneOptionResult("UNGREEDY", "('.*?')", str, options, false, "'this' is the 'world'" );
621
622 options.set_ungreedy(false);
623 GetOneOptionResult("no UNGREEDY", "('.*')", str, options, false, "'this' is the 'world'" );
624 GetOneOptionResult("no UNGREEDY", "('.*?')", str, options, false, "'this'" );
625 }
626
627 static void Test_all_options() {
628 const char *str = "HELLO\n" "cruel\n" "world";
629 RE_Options options;
630 options.set_all_options(PCRE_CASELESS | PCRE_DOTALL);
631
632 TestOneOption("all_options (CASELESS|DOTALL)", "^hello.*WORLD", str , options, false);
633 options.set_all_options(0);
634 TestOneOption("all_options (0)", "^hello.*WORLD", str , options, false, false);
635 options.set_all_options(PCRE_MULTILINE | PCRE_EXTENDED);
636
637 TestOneOption("all_options (MULTILINE|EXTENDED)", " ^ c r u e l $ ", str, options, false);
638 TestOneOption("all_options (MULTILINE|EXTENDED) with constructor",
639 " ^ c r u e l $ ",
640 str,
641 RE_Options(PCRE_MULTILINE | PCRE_EXTENDED),
642 false);
643
644 TestOneOption("all_options (MULTILINE|EXTENDED) with concatenation",
645 " ^ c r u e l $ ",
646 str,
647 RE_Options()
648 .set_multiline(true)
649 .set_extended(true),
650 false);
651
652 options.set_all_options(0);
653 TestOneOption("all_options (0)", "^ c r u e l $", str, options, false, false);
654
655 }
656
657 static void TestOptions() {
658 printf("Testing Options\n");
659 Test_CASELESS();
660 Test_MULTILINE();
661 Test_DOTALL();
662 Test_DOLLAR_ENDONLY();
663 Test_EXTENDED();
664 Test_NO_AUTO_CAPTURE();
665 Test_UNGREEDY();
666 Test_EXTRA();
667 Test_all_options();
668 }
669
670 int main(int argc, char** argv) {
671 // Treat any flag as --help
672 if (argc > 1 && argv[1][0] == '-') {
673 printf("Usage: %s [timing1|timing2|timing3 num-iters]\n"
674 " If 'timingX ###' is specified, run the given timing test\n"
675 " with the given number of iterations, rather than running\n"
676 " the default corectness test.\n", argv[0]);
677 return 0;
678 }
679
680 if (argc > 1) {
681 if ( argc == 2 || atoi(argv[2]) == 0) {
682 printf("timing mode needs a num-iters argument\n");
683 return 1;
684 }
685 if (!strcmp(argv[1], "timing1"))
686 Timing1(atoi(argv[2]));
687 else if (!strcmp(argv[1], "timing2"))
688 Timing2(atoi(argv[2]));
689 else if (!strcmp(argv[1], "timing3"))
690 Timing3(atoi(argv[2]));
691 else
692 printf("Unknown argument '%s'\n", argv[1]);
693 return 0;
694 }
695
696 printf("Testing FullMatch\n");
697
698 int i;
699 string s;
700
701 /***** FullMatch with no args *****/
702
703 CHECK(RE("h.*o").FullMatch("hello"));
704 CHECK(!RE("h.*o").FullMatch("othello"));
705 CHECK(!RE("h.*o").FullMatch("hello!"));
706
707 /***** FullMatch with args *****/
708
709 // Zero-arg
710 CHECK(RE("\\d+").FullMatch("1001"));
711
712 // Single-arg
713 CHECK(RE("(\\d+)").FullMatch("1001", &i));
714 CHECK_EQ(i, 1001);
715 CHECK(RE("(-?\\d+)").FullMatch("-123", &i));
716 CHECK_EQ(i, -123);
717 CHECK(!RE("()\\d+").FullMatch("10", &i));
718 CHECK(!RE("(\\d+)").FullMatch("1234567890123456789012345678901234567890",
719 &i));
720
721 // Digits surrounding integer-arg
722 CHECK(RE("1(\\d*)4").FullMatch("1234", &i));
723 CHECK_EQ(i, 23);
724 CHECK(RE("(\\d)\\d+").FullMatch("1234", &i));
725 CHECK_EQ(i, 1);
726 CHECK(RE("(-\\d)\\d+").FullMatch("-1234", &i));
727 CHECK_EQ(i, -1);
728 CHECK(RE("(\\d)").PartialMatch("1234", &i));
729 CHECK_EQ(i, 1);
730 CHECK(RE("(-\\d)").PartialMatch("-1234", &i));
731 CHECK_EQ(i, -1);
732
733 // String-arg
734 CHECK(RE("h(.*)o").FullMatch("hello", &s));
735 CHECK_EQ(s, string("ell"));
736
737 // StringPiece-arg
738 StringPiece sp;
739 CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &sp, &i));
740 CHECK_EQ(sp.size(), 4);
741 CHECK(memcmp(sp.data(), "ruby", 4) == 0);
742 CHECK_EQ(i, 1234);
743
744 // Multi-arg
745 CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &s, &i));
746 CHECK_EQ(s, string("ruby"));
747 CHECK_EQ(i, 1234);
748
749 // Ignored arg
750 CHECK(RE("(\\w+)(:)(\\d+)").FullMatch("ruby:1234", &s, (void*)NULL, &i));
751 CHECK_EQ(s, string("ruby"));
752 CHECK_EQ(i, 1234);
753
754 // Type tests
755 {
756 char c;
757 CHECK(RE("(H)ello").FullMatch("Hello", &c));
758 CHECK_EQ(c, 'H');
759 }
760 {
761 unsigned char c;
762 CHECK(RE("(H)ello").FullMatch("Hello", &c));
763 CHECK_EQ(c, static_cast<unsigned char>('H'));
764 }
765 {
766 short v;
767 CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
768 CHECK(RE("(-?\\d+)").FullMatch("-100", &v)); CHECK_EQ(v, -100);
769 CHECK(RE("(-?\\d+)").FullMatch("32767", &v)); CHECK_EQ(v, 32767);
770 CHECK(RE("(-?\\d+)").FullMatch("-32768", &v)); CHECK_EQ(v, -32768);
771 CHECK(!RE("(-?\\d+)").FullMatch("-32769", &v));
772 CHECK(!RE("(-?\\d+)").FullMatch("32768", &v));
773 }
774 {
775 unsigned short v;
776 CHECK(RE("(\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
777 CHECK(RE("(\\d+)").FullMatch("32767", &v)); CHECK_EQ(v, 32767);
778 CHECK(RE("(\\d+)").FullMatch("65535", &v)); CHECK_EQ(v, 65535);
779 CHECK(!RE("(\\d+)").FullMatch("65536", &v));
780 }
781 {
782 int v;
783 static const int max_value = 0x7fffffff;
784 static const int min_value = -max_value - 1;
785 CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
786 CHECK(RE("(-?\\d+)").FullMatch("-100", &v)); CHECK_EQ(v, -100);
787 CHECK(RE("(-?\\d+)").FullMatch("2147483647", &v)); CHECK_EQ(v, max_value);
788 CHECK(RE("(-?\\d+)").FullMatch("-2147483648", &v)); CHECK_EQ(v, min_value);
789 CHECK(!RE("(-?\\d+)").FullMatch("-2147483649", &v));
790 CHECK(!RE("(-?\\d+)").FullMatch("2147483648", &v));
791 }
792 {
793 unsigned int v;
794 static const unsigned int max_value = 0xfffffffful;
795 CHECK(RE("(\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
796 CHECK(RE("(\\d+)").FullMatch("4294967295", &v)); CHECK_EQ(v, max_value);
797 CHECK(!RE("(\\d+)").FullMatch("4294967296", &v));
798 }
799 #ifdef HAVE_LONG_LONG
800 {
801 long long v;
802 static const long long max_value = 0x7fffffffffffffffLL;
803 static const long long min_value = -max_value - 1;
804 char buf[32];
805
806 CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
807 CHECK(RE("(-?\\d+)").FullMatch("-100",&v)); CHECK_EQ(v, -100);
808
809 snprintf(buf, sizeof(buf), "%lld", max_value);
810 CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
811
812 snprintf(buf, sizeof(buf), "%lld", min_value);
813 CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, min_value);
814
815 snprintf(buf, sizeof(buf), "%lld", max_value);
816 assert(buf[strlen(buf)-1] != '9');
817 buf[strlen(buf)-1]++;
818 CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
819
820 snprintf(buf, sizeof(buf), "%lld", min_value);
821 assert(buf[strlen(buf)-1] != '9');
822 buf[strlen(buf)-1]++;
823 CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
824 }
825 #endif
826 #if defined HAVE_UNSIGNED_LONG_LONG && defined HAVE_LONG_LONG
827 {
828 unsigned long long v;
829 long long v2;
830 static const unsigned long long max_value = 0xffffffffffffffffULL;
831 char buf[32];
832
833 CHECK(RE("(-?\\d+)").FullMatch("100",&v)); CHECK_EQ(v, 100);
834 CHECK(RE("(-?\\d+)").FullMatch("-100",&v2)); CHECK_EQ(v2, -100);
835
836 snprintf(buf, sizeof(buf), "%llu", max_value);
837 CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
838
839 assert(buf[strlen(buf)-1] != '9');
840 buf[strlen(buf)-1]++;
841 CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
842 }
843 #endif
844 {
845 float v;
846 CHECK(RE("(.*)").FullMatch("100", &v));
847 CHECK(RE("(.*)").FullMatch("-100.", &v));
848 CHECK(RE("(.*)").FullMatch("1e23", &v));
849 }
850 {
851 double v;
852 CHECK(RE("(.*)").FullMatch("100", &v));
853 CHECK(RE("(.*)").FullMatch("-100.", &v));
854 CHECK(RE("(.*)").FullMatch("1e23", &v));
855 }
856
857 // Check that matching is fully anchored
858 CHECK(!RE("(\\d+)").FullMatch("x1001", &i));
859 CHECK(!RE("(\\d+)").FullMatch("1001x", &i));
860 CHECK(RE("x(\\d+)").FullMatch("x1001", &i)); CHECK_EQ(i, 1001);
861 CHECK(RE("(\\d+)x").FullMatch("1001x", &i)); CHECK_EQ(i, 1001);
862
863 // Braces
864 CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcd"));
865 CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcde"));
866 CHECK(!RE("[0-9a-f+.-]{5,}").FullMatch("0abc"));
867
868 // Complicated RE
869 CHECK(RE("foo|bar|[A-Z]").FullMatch("foo"));
870 CHECK(RE("foo|bar|[A-Z]").FullMatch("bar"));
871 CHECK(RE("foo|bar|[A-Z]").FullMatch("X"));
872 CHECK(!RE("foo|bar|[A-Z]").FullMatch("XY"));
873
874 // Check full-match handling (needs '$' tacked on internally)
875 CHECK(RE("fo|foo").FullMatch("fo"));
876 CHECK(RE("fo|foo").FullMatch("foo"));
877 CHECK(RE("fo|foo$").FullMatch("fo"));
878 CHECK(RE("fo|foo$").FullMatch("foo"));
879 CHECK(RE("foo$").FullMatch("foo"));
880 CHECK(!RE("foo\\$").FullMatch("foo$bar"));
881 CHECK(!RE("fo|bar").FullMatch("fox"));
882
883 // Uncomment the following if we change the handling of '$' to
884 // prevent it from matching a trailing newline
885 if (false) {
886 // Check that we don't get bitten by pcre's special handling of a
887 // '\n' at the end of the string matching '$'
888 CHECK(!RE("foo$").PartialMatch("foo\n"));
889 }
890
891 // Number of args
892 int a[16];
893 CHECK(RE("").FullMatch(""));
894
895 memset(a, 0, sizeof(0));
896 CHECK(RE("(\\d){1}").FullMatch("1",
897 &a[0]));
898 CHECK_EQ(a[0], 1);
899
900 memset(a, 0, sizeof(0));
901 CHECK(RE("(\\d)(\\d)").FullMatch("12",
902 &a[0], &a[1]));
903 CHECK_EQ(a[0], 1);
904 CHECK_EQ(a[1], 2);
905
906 memset(a, 0, sizeof(0));
907 CHECK(RE("(\\d)(\\d)(\\d)").FullMatch("123",
908 &a[0], &a[1], &a[2]));
909 CHECK_EQ(a[0], 1);
910 CHECK_EQ(a[1], 2);
911 CHECK_EQ(a[2], 3);
912
913 memset(a, 0, sizeof(0));
914 CHECK(RE("(\\d)(\\d)(\\d)(\\d)").FullMatch("1234",
915 &a[0], &a[1], &a[2], &a[3]));
916 CHECK_EQ(a[0], 1);
917 CHECK_EQ(a[1], 2);
918 CHECK_EQ(a[2], 3);
919 CHECK_EQ(a[3], 4);
920
921 memset(a, 0, sizeof(0));
922 CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("12345",
923 &a[0], &a[1], &a[2],
924 &a[3], &a[4]));
925 CHECK_EQ(a[0], 1);
926 CHECK_EQ(a[1], 2);
927 CHECK_EQ(a[2], 3);
928 CHECK_EQ(a[3], 4);
929 CHECK_EQ(a[4], 5);
930
931 memset(a, 0, sizeof(0));
932 CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("123456",
933 &a[0], &a[1], &a[2],
934 &a[3], &a[4], &a[5]));
935 CHECK_EQ(a[0], 1);
936 CHECK_EQ(a[1], 2);
937 CHECK_EQ(a[2], 3);
938 CHECK_EQ(a[3], 4);
939 CHECK_EQ(a[4], 5);
940 CHECK_EQ(a[5], 6);
941
942 memset(a, 0, sizeof(0));
943 CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("1234567",
944 &a[0], &a[1], &a[2], &a[3],
945 &a[4], &a[5], &a[6]));
946 CHECK_EQ(a[0], 1);
947 CHECK_EQ(a[1], 2);
948 CHECK_EQ(a[2], 3);
949 CHECK_EQ(a[3], 4);
950 CHECK_EQ(a[4], 5);
951 CHECK_EQ(a[5], 6);
952 CHECK_EQ(a[6], 7);
953
954 memset(a, 0, sizeof(0));
955 CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)"
956 "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch(
957 "1234567890123456",
958 &a[0], &a[1], &a[2], &a[3],
959 &a[4], &a[5], &a[6], &a[7],
960 &a[8], &a[9], &a[10], &a[11],
961 &a[12], &a[13], &a[14], &a[15]));
962 CHECK_EQ(a[0], 1);
963 CHECK_EQ(a[1], 2);
964 CHECK_EQ(a[2], 3);
965 CHECK_EQ(a[3], 4);
966 CHECK_EQ(a[4], 5);
967 CHECK_EQ(a[5], 6);
968 CHECK_EQ(a[6], 7);
969 CHECK_EQ(a[7], 8);
970 CHECK_EQ(a[8], 9);
971 CHECK_EQ(a[9], 0);
972 CHECK_EQ(a[10], 1);
973 CHECK_EQ(a[11], 2);
974 CHECK_EQ(a[12], 3);
975 CHECK_EQ(a[13], 4);
976 CHECK_EQ(a[14], 5);
977 CHECK_EQ(a[15], 6);
978
979 /***** PartialMatch *****/
980
981 printf("Testing PartialMatch\n");
982
983 CHECK(RE("h.*o").PartialMatch("hello"));
984 CHECK(RE("h.*o").PartialMatch("othello"));
985 CHECK(RE("h.*o").PartialMatch("hello!"));
986 CHECK(RE("((((((((((((((((((((x))))))))))))))))))))").PartialMatch("x"));
987
988 RadixTests();
989 TestReplace();
990 TestExtract();
991 TestConsume();
992 TestFindAndConsume();
993 TestMatchNumberPeculiarity();
994
995 // Check the pattern() accessor
996 {
997 const string kPattern = "http://([^/]+)/.*";
998 const RE re(kPattern);
999 CHECK_EQ(kPattern, re.pattern());
1000 }
1001
1002 // Check RE error field.
1003 {
1004 RE re("foo");
1005 CHECK(re.error().empty()); // Must have no error
1006 }
1007
1008 #ifdef SUPPORT_UTF8
1009 // Check UTF-8 handling
1010 {
1011 printf("Testing UTF-8 handling\n");
1012
1013 // Three Japanese characters (nihongo)
1014 const char utf8_string[] = {
1015 0xe6, 0x97, 0xa5, // 65e5
1016 0xe6, 0x9c, 0xac, // 627c
1017 0xe8, 0xaa, 0x9e, // 8a9e
1018 0
1019 };
1020 const char utf8_pattern[] = {
1021 '.',
1022 0xe6, 0x9c, 0xac, // 627c
1023 '.',
1024 0
1025 };
1026
1027 // Both should match in either mode, bytes or UTF-8
1028 RE re_test1(".........");
1029 CHECK(re_test1.FullMatch(utf8_string));
1030 RE re_test2("...", pcrecpp::UTF8());
1031 CHECK(re_test2.FullMatch(utf8_string));
1032
1033 // Check that '.' matches one byte or UTF-8 character
1034 // according to the mode.
1035 string ss;
1036 RE re_test3("(.)");
1037 CHECK(re_test3.PartialMatch(utf8_string, &ss));
1038 CHECK_EQ(ss, string("\xe6"));
1039 RE re_test4("(.)", pcrecpp::UTF8());
1040 CHECK(re_test4.PartialMatch(utf8_string, &ss));
1041 CHECK_EQ(ss, string("\xe6\x97\xa5"));
1042
1043 // Check that string matches itself in either mode
1044 RE re_test5(utf8_string);
1045 CHECK(re_test5.FullMatch(utf8_string));
1046 RE re_test6(utf8_string, pcrecpp::UTF8());
1047 CHECK(re_test6.FullMatch(utf8_string));
1048
1049 // Check that pattern matches string only in UTF8 mode
1050 RE re_test7(utf8_pattern);
1051 CHECK(!re_test7.FullMatch(utf8_string));
1052 RE re_test8(utf8_pattern, pcrecpp::UTF8());
1053 CHECK(re_test8.FullMatch(utf8_string));
1054 }
1055
1056 // Check that ungreedy, UTF8 regular expressions don't match when they
1057 // oughtn't -- see bug 82246.
1058 {
1059 // This code always worked.
1060 const char* pattern = "\\w+X";
1061 const string target = "a aX";
1062 RE match_sentence(pattern);
1063 RE match_sentence_re(pattern, pcrecpp::UTF8());
1064
1065 CHECK(!match_sentence.FullMatch(target));
1066 CHECK(!match_sentence_re.FullMatch(target));
1067 }
1068
1069 {
1070 const char* pattern = "(?U)\\w+X";
1071 const string target = "a aX";
1072 RE match_sentence(pattern);
1073 RE match_sentence_re(pattern, pcrecpp::UTF8());
1074
1075 CHECK(!match_sentence.FullMatch(target));
1076 CHECK(!match_sentence_re.FullMatch(target));
1077 }
1078 #endif /* def SUPPORT_UTF8 */
1079
1080 printf("Testing error reporting\n");
1081
1082 { RE re("a\\1"); CHECK(!re.error().empty()); }
1083 {
1084 RE re("a[x");
1085 CHECK(!re.error().empty());
1086 }
1087 {
1088 RE re("a[z-a]");
1089 CHECK(!re.error().empty());
1090 }
1091 {
1092 RE re("a[[:foobar:]]");
1093 CHECK(!re.error().empty());
1094 }
1095 {
1096 RE re("a(b");
1097 CHECK(!re.error().empty());
1098 }
1099 {
1100 RE re("a\\");
1101 CHECK(!re.error().empty());
1102 }
1103
1104 // Test that recursion is stopped
1105 TestRecursion();
1106
1107 // Test Options
1108 if (getenv("VERBOSE_TEST") != NULL)
1109 VERBOSE_TEST = true;
1110 TestOptions();
1111
1112 // Done
1113 printf("OK\n");
1114
1115 return 0;
1116 }

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12