/[pcre]/code/tags/pcre-7.7/pcrecpp_unittest.cc
ViewVC logotype

Contents of /code/tags/pcre-7.7/pcrecpp_unittest.cc

Parent Directory | Revision Log


Revision 326 - (show annotations) (download)
Sat Mar 8 17:24:02 2008 UTC (6 years, 1 month ago) by ph10
Original Path: code/trunk/pcrecpp_unittest.cc
File size: 38916 byte(s)
Craig's patch to the QuoteMeta function in pcrecpp.cc so that it escapes the
NUL character as backslash + 0 rather than backslash + NUL, because PCRE
doesn't support NULs in patterns.

1 // -*- coding: utf-8 -*-
2 //
3 // Copyright (c) 2005 - 2006, Google Inc.
4 // All rights reserved.
5 //
6 // Redistribution and use in source and binary forms, with or without
7 // modification, are permitted provided that the following conditions are
8 // met:
9 //
10 // * Redistributions of source code must retain the above copyright
11 // notice, this list of conditions and the following disclaimer.
12 // * Redistributions in binary form must reproduce the above
13 // copyright notice, this list of conditions and the following disclaimer
14 // in the documentation and/or other materials provided with the
15 // distribution.
16 // * Neither the name of Google Inc. nor the names of its
17 // contributors may be used to endorse or promote products derived from
18 // this software without specific prior written permission.
19 //
20 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 //
32 // Author: Sanjay Ghemawat
33 //
34 // TODO: Test extractions for PartialMatch/Consume
35
36 #ifdef HAVE_CONFIG_H
37 #include "config.h"
38 #endif
39
40 #include <stdio.h>
41 #include <cassert>
42 #include <vector>
43 #include "pcrecpp.h"
44
45 using pcrecpp::StringPiece;
46 using pcrecpp::RE;
47 using pcrecpp::RE_Options;
48 using pcrecpp::Hex;
49 using pcrecpp::Octal;
50 using pcrecpp::CRadix;
51
52 static bool VERBOSE_TEST = false;
53
// CHECK dies with a fatal error if condition is not true.  It is *not*
// controlled by NDEBUG, so the check will be executed regardless of
// compilation mode.  Therefore, it is safe to do things like:
//    CHECK_EQ(fp->Write(x), 4)
// On failure it prints "<file>:<line>: Check failed: <condition text>" to
// stderr and ends the process with exit status 1.
#define CHECK(condition) do { \
  if (!(condition)) { \
    fprintf(stderr, "%s:%d: Check failed: %s\n", \
            __FILE__, __LINE__, #condition); \
    exit(1); \
  } \
} while (0)

// CHECK_EQ(a, b) dies unless a == b.  Both operands are parenthesized so that
// an argument containing an operator of lower precedence than == (for
// example `x & 1`) is compared as written instead of being regrouped.
#define CHECK_EQ(a, b) CHECK((a) == (b))
67
68 static void Timing1(int num_iters) {
69 // Same pattern lots of times
70 RE pattern("ruby:\\d+");
71 StringPiece p("ruby:1234");
72 for (int j = num_iters; j > 0; j--) {
73 CHECK(pattern.FullMatch(p));
74 }
75 }
76
77 static void Timing2(int num_iters) {
78 // Same pattern lots of times
79 RE pattern("ruby:(\\d+)");
80 int i;
81 for (int j = num_iters; j > 0; j--) {
82 CHECK(pattern.FullMatch("ruby:1234", &i));
83 CHECK_EQ(i, 1234);
84 }
85 }
86
87 static void Timing3(int num_iters) {
88 string text_string;
89 for (int j = num_iters; j > 0; j--) {
90 text_string += "this is another line\n";
91 }
92
93 RE line_matcher(".*\n");
94 string line;
95 StringPiece text(text_string);
96 int counter = 0;
97 while (line_matcher.Consume(&text)) {
98 counter++;
99 }
100 printf("Matched %d lines\n", counter);
101 }
102
#if 0  // enable this if you have a way of defining VirtualProcessSize()

// Checks for memory leaks: constructs 100000 distinct RE objects, samples the
// process size at the halfway point and again at the end, and requires the
// second half to have grown the process by less than 2%.
static void LeakTest() {
  unsigned long long initial_size = 0;
  for (int i = 0; i < 100000; i++) {
    if (i == 50000) {
      initial_size = VirtualProcessSize();
      printf("Size after 50000: %llu\n", initial_size);
    }
    char buf[100];  // definitely big enough
    sprintf(buf, "pat%09d", i);
    RE newre(buf);
  }
  // Same type as initial_size, and the type the %llu conversion expects.
  const unsigned long long final_size = VirtualProcessSize();
  printf("Size after 100000: %llu\n", final_size);
  const double growth = double(final_size - initial_size) / final_size;
  printf("Growth: %0.2f%%\n", growth * 100);
  CHECK(growth < 0.02);    // Allow < 2% growth
}

#endif
125
126 static void RadixTests() {
127 printf("Testing hex\n");
128
129 #define CHECK_HEX(type, value) \
130 do { \
131 type v; \
132 CHECK(RE("([0-9a-fA-F]+)[uUlL]*").FullMatch(#value, Hex(&v))); \
133 CHECK_EQ(v, 0x ## value); \
134 CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0x" #value, CRadix(&v))); \
135 CHECK_EQ(v, 0x ## value); \
136 } while(0)
137
138 CHECK_HEX(short, 2bad);
139 CHECK_HEX(unsigned short, 2badU);
140 CHECK_HEX(int, dead);
141 CHECK_HEX(unsigned int, deadU);
142 CHECK_HEX(long, 7eadbeefL);
143 CHECK_HEX(unsigned long, deadbeefUL);
144 #ifdef HAVE_LONG_LONG
145 CHECK_HEX(long long, 12345678deadbeefLL);
146 #endif
147 #ifdef HAVE_UNSIGNED_LONG_LONG
148 CHECK_HEX(unsigned long long, cafebabedeadbeefULL);
149 #endif
150
151 #undef CHECK_HEX
152
153 printf("Testing octal\n");
154
155 #define CHECK_OCTAL(type, value) \
156 do { \
157 type v; \
158 CHECK(RE("([0-7]+)[uUlL]*").FullMatch(#value, Octal(&v))); \
159 CHECK_EQ(v, 0 ## value); \
160 CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0" #value, CRadix(&v))); \
161 CHECK_EQ(v, 0 ## value); \
162 } while(0)
163
164 CHECK_OCTAL(short, 77777);
165 CHECK_OCTAL(unsigned short, 177777U);
166 CHECK_OCTAL(int, 17777777777);
167 CHECK_OCTAL(unsigned int, 37777777777U);
168 CHECK_OCTAL(long, 17777777777L);
169 CHECK_OCTAL(unsigned long, 37777777777UL);
170 #ifdef HAVE_LONG_LONG
171 CHECK_OCTAL(long long, 777777777777777777777LL);
172 #endif
173 #ifdef HAVE_UNSIGNED_LONG_LONG
174 CHECK_OCTAL(unsigned long long, 1777777777777777777777ULL);
175 #endif
176
177 #undef CHECK_OCTAL
178
179 printf("Testing decimal\n");
180
181 #define CHECK_DECIMAL(type, value) \
182 do { \
183 type v; \
184 CHECK(RE("(-?[0-9]+)[uUlL]*").FullMatch(#value, &v)); \
185 CHECK_EQ(v, value); \
186 CHECK(RE("(-?[0-9a-fA-FxX]+)[uUlL]*").FullMatch(#value, CRadix(&v))); \
187 CHECK_EQ(v, value); \
188 } while(0)
189
190 CHECK_DECIMAL(short, -1);
191 CHECK_DECIMAL(unsigned short, 9999);
192 CHECK_DECIMAL(int, -1000);
193 CHECK_DECIMAL(unsigned int, 12345U);
194 CHECK_DECIMAL(long, -10000000L);
195 CHECK_DECIMAL(unsigned long, 3083324652U);
196 #ifdef HAVE_LONG_LONG
197 CHECK_DECIMAL(long long, -100000000000000LL);
198 #endif
199 #ifdef HAVE_UNSIGNED_LONG_LONG
200 CHECK_DECIMAL(unsigned long long, 1234567890987654321ULL);
201 #endif
202
203 #undef CHECK_DECIMAL
204
205 }
206
207 static void TestReplace() {
208 printf("Testing Replace\n");
209
210 struct ReplaceTest {
211 const char *regexp;
212 const char *rewrite;
213 const char *original;
214 const char *single;
215 const char *global;
216 int global_count; // the expected return value from ReplaceAll
217 };
218 static const ReplaceTest tests[] = {
219 { "(qu|[b-df-hj-np-tv-z]*)([a-z]+)",
220 "\\2\\1ay",
221 "the quick brown fox jumps over the lazy dogs.",
222 "ethay quick brown fox jumps over the lazy dogs.",
223 "ethay ickquay ownbray oxfay umpsjay overay ethay azylay ogsday.",
224 9 },
225 { "\\w+",
226 "\\0-NOSPAM",
227 "paul.haahr@google.com",
228 "paul-NOSPAM.haahr@google.com",
229 "paul-NOSPAM.haahr-NOSPAM@google-NOSPAM.com-NOSPAM",
230 4 },
231 { "^",
232 "(START)",
233 "foo",
234 "(START)foo",
235 "(START)foo",
236 1 },
237 { "^",
238 "(START)",
239 "",
240 "(START)",
241 "(START)",
242 1 },
243 { "$",
244 "(END)",
245 "",
246 "(END)",
247 "(END)",
248 1 },
249 { "b",
250 "bb",
251 "ababababab",
252 "abbabababab",
253 "abbabbabbabbabb",
254 5 },
255 { "b",
256 "bb",
257 "bbbbbb",
258 "bbbbbbb",
259 "bbbbbbbbbbbb",
260 6 },
261 { "b+",
262 "bb",
263 "bbbbbb",
264 "bb",
265 "bb",
266 1 },
267 { "b*",
268 "bb",
269 "bbbbbb",
270 "bb",
271 "bb",
272 1 },
273 { "b*",
274 "bb",
275 "aaaaa",
276 "bbaaaaa",
277 "bbabbabbabbabbabb",
278 6 },
279 { "b*",
280 "bb",
281 "aa\naa\n",
282 "bbaa\naa\n",
283 "bbabbabb\nbbabbabb\nbb",
284 7 },
285 { "b*",
286 "bb",
287 "aa\raa\r",
288 "bbaa\raa\r",
289 "bbabbabb\rbbabbabb\rbb",
290 7 },
291 { "b*",
292 "bb",
293 "aa\r\naa\r\n",
294 "bbaa\r\naa\r\n",
295 "bbabbabb\r\nbbabbabb\r\nbb",
296 7 },
297 #ifdef SUPPORT_UTF8
298 { "b*",
299 "bb",
300 "\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8", // utf8
301 "bb\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8",
302 "bb\xE3\x83\x9B""bb""\xE3\x83\xBC""bb""\xE3\x83\xA0""bb""\xE3\x81\xB8""bb",
303 5 },
304 { "b*",
305 "bb",
306 "\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n", // utf8
307 "bb\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n",
308 ("bb\xE3\x83\x9B""bb\r\nbb""\xE3\x83\xBC""bb\rbb""\xE3\x83\xA0"
309 "bb\nbb""\xE3\x81\xB8""bb\r\nbb"),
310 9 },
311 #endif
312 { "", NULL, NULL, NULL, NULL, 0 }
313 };
314
315 #ifdef SUPPORT_UTF8
316 const bool support_utf8 = true;
317 #else
318 const bool support_utf8 = false;
319 #endif
320
321 for (const ReplaceTest *t = tests; t->original != NULL; ++t) {
322 RE re(t->regexp, RE_Options(PCRE_NEWLINE_CRLF).set_utf8(support_utf8));
323 assert(re.error().empty());
324 string one(t->original);
325 CHECK(re.Replace(t->rewrite, &one));
326 CHECK_EQ(one, t->single);
327 string all(t->original);
328 const int replace_count = re.GlobalReplace(t->rewrite, &all);
329 CHECK_EQ(all, t->global);
330 CHECK_EQ(replace_count, t->global_count);
331 }
332
333 // One final test: test \r\n replacement when we're not in CRLF mode
334 {
335 RE re("b*", RE_Options(PCRE_NEWLINE_CR).set_utf8(support_utf8));
336 assert(re.error().empty());
337 string all("aa\r\naa\r\n");
338 CHECK_EQ(re.GlobalReplace("bb", &all), 9);
339 CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
340 }
341 {
342 RE re("b*", RE_Options(PCRE_NEWLINE_LF).set_utf8(support_utf8));
343 assert(re.error().empty());
344 string all("aa\r\naa\r\n");
345 CHECK_EQ(re.GlobalReplace("bb", &all), 9);
346 CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
347 }
348 // TODO: test what happens when no PCRE_NEWLINE_* flag is set.
349 // Alas, the answer depends on how pcre was compiled.
350 }
351
352 static void TestExtract() {
353 printf("Testing Extract\n");
354
355 string s;
356
357 CHECK(RE("(.*)@([^.]*)").Extract("\\2!\\1", "boris@kremvax.ru", &s));
358 CHECK_EQ(s, "kremvax!boris");
359
360 // check the RE interface as well
361 CHECK(RE(".*").Extract("'\\0'", "foo", &s));
362 CHECK_EQ(s, "'foo'");
363 CHECK(!RE("bar").Extract("'\\0'", "baz", &s));
364 CHECK_EQ(s, "'foo'");
365 }
366
367 static void TestConsume() {
368 printf("Testing Consume\n");
369
370 string word;
371
372 string s(" aaa b!@#$@#$cccc");
373 StringPiece input(s);
374
375 RE r("\\s*(\\w+)"); // matches a word, possibly proceeded by whitespace
376 CHECK(r.Consume(&input, &word));
377 CHECK_EQ(word, "aaa");
378 CHECK(r.Consume(&input, &word));
379 CHECK_EQ(word, "b");
380 CHECK(! r.Consume(&input, &word));
381 }
382
383 static void TestFindAndConsume() {
384 printf("Testing FindAndConsume\n");
385
386 string word;
387
388 string s(" aaa b!@#$@#$cccc");
389 StringPiece input(s);
390
391 RE r("(\\w+)"); // matches a word
392 CHECK(r.FindAndConsume(&input, &word));
393 CHECK_EQ(word, "aaa");
394 CHECK(r.FindAndConsume(&input, &word));
395 CHECK_EQ(word, "b");
396 CHECK(r.FindAndConsume(&input, &word));
397 CHECK_EQ(word, "cccc");
398 CHECK(! r.FindAndConsume(&input, &word));
399 }
400
401 static void TestMatchNumberPeculiarity() {
402 printf("Testing match-number peculiaraity\n");
403
404 string word1;
405 string word2;
406 string word3;
407
408 RE r("(foo)|(bar)|(baz)");
409 CHECK(r.PartialMatch("foo", &word1, &word2, &word3));
410 CHECK_EQ(word1, "foo");
411 CHECK_EQ(word2, "");
412 CHECK_EQ(word3, "");
413 CHECK(r.PartialMatch("bar", &word1, &word2, &word3));
414 CHECK_EQ(word1, "");
415 CHECK_EQ(word2, "bar");
416 CHECK_EQ(word3, "");
417 CHECK(r.PartialMatch("baz", &word1, &word2, &word3));
418 CHECK_EQ(word1, "");
419 CHECK_EQ(word2, "");
420 CHECK_EQ(word3, "baz");
421 CHECK(!r.PartialMatch("f", &word1, &word2, &word3));
422
423 string a;
424 CHECK(RE("(foo)|hello").FullMatch("hello", &a));
425 CHECK_EQ(a, "");
426 }
427
428 static void TestRecursion() {
429 printf("Testing recursion\n");
430
431 // Get one string that passes (sometimes), one that never does.
432 string text_good("abcdefghijk");
433 string text_bad("acdefghijkl");
434
435 // According to pcretest, matching text_good against (\w+)*b
436 // requires match_limit of at least 8192, and match_recursion_limit
437 // of at least 37.
438
439 RE_Options options_ml;
440 options_ml.set_match_limit(8192);
441 RE re("(\\w+)*b", options_ml);
442 CHECK(re.PartialMatch(text_good) == true);
443 CHECK(re.PartialMatch(text_bad) == false);
444 CHECK(re.FullMatch(text_good) == false);
445 CHECK(re.FullMatch(text_bad) == false);
446
447 options_ml.set_match_limit(1024);
448 RE re2("(\\w+)*b", options_ml);
449 CHECK(re2.PartialMatch(text_good) == false); // because of match_limit
450 CHECK(re2.PartialMatch(text_bad) == false);
451 CHECK(re2.FullMatch(text_good) == false);
452 CHECK(re2.FullMatch(text_bad) == false);
453
454 RE_Options options_mlr;
455 options_mlr.set_match_limit_recursion(50);
456 RE re3("(\\w+)*b", options_mlr);
457 CHECK(re3.PartialMatch(text_good) == true);
458 CHECK(re3.PartialMatch(text_bad) == false);
459 CHECK(re3.FullMatch(text_good) == false);
460 CHECK(re3.FullMatch(text_bad) == false);
461
462 options_mlr.set_match_limit_recursion(10);
463 RE re4("(\\w+)*b", options_mlr);
464 CHECK(re4.PartialMatch(text_good) == false);
465 CHECK(re4.PartialMatch(text_bad) == false);
466 CHECK(re4.FullMatch(text_good) == false);
467 CHECK(re4.FullMatch(text_bad) == false);
468 }
469
470 // A meta-quoted string, interpreted as a pattern, should always match
471 // the original unquoted string.
472 static void TestQuoteMeta(string unquoted, RE_Options options = RE_Options()) {
473 string quoted = RE::QuoteMeta(unquoted);
474 RE re(quoted, options);
475 CHECK(re.FullMatch(unquoted));
476 }
477
478 // A string containing meaningful regexp characters, which is then meta-
479 // quoted, should not generally match a string the unquoted string does.
480 static void NegativeTestQuoteMeta(string unquoted, string should_not_match,
481 RE_Options options = RE_Options()) {
482 string quoted = RE::QuoteMeta(unquoted);
483 RE re(quoted, options);
484 CHECK(!re.FullMatch(should_not_match));
485 }
486
487 // Tests that quoted meta characters match their original strings,
488 // and that a few things that shouldn't match indeed do not.
489 static void TestQuotaMetaSimple() {
490 TestQuoteMeta("foo");
491 TestQuoteMeta("foo.bar");
492 TestQuoteMeta("foo\\.bar");
493 TestQuoteMeta("[1-9]");
494 TestQuoteMeta("1.5-2.0?");
495 TestQuoteMeta("\\d");
496 TestQuoteMeta("Who doesn't like ice cream?");
497 TestQuoteMeta("((a|b)c?d*e+[f-h]i)");
498 TestQuoteMeta("((?!)xxx).*yyy");
499 TestQuoteMeta("([");
500 TestQuoteMeta(string("foo\0bar", 7));
501 }
502
503 static void TestQuoteMetaSimpleNegative() {
504 NegativeTestQuoteMeta("foo", "bar");
505 NegativeTestQuoteMeta("...", "bar");
506 NegativeTestQuoteMeta("\\.", ".");
507 NegativeTestQuoteMeta("\\.", "..");
508 NegativeTestQuoteMeta("(a)", "a");
509 NegativeTestQuoteMeta("(a|b)", "a");
510 NegativeTestQuoteMeta("(a|b)", "(a)");
511 NegativeTestQuoteMeta("(a|b)", "a|b");
512 NegativeTestQuoteMeta("[0-9]", "0");
513 NegativeTestQuoteMeta("[0-9]", "0-9");
514 NegativeTestQuoteMeta("[0-9]", "[9]");
515 NegativeTestQuoteMeta("((?!)xxx)", "xxx");
516 }
517
518 static void TestQuoteMetaLatin1() {
519 TestQuoteMeta("3\xb2 = 9");
520 }
521
// QuoteMeta checks on multi-byte UTF-8 text.  Compiled only when the build
// defines SUPPORT_UTF8; otherwise this function does nothing.
static void TestQuoteMetaUtf8() {
#ifdef SUPPORT_UTF8
  const RE_Options utf8_mode = pcrecpp::UTF8();

  TestQuoteMeta("Pl\xc3\xa1\x63ido Domingo", utf8_mode);
  TestQuoteMeta("xyz", utf8_mode);                // No fancy utf8
  TestQuoteMeta("\xc2\xb0", utf8_mode);           // 2-byte utf8 (degree symbol)
  TestQuoteMeta("27\xc2\xb0 degrees", utf8_mode); // As a middle character
  TestQuoteMeta("\xe2\x80\xb3", utf8_mode);       // 3-byte utf8 (double prime)
  TestQuoteMeta("\xf0\x9d\x85\x9f", utf8_mode);   // 4-byte utf8 (music note)

  // Same bytes without UTF-8 mode: interpreted as Latin-1, but should still
  // work.
  TestQuoteMeta("27\xc2\xb0");

  // In UTF-8 mode the quoted 2-byte degree symbol must not match text in
  // which each of its bytes is preceded by a literal backslash.
  NegativeTestQuoteMeta("27\xc2\xb0",
                        "27\\\xc2\\\xb0",
                        utf8_mode);
#endif
}
536
537 static void TestQuoteMetaAll() {
538 printf("Testing QuoteMeta\n");
539 TestQuotaMetaSimple();
540 TestQuoteMetaSimpleNegative();
541 TestQuoteMetaLatin1();
542 TestQuoteMetaUtf8();
543 }
544
545 //
546 // Options tests contributed by
547 // Giuseppe Maxia, CTO, Stardata s.r.l.
548 // July 2005
549 //
550 static void GetOneOptionResult(
551 const char *option_name,
552 const char *regex,
553 const char *str,
554 RE_Options options,
555 bool full,
556 string expected) {
557
558 printf("Testing Option <%s>\n", option_name);
559 if(VERBOSE_TEST)
560 printf("/%s/ finds \"%s\" within \"%s\" \n",
561 regex,
562 expected.c_str(),
563 str);
564 string captured("");
565 if (full)
566 RE(regex,options).FullMatch(str, &captured);
567 else
568 RE(regex,options).PartialMatch(str, &captured);
569 CHECK_EQ(captured, expected);
570 }
571
572 static void TestOneOption(
573 const char *option_name,
574 const char *regex,
575 const char *str,
576 RE_Options options,
577 bool full,
578 bool assertive = true) {
579
580 printf("Testing Option <%s>\n", option_name);
581 if (VERBOSE_TEST)
582 printf("'%s' %s /%s/ \n",
583 str,
584 (assertive? "matches" : "doesn't match"),
585 regex);
586 if (assertive) {
587 if (full)
588 CHECK(RE(regex,options).FullMatch(str));
589 else
590 CHECK(RE(regex,options).PartialMatch(str));
591 } else {
592 if (full)
593 CHECK(!RE(regex,options).FullMatch(str));
594 else
595 CHECK(!RE(regex,options).PartialMatch(str));
596 }
597 }
598
599 static void Test_CASELESS() {
600 RE_Options options;
601 RE_Options options2;
602
603 options.set_caseless(true);
604 TestOneOption("CASELESS (class)", "HELLO", "hello", options, false);
605 TestOneOption("CASELESS (class2)", "HELLO", "hello", options2.set_caseless(true), false);
606 TestOneOption("CASELESS (class)", "^[A-Z]+$", "Hello", options, false);
607
608 TestOneOption("CASELESS (function)", "HELLO", "hello", pcrecpp::CASELESS(), false);
609 TestOneOption("CASELESS (function)", "^[A-Z]+$", "Hello", pcrecpp::CASELESS(), false);
610 options.set_caseless(false);
611 TestOneOption("no CASELESS", "HELLO", "hello", options, false, false);
612 }
613
614 static void Test_MULTILINE() {
615 RE_Options options;
616 RE_Options options2;
617 const char *str = "HELLO\n" "cruel\n" "world\n";
618
619 options.set_multiline(true);
620 TestOneOption("MULTILINE (class)", "^cruel$", str, options, false);
621 TestOneOption("MULTILINE (class2)", "^cruel$", str, options2.set_multiline(true), false);
622 TestOneOption("MULTILINE (function)", "^cruel$", str, pcrecpp::MULTILINE(), false);
623 options.set_multiline(false);
624 TestOneOption("no MULTILINE", "^cruel$", str, options, false, false);
625 }
626
627 static void Test_DOTALL() {
628 RE_Options options;
629 RE_Options options2;
630 const char *str = "HELLO\n" "cruel\n" "world";
631
632 options.set_dotall(true);
633 TestOneOption("DOTALL (class)", "HELLO.*world", str, options, true);
634 TestOneOption("DOTALL (class2)", "HELLO.*world", str, options2.set_dotall(true), true);
635 TestOneOption("DOTALL (function)", "HELLO.*world", str, pcrecpp::DOTALL(), true);
636 options.set_dotall(false);
637 TestOneOption("no DOTALL", "HELLO.*world", str, options, true, false);
638 }
639
640 static void Test_DOLLAR_ENDONLY() {
641 RE_Options options;
642 RE_Options options2;
643 const char *str = "HELLO world\n";
644
645 TestOneOption("no DOLLAR_ENDONLY", "world$", str, options, false);
646 options.set_dollar_endonly(true);
647 TestOneOption("DOLLAR_ENDONLY 1", "world$", str, options, false, false);
648 TestOneOption("DOLLAR_ENDONLY 2", "world$", str, options2.set_dollar_endonly(true), false, false);
649 }
650
651 static void Test_EXTRA() {
652 RE_Options options;
653 const char *str = "HELLO";
654
655 options.set_extra(true);
656 TestOneOption("EXTRA 1", "\\HELL\\O", str, options, true, false );
657 TestOneOption("EXTRA 2", "\\HELL\\O", str, RE_Options().set_extra(true), true, false );
658 options.set_extra(false);
659 TestOneOption("no EXTRA", "\\HELL\\O", str, options, true );
660 }
661
662 static void Test_EXTENDED() {
663 RE_Options options;
664 RE_Options options2;
665 const char *str = "HELLO world";
666
667 options.set_extended(true);
668 TestOneOption("EXTENDED (class)", "HELLO world", str, options, false, false);
669 TestOneOption("EXTENDED (class2)", "HELLO world", str, options2.set_extended(true), false, false);
670 TestOneOption("EXTENDED (class)",
671 "^ HE L{2} O "
672 "\\s+ "
673 "\\w+ $ ",
674 str,
675 options,
676 false);
677
678 TestOneOption("EXTENDED (function)", "HELLO world", str, pcrecpp::EXTENDED(), false, false);
679 TestOneOption("EXTENDED (function)",
680 "^ HE L{2} O "
681 "\\s+ "
682 "\\w+ $ ",
683 str,
684 pcrecpp::EXTENDED(),
685 false);
686
687 options.set_extended(false);
688 TestOneOption("no EXTENDED", "HELLO world", str, options, false);
689 }
690
691 static void Test_NO_AUTO_CAPTURE() {
692 RE_Options options;
693 const char *str = "HELLO world";
694 string captured;
695
696 printf("Testing Option <no NO_AUTO_CAPTURE>\n");
697 if (VERBOSE_TEST)
698 printf("parentheses capture text\n");
699 RE re("(world|universe)$", options);
700 CHECK(re.Extract("\\1", str , &captured));
701 CHECK_EQ(captured, "world");
702 options.set_no_auto_capture(true);
703 printf("testing Option <NO_AUTO_CAPTURE>\n");
704 if (VERBOSE_TEST)
705 printf("parentheses do not capture text\n");
706 re.Extract("\\1",str, &captured );
707 CHECK_EQ(captured, "world");
708 }
709
710 static void Test_UNGREEDY() {
711 RE_Options options;
712 const char *str = "HELLO, 'this' is the 'world'";
713
714 options.set_ungreedy(true);
715 GetOneOptionResult("UNGREEDY 1", "('.*')", str, options, false, "'this'" );
716 GetOneOptionResult("UNGREEDY 2", "('.*')", str, RE_Options().set_ungreedy(true), false, "'this'" );
717 GetOneOptionResult("UNGREEDY", "('.*?')", str, options, false, "'this' is the 'world'" );
718
719 options.set_ungreedy(false);
720 GetOneOptionResult("no UNGREEDY", "('.*')", str, options, false, "'this' is the 'world'" );
721 GetOneOptionResult("no UNGREEDY", "('.*?')", str, options, false, "'this'" );
722 }
723
724 static void Test_all_options() {
725 const char *str = "HELLO\n" "cruel\n" "world";
726 RE_Options options;
727 options.set_all_options(PCRE_CASELESS | PCRE_DOTALL);
728
729 TestOneOption("all_options (CASELESS|DOTALL)", "^hello.*WORLD", str , options, false);
730 options.set_all_options(0);
731 TestOneOption("all_options (0)", "^hello.*WORLD", str , options, false, false);
732 options.set_all_options(PCRE_MULTILINE | PCRE_EXTENDED);
733
734 TestOneOption("all_options (MULTILINE|EXTENDED)", " ^ c r u e l $ ", str, options, false);
735 TestOneOption("all_options (MULTILINE|EXTENDED) with constructor",
736 " ^ c r u e l $ ",
737 str,
738 RE_Options(PCRE_MULTILINE | PCRE_EXTENDED),
739 false);
740
741 TestOneOption("all_options (MULTILINE|EXTENDED) with concatenation",
742 " ^ c r u e l $ ",
743 str,
744 RE_Options()
745 .set_multiline(true)
746 .set_extended(true),
747 false);
748
749 options.set_all_options(0);
750 TestOneOption("all_options (0)", "^ c r u e l $", str, options, false, false);
751
752 }
753
754 static void TestOptions() {
755 printf("Testing Options\n");
756 Test_CASELESS();
757 Test_MULTILINE();
758 Test_DOTALL();
759 Test_DOLLAR_ENDONLY();
760 Test_EXTENDED();
761 Test_NO_AUTO_CAPTURE();
762 Test_UNGREEDY();
763 Test_EXTRA();
764 Test_all_options();
765 }
766
767 static void TestConstructors() {
768 printf("Testing constructors\n");
769
770 RE_Options options;
771 options.set_dotall(true);
772 const char *str = "HELLO\n" "cruel\n" "world";
773
774 RE orig("HELLO.*world", options);
775 CHECK(orig.FullMatch(str));
776
777 RE copy1(orig);
778 CHECK(copy1.FullMatch(str));
779
780 RE copy2("not a match");
781 CHECK(!copy2.FullMatch(str));
782 copy2 = copy1;
783 CHECK(copy2.FullMatch(str));
784 copy2 = orig;
785 CHECK(copy2.FullMatch(str));
786
787 // Make sure when we assign to ourselves, nothing bad happens
788 orig = orig;
789 copy1 = copy1;
790 copy2 = copy2;
791 CHECK(orig.FullMatch(str));
792 CHECK(copy1.FullMatch(str));
793 CHECK(copy2.FullMatch(str));
794 }
795
796 int main(int argc, char** argv) {
797 // Treat any flag as --help
798 if (argc > 1 && argv[1][0] == '-') {
799 printf("Usage: %s [timing1|timing2|timing3 num-iters]\n"
800 " If 'timingX ###' is specified, run the given timing test\n"
801 " with the given number of iterations, rather than running\n"
802 " the default corectness test.\n", argv[0]);
803 return 0;
804 }
805
806 if (argc > 1) {
807 if ( argc == 2 || atoi(argv[2]) == 0) {
808 printf("timing mode needs a num-iters argument\n");
809 return 1;
810 }
811 if (!strcmp(argv[1], "timing1"))
812 Timing1(atoi(argv[2]));
813 else if (!strcmp(argv[1], "timing2"))
814 Timing2(atoi(argv[2]));
815 else if (!strcmp(argv[1], "timing3"))
816 Timing3(atoi(argv[2]));
817 else
818 printf("Unknown argument '%s'\n", argv[1]);
819 return 0;
820 }
821
822 printf("Testing FullMatch\n");
823
824 int i;
825 string s;
826
827 /***** FullMatch with no args *****/
828
829 CHECK(RE("h.*o").FullMatch("hello"));
830 CHECK(!RE("h.*o").FullMatch("othello")); // Must be anchored at front
831 CHECK(!RE("h.*o").FullMatch("hello!")); // Must be anchored at end
832 CHECK(RE("a*").FullMatch("aaaa")); // Fullmatch with normal op
833 CHECK(RE("a*?").FullMatch("aaaa")); // Fullmatch with nongreedy op
834 CHECK(RE("a*?\\z").FullMatch("aaaa")); // Two unusual ops
835
836 /***** FullMatch with args *****/
837
838 // Zero-arg
839 CHECK(RE("\\d+").FullMatch("1001"));
840
841 // Single-arg
842 CHECK(RE("(\\d+)").FullMatch("1001", &i));
843 CHECK_EQ(i, 1001);
844 CHECK(RE("(-?\\d+)").FullMatch("-123", &i));
845 CHECK_EQ(i, -123);
846 CHECK(!RE("()\\d+").FullMatch("10", &i));
847 CHECK(!RE("(\\d+)").FullMatch("1234567890123456789012345678901234567890",
848 &i));
849
850 // Digits surrounding integer-arg
851 CHECK(RE("1(\\d*)4").FullMatch("1234", &i));
852 CHECK_EQ(i, 23);
853 CHECK(RE("(\\d)\\d+").FullMatch("1234", &i));
854 CHECK_EQ(i, 1);
855 CHECK(RE("(-\\d)\\d+").FullMatch("-1234", &i));
856 CHECK_EQ(i, -1);
857 CHECK(RE("(\\d)").PartialMatch("1234", &i));
858 CHECK_EQ(i, 1);
859 CHECK(RE("(-\\d)").PartialMatch("-1234", &i));
860 CHECK_EQ(i, -1);
861
862 // String-arg
863 CHECK(RE("h(.*)o").FullMatch("hello", &s));
864 CHECK_EQ(s, string("ell"));
865
866 // StringPiece-arg
867 StringPiece sp;
868 CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &sp, &i));
869 CHECK_EQ(sp.size(), 4);
870 CHECK(memcmp(sp.data(), "ruby", 4) == 0);
871 CHECK_EQ(i, 1234);
872
873 // Multi-arg
874 CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &s, &i));
875 CHECK_EQ(s, string("ruby"));
876 CHECK_EQ(i, 1234);
877
878 // Ignore non-void* NULL arg
879 CHECK(RE("he(.*)lo").FullMatch("hello", (char*)NULL));
880 CHECK(RE("h(.*)o").FullMatch("hello", (string*)NULL));
881 CHECK(RE("h(.*)o").FullMatch("hello", (StringPiece*)NULL));
882 CHECK(RE("(.*)").FullMatch("1234", (int*)NULL));
883 #ifdef HAVE_LONG_LONG
884 CHECK(RE("(.*)").FullMatch("1234567890123456", (long long*)NULL));
885 #endif
886 CHECK(RE("(.*)").FullMatch("123.4567890123456", (double*)NULL));
887 CHECK(RE("(.*)").FullMatch("123.4567890123456", (float*)NULL));
888
889 // Fail on non-void* NULL arg if the match doesn't parse for the given type.
890 CHECK(!RE("h(.*)lo").FullMatch("hello", &s, (char*)NULL));
891 CHECK(!RE("(.*)").FullMatch("hello", (int*)NULL));
892 CHECK(!RE("(.*)").FullMatch("1234567890123456", (int*)NULL));
893 CHECK(!RE("(.*)").FullMatch("hello", (double*)NULL));
894 CHECK(!RE("(.*)").FullMatch("hello", (float*)NULL));
895
896 // Ignored arg
897 CHECK(RE("(\\w+)(:)(\\d+)").FullMatch("ruby:1234", &s, (void*)NULL, &i));
898 CHECK_EQ(s, string("ruby"));
899 CHECK_EQ(i, 1234);
900
901 // Type tests
902 {
903 char c;
904 CHECK(RE("(H)ello").FullMatch("Hello", &c));
905 CHECK_EQ(c, 'H');
906 }
907 {
908 unsigned char c;
909 CHECK(RE("(H)ello").FullMatch("Hello", &c));
910 CHECK_EQ(c, static_cast<unsigned char>('H'));
911 }
912 {
913 short v;
914 CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
915 CHECK(RE("(-?\\d+)").FullMatch("-100", &v)); CHECK_EQ(v, -100);
916 CHECK(RE("(-?\\d+)").FullMatch("32767", &v)); CHECK_EQ(v, 32767);
917 CHECK(RE("(-?\\d+)").FullMatch("-32768", &v)); CHECK_EQ(v, -32768);
918 CHECK(!RE("(-?\\d+)").FullMatch("-32769", &v));
919 CHECK(!RE("(-?\\d+)").FullMatch("32768", &v));
920 }
921 {
922 unsigned short v;
923 CHECK(RE("(\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
924 CHECK(RE("(\\d+)").FullMatch("32767", &v)); CHECK_EQ(v, 32767);
925 CHECK(RE("(\\d+)").FullMatch("65535", &v)); CHECK_EQ(v, 65535);
926 CHECK(!RE("(\\d+)").FullMatch("65536", &v));
927 }
928 {
929 int v;
930 static const int max_value = 0x7fffffff;
931 static const int min_value = -max_value - 1;
932 CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
933 CHECK(RE("(-?\\d+)").FullMatch("-100", &v)); CHECK_EQ(v, -100);
934 CHECK(RE("(-?\\d+)").FullMatch("2147483647", &v)); CHECK_EQ(v, max_value);
935 CHECK(RE("(-?\\d+)").FullMatch("-2147483648", &v)); CHECK_EQ(v, min_value);
936 CHECK(!RE("(-?\\d+)").FullMatch("-2147483649", &v));
937 CHECK(!RE("(-?\\d+)").FullMatch("2147483648", &v));
938 }
939 {
940 unsigned int v;
941 static const unsigned int max_value = 0xfffffffful;
942 CHECK(RE("(\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
943 CHECK(RE("(\\d+)").FullMatch("4294967295", &v)); CHECK_EQ(v, max_value);
944 CHECK(!RE("(\\d+)").FullMatch("4294967296", &v));
945 }
946 #ifdef HAVE_LONG_LONG
947 # if defined(__MINGW__) || defined(__MINGW32__)
948 # define LLD "%I64d"
949 # define LLU "%I64u"
950 # else
951 # define LLD "%lld"
952 # define LLU "%llu"
953 # endif
954 {
955 long long v;
956 static const long long max_value = 0x7fffffffffffffffLL;
957 static const long long min_value = -max_value - 1;
958 char buf[32]; // definitely big enough for a long long
959
960 CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
961 CHECK(RE("(-?\\d+)").FullMatch("-100",&v)); CHECK_EQ(v, -100);
962
963 sprintf(buf, LLD, max_value);
964 CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
965
966 sprintf(buf, LLD, min_value);
967 CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, min_value);
968
969 sprintf(buf, LLD, max_value);
970 assert(buf[strlen(buf)-1] != '9');
971 buf[strlen(buf)-1]++;
972 CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
973
974 sprintf(buf, LLD, min_value);
975 assert(buf[strlen(buf)-1] != '9');
976 buf[strlen(buf)-1]++;
977 CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
978 }
979 #endif
980 #if defined HAVE_UNSIGNED_LONG_LONG && defined HAVE_LONG_LONG
981 {
982 unsigned long long v;
983 long long v2;
984 static const unsigned long long max_value = 0xffffffffffffffffULL;
985 char buf[32]; // definitely big enough for a unsigned long long
986
987 CHECK(RE("(-?\\d+)").FullMatch("100",&v)); CHECK_EQ(v, 100);
988 CHECK(RE("(-?\\d+)").FullMatch("-100",&v2)); CHECK_EQ(v2, -100);
989
990 sprintf(buf, LLU, max_value);
991 CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
992
993 assert(buf[strlen(buf)-1] != '9');
994 buf[strlen(buf)-1]++;
995 CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
996 }
997 #endif
998 {
999 float v;
1000 CHECK(RE("(.*)").FullMatch("100", &v));
1001 CHECK(RE("(.*)").FullMatch("-100.", &v));
1002 CHECK(RE("(.*)").FullMatch("1e23", &v));
1003 }
1004 {
1005 double v;
1006 CHECK(RE("(.*)").FullMatch("100", &v));
1007 CHECK(RE("(.*)").FullMatch("-100.", &v));
1008 CHECK(RE("(.*)").FullMatch("1e23", &v));
1009 }
1010
1011 // Check that matching is fully anchored
1012 CHECK(!RE("(\\d+)").FullMatch("x1001", &i));
1013 CHECK(!RE("(\\d+)").FullMatch("1001x", &i));
1014 CHECK(RE("x(\\d+)").FullMatch("x1001", &i)); CHECK_EQ(i, 1001);
1015 CHECK(RE("(\\d+)x").FullMatch("1001x", &i)); CHECK_EQ(i, 1001);
1016
1017 // Braces
1018 CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcd"));
1019 CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcde"));
1020 CHECK(!RE("[0-9a-f+.-]{5,}").FullMatch("0abc"));
1021
1022 // Complicated RE
1023 CHECK(RE("foo|bar|[A-Z]").FullMatch("foo"));
1024 CHECK(RE("foo|bar|[A-Z]").FullMatch("bar"));
1025 CHECK(RE("foo|bar|[A-Z]").FullMatch("X"));
1026 CHECK(!RE("foo|bar|[A-Z]").FullMatch("XY"));
1027
1028 // Check full-match handling (needs '$' tacked on internally)
1029 CHECK(RE("fo|foo").FullMatch("fo"));
1030 CHECK(RE("fo|foo").FullMatch("foo"));
1031 CHECK(RE("fo|foo$").FullMatch("fo"));
1032 CHECK(RE("fo|foo$").FullMatch("foo"));
1033 CHECK(RE("foo$").FullMatch("foo"));
1034 CHECK(!RE("foo\\$").FullMatch("foo$bar"));
1035 CHECK(!RE("fo|bar").FullMatch("fox"));
1036
1037 // Uncomment the following if we change the handling of '$' to
1038 // prevent it from matching a trailing newline
1039 if (false) {
1040 // Check that we don't get bitten by pcre's special handling of a
1041 // '\n' at the end of the string matching '$'
1042 CHECK(!RE("foo$").PartialMatch("foo\n"));
1043 }
1044
1045 // Number of args
1046 int a[16];
1047 CHECK(RE("").FullMatch(""));
1048
1049 memset(a, 0, sizeof(0));
1050 CHECK(RE("(\\d){1}").FullMatch("1",
1051 &a[0]));
1052 CHECK_EQ(a[0], 1);
1053
1054 memset(a, 0, sizeof(0));
1055 CHECK(RE("(\\d)(\\d)").FullMatch("12",
1056 &a[0], &a[1]));
1057 CHECK_EQ(a[0], 1);
1058 CHECK_EQ(a[1], 2);
1059
1060 memset(a, 0, sizeof(0));
1061 CHECK(RE("(\\d)(\\d)(\\d)").FullMatch("123",
1062 &a[0], &a[1], &a[2]));
1063 CHECK_EQ(a[0], 1);
1064 CHECK_EQ(a[1], 2);
1065 CHECK_EQ(a[2], 3);
1066
1067 memset(a, 0, sizeof(0));
1068 CHECK(RE("(\\d)(\\d)(\\d)(\\d)").FullMatch("1234",
1069 &a[0], &a[1], &a[2], &a[3]));
1070 CHECK_EQ(a[0], 1);
1071 CHECK_EQ(a[1], 2);
1072 CHECK_EQ(a[2], 3);
1073 CHECK_EQ(a[3], 4);
1074
1075 memset(a, 0, sizeof(0));
1076 CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("12345",
1077 &a[0], &a[1], &a[2],
1078 &a[3], &a[4]));
1079 CHECK_EQ(a[0], 1);
1080 CHECK_EQ(a[1], 2);
1081 CHECK_EQ(a[2], 3);
1082 CHECK_EQ(a[3], 4);
1083 CHECK_EQ(a[4], 5);
1084
1085 memset(a, 0, sizeof(0));
1086 CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("123456",
1087 &a[0], &a[1], &a[2],
1088 &a[3], &a[4], &a[5]));
1089 CHECK_EQ(a[0], 1);
1090 CHECK_EQ(a[1], 2);
1091 CHECK_EQ(a[2], 3);
1092 CHECK_EQ(a[3], 4);
1093 CHECK_EQ(a[4], 5);
1094 CHECK_EQ(a[5], 6);
1095
1096 memset(a, 0, sizeof(0));
1097 CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("1234567",
1098 &a[0], &a[1], &a[2], &a[3],
1099 &a[4], &a[5], &a[6]));
1100 CHECK_EQ(a[0], 1);
1101 CHECK_EQ(a[1], 2);
1102 CHECK_EQ(a[2], 3);
1103 CHECK_EQ(a[3], 4);
1104 CHECK_EQ(a[4], 5);
1105 CHECK_EQ(a[5], 6);
1106 CHECK_EQ(a[6], 7);
1107
1108 memset(a, 0, sizeof(0));
1109 CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)"
1110 "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch(
1111 "1234567890123456",
1112 &a[0], &a[1], &a[2], &a[3],
1113 &a[4], &a[5], &a[6], &a[7],
1114 &a[8], &a[9], &a[10], &a[11],
1115 &a[12], &a[13], &a[14], &a[15]));
1116 CHECK_EQ(a[0], 1);
1117 CHECK_EQ(a[1], 2);
1118 CHECK_EQ(a[2], 3);
1119 CHECK_EQ(a[3], 4);
1120 CHECK_EQ(a[4], 5);
1121 CHECK_EQ(a[5], 6);
1122 CHECK_EQ(a[6], 7);
1123 CHECK_EQ(a[7], 8);
1124 CHECK_EQ(a[8], 9);
1125 CHECK_EQ(a[9], 0);
1126 CHECK_EQ(a[10], 1);
1127 CHECK_EQ(a[11], 2);
1128 CHECK_EQ(a[12], 3);
1129 CHECK_EQ(a[13], 4);
1130 CHECK_EQ(a[14], 5);
1131 CHECK_EQ(a[15], 6);
1132
1133 /***** PartialMatch *****/
1134
1135 printf("Testing PartialMatch\n");
1136
1137 CHECK(RE("h.*o").PartialMatch("hello"));
1138 CHECK(RE("h.*o").PartialMatch("othello"));
1139 CHECK(RE("h.*o").PartialMatch("hello!"));
1140 CHECK(RE("((((((((((((((((((((x))))))))))))))))))))").PartialMatch("x"));
1141
1142 /***** other tests *****/
1143
1144 RadixTests();
1145 TestReplace();
1146 TestExtract();
1147 TestConsume();
1148 TestFindAndConsume();
1149 TestQuoteMetaAll();
1150 TestMatchNumberPeculiarity();
1151
1152 // Check the pattern() accessor
1153 {
1154 const string kPattern = "http://([^/]+)/.*";
1155 const RE re(kPattern);
1156 CHECK_EQ(kPattern, re.pattern());
1157 }
1158
1159 // Check RE error field.
1160 {
1161 RE re("foo");
1162 CHECK(re.error().empty()); // Must have no error
1163 }
1164
1165 #ifdef SUPPORT_UTF8
1166 // Check UTF-8 handling
1167 {
1168 printf("Testing UTF-8 handling\n");
1169
1170 // Three Japanese characters (nihongo)
1171 const unsigned char utf8_string[] = {
1172 0xe6, 0x97, 0xa5, // 65e5
1173 0xe6, 0x9c, 0xac, // 672c
1174 0xe8, 0xaa, 0x9e, // 8a9e
1175 0
1176 };
1177 const unsigned char utf8_pattern[] = {
1178 '.',
1179 0xe6, 0x9c, 0xac, // 672c
1180 '.',
1181 0
1182 };
1183
1184 // Both should match in either mode, bytes or UTF-8
1185 RE re_test1(".........");
1186 CHECK(re_test1.FullMatch(utf8_string));
1187 RE re_test2("...", pcrecpp::UTF8());
1188 CHECK(re_test2.FullMatch(utf8_string));
1189
1190 // Check that '.' matches one byte or UTF-8 character
1191 // according to the mode.
1192 string ss;
1193 RE re_test3("(.)");
1194 CHECK(re_test3.PartialMatch(utf8_string, &ss));
1195 CHECK_EQ(ss, string("\xe6"));
1196 RE re_test4("(.)", pcrecpp::UTF8());
1197 CHECK(re_test4.PartialMatch(utf8_string, &ss));
1198 CHECK_EQ(ss, string("\xe6\x97\xa5"));
1199
1200 // Check that string matches itself in either mode
1201 RE re_test5(utf8_string);
1202 CHECK(re_test5.FullMatch(utf8_string));
1203 RE re_test6(utf8_string, pcrecpp::UTF8());
1204 CHECK(re_test6.FullMatch(utf8_string));
1205
1206 // Check that pattern matches string only in UTF8 mode
1207 RE re_test7(utf8_pattern);
1208 CHECK(!re_test7.FullMatch(utf8_string));
1209 RE re_test8(utf8_pattern, pcrecpp::UTF8());
1210 CHECK(re_test8.FullMatch(utf8_string));
1211 }
1212
1213 // Check that ungreedy, UTF8 regular expressions don't match when they
1214 // oughtn't -- see bug 82246.
1215 {
1216 // This code always worked.
1217 const char* pattern = "\\w+X";
1218 const string target = "a aX";
1219 RE match_sentence(pattern);
1220 RE match_sentence_re(pattern, pcrecpp::UTF8());
1221
1222 CHECK(!match_sentence.FullMatch(target));
1223 CHECK(!match_sentence_re.FullMatch(target));
1224 }
1225
1226 {
1227 const char* pattern = "(?U)\\w+X";
1228 const string target = "a aX";
1229 RE match_sentence(pattern);
1230 RE match_sentence_re(pattern, pcrecpp::UTF8());
1231
1232 CHECK(!match_sentence.FullMatch(target));
1233 CHECK(!match_sentence_re.FullMatch(target));
1234 }
1235 #endif /* def SUPPORT_UTF8 */
1236
1237 printf("Testing error reporting\n");
1238
1239 { RE re("a\\1"); CHECK(!re.error().empty()); }
1240 {
1241 RE re("a[x");
1242 CHECK(!re.error().empty());
1243 }
1244 {
1245 RE re("a[z-a]");
1246 CHECK(!re.error().empty());
1247 }
1248 {
1249 RE re("a[[:foobar:]]");
1250 CHECK(!re.error().empty());
1251 }
1252 {
1253 RE re("a(b");
1254 CHECK(!re.error().empty());
1255 }
1256 {
1257 RE re("a\\");
1258 CHECK(!re.error().empty());
1259 }
1260
1261 // Test that recursion is stopped
1262 TestRecursion();
1263
1264 // Test Options
1265 if (getenv("VERBOSE_TEST") != NULL)
1266 VERBOSE_TEST = true;
1267 TestOptions();
1268
1269 // Test the constructors
1270 TestConstructors();
1271
1272 // Done
1273 printf("OK\n");
1274
1275 return 0;
1276 }

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12