/[pcre]/code/trunk/pcrecpp_unittest.cc
ViewVC logotype

Contents of /code/trunk/pcrecpp_unittest.cc

Parent Directory Parent Directory | Revision Log Revision Log


Revision 77 - (show annotations) (download)
Sat Feb 24 21:40:45 2007 UTC (7 years, 1 month ago) by nigel
File size: 23826 byte(s)
Load pcre-6.0 into code/trunk.

1 // Copyright (c) 2005, Google Inc.
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above
11 // copyright notice, this list of conditions and the following disclaimer
12 // in the documentation and/or other materials provided with the
13 // distribution.
14 // * Neither the name of Google Inc. nor the names of its
15 // contributors may be used to endorse or promote products derived from
16 // this software without specific prior written permission.
17 //
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // Author: Sanjay Ghemawat
31 //
32 // TODO: Test extractions for PartialMatch/Consume
33
34 #include <stdio.h>
35 #include <vector>
36 #include "config.h"
37 #include "pcrecpp.h"
38
39 using pcrecpp::StringPiece;
40 using pcrecpp::RE;
41 using pcrecpp::RE_Options;
42 using pcrecpp::Hex;
43 using pcrecpp::Octal;
44 using pcrecpp::CRadix;
45
// CHECK dies with a fatal error if condition is not true.  It is *not*
// controlled by NDEBUG, so the check will be executed regardless of
// compilation mode.  Therefore, it is safe to do things like:
//    CHECK_EQ(fp->Write(x), 4)
//
// On failure the file name, line number and text of the condition are
// written to stderr and the process exits with status 1.
#define CHECK(condition) do {                           \
  if (!(condition)) {                                   \
    fprintf(stderr, "%s:%d: Check failed: %s\n",        \
            __FILE__, __LINE__, #condition);            \
    exit(1);                                            \
  }                                                     \
} while (0)

// CHECK_EQ(a, b) dies unless a == b.  Both operands are parenthesized so
// that an argument containing an operator of lower precedence than ==
// (for example `x & mask`) is compared as a whole.
#define CHECK_EQ(a, b) CHECK((a) == (b))
59
60 static void Timing1(int num_iters) {
61 // Same pattern lots of times
62 RE pattern("ruby:\\d+");
63 StringPiece p("ruby:1234");
64 for (int j = num_iters; j > 0; j--) {
65 CHECK(pattern.FullMatch(p));
66 }
67 }
68
69 static void Timing2(int num_iters) {
70 // Same pattern lots of times
71 RE pattern("ruby:(\\d+)");
72 int i;
73 for (int j = num_iters; j > 0; j--) {
74 CHECK(pattern.FullMatch("ruby:1234", &i));
75 CHECK_EQ(i, 1234);
76 }
77 }
78
79 static void Timing3(int num_iters) {
80 string text_string;
81 for (int j = num_iters; j > 0; j--) {
82 text_string += "this is another line\n";
83 }
84
85 RE line_matcher(".*\n");
86 string line;
87 StringPiece text(text_string);
88 int counter = 0;
89 while (line_matcher.Consume(&text)) {
90 counter++;
91 }
92 printf("Matched %d lines\n", counter);
93 }
94
#if 0  // enable this if you have a way of defining VirtualProcessSize()

// Constructs 100000 RE objects, one per loop iteration, each from a
// different pattern string, and checks that the value reported by
// VirtualProcessSize() grows by less than 2% between iteration 50000 and
// the end of the loop.
static void LeakTest() {
  // Check for memory leaks
  unsigned long long initial_size = 0;
  for (int i = 0; i < 100000; i++) {
    if (i == 50000) {
      initial_size = VirtualProcessSize();
      printf("Size after 50000: %llu\n", initial_size);
    }
    char buf[100];
    snprintf(buf, sizeof(buf), "pat%09d", i);
    RE newre(buf);
  }
  // Same type as initial_size, so both values match their %llu conversions.
  const unsigned long long final_size = VirtualProcessSize();
  printf("Size after 100000: %llu\n", final_size);
  // Subtract in floating point: an unsigned subtraction would wrap around
  // to a huge value if the reported size went down.
  const double growth =
      (double(final_size) - double(initial_size)) / double(final_size);
  printf("Growth: %0.2f%%\n", growth * 100);
  CHECK(growth < 0.02);    // Allow < 2% growth
}

#endif
117
118 static void RadixTests() {
119 printf("Testing hex\n");
120
121 #define CHECK_HEX(type, value) \
122 do { \
123 type v; \
124 CHECK(RE("([0-9a-fA-F]+)[uUlL]*").FullMatch(#value, Hex(&v))); \
125 CHECK_EQ(v, 0x ## value); \
126 CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0x" #value, CRadix(&v))); \
127 CHECK_EQ(v, 0x ## value); \
128 } while(0)
129
130 CHECK_HEX(short, 2bad);
131 CHECK_HEX(unsigned short, 2badU);
132 CHECK_HEX(int, dead);
133 CHECK_HEX(unsigned int, deadU);
134 CHECK_HEX(long, 7eadbeefL);
135 CHECK_HEX(unsigned long, deadbeefUL);
136 #ifdef HAVE_LONG_LONG
137 CHECK_HEX(long long, 12345678deadbeefLL);
138 #endif
139 #ifdef HAVE_UNSIGNED_LONG_LONG
140 CHECK_HEX(unsigned long long, cafebabedeadbeefULL);
141 #endif
142
143 #undef CHECK_HEX
144
145 printf("Testing octal\n");
146
147 #define CHECK_OCTAL(type, value) \
148 do { \
149 type v; \
150 CHECK(RE("([0-7]+)[uUlL]*").FullMatch(#value, Octal(&v))); \
151 CHECK_EQ(v, 0 ## value); \
152 CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0" #value, CRadix(&v))); \
153 CHECK_EQ(v, 0 ## value); \
154 } while(0)
155
156 CHECK_OCTAL(short, 77777);
157 CHECK_OCTAL(unsigned short, 177777U);
158 CHECK_OCTAL(int, 17777777777);
159 CHECK_OCTAL(unsigned int, 37777777777U);
160 CHECK_OCTAL(long, 17777777777L);
161 CHECK_OCTAL(unsigned long, 37777777777UL);
162 #ifdef HAVE_LONG_LONG
163 CHECK_OCTAL(long long, 777777777777777777777LL);
164 #endif
165 #ifdef HAVE_UNSIGNED_LONG_LONG
166 CHECK_OCTAL(unsigned long long, 1777777777777777777777ULL);
167 #endif
168
169 #undef CHECK_OCTAL
170
171 printf("Testing decimal\n");
172
173 #define CHECK_DECIMAL(type, value) \
174 do { \
175 type v; \
176 CHECK(RE("(-?[0-9]+)[uUlL]*").FullMatch(#value, &v)); \
177 CHECK_EQ(v, value); \
178 CHECK(RE("(-?[0-9a-fA-FxX]+)[uUlL]*").FullMatch(#value, CRadix(&v))); \
179 CHECK_EQ(v, value); \
180 } while(0)
181
182 CHECK_DECIMAL(short, -1);
183 CHECK_DECIMAL(unsigned short, 9999);
184 CHECK_DECIMAL(int, -1000);
185 CHECK_DECIMAL(unsigned int, 12345U);
186 CHECK_DECIMAL(long, -10000000L);
187 CHECK_DECIMAL(unsigned long, 3083324652U);
188 #ifdef HAVE_LONG_LONG
189 CHECK_DECIMAL(long long, -100000000000000LL);
190 #endif
191 #ifdef HAVE_UNSIGNED_LONG_LONG
192 CHECK_DECIMAL(unsigned long long, 1234567890987654321ULL);
193 #endif
194
195 #undef CHECK_DECIMAL
196
197 }
198
199 static void TestReplace() {
200 printf("Testing Replace\n");
201
202 struct ReplaceTest {
203 const char *regexp;
204 const char *rewrite;
205 const char *original;
206 const char *single;
207 const char *global;
208 };
209 static const ReplaceTest tests[] = {
210 { "(qu|[b-df-hj-np-tv-z]*)([a-z]+)",
211 "\\2\\1ay",
212 "the quick brown fox jumps over the lazy dogs.",
213 "ethay quick brown fox jumps over the lazy dogs.",
214 "ethay ickquay ownbray oxfay umpsjay overay ethay azylay ogsday." },
215 { "\\w+",
216 "\\0-NOSPAM",
217 "paul.haahr@google.com",
218 "paul-NOSPAM.haahr@google.com",
219 "paul-NOSPAM.haahr-NOSPAM@google-NOSPAM.com-NOSPAM" },
220 { "^",
221 "(START)",
222 "foo",
223 "(START)foo",
224 "(START)foo" },
225 { "^",
226 "(START)",
227 "",
228 "(START)",
229 "(START)" },
230 { "$",
231 "(END)",
232 "",
233 "(END)",
234 "(END)" },
235 { "b",
236 "bb",
237 "ababababab",
238 "abbabababab",
239 "abbabbabbabbabb" },
240 { "b",
241 "bb",
242 "bbbbbb",
243 "bbbbbbb",
244 "bbbbbbbbbbbb" },
245 { "b+",
246 "bb",
247 "bbbbbb",
248 "bb",
249 "bb" },
250 { "b*",
251 "bb",
252 "bbbbbb",
253 "bb",
254 "bb" },
255 { "b*",
256 "bb",
257 "aaaaa",
258 "bbaaaaa",
259 "bbabbabbabbabbabb" },
260 { "", NULL, NULL, NULL, NULL }
261 };
262
263 for (const ReplaceTest *t = tests; t->original != NULL; ++t) {
264 string one(t->original);
265 CHECK(RE(t->regexp).Replace(t->rewrite, &one));
266 CHECK_EQ(one, t->single);
267 string all(t->original);
268 CHECK(RE(t->regexp).GlobalReplace(t->rewrite, &all) > 0);
269 CHECK_EQ(all, t->global);
270 }
271 }
272
273 static void TestExtract() {
274 printf("Testing Extract\n");
275
276 string s;
277
278 CHECK(RE("(.*)@([^.]*)").Extract("\\2!\\1", "boris@kremvax.ru", &s));
279 CHECK_EQ(s, "kremvax!boris");
280
281 // check the RE interface as well
282 CHECK(RE(".*").Extract("'\\0'", "foo", &s));
283 CHECK_EQ(s, "'foo'");
284 CHECK(!RE("bar").Extract("'\\0'", "baz", &s));
285 CHECK_EQ(s, "'foo'");
286 }
287
288 static void TestConsume() {
289 printf("Testing Consume\n");
290
291 string word;
292
293 string s(" aaa b!@#$@#$cccc");
294 StringPiece input(s);
295
296 RE r("\\s*(\\w+)"); // matches a word, possibly proceeded by whitespace
297 CHECK(r.Consume(&input, &word));
298 CHECK_EQ(word, "aaa");
299 CHECK(r.Consume(&input, &word));
300 CHECK_EQ(word, "b");
301 CHECK(! r.Consume(&input, &word));
302 }
303
304 static void TestFindAndConsume() {
305 printf("Testing FindAndConsume\n");
306
307 string word;
308
309 string s(" aaa b!@#$@#$cccc");
310 StringPiece input(s);
311
312 RE r("(\\w+)"); // matches a word
313 CHECK(r.FindAndConsume(&input, &word));
314 CHECK_EQ(word, "aaa");
315 CHECK(r.FindAndConsume(&input, &word));
316 CHECK_EQ(word, "b");
317 CHECK(r.FindAndConsume(&input, &word));
318 CHECK_EQ(word, "cccc");
319 CHECK(! r.FindAndConsume(&input, &word));
320 }
321
322 static void TestMatchNumberPeculiarity() {
323 printf("Testing match-number peculiaraity\n");
324
325 string word1;
326 string word2;
327 string word3;
328
329 RE r("(foo)|(bar)|(baz)");
330 CHECK(r.PartialMatch("foo", &word1, &word2, &word3));
331 CHECK_EQ(word1, "foo");
332 CHECK_EQ(word2, "");
333 CHECK_EQ(word3, "");
334 CHECK(r.PartialMatch("bar", &word1, &word2, &word3));
335 CHECK_EQ(word1, "");
336 CHECK_EQ(word2, "bar");
337 CHECK_EQ(word3, "");
338 CHECK(r.PartialMatch("baz", &word1, &word2, &word3));
339 CHECK_EQ(word1, "");
340 CHECK_EQ(word2, "");
341 CHECK_EQ(word3, "baz");
342 CHECK(!r.PartialMatch("f", &word1, &word2, &word3));
343
344 string a;
345 CHECK(RE("(foo)|hello").FullMatch("hello", &a));
346 CHECK_EQ(a, "");
347 }
348
349 static void TestRecursion(int size, const char *pattern, int match_limit) {
350 printf("Testing recursion\n");
351
352 // Fill up a string repeating the pattern given
353 string domain;
354 domain.resize(size);
355 int patlen = strlen(pattern);
356 for (int i = 0; i < size; ++i) {
357 domain[i] = pattern[i % patlen];
358 }
359 // Just make sure it doesn't crash due to too much recursion.
360 RE_Options options;
361 options.set_match_limit(match_limit);
362 RE re("([a-zA-Z0-9]|-)+(\\.([a-zA-Z0-9]|-)+)*(\\.)?", options);
363 re.FullMatch(domain);
364 }
365
366
367 int main(int argc, char** argv) {
368 // Treat any flag as --help
369 if (argc > 1 && argv[1][0] == '-') {
370 printf("Usage: %s [timing1|timing2|timing3 num-iters]\n"
371 " If 'timingX ###' is specified, run the given timing test\n"
372 " with the given number of iterations, rather than running\n"
373 " the default corectness test.\n", argv[0]);
374 return 0;
375 }
376
377 if (argc > 1) {
378 if ( argc == 2 || atoi(argv[2]) == 0) {
379 printf("timing mode needs a num-iters argument\n");
380 return 1;
381 }
382 if (!strcmp(argv[1], "timing1"))
383 Timing1(atoi(argv[2]));
384 else if (!strcmp(argv[1], "timing2"))
385 Timing2(atoi(argv[2]));
386 else if (!strcmp(argv[1], "timing3"))
387 Timing3(atoi(argv[2]));
388 else
389 printf("Unknown argument '%s'\n", argv[1]);
390 return 0;
391 }
392
393 printf("Testing FullMatch\n");
394
395 int i;
396 string s;
397
398 /***** FullMatch with no args *****/
399
400 CHECK(RE("h.*o").FullMatch("hello"));
401 CHECK(!RE("h.*o").FullMatch("othello"));
402 CHECK(!RE("h.*o").FullMatch("hello!"));
403
404 /***** FullMatch with args *****/
405
406 // Zero-arg
407 CHECK(RE("\\d+").FullMatch("1001"));
408
409 // Single-arg
410 CHECK(RE("(\\d+)").FullMatch("1001", &i));
411 CHECK_EQ(i, 1001);
412 CHECK(RE("(-?\\d+)").FullMatch("-123", &i));
413 CHECK_EQ(i, -123);
414 CHECK(!RE("()\\d+").FullMatch("10", &i));
415 CHECK(!RE("(\\d+)").FullMatch("1234567890123456789012345678901234567890",
416 &i));
417
418 // Digits surrounding integer-arg
419 CHECK(RE("1(\\d*)4").FullMatch("1234", &i));
420 CHECK_EQ(i, 23);
421 CHECK(RE("(\\d)\\d+").FullMatch("1234", &i));
422 CHECK_EQ(i, 1);
423 CHECK(RE("(-\\d)\\d+").FullMatch("-1234", &i));
424 CHECK_EQ(i, -1);
425 CHECK(RE("(\\d)").PartialMatch("1234", &i));
426 CHECK_EQ(i, 1);
427 CHECK(RE("(-\\d)").PartialMatch("-1234", &i));
428 CHECK_EQ(i, -1);
429
430 // String-arg
431 CHECK(RE("h(.*)o").FullMatch("hello", &s));
432 CHECK_EQ(s, string("ell"));
433
434 // StringPiece-arg
435 StringPiece sp;
436 CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &sp, &i));
437 CHECK_EQ(sp.size(), 4);
438 CHECK(memcmp(sp.data(), "ruby", 4) == 0);
439 CHECK_EQ(i, 1234);
440
441 // Multi-arg
442 CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &s, &i));
443 CHECK_EQ(s, string("ruby"));
444 CHECK_EQ(i, 1234);
445
446 // Ignored arg
447 CHECK(RE("(\\w+)(:)(\\d+)").FullMatch("ruby:1234", &s, (void*)NULL, &i));
448 CHECK_EQ(s, string("ruby"));
449 CHECK_EQ(i, 1234);
450
451 // Type tests
452 {
453 char c;
454 CHECK(RE("(H)ello").FullMatch("Hello", &c));
455 CHECK_EQ(c, 'H');
456 }
457 {
458 unsigned char c;
459 CHECK(RE("(H)ello").FullMatch("Hello", &c));
460 CHECK_EQ(c, static_cast<unsigned char>('H'));
461 }
462 {
463 short v;
464 CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
465 CHECK(RE("(-?\\d+)").FullMatch("-100", &v)); CHECK_EQ(v, -100);
466 CHECK(RE("(-?\\d+)").FullMatch("32767", &v)); CHECK_EQ(v, 32767);
467 CHECK(RE("(-?\\d+)").FullMatch("-32768", &v)); CHECK_EQ(v, -32768);
468 CHECK(!RE("(-?\\d+)").FullMatch("-32769", &v));
469 CHECK(!RE("(-?\\d+)").FullMatch("32768", &v));
470 }
471 {
472 unsigned short v;
473 CHECK(RE("(\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
474 CHECK(RE("(\\d+)").FullMatch("32767", &v)); CHECK_EQ(v, 32767);
475 CHECK(RE("(\\d+)").FullMatch("65535", &v)); CHECK_EQ(v, 65535);
476 CHECK(!RE("(\\d+)").FullMatch("65536", &v));
477 }
478 {
479 int v;
480 static const int max_value = 0x7fffffff;
481 static const int min_value = -max_value - 1;
482 CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
483 CHECK(RE("(-?\\d+)").FullMatch("-100", &v)); CHECK_EQ(v, -100);
484 CHECK(RE("(-?\\d+)").FullMatch("2147483647", &v)); CHECK_EQ(v, max_value);
485 CHECK(RE("(-?\\d+)").FullMatch("-2147483648", &v)); CHECK_EQ(v, min_value);
486 CHECK(!RE("(-?\\d+)").FullMatch("-2147483649", &v));
487 CHECK(!RE("(-?\\d+)").FullMatch("2147483648", &v));
488 }
489 {
490 unsigned int v;
491 static const unsigned int max_value = 0xfffffffful;
492 CHECK(RE("(\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
493 CHECK(RE("(\\d+)").FullMatch("4294967295", &v)); CHECK_EQ(v, max_value);
494 CHECK(!RE("(\\d+)").FullMatch("4294967296", &v));
495 }
496 #ifdef HAVE_LONG_LONG
497 {
498 long long v;
499 static const long long max_value = 0x7fffffffffffffffLL;
500 static const long long min_value = -max_value - 1;
501 char buf[32];
502
503 CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
504 CHECK(RE("(-?\\d+)").FullMatch("-100",&v)); CHECK_EQ(v, -100);
505
506 snprintf(buf, sizeof(buf), "%lld", max_value);
507 CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
508
509 snprintf(buf, sizeof(buf), "%lld", min_value);
510 CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, min_value);
511
512 snprintf(buf, sizeof(buf), "%lld", max_value);
513 assert(buf[strlen(buf)-1] != '9');
514 buf[strlen(buf)-1]++;
515 CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
516
517 snprintf(buf, sizeof(buf), "%lld", min_value);
518 assert(buf[strlen(buf)-1] != '9');
519 buf[strlen(buf)-1]++;
520 CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
521 }
522 #endif
523 #if defined HAVE_UNSIGNED_LONG_LONG && defined HAVE_LONG_LONG
524 {
525 unsigned long long v;
526 long long v2;
527 static const unsigned long long max_value = 0xffffffffffffffffULL;
528 char buf[32];
529
530 CHECK(RE("(-?\\d+)").FullMatch("100",&v)); CHECK_EQ(v, 100);
531 CHECK(RE("(-?\\d+)").FullMatch("-100",&v2)); CHECK_EQ(v2, -100);
532
533 snprintf(buf, sizeof(buf), "%llu", max_value);
534 CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
535
536 assert(buf[strlen(buf)-1] != '9');
537 buf[strlen(buf)-1]++;
538 CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
539 }
540 #endif
541 {
542 float v;
543 CHECK(RE("(.*)").FullMatch("100", &v));
544 CHECK(RE("(.*)").FullMatch("-100.", &v));
545 CHECK(RE("(.*)").FullMatch("1e23", &v));
546 }
547 {
548 double v;
549 CHECK(RE("(.*)").FullMatch("100", &v));
550 CHECK(RE("(.*)").FullMatch("-100.", &v));
551 CHECK(RE("(.*)").FullMatch("1e23", &v));
552 }
553
554 // Check that matching is fully anchored
555 CHECK(!RE("(\\d+)").FullMatch("x1001", &i));
556 CHECK(!RE("(\\d+)").FullMatch("1001x", &i));
557 CHECK(RE("x(\\d+)").FullMatch("x1001", &i)); CHECK_EQ(i, 1001);
558 CHECK(RE("(\\d+)x").FullMatch("1001x", &i)); CHECK_EQ(i, 1001);
559
560 // Braces
561 CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcd"));
562 CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcde"));
563 CHECK(!RE("[0-9a-f+.-]{5,}").FullMatch("0abc"));
564
565 // Complicated RE
566 CHECK(RE("foo|bar|[A-Z]").FullMatch("foo"));
567 CHECK(RE("foo|bar|[A-Z]").FullMatch("bar"));
568 CHECK(RE("foo|bar|[A-Z]").FullMatch("X"));
569 CHECK(!RE("foo|bar|[A-Z]").FullMatch("XY"));
570
571 // Check full-match handling (needs '$' tacked on internally)
572 CHECK(RE("fo|foo").FullMatch("fo"));
573 CHECK(RE("fo|foo").FullMatch("foo"));
574 CHECK(RE("fo|foo$").FullMatch("fo"));
575 CHECK(RE("fo|foo$").FullMatch("foo"));
576 CHECK(RE("foo$").FullMatch("foo"));
577 CHECK(!RE("foo\\$").FullMatch("foo$bar"));
578 CHECK(!RE("fo|bar").FullMatch("fox"));
579
580 // Uncomment the following if we change the handling of '$' to
581 // prevent it from matching a trailing newline
582 if (false) {
583 // Check that we don't get bitten by pcre's special handling of a
584 // '\n' at the end of the string matching '$'
585 CHECK(!RE("foo$").PartialMatch("foo\n"));
586 }
587
588 // Number of args
589 int a[16];
590 CHECK(RE("").FullMatch(""));
591
592 memset(a, 0, sizeof(0));
593 CHECK(RE("(\\d){1}").FullMatch("1",
594 &a[0]));
595 CHECK_EQ(a[0], 1);
596
597 memset(a, 0, sizeof(0));
598 CHECK(RE("(\\d)(\\d)").FullMatch("12",
599 &a[0], &a[1]));
600 CHECK_EQ(a[0], 1);
601 CHECK_EQ(a[1], 2);
602
603 memset(a, 0, sizeof(0));
604 CHECK(RE("(\\d)(\\d)(\\d)").FullMatch("123",
605 &a[0], &a[1], &a[2]));
606 CHECK_EQ(a[0], 1);
607 CHECK_EQ(a[1], 2);
608 CHECK_EQ(a[2], 3);
609
610 memset(a, 0, sizeof(0));
611 CHECK(RE("(\\d)(\\d)(\\d)(\\d)").FullMatch("1234",
612 &a[0], &a[1], &a[2], &a[3]));
613 CHECK_EQ(a[0], 1);
614 CHECK_EQ(a[1], 2);
615 CHECK_EQ(a[2], 3);
616 CHECK_EQ(a[3], 4);
617
618 memset(a, 0, sizeof(0));
619 CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("12345",
620 &a[0], &a[1], &a[2],
621 &a[3], &a[4]));
622 CHECK_EQ(a[0], 1);
623 CHECK_EQ(a[1], 2);
624 CHECK_EQ(a[2], 3);
625 CHECK_EQ(a[3], 4);
626 CHECK_EQ(a[4], 5);
627
628 memset(a, 0, sizeof(0));
629 CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("123456",
630 &a[0], &a[1], &a[2],
631 &a[3], &a[4], &a[5]));
632 CHECK_EQ(a[0], 1);
633 CHECK_EQ(a[1], 2);
634 CHECK_EQ(a[2], 3);
635 CHECK_EQ(a[3], 4);
636 CHECK_EQ(a[4], 5);
637 CHECK_EQ(a[5], 6);
638
639 memset(a, 0, sizeof(0));
640 CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("1234567",
641 &a[0], &a[1], &a[2], &a[3],
642 &a[4], &a[5], &a[6]));
643 CHECK_EQ(a[0], 1);
644 CHECK_EQ(a[1], 2);
645 CHECK_EQ(a[2], 3);
646 CHECK_EQ(a[3], 4);
647 CHECK_EQ(a[4], 5);
648 CHECK_EQ(a[5], 6);
649 CHECK_EQ(a[6], 7);
650
651 memset(a, 0, sizeof(0));
652 CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)"
653 "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch(
654 "1234567890123456",
655 &a[0], &a[1], &a[2], &a[3],
656 &a[4], &a[5], &a[6], &a[7],
657 &a[8], &a[9], &a[10], &a[11],
658 &a[12], &a[13], &a[14], &a[15]));
659 CHECK_EQ(a[0], 1);
660 CHECK_EQ(a[1], 2);
661 CHECK_EQ(a[2], 3);
662 CHECK_EQ(a[3], 4);
663 CHECK_EQ(a[4], 5);
664 CHECK_EQ(a[5], 6);
665 CHECK_EQ(a[6], 7);
666 CHECK_EQ(a[7], 8);
667 CHECK_EQ(a[8], 9);
668 CHECK_EQ(a[9], 0);
669 CHECK_EQ(a[10], 1);
670 CHECK_EQ(a[11], 2);
671 CHECK_EQ(a[12], 3);
672 CHECK_EQ(a[13], 4);
673 CHECK_EQ(a[14], 5);
674 CHECK_EQ(a[15], 6);
675
676 /***** PartialMatch *****/
677
678 printf("Testing PartialMatch\n");
679
680 CHECK(RE("h.*o").PartialMatch("hello"));
681 CHECK(RE("h.*o").PartialMatch("othello"));
682 CHECK(RE("h.*o").PartialMatch("hello!"));
683 CHECK(RE("((((((((((((((((((((x))))))))))))))))))))").PartialMatch("x"));
684
685 RadixTests();
686 TestReplace();
687 TestExtract();
688 TestConsume();
689 TestFindAndConsume();
690 TestMatchNumberPeculiarity();
691
692 // Check the pattern() accessor
693 {
694 const string kPattern = "http://([^/]+)/.*";
695 const RE re(kPattern);
696 CHECK_EQ(kPattern, re.pattern());
697 }
698
699 // Check RE error field.
700 {
701 RE re("foo");
702 CHECK(re.error().empty()); // Must have no error
703 }
704
705 #ifdef SUPPORT_UTF8
706 // Check UTF-8 handling
707 {
708 printf("Testing UTF-8 handling\n");
709
710 // Three Japanese characters (nihongo)
711 const char utf8_string[] = {
712 0xe6, 0x97, 0xa5, // 65e5
713 0xe6, 0x9c, 0xac, // 627c
714 0xe8, 0xaa, 0x9e, // 8a9e
715 0
716 };
717 const char utf8_pattern[] = {
718 '.',
719 0xe6, 0x9c, 0xac, // 627c
720 '.',
721 0
722 };
723
724 // Both should match in either mode, bytes or UTF-8
725 RE re_test1(".........");
726 CHECK(re_test1.FullMatch(utf8_string));
727 RE re_test2("...", pcrecpp::UTF8());
728 CHECK(re_test2.FullMatch(utf8_string));
729
730 // Check that '.' matches one byte or UTF-8 character
731 // according to the mode.
732 string ss;
733 RE re_test3("(.)");
734 CHECK(re_test3.PartialMatch(utf8_string, &ss));
735 CHECK_EQ(ss, string("\xe6"));
736 RE re_test4("(.)", pcrecpp::UTF8());
737 CHECK(re_test4.PartialMatch(utf8_string, &ss));
738 CHECK_EQ(ss, string("\xe6\x97\xa5"));
739
740 // Check that string matches itself in either mode
741 RE re_test5(utf8_string);
742 CHECK(re_test5.FullMatch(utf8_string));
743 RE re_test6(utf8_string, pcrecpp::UTF8());
744 CHECK(re_test6.FullMatch(utf8_string));
745
746 // Check that pattern matches string only in UTF8 mode
747 RE re_test7(utf8_pattern);
748 CHECK(!re_test7.FullMatch(utf8_string));
749 RE re_test8(utf8_pattern, pcrecpp::UTF8());
750 CHECK(re_test8.FullMatch(utf8_string));
751 }
752
753 // Check that ungreedy, UTF8 regular expressions don't match when they
754 // oughtn't -- see bug 82246.
755 {
756 // This code always worked.
757 const char* pattern = "\\w+X";
758 const string target = "a aX";
759 RE match_sentence(pattern);
760 RE match_sentence_re(pattern, pcrecpp::UTF8());
761
762 CHECK(!match_sentence.FullMatch(target));
763 CHECK(!match_sentence_re.FullMatch(target));
764 }
765
766 {
767 const char* pattern = "(?U)\\w+X";
768 const string target = "a aX";
769 RE match_sentence(pattern);
770 RE match_sentence_re(pattern, pcrecpp::UTF8());
771
772 CHECK(!match_sentence.FullMatch(target));
773 CHECK(!match_sentence_re.FullMatch(target));
774 }
775 #endif /* def SUPPORT_UTF8 */
776
777 printf("Testing error reporting\n");
778
779 { RE re("a\\1"); CHECK(!re.error().empty()); }
780 {
781 RE re("a[x");
782 CHECK(!re.error().empty());
783 }
784 {
785 RE re("a[z-a]");
786 CHECK(!re.error().empty());
787 }
788 {
789 RE re("a[[:foobar:]]");
790 CHECK(!re.error().empty());
791 }
792 {
793 RE re("a(b");
794 CHECK(!re.error().empty());
795 }
796 {
797 RE re("a\\");
798 CHECK(!re.error().empty());
799 }
800
801 // Test that recursion is stopped: there will be some errors reported
802 int matchlimit = 5000;
803 int bytes = 15 * 1024; // enough to crash if there was no match limit
804 TestRecursion(bytes, ".", matchlimit);
805 TestRecursion(bytes, "a", matchlimit);
806 TestRecursion(bytes, "a.", matchlimit);
807 TestRecursion(bytes, "ab.", matchlimit);
808 TestRecursion(bytes, "abc.", matchlimit);
809
810 // Done
811 printf("OK\n");
812
813 return 0;
814 }

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12