/[pcre]/code/trunk/pcrecpp_unittest.cc
ViewVC logotype

Contents of /code/trunk/pcrecpp_unittest.cc

Parent Directory Parent Directory | Revision Log Revision Log


Revision 77 - (hide annotations) (download)
Sat Feb 24 21:40:45 2007 UTC (7 years, 1 month ago) by nigel
File size: 23826 byte(s)
Load pcre-6.0 into code/trunk.

1 nigel 77 // Copyright (c) 2005, Google Inc.
2     // All rights reserved.
3     //
4     // Redistribution and use in source and binary forms, with or without
5     // modification, are permitted provided that the following conditions are
6     // met:
7     //
8     // * Redistributions of source code must retain the above copyright
9     // notice, this list of conditions and the following disclaimer.
10     // * Redistributions in binary form must reproduce the above
11     // copyright notice, this list of conditions and the following disclaimer
12     // in the documentation and/or other materials provided with the
13     // distribution.
14     // * Neither the name of Google Inc. nor the names of its
15     // contributors may be used to endorse or promote products derived from
16     // this software without specific prior written permission.
17     //
18     // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19     // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20     // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21     // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22     // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23     // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24     // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25     // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26     // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27     // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28     // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29     //
30     // Author: Sanjay Ghemawat
31     //
32     // TODO: Test extractions for PartialMatch/Consume
33    
34     #include <stdio.h>
35     #include <vector>
36     #include "config.h"
37     #include "pcrecpp.h"
38    
39     using pcrecpp::StringPiece;
40     using pcrecpp::RE;
41     using pcrecpp::RE_Options;
42     using pcrecpp::Hex;
43     using pcrecpp::Octal;
44     using pcrecpp::CRadix;
45    
// CHECK dies with a fatal error if condition is not true.  It is *not*
// controlled by NDEBUG, so the check will be executed regardless of
// compilation mode.  Therefore, it is safe to do things like:
//    CHECK_EQ(fp->Write(x), 4)
// On failure the source location and the text of the condition are written
// to stderr and the process exits with status 1.
#define CHECK(condition) do {                               \
  if (!(condition)) {                                       \
    fprintf(stderr, "%s:%d: Check failed: %s\n",            \
            __FILE__, __LINE__, #condition);                \
    exit(1);                                                \
  }                                                         \
} while (0)

// CHECK_EQ dies unless a == b.  Both operands are parenthesized so that an
// argument containing an operator of lower precedence than == (for example
// `x & mask` or `c ? p : q`) is compared as a whole expression.
#define CHECK_EQ(a, b) CHECK((a) == (b))
59    
60     static void Timing1(int num_iters) {
61     // Same pattern lots of times
62     RE pattern("ruby:\\d+");
63     StringPiece p("ruby:1234");
64     for (int j = num_iters; j > 0; j--) {
65     CHECK(pattern.FullMatch(p));
66     }
67     }
68    
69     static void Timing2(int num_iters) {
70     // Same pattern lots of times
71     RE pattern("ruby:(\\d+)");
72     int i;
73     for (int j = num_iters; j > 0; j--) {
74     CHECK(pattern.FullMatch("ruby:1234", &i));
75     CHECK_EQ(i, 1234);
76     }
77     }
78    
79     static void Timing3(int num_iters) {
80     string text_string;
81     for (int j = num_iters; j > 0; j--) {
82     text_string += "this is another line\n";
83     }
84    
85     RE line_matcher(".*\n");
86     string line;
87     StringPiece text(text_string);
88     int counter = 0;
89     while (line_matcher.Consume(&text)) {
90     counter++;
91     }
92     printf("Matched %d lines\n", counter);
93     }
94    
95     #if 0 // uncomment this if you have a way of defining VirtualProcessSize()
96    
97     static void LeakTest() {
98     // Check for memory leaks
99     unsigned long long initial_size = 0;
100     for (int i = 0; i < 100000; i++) {
101     if (i == 50000) {
102     initial_size = VirtualProcessSize();
103     printf("Size after 50000: %llu\n", initial_size);
104     }
105     char buf[100];
106     snprintf(buf, sizeof(buf), "pat%09d", i);
107     RE newre(buf);
108     }
109     uint64 final_size = VirtualProcessSize();
110     printf("Size after 100000: %llu\n", final_size);
111     const double growth = double(final_size - initial_size) / final_size;
112     printf("Growth: %0.2f%%", growth * 100);
113     CHECK(growth < 0.02); // Allow < 2% growth
114     }
115    
116     #endif
117    
118     static void RadixTests() {
119     printf("Testing hex\n");
120    
121     #define CHECK_HEX(type, value) \
122     do { \
123     type v; \
124     CHECK(RE("([0-9a-fA-F]+)[uUlL]*").FullMatch(#value, Hex(&v))); \
125     CHECK_EQ(v, 0x ## value); \
126     CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0x" #value, CRadix(&v))); \
127     CHECK_EQ(v, 0x ## value); \
128     } while(0)
129    
130     CHECK_HEX(short, 2bad);
131     CHECK_HEX(unsigned short, 2badU);
132     CHECK_HEX(int, dead);
133     CHECK_HEX(unsigned int, deadU);
134     CHECK_HEX(long, 7eadbeefL);
135     CHECK_HEX(unsigned long, deadbeefUL);
136     #ifdef HAVE_LONG_LONG
137     CHECK_HEX(long long, 12345678deadbeefLL);
138     #endif
139     #ifdef HAVE_UNSIGNED_LONG_LONG
140     CHECK_HEX(unsigned long long, cafebabedeadbeefULL);
141     #endif
142    
143     #undef CHECK_HEX
144    
145     printf("Testing octal\n");
146    
147     #define CHECK_OCTAL(type, value) \
148     do { \
149     type v; \
150     CHECK(RE("([0-7]+)[uUlL]*").FullMatch(#value, Octal(&v))); \
151     CHECK_EQ(v, 0 ## value); \
152     CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0" #value, CRadix(&v))); \
153     CHECK_EQ(v, 0 ## value); \
154     } while(0)
155    
156     CHECK_OCTAL(short, 77777);
157     CHECK_OCTAL(unsigned short, 177777U);
158     CHECK_OCTAL(int, 17777777777);
159     CHECK_OCTAL(unsigned int, 37777777777U);
160     CHECK_OCTAL(long, 17777777777L);
161     CHECK_OCTAL(unsigned long, 37777777777UL);
162     #ifdef HAVE_LONG_LONG
163     CHECK_OCTAL(long long, 777777777777777777777LL);
164     #endif
165     #ifdef HAVE_UNSIGNED_LONG_LONG
166     CHECK_OCTAL(unsigned long long, 1777777777777777777777ULL);
167     #endif
168    
169     #undef CHECK_OCTAL
170    
171     printf("Testing decimal\n");
172    
173     #define CHECK_DECIMAL(type, value) \
174     do { \
175     type v; \
176     CHECK(RE("(-?[0-9]+)[uUlL]*").FullMatch(#value, &v)); \
177     CHECK_EQ(v, value); \
178     CHECK(RE("(-?[0-9a-fA-FxX]+)[uUlL]*").FullMatch(#value, CRadix(&v))); \
179     CHECK_EQ(v, value); \
180     } while(0)
181    
182     CHECK_DECIMAL(short, -1);
183     CHECK_DECIMAL(unsigned short, 9999);
184     CHECK_DECIMAL(int, -1000);
185     CHECK_DECIMAL(unsigned int, 12345U);
186     CHECK_DECIMAL(long, -10000000L);
187     CHECK_DECIMAL(unsigned long, 3083324652U);
188     #ifdef HAVE_LONG_LONG
189     CHECK_DECIMAL(long long, -100000000000000LL);
190     #endif
191     #ifdef HAVE_UNSIGNED_LONG_LONG
192     CHECK_DECIMAL(unsigned long long, 1234567890987654321ULL);
193     #endif
194    
195     #undef CHECK_DECIMAL
196    
197     }
198    
199     static void TestReplace() {
200     printf("Testing Replace\n");
201    
202     struct ReplaceTest {
203     const char *regexp;
204     const char *rewrite;
205     const char *original;
206     const char *single;
207     const char *global;
208     };
209     static const ReplaceTest tests[] = {
210     { "(qu|[b-df-hj-np-tv-z]*)([a-z]+)",
211     "\\2\\1ay",
212     "the quick brown fox jumps over the lazy dogs.",
213     "ethay quick brown fox jumps over the lazy dogs.",
214     "ethay ickquay ownbray oxfay umpsjay overay ethay azylay ogsday." },
215     { "\\w+",
216     "\\0-NOSPAM",
217     "paul.haahr@google.com",
218     "paul-NOSPAM.haahr@google.com",
219     "paul-NOSPAM.haahr-NOSPAM@google-NOSPAM.com-NOSPAM" },
220     { "^",
221     "(START)",
222     "foo",
223     "(START)foo",
224     "(START)foo" },
225     { "^",
226     "(START)",
227     "",
228     "(START)",
229     "(START)" },
230     { "$",
231     "(END)",
232     "",
233     "(END)",
234     "(END)" },
235     { "b",
236     "bb",
237     "ababababab",
238     "abbabababab",
239     "abbabbabbabbabb" },
240     { "b",
241     "bb",
242     "bbbbbb",
243     "bbbbbbb",
244     "bbbbbbbbbbbb" },
245     { "b+",
246     "bb",
247     "bbbbbb",
248     "bb",
249     "bb" },
250     { "b*",
251     "bb",
252     "bbbbbb",
253     "bb",
254     "bb" },
255     { "b*",
256     "bb",
257     "aaaaa",
258     "bbaaaaa",
259     "bbabbabbabbabbabb" },
260     { "", NULL, NULL, NULL, NULL }
261     };
262    
263     for (const ReplaceTest *t = tests; t->original != NULL; ++t) {
264     string one(t->original);
265     CHECK(RE(t->regexp).Replace(t->rewrite, &one));
266     CHECK_EQ(one, t->single);
267     string all(t->original);
268     CHECK(RE(t->regexp).GlobalReplace(t->rewrite, &all) > 0);
269     CHECK_EQ(all, t->global);
270     }
271     }
272    
273     static void TestExtract() {
274     printf("Testing Extract\n");
275    
276     string s;
277    
278     CHECK(RE("(.*)@([^.]*)").Extract("\\2!\\1", "boris@kremvax.ru", &s));
279     CHECK_EQ(s, "kremvax!boris");
280    
281     // check the RE interface as well
282     CHECK(RE(".*").Extract("'\\0'", "foo", &s));
283     CHECK_EQ(s, "'foo'");
284     CHECK(!RE("bar").Extract("'\\0'", "baz", &s));
285     CHECK_EQ(s, "'foo'");
286     }
287    
288     static void TestConsume() {
289     printf("Testing Consume\n");
290    
291     string word;
292    
293     string s(" aaa b!@#$@#$cccc");
294     StringPiece input(s);
295    
296     RE r("\\s*(\\w+)"); // matches a word, possibly proceeded by whitespace
297     CHECK(r.Consume(&input, &word));
298     CHECK_EQ(word, "aaa");
299     CHECK(r.Consume(&input, &word));
300     CHECK_EQ(word, "b");
301     CHECK(! r.Consume(&input, &word));
302     }
303    
304     static void TestFindAndConsume() {
305     printf("Testing FindAndConsume\n");
306    
307     string word;
308    
309     string s(" aaa b!@#$@#$cccc");
310     StringPiece input(s);
311    
312     RE r("(\\w+)"); // matches a word
313     CHECK(r.FindAndConsume(&input, &word));
314     CHECK_EQ(word, "aaa");
315     CHECK(r.FindAndConsume(&input, &word));
316     CHECK_EQ(word, "b");
317     CHECK(r.FindAndConsume(&input, &word));
318     CHECK_EQ(word, "cccc");
319     CHECK(! r.FindAndConsume(&input, &word));
320     }
321    
322     static void TestMatchNumberPeculiarity() {
323     printf("Testing match-number peculiaraity\n");
324    
325     string word1;
326     string word2;
327     string word3;
328    
329     RE r("(foo)|(bar)|(baz)");
330     CHECK(r.PartialMatch("foo", &word1, &word2, &word3));
331     CHECK_EQ(word1, "foo");
332     CHECK_EQ(word2, "");
333     CHECK_EQ(word3, "");
334     CHECK(r.PartialMatch("bar", &word1, &word2, &word3));
335     CHECK_EQ(word1, "");
336     CHECK_EQ(word2, "bar");
337     CHECK_EQ(word3, "");
338     CHECK(r.PartialMatch("baz", &word1, &word2, &word3));
339     CHECK_EQ(word1, "");
340     CHECK_EQ(word2, "");
341     CHECK_EQ(word3, "baz");
342     CHECK(!r.PartialMatch("f", &word1, &word2, &word3));
343    
344     string a;
345     CHECK(RE("(foo)|hello").FullMatch("hello", &a));
346     CHECK_EQ(a, "");
347     }
348    
349     static void TestRecursion(int size, const char *pattern, int match_limit) {
350     printf("Testing recursion\n");
351    
352     // Fill up a string repeating the pattern given
353     string domain;
354     domain.resize(size);
355     int patlen = strlen(pattern);
356     for (int i = 0; i < size; ++i) {
357     domain[i] = pattern[i % patlen];
358     }
359     // Just make sure it doesn't crash due to too much recursion.
360     RE_Options options;
361     options.set_match_limit(match_limit);
362     RE re("([a-zA-Z0-9]|-)+(\\.([a-zA-Z0-9]|-)+)*(\\.)?", options);
363     re.FullMatch(domain);
364     }
365    
366    
367     int main(int argc, char** argv) {
368     // Treat any flag as --help
369     if (argc > 1 && argv[1][0] == '-') {
370     printf("Usage: %s [timing1|timing2|timing3 num-iters]\n"
371     " If 'timingX ###' is specified, run the given timing test\n"
372     " with the given number of iterations, rather than running\n"
373     " the default corectness test.\n", argv[0]);
374     return 0;
375     }
376    
377     if (argc > 1) {
378     if ( argc == 2 || atoi(argv[2]) == 0) {
379     printf("timing mode needs a num-iters argument\n");
380     return 1;
381     }
382     if (!strcmp(argv[1], "timing1"))
383     Timing1(atoi(argv[2]));
384     else if (!strcmp(argv[1], "timing2"))
385     Timing2(atoi(argv[2]));
386     else if (!strcmp(argv[1], "timing3"))
387     Timing3(atoi(argv[2]));
388     else
389     printf("Unknown argument '%s'\n", argv[1]);
390     return 0;
391     }
392    
393     printf("Testing FullMatch\n");
394    
395     int i;
396     string s;
397    
398     /***** FullMatch with no args *****/
399    
400     CHECK(RE("h.*o").FullMatch("hello"));
401     CHECK(!RE("h.*o").FullMatch("othello"));
402     CHECK(!RE("h.*o").FullMatch("hello!"));
403    
404     /***** FullMatch with args *****/
405    
406     // Zero-arg
407     CHECK(RE("\\d+").FullMatch("1001"));
408    
409     // Single-arg
410     CHECK(RE("(\\d+)").FullMatch("1001", &i));
411     CHECK_EQ(i, 1001);
412     CHECK(RE("(-?\\d+)").FullMatch("-123", &i));
413     CHECK_EQ(i, -123);
414     CHECK(!RE("()\\d+").FullMatch("10", &i));
415     CHECK(!RE("(\\d+)").FullMatch("1234567890123456789012345678901234567890",
416     &i));
417    
418     // Digits surrounding integer-arg
419     CHECK(RE("1(\\d*)4").FullMatch("1234", &i));
420     CHECK_EQ(i, 23);
421     CHECK(RE("(\\d)\\d+").FullMatch("1234", &i));
422     CHECK_EQ(i, 1);
423     CHECK(RE("(-\\d)\\d+").FullMatch("-1234", &i));
424     CHECK_EQ(i, -1);
425     CHECK(RE("(\\d)").PartialMatch("1234", &i));
426     CHECK_EQ(i, 1);
427     CHECK(RE("(-\\d)").PartialMatch("-1234", &i));
428     CHECK_EQ(i, -1);
429    
430     // String-arg
431     CHECK(RE("h(.*)o").FullMatch("hello", &s));
432     CHECK_EQ(s, string("ell"));
433    
434     // StringPiece-arg
435     StringPiece sp;
436     CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &sp, &i));
437     CHECK_EQ(sp.size(), 4);
438     CHECK(memcmp(sp.data(), "ruby", 4) == 0);
439     CHECK_EQ(i, 1234);
440    
441     // Multi-arg
442     CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &s, &i));
443     CHECK_EQ(s, string("ruby"));
444     CHECK_EQ(i, 1234);
445    
446     // Ignored arg
447     CHECK(RE("(\\w+)(:)(\\d+)").FullMatch("ruby:1234", &s, (void*)NULL, &i));
448     CHECK_EQ(s, string("ruby"));
449     CHECK_EQ(i, 1234);
450    
451     // Type tests
452     {
453     char c;
454     CHECK(RE("(H)ello").FullMatch("Hello", &c));
455     CHECK_EQ(c, 'H');
456     }
457     {
458     unsigned char c;
459     CHECK(RE("(H)ello").FullMatch("Hello", &c));
460     CHECK_EQ(c, static_cast<unsigned char>('H'));
461     }
462     {
463     short v;
464     CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
465     CHECK(RE("(-?\\d+)").FullMatch("-100", &v)); CHECK_EQ(v, -100);
466     CHECK(RE("(-?\\d+)").FullMatch("32767", &v)); CHECK_EQ(v, 32767);
467     CHECK(RE("(-?\\d+)").FullMatch("-32768", &v)); CHECK_EQ(v, -32768);
468     CHECK(!RE("(-?\\d+)").FullMatch("-32769", &v));
469     CHECK(!RE("(-?\\d+)").FullMatch("32768", &v));
470     }
471     {
472     unsigned short v;
473     CHECK(RE("(\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
474     CHECK(RE("(\\d+)").FullMatch("32767", &v)); CHECK_EQ(v, 32767);
475     CHECK(RE("(\\d+)").FullMatch("65535", &v)); CHECK_EQ(v, 65535);
476     CHECK(!RE("(\\d+)").FullMatch("65536", &v));
477     }
478     {
479     int v;
480     static const int max_value = 0x7fffffff;
481     static const int min_value = -max_value - 1;
482     CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
483     CHECK(RE("(-?\\d+)").FullMatch("-100", &v)); CHECK_EQ(v, -100);
484     CHECK(RE("(-?\\d+)").FullMatch("2147483647", &v)); CHECK_EQ(v, max_value);
485     CHECK(RE("(-?\\d+)").FullMatch("-2147483648", &v)); CHECK_EQ(v, min_value);
486     CHECK(!RE("(-?\\d+)").FullMatch("-2147483649", &v));
487     CHECK(!RE("(-?\\d+)").FullMatch("2147483648", &v));
488     }
489     {
490     unsigned int v;
491     static const unsigned int max_value = 0xfffffffful;
492     CHECK(RE("(\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
493     CHECK(RE("(\\d+)").FullMatch("4294967295", &v)); CHECK_EQ(v, max_value);
494     CHECK(!RE("(\\d+)").FullMatch("4294967296", &v));
495     }
496     #ifdef HAVE_LONG_LONG
497     {
498     long long v;
499     static const long long max_value = 0x7fffffffffffffffLL;
500     static const long long min_value = -max_value - 1;
501     char buf[32];
502    
503     CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
504     CHECK(RE("(-?\\d+)").FullMatch("-100",&v)); CHECK_EQ(v, -100);
505    
506     snprintf(buf, sizeof(buf), "%lld", max_value);
507     CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
508    
509     snprintf(buf, sizeof(buf), "%lld", min_value);
510     CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, min_value);
511    
512     snprintf(buf, sizeof(buf), "%lld", max_value);
513     assert(buf[strlen(buf)-1] != '9');
514     buf[strlen(buf)-1]++;
515     CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
516    
517     snprintf(buf, sizeof(buf), "%lld", min_value);
518     assert(buf[strlen(buf)-1] != '9');
519     buf[strlen(buf)-1]++;
520     CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
521     }
522     #endif
523     #if defined HAVE_UNSIGNED_LONG_LONG && defined HAVE_LONG_LONG
524     {
525     unsigned long long v;
526     long long v2;
527     static const unsigned long long max_value = 0xffffffffffffffffULL;
528     char buf[32];
529    
530     CHECK(RE("(-?\\d+)").FullMatch("100",&v)); CHECK_EQ(v, 100);
531     CHECK(RE("(-?\\d+)").FullMatch("-100",&v2)); CHECK_EQ(v2, -100);
532    
533     snprintf(buf, sizeof(buf), "%llu", max_value);
534     CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
535    
536     assert(buf[strlen(buf)-1] != '9');
537     buf[strlen(buf)-1]++;
538     CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
539     }
540     #endif
541     {
542     float v;
543     CHECK(RE("(.*)").FullMatch("100", &v));
544     CHECK(RE("(.*)").FullMatch("-100.", &v));
545     CHECK(RE("(.*)").FullMatch("1e23", &v));
546     }
547     {
548     double v;
549     CHECK(RE("(.*)").FullMatch("100", &v));
550     CHECK(RE("(.*)").FullMatch("-100.", &v));
551     CHECK(RE("(.*)").FullMatch("1e23", &v));
552     }
553    
554     // Check that matching is fully anchored
555     CHECK(!RE("(\\d+)").FullMatch("x1001", &i));
556     CHECK(!RE("(\\d+)").FullMatch("1001x", &i));
557     CHECK(RE("x(\\d+)").FullMatch("x1001", &i)); CHECK_EQ(i, 1001);
558     CHECK(RE("(\\d+)x").FullMatch("1001x", &i)); CHECK_EQ(i, 1001);
559    
560     // Braces
561     CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcd"));
562     CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcde"));
563     CHECK(!RE("[0-9a-f+.-]{5,}").FullMatch("0abc"));
564    
565     // Complicated RE
566     CHECK(RE("foo|bar|[A-Z]").FullMatch("foo"));
567     CHECK(RE("foo|bar|[A-Z]").FullMatch("bar"));
568     CHECK(RE("foo|bar|[A-Z]").FullMatch("X"));
569     CHECK(!RE("foo|bar|[A-Z]").FullMatch("XY"));
570    
571     // Check full-match handling (needs '$' tacked on internally)
572     CHECK(RE("fo|foo").FullMatch("fo"));
573     CHECK(RE("fo|foo").FullMatch("foo"));
574     CHECK(RE("fo|foo$").FullMatch("fo"));
575     CHECK(RE("fo|foo$").FullMatch("foo"));
576     CHECK(RE("foo$").FullMatch("foo"));
577     CHECK(!RE("foo\\$").FullMatch("foo$bar"));
578     CHECK(!RE("fo|bar").FullMatch("fox"));
579    
580     // Uncomment the following if we change the handling of '$' to
581     // prevent it from matching a trailing newline
582     if (false) {
583     // Check that we don't get bitten by pcre's special handling of a
584     // '\n' at the end of the string matching '$'
585     CHECK(!RE("foo$").PartialMatch("foo\n"));
586     }
587    
588     // Number of args
589     int a[16];
590     CHECK(RE("").FullMatch(""));
591    
592     memset(a, 0, sizeof(0));
593     CHECK(RE("(\\d){1}").FullMatch("1",
594     &a[0]));
595     CHECK_EQ(a[0], 1);
596    
597     memset(a, 0, sizeof(0));
598     CHECK(RE("(\\d)(\\d)").FullMatch("12",
599     &a[0], &a[1]));
600     CHECK_EQ(a[0], 1);
601     CHECK_EQ(a[1], 2);
602    
603     memset(a, 0, sizeof(0));
604     CHECK(RE("(\\d)(\\d)(\\d)").FullMatch("123",
605     &a[0], &a[1], &a[2]));
606     CHECK_EQ(a[0], 1);
607     CHECK_EQ(a[1], 2);
608     CHECK_EQ(a[2], 3);
609    
610     memset(a, 0, sizeof(0));
611     CHECK(RE("(\\d)(\\d)(\\d)(\\d)").FullMatch("1234",
612     &a[0], &a[1], &a[2], &a[3]));
613     CHECK_EQ(a[0], 1);
614     CHECK_EQ(a[1], 2);
615     CHECK_EQ(a[2], 3);
616     CHECK_EQ(a[3], 4);
617    
618     memset(a, 0, sizeof(0));
619     CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("12345",
620     &a[0], &a[1], &a[2],
621     &a[3], &a[4]));
622     CHECK_EQ(a[0], 1);
623     CHECK_EQ(a[1], 2);
624     CHECK_EQ(a[2], 3);
625     CHECK_EQ(a[3], 4);
626     CHECK_EQ(a[4], 5);
627    
628     memset(a, 0, sizeof(0));
629     CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("123456",
630     &a[0], &a[1], &a[2],
631     &a[3], &a[4], &a[5]));
632     CHECK_EQ(a[0], 1);
633     CHECK_EQ(a[1], 2);
634     CHECK_EQ(a[2], 3);
635     CHECK_EQ(a[3], 4);
636     CHECK_EQ(a[4], 5);
637     CHECK_EQ(a[5], 6);
638    
639     memset(a, 0, sizeof(0));
640     CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("1234567",
641     &a[0], &a[1], &a[2], &a[3],
642     &a[4], &a[5], &a[6]));
643     CHECK_EQ(a[0], 1);
644     CHECK_EQ(a[1], 2);
645     CHECK_EQ(a[2], 3);
646     CHECK_EQ(a[3], 4);
647     CHECK_EQ(a[4], 5);
648     CHECK_EQ(a[5], 6);
649     CHECK_EQ(a[6], 7);
650    
651     memset(a, 0, sizeof(0));
652     CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)"
653     "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch(
654     "1234567890123456",
655     &a[0], &a[1], &a[2], &a[3],
656     &a[4], &a[5], &a[6], &a[7],
657     &a[8], &a[9], &a[10], &a[11],
658     &a[12], &a[13], &a[14], &a[15]));
659     CHECK_EQ(a[0], 1);
660     CHECK_EQ(a[1], 2);
661     CHECK_EQ(a[2], 3);
662     CHECK_EQ(a[3], 4);
663     CHECK_EQ(a[4], 5);
664     CHECK_EQ(a[5], 6);
665     CHECK_EQ(a[6], 7);
666     CHECK_EQ(a[7], 8);
667     CHECK_EQ(a[8], 9);
668     CHECK_EQ(a[9], 0);
669     CHECK_EQ(a[10], 1);
670     CHECK_EQ(a[11], 2);
671     CHECK_EQ(a[12], 3);
672     CHECK_EQ(a[13], 4);
673     CHECK_EQ(a[14], 5);
674     CHECK_EQ(a[15], 6);
675    
676     /***** PartialMatch *****/
677    
678     printf("Testing PartialMatch\n");
679    
680     CHECK(RE("h.*o").PartialMatch("hello"));
681     CHECK(RE("h.*o").PartialMatch("othello"));
682     CHECK(RE("h.*o").PartialMatch("hello!"));
683     CHECK(RE("((((((((((((((((((((x))))))))))))))))))))").PartialMatch("x"));
684    
685     RadixTests();
686     TestReplace();
687     TestExtract();
688     TestConsume();
689     TestFindAndConsume();
690     TestMatchNumberPeculiarity();
691    
692     // Check the pattern() accessor
693     {
694     const string kPattern = "http://([^/]+)/.*";
695     const RE re(kPattern);
696     CHECK_EQ(kPattern, re.pattern());
697     }
698    
699     // Check RE error field.
700     {
701     RE re("foo");
702     CHECK(re.error().empty()); // Must have no error
703     }
704    
705     #ifdef SUPPORT_UTF8
706     // Check UTF-8 handling
707     {
708     printf("Testing UTF-8 handling\n");
709    
710     // Three Japanese characters (nihongo)
711     const char utf8_string[] = {
712     0xe6, 0x97, 0xa5, // 65e5
713     0xe6, 0x9c, 0xac, // 627c
714     0xe8, 0xaa, 0x9e, // 8a9e
715     0
716     };
717     const char utf8_pattern[] = {
718     '.',
719     0xe6, 0x9c, 0xac, // 627c
720     '.',
721     0
722     };
723    
724     // Both should match in either mode, bytes or UTF-8
725     RE re_test1(".........");
726     CHECK(re_test1.FullMatch(utf8_string));
727     RE re_test2("...", pcrecpp::UTF8());
728     CHECK(re_test2.FullMatch(utf8_string));
729    
730     // Check that '.' matches one byte or UTF-8 character
731     // according to the mode.
732     string ss;
733     RE re_test3("(.)");
734     CHECK(re_test3.PartialMatch(utf8_string, &ss));
735     CHECK_EQ(ss, string("\xe6"));
736     RE re_test4("(.)", pcrecpp::UTF8());
737     CHECK(re_test4.PartialMatch(utf8_string, &ss));
738     CHECK_EQ(ss, string("\xe6\x97\xa5"));
739    
740     // Check that string matches itself in either mode
741     RE re_test5(utf8_string);
742     CHECK(re_test5.FullMatch(utf8_string));
743     RE re_test6(utf8_string, pcrecpp::UTF8());
744     CHECK(re_test6.FullMatch(utf8_string));
745    
746     // Check that pattern matches string only in UTF8 mode
747     RE re_test7(utf8_pattern);
748     CHECK(!re_test7.FullMatch(utf8_string));
749     RE re_test8(utf8_pattern, pcrecpp::UTF8());
750     CHECK(re_test8.FullMatch(utf8_string));
751     }
752    
753     // Check that ungreedy, UTF8 regular expressions don't match when they
754     // oughtn't -- see bug 82246.
755     {
756     // This code always worked.
757     const char* pattern = "\\w+X";
758     const string target = "a aX";
759     RE match_sentence(pattern);
760     RE match_sentence_re(pattern, pcrecpp::UTF8());
761    
762     CHECK(!match_sentence.FullMatch(target));
763     CHECK(!match_sentence_re.FullMatch(target));
764     }
765    
766     {
767     const char* pattern = "(?U)\\w+X";
768     const string target = "a aX";
769     RE match_sentence(pattern);
770     RE match_sentence_re(pattern, pcrecpp::UTF8());
771    
772     CHECK(!match_sentence.FullMatch(target));
773     CHECK(!match_sentence_re.FullMatch(target));
774     }
775     #endif /* def SUPPORT_UTF8 */
776    
777     printf("Testing error reporting\n");
778    
779     { RE re("a\\1"); CHECK(!re.error().empty()); }
780     {
781     RE re("a[x");
782     CHECK(!re.error().empty());
783     }
784     {
785     RE re("a[z-a]");
786     CHECK(!re.error().empty());
787     }
788     {
789     RE re("a[[:foobar:]]");
790     CHECK(!re.error().empty());
791     }
792     {
793     RE re("a(b");
794     CHECK(!re.error().empty());
795     }
796     {
797     RE re("a\\");
798     CHECK(!re.error().empty());
799     }
800    
801     // Test that recursion is stopped: there will be some errors reported
802     int matchlimit = 5000;
803     int bytes = 15 * 1024; // enough to crash if there was no match limit
804     TestRecursion(bytes, ".", matchlimit);
805     TestRecursion(bytes, "a", matchlimit);
806     TestRecursion(bytes, "a.", matchlimit);
807     TestRecursion(bytes, "ab.", matchlimit);
808     TestRecursion(bytes, "abc.", matchlimit);
809    
810     // Done
811     printf("OK\n");
812    
813     return 0;
814     }

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12