/[pcre]/code/trunk/pcrecpp_unittest.cc
ViewVC logotype

Contents of /code/trunk/pcrecpp_unittest.cc

Parent Directory | Revision Log


Revision 302 - (hide annotations) (download)
Sun Jan 20 19:12:46 2008 UTC (6 years, 9 months ago) by ph10
File size: 38876 byte(s)
Exclude "long long" test in pcrecpp_unittest when HAVE_LONG_LONG is not 
defined.

1 nigel 93 // -*- coding: utf-8 -*-
2     //
3     // Copyright (c) 2005 - 2006, Google Inc.
4 nigel 77 // All rights reserved.
5     //
6     // Redistribution and use in source and binary forms, with or without
7     // modification, are permitted provided that the following conditions are
8     // met:
9     //
10     // * Redistributions of source code must retain the above copyright
11     // notice, this list of conditions and the following disclaimer.
12     // * Redistributions in binary form must reproduce the above
13     // copyright notice, this list of conditions and the following disclaimer
14     // in the documentation and/or other materials provided with the
15     // distribution.
16     // * Neither the name of Google Inc. nor the names of its
17     // contributors may be used to endorse or promote products derived from
18     // this software without specific prior written permission.
19     //
20     // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21     // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22     // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23     // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24     // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25     // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26     // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27     // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28     // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29     // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30     // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31     //
32     // Author: Sanjay Ghemawat
33     //
34     // TODO: Test extractions for PartialMatch/Consume
35    
36 ph10 200 #ifdef HAVE_CONFIG_H
37 ph10 236 #include "config.h"
38 ph10 200 #endif
39 ph10 199
40 nigel 77 #include <stdio.h>
41 nigel 91 #include <cassert>
42 nigel 77 #include <vector>
43     #include "pcrecpp.h"
44    
45     using pcrecpp::StringPiece;
46     using pcrecpp::RE;
47     using pcrecpp::RE_Options;
48     using pcrecpp::Hex;
49     using pcrecpp::Octal;
50     using pcrecpp::CRadix;
51    
// When true, the option tests additionally print a description of each case.
static bool VERBOSE_TEST = false;

// CHECK dies with a fatal error if condition is not true.  It is *not*
// controlled by NDEBUG, so the check will be executed regardless of
// compilation mode.  Therefore, it is safe to do things like:
//    CHECK_EQ(fp->Write(x), 4)
#define CHECK(condition) do {                           \
  if (!(condition)) {                                   \
    fprintf(stderr, "%s:%d: Check failed: %s\n",        \
            __FILE__, __LINE__, #condition);            \
    exit(1);                                            \
  }                                                     \
} while (0)

// CHECK_EQ(a, b) dies unless a == b.  Both operands are parenthesized so that
// arguments containing operators that bind less tightly than == (such as &,
// | or ?:) are compared as the caller wrote them.
#define CHECK_EQ(a, b) CHECK((a) == (b))
67    
68     static void Timing1(int num_iters) {
69     // Same pattern lots of times
70     RE pattern("ruby:\\d+");
71     StringPiece p("ruby:1234");
72     for (int j = num_iters; j > 0; j--) {
73     CHECK(pattern.FullMatch(p));
74     }
75     }
76    
77     static void Timing2(int num_iters) {
78     // Same pattern lots of times
79     RE pattern("ruby:(\\d+)");
80     int i;
81     for (int j = num_iters; j > 0; j--) {
82     CHECK(pattern.FullMatch("ruby:1234", &i));
83     CHECK_EQ(i, 1234);
84     }
85     }
86    
87     static void Timing3(int num_iters) {
88     string text_string;
89     for (int j = num_iters; j > 0; j--) {
90     text_string += "this is another line\n";
91     }
92    
93     RE line_matcher(".*\n");
94     string line;
95     StringPiece text(text_string);
96     int counter = 0;
97     while (line_matcher.Consume(&text)) {
98     counter++;
99     }
100     printf("Matched %d lines\n", counter);
101     }
102    
#if 0  // uncomment this if you have a way of defining VirtualProcessSize()

// Constructs 100000 distinct RE objects and checks that the process size
// reported by VirtualProcessSize() grows by less than 2% over the second
// half of the run.
static void LeakTest() {
  // Check for memory leaks
  unsigned long long initial_size = 0;
  for (int i = 0; i < 100000; i++) {
    if (i == 50000) {
      initial_size = VirtualProcessSize();
      printf("Size after 50000: %llu\n", initial_size);
    }
    char buf[100];  // definitely big enough
    sprintf(buf, "pat%09d", i);
    RE newre(buf);
  }
  // Same type as initial_size, matching the %llu conversions used below.
  const unsigned long long final_size = VirtualProcessSize();
  printf("Size after 100000: %llu\n", final_size);
  // Subtract in floating point: an unsigned subtraction would wrap to a huge
  // value if the process happened to shrink.
  const double growth =
      (double(final_size) - double(initial_size)) / double(final_size);
  printf("Growth: %0.2f%%", growth * 100);
  CHECK(growth < 0.02);    // Allow < 2% growth
}

#endif
125    
126     static void RadixTests() {
127     printf("Testing hex\n");
128    
129     #define CHECK_HEX(type, value) \
130     do { \
131     type v; \
132     CHECK(RE("([0-9a-fA-F]+)[uUlL]*").FullMatch(#value, Hex(&v))); \
133     CHECK_EQ(v, 0x ## value); \
134     CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0x" #value, CRadix(&v))); \
135     CHECK_EQ(v, 0x ## value); \
136     } while(0)
137    
138     CHECK_HEX(short, 2bad);
139     CHECK_HEX(unsigned short, 2badU);
140     CHECK_HEX(int, dead);
141     CHECK_HEX(unsigned int, deadU);
142     CHECK_HEX(long, 7eadbeefL);
143     CHECK_HEX(unsigned long, deadbeefUL);
144     #ifdef HAVE_LONG_LONG
145     CHECK_HEX(long long, 12345678deadbeefLL);
146     #endif
147     #ifdef HAVE_UNSIGNED_LONG_LONG
148     CHECK_HEX(unsigned long long, cafebabedeadbeefULL);
149     #endif
150    
151     #undef CHECK_HEX
152    
153     printf("Testing octal\n");
154    
155     #define CHECK_OCTAL(type, value) \
156     do { \
157     type v; \
158     CHECK(RE("([0-7]+)[uUlL]*").FullMatch(#value, Octal(&v))); \
159     CHECK_EQ(v, 0 ## value); \
160     CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0" #value, CRadix(&v))); \
161     CHECK_EQ(v, 0 ## value); \
162     } while(0)
163    
164     CHECK_OCTAL(short, 77777);
165     CHECK_OCTAL(unsigned short, 177777U);
166     CHECK_OCTAL(int, 17777777777);
167     CHECK_OCTAL(unsigned int, 37777777777U);
168     CHECK_OCTAL(long, 17777777777L);
169     CHECK_OCTAL(unsigned long, 37777777777UL);
170     #ifdef HAVE_LONG_LONG
171     CHECK_OCTAL(long long, 777777777777777777777LL);
172     #endif
173     #ifdef HAVE_UNSIGNED_LONG_LONG
174     CHECK_OCTAL(unsigned long long, 1777777777777777777777ULL);
175     #endif
176    
177     #undef CHECK_OCTAL
178    
179     printf("Testing decimal\n");
180    
181     #define CHECK_DECIMAL(type, value) \
182     do { \
183     type v; \
184     CHECK(RE("(-?[0-9]+)[uUlL]*").FullMatch(#value, &v)); \
185     CHECK_EQ(v, value); \
186     CHECK(RE("(-?[0-9a-fA-FxX]+)[uUlL]*").FullMatch(#value, CRadix(&v))); \
187     CHECK_EQ(v, value); \
188     } while(0)
189    
190     CHECK_DECIMAL(short, -1);
191     CHECK_DECIMAL(unsigned short, 9999);
192     CHECK_DECIMAL(int, -1000);
193     CHECK_DECIMAL(unsigned int, 12345U);
194     CHECK_DECIMAL(long, -10000000L);
195     CHECK_DECIMAL(unsigned long, 3083324652U);
196     #ifdef HAVE_LONG_LONG
197     CHECK_DECIMAL(long long, -100000000000000LL);
198     #endif
199     #ifdef HAVE_UNSIGNED_LONG_LONG
200     CHECK_DECIMAL(unsigned long long, 1234567890987654321ULL);
201     #endif
202    
203     #undef CHECK_DECIMAL
204    
205     }
206    
207     static void TestReplace() {
208     printf("Testing Replace\n");
209    
210     struct ReplaceTest {
211     const char *regexp;
212     const char *rewrite;
213     const char *original;
214     const char *single;
215     const char *global;
216 ph10 297 int global_count; // the expected return value from ReplaceAll
217 nigel 77 };
218     static const ReplaceTest tests[] = {
219     { "(qu|[b-df-hj-np-tv-z]*)([a-z]+)",
220     "\\2\\1ay",
221     "the quick brown fox jumps over the lazy dogs.",
222     "ethay quick brown fox jumps over the lazy dogs.",
223 ph10 297 "ethay ickquay ownbray oxfay umpsjay overay ethay azylay ogsday.",
224     9 },
225 nigel 77 { "\\w+",
226     "\\0-NOSPAM",
227     "paul.haahr@google.com",
228     "paul-NOSPAM.haahr@google.com",
229 ph10 297 "paul-NOSPAM.haahr-NOSPAM@google-NOSPAM.com-NOSPAM",
230     4 },
231 nigel 77 { "^",
232     "(START)",
233     "foo",
234     "(START)foo",
235 ph10 297 "(START)foo",
236     1 },
237 nigel 77 { "^",
238     "(START)",
239     "",
240     "(START)",
241 ph10 297 "(START)",
242     1 },
243 nigel 77 { "$",
244     "(END)",
245     "",
246     "(END)",
247 ph10 297 "(END)",
248     1 },
249 nigel 77 { "b",
250     "bb",
251     "ababababab",
252     "abbabababab",
253 ph10 297 "abbabbabbabbabb",
254     5 },
255 nigel 77 { "b",
256     "bb",
257     "bbbbbb",
258     "bbbbbbb",
259 ph10 297 "bbbbbbbbbbbb",
260     6 },
261 nigel 77 { "b+",
262     "bb",
263     "bbbbbb",
264     "bb",
265 ph10 297 "bb",
266     1 },
267 nigel 77 { "b*",
268     "bb",
269     "bbbbbb",
270     "bb",
271 ph10 297 "bb",
272     1 },
273 nigel 77 { "b*",
274     "bb",
275     "aaaaa",
276     "bbaaaaa",
277 ph10 297 "bbabbabbabbabbabb",
278     6 },
279 nigel 91 { "b*",
280     "bb",
281     "aa\naa\n",
282     "bbaa\naa\n",
283 ph10 297 "bbabbabb\nbbabbabb\nbb",
284     7 },
285 nigel 91 { "b*",
286     "bb",
287     "aa\raa\r",
288     "bbaa\raa\r",
289 ph10 297 "bbabbabb\rbbabbabb\rbb",
290     7 },
291 nigel 91 { "b*",
292     "bb",
293     "aa\r\naa\r\n",
294     "bbaa\r\naa\r\n",
295 ph10 297 "bbabbabb\r\nbbabbabb\r\nbb",
296     7 },
297 nigel 91 #ifdef SUPPORT_UTF8
298     { "b*",
299     "bb",
300     "\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8", // utf8
301     "bb\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8",
302 ph10 297 "bb\xE3\x83\x9B""bb""\xE3\x83\xBC""bb""\xE3\x83\xA0""bb""\xE3\x81\xB8""bb",
303     5 },
304 nigel 91 { "b*",
305     "bb",
306     "\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n", // utf8
307     "bb\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n",
308     ("bb\xE3\x83\x9B""bb\r\nbb""\xE3\x83\xBC""bb\rbb""\xE3\x83\xA0"
309 ph10 297 "bb\nbb""\xE3\x81\xB8""bb\r\nbb"),
310     9 },
311 nigel 91 #endif
312 ph10 297 { "", NULL, NULL, NULL, NULL, 0 }
313 nigel 77 };
314    
315 nigel 91 #ifdef SUPPORT_UTF8
316     const bool support_utf8 = true;
317     #else
318     const bool support_utf8 = false;
319     #endif
320    
321 nigel 77 for (const ReplaceTest *t = tests; t->original != NULL; ++t) {
322 nigel 91 RE re(t->regexp, RE_Options(PCRE_NEWLINE_CRLF).set_utf8(support_utf8));
323     assert(re.error().empty());
324 nigel 77 string one(t->original);
325 nigel 91 CHECK(re.Replace(t->rewrite, &one));
326 nigel 77 CHECK_EQ(one, t->single);
327     string all(t->original);
328 ph10 297 const int replace_count = re.GlobalReplace(t->rewrite, &all);
329 nigel 77 CHECK_EQ(all, t->global);
330 ph10 297 CHECK_EQ(replace_count, t->global_count);
331 nigel 77 }
332 nigel 91
333     // One final test: test \r\n replacement when we're not in CRLF mode
334     {
335     RE re("b*", RE_Options(PCRE_NEWLINE_CR).set_utf8(support_utf8));
336     assert(re.error().empty());
337     string all("aa\r\naa\r\n");
338 ph10 297 CHECK_EQ(re.GlobalReplace("bb", &all), 9);
339 nigel 91 CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
340     }
341     {
342     RE re("b*", RE_Options(PCRE_NEWLINE_LF).set_utf8(support_utf8));
343     assert(re.error().empty());
344     string all("aa\r\naa\r\n");
345 ph10 297 CHECK_EQ(re.GlobalReplace("bb", &all), 9);
346 nigel 91 CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
347     }
348     // TODO: test what happens when no PCRE_NEWLINE_* flag is set.
349     // Alas, the answer depends on how pcre was compiled.
350 nigel 77 }
351    
352     static void TestExtract() {
353     printf("Testing Extract\n");
354    
355     string s;
356    
357     CHECK(RE("(.*)@([^.]*)").Extract("\\2!\\1", "boris@kremvax.ru", &s));
358     CHECK_EQ(s, "kremvax!boris");
359    
360     // check the RE interface as well
361     CHECK(RE(".*").Extract("'\\0'", "foo", &s));
362     CHECK_EQ(s, "'foo'");
363     CHECK(!RE("bar").Extract("'\\0'", "baz", &s));
364     CHECK_EQ(s, "'foo'");
365     }
366    
367     static void TestConsume() {
368     printf("Testing Consume\n");
369    
370     string word;
371    
372     string s(" aaa b!@#$@#$cccc");
373     StringPiece input(s);
374    
375     RE r("\\s*(\\w+)"); // matches a word, possibly proceeded by whitespace
376     CHECK(r.Consume(&input, &word));
377     CHECK_EQ(word, "aaa");
378     CHECK(r.Consume(&input, &word));
379     CHECK_EQ(word, "b");
380     CHECK(! r.Consume(&input, &word));
381     }
382    
383     static void TestFindAndConsume() {
384     printf("Testing FindAndConsume\n");
385    
386     string word;
387    
388     string s(" aaa b!@#$@#$cccc");
389     StringPiece input(s);
390    
391     RE r("(\\w+)"); // matches a word
392     CHECK(r.FindAndConsume(&input, &word));
393     CHECK_EQ(word, "aaa");
394     CHECK(r.FindAndConsume(&input, &word));
395     CHECK_EQ(word, "b");
396     CHECK(r.FindAndConsume(&input, &word));
397     CHECK_EQ(word, "cccc");
398     CHECK(! r.FindAndConsume(&input, &word));
399     }
400    
401     static void TestMatchNumberPeculiarity() {
402     printf("Testing match-number peculiaraity\n");
403    
404     string word1;
405     string word2;
406     string word3;
407    
408     RE r("(foo)|(bar)|(baz)");
409     CHECK(r.PartialMatch("foo", &word1, &word2, &word3));
410     CHECK_EQ(word1, "foo");
411     CHECK_EQ(word2, "");
412     CHECK_EQ(word3, "");
413     CHECK(r.PartialMatch("bar", &word1, &word2, &word3));
414     CHECK_EQ(word1, "");
415     CHECK_EQ(word2, "bar");
416     CHECK_EQ(word3, "");
417     CHECK(r.PartialMatch("baz", &word1, &word2, &word3));
418     CHECK_EQ(word1, "");
419     CHECK_EQ(word2, "");
420     CHECK_EQ(word3, "baz");
421     CHECK(!r.PartialMatch("f", &word1, &word2, &word3));
422    
423     string a;
424     CHECK(RE("(foo)|hello").FullMatch("hello", &a));
425     CHECK_EQ(a, "");
426     }
427    
428 nigel 87 static void TestRecursion() {
429 nigel 77 printf("Testing recursion\n");
430    
431 nigel 87 // Get one string that passes (sometimes), one that never does.
432     string text_good("abcdefghijk");
433     string text_bad("acdefghijkl");
434    
435     // According to pcretest, matching text_good against (\w+)*b
436     // requires match_limit of at least 8192, and match_recursion_limit
437     // of at least 37.
438    
439     RE_Options options_ml;
440     options_ml.set_match_limit(8192);
441     RE re("(\\w+)*b", options_ml);
442     CHECK(re.PartialMatch(text_good) == true);
443     CHECK(re.PartialMatch(text_bad) == false);
444     CHECK(re.FullMatch(text_good) == false);
445     CHECK(re.FullMatch(text_bad) == false);
446    
447     options_ml.set_match_limit(1024);
448     RE re2("(\\w+)*b", options_ml);
449     CHECK(re2.PartialMatch(text_good) == false); // because of match_limit
450     CHECK(re2.PartialMatch(text_bad) == false);
451     CHECK(re2.FullMatch(text_good) == false);
452     CHECK(re2.FullMatch(text_bad) == false);
453    
454     RE_Options options_mlr;
455     options_mlr.set_match_limit_recursion(50);
456     RE re3("(\\w+)*b", options_mlr);
457     CHECK(re3.PartialMatch(text_good) == true);
458     CHECK(re3.PartialMatch(text_bad) == false);
459     CHECK(re3.FullMatch(text_good) == false);
460     CHECK(re3.FullMatch(text_bad) == false);
461    
462     options_mlr.set_match_limit_recursion(10);
463     RE re4("(\\w+)*b", options_mlr);
464     CHECK(re4.PartialMatch(text_good) == false);
465     CHECK(re4.PartialMatch(text_bad) == false);
466     CHECK(re4.FullMatch(text_good) == false);
467     CHECK(re4.FullMatch(text_bad) == false);
468 nigel 77 }
469    
470 nigel 93 // A meta-quoted string, interpreted as a pattern, should always match
471     // the original unquoted string.
472     static void TestQuoteMeta(string unquoted, RE_Options options = RE_Options()) {
473     string quoted = RE::QuoteMeta(unquoted);
474     RE re(quoted, options);
475     CHECK(re.FullMatch(unquoted));
476     }
477    
478     // A string containing meaningful regexp characters, which is then meta-
479     // quoted, should not generally match a string the unquoted string does.
480     static void NegativeTestQuoteMeta(string unquoted, string should_not_match,
481     RE_Options options = RE_Options()) {
482     string quoted = RE::QuoteMeta(unquoted);
483     RE re(quoted, options);
484     CHECK(!re.FullMatch(should_not_match));
485     }
486    
487     // Tests that quoted meta characters match their original strings,
488     // and that a few things that shouldn't match indeed do not.
489     static void TestQuotaMetaSimple() {
490     TestQuoteMeta("foo");
491     TestQuoteMeta("foo.bar");
492     TestQuoteMeta("foo\\.bar");
493     TestQuoteMeta("[1-9]");
494     TestQuoteMeta("1.5-2.0?");
495     TestQuoteMeta("\\d");
496     TestQuoteMeta("Who doesn't like ice cream?");
497     TestQuoteMeta("((a|b)c?d*e+[f-h]i)");
498     TestQuoteMeta("((?!)xxx).*yyy");
499     TestQuoteMeta("([");
500     }
501    
502     static void TestQuoteMetaSimpleNegative() {
503     NegativeTestQuoteMeta("foo", "bar");
504     NegativeTestQuoteMeta("...", "bar");
505     NegativeTestQuoteMeta("\\.", ".");
506     NegativeTestQuoteMeta("\\.", "..");
507     NegativeTestQuoteMeta("(a)", "a");
508     NegativeTestQuoteMeta("(a|b)", "a");
509     NegativeTestQuoteMeta("(a|b)", "(a)");
510     NegativeTestQuoteMeta("(a|b)", "a|b");
511     NegativeTestQuoteMeta("[0-9]", "0");
512     NegativeTestQuoteMeta("[0-9]", "0-9");
513     NegativeTestQuoteMeta("[0-9]", "[9]");
514     NegativeTestQuoteMeta("((?!)xxx)", "xxx");
515     }
516    
517     static void TestQuoteMetaLatin1() {
518     TestQuoteMeta("3\xb2 = 9");
519     }
520    
// QuoteMeta round-trips for strings containing multi-byte UTF-8 sequences.
// Compiled to an empty function unless SUPPORT_UTF8 is defined.
static void TestQuoteMetaUtf8() {
#ifdef SUPPORT_UTF8
  const RE_Options utf8_options = pcrecpp::UTF8();

  TestQuoteMeta("Pl\xc3\xa1\x63ido Domingo", utf8_options);
  TestQuoteMeta("xyz", utf8_options);                 // No fancy utf8
  TestQuoteMeta("\xc2\xb0", utf8_options);            // 2-byte utf8 (degree symbol)
  TestQuoteMeta("27\xc2\xb0 degrees", utf8_options);  // As a middle character
  TestQuoteMeta("\xe2\x80\xb3", utf8_options);        // 3-byte utf8 (double prime)
  TestQuoteMeta("\xf0\x9d\x85\x9f", utf8_options);    // 4-byte utf8 (music note)

  // Interpreted as Latin-1, but should still work
  TestQuoteMeta("27\xc2\xb0");

  // 2-byte utf (degree symbol): must not match the text in which each byte
  // of the sequence is preceded by a backslash.
  NegativeTestQuoteMeta("27\xc2\xb0",
                        "27\\\xc2\\\xb0",
                        utf8_options);
#endif
}
535    
536     static void TestQuoteMetaAll() {
537     printf("Testing QuoteMeta\n");
538     TestQuotaMetaSimple();
539     TestQuoteMetaSimpleNegative();
540     TestQuoteMetaLatin1();
541     TestQuoteMetaUtf8();
542     }
543    
544 nigel 81 //
545     // Options tests contributed by
546     // Giuseppe Maxia, CTO, Stardata s.r.l.
547     // July 2005
548     //
549     static void GetOneOptionResult(
550     const char *option_name,
551     const char *regex,
552     const char *str,
553     RE_Options options,
554     bool full,
555     string expected) {
556 nigel 77
557 nigel 81 printf("Testing Option <%s>\n", option_name);
558     if(VERBOSE_TEST)
559     printf("/%s/ finds \"%s\" within \"%s\" \n",
560     regex,
561     expected.c_str(),
562     str);
563     string captured("");
564     if (full)
565     RE(regex,options).FullMatch(str, &captured);
566     else
567     RE(regex,options).PartialMatch(str, &captured);
568     CHECK_EQ(captured, expected);
569     }
570    
571     static void TestOneOption(
572     const char *option_name,
573     const char *regex,
574     const char *str,
575     RE_Options options,
576     bool full,
577     bool assertive = true) {
578    
579     printf("Testing Option <%s>\n", option_name);
580     if (VERBOSE_TEST)
581     printf("'%s' %s /%s/ \n",
582     str,
583     (assertive? "matches" : "doesn't match"),
584     regex);
585     if (assertive) {
586     if (full)
587     CHECK(RE(regex,options).FullMatch(str));
588     else
589     CHECK(RE(regex,options).PartialMatch(str));
590     } else {
591     if (full)
592     CHECK(!RE(regex,options).FullMatch(str));
593     else
594     CHECK(!RE(regex,options).PartialMatch(str));
595     }
596     }
597    
598     static void Test_CASELESS() {
599     RE_Options options;
600     RE_Options options2;
601    
602     options.set_caseless(true);
603     TestOneOption("CASELESS (class)", "HELLO", "hello", options, false);
604     TestOneOption("CASELESS (class2)", "HELLO", "hello", options2.set_caseless(true), false);
605     TestOneOption("CASELESS (class)", "^[A-Z]+$", "Hello", options, false);
606    
607     TestOneOption("CASELESS (function)", "HELLO", "hello", pcrecpp::CASELESS(), false);
608     TestOneOption("CASELESS (function)", "^[A-Z]+$", "Hello", pcrecpp::CASELESS(), false);
609     options.set_caseless(false);
610     TestOneOption("no CASELESS", "HELLO", "hello", options, false, false);
611     }
612    
613     static void Test_MULTILINE() {
614     RE_Options options;
615     RE_Options options2;
616     const char *str = "HELLO\n" "cruel\n" "world\n";
617    
618     options.set_multiline(true);
619     TestOneOption("MULTILINE (class)", "^cruel$", str, options, false);
620     TestOneOption("MULTILINE (class2)", "^cruel$", str, options2.set_multiline(true), false);
621     TestOneOption("MULTILINE (function)", "^cruel$", str, pcrecpp::MULTILINE(), false);
622     options.set_multiline(false);
623     TestOneOption("no MULTILINE", "^cruel$", str, options, false, false);
624     }
625    
626     static void Test_DOTALL() {
627     RE_Options options;
628     RE_Options options2;
629     const char *str = "HELLO\n" "cruel\n" "world";
630    
631     options.set_dotall(true);
632     TestOneOption("DOTALL (class)", "HELLO.*world", str, options, true);
633     TestOneOption("DOTALL (class2)", "HELLO.*world", str, options2.set_dotall(true), true);
634     TestOneOption("DOTALL (function)", "HELLO.*world", str, pcrecpp::DOTALL(), true);
635     options.set_dotall(false);
636     TestOneOption("no DOTALL", "HELLO.*world", str, options, true, false);
637     }
638    
639     static void Test_DOLLAR_ENDONLY() {
640     RE_Options options;
641     RE_Options options2;
642     const char *str = "HELLO world\n";
643    
644     TestOneOption("no DOLLAR_ENDONLY", "world$", str, options, false);
645     options.set_dollar_endonly(true);
646     TestOneOption("DOLLAR_ENDONLY 1", "world$", str, options, false, false);
647     TestOneOption("DOLLAR_ENDONLY 2", "world$", str, options2.set_dollar_endonly(true), false, false);
648     }
649    
650     static void Test_EXTRA() {
651     RE_Options options;
652     const char *str = "HELLO";
653    
654     options.set_extra(true);
655     TestOneOption("EXTRA 1", "\\HELL\\O", str, options, true, false );
656     TestOneOption("EXTRA 2", "\\HELL\\O", str, RE_Options().set_extra(true), true, false );
657     options.set_extra(false);
658     TestOneOption("no EXTRA", "\\HELL\\O", str, options, true );
659     }
660    
661     static void Test_EXTENDED() {
662     RE_Options options;
663     RE_Options options2;
664     const char *str = "HELLO world";
665    
666     options.set_extended(true);
667     TestOneOption("EXTENDED (class)", "HELLO world", str, options, false, false);
668     TestOneOption("EXTENDED (class2)", "HELLO world", str, options2.set_extended(true), false, false);
669     TestOneOption("EXTENDED (class)",
670     "^ HE L{2} O "
671     "\\s+ "
672     "\\w+ $ ",
673     str,
674     options,
675     false);
676    
677     TestOneOption("EXTENDED (function)", "HELLO world", str, pcrecpp::EXTENDED(), false, false);
678     TestOneOption("EXTENDED (function)",
679     "^ HE L{2} O "
680     "\\s+ "
681     "\\w+ $ ",
682     str,
683     pcrecpp::EXTENDED(),
684     false);
685    
686     options.set_extended(false);
687     TestOneOption("no EXTENDED", "HELLO world", str, options, false);
688     }
689    
690     static void Test_NO_AUTO_CAPTURE() {
691     RE_Options options;
692     const char *str = "HELLO world";
693     string captured;
694    
695     printf("Testing Option <no NO_AUTO_CAPTURE>\n");
696     if (VERBOSE_TEST)
697     printf("parentheses capture text\n");
698     RE re("(world|universe)$", options);
699     CHECK(re.Extract("\\1", str , &captured));
700     CHECK_EQ(captured, "world");
701     options.set_no_auto_capture(true);
702     printf("testing Option <NO_AUTO_CAPTURE>\n");
703     if (VERBOSE_TEST)
704     printf("parentheses do not capture text\n");
705     re.Extract("\\1",str, &captured );
706     CHECK_EQ(captured, "world");
707     }
708    
709     static void Test_UNGREEDY() {
710     RE_Options options;
711     const char *str = "HELLO, 'this' is the 'world'";
712    
713     options.set_ungreedy(true);
714     GetOneOptionResult("UNGREEDY 1", "('.*')", str, options, false, "'this'" );
715     GetOneOptionResult("UNGREEDY 2", "('.*')", str, RE_Options().set_ungreedy(true), false, "'this'" );
716     GetOneOptionResult("UNGREEDY", "('.*?')", str, options, false, "'this' is the 'world'" );
717    
718     options.set_ungreedy(false);
719     GetOneOptionResult("no UNGREEDY", "('.*')", str, options, false, "'this' is the 'world'" );
720     GetOneOptionResult("no UNGREEDY", "('.*?')", str, options, false, "'this'" );
721     }
722    
723     static void Test_all_options() {
724     const char *str = "HELLO\n" "cruel\n" "world";
725     RE_Options options;
726     options.set_all_options(PCRE_CASELESS | PCRE_DOTALL);
727    
728     TestOneOption("all_options (CASELESS|DOTALL)", "^hello.*WORLD", str , options, false);
729     options.set_all_options(0);
730     TestOneOption("all_options (0)", "^hello.*WORLD", str , options, false, false);
731     options.set_all_options(PCRE_MULTILINE | PCRE_EXTENDED);
732    
733     TestOneOption("all_options (MULTILINE|EXTENDED)", " ^ c r u e l $ ", str, options, false);
734     TestOneOption("all_options (MULTILINE|EXTENDED) with constructor",
735     " ^ c r u e l $ ",
736     str,
737     RE_Options(PCRE_MULTILINE | PCRE_EXTENDED),
738     false);
739    
740     TestOneOption("all_options (MULTILINE|EXTENDED) with concatenation",
741     " ^ c r u e l $ ",
742     str,
743     RE_Options()
744     .set_multiline(true)
745     .set_extended(true),
746     false);
747    
748     options.set_all_options(0);
749     TestOneOption("all_options (0)", "^ c r u e l $", str, options, false, false);
750    
751     }
752    
753     static void TestOptions() {
754     printf("Testing Options\n");
755     Test_CASELESS();
756     Test_MULTILINE();
757     Test_DOTALL();
758     Test_DOLLAR_ENDONLY();
759     Test_EXTENDED();
760     Test_NO_AUTO_CAPTURE();
761     Test_UNGREEDY();
762     Test_EXTRA();
763     Test_all_options();
764     }
765    
766 nigel 93 static void TestConstructors() {
767     printf("Testing constructors\n");
768    
769     RE_Options options;
770     options.set_dotall(true);
771     const char *str = "HELLO\n" "cruel\n" "world";
772    
773     RE orig("HELLO.*world", options);
774     CHECK(orig.FullMatch(str));
775    
776     RE copy1(orig);
777     CHECK(copy1.FullMatch(str));
778    
779     RE copy2("not a match");
780     CHECK(!copy2.FullMatch(str));
781     copy2 = copy1;
782     CHECK(copy2.FullMatch(str));
783     copy2 = orig;
784     CHECK(copy2.FullMatch(str));
785    
786     // Make sure when we assign to ourselves, nothing bad happens
787     orig = orig;
788     copy1 = copy1;
789     copy2 = copy2;
790     CHECK(orig.FullMatch(str));
791     CHECK(copy1.FullMatch(str));
792     CHECK(copy2.FullMatch(str));
793     }
794    
795 nigel 77 int main(int argc, char** argv) {
796     // Treat any flag as --help
797     if (argc > 1 && argv[1][0] == '-') {
798     printf("Usage: %s [timing1|timing2|timing3 num-iters]\n"
799     " If 'timingX ###' is specified, run the given timing test\n"
800     " with the given number of iterations, rather than running\n"
801     " the default corectness test.\n", argv[0]);
802     return 0;
803     }
804    
805     if (argc > 1) {
806     if ( argc == 2 || atoi(argv[2]) == 0) {
807     printf("timing mode needs a num-iters argument\n");
808     return 1;
809     }
810     if (!strcmp(argv[1], "timing1"))
811     Timing1(atoi(argv[2]));
812     else if (!strcmp(argv[1], "timing2"))
813     Timing2(atoi(argv[2]));
814     else if (!strcmp(argv[1], "timing3"))
815     Timing3(atoi(argv[2]));
816     else
817     printf("Unknown argument '%s'\n", argv[1]);
818     return 0;
819     }
820    
821     printf("Testing FullMatch\n");
822    
823     int i;
824     string s;
825    
826     /***** FullMatch with no args *****/
827    
828     CHECK(RE("h.*o").FullMatch("hello"));
829 ph10 179 CHECK(!RE("h.*o").FullMatch("othello")); // Must be anchored at front
830     CHECK(!RE("h.*o").FullMatch("hello!")); // Must be anchored at end
831     CHECK(RE("a*").FullMatch("aaaa")); // Fullmatch with normal op
832     CHECK(RE("a*?").FullMatch("aaaa")); // Fullmatch with nongreedy op
833     CHECK(RE("a*?\\z").FullMatch("aaaa")); // Two unusual ops
834 nigel 77
835     /***** FullMatch with args *****/
836    
837     // Zero-arg
838     CHECK(RE("\\d+").FullMatch("1001"));
839    
840     // Single-arg
841     CHECK(RE("(\\d+)").FullMatch("1001", &i));
842     CHECK_EQ(i, 1001);
843     CHECK(RE("(-?\\d+)").FullMatch("-123", &i));
844     CHECK_EQ(i, -123);
845     CHECK(!RE("()\\d+").FullMatch("10", &i));
846     CHECK(!RE("(\\d+)").FullMatch("1234567890123456789012345678901234567890",
847     &i));
848    
849     // Digits surrounding integer-arg
850     CHECK(RE("1(\\d*)4").FullMatch("1234", &i));
851     CHECK_EQ(i, 23);
852     CHECK(RE("(\\d)\\d+").FullMatch("1234", &i));
853     CHECK_EQ(i, 1);
854     CHECK(RE("(-\\d)\\d+").FullMatch("-1234", &i));
855     CHECK_EQ(i, -1);
856     CHECK(RE("(\\d)").PartialMatch("1234", &i));
857     CHECK_EQ(i, 1);
858     CHECK(RE("(-\\d)").PartialMatch("-1234", &i));
859     CHECK_EQ(i, -1);
860    
861     // String-arg
862     CHECK(RE("h(.*)o").FullMatch("hello", &s));
863     CHECK_EQ(s, string("ell"));
864    
865     // StringPiece-arg
866     StringPiece sp;
867     CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &sp, &i));
868     CHECK_EQ(sp.size(), 4);
869     CHECK(memcmp(sp.data(), "ruby", 4) == 0);
870     CHECK_EQ(i, 1234);
871    
872     // Multi-arg
873     CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &s, &i));
874     CHECK_EQ(s, string("ruby"));
875     CHECK_EQ(i, 1234);
876    
877 ph10 263 // Ignore non-void* NULL arg
878     CHECK(RE("he(.*)lo").FullMatch("hello", (char*)NULL));
879     CHECK(RE("h(.*)o").FullMatch("hello", (string*)NULL));
880     CHECK(RE("h(.*)o").FullMatch("hello", (StringPiece*)NULL));
881     CHECK(RE("(.*)").FullMatch("1234", (int*)NULL));
882 ph10 302 #ifdef HAVE_LONG_LONG
883 ph10 263 CHECK(RE("(.*)").FullMatch("1234567890123456", (long long*)NULL));
884 ph10 302 #endif
885 ph10 263 CHECK(RE("(.*)").FullMatch("123.4567890123456", (double*)NULL));
886     CHECK(RE("(.*)").FullMatch("123.4567890123456", (float*)NULL));
887    
888     // Fail on non-void* NULL arg if the match doesn't parse for the given type.
889     CHECK(!RE("h(.*)lo").FullMatch("hello", &s, (char*)NULL));
890     CHECK(!RE("(.*)").FullMatch("hello", (int*)NULL));
891     CHECK(!RE("(.*)").FullMatch("1234567890123456", (int*)NULL));
892     CHECK(!RE("(.*)").FullMatch("hello", (double*)NULL));
893     CHECK(!RE("(.*)").FullMatch("hello", (float*)NULL));
894    
895 nigel 77 // Ignored arg
896     CHECK(RE("(\\w+)(:)(\\d+)").FullMatch("ruby:1234", &s, (void*)NULL, &i));
897     CHECK_EQ(s, string("ruby"));
898     CHECK_EQ(i, 1234);
899    
900     // Type tests
901     {
902     char c;
903     CHECK(RE("(H)ello").FullMatch("Hello", &c));
904     CHECK_EQ(c, 'H');
905     }
906     {
907     unsigned char c;
908     CHECK(RE("(H)ello").FullMatch("Hello", &c));
909     CHECK_EQ(c, static_cast<unsigned char>('H'));
910     }
911     {
912     short v;
913     CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
914     CHECK(RE("(-?\\d+)").FullMatch("-100", &v)); CHECK_EQ(v, -100);
915     CHECK(RE("(-?\\d+)").FullMatch("32767", &v)); CHECK_EQ(v, 32767);
916     CHECK(RE("(-?\\d+)").FullMatch("-32768", &v)); CHECK_EQ(v, -32768);
917     CHECK(!RE("(-?\\d+)").FullMatch("-32769", &v));
918     CHECK(!RE("(-?\\d+)").FullMatch("32768", &v));
919     }
920     {
921     unsigned short v;
922     CHECK(RE("(\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
923     CHECK(RE("(\\d+)").FullMatch("32767", &v)); CHECK_EQ(v, 32767);
924     CHECK(RE("(\\d+)").FullMatch("65535", &v)); CHECK_EQ(v, 65535);
925     CHECK(!RE("(\\d+)").FullMatch("65536", &v));
926     }
927     {
928     int v;
929     static const int max_value = 0x7fffffff;
930     static const int min_value = -max_value - 1;
931     CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
932     CHECK(RE("(-?\\d+)").FullMatch("-100", &v)); CHECK_EQ(v, -100);
933     CHECK(RE("(-?\\d+)").FullMatch("2147483647", &v)); CHECK_EQ(v, max_value);
934     CHECK(RE("(-?\\d+)").FullMatch("-2147483648", &v)); CHECK_EQ(v, min_value);
935     CHECK(!RE("(-?\\d+)").FullMatch("-2147483649", &v));
936     CHECK(!RE("(-?\\d+)").FullMatch("2147483648", &v));
937     }
938     {
939     unsigned int v;
940     static const unsigned int max_value = 0xfffffffful;
941     CHECK(RE("(\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
942     CHECK(RE("(\\d+)").FullMatch("4294967295", &v)); CHECK_EQ(v, max_value);
943     CHECK(!RE("(\\d+)").FullMatch("4294967296", &v));
944     }
945     #ifdef HAVE_LONG_LONG
946 ph10 193 # if defined(__MINGW__) || defined(__MINGW32__)
947     # define LLD "%I64d"
948 ph10 201 # define LLU "%I64u"
949 ph10 193 # else
950     # define LLD "%lld"
951 ph10 201 # define LLU "%llu"
952 ph10 193 # endif
953 nigel 77 {
954     long long v;
955     static const long long max_value = 0x7fffffffffffffffLL;
956     static const long long min_value = -max_value - 1;
957 ph10 257 char buf[32]; // definitely big enough for a long long
958 nigel 77
959     CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
960     CHECK(RE("(-?\\d+)").FullMatch("-100",&v)); CHECK_EQ(v, -100);
961    
962 ph10 257 sprintf(buf, LLD, max_value);
963 nigel 77 CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
964    
965 ph10 257 sprintf(buf, LLD, min_value);
966 nigel 77 CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, min_value);
967    
968 ph10 257 sprintf(buf, LLD, max_value);
969 nigel 77 assert(buf[strlen(buf)-1] != '9');
970     buf[strlen(buf)-1]++;
971     CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
972    
973 ph10 257 sprintf(buf, LLD, min_value);
974 nigel 77 assert(buf[strlen(buf)-1] != '9');
975     buf[strlen(buf)-1]++;
976     CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
977     }
978     #endif
979     #if defined HAVE_UNSIGNED_LONG_LONG && defined HAVE_LONG_LONG
980     {
981     unsigned long long v;
982     long long v2;
983     static const unsigned long long max_value = 0xffffffffffffffffULL;
984 ph10 257 char buf[32]; // definitely big enough for an unsigned long long
985 nigel 77
986     CHECK(RE("(-?\\d+)").FullMatch("100",&v)); CHECK_EQ(v, 100);
987     CHECK(RE("(-?\\d+)").FullMatch("-100",&v2)); CHECK_EQ(v2, -100);
988    
989 ph10 257 sprintf(buf, LLU, max_value);
990 nigel 77 CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
991    
992     assert(buf[strlen(buf)-1] != '9');
993     buf[strlen(buf)-1]++;
994     CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
995     }
996     #endif
997     {
998     float v;
999     CHECK(RE("(.*)").FullMatch("100", &v));
1000     CHECK(RE("(.*)").FullMatch("-100.", &v));
1001     CHECK(RE("(.*)").FullMatch("1e23", &v));
1002     }
1003     {
1004     double v;
1005     CHECK(RE("(.*)").FullMatch("100", &v));
1006     CHECK(RE("(.*)").FullMatch("-100.", &v));
1007     CHECK(RE("(.*)").FullMatch("1e23", &v));
1008     }
1009    
1010     // Check that matching is fully anchored
1011     CHECK(!RE("(\\d+)").FullMatch("x1001", &i));
1012     CHECK(!RE("(\\d+)").FullMatch("1001x", &i));
1013     CHECK(RE("x(\\d+)").FullMatch("x1001", &i)); CHECK_EQ(i, 1001);
1014     CHECK(RE("(\\d+)x").FullMatch("1001x", &i)); CHECK_EQ(i, 1001);
1015    
1016     // Braces
1017     CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcd"));
1018     CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcde"));
1019     CHECK(!RE("[0-9a-f+.-]{5,}").FullMatch("0abc"));
1020    
1021     // Complicated RE
1022     CHECK(RE("foo|bar|[A-Z]").FullMatch("foo"));
1023     CHECK(RE("foo|bar|[A-Z]").FullMatch("bar"));
1024     CHECK(RE("foo|bar|[A-Z]").FullMatch("X"));
1025     CHECK(!RE("foo|bar|[A-Z]").FullMatch("XY"));
1026    
1027     // Check full-match handling (needs '$' tacked on internally)
1028     CHECK(RE("fo|foo").FullMatch("fo"));
1029     CHECK(RE("fo|foo").FullMatch("foo"));
1030     CHECK(RE("fo|foo$").FullMatch("fo"));
1031     CHECK(RE("fo|foo$").FullMatch("foo"));
1032     CHECK(RE("foo$").FullMatch("foo"));
1033     CHECK(!RE("foo\\$").FullMatch("foo$bar"));
1034     CHECK(!RE("fo|bar").FullMatch("fox"));
1035    
1036     // Uncomment the following if we change the handling of '$' to
1037     // prevent it from matching a trailing newline
1038     if (false) {
1039     // Check that we don't get bitten by pcre's special handling of a
1040     // '\n' at the end of the string matching '$'
1041     CHECK(!RE("foo$").PartialMatch("foo\n"));
1042     }
1043    
1044     // Number of args
1045     int a[16];
1046     CHECK(RE("").FullMatch(""));
1047    
1048     memset(a, 0, sizeof(0));
1049     CHECK(RE("(\\d){1}").FullMatch("1",
1050     &a[0]));
1051     CHECK_EQ(a[0], 1);
1052    
1053     memset(a, 0, sizeof(0));
1054     CHECK(RE("(\\d)(\\d)").FullMatch("12",
1055     &a[0], &a[1]));
1056     CHECK_EQ(a[0], 1);
1057     CHECK_EQ(a[1], 2);
1058    
1059     memset(a, 0, sizeof(0));
1060     CHECK(RE("(\\d)(\\d)(\\d)").FullMatch("123",
1061     &a[0], &a[1], &a[2]));
1062     CHECK_EQ(a[0], 1);
1063     CHECK_EQ(a[1], 2);
1064     CHECK_EQ(a[2], 3);
1065    
1066     memset(a, 0, sizeof(0));
1067     CHECK(RE("(\\d)(\\d)(\\d)(\\d)").FullMatch("1234",
1068     &a[0], &a[1], &a[2], &a[3]));
1069     CHECK_EQ(a[0], 1);
1070     CHECK_EQ(a[1], 2);
1071     CHECK_EQ(a[2], 3);
1072     CHECK_EQ(a[3], 4);
1073    
1074     memset(a, 0, sizeof(0));
1075     CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("12345",
1076     &a[0], &a[1], &a[2],
1077     &a[3], &a[4]));
1078     CHECK_EQ(a[0], 1);
1079     CHECK_EQ(a[1], 2);
1080     CHECK_EQ(a[2], 3);
1081     CHECK_EQ(a[3], 4);
1082     CHECK_EQ(a[4], 5);
1083    
1084     memset(a, 0, sizeof(0));
1085     CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("123456",
1086     &a[0], &a[1], &a[2],
1087     &a[3], &a[4], &a[5]));
1088     CHECK_EQ(a[0], 1);
1089     CHECK_EQ(a[1], 2);
1090     CHECK_EQ(a[2], 3);
1091     CHECK_EQ(a[3], 4);
1092     CHECK_EQ(a[4], 5);
1093     CHECK_EQ(a[5], 6);
1094    
1095     memset(a, 0, sizeof(0));
1096     CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("1234567",
1097     &a[0], &a[1], &a[2], &a[3],
1098     &a[4], &a[5], &a[6]));
1099     CHECK_EQ(a[0], 1);
1100     CHECK_EQ(a[1], 2);
1101     CHECK_EQ(a[2], 3);
1102     CHECK_EQ(a[3], 4);
1103     CHECK_EQ(a[4], 5);
1104     CHECK_EQ(a[5], 6);
1105     CHECK_EQ(a[6], 7);
1106    
1107     memset(a, 0, sizeof(0));
1108     CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)"
1109     "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch(
1110     "1234567890123456",
1111     &a[0], &a[1], &a[2], &a[3],
1112     &a[4], &a[5], &a[6], &a[7],
1113     &a[8], &a[9], &a[10], &a[11],
1114     &a[12], &a[13], &a[14], &a[15]));
1115     CHECK_EQ(a[0], 1);
1116     CHECK_EQ(a[1], 2);
1117     CHECK_EQ(a[2], 3);
1118     CHECK_EQ(a[3], 4);
1119     CHECK_EQ(a[4], 5);
1120     CHECK_EQ(a[5], 6);
1121     CHECK_EQ(a[6], 7);
1122     CHECK_EQ(a[7], 8);
1123     CHECK_EQ(a[8], 9);
1124     CHECK_EQ(a[9], 0);
1125     CHECK_EQ(a[10], 1);
1126     CHECK_EQ(a[11], 2);
1127     CHECK_EQ(a[12], 3);
1128     CHECK_EQ(a[13], 4);
1129     CHECK_EQ(a[14], 5);
1130     CHECK_EQ(a[15], 6);
1131    
1132     /***** PartialMatch *****/
1133    
1134     printf("Testing PartialMatch\n");
1135    
1136     CHECK(RE("h.*o").PartialMatch("hello"));
1137     CHECK(RE("h.*o").PartialMatch("othello"));
1138     CHECK(RE("h.*o").PartialMatch("hello!"));
1139     CHECK(RE("((((((((((((((((((((x))))))))))))))))))))").PartialMatch("x"));
1140    
1141 nigel 93 /***** other tests *****/
1142    
1143 nigel 77 RadixTests();
1144     TestReplace();
1145     TestExtract();
1146     TestConsume();
1147     TestFindAndConsume();
1148 nigel 93 TestQuoteMetaAll();
1149 nigel 77 TestMatchNumberPeculiarity();
1150    
1151     // Check the pattern() accessor
1152     {
1153     const string kPattern = "http://([^/]+)/.*";
1154     const RE re(kPattern);
1155     CHECK_EQ(kPattern, re.pattern());
1156     }
1157    
1158     // Check RE error field.
1159     {
1160     RE re("foo");
1161     CHECK(re.error().empty()); // Must have no error
1162     }
1163    
1164     #ifdef SUPPORT_UTF8
1165     // Check UTF-8 handling
1166     {
1167     printf("Testing UTF-8 handling\n");
1168    
1169     // Three Japanese characters (nihongo)
1170 ph10 256 const unsigned char utf8_string[] = {
1171 nigel 77 0xe6, 0x97, 0xa5, // 65e5
1172     0xe6, 0x9c, 0xac, // 672c
1173     0xe8, 0xaa, 0x9e, // 8a9e
1174     0
1175     };
1176 ph10 256 const unsigned char utf8_pattern[] = {
1177 nigel 77 '.',
1178     0xe6, 0x9c, 0xac, // 672c
1179     '.',
1180     0
1181     };
1182    
1183     // Both should match in either mode, bytes or UTF-8
1184     RE re_test1(".........");
1185     CHECK(re_test1.FullMatch(utf8_string));
1186     RE re_test2("...", pcrecpp::UTF8());
1187     CHECK(re_test2.FullMatch(utf8_string));
1188    
1189     // Check that '.' matches one byte or UTF-8 character
1190     // according to the mode.
1191     string ss;
1192     RE re_test3("(.)");
1193     CHECK(re_test3.PartialMatch(utf8_string, &ss));
1194     CHECK_EQ(ss, string("\xe6"));
1195     RE re_test4("(.)", pcrecpp::UTF8());
1196     CHECK(re_test4.PartialMatch(utf8_string, &ss));
1197     CHECK_EQ(ss, string("\xe6\x97\xa5"));
1198    
1199     // Check that string matches itself in either mode
1200     RE re_test5(utf8_string);
1201     CHECK(re_test5.FullMatch(utf8_string));
1202     RE re_test6(utf8_string, pcrecpp::UTF8());
1203     CHECK(re_test6.FullMatch(utf8_string));
1204    
1205     // Check that pattern matches string only in UTF8 mode
1206     RE re_test7(utf8_pattern);
1207     CHECK(!re_test7.FullMatch(utf8_string));
1208     RE re_test8(utf8_pattern, pcrecpp::UTF8());
1209     CHECK(re_test8.FullMatch(utf8_string));
1210     }
1211    
1212     // Check that ungreedy, UTF8 regular expressions don't match when they
1213     // oughtn't -- see bug 82246.
1214     {
1215     // This code always worked.
1216     const char* pattern = "\\w+X";
1217     const string target = "a aX";
1218     RE match_sentence(pattern);
1219     RE match_sentence_re(pattern, pcrecpp::UTF8());
1220    
1221     CHECK(!match_sentence.FullMatch(target));
1222     CHECK(!match_sentence_re.FullMatch(target));
1223     }
1224    
1225     {
1226     const char* pattern = "(?U)\\w+X";
1227     const string target = "a aX";
1228     RE match_sentence(pattern);
1229     RE match_sentence_re(pattern, pcrecpp::UTF8());
1230    
1231     CHECK(!match_sentence.FullMatch(target));
1232     CHECK(!match_sentence_re.FullMatch(target));
1233     }
1234     #endif /* def SUPPORT_UTF8 */
1235    
1236     printf("Testing error reporting\n");
1237    
1238     { RE re("a\\1"); CHECK(!re.error().empty()); }
1239     {
1240     RE re("a[x");
1241     CHECK(!re.error().empty());
1242     }
1243     {
1244     RE re("a[z-a]");
1245     CHECK(!re.error().empty());
1246     }
1247     {
1248     RE re("a[[:foobar:]]");
1249     CHECK(!re.error().empty());
1250     }
1251     {
1252     RE re("a(b");
1253     CHECK(!re.error().empty());
1254     }
1255     {
1256     RE re("a\\");
1257     CHECK(!re.error().empty());
1258     }
1259    
1260 nigel 87 // Test that recursion is stopped
1261     TestRecursion();
1262 nigel 77
1263 nigel 81 // Test Options
1264     if (getenv("VERBOSE_TEST") != NULL)
1265     VERBOSE_TEST = true;
1266     TestOptions();
1267    
1268 nigel 93 // Test the constructors
1269     TestConstructors();
1270    
1271 nigel 77 // Done
1272     printf("OK\n");
1273    
1274     return 0;
1275     }

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12