/[pcre]/code/trunk/pcrecpp_unittest.cc
ViewVC logotype

Contents of /code/trunk/pcrecpp_unittest.cc

Parent Directory Parent Directory | Revision Log Revision Log


Revision 326 - (hide annotations) (download)
Sat Mar 8 17:24:02 2008 UTC (6 years, 7 months ago) by ph10
File size: 38916 byte(s)
Craig's patch to the QuoteMeta function in pcrecpp.cc so that it escapes the
NUL character as backslash + 0 rather than backslash + NUL, because PCRE
doesn't support NULs in patterns.

1 nigel 93 // -*- coding: utf-8 -*-
2     //
3     // Copyright (c) 2005 - 2006, Google Inc.
4 nigel 77 // All rights reserved.
5     //
6     // Redistribution and use in source and binary forms, with or without
7     // modification, are permitted provided that the following conditions are
8     // met:
9     //
10     // * Redistributions of source code must retain the above copyright
11     // notice, this list of conditions and the following disclaimer.
12     // * Redistributions in binary form must reproduce the above
13     // copyright notice, this list of conditions and the following disclaimer
14     // in the documentation and/or other materials provided with the
15     // distribution.
16     // * Neither the name of Google Inc. nor the names of its
17     // contributors may be used to endorse or promote products derived from
18     // this software without specific prior written permission.
19     //
20     // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21     // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22     // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23     // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24     // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25     // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26     // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27     // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28     // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29     // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30     // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31     //
32     // Author: Sanjay Ghemawat
33     //
34     // TODO: Test extractions for PartialMatch/Consume
35    
36 ph10 200 #ifdef HAVE_CONFIG_H
37 ph10 236 #include "config.h"
38 ph10 200 #endif
39 ph10 199
40 nigel 77 #include <stdio.h>
41 nigel 91 #include <cassert>
42 nigel 77 #include <vector>
43     #include "pcrecpp.h"
44    
45     using pcrecpp::StringPiece;
46     using pcrecpp::RE;
47     using pcrecpp::RE_Options;
48     using pcrecpp::Hex;
49     using pcrecpp::Octal;
50     using pcrecpp::CRadix;
51    
52 nigel 81 static bool VERBOSE_TEST = false;
53    
// CHECK dies with a fatal error if condition is not true.  It is *not*
// controlled by NDEBUG, so the check will be executed regardless of
// compilation mode.  Therefore, it is safe to do things like:
//    CHECK_EQ(fp->Write(x), 4)
//
// On failure the file name, line number and the stringified condition are
// written to stderr and the process exits with status 1.
#define CHECK(condition) do {                           \
  if (!(condition)) {                                   \
    fprintf(stderr, "%s:%d: Check failed: %s\n",        \
            __FILE__, __LINE__, #condition);            \
    exit(1);                                            \
  }                                                     \
} while (0)

// CHECK_EQ(a, b) dies unless a == b.  Each operand is parenthesized so that
// an argument containing an operator of lower precedence than `==` (for
// example `x & mask`) is compared as a whole instead of being regrouped
// around the `==`.  The failure message therefore shows "(a) == (b)".
#define CHECK_EQ(a, b) CHECK((a) == (b))
67    
68     static void Timing1(int num_iters) {
69     // Same pattern lots of times
70     RE pattern("ruby:\\d+");
71     StringPiece p("ruby:1234");
72     for (int j = num_iters; j > 0; j--) {
73     CHECK(pattern.FullMatch(p));
74     }
75     }
76    
77     static void Timing2(int num_iters) {
78     // Same pattern lots of times
79     RE pattern("ruby:(\\d+)");
80     int i;
81     for (int j = num_iters; j > 0; j--) {
82     CHECK(pattern.FullMatch("ruby:1234", &i));
83     CHECK_EQ(i, 1234);
84     }
85     }
86    
87     static void Timing3(int num_iters) {
88     string text_string;
89     for (int j = num_iters; j > 0; j--) {
90     text_string += "this is another line\n";
91     }
92    
93     RE line_matcher(".*\n");
94     string line;
95     StringPiece text(text_string);
96     int counter = 0;
97     while (line_matcher.Consume(&text)) {
98     counter++;
99     }
100     printf("Matched %d lines\n", counter);
101     }
102    
#if 0  // uncomment this if you have a way of defining VirtualProcessSize()

// Checks for memory leaks: compiles 100000 distinct patterns, samples the
// process size (via the externally supplied VirtualProcessSize()) after the
// first 50000 and again at the end, and requires the relative growth between
// the two samples to stay below 2%.
static void LeakTest() {
  unsigned long long initial_size = 0;
  for (int i = 0; i < 100000; i++) {
    if (i == 50000) {
      initial_size = VirtualProcessSize();
      printf("Size after 50000: %llu\n", initial_size);
    }
    char buf[100];  // definitely big enough
    sprintf(buf, "pat%09d", i);
    RE newre(buf);
  }
  // Same type as initial_size so that it agrees with the %llu conversion.
  const unsigned long long final_size = VirtualProcessSize();
  printf("Size after 100000: %llu\n", final_size);
  // Subtract in floating point: if the process shrank, an unsigned
  // subtraction would wrap around to a huge value and fail the check.
  const double growth =
      (double(final_size) - double(initial_size)) / double(final_size);
  printf("Growth: %0.2f%%", growth * 100);
  CHECK(growth < 0.02);  // Allow < 2% growth
}

#endif
125    
126     static void RadixTests() {
127     printf("Testing hex\n");
128    
129     #define CHECK_HEX(type, value) \
130     do { \
131     type v; \
132     CHECK(RE("([0-9a-fA-F]+)[uUlL]*").FullMatch(#value, Hex(&v))); \
133     CHECK_EQ(v, 0x ## value); \
134     CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0x" #value, CRadix(&v))); \
135     CHECK_EQ(v, 0x ## value); \
136     } while(0)
137    
138     CHECK_HEX(short, 2bad);
139     CHECK_HEX(unsigned short, 2badU);
140     CHECK_HEX(int, dead);
141     CHECK_HEX(unsigned int, deadU);
142     CHECK_HEX(long, 7eadbeefL);
143     CHECK_HEX(unsigned long, deadbeefUL);
144     #ifdef HAVE_LONG_LONG
145     CHECK_HEX(long long, 12345678deadbeefLL);
146     #endif
147     #ifdef HAVE_UNSIGNED_LONG_LONG
148     CHECK_HEX(unsigned long long, cafebabedeadbeefULL);
149     #endif
150    
151     #undef CHECK_HEX
152    
153     printf("Testing octal\n");
154    
155     #define CHECK_OCTAL(type, value) \
156     do { \
157     type v; \
158     CHECK(RE("([0-7]+)[uUlL]*").FullMatch(#value, Octal(&v))); \
159     CHECK_EQ(v, 0 ## value); \
160     CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0" #value, CRadix(&v))); \
161     CHECK_EQ(v, 0 ## value); \
162     } while(0)
163    
164     CHECK_OCTAL(short, 77777);
165     CHECK_OCTAL(unsigned short, 177777U);
166     CHECK_OCTAL(int, 17777777777);
167     CHECK_OCTAL(unsigned int, 37777777777U);
168     CHECK_OCTAL(long, 17777777777L);
169     CHECK_OCTAL(unsigned long, 37777777777UL);
170     #ifdef HAVE_LONG_LONG
171     CHECK_OCTAL(long long, 777777777777777777777LL);
172     #endif
173     #ifdef HAVE_UNSIGNED_LONG_LONG
174     CHECK_OCTAL(unsigned long long, 1777777777777777777777ULL);
175     #endif
176    
177     #undef CHECK_OCTAL
178    
179     printf("Testing decimal\n");
180    
181     #define CHECK_DECIMAL(type, value) \
182     do { \
183     type v; \
184     CHECK(RE("(-?[0-9]+)[uUlL]*").FullMatch(#value, &v)); \
185     CHECK_EQ(v, value); \
186     CHECK(RE("(-?[0-9a-fA-FxX]+)[uUlL]*").FullMatch(#value, CRadix(&v))); \
187     CHECK_EQ(v, value); \
188     } while(0)
189    
190     CHECK_DECIMAL(short, -1);
191     CHECK_DECIMAL(unsigned short, 9999);
192     CHECK_DECIMAL(int, -1000);
193     CHECK_DECIMAL(unsigned int, 12345U);
194     CHECK_DECIMAL(long, -10000000L);
195     CHECK_DECIMAL(unsigned long, 3083324652U);
196     #ifdef HAVE_LONG_LONG
197     CHECK_DECIMAL(long long, -100000000000000LL);
198     #endif
199     #ifdef HAVE_UNSIGNED_LONG_LONG
200     CHECK_DECIMAL(unsigned long long, 1234567890987654321ULL);
201     #endif
202    
203     #undef CHECK_DECIMAL
204    
205     }
206    
207     static void TestReplace() {
208     printf("Testing Replace\n");
209    
210     struct ReplaceTest {
211     const char *regexp;
212     const char *rewrite;
213     const char *original;
214     const char *single;
215     const char *global;
216 ph10 297 int global_count; // the expected return value from ReplaceAll
217 nigel 77 };
218     static const ReplaceTest tests[] = {
219     { "(qu|[b-df-hj-np-tv-z]*)([a-z]+)",
220     "\\2\\1ay",
221     "the quick brown fox jumps over the lazy dogs.",
222     "ethay quick brown fox jumps over the lazy dogs.",
223 ph10 297 "ethay ickquay ownbray oxfay umpsjay overay ethay azylay ogsday.",
224     9 },
225 nigel 77 { "\\w+",
226     "\\0-NOSPAM",
227     "paul.haahr@google.com",
228     "paul-NOSPAM.haahr@google.com",
229 ph10 297 "paul-NOSPAM.haahr-NOSPAM@google-NOSPAM.com-NOSPAM",
230     4 },
231 nigel 77 { "^",
232     "(START)",
233     "foo",
234     "(START)foo",
235 ph10 297 "(START)foo",
236     1 },
237 nigel 77 { "^",
238     "(START)",
239     "",
240     "(START)",
241 ph10 297 "(START)",
242     1 },
243 nigel 77 { "$",
244     "(END)",
245     "",
246     "(END)",
247 ph10 297 "(END)",
248     1 },
249 nigel 77 { "b",
250     "bb",
251     "ababababab",
252     "abbabababab",
253 ph10 297 "abbabbabbabbabb",
254     5 },
255 nigel 77 { "b",
256     "bb",
257     "bbbbbb",
258     "bbbbbbb",
259 ph10 297 "bbbbbbbbbbbb",
260     6 },
261 nigel 77 { "b+",
262     "bb",
263     "bbbbbb",
264     "bb",
265 ph10 297 "bb",
266     1 },
267 nigel 77 { "b*",
268     "bb",
269     "bbbbbb",
270     "bb",
271 ph10 297 "bb",
272     1 },
273 nigel 77 { "b*",
274     "bb",
275     "aaaaa",
276     "bbaaaaa",
277 ph10 297 "bbabbabbabbabbabb",
278     6 },
279 nigel 91 { "b*",
280     "bb",
281     "aa\naa\n",
282     "bbaa\naa\n",
283 ph10 297 "bbabbabb\nbbabbabb\nbb",
284     7 },
285 nigel 91 { "b*",
286     "bb",
287     "aa\raa\r",
288     "bbaa\raa\r",
289 ph10 297 "bbabbabb\rbbabbabb\rbb",
290     7 },
291 nigel 91 { "b*",
292     "bb",
293     "aa\r\naa\r\n",
294     "bbaa\r\naa\r\n",
295 ph10 297 "bbabbabb\r\nbbabbabb\r\nbb",
296     7 },
297 nigel 91 #ifdef SUPPORT_UTF8
298     { "b*",
299     "bb",
300     "\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8", // utf8
301     "bb\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8",
302 ph10 297 "bb\xE3\x83\x9B""bb""\xE3\x83\xBC""bb""\xE3\x83\xA0""bb""\xE3\x81\xB8""bb",
303     5 },
304 nigel 91 { "b*",
305     "bb",
306     "\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n", // utf8
307     "bb\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n",
308     ("bb\xE3\x83\x9B""bb\r\nbb""\xE3\x83\xBC""bb\rbb""\xE3\x83\xA0"
309 ph10 297 "bb\nbb""\xE3\x81\xB8""bb\r\nbb"),
310     9 },
311 nigel 91 #endif
312 ph10 297 { "", NULL, NULL, NULL, NULL, 0 }
313 nigel 77 };
314    
315 nigel 91 #ifdef SUPPORT_UTF8
316     const bool support_utf8 = true;
317     #else
318     const bool support_utf8 = false;
319     #endif
320    
321 nigel 77 for (const ReplaceTest *t = tests; t->original != NULL; ++t) {
322 nigel 91 RE re(t->regexp, RE_Options(PCRE_NEWLINE_CRLF).set_utf8(support_utf8));
323     assert(re.error().empty());
324 nigel 77 string one(t->original);
325 nigel 91 CHECK(re.Replace(t->rewrite, &one));
326 nigel 77 CHECK_EQ(one, t->single);
327     string all(t->original);
328 ph10 297 const int replace_count = re.GlobalReplace(t->rewrite, &all);
329 nigel 77 CHECK_EQ(all, t->global);
330 ph10 297 CHECK_EQ(replace_count, t->global_count);
331 nigel 77 }
332 nigel 91
333     // One final test: test \r\n replacement when we're not in CRLF mode
334     {
335     RE re("b*", RE_Options(PCRE_NEWLINE_CR).set_utf8(support_utf8));
336     assert(re.error().empty());
337     string all("aa\r\naa\r\n");
338 ph10 297 CHECK_EQ(re.GlobalReplace("bb", &all), 9);
339 nigel 91 CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
340     }
341     {
342     RE re("b*", RE_Options(PCRE_NEWLINE_LF).set_utf8(support_utf8));
343     assert(re.error().empty());
344     string all("aa\r\naa\r\n");
345 ph10 297 CHECK_EQ(re.GlobalReplace("bb", &all), 9);
346 nigel 91 CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
347     }
348     // TODO: test what happens when no PCRE_NEWLINE_* flag is set.
349     // Alas, the answer depends on how pcre was compiled.
350 nigel 77 }
351    
352     static void TestExtract() {
353     printf("Testing Extract\n");
354    
355     string s;
356    
357     CHECK(RE("(.*)@([^.]*)").Extract("\\2!\\1", "boris@kremvax.ru", &s));
358     CHECK_EQ(s, "kremvax!boris");
359    
360     // check the RE interface as well
361     CHECK(RE(".*").Extract("'\\0'", "foo", &s));
362     CHECK_EQ(s, "'foo'");
363     CHECK(!RE("bar").Extract("'\\0'", "baz", &s));
364     CHECK_EQ(s, "'foo'");
365     }
366    
367     static void TestConsume() {
368     printf("Testing Consume\n");
369    
370     string word;
371    
372     string s(" aaa b!@#$@#$cccc");
373     StringPiece input(s);
374    
375     RE r("\\s*(\\w+)"); // matches a word, possibly proceeded by whitespace
376     CHECK(r.Consume(&input, &word));
377     CHECK_EQ(word, "aaa");
378     CHECK(r.Consume(&input, &word));
379     CHECK_EQ(word, "b");
380     CHECK(! r.Consume(&input, &word));
381     }
382    
383     static void TestFindAndConsume() {
384     printf("Testing FindAndConsume\n");
385    
386     string word;
387    
388     string s(" aaa b!@#$@#$cccc");
389     StringPiece input(s);
390    
391     RE r("(\\w+)"); // matches a word
392     CHECK(r.FindAndConsume(&input, &word));
393     CHECK_EQ(word, "aaa");
394     CHECK(r.FindAndConsume(&input, &word));
395     CHECK_EQ(word, "b");
396     CHECK(r.FindAndConsume(&input, &word));
397     CHECK_EQ(word, "cccc");
398     CHECK(! r.FindAndConsume(&input, &word));
399     }
400    
401     static void TestMatchNumberPeculiarity() {
402     printf("Testing match-number peculiaraity\n");
403    
404     string word1;
405     string word2;
406     string word3;
407    
408     RE r("(foo)|(bar)|(baz)");
409     CHECK(r.PartialMatch("foo", &word1, &word2, &word3));
410     CHECK_EQ(word1, "foo");
411     CHECK_EQ(word2, "");
412     CHECK_EQ(word3, "");
413     CHECK(r.PartialMatch("bar", &word1, &word2, &word3));
414     CHECK_EQ(word1, "");
415     CHECK_EQ(word2, "bar");
416     CHECK_EQ(word3, "");
417     CHECK(r.PartialMatch("baz", &word1, &word2, &word3));
418     CHECK_EQ(word1, "");
419     CHECK_EQ(word2, "");
420     CHECK_EQ(word3, "baz");
421     CHECK(!r.PartialMatch("f", &word1, &word2, &word3));
422    
423     string a;
424     CHECK(RE("(foo)|hello").FullMatch("hello", &a));
425     CHECK_EQ(a, "");
426     }
427    
428 nigel 87 static void TestRecursion() {
429 nigel 77 printf("Testing recursion\n");
430    
431 nigel 87 // Get one string that passes (sometimes), one that never does.
432     string text_good("abcdefghijk");
433     string text_bad("acdefghijkl");
434    
435     // According to pcretest, matching text_good against (\w+)*b
436     // requires match_limit of at least 8192, and match_recursion_limit
437     // of at least 37.
438    
439     RE_Options options_ml;
440     options_ml.set_match_limit(8192);
441     RE re("(\\w+)*b", options_ml);
442     CHECK(re.PartialMatch(text_good) == true);
443     CHECK(re.PartialMatch(text_bad) == false);
444     CHECK(re.FullMatch(text_good) == false);
445     CHECK(re.FullMatch(text_bad) == false);
446    
447     options_ml.set_match_limit(1024);
448     RE re2("(\\w+)*b", options_ml);
449     CHECK(re2.PartialMatch(text_good) == false); // because of match_limit
450     CHECK(re2.PartialMatch(text_bad) == false);
451     CHECK(re2.FullMatch(text_good) == false);
452     CHECK(re2.FullMatch(text_bad) == false);
453    
454     RE_Options options_mlr;
455     options_mlr.set_match_limit_recursion(50);
456     RE re3("(\\w+)*b", options_mlr);
457     CHECK(re3.PartialMatch(text_good) == true);
458     CHECK(re3.PartialMatch(text_bad) == false);
459     CHECK(re3.FullMatch(text_good) == false);
460     CHECK(re3.FullMatch(text_bad) == false);
461    
462     options_mlr.set_match_limit_recursion(10);
463     RE re4("(\\w+)*b", options_mlr);
464     CHECK(re4.PartialMatch(text_good) == false);
465     CHECK(re4.PartialMatch(text_bad) == false);
466     CHECK(re4.FullMatch(text_good) == false);
467     CHECK(re4.FullMatch(text_bad) == false);
468 nigel 77 }
469    
470 nigel 93 // A meta-quoted string, interpreted as a pattern, should always match
471     // the original unquoted string.
472     static void TestQuoteMeta(string unquoted, RE_Options options = RE_Options()) {
473     string quoted = RE::QuoteMeta(unquoted);
474     RE re(quoted, options);
475     CHECK(re.FullMatch(unquoted));
476     }
477    
478     // A string containing meaningful regexp characters, which is then meta-
479     // quoted, should not generally match a string the unquoted string does.
480     static void NegativeTestQuoteMeta(string unquoted, string should_not_match,
481     RE_Options options = RE_Options()) {
482     string quoted = RE::QuoteMeta(unquoted);
483     RE re(quoted, options);
484     CHECK(!re.FullMatch(should_not_match));
485     }
486    
487     // Tests that quoted meta characters match their original strings,
488     // and that a few things that shouldn't match indeed do not.
489     static void TestQuotaMetaSimple() {
490     TestQuoteMeta("foo");
491     TestQuoteMeta("foo.bar");
492     TestQuoteMeta("foo\\.bar");
493     TestQuoteMeta("[1-9]");
494     TestQuoteMeta("1.5-2.0?");
495     TestQuoteMeta("\\d");
496     TestQuoteMeta("Who doesn't like ice cream?");
497     TestQuoteMeta("((a|b)c?d*e+[f-h]i)");
498     TestQuoteMeta("((?!)xxx).*yyy");
499     TestQuoteMeta("([");
500 ph10 326 TestQuoteMeta(string("foo\0bar", 7));
501 nigel 93 }
502    
503     static void TestQuoteMetaSimpleNegative() {
504     NegativeTestQuoteMeta("foo", "bar");
505     NegativeTestQuoteMeta("...", "bar");
506     NegativeTestQuoteMeta("\\.", ".");
507     NegativeTestQuoteMeta("\\.", "..");
508     NegativeTestQuoteMeta("(a)", "a");
509     NegativeTestQuoteMeta("(a|b)", "a");
510     NegativeTestQuoteMeta("(a|b)", "(a)");
511     NegativeTestQuoteMeta("(a|b)", "a|b");
512     NegativeTestQuoteMeta("[0-9]", "0");
513     NegativeTestQuoteMeta("[0-9]", "0-9");
514     NegativeTestQuoteMeta("[0-9]", "[9]");
515     NegativeTestQuoteMeta("((?!)xxx)", "xxx");
516     }
517    
518     static void TestQuoteMetaLatin1() {
519     TestQuoteMeta("3\xb2 = 9");
520     }
521    
// Quotes strings containing multi-byte UTF-8 sequences.  The body is
// compiled only when SUPPORT_UTF8 is defined; otherwise this is a no-op.
static void TestQuoteMetaUtf8() {
#ifdef SUPPORT_UTF8
  TestQuoteMeta("Pl\xc3\xa1\x63ido Domingo", pcrecpp::UTF8());
  TestQuoteMeta("xyz", pcrecpp::UTF8());  // No fancy utf8
  TestQuoteMeta("\xc2\xb0", pcrecpp::UTF8());  // 2-byte utf8 (degree symbol)
  TestQuoteMeta("27\xc2\xb0 degrees", pcrecpp::UTF8());  // As a middle character
  TestQuoteMeta("\xe2\x80\xb3", pcrecpp::UTF8());  // 3-byte utf8 (double prime)
  TestQuoteMeta("\xf0\x9d\x85\x9f", pcrecpp::UTF8());  // 4-byte utf8 (music note)
  TestQuoteMeta("27\xc2\xb0");  // Interpreted as Latin-1, but should still work
  NegativeTestQuoteMeta("27\xc2\xb0",               // 2-byte utf (degree symbol)
                        "27\\\xc2\\\xb0",
                        pcrecpp::UTF8());
#endif
}
536    
537     static void TestQuoteMetaAll() {
538     printf("Testing QuoteMeta\n");
539     TestQuotaMetaSimple();
540     TestQuoteMetaSimpleNegative();
541     TestQuoteMetaLatin1();
542     TestQuoteMetaUtf8();
543     }
544    
545 nigel 81 //
546     // Options tests contributed by
547     // Giuseppe Maxia, CTO, Stardata s.r.l.
548     // July 2005
549     //
550     static void GetOneOptionResult(
551     const char *option_name,
552     const char *regex,
553     const char *str,
554     RE_Options options,
555     bool full,
556     string expected) {
557 nigel 77
558 nigel 81 printf("Testing Option <%s>\n", option_name);
559     if(VERBOSE_TEST)
560     printf("/%s/ finds \"%s\" within \"%s\" \n",
561     regex,
562     expected.c_str(),
563     str);
564     string captured("");
565     if (full)
566     RE(regex,options).FullMatch(str, &captured);
567     else
568     RE(regex,options).PartialMatch(str, &captured);
569     CHECK_EQ(captured, expected);
570     }
571    
572     static void TestOneOption(
573     const char *option_name,
574     const char *regex,
575     const char *str,
576     RE_Options options,
577     bool full,
578     bool assertive = true) {
579    
580     printf("Testing Option <%s>\n", option_name);
581     if (VERBOSE_TEST)
582     printf("'%s' %s /%s/ \n",
583     str,
584     (assertive? "matches" : "doesn't match"),
585     regex);
586     if (assertive) {
587     if (full)
588     CHECK(RE(regex,options).FullMatch(str));
589     else
590     CHECK(RE(regex,options).PartialMatch(str));
591     } else {
592     if (full)
593     CHECK(!RE(regex,options).FullMatch(str));
594     else
595     CHECK(!RE(regex,options).PartialMatch(str));
596     }
597     }
598    
599     static void Test_CASELESS() {
600     RE_Options options;
601     RE_Options options2;
602    
603     options.set_caseless(true);
604     TestOneOption("CASELESS (class)", "HELLO", "hello", options, false);
605     TestOneOption("CASELESS (class2)", "HELLO", "hello", options2.set_caseless(true), false);
606     TestOneOption("CASELESS (class)", "^[A-Z]+$", "Hello", options, false);
607    
608     TestOneOption("CASELESS (function)", "HELLO", "hello", pcrecpp::CASELESS(), false);
609     TestOneOption("CASELESS (function)", "^[A-Z]+$", "Hello", pcrecpp::CASELESS(), false);
610     options.set_caseless(false);
611     TestOneOption("no CASELESS", "HELLO", "hello", options, false, false);
612     }
613    
614     static void Test_MULTILINE() {
615     RE_Options options;
616     RE_Options options2;
617     const char *str = "HELLO\n" "cruel\n" "world\n";
618    
619     options.set_multiline(true);
620     TestOneOption("MULTILINE (class)", "^cruel$", str, options, false);
621     TestOneOption("MULTILINE (class2)", "^cruel$", str, options2.set_multiline(true), false);
622     TestOneOption("MULTILINE (function)", "^cruel$", str, pcrecpp::MULTILINE(), false);
623     options.set_multiline(false);
624     TestOneOption("no MULTILINE", "^cruel$", str, options, false, false);
625     }
626    
627     static void Test_DOTALL() {
628     RE_Options options;
629     RE_Options options2;
630     const char *str = "HELLO\n" "cruel\n" "world";
631    
632     options.set_dotall(true);
633     TestOneOption("DOTALL (class)", "HELLO.*world", str, options, true);
634     TestOneOption("DOTALL (class2)", "HELLO.*world", str, options2.set_dotall(true), true);
635     TestOneOption("DOTALL (function)", "HELLO.*world", str, pcrecpp::DOTALL(), true);
636     options.set_dotall(false);
637     TestOneOption("no DOTALL", "HELLO.*world", str, options, true, false);
638     }
639    
640     static void Test_DOLLAR_ENDONLY() {
641     RE_Options options;
642     RE_Options options2;
643     const char *str = "HELLO world\n";
644    
645     TestOneOption("no DOLLAR_ENDONLY", "world$", str, options, false);
646     options.set_dollar_endonly(true);
647     TestOneOption("DOLLAR_ENDONLY 1", "world$", str, options, false, false);
648     TestOneOption("DOLLAR_ENDONLY 2", "world$", str, options2.set_dollar_endonly(true), false, false);
649     }
650    
651     static void Test_EXTRA() {
652     RE_Options options;
653     const char *str = "HELLO";
654    
655     options.set_extra(true);
656     TestOneOption("EXTRA 1", "\\HELL\\O", str, options, true, false );
657     TestOneOption("EXTRA 2", "\\HELL\\O", str, RE_Options().set_extra(true), true, false );
658     options.set_extra(false);
659     TestOneOption("no EXTRA", "\\HELL\\O", str, options, true );
660     }
661    
662     static void Test_EXTENDED() {
663     RE_Options options;
664     RE_Options options2;
665     const char *str = "HELLO world";
666    
667     options.set_extended(true);
668     TestOneOption("EXTENDED (class)", "HELLO world", str, options, false, false);
669     TestOneOption("EXTENDED (class2)", "HELLO world", str, options2.set_extended(true), false, false);
670     TestOneOption("EXTENDED (class)",
671     "^ HE L{2} O "
672     "\\s+ "
673     "\\w+ $ ",
674     str,
675     options,
676     false);
677    
678     TestOneOption("EXTENDED (function)", "HELLO world", str, pcrecpp::EXTENDED(), false, false);
679     TestOneOption("EXTENDED (function)",
680     "^ HE L{2} O "
681     "\\s+ "
682     "\\w+ $ ",
683     str,
684     pcrecpp::EXTENDED(),
685     false);
686    
687     options.set_extended(false);
688     TestOneOption("no EXTENDED", "HELLO world", str, options, false);
689     }
690    
691     static void Test_NO_AUTO_CAPTURE() {
692     RE_Options options;
693     const char *str = "HELLO world";
694     string captured;
695    
696     printf("Testing Option <no NO_AUTO_CAPTURE>\n");
697     if (VERBOSE_TEST)
698     printf("parentheses capture text\n");
699     RE re("(world|universe)$", options);
700     CHECK(re.Extract("\\1", str , &captured));
701     CHECK_EQ(captured, "world");
702     options.set_no_auto_capture(true);
703     printf("testing Option <NO_AUTO_CAPTURE>\n");
704     if (VERBOSE_TEST)
705     printf("parentheses do not capture text\n");
706     re.Extract("\\1",str, &captured );
707     CHECK_EQ(captured, "world");
708     }
709    
710     static void Test_UNGREEDY() {
711     RE_Options options;
712     const char *str = "HELLO, 'this' is the 'world'";
713    
714     options.set_ungreedy(true);
715     GetOneOptionResult("UNGREEDY 1", "('.*')", str, options, false, "'this'" );
716     GetOneOptionResult("UNGREEDY 2", "('.*')", str, RE_Options().set_ungreedy(true), false, "'this'" );
717     GetOneOptionResult("UNGREEDY", "('.*?')", str, options, false, "'this' is the 'world'" );
718    
719     options.set_ungreedy(false);
720     GetOneOptionResult("no UNGREEDY", "('.*')", str, options, false, "'this' is the 'world'" );
721     GetOneOptionResult("no UNGREEDY", "('.*?')", str, options, false, "'this'" );
722     }
723    
724     static void Test_all_options() {
725     const char *str = "HELLO\n" "cruel\n" "world";
726     RE_Options options;
727     options.set_all_options(PCRE_CASELESS | PCRE_DOTALL);
728    
729     TestOneOption("all_options (CASELESS|DOTALL)", "^hello.*WORLD", str , options, false);
730     options.set_all_options(0);
731     TestOneOption("all_options (0)", "^hello.*WORLD", str , options, false, false);
732     options.set_all_options(PCRE_MULTILINE | PCRE_EXTENDED);
733    
734     TestOneOption("all_options (MULTILINE|EXTENDED)", " ^ c r u e l $ ", str, options, false);
735     TestOneOption("all_options (MULTILINE|EXTENDED) with constructor",
736     " ^ c r u e l $ ",
737     str,
738     RE_Options(PCRE_MULTILINE | PCRE_EXTENDED),
739     false);
740    
741     TestOneOption("all_options (MULTILINE|EXTENDED) with concatenation",
742     " ^ c r u e l $ ",
743     str,
744     RE_Options()
745     .set_multiline(true)
746     .set_extended(true),
747     false);
748    
749     options.set_all_options(0);
750     TestOneOption("all_options (0)", "^ c r u e l $", str, options, false, false);
751    
752     }
753    
754     static void TestOptions() {
755     printf("Testing Options\n");
756     Test_CASELESS();
757     Test_MULTILINE();
758     Test_DOTALL();
759     Test_DOLLAR_ENDONLY();
760     Test_EXTENDED();
761     Test_NO_AUTO_CAPTURE();
762     Test_UNGREEDY();
763     Test_EXTRA();
764     Test_all_options();
765     }
766    
767 nigel 93 static void TestConstructors() {
768     printf("Testing constructors\n");
769    
770     RE_Options options;
771     options.set_dotall(true);
772     const char *str = "HELLO\n" "cruel\n" "world";
773    
774     RE orig("HELLO.*world", options);
775     CHECK(orig.FullMatch(str));
776    
777     RE copy1(orig);
778     CHECK(copy1.FullMatch(str));
779    
780     RE copy2("not a match");
781     CHECK(!copy2.FullMatch(str));
782     copy2 = copy1;
783     CHECK(copy2.FullMatch(str));
784     copy2 = orig;
785     CHECK(copy2.FullMatch(str));
786    
787     // Make sure when we assign to ourselves, nothing bad happens
788     orig = orig;
789     copy1 = copy1;
790     copy2 = copy2;
791     CHECK(orig.FullMatch(str));
792     CHECK(copy1.FullMatch(str));
793     CHECK(copy2.FullMatch(str));
794     }
795    
796 nigel 77 int main(int argc, char** argv) {
797     // Treat any flag as --help
798     if (argc > 1 && argv[1][0] == '-') {
799     printf("Usage: %s [timing1|timing2|timing3 num-iters]\n"
800     " If 'timingX ###' is specified, run the given timing test\n"
801     " with the given number of iterations, rather than running\n"
802     " the default corectness test.\n", argv[0]);
803     return 0;
804     }
805    
806     if (argc > 1) {
807     if ( argc == 2 || atoi(argv[2]) == 0) {
808     printf("timing mode needs a num-iters argument\n");
809     return 1;
810     }
811     if (!strcmp(argv[1], "timing1"))
812     Timing1(atoi(argv[2]));
813     else if (!strcmp(argv[1], "timing2"))
814     Timing2(atoi(argv[2]));
815     else if (!strcmp(argv[1], "timing3"))
816     Timing3(atoi(argv[2]));
817     else
818     printf("Unknown argument '%s'\n", argv[1]);
819     return 0;
820     }
821    
822     printf("Testing FullMatch\n");
823    
824     int i;
825     string s;
826    
827     /***** FullMatch with no args *****/
828    
829     CHECK(RE("h.*o").FullMatch("hello"));
830 ph10 179 CHECK(!RE("h.*o").FullMatch("othello")); // Must be anchored at front
831     CHECK(!RE("h.*o").FullMatch("hello!")); // Must be anchored at end
832     CHECK(RE("a*").FullMatch("aaaa")); // Fullmatch with normal op
833     CHECK(RE("a*?").FullMatch("aaaa")); // Fullmatch with nongreedy op
834     CHECK(RE("a*?\\z").FullMatch("aaaa")); // Two unusual ops
835 nigel 77
836     /***** FullMatch with args *****/
837    
838     // Zero-arg
839     CHECK(RE("\\d+").FullMatch("1001"));
840    
841     // Single-arg
842     CHECK(RE("(\\d+)").FullMatch("1001", &i));
843     CHECK_EQ(i, 1001);
844     CHECK(RE("(-?\\d+)").FullMatch("-123", &i));
845     CHECK_EQ(i, -123);
846     CHECK(!RE("()\\d+").FullMatch("10", &i));
847     CHECK(!RE("(\\d+)").FullMatch("1234567890123456789012345678901234567890",
848     &i));
849    
850     // Digits surrounding integer-arg
851     CHECK(RE("1(\\d*)4").FullMatch("1234", &i));
852     CHECK_EQ(i, 23);
853     CHECK(RE("(\\d)\\d+").FullMatch("1234", &i));
854     CHECK_EQ(i, 1);
855     CHECK(RE("(-\\d)\\d+").FullMatch("-1234", &i));
856     CHECK_EQ(i, -1);
857     CHECK(RE("(\\d)").PartialMatch("1234", &i));
858     CHECK_EQ(i, 1);
859     CHECK(RE("(-\\d)").PartialMatch("-1234", &i));
860     CHECK_EQ(i, -1);
861    
862     // String-arg
863     CHECK(RE("h(.*)o").FullMatch("hello", &s));
864     CHECK_EQ(s, string("ell"));
865    
866     // StringPiece-arg
867     StringPiece sp;
868     CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &sp, &i));
869     CHECK_EQ(sp.size(), 4);
870     CHECK(memcmp(sp.data(), "ruby", 4) == 0);
871     CHECK_EQ(i, 1234);
872    
873     // Multi-arg
874     CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &s, &i));
875     CHECK_EQ(s, string("ruby"));
876     CHECK_EQ(i, 1234);
877    
878 ph10 263 // Ignore non-void* NULL arg
879     CHECK(RE("he(.*)lo").FullMatch("hello", (char*)NULL));
880     CHECK(RE("h(.*)o").FullMatch("hello", (string*)NULL));
881     CHECK(RE("h(.*)o").FullMatch("hello", (StringPiece*)NULL));
882     CHECK(RE("(.*)").FullMatch("1234", (int*)NULL));
883 ph10 302 #ifdef HAVE_LONG_LONG
884 ph10 263 CHECK(RE("(.*)").FullMatch("1234567890123456", (long long*)NULL));
885 ph10 302 #endif
886 ph10 263 CHECK(RE("(.*)").FullMatch("123.4567890123456", (double*)NULL));
887     CHECK(RE("(.*)").FullMatch("123.4567890123456", (float*)NULL));
888    
889     // Fail on non-void* NULL arg if the match doesn't parse for the given type.
890     CHECK(!RE("h(.*)lo").FullMatch("hello", &s, (char*)NULL));
891     CHECK(!RE("(.*)").FullMatch("hello", (int*)NULL));
892     CHECK(!RE("(.*)").FullMatch("1234567890123456", (int*)NULL));
893     CHECK(!RE("(.*)").FullMatch("hello", (double*)NULL));
894     CHECK(!RE("(.*)").FullMatch("hello", (float*)NULL));
895    
896 nigel 77 // Ignored arg
897     CHECK(RE("(\\w+)(:)(\\d+)").FullMatch("ruby:1234", &s, (void*)NULL, &i));
898     CHECK_EQ(s, string("ruby"));
899     CHECK_EQ(i, 1234);
900    
901     // Type tests
902     {
903     char c;
904     CHECK(RE("(H)ello").FullMatch("Hello", &c));
905     CHECK_EQ(c, 'H');
906     }
907     {
908     unsigned char c;
909     CHECK(RE("(H)ello").FullMatch("Hello", &c));
910     CHECK_EQ(c, static_cast<unsigned char>('H'));
911     }
912     {
913     short v;
914     CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
915     CHECK(RE("(-?\\d+)").FullMatch("-100", &v)); CHECK_EQ(v, -100);
916     CHECK(RE("(-?\\d+)").FullMatch("32767", &v)); CHECK_EQ(v, 32767);
917     CHECK(RE("(-?\\d+)").FullMatch("-32768", &v)); CHECK_EQ(v, -32768);
918     CHECK(!RE("(-?\\d+)").FullMatch("-32769", &v));
919     CHECK(!RE("(-?\\d+)").FullMatch("32768", &v));
920     }
921     {
922     unsigned short v;
923     CHECK(RE("(\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
924     CHECK(RE("(\\d+)").FullMatch("32767", &v)); CHECK_EQ(v, 32767);
925     CHECK(RE("(\\d+)").FullMatch("65535", &v)); CHECK_EQ(v, 65535);
926     CHECK(!RE("(\\d+)").FullMatch("65536", &v));
927     }
928     {
929     int v;
930     static const int max_value = 0x7fffffff;
931     static const int min_value = -max_value - 1;
932     CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
933     CHECK(RE("(-?\\d+)").FullMatch("-100", &v)); CHECK_EQ(v, -100);
934     CHECK(RE("(-?\\d+)").FullMatch("2147483647", &v)); CHECK_EQ(v, max_value);
935     CHECK(RE("(-?\\d+)").FullMatch("-2147483648", &v)); CHECK_EQ(v, min_value);
936     CHECK(!RE("(-?\\d+)").FullMatch("-2147483649", &v));
937     CHECK(!RE("(-?\\d+)").FullMatch("2147483648", &v));
938     }
939     {
940     unsigned int v;
941     static const unsigned int max_value = 0xfffffffful;
942     CHECK(RE("(\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
943     CHECK(RE("(\\d+)").FullMatch("4294967295", &v)); CHECK_EQ(v, max_value);
944     CHECK(!RE("(\\d+)").FullMatch("4294967296", &v));
945     }
946     #ifdef HAVE_LONG_LONG
947 ph10 193 # if defined(__MINGW__) || defined(__MINGW32__)
948     # define LLD "%I64d"
949 ph10 201 # define LLU "%I64u"
950 ph10 193 # else
951     # define LLD "%lld"
952 ph10 201 # define LLU "%llu"
953 ph10 193 # endif
954 nigel 77 {
955     long long v;
956     static const long long max_value = 0x7fffffffffffffffLL;
957     static const long long min_value = -max_value - 1;
958 ph10 257 char buf[32]; // definitely big enough for a long long
959 nigel 77
960     CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
961     CHECK(RE("(-?\\d+)").FullMatch("-100",&v)); CHECK_EQ(v, -100);
962    
963 ph10 257 sprintf(buf, LLD, max_value);
964 nigel 77 CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
965    
966 ph10 257 sprintf(buf, LLD, min_value);
967 nigel 77 CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, min_value);
968    
969 ph10 257 sprintf(buf, LLD, max_value);
970 nigel 77 assert(buf[strlen(buf)-1] != '9');
971     buf[strlen(buf)-1]++;
972     CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
973    
974 ph10 257 sprintf(buf, LLD, min_value);
975 nigel 77 assert(buf[strlen(buf)-1] != '9');
976     buf[strlen(buf)-1]++;
977     CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
978     }
979     #endif
980     #if defined HAVE_UNSIGNED_LONG_LONG && defined HAVE_LONG_LONG
981     {
982     unsigned long long v;
983     long long v2;
984     static const unsigned long long max_value = 0xffffffffffffffffULL;
985 ph10 257 char buf[32]; // definitely big enough for an unsigned long long
986 nigel 77
987     CHECK(RE("(-?\\d+)").FullMatch("100",&v)); CHECK_EQ(v, 100);
988     CHECK(RE("(-?\\d+)").FullMatch("-100",&v2)); CHECK_EQ(v2, -100);
989    
990 ph10 257 sprintf(buf, LLU, max_value);
991 nigel 77 CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
992    
993     assert(buf[strlen(buf)-1] != '9');
994     buf[strlen(buf)-1]++;
995     CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
996     }
997     #endif
998     {
999     float v;
1000     CHECK(RE("(.*)").FullMatch("100", &v));
1001     CHECK(RE("(.*)").FullMatch("-100.", &v));
1002     CHECK(RE("(.*)").FullMatch("1e23", &v));
1003     }
1004     {
1005     double v;
1006     CHECK(RE("(.*)").FullMatch("100", &v));
1007     CHECK(RE("(.*)").FullMatch("-100.", &v));
1008     CHECK(RE("(.*)").FullMatch("1e23", &v));
1009     }
1010    
1011     // Check that matching is fully anchored
1012     CHECK(!RE("(\\d+)").FullMatch("x1001", &i));
1013     CHECK(!RE("(\\d+)").FullMatch("1001x", &i));
1014     CHECK(RE("x(\\d+)").FullMatch("x1001", &i)); CHECK_EQ(i, 1001);
1015     CHECK(RE("(\\d+)x").FullMatch("1001x", &i)); CHECK_EQ(i, 1001);
1016    
1017     // Braces
1018     CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcd"));
1019     CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcde"));
1020     CHECK(!RE("[0-9a-f+.-]{5,}").FullMatch("0abc"));
1021    
1022     // Complicated RE
1023     CHECK(RE("foo|bar|[A-Z]").FullMatch("foo"));
1024     CHECK(RE("foo|bar|[A-Z]").FullMatch("bar"));
1025     CHECK(RE("foo|bar|[A-Z]").FullMatch("X"));
1026     CHECK(!RE("foo|bar|[A-Z]").FullMatch("XY"));
1027    
1028     // Check full-match handling (needs '$' tacked on internally)
1029     CHECK(RE("fo|foo").FullMatch("fo"));
1030     CHECK(RE("fo|foo").FullMatch("foo"));
1031     CHECK(RE("fo|foo$").FullMatch("fo"));
1032     CHECK(RE("fo|foo$").FullMatch("foo"));
1033     CHECK(RE("foo$").FullMatch("foo"));
1034     CHECK(!RE("foo\\$").FullMatch("foo$bar"));
1035     CHECK(!RE("fo|bar").FullMatch("fox"));
1036    
1037     // Uncomment the following if we change the handling of '$' to
1038     // prevent it from matching a trailing newline
1039     if (false) {
1040     // Check that we don't get bitten by pcre's special handling of a
1041     // '\n' at the end of the string matching '$'
1042     CHECK(!RE("foo$").PartialMatch("foo\n"));
1043     }
1044    
1045     // Number of args
1046     int a[16];
1047     CHECK(RE("").FullMatch(""));
1048    
1049     memset(a, 0, sizeof(0));
1050     CHECK(RE("(\\d){1}").FullMatch("1",
1051     &a[0]));
1052     CHECK_EQ(a[0], 1);
1053    
1054     memset(a, 0, sizeof(0));
1055     CHECK(RE("(\\d)(\\d)").FullMatch("12",
1056     &a[0], &a[1]));
1057     CHECK_EQ(a[0], 1);
1058     CHECK_EQ(a[1], 2);
1059    
1060     memset(a, 0, sizeof(0));
1061     CHECK(RE("(\\d)(\\d)(\\d)").FullMatch("123",
1062     &a[0], &a[1], &a[2]));
1063     CHECK_EQ(a[0], 1);
1064     CHECK_EQ(a[1], 2);
1065     CHECK_EQ(a[2], 3);
1066    
1067     memset(a, 0, sizeof(0));
1068     CHECK(RE("(\\d)(\\d)(\\d)(\\d)").FullMatch("1234",
1069     &a[0], &a[1], &a[2], &a[3]));
1070     CHECK_EQ(a[0], 1);
1071     CHECK_EQ(a[1], 2);
1072     CHECK_EQ(a[2], 3);
1073     CHECK_EQ(a[3], 4);
1074    
1075     memset(a, 0, sizeof(0));
1076     CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("12345",
1077     &a[0], &a[1], &a[2],
1078     &a[3], &a[4]));
1079     CHECK_EQ(a[0], 1);
1080     CHECK_EQ(a[1], 2);
1081     CHECK_EQ(a[2], 3);
1082     CHECK_EQ(a[3], 4);
1083     CHECK_EQ(a[4], 5);
1084    
1085     memset(a, 0, sizeof(0));
1086     CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("123456",
1087     &a[0], &a[1], &a[2],
1088     &a[3], &a[4], &a[5]));
1089     CHECK_EQ(a[0], 1);
1090     CHECK_EQ(a[1], 2);
1091     CHECK_EQ(a[2], 3);
1092     CHECK_EQ(a[3], 4);
1093     CHECK_EQ(a[4], 5);
1094     CHECK_EQ(a[5], 6);
1095    
1096     memset(a, 0, sizeof(0));
1097     CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("1234567",
1098     &a[0], &a[1], &a[2], &a[3],
1099     &a[4], &a[5], &a[6]));
1100     CHECK_EQ(a[0], 1);
1101     CHECK_EQ(a[1], 2);
1102     CHECK_EQ(a[2], 3);
1103     CHECK_EQ(a[3], 4);
1104     CHECK_EQ(a[4], 5);
1105     CHECK_EQ(a[5], 6);
1106     CHECK_EQ(a[6], 7);
1107    
1108     memset(a, 0, sizeof(0));
1109     CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)"
1110     "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch(
1111     "1234567890123456",
1112     &a[0], &a[1], &a[2], &a[3],
1113     &a[4], &a[5], &a[6], &a[7],
1114     &a[8], &a[9], &a[10], &a[11],
1115     &a[12], &a[13], &a[14], &a[15]));
1116     CHECK_EQ(a[0], 1);
1117     CHECK_EQ(a[1], 2);
1118     CHECK_EQ(a[2], 3);
1119     CHECK_EQ(a[3], 4);
1120     CHECK_EQ(a[4], 5);
1121     CHECK_EQ(a[5], 6);
1122     CHECK_EQ(a[6], 7);
1123     CHECK_EQ(a[7], 8);
1124     CHECK_EQ(a[8], 9);
1125     CHECK_EQ(a[9], 0);
1126     CHECK_EQ(a[10], 1);
1127     CHECK_EQ(a[11], 2);
1128     CHECK_EQ(a[12], 3);
1129     CHECK_EQ(a[13], 4);
1130     CHECK_EQ(a[14], 5);
1131     CHECK_EQ(a[15], 6);
1132    
1133     /***** PartialMatch *****/
1134    
1135     printf("Testing PartialMatch\n");
1136    
1137     CHECK(RE("h.*o").PartialMatch("hello"));
1138     CHECK(RE("h.*o").PartialMatch("othello"));
1139     CHECK(RE("h.*o").PartialMatch("hello!"));
1140     CHECK(RE("((((((((((((((((((((x))))))))))))))))))))").PartialMatch("x"));
1141    
1142 nigel 93 /***** other tests *****/
1143    
1144 nigel 77 RadixTests();
1145     TestReplace();
1146     TestExtract();
1147     TestConsume();
1148     TestFindAndConsume();
1149 nigel 93 TestQuoteMetaAll();
1150 nigel 77 TestMatchNumberPeculiarity();
1151    
1152     // Check the pattern() accessor
1153     {
1154     const string kPattern = "http://([^/]+)/.*";
1155     const RE re(kPattern);
1156     CHECK_EQ(kPattern, re.pattern());
1157     }
1158    
1159     // Check RE error field.
1160     {
1161     RE re("foo");
1162     CHECK(re.error().empty()); // Must have no error
1163     }
1164    
1165     #ifdef SUPPORT_UTF8
1166     // Check UTF-8 handling
1167     {
1168     printf("Testing UTF-8 handling\n");
1169    
1170     // Three Japanese characters (nihongo)
1171 ph10 256 const unsigned char utf8_string[] = {
1172 nigel 77 0xe6, 0x97, 0xa5, // 65e5
1173     0xe6, 0x9c, 0xac, // 672c
1174     0xe8, 0xaa, 0x9e, // 8a9e
1175     0
1176     };
1177 ph10 256 const unsigned char utf8_pattern[] = {
1178 nigel 77 '.',
1179     0xe6, 0x9c, 0xac, // 672c
1180     '.',
1181     0
1182     };
1183    
1184     // Both should match in either mode, bytes or UTF-8
1185     RE re_test1(".........");
1186     CHECK(re_test1.FullMatch(utf8_string));
1187     RE re_test2("...", pcrecpp::UTF8());
1188     CHECK(re_test2.FullMatch(utf8_string));
1189    
1190     // Check that '.' matches one byte or UTF-8 character
1191     // according to the mode.
1192     string ss;
1193     RE re_test3("(.)");
1194     CHECK(re_test3.PartialMatch(utf8_string, &ss));
1195     CHECK_EQ(ss, string("\xe6"));
1196     RE re_test4("(.)", pcrecpp::UTF8());
1197     CHECK(re_test4.PartialMatch(utf8_string, &ss));
1198     CHECK_EQ(ss, string("\xe6\x97\xa5"));
1199    
1200     // Check that string matches itself in either mode
1201     RE re_test5(utf8_string);
1202     CHECK(re_test5.FullMatch(utf8_string));
1203     RE re_test6(utf8_string, pcrecpp::UTF8());
1204     CHECK(re_test6.FullMatch(utf8_string));
1205    
1206     // Check that pattern matches string only in UTF8 mode
1207     RE re_test7(utf8_pattern);
1208     CHECK(!re_test7.FullMatch(utf8_string));
1209     RE re_test8(utf8_pattern, pcrecpp::UTF8());
1210     CHECK(re_test8.FullMatch(utf8_string));
1211     }
1212    
1213     // Check that ungreedy, UTF8 regular expressions don't match when they
1214     // oughtn't -- see bug 82246.
1215     {
1216     // This code always worked.
1217     const char* pattern = "\\w+X";
1218     const string target = "a aX";
1219     RE match_sentence(pattern);
1220     RE match_sentence_re(pattern, pcrecpp::UTF8());
1221    
1222     CHECK(!match_sentence.FullMatch(target));
1223     CHECK(!match_sentence_re.FullMatch(target));
1224     }
1225    
1226     {
1227     const char* pattern = "(?U)\\w+X";
1228     const string target = "a aX";
1229     RE match_sentence(pattern);
1230     RE match_sentence_re(pattern, pcrecpp::UTF8());
1231    
1232     CHECK(!match_sentence.FullMatch(target));
1233     CHECK(!match_sentence_re.FullMatch(target));
1234     }
1235     #endif /* def SUPPORT_UTF8 */
1236    
1237     printf("Testing error reporting\n");
1238    
1239     { RE re("a\\1"); CHECK(!re.error().empty()); }
1240     {
1241     RE re("a[x");
1242     CHECK(!re.error().empty());
1243     }
1244     {
1245     RE re("a[z-a]");
1246     CHECK(!re.error().empty());
1247     }
1248     {
1249     RE re("a[[:foobar:]]");
1250     CHECK(!re.error().empty());
1251     }
1252     {
1253     RE re("a(b");
1254     CHECK(!re.error().empty());
1255     }
1256     {
1257     RE re("a\\");
1258     CHECK(!re.error().empty());
1259     }
1260    
1261 nigel 87 // Test that recursion is stopped
1262     TestRecursion();
1263 nigel 77
1264 nigel 81 // Test Options
1265     if (getenv("VERBOSE_TEST") != NULL)
1266     VERBOSE_TEST = true;
1267     TestOptions();
1268    
1269 nigel 93 // Test the constructors
1270     TestConstructors();
1271    
1272 nigel 77 // Done
1273     printf("OK\n");
1274    
1275     return 0;
1276     }

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12