| 1 |
nigel |
93 |
// -*- coding: utf-8 -*- |
| 2 |
|
|
// |
| 3 |
|
|
// Copyright (c) 2005 - 2006, Google Inc. |
| 4 |
nigel |
77 |
// All rights reserved. |
| 5 |
|
|
// |
| 6 |
|
|
// Redistribution and use in source and binary forms, with or without |
| 7 |
|
|
// modification, are permitted provided that the following conditions are |
| 8 |
|
|
// met: |
| 9 |
|
|
// |
| 10 |
|
|
// * Redistributions of source code must retain the above copyright |
| 11 |
|
|
// notice, this list of conditions and the following disclaimer. |
| 12 |
|
|
// * Redistributions in binary form must reproduce the above |
| 13 |
|
|
// copyright notice, this list of conditions and the following disclaimer |
| 14 |
|
|
// in the documentation and/or other materials provided with the |
| 15 |
|
|
// distribution. |
| 16 |
|
|
// * Neither the name of Google Inc. nor the names of its |
| 17 |
|
|
// contributors may be used to endorse or promote products derived from |
| 18 |
|
|
// this software without specific prior written permission. |
| 19 |
|
|
// |
| 20 |
|
|
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| 21 |
|
|
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| 22 |
|
|
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| 23 |
|
|
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| 24 |
|
|
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| 25 |
|
|
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| 26 |
|
|
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| 27 |
|
|
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| 28 |
|
|
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 29 |
|
|
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 30 |
|
|
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 31 |
|
|
// |
| 32 |
|
|
// Author: Sanjay Ghemawat |
| 33 |
|
|
// |
| 34 |
|
|
// TODO: Test extractions for PartialMatch/Consume |
| 35 |
|
|
|
| 36 |
ph10 |
200 |
#ifdef HAVE_CONFIG_H |
| 37 |
ph10 |
236 |
#include "config.h" |
| 38 |
ph10 |
200 |
#endif |
| 39 |
ph10 |
199 |
|
| 40 |
ph10 |
234 |
#ifdef _WIN32 |
| 41 |
|
|
#define snprintf _snprintf |
| 42 |
|
|
#endif |
| 43 |
|
|
|
| 44 |
nigel |
77 |
#include <stdio.h> |
| 45 |
nigel |
91 |
#include <cassert> |
| 46 |
nigel |
77 |
#include <vector> |
| 47 |
|
|
#include "pcrecpp.h" |
| 48 |
|
|
|
| 49 |
|
|
using pcrecpp::StringPiece; |
| 50 |
|
|
using pcrecpp::RE; |
| 51 |
|
|
using pcrecpp::RE_Options; |
| 52 |
|
|
using pcrecpp::Hex; |
| 53 |
|
|
using pcrecpp::Octal; |
| 54 |
|
|
using pcrecpp::CRadix; |
| 55 |
|
|
|
| 56 |
nigel |
81 |
static bool VERBOSE_TEST = false; |
| 57 |
|
|
|
| 58 |
nigel |
77 |
// CHECK dies with a fatal error if condition is not true.  It is *not*
// controlled by NDEBUG, so the check will be executed regardless of
// compilation mode.  Therefore, it is safe to do things like:
//    CHECK_EQ(fp->Write(x), 4)
// On failure the file name, line number and the text of the condition are
// written to stderr and the process exits with status 1.
#define CHECK(condition) do {                           \
  if (!(condition)) {                                   \
    fprintf(stderr, "%s:%d: Check failed: %s\n",        \
            __FILE__, __LINE__, #condition);            \
    exit(1);                                            \
  }                                                     \
} while (0)

// CHECK_EQ dies unless a == b.  Each operand is parenthesized so that an
// argument containing an operator that binds less tightly than == (for
// example `x & mask`) is compared as a whole expression.
#define CHECK_EQ(a, b)  CHECK((a) == (b))
| 71 |
|
|
|
| 72 |
|
|
static void Timing1(int num_iters) { |
| 73 |
|
|
// Same pattern lots of times |
| 74 |
|
|
RE pattern("ruby:\\d+"); |
| 75 |
|
|
StringPiece p("ruby:1234"); |
| 76 |
|
|
for (int j = num_iters; j > 0; j--) { |
| 77 |
|
|
CHECK(pattern.FullMatch(p)); |
| 78 |
|
|
} |
| 79 |
|
|
} |
| 80 |
|
|
|
| 81 |
|
|
static void Timing2(int num_iters) { |
| 82 |
|
|
// Same pattern lots of times |
| 83 |
|
|
RE pattern("ruby:(\\d+)"); |
| 84 |
|
|
int i; |
| 85 |
|
|
for (int j = num_iters; j > 0; j--) { |
| 86 |
|
|
CHECK(pattern.FullMatch("ruby:1234", &i)); |
| 87 |
|
|
CHECK_EQ(i, 1234); |
| 88 |
|
|
} |
| 89 |
|
|
} |
| 90 |
|
|
|
| 91 |
|
|
static void Timing3(int num_iters) { |
| 92 |
|
|
string text_string; |
| 93 |
|
|
for (int j = num_iters; j > 0; j--) { |
| 94 |
|
|
text_string += "this is another line\n"; |
| 95 |
|
|
} |
| 96 |
|
|
|
| 97 |
|
|
RE line_matcher(".*\n"); |
| 98 |
|
|
string line; |
| 99 |
|
|
StringPiece text(text_string); |
| 100 |
|
|
int counter = 0; |
| 101 |
|
|
while (line_matcher.Consume(&text)) { |
| 102 |
|
|
counter++; |
| 103 |
|
|
} |
| 104 |
|
|
printf("Matched %d lines\n", counter); |
| 105 |
|
|
} |
| 106 |
|
|
|
| 107 |
|
|
#if 0 // uncomment this if you have a way of defining VirtualProcessSize() |
| 108 |
|
|
|
| 109 |
|
|
static void LeakTest() { |
| 110 |
|
|
// Check for memory leaks |
| 111 |
|
|
unsigned long long initial_size = 0; |
| 112 |
|
|
for (int i = 0; i < 100000; i++) { |
| 113 |
|
|
if (i == 50000) { |
| 114 |
|
|
initial_size = VirtualProcessSize(); |
| 115 |
|
|
printf("Size after 50000: %llu\n", initial_size); |
| 116 |
|
|
} |
| 117 |
|
|
char buf[100]; |
| 118 |
|
|
snprintf(buf, sizeof(buf), "pat%09d", i); |
| 119 |
|
|
RE newre(buf); |
| 120 |
|
|
} |
| 121 |
|
|
uint64 final_size = VirtualProcessSize(); |
| 122 |
|
|
printf("Size after 100000: %llu\n", final_size); |
| 123 |
|
|
const double growth = double(final_size - initial_size) / final_size; |
| 124 |
|
|
printf("Growth: %0.2f%%", growth * 100); |
| 125 |
|
|
CHECK(growth < 0.02); // Allow < 2% growth |
| 126 |
|
|
} |
| 127 |
|
|
|
| 128 |
|
|
#endif |
| 129 |
|
|
|
| 130 |
|
|
static void RadixTests() { |
| 131 |
|
|
printf("Testing hex\n"); |
| 132 |
|
|
|
| 133 |
|
|
#define CHECK_HEX(type, value) \ |
| 134 |
|
|
do { \ |
| 135 |
|
|
type v; \ |
| 136 |
|
|
CHECK(RE("([0-9a-fA-F]+)[uUlL]*").FullMatch(#value, Hex(&v))); \ |
| 137 |
|
|
CHECK_EQ(v, 0x ## value); \ |
| 138 |
|
|
CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0x" #value, CRadix(&v))); \ |
| 139 |
|
|
CHECK_EQ(v, 0x ## value); \ |
| 140 |
|
|
} while(0) |
| 141 |
|
|
|
| 142 |
|
|
CHECK_HEX(short, 2bad); |
| 143 |
|
|
CHECK_HEX(unsigned short, 2badU); |
| 144 |
|
|
CHECK_HEX(int, dead); |
| 145 |
|
|
CHECK_HEX(unsigned int, deadU); |
| 146 |
|
|
CHECK_HEX(long, 7eadbeefL); |
| 147 |
|
|
CHECK_HEX(unsigned long, deadbeefUL); |
| 148 |
|
|
#ifdef HAVE_LONG_LONG |
| 149 |
|
|
CHECK_HEX(long long, 12345678deadbeefLL); |
| 150 |
|
|
#endif |
| 151 |
|
|
#ifdef HAVE_UNSIGNED_LONG_LONG |
| 152 |
|
|
CHECK_HEX(unsigned long long, cafebabedeadbeefULL); |
| 153 |
|
|
#endif |
| 154 |
|
|
|
| 155 |
|
|
#undef CHECK_HEX |
| 156 |
|
|
|
| 157 |
|
|
printf("Testing octal\n"); |
| 158 |
|
|
|
| 159 |
|
|
#define CHECK_OCTAL(type, value) \ |
| 160 |
|
|
do { \ |
| 161 |
|
|
type v; \ |
| 162 |
|
|
CHECK(RE("([0-7]+)[uUlL]*").FullMatch(#value, Octal(&v))); \ |
| 163 |
|
|
CHECK_EQ(v, 0 ## value); \ |
| 164 |
|
|
CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0" #value, CRadix(&v))); \ |
| 165 |
|
|
CHECK_EQ(v, 0 ## value); \ |
| 166 |
|
|
} while(0) |
| 167 |
|
|
|
| 168 |
|
|
CHECK_OCTAL(short, 77777); |
| 169 |
|
|
CHECK_OCTAL(unsigned short, 177777U); |
| 170 |
|
|
CHECK_OCTAL(int, 17777777777); |
| 171 |
|
|
CHECK_OCTAL(unsigned int, 37777777777U); |
| 172 |
|
|
CHECK_OCTAL(long, 17777777777L); |
| 173 |
|
|
CHECK_OCTAL(unsigned long, 37777777777UL); |
| 174 |
|
|
#ifdef HAVE_LONG_LONG |
| 175 |
|
|
CHECK_OCTAL(long long, 777777777777777777777LL); |
| 176 |
|
|
#endif |
| 177 |
|
|
#ifdef HAVE_UNSIGNED_LONG_LONG |
| 178 |
|
|
CHECK_OCTAL(unsigned long long, 1777777777777777777777ULL); |
| 179 |
|
|
#endif |
| 180 |
|
|
|
| 181 |
|
|
#undef CHECK_OCTAL |
| 182 |
|
|
|
| 183 |
|
|
printf("Testing decimal\n"); |
| 184 |
|
|
|
| 185 |
|
|
#define CHECK_DECIMAL(type, value) \ |
| 186 |
|
|
do { \ |
| 187 |
|
|
type v; \ |
| 188 |
|
|
CHECK(RE("(-?[0-9]+)[uUlL]*").FullMatch(#value, &v)); \ |
| 189 |
|
|
CHECK_EQ(v, value); \ |
| 190 |
|
|
CHECK(RE("(-?[0-9a-fA-FxX]+)[uUlL]*").FullMatch(#value, CRadix(&v))); \ |
| 191 |
|
|
CHECK_EQ(v, value); \ |
| 192 |
|
|
} while(0) |
| 193 |
|
|
|
| 194 |
|
|
CHECK_DECIMAL(short, -1); |
| 195 |
|
|
CHECK_DECIMAL(unsigned short, 9999); |
| 196 |
|
|
CHECK_DECIMAL(int, -1000); |
| 197 |
|
|
CHECK_DECIMAL(unsigned int, 12345U); |
| 198 |
|
|
CHECK_DECIMAL(long, -10000000L); |
| 199 |
|
|
CHECK_DECIMAL(unsigned long, 3083324652U); |
| 200 |
|
|
#ifdef HAVE_LONG_LONG |
| 201 |
|
|
CHECK_DECIMAL(long long, -100000000000000LL); |
| 202 |
|
|
#endif |
| 203 |
|
|
#ifdef HAVE_UNSIGNED_LONG_LONG |
| 204 |
|
|
CHECK_DECIMAL(unsigned long long, 1234567890987654321ULL); |
| 205 |
|
|
#endif |
| 206 |
|
|
|
| 207 |
|
|
#undef CHECK_DECIMAL |
| 208 |
|
|
|
| 209 |
|
|
} |
| 210 |
|
|
|
| 211 |
|
|
static void TestReplace() { |
| 212 |
|
|
printf("Testing Replace\n"); |
| 213 |
|
|
|
| 214 |
|
|
struct ReplaceTest { |
| 215 |
|
|
const char *regexp; |
| 216 |
|
|
const char *rewrite; |
| 217 |
|
|
const char *original; |
| 218 |
|
|
const char *single; |
| 219 |
|
|
const char *global; |
| 220 |
|
|
}; |
| 221 |
|
|
static const ReplaceTest tests[] = { |
| 222 |
|
|
{ "(qu|[b-df-hj-np-tv-z]*)([a-z]+)", |
| 223 |
|
|
"\\2\\1ay", |
| 224 |
|
|
"the quick brown fox jumps over the lazy dogs.", |
| 225 |
|
|
"ethay quick brown fox jumps over the lazy dogs.", |
| 226 |
|
|
"ethay ickquay ownbray oxfay umpsjay overay ethay azylay ogsday." }, |
| 227 |
|
|
{ "\\w+", |
| 228 |
|
|
"\\0-NOSPAM", |
| 229 |
|
|
"paul.haahr@google.com", |
| 230 |
|
|
"paul-NOSPAM.haahr@google.com", |
| 231 |
|
|
"paul-NOSPAM.haahr-NOSPAM@google-NOSPAM.com-NOSPAM" }, |
| 232 |
|
|
{ "^", |
| 233 |
|
|
"(START)", |
| 234 |
|
|
"foo", |
| 235 |
|
|
"(START)foo", |
| 236 |
|
|
"(START)foo" }, |
| 237 |
|
|
{ "^", |
| 238 |
|
|
"(START)", |
| 239 |
|
|
"", |
| 240 |
|
|
"(START)", |
| 241 |
|
|
"(START)" }, |
| 242 |
|
|
{ "$", |
| 243 |
|
|
"(END)", |
| 244 |
|
|
"", |
| 245 |
|
|
"(END)", |
| 246 |
|
|
"(END)" }, |
| 247 |
|
|
{ "b", |
| 248 |
|
|
"bb", |
| 249 |
|
|
"ababababab", |
| 250 |
|
|
"abbabababab", |
| 251 |
|
|
"abbabbabbabbabb" }, |
| 252 |
|
|
{ "b", |
| 253 |
|
|
"bb", |
| 254 |
|
|
"bbbbbb", |
| 255 |
|
|
"bbbbbbb", |
| 256 |
|
|
"bbbbbbbbbbbb" }, |
| 257 |
|
|
{ "b+", |
| 258 |
|
|
"bb", |
| 259 |
|
|
"bbbbbb", |
| 260 |
|
|
"bb", |
| 261 |
|
|
"bb" }, |
| 262 |
|
|
{ "b*", |
| 263 |
|
|
"bb", |
| 264 |
|
|
"bbbbbb", |
| 265 |
|
|
"bb", |
| 266 |
|
|
"bb" }, |
| 267 |
|
|
{ "b*", |
| 268 |
|
|
"bb", |
| 269 |
|
|
"aaaaa", |
| 270 |
|
|
"bbaaaaa", |
| 271 |
|
|
"bbabbabbabbabbabb" }, |
| 272 |
nigel |
91 |
{ "b*", |
| 273 |
|
|
"bb", |
| 274 |
|
|
"aa\naa\n", |
| 275 |
|
|
"bbaa\naa\n", |
| 276 |
|
|
"bbabbabb\nbbabbabb\nbb" }, |
| 277 |
|
|
{ "b*", |
| 278 |
|
|
"bb", |
| 279 |
|
|
"aa\raa\r", |
| 280 |
|
|
"bbaa\raa\r", |
| 281 |
|
|
"bbabbabb\rbbabbabb\rbb" }, |
| 282 |
|
|
{ "b*", |
| 283 |
|
|
"bb", |
| 284 |
|
|
"aa\r\naa\r\n", |
| 285 |
|
|
"bbaa\r\naa\r\n", |
| 286 |
|
|
"bbabbabb\r\nbbabbabb\r\nbb" }, |
| 287 |
|
|
#ifdef SUPPORT_UTF8 |
| 288 |
|
|
{ "b*", |
| 289 |
|
|
"bb", |
| 290 |
|
|
"\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8", // utf8 |
| 291 |
|
|
"bb\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8", |
| 292 |
|
|
"bb\xE3\x83\x9B""bb""\xE3\x83\xBC""bb""\xE3\x83\xA0""bb""\xE3\x81\xB8""bb" }, |
| 293 |
|
|
{ "b*", |
| 294 |
|
|
"bb", |
| 295 |
|
|
"\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n", // utf8 |
| 296 |
|
|
"bb\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n", |
| 297 |
|
|
("bb\xE3\x83\x9B""bb\r\nbb""\xE3\x83\xBC""bb\rbb""\xE3\x83\xA0" |
| 298 |
|
|
"bb\nbb""\xE3\x81\xB8""bb\r\nbb") }, |
| 299 |
|
|
#endif |
| 300 |
nigel |
77 |
{ "", NULL, NULL, NULL, NULL } |
| 301 |
|
|
}; |
| 302 |
|
|
|
| 303 |
nigel |
91 |
#ifdef SUPPORT_UTF8 |
| 304 |
|
|
const bool support_utf8 = true; |
| 305 |
|
|
#else |
| 306 |
|
|
const bool support_utf8 = false; |
| 307 |
|
|
#endif |
| 308 |
|
|
|
| 309 |
nigel |
77 |
for (const ReplaceTest *t = tests; t->original != NULL; ++t) { |
| 310 |
nigel |
91 |
RE re(t->regexp, RE_Options(PCRE_NEWLINE_CRLF).set_utf8(support_utf8)); |
| 311 |
|
|
assert(re.error().empty()); |
| 312 |
nigel |
77 |
string one(t->original); |
| 313 |
nigel |
91 |
CHECK(re.Replace(t->rewrite, &one)); |
| 314 |
nigel |
77 |
CHECK_EQ(one, t->single); |
| 315 |
|
|
string all(t->original); |
| 316 |
nigel |
91 |
CHECK(re.GlobalReplace(t->rewrite, &all) > 0); |
| 317 |
nigel |
77 |
CHECK_EQ(all, t->global); |
| 318 |
|
|
} |
| 319 |
nigel |
91 |
|
| 320 |
|
|
// One final test: test \r\n replacement when we're not in CRLF mode |
| 321 |
|
|
{ |
| 322 |
|
|
RE re("b*", RE_Options(PCRE_NEWLINE_CR).set_utf8(support_utf8)); |
| 323 |
|
|
assert(re.error().empty()); |
| 324 |
|
|
string all("aa\r\naa\r\n"); |
| 325 |
|
|
CHECK(re.GlobalReplace("bb", &all) > 0); |
| 326 |
|
|
CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb")); |
| 327 |
|
|
} |
| 328 |
|
|
{ |
| 329 |
|
|
RE re("b*", RE_Options(PCRE_NEWLINE_LF).set_utf8(support_utf8)); |
| 330 |
|
|
assert(re.error().empty()); |
| 331 |
|
|
string all("aa\r\naa\r\n"); |
| 332 |
|
|
CHECK(re.GlobalReplace("bb", &all) > 0); |
| 333 |
|
|
CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb")); |
| 334 |
|
|
} |
| 335 |
|
|
// TODO: test what happens when no PCRE_NEWLINE_* flag is set. |
| 336 |
|
|
// Alas, the answer depends on how pcre was compiled. |
| 337 |
nigel |
77 |
} |
| 338 |
|
|
|
| 339 |
|
|
// Tests RE::Extract(): the rewrite string is expanded from the match and
// stored in the output string; a failed match must leave the output as it was.
static void TestExtract() {
  printf("Testing Extract\n");

  string s;

  // Swap the two captured groups around a '!'.
  CHECK(RE("(.*)@([^.]*)").Extract("\\2!\\1", "boris@kremvax.ru", &s));
  CHECK_EQ(s, "kremvax!boris");

  // \0 stands for the whole match.
  CHECK(RE(".*").Extract("'\\0'", "foo", &s));
  CHECK_EQ(s, "'foo'");

  // No match: Extract() returns false and s keeps its previous value.
  CHECK(!RE("bar").Extract("'\\0'", "baz", &s));
  CHECK_EQ(s, "'foo'");
}
| 353 |
|
|
|
| 354 |
|
|
static void TestConsume() { |
| 355 |
|
|
printf("Testing Consume\n"); |
| 356 |
|
|
|
| 357 |
|
|
string word; |
| 358 |
|
|
|
| 359 |
|
|
string s(" aaa b!@#$@#$cccc"); |
| 360 |
|
|
StringPiece input(s); |
| 361 |
|
|
|
| 362 |
|
|
RE r("\\s*(\\w+)"); // matches a word, possibly proceeded by whitespace |
| 363 |
|
|
CHECK(r.Consume(&input, &word)); |
| 364 |
|
|
CHECK_EQ(word, "aaa"); |
| 365 |
|
|
CHECK(r.Consume(&input, &word)); |
| 366 |
|
|
CHECK_EQ(word, "b"); |
| 367 |
|
|
CHECK(! r.Consume(&input, &word)); |
| 368 |
|
|
} |
| 369 |
|
|
|
| 370 |
|
|
static void TestFindAndConsume() { |
| 371 |
|
|
printf("Testing FindAndConsume\n"); |
| 372 |
|
|
|
| 373 |
|
|
string word; |
| 374 |
|
|
|
| 375 |
|
|
string s(" aaa b!@#$@#$cccc"); |
| 376 |
|
|
StringPiece input(s); |
| 377 |
|
|
|
| 378 |
|
|
RE r("(\\w+)"); // matches a word |
| 379 |
|
|
CHECK(r.FindAndConsume(&input, &word)); |
| 380 |
|
|
CHECK_EQ(word, "aaa"); |
| 381 |
|
|
CHECK(r.FindAndConsume(&input, &word)); |
| 382 |
|
|
CHECK_EQ(word, "b"); |
| 383 |
|
|
CHECK(r.FindAndConsume(&input, &word)); |
| 384 |
|
|
CHECK_EQ(word, "cccc"); |
| 385 |
|
|
CHECK(! r.FindAndConsume(&input, &word)); |
| 386 |
|
|
} |
| 387 |
|
|
|
| 388 |
|
|
static void TestMatchNumberPeculiarity() { |
| 389 |
|
|
printf("Testing match-number peculiaraity\n"); |
| 390 |
|
|
|
| 391 |
|
|
string word1; |
| 392 |
|
|
string word2; |
| 393 |
|
|
string word3; |
| 394 |
|
|
|
| 395 |
|
|
RE r("(foo)|(bar)|(baz)"); |
| 396 |
|
|
CHECK(r.PartialMatch("foo", &word1, &word2, &word3)); |
| 397 |
|
|
CHECK_EQ(word1, "foo"); |
| 398 |
|
|
CHECK_EQ(word2, ""); |
| 399 |
|
|
CHECK_EQ(word3, ""); |
| 400 |
|
|
CHECK(r.PartialMatch("bar", &word1, &word2, &word3)); |
| 401 |
|
|
CHECK_EQ(word1, ""); |
| 402 |
|
|
CHECK_EQ(word2, "bar"); |
| 403 |
|
|
CHECK_EQ(word3, ""); |
| 404 |
|
|
CHECK(r.PartialMatch("baz", &word1, &word2, &word3)); |
| 405 |
|
|
CHECK_EQ(word1, ""); |
| 406 |
|
|
CHECK_EQ(word2, ""); |
| 407 |
|
|
CHECK_EQ(word3, "baz"); |
| 408 |
|
|
CHECK(!r.PartialMatch("f", &word1, &word2, &word3)); |
| 409 |
|
|
|
| 410 |
|
|
string a; |
| 411 |
|
|
CHECK(RE("(foo)|hello").FullMatch("hello", &a)); |
| 412 |
|
|
CHECK_EQ(a, ""); |
| 413 |
|
|
} |
| 414 |
|
|
|
| 415 |
nigel |
87 |
static void TestRecursion() { |
| 416 |
nigel |
77 |
printf("Testing recursion\n"); |
| 417 |
|
|
|
| 418 |
nigel |
87 |
// Get one string that passes (sometimes), one that never does. |
| 419 |
|
|
string text_good("abcdefghijk"); |
| 420 |
|
|
string text_bad("acdefghijkl"); |
| 421 |
|
|
|
| 422 |
|
|
// According to pcretest, matching text_good against (\w+)*b |
| 423 |
|
|
// requires match_limit of at least 8192, and match_recursion_limit |
| 424 |
|
|
// of at least 37. |
| 425 |
|
|
|
| 426 |
|
|
RE_Options options_ml; |
| 427 |
|
|
options_ml.set_match_limit(8192); |
| 428 |
|
|
RE re("(\\w+)*b", options_ml); |
| 429 |
|
|
CHECK(re.PartialMatch(text_good) == true); |
| 430 |
|
|
CHECK(re.PartialMatch(text_bad) == false); |
| 431 |
|
|
CHECK(re.FullMatch(text_good) == false); |
| 432 |
|
|
CHECK(re.FullMatch(text_bad) == false); |
| 433 |
|
|
|
| 434 |
|
|
options_ml.set_match_limit(1024); |
| 435 |
|
|
RE re2("(\\w+)*b", options_ml); |
| 436 |
|
|
CHECK(re2.PartialMatch(text_good) == false); // because of match_limit |
| 437 |
|
|
CHECK(re2.PartialMatch(text_bad) == false); |
| 438 |
|
|
CHECK(re2.FullMatch(text_good) == false); |
| 439 |
|
|
CHECK(re2.FullMatch(text_bad) == false); |
| 440 |
|
|
|
| 441 |
|
|
RE_Options options_mlr; |
| 442 |
|
|
options_mlr.set_match_limit_recursion(50); |
| 443 |
|
|
RE re3("(\\w+)*b", options_mlr); |
| 444 |
|
|
CHECK(re3.PartialMatch(text_good) == true); |
| 445 |
|
|
CHECK(re3.PartialMatch(text_bad) == false); |
| 446 |
|
|
CHECK(re3.FullMatch(text_good) == false); |
| 447 |
|
|
CHECK(re3.FullMatch(text_bad) == false); |
| 448 |
|
|
|
| 449 |
|
|
options_mlr.set_match_limit_recursion(10); |
| 450 |
|
|
RE re4("(\\w+)*b", options_mlr); |
| 451 |
|
|
CHECK(re4.PartialMatch(text_good) == false); |
| 452 |
|
|
CHECK(re4.PartialMatch(text_bad) == false); |
| 453 |
|
|
CHECK(re4.FullMatch(text_good) == false); |
| 454 |
|
|
CHECK(re4.FullMatch(text_bad) == false); |
| 455 |
nigel |
77 |
} |
| 456 |
|
|
|
| 457 |
nigel |
93 |
// A meta-quoted string, interpreted as a pattern, should always match |
| 458 |
|
|
// the original unquoted string. |
| 459 |
|
|
static void TestQuoteMeta(string unquoted, RE_Options options = RE_Options()) { |
| 460 |
|
|
string quoted = RE::QuoteMeta(unquoted); |
| 461 |
|
|
RE re(quoted, options); |
| 462 |
|
|
CHECK(re.FullMatch(unquoted)); |
| 463 |
|
|
} |
| 464 |
|
|
|
| 465 |
|
|
// A string containing meaningful regexp characters, which is then meta- |
| 466 |
|
|
// quoted, should not generally match a string the unquoted string does. |
| 467 |
|
|
static void NegativeTestQuoteMeta(string unquoted, string should_not_match, |
| 468 |
|
|
RE_Options options = RE_Options()) { |
| 469 |
|
|
string quoted = RE::QuoteMeta(unquoted); |
| 470 |
|
|
RE re(quoted, options); |
| 471 |
|
|
CHECK(!re.FullMatch(should_not_match)); |
| 472 |
|
|
} |
| 473 |
|
|
|
| 474 |
|
|
// Tests that quoted meta characters match their original strings, |
| 475 |
|
|
// and that a few things that shouldn't match indeed do not. |
| 476 |
|
|
static void TestQuotaMetaSimple() { |
| 477 |
|
|
TestQuoteMeta("foo"); |
| 478 |
|
|
TestQuoteMeta("foo.bar"); |
| 479 |
|
|
TestQuoteMeta("foo\\.bar"); |
| 480 |
|
|
TestQuoteMeta("[1-9]"); |
| 481 |
|
|
TestQuoteMeta("1.5-2.0?"); |
| 482 |
|
|
TestQuoteMeta("\\d"); |
| 483 |
|
|
TestQuoteMeta("Who doesn't like ice cream?"); |
| 484 |
|
|
TestQuoteMeta("((a|b)c?d*e+[f-h]i)"); |
| 485 |
|
|
TestQuoteMeta("((?!)xxx).*yyy"); |
| 486 |
|
|
TestQuoteMeta("(["); |
| 487 |
|
|
} |
| 488 |
|
|
|
| 489 |
|
|
// Runs NegativeTestQuoteMeta() with default options over (unquoted, subject)
// pairs: the quoted form of `unquoted` must not fully match `subject`.
static void TestQuoteMetaSimpleNegative() {
  struct NegativeCase {
    const char *unquoted;
    const char *subject;
  };
  static const NegativeCase kCases[] = {
    { "foo",       "bar" },
    { "...",       "bar" },
    { "\\.",       "." },
    { "\\.",       ".." },
    { "(a)",       "a" },
    { "(a|b)",     "a" },
    { "(a|b)",     "(a)" },
    { "(a|b)",     "a|b" },
    { "[0-9]",     "0" },
    { "[0-9]",     "0-9" },
    { "[0-9]",     "[9]" },
    { "((?!)xxx)", "xxx" },
  };
  const size_t kNumCases = sizeof(kCases) / sizeof(kCases[0]);
  for (size_t i = 0; i < kNumCases; ++i) {
    NegativeTestQuoteMeta(kCases[i].unquoted, kCases[i].subject);
  }
}
| 503 |
|
|
|
| 504 |
|
|
// QuoteMeta round trip for a string containing a byte above 0x7f (0xb2),
// compiled with default options, i.e. not in UTF-8 mode.
static void TestQuoteMetaLatin1() {
  static const char kHighByteText[] = "3\xb2 = 9";
  TestQuoteMeta(kHighByteText);
}
| 507 |
|
|
|
| 508 |
|
|
// QuoteMeta tests for multi-byte UTF-8 input.  Does nothing unless
// SUPPORT_UTF8 is defined.
static void TestQuoteMetaUtf8() {
#ifdef SUPPORT_UTF8
  // Each of these must round-trip when compiled in UTF-8 mode.
  static const char* const kUtf8Inputs[] = {
    "Pl\xc3\xa1\x63ido Domingo",
    "xyz",                  // No fancy utf8
    "\xc2\xb0",             // 2-byte utf8 (degree symbol)
    "27\xc2\xb0 degrees",   // As a middle character
    "\xe2\x80\xb3",         // 3-byte utf8 (double prime)
    "\xf0\x9d\x85\x9f",     // 4-byte utf8 (music note)
  };
  const size_t kNumUtf8Inputs = sizeof(kUtf8Inputs) / sizeof(kUtf8Inputs[0]);
  for (size_t i = 0; i < kNumUtf8Inputs; ++i) {
    TestQuoteMeta(kUtf8Inputs[i], pcrecpp::UTF8());
  }

  TestQuoteMeta("27\xc2\xb0");  // Interpreted as Latin-1, but should still work

  // A subject with a backslash before each byte of the 2-byte degree symbol
  // must not match the quoted pattern.
  NegativeTestQuoteMeta("27\xc2\xb0",               // 2-byte utf (degree symbol)
                        "27\\\xc2\\\xb0",
                        pcrecpp::UTF8());
#endif
}
| 522 |
|
|
|
| 523 |
|
|
// Entry point for the QuoteMeta tests: prints a banner and runs each
// QuoteMeta test group in turn.
static void TestQuoteMetaAll() {
  printf("Testing QuoteMeta\n");

  typedef void (*QuoteMetaTest)();
  static const QuoteMetaTest kGroups[] = {
    TestQuotaMetaSimple,
    TestQuoteMetaSimpleNegative,
    TestQuoteMetaLatin1,
    TestQuoteMetaUtf8,
  };
  const size_t kNumGroups = sizeof(kGroups) / sizeof(kGroups[0]);
  for (size_t i = 0; i < kNumGroups; ++i) {
    kGroups[i]();
  }
}
| 530 |
|
|
|
| 531 |
nigel |
81 |
// |
| 532 |
|
|
// Options tests contributed by |
| 533 |
|
|
// Giuseppe Maxia, CTO, Stardata s.r.l. |
| 534 |
|
|
// July 2005 |
| 535 |
|
|
// |
| 536 |
|
|
static void GetOneOptionResult( |
| 537 |
|
|
const char *option_name, |
| 538 |
|
|
const char *regex, |
| 539 |
|
|
const char *str, |
| 540 |
|
|
RE_Options options, |
| 541 |
|
|
bool full, |
| 542 |
|
|
string expected) { |
| 543 |
nigel |
77 |
|
| 544 |
nigel |
81 |
printf("Testing Option <%s>\n", option_name); |
| 545 |
|
|
if(VERBOSE_TEST) |
| 546 |
|
|
printf("/%s/ finds \"%s\" within \"%s\" \n", |
| 547 |
|
|
regex, |
| 548 |
|
|
expected.c_str(), |
| 549 |
|
|
str); |
| 550 |
|
|
string captured(""); |
| 551 |
|
|
if (full) |
| 552 |
|
|
RE(regex,options).FullMatch(str, &captured); |
| 553 |
|
|
else |
| 554 |
|
|
RE(regex,options).PartialMatch(str, &captured); |
| 555 |
|
|
CHECK_EQ(captured, expected); |
| 556 |
|
|
} |
| 557 |
|
|
|
| 558 |
|
|
static void TestOneOption( |
| 559 |
|
|
const char *option_name, |
| 560 |
|
|
const char *regex, |
| 561 |
|
|
const char *str, |
| 562 |
|
|
RE_Options options, |
| 563 |
|
|
bool full, |
| 564 |
|
|
bool assertive = true) { |
| 565 |
|
|
|
| 566 |
|
|
printf("Testing Option <%s>\n", option_name); |
| 567 |
|
|
if (VERBOSE_TEST) |
| 568 |
|
|
printf("'%s' %s /%s/ \n", |
| 569 |
|
|
str, |
| 570 |
|
|
(assertive? "matches" : "doesn't match"), |
| 571 |
|
|
regex); |
| 572 |
|
|
if (assertive) { |
| 573 |
|
|
if (full) |
| 574 |
|
|
CHECK(RE(regex,options).FullMatch(str)); |
| 575 |
|
|
else |
| 576 |
|
|
CHECK(RE(regex,options).PartialMatch(str)); |
| 577 |
|
|
} else { |
| 578 |
|
|
if (full) |
| 579 |
|
|
CHECK(!RE(regex,options).FullMatch(str)); |
| 580 |
|
|
else |
| 581 |
|
|
CHECK(!RE(regex,options).PartialMatch(str)); |
| 582 |
|
|
} |
| 583 |
|
|
} |
| 584 |
|
|
|
| 585 |
|
|
static void Test_CASELESS() { |
| 586 |
|
|
RE_Options options; |
| 587 |
|
|
RE_Options options2; |
| 588 |
|
|
|
| 589 |
|
|
options.set_caseless(true); |
| 590 |
|
|
TestOneOption("CASELESS (class)", "HELLO", "hello", options, false); |
| 591 |
|
|
TestOneOption("CASELESS (class2)", "HELLO", "hello", options2.set_caseless(true), false); |
| 592 |
|
|
TestOneOption("CASELESS (class)", "^[A-Z]+$", "Hello", options, false); |
| 593 |
|
|
|
| 594 |
|
|
TestOneOption("CASELESS (function)", "HELLO", "hello", pcrecpp::CASELESS(), false); |
| 595 |
|
|
TestOneOption("CASELESS (function)", "^[A-Z]+$", "Hello", pcrecpp::CASELESS(), false); |
| 596 |
|
|
options.set_caseless(false); |
| 597 |
|
|
TestOneOption("no CASELESS", "HELLO", "hello", options, false, false); |
| 598 |
|
|
} |
| 599 |
|
|
|
| 600 |
|
|
static void Test_MULTILINE() { |
| 601 |
|
|
RE_Options options; |
| 602 |
|
|
RE_Options options2; |
| 603 |
|
|
const char *str = "HELLO\n" "cruel\n" "world\n"; |
| 604 |
|
|
|
| 605 |
|
|
options.set_multiline(true); |
| 606 |
|
|
TestOneOption("MULTILINE (class)", "^cruel$", str, options, false); |
| 607 |
|
|
TestOneOption("MULTILINE (class2)", "^cruel$", str, options2.set_multiline(true), false); |
| 608 |
|
|
TestOneOption("MULTILINE (function)", "^cruel$", str, pcrecpp::MULTILINE(), false); |
| 609 |
|
|
options.set_multiline(false); |
| 610 |
|
|
TestOneOption("no MULTILINE", "^cruel$", str, options, false, false); |
| 611 |
|
|
} |
| 612 |
|
|
|
| 613 |
|
|
static void Test_DOTALL() { |
| 614 |
|
|
RE_Options options; |
| 615 |
|
|
RE_Options options2; |
| 616 |
|
|
const char *str = "HELLO\n" "cruel\n" "world"; |
| 617 |
|
|
|
| 618 |
|
|
options.set_dotall(true); |
| 619 |
|
|
TestOneOption("DOTALL (class)", "HELLO.*world", str, options, true); |
| 620 |
|
|
TestOneOption("DOTALL (class2)", "HELLO.*world", str, options2.set_dotall(true), true); |
| 621 |
|
|
TestOneOption("DOTALL (function)", "HELLO.*world", str, pcrecpp::DOTALL(), true); |
| 622 |
|
|
options.set_dotall(false); |
| 623 |
|
|
TestOneOption("no DOTALL", "HELLO.*world", str, options, true, false); |
| 624 |
|
|
} |
| 625 |
|
|
|
| 626 |
|
|
static void Test_DOLLAR_ENDONLY() { |
| 627 |
|
|
RE_Options options; |
| 628 |
|
|
RE_Options options2; |
| 629 |
|
|
const char *str = "HELLO world\n"; |
| 630 |
|
|
|
| 631 |
|
|
TestOneOption("no DOLLAR_ENDONLY", "world$", str, options, false); |
| 632 |
|
|
options.set_dollar_endonly(true); |
| 633 |
|
|
TestOneOption("DOLLAR_ENDONLY 1", "world$", str, options, false, false); |
| 634 |
|
|
TestOneOption("DOLLAR_ENDONLY 2", "world$", str, options2.set_dollar_endonly(true), false, false); |
| 635 |
|
|
} |
| 636 |
|
|
|
| 637 |
|
|
// Tests the extra option: the pattern "\HELL\O" must not match "HELLO" when
// the option is set, and must match when it is cleared.
static void Test_EXTRA() {
  const bool kFull = true;          // use FullMatch
  const bool kExpectNoMatch = false;
  const char *str = "HELLO";

  RE_Options options;
  options.set_extra(true);

  TestOneOption("EXTRA 1", "\\HELL\\O", str, options, kFull, kExpectNoMatch);
  TestOneOption("EXTRA 2", "\\HELL\\O", str, RE_Options().set_extra(true),
                kFull, kExpectNoMatch);

  options.set_extra(false);
  TestOneOption("no EXTRA", "\\HELL\\O", str, options, kFull);
}
| 647 |
|
|
|
| 648 |
|
|
static void Test_EXTENDED() { |
| 649 |
|
|
RE_Options options; |
| 650 |
|
|
RE_Options options2; |
| 651 |
|
|
const char *str = "HELLO world"; |
| 652 |
|
|
|
| 653 |
|
|
options.set_extended(true); |
| 654 |
|
|
TestOneOption("EXTENDED (class)", "HELLO world", str, options, false, false); |
| 655 |
|
|
TestOneOption("EXTENDED (class2)", "HELLO world", str, options2.set_extended(true), false, false); |
| 656 |
|
|
TestOneOption("EXTENDED (class)", |
| 657 |
|
|
"^ HE L{2} O " |
| 658 |
|
|
"\\s+ " |
| 659 |
|
|
"\\w+ $ ", |
| 660 |
|
|
str, |
| 661 |
|
|
options, |
| 662 |
|
|
false); |
| 663 |
|
|
|
| 664 |
|
|
TestOneOption("EXTENDED (function)", "HELLO world", str, pcrecpp::EXTENDED(), false, false); |
| 665 |
|
|
TestOneOption("EXTENDED (function)", |
| 666 |
|
|
"^ HE L{2} O " |
| 667 |
|
|
"\\s+ " |
| 668 |
|
|
"\\w+ $ ", |
| 669 |
|
|
str, |
| 670 |
|
|
pcrecpp::EXTENDED(), |
| 671 |
|
|
false); |
| 672 |
|
|
|
| 673 |
|
|
options.set_extended(false); |
| 674 |
|
|
TestOneOption("no EXTENDED", "HELLO world", str, options, false); |
| 675 |
|
|
} |
| 676 |
|
|
|
| 677 |
|
|
static void Test_NO_AUTO_CAPTURE() { |
| 678 |
|
|
RE_Options options; |
| 679 |
|
|
const char *str = "HELLO world"; |
| 680 |
|
|
string captured; |
| 681 |
|
|
|
| 682 |
|
|
printf("Testing Option <no NO_AUTO_CAPTURE>\n"); |
| 683 |
|
|
if (VERBOSE_TEST) |
| 684 |
|
|
printf("parentheses capture text\n"); |
| 685 |
|
|
RE re("(world|universe)$", options); |
| 686 |
|
|
CHECK(re.Extract("\\1", str , &captured)); |
| 687 |
|
|
CHECK_EQ(captured, "world"); |
| 688 |
|
|
options.set_no_auto_capture(true); |
| 689 |
|
|
printf("testing Option <NO_AUTO_CAPTURE>\n"); |
| 690 |
|
|
if (VERBOSE_TEST) |
| 691 |
|
|
printf("parentheses do not capture text\n"); |
| 692 |
|
|
re.Extract("\\1",str, &captured ); |
| 693 |
|
|
CHECK_EQ(captured, "world"); |
| 694 |
|
|
} |
| 695 |
|
|
|
| 696 |
|
|
// Tests the ungreedy option, which swaps the meaning of `.*` and `.*?`:
// with it set, '.*' captures the shortest quoted run and '.*?' the longest;
// with it cleared, the usual greedy/lazy behaviour is expected.
static void Test_UNGREEDY() {
  const bool kPartial = false;      // use PartialMatch
  const char *str = "HELLO, 'this' is the 'world'";
  const char *kShortest = "'this'";
  const char *kLongest = "'this' is the 'world'";

  RE_Options options;
  options.set_ungreedy(true);

  GetOneOptionResult("UNGREEDY 1", "('.*')",  str, options, kPartial,
                     kShortest);
  GetOneOptionResult("UNGREEDY 2", "('.*')",  str,
                     RE_Options().set_ungreedy(true), kPartial, kShortest);
  GetOneOptionResult("UNGREEDY",   "('.*?')", str, options, kPartial,
                     kLongest);

  options.set_ungreedy(false);
  GetOneOptionResult("no UNGREEDY", "('.*')",  str, options, kPartial,
                     kLongest);
  GetOneOptionResult("no UNGREEDY", "('.*?')", str, options, kPartial,
                     kShortest);
}
| 709 |
|
|
|
| 710 |
|
|
// Tests setting several options at once: through set_all_options() with a
// bitmask of PCRE_* flags, through the RE_Options(int) constructor, and
// through chained setter calls.  set_all_options(0) is checked to turn the
// options back off.
static void Test_all_options() {
  const bool kPartial = false;      // use PartialMatch
  const bool kExpectNoMatch = false;
  const char *str = "HELLO\n" "cruel\n" "world";

  RE_Options options;

  // CASELESS + DOTALL on, then everything off.
  options.set_all_options(PCRE_CASELESS | PCRE_DOTALL);
  TestOneOption("all_options (CASELESS|DOTALL)", "^hello.*WORLD", str ,
                options, kPartial);
  options.set_all_options(0);
  TestOneOption("all_options (0)", "^hello.*WORLD", str , options, kPartial,
                kExpectNoMatch);

  // MULTILINE + EXTENDED, requested three different ways.
  options.set_all_options(PCRE_MULTILINE | PCRE_EXTENDED);
  TestOneOption("all_options (MULTILINE|EXTENDED)", " ^ c r u e l $ ", str,
                options, kPartial);
  TestOneOption("all_options (MULTILINE|EXTENDED) with constructor",
                " ^ c r u e l $ ",
                str,
                RE_Options(PCRE_MULTILINE | PCRE_EXTENDED),
                kPartial);
  TestOneOption("all_options (MULTILINE|EXTENDED) with concatenation",
                " ^ c r u e l $ ",
                str,
                RE_Options()
                  .set_multiline(true)
                  .set_extended(true),
                kPartial);

  // Everything off again: the spaced-out pattern must no longer match.
  options.set_all_options(0);
  TestOneOption("all_options (0)", "^ c r u e l $", str, options, kPartial,
                kExpectNoMatch);

}
| 739 |
|
|
|
| 740 |
|
|
// Entry point for the RE_Options tests: prints a banner and runs each
// per-option test in a fixed order.
static void TestOptions() {
  printf("Testing Options\n");

  typedef void (*OptionTest)();
  static const OptionTest kOptionTests[] = {
    Test_CASELESS,
    Test_MULTILINE,
    Test_DOTALL,
    Test_DOLLAR_ENDONLY,
    Test_EXTENDED,
    Test_NO_AUTO_CAPTURE,
    Test_UNGREEDY,
    Test_EXTRA,
    Test_all_options,
  };
  const size_t kNumOptionTests = sizeof(kOptionTests) / sizeof(kOptionTests[0]);
  for (size_t i = 0; i < kNumOptionTests; ++i) {
    kOptionTests[i]();
  }
}
| 752 |
|
|
|
| 753 |
nigel |
93 |
static void TestConstructors() { |
| 754 |
|
|
printf("Testing constructors\n"); |
| 755 |
|
|
|
| 756 |
|
|
RE_Options options; |
| 757 |
|
|
options.set_dotall(true); |
| 758 |
|
|
const char *str = "HELLO\n" "cruel\n" "world"; |
| 759 |
|
|
|
| 760 |
|
|
RE orig("HELLO.*world", options); |
| 761 |
|
|
CHECK(orig.FullMatch(str)); |
| 762 |
|
|
|
| 763 |
|
|
RE copy1(orig); |
| 764 |
|
|
CHECK(copy1.FullMatch(str)); |
| 765 |
|
|
|
| 766 |
|
|
RE copy2("not a match"); |
| 767 |
|
|
CHECK(!copy2.FullMatch(str)); |
| 768 |
|
|
copy2 = copy1; |
| 769 |
|
|
CHECK(copy2.FullMatch(str)); |
| 770 |
|
|
copy2 = orig; |
| 771 |
|
|
CHECK(copy2.FullMatch(str)); |
| 772 |
|
|
|
| 773 |
|
|
// Make sure when we assign to ourselves, nothing bad happens |
| 774 |
|
|
orig = orig; |
| 775 |
|
|
copy1 = copy1; |
| 776 |
|
|
copy2 = copy2; |
| 777 |
|
|
CHECK(orig.FullMatch(str)); |
| 778 |
|
|
CHECK(copy1.FullMatch(str)); |
| 779 |
|
|
CHECK(copy2.FullMatch(str)); |
| 780 |
|
|
} |
| 781 |
|
|
|
| 782 |
nigel |
77 |
int main(int argc, char** argv) { |
| 783 |
|
|
// Treat any flag as --help |
| 784 |
|
|
if (argc > 1 && argv[1][0] == '-') { |
| 785 |
|
|
printf("Usage: %s [timing1|timing2|timing3 num-iters]\n" |
| 786 |
|
|
" If 'timingX ###' is specified, run the given timing test\n" |
| 787 |
|
|
" with the given number of iterations, rather than running\n" |
| 788 |
|
|
" the default corectness test.\n", argv[0]); |
| 789 |
|
|
return 0; |
| 790 |
|
|
} |
| 791 |
|
|
|
| 792 |
|
|
if (argc > 1) { |
| 793 |
|
|
if ( argc == 2 || atoi(argv[2]) == 0) { |
| 794 |
|
|
printf("timing mode needs a num-iters argument\n"); |
| 795 |
|
|
return 1; |
| 796 |
|
|
} |
| 797 |
|
|
if (!strcmp(argv[1], "timing1")) |
| 798 |
|
|
Timing1(atoi(argv[2])); |
| 799 |
|
|
else if (!strcmp(argv[1], "timing2")) |
| 800 |
|
|
Timing2(atoi(argv[2])); |
| 801 |
|
|
else if (!strcmp(argv[1], "timing3")) |
| 802 |
|
|
Timing3(atoi(argv[2])); |
| 803 |
|
|
else |
| 804 |
|
|
printf("Unknown argument '%s'\n", argv[1]); |
| 805 |
|
|
return 0; |
| 806 |
|
|
} |
| 807 |
|
|
|
| 808 |
|
|
printf("Testing FullMatch\n"); |
| 809 |
|
|
|
| 810 |
|
|
int i; |
| 811 |
|
|
string s; |
| 812 |
|
|
|
| 813 |
|
|
/***** FullMatch with no args *****/ |
| 814 |
|
|
|
| 815 |
|
|
CHECK(RE("h.*o").FullMatch("hello")); |
| 816 |
ph10 |
179 |
CHECK(!RE("h.*o").FullMatch("othello")); // Must be anchored at front |
| 817 |
|
|
CHECK(!RE("h.*o").FullMatch("hello!")); // Must be anchored at end |
| 818 |
|
|
CHECK(RE("a*").FullMatch("aaaa")); // Fullmatch with normal op |
| 819 |
|
|
CHECK(RE("a*?").FullMatch("aaaa")); // Fullmatch with nongreedy op |
| 820 |
|
|
CHECK(RE("a*?\\z").FullMatch("aaaa")); // Two unusual ops |
| 821 |
nigel |
77 |
|
| 822 |
|
|
/***** FullMatch with args *****/ |
| 823 |
|
|
|
| 824 |
|
|
// Zero-arg |
| 825 |
|
|
CHECK(RE("\\d+").FullMatch("1001")); |
| 826 |
|
|
|
| 827 |
|
|
// Single-arg |
| 828 |
|
|
CHECK(RE("(\\d+)").FullMatch("1001", &i)); |
| 829 |
|
|
CHECK_EQ(i, 1001); |
| 830 |
|
|
CHECK(RE("(-?\\d+)").FullMatch("-123", &i)); |
| 831 |
|
|
CHECK_EQ(i, -123); |
| 832 |
|
|
CHECK(!RE("()\\d+").FullMatch("10", &i)); |
| 833 |
|
|
CHECK(!RE("(\\d+)").FullMatch("1234567890123456789012345678901234567890", |
| 834 |
|
|
&i)); |
| 835 |
|
|
|
| 836 |
|
|
// Digits surrounding integer-arg |
| 837 |
|
|
CHECK(RE("1(\\d*)4").FullMatch("1234", &i)); |
| 838 |
|
|
CHECK_EQ(i, 23); |
| 839 |
|
|
CHECK(RE("(\\d)\\d+").FullMatch("1234", &i)); |
| 840 |
|
|
CHECK_EQ(i, 1); |
| 841 |
|
|
CHECK(RE("(-\\d)\\d+").FullMatch("-1234", &i)); |
| 842 |
|
|
CHECK_EQ(i, -1); |
| 843 |
|
|
CHECK(RE("(\\d)").PartialMatch("1234", &i)); |
| 844 |
|
|
CHECK_EQ(i, 1); |
| 845 |
|
|
CHECK(RE("(-\\d)").PartialMatch("-1234", &i)); |
| 846 |
|
|
CHECK_EQ(i, -1); |
| 847 |
|
|
|
| 848 |
|
|
// String-arg |
| 849 |
|
|
CHECK(RE("h(.*)o").FullMatch("hello", &s)); |
| 850 |
|
|
CHECK_EQ(s, string("ell")); |
| 851 |
|
|
|
| 852 |
|
|
// StringPiece-arg |
| 853 |
|
|
StringPiece sp; |
| 854 |
|
|
CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &sp, &i)); |
| 855 |
|
|
CHECK_EQ(sp.size(), 4); |
| 856 |
|
|
CHECK(memcmp(sp.data(), "ruby", 4) == 0); |
| 857 |
|
|
CHECK_EQ(i, 1234); |
| 858 |
|
|
|
| 859 |
|
|
// Multi-arg |
| 860 |
|
|
CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &s, &i)); |
| 861 |
|
|
CHECK_EQ(s, string("ruby")); |
| 862 |
|
|
CHECK_EQ(i, 1234); |
| 863 |
|
|
|
| 864 |
|
|
// Ignored arg |
| 865 |
|
|
CHECK(RE("(\\w+)(:)(\\d+)").FullMatch("ruby:1234", &s, (void*)NULL, &i)); |
| 866 |
|
|
CHECK_EQ(s, string("ruby")); |
| 867 |
|
|
CHECK_EQ(i, 1234); |
| 868 |
|
|
|
| 869 |
|
|
// Type tests |
| 870 |
|
|
{ |
| 871 |
|
|
char c; |
| 872 |
|
|
CHECK(RE("(H)ello").FullMatch("Hello", &c)); |
| 873 |
|
|
CHECK_EQ(c, 'H'); |
| 874 |
|
|
} |
| 875 |
|
|
{ |
| 876 |
|
|
unsigned char c; |
| 877 |
|
|
CHECK(RE("(H)ello").FullMatch("Hello", &c)); |
| 878 |
|
|
CHECK_EQ(c, static_cast<unsigned char>('H')); |
| 879 |
|
|
} |
| 880 |
|
|
{ |
| 881 |
|
|
short v; |
| 882 |
|
|
CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100); |
| 883 |
|
|
CHECK(RE("(-?\\d+)").FullMatch("-100", &v)); CHECK_EQ(v, -100); |
| 884 |
|
|
CHECK(RE("(-?\\d+)").FullMatch("32767", &v)); CHECK_EQ(v, 32767); |
| 885 |
|
|
CHECK(RE("(-?\\d+)").FullMatch("-32768", &v)); CHECK_EQ(v, -32768); |
| 886 |
|
|
CHECK(!RE("(-?\\d+)").FullMatch("-32769", &v)); |
| 887 |
|
|
CHECK(!RE("(-?\\d+)").FullMatch("32768", &v)); |
| 888 |
|
|
} |
| 889 |
|
|
{ |
| 890 |
|
|
unsigned short v; |
| 891 |
|
|
CHECK(RE("(\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100); |
| 892 |
|
|
CHECK(RE("(\\d+)").FullMatch("32767", &v)); CHECK_EQ(v, 32767); |
| 893 |
|
|
CHECK(RE("(\\d+)").FullMatch("65535", &v)); CHECK_EQ(v, 65535); |
| 894 |
|
|
CHECK(!RE("(\\d+)").FullMatch("65536", &v)); |
| 895 |
|
|
} |
| 896 |
|
|
{ |
| 897 |
|
|
int v; |
| 898 |
|
|
static const int max_value = 0x7fffffff; |
| 899 |
|
|
static const int min_value = -max_value - 1; |
| 900 |
|
|
CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100); |
| 901 |
|
|
CHECK(RE("(-?\\d+)").FullMatch("-100", &v)); CHECK_EQ(v, -100); |
| 902 |
|
|
CHECK(RE("(-?\\d+)").FullMatch("2147483647", &v)); CHECK_EQ(v, max_value); |
| 903 |
|
|
CHECK(RE("(-?\\d+)").FullMatch("-2147483648", &v)); CHECK_EQ(v, min_value); |
| 904 |
|
|
CHECK(!RE("(-?\\d+)").FullMatch("-2147483649", &v)); |
| 905 |
|
|
CHECK(!RE("(-?\\d+)").FullMatch("2147483648", &v)); |
| 906 |
|
|
} |
| 907 |
|
|
{ |
| 908 |
|
|
unsigned int v; |
| 909 |
|
|
static const unsigned int max_value = 0xfffffffful; |
| 910 |
|
|
CHECK(RE("(\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100); |
| 911 |
|
|
CHECK(RE("(\\d+)").FullMatch("4294967295", &v)); CHECK_EQ(v, max_value); |
| 912 |
|
|
CHECK(!RE("(\\d+)").FullMatch("4294967296", &v)); |
| 913 |
|
|
} |
| 914 |
|
|
#ifdef HAVE_LONG_LONG |
| 915 |
ph10 |
193 |
# if defined(__MINGW__) || defined(__MINGW32__) |
| 916 |
|
|
# define LLD "%I64d" |
| 917 |
ph10 |
201 |
# define LLU "%I64u" |
| 918 |
ph10 |
193 |
# else |
| 919 |
|
|
# define LLD "%lld" |
| 920 |
ph10 |
201 |
# define LLU "%llu" |
| 921 |
ph10 |
193 |
# endif |
| 922 |
nigel |
77 |
{ |
| 923 |
|
|
long long v; |
| 924 |
|
|
static const long long max_value = 0x7fffffffffffffffLL; |
| 925 |
|
|
static const long long min_value = -max_value - 1; |
| 926 |
|
|
char buf[32]; |
| 927 |
|
|
|
| 928 |
|
|
CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100); |
| 929 |
|
|
CHECK(RE("(-?\\d+)").FullMatch("-100",&v)); CHECK_EQ(v, -100); |
| 930 |
|
|
|
| 931 |
ph10 |
193 |
snprintf(buf, sizeof(buf), LLD, max_value); |
| 932 |
nigel |
77 |
CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value); |
| 933 |
|
|
|
| 934 |
ph10 |
193 |
snprintf(buf, sizeof(buf), LLD, min_value); |
| 935 |
nigel |
77 |
CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, min_value); |
| 936 |
|
|
|
| 937 |
ph10 |
193 |
snprintf(buf, sizeof(buf), LLD, max_value); |
| 938 |
nigel |
77 |
assert(buf[strlen(buf)-1] != '9'); |
| 939 |
|
|
buf[strlen(buf)-1]++; |
| 940 |
|
|
CHECK(!RE("(-?\\d+)").FullMatch(buf, &v)); |
| 941 |
|
|
|
| 942 |
ph10 |
193 |
snprintf(buf, sizeof(buf), LLD, min_value); |
| 943 |
nigel |
77 |
assert(buf[strlen(buf)-1] != '9'); |
| 944 |
|
|
buf[strlen(buf)-1]++; |
| 945 |
|
|
CHECK(!RE("(-?\\d+)").FullMatch(buf, &v)); |
| 946 |
|
|
} |
| 947 |
|
|
#endif |
| 948 |
|
|
#if defined HAVE_UNSIGNED_LONG_LONG && defined HAVE_LONG_LONG |
| 949 |
|
|
{ |
| 950 |
|
|
unsigned long long v; |
| 951 |
|
|
long long v2; |
| 952 |
|
|
static const unsigned long long max_value = 0xffffffffffffffffULL; |
| 953 |
|
|
char buf[32]; |
| 954 |
|
|
|
| 955 |
|
|
CHECK(RE("(-?\\d+)").FullMatch("100",&v)); CHECK_EQ(v, 100); |
| 956 |
|
|
CHECK(RE("(-?\\d+)").FullMatch("-100",&v2)); CHECK_EQ(v2, -100); |
| 957 |
|
|
|
| 958 |
ph10 |
201 |
snprintf(buf, sizeof(buf), LLU, max_value); |
| 959 |
nigel |
77 |
CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value); |
| 960 |
|
|
|
| 961 |
|
|
assert(buf[strlen(buf)-1] != '9'); |
| 962 |
|
|
buf[strlen(buf)-1]++; |
| 963 |
|
|
CHECK(!RE("(-?\\d+)").FullMatch(buf, &v)); |
| 964 |
|
|
} |
| 965 |
|
|
#endif |
| 966 |
|
|
{ |
| 967 |
|
|
float v; |
| 968 |
|
|
CHECK(RE("(.*)").FullMatch("100", &v)); |
| 969 |
|
|
CHECK(RE("(.*)").FullMatch("-100.", &v)); |
| 970 |
|
|
CHECK(RE("(.*)").FullMatch("1e23", &v)); |
| 971 |
|
|
} |
| 972 |
|
|
{ |
| 973 |
|
|
double v; |
| 974 |
|
|
CHECK(RE("(.*)").FullMatch("100", &v)); |
| 975 |
|
|
CHECK(RE("(.*)").FullMatch("-100.", &v)); |
| 976 |
|
|
CHECK(RE("(.*)").FullMatch("1e23", &v)); |
| 977 |
|
|
} |
| 978 |
|
|
|
| 979 |
|
|
// Check that matching is fully anchored |
| 980 |
|
|
CHECK(!RE("(\\d+)").FullMatch("x1001", &i)); |
| 981 |
|
|
CHECK(!RE("(\\d+)").FullMatch("1001x", &i)); |
| 982 |
|
|
CHECK(RE("x(\\d+)").FullMatch("x1001", &i)); CHECK_EQ(i, 1001); |
| 983 |
|
|
CHECK(RE("(\\d+)x").FullMatch("1001x", &i)); CHECK_EQ(i, 1001); |
| 984 |
|
|
|
| 985 |
|
|
// Braces |
| 986 |
|
|
CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcd")); |
| 987 |
|
|
CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcde")); |
| 988 |
|
|
CHECK(!RE("[0-9a-f+.-]{5,}").FullMatch("0abc")); |
| 989 |
|
|
|
| 990 |
|
|
// Complicated RE |
| 991 |
|
|
CHECK(RE("foo|bar|[A-Z]").FullMatch("foo")); |
| 992 |
|
|
CHECK(RE("foo|bar|[A-Z]").FullMatch("bar")); |
| 993 |
|
|
CHECK(RE("foo|bar|[A-Z]").FullMatch("X")); |
| 994 |
|
|
CHECK(!RE("foo|bar|[A-Z]").FullMatch("XY")); |
| 995 |
|
|
|
| 996 |
|
|
// Check full-match handling (needs '$' tacked on internally) |
| 997 |
|
|
CHECK(RE("fo|foo").FullMatch("fo")); |
| 998 |
|
|
CHECK(RE("fo|foo").FullMatch("foo")); |
| 999 |
|
|
CHECK(RE("fo|foo$").FullMatch("fo")); |
| 1000 |
|
|
CHECK(RE("fo|foo$").FullMatch("foo")); |
| 1001 |
|
|
CHECK(RE("foo$").FullMatch("foo")); |
| 1002 |
|
|
CHECK(!RE("foo\\$").FullMatch("foo$bar")); |
| 1003 |
|
|
CHECK(!RE("fo|bar").FullMatch("fox")); |
| 1004 |
|
|
|
| 1005 |
|
|
// Uncomment the following if we change the handling of '$' to |
| 1006 |
|
|
// prevent it from matching a trailing newline |
| 1007 |
|
|
if (false) { |
| 1008 |
|
|
// Check that we don't get bitten by pcre's special handling of a |
| 1009 |
|
|
// '\n' at the end of the string matching '$' |
| 1010 |
|
|
CHECK(!RE("foo$").PartialMatch("foo\n")); |
| 1011 |
|
|
} |
| 1012 |
|
|
|
| 1013 |
|
|
// Number of args |
| 1014 |
|
|
int a[16]; |
| 1015 |
|
|
CHECK(RE("").FullMatch("")); |
| 1016 |
|
|
|
| 1017 |
|
|
memset(a, 0, sizeof(0)); |
| 1018 |
|
|
CHECK(RE("(\\d){1}").FullMatch("1", |
| 1019 |
|
|
&a[0])); |
| 1020 |
|
|
CHECK_EQ(a[0], 1); |
| 1021 |
|
|
|
| 1022 |
|
|
memset(a, 0, sizeof(0)); |
| 1023 |
|
|
CHECK(RE("(\\d)(\\d)").FullMatch("12", |
| 1024 |
|
|
&a[0], &a[1])); |
| 1025 |
|
|
CHECK_EQ(a[0], 1); |
| 1026 |
|
|
CHECK_EQ(a[1], 2); |
| 1027 |
|
|
|
| 1028 |
|
|
memset(a, 0, sizeof(0)); |
| 1029 |
|
|
CHECK(RE("(\\d)(\\d)(\\d)").FullMatch("123", |
| 1030 |
|
|
&a[0], &a[1], &a[2])); |
| 1031 |
|
|
CHECK_EQ(a[0], 1); |
| 1032 |
|
|
CHECK_EQ(a[1], 2); |
| 1033 |
|
|
CHECK_EQ(a[2], 3); |
| 1034 |
|
|
|
| 1035 |
|
|
memset(a, 0, sizeof(0)); |
| 1036 |
|
|
CHECK(RE("(\\d)(\\d)(\\d)(\\d)").FullMatch("1234", |
| 1037 |
|
|
&a[0], &a[1], &a[2], &a[3])); |
| 1038 |
|
|
CHECK_EQ(a[0], 1); |
| 1039 |
|
|
CHECK_EQ(a[1], 2); |
| 1040 |
|
|
CHECK_EQ(a[2], 3); |
| 1041 |
|
|
CHECK_EQ(a[3], 4); |
| 1042 |
|
|
|
| 1043 |
|
|
memset(a, 0, sizeof(0)); |
| 1044 |
|
|
CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("12345", |
| 1045 |
|
|
&a[0], &a[1], &a[2], |
| 1046 |
|
|
&a[3], &a[4])); |
| 1047 |
|
|
CHECK_EQ(a[0], 1); |
| 1048 |
|
|
CHECK_EQ(a[1], 2); |
| 1049 |
|
|
CHECK_EQ(a[2], 3); |
| 1050 |
|
|
CHECK_EQ(a[3], 4); |
| 1051 |
|
|
CHECK_EQ(a[4], 5); |
| 1052 |
|
|
|
| 1053 |
|
|
memset(a, 0, sizeof(0)); |
| 1054 |
|
|
CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("123456", |
| 1055 |
|
|
&a[0], &a[1], &a[2], |
| 1056 |
|
|
&a[3], &a[4], &a[5])); |
| 1057 |
|
|
CHECK_EQ(a[0], 1); |
| 1058 |
|
|
CHECK_EQ(a[1], 2); |
| 1059 |
|
|
CHECK_EQ(a[2], 3); |
| 1060 |
|
|
CHECK_EQ(a[3], 4); |
| 1061 |
|
|
CHECK_EQ(a[4], 5); |
| 1062 |
|
|
CHECK_EQ(a[5], 6); |
| 1063 |
|
|
|
| 1064 |
|
|
memset(a, 0, sizeof(0)); |
| 1065 |
|
|
CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("1234567", |
| 1066 |
|
|
&a[0], &a[1], &a[2], &a[3], |
| 1067 |
|
|
&a[4], &a[5], &a[6])); |
| 1068 |
|
|
CHECK_EQ(a[0], 1); |
| 1069 |
|
|
CHECK_EQ(a[1], 2); |
| 1070 |
|
|
CHECK_EQ(a[2], 3); |
| 1071 |
|
|
CHECK_EQ(a[3], 4); |
| 1072 |
|
|
CHECK_EQ(a[4], 5); |
| 1073 |
|
|
CHECK_EQ(a[5], 6); |
| 1074 |
|
|
CHECK_EQ(a[6], 7); |
| 1075 |
|
|
|
| 1076 |
|
|
memset(a, 0, sizeof(0)); |
| 1077 |
|
|
CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)" |
| 1078 |
|
|
"(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch( |
| 1079 |
|
|
"1234567890123456", |
| 1080 |
|
|
&a[0], &a[1], &a[2], &a[3], |
| 1081 |
|
|
&a[4], &a[5], &a[6], &a[7], |
| 1082 |
|
|
&a[8], &a[9], &a[10], &a[11], |
| 1083 |
|
|
&a[12], &a[13], &a[14], &a[15])); |
| 1084 |
|
|
CHECK_EQ(a[0], 1); |
| 1085 |
|
|
CHECK_EQ(a[1], 2); |
| 1086 |
|
|
CHECK_EQ(a[2], 3); |
| 1087 |
|
|
CHECK_EQ(a[3], 4); |
| 1088 |
|
|
CHECK_EQ(a[4], 5); |
| 1089 |
|
|
CHECK_EQ(a[5], 6); |
| 1090 |
|
|
CHECK_EQ(a[6], 7); |
| 1091 |
|
|
CHECK_EQ(a[7], 8); |
| 1092 |
|
|
CHECK_EQ(a[8], 9); |
| 1093 |
|
|
CHECK_EQ(a[9], 0); |
| 1094 |
|
|
CHECK_EQ(a[10], 1); |
| 1095 |
|
|
CHECK_EQ(a[11], 2); |
| 1096 |
|
|
CHECK_EQ(a[12], 3); |
| 1097 |
|
|
CHECK_EQ(a[13], 4); |
| 1098 |
|
|
CHECK_EQ(a[14], 5); |
| 1099 |
|
|
CHECK_EQ(a[15], 6); |
| 1100 |
|
|
|
| 1101 |
|
|
/***** PartialMatch *****/ |
| 1102 |
|
|
|
| 1103 |
|
|
printf("Testing PartialMatch\n"); |
| 1104 |
|
|
|
| 1105 |
|
|
CHECK(RE("h.*o").PartialMatch("hello")); |
| 1106 |
|
|
CHECK(RE("h.*o").PartialMatch("othello")); |
| 1107 |
|
|
CHECK(RE("h.*o").PartialMatch("hello!")); |
| 1108 |
|
|
CHECK(RE("((((((((((((((((((((x))))))))))))))))))))").PartialMatch("x")); |
| 1109 |
|
|
|
| 1110 |
nigel |
93 |
/***** other tests *****/ |
| 1111 |
|
|
|
| 1112 |
nigel |
77 |
RadixTests(); |
| 1113 |
|
|
TestReplace(); |
| 1114 |
|
|
TestExtract(); |
| 1115 |
|
|
TestConsume(); |
| 1116 |
|
|
TestFindAndConsume(); |
| 1117 |
nigel |
93 |
TestQuoteMetaAll(); |
| 1118 |
nigel |
77 |
TestMatchNumberPeculiarity(); |
| 1119 |
|
|
|
| 1120 |
|
|
// Check the pattern() accessor |
| 1121 |
|
|
{ |
| 1122 |
|
|
const string kPattern = "http://([^/]+)/.*"; |
| 1123 |
|
|
const RE re(kPattern); |
| 1124 |
|
|
CHECK_EQ(kPattern, re.pattern()); |
| 1125 |
|
|
} |
| 1126 |
|
|
|
| 1127 |
|
|
// Check RE error field. |
| 1128 |
|
|
{ |
| 1129 |
|
|
RE re("foo"); |
| 1130 |
|
|
CHECK(re.error().empty()); // Must have no error |
| 1131 |
|
|
} |
| 1132 |
|
|
|
| 1133 |
|
|
#ifdef SUPPORT_UTF8 |
| 1134 |
|
|
// Check UTF-8 handling |
| 1135 |
|
|
{ |
| 1136 |
|
|
printf("Testing UTF-8 handling\n"); |
| 1137 |
|
|
|
| 1138 |
|
|
// Three Japanese characters (nihongo) |
| 1139 |
|
|
const char utf8_string[] = { |
| 1140 |
|
|
0xe6, 0x97, 0xa5, // 65e5 |
| 1141 |
|
|
0xe6, 0x9c, 0xac, // 627c |
| 1142 |
|
|
0xe8, 0xaa, 0x9e, // 8a9e |
| 1143 |
|
|
0 |
| 1144 |
|
|
}; |
| 1145 |
|
|
const char utf8_pattern[] = { |
| 1146 |
|
|
'.', |
| 1147 |
|
|
0xe6, 0x9c, 0xac, // 627c |
| 1148 |
|
|
'.', |
| 1149 |
|
|
0 |
| 1150 |
|
|
}; |
| 1151 |
|
|
|
| 1152 |
|
|
// Both should match in either mode, bytes or UTF-8 |
| 1153 |
|
|
RE re_test1("........."); |
| 1154 |
|
|
CHECK(re_test1.FullMatch(utf8_string)); |
| 1155 |
|
|
RE re_test2("...", pcrecpp::UTF8()); |
| 1156 |
|
|
CHECK(re_test2.FullMatch(utf8_string)); |
| 1157 |
|
|
|
| 1158 |
|
|
// Check that '.' matches one byte or UTF-8 character |
| 1159 |
|
|
// according to the mode. |
| 1160 |
|
|
string ss; |
| 1161 |
|
|
RE re_test3("(.)"); |
| 1162 |
|
|
CHECK(re_test3.PartialMatch(utf8_string, &ss)); |
| 1163 |
|
|
CHECK_EQ(ss, string("\xe6")); |
| 1164 |
|
|
RE re_test4("(.)", pcrecpp::UTF8()); |
| 1165 |
|
|
CHECK(re_test4.PartialMatch(utf8_string, &ss)); |
| 1166 |
|
|
CHECK_EQ(ss, string("\xe6\x97\xa5")); |
| 1167 |
|
|
|
| 1168 |
|
|
// Check that string matches itself in either mode |
| 1169 |
|
|
RE re_test5(utf8_string); |
| 1170 |
|
|
CHECK(re_test5.FullMatch(utf8_string)); |
| 1171 |
|
|
RE re_test6(utf8_string, pcrecpp::UTF8()); |
| 1172 |
|
|
CHECK(re_test6.FullMatch(utf8_string)); |
| 1173 |
|
|
|
| 1174 |
|
|
// Check that pattern matches string only in UTF8 mode |
| 1175 |
|
|
RE re_test7(utf8_pattern); |
| 1176 |
|
|
CHECK(!re_test7.FullMatch(utf8_string)); |
| 1177 |
|
|
RE re_test8(utf8_pattern, pcrecpp::UTF8()); |
| 1178 |
|
|
CHECK(re_test8.FullMatch(utf8_string)); |
| 1179 |
|
|
} |
| 1180 |
|
|
|
| 1181 |
|
|
// Check that ungreedy, UTF8 regular expressions don't match when they |
| 1182 |
|
|
// oughtn't -- see bug 82246. |
| 1183 |
|
|
{ |
| 1184 |
|
|
// This code always worked. |
| 1185 |
|
|
const char* pattern = "\\w+X"; |
| 1186 |
|
|
const string target = "a aX"; |
| 1187 |
|
|
RE match_sentence(pattern); |
| 1188 |
|
|
RE match_sentence_re(pattern, pcrecpp::UTF8()); |
| 1189 |
|
|
|
| 1190 |
|
|
CHECK(!match_sentence.FullMatch(target)); |
| 1191 |
|
|
CHECK(!match_sentence_re.FullMatch(target)); |
| 1192 |
|
|
} |
| 1193 |
|
|
|
| 1194 |
|
|
{ |
| 1195 |
|
|
const char* pattern = "(?U)\\w+X"; |
| 1196 |
|
|
const string target = "a aX"; |
| 1197 |
|
|
RE match_sentence(pattern); |
| 1198 |
|
|
RE match_sentence_re(pattern, pcrecpp::UTF8()); |
| 1199 |
|
|
|
| 1200 |
|
|
CHECK(!match_sentence.FullMatch(target)); |
| 1201 |
|
|
CHECK(!match_sentence_re.FullMatch(target)); |
| 1202 |
|
|
} |
| 1203 |
|
|
#endif /* def SUPPORT_UTF8 */ |
| 1204 |
|
|
|
| 1205 |
|
|
printf("Testing error reporting\n"); |
| 1206 |
|
|
|
| 1207 |
|
|
{ RE re("a\\1"); CHECK(!re.error().empty()); } |
| 1208 |
|
|
{ |
| 1209 |
|
|
RE re("a[x"); |
| 1210 |
|
|
CHECK(!re.error().empty()); |
| 1211 |
|
|
} |
| 1212 |
|
|
{ |
| 1213 |
|
|
RE re("a[z-a]"); |
| 1214 |
|
|
CHECK(!re.error().empty()); |
| 1215 |
|
|
} |
| 1216 |
|
|
{ |
| 1217 |
|
|
RE re("a[[:foobar:]]"); |
| 1218 |
|
|
CHECK(!re.error().empty()); |
| 1219 |
|
|
} |
| 1220 |
|
|
{ |
| 1221 |
|
|
RE re("a(b"); |
| 1222 |
|
|
CHECK(!re.error().empty()); |
| 1223 |
|
|
} |
| 1224 |
|
|
{ |
| 1225 |
|
|
RE re("a\\"); |
| 1226 |
|
|
CHECK(!re.error().empty()); |
| 1227 |
|
|
} |
| 1228 |
|
|
|
| 1229 |
nigel |
87 |
// Test that recursion is stopped |
| 1230 |
|
|
TestRecursion(); |
| 1231 |
nigel |
77 |
|
| 1232 |
nigel |
81 |
// Test Options |
| 1233 |
|
|
if (getenv("VERBOSE_TEST") != NULL) |
| 1234 |
|
|
VERBOSE_TEST = true; |
| 1235 |
|
|
TestOptions(); |
| 1236 |
|
|
|
| 1237 |
nigel |
93 |
// Test the constructors |
| 1238 |
|
|
TestConstructors(); |
| 1239 |
|
|
|
| 1240 |
nigel |
77 |
// Done |
| 1241 |
|
|
printf("OK\n"); |
| 1242 |
|
|
|
| 1243 |
|
|
return 0; |
| 1244 |
|
|
} |