1 // -*- coding: utf-8 -*- 2 // 3 // Copyright (c) 2005 - 2010, Google Inc. 4 // All rights reserved. 5 // 6 // Redistribution and use in source and binary forms, with or without 7 // modification, are permitted provided that the following conditions are 8 // met: 9 // 10 // * Redistributions of source code must retain the above copyright 11 // notice, this list of conditions and the following disclaimer. 12 // * Redistributions in binary form must reproduce the above 13 // copyright notice, this list of conditions and the following disclaimer 14 // in the documentation and/or other materials provided with the 15 // distribution. 16 // * Neither the name of Google Inc. nor the names of its 17 // contributors may be used to endorse or promote products derived from 18 // this software without specific prior written permission. 19 // 20 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 24 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 // 32 // Author: Sanjay Ghemawat 33 // 34 // TODO: Test extractions for PartialMatch/Consume 35 36 #ifdef HAVE_CONFIG_H 37 #include "config.h" 38 #endif 39 40 #include <stdio.h> 41 #include <string.h> /* for memset and strcmp */ 42 #include <cassert> 43 #include <vector> 44 #include "pcrecpp.h" 45 46 using std::string; 47 using pcrecpp::StringPiece; 48 using pcrecpp::RE; 49 using pcrecpp::RE_Options; 50 using pcrecpp::Hex; 51 using pcrecpp::Octal; 52 using pcrecpp::CRadix; 53 54 static bool VERBOSE_TEST = false; 55 56 // CHECK dies with a fatal error if condition is not true. It is *not* 57 // controlled by NDEBUG, so the check will be executed regardless of 58 // compilation mode. Therefore, it is safe to do things like: 59 // CHECK_EQ(fp->Write(x), 4) 60 #define CHECK(condition) do { \ 61 if (!(condition)) { \ 62 fprintf(stderr, "%s:%d: Check failed: %s\n", \ 63 __FILE__, __LINE__, #condition); \ 64 exit(1); \ 65 } \ 66 } while (0) 67 68 #define CHECK_EQ(a, b) CHECK(a == b) 69 70 static void Timing1(int num_iters) { 71 // Same pattern lots of times 72 RE pattern("ruby:\\d+"); 73 StringPiece p("ruby:1234"); 74 for (int j = num_iters; j > 0; j--) { 75 CHECK(pattern.FullMatch(p)); 76 } 77 } 78 79 static void Timing2(int num_iters) { 80 // Same pattern lots of times 81 RE pattern("ruby:(\\d+)"); 82 int i; 83 for (int j = num_iters; j > 0; j--) { 84 CHECK(pattern.FullMatch("ruby:1234", &i)); 85 CHECK_EQ(i, 1234); 86 } 87 } 88 89 static void Timing3(int num_iters) { 90 string text_string; 91 for (int j = num_iters; j > 0; j--) { 92 text_string += "this is another line\n"; 93 } 94 95 RE line_matcher(".*\n"); 96 string line; 97 StringPiece text(text_string); 98 int counter = 0; 99 while (line_matcher.Consume(&text)) { 100 counter++; 101 } 102 printf("Matched %d lines\n", counter); 103 } 104 105 #if 0 // uncomment this if you have a way of defining VirtualProcessSize() 106 107 static void LeakTest() { 108 // Check for memory leaks 109 unsigned long long initial_size = 0; 110 for (int i = 0; i < 100000; i++) { 111 if (i == 50000) { 112 initial_size = VirtualProcessSize(); 113 printf("Size after 50000: %llu\n", initial_size); 114 } 115 char buf[100]; // definitely big enough 116 sprintf(buf, "pat%09d", i); 117 RE newre(buf); 118 } 119 uint64 final_size = VirtualProcessSize(); 120 printf("Size after 100000: %llu\n", final_size); 121 const double growth = double(final_size - initial_size) / final_size; 122 printf("Growth: %0.2f%%", growth * 100); 123 CHECK(growth < 0.02); // Allow < 2% growth 124 } 125 126 #endif 127 128 static void RadixTests() { 129 printf("Testing hex\n"); 130 131 #define CHECK_HEX(type, value) \ 132 do { \ 133 type v; \ 134 CHECK(RE("([0-9a-fA-F]+)[uUlL]*").FullMatch(#value, Hex(&v))); \ 135 CHECK_EQ(v, 0x ## value); \ 136 CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0x" #value, CRadix(&v))); \ 137 CHECK_EQ(v, 0x ## value); \ 138 } while(0) 139 140 CHECK_HEX(short, 2bad); 141 CHECK_HEX(unsigned short, 2badU); 142 CHECK_HEX(int, dead); 143 CHECK_HEX(unsigned int, deadU); 144 CHECK_HEX(long, 7eadbeefL); 145 CHECK_HEX(unsigned long, deadbeefUL); 146 #ifdef HAVE_LONG_LONG 147 CHECK_HEX(long long, 12345678deadbeefLL); 148 #endif 149 #ifdef HAVE_UNSIGNED_LONG_LONG 150 CHECK_HEX(unsigned long long, cafebabedeadbeefULL); 151 #endif 152 153 #undef CHECK_HEX 154 155 printf("Testing octal\n"); 156 157 #define CHECK_OCTAL(type, value) \ 158 do { \ 159 type v; \ 160 CHECK(RE("([0-7]+)[uUlL]*").FullMatch(#value, Octal(&v))); \ 161 CHECK_EQ(v, 0 ## value); \ 162 CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0" #value, CRadix(&v))); \ 163 CHECK_EQ(v, 0 ## value); \ 164 } while(0) 165 166 CHECK_OCTAL(short, 77777); 167 CHECK_OCTAL(unsigned short, 177777U); 168 CHECK_OCTAL(int, 17777777777); 169 CHECK_OCTAL(unsigned int, 37777777777U); 170 CHECK_OCTAL(long, 17777777777L); 171 CHECK_OCTAL(unsigned long, 37777777777UL); 172 #ifdef HAVE_LONG_LONG 173 CHECK_OCTAL(long long, 777777777777777777777LL); 174 #endif 175 #ifdef HAVE_UNSIGNED_LONG_LONG 176 CHECK_OCTAL(unsigned long long, 1777777777777777777777ULL); 177 #endif 178 179 #undef CHECK_OCTAL 180 181 printf("Testing decimal\n"); 182 183 #define CHECK_DECIMAL(type, value) \ 184 do { \ 185 type v; \ 186 CHECK(RE("(-?[0-9]+)[uUlL]*").FullMatch(#value, &v)); \ 187 CHECK_EQ(v, value); \ 188 CHECK(RE("(-?[0-9a-fA-FxX]+)[uUlL]*").FullMatch(#value, CRadix(&v))); \ 189 CHECK_EQ(v, value); \ 190 } while(0) 191 192 CHECK_DECIMAL(short, -1); 193 CHECK_DECIMAL(unsigned short, 9999); 194 CHECK_DECIMAL(int, -1000); 195 CHECK_DECIMAL(unsigned int, 12345U); 196 CHECK_DECIMAL(long, -10000000L); 197 CHECK_DECIMAL(unsigned long, 3083324652U); 198 #ifdef HAVE_LONG_LONG 199 CHECK_DECIMAL(long long, -100000000000000LL); 200 #endif 201 #ifdef HAVE_UNSIGNED_LONG_LONG 202 CHECK_DECIMAL(unsigned long long, 1234567890987654321ULL); 203 #endif 204 205 #undef CHECK_DECIMAL 206 207 } 208 209 static void TestReplace() { 210 printf("Testing Replace\n"); 211 212 struct ReplaceTest { 213 const char *regexp; 214 const char *rewrite; 215 const char *original; 216 const char *single; 217 const char *global; 218 int global_count; // the expected return value from ReplaceAll 219 }; 220 static const ReplaceTest tests[] = { 221 { "(qu|[b-df-hj-np-tv-z]*)([a-z]+)", 222 "\\2\\1ay", 223 "the quick brown fox jumps over the lazy dogs.", 224 "ethay quick brown fox jumps over the lazy dogs.", 225 "ethay ickquay ownbray oxfay umpsjay overay ethay azylay ogsday.", 226 9 }, 227 { "\\w+", 228 "\\0-NOSPAM", 229 "paul.haahr@google.com", 230 "paul-NOSPAM.haahr@google.com", 231 "paul-NOSPAM.haahr-NOSPAM@google-NOSPAM.com-NOSPAM", 232 4 }, 233 { "^", 234 "(START)", 235 "foo", 236 "(START)foo", 237 "(START)foo", 238 1 }, 239 { "^", 240 "(START)", 241 "", 242 "(START)", 243 "(START)", 244 1 }, 245 { "$", 246 "(END)", 247 "", 248 "(END)", 249 "(END)", 250 1 }, 251 { "b", 252 "bb", 253 "ababababab", 254 "abbabababab", 255 "abbabbabbabbabb", 256 5 }, 257 { "b", 258 "bb", 259 "bbbbbb", 260 "bbbbbbb", 261 "bbbbbbbbbbbb", 262 6 }, 263 { "b+", 264 "bb", 265 "bbbbbb", 266 "bb", 267 "bb", 268 1 }, 269 { "b*", 270 "bb", 271 "bbbbbb", 272 "bb", 273 "bbbb", 274 2 }, 275 { "b*", 276 "bb", 277 "aaaaa", 278 "bbaaaaa", 279 "bbabbabbabbabbabb", 280 6 }, 281 { "b*", 282 "bb", 283 "aa\naa\n", 284 "bbaa\naa\n", 285 "bbabbabb\nbbabbabb\nbb", 286 7 }, 287 { "b*", 288 "bb", 289 "aa\raa\r", 290 "bbaa\raa\r", 291 "bbabbabb\rbbabbabb\rbb", 292 7 }, 293 { "b*", 294 "bb", 295 "aa\r\naa\r\n", 296 "bbaa\r\naa\r\n", 297 "bbabbabb\r\nbbabbabb\r\nbb", 298 7 }, 299 // Check empty-string matching (it's tricky!) 300 { "aa|b*", 301 "@", 302 "aa", 303 "@", 304 "@@", 305 2 }, 306 { "b*|aa", 307 "@", 308 "aa", 309 "@aa", 310 "@@@", 311 3 }, 312 #ifdef SUPPORT_UTF 313 { "b*", 314 "bb", 315 "\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8", // utf8 316 "bb\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8", 317 "bb\xE3\x83\x9B""bb""\xE3\x83\xBC""bb""\xE3\x83\xA0""bb""\xE3\x81\xB8""bb", 318 5 }, 319 { "b*", 320 "bb", 321 "\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n", // utf8 322 "bb\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n", 323 ("bb\xE3\x83\x9B""bb\r\nbb""\xE3\x83\xBC""bb\rbb""\xE3\x83\xA0" 324 "bb\nbb""\xE3\x81\xB8""bb\r\nbb"), 325 9 }, 326 #endif 327 { "", NULL, NULL, NULL, NULL, 0 } 328 }; 329 330 #ifdef SUPPORT_UTF 331 const bool support_utf8 = true; 332 #else 333 const bool support_utf8 = false; 334 #endif 335 336 for (const ReplaceTest *t = tests; t->original != NULL; ++t) { 337 RE re(t->regexp, RE_Options(PCRE_NEWLINE_CRLF).set_utf8(support_utf8)); 338 assert(re.error().empty()); 339 string one(t->original); 340 CHECK(re.Replace(t->rewrite, &one)); 341 CHECK_EQ(one, t->single); 342 string all(t->original); 343 const int replace_count = re.GlobalReplace(t->rewrite, &all); 344 CHECK_EQ(all, t->global); 345 CHECK_EQ(replace_count, t->global_count); 346 } 347 348 // One final test: test \r\n replacement when we're not in CRLF mode 349 { 350 RE re("b*", RE_Options(PCRE_NEWLINE_CR).set_utf8(support_utf8)); 351 assert(re.error().empty()); 352 string all("aa\r\naa\r\n"); 353 CHECK_EQ(re.GlobalReplace("bb", &all), 9); 354 CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb")); 355 } 356 { 357 RE re("b*", RE_Options(PCRE_NEWLINE_LF).set_utf8(support_utf8)); 358 assert(re.error().empty()); 359 string all("aa\r\naa\r\n"); 360 CHECK_EQ(re.GlobalReplace("bb", &all), 9); 361 CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb")); 362 } 363 // TODO: test what happens when no PCRE_NEWLINE_* flag is set. 364 // Alas, the answer depends on how pcre was compiled. 365 } 366 367 static void TestExtract() { 368 printf("Testing Extract\n"); 369 370 string s; 371 372 CHECK(RE("(.*)@([^.]*)").Extract("\\2!\\1", "boris@kremvax.ru", &s)); 373 CHECK_EQ(s, "kremvax!boris"); 374 375 // check the RE interface as well 376 CHECK(RE(".*").Extract("'\\0'", "foo", &s)); 377 CHECK_EQ(s, "'foo'"); 378 CHECK(!RE("bar").Extract("'\\0'", "baz", &s)); 379 CHECK_EQ(s, "'foo'"); 380 } 381 382 static void TestConsume() { 383 printf("Testing Consume\n"); 384 385 string word; 386 387 string s(" aaa b!@#$@#$cccc"); 388 StringPiece input(s); 389 390 RE r("\\s*(\\w+)"); // matches a word, possibly proceeded by whitespace 391 CHECK(r.Consume(&input, &word)); 392 CHECK_EQ(word, "aaa"); 393 CHECK(r.Consume(&input, &word)); 394 CHECK_EQ(word, "b"); 395 CHECK(! r.Consume(&input, &word)); 396 } 397 398 static void TestFindAndConsume() { 399 printf("Testing FindAndConsume\n"); 400 401 string word; 402 403 string s(" aaa b!@#$@#$cccc"); 404 StringPiece input(s); 405 406 RE r("(\\w+)"); // matches a word 407 CHECK(r.FindAndConsume(&input, &word)); 408 CHECK_EQ(word, "aaa"); 409 CHECK(r.FindAndConsume(&input, &word)); 410 CHECK_EQ(word, "b"); 411 CHECK(r.FindAndConsume(&input, &word)); 412 CHECK_EQ(word, "cccc"); 413 CHECK(! r.FindAndConsume(&input, &word)); 414 } 415 416 static void TestMatchNumberPeculiarity() { 417 printf("Testing match-number peculiarity\n"); 418 419 string word1; 420 string word2; 421 string word3; 422 423 RE r("(foo)|(bar)|(baz)"); 424 CHECK(r.PartialMatch("foo", &word1, &word2, &word3)); 425 CHECK_EQ(word1, "foo"); 426 CHECK_EQ(word2, ""); 427 CHECK_EQ(word3, ""); 428 CHECK(r.PartialMatch("bar", &word1, &word2, &word3)); 429 CHECK_EQ(word1, ""); 430 CHECK_EQ(word2, "bar"); 431 CHECK_EQ(word3, ""); 432 CHECK(r.PartialMatch("baz", &word1, &word2, &word3)); 433 CHECK_EQ(word1, ""); 434 CHECK_EQ(word2, ""); 435 CHECK_EQ(word3, "baz"); 436 CHECK(!r.PartialMatch("f", &word1, &word2, &word3)); 437 438 string a; 439 CHECK(RE("(foo)|hello").FullMatch("hello", &a)); 440 CHECK_EQ(a, ""); 441 } 442 443 static void TestRecursion() { 444 printf("Testing recursion\n"); 445 446 // Get one string that passes (sometimes), one that never does. 447 string text_good("abcdefghijk"); 448 string text_bad("acdefghijkl"); 449 450 // According to pcretest, matching text_good against (\w+)*b 451 // requires match_limit of at least 8192, and match_recursion_limit 452 // of at least 37. 453 454 RE_Options options_ml; 455 options_ml.set_match_limit(8192); 456 RE re("(\\w+)*b", options_ml); 457 CHECK(re.PartialMatch(text_good) == true); 458 CHECK(re.PartialMatch(text_bad) == false); 459 CHECK(re.FullMatch(text_good) == false); 460 CHECK(re.FullMatch(text_bad) == false); 461 462 options_ml.set_match_limit(1024); 463 RE re2("(\\w+)*b", options_ml); 464 CHECK(re2.PartialMatch(text_good) == false); // because of match_limit 465 CHECK(re2.PartialMatch(text_bad) == false); 466 CHECK(re2.FullMatch(text_good) == false); 467 CHECK(re2.FullMatch(text_bad) == false); 468 469 RE_Options options_mlr; 470 options_mlr.set_match_limit_recursion(50); 471 RE re3("(\\w+)*b", options_mlr); 472 CHECK(re3.PartialMatch(text_good) == true); 473 CHECK(re3.PartialMatch(text_bad) == false); 474 CHECK(re3.FullMatch(text_good) == false); 475 CHECK(re3.FullMatch(text_bad) == false); 476 477 options_mlr.set_match_limit_recursion(10); 478 RE re4("(\\w+)*b", options_mlr); 479 CHECK(re4.PartialMatch(text_good) == false); 480 CHECK(re4.PartialMatch(text_bad) == false); 481 CHECK(re4.FullMatch(text_good) == false); 482 CHECK(re4.FullMatch(text_bad) == false); 483 } 484 485 // A meta-quoted string, interpreted as a pattern, should always match 486 // the original unquoted string. 487 static void TestQuoteMeta(string unquoted, RE_Options options = RE_Options()) { 488 string quoted = RE::QuoteMeta(unquoted); 489 RE re(quoted, options); 490 CHECK(re.FullMatch(unquoted)); 491 } 492 493 // A string containing meaningful regexp characters, which is then meta- 494 // quoted, should not generally match a string the unquoted string does. 495 static void NegativeTestQuoteMeta(string unquoted, string should_not_match, 496 RE_Options options = RE_Options()) { 497 string quoted = RE::QuoteMeta(unquoted); 498 RE re(quoted, options); 499 CHECK(!re.FullMatch(should_not_match)); 500 } 501 502 // Tests that quoted meta characters match their original strings, 503 // and that a few things that shouldn't match indeed do not. 504 static void TestQuotaMetaSimple() { 505 TestQuoteMeta("foo"); 506 TestQuoteMeta("foo.bar"); 507 TestQuoteMeta("foo\\.bar"); 508 TestQuoteMeta("[1-9]"); 509 TestQuoteMeta("1.5-2.0?"); 510 TestQuoteMeta("\\d"); 511 TestQuoteMeta("Who doesn't like ice cream?"); 512 TestQuoteMeta("((a|b)c?d*e+[f-h]i)"); 513 TestQuoteMeta("((?!)xxx).*yyy"); 514 TestQuoteMeta("(["); 515 TestQuoteMeta(string("foo\0bar", 7)); 516 } 517 518 static void TestQuoteMetaSimpleNegative() { 519 NegativeTestQuoteMeta("foo", "bar"); 520 NegativeTestQuoteMeta("...", "bar"); 521 NegativeTestQuoteMeta("\\.", "."); 522 NegativeTestQuoteMeta("\\.", ".."); 523 NegativeTestQuoteMeta("(a)", "a"); 524 NegativeTestQuoteMeta("(a|b)", "a"); 525 NegativeTestQuoteMeta("(a|b)", "(a)"); 526 NegativeTestQuoteMeta("(a|b)", "a|b"); 527 NegativeTestQuoteMeta("[0-9]", "0"); 528 NegativeTestQuoteMeta("[0-9]", "0-9"); 529 NegativeTestQuoteMeta("[0-9]", "[9]"); 530 NegativeTestQuoteMeta("((?!)xxx)", "xxx"); 531 } 532 533 static void TestQuoteMetaLatin1() { 534 TestQuoteMeta("3\xb2 = 9"); 535 } 536 537 static void TestQuoteMetaUtf8() { 538 #ifdef SUPPORT_UTF 539 TestQuoteMeta("Pl\xc3\xa1\x63ido Domingo", pcrecpp::UTF8()); 540 TestQuoteMeta("xyz", pcrecpp::UTF8()); // No fancy utf8 541 TestQuoteMeta("\xc2\xb0", pcrecpp::UTF8()); // 2-byte utf8 (degree symbol) 542 TestQuoteMeta("27\xc2\xb0 degrees", pcrecpp::UTF8()); // As a middle character 543 TestQuoteMeta("\xe2\x80\xb3", pcrecpp::UTF8()); // 3-byte utf8 (double prime) 544 TestQuoteMeta("\xf0\x9d\x85\x9f", pcrecpp::UTF8()); // 4-byte utf8 (music note) 545 TestQuoteMeta("27\xc2\xb0"); // Interpreted as Latin-1, but should still work 546 NegativeTestQuoteMeta("27\xc2\xb0", // 2-byte utf (degree symbol) 547 "27\\\xc2\\\xb0", 548 pcrecpp::UTF8()); 549 #endif 550 } 551 552 static void TestQuoteMetaAll() { 553 printf("Testing QuoteMeta\n"); 554 TestQuotaMetaSimple(); 555 TestQuoteMetaSimpleNegative(); 556 TestQuoteMetaLatin1(); 557 TestQuoteMetaUtf8(); 558 } 559 560 // 561 // Options tests contributed by 562 // Giuseppe Maxia, CTO, Stardata s.r.l. 563 // July 2005 564 // 565 static void GetOneOptionResult( 566 const char *option_name, 567 const char *regex, 568 const char *str, 569 RE_Options options, 570 bool full, 571 string expected) { 572 573 printf("Testing Option <%s>\n", option_name); 574 if(VERBOSE_TEST) 575 printf("/%s/ finds \"%s\" within \"%s\" \n", 576 regex, 577 expected.c_str(), 578 str); 579 string captured(""); 580 if (full) 581 RE(regex,options).FullMatch(str, &captured); 582 else 583 RE(regex,options).PartialMatch(str, &captured); 584 CHECK_EQ(captured, expected); 585 } 586 587 static void TestOneOption( 588 const char *option_name, 589 const char *regex, 590 const char *str, 591 RE_Options options, 592 bool full, 593 bool assertive = true) { 594 595 printf("Testing Option <%s>\n", option_name); 596 if (VERBOSE_TEST) 597 printf("'%s' %s /%s/ \n", 598 str, 599 (assertive? "matches" : "doesn't match"), 600 regex); 601 if (assertive) { 602 if (full) 603 CHECK(RE(regex,options).FullMatch(str)); 604 else 605 CHECK(RE(regex,options).PartialMatch(str)); 606 } else { 607 if (full) 608 CHECK(!RE(regex,options).FullMatch(str)); 609 else 610 CHECK(!RE(regex,options).PartialMatch(str)); 611 } 612 } 613 614 static void Test_CASELESS() { 615 RE_Options options; 616 RE_Options options2; 617 618 options.set_caseless(true); 619 TestOneOption("CASELESS (class)", "HELLO", "hello", options, false); 620 TestOneOption("CASELESS (class2)", "HELLO", "hello", options2.set_caseless(true), false); 621 TestOneOption("CASELESS (class)", "^[A-Z]+$", "Hello", options, false); 622 623 TestOneOption("CASELESS (function)", "HELLO", "hello", pcrecpp::CASELESS(), false); 624 TestOneOption("CASELESS (function)", "^[A-Z]+$", "Hello", pcrecpp::CASELESS(), false); 625 options.set_caseless(false); 626 TestOneOption("no CASELESS", "HELLO", "hello", options, false, false); 627 } 628 629 static void Test_MULTILINE() { 630 RE_Options options; 631 RE_Options options2; 632 const char *str = "HELLO\n" "cruel\n" "world\n"; 633 634 options.set_multiline(true); 635 TestOneOption("MULTILINE (class)", "^cruel$", str, options, false); 636 TestOneOption("MULTILINE (class2)", "^cruel$", str, options2.set_multiline(true), false); 637 TestOneOption("MULTILINE (function)", "^cruel$", str, pcrecpp::MULTILINE(), false); 638 options.set_multiline(false); 639 TestOneOption("no MULTILINE", "^cruel$", str, options, false, false); 640 } 641 642 static void Test_DOTALL() { 643 RE_Options options; 644 RE_Options options2; 645 const char *str = "HELLO\n" "cruel\n" "world"; 646 647 options.set_dotall(true); 648 TestOneOption("DOTALL (class)", "HELLO.*world", str, options, true); 649 TestOneOption("DOTALL (class2)", "HELLO.*world", str, options2.set_dotall(true), true); 650 TestOneOption("DOTALL (function)", "HELLO.*world", str, pcrecpp::DOTALL(), true); 651 options.set_dotall(false); 652 TestOneOption("no DOTALL", "HELLO.*world", str, options, true, false); 653 } 654 655 static void Test_DOLLAR_ENDONLY() { 656 RE_Options options; 657 RE_Options options2; 658 const char *str = "HELLO world\n"; 659 660 TestOneOption("no DOLLAR_ENDONLY", "world$", str, options, false); 661 options.set_dollar_endonly(true); 662 TestOneOption("DOLLAR_ENDONLY 1", "world$", str, options, false, false); 663 TestOneOption("DOLLAR_ENDONLY 2", "world$", str, options2.set_dollar_endonly(true), false, false); 664 } 665 666 static void Test_EXTRA() { 667 RE_Options options; 668 const char *str = "HELLO"; 669 670 options.set_extra(true); 671 TestOneOption("EXTRA 1", "\\HELL\\O", str, options, true, false ); 672 TestOneOption("EXTRA 2", "\\HELL\\O", str, RE_Options().set_extra(true), true, false ); 673 options.set_extra(false); 674 TestOneOption("no EXTRA", "\\HELL\\O", str, options, true ); 675 } 676 677 static void Test_EXTENDED() { 678 RE_Options options; 679 RE_Options options2; 680 const char *str = "HELLO world"; 681 682 options.set_extended(true); 683 TestOneOption("EXTENDED (class)", "HELLO world", str, options, false, false); 684 TestOneOption("EXTENDED (class2)", "HELLO world", str, options2.set_extended(true), false, false); 685 TestOneOption("EXTENDED (class)", 686 "^ HE L{2} O " 687 "\\s+ " 688 "\\w+ $ ", 689 str, 690 options, 691 false); 692 693 TestOneOption("EXTENDED (function)", "HELLO world", str, pcrecpp::EXTENDED(), false, false); 694 TestOneOption("EXTENDED (function)", 695 "^ HE L{2} O " 696 "\\s+ " 697 "\\w+ $ ", 698 str, 699 pcrecpp::EXTENDED(), 700 false); 701 702 options.set_extended(false); 703 TestOneOption("no EXTENDED", "HELLO world", str, options, false); 704 } 705 706 static void Test_NO_AUTO_CAPTURE() { 707 RE_Options options; 708 const char *str = "HELLO world"; 709 string captured; 710 711 printf("Testing Option <no NO_AUTO_CAPTURE>\n"); 712 if (VERBOSE_TEST) 713 printf("parentheses capture text\n"); 714 RE re("(world|universe)$", options); 715 CHECK(re.Extract("\\1", str , &captured)); 716 CHECK_EQ(captured, "world"); 717 options.set_no_auto_capture(true); 718 printf("testing Option <NO_AUTO_CAPTURE>\n"); 719 if (VERBOSE_TEST) 720 printf("parentheses do not capture text\n"); 721 re.Extract("\\1",str, &captured ); 722 CHECK_EQ(captured, "world"); 723 } 724 725 static void Test_UNGREEDY() { 726 RE_Options options; 727 const char *str = "HELLO, 'this' is the 'world'"; 728 729 options.set_ungreedy(true); 730 GetOneOptionResult("UNGREEDY 1", "('.*')", str, options, false, "'this'" ); 731 GetOneOptionResult("UNGREEDY 2", "('.*')", str, RE_Options().set_ungreedy(true), false, "'this'" ); 732 GetOneOptionResult("UNGREEDY", "('.*?')", str, options, false, "'this' is the 'world'" ); 733 734 options.set_ungreedy(false); 735 GetOneOptionResult("no UNGREEDY", "('.*')", str, options, false, "'this' is the 'world'" ); 736 GetOneOptionResult("no UNGREEDY", "('.*?')", str, options, false, "'this'" ); 737 } 738 739 static void Test_all_options() { 740 const char *str = "HELLO\n" "cruel\n" "world"; 741 RE_Options options; 742 options.set_all_options(PCRE_CASELESS | PCRE_DOTALL); 743 744 TestOneOption("all_options (CASELESS|DOTALL)", "^hello.*WORLD", str , options, false); 745 options.set_all_options(0); 746 TestOneOption("all_options (0)", "^hello.*WORLD", str , options, false, false); 747 options.set_all_options(PCRE_MULTILINE | PCRE_EXTENDED); 748 749 TestOneOption("all_options (MULTILINE|EXTENDED)", " ^ c r u e l $ ", str, options, false); 750 TestOneOption("all_options (MULTILINE|EXTENDED) with constructor", 751 " ^ c r u e l $ ", 752 str, 753 RE_Options(PCRE_MULTILINE | PCRE_EXTENDED), 754 false); 755 756 TestOneOption("all_options (MULTILINE|EXTENDED) with concatenation", 757 " ^ c r u e l $ ", 758 str, 759 RE_Options() 760 .set_multiline(true) 761 .set_extended(true), 762 false); 763 764 options.set_all_options(0); 765 TestOneOption("all_options (0)", "^ c r u e l $", str, options, false, false); 766 767 } 768 769 static void TestOptions() { 770 printf("Testing Options\n"); 771 Test_CASELESS(); 772 Test_MULTILINE(); 773 Test_DOTALL(); 774 Test_DOLLAR_ENDONLY(); 775 Test_EXTENDED(); 776 Test_NO_AUTO_CAPTURE(); 777 Test_UNGREEDY(); 778 Test_EXTRA(); 779 Test_all_options(); 780 } 781 782 static void TestConstructors() { 783 printf("Testing constructors\n"); 784 785 RE_Options options; 786 options.set_dotall(true); 787 const char *str = "HELLO\n" "cruel\n" "world"; 788 789 RE orig("HELLO.*world", options); 790 CHECK(orig.FullMatch(str)); 791 792 RE copy1(orig); 793 CHECK(copy1.FullMatch(str)); 794 795 RE copy2("not a match"); 796 CHECK(!copy2.FullMatch(str)); 797 copy2 = copy1; 798 CHECK(copy2.FullMatch(str)); 799 copy2 = orig; 800 CHECK(copy2.FullMatch(str)); 801 802 // Make sure when we assign to ourselves, nothing bad happens 803 orig = orig; 804 copy1 = copy1; 805 copy2 = copy2; 806 CHECK(orig.FullMatch(str)); 807 CHECK(copy1.FullMatch(str)); 808 CHECK(copy2.FullMatch(str)); 809 } 810 811 int main(int argc, char** argv) { 812 // Treat any flag as --help 813 if (argc > 1 && argv[1][0] == '-') { 814 printf("Usage: %s [timing1|timing2|timing3 num-iters]\n" 815 " If 'timingX ###' is specified, run the given timing test\n" 816 " with the given number of iterations, rather than running\n" 817 " the default corectness test.\n", argv[0]); 818 return 0; 819 } 820 821 if (argc > 1) { 822 if ( argc == 2 || atoi(argv[2]) == 0) { 823 printf("timing mode needs a num-iters argument\n"); 824 return 1; 825 } 826 if (!strcmp(argv[1], "timing1")) 827 Timing1(atoi(argv[2])); 828 else if (!strcmp(argv[1], "timing2")) 829 Timing2(atoi(argv[2])); 830 else if (!strcmp(argv[1], "timing3")) 831 Timing3(atoi(argv[2])); 832 else 833 printf("Unknown argument '%s'\n", argv[1]); 834 return 0; 835 } 836 837 printf("PCRE C++ wrapper tests\n"); 838 printf("Testing FullMatch\n"); 839 840 int i; 841 string s; 842 843 /***** FullMatch with no args *****/ 844 845 CHECK(RE("h.*o").FullMatch("hello")); 846 CHECK(!RE("h.*o").FullMatch("othello")); // Must be anchored at front 847 CHECK(!RE("h.*o").FullMatch("hello!")); // Must be anchored at end 848 CHECK(RE("a*").FullMatch("aaaa")); // Fullmatch with normal op 849 CHECK(RE("a*?").FullMatch("aaaa")); // Fullmatch with nongreedy op 850 CHECK(RE("a*?\\z").FullMatch("aaaa")); // Two unusual ops 851 852 /***** FullMatch with args *****/ 853 854 // Zero-arg 855 CHECK(RE("\\d+").FullMatch("1001")); 856 857 // Single-arg 858 CHECK(RE("(\\d+)").FullMatch("1001", &i)); 859 CHECK_EQ(i, 1001); 860 CHECK(RE("(-?\\d+)").FullMatch("-123", &i)); 861 CHECK_EQ(i, -123); 862 CHECK(!RE("()\\d+").FullMatch("10", &i)); 863 CHECK(!RE("(\\d+)").FullMatch("1234567890123456789012345678901234567890", 864 &i)); 865 866 // Digits surrounding integer-arg 867 CHECK(RE("1(\\d*)4").FullMatch("1234", &i)); 868 CHECK_EQ(i, 23); 869 CHECK(RE("(\\d)\\d+").FullMatch("1234", &i)); 870 CHECK_EQ(i, 1); 871 CHECK(RE("(-\\d)\\d+").FullMatch("-1234", &i)); 872 CHECK_EQ(i, -1); 873 CHECK(RE("(\\d)").PartialMatch("1234", &i)); 874 CHECK_EQ(i, 1); 875 CHECK(RE("(-\\d)").PartialMatch("-1234", &i)); 876 CHECK_EQ(i, -1); 877 878 // String-arg 879 CHECK(RE("h(.*)o").FullMatch("hello", &s)); 880 CHECK_EQ(s, string("ell")); 881 882 // StringPiece-arg 883 StringPiece sp; 884 CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &sp, &i)); 885 CHECK_EQ(sp.size(), 4); 886 CHECK(memcmp(sp.data(), "ruby", 4) == 0); 887 CHECK_EQ(i, 1234); 888 889 // Multi-arg 890 CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &s, &i)); 891 CHECK_EQ(s, string("ruby")); 892 CHECK_EQ(i, 1234); 893 894 // Ignore non-void* NULL arg 895 CHECK(RE("he(.*)lo").FullMatch("hello", (char*)NULL)); 896 CHECK(RE("h(.*)o").FullMatch("hello", (string*)NULL)); 897 CHECK(RE("h(.*)o").FullMatch("hello", (StringPiece*)NULL)); 898 CHECK(RE("(.*)").FullMatch("1234", (int*)NULL)); 899 #ifdef HAVE_LONG_LONG 900 CHECK(RE("(.*)").FullMatch("1234567890123456", (long long*)NULL)); 901 #endif 902 CHECK(RE("(.*)").FullMatch("123.4567890123456", (double*)NULL)); 903 CHECK(RE("(.*)").FullMatch("123.4567890123456", (float*)NULL)); 904 905 // Fail on non-void* NULL arg if the match doesn't parse for the given type. 906 CHECK(!RE("h(.*)lo").FullMatch("hello", &s, (char*)NULL)); 907 CHECK(!RE("(.*)").FullMatch("hello", (int*)NULL)); 908 CHECK(!RE("(.*)").FullMatch("1234567890123456", (int*)NULL)); 909 CHECK(!RE("(.*)").FullMatch("hello", (double*)NULL)); 910 CHECK(!RE("(.*)").FullMatch("hello", (float*)NULL)); 911 912 // Ignored arg 913 CHECK(RE("(\\w+)(:)(\\d+)").FullMatch("ruby:1234", &s, (void*)NULL, &i)); 914 CHECK_EQ(s, string("ruby")); 915 CHECK_EQ(i, 1234); 916 917 // Type tests 918 { 919 char c; 920 CHECK(RE("(H)ello").FullMatch("Hello", &c)); 921 CHECK_EQ(c, 'H'); 922 } 923 { 924 unsigned char c; 925 CHECK(RE("(H)ello").FullMatch("Hello", &c)); 926 CHECK_EQ(c, static_cast<unsigned char>('H')); 927 } 928 { 929 short v; 930 CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100); 931 CHECK(RE("(-?\\d+)").FullMatch("-100", &v)); CHECK_EQ(v, -100); 932 CHECK(RE("(-?\\d+)").FullMatch("32767", &v)); CHECK_EQ(v, 32767); 933 CHECK(RE("(-?\\d+)").FullMatch("-32768", &v)); CHECK_EQ(v, -32768); 934 CHECK(!RE("(-?\\d+)").FullMatch("-32769", &v)); 935 CHECK(!RE("(-?\\d+)").FullMatch("32768", &v)); 936 } 937 { 938 unsigned short v; 939 CHECK(RE("(\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100); 940 CHECK(RE("(\\d+)").FullMatch("32767", &v)); CHECK_EQ(v, 32767); 941 CHECK(RE("(\\d+)").FullMatch("65535", &v)); CHECK_EQ(v, 65535); 942 CHECK(!RE("(\\d+)").FullMatch("65536", &v)); 943 } 944 { 945 int v; 946 static const int max_value = 0x7fffffff; 947 static const int min_value = -max_value - 1; 948 CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100); 949 CHECK(RE("(-?\\d+)").FullMatch("-100", &v)); CHECK_EQ(v, -100); 950 CHECK(RE("(-?\\d+)").FullMatch("2147483647", &v)); CHECK_EQ(v, max_value); 951 CHECK(RE("(-?\\d+)").FullMatch("-2147483648", &v)); CHECK_EQ(v, min_value); 952 CHECK(!RE("(-?\\d+)").FullMatch("-2147483649", &v)); 953 CHECK(!RE("(-?\\d+)").FullMatch("2147483648", &v)); 954 } 955 { 956 unsigned int v; 957 static const unsigned int max_value = 0xfffffffful; 958 CHECK(RE("(\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100); 959 CHECK(RE("(\\d+)").FullMatch("4294967295", &v)); CHECK_EQ(v, max_value); 960 CHECK(!RE("(\\d+)").FullMatch("4294967296", &v)); 961 } 962 #ifdef HAVE_LONG_LONG 963 # if defined(__MINGW__) || defined(__MINGW32__) 964 # define LLD "%I64d" 965 # define LLU "%I64u" 966 # else 967 # define LLD "%lld" 968 # define LLU "%llu" 969 # endif 970 { 971 long long v; 972 static const long long max_value = 0x7fffffffffffffffLL; 973 static const long long min_value = -max_value - 1; 974 char buf[32]; // definitely big enough for a long long 975 976 CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100); 977 CHECK(RE("(-?\\d+)").FullMatch("-100",&v)); CHECK_EQ(v, -100); 978 979 sprintf(buf, LLD, max_value); 980 CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value); 981 982 sprintf(buf, LLD, min_value); 983 CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, min_value); 984 985 sprintf(buf, LLD, max_value); 986 assert(buf[strlen(buf)-1] != '9'); 987 buf[strlen(buf)-1]++; 988 CHECK(!RE("(-?\\d+)").FullMatch(buf, &v)); 989 990 sprintf(buf, LLD, min_value); 991 assert(buf[strlen(buf)-1] != '9'); 992 buf[strlen(buf)-1]++; 993 CHECK(!RE("(-?\\d+)").FullMatch(buf, &v)); 994 } 995 #endif 996 #if defined HAVE_UNSIGNED_LONG_LONG && defined HAVE_LONG_LONG 997 { 998 unsigned long long v; 999 long long v2; 1000 static const unsigned long long max_value = 0xffffffffffffffffULL; 1001 char buf[32]; // definitely big enough for a unsigned long long 1002 1003 CHECK(RE("(-?\\d+)").FullMatch("100",&v)); CHECK_EQ(v, 100); 1004 CHECK(RE("(-?\\d+)").FullMatch("-100",&v2)); CHECK_EQ(v2, -100); 1005 1006 sprintf(buf, LLU, max_value); 1007 CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value); 1008 1009 assert(buf[strlen(buf)-1] != '9'); 1010 buf[strlen(buf)-1]++; 1011 CHECK(!RE("(-?\\d+)").FullMatch(buf, &v)); 1012 } 1013 #endif 1014 { 1015 float v; 1016 CHECK(RE("(.*)").FullMatch("100", &v)); 1017 CHECK(RE("(.*)").FullMatch("-100.", &v)); 1018 CHECK(RE("(.*)").FullMatch("1e23", &v)); 1019 } 1020 { 1021 double v; 1022 CHECK(RE("(.*)").FullMatch("100", &v)); 1023 CHECK(RE("(.*)").FullMatch("-100.", &v)); 1024 CHECK(RE("(.*)").FullMatch("1e23", &v)); 1025 } 1026 1027 // Check that matching is fully anchored 1028 CHECK(!RE("(\\d+)").FullMatch("x1001", &i)); 1029 CHECK(!RE("(\\d+)").FullMatch("1001x", &i)); 1030 CHECK(RE("x(\\d+)").FullMatch("x1001", &i)); CHECK_EQ(i, 1001); 1031 CHECK(RE("(\\d+)x").FullMatch("1001x", &i)); CHECK_EQ(i, 1001); 1032 1033 // Braces 1034 CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcd")); 1035 CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcde")); 1036 CHECK(!RE("[0-9a-f+.-]{5,}").FullMatch("0abc")); 1037 1038 // Complicated RE 1039 CHECK(RE("foo|bar|[A-Z]").FullMatch("foo")); 1040 CHECK(RE("foo|bar|[A-Z]").FullMatch("bar")); 1041 CHECK(RE("foo|bar|[A-Z]").FullMatch("X")); 1042 CHECK(!RE("foo|bar|[A-Z]").FullMatch("XY")); 1043 1044 // Check full-match handling (needs '$' tacked on internally) 1045 CHECK(RE("fo|foo").FullMatch("fo")); 1046 CHECK(RE("fo|foo").FullMatch("foo")); 1047 CHECK(RE("fo|foo$").FullMatch("fo")); 1048 CHECK(RE("fo|foo$").FullMatch("foo")); 1049 CHECK(RE("foo$").FullMatch("foo")); 1050 CHECK(!RE("foo\\$").FullMatch("foo$bar")); 1051 CHECK(!RE("fo|bar").FullMatch("fox")); 1052 1053 // Uncomment the following if we change the handling of '$' to 1054 // prevent it from matching a trailing newline 1055 if (false) { 1056 // Check that we don't get bitten by pcre's special handling of a 1057 // '\n' at the end of the string matching '$' 1058 CHECK(!RE("foo$").PartialMatch("foo\n")); 1059 } 1060 1061 // Number of args 1062 int a[16]; 1063 CHECK(RE("").FullMatch("")); 1064 1065 memset(a, 0, sizeof(0)); 1066 CHECK(RE("(\\d){1}").FullMatch("1", 1067 &a[0])); 1068 CHECK_EQ(a[0], 1); 1069 1070 memset(a, 0, sizeof(0)); 1071 CHECK(RE("(\\d)(\\d)").FullMatch("12", 1072 &a[0], &a[1])); 1073 CHECK_EQ(a[0], 1); 1074 CHECK_EQ(a[1], 2); 1075 1076 memset(a, 0, sizeof(0)); 1077 CHECK(RE("(\\d)(\\d)(\\d)").FullMatch("123", 1078 &a[0], &a[1], &a[2])); 1079 CHECK_EQ(a[0], 1); 1080 CHECK_EQ(a[1], 2); 1081 CHECK_EQ(a[2], 3); 1082 1083 memset(a, 0, sizeof(0)); 1084 CHECK(RE("(\\d)(\\d)(\\d)(\\d)").FullMatch("1234", 1085 &a[0], &a[1], &a[2], &a[3])); 1086 CHECK_EQ(a[0], 1); 1087 CHECK_EQ(a[1], 2); 1088 CHECK_EQ(a[2], 3); 1089 CHECK_EQ(a[3], 4); 1090 1091 memset(a, 0, sizeof(0)); 1092 CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("12345", 1093 &a[0], &a[1], &a[2], 1094 &a[3], &a[4])); 1095 CHECK_EQ(a[0], 1); 1096 CHECK_EQ(a[1], 2); 1097 CHECK_EQ(a[2], 3); 1098 CHECK_EQ(a[3], 4); 1099 CHECK_EQ(a[4], 5); 1100 1101 memset(a, 0, sizeof(0)); 1102 CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("123456", 1103 &a[0], &a[1], &a[2], 1104 &a[3], &a[4], &a[5])); 1105 CHECK_EQ(a[0], 1); 1106 CHECK_EQ(a[1], 2); 1107 CHECK_EQ(a[2], 3); 1108 CHECK_EQ(a[3], 4); 1109 CHECK_EQ(a[4], 5); 1110 CHECK_EQ(a[5], 6); 1111 1112 memset(a, 0, sizeof(0)); 1113 CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("1234567", 1114 &a[0], &a[1], &a[2], &a[3], 1115 &a[4], &a[5], &a[6])); 1116 CHECK_EQ(a[0], 1); 1117 CHECK_EQ(a[1], 2); 1118 CHECK_EQ(a[2], 3); 1119 CHECK_EQ(a[3], 4); 1120 CHECK_EQ(a[4], 5); 1121 CHECK_EQ(a[5], 6); 1122 CHECK_EQ(a[6], 7); 1123 1124 memset(a, 0, sizeof(0)); 1125 CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)" 1126 "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch( 1127 "1234567890123456", 1128 &a[0], &a[1], &a[2], &a[3], 1129 &a[4], &a[5], &a[6], &a[7], 1130 &a[8], &a[9], &a[10], &a[11], 1131 &a[12], &a[13], &a[14], &a[15])); 1132 CHECK_EQ(a[0], 1); 1133 CHECK_EQ(a[1], 2); 1134 CHECK_EQ(a[2], 3); 1135 CHECK_EQ(a[3], 4); 1136 CHECK_EQ(a[4], 5); 1137 CHECK_EQ(a[5], 6); 1138 CHECK_EQ(a[6], 7); 1139 CHECK_EQ(a[7], 8); 1140 CHECK_EQ(a[8], 9); 1141 CHECK_EQ(a[9], 0); 1142 CHECK_EQ(a[10], 1); 1143 CHECK_EQ(a[11], 2); 1144 CHECK_EQ(a[12], 3); 1145 CHECK_EQ(a[13], 4); 1146 CHECK_EQ(a[14], 5); 1147 CHECK_EQ(a[15], 6); 1148 1149 /***** PartialMatch *****/ 1150 1151 printf("Testing PartialMatch\n"); 1152 1153 CHECK(RE("h.*o").PartialMatch("hello")); 1154 CHECK(RE("h.*o").PartialMatch("othello")); 1155 CHECK(RE("h.*o").PartialMatch("hello!")); 1156 CHECK(RE("((((((((((((((((((((x))))))))))))))))))))").PartialMatch("x")); 1157 1158 /***** other tests *****/ 1159 1160 RadixTests(); 1161 TestReplace(); 1162 TestExtract(); 1163 TestConsume(); 1164 TestFindAndConsume(); 1165 TestQuoteMetaAll(); 1166 TestMatchNumberPeculiarity(); 1167 1168 // Check the pattern() accessor 1169 { 1170 const string kPattern = "http://([^/]+)/.*"; 1171 const RE re(kPattern); 1172 CHECK_EQ(kPattern, re.pattern()); 1173 } 1174 1175 // Check RE error field. 1176 { 1177 RE re("foo"); 1178 CHECK(re.error().empty()); // Must have no error 1179 } 1180 1181 #ifdef SUPPORT_UTF 1182 // Check UTF-8 handling 1183 { 1184 printf("Testing UTF-8 handling\n"); 1185 1186 // Three Japanese characters (nihongo) 1187 const unsigned char utf8_string[] = { 1188 0xe6, 0x97, 0xa5, // 65e5 1189 0xe6, 0x9c, 0xac, // 627c 1190 0xe8, 0xaa, 0x9e, // 8a9e 1191 0 1192 }; 1193 const unsigned char utf8_pattern[] = { 1194 '.', 1195 0xe6, 0x9c, 0xac, // 627c 1196 '.', 1197 0 1198 }; 1199 1200 // Both should match in either mode, bytes or UTF-8 1201 RE re_test1("........."); 1202 CHECK(re_test1.FullMatch(utf8_string)); 1203 RE re_test2("...", pcrecpp::UTF8()); 1204 CHECK(re_test2.FullMatch(utf8_string)); 1205 1206 // PH added these tests for leading option settings 1207 1208 RE re_testZ0("(*CR)(*NO_START_OPT)........."); 1209 CHECK(re_testZ0.FullMatch(utf8_string)); 1210 1211 #ifdef SUPPORT_UTF 1212 RE re_testZ1("(*UTF8)..."); 1213 CHECK(re_testZ1.FullMatch(utf8_string)); 1214 1215 RE re_testZ2("(*UTF)..."); 1216 CHECK(re_testZ2.FullMatch(utf8_string)); 1217 1218 #ifdef SUPPORT_UCP 1219 RE re_testZ3("(*UCP)(*UTF)..."); 1220 CHECK(re_testZ3.FullMatch(utf8_string)); 1221 1222 RE re_testZ4("(*UCP)(*LIMIT_MATCH=1000)(*UTF)..."); 1223 CHECK(re_testZ4.FullMatch(utf8_string)); 1224 1225 RE re_testZ5("(*UCP)(*LIMIT_MATCH=1000)(*ANY)(*UTF)..."); 1226 CHECK(re_testZ5.FullMatch(utf8_string)); 1227 #endif 1228 #endif 1229 1230 // Check that '.' matches one byte or UTF-8 character 1231 // according to the mode. 1232 string ss; 1233 RE re_test3("(.)"); 1234 CHECK(re_test3.PartialMatch(utf8_string, &ss)); 1235 CHECK_EQ(ss, string("\xe6")); 1236 RE re_test4("(.)", pcrecpp::UTF8()); 1237 CHECK(re_test4.PartialMatch(utf8_string, &ss)); 1238 CHECK_EQ(ss, string("\xe6\x97\xa5")); 1239 1240 // Check that string matches itself in either mode 1241 RE re_test5(utf8_string); 1242 CHECK(re_test5.FullMatch(utf8_string)); 1243 RE re_test6(utf8_string, pcrecpp::UTF8()); 1244 CHECK(re_test6.FullMatch(utf8_string)); 1245 1246 // Check that pattern matches string only in UTF8 mode 1247 RE re_test7(utf8_pattern); 1248 CHECK(!re_test7.FullMatch(utf8_string)); 1249 RE re_test8(utf8_pattern, pcrecpp::UTF8()); 1250 CHECK(re_test8.FullMatch(utf8_string)); 1251 } 1252 1253 // Check that ungreedy, UTF8 regular expressions don't match when they 1254 // oughtn't -- see bug 82246. 1255 { 1256 // This code always worked. 1257 const char* pattern = "\\w+X"; 1258 const string target = "a aX"; 1259 RE match_sentence(pattern); 1260 RE match_sentence_re(pattern, pcrecpp::UTF8()); 1261 1262 CHECK(!match_sentence.FullMatch(target)); 1263 CHECK(!match_sentence_re.FullMatch(target)); 1264 } 1265 1266 { 1267 const char* pattern = "(?U)\\w+X"; 1268 const string target = "a aX"; 1269 RE match_sentence(pattern); 1270 RE match_sentence_re(pattern, pcrecpp::UTF8()); 1271 1272 CHECK(!match_sentence.FullMatch(target)); 1273 CHECK(!match_sentence_re.FullMatch(target)); 1274 } 1275 #endif /* def SUPPORT_UTF */ 1276 1277 printf("Testing error reporting\n"); 1278 1279 { RE re("a\\1"); CHECK(!re.error().empty()); } 1280 { 1281 RE re("a[x"); 1282 CHECK(!re.error().empty()); 1283 } 1284 { 1285 RE re("a[z-a]"); 1286 CHECK(!re.error().empty()); 1287 } 1288 { 1289 RE re("a[[:foobar:]]"); 1290 CHECK(!re.error().empty()); 1291 } 1292 { 1293 RE re("a(b"); 1294 CHECK(!re.error().empty()); 1295 } 1296 { 1297 RE re("a\\"); 1298 CHECK(!re.error().empty()); 1299 } 1300 1301 // Test that recursion is stopped 1302 TestRecursion(); 1303 1304 // Test Options 1305 if (getenv("VERBOSE_TEST") != NULL) 1306 VERBOSE_TEST = true; 1307 TestOptions(); 1308 1309 // Test the constructors 1310 TestConstructors(); 1311 1312 // Done 1313 printf("OK\n"); 1314 1315 return 0; 1316 } 1317