// Split-up due to DMD's enormous memory consumption
//
// Regression and feature unittests for std.regex. Every test below is a
// self-contained `unittest` block; tests that must hold for both matching
// entry points are written as a local template taking the match function as
// an alias and are instantiated once with `bmatch` and once with `match`.

module std.regex.internal.tests2;

package(std.regex):

import std.conv, std.exception, std.meta, std.range,
    std.typecons, std.regex;

import std.regex.internal.ir : Escapables; // characters that need escaping

// The progress traces below are compiled only with -debug=std_regex_test;
// they need writeln, which is otherwise not in scope in this module.
debug(std_regex_test) import std.stdio : writeln;

// ctRegex: plain literal and a simple star.
@safe unittest
{
    auto cr = ctRegex!("abc");
    assert(bmatch("abc",cr).hit == "abc");
    auto cr2 = ctRegex!("ab*c");
    assert(bmatch("abbbbc",cr2).hit == "abbbbc");
}

// ctRegex: anchors and word-boundary assertions.
@safe unittest
{
    auto cr3 = ctRegex!("^abc$");
    assert(bmatch("abc",cr3).hit == "abc");
    auto cr4 = ctRegex!(`\b(a\B[a-z]b)\b`);
    assert(array(match("azb",cr4).captures) == ["azb", "azb"]);
}

// ctRegex: nested counted repetitions, greedy and lazy, narrow and wide strings.
@safe unittest
{
    auto cr5 = ctRegex!("(?:a{2,4}b{1,3}){1,2}");
    assert(bmatch("aaabaaaabbb", cr5).hit == "aaabaaaabbb");
    auto cr6 = ctRegex!("(?:a{2,4}b{1,3}){1,2}?"w);
    assert(bmatch("aaabaaaabbb"w, cr6).hit == "aaab"w);
}

// ctRegex: "s" and "m" flags, and lazy matching.
@safe unittest
{
    auto cr7 = ctRegex!(`\r.*?$`,"sm");
    assert(bmatch("abc\r\nxy", cr7).hit == "\r\nxy");
    auto greed = ctRegex!("<packet.*?/packet>");
    assert(bmatch("<packet>text</packet><packet>text</packet>", greed).hit
        == "<packet>text</packet>");
}

// ctRegex: capture groups, including a group under a star.
@safe unittest
{
    import std.algorithm.comparison : equal;
    auto cr8 = ctRegex!("^(a)(b)?(c*)");
    auto m8 = bmatch("abcc",cr8);
    assert(m8);
    assert(m8.captures[1] == "a");
    assert(m8.captures[2] == "b");
    assert(m8.captures[3] == "cc");
    auto cr9 = ctRegex!("q(a|b)*q");
    auto m9 = match("xxqababqyy",cr9);
    assert(m9);
    assert(equal(bmatch("xxqababqyy",cr9).captures, ["qababq", "b"]));
}

// The parser must produce the same IR at compile time (CTFE) and at run time.
@safe unittest
{
    import std.algorithm.comparison : equal;
    auto rtr = regex("a|b|c");
    enum ctr = regex("a|b|c");
    assert(equal(rtr.ir,ctr.ir));
    //CTFE parser BUG is triggered by group
    //in the middle of alternation (at least not first and not last)
    enum testCT = regex(`abc|(edf)|xyz`);
    auto testRT = regex(`abc|(edf)|xyz`);
    assert(equal(testCT.ir,testRT.ir));
}

// ctRegex stored in enums: alternation, "i" flag, counted repetition, "gm" flags.
@safe unittest
{
    import std.algorithm.comparison : equal;
    import std.algorithm.iteration : map;
    enum cx = ctRegex!"(A|B|C)";
    auto mx = match("B",cx);
    assert(mx);
    assert(equal(mx.captures, [ "B", "B"]));
    enum cx2 = ctRegex!"(A|B)*";
    assert(match("BAAA",cx2));

    enum cx3 = ctRegex!("a{3,4}","i");
    auto mx3 = match("AaA",cx3);
    assert(mx3);
    assert(mx3.captures[0] == "AaA");
    enum cx4 = ctRegex!(`^a{3,4}?[a-zA-Z0-9~]{1,2}`,"i");
    auto mx4 = match("aaaabc", cx4);
    assert(mx4);
    assert(mx4.captures[0] == "aaaab");
    auto cr8 = ctRegex!("(a)(b)?(c*)");
    auto m8 = bmatch("abcc",cr8);
    assert(m8);
    assert(m8.captures[1] == "a");
    assert(m8.captures[2] == "b");
    assert(m8.captures[3] == "cc");
    auto cr9 = ctRegex!(".*$", "gm");
    auto m9 = match("First\rSecond", cr9);
    assert(m9);
    assert(equal(map!"a.hit"(m9), ["First", "", "Second"]));
}

// Flag handling ("g", "x", "gm"), run once per match function.
@safe unittest
{
    import std.algorithm.comparison : equal;
    import std.algorithm.iteration : map;
    //global matching
    void test_body(alias matchFn)()
    {
        string s = "a quick brown fox jumps over a lazy dog";
        auto r1 = regex("\\b[a-z]+\\b","g");
        string[] test;
        foreach (m; matchFn(s, r1))
            test ~= m.hit;
        assert(equal(test, [ "a", "quick", "brown", "fox", "jumps", "over", "a", "lazy", "dog"]));
        // free-form ("x") pattern: whitespace inside the literal is not significant
        auto free_reg = regex(`

            abc
            \s+
            "
            (
                    [^"]+
                |   \\ "
            )+
            "
            z
        `, "x");
        // NOTE(review): this calls match directly rather than matchFn, so it is
        // not exercised with bmatch - confirm whether that is intentional.
        auto m = match(`abc "quoted string with \" inside"z`,free_reg);
        assert(m);
        string mails = " hey@you.com no@spam.net ";
        auto rm = regex(`@(?<=\S+@)\S+`,"g");
        assert(equal(map!"a[0]"(matchFn(mails, rm)), ["@you.com", "@spam.net"]));
        auto m2 = matchFn("First line\nSecond line",regex(".*$","gm"));
        assert(equal(map!"a[0]"(m2), ["First line", "", "Second line"]));
        auto m2a = matchFn("First line\nSecond line",regex(".+$","gm"));
        assert(equal(map!"a[0]"(m2a), ["First line", "Second line"]));
        auto m2b = matchFn("First line\nSecond line",regex(".+?$","gm"));
        assert(equal(map!"a[0]"(m2b), ["First line", "Second line"]));
        debug(std_regex_test) writeln("!!! FReD FLAGS test done "~matchFn.stringof~" !!!");
    }
    test_body!bmatch();
    test_body!match();
}

//tests for accumulated std.regex issues and other regressions
@safe unittest
{
    import std.algorithm.comparison : equal;
    import std.algorithm.iteration : map;
    void test_body(alias matchFn)()
    {
        //issue 5857
        //matching goes out of control if ... in (...){x} has .*/.+
        auto c = matchFn("axxxzayyyyyzd",regex("(a.*z){2}d")).captures;
        assert(c[0] == "axxxzayyyyyzd");
        assert(c[1] == "ayyyyyz");
        auto c2 = matchFn("axxxayyyyyd",regex("(a.*){2}d")).captures;
        assert(c2[0] == "axxxayyyyyd");
        assert(c2[1] == "ayyyyy");
        //issue 2108
        //greedy vs non-greedy
        auto nogreed = regex("<packet.*?/packet>");
        assert(matchFn("<packet>text</packet><packet>text</packet>", nogreed).hit
            == "<packet>text</packet>");
        auto greed = regex("<packet.*/packet>");
        assert(matchFn("<packet>text</packet><packet>text</packet>", greed).hit
            == "<packet>text</packet><packet>text</packet>");
        //issue 4574
        //empty successful match still advances the input
        string[] pres, posts, hits;
        foreach (m; matchFn("abcabc", regex("","g")))
        {
            pres ~= m.pre;
            posts ~= m.post;
            assert(m.hit.empty);

        }
        auto heads = [
            "abcabc",
            "abcab",
            "abca",
            "abc",
            "ab",
            "a",
            ""
        ];
        auto tails = [
            "abcabc",
            "bcabc",
            "cabc",
            "abc",
            "bc",
            "c",
            ""
        ];
        assert(pres == array(retro(heads)));
        assert(posts == tails);
        //issue 6076
        //regression on .*
        auto re = regex("c.*|d");
        auto m = matchFn("mm", re);
        assert(!m);
        debug(std_regex_test) writeln("!!! FReD REGRESSION test done "~matchFn.stringof~" !!!");
        auto rprealloc = regex(`((.){5}.{1,10}){5}`);
        auto arr = array(repeat('0',100));
        auto m2 = matchFn(arr, rprealloc);
        assert(m2);
        // this pattern only has to compile without throwing
        assert(collectException(
                regex(r"^(import|file|binary|config)\s+([^\(]+)\(?([^\)]*)\)?\s*$")
                ) is null);
        // every escapable character must be usable, backslash-escaped, inside
        // a character class: alone, negated, and as both ends of a range
        // NOTE(review): these call match directly rather than matchFn - confirm
        // whether that is intentional.
        foreach (ch; [Escapables])
        {
            assert(match(to!string(ch),regex(`[\`~ch~`]`)));
            assert(!match(to!string(ch),regex(`[^\`~ch~`]`)));
            assert(match(to!string(ch),regex(`[\`~ch~`-\`~ch~`]`)));
        }
        //bugzilla 7718
        string strcmd = "./myApp.rb -os OSX -path \"/GIT/Ruby Apps/sec\" -conf 'notimer'";
        auto reStrCmd = regex (`(".*")|('.*')`, "g");
        assert(equal(map!"a[0]"(matchFn(strcmd, reStrCmd)),
                     [`"/GIT/Ruby Apps/sec"`, `'notimer'`]));
    }
    test_body!bmatch();
    test_body!match();
}

// tests for replace
@safe unittest
{
    void test(alias matchFn)()
    {
        import std.uni : toUpper;

        // repeat every check for each of the three string widths
        foreach (i, v; AliasSeq!(string, wstring, dstring))
        {
            // replacement callback: upper-cases the matched text
            auto baz(Cap)(Cap m)
            if (is(Cap == Captures!(Cap.String)))
            {
                return toUpper(m.hit);
            }
            alias String = v;
            assert(std.regex.replace!(matchFn)(to!String("ark rapacity"), regex(to!String("r")), to!String("c"))
                   == to!String("ack rapacity"));
            assert(std.regex.replace!(matchFn)(to!String("ark rapacity"), regex(to!String("r"), "g"), to!String("c"))
                   == to!String("ack capacity"));
            assert(std.regex.replace!(matchFn)(to!String("noon"), regex(to!String("^n")), to!String("[$&]"))
                   == to!String("[n]oon"));
            assert(std.regex.replace!(matchFn)(
                to!String("test1 test2"), regex(to!String(`\w+`),"g"), to!String("$`:$'")
            ) == to!String(": test2 test1 :"));
            auto s = std.regex.replace!(baz!(Captures!(String)))(to!String("Strap a rocket engine on a chicken."),
                    regex(to!String("[ar]"), "g"));
            assert(s == "StRAp A Rocket engine on A chicken.");
        }
        debug(std_regex_test) writeln("!!! Replace test done "~matchFn.stringof~" !!!");
    }
    test!(bmatch)();
    test!(match)();
}

// tests for splitter
@safe unittest
{
    import std.algorithm.comparison : equal;
    auto s1 = ", abc, de,  fg, hi, ";
    auto sp1 = splitter(s1, regex(", *"));
    auto w1 = ["", "abc", "de", "fg", "hi", ""];
    assert(equal(sp1, w1));

    auto s2 = ", abc, de,  fg, hi";
    auto sp2 = splitter(s2, regex(", *"));
    auto w2 = ["", "abc", "de", "fg", "hi"];

    uint cnt;
    foreach (e; sp2)
    {
        assert(w2[cnt++] == e);
    }
    assert(equal(sp2, w2));
}

// splitter must also accept a mutable char[] input (compile-only check)
@safe unittest
{
    char[] s1 = ", abc, de,  fg, hi, ".dup;
    auto sp2 = splitter(s1, regex(", *"));
}

// split: the eager counterpart of splitter
@safe unittest
{
    import std.algorithm.comparison : equal;
    auto s1 = ", abc, de,  fg, hi, ";
    auto w1 = ["", "abc", "de", "fg", "hi", ""];
    assert(equal(split(s1, regex(", *")), w1[]));
}

@safe unittest
{ // bugzilla 7141
    string pattern = `[a\--b]`;
    assert(match("-", pattern));
    assert(match("b", pattern));
    string pattern2 = `[&-z]`;
    assert(match("b", pattern2));
}
@safe unittest
{//bugzilla 7111
    assert(match("", regex("^")));
}
@safe unittest
{//bugzilla 7300
    assert(!match("a"d, "aa"d));
}

// bugzilla 7551
@safe unittest
{
    auto r = regex("[]abc]*");
    assert("]ab".matchFirst(r).hit == "]ab");
    assertThrown(regex("[]"));
    auto r2 = regex("[]abc--ab]*");
    assert("]ac".matchFirst(r2).hit == "]");
}

@safe unittest
{//bugzilla 7674
    assert("1234".replace(regex("^"), "$$") == "$1234");
    assert("hello?".replace(regex(r"\?", "g"), r"\?") == r"hello\?");
    assert("hello?".replace(regex(r"\?", "g"), r"\\?") != r"hello\?");
}
@safe unittest
{// bugzilla 7679
    import std.algorithm.comparison : equal;
    foreach (S; AliasSeq!(string, wstring, dstring))
    (){ // avoid slow optimizations for large functions @@@BUG@@@ 2396
        enum re = ctRegex!(to!S(r"\."));
        auto str = to!S("a.b");
        assert(equal(std.regex.splitter(str, re), [to!S("a"), to!S("b")]));
        assert(split(str, re) == [to!S("a"), to!S("b")]);
    }();
}
@safe unittest
{//bugzilla 8203
    string data = "
    NAME = XPAW01_STA:STATION
    NAME = XPAW01_STA
    ";
    auto uniFileOld = data;
    auto r = regex(
       r"^NAME = (?P<comp>[a-zA-Z0-9_]+):*(?P<blk>[a-zA-Z0-9_]*)","gm");
    auto uniCapturesNew = match(uniFileOld, r);
    // no assertions: the test passes if repeated iteration simply completes
    for (int i = 0; i < 20; i++)
        foreach (matchNew; uniCapturesNew) {}
    //a second issue with same symptoms
    auto r2 = regex(`([а-яА-Я\-_]+\s*)+(?<=[\s\.,\^])`);
    match("аллея Театральная", r2);
}
@safe unittest
{// bugzilla 8637 purity of enforce
    auto m = match("hello world", regex("world"));
    enforce(m);
}

// bugzilla 8725
@safe unittest
{
    // "x" flag: whitespace inside the pattern literal is not significant
    static italic = regex( r"\*
                (?!\s+)
                (.*?)
                (?!\s+)
                \*", "gx" );
    string input = "this * is* interesting, *very* interesting";
    assert(replace(input, italic, "<i>$1</i>") ==
        "this * is* interesting, <i>very</i> interesting");
}

// bugzilla 8349
@safe unittest
{
    enum peakRegexStr = r"\>(wgEncode.*Tfbs.*\.(?:narrow)|(?:broad)Peak.gz)</a>";
    enum peakRegex = ctRegex!(peakRegexStr);
    //note that the regex pattern itself is probably bogus
    assert(match(r"\>wgEncode-blah-Tfbs.narrow</a>", peakRegex));
}

// bugzilla 9211
@safe unittest
{
    import std.algorithm.comparison : equal;
    auto rx_1 = regex(r"^(\w)*(\d)");
    auto m = match("1234", rx_1);
    assert(equal(m.front, ["1234", "3", "4"]));
    auto rx_2 = regex(r"^([0-9])*(\d)");
    auto m2 = match("1234", rx_2);
    assert(equal(m2.front, ["1234", "3", "4"]));
}

// bugzilla 9280
@safe unittest
{
    string tomatch = "a!b@c";
    static r = regex(r"^(?P<nick>.*?)!(?P<ident>.*?)@(?P<host>.*?)$");
    auto nm = match(tomatch, r);
    assert(nm);
    auto c = nm.captures;
    assert(c[1] == "a");
    assert(c["nick"] == "a");
}


// bugzilla 9579
@safe unittest
{
    char[] input = ['a', 'b', 'c'];
    string format = "($1)";
    // used to give a compile error:
    auto re = regex(`(a)`, "g");
    auto r = replace(input, re, format);
    assert(r == "(a)bc");
}

// bugzilla 9634
@safe unittest
{
    auto re = ctRegex!"(?:a+)";
    assert(match("aaaa", re).hit == "aaaa");
}

//bugzilla 10798
@safe unittest
{
    auto cr = ctRegex!("[abcd--c]*");
    auto m  = "abc".match(cr);
    assert(m);
    assert(m.hit == "ab");
}

// bugzilla 10913
@system unittest
{
    @system static string foo(const(char)[] s)
    {
        return s.dup;
    }
    @safe static string bar(const(char)[] s)
    {
        return s.dup;
    }
    // replace must compile with a @system callback in @system code ...
    () @system {
        replace!((a) => foo(a.hit))("blah", regex(`a`));
    }();
    // ... and with a @safe callback in @safe code
    () @safe {
        replace!((a) => bar(a.hit))("blah", regex(`a`));
    }();
}

// bugzilla 11262
@safe unittest
{
    enum reg = ctRegex!(r",", "g");
    auto str = "This,List";
    str = str.replace(reg, "-");
    assert(str == "This-List");
}

// bugzilla 11775
@safe unittest
{
    assert(collectException(regex("a{1,0}")));
}

// bugzilla 11839
@safe unittest
{
    import std.algorithm.comparison : equal;
    assert(regex(`(?P<var1>\w+)`).namedCaptures.equal(["var1"]));
    assert(collectException(regex(`(?P<1>\w+)`)));
    assert(regex(`(?P<v1>\w+)`).namedCaptures.equal(["v1"]));
    assert(regex(`(?P<__>\w+)`).namedCaptures.equal(["__"]));
    assert(regex(`(?P<я>\w+)`).namedCaptures.equal(["я"]));
}

// bugzilla 12076
@safe unittest
{
    auto RE = ctRegex!(r"(?<!x[a-z]+)\s([a-z]+)");
    string s = "one two";
    // no assertion on the result: the test passes if matching completes
    auto m = match(s, RE);
}

// bugzilla 12105
@safe unittest
{
    auto r = ctRegex!`.*?(?!a)`;
    assert("aaab".matchFirst(r).hit == "aaa");
    auto r2 = ctRegex!`.*(?!a)`;
    assert("aaab".matchFirst(r2).hit == "aaab");
}

//bugzilla 11784
@safe unittest
{
    assert("abcdefghijklmnopqrstuvwxyz"
        .matchFirst("[a-z&&[^aeiuo]]").hit == "b");
}

//bugzilla 12366
@safe unittest
{
     auto re = ctRegex!(`^((?=(xx+?)\2+$)((?=\2+$)(?=(x+)(\4+$))\5){2})*x?$`);
     assert("xxxxxxxx".match(re).empty);
     assert(!"xxxx".match(re).empty);
}

// bugzilla 12582
@safe unittest
{
    auto r = regex(`(?P<a>abc)`);
    assert(collectException("abc".matchFirst(r)["b"]));
}

// bugzilla 12691
@safe unittest
{
    assert(bmatch("e@", "^([a-z]|)*$").empty);
    assert(bmatch("e@", ctRegex!`^([a-z]|)*$`).empty);
}

//bugzilla 12713
@safe unittest
{
    assertThrown(regex("[[a-z]([a-z]|(([[a-z])))"));
}

//bugzilla 12747
@safe unittest
{
    assertThrown(regex(`^x(\1)`));
    assertThrown(regex(`^(x(\1))`));
    assertThrown(regex(`^((x)(?=\1))`));
}

// bugzilla 14504
@safe unittest
{
    // compile-only check: the pattern must be accepted by ctRegex
    auto p = ctRegex!("a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?" ~
            "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa");
}

// bugzilla 14529
@safe unittest
{
    auto ctPat2 = regex(r"^[CDF]$", "i");
    foreach (v; ["C", "c", "D", "d", "F", "f"])
        assert(matchAll(v, ctPat2).front.hit == v);
}

// bugzilla 14615
@safe unittest
{
    import std.array : appender;
    import std.regex : replaceAllInto, replaceFirst, replaceFirstInto, regex;

    auto example = "Hello, world!";
    auto pattern = regex("^Hello, (bug)");  // won't find this one
    auto result = replaceFirst(example, pattern, "$1 Sponge Bob");
    assert(result == "Hello, world!");  // Ok.

    // with no match, the *Into variants must append the input unchanged
    auto sink = appender!string;
    replaceFirstInto(sink, example, pattern, "$1 Sponge Bob");
    assert(sink.data == "Hello, world!");
    replaceAllInto(sink, example, pattern, "$1 Sponge Bob");
    assert(sink.data == "Hello, world!Hello, world!");
}

// bugzilla 15573
@safe unittest
{
    auto rx = regex("[c d]", "x");
    assert("a b".matchFirst(rx));
}

// bugzilla 15864
@safe unittest
{
    // compile-only check: the pattern must be accepted without throwing
    regex(`(<a (?:(?:\w+=\"[^"]*\")?\s*)*href="\.\.?)"`);
}

// inline (?# ...) comments; an unterminated comment must be rejected
@safe unittest
{
    auto r = regex("(?# comment)abc(?# comment2)");
    assert("abc".matchFirst(r));
    assertThrown(regex("(?#..."));
}

// bugzilla 17075
@safe unittest
{
    enum titlePattern = `<title>(.+)</title>`;
    static titleRegex = ctRegex!titlePattern;
    string input = "<title>" ~ "<".repeat(100_000).join;
    assert(input.matchFirst(titleRegex).empty);
}

// bugzilla 17212
@safe unittest
{
    auto r = regex(" [a] ", "x");
    assert("a".matchFirst(r));
}

// bugzilla 17157
@safe unittest
{
    import std.algorithm.comparison : equal;
    auto ctr = ctRegex!"(a)|(b)|(c)|(d)";
    auto r = regex("(a)|(b)|(c)|(d)", "g");
    auto s = "--a--b--c--d--";
    auto outcomes = [
        ["a", "a", "", "", ""],
        ["b", "", "b", "", ""],
        ["c", "", "", "c", ""],
        ["d", "", "", "", "d"]
    ];
    assert(equal!equal(s.matchAll(ctr), outcomes));
    assert(equal!equal(s.bmatch(r), outcomes));
}

// bugzilla 17667
@safe unittest
{
    import std.algorithm.searching : canFind;
    // Asserts that compiling `arg` throws and that the exception message
    // contains `msg`; `line` identifies the calling line in failure output.
    void willThrow(T, size_t line = __LINE__)(T arg, string msg)
    {
        auto e = collectException(regex(arg));
        // collectException yields null when nothing was thrown; report that
        // as a test failure instead of dereferencing null below
        assert(e !is null, to!string(line) ~ ": expected an exception, none was thrown");
        assert(e.msg.canFind(msg), to!string(line) ~ ": " ~ e.msg);
    }
    willThrow([r".", r"[\(\{[\]\}\)]"], "no matching ']' found while parsing character class");
    willThrow([r"[\", r"123"], "no matching ']' found while parsing character class");
    willThrow([r"[a-", r"123"], "no matching ']' found while parsing character class");
    willThrow([r"[a-\", r"123"], "invalid escape sequence");
    willThrow([r"\", r"123"], "invalid escape sequence");
}

// bugzilla 17668
@safe unittest
{
    import std.algorithm.searching;
    auto e = collectException!RegexException(regex(q"<[^]>"));
    // null means the pattern was accepted; fail cleanly rather than crash
    assert(e !is null, "expected a RegexException, none was thrown");
    assert(e.msg.canFind("no operand for '^'"));
}

// bugzilla 17673
@safe unittest
{
    string str = `<">`;
    string[] regexps = ["abc", "\"|x"];
    auto regexp = regex(regexps);
    auto c = matchFirst(str, regexp);
    assert(c);
    assert(c.whichPattern == 2);
}
