1 // Split-up due to DMD's enormous memory consumption
2 
3 module std.regex.internal.tests2;
4 
5 package(std.regex):
6 
7 import std.conv, std.exception, std.meta, std.range,
8     std.typecons, std.regex;
9 
10 import std.regex.internal.ir : Escapables; // characters that need escaping
11 
// A plain literal and a star-quantified pattern, both compiled with ctRegex
// and run through bmatch.
@safe unittest
{
    auto literalRe = ctRegex!("abc");
    assert("abc".bmatch(literalRe).hit == "abc");

    auto starRe = ctRegex!("ab*c");
    assert("abbbbc".bmatch(starRe).hit == "abbbbc");
}
// Line anchors (^, $) and word-boundary assertions (\b, \B) under ctRegex.
@safe unittest
{
    auto anchored = ctRegex!("^abc$");
    assert("abc".bmatch(anchored).hit == "abc");

    auto bounded = ctRegex!(`\b(a\B[a-z]b)\b`);
    auto groups = "azb".match(bounded).captures;
    // Group 0 (whole hit) and group 1 cover the same text here.
    assert(array(groups) == ["azb", "azb"]);
}
26 
// Counted repetition of a group: greedy on a narrow string, lazy on a wide one.
@safe unittest
{
    auto greedyRep = ctRegex!("(?:a{2,4}b{1,3}){1,2}");
    assert("aaabaaaabbb".bmatch(greedyRep).hit == "aaabaaaabbb");

    // The trailing '?' makes the outer {1,2} lazy, so one iteration suffices.
    auto lazyRep = ctRegex!("(?:a{2,4}b{1,3}){1,2}?"w);
    auto wideHit = bmatch("aaabaaaabbb"w, lazyRep).hit;
    assert(wideHit == "aaab"w);
}
34 
// Lazy quantifiers under ctRegex: with the "sm" flags, and on a repeated
// <packet> element where only the first element must be consumed.
@safe unittest
{
    auto crToEol = ctRegex!(`\r.*?$`,"sm");
    assert("abc\r\nxy".bmatch(crToEol).hit == "\r\nxy");

    auto lazyPacket = ctRegex!("<packet.*?/packet>");
    string twoPackets = "<packet>text</packet><packet>text</packet>";
    assert(twoPackets.bmatch(lazyPacket).hit == "<packet>text</packet>");
}
43 
// Capture groups under ctRegex: an optional group, a possibly-empty group and
// a group repeated with '*'.
@safe unittest
{
    import std.algorithm.comparison : equal;

    auto groupsRe = ctRegex!("^(a)(b)?(c*)");
    auto groupsHit = "abcc".bmatch(groupsRe);
    assert(groupsHit);
    foreach (idx, expected; ["a", "b", "cc"])
        assert(groupsHit.captures[idx + 1] == expected);

    auto loopRe = ctRegex!("q(a|b)*q");
    auto viaMatch = "xxqababqyy".match(loopRe);
    assert(viaMatch);
    // A repeated group reports the text of its last iteration.
    assert(equal("xxqababqyy".bmatch(loopRe).captures, ["qababq", "b"]));
}
58 
// regex() evaluated at compile time (enum) must produce the same IR as the
// same pattern compiled at run time.
@safe unittest
{
    import std.algorithm.comparison : equal;

    enum flatCT = regex("a|b|c");
    auto flatRT = regex("a|b|c");
    assert(equal(flatRT.ir, flatCT.ir));

    //CTFE parser BUG is triggered by group
    //in the middle of alternation (at least not first and not last)
    auto midGroupRT = regex(`abc|(edf)|xyz`);
    enum midGroupCT = regex(`abc|(edf)|xyz`);
    assert(equal(midGroupCT.ir, midGroupRT.ir));
}
71 
// ctRegex-compiled patterns: grouped alternation, case-insensitive counted
// repetition, optional groups and multi-line global matching.
@safe unittest
{
    import std.algorithm.comparison : equal;
    import std.algorithm.iteration : map;

    enum anyOfAbc = ctRegex!"(A|B|C)";
    auto single = "B".match(anyOfAbc);
    assert(single);
    assert(equal(single.captures, ["B", "B"]));

    enum repeatedAlt = ctRegex!"(A|B)*";
    assert("BAAA".match(repeatedAlt));

    enum countedCI = ctRegex!("a{3,4}","i");
    auto ciHit = "AaA".match(countedCI);
    assert(ciHit);
    assert(ciHit.captures[0] == "AaA");

    enum lazyCounted = ctRegex!(`^a{3,4}?[a-zA-Z0-9~]{1,2}`,"i");
    auto lazyHit = "aaaabc".match(lazyCounted);
    assert(lazyHit);
    assert(lazyHit.captures[0] == "aaaab");

    auto groupsRe = ctRegex!("(a)(b)?(c*)");
    auto groupsHit = "abcc".bmatch(groupsRe);
    assert(groupsHit);
    assert(groupsHit.captures[1] == "a");
    assert(groupsHit.captures[2] == "b");
    assert(groupsHit.captures[3] == "cc");

    auto eolRe = ctRegex!(".*$", "gm");
    auto perLine = "First\rSecond".match(eolRe);
    assert(perLine);
    assert(equal(map!"a.hit"(perLine), ["First", "", "Second"]));
}
102 
// Global ("g") matching, a free-form ("x") pattern, lookbehind and multi-line
// anchors; the same body is instantiated once for bmatch and once for match.
@safe unittest
{
    import std.algorithm.comparison : equal;
    import std.algorithm.iteration : map;
    // writeln is referenced only by the debug trace below, and std.stdio is not
    // among the module-level imports visible here, so import it on demand.
    debug(std_regex_test) import std.stdio : writeln;

    //global matching
    void test_body(alias matchFn)()
    {
        string s = "a quick brown fox jumps over a lazy dog";
        auto r1 = regex("\\b[a-z]+\\b","g");
        string[] test;
        foreach (m; matchFn(s, r1))
            test ~= m.hit;
        assert(equal(test, [ "a", "quick", "brown", "fox", "jumps", "over", "a", "lazy", "dog"]));
        auto free_reg = regex(`

            abc
            \s+
            "
            (
                    [^"]+
                |   \\ "
            )+
            "
            z
        `, "x");
        // Go through matchFn rather than a hard-coded match so that the bmatch
        // instantiation exercises the free-form pattern as well.
        auto m = matchFn(`abc  "quoted string with \" inside"z`,free_reg);
        assert(m);
        string mails = " hey@you.com no@spam.net ";
        auto rm = regex(`@(?<=\S+@)\S+`,"g");
        assert(equal(map!"a[0]"(matchFn(mails, rm)), ["@you.com", "@spam.net"]));
        auto m2 = matchFn("First line\nSecond line",regex(".*$","gm"));
        assert(equal(map!"a[0]"(m2), ["First line", "", "Second line"]));
        auto m2a = matchFn("First line\nSecond line",regex(".+$","gm"));
        assert(equal(map!"a[0]"(m2a), ["First line", "Second line"]));
        auto m2b = matchFn("First line\nSecond line",regex(".+?$","gm"));
        assert(equal(map!"a[0]"(m2b), ["First line", "Second line"]));
        debug(std_regex_test) writeln("!!! FReD FLAGS test done "~matchFn.stringof~" !!!");
    }
    test_body!bmatch();
    test_body!match();
}
144 
145 //tests for accumulated std.regex issues and other regressions
// Regression checks for previously reported std.regex issues; the same body is
// instantiated once for bmatch and once for match.
@safe unittest
{
    import std.algorithm.comparison : equal;
    import std.algorithm.iteration : map;
    // writeln is referenced only by the debug trace at the end of test_body, and
    // std.stdio is not among the module-level imports visible here.
    debug(std_regex_test) import std.stdio : writeln;

    void test_body(alias matchFn)()
    {
        //issue 5857
        //matching goes out of control if ... in (...){x} has .*/.+
        auto c = matchFn("axxxzayyyyyzd",regex("(a.*z){2}d")).captures;
        assert(c[0] == "axxxzayyyyyzd");
        assert(c[1] == "ayyyyyz");
        auto c2 = matchFn("axxxayyyyyd",regex("(a.*){2}d")).captures;
        assert(c2[0] == "axxxayyyyyd");
        assert(c2[1] == "ayyyyy");
        //issue 2108
        //greedy vs non-greedy
        auto nogreed = regex("<packet.*?/packet>");
        assert(matchFn("<packet>text</packet><packet>text</packet>", nogreed).hit
               == "<packet>text</packet>");
        auto greed =  regex("<packet.*/packet>");
        assert(matchFn("<packet>text</packet><packet>text</packet>", greed).hit
               == "<packet>text</packet><packet>text</packet>");
        //issue 4574
        //empty successful match still advances the input
        string[] pres, posts, hits;
        foreach (m; matchFn("abcabc", regex("","g")))
        {
            pres ~= m.pre;
            posts ~= m.post;
            assert(m.hit.empty);

        }
        auto heads = [
            "abcabc",
            "abcab",
            "abca",
            "abc",
            "ab",
            "a",
            ""
        ];
        auto tails = [
            "abcabc",
             "bcabc",
              "cabc",
               "abc",
                "bc",
                 "c",
                  ""
        ];
        assert(pres == array(retro(heads)));
        assert(posts == tails);
        //issue 6076
        //regression on .*
        auto re = regex("c.*|d");
        auto m = matchFn("mm", re);
        assert(!m);
        // Nested counted repetition over a 100-character input.
        auto rprealloc = regex(`((.){5}.{1,10}){5}`);
        auto arr = array(repeat('0',100));
        auto m2 = matchFn(arr, rprealloc);
        assert(m2);
        // This pattern must compile without throwing.
        assert(collectException(
                regex(r"^(import|file|binary|config)\s+([^\(]+)\(?([^\)]*)\)?\s*$")
                ) is null);
        // Every escapable character must work escaped inside a character class,
        // in its negation and as both ends of a range. Go through matchFn so the
        // bmatch instantiation covers this loop too.
        foreach (ch; [Escapables])
        {
            assert(matchFn(to!string(ch),regex(`[\`~ch~`]`)));
            assert(!matchFn(to!string(ch),regex(`[^\`~ch~`]`)));
            assert(matchFn(to!string(ch),regex(`[\`~ch~`-\`~ch~`]`)));
        }
        //bugzilla 7718
        string strcmd = "./myApp.rb -os OSX -path \"/GIT/Ruby Apps/sec\" -conf 'notimer'";
        auto reStrCmd = regex (`(".*")|('.*')`, "g");
        assert(equal(map!"a[0]"(matchFn(strcmd, reStrCmd)),
                     [`"/GIT/Ruby Apps/sec"`, `'notimer'`]));
        // Emit the trace only after every check above has run.
        debug(std_regex_test) writeln("!!! FReD REGRESSION test done "~matchFn.stringof~" !!!");
    }
    test_body!bmatch();
    test_body!match();
}
226 
227 // tests for replace
// std.regex.replace with format strings and with a callback, for narrow, wide
// and UTF-32 strings; instantiated once for bmatch and once for match.
@safe unittest
{
    // writeln is referenced only by the debug trace at the end of test, and
    // std.stdio is not among the module-level imports visible here.
    debug(std_regex_test) import std.stdio : writeln;

    void test(alias matchFn)()
    {
        import std.uni : toUpper;

        // Replacement callback: upper-cases the matched text. It is independent
        // of the string type under test, so it is declared once outside the loop.
        auto baz(Cap)(Cap m)
        if (is(Cap == Captures!(Cap.String)))
        {
            return toUpper(m.hit);
        }

        foreach (String; AliasSeq!(string, wstring, dstring))
        {
            // Without "g" only the first occurrence is replaced.
            assert(std.regex.replace!(matchFn)(to!String("ark rapacity"), regex(to!String("r")), to!String("c"))
                   == to!String("ack rapacity"));
            assert(std.regex.replace!(matchFn)(to!String("ark rapacity"), regex(to!String("r"), "g"), to!String("c"))
                   == to!String("ack capacity"));
            // $& stands for the whole match.
            assert(std.regex.replace!(matchFn)(to!String("noon"), regex(to!String("^n")), to!String("[$&]"))
                   == to!String("[n]oon"));
            // $` and $' stand for the text before and after the match.
            assert(std.regex.replace!(matchFn)(
                to!String("test1 test2"), regex(to!String(`\w+`),"g"), to!String("$`:$'")
            ) == to!String(": test2 test1 :"));
            auto s = std.regex.replace!(baz!(Captures!(String)))(to!String("Strap a rocket engine on a chicken."),
                    regex(to!String("[ar]"), "g"));
            assert(s == "StRAp A Rocket engine on A chicken.");
        }
        debug(std_regex_test) writeln("!!! Replace test done "~matchFn.stringof~"  !!!");
    }
    test!(bmatch)();
    test!(match)();
}
260 
261 // tests for splitter
// splitter on a comma-plus-spaces separator, with and without a trailing
// separator in the input.
@safe unittest
{
    import std.algorithm.comparison : equal;

    auto withTrailing = ", abc, de,     fg, hi, ";
    auto expectedTrailing = ["", "abc", "de", "fg", "hi", ""];
    auto piecesTrailing = splitter(withTrailing, regex(", *"));
    assert(equal(piecesTrailing, expectedTrailing));

    auto noTrailing = ", abc, de,  fg, hi";
    auto expectedPlain = ["", "abc", "de", "fg", "hi"];
    auto piecesPlain = splitter(noTrailing, regex(", *"));

    // Walk the range by hand first, then compare it as a whole.
    uint idx;
    foreach (piece; piecesPlain)
    {
        assert(expectedPlain[idx] == piece);
        ++idx;
    }
    assert(equal(piecesPlain, expectedPlain));
}
281 
// splitter must accept a mutable char[] input and yield the same pieces as it
// does for an immutable string.
@safe unittest
{
    import std.algorithm.comparison : equal;

    char[] s1 = ", abc, de,  fg, hi, ".dup;
    auto sp2 = splitter(s1, regex(", *"));
    // Check the result as well, not just that the call compiles.
    assert(equal(sp2, ["", "abc", "de", "fg", "hi", ""]));
}
287 
// split (the eager counterpart of splitter) on a comma-plus-spaces separator.
@safe unittest
{
    import std.algorithm.comparison : equal;

    auto input = ", abc, de,  fg, hi, ";
    auto expected = ["", "abc", "de", "fg", "hi", ""];
    auto pieces = split(input, regex(", *"));
    assert(equal(pieces, expected[]));
}
295 
@safe unittest
{ // bugzilla 7141
    // An escaped '-' used as the start of a range inside a character class.
    string escapedDash = `[a\--b]`;
    foreach (input; ["-", "b"])
        assert(match(input, escapedDash));

    // A range whose lower bound is '&'.
    string ampRange = `[&-z]`;
    assert(match("b", ampRange));
}
@safe unittest
{//bugzilla 7111
    // A lone '^' must match on empty input.
    auto startAnchor = regex("^");
    auto onEmpty = match("", startAnchor);
    assert(onEmpty);
}
@safe unittest
{//bugzilla 7300
    // A two-character dstring pattern must not match a one-character input.
    auto shortInput = match("a"d, "aa"d);
    assert(!shortInput);
}
312 
313 // bugzilla 7551
// ']' placed first inside a character class is a literal member; an empty
// class "[]" is rejected.
@safe unittest
{
    auto withBracket = regex("[]abc]*");
    assert(matchFirst("]ab", withBracket).hit == "]ab");

    assertThrown(regex("[]"));

    // Same class with "ab" subtracted via '--'.
    auto minusAb = regex("[]abc--ab]*");
    assert(matchFirst("]ac", minusAb).hit == "]");
}
322 
@safe unittest
{//bugzilla 7674
    // "$$" in a format string produces a single literal '$'.
    auto dollar = replace("1234", regex("^"), "$$");
    assert(dollar == "$1234");

    // Backslashes in the format string are copied through verbatim.
    auto oneSlash = replace("hello?", regex(r"\?", "g"), r"\?");
    assert(oneSlash == r"hello\?");
    auto twoSlashes = replace("hello?", regex(r"\?", "g"), r"\\?");
    assert(twoSlashes != r"hello\?");
}
@safe unittest
{// bugzilla 7679
    // splitter and split with a ctRegex for each of the three string types.
    import std.algorithm.comparison : equal;
    foreach (Str; AliasSeq!(string, wstring, dstring))
    (){ // avoid slow optimizations for large functions @@@BUG@@@ 2396
        enum dot = ctRegex!(to!Str(r"\."));
        auto text = to!Str("a.b");
        auto lazyPieces = std.regex.splitter(text, dot);
        assert(equal(lazyPieces, [to!Str("a"), to!Str("b")]));
        auto eagerPieces = split(text, dot);
        assert(eagerPieces == [to!Str("a"), to!Str("b")]);
    }();
}
@safe unittest
{//bugzilla 8203
    // Iterating the same global match object many times must keep working.
    string data = "
    NAME   = XPAW01_STA:STATION
    NAME   = XPAW01_STA
    ";
    auto nameRe = regex(
       r"^NAME   = (?P<comp>[a-zA-Z0-9_]+):*(?P<blk>[a-zA-Z0-9_]*)","gm");
    auto allMatches = match(data, nameRe);
    foreach (pass; 0 .. 20)
        foreach (unused; allMatches) {}
    //a second issue with same symptoms
    auto cyrillicRe = regex(`([а-яА-Я\-_]+\s*)+(?<=[\s\.,\^])`);
    match("аллея Театральная", cyrillicRe);
}
@safe unittest
{// bugzilla 8637 purity of enforce
    // A match result must be usable directly as the condition of enforce.
    auto worldRe = regex("world");
    auto found = match("hello world", worldRe);
    enforce(found);
}
361 
362 // bugzilla 8725
// A statically initialised free-form ("x") regex with negative lookaheads,
// used to wrap *text* in <i> tags.
@safe unittest
{
    static italic = regex( r"\*
                (?!\s+)
                (.*?)
                (?!\s+)
                \*", "gx" );
    string source = "this * is* interesting, *very* interesting";
    auto rendered = replace(source, italic, "<i>$1</i>");
    assert(rendered == "this * is* interesting, <i>very</i> interesting");
}
374 
375 // bugzilla 8349
// A ctRegex built from an enum pattern string that mixes alternation with
// non-capturing groups.
@safe unittest
{
    enum peakPattern = r"\>(wgEncode.*Tfbs.*\.(?:narrow)|(?:broad)Peak.gz)</a>";
    enum peakRe = ctRegex!(peakPattern);
    //note that the regex pattern itself is probably bogus
    auto found = match(r"\>wgEncode-blah-Tfbs.narrow</a>", peakRe);
    assert(found);
}
383 
384 // bugzilla 9211
// A starred capture group followed by another group: the starred group must
// report its last iteration once it has given back one character.
@safe unittest
{
    import std.algorithm.comparison : equal;

    auto expected = ["1234", "3", "4"];

    auto wordThenDigit = regex(r"^(\w)*(\d)");
    auto viaWord = match("1234", wordThenDigit);
    assert(equal(viaWord.front, expected));

    auto classThenDigit = regex(r"^([0-9])*(\d)");
    auto viaClass = match("1234", classThenDigit);
    assert(equal(viaClass.front, expected));
}
395 
396 // bugzilla 9280
// Named groups in a statically initialised regex are reachable both by index
// and by name.
@safe unittest
{
    static r = regex(r"^(?P<nick>.*?)!(?P<ident>.*?)@(?P<host>.*?)$");
    string line = "a!b@c";
    auto found = match(line, r);
    assert(found);
    auto groups = found.captures;
    assert(groups[1] == "a");
    assert(groups["nick"] == "a");
}
407 
408 
409 // bugzilla 9579
// replace on a mutable char[] input with an immutable format string.
@safe unittest
{
    string fmt = "($1)";
    char[] text = "abc".dup;
    // used to give a compile error:
    auto firstA = regex(`(a)`, "g");
    auto replaced = replace(text, firstA, fmt);
    assert(replaced == "(a)bc");
}
419 
420 // bugzilla 9634
// A quantifier inside a non-capturing group under ctRegex.
@safe unittest
{
    auto plusInGroup = ctRegex!"(?:a+)";
    auto found = "aaaa".match(plusInGroup);
    assert(found.hit == "aaaa");
}
426 
427 //bugzilla 10798
// Set subtraction ('--') inside a character class under ctRegex: 'c' is
// removed from [abcd], so matching stops in front of it.
@safe unittest
{
    auto withoutC = ctRegex!("[abcd--c]*");
    auto found = match("abc", withoutC);
    assert(found);
    assert(found.hit == "ab");
}
435 
436 // bugzilla 10913
// replace with a callback must be callable from @system code with a @system
// callback and from @safe code with a @safe callback.
@system unittest
{
    @system static string copySystem(const(char)[] text)
    {
        return text.dup;
    }
    @safe static string copySafe(const(char)[] text)
    {
        return text.dup;
    }
    () @system {
        replace!((caps) => copySystem(caps.hit))("blah", regex(`a`));
    }();
    () @safe {
        replace!((caps) => copySafe(caps.hit))("blah", regex(`a`));
    }();
}
454 
455 // bugzilla 11262
// Global replace with an enum ctRegex, assigning the result back to the input
// variable.
@safe unittest
{
    enum comma = ctRegex!(r",", "g");
    auto text = "This,List";
    text = replace(text, comma, "-");
    assert(text == "This-List");
}
463 
464 // bugzilla 11775
// A counted repetition whose upper bound is below its lower bound is rejected.
@safe unittest
{
    auto failure = collectException(regex("a{1,0}"));
    assert(failure);
}
469 
470 // bugzilla 11839
// Accepted and rejected spellings of named-group identifiers.
@safe unittest
{
    import std.algorithm.comparison : equal;

    // Names containing digits, underscores or non-ASCII letters are accepted.
    assert(equal(regex(`(?P<var1>\w+)`).namedCaptures, ["var1"]));
    assert(equal(regex(`(?P<v1>\w+)`).namedCaptures, ["v1"]));
    assert(equal(regex(`(?P<__>\w+)`).namedCaptures, ["__"]));
    assert(equal(regex(`(?P<я>\w+)`).namedCaptures, ["я"]));

    // A name that starts with a digit is rejected.
    auto failure = collectException(regex(`(?P<1>\w+)`));
    assert(failure);
}
480 
481 // bugzilla 12076
// A ctRegex with a variable-length negative lookbehind must compile and run;
// the result of the match is not inspected.
@safe unittest
{
    auto lookbehindRe = ctRegex!(r"(?<!x[a-z]+)\s([a-z]+)");
    string text = "one two";
    auto unused = text.match(lookbehindRe);
}
488 
489 // bugzilla 12105
// Lazy and greedy '.*' followed by a negative lookahead under ctRegex.
@safe unittest
{
    auto lazyThenNotA = ctRegex!`.*?(?!a)`;
    assert(matchFirst("aaab", lazyThenNotA).hit == "aaa");

    auto greedyThenNotA = ctRegex!`.*(?!a)`;
    assert(matchFirst("aaab", greedyThenNotA).hit == "aaab");
}
497 
498 //bugzilla 11784
// Set intersection ('&&') with a negated nested class: the first letter that
// is not one of "aeiuo" is 'b'.
@safe unittest
{
    auto alphabet = "abcdefghijklmnopqrstuvwxyz";
    auto firstHit = matchFirst(alphabet, "[a-z&&[^aeiuo]]");
    assert(firstHit.hit == "b");
}
504 
505 //bugzilla 12366
// Nested lookaheads combined with backreferences under ctRegex: the pattern
// must reject eight 'x' characters and accept four.
@safe unittest
{
    auto nested = ctRegex!(`^((?=(xx+?)\2+$)((?=\2+$)(?=(x+)(\4+$))\5){2})*x?$`);
    auto eight = match("xxxxxxxx", nested);
    assert(eight.empty);
    auto four = match("xxxx", nested);
    assert(!four.empty);
}
512 
513 // bugzilla 12582
// Looking up a capture by a name the pattern does not define must throw.
@safe unittest
{
    auto namedA = regex(`(?P<a>abc)`);
    auto failure = collectException(matchFirst("abc", namedA)["b"]);
    assert(failure);
}
519 
520 // bugzilla 12691
// A starred group with an empty alternative must terminate and report no
// match, both for a run-time pattern string and for a ctRegex.
@safe unittest
{
    auto viaString = bmatch("e@", "^([a-z]|)*$");
    assert(viaString.empty);

    auto viaCtRegex = bmatch("e@", ctRegex!`^([a-z]|)*$`);
    assert(viaCtRegex.empty);
}
526 
527 //bugzilla  12713
// A pattern with unbalanced character-class brackets must be rejected with an
// exception.
@safe unittest
{
    enum unbalanced = "[[a-z]([a-z]|(([[a-z])))";
    assertThrown(regex(unbalanced));
}
532 
533 //bugzilla 12747
// Backreferences to a group that does not exist yet, or that is still open at
// the point of reference, must be rejected.
@safe unittest
{
    foreach (pattern; [`^x(\1)`, `^(x(\1))`, `^((x)(?=\1))`])
        assertThrown(regex(pattern));
}
540 
541 // bugzilla 14504
// A long run of optional 'a's followed by a long run of mandatory 'a's must
// compile under ctRegex; the compiled regex is not executed here.
@safe unittest
{
    enum optionalRun = "a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?";
    enum mandatoryRun = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
    auto compiled = ctRegex!(optionalRun ~ mandatoryRun);
}
547 
548 // bugzilla 14529
// Case-insensitive character class: every listed letter, in either case, must
// match and be returned unchanged.
@safe unittest
{
    auto ciClass = regex(r"^[CDF]$", "i");
    foreach (letter; ["C", "c", "D", "d", "F", "f"])
    {
        auto first = matchAll(letter, ciClass).front;
        assert(first.hit == letter);
    }
}
555 
556 // bugzilla 14615
// When the pattern does not match, replaceFirst returns the input unchanged
// and the *Into variants append the unchanged input to the sink.
@safe unittest
{
    import std.array : appender;
    // replaceAllInto is used below, so it is listed here as well; the unused
    // std.stdio import has been dropped.
    import std.regex : replaceFirst, replaceFirstInto, replaceAllInto, regex;

    auto example = "Hello, world!";
    auto pattern = regex("^Hello, (bug)");  // won't find this one
    auto result = replaceFirst(example, pattern, "$1 Sponge Bob");
    assert(result == "Hello, world!");  // Ok.

    auto sink = appender!string;
    replaceFirstInto(sink, example, pattern, "$1 Sponge Bob");
    assert(sink.data == "Hello, world!");
    // The sink accumulates output, so the second call appends another copy.
    replaceAllInto(sink, example, pattern, "$1 Sponge Bob");
    assert(sink.data == "Hello, world!Hello, world!");
}
574 
575 // bugzilla 15573
// In free-form ("x") mode a space inside a character class still matches a
// literal space.
@safe unittest
{
    auto classWithSpace = regex("[c d]", "x");
    auto found = matchFirst("a b", classWithSpace);
    assert(found);
}
581 
582 // bugzilla 15864
// Constructing this pattern (nested optional groups under '*') must complete
// without throwing; the resulting regex is discarded.
@safe unittest
{
    enum hrefPattern = `(<a (?:(?:\w+=\"[^"]*\")?\s*)*href="\.\.?)"`;
    regex(hrefPattern);
}
587 
// Inline comments "(?# ...)" are skipped by the parser; an unterminated one is
// an error.
@safe unittest
{
    auto commented = regex("(?# comment)abc(?# comment2)");
    auto found = matchFirst("abc", commented);
    assert(found);

    assertThrown(regex("(?#..."));
}
594 
595 // bugzilla 17075
// A statically initialised ctRegex run against a 100_000-character tail that
// never contains the closing tag must finish and report no match.
@safe unittest
{
    enum titlePattern = `<title>(.+)</title>`;
    static titleRegex = ctRegex!titlePattern;
    string tail = "<".repeat(100_000).join;
    string input = "<title>" ~ tail;
    auto found = matchFirst(input, titleRegex);
    assert(found.empty);
}
603 
604 // bugzilla 17212
// In free-form ("x") mode, whitespace around a character class is ignored.
@safe unittest
{
    auto padded = regex(" [a] ", "x");
    auto found = matchFirst("a", padded);
    assert(found);
}
610 
611 // bugzilla 17157
// With an alternation of capture groups, groups that did not take part in a
// given match must be reported as empty, for both ctRegex with matchAll and a
// run-time regex with bmatch.
@safe unittest
{
    import std.algorithm.comparison : equal;

    auto input = "--a--b--c--d--";
    // One row per expected match: whole hit followed by groups 1 to 4.
    auto expectedRows = [
        ["a", "a", "", "", ""],
        ["b", "", "b", "", ""],
        ["c", "", "", "c", ""],
        ["d", "", "", "", "d"]
    ];

    auto compiledAtCT = ctRegex!"(a)|(b)|(c)|(d)";
    assert(equal!equal(matchAll(input, compiledAtCT), expectedRows));

    auto compiledAtRT = regex("(a)|(b)|(c)|(d)", "g");
    assert(equal!equal(bmatch(input, compiledAtRT), expectedRows));
}
627 
628 // bugzilla 17667
// Malformed patterns passed as part of a multi-pattern array must be rejected
// with a specific error message.
@safe unittest
{
    import std.algorithm.searching : canFind;

    // Asserts that regex(arg) throws and that the exception message contains
    // msg. `line` defaults to the call site so failures point at the right case.
    void willThrow(T, size_t line = __LINE__)(T arg, string msg)
    {
        auto e = collectException(regex(arg));
        // collectException returns null when nothing was thrown; check that
        // first so a missing exception fails with a message instead of a null
        // dereference on e.msg.
        assert(e !is null, to!string(line) ~ ": expected an exception, none was thrown");
        assert(e.msg.canFind(msg), to!string(line) ~ ": " ~ e.msg);
    }
    willThrow([r".", r"[\(\{[\]\}\)]"], "no matching ']' found while parsing character class");
    willThrow([r"[\", r"123"], "no matching ']' found while parsing character class");
    willThrow([r"[a-", r"123"], "no matching ']' found while parsing character class");
    willThrow([r"[a-\", r"123"], "invalid escape sequence");
    willThrow([r"\", r"123"], "invalid escape sequence");
}
643 
644 // bugzilla 17668
// A character class consisting only of '^' must be rejected with a
// RegexException whose message names the missing operand.
@safe unittest
{
    import std.algorithm.searching : canFind;

    auto e = collectException!RegexException(regex(q"<[^]>"));
    // collectException returns null when nothing was thrown; check that first
    // so a missing exception fails cleanly instead of dereferencing null.
    assert(e !is null, "expected a RegexException for the pattern [^]");
    assert(e.msg.canFind("no operand for '^'"), e.msg);
}
651 
652 // bugzilla 17673
// Multi-pattern regex: whichPattern reports the 1-based index of the
// alternative that matched, here the second one, which contains a quote.
@safe unittest
{
    string[] alternatives = ["abc", "\"|x"];
    auto combined = regex(alternatives);
    string text = `<">`;
    auto found = matchFirst(text, combined);
    assert(found);
    assert(found.whichPattern == 2);
}
662 
663