1 /*
    Regular expressions package test suite.
3 */
4 module std.regex.internal.tests;
5 
6 package(std.regex):
7 
8 import std.conv, std.exception, std.meta, std.range,
9     std.typecons, std.regex;
10 
11 import std.regex.internal.parser : Escapables; // characters that need escaping
12 
// Compile-time integer sequence, forwarded verbatim to staticIota!(B, E).
// Used in this file to pick which indices of the test-vector table the
// ctRegex tests instantiate.
// NOTE(review): presumably covers B up to, but not including, E (a caller
// passes tv.length as E and then indexes tv with each value) -- confirm
// against staticIota.
alias Sequence(int B, int E) = staticIota!(B, E);
14 
@safe unittest
{
    // Smoke test: a handful of patterns must compile, and both matching
    // entry points (match and bmatch) must agree on trivial inputs.
    regex("(a|b)*");
    regex(`(?:([0-9A-F]+)\.\.([0-9A-F]+)|([0-9A-F]+))\s*;\s*(.*)\s*#`);
    regex("abc|edf|ighrg");

    auto literalRx = regex("abc");
    auto groupRx = regex("(gylba)");

    // match: one hit, one miss.
    assert(match("abcdef", literalRx).hit == "abc");
    assert(!match("wida", groupRx));

    // bmatch: same expectations.
    assert(bmatch("abcdef", literalRx).hit == "abc");
    assert(!bmatch("wida", groupRx));

    // A mutable char[] is accepted as the pattern argument too.
    assert(match("abc", "abc".dup));
    assert(bmatch("abc", "abc".dup));

    // A default-initialised Regex reports empty until a pattern is assigned.
    Regex!char lateInit;
    assert(lateInit.empty);
    lateInit = regex("test");
    assert(!lateInit.empty);
}
33 
34 /* The test vectors in this file are altered from Henry Spencer's regexp
35    test code. His copyright notice is:
36 
37         Copyright (c) 1986 by University of Toronto.
38         Written by Henry Spencer.  Not derived from licensed software.
39 
40         Permission is granted to anyone to use this software for any
41         purpose on any computer system, and to redistribute it freely,
42         subject to the following restrictions:
43 
44         1. The author is not responsible for the consequences of use of
45                 this software, no matter how awful, even if they arise
46                 from defects in it.
47 
48         2. The origin of this software must not be misrepresented, either
49                 by explicit claim or by omission.
50 
51         3. Altered versions must be plainly marked as such, and must not
52                 be misrepresented as being the original software.
53 
54 
55  */
56 
57 @safe unittest
58 {
    // One row of the test table consumed by run_tests and ct_tests.
    struct TestVectors
    {
        string pattern; // regex source handed to regex() / ctRegex
        string input;   // text the compiled pattern is matched against
        string result;  // expectation; its first character is inspected:
                        // 'c' = pattern must fail to compile, 'y' = must match,
                        // anything else (the table uses "n") = must not match
        string format;  // format string expanded over the match's captures via replaceFmt
        string replace; // expected expansion of `format`; only checked when result is 'y'
        string flags;   // flags argument for regex() / ctRegex; default-initialised when omitted
    }
68 
69     static immutable TestVectors[] tv = [
70         TestVectors(  "a\\b",       "a",  "y",    "$&",    "a" ),
71         TestVectors(  "(a)b\\1",   "abaab","y",    "$&",    "aba" ),
72         TestVectors(  "()b\\1",     "aaab", "y",    "$&",    "b" ),
73         TestVectors(  "abc",       "abc",  "y",    "$&",    "abc" ),
74         TestVectors(  "abc",       "xbc",  "n",    "-",    "-" ),
75         TestVectors(  "abc",       "axc",  "n",    "-",    "-" ),
76         TestVectors(  "abc",       "abx",  "n",    "-",    "-" ),
77         TestVectors(  "abc",       "xabcy","y",    "$&",    "abc" ),
78         TestVectors(  "abc",       "ababc","y",    "$&",    "abc" ),
79         TestVectors(  "ab*c",      "abc",  "y",    "$&",    "abc" ),
80         TestVectors(  "ab*bc",     "abc",  "y",    "$&",    "abc" ),
81         TestVectors(  "ab*bc",     "abbc", "y",    "$&",    "abbc" ),
82         TestVectors(  "ab*bc",     "abbbbc","y",   "$&",    "abbbbc" ),
83         TestVectors(  "ab+bc",     "abbc", "y",    "$&",    "abbc" ),
84         TestVectors(  "ab+bc",     "abc",  "n",    "-",    "-" ),
85         TestVectors(  "ab+bc",     "abq",  "n",    "-",    "-" ),
86         TestVectors(  "ab+bc",     "abbbbc","y",   "$&",    "abbbbc" ),
87         TestVectors(  "ab?bc",     "abbc", "y",    "$&",    "abbc" ),
88         TestVectors(  "ab?bc",     "abc",  "y",    "$&",    "abc" ),
89         TestVectors(  "ab?bc",     "abbbbc","n",   "-",    "-" ),
90         TestVectors(  "ab?c",      "abc",  "y",    "$&",    "abc" ),
91         TestVectors(  "^abc$",     "abc",  "y",    "$&",    "abc" ),
92         TestVectors(  "^abc$",     "abcc", "n",    "-",    "-" ),
93         TestVectors(  "^abc",      "abcc", "y",    "$&",    "abc" ),
94         TestVectors(  "^abc$",     "aabc", "n",    "-",    "-" ),
95         TestVectors(  "abc$",      "aabc", "y",    "$&",    "abc" ),
96         TestVectors(  "^",         "abc",  "y",    "$&",    "" ),
97         TestVectors(  "$",         "abc",  "y",    "$&",    "" ),
98         TestVectors(  "a.c",       "abc",  "y",    "$&",    "abc" ),
99         TestVectors(  "a.c",       "axc",  "y",    "$&",    "axc" ),
100         TestVectors(  "a.*c",      "axyzc","y",    "$&",    "axyzc" ),
101         TestVectors(  "a.*c",      "axyzd","n",    "-",    "-" ),
102         TestVectors(  "a[bc]d",    "abc",  "n",    "-",    "-" ),
103         TestVectors(  "a[bc]d",    "abd",  "y",    "$&",    "abd" ),
104         TestVectors(  "a[b-d]e",   "abd",  "n",    "-",    "-" ),
105         TestVectors(  "a[b-d]e",   "ace",  "y",    "$&",    "ace" ),
106         TestVectors(  "a[b-d]",    "aac",  "y",    "$&",    "ac" ),
107         TestVectors(  "a[-b]",     "a-",   "y",    "$&",    "a-" ),
108         TestVectors(  "a[b-]",     "a-",   "y",    "$&",    "a-" ),
109         TestVectors(  "a[b-a]",    "-",    "c",    "-",    "-" ),
110         TestVectors(  "a[]b",      "-",    "c",    "-",    "-" ),
111         TestVectors(  "a[",        "-",    "c",    "-",    "-" ),
112         TestVectors(  "a]",        "a]",   "y",    "$&",    "a]" ),
113         TestVectors(  "a[\\]]b",     "a]b",  "y",  "$&",    "a]b" ),
114         TestVectors(  "a[^bc]d",   "aed",  "y",    "$&",    "aed" ),
115         TestVectors(  "a[^bc]d",   "abd",  "n",    "-",    "-" ),
116         TestVectors(  "a[^-b]c",   "adc",  "y",    "$&",    "adc" ),
117         TestVectors(  "a[^-b]c",   "a-c",  "n",    "-",    "-" ),
118         TestVectors(  "a[^\\]b]c",   "adc",  "y",  "$&",    "adc" ),
119         TestVectors(  "ab|cd",     "abc",  "y",    "$&",    "ab" ),
120         TestVectors(  "ab|cd",     "abcd", "y",    "$&",    "ab" ),
121         TestVectors(  "()ef",      "def",  "y",    "$&-$1",        "ef-" ),
122         TestVectors(  "()*",       "-",    "y",    "-",    "-" ),
123         TestVectors(  "*a",        "-",    "c",    "-",    "-" ),
124         TestVectors(  "^*",        "-",    "y",    "-",    "-" ),
125         TestVectors(  "$*",        "-",    "y",    "-",    "-" ),
126         TestVectors(  "(*)b",      "-",    "c",    "-",    "-" ),
127         TestVectors(  "$b",        "b",    "n",    "-",    "-" ),
128         TestVectors(  "a\\",       "-",    "c",    "-",    "-" ),
129         TestVectors(  "a\\(b",     "a(b",  "y",    "$&-$1",        "a(b-" ),
130         TestVectors(  "a\\(*b",    "ab",   "y",    "$&",    "ab" ),
131         TestVectors(  "a\\(*b",    "a((b", "y",    "$&",    "a((b" ),
132         TestVectors(  "a\\\\b",    "a\\b", "y",    "$&",    "a\\b" ),
133         TestVectors(  "abc)",      "-",    "c",    "-",    "-" ),
134         TestVectors(  "(abc",      "-",    "c",    "-",    "-" ),
135         TestVectors(  "((a))",     "abc",  "y",    "$&-$1-$2",    "a-a-a" ),
136         TestVectors(  "(a)b(c)",   "abc",  "y",    "$&-$1-$2",    "abc-a-c" ),
137         TestVectors(  "a+b+c",     "aabbabc","y",  "$&",    "abc" ),
138         TestVectors(  "a**",       "-",    "c",    "-",    "-" ),
139         TestVectors(  "a*?a",      "aa",   "y",    "$&",    "a" ),
140         TestVectors(  "(a*)*",     "aaa",  "y",    "-",    "-" ),
141         TestVectors(  "(a*)+",     "aaa",  "y",    "-",    "-" ),
142         TestVectors(  "(a|)*",     "-",    "y",    "-",    "-" ),
143         TestVectors(  "(a*|b)*",   "aabb", "y",    "-",    "-" ),
144         TestVectors(  "(a|b)*",    "ab",   "y",    "$&-$1",        "ab-b" ),
145         TestVectors(  "(a+|b)*",   "ab",   "y",    "$&-$1",        "ab-b" ),
146         TestVectors(  "(a+|b)+",   "ab",   "y",    "$&-$1",        "ab-b" ),
147         TestVectors(  "(a+|b)?",   "ab",   "y",    "$&-$1",        "a-a" ),
148         TestVectors(  "[^ab]*",    "cde",  "y",    "$&",    "cde" ),
149         TestVectors(  "(^)*",      "-",    "y",    "-",    "-" ),
150         TestVectors(  "(ab|)*",    "-",    "y",    "-",    "-" ),
151         TestVectors(  ")(",        "-",    "c",    "-",    "-" ),
152         TestVectors(  "",  "abc",  "y",    "$&",    "" ),
153         TestVectors(  "abc",       "",     "n",    "-",    "-" ),
154         TestVectors(  "a*",        "",     "y",    "$&",    "" ),
155         TestVectors(  "([abc])*d", "abbbcd",       "y",    "$&-$1",        "abbbcd-c" ),
156         TestVectors(  "([abc])*bcd", "abcd",       "y",    "$&-$1",        "abcd-a" ),
157         TestVectors(  "a|b|c|d|e", "e",    "y",    "$&",    "e" ),
158         TestVectors(  "(a|b|c|d|e)f", "ef",        "y",    "$&-$1",        "ef-e" ),
159         TestVectors(  "((a*|b))*", "aabb", "y",    "-",    "-" ),
160         TestVectors(  "abcd*efg",  "abcdefg",      "y",    "$&",    "abcdefg" ),
161         TestVectors(  "ab*",       "xabyabbbz",    "y",    "$&",    "ab" ),
162         TestVectors(  "ab*",       "xayabbbz",     "y",    "$&",    "a" ),
163         TestVectors(  "(ab|cd)e",  "abcde",        "y",    "$&-$1",        "cde-cd" ),
164         TestVectors(  "[abhgefdc]ij",      "hij",  "y",    "$&",    "hij" ),
165         TestVectors(  "^(ab|cd)e", "abcde",        "n",    "x$1y",        "xy" ),
166         TestVectors(  "(abc|)ef",  "abcdef",       "y",    "$&-$1",        "ef-" ),
167         TestVectors(  "(a|b)c*d",  "abcd",         "y",    "$&-$1",        "bcd-b" ),
168         TestVectors(  "(ab|ab*)bc",        "abc",  "y",    "$&-$1",        "abc-a" ),
169         TestVectors(  "a([bc]*)c*",        "abc",  "y",    "$&-$1",        "abc-bc" ),
170         TestVectors(  "a([bc]*)(c*d)",     "abcd", "y",    "$&-$1-$2",    "abcd-bc-d" ),
171         TestVectors(  "a([bc]+)(c*d)",     "abcd", "y",    "$&-$1-$2",    "abcd-bc-d" ),
172         TestVectors(  "a([bc]*)(c+d)",     "abcd", "y",    "$&-$1-$2",    "abcd-b-cd" ),
173         TestVectors(  "a[bcd]*dcdcde",     "adcdcde",      "y",    "$&",    "adcdcde" ),
174         TestVectors(  "a[bcd]+dcdcde",     "adcdcde",      "n",    "-",    "-" ),
175         TestVectors(  "(ab|a)b*c", "abc",           "y",    "$&-$1",        "abc-ab" ),
176         TestVectors(  "((a)(b)c)(d)",      "abcd",  "y",    "$1-$2-$3-$4",      "abc-a-b-d" ),
177         TestVectors(  "[a-zA-Z_][a-zA-Z0-9_]*",    "alpha",        "y",    "$&",    "alpha" ),
178         TestVectors(  "^a(bc+|b[eh])g|.h$",        "abh",  "y",    "$&-$1",        "bh-" ),
179         TestVectors(  "(bc+d$|ef*g.|h?i(j|k))",    "effgz",        "y",    "$&-$1-$2",    "effgz-effgz-" ),
180         TestVectors(  "(bc+d$|ef*g.|h?i(j|k))",    "ij",   "y",    "$&-$1-$2",    "ij-ij-j" ),
181         TestVectors(  "(bc+d$|ef*g.|h?i(j|k))",    "effg", "n",    "-",    "-" ),
182         TestVectors(  "(bc+d$|ef*g.|h?i(j|k))",    "bcdd", "n",    "-",    "-" ),
183         TestVectors(  "(bc+d$|ef*g.|h?i(j|k))",    "reffgz",       "y",    "$&-$1-$2",    "effgz-effgz-" ),
184         TestVectors(  "(((((((((a)))))))))",       "a",    "y",    "$&",    "a" ),
185         TestVectors(  "multiple words of text",    "uh-uh",        "n",    "-",    "-" ),
186         TestVectors(  "multiple words",    "multiple words, yeah", "y",    "$&",    "multiple words" ),
187         TestVectors(  "(.*)c(.*)", "abcde",                "y",    "$&-$1-$2",    "abcde-ab-de" ),
188         TestVectors(  "\\((.*), (.*)\\)",  "(a, b)",       "y",    "($2, $1)",   "(b, a)" ),
189         TestVectors(  "abcd",      "abcd",                   "y",    "$&-&-$$$&",  "abcd-&-$abcd" ),
190         TestVectors(  "a(bc)d",    "abcd",                 "y",    "$1-$$1-$$$1",    "bc-$1-$bc" ),
191         TestVectors(  "[k]",                       "ab",   "n",    "-",    "-" ),
192         TestVectors(  "[ -~]*",                    "abc",  "y",    "$&",    "abc" ),
193         TestVectors(  "[ -~ -~]*",                 "abc",  "y",    "$&",    "abc" ),
194         TestVectors(  "[ -~ -~ -~]*",              "abc",  "y",    "$&",    "abc" ),
195         TestVectors(  "[ -~ -~ -~ -~]*",           "abc",  "y",    "$&",    "abc" ),
196         TestVectors(  "[ -~ -~ -~ -~ -~]*",        "abc",  "y",    "$&",    "abc" ),
197         TestVectors(  "[ -~ -~ -~ -~ -~ -~]*",     "abc",  "y",    "$&",    "abc" ),
198         TestVectors(  "[ -~ -~ -~ -~ -~ -~ -~]*",  "abc",  "y",    "$&",    "abc" ),
199         TestVectors(  "a{2}",      "candy",                "n",    "",     "" ),
200         TestVectors(  "a{2}",      "caandy",               "y",    "$&",    "aa" ),
201         TestVectors(  "a{2}",      "caaandy",              "y",    "$&",    "aa" ),
202         TestVectors(  "a{2,}",     "candy",                "n",    "",     "" ),
203         TestVectors(  "a{2,}",     "caandy",               "y",    "$&",    "aa" ),
204         TestVectors(  "a{2,}",     "caaaaaandy",           "y",    "$&",    "aaaaaa" ),
205         TestVectors(  "a{1,3}",    "cndy",                 "n",    "",     "" ),
206         TestVectors(  "a{1,3}",    "candy",                "y",    "$&",    "a" ),
207         TestVectors(  "a{1,3}",    "caandy",               "y",    "$&",    "aa" ),
208         TestVectors(  "a{1,3}",    "caaaaaandy",           "y",    "$&",    "aaa" ),
209         TestVectors(  "e?le?",     "angel",                "y",    "$&",    "el" ),
210         TestVectors(  "e?le?",     "angle",                "y",    "$&",    "le" ),
211         TestVectors(  "\\bn\\w",   "noonday",              "y",    "$&",    "no" ),
212         TestVectors(  "\\wy\\b",   "possibly yesterday",   "y",    "$&",    "ly" ),
213         TestVectors(  "\\w\\Bn",   "noonday",              "y",    "$&",    "on" ),
214         TestVectors(  "y\\B\\w",   "possibly yesterday",   "y",    "$&",    "ye" ),
215         TestVectors(  "\\cJ",      "abc\ndef",             "y",    "$&",    "\n" ),
216         TestVectors(  "\\d",       "B2 is",                "y",    "$&",    "2" ),
217         TestVectors(  "\\D",       "B2 is",                "y",    "$&",    "B" ),
218         TestVectors(  "\\s\\w*",   "foo bar",              "y",    "$&",    " bar" ),
219         TestVectors(  "\\S\\w*",   "foo bar",              "y",    "$&",    "foo" ),
220         TestVectors(  "abc",       "ababc",                "y",    "$&",    "abc" ),
221         TestVectors(  "apple(,)\\sorange\\1",      "apple, orange, cherry, peach", "y", "$&", "apple, orange," ),
222         TestVectors(  "(\\w+)\\s(\\w+)",           "John Smith", "y", "$2, $1", "Smith, John" ),
223         TestVectors(  "\\n\\f\\r\\t\\v",           "abc\n\f\r\t\vdef", "y", "$&", "\n\f\r\t\v" ),
224         TestVectors(  ".*c",       "abcde",                        "y",    "$&",    "abc" ),
225         TestVectors(  "^\\w+((;|=)\\w+)+$", "some=host=tld",    "y", "$&-$1-$2", "some=host=tld-=tld-=" ),
226         TestVectors(  "^\\w+((\\.|-)\\w+)+$", "some.host.tld",    "y", "$&-$1-$2", "some.host.tld-.tld-." ),
227         TestVectors(  "q(a|b)*q",  "xxqababqyy",                "y",    "$&-$1",        "qababq-b" ),
228         TestVectors(  "^(a)(b){0,1}(c*)",   "abcc", "y", "$1 $2 $3", "a b cc" ),
229         TestVectors(  "^(a)((b){0,1})(c*)", "abcc", "y", "$1 $2 $3", "a b b" ),
230         TestVectors(  "^(a)(b)?(c*)",       "abcc", "y", "$1 $2 $3", "a b cc" ),
231         TestVectors(  "^(a)((b)?)(c*)",     "abcc", "y", "$1 $2 $3", "a b b" ),
232         TestVectors(  "^(a)(b){0,1}(c*)",   "acc",  "y", "$1 $2 $3", "a  cc" ),
233         TestVectors(  "^(a)((b){0,1})(c*)", "acc",  "y", "$1 $2 $3", "a  " ),
234         TestVectors(  "^(a)(b)?(c*)",       "acc",  "y", "$1 $2 $3", "a  cc" ),
235         TestVectors(  "^(a)((b)?)(c*)",     "acc",  "y", "$1 $2 $3", "a  " ),
236         TestVectors(  "(?:ab){3}",       "_abababc","y", "$&-$1",    "ababab-" ),
237         TestVectors(  "(?:a(?:x)?)+",    "aaxaxx",  "y", "$&-$1-$2", "aaxax--" ),
238         TestVectors(  `\W\w\W`,         "aa b!ca",  "y", "$&",       " b!"),
239 //more repetitions:
240         TestVectors(  "(?:a{2,4}b{1,3}){1,2}",  "aaabaaaabbb", "y", "$&", "aaabaaaabbb" ),
241         TestVectors(  "(?:a{2,4}b{1,3}){1,2}?", "aaabaaaabbb", "y", "$&", "aaab" ),
242 //groups:
243         TestVectors(  "(abc)|(edf)|(xyz)",     "xyz",             "y",   "$1-$2-$3","--xyz"),
244         TestVectors(  "(?P<q>\\d+)/(?P<d>\\d+)",     "2/3",       "y",     "${d}/${q}",    "3/2"),
245 //set operations:
246         TestVectors(  "[a-z--d-f]",                  " dfa",      "y",   "$&",     "a"),
247         TestVectors(  "[abc[pq--acq]]{2}",           "bqpaca",    "y",   "$&",     "pa"),
248         TestVectors(  "[a-z9&&abc0-9]{3}",           "z90a0abc",  "y",   "$&",     "abc"),
249         TestVectors(  "[0-9a-f~~0-5a-z]{2}",         "g0a58x",    "y",   "$&",     "8x"),
250         TestVectors(  "[abc[pq]xyz[rs]]{4}",         "cqxr",      "y",   "$&",     "cqxr"),
251         TestVectors(  "[abcdf--[ab&&[bcd]][acd]]",   "abcdefgh",  "y",   "$&",     "f"),
252         TestVectors(  "[a-c||d-f]+",    "abcdef", "y", "$&", "abcdef"),
253         TestVectors(  "[a-f--a-c]+",    "abcdef", "y", "$&", "def"),
254         TestVectors(  "[a-c&&b-f]+",    "abcdef", "y", "$&", "bc"),
255         TestVectors(  "[a-c~~b-f]+",    "abcdef", "y", "$&", "a"),
256 //unicode blocks & properties:
257         TestVectors(  `\P{Inlatin1suppl ement}`, "\u00c2!", "y", "$&", "!"),
258         TestVectors(  `\p{InLatin-1 Supplement}\p{in-mathematical-operators}\P{Inlatin1suppl ement}`,
259             "\u00c2\u2200\u00c3\u2203.", "y", "$&", "\u00c3\u2203."),
260         TestVectors(  `[-+*/\p{in-mathematical-operators}]{2}`,    "a+\u2212",    "y",    "$&",    "+\u2212"),
261         TestVectors(  `\p{Ll}+`,                      "XabcD",    "y",  "$&",      "abc"),
262         TestVectors(  `\p{Lu}+`,                      "абвГДЕ",   "y",  "$&",      "ГДЕ"),
263         TestVectors(  `^\p{Currency Symbol}\p{Sc}`,   "$₤",       "y",  "$&",      "$₤"),
264         TestVectors(  `\p{Common}\p{Thai}`,           "!ฆ",       "y",  "$&",      "!ฆ"),
265         TestVectors(  `[\d\s]*\D`,  "12 \t3\U00001680\u0F20_2",   "y",  "$&", "12 \t3\U00001680\u0F20_"),
266         TestVectors(  `[c-wф]фф`, "ффф", "y", "$&", "ффф"),
267 //case insensitive:
268         TestVectors(   `^abcdEf$`,           "AbCdEF",              "y",   "$&", "AbCdEF",      "i"),
269         TestVectors(   `Русский язык`, "рУсскИй ЯзЫк", "y", "$&", "рУсскИй ЯзЫк", "i"),
270         TestVectors(   `ⒶⒷⓒ` ,        "ⓐⓑⒸ",                   "y",   "$&", "ⓐⓑⒸ",      "i"),
271         TestVectors(   "\U00010400{2}",  "\U00010428\U00010400 ",   "y",   "$&", "\U00010428\U00010400", "i"),
272         TestVectors(   `[adzУ-Я]{4}`,    "DzюЯ",                   "y",   "$&", "DzюЯ", "i"),
273         TestVectors(   `\p{L}\p{Lu}{10}`, "абвгдеЖЗИКЛ", "y",   "$&", "абвгдеЖЗИКЛ", "i"),
274         TestVectors(   `(?:Dåb){3}`,  "DåbDÅBdÅb",                  "y",   "$&", "DåbDÅBdÅb", "i"),
275 //escapes:
276         TestVectors(    `\u0041\u005a\U00000065\u0001`,         "AZe\u0001",       "y",   "$&", "AZe\u0001"),
277         TestVectors(    `\u`,               "",   "c",   "-",  "-"),
278         TestVectors(    `\U`,               "",   "c",   "-",  "-"),
279         TestVectors(    `\u003`,            "",   "c",   "-",  "-"),
280         TestVectors(    `[\x00-\x7f]{4}`,        "\x00\x09ab",   "y", "$&", "\x00\x09ab"),
281         TestVectors(    `[\cJ\cK\cA-\cD]{3}\cQ`, "\x01\x0B\x0A\x11", "y", "$&", "\x01\x0B\x0A\x11"),
282         TestVectors(    `\r\n\v\t\f\\`,     "\r\n\v\t\f\\",   "y",   "$&", "\r\n\v\t\f\\"),
283         TestVectors(    `[\u0003\u0001]{2}`,  "\u0001\u0003",         "y",   "$&", "\u0001\u0003"),
284         TestVectors(    `^[\u0020-\u0080\u0001\n-\r]{8}`,  "abc\u0001\v\f\r\n",  "y",   "$&", "abc\u0001\v\f\r\n"),
285         TestVectors(    `\w+\S\w+`, "ab7!44c",  "y", "$&", "ab7!44c"),
286         TestVectors(    `\b\w+\b`,  " abde4 ",  "y", "$&", "abde4"),
287         TestVectors(    `\b\w+\b`,  " abde4",   "y", "$&", "abde4"),
288         TestVectors(    `\b\w+\b`,  "abde4 ",   "y", "$&", "abde4"),
289         TestVectors(    `\pL\pS`,   "a\u02DA",  "y", "$&", "a\u02DA"),
290         TestVectors(    `\pX`,      "",         "c", "-",  "-"),
291 // ^, $, \b, \B, multiline :
292         TestVectors(    `\r.*?$`,    "abc\r\nxy", "y", "$&", "\r\nxy", "sm"),
293         TestVectors(    `^a$^b$`,    "a\r\nb\n",  "n", "$&", "-", "m"),
294         TestVectors(    `^a$\r\n^b$`,"a\r\nb\n",  "y", "$&", "a\r\nb", "m"),
295         TestVectors(    `^$`,        "\r\n",      "y", "$&", "", "m"),
296         TestVectors(    `^a$\nx$`,   "a\nx\u2028","y", "$&", "a\nx", "m"),
297         TestVectors(    `^a$\nx$`,   "a\nx\u2029","y", "$&", "a\nx", "m"),
298         TestVectors(    `^a$\nx$`,   "a\nx\u0085","y", "$&", "a\nx","m"),
299         TestVectors(    `^x$`,       "\u2028x",   "y", "$&", "x", "m"),
300         TestVectors(    `^x$`,       "\u2029x",   "y", "$&", "x", "m"),
301         TestVectors(    `^x$`,       "\u0085x",   "y", "$&", "x", "m"),
302         TestVectors(    `\b^.`,      "ab",        "y", "$&", "a"),
303         TestVectors(    `\B^.`,      "ab",        "n", "-",  "-"),
304         TestVectors(    `^ab\Bc\B`,  "\r\nabcd",  "y", "$&", "abc", "m"),
305         TestVectors(    `^.*$`,      "12345678",  "y", "$&", "12345678"),
306 
307 // luckily obtained regression on incremental matching in backtracker
308         TestVectors(  `^(?:(?:([0-9A-F]+)\.\.([0-9A-F]+)|([0-9A-F]+))\s*;\s*([^ ]*)\s*#|# (?:\w|_)+=((?:\w|_)+))`,
309             "0020  ; White_Space # ", "y", "$1-$2-$3", "--0020"),
310 //lookahead
311         TestVectors(    "(foo.)(?=(bar))",     "foobar foodbar", "y", "$&-$1-$2", "food-food-bar" ),
312         TestVectors(    `\b(\d+)[a-z](?=\1)`,  "123a123",        "y", "$&-$1", "123a-123" ),
313         TestVectors(    `\$(?!\d{3})\w+`,      "$123 $abc",      "y", "$&", "$abc"),
314         TestVectors(    `(abc)(?=(ed(f))\3)`,    "abcedff",      "y", "-", "-"),
315         TestVectors(    `\b[A-Za-z0-9.]+(?=(@(?!gmail)))`, "a@gmail,x@com",  "y", "$&-$1", "x-@"),
316         TestVectors(    `x()(abc)(?=(d)(e)(f)\2)`,   "xabcdefabc", "y", "$&", "xabc"),
317         TestVectors(    `x()(abc)(?=(d)(e)(f)()\3\4\5)`,   "xabcdefdef", "y", "$&", "xabc"),
318 //lookback
319         TestVectors(    `(?<=(ab))\d`,    "12ba3ab4",    "y",   "$&-$1", "4-ab",  "i"),
320         TestVectors(    `\w(?<!\d)\w`,   "123ab24",  "y",   "$&", "ab"),
321         TestVectors(    `(?<=Dåb)x\w`,  "DåbDÅBxdÅb",  "y",   "$&", "xd", "i"),
322         TestVectors(    `(?<=(ab*c))x`,   "abbbbcxac",  "y",   "$&-$1", "x-abbbbc"),
323         TestVectors(    `(?<=(ab*?c))x`,   "abbbbcxac",  "y",   "$&-$1", "x-abbbbc"),
324         TestVectors(    `(?<=(a.*?c))x`,   "ababbcxac",  "y",   "$&-$1", "x-abbc"),
325         TestVectors(    `(?<=(a{2,4}b{1,3}))x`,   "yyaaaabx",  "y",   "$&-$1", "x-aaaab"),
326         TestVectors(    `(?<=((?:a{2,4}b{1,3}){1,2}))x`,   "aabbbaaaabx",  "y",   "$&-$1", "x-aabbbaaaab"),
327         TestVectors(    `(?<=((?:a{2,4}b{1,3}){1,2}?))x`,   "aabbbaaaabx",  "y",   "$&-$1", "x-aaaab"),
328         TestVectors(    `(?<=(abc|def|aef))x`,    "abcx", "y",        "$&-$1",  "x-abc"),
329         TestVectors(    `(?<=(abc|def|aef))x`,    "aefx", "y",        "$&-$1",  "x-aef"),
330         TestVectors(    `(?<=(abc|dabc))(x)`,    "dabcx", "y",        "$&-$1-$2",  "x-abc-x"),
331         TestVectors(    `(?<=(|abc))x`,        "dabcx", "y",        "$&-$1",  "x-"),
332         TestVectors(    `(?<=((ab|da)*))x`,    "abdaabx", "y",        "$&-$2-$1",  "x-ab-abdaab"),
333         TestVectors(    `a(?<=(ba(?<=(aba)(?<=aaba))))`, "aabaa", "y", "$&-$1-$2", "a-ba-aba"),
334         TestVectors(    `.(?<!b).`,   "bax",  "y", "$&", "ax"),
335         TestVectors(    `(?<=b(?<!ab)).`,   "abbx",  "y",  "$&", "x"),
336         TestVectors(    `(?<=\.|[!?]+)X`,   "Hey?!X", "y", "$&", "X"),
337         TestVectors(    `(?<=\.|[!?]+)a{3}`,   ".Nope.aaaX", "y", "$&", "aaa"),
338 //mixed lookaround
339         TestVectors(   `a(?<=a(?=b))b`,    "ab", "y",      "$&", "ab"),
340         TestVectors(   `a(?<=a(?!b))c`,    "ac", "y",      "$&", "ac"),
341         TestVectors(   `a(?i)bc`,         "aBc", "y",      "$&", "aBc"),
342         TestVectors(   `a(?i)bc`,         "Abc", "n",      "$&", "-"),
343         TestVectors(   `(?i)a(?-i)bc`, "aBcAbc", "y",      "$&", "Abc"),
344         TestVectors(   `(?s).(?-s).`, "\n\n\na", "y",      "$&", "\na"),
345         TestVectors(   `(?m)^a(?-m)$`,  "\na",   "y",      "$&", "a")
346         ];
produceExpected(M,String)347     string produceExpected(M,String)(auto ref M m, String fmt)
348     {
349         auto app = appender!(String)();
350         replaceFmt(fmt, m.captures, app, true);
351         return app.data;
352     }
run_tests(alias matchFn)353     void run_tests(alias matchFn)()
354     {
355         int i;
356         foreach (Char; AliasSeq!( char, wchar, dchar))
357         (){ // avoid slow optimizations for large functions @@@BUG@@@ 2396
358             alias String = immutable(Char)[];
359             String produceExpected(M,Range)(auto ref M m, Range fmt)
360             {
361                 auto app = appender!(String)();
362                 replaceFmt(fmt, m.captures, app, true);
363                 return app.data;
364             }
365             Regex!(Char) r;
366             foreach (a, tvd; tv)
367             {
368                 uint c = tvd.result[0];
369                 debug(std_regex_test) writeln(" Test #", a, " pattern: ", tvd.pattern, " with Char = ", Char.stringof);
370                 try
371                 {
372                     i = 1;
373                     r = regex(to!(String)(tvd.pattern), tvd.flags);
374                 }
375                 catch (RegexException e)
376                 {
377                     i = 0;
378                     debug(std_regex_test) writeln(e.msg);
379                 }
380 
381                 assert((c == 'c') ? !i : i, "failed to compile pattern "~tvd.pattern);
382 
383                 if (c != 'c')
384                 {
385                     auto m = matchFn(to!(String)(tvd.input), r);
386                     i = !m.empty;
387                     assert(
388                         (c == 'y') ? i : !i,
389                         text(matchFn.stringof ~": failed to match pattern #", a ,": ", tvd.pattern)
390                     );
391                     if (c == 'y')
392                     {
393                         auto result = produceExpected(m, to!(String)(tvd.format));
394                         assert(result == to!String(tvd.replace),
395                             text(matchFn.stringof ~": mismatch pattern #", a, ": ", tvd.pattern," expected: ",
396                                     tvd.replace, " vs ", result));
397                     }
398                 }
399             }
400         }();
401         debug(std_regex_test) writeln("!!! FReD bulk test done "~matchFn.stringof~" !!!");
402     }
403 
404 
ct_tests()405     void ct_tests()
406     {
407         import std.algorithm.comparison : equal;
408         version (std_regex_ct1)
409         {
410             pragma(msg, "Testing 1st part of ctRegex");
411             alias Tests = Sequence!(0, 155);
412         }
413         else version (std_regex_ct2)
414         {
415             pragma(msg, "Testing 2nd part of ctRegex");
416             alias Tests = Sequence!(155, 174);
417         }
418         //FIXME: #174-178 contains CTFE parser bug
419         else version (std_regex_ct3)
420         {
421             pragma(msg, "Testing 3rd part of ctRegex");
422             alias Tests = Sequence!(178, 220);
423         }
424         else version (std_regex_ct4)
425         {
426             pragma(msg, "Testing 4th part of ctRegex");
427             alias Tests = Sequence!(220, tv.length);
428         }
429         else
430             alias Tests = AliasSeq!(Sequence!(0, 30), Sequence!(235, tv.length-5));
431         foreach (a, v; Tests)
432         (){ // avoid slow optimizations for large functions @@@BUG@@@ 2396
433             enum tvd = tv[v];
434             static if (tvd.result == "c")
435             {
436                 static assert(!__traits(compiles, (){
437                     enum r = regex(tvd.pattern, tvd.flags);
438                 }), "errornously compiles regex pattern: " ~ tvd.pattern);
439             }
440             else
441             {
442                 //BUG: tv[v] is fine but tvd is not known at compile time?!
443                 auto r = ctRegex!(tv[v].pattern, tv[v].flags);
444                 auto nr = regex(tvd.pattern, tvd.flags);
445                 assert(equal(r.ir, nr.ir),
446                     text("!C-T regex! failed to compile pattern #", a ,": ", tvd.pattern));
447                 auto m = match(tvd.input, r);
448                 auto c = tvd.result[0];
449                 bool ok = (c == 'y') ^ m.empty;
450                 assert(ok, text("ctRegex: failed to match pattern #",
451                     a ,": ", tvd.pattern));
452                 if (c == 'y')
453                 {
454                     import std.stdio;
455                     auto result = produceExpected(m, tvd.format);
456                     if (result != tvd.replace)
457                         writeln("ctRegex mismatch pattern #", a, ": ", tvd.pattern," expected: ",
458                                 tvd.replace, " vs ", result);
459                 }
460             }
461         }();
462         debug(std_regex_test) writeln("!!! FReD C-T test done !!!");
463     }
464 
    // Run the ctRegex subset first, then the whole table once per run-time
    // matching function.
    ct_tests();
    run_tests!bmatch(); //backtracker
    run_tests!match(); //thompson VM
468 }
469 
@safe unittest
{
    // ctRegex patterns driven through bmatch: a plain literal, then a star.
    auto literalRx = ctRegex!("abc");
    assert(bmatch("abc", literalRx).hit == "abc");

    auto starRx = ctRegex!("ab*c");
    assert(bmatch("abbbbc", starRx).hit == "abbbbc");
}
@safe unittest
{
    // Line anchors via bmatch.
    auto anchoredRx = ctRegex!("^abc$");
    assert(bmatch("abc", anchoredRx).hit == "abc");

    // Word-boundary assertions around a single capture group via match.
    auto boundaryRx = ctRegex!(`\b(a\B[a-z]b)\b`);
    auto groups = array(match("azb", boundaryRx).captures);
    assert(groups == ["azb", "azb"]);
}
484 
@safe unittest
{
    // Nested counted repetition, greedy form on a narrow string.
    auto greedyRx = ctRegex!("(?:a{2,4}b{1,3}){1,2}");
    assert(bmatch("aaabaaaabbb", greedyRx).hit == "aaabaaaabbb");

    // Same pattern with a lazy outer quantifier, on wide strings.
    auto lazyRx = ctRegex!("(?:a{2,4}b{1,3}){1,2}?"w);
    assert(bmatch("aaabaaaabbb"w, lazyRx).hit == "aaab"w);
}
492 
@safe unittest
{
    // Lazy star up to end of input with the "sm" flags.
    auto tailRx = ctRegex!(`\r.*?$`,"sm");
    assert(bmatch("abc\r\nxy", tailRx).hit == "\r\nxy");

    // Lazy star must stop at the first closing tag.
    auto greed = ctRegex!("<packet.*?/packet>");
    auto firstPacket = bmatch("<packet>text</packet><packet>text</packet>", greed).hit;
    assert(firstPacket == "<packet>text</packet>");
}
501 
@safe unittest
{
    import std.algorithm.comparison : equal;

    // Optional group in the middle: every capture is populated here.
    auto optionalRx = ctRegex!("^(a)(b)?(c*)");
    auto optionalHit = bmatch("abcc", optionalRx);
    assert(optionalHit);
    foreach (idx, expected; ["a", "b", "cc"])
        assert(optionalHit.captures[idx + 1] == expected);

    // Repeated alternation group: the capture holds the last iteration.
    auto loopRx = ctRegex!("q(a|b)*q");
    auto loopHit = match("xxqababqyy", loopRx);
    assert(loopHit);
    assert(equal(bmatch("xxqababqyy", loopRx).captures, ["qababq", "b"]));
}
516 
@safe unittest
{
    import std.algorithm.comparison : equal;

    // The same pattern parsed at run time and under CTFE must yield equal IR.
    auto runtimeAlt = regex("a|b|c");
    enum ctfeAlt = regex("a|b|c");
    assert(equal(runtimeAlt.ir, ctfeAlt.ir));

    //CTFE parser BUG is triggered by group
    //in the middle of alternation (at least not first and not last)
    enum testCT = regex(`abc|(edf)|xyz`);
    auto testRT = regex(`abc|(edf)|xyz`);
    assert(equal(testCT.ir, testRT.ir));
}
529 
@safe unittest
{
    import std.algorithm.comparison : equal;
    import std.algorithm.iteration : map;

    // Single alternation group.
    enum altRx = ctRegex!"(A|B|C)";
    auto altHit = match("B", altRx);
    assert(altHit);
    assert(equal(altHit.captures, [ "B", "B"]));

    // Starred alternation group.
    enum starAltRx = ctRegex!"(A|B)*";
    assert(match("BAAA", starAltRx));

    // Counted repetition, case-insensitive.
    enum countedRx = ctRegex!("a{3,4}","i");
    auto countedHit = match("AaA", countedRx);
    assert(countedHit);
    assert(countedHit.captures[0] == "AaA");

    // Lazy counted repetition followed by a character class.
    enum lazyCountedRx = ctRegex!(`^a{3,4}?[a-zA-Z0-9~]{1,2}`,"i");
    auto lazyCountedHit = match("aaaabc", lazyCountedRx);
    assert(lazyCountedHit);
    assert(lazyCountedHit.captures[0] == "aaaab");

    // Optional middle group through bmatch.
    auto optionalRx = ctRegex!("(a)(b)?(c*)");
    auto optionalHit = bmatch("abcc", optionalRx);
    assert(optionalHit);
    auto groups = optionalHit.captures;
    assert(groups[1] == "a");
    assert(groups[2] == "b");
    assert(groups[3] == "cc");

    // Global multi-line matching over input split by a lone '\r'.
    auto lineRx = ctRegex!(".*$", "gm");
    auto lineHits = match("First\rSecond", lineRx);
    assert(lineHits);
    assert(equal(lineHits.map!"a.hit", ["First", "", "Second"]));
}
560 
@safe unittest
{
    import std.algorithm.comparison : equal;
    import std.algorithm.iteration : map;
    // Global ("g") matching and flag handling, run once per matching entry
    // point supplied as `matchFn` (bmatch and match, see the calls at the end).
    void test_body(alias matchFn)()
    {
        // Iterate word by word over a sentence.
        string s = "a quick brown fox jumps over a lazy dog";
        auto r1 = regex("\\b[a-z]+\\b","g");
        string[] test;
        foreach (m; matchFn(s, r1))
            test ~= m.hit;
        assert(equal(test, [ "a", "quick", "brown", "fox", "jumps", "over", "a", "lazy", "dog"]));
        // Free-form ("x" flag) pattern spread over several lines.
        auto free_reg = regex(`

            abc
            \s+
            "
            (
                    [^"]+
                |   \\ "
            )+
            "
            z
        `, "x");
        // Goes through matchFn so that both entry points are exercised;
        // calling `match` directly would run the same check twice.
        auto m = matchFn(`abc  "quoted string with \" inside"z`,free_reg);
        assert(m);
        // Lookbehind combined with global matching.
        string mails = " hey@you.com no@spam.net ";
        auto rm = regex(`@(?<=\S+@)\S+`,"g");
        assert(equal(map!"a[0]"(matchFn(mails, rm)), ["@you.com", "@spam.net"]));
        // Multi-line mode: `.*$` also yields an empty hit between the lines,
        // while `.+$` and `.+?$` do not.
        auto m2 = matchFn("First line\nSecond line",regex(".*$","gm"));
        assert(equal(map!"a[0]"(m2), ["First line", "", "Second line"]));
        auto m2a = matchFn("First line\nSecond line",regex(".+$","gm"));
        assert(equal(map!"a[0]"(m2a), ["First line", "Second line"]));
        auto m2b = matchFn("First line\nSecond line",regex(".+?$","gm"));
        assert(equal(map!"a[0]"(m2b), ["First line", "Second line"]));
        debug(std_regex_test)
        {
            // writeln is not imported at module level; pull it in only for debug builds.
            import std.stdio : writeln;
            writeln("!!! FReD FLAGS test done "~matchFn.stringof~" !!!");
        }
    }
    test_body!bmatch();
    test_body!match();
}
602 
//tests for accumulated std.regex issues and other regressions
@safe unittest
{
    import std.algorithm.comparison : equal;
    import std.algorithm.iteration : map;
    // Runs every regression check through the matching entry point supplied
    // as `matchFn` (bmatch and match, see the calls at the end).
    void test_body(alias matchFn)()
    {
        //issue 5857
        //matching goes out of control if ... in (...){x} has .*/.+
        auto c = matchFn("axxxzayyyyyzd",regex("(a.*z){2}d")).captures;
        assert(c[0] == "axxxzayyyyyzd");
        assert(c[1] == "ayyyyyz");
        auto c2 = matchFn("axxxayyyyyd",regex("(a.*){2}d")).captures;
        assert(c2[0] == "axxxayyyyyd");
        assert(c2[1] == "ayyyyy");
        //issue 2108
        //greedy vs non-greedy
        auto nogreed = regex("<packet.*?/packet>");
        assert(matchFn("<packet>text</packet><packet>text</packet>", nogreed).hit
               == "<packet>text</packet>");
        auto greed =  regex("<packet.*/packet>");
        assert(matchFn("<packet>text</packet><packet>text</packet>", greed).hit
               == "<packet>text</packet><packet>text</packet>");
        //issue 4574
        //empty successful match still advances the input
        string[] pres, posts;
        foreach (m; matchFn("abcabc", regex("","g")))
        {
            pres ~= m.pre;
            posts ~= m.post;
            assert(m.hit.empty);

        }
        auto heads = [
            "abcabc",
            "abcab",
            "abca",
            "abc",
            "ab",
            "a",
            ""
        ];
        auto tails = [
            "abcabc",
             "bcabc",
              "cabc",
               "abc",
                "bc",
                 "c",
                  ""
        ];
        // `pre` grows from "" to the whole input, hence the reversed list.
        assert(pres == array(retro(heads)));
        assert(posts == tails);
        //issue 6076
        //regression on .*
        auto re = regex("c.*|d");
        auto m = matchFn("mm", re);
        assert(!m);
        // Nested counted repetitions over a 100-character input.
        auto rprealloc = regex(`((.){5}.{1,10}){5}`);
        auto arr = array(repeat('0',100));
        auto m2 = matchFn(arr, rprealloc);
        assert(m2);
        // This pattern must compile without throwing.
        assert(collectException(
                regex(r"^(import|file|binary|config)\s+([^\(]+)\(?([^\)]*)\)?\s*$")
                ) is null);
        // Every escapable character must work escaped inside a character class:
        // alone, negated, and as both ends of a range. Uses matchFn so that both
        // entry points are covered instead of running `match` twice.
        foreach (ch; [Escapables])
        {
            assert(matchFn(to!string(ch),regex(`[\`~ch~`]`)));
            assert(!matchFn(to!string(ch),regex(`[^\`~ch~`]`)));
            assert(matchFn(to!string(ch),regex(`[\`~ch~`-\`~ch~`]`)));
        }
        //bugzilla 7718
        string strcmd = "./myApp.rb -os OSX -path \"/GIT/Ruby Apps/sec\" -conf 'notimer'";
        auto reStrCmd = regex (`(".*")|('.*')`, "g");
        assert(equal(map!"a[0]"(matchFn(strcmd, reStrCmd)),
                     [`"/GIT/Ruby Apps/sec"`, `'notimer'`]));
        debug(std_regex_test)
        {
            // writeln is not imported at module level; pull it in only for debug builds.
            import std.stdio : writeln;
            writeln("!!! FReD REGRESSION test done "~matchFn.stringof~" !!!");
        }
    }
    test_body!bmatch();
    test_body!match();
}
684 
// tests for replace
@safe unittest
{
    // Runs the replace checks with `matchFn` as the matching entry point,
    // once for each of string, wstring and dstring.
    void test(alias matchFn)()
    {
        import std.uni : toUpper;

        foreach (v; AliasSeq!(string, wstring, dstring))
        {
            // Replacement callback: upper-cases the matched text.
            auto baz(Cap)(Cap m)
            if (is(Cap == Captures!(Cap.String)))
            {
                return toUpper(m.hit);
            }
            alias String = v;
            // Without "g" only the first occurrence is replaced.
            assert(std.regex.replace!(matchFn)(to!String("ark rapacity"), regex(to!String("r")), to!String("c"))
                   == to!String("ack rapacity"));
            // With "g" every occurrence is replaced.
            assert(std.regex.replace!(matchFn)(to!String("ark rapacity"), regex(to!String("r"), "g"), to!String("c"))
                   == to!String("ack capacity"));
            // $& inserts the whole match.
            assert(std.regex.replace!(matchFn)(to!String("noon"), regex(to!String("^n")), to!String("[$&]"))
                   == to!String("[n]oon"));
            // $` and $' insert the text before and after the match.
            assert(std.regex.replace!(matchFn)(
                to!String("test1 test2"), regex(to!String(`\w+`),"g"), to!String("$`:$'")
            ) == to!String(": test2 test1 :"));
            // Replacement computed by a callback instead of a format string.
            auto s = std.regex.replace!(baz!(Captures!(String)))(to!String("Strap a rocket engine on a chicken."),
                    regex(to!String("[ar]"), "g"));
            assert(s == "StRAp A Rocket engine on A chicken.");
        }
        debug(std_regex_test)
        {
            // writeln is not imported at module level; pull it in only for debug builds.
            import std.stdio : writeln;
            writeln("!!! Replace test done "~matchFn.stringof~"  !!!");
        }
    }
    test!(bmatch)();
    test!(match)();
}
718 
// tests for splitter
@safe unittest
{
    import std.algorithm.comparison : equal;

    // Separators at both ends of the input yield empty first and last pieces.
    auto padded = ", abc, de,     fg, hi, ";
    auto paddedPieces = splitter(padded, regex(", *"));
    auto paddedWant = ["", "abc", "de", "fg", "hi", ""];
    assert(equal(paddedPieces, paddedWant));

    // No trailing separator: no trailing empty piece.
    auto open = ", abc, de,  fg, hi";
    auto openPieces = splitter(open, regex(", *"));
    auto openWant = ["", "abc", "de", "fg", "hi"];

    // Walk the pieces by hand first, then compare the whole range again.
    uint idx;
    foreach (piece; openPieces)
    {
        assert(openWant[idx] == piece);
        ++idx;
    }
    assert(equal(openPieces, openWant));
}
739 
@safe unittest
{
    import std.algorithm.comparison : equal;

    // splitter must also accept a mutable char[] input; besides compiling,
    // the pieces it yields are checked so the result is not silently ignored.
    char[] s1 = ", abc, de,  fg, hi, ".dup;
    auto sp2 = splitter(s1, regex(", *"));
    assert(equal(sp2, ["", "abc", "de", "fg", "hi", ""]));
}
745 
@safe unittest
{
    import std.algorithm.comparison : equal;

    // split on a regex: empty pieces are expected at both ends of this input.
    auto expected = ["", "abc", "de", "fg", "hi", ""];
    auto text = ", abc, de,  fg, hi, ";
    auto parts = split(text, regex(", *"));
    assert(equal(parts, expected[]));
}
753 
@safe unittest
{ // bugzilla 7141
    // A character class containing an escaped dash followed by `-b`:
    // both "-" and "b" have to match.
    string dashClass = `[a\--b]`;
    foreach (input; ["-", "b"])
        assert(match(input, dashClass));

    // A class range that starts at '&'.
    string ampRange = `[&-z]`;
    assert(match("b", ampRange));
}
@safe unittest
{//bugzilla 7111
    // A lone `^` has to match on empty input.
    auto startOnly = regex("^");
    auto onEmpty = match("", startOnly);
    assert(onEmpty);
}
@safe unittest
{//bugzilla 7300
    // A one-character dstring input must not match the two-character pattern "aa".
    auto tooShort = match("a"d, "aa"d);
    assert(!tooShort);
}
770 
// bugzilla 7551: `]` as the first character of a character class
@safe unittest
{
    // Placed first, `]` is a member of the class.
    auto withBracket = regex("[]abc]*");
    auto all = "]ab".matchFirst(withBracket);
    assert(all.hit == "]ab");

    // `[]` on its own is rejected.
    assertThrown(regex("[]"));

    // The same class with "ab" subtracted via `--`: the hit stops before 'a'.
    auto minusAb = regex("[]abc--ab]*");
    auto bracketOnly = "]ac".matchFirst(minusAb);
    assert(bracketOnly.hit == "]");
}
780 
@safe unittest
{//bugzilla 7674
    // "$$" in a replacement format produces a single literal '$'.
    auto dollar = "1234".replace(regex("^"), "$$");
    assert(dollar == "$1234");

    // Backslashes in the format are copied as they are: `\?` gives `\?`,
    // so `\\?` must give something different from `hello\?`.
    auto qmark = regex(r"\?", "g");
    auto single = "hello?".replace(qmark, r"\?");
    auto doubled = "hello?".replace(qmark, r"\\?");
    assert(single == r"hello\?");
    assert(doubled != r"hello\?");
}
@safe unittest
{// bugzilla 7679
    import std.algorithm.comparison : equal;
    // splitter and split with a ctRegex, for each string width.
    foreach (S; AliasSeq!(string, wstring, dstring))
    (){ // avoid slow optimizations for large functions @@@BUG@@@ 2396
        enum dot = ctRegex!(to!S(r"\."));
        auto text = to!S("a.b");
        auto wanted = [to!S("a"), to!S("b")];
        assert(equal(std.regex.splitter(text, dot), wanted));
        assert(split(text, dot) == wanted);
    }();
}
@safe unittest
{//bugzilla 8203
    // Only checks that repeated iteration over the same match range completes;
    // nothing is asserted about the hits themselves.
    string data = "
    NAME   = XPAW01_STA:STATION
    NAME   = XPAW01_STA
    ";
    auto nameLine = regex(
       r"^NAME   = (?P<comp>[a-zA-Z0-9_]+):*(?P<blk>[a-zA-Z0-9_]*)","gm");
    auto found = match(data, nameLine);
    foreach (pass; 0 .. 20)
    {
        foreach (each; found) {}
    }
    //a second issue with same symptoms
    auto cyrillic = regex(`([а-яА-Я\-_]+\s*)+(?<=[\s\.,\^])`);
    match("аллея Театральная", cyrillic);
}
@safe unittest
{// bugzilla 8637 purity of enforce
    // A match result has to be usable directly as the condition of enforce.
    auto world = regex("world");
    auto found = match("hello world", world);
    enforce(found);
}
819 
// bugzilla 8725: negative lookahead inside a free-form, statically initialized regex
@safe unittest
{
    static emphasis = regex( r"\*
                (?!\s+)
                (.*?)
                (?!\s+)
                \*", "gx" );
    string text = "this * is* interesting, *very* interesting";
    // Only `*very*` is expected to be rewritten; `* is*` stays untouched.
    auto html = replace(text, emphasis, "<i>$1</i>");
    assert(html == "this * is* interesting, <i>very</i> interesting");
}
832 
// bugzilla 8349
@safe unittest
{
    //note that the regex pattern itself is probably bogus
    enum peaks = ctRegex!(r"\>(wgEncode.*Tfbs.*\.(?:narrow)|(?:broad)Peak.gz)</a>");
    auto found = match(r"\>wgEncode-blah-Tfbs.narrow</a>", peaks);
    assert(found);
}
841 
// bugzilla 9211: captures of a starred group followed by another group
@safe unittest
{
    import std.algorithm.comparison : equal;
    // Both spellings of the starred group must report its last iteration ("3")
    // and leave the final digit to the second group.
    foreach (pattern; [r"^(\w)*(\d)", r"^([0-9])*(\d)"])
    {
        auto found = match("1234", regex(pattern));
        assert(equal(found.front, ["1234", "3", "4"]));
    }
}
853 
// bugzilla 9280: named groups in a statically initialized regex
@safe unittest
{
    static userMask = regex(r"^(?P<nick>.*?)!(?P<ident>.*?)@(?P<host>.*?)$");
    string line = "a!b@c";
    auto found = match(line, userMask);
    assert(found);
    // The first group is reachable both by index and by name.
    auto caps = found.captures;
    assert(caps[1] == "a");
    assert(caps["nick"] == "a");
}
865 
866 
// bugzilla 9579: replace with a mutable char[] input and a string format
@safe unittest
{
    char[] haystack = "abc".dup;
    string fmt = "($1)";
    auto firstLetter = regex(`(a)`, "g");
    // used to give a compile error:
    auto replaced = replace(haystack, firstLetter, fmt);
    assert(replaced == "(a)bc");
}
877 
// bugzilla 9634: non-capturing group with `+` under ctRegex
@safe unittest
{
    auto runOfA = ctRegex!"(?:a+)";
    auto found = match("aaaa", runOfA);
    assert(found.hit == "aaaa");
}
884 
//bugzilla 10798: set difference (`--`) inside a ctRegex character class
@safe unittest
{
    auto withoutC = ctRegex!("[abcd--c]*");
    auto found = "abc".match(withoutC);
    assert(found);
    // 'c' is excluded from the class, so the hit ends right before it.
    assert(found.hit == "ab");
}
893 
// bugzilla 10913: replace with callbacks that carry different safety attributes
@system unittest
{
    @system static string copySystem(const(char)[] text)
    {
        return text.idup;
    }
    @safe static string copySafe(const(char)[] text)
    {
        return text.idup;
    }
    // A @system callback, instantiated from @system code.
    () @system {
        replace!((caps) => copySystem(caps.hit))("blah", regex(`a`));
    }();
    // A @safe callback, instantiated from @safe code.
    () @safe {
        replace!((caps) => copySafe(caps.hit))("blah", regex(`a`));
    }();
}
912 
// bugzilla 11262: replace through UFCS with an enum ctRegex
@safe unittest
{
    enum comma = ctRegex!(r",", "g");
    auto text = "This,List";
    text = text.replace(comma, "-");
    assert(text == "This-List");
}
921 
// bugzilla 11775: a repetition range whose upper bound is below the lower one
@safe unittest
{
    auto failure = collectException(regex("a{1,0}"));
    assert(failure !is null);
}
927 
// bugzilla 11839: which identifiers are accepted as named-group names
@safe unittest
{
    import std.algorithm.comparison : equal;
    // Names with trailing digits, underscores only, or non-ASCII letters are accepted.
    foreach (name; ["var1", "v1", "__", "я"])
    {
        auto named = regex(`(?P<` ~ name ~ `>\w+)`);
        assert(named.namedCaptures.equal([name]));
    }
    // A name that is just a digit is rejected.
    assert(collectException(regex(`(?P<1>\w+)`)) !is null);
}
938 
// bugzilla 12076: ctRegex with a negative lookbehind
@safe unittest
{
    // Instantiating the pattern and running it is the whole test;
    // the result is not inspected.
    auto afterWord = ctRegex!(r"(?<!x[a-z]+)\s([a-z]+)");
    string text = "one two";
    auto found = match(text, afterWord);
}
946 
// bugzilla 12105: `.*` / `.*?` followed by a negative lookahead under ctRegex
@safe unittest
{
    // Lazy: the hit ends at the first position not followed by 'a'.
    auto lazyDots = ctRegex!`.*?(?!a)`;
    auto shortHit = "aaab".matchFirst(lazyDots).hit;
    assert(shortHit == "aaa");

    // Greedy: the whole input is taken.
    auto greedyDots = ctRegex!`.*(?!a)`;
    auto longHit = "aaab".matchFirst(greedyDots).hit;
    assert(longHit == "aaab");
}
955 
//bugzilla 11784: intersection (`&&`) with a negated nested class
@safe unittest
{
    auto alphabet = "abcdefghijklmnopqrstuvwxyz";
    // 'a' is excluded by the nested class, so the first hit is "b".
    auto found = alphabet.matchFirst("[a-z&&[^aeiuo]]");
    assert(found.hit == "b");
}
962 
//bugzilla 12366: nested lookaheads with backreferences under ctRegex
@safe unittest
{
    auto nested = ctRegex!(`^((?=(xx+?)\2+$)((?=\2+$)(?=(x+)(\4+$))\5){2})*x?$`);
    auto eight = "xxxxxxxx".match(nested);
    auto four = "xxxx".match(nested);
    // Eight x's must be rejected, four x's must be accepted.
    assert(eight.empty);
    assert(!four.empty);
}
970 
// bugzilla 12582: looking up a group name the pattern does not define
@safe unittest
{
    auto named = regex(`(?P<a>abc)`);
    auto caps = "abc".matchFirst(named);
    // Only "a" is defined; asking for "b" has to throw.
    auto failure = collectException(caps["b"]);
    assert(failure !is null);
}
977 
// bugzilla 12691: starred group with an empty alternative, via bmatch
@safe unittest
{
    // "e@" must be rejected both with a pattern string and with a ctRegex.
    auto viaString = bmatch("e@", "^([a-z]|)*$");
    assert(viaString.empty);
    auto viaStatic = bmatch("e@", ctRegex!`^([a-z]|)*$`);
    assert(viaStatic.empty);
}
984 
//bugzilla 12713: this malformed pattern must be rejected with an exception
@safe unittest
{
    enum malformed = "[[a-z]([a-z]|(([[a-z])))";
    assertThrown(regex(malformed));
}
990 
//bugzilla 12747: each of these patterns using `\1` must be rejected
@safe unittest
{
    foreach (pattern; [`^x(\1)`, `^(x(\1))`, `^((x)(?=\1))`])
        assertThrown(regex(pattern));
}
998 
// bugzilla 14504: a long run of optional atoms followed by a long literal run
@safe unittest
{
    // Instantiating the ctRegex is the whole test.
    enum optionalPart = "a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?";
    enum requiredPart = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
    auto p = ctRegex!(optionalPart ~ requiredPart);
}
1005 
// bugzilla 14529: case-insensitive character class
@safe unittest
{
    auto anyCase = regex(r"^[CDF]$", "i");
    // Every letter of the class must match in both cases.
    foreach (letter; ["C", "c", "D", "d", "F", "f"])
    {
        auto all = matchAll(letter, anyCase);
        assert(all.front.hit == letter);
    }
}
1013 
// bugzilla 14615: the *Into variants must copy the input through when nothing matches
@safe unittest
{
    import std.array : appender;
    // List every std.regex symbol used below; the unused std.stdio import is gone.
    import std.regex : regex, replaceAllInto, replaceFirst, replaceFirstInto;

    auto example = "Hello, world!";
    auto pattern = regex("^Hello, (bug)");  // won't find this one
    auto result = replaceFirst(example, pattern, "$1 Sponge Bob");
    assert(result == "Hello, world!");  // Ok.

    // With no match the sink receives the input unchanged ...
    auto sink = appender!string;
    replaceFirstInto(sink, example, pattern, "$1 Sponge Bob");
    assert(sink.data == "Hello, world!");
    // ... and a second call appends another unchanged copy to the same sink.
    replaceAllInto(sink, example, pattern, "$1 Sponge Bob");
    assert(sink.data == "Hello, world!Hello, world!");
}
1032 
// bugzilla 15573: a space inside a character class under the "x" flag
@safe unittest
{
    auto spaced = regex("[c d]", "x");
    // "a b" contains neither 'c' nor 'd', so only the space can produce the match.
    auto found = "a b".matchFirst(spaced);
    assert(found);
}
1039 
// bugzilla 15864: building this pattern is the whole test
@safe unittest
{
    enum hrefPattern = `(<a (?:(?:\w+=\"[^"]*\")?\s*)*href="\.\.?)"`;
    auto compiled = regex(hrefPattern);
}
1045 
@safe unittest
{
    // `(?# ...)` groups are comments: they take no part in matching.
    auto commented = regex("(?# comment)abc(?# comment2)");
    auto found = "abc".matchFirst(commented);
    assert(found);
    // A comment group that is never closed is an error.
    assertThrown(regex("(?#..."));
}
1052 
// bugzilla 17075: a static ctRegex run against a 100_000-character tail with no match
@safe unittest
{
    enum titlePattern = `<title>(.+)</title>`;
    static titleRegex = ctRegex!titlePattern;
    // An opening tag followed by 100_000 '<' and no closing tag.
    string unclosed = "<title>" ~ join(repeat("<", 100_000));
    auto found = unclosed.matchFirst(titleRegex);
    assert(found.empty);
}
1061 
// bugzilla 17212: spaces around a character class under the "x" flag
@safe unittest
{
    auto padded = regex(" [a] ", "x");
    // The surrounding spaces must not be required in the input.
    auto found = "a".matchFirst(padded);
    assert(found);
}
1068 
// bugzilla 17157: groups of alternatives that did not take part must come back empty
@safe unittest
{
    import std.algorithm.comparison : equal;
    auto text = "--a--b--c--d--";
    // One row per expected hit: the whole match followed by the four groups.
    auto expected = [
        ["a", "a", "", "", ""],
        ["b", "", "b", "", ""],
        ["c", "", "", "c", ""],
        ["d", "", "", "", "d"]
    ];
    auto compiled = ctRegex!"(a)|(b)|(c)|(d)";
    assert(equal!equal(text.matchAll(compiled), expected));
    auto runtime = regex("(a)|(b)|(c)|(d)", "g");
    assert(equal!equal(text.bmatch(runtime), expected));
}
1085 
// bugzilla 17667: error messages for malformed patterns in a multi-pattern regex
@safe unittest
{
    import std.algorithm.searching : canFind;
    // Asserts that regex(arg) throws and that the exception message contains
    // `msg`. `line` defaults to the caller's line so failures point at the call.
    void willThrow(T, size_t line = __LINE__)(T arg, string msg)
    {
        auto e = collectException(regex(arg));
        // collectException returns null when nothing was thrown; report that
        // as a test failure instead of dereferencing null below.
        assert(e !is null, to!string(line) ~ ": no exception thrown, expected: " ~ msg);
        assert(e.msg.canFind(msg), to!string(line) ~ ": " ~ e.msg);
    }
    willThrow([r".", r"[\(\{[\]\}\)]"], "no matching ']' found while parsing character class");
    willThrow([r"[\", r"123"], "no matching ']' found while parsing character class");
    willThrow([r"[a-", r"123"], "no matching ']' found while parsing character class");
    willThrow([r"[a-\", r"123"], "invalid escape sequence");
    willThrow([r"\", r"123"], "invalid escape sequence");
}
1101 
// bugzilla 17668: `[^]` must be reported as a missing operand for '^'
@safe unittest
{
    import std.algorithm.searching : canFind;
    auto e = collectException!RegexException(regex(q"<[^]>"));
    // collectException returns null when nothing was thrown; fail with a
    // readable message instead of dereferencing null on the next line.
    assert(e !is null, "regex(`[^]`) was expected to throw a RegexException");
    assert(e.msg.canFind("no operand for '^'"));
}
1109 
// bugzilla 17673: whichPattern for a regex built from several patterns
@safe unittest
{
    string[] alternatives = ["abc", "\"|x"];
    auto combined = regex(alternatives);
    string text = `<">`;
    auto caps = matchFirst(text, combined);
    assert(caps);
    // The hit comes from the second pattern, and whichPattern reports it as 2.
    assert(caps.whichPattern == 2);
}
1120 
1121