1 /*
    Regular expressions package test suite.
3 */
4 module std.regex.internal.tests;
5 
6 package(std.regex):
7 
8 import std.conv, std.exception, std.meta, std.range,
9     std.typecons, std.regex;
10 
11 import std.uni : Escapables; // characters that need escaping
12 
13 debug(std_regex_test) import std.stdio;
14 
@safe unittest
{
    // Sanity checks: a few patterns must compile, both matching entry points
    // must find and reject the obvious inputs, and Regex.empty must follow
    // assignment.

    // These patterns are only compiled; the resulting engines are discarded.
    foreach (source; ["(a|b)*",
                      `(?:([0-9A-F]+)\.\.([0-9A-F]+)|([0-9A-F]+))\s*;\s*(.*)\s*#`,
                      "abc|edf|ighrg"])
    {
        regex(source);
    }

    auto plainAbc = regex("abc");
    auto groupedWord = regex("(gylba)");

    // match: one hit, one miss
    assert("abcdef".match(plainAbc).hit == "abc");
    assert(!"wida".match(groupedWord));

    // bmatch: same expectations
    assert("abcdef".bmatch(plainAbc).hit == "abc");
    assert(!"wida".bmatch(groupedWord));

    // The pattern may also be handed over as a mutable string
    assert("abc".match("abc".dup));
    assert("abc".bmatch("abc".dup));

    // A default-initialized Regex reports empty until a pattern is assigned
    Regex!char holder;
    assert(holder.empty);
    holder = regex("test");
    assert(!holder.empty);
}
33 
34 /* The test vectors in this file are altered from Henry Spencer's regexp
35    test code. His copyright notice is:
36 
37         Copyright (c) 1986 by University of Toronto.
38         Written by Henry Spencer.  Not derived from licensed software.
39 
40         Permission is granted to anyone to use this software for any
41         purpose on any computer system, and to redistribute it freely,
42         subject to the following restrictions:
43 
44         1. The author is not responsible for the consequences of use of
45                 this software, no matter how awful, even if they arise
46                 from defects in it.
47 
48         2. The origin of this software must not be misrepresented, either
49                 by explicit claim or by omission.
50 
51         3. Altered versions must be plainly marked as such, and must not
52                 be misrepresented as being the original software.
53 
54 
55  */
56 
57 @safe unittest
58 {
59     struct TestVectors
60     {
61         string pattern;
62         string input;
63         string result;
64         string format;
65         string replace;
66         string flags;
67     }
68 
    // Table of test cases consumed by run_tests and ct_tests.
    // Columns: pattern, input, result, format, replace [, flags].
    //   result "y": the pattern must compile and match `input`; `format`
    //               expanded against the captures must equal `replace`.
    //   result "n": the pattern must compile but must not match `input`.
    //   result "c": the pattern must be rejected (RegexException at run time,
    //               failure to compile for the compile-time checks).
    // NOTE: ct_tests picks rows by numeric index, so inserting, removing or
    // reordering rows changes which cases each ct slice covers.
    static immutable TestVectors[] tv = [
        TestVectors(  "a\\b",       "a",  "y",    "$&",    "a" ),
        TestVectors(  "(a)b\\1",   "abaab","y",    "$&",    "aba" ),
        TestVectors(  "()b\\1",     "aaab", "y",    "$&",    "b" ),
        TestVectors(  "abc",       "abc",  "y",    "$&",    "abc" ),
        TestVectors(  "abc",       "xbc",  "n",    "-",    "-" ),
        TestVectors(  "abc",       "axc",  "n",    "-",    "-" ),
        TestVectors(  "abc",       "abx",  "n",    "-",    "-" ),
        TestVectors(  "abc",       "xabcy","y",    "$&",    "abc" ),
        TestVectors(  "abc",       "ababc","y",    "$&",    "abc" ),
        TestVectors(  "ab*c",      "abc",  "y",    "$&",    "abc" ),
        TestVectors(  "ab*bc",     "abc",  "y",    "$&",    "abc" ),
        TestVectors(  "ab*bc",     "abbc", "y",    "$&",    "abbc" ),
        TestVectors(  "ab*bc",     "abbbbc","y",   "$&",    "abbbbc" ),
        TestVectors(  "ab+bc",     "abbc", "y",    "$&",    "abbc" ),
        TestVectors(  "ab+bc",     "abc",  "n",    "-",    "-" ),
        TestVectors(  "ab+bc",     "abq",  "n",    "-",    "-" ),
        TestVectors(  "ab+bc",     "abbbbc","y",   "$&",    "abbbbc" ),
        TestVectors(  "ab?bc",     "abbc", "y",    "$&",    "abbc" ),
        TestVectors(  "ab?bc",     "abc",  "y",    "$&",    "abc" ),
        TestVectors(  "ab?bc",     "abbbbc","n",   "-",    "-" ),
        TestVectors(  "ab?c",      "abc",  "y",    "$&",    "abc" ),
        TestVectors(  "^abc$",     "abc",  "y",    "$&",    "abc" ),
        TestVectors(  "^abc$",     "abcc", "n",    "-",    "-" ),
        TestVectors(  "^abc",      "abcc", "y",    "$&",    "abc" ),
        TestVectors(  "^abc$",     "aabc", "n",    "-",    "-" ),
        TestVectors(  "abc$",      "aabc", "y",    "$&",    "abc" ),
        TestVectors(  "^",         "abc",  "y",    "$&",    "" ),
        TestVectors(  "$",         "abc",  "y",    "$&",    "" ),
        TestVectors(  "a.c",       "abc",  "y",    "$&",    "abc" ),
        TestVectors(  "a.c",       "axc",  "y",    "$&",    "axc" ),
        TestVectors(  "a.*c",      "axyzc","y",    "$&",    "axyzc" ),
        TestVectors(  "a.*c",      "axyzd","n",    "-",    "-" ),
        TestVectors(  "a[bc]d",    "abc",  "n",    "-",    "-" ),
        TestVectors(  "a[bc]d",    "abd",  "y",    "$&",    "abd" ),
        TestVectors(  "a[b-d]e",   "abd",  "n",    "-",    "-" ),
        TestVectors(  "a[b-d]e",   "ace",  "y",    "$&",    "ace" ),
        TestVectors(  "a[b-d]",    "aac",  "y",    "$&",    "ac" ),
        TestVectors(  "a[-b]",     "a-",   "y",    "$&",    "a-" ),
        TestVectors(  "a[b-]",     "a-",   "y",    "$&",    "a-" ),
        TestVectors(  "a[b-a]",    "-",    "c",    "-",    "-" ),
        TestVectors(  "a[]b",      "-",    "c",    "-",    "-" ),
        TestVectors(  "a[",        "-",    "c",    "-",    "-" ),
        TestVectors(  "a]",        "a]",   "y",    "$&",    "a]" ),
        TestVectors(  "a[\\]]b",     "a]b",  "y",  "$&",    "a]b" ),
        TestVectors(  "a[^bc]d",   "aed",  "y",    "$&",    "aed" ),
        TestVectors(  "a[^bc]d",   "abd",  "n",    "-",    "-" ),
        TestVectors(  "a[^-b]c",   "adc",  "y",    "$&",    "adc" ),
        TestVectors(  "a[^-b]c",   "a-c",  "n",    "-",    "-" ),
        TestVectors(  "a[^\\]b]c",   "adc",  "y",  "$&",    "adc" ),
        TestVectors(  "ab|cd",     "abc",  "y",    "$&",    "ab" ),
        TestVectors(  "ab|cd",     "abcd", "y",    "$&",    "ab" ),
        TestVectors(  "()ef",      "def",  "y",    "$&-$1",        "ef-" ),
        TestVectors(  "()*",       "-",    "y",    "-",    "-" ),
        TestVectors(  "*a",        "-",    "c",    "-",    "-" ),
        TestVectors(  "^*",        "-",    "y",    "-",    "-" ),
        TestVectors(  "$*",        "-",    "y",    "-",    "-" ),
        TestVectors(  "(*)b",      "-",    "c",    "-",    "-" ),
        TestVectors(  "$b",        "b",    "n",    "-",    "-" ),
        TestVectors(  "a\\",       "-",    "c",    "-",    "-" ),
        TestVectors(  "a\\(b",     "a(b",  "y",    "$&-$1",        "a(b-" ),
        TestVectors(  "a\\(*b",    "ab",   "y",    "$&",    "ab" ),
        TestVectors(  "a\\(*b",    "a((b", "y",    "$&",    "a((b" ),
        TestVectors(  "a\\\\b",    "a\\b", "y",    "$&",    "a\\b" ),
        TestVectors(  "abc)",      "-",    "c",    "-",    "-" ),
        TestVectors(  "(abc",      "-",    "c",    "-",    "-" ),
        TestVectors(  "((a))",     "abc",  "y",    "$&-$1-$2",    "a-a-a" ),
        TestVectors(  "(a)b(c)",   "abc",  "y",    "$&-$1-$2",    "abc-a-c" ),
        TestVectors(  "a+b+c",     "aabbabc","y",  "$&",    "abc" ),
        TestVectors(  "a**",       "-",    "c",    "-",    "-" ),
        TestVectors(  "a*?a",      "aa",   "y",    "$&",    "a" ),
        TestVectors(  "(a*)*",     "aaa",  "y",    "-",    "-" ),
        TestVectors(  "(a*)+",     "aaa",  "y",    "-",    "-" ),
        TestVectors(  "(a|)*",     "-",    "y",    "-",    "-" ),
        TestVectors(  "(a*|b)*",   "aabb", "y",    "-",    "-" ),
        TestVectors(  "(a|b)*",    "ab",   "y",    "$&-$1",        "ab-b" ),
        TestVectors(  "(a+|b)*",   "ab",   "y",    "$&-$1",        "ab-b" ),
        TestVectors(  "(a+|b)+",   "ab",   "y",    "$&-$1",        "ab-b" ),
        TestVectors(  "(a+|b)?",   "ab",   "y",    "$&-$1",        "a-a" ),
        TestVectors(  "[^ab]*",    "cde",  "y",    "$&",    "cde" ),
        TestVectors(  "(^)*",      "-",    "y",    "-",    "-" ),
        TestVectors(  "(ab|)*",    "-",    "y",    "-",    "-" ),
        TestVectors(  ")(",        "-",    "c",    "-",    "-" ),
        TestVectors(  "",  "abc",  "y",    "$&",    "" ),
        TestVectors(  "abc",       "",     "n",    "-",    "-" ),
        TestVectors(  "a*",        "",     "y",    "$&",    "" ),
        TestVectors(  "([abc])*d", "abbbcd",       "y",    "$&-$1",        "abbbcd-c" ),
        TestVectors(  "([abc])*bcd", "abcd",       "y",    "$&-$1",        "abcd-a" ),
        TestVectors(  "a|b|c|d|e", "e",    "y",    "$&",    "e" ),
        TestVectors(  "(a|b|c|d|e)f", "ef",        "y",    "$&-$1",        "ef-e" ),
        TestVectors(  "((a*|b))*", "aabb", "y",    "-",    "-" ),
        TestVectors(  "abcd*efg",  "abcdefg",      "y",    "$&",    "abcdefg" ),
        TestVectors(  "ab*",       "xabyabbbz",    "y",    "$&",    "ab" ),
        TestVectors(  "ab*",       "xayabbbz",     "y",    "$&",    "a" ),
        TestVectors(  "(ab|cd)e",  "abcde",        "y",    "$&-$1",        "cde-cd" ),
        TestVectors(  "[abhgefdc]ij",      "hij",  "y",    "$&",    "hij" ),
        TestVectors(  "^(ab|cd)e", "abcde",        "n",    "x$1y",        "xy" ),
        TestVectors(  "(abc|)ef",  "abcdef",       "y",    "$&-$1",        "ef-" ),
        TestVectors(  "(a|b)c*d",  "abcd",         "y",    "$&-$1",        "bcd-b" ),
        TestVectors(  "(ab|ab*)bc",        "abc",  "y",    "$&-$1",        "abc-a" ),
        TestVectors(  "a([bc]*)c*",        "abc",  "y",    "$&-$1",        "abc-bc" ),
        TestVectors(  "a([bc]*)(c*d)",     "abcd", "y",    "$&-$1-$2",    "abcd-bc-d" ),
        TestVectors(  "a([bc]+)(c*d)",     "abcd", "y",    "$&-$1-$2",    "abcd-bc-d" ),
        TestVectors(  "a([bc]*)(c+d)",     "abcd", "y",    "$&-$1-$2",    "abcd-b-cd" ),
        TestVectors(  "a[bcd]*dcdcde",     "adcdcde",      "y",    "$&",    "adcdcde" ),
        TestVectors(  "a[bcd]+dcdcde",     "adcdcde",      "n",    "-",    "-" ),
        TestVectors(  "(ab|a)b*c", "abc",           "y",    "$&-$1",        "abc-ab" ),
        TestVectors(  "((a)(b)c)(d)",      "abcd",  "y",    "$1-$2-$3-$4",      "abc-a-b-d" ),
        TestVectors(  "[a-zA-Z_][a-zA-Z0-9_]*",    "alpha",        "y",    "$&",    "alpha" ),
        TestVectors(  "^a(bc+|b[eh])g|.h$",        "abh",  "y",    "$&-$1",        "bh-" ),
        TestVectors(  "(bc+d$|ef*g.|h?i(j|k))",    "effgz",        "y",    "$&-$1-$2",    "effgz-effgz-" ),
        TestVectors(  "(bc+d$|ef*g.|h?i(j|k))",    "ij",   "y",    "$&-$1-$2",    "ij-ij-j" ),
        TestVectors(  "(bc+d$|ef*g.|h?i(j|k))",    "effg", "n",    "-",    "-" ),
        TestVectors(  "(bc+d$|ef*g.|h?i(j|k))",    "bcdd", "n",    "-",    "-" ),
        TestVectors(  "(bc+d$|ef*g.|h?i(j|k))",    "reffgz",       "y",    "$&-$1-$2",    "effgz-effgz-" ),
        TestVectors(  "(((((((((a)))))))))",       "a",    "y",    "$&",    "a" ),
        TestVectors(  "multiple words of text",    "uh-uh",        "n",    "-",    "-" ),
        TestVectors(  "multiple words",    "multiple words, yeah", "y",    "$&",    "multiple words" ),
        TestVectors(  "(.*)c(.*)", "abcde",                "y",    "$&-$1-$2",    "abcde-ab-de" ),
        TestVectors(  "\\((.*), (.*)\\)",  "(a, b)",       "y",    "($2, $1)",   "(b, a)" ),
        TestVectors(  "abcd",      "abcd",                   "y",    "$&-&-$$$&",  "abcd-&-$abcd" ),
        TestVectors(  "a(bc)d",    "abcd",                 "y",    "$1-$$1-$$$1",    "bc-$1-$bc" ),
        TestVectors(  "[k]",                       "ab",   "n",    "-",    "-" ),
        TestVectors(  "[ -~]*",                    "abc",  "y",    "$&",    "abc" ),
        TestVectors(  "[ -~ -~]*",                 "abc",  "y",    "$&",    "abc" ),
        TestVectors(  "[ -~ -~ -~]*",              "abc",  "y",    "$&",    "abc" ),
        TestVectors(  "[ -~ -~ -~ -~]*",           "abc",  "y",    "$&",    "abc" ),
        TestVectors(  "[ -~ -~ -~ -~ -~]*",        "abc",  "y",    "$&",    "abc" ),
        TestVectors(  "[ -~ -~ -~ -~ -~ -~]*",     "abc",  "y",    "$&",    "abc" ),
        TestVectors(  "[ -~ -~ -~ -~ -~ -~ -~]*",  "abc",  "y",    "$&",    "abc" ),
        TestVectors(  "a{2}",      "candy",                "n",    "",     "" ),
        TestVectors(  "a{2}",      "caandy",               "y",    "$&",    "aa" ),
        TestVectors(  "a{2}",      "caaandy",              "y",    "$&",    "aa" ),
        TestVectors(  "a{2,}",     "candy",                "n",    "",     "" ),
        TestVectors(  "a{2,}",     "caandy",               "y",    "$&",    "aa" ),
        TestVectors(  "a{2,}",     "caaaaaandy",           "y",    "$&",    "aaaaaa" ),
        TestVectors(  "a{1,3}",    "cndy",                 "n",    "",     "" ),
        TestVectors(  "a{1,3}",    "candy",                "y",    "$&",    "a" ),
        TestVectors(  "a{1,3}",    "caandy",               "y",    "$&",    "aa" ),
        TestVectors(  "a{1,3}",    "caaaaaandy",           "y",    "$&",    "aaa" ),
        TestVectors(  "e?le?",     "angel",                "y",    "$&",    "el" ),
        TestVectors(  "e?le?",     "angle",                "y",    "$&",    "le" ),
        TestVectors(  "\\bn\\w",   "noonday",              "y",    "$&",    "no" ),
        TestVectors(  "\\wy\\b",   "possibly yesterday",   "y",    "$&",    "ly" ),
        TestVectors(  "\\w\\Bn",   "noonday",              "y",    "$&",    "on" ),
        TestVectors(  "y\\B\\w",   "possibly yesterday",   "y",    "$&",    "ye" ),
        TestVectors(  "\\cJ",      "abc\ndef",             "y",    "$&",    "\n" ),
        TestVectors(  "\\d",       "B2 is",                "y",    "$&",    "2" ),
        TestVectors(  "\\D",       "B2 is",                "y",    "$&",    "B" ),
        TestVectors(  "\\s\\w*",   "foo bar",              "y",    "$&",    " bar" ),
        TestVectors(  "\\S\\w*",   "foo bar",              "y",    "$&",    "foo" ),
        TestVectors(  "abc",       "ababc",                "y",    "$&",    "abc" ),
        TestVectors(  "apple(,)\\sorange\\1",      "apple, orange, cherry, peach", "y", "$&", "apple, orange," ),
        TestVectors(  "(\\w+)\\s(\\w+)",           "John Smith", "y", "$2, $1", "Smith, John" ),
        TestVectors(  "\\n\\f\\r\\t\\v",           "abc\n\f\r\t\vdef", "y", "$&", "\n\f\r\t\v" ),
        TestVectors(  ".*c",       "abcde",                        "y",    "$&",    "abc" ),
        TestVectors(  "^\\w+((;|=)\\w+)+$", "some=host=tld",    "y", "$&-$1-$2", "some=host=tld-=tld-=" ),
        TestVectors(  "^\\w+((\\.|-)\\w+)+$", "some.host.tld",    "y", "$&-$1-$2", "some.host.tld-.tld-." ),
        TestVectors(  "q(a|b)*q",  "xxqababqyy",                "y",    "$&-$1",        "qababq-b" ),
        TestVectors(  "^(a)(b){0,1}(c*)",   "abcc", "y", "$1 $2 $3", "a b cc" ),
        TestVectors(  "^(a)((b){0,1})(c*)", "abcc", "y", "$1 $2 $3", "a b b" ),
        TestVectors(  "^(a)(b)?(c*)",       "abcc", "y", "$1 $2 $3", "a b cc" ),
        TestVectors(  "^(a)((b)?)(c*)",     "abcc", "y", "$1 $2 $3", "a b b" ),
        TestVectors(  "^(a)(b){0,1}(c*)",   "acc",  "y", "$1 $2 $3", "a  cc" ),
        TestVectors(  "^(a)((b){0,1})(c*)", "acc",  "y", "$1 $2 $3", "a  " ),
        TestVectors(  "^(a)(b)?(c*)",       "acc",  "y", "$1 $2 $3", "a  cc" ),
        TestVectors(  "^(a)((b)?)(c*)",     "acc",  "y", "$1 $2 $3", "a  " ),
        TestVectors(  "(?:ab){3}",       "_abababc","y", "$&-$1",    "ababab-" ),
        TestVectors(  "(?:a(?:x)?)+",    "aaxaxx",  "y", "$&-$1-$2", "aaxax--" ),
        TestVectors(  `\W\w\W`,         "aa b!ca",  "y", "$&",       " b!"),
//more repetitions:
        TestVectors(  "(?:a{2,4}b{1,3}){1,2}",  "aaabaaaabbb", "y", "$&", "aaabaaaabbb" ),
        TestVectors(  "(?:a{2,4}b{1,3}){1,2}?", "aaabaaaabbb", "y", "$&", "aaab" ),
//groups:
        TestVectors(  "(abc)|(edf)|(xyz)",     "xyz",             "y",   "$1-$2-$3","--xyz"),
        TestVectors(  "(?P<q>\\d+)/(?P<d>\\d+)",     "2/3",       "y",     "${d}/${q}",    "3/2"),
//set operations:
        TestVectors(  "[a-z--d-f]",                  " dfa",      "y",   "$&",     "a"),
        TestVectors(  "[abc[pq--acq]]{2}",           "bqpaca",    "y",   "$&",     "pa"),
        TestVectors(  "[a-z9&&abc0-9]{3}",           "z90a0abc",  "y",   "$&",     "abc"),
        TestVectors(  "[0-9a-f~~0-5a-z]{2}",         "g0a58x",    "y",   "$&",     "8x"),
        TestVectors(  "[abc[pq]xyz[rs]]{4}",         "cqxr",      "y",   "$&",     "cqxr"),
        TestVectors(  "[abcdf--[ab&&[bcd]][acd]]",   "abcdefgh",  "y",   "$&",     "f"),
        TestVectors(  "[a-c||d-f]+",    "abcdef", "y", "$&", "abcdef"),
        TestVectors(  "[a-f--a-c]+",    "abcdef", "y", "$&", "def"),
        TestVectors(  "[a-c&&b-f]+",    "abcdef", "y", "$&", "bc"),
        TestVectors(  "[a-c~~b-f]+",    "abcdef", "y", "$&", "a"),
//unicode blocks & properties:
        TestVectors(  `\P{Inlatin1suppl ement}`, "\u00c2!", "y", "$&", "!"),
        TestVectors(  `\p{InLatin-1 Supplement}\p{in-mathematical-operators}\P{Inlatin1suppl ement}`,
            "\u00c2\u2200\u00c3\u2203.", "y", "$&", "\u00c3\u2203."),
        TestVectors(  `[-+*/\p{in-mathematical-operators}]{2}`,    "a+\u2212",    "y",    "$&",    "+\u2212"),
        TestVectors(  `\p{Ll}+`,                      "XabcD",    "y",  "$&",      "abc"),
        TestVectors(  `\p{Lu}+`,                      "абвГДЕ",   "y",  "$&",      "ГДЕ"),
        TestVectors(  `^\p{Currency Symbol}\p{Sc}`,   "$₤",       "y",  "$&",      "$₤"),
        TestVectors(  `\p{Common}\p{Thai}`,           "!ฆ",       "y",  "$&",      "!ฆ"),
        TestVectors(  `[\d\s]*\D`,  "12 \t3\U00001680\u0F20_2",   "y",  "$&", "12 \t3\U00001680\u0F20_"),
        TestVectors(  `[c-wф]фф`, "ффф", "y", "$&", "ффф"),
//case insensitive:
        TestVectors(   `^abcdEf$`,           "AbCdEF",              "y",   "$&", "AbCdEF",      "i"),
        TestVectors(   `Русский язык`, "рУсскИй ЯзЫк", "y", "$&", "рУсскИй ЯзЫк", "i"),
        TestVectors(   `ⒶⒷⓒ` ,        "ⓐⓑⒸ",                   "y",   "$&", "ⓐⓑⒸ",      "i"),
        TestVectors(   "\U00010400{2}",  "\U00010428\U00010400 ",   "y",   "$&", "\U00010428\U00010400", "i"),
        TestVectors(   `[adzУ-Я]{4}`,    "DzюЯ",                   "y",   "$&", "DzюЯ", "i"),
        TestVectors(   `\p{L}\p{Lu}{10}`, "абвгдеЖЗИКЛ", "y",   "$&", "абвгдеЖЗИКЛ", "i"),
        TestVectors(   `(?:Dåb){3}`,  "DåbDÅBdÅb",                  "y",   "$&", "DåbDÅBdÅb", "i"),
//escapes:
        TestVectors(    `\u0041\u005a\U00000065\u0001`,         "AZe\u0001",       "y",   "$&", "AZe\u0001"),
        TestVectors(    `\u`,               "",   "c",   "-",  "-"),
        TestVectors(    `\U`,               "",   "c",   "-",  "-"),
        TestVectors(    `\u003`,            "",   "c",   "-",  "-"),
        TestVectors(    `[\x00-\x7f]{4}`,        "\x00\x09ab",   "y", "$&", "\x00\x09ab"),
        TestVectors(    `[\cJ\cK\cA-\cD]{3}\cQ`, "\x01\x0B\x0A\x11", "y", "$&", "\x01\x0B\x0A\x11"),
        TestVectors(    `\r\n\v\t\f\\`,     "\r\n\v\t\f\\",   "y",   "$&", "\r\n\v\t\f\\"),
        TestVectors(    `[\u0003\u0001]{2}`,  "\u0001\u0003",         "y",   "$&", "\u0001\u0003"),
        TestVectors(    `^[\u0020-\u0080\u0001\n-\r]{8}`,  "abc\u0001\v\f\r\n",  "y",   "$&", "abc\u0001\v\f\r\n"),
        TestVectors(    `\w+\S\w+`, "ab7!44c",  "y", "$&", "ab7!44c"),
        TestVectors(    `\b\w+\b`,  " abde4 ",  "y", "$&", "abde4"),
        TestVectors(    `\b\w+\b`,  " abde4",   "y", "$&", "abde4"),
        TestVectors(    `\b\w+\b`,  "abde4 ",   "y", "$&", "abde4"),
        TestVectors(    `\pL\pS`,   "a\u02DA",  "y", "$&", "a\u02DA"),
        TestVectors(    `\pX`,      "",         "c", "-",  "-"),
// ^, $, \b, \B, multiline :
        TestVectors(    `\r.*?$`,    "abc\r\nxy", "y", "$&", "\r\nxy", "sm"),
        TestVectors(    `^a$^b$`,    "a\r\nb\n",  "n", "$&", "-", "m"),
        TestVectors(    `^a$\r\n^b$`,"a\r\nb\n",  "y", "$&", "a\r\nb", "m"),
        TestVectors(    `^$`,        "\r\n",      "y", "$&", "", "m"),
        TestVectors(    `^a$\nx$`,   "a\nx\u2028","y", "$&", "a\nx", "m"),
        TestVectors(    `^a$\nx$`,   "a\nx\u2029","y", "$&", "a\nx", "m"),
        TestVectors(    `^a$\nx$`,   "a\nx\u0085","y", "$&", "a\nx","m"),
        TestVectors(    `^x$`,       "\u2028x",   "y", "$&", "x", "m"),
        TestVectors(    `^x$`,       "\u2029x",   "y", "$&", "x", "m"),
        TestVectors(    `^x$`,       "\u0085x",   "y", "$&", "x", "m"),
        TestVectors(    `\b^.`,      "ab",        "y", "$&", "a"),
        TestVectors(    `\B^.`,      "ab",        "n", "-",  "-"),
        TestVectors(    `^ab\Bc\B`,  "\r\nabcd",  "y", "$&", "abc", "m"),
        TestVectors(    `^.*$`,      "12345678",  "y", "$&", "12345678"),

// luckily obtained regression on incremental matching in backtracker
        TestVectors(  `^(?:(?:([0-9A-F]+)\.\.([0-9A-F]+)|([0-9A-F]+))\s*;\s*([^ ]*)\s*#|# (?:\w|_)+=((?:\w|_)+))`,
            "0020  ; White_Space # ", "y", "$1-$2-$3", "--0020"),
//lookahead
        TestVectors(    "(foo.)(?=(bar))",     "foobar foodbar", "y", "$&-$1-$2", "food-food-bar" ),
        TestVectors(    `\b(\d+)[a-z](?=\1)`,  "123a123",        "y", "$&-$1", "123a-123" ),
        TestVectors(    `\$(?!\d{3})\w+`,      "$123 $abc",      "y", "$&", "$abc"),
        TestVectors(    `(abc)(?=(ed(f))\3)`,    "abcedff",      "y", "-", "-"),
        TestVectors(    `\b[A-Za-z0-9.]+(?=(@(?!gmail)))`, "a@gmail,x@com",  "y", "$&-$1", "x-@"),
        TestVectors(    `x()(abc)(?=(d)(e)(f)\2)`,   "xabcdefabc", "y", "$&", "xabc"),
        TestVectors(    `x()(abc)(?=(d)(e)(f)()\3\4\5)`,   "xabcdefdef", "y", "$&", "xabc"),
//lookback
        TestVectors(    `(?<=(ab))\d`,    "12ba3ab4",    "y",   "$&-$1", "4-ab",  "i"),
        TestVectors(    `\w(?<!\d)\w`,   "123ab24",  "y",   "$&", "ab"),
        TestVectors(    `(?<=Dåb)x\w`,  "DåbDÅBxdÅb",  "y",   "$&", "xd", "i"),
        TestVectors(    `(?<=(ab*c))x`,   "abbbbcxac",  "y",   "$&-$1", "x-abbbbc"),
        TestVectors(    `(?<=(ab*?c))x`,   "abbbbcxac",  "y",   "$&-$1", "x-abbbbc"),
        TestVectors(    `(?<=(a.*?c))x`,   "ababbcxac",  "y",   "$&-$1", "x-abbc"),
        TestVectors(    `(?<=(a{2,4}b{1,3}))x`,   "yyaaaabx",  "y",   "$&-$1", "x-aaaab"),
        TestVectors(    `(?<=((?:a{2,4}b{1,3}){1,2}))x`,   "aabbbaaaabx",  "y",   "$&-$1", "x-aabbbaaaab"),
        TestVectors(    `(?<=((?:a{2,4}b{1,3}){1,2}?))x`,   "aabbbaaaabx",  "y",   "$&-$1", "x-aaaab"),
        TestVectors(    `(?<=(abc|def|aef))x`,    "abcx", "y",        "$&-$1",  "x-abc"),
        TestVectors(    `(?<=(abc|def|aef))x`,    "aefx", "y",        "$&-$1",  "x-aef"),
        TestVectors(    `(?<=(abc|dabc))(x)`,    "dabcx", "y",        "$&-$1-$2",  "x-abc-x"),
        TestVectors(    `(?<=(|abc))x`,        "dabcx", "y",        "$&-$1",  "x-"),
        TestVectors(    `(?<=((ab|da)*))x`,    "abdaabx", "y",        "$&-$2-$1",  "x-ab-abdaab"),
        TestVectors(    `a(?<=(ba(?<=(aba)(?<=aaba))))`, "aabaa", "y", "$&-$1-$2", "a-ba-aba"),
        TestVectors(    `.(?<!b).`,   "bax",  "y", "$&", "ax"),
        TestVectors(    `(?<=b(?<!ab)).`,   "abbx",  "y",  "$&", "x"),
        TestVectors(    `(?<=\.|[!?]+)X`,   "Hey?!X", "y", "$&", "X"),
        TestVectors(    `(?<=\.|[!?]+)a{3}`,   ".Nope.aaaX", "y", "$&", "aaa"),
//mixed lookaround
        TestVectors(   `a(?<=a(?=b))b`,    "ab", "y",      "$&", "ab"),
        TestVectors(   `a(?<=a(?!b))c`,    "ac", "y",      "$&", "ac"),
        TestVectors(   `a(?i)bc`,         "aBc", "y",      "$&", "aBc"),
        TestVectors(   `a(?i)bc`,         "Abc", "n",      "$&", "-"),
        TestVectors(   `(?i)a(?-i)bc`, "aBcAbc", "y",      "$&", "Abc"),
        TestVectors(   `(?s).(?-s).`, "\n\n\na", "y",      "$&", "\na"),
        TestVectors(   `(?m)^a(?-m)$`,  "\na",   "y",      "$&", "a")
        ];
produceExpected(M,String)347     string produceExpected(M,String)(auto ref M m, String fmt)
348     {
349         auto app = appender!(String)();
350         replaceFmt(fmt, m.captures, app, true);
351         return app.data;
352     }
run_tests(alias matchFn)353     void run_tests(alias matchFn)()
354     {
355         int i;
356         static foreach (Char; AliasSeq!( char, wchar, dchar))
357         {{
358             alias String = immutable(Char)[];
359             String produceExpected(M,Range)(auto ref M m, Range fmt)
360             {
361                 auto app = appender!(String)();
362                 replaceFmt(fmt, m.captures, app, true);
363                 return app.data;
364             }
365             Regex!(Char) r;
366             foreach (a, tvd; tv)
367             {
368                 uint c = tvd.result[0];
369                 debug(std_regex_test) writeln(" Test #", a, " pattern: ", tvd.pattern, " with Char = ", Char.stringof);
370                 try
371                 {
372                     i = 1;
373                     r = regex(to!(String)(tvd.pattern), tvd.flags);
374                 }
375                 catch (RegexException e)
376                 {
377                     i = 0;
378                     debug(std_regex_test) writeln(e.msg);
379                 }
380 
381                 assert((c == 'c') ? !i : i, "failed to compile pattern "~tvd.pattern);
382 
383                 if (c != 'c')
384                 {
385                     auto m = matchFn(to!(String)(tvd.input), r);
386                     i = !m.empty;
387                     assert(
388                         (c == 'y') ? i : !i,
389                         text(matchFn.stringof ~": failed to match pattern #", a ,": ", tvd.pattern)
390                     );
391                     if (c == 'y')
392                     {
393                         auto result = produceExpected(m, to!(String)(tvd.format));
394                         assert(result == to!String(tvd.replace),
395                             text(matchFn.stringof ~": mismatch pattern #", a, ": ", tvd.pattern," expected: ",
396                                     tvd.replace, " vs ", result));
397                     }
398                 }
399             }
400         }}
401         debug(std_regex_test) writeln("!!! FReD bulk test done "~matchFn.stringof~" !!!");
402     }
403 
404 
ct_tests()405     void ct_tests()
406     {
407         import std.algorithm.comparison : equal;
408         version (std_regex_ct1)
409         {
410             pragma(msg, "Testing 1st part of ctRegex");
411             enum Tests = iota(0, 155);
412         }
413         else version (std_regex_ct2)
414         {
415             pragma(msg, "Testing 2nd part of ctRegex");
416             enum Tests = iota(155, 174);
417         }
418         //FIXME: #174-178 contains CTFE parser bug
419         else version (std_regex_ct3)
420         {
421             pragma(msg, "Testing 3rd part of ctRegex");
422             enum Tests = iota(178, 220);
423         }
424         else version (std_regex_ct4)
425         {
426             pragma(msg, "Testing 4th part of ctRegex");
427             enum Tests = iota(220, tv.length);
428         }
429         else
430             enum Tests = chain(iota(0, 30), iota(235, tv.length-5));
431         static foreach (v; Tests)
432         {{
433             enum tvd = tv[v];
434             static if (tvd.result == "c")
435             {
436                 static assert(!__traits(compiles, (){
437                     enum r = regex(tvd.pattern, tvd.flags);
438                 }), "errornously compiles regex pattern: " ~ tvd.pattern);
439             }
440             else
441             {
442                 //BUG: tv[v] is fine but tvd is not known at compile time?!
443                 auto r = ctRegex!(tv[v].pattern, tv[v].flags);
444                 auto nr = regex(tvd.pattern, tvd.flags);
445                 assert(equal(r.ir, nr.ir),
446                     text("!C-T regex! failed to compile pattern #", v ,": ", tvd.pattern));
447                 auto m = match(tvd.input, r);
448                 auto c = tvd.result[0];
449                 bool ok = (c == 'y') ^ m.empty;
450                 assert(ok, text("ctRegex: failed to match pattern #",
451                     v ,": ", tvd.pattern));
452                 if (c == 'y')
453                 {
454                     auto result = produceExpected(m, tvd.format);
455                     assert(result == tvd.replace, text("ctRegex mismatch pattern #", v,
456                         ": ", tvd.pattern," expected: ", tvd.replace, " vs ", result));
457                 }
458             }
459         }}
460         debug(std_regex_test) writeln("!!! FReD C-T test done !!!");
461     }
462 
    // Run the compile-time (ctRegex) checks first, then replay the whole
    // table at run time through each matching entry point.
    ct_tests();
    run_tests!bmatch(); //backtracker
    run_tests!match(); //thompson VM
466 }
467 
468