# Load test_helper.exs from this file's own directory before the test module is defined.
"test_helper.exs" |> Path.expand(__DIR__) |> Code.require_file()
2
defmodule RegexTest do
  # Tests for the `Regex` module: compilation, inspection helpers, matching,
  # scanning, splitting, replacing and escaping, plus the module's doctests.
  use ExUnit.Case, async: true

  # Hand-written `%Regex{}` fixtures for the pattern "c(?<foo>d|e)". Each one
  # carries its own `:re_pattern` binary and `:re_version` tuple (PCRE version
  # string plus an endianness atom). They are consumed by the
  # "compiled in different systems" tests further down.
  # NOTE(review): the attribute names presumably refer to the Erlang/OTP
  # release that produced each binary (21.3 / 19.3) - confirm.
  @re_21_3_little %Regex{
    re_pattern:
      {:re_pattern, 1, 0, 0,
       <<69, 82, 67, 80, 94, 0, 0, 0, 0, 0, 0, 0, 17, 0, 0, 0, 255, 255, 255, 255, 255, 255, 255,
         255, 99, 0, 0, 0, 0, 0, 1, 0, 0, 0, 64, 0, 6, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 102, 111, 111, 0, 131, 0, 20, 29, 99, 133,
         0, 7, 0, 1, 29, 100, 119, 0, 5, 29, 101, 120, 0, 12, 120, 0, 20, 0>>},
    re_version: {"8.42 2018-03-20", :little},
    source: "c(?<foo>d|e)"
  }

  @re_21_3_big %Regex{
    re_pattern:
      {:re_pattern, 1, 0, 0,
       <<80, 67, 82, 69, 0, 0, 0, 86, 0, 0, 0, 0, 0, 0, 0, 17, 255, 255, 255, 255, 255, 255, 255,
         255, 0, 99, 0, 0, 0, 0, 0, 1, 0, 0, 0, 56, 0, 6, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 1, 102, 111, 111, 0, 131, 0, 20, 29, 99, 133, 0, 7, 0, 1, 29, 100, 119,
         0, 5, 29, 101, 120, 0, 12, 120, 0, 20, 0>>},
    re_version: {"8.42 2018-03-20", :big},
    source: "c(?<foo>d|e)"
  }

  @re_19_3_little %Regex{
    re_pattern:
      {:re_pattern, 1, 0, 0,
       <<69, 82, 67, 80, 94, 0, 0, 0, 0, 0, 0, 0, 17, 0, 0, 0, 255, 255, 255, 255, 255, 255, 255,
         255, 99, 0, 0, 0, 0, 0, 1, 0, 0, 0, 64, 0, 6, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 102, 111, 111, 0, 125, 0, 20, 29, 99, 127,
         0, 7, 0, 1, 29, 100, 113, 0, 5, 29, 101, 114, 0, 12, 114, 0, 20, 0>>},
    re_version: {"8.33 2013-05-29", :little},
    source: "c(?<foo>d|e)"
  }

  doctest Regex

  test "multiline" do
    # Without the `m` modifier, ^ and $ only anchor at the ends of the whole string.
    refute Regex.match?(~r/^b$/, "a\nb\nc")
    assert Regex.match?(~r/^b$/m, "a\nb\nc")
  end

  test "precedence" do
    # The pipe must be evaluated before `=~` is applied.
    assert {"aa", :unknown} |> elem(0) =~ ~r/(a)\1/
  end

  test "backreference" do
    assert "aa" =~ ~r/(a)\1/
  end

  test "source" do
    src = "foo"
    assert Regex.source(Regex.compile!(src)) == src
    assert Regex.source(~r/#{src}/) == src

    # Here the string escapes (\b, \d, \e, \s, ...) are resolved by the string
    # literal, so the source holds the resulting control characters.
    src = "\a\b\d\e\f\n\r\s\t\v"
    assert Regex.source(Regex.compile!(src)) == src
    assert Regex.source(~r/#{src}/) == src

    # Here \\b, \\d, \\e and \\s keep a literal backslash in the source.
    src = "\a\\b\\d\\e\f\n\r\\s\t\v"
    assert Regex.source(Regex.compile!(src)) == src
    assert Regex.source(~r/#{src}/) == src
  end

  test "literal source" do
    assert Regex.source(Regex.compile!("foo")) == "foo"
    assert Regex.source(~r"foo") == "foo"
    assert Regex.re_pattern(Regex.compile!("foo")) == Regex.re_pattern(~r"foo")

    assert Regex.source(Regex.compile!("\a\b\d\e\f\n\r\s\t\v")) == "\a\b\d\e\f\n\r\s\t\v"
    assert Regex.source(~r<\a\b\d\e\f\n\r\s\t\v>) == "\a\\b\\d\\e\f\n\r\\s\t\v"

    assert Regex.re_pattern(Regex.compile!("\a\b\d\e\f\n\r\s\t\v")) ==
             Regex.re_pattern(~r"\a\010\177\033\f\n\r \t\v")

    assert Regex.source(Regex.compile!("\a\\b\\d\e\f\n\r\\s\t\v")) == "\a\\b\\d\e\f\n\r\\s\t\v"
    assert Regex.source(~r<\a\\b\\d\\e\f\n\r\\s\t\v>) == "\a\\\\b\\\\d\\\\e\f\n\r\\\\s\t\v"

    assert Regex.re_pattern(Regex.compile!("\a\\b\\d\e\f\n\r\\s\t\v")) ==
             Regex.re_pattern(~r"\a\b\d\e\f\n\r\s\t\v")
  end

  test "Unicode" do
    assert "olá" =~ ~r"\p{Latin}$"u
    refute "£" =~ ~r/\p{Lu}/u

    # Non breaking space matches [[:space:]] with Unicode
    assert <<0xA0::utf8>> =~ ~r/[[:space:]]/u
    assert <<0xA0::utf8>> =~ ~r/\s/u

    # Erlang/OTP 23 raises badarg on invalid UTF-8.
    # Earlier versions simply would not match.
    # Both branches raise, so catch_error/1 returns a truthy value whenever
    # the match does not succeed silently.
    assert catch_error(if <<?<, 255, ?>>> =~ ~r/<.>/u, do: flunk("failed"), else: raise("failed"))
    assert <<?<, 255, ?>>> =~ ~r/<.>/
  end

  test "ungreedy" do
    # Compare with `==`: in `assert expr, message` the second argument is only
    # a failure message, so the expected list would never be checked.
    assert Regex.run(~r/[\d ]+/, "1 2 3 4 5") == ["1 2 3 4 5"]
    assert Regex.run(~r/[\d ]?+/, "1 2 3 4 5") == ["1"]
    assert Regex.run(~r/[\d ]+/U, "1 2 3 4 5") == ["1"]
  end

  test "compile/1" do
    assert {:ok, %Regex{}} = Regex.compile("foo")
    assert {:error, _} = Regex.compile("*foo")
    # Unknown modifier letters are rejected, alone or next to a valid one.
    assert {:error, _} = Regex.compile("foo", "y")
    assert {:error, _} = Regex.compile("foo", "uy")
  end

  test "compile/1 with Erlang options" do
    # Options are given as a list of Erlang `:re` options instead of modifier letters.
    assert {:ok, regex} = Regex.compile("foo\\sbar", [:dotall, {:newline, :anycrlf}])
    assert "foo\nbar" =~ regex
  end

  test "compile!/1" do
    assert %Regex{} = Regex.compile!("foo")

    assert_raise Regex.CompileError, ~r/position 0$/, fn ->
      Regex.compile!("*foo")
    end
  end

  test "recompile/1" do
    new_regex = ~r/foo/
    assert {:ok, %Regex{}} = Regex.recompile(new_regex)
    assert %Regex{} = Regex.recompile!(new_regex)

    # NOTE(review): removing :re_version presumably mimics a struct created
    # before that field existed - confirm.
    old_regex = Map.delete(~r/foo/, :re_version)
    assert {:ok, %Regex{}} = Regex.recompile(old_regex)
    assert %Regex{} = Regex.recompile!(old_regex)
  end

  test "opts/1" do
    assert Regex.opts(Regex.compile!("foo", "i")) == "i"
  end

  test "names/1" do
    assert Regex.names(~r/(?<FOO>foo)/) == ["FOO"]
  end

  test "match?/2" do
    assert Regex.match?(~r/foo/, "foo")
    refute Regex.match?(~r/foo/, "FOO")
    assert Regex.match?(~r/foo/i, "FOO")
    assert Regex.match?(~r/\d{1,3}/i, "123")

    assert Regex.match?(~r/foo/, "afooa")
    refute Regex.match?(~r/^foo/, "afooa")
    assert Regex.match?(~r/^foo/, "fooa")
    refute Regex.match?(~r/foo$/, "afooa")
    assert Regex.match?(~r/foo$/, "afoo")
  end

  test "named_captures/2" do
    assert Regex.named_captures(~r/(?<foo>c)(?<bar>d)/, "abcd") == %{"bar" => "d", "foo" => "c"}
    assert Regex.named_captures(~r/c(?<foo>d)/, "abcd") == %{"foo" => "d"}
    assert Regex.named_captures(~r/c(?<foo>d)/, "no_match") == nil
    assert Regex.named_captures(~r/c(?<foo>d|e)/, "abcd abce") == %{"foo" => "d"}
    # A match with only unnamed groups yields an empty map, not nil.
    assert Regex.named_captures(~r/c(.)/, "cat") == %{}
  end

  test "sigil R" do
    # ~R does not interpolate, so `#{1,3}` is a quantified literal "#".
    assert Regex.match?(~R/f#{1,3}o/, "f#o")
  end

  test "run/2" do
    assert Regex.run(~r"c(d)", "abcd") == ["cd", "d"]
    assert Regex.run(~r"e", "abcd") == nil
  end

  test "run/3 with :all_names as the value of the :capture option" do
    assert Regex.run(~r/c(?<foo>d)/, "abcd", capture: :all_names) == ["d"]
    assert Regex.run(~r/c(?<foo>d)/, "no_match", capture: :all_names) == nil
    assert Regex.run(~r/c(?<foo>d|e)/, "abcd abce", capture: :all_names) == ["d"]
  end

  test "run/3 with :index as the value of the :return option" do
    assert Regex.run(~r"c(d)", "abcd", return: :index) == [{2, 2}, {3, 1}]
    assert Regex.run(~r"e", "abcd", return: :index) == nil
  end

  test "run/3 with :offset" do
    assert Regex.run(~r"^foo", "foobar", offset: 0) == ["foo"]
    assert Regex.run(~r"^foo", "foobar", offset: 2) == nil
    assert Regex.run(~r"^foo", "foobar", offset: 2, return: :index) == nil
    assert Regex.run(~r"bar", "foobar", offset: 2, return: :index) == [{3, 3}]
  end

  test "run/3 with regexes compiled in different systems" do
    assert Regex.run(@re_21_3_little, "abcd abce", capture: :all_names) == ["d"]
    assert Regex.run(@re_21_3_big, "abcd abce", capture: :all_names) == ["d"]
    assert Regex.run(@re_19_3_little, "abcd abce", capture: :all_names) == ["d"]
  end

  test "run/3 with regexes with options compiled in different systems" do
    # The :re_version is bogus on purpose; the `i` option must still apply.
    assert Regex.run(%{~r/foo/i | re_version: "bad version"}, "FOO") == ["FOO"]
  end

  test "scan/2" do
    assert Regex.scan(~r"c(d|e)", "abcd abce") == [["cd", "d"], ["ce", "e"]]
    assert Regex.scan(~r"c(?:d|e)", "abcd abce") == [["cd"], ["ce"]]
    assert Regex.scan(~r"e", "abcd") == []
  end

  test "scan/2 with :all_names as the value of the :capture option" do
    assert Regex.scan(~r/cd/, "abcd", capture: :all_names) == []
    assert Regex.scan(~r/c(?<foo>d)/, "abcd", capture: :all_names) == [["d"]]
    assert Regex.scan(~r/c(?<foo>d)/, "no_match", capture: :all_names) == []
    assert Regex.scan(~r/c(?<foo>d|e)/, "abcd abce", capture: :all_names) == [["d"], ["e"]]
  end

  test "scan/2 with :offset" do
    assert Regex.scan(~r"^foo", "foobar", offset: 0) == [["foo"]]
    assert Regex.scan(~r"^foo", "foobar", offset: 1) == []
  end

  test "scan/2 with regexes compiled in different systems" do
    assert Regex.scan(@re_21_3_little, "abcd abce", capture: :all_names) == [["d"], ["e"]]
    assert Regex.scan(@re_21_3_big, "abcd abce", capture: :all_names) == [["d"], ["e"]]
    assert Regex.scan(@re_19_3_little, "abcd abce", capture: :all_names) == [["d"], ["e"]]
  end

  test "scan/2 with regexes with options compiled in different systems" do
    # The :re_version is bogus on purpose; the `i` option must still apply.
    assert Regex.scan(%{~r/foo/i | re_version: "bad version"}, "FOO") == [["FOO"]]
  end

  test "split/2,3" do
    assert Regex.split(~r",", "") == [""]
    assert Regex.split(~r",", "", trim: true) == []
    assert Regex.split(~r",", "", trim: true, parts: 2) == []

    assert Regex.split(~r"=", "key=") == ["key", ""]
    assert Regex.split(~r"=", "=value") == ["", "value"]

    assert Regex.split(~r" ", "foo bar baz") == ["foo", "bar", "baz"]
    assert Regex.split(~r" ", "foo bar baz", parts: :infinity) == ["foo", "bar", "baz"]
    assert Regex.split(~r" ", "foo bar baz", parts: 10) == ["foo", "bar", "baz"]
    assert Regex.split(~r" ", "foo bar baz", parts: 2) == ["foo", "bar baz"]

    assert Regex.split(~r" ", " foo bar baz ") == ["", "foo", "bar", "baz", ""]
    assert Regex.split(~r" ", " foo bar baz ", trim: true) == ["foo", "bar", "baz"]
    assert Regex.split(~r" ", " foo bar baz ", parts: 2) == ["", "foo bar baz "]
    assert Regex.split(~r" ", " foo bar baz ", trim: true, parts: 2) == ["foo", "bar baz "]
  end

  test "split/3 with the :on option" do
    assert Regex.split(~r/()abc()/, "xabcxabcx", on: :none) == ["xabcxabcx"]

    parts = ["x", "abc", "x", "abc", "x"]
    assert Regex.split(~r/()abc()/, "xabcxabcx", on: :all_but_first) == parts

    assert Regex.split(~r/(?<first>)abc(?<last>)/, "xabcxabcx", on: [:first, :last]) == parts

    # The order in which the names are listed changes the result.
    parts = ["xabc", "xabc", "x"]
    assert Regex.split(~r/(?<first>)abc(?<last>)/, "xabcxabcx", on: [:last, :first]) == parts

    assert Regex.split(~r/a(?<second>b)c/, "abc", on: [:second]) == ["a", "c"]

    parts = ["a", "c adc a", "c"]
    assert Regex.split(~r/a(?<second>b)c|a(?<fourth>d)c/, "abc adc abc", on: [:second]) == parts

    assert Regex.split(~r/a(?<second>b)c|a(?<fourth>d)c/, "abc adc abc", on: [:second, :fourth]) ==
             ["a", "c a", "c a", "c"]
  end

  test "split/3 with the :include_captures option" do
    assert Regex.split(~r/([ln])/, "Erlang", include_captures: true) == ["Er", "l", "a", "n", "g"]
    assert Regex.split(~r/([kw])/, "Elixir", include_captures: true) == ["Elixir"]

    assert Regex.split(~r/([Ee]lixir)/, "Elixir", include_captures: true, trim: true) ==
             ["Elixir"]

    assert Regex.split(~r/([Ee]lixir)/, "Elixir", include_captures: true, trim: false) ==
             ["", "Elixir", ""]

    assert Regex.split(~r//, "abc", include_captures: true) ==
             ["", "", "a", "", "b", "", "c", "", ""]

    assert Regex.split(~r/a/, "abc", include_captures: true) == ["", "a", "bc"]
    assert Regex.split(~r/c/, "abc", include_captures: true) == ["ab", "c", ""]

    assert Regex.split(~r/[Ei]/, "Elixir", include_captures: true, parts: 2) ==
             ["", "E", "lixir"]

    assert Regex.split(~r/[Ei]/, "Elixir", include_captures: true, parts: 3) ==
             ["", "E", "l", "i", "xir"]

    assert Regex.split(~r/[Ei]/, "Elixir", include_captures: true, parts: 2, trim: true) ==
             ["E", "lixir"]

    assert Regex.split(~r/[Ei]/, "Elixir", include_captures: true, parts: 3, trim: true) ==
             ["E", "l", "i", "xir"]
  end

  test "replace/3,4" do
    assert Regex.replace(~r(d), "abc", "d") == "abc"
    assert Regex.replace(~r(b), "abc", "d") == "adc"
    assert Regex.replace(~r(b), "abc", "[\\0]") == "a[b]c"
    assert Regex.replace(~r[(b)], "abc", "[\\1]") == "a[b]c"
    # References to groups that do not exist expand to an empty string.
    assert Regex.replace(~r[(b)], "abc", "[\\2]") == "a[]c"
    assert Regex.replace(~r[(b)], "abc", "[\\3]") == "a[]c"
    assert Regex.replace(~r(b), "abc", "[\\g{0}]") == "a[b]c"
    assert Regex.replace(~r[(b)], "abc", "[\\g{1}]") == "a[b]c"

    assert Regex.replace(~r(b), "abcbe", "d") == "adcde"
    assert Regex.replace(~r(b), "abcbe", "d", global: false) == "adcbe"

    assert Regex.replace(~r/ /, "first third", "\\second\\") == "first\\second\\third"
    assert Regex.replace(~r/ /, "first third", "\\\\second\\\\") == "first\\second\\third"

    # Function replacements of arity 0 to 3; the arity-3 clause receives ""
    # for the capture group the pattern does not have.
    assert Regex.replace(~r[a(b)c], "abcabc", fn -> "ac" end) == "acac"
    assert Regex.replace(~r[a(b)c], "abcabc", fn "abc" -> "ac" end) == "acac"
    assert Regex.replace(~r[a(b)c], "abcabc", fn "abc", "b" -> "ac" end) == "acac"
    assert Regex.replace(~r[a(b)c], "abcabc", fn "abc", "b", "" -> "ac" end) == "acac"
    assert Regex.replace(~r[a(b)c], "abcabc", fn "abc", "b" -> "ac" end, global: false) == "acabc"
  end

  test "escape" do
    assert matches_escaped?(".")
    refute matches_escaped?(".", "x")

    assert matches_escaped?("[\w]")
    refute matches_escaped?("[\w]", "x")

    assert matches_escaped?("\\")

    assert matches_escaped?("\\xff", "\\xff")
    refute matches_escaped?("\\xff", "\xff")

    assert matches_escaped?("(")
    assert matches_escaped?("()")
    assert matches_escaped?("(?:foo)")

    assert matches_escaped?("\\A  \\z")
    assert matches_escaped?("  x  ")
    # Unicode spaces here
    assert matches_escaped?("  x    x ")
    assert matches_escaped?("# lol")

    assert matches_escaped?("\\A.^$*+?()[{\\| \t\n\x20\\z #hello\u202F\u205F")
    # An escaped "-" must stay literal inside a character class.
    assert Regex.match?(Regex.compile!("[" <> Regex.escape("!-#") <> "]"), "-")

    assert Regex.escape("{}") == "\\{\\}"
    assert Regex.escape("[]") == "\\[\\]"

    assert Regex.escape("{foo}") == "\\{foo\\}"
    assert Regex.escape("[foo]") == "\\[foo\\]"
  end

  # Returns true when the escaped form of `string` matches `string` itself.
  defp matches_escaped?(string) do
    matches_escaped?(string, string)
  end

  # Returns true when the regex built from `Regex.escape(string)` matches `match`.
  # The regex is compiled with the `s`, `i`, `m` and `x` modifiers.
  defp matches_escaped?(string, match) do
    Regex.match?(~r/#{Regex.escape(string)}/simx, match)
  end
end
361