1-- URL parser tests
2
3context("URL check functions", function()
4  local mpool = require("rspamd_mempool")
5  local lua_urls_compose = require "lua_urls_compose"
6  local url = require("rspamd_url")
7  local lua_util = require("lua_util")
8  local logger = require("rspamd_logger")
9  local test_helper = require("rspamd_test_helper")
10  local ffi = require("ffi")
11
12  ffi.cdef[[
13  void rspamd_http_normalize_path_inplace(char *path, size_t len, size_t *nlen);
14  ]]
15
16  test_helper.init_url_parser()
17
18  local pool = mpool.create()
19
20  local cases = {
21    {"test.com", {"test.com", nil}},
22    {" test.com", {"test.com", nil}},
23    {"<test.com> text", {"test.com", nil}},
24    {"test.com. text", {"test.com", nil}},
25    {"mailto:A.User@example.com text", {"example.com", "A.User"}},
26    {"http://Тест.Рф:18 text", {"тест.рф", nil}},
27    {"http://user:password@тест2.РФ:18 text", {"тест2.рф", "user"}},
28    {"somebody@example.com", {"example.com", "somebody"}},
29    {"https://127.0.0.1/abc text", {"127.0.0.1", nil}},
30    {"https:\\\\127.0.0.1/abc text", {"127.0.0.1", nil}},
31    {"https:\\\\127.0.0.1", {"127.0.0.1", nil}},
32    {"https://127.0.0.1 text", {"127.0.0.1", nil}},
33    {"https://[::1]:1", {"::1", nil}},
34    {"https://user:password@[::1]:1", {"::1", nil}},
35    {"https://user:password@[::1]", {"::1", nil}},
36    {"https://user:password@[::1]/1", {"::1", nil}},
37  }
38
39  for i,c in ipairs(cases) do
40    local res = url.create(pool, c[1])
41
42    test("Extract urls from text" .. i, function()
43      assert_not_nil(res, "cannot parse " .. c[1])
44      local t = res:to_table()
45      --local s = logger.slog("%1 -> %2", c[1], t)
46      --print(s)
47      assert_not_nil(t, "cannot convert to table " .. c[1])
48      assert_equal(c[2][1], t['host'],
49              logger.slog('expected host "%s", but got "%s" in url %s => %s',
50              c[2][1], t['host'], c[1], t))
51
52      if c[2][2] then
53        assert_equal(c[2][1], t['host'],
54                logger.slog('expected user "%s", but got "%s" in url %s => %s',
55                        c[2][1], t['host'], c[1], t))
56      end
57    end)
58  end
59
60  cases = {
61    {[[http://example.net/path/]], true, {
62      host = 'example.net', path = 'path/'
63    }},
64    {'http://example.net/hello%20world.php?arg=x#fragment', true, {
65      host = 'example.net', fragment = 'fragment', query = 'arg=x',
66      path = 'hello world.php',
67    }},
68    {'http://example.net/?arg=%23#fragment', true, {
69      host = 'example.net', fragment = 'fragment', query = 'arg=#',
70    }},
71    {"http:/\\[::eeee:192.168.0.1]/#test", true, {
72      host = '::eeee:c0a8:1', fragment = 'test'
73    }},
74    {"http:/\\[::eeee:192.168.0.1]#test", true, {
75      host = '::eeee:c0a8:1', fragment = 'test'
76    }},
77    {"http:/\\[::eeee:192.168.0.1]?test", true, {
78      host = '::eeee:c0a8:1', query = 'test'
79    }},
80    {"http:\\\\%30%78%63%30%2e%30%32%35%30.01", true, { --0xc0.0250.01
81      host = '192.168.0.1',
82    }},
83    {"http:/\\www.google.com/foo?bar=baz#", true, {
84      host = 'www.google.com', path = 'foo', query = 'bar=baz', tld = 'google.com'
85    }},
86    {"http://[www.google.com]/", true, {
87      host = 'www.google.com',
88    }},
89    {"<test.com", true, {
90      host = 'test.com', tld = 'test.com',
91    }},
92    {"test.com>", false},
93    {",test.com text", false},
94    {"ht\ttp:@www.google.com:80/;p?#", false},
95    {"http://user:pass@/", false},
96    {"http://foo:-80/", false},
97    {"http:////////user:@google.com:99?foo", true, {
98      host = 'google.com', user = 'user', port = 99, query = 'foo'
99    }},
100    {"http://%25DOMAIN:foobar@foodomain.com/", true, {
101      host = 'foodomain.com', user = '%25DOMAIN'
102    }},
103    {"http://0.0xFFFFFF", true, {
104      host = '0.255.255.255'
105    }},
106    {"http:/\\030052000001", true, {
107      host = '192.168.0.1'
108    }},
109    {"http:\\/0xc0.052000001", true, {
110      host = '192.168.0.1'
111    }},
112    {"http://192.168.0.1.?foo", true, {
113      host = '192.168.0.1', query = 'foo',
114    }},
115    {"http://twitter.com#test", true, {
116      host = 'twitter.com', fragment = 'test'
117    }},
118    {"http:www.twitter.com#test", true, {
119      host = 'www.twitter.com', fragment = 'test'
120    }},
121    {"http://example。com#test", true, {
122      host = 'example.com', fragment = 'test'
123    }},
124    {"http://hoho.example。com#test", true, {
125      host = 'hoho.example.com', fragment = 'test'
126    }},
127    {"http://hoho。example。com#test", true, {
128      host = 'hoho.example.com', fragment = 'test'
129    }},
130    {"http://hoho.example。com#test", true, {
131      host = 'hoho.example.com', fragment = 'test'
132    }},
133    {"http://hehe。example。com#test", true, {
134      host = 'hehe.example.com', fragment = 'test'
135    }},
136    {"http:////$%^&****((@example.org//#f@f", true, {
137      user = '$%^&****((', host = 'example.org', fragment = 'f@f'
138    }},
139    {"http://@@example.com", true, {
140      user = "@", host = "example.com"
141    }},
142    {"https://example.com\\_Resources\\ClientImages\\UserData?ol\\o#ololo\\", true, {
143      host = "example.com", path = "_Resources\\ClientImages\\UserData",
144      query = "ol\\o", fragment = "ololo\\",
145    }},
146  }
147
148  -- Some cases from https://code.google.com/p/google-url/source/browse/trunk/src/url_canon_unittest.cc
149  for i,c in ipairs(cases) do
150    local res = url.create(pool, c[1])
151
152    test("Parse url: " .. c[1], function()
153      if c[2] then
154        assert_not_nil(res, "we are able to parse url: " .. c[1])
155
156        local uf = res:to_table()
157
158        for k,v in pairs(c[3]) do
159          assert_not_nil(uf[k], k .. ' is missing in url, must be ' .. v)
160          assert_equal(uf[k], v, logger.slog('expected "%s", for %s, but got "%s" in url %s => %s',
161                v, k, uf[k], c[1], uf))
162        end
163        for k,v in pairs(uf) do
164          if k ~= 'url' and k ~= 'protocol' and k ~= 'tld' then
165            assert_not_nil(c[3][k], k .. ' should be absent but it is ' .. v .. ' in: ' .. c[1])
166          end
167        end
168      else
169        assert_nil(res, "should not parse " .. c[1] .. ' parsed to: ' .. tostring(res))
170      end
171    end)
172  end
173
174  cases = {
175    {"/././foo", "/foo"},
176    {"/a/b/c/./../../g", "/a/g"},
177    {"/./.foo", "/.foo"},
178    {"/foo/.", "/foo/"},
179    {"/foo/./", "/foo/"},
180    {"/foo/bar/..", "/foo"},
181    {"/foo/bar/../", "/foo/"},
182    {"/foo/..bar", "/foo/..bar"},
183    {"/foo/bar/../ton", "/foo/ton"},
184    {"/foo/bar/../ton/../../a", "/a"},
185    {"/foo/../../..", "/"},
186    {"/foo/../../../ton", "/ton"},
187    {"////../..", "/"},
188    {"./", ""},
189    {"/./", "/"},
190    {"/./././././././", "/"},
191    {"/", "/"},
192    {"/a/b", "/a/b"},
193    {"/a/b/", "/a/b/"},
194    {"..", "/"},
195    {"/../", "/"},
196    {"../", "/"},
197    {"///foo", "/foo"},
198  }
199
200  for i,v in ipairs(cases) do
201    test(string.format("Normalize paths '%s'", v[1]), function()
202      local buf = ffi.new("uint8_t[?]", #v[1])
203      local sizbuf = ffi.new("size_t[1]")
204      ffi.copy(buf, v[1], #v[1])
205      ffi.C.rspamd_http_normalize_path_inplace(buf, #v[1], sizbuf)
206      local res = ffi.string(buf, tonumber(sizbuf[0]))
207      assert_equal(v[2], res, 'expected ' .. v[2] .. ' but got ' .. res .. ' in path ' .. v[1])
208    end)
209  end
210
211  cases = {
212    {'example.com', 'example.com'},
213    {'baz.example.com', 'baz.example.com'},
214    {'3.baz.example.com', 'baz.example.com'},
215    {'bar.example.com', 'example.com'},
216    {'foo.example.com', 'foo.example.com'},
217    {'3.foo.example.com', '3.foo.example.com'},
218    {'foo.com', 'foo.com'},
219    {'bar.foo.com', 'foo.com'},
220  }
221
222  local excl_rules1 = {
223      'example.com',
224      '*.foo.example.com',
225      '!bar.example.com'
226  }
227
228  local comp_rules = lua_urls_compose.inject_composition_rules(rspamd_config, excl_rules1)
229
230  for _,v in ipairs(cases) do
231    test("URL composition " .. v[1], function()
232      local u = url.create(pool, v[1])
233      assert_not_nil(u, "we are able to parse url: " .. v[1])
234      local res = comp_rules:process_url(nil, u:get_tld(), u:get_host())
235      assert_equal(v[2], res, 'expected ' .. v[2] .. ' but got ' .. res .. ' in url ' .. v[1])
236    end)
237  end
238end)
239