-- URL parser tests
--
-- Four groups of table-driven tests:
--   1. host/user extraction from free text via url.create + to_table
--   2. full field-by-field URL parsing (including inputs that must not parse)
--   3. in-place HTTP path normalization through the FFI binding
--   4. URL composition through lua_urls_compose rules

context("URL check functions", function()
  local mpool = require("rspamd_mempool")
  local lua_urls_compose = require "lua_urls_compose"
  local url = require("rspamd_url")
  local lua_util = require("lua_util")
  local logger = require("rspamd_logger")
  local test_helper = require("rspamd_test_helper")
  local ffi = require("ffi")

  ffi.cdef[[
  void rspamd_http_normalize_path_inplace(char *path, size_t len, size_t *nlen);
  ]]

  test_helper.init_url_parser()

  -- Shared by every url.create call below. Some of those calls happen inside
  -- test bodies, so the pool must stay alive for the whole context.
  local pool = mpool.create()

  -- Each case: { input text, { expected host, expected user or nil } }
  local cases = {
    {"test.com", {"test.com", nil}},
    {" test.com", {"test.com", nil}},
    {"<test.com> text", {"test.com", nil}},
    {"test.com. text", {"test.com", nil}},
    {"mailto:A.User@example.com text", {"example.com", "A.User"}},
    {"http://Тест.Рф:18 text", {"тест.рф", nil}},
    {"http://user:password@тест2.РФ:18 text", {"тест2.рф", "user"}},
    {"somebody@example.com", {"example.com", "somebody"}},
    {"https://127.0.0.1/abc text", {"127.0.0.1", nil}},
    {"https:\\\\127.0.0.1/abc text", {"127.0.0.1", nil}},
    {"https:\\\\127.0.0.1", {"127.0.0.1", nil}},
    {"https://127.0.0.1 text", {"127.0.0.1", nil}},
    {"https://[::1]:1", {"::1", nil}},
    {"https://user:password@[::1]:1", {"::1", nil}},
    {"https://user:password@[::1]", {"::1", nil}},
    {"https://user:password@[::1]/1", {"::1", nil}},
  }

  for i,c in ipairs(cases) do
    local res = url.create(pool, c[1])

    test("Extract urls from text" .. i, function()
      assert_not_nil(res, "cannot parse " .. c[1])
      local t = res:to_table()
      --local s = logger.slog("%1 -> %2", c[1], t)
      --print(s)
      assert_not_nil(t, "cannot convert to table " .. c[1])
      assert_equal(c[2][1], t['host'],
          logger.slog('expected host "%s", but got "%s" in url %s => %s',
              c[2][1], t['host'], c[1], t))

      -- The user part is only checked when the case specifies one.
      if c[2][2] then
        assert_equal(c[2][2], t['user'],
            logger.slog('expected user "%s", but got "%s" in url %s => %s',
                c[2][2], t['user'], c[1], t))
      end
    end)
  end

  -- Each case: { input, should_parse, expected fields of to_table() }
  -- The third element is omitted when should_parse is false.
  -- Some cases from https://code.google.com/p/google-url/source/browse/trunk/src/url_canon_unittest.cc
  cases = {
    {[[http://example.net/path/]], true, {
      host = 'example.net', path = 'path/'
    }},
    {'http://example.net/hello%20world.php?arg=x#fragment', true, {
      host = 'example.net', fragment = 'fragment', query = 'arg=x',
      path = 'hello world.php',
    }},
    {'http://example.net/?arg=%23#fragment', true, {
      host = 'example.net', fragment = 'fragment', query = 'arg=#',
    }},
    {"http:/\\[::eeee:192.168.0.1]/#test", true, {
      host = '::eeee:c0a8:1', fragment = 'test'
    }},
    {"http:/\\[::eeee:192.168.0.1]#test", true, {
      host = '::eeee:c0a8:1', fragment = 'test'
    }},
    {"http:/\\[::eeee:192.168.0.1]?test", true, {
      host = '::eeee:c0a8:1', query = 'test'
    }},
    {"http:\\\\%30%78%63%30%2e%30%32%35%30.01", true, { --0xc0.0250.01
      host = '192.168.0.1',
    }},
    {"http:/\\www.google.com/foo?bar=baz#", true, {
      host = 'www.google.com', path = 'foo', query = 'bar=baz', tld = 'google.com'
    }},
    {"http://[www.google.com]/", true, {
      host = 'www.google.com',
    }},
    {"<test.com", true, {
      host = 'test.com', tld = 'test.com',
    }},
    {"test.com>", false},
    {",test.com text", false},
    {"ht\ttp:@www.google.com:80/;p?#", false},
    {"http://user:pass@/", false},
    {"http://foo:-80/", false},
    {"http:////////user:@google.com:99?foo", true, {
      host = 'google.com', user = 'user', port = 99, query = 'foo'
    }},
    {"http://%25DOMAIN:foobar@foodomain.com/", true, {
      host = 'foodomain.com', user = '%25DOMAIN'
    }},
    {"http://0.0xFFFFFF", true, {
      host = '0.255.255.255'
    }},
    {"http:/\\030052000001", true, {
      host = '192.168.0.1'
    }},
    {"http:\\/0xc0.052000001", true, {
      host = '192.168.0.1'
    }},
    {"http://192.168.0.1.?foo", true, {
      host = '192.168.0.1', query = 'foo',
    }},
    {"http://twitter.com#test", true, {
      host = 'twitter.com', fragment = 'test'
    }},
    {"http:www.twitter.com#test", true, {
      host = 'www.twitter.com', fragment = 'test'
    }},
    {"http://example。com#test", true, {
      host = 'example.com', fragment = 'test'
    }},
    {"http://hoho.example。com#test", true, {
      host = 'hoho.example.com', fragment = 'test'
    }},
    {"http://hoho。example。com#test", true, {
      host = 'hoho.example.com', fragment = 'test'
    }},
    {"http://hoho.example。com#test", true, {
      host = 'hoho.example.com', fragment = 'test'
    }},
    {"http://hehe。example。com#test", true, {
      host = 'hehe.example.com', fragment = 'test'
    }},
    {"http:////$%^&****((@example.org//#f@f", true, {
      user = '$%^&****((', host = 'example.org', fragment = 'f@f'
    }},
    {"http://@@example.com", true, {
      user = "@", host = "example.com"
    }},
    {"https://example.com\\_Resources\\ClientImages\\UserData?ol\\o#ololo\\", true, {
      host = "example.com", path = "_Resources\\ClientImages\\UserData",
      query = "ol\\o", fragment = "ololo\\",
    }},
  }

  for _,c in ipairs(cases) do
    local res = url.create(pool, c[1])

    test("Parse url: " .. c[1], function()
      if c[2] then
        assert_not_nil(res, "we are able to parse url: " .. c[1])

        local uf = res:to_table()

        -- Every expected field must be present with the expected value.
        for k,v in pairs(c[3]) do
          assert_not_nil(uf[k], k .. ' is missing in url, must be ' .. v)
          assert_equal(v, uf[k], logger.slog('expected "%s", for %s, but got "%s" in url %s => %s',
              v, k, uf[k], c[1], uf))
        end
        -- No field may appear that the case does not list, apart from the
        -- ones that are not required to be listed in the expectations.
        for k,v in pairs(uf) do
          if k ~= 'url' and k ~= 'protocol' and k ~= 'tld' then
            -- The message is built eagerly, so tostring guards against
            -- non-string values breaking the concatenation.
            assert_not_nil(c[3][k], k .. ' should be absent but it is ' .. tostring(v) .. ' in: ' .. c[1])
          end
        end
      else
        assert_nil(res, "should not parse " .. c[1] .. ' parsed to: ' .. tostring(res))
      end
    end)
  end

  -- Each case: { raw path, expected path after normalization }
  cases = {
    {"/././foo", "/foo"},
    {"/a/b/c/./../../g", "/a/g"},
    {"/./.foo", "/.foo"},
    {"/foo/.", "/foo/"},
    {"/foo/./", "/foo/"},
    {"/foo/bar/..", "/foo"},
    {"/foo/bar/../", "/foo/"},
    {"/foo/..bar", "/foo/..bar"},
    {"/foo/bar/../ton", "/foo/ton"},
    {"/foo/bar/../ton/../../a", "/a"},
    {"/foo/../../..", "/"},
    {"/foo/../../../ton", "/ton"},
    {"////../..", "/"},
    {"./", ""},
    {"/./", "/"},
    {"/./././././././", "/"},
    {"/", "/"},
    {"/a/b", "/a/b"},
    {"/a/b/", "/a/b/"},
    {"..", "/"},
    {"/../", "/"},
    {"../", "/"},
    {"///foo", "/foo"},
  }

  for _,v in ipairs(cases) do
    test(string.format("Normalize paths '%s'", v[1]), function()
      -- Copy the input into a mutable buffer: the C function rewrites the
      -- path in place and reports the resulting length through sizbuf[0].
      local buf = ffi.new("uint8_t[?]", #v[1])
      local sizbuf = ffi.new("size_t[1]")
      ffi.copy(buf, v[1], #v[1])
      ffi.C.rspamd_http_normalize_path_inplace(buf, #v[1], sizbuf)
      local res = ffi.string(buf, tonumber(sizbuf[0]))
      assert_equal(v[2], res, 'expected ' .. v[2] .. ' but got ' .. res .. ' in path ' .. v[1])
    end)
  end

  -- Each case: { input host, expected result of comp_rules:process_url }
  cases = {
    {'example.com', 'example.com'},
    {'baz.example.com', 'baz.example.com'},
    {'3.baz.example.com', 'baz.example.com'},
    {'bar.example.com', 'example.com'},
    {'foo.example.com', 'foo.example.com'},
    {'3.foo.example.com', '3.foo.example.com'},
    {'foo.com', 'foo.com'},
    {'bar.foo.com', 'foo.com'},
  }

  -- Rules handed to lua_urls_compose; the expectations above depend on them.
  local excl_rules1 = {
    'example.com',
    '*.foo.example.com',
    '!bar.example.com'
  }

  local comp_rules = lua_urls_compose.inject_composition_rules(rspamd_config, excl_rules1)

  for _,v in ipairs(cases) do
    test("URL composition " .. v[1], function()
      local u = url.create(pool, v[1])
      assert_not_nil(u, "we are able to parse url: " .. v[1])
      local res = comp_rules:process_url(nil, u:get_tld(), u:get_host())
      -- tostring keeps the message buildable even if process_url returns nil,
      -- so the failure is reported by assert_equal rather than by `..`.
      assert_equal(v[2], res, 'expected ' .. v[2] .. ' but got ' .. tostring(res) .. ' in url ' .. v[1])
    end)
  end
end)