--- Simple URI-based content filter.
--
-- This is a simple, fast ad blocker module that works by blocking requests to
-- domains that only serve advertisements. It does not currently do any form of
-- cosmetic ad blocking (i.e. element hiding with CSS).
--
-- See also: @ref{adblock_chrome}.
--
-- # Capabilities
--
-- * You can allow specific content to be loaded if it is inadvertently
--   blocked: simply add whitelisting rules formed by `@@` and the pattern to
--   allow.
-- * Supports multiple filter list files.
-- * Filter files can be enabled, disabled and reloaded from disk
--   without restarting luakit.
-- * A configuration chrome page is provided by @ref{adblock_chrome}.
--
-- # Usage
--
-- * Add `require "adblock"` and `require "adblock_chrome"` to your `config.rc`.
-- * Download AdblockPlus-compatible filter lists to the adblock directory.
--   Multiple lists are supported.
--   EasyList is the most popular Adblock Plus filter list, and can be
--   downloaded from [https://easylist.to/](https://easylist.to/).
-- * Filter lists downloaded to the adblock directory must have a
--   filename ending in `.txt` in order to be loaded.
-- * Filter lists need to be updated regularly (~weekly), use cron!
--
-- # Troubleshooting
--
-- If ad blocking is not working as expected, the easiest way to determine
-- what is happening is to set the appropriate log levels to `debug`:
--
-- If a filterlist is not being loaded for some reason, start luakit with
-- the following:
--
--     --log=lua/lib/adblock=debug
--
-- If a filterlist is not behaving correctly, by blocking too much or too
-- little, start luakit with the following:
--
--     --log=lua/lib/adblock_wm=debug
--
-- # Files and Directories
--
-- - All filterlists should be downloaded to the adblock data directory.
--   By default, this is the `adblock` sub-directory of the luakit data
--   directory.
--   All filterlists must have a filename ending in `.txt`.
--
-- @module adblock
-- @author Chris van Dijk (quigybo) <quigybo@hotmail.com>
-- @author Mason Larobina (mason-l) <mason.larobina@gmail.com>
-- @author Plaque FCC <Reslayer@ya.ru>
-- @copyright 2010 Chris van Dijk <quigybo@hotmail.com>
-- @copyright 2010 Mason Larobina <mason.larobina@gmail.com>
-- @copyright 2012 Plaque FCC <Reslayer@ya.ru>

local webview = require("webview")
local window = require("window")
local lousy = require("lousy")
local util = lousy.util
local lfs = require("lfs")
local modes = require("modes")
local add_cmds = modes.add_cmds

local _M = {}

local adblock_wm = require_web_module("adblock_wm")

-- Adblock Plus compatible filter lists.
local adblock_dir = luakit.data_dir .. "/adblock/"
local filterfiles = {}
local subscriptions_file = adblock_dir .. "subscriptions"

--- The set of ad blocking subscriptions that are active.
-- @type table
-- @readonly
_M.subscriptions = {}

--- String patterns to filter URIs with.
-- @type table
-- @readonly
_M.rules = {}

--- Fitting for adblock.chrome.refresh_views()
-- @local
_M.refresh_views = function()
    -- Dummy.
end

-- Detect files to read rules from.
-- Appends the name of every `*.txt` entry of the adblock directory to
-- `filterfiles`, creating the directory first when it does not exist.
local function detect_files()
    -- Create adblock directory if it doesn't exist. The existence check uses
    -- lfs.attributes so the process working directory is never touched.
    if lfs.attributes(adblock_dir, "mode") ~= "directory" then
        local ok, err = lfs.mkdir(adblock_dir)
        if not ok then
            -- Without the directory there is nothing to scan; lfs.dir would
            -- raise an error on a missing path.
            msg.warn("unable to create adblock directory %s: %s", adblock_dir, tostring(err))
            return
        end
    end

    msg.verbose("searching for filter lists in %s", adblock_dir)
    for filename in lfs.dir(adblock_dir) do
        if string.find(filename, "%.txt$") then
            -- The filename is passed as an argument, never as the format
            -- string, so a '%' in a filename cannot break the log call.
            msg.verbose("found filter list: %s", filename)
            table.insert(filterfiles, filename)
        end
    end

    msg.info("found " .. #filterfiles .. " filter list" .. (#filterfiles == 1 and "" or "s"))
end

-- Split an Adblock Plus filter into its pattern part and its options.
-- @param s The raw filter text (without any leading `@@`).
-- @return The filter text with the `$options` suffix removed.
-- @return A table of parsed options: `domain` (list of strings),
-- `third-party` (boolean) and `unknown` (true if any unsupported option
-- was present).
local function get_abp_opts(s)
    local opts = {}
    local pos = string.find(s, "$", 1, true)
    if not pos then
        return s, opts
    end

    local optstring = string.sub(s, pos + 1)
    s = string.sub(s, 1, pos - 1)

    for item in string.gmatch(optstring, "[^,]+") do
        -- Options are either bare keys or key=value pairs
        local key, val = item, nil
        local eq = string.find(item, "=", 1, true)
        if eq then
            key = string.sub(item, 1, eq - 1)
            val = string.sub(item, eq + 1)
        end

        -- A leading tilde negates the option
        local negative = string.sub(key, 1, 1) == "~"
        if negative then
            key = string.sub(key, 2)
        end

        if key == "domain" and val then
            local domains = {}
            for v in string.gmatch(val, "[^|]+") do
                table.insert(domains, v)
            end
            if #domains > 0 then opts["domain"] = domains end
        elseif key == "third-party" then
            opts["third-party"] = not negative
        else
            opts["unknown"] = true
        end
    end

    return s, opts
end

-- Convert Adblock Plus filter description to lua string pattern
-- See http://adblockplus.org/en/filters for more information
-- @param s The filter text (without any leading `@@`).
-- @return A list of patterns; empty when the rule uses unknown options.
-- @return The parsed options table (nil when the rule was skipped).
-- @return The anchoring domain for `||domain` rules, or nil.
-- @return `true` if the single returned pattern is a plain string rather
-- than a Lua pattern.
local abp_to_pattern = function (s)
    -- Strip filter options
    local opts
    s, opts = get_abp_opts(s)
    if opts and opts.unknown == true then return {} end -- Skip rules with unknown options

    if string.len(s) == 0 then
        return {""}, opts, nil, false
    end

    -- If this is matchable as a plain string, return early
    local has_star = string.find(s, "*", 1, true)
    local has_caret = string.find(s, "^", 1, true)
    local domain_anchor = string.match(s, "^||")
    if not has_star and not has_caret and not domain_anchor then
        return {s}, opts, nil, true
    end

    -- Optimize for domain anchor rules
    local domain = nil
    if domain_anchor then
        -- Extract the domain from the pattern
        local d = string.sub(s, 3)
        d = string.gsub(d, "/.*", "")
        d = string.gsub(d, "%^.*", "")

        -- We don't bother with wildcard domains since they aren't frequent enough
        if not string.find(d, "*", 1, true) then
            domain = d
        end
    end

    -- Protect magic characters (^$()%.[]*+-?) not used by ABP (^$()[]*)
    s = string.gsub(s, "([%%%.%+%-%?])", "%%%1")

    -- Wildcards are globbing. Only '%' is special in a gsub replacement
    -- string, so the replacement is written literally.
    s = string.gsub(s, "%*", ".*")

    -- Caret is separator (anything but a letter, a digit, or one of the following: - . %)
    -- NOTE(review): the ABP documentation also lists `_` as a non-separator;
    -- this class treats `_` as a separator -- confirm whether that is intended.
    s = string.gsub(s, "%^", "[^%%w%%-%%.%%%%]")

    local patterns
    if domain_anchor then
        local p = string.sub(s, 3) -- Clip off first two || characters
        patterns = { "^https?://" .. p, "^https?://[^/]*%." .. p }
    else
        patterns = { s }
    end

    for k, v in ipairs(patterns) do
        -- Pipe is anchor
        v = string.gsub(v, "^|", "^")
        v = string.gsub(v, "|$", "$")

        -- Convert to lowercase ($match-case option is not honoured)
        patterns[k] = string.lower(v)
    end

    return patterns, opts, domain, false
end

-- Store `opts` under `pattern` in `tab` unless the pattern is already
-- recorded in `cache_tab`.
-- @return `true` if the pattern was new, `false` if it was a duplicate.
local add_unique_cached = function (pattern, opts, tab, cache_tab)
    if cache_tab[pattern] then
        return false
    end
    cache_tab[pattern], tab[pattern] = true, opts
    return true
end

-- Create an empty rule list. Patterns and plain strings are bucketed by
-- whether the source rule contains the substring "ad"; domain-anchored
-- patterns are grouped by domain.
local list_new = function ()
    return {
        patterns    = {},
        ad_patterns = {},
        plain       = {},
        ad_plain    = {},
        domains     = {},
        length      = 0,
        ignored     = 0,
    }
end

-- Convert one filter line and add its patterns to `list`.
-- @param list A rule list created by list_new().
-- @param line The filter text (without any leading `@@`).
-- @param cache Table of already-seen patterns, shared between lists.
-- @param pat_exclude Optional pattern that must never be added.
local list_add = function(list, line, cache, pat_exclude)
    local pats, opts, domain, plain = abp_to_pattern(line)
    local contains_ad = string.find(line, "ad", 1, true)

    for _, pat in ipairs(pats) do
        -- `new` stays nil for excluded patterns, which are counted as ignored
        local new
        if plain then
            local bucket = contains_ad and list.ad_plain or list.plain
            new = add_unique_cached(pat, opts, bucket, cache)
        elseif pat ~= "^http:" and pat ~= pat_exclude then
            if domain then
                if not list.domains[domain] then
                    list.domains[domain] = {}
                end
                new = add_unique_cached(pat, opts, list.domains[domain], cache)
            else
                local bucket = contains_ad and list.ad_patterns or list.patterns
                new = add_unique_cached(pat, opts, bucket, cache)
            end
        end
        if new then
            list.length = list.length + 1
        else
            list.ignored = list.ignored + 1
        end
    end
end

-- Parses an Adblock Plus compatible filter list
-- @param filters_dir Directory containing the list (with trailing slash).
-- @param filename Name of the list file inside `filters_dir`.
-- @param cache Table with `white` and `black` pattern caches.
-- @return The whitelist, the blacklist, the number of whitelist rules, the
-- number of blacklist rules and the number of ignored rules; returns
-- nothing if the file does not exist.
local parse_abpfilterlist = function (filters_dir, filename, cache)
    local path = filters_dir .. filename
    if not os.exists(path) then
        msg.warn("error loading filter list (%s: no such file or directory)", filename)
        -- io.lines() raises an error on a missing file, so stop here.
        return
    end
    msg.verbose("loading filter list %s", filename)

    local white, black = list_new(), list_new()
    for line in io.lines(path) do
        -- Tolerate CRLF line endings so "\r" never ends up inside a rule
        line = string.gsub(line, "\r$", "")

        -- Ignore comments, header and blank lines
        if line:match("^[![]") or line:match("^$") or line:match("^# ") or line:match("^#$") then
            -- nothing to do
        -- Ignore element hiding
        elseif line:match("##") or line:match("#@#") then
            -- nothing to do
        elseif line:match("^@@") then
            list_add(white, string.sub(line, 3), cache.white)
        else
            list_add(black, line, cache.black, ".*")
        end
    end

    local wlen, blen, icnt = white.length, black.length, white.ignored + black.ignored

    return white, black, wlen, blen, icnt
end

--- Save the in-memory subscriptions to flatfile.
-- @tparam string file The destination file or the default location if nil.
local function write_subscriptions(file)
    if not file then file = subscriptions_file end
    assert(file and file ~= "", "Cannot write subscriptions to empty path")

    -- One tab-separated "title<TAB>uri<TAB>opts" line per detected filter file
    local lines = {}
    for _, filename in ipairs(filterfiles) do
        local list = _M.subscriptions[filename]
        -- A detected file may not be registered as a subscription (yet)
        if list then
            local subs = {
                -- gsub leaves the "{uri}" placeholder in place for a nil
                -- value, so substitute an empty string instead
                uri = list.uri or "",
                title = list.title,
                opts = table.concat(list.opts or {}, " "),
            }
            local line = string.gsub("{title}\t{uri}\t{opts}", "{(%w+)}", subs)
            table.insert(lines, line)
        end
    end

    -- Write table to disk
    local fh, err = io.open(file, "w")
    if not fh then
        msg.warn("unable to write subscriptions file '%s': %s", file, tostring(err))
        return
    end
    fh:write(table.concat(lines, "\n"))
    fh:close()
end

-- Remove options and add new ones to list
-- @param list_index Index of the list to modify
-- @param opt_ex Options to exclude
-- @param opt_inc Options to include
local function list_opts_modify(list_index, opt_ex, opt_inc)
    assert(type(list_index) == "number", "list options modify: invalid list index")
    assert(list_index > 0, "list options modify: index has to be > 0")
    if not opt_ex then opt_ex = {} end
    if not opt_inc then opt_inc = {} end

    if type(opt_ex) == "string" then opt_ex = util.string.split(opt_ex) end
    if type(opt_inc) == "string" then opt_inc = util.string.split(opt_inc) end

    local list = util.table.values(_M.subscriptions)[list_index]
    assert(list, "list options modify: no list with the given index")

    -- Build the new option list in a fresh table: requested options first,
    -- then the surviving old ones. Duplicates are dropped so that repeated
    -- enabling/disabling does not grow the list.
    local opts = {}
    for _, opt in ipairs(opt_inc) do
        if not util.table.hasitem(opts, opt) then
            table.insert(opts, opt)
        end
    end
    for _, opt in ipairs(list.opts or {}) do
        if not util.table.hasitem(opt_ex, opt) and not util.table.hasitem(opts, opt) then
            table.insert(opts, opt)
        end
    end

    -- Manage list's rules
    if util.table.hasitem(opt_inc, "Enabled") then
        adblock_wm:emit_signal("list_set_enabled", list.title, true)
        _M.refresh_views()
    elseif util.table.hasitem(opt_inc, "Disabled") then
        adblock_wm:emit_signal("list_set_enabled", list.title, false)
        _M.refresh_views()
    end

    list.opts = opts
    write_subscriptions()
end

--- Add a list to the in-memory lists table
-- @param uri The URI the list was obtained from.
-- @param title The list title; used as the key in the subscriptions table.
-- @param opts Options as a table or a string to be split; defaults to
-- `Disabled` when empty.
-- @param replace If true, an existing entry with this title is replaced;
-- otherwise the options are merged into the existing entry.
-- @param save_lists Pass `false` to skip writing the subscriptions file.
local function add_list(uri, title, opts, replace, save_lists)
    assert((title ~= nil) and (title ~= ""), "adblock list add: no title given")
    if not opts then opts = {} end

    -- Create tags table from string
    if type(opts) == "string" then opts = util.string.split(opts) end
    if #opts == 0 then table.insert(opts, "Disabled") end

    local existing = _M.subscriptions[title]
    if not replace and existing then
        -- Merge tags into the entry's option list (not into the entry itself)
        existing.opts = existing.opts or {}
        for _, opt in ipairs(opts) do
            if not util.table.hasitem(existing.opts, opt) then
                table.insert(existing.opts, opt)
            end
        end
    else
        -- Insert new adblock list
        _M.subscriptions[title] = { uri = uri, title = title, opts = opts }
    end

    -- Save by default
    if save_lists ~= false then write_subscriptions() end
end

--- Load subscriptions from a flatfile to memory.
-- @tparam string file The subscriptions file or the default subscriptions location if nil.
local function read_subscriptions(file)
    -- Find a subscriptions file
    if not file then file = subscriptions_file end
    if not os.exists(file) then
        msg.info(string.format("subscriptions file '%s' doesn't exist", file))
        return
    end

    -- Read lines into subscriptions data table
    for line in io.lines(file) do
        local title, uri, opts = unpack(util.string.split(line, "\t"))
        -- Skip blank lines and entries whose filter file no longer exists
        if title and title ~= "" and os.exists(adblock_dir..title) then
            add_list(uri, title, opts, false, false)
        end
    end
end

--- Load filter list files, and refresh any adblock pages that are open.
-- @tparam boolean reload `true` if all subscriptions already loaded
-- should be fully reloaded.
-- @tparam string single_list Single list file.
-- @tparam boolean no_sync `true` if subscriptions should not be synchronized to
-- the web process.
_M.load = function (reload, single_list, no_sync)
    if reload then _M.subscriptions = {} end
    -- detect_files() appends to filterfiles, so start from an empty list on
    -- every call; otherwise repeated non-reload loads accumulate duplicates.
    filterfiles = {}
    detect_files()
    if not single_list then
        read_subscriptions()
        -- Register (as enabled) every detected file that has no subscription
        for _, filename in ipairs(filterfiles) do
            if not _M.subscriptions[filename] then
                add_list("", filename, "Enabled", true, false)
            end
        end
        write_subscriptions()
    end

    -- [re-]loading:
    if reload then _M.rules = {} end
    local filters_dir = adblock_dir
    local filterfiles_loading
    if single_list and not reload then
        filterfiles_loading = { single_list }
    else
        filterfiles_loading = filterfiles
    end
    local rules_cache = {
        black = {},
        white = {}
    } -- This cache should let us avoid unnecessary filters duplication.

    for _, filename in ipairs(filterfiles_loading) do
        local list = _M.subscriptions[filename]
        if list then
            local white, black, wlen, blen, icnt = parse_abpfilterlist(filters_dir, filename, rules_cache)
            if not util.table.hasitem(_M.rules, list) then
                _M.rules[filename] = list
            end
            list.title, list.white, list.black, list.ignored = filename, wlen or 0, blen or 0, icnt or 0
            list.whitelist, list.blacklist = white or {}, black or {}
        else
            -- No subscription entry to attach the rules to; skip the file
            -- instead of failing on a nil index.
            msg.warn("adblock: no subscription registered for filter list %s", tostring(filename))
        end
    end

    if not no_sync and not single_list then
        adblock_wm:emit_signal("update_rules", _M.rules)
    end
    _M.refresh_views()
end

--- Enable or disable an adblock filter list.
-- @tparam number|string a The number of the list to enable or disable.
-- @tparam boolean enabled `true` to enable, `false` to disable.
function _M.list_set_enabled(a, enabled)
    if enabled then
        list_opts_modify(tonumber(a), "Disabled", "Enabled")
    else
        list_opts_modify(tonumber(a), "Enabled", "Disabled")
    end
end

local page_whitelist = {}

--- Whitelist accessing a blocked domain for the current session.
-- @tparam string domain The domain to whitelist.
_M.whitelist_domain_access = function (domain)
    -- Already whitelisted: nothing to add and nothing to announce
    local known = lousy.util.table.hasitem(page_whitelist, domain)
    if not known then
        table.insert(page_whitelist, domain)
        adblock_wm:emit_signal("update_page_whitelist", page_whitelist)
    end
end

-- Set by the "web-extension-created" handler and cleared again by the next
-- "web-extension-loaded" signal.
local new_web_extension_created

webview.add_signal("init", function (new_view)
    webview.modify_load_block(new_view, "adblock", _M.enabled)

    new_view:add_signal("web-extension-loaded", function (loaded_view)
        -- Only lift the block here when no new web extension was created
        if not new_web_extension_created then
            webview.modify_load_block(loaded_view, "adblock", false)
        end
        new_web_extension_created = nil
    end)

    -- if adblocking is disabled, unblock the tab as soon as it's switched to
    -- (the handler removes itself after its first invocation)
    local function unblock(switched_view)
        if not _M.enabled then
            webview.modify_load_block(switched_view, "adblock", false)
        end
        switched_view:remove_signal("switched-page", unblock)
    end
    new_view:add_signal("switched-page", unblock)
end)

-- Lift the "adblock" load block on every tab that belongs to the web process
-- reporting updated rules.
adblock_wm:add_signal("rules_updated", function (_, web_process_id)
    for _, win in pairs(window.bywidget) do
        for _, tab in pairs(win.tabs.children) do
            if tab.web_process_id == web_process_id then
                webview.modify_load_block(tab, "adblock", false)
            end
        end
    end
end)

-- Send the rules and the per-list enabled state to a newly created web extension.
luakit.add_signal("web-extension-created", function (view)
    new_web_extension_created = true
    adblock_wm:emit_signal(view, "update_rules", _M.rules)
    for list_name, list in pairs(_M.rules) do
        local is_enabled = util.table.hasitem(list.opts, "Enabled")
        adblock_wm:emit_signal(view, "list_set_enabled", list_name, is_enabled)
    end
end)

-- Add commands.
add_cmds({
    { ":adblock-reload, :abr", "Reload adblock filters.", function (w)
        _M.load(true)
        w:notify("adblock: Reloading filters complete.")
    end },
    { ":adblock-list-enable, :able", "Enable an adblock filter list.",
        function (_, o) _M.list_set_enabled(o.arg, true) end },
    { ":adblock-list-disable, :abld", "Disable an adblock filter list.",
        function (_, o) _M.list_set_enabled(o.arg, false) end },
    { ":adblock-enable, :abe", "Enable ad blocking.",
        function () _M.enabled = true end },
    { ":adblock-disable, :abd", "Disable ad blocking.",
        function () _M.enabled = false end },
})

-- Initialise module
_M.load(nil, nil, true)

--- @property enabled
-- Whether ad blocking is enabled. Modifying this value will modify adblock
-- state; setting it to `true` will enable ad blocking, while setting it to
-- `false` will disable ad blocking.
-- @readwrite
-- @default true
-- @type boolean

-- `enabled` is kept outside the module table so that every assignment to it
-- goes through __newindex.
local wrapped = { enabled = true }
local mt = {
    __index = wrapped,
    __newindex = function (t, k, v)
        if k == "enabled" then
            assert(type(v) == "boolean", "property 'enabled' must be boolean")
            wrapped.enabled = v
            adblock_wm:emit_signal("enable", v)
            _M.refresh_views()
        else
            -- Store any other new field normally instead of silently
            -- discarding the assignment.
            rawset(t, k, v)
        end
    end,
}

return setmetatable(_M, mt)

-- vim: et:sw=4:ts=8:sts=4:tw=80