1--- Simple URI-based content filter.
2--
3-- This is a simple, fast ad blocker module that works by blocking requests to
4-- domains that only serve advertisements. It does not currently do any form of
5-- cosmetic ad blocking (i.e. element hiding with CSS).
6--
7-- See also: @ref{adblock_chrome}.
8--
9-- # Capabilities
10--
11-- * You can allow specific content to be loaded if it is inadvertently
12--   blocked: simply add whitelisting rules formed by `@@` and the pattern to
13--   allow.
14-- * Supports multiple filter list files.
15-- * Filter files can be enabled, disabled and reloaded from disk
16--   without restarting luakit.
17-- * A configuration chrome page is provided by @ref{adblock_chrome}.
18--
19-- # Usage
20--
21-- * Add `require "adblock"` and `require "adblock_chrome"` to your `config.rc`.
22-- * Download AdblockPlus-compatible filter lists to the adblock directory.
23--   Multiple lists are supported.
24--   EasyList is the most popular Adblock Plus filter list, and can be
25--   downloaded from [https://easylist.to/](https://easylist.to/).
26-- * Filter lists downloaded to the adblock directory must have a
27--   filename ending in `.txt` in order to be loaded.
28-- * Filter lists need to be updated regularly (~weekly), use cron!
29--
30-- # Troubleshooting
31--
-- If ad blocking is not working as expected, the easiest way to determine
-- what is happening is to set the appropriate log level to `debug`.
--
-- If a filter list is not being loaded for some reason, start luakit with
-- the following option:
--
--     --log=lua/lib/adblock=debug
--
-- If a filter list is not behaving correctly, by blocking too much or too
-- little, start luakit with the following option:
--
--     --log=lua/lib/adblock_wm=debug
44--
45-- # Files and Directories
46--
47-- - All filterlists should be downloaded to the adblock data directory.
48--   By default, this is the `adblock` sub-directory of the luakit data
49--   directory. All filterlists must have a filename ending in `.txt`.
50--
51-- @module adblock
52-- @author Chris van Dijk (quigybo) <quigybo@hotmail.com>
53-- @author Mason Larobina (mason-l) <mason.larobina@gmail.com>
54-- @author Plaque FCC <Reslayer@ya.ru>
55-- @copyright 2010 Chris van Dijk <quigybo@hotmail.com>
56-- @copyright 2010 Mason Larobina <mason.larobina@gmail.com>
57-- @copyright 2012 Plaque FCC <Reslayer@ya.ru>
58
59local webview   = require("webview")
60local window    = require("window")
61local lousy     = require("lousy")
62local util      = lousy.util
63local lfs       = require("lfs")
64local modes     = require("modes")
65local add_cmds  = modes.add_cmds
66
-- Module table; exported at the end of the file.
local _M = {}

-- Handle used to exchange signals with the "adblock_wm" web module.
local adblock_wm = require_web_module("adblock_wm")

-- Directory holding the Adblock Plus compatible filter lists, and the
-- flatfile that records the known subscriptions.
local adblock_dir = luakit.data_dir .. "/adblock/"
local subscriptions_file = adblock_dir .. "subscriptions"

-- Filenames (relative to adblock_dir) of the detected filter lists.
local filterfiles = {}

--- The set of ad blocking subscriptions that are active.
-- Keyed by filter list filename.
-- @type table
-- @readonly
_M.subscriptions = {}

--- String patterns to filter URIs with.
-- Keyed by filter list filename.
-- @type table
-- @readonly
_M.rules = {}
85
--- Placeholder for adblock.chrome.refresh_views().
-- Called by this module after lists are loaded, list options change, or ad
-- blocking is toggled. The default implementation does nothing; presumably
-- the chrome page module replaces it (not visible from this file).
-- @local
function _M.refresh_views()
    -- Intentionally empty.
end
91
-- Detect files to read rules from.
--
-- Scans the adblock directory for files whose name ends in `.txt` and appends
-- every filename that is not yet recorded to `filterfiles`. The directory is
-- created first if it does not exist. Calling this function repeatedly is
-- safe: files that are already known are not added a second time.
local function detect_files()
    -- Create adblock directory if it doesn't exist. chdir() is used purely as
    -- an existence probe, so the previous working directory is restored.
    local curdir = lfs.currentdir()
    if not lfs.chdir(adblock_dir) then
        local ok, err = lfs.mkdir(adblock_dir)
        if not ok then
            -- lfs.dir() raises an error on a missing directory, so stop here.
            msg.warn("unable to create adblock directory %s: %s", adblock_dir, tostring(err))
            return
        end
    else
        lfs.chdir(curdir)
    end

    -- Remember what is already listed so that a second scan (a load without
    -- reload) does not produce duplicate entries.
    local known = {}
    for _, filename in ipairs(filterfiles) do
        known[filename] = true
    end

    msg.verbose("searching for filter lists in %s", adblock_dir)
    for filename in lfs.dir(adblock_dir) do
        if string.find(filename, "%.txt$") and not known[filename] then
            -- Pass the filename as an argument: it may contain '%'.
            msg.verbose("found filter list: %s", filename)
            known[filename] = true
            table.insert(filterfiles, filename)
        end
    end

    msg.info("found " .. #filterfiles .. " filter list" .. (#filterfiles == 1 and "" or "s"))
end
112
-- Split an Adblock Plus filter into its pattern and its options.
--
-- Everything after the first `$` is treated as a comma-separated option list.
-- Recognised options are `domain=a|b|...` (stored as a list of strings) and
-- `third-party` / `~third-party` (stored as a boolean). Any other option sets
-- `opts.unknown = true`.
--
-- @tparam string s The filter text.
-- @treturn string The filter text with the option part removed.
-- @treturn table The parsed options (empty when the filter has none).
local function get_abp_opts(s)
    local opts = {}

    local dollar = string.find(s, "$", 1, true)
    if not dollar then
        return s, opts
    end

    local option_text = string.sub(s, dollar + 1)
    local pattern = string.sub(s, 1, dollar - 1)

    for item in string.gmatch(option_text, "[^,]+") do
        -- Separate "name=value" pairs
        local name, value = item, nil
        local eq = string.find(item, "=", 1, true)
        if eq then
            name, value = string.sub(item, 1, eq - 1), string.sub(item, eq + 1)
        end

        -- A leading tilde negates the option
        local negated = string.sub(name, 1, 1) == "~"
        if negated then
            name = string.sub(name, 2)
        end

        if name == "domain" and value then
            local domains = {}
            for d in string.gmatch(value, "[^|]+") do
                domains[#domains + 1] = d
            end
            if #domains > 0 then opts["domain"] = domains end
        elseif name == "third-party" then
            opts["third-party"] = not negated
        else
            opts["unknown"] = true
        end
    end

    return pattern, opts
end
148
-- Convert Adblock Plus filter description to lua string pattern
-- See http://adblockplus.org/en/filters for more information
--
-- @tparam string s The filter text (without any leading `@@`).
-- @treturn table List of patterns: empty if the filter uses an unsupported
-- option, one entry for ordinary filters, two entries for `||` filters.
-- @treturn table The parsed filter options (omitted for unsupported filters).
-- @treturn string|nil The domain of a `||` filter, unless it has a wildcard.
-- @treturn boolean `true` if the returned pattern is a plain string that must
-- be matched literally, `false` if it is a Lua pattern.
local abp_to_pattern = function (s)
    -- Strip filter options
    local opts
    s, opts = get_abp_opts(s)
    if opts and opts.unknown == true then return {} end -- Skip rules with unknown options

    if string.len(s) == 0 then
        return {""}, opts, nil, false
    end

    -- If this is matchable as a plain string, return early. A filter that
    -- starts or ends with a `|` anchor is NOT plain: the pipe has to be
    -- translated to `^` / `$` below, otherwise it would be searched for
    -- literally and the rule could never match.
    local has_star = string.find(s, "*", 1, true)
    local has_caret = string.find(s, "^", 1, true)
    local domain_anchor = string.match(s, "^||")
    local edge_anchor = string.find(s, "^|") or string.find(s, "|$")
    if not (has_star or has_caret or domain_anchor or edge_anchor) then
        return {s}, opts, nil, true
    end

    -- Optimize for domain anchor rules
    local domain = nil
    if domain_anchor then
        -- Extract the domain from the pattern
        local d = string.sub(s, 3)
        d = string.gsub(d, "/.*", "")
        d = string.gsub(d, "%^.*", "")

        -- We don't bother with wildcard domains since they aren't frequent enough
        if not string.find(d, "*", 1, true) then
            domain = d
        end
    end

    -- Protect magic characters (^$()%.[]*+-?) not used by ABP (^$()[]*)
    s = string.gsub(s, "([%%%.%+%-%?])", "%%%1")

    -- Wildcards are globbing
    s = string.gsub(s, "%*", ".*")

    -- Caret is separator (anything but a letter, a digit, or one of the following: - . %)
    s = string.gsub(s, "%^", "[^%%w%%-%%.%%%%]")

    local pats
    if domain_anchor then
        local p = string.sub(s, 3) -- Clip off first two || characters
        pats = { "^https?://" .. p, "^https?://[^/]*%." .. p }
    else
        pats = { s }
    end

    for k, v in ipairs(pats) do
        -- Pipe is anchor
        v = string.gsub(v, "^|", "^")
        v = string.gsub(v, "|$", "$")

        -- Convert to lowercase ($match-case option is not honoured)
        pats[k] = string.lower(v)
    end

    return pats, opts, domain, false
end
212
-- Store `opts` under `pattern` in `tab`, unless `cache_tab` shows that the
-- pattern was already added.
--
-- @treturn boolean `true` if the pattern was new and has been stored,
-- `false` if it was a duplicate and nothing was changed.
local add_unique_cached = function (pattern, opts, tab, cache_tab)
    if cache_tab[pattern] then
        return false
    end
    cache_tab[pattern], tab[pattern] = true, opts
    return true
end
222
-- Create an empty rule list: one table per rule bucket plus two counters
-- (`length` for stored rules, `ignored` for rules that were not stored).
local list_new = function ()
    local list = { length = 0, ignored = 0 }
    for _, bucket in ipairs({ "patterns", "ad_patterns", "plain", "ad_plain", "domains" }) do
        list[bucket] = {}
    end
    return list
end
234
-- Convert one filter line and file the resulting patterns into `list`.
--
-- Plain strings go to `plain` / `ad_plain`, domain-anchored patterns go to
-- `domains[domain]`, and all other patterns go to `patterns` / `ad_patterns`.
-- The `ad_*` buckets are used when the line contains the substring "ad".
-- Patterns equal to "^http:" or to `pat_exclude`, and patterns already
-- present in `cache`, are counted in `list.ignored` instead of being stored.
local list_add = function(list, line, cache, pat_exclude)
    local pats, opts, domain, plain = abp_to_pattern(line)
    local contains_ad = string.find(line, "ad", 1, true)

    for _, pat in ipairs(pats) do
        -- Pick the destination bucket; it stays nil for excluded patterns
        local bucket
        if plain then
            bucket = contains_ad and list.ad_plain or list.plain
        elseif pat ~= "^http:" and pat ~= pat_exclude then
            if domain then
                bucket = list.domains[domain]
                if not bucket then
                    bucket = {}
                    list.domains[domain] = bucket
                end
            else
                bucket = contains_ad and list.ad_patterns or list.patterns
            end
        end

        if bucket and add_unique_cached(pat, opts, bucket, cache) then
            list.length = list.length + 1
        else
            list.ignored = list.ignored + 1
        end
    end
end
262
-- Parses an Adblock Plus compatible filter list
--
-- @tparam string filters_dir Directory containing the list (with trailing slash).
-- @tparam string filename Name of the list file inside `filters_dir`.
-- @tparam table cache Table with `white` and `black` sub-tables, used to
-- skip patterns that were already added.
-- @treturn table The whitelist (exception) rules.
-- @treturn table The blacklist (blocking) rules.
-- @treturn number Number of whitelist rules stored.
-- @treturn number Number of blacklist rules stored.
-- @treturn number Number of rules that were ignored.
local parse_abpfilterlist = function (filters_dir, filename, cache)
    local path = filters_dir .. filename
    if not os.exists(path) then
        msg.warn("error loading filter list (%s: no such file or directory)", filename)
        -- io.lines() would raise an error on a missing file; report the list
        -- as empty instead.
        return list_new(), list_new(), 0, 0, 0
    end
    msg.verbose("loading filter list %s", filename)

    local white, black = list_new(), list_new()
    for line in io.lines(path) do
        -- Drop a trailing carriage return left over from CRLF line endings
        line = string.gsub(line, "\r$", "")

        -- Ignore comments, header and blank lines
        if line:match("^[![]") or line:match("^$") or line:match("^# ") or line:match("^#$") then
            -- nothing to do
        -- Ignore element hiding
        elseif line:match("##") or line:match("#@#") then
            -- nothing to do
        elseif line:match("^@@") then
            list_add(white, string.sub(line, 3), cache.white)
        else
            -- A blocking rule that matches everything is never accepted
            list_add(black, line, cache.black, ".*")
        end
    end

    local wlen, blen, icnt = white.length, black.length, white.ignored + black.ignored

    return white, black, wlen, blen, icnt
end
291
--- Save the in-memory subscriptions to flatfile.
-- One line is written per detected filter file that has a subscription
-- entry, in the form `title<TAB>uri<TAB>opts`, with the options separated by
-- spaces. If the file cannot be opened a warning is logged and nothing is
-- written.
-- @tparam string file The destination file or the default location if nil.
local function write_subscriptions(file)
    if not file then file = subscriptions_file end
    assert(file and file ~= "", "Cannot write subscriptions to empty path")

    local lines = {}
    for _, filename in ipairs(filterfiles) do
        local list = _M.subscriptions[filename]
        -- A detected file may not have been registered as a subscription yet
        if list then
            local subs = {
                -- gsub() leaves the placeholder in place for nil values, so
                -- make sure every field is a string.
                uri = list.uri or "",
                title = list.title or filename,
                opts = table.concat(list.opts or {}, " "),
            }
            local line = string.gsub("{title}\t{uri}\t{opts}", "{(%w+)}", subs)
            table.insert(lines, line)
        end
    end

    -- Write table to disk
    local fh, err = io.open(file, "w")
    if not fh then
        msg.warn("unable to write subscriptions file: %s", tostring(err))
        return
    end
    fh:write(table.concat(lines, "\n"))
    fh:close()
end
311
-- Remove options and add new ones to list
--
-- The resulting option list is `opt_inc` followed by the list's current
-- options minus those in `opt_ex`, without duplicates. If the result contains
-- "Enabled" (or else "Disabled") the web module is told to enable (disable)
-- the list. The subscriptions file is rewritten afterwards.
--
-- @param list_index Index of the list to modify
-- @param opt_ex Options to exclude (table, or whitespace-separated string)
-- @param opt_inc Options to include (table, or whitespace-separated string)
local function list_opts_modify(list_index, opt_ex, opt_inc)
    assert(type(list_index) == "number", "list options modify: invalid list index")
    assert(list_index > 0, "list options modify: index has to be > 0")
    if not opt_ex then opt_ex = {} end
    if not opt_inc then opt_inc = {} end

    if type(opt_ex) == "string" then opt_ex = util.string.split(opt_ex) end
    if type(opt_inc) == "string" then opt_inc = util.string.split(opt_inc) end

    local list = util.table.values(_M.subscriptions)[list_index]
    assert(list, "list options modify: no list with that index")

    -- Build a fresh table: the caller's opt_inc must not be modified, and an
    -- option must not be added twice (e.g. when enabling an enabled list).
    local opts, seen = {}, {}
    local function add_opt(opt)
        if not seen[opt] then
            seen[opt] = true
            table.insert(opts, opt)
        end
    end
    for _, opt in ipairs(opt_inc) do
        add_opt(opt)
    end
    for _, opt in ipairs(list.opts or {}) do
        if not util.table.hasitem(opt_ex, opt) then
            add_opt(opt)
        end
    end

    -- Manage list's rules
    if util.table.hasitem(opts, "Enabled") then
        adblock_wm:emit_signal("list_set_enabled", list.title, true)
        _M.refresh_views()
    elseif util.table.hasitem(opts, "Disabled") then
        adblock_wm:emit_signal("list_set_enabled", list.title, false)
        _M.refresh_views()
    end

    list.opts = opts
    write_subscriptions()
end
345
--- Add a list to the in-memory lists table
-- @tparam string uri The URI the list was obtained from (may be empty).
-- @tparam string title The list title; used as the key in `subscriptions`.
-- @tparam table|string opts List options, as a table or as a
-- whitespace-separated string. Defaults to `{ "Disabled" }` when empty.
-- @tparam boolean replace `true` to replace an existing entry with the same
-- title; otherwise the given options are merged into the existing entry.
-- @tparam boolean save_lists `false` to skip writing the subscriptions file.
local function add_list(uri, title, opts, replace, save_lists)
    assert( (title ~= nil) and (title ~= ""), "adblock list add: no title given")
    if not opts then opts = {} end

    -- Create tags table from string
    if type(opts) == "string" then opts = util.string.split(opts) end
    if table.maxn(opts) == 0 then table.insert(opts, "Disabled") end

    local existing = _M.subscriptions[title]
    if not replace and existing then
        -- Merge tags into the entry's option list (not into the entry itself)
        if not existing.opts then existing.opts = {} end
        for _, opt in ipairs(opts) do
            if not util.table.hasitem(existing.opts, opt) then
                table.insert(existing.opts, opt)
            end
        end
    else
        -- Insert new adblock list
        _M.subscriptions[title] = { uri = uri, title = title, opts = opts }
    end

    -- Save by default
    if save_lists ~= false then write_subscriptions() end
end
368
--- Load subscriptions from a flatfile to memory.
-- Each line has the form `title<TAB>uri<TAB>opts`. Entries whose title is
-- empty, or whose filter file no longer exists in the adblock directory, are
-- skipped. Nothing is written back to disk.
-- @tparam string file The subscriptions file or the default subscriptions location if nil.
local function read_subscriptions(file)
    -- Find a subscriptions file
    if not file then file = subscriptions_file end
    if not os.exists(file) then
        -- Pass the path as an argument rather than pre-formatting it: the
        -- message is used as a format string and the path may contain '%'.
        msg.info("subscriptions file '%s' doesn't exist", file)
        return
    end

    -- Read lines into subscriptions data table
    for line in io.lines(file) do
        local title, uri, opts = unpack(util.string.split(line, "\t"))
        if title and title ~= "" and os.exists(adblock_dir .. title) then
            add_list(uri, title, opts, false, false)
        end
    end
end
387
--- Load filter list files, and refresh any adblock pages that are open.
-- @tparam boolean reload `true` if all subscriptions already loaded
-- should be fully reloaded.
-- @tparam string single_list Single list file.
-- @tparam boolean no_sync `true` if subscriptions should not be synchronized to
-- the web process.
_M.load = function (reload, single_list, no_sync)
    if reload then _M.subscriptions, filterfiles = {}, {} end
    detect_files()
    if not single_list then
        read_subscriptions()
        -- Filter files without a subscription entry start out enabled
        for _, filename in ipairs(filterfiles) do
            if not _M.subscriptions[filename] then
                add_list("", filename, "Enabled", true, false)
            end
        end
        write_subscriptions()
    end

    -- [re-]loading:
    if reload then _M.rules = {} end
    local filterfiles_loading
    if single_list and not reload then
        filterfiles_loading = { single_list }
    else
        filterfiles_loading = filterfiles
    end
    local rules_cache = {
        black = {},
        white = {}
    } -- This cache should let us avoid unnecessary filters duplication.

    for _, filename in ipairs(filterfiles_loading) do
        local white, black, wlen, blen, icnt = parse_abpfilterlist(adblock_dir, filename, rules_cache)

        local list = _M.subscriptions[filename]
        if not list then
            -- The subscriptions file is not consulted when a single list is
            -- loaded, so the entry may be missing; register it rather than
            -- indexing nil below.
            add_list("", filename, "Enabled", true, false)
            list = _M.subscriptions[filename]
        end

        if not util.table.hasitem(_M.rules, list) then
            _M.rules[filename] = list
        end
        list.title, list.white, list.black, list.ignored = filename, wlen or 0, blen or 0, icnt or 0
        list.whitelist, list.blacklist = white or {}, black or {}
    end

    if not no_sync and not single_list then
        adblock_wm:emit_signal("update_rules", _M.rules)
    end
    _M.refresh_views()
end
437
--- Enable or disable an adblock filter list.
-- The "Enabled" and "Disabled" options of the list are swapped accordingly.
-- @tparam number|string a The number of the list to enable or disable.
-- @tparam boolean enabled `true` to enable, `false` to disable.
function _M.list_set_enabled(a, enabled)
    local remove, add = "Enabled", "Disabled"
    if enabled then
        remove, add = add, remove
    end
    list_opts_modify(tonumber(a), remove, add)
end
448
-- Domains whitelisted through whitelist_domain_access(); kept in memory only.
local page_whitelist = {}

--- Whitelist accessing a blocked domain for the current session.
-- Domains that are already whitelisted are ignored; otherwise the updated
-- whitelist is sent to the web module.
-- @tparam string domain The domain to whitelist.
_M.whitelist_domain_access = function (domain)
    local already_listed = lousy.util.table.hasitem(page_whitelist, domain)
    if not already_listed then
        table.insert(page_whitelist, domain)
        adblock_wm:emit_signal("update_page_whitelist", page_whitelist)
    end
end
458
-- Set to true by the "web-extension-created" handler and cleared again by
-- the next "web-extension-loaded" signal received on any view.
local new_web_extension_created

webview.add_signal("init", function (view)
    -- New views start with the "adblock" load block set to the current
    -- enabled state.
    webview.modify_load_block(view, "adblock", _M.enabled)

    local function on_extension_loaded(v)
        -- No new web extension was created since the last load, so the
        -- block is lifted right away.
        if not new_web_extension_created then
            webview.modify_load_block(v, "adblock", false)
        end
        new_web_extension_created = nil
    end
    view:add_signal("web-extension-loaded", on_extension_loaded)

    -- if adblocking is disabled, unblock the tab as soon as it's switched to
    -- (the handler removes itself after its first invocation)
    local function unblock(vv)
        if not _M.enabled then
            webview.modify_load_block(vv, "adblock", false)
        end
        vv:remove_signal("switched-page", unblock)
    end
    view:add_signal("switched-page", unblock)
end)
-- When the web module reports "rules_updated" for a web process, lift the
-- "adblock" load block from every tab that belongs to that web process.
adblock_wm:add_signal("rules_updated", function (_, web_process_id)
    for _, win in pairs(window.bywidget) do
        for _, tab in pairs(win.tabs.children) do
            local same_process = tab.web_process_id == web_process_id
            if same_process then
                webview.modify_load_block(tab, "adblock", false)
            end
        end
    end
end)
489
-- When a web extension is created, send it the current rules followed by the
-- enabled state of every list.
luakit.add_signal("web-extension-created", function (view)
    new_web_extension_created = true
    adblock_wm:emit_signal(view, "update_rules", _M.rules)
    for title, subscription in pairs(_M.rules) do
        -- Keep the lookup result in a local so that exactly one value is
        -- forwarded with the signal.
        local is_enabled = util.table.hasitem(subscription.opts, "Enabled")
        adblock_wm:emit_signal(view, "list_set_enabled", title, is_enabled)
    end
end)
498
-- Add commands.
do
    -- Build a command handler that enables/disables the list given as argument
    local function list_toggler(enabled)
        return function (_, o) _M.list_set_enabled(o.arg, enabled) end
    end

    -- Build a command handler that switches ad blocking on or off
    local function adblock_toggler(enabled)
        return function () _M.enabled = enabled end
    end

    local function reload_filters(w)
        _M.load(true)
        w:notify("adblock: Reloading filters complete.")
    end

    add_cmds({
        { ":adblock-reload, :abr", "Reload adblock filters.", reload_filters },
        { ":adblock-list-enable, :able", "Enable an adblock filter list.", list_toggler(true) },
        { ":adblock-list-disable, :abld", "Disable an adblock filter list.", list_toggler(false) },
        { ":adblock-enable, :abe", "Enable ad blocking.", adblock_toggler(true) },
        { ":adblock-disable, :abd", "Disable ad blocking.", adblock_toggler(false) },
    })
end
514
-- Initialise module: load the filter lists without synchronizing them to the
-- web process.
_M.load(nil, nil, true)

--- @property enabled
-- Whether ad blocking is enabled. Modifying this value will modify adblock
-- state; setting it to `true` will enable ad blocking, while setting it to
-- `false` will disable ad blocking.
-- @readwrite
-- @default true
-- @type boolean

-- Backing store for the `enabled` property; reads fall through to it.
local wrapped = { enabled = true }

-- Handles assignments to keys that are not already present in the module
-- table. Only `enabled` is accepted; any other key is silently dropped.
local function set_property(_, k, v)
    if k ~= "enabled" then
        return
    end
    assert(type(v) == "boolean", "property 'enabled' must be boolean")
    wrapped.enabled = v
    adblock_wm:emit_signal("enable", v)
    _M.refresh_views()
end

return setmetatable(_M, { __index = wrapped, __newindex = set_property })
540
541-- vim: et:sw=4:ts=8:sts=4:tw=80
542