1/* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
4
5"use strict";
6
7var EXPORTED_SYMBOLS = ["InteractionsBlocklist"];
8
9const { XPCOMUtils } = ChromeUtils.import(
10  "resource://gre/modules/XPCOMUtils.jsm"
11);
12
13XPCOMUtils.defineLazyModuleGetters(this, {
14  Services: "resource://gre/modules/Services.jsm",
15  UrlbarUtils: "resource:///modules/UrlbarUtils.jsm",
16});
17
// Lazily creates the console used for logging in this module. Messages are
// logged down to "Debug" level when the "browser.places.interactions.log"
// pref is true, and down to "Warn" level otherwise.
XPCOMUtils.defineLazyGetter(this, "logConsole", function() {
  const debugLoggingEnabled = Services.prefs.getBoolPref(
    "browser.places.interactions.log",
    false
  );
  const maxLogLevel = debugLoggingEnabled ? "Debug" : "Warn";
  return console.createInstance({
    prefix: "InteractionsBlocklist",
    maxLogLevel,
  });
});
29
// A blocklist of regular expressions. Maps base hostnames to a list of
// regular expressions for URLs with that base hostname. In this context,
// "base hostname" means the hostname without any subdomains or a public
// suffix. For example, the base hostname for
// "https://www.maps.google.com/a/place" is "google". We do this mapping to
// improve performance; otherwise we'd have to check all URLs against a long
// list of regular expressions. The regexes are defined as escaped strings so
// that we build them lazily.
// We may want to migrate this list to Remote Settings in the future.
// The raw table stores patterns as strings. It is wrapped in a Proxy whose
// `get` trap compiles the strings of the requested entry into
// case-insensitive RegExp objects the first time that entry is read, caching
// them in place. Any property that does not hold an array reads as `null`.
const HOST_BLOCKLIST = new Proxy(
  {
    baidu: [
      // Baidu SERP
      "^(https?:\\/\\/)?(www\\.)?baidu\\.com\\/s.*(\\?|&)wd=.*",
    ],
    bing: [
      // Bing SERP
      "^(https?:\\/\\/)?(www\\.)?bing\\.com\\/search.*(\\?|&)q=.*",
    ],
    duckduckgo: [
      // DuckDuckGo SERP
      "^(https?:\\/\\/)?(www\\.)?duckduckgo\\.com\\/.*(\\?|&)q=.*",
    ],
    google: [
      // Google SERP
      "^(https?:\\/\\/)?(www\\.)?google\\.(\\w|\\.){2,}\\/search.*(\\?|&)q=.*",
    ],
    yandex: [
      // Yandex SERP
      "^(https?:\\/\\/)?(www\\.)?yandex\\.(\\w|\\.){2,}\\/search.*(\\?|&)text=.*",
    ],
    zoom: [
      // Zoom meeting interstitial
      "^(https?:\\/\\/)?(www\\.)?.*\\.zoom\\.us\\/j\\/\\d+",
    ],
  },
  {
    get(rawBlocklist, hostKey) {
      const patterns = rawBlocklist[hostKey];
      if (!Array.isArray(patterns)) {
        return null;
      }

      // Compile any entries that are still strings. `new RegExp` throws a
      // SyntaxError if a pattern is invalid.
      patterns.forEach((pattern, index) => {
        if (typeof pattern === "string") {
          patterns[index] = new RegExp(pattern, "i");
        }
      });
      return patterns;
    },
  }
);
86
/**
 * Rebuilds a RegExp from one entry of the custom blocklist pref.
 *
 * Entries are persisted with `RegExp.prototype.toString()`, i.e. in the
 * `/source/flags` form. Passing such a string straight to `new RegExp` would
 * produce a pattern that matches the slashes and flags literally, so the
 * serialized form is unpacked here. A string that is not a valid serialized
 * literal is compiled as a bare pattern.
 *
 * @param {string} stored
 *   A string read from the custom blocklist pref.
 * @returns {RegExp}
 *   The regular expression described by `stored`.
 * @throws {TypeError} If `stored` is not a string.
 * @throws {SyntaxError} If `stored` is not a valid regular expression.
 */
function regexFromStoredString(stored) {
  if (typeof stored != "string") {
    throw new TypeError("Custom blocklist entries must be strings.");
  }
  let literal = /^\/(.+)\/([a-z]*)$/.exec(stored);
  if (literal) {
    try {
      return new RegExp(literal[1], literal[2]);
    } catch (ex) {
      // Not a valid serialized literal (e.g. a path-like bare pattern such as
      // "/foo/bar"); fall through and compile the whole string as a pattern.
    }
  }
  return new RegExp(stored);
}

/**
 * A class that maintains a blocklist of URLs. The class exposes a method to
 * check if a particular URL is contained on the blocklist, and methods to add
 * and remove custom regular expressions. Custom expressions are kept under
 * the "*" key of HOST_BLOCKLIST and persisted in the
 * "places.interactions.customBlocklist" pref.
 */
class _InteractionsBlocklist {
  /**
   * Loads the custom blocklist from the
   * "places.interactions.customBlocklist" pref into HOST_BLOCKLIST["*"]. If
   * the pref does not contain a JSON array of valid regular expressions, a
   * warning is logged and no custom entries are loaded.
   */
  constructor() {
    // Load custom blocklist items from pref.
    try {
      let customBlocklist = JSON.parse(
        Services.prefs.getStringPref(
          "places.interactions.customBlocklist",
          "[]"
        )
      );
      if (!Array.isArray(customBlocklist)) {
        throw new Error();
      }
      HOST_BLOCKLIST["*"] = customBlocklist.map(regexFromStoredString);
    } catch (ex) {
      logConsole.warn("places.interactions.customBlocklist is corrupted.");
    }
  }

  /**
   * Checks a URL against a blocklist of URLs. If the URL is blocklisted, we
   * should not record an interaction.
   *
   * @param {string} urlToCheck
   *   The URL we are looking for on the blocklist.
   * @returns {boolean}
   *  True if `urlToCheck` is on a blocklist. False otherwise, including when
   *  `urlToCheck` cannot be parsed as a URL.
   */
  isUrlBlocklisted(urlToCheck) {
    // First, find the URL's base host: the hostname without any subdomains or a
    // public suffix.
    let url;
    try {
      url = new URL(urlToCheck);
    } catch (ex) {
      // `url` is still undefined here, so report the caller's input.
      logConsole.warn(
        `Invalid URL passed to InteractionsBlocklist.isUrlBlocklisted: ${urlToCheck}`
      );
      return false;
    }
    let hostWithoutSuffix = UrlbarUtils.stripPublicSuffixFromHost(url.host);
    let [hostWithSubdomains] = UrlbarUtils.stripPrefixAndTrim(
      hostWithoutSuffix,
      {
        stripWww: true,
        trimTrailingDot: true,
      }
    );
    let baseHost = hostWithSubdomains.substring(
      hostWithSubdomains.lastIndexOf(".") + 1
    );
    // Then fetch blocked regexes for that baseHost and compare them to the full
    // URL. Also check the URL against the custom blocklist. The two lists are
    // tested separately: the arrays returned by HOST_BLOCKLIST are the live
    // blocklist entries and must not be modified here.
    let hostRegexes = HOST_BLOCKLIST[baseHost.toLowerCase()] || [];
    let wildcardRegexes = HOST_BLOCKLIST["*"] || [];
    let isBlocked = regex => regex.test(url.href);

    return hostRegexes.some(isBlocked) || wildcardRegexes.some(isBlocked);
  }

  /**
   * Adds a regex to HOST_BLOCKLIST. Since we can't parse the base host from
   * the regex, we add it to a list of wildcard regexes. All URLs are checked
   * against these wildcard regexes. The updated list is written to the
   * "places.interactions.customBlocklist" pref. An invalid regex is not added;
   * a warning is logged instead.
   *
   * @param {string|RegExp} regexToAdd
   *   The regular expression to add to our blocklist. It is always compiled
   *   with the case-insensitive flag.
   * @note Currently only exposed for tests and use in the console. In the
   *       future we could hook this up to a UI component.
   */
  addRegexToBlocklist(regexToAdd) {
    let regex;
    try {
      regex = new RegExp(regexToAdd, "i");
    } catch (ex) {
      logConsole.warn("Invalid regex passed to addRegexToBlocklist.");
      return;
    }

    if (!HOST_BLOCKLIST["*"]) {
      HOST_BLOCKLIST["*"] = [];
    }
    HOST_BLOCKLIST["*"].push(regex);
    Services.prefs.setStringPref(
      "places.interactions.customBlocklist",
      JSON.stringify(HOST_BLOCKLIST["*"].map(reg => reg.toString()))
    );
  }

  /**
   * Removes a regex from HOST_BLOCKLIST. If `regexToRemove` is not in the
   * blocklist, this is a no-op. Entries are compared by their pattern source.
   * The updated list is written to the
   * "places.interactions.customBlocklist" pref.
   *
   * @param {string|RegExp} regexToRemove
   *   The regular expression to remove from our blocklist.
   * @note Currently only exposed for tests and use in the console. In the
   *       future we could hook this up to a UI component.
   */
  removeRegexFromBlocklist(regexToRemove) {
    let regex;
    try {
      regex = new RegExp(regexToRemove, "i");
    } catch (ex) {
      logConsole.warn("Invalid regex passed to removeRegexFromBlocklist.");
      return;
    }

    let wildcardRegexes = HOST_BLOCKLIST["*"];
    if (!Array.isArray(wildcardRegexes)) {
      return;
    }
    HOST_BLOCKLIST["*"] = wildcardRegexes.filter(
      curr => curr.source !== regex.source
    );
    Services.prefs.setStringPref(
      "places.interactions.customBlocklist",
      JSON.stringify(HOST_BLOCKLIST["*"].map(reg => reg.toString()))
    );
  }
}
218
// The single shared instance of the blocklist. Its name is listed in
// EXPORTED_SYMBOLS at the top of this file.
const InteractionsBlocklist = new _InteractionsBlocklist();
220