/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

"use strict";

var EXPORTED_SYMBOLS = ["InteractionsBlocklist"];

const { XPCOMUtils } = ChromeUtils.import(
  "resource://gre/modules/XPCOMUtils.jsm"
);

// Modules that are only loaded the first time they are referenced.
XPCOMUtils.defineLazyModuleGetters(this, {
  Services: "resource://gre/modules/Services.jsm",
  UrlbarUtils: "resource:///modules/UrlbarUtils.jsm",
});

// Lazily-defined console instance for this module. It logs at "Debug" level
// when the "browser.places.interactions.log" pref is true, and at "Warn"
// level otherwise (including when the pref is unset).
XPCOMUtils.defineLazyGetter(this, "logConsole", function() {
  const isDebugLoggingEnabled = Services.prefs.getBoolPref(
    "browser.places.interactions.log",
    false
  );
  return console.createInstance({
    prefix: "InteractionsBlocklist",
    maxLogLevel: isDebugLoggingEnabled ? "Debug" : "Warn",
  });
});

// A blocklist of regular expressions, keyed by base hostname. Each base
// hostname maps to a list of regular expressions for URLs with that base
// hostname. Here "base hostname" means the hostname with neither subdomains
// nor a public suffix: for "https://www.maps.google.com/a/place" the base
// hostname is "google". Grouping by base hostname is a performance measure;
// without it every URL would have to be checked against one long list of
// regular expressions. The regexes are written as escaped strings so that
// they can be built lazily.
// We may want to migrate this list to Remote Settings in the future.
let HOST_BLOCKLIST = {
  baidu: [
    // Baidu SERP
    "^(https?:\\/\\/)?(www\\.)?baidu\\.com\\/s.*(\\?|&)wd=.*",
  ],
  bing: [
    // Bing SERP
    "^(https?:\\/\\/)?(www\\.)?bing\\.com\\/search.*(\\?|&)q=.*",
  ],
  duckduckgo: [
    // DuckDuckGo SERP
    "^(https?:\\/\\/)?(www\\.)?duckduckgo\\.com\\/.*(\\?|&)q=.*",
  ],
  google: [
    // Google SERP
    "^(https?:\\/\\/)?(www\\.)?google\\.(\\w|\\.){2,}\\/search.*(\\?|&)q=.*",
  ],
  yandex: [
    // Yandex SERP
    "^(https?:\\/\\/)?(www\\.)?yandex\\.(\\w|\\.){2,}\\/search.*(\\?|&)text=.*",
  ],
  zoom: [
    // Zoom meeting interstitial
    "^(https?:\\/\\/)?(www\\.)?.*\\.zoom\\.us\\/j\\/\\d+",
  ],
};

// Wrap the table in a Proxy so that string entries are compiled into
// case-insensitive RegExp objects the first time a host's list is read. The
// compiled objects are written back into the list, so each string is compiled
// at most once. Reading a key whose value is not an array yields null.
HOST_BLOCKLIST = new Proxy(HOST_BLOCKLIST, {
  get(target, property) {
    let regexes = target[property];
    if (!Array.isArray(regexes)) {
      return null;
    }

    for (let i = 0; i < regexes.length; i++) {
      if (typeof regexes[i] === "string") {
        // `new RegExp` throws a SyntaxError for an invalid pattern, so there
        // is no need to validate the result afterwards.
        regexes[i] = new RegExp(regexes[i], "i");
      }
    }
    return regexes;
  },
});

// Name of the pref holding the JSON-serialized custom (wildcard) blocklist.
const CUSTOM_BLOCKLIST_PREF = "places.interactions.customBlocklist";

/**
 * Persists the wildcard ("*") blocklist to the custom blocklist pref as a JSON
 * array of regex sources.
 *
 * The `source` of each regex is stored rather than its `toString()` form
 * ("/source/flags"), so that feeding the stored string back into
 * `new RegExp()` rebuilds the same pattern instead of one that matches the
 * literal slashes and flags.
 */
function saveCustomBlocklist() {
  let sources = (HOST_BLOCKLIST["*"] || []).map(regex => regex.source);
  Services.prefs.setStringPref(CUSTOM_BLOCKLIST_PREF, JSON.stringify(sources));
}

/**
 * A class that maintains a blocklist of URLs. The class exposes a method to
 * check if a particular URL is contained on the blocklist.
 */
class _InteractionsBlocklist {
  /**
   * Loads the custom blocklist from the pref into the wildcard ("*") entry of
   * HOST_BLOCKLIST. If the pref is not a JSON array of valid regex sources, a
   * warning is logged and no custom entries are loaded.
   */
  constructor() {
    try {
      let customBlocklist = JSON.parse(
        Services.prefs.getStringPref(CUSTOM_BLOCKLIST_PREF, "[]")
      );
      if (!Array.isArray(customBlocklist)) {
        throw new Error("Custom blocklist pref is not a JSON array.");
      }
      // Use the same "i" flag as addRegexToBlocklist so that a regex behaves
      // the same before and after being persisted.
      HOST_BLOCKLIST["*"] = customBlocklist.map(
        regexStr => new RegExp(regexStr, "i")
      );
    } catch (ex) {
      logConsole.warn("places.interactions.customBlocklist is corrupted.", ex);
    }
  }

  /**
   * Checks a URL against a blocklist of URLs. If the URL is blocklisted, we
   * should not record an interaction.
   *
   * @param {string} urlToCheck
   *   The URL we are looking for on the blocklist.
   * @returns {boolean}
   *   True if `urlToCheck` is on a blocklist. False otherwise, including when
   *   `urlToCheck` cannot be parsed as a URL.
   */
  isUrlBlocklisted(urlToCheck) {
    // First, find the URL's base host: the hostname without any subdomains or a
    // public suffix.
    let url;
    try {
      url = new URL(urlToCheck);
    } catch (ex) {
      // `url` is still undefined here, so report the caller's input instead.
      logConsole.warn(
        `Invalid URL passed to InteractionsBlocklist.isUrlBlocklisted: ${urlToCheck}`
      );
      return false;
    }

    let hostWithoutSuffix = UrlbarUtils.stripPublicSuffixFromHost(url.host);
    let [hostWithSubdomains] = UrlbarUtils.stripPrefixAndTrim(
      hostWithoutSuffix,
      {
        stripWww: true,
        trimTrailingDot: true,
      }
    );
    let baseHost = hostWithSubdomains.substring(
      hostWithSubdomains.lastIndexOf(".") + 1
    );

    // Then fetch blocked regexes for that baseHost and compare them to the full
    // URL. Also check the URL against the custom blocklist. The two lists are
    // combined into a fresh array: pushing the wildcard regexes onto the
    // per-host list would permanently grow the stored list on every call and
    // keep custom regexes alive after they have been removed.
    let hostRegexes = HOST_BLOCKLIST[baseHost.toLowerCase()] || [];
    let customRegexes = HOST_BLOCKLIST["*"] || [];

    return [...hostRegexes, ...customRegexes].some(r => r.test(url.href));
  }

  /**
   * Adds a regex to HOST_BLOCKLIST. Since we can't parse the base host from
   * the regex, we add it to a list of wildcard regexes. All URLs are checked
   * against these wildcard regexes. The updated list is persisted to the
   * custom blocklist pref.
   *
   * @param {string|RegExp} regexToAdd
   *   The regular expression to add to our blocklist. It is always stored as a
   *   case-insensitive regex. Invalid patterns are logged and ignored.
   * @note Currently only exposed for tests and use in the console. In the
   *       future we could hook this up to a UI component.
   */
  addRegexToBlocklist(regexToAdd) {
    let regex;
    try {
      regex = new RegExp(regexToAdd, "i");
    } catch (ex) {
      // logConsole is a module-level lazy getter, not an instance property.
      logConsole.warn("Invalid regex passed to addRegexToBlocklist.");
      return;
    }

    if (!HOST_BLOCKLIST["*"]) {
      HOST_BLOCKLIST["*"] = [];
    }
    HOST_BLOCKLIST["*"].push(regex);
    saveCustomBlocklist();
  }

  /**
   * Removes a regex from HOST_BLOCKLIST. If `regexToRemove` is not in the
   * blocklist, this is a no-op. Entries are matched by regex source. When a
   * wildcard list exists, the resulting list is persisted to the custom
   * blocklist pref.
   *
   * @param {string|RegExp} regexToRemove
   *   The regular expression to remove from our blocklist. Invalid patterns
   *   are logged and ignored.
   * @note Currently only exposed for tests and use in the console. In the
   *       future we could hook this up to a UI component.
   */
  removeRegexFromBlocklist(regexToRemove) {
    let regex;
    try {
      regex = new RegExp(regexToRemove, "i");
    } catch (ex) {
      // logConsole is a module-level lazy getter, not an instance property.
      logConsole.warn("Invalid regex passed to removeRegexFromBlocklist.");
      return;
    }

    // The Proxy returns null unless the wildcard entry is an array.
    let customRegexes = HOST_BLOCKLIST["*"];
    if (!customRegexes) {
      return;
    }
    HOST_BLOCKLIST["*"] = customRegexes.filter(
      curr => curr.source !== regex.source
    );
    saveCustomBlocklist();
  }
}

const InteractionsBlocklist = new _InteractionsBlocklist();