/* -*- indent-tabs-mode: nil; js-indent-level: 2 -*-
 * vim: sw=2 ts=2 sts=2 expandtab
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

/**
 * This component handles fixing up URIs, by correcting obvious typos and adding
 * missing schemes.
 * URI references:
 *   http://www.faqs.org/rfcs/rfc1738.html
 *   http://www.faqs.org/rfcs/rfc2396.html
 */

// TODO (Bug 1641220) getFixupURIInfo has a complex logic, that likely could be
// simplified, but the risk of regressing its behavior is high.
/* eslint complexity: ["error", 43] */

var EXPORTED_SYMBOLS = ["URIFixup", "URIFixupInfo"];

const { ComponentUtils } = ChromeUtils.import(
  "resource://gre/modules/ComponentUtils.jsm"
);
const { XPCOMUtils } = ChromeUtils.import(
  "resource://gre/modules/XPCOMUtils.jsm"
);
const { Services } = ChromeUtils.import("resource://gre/modules/Services.jsm");
const { AppConstants } = ChromeUtils.import(
  "resource://gre/modules/AppConstants.jsm"
);

// Lazily-resolved XPCOM services. Each getter is installed on the module
// global under the given name and is referenced by that bare name below.
XPCOMUtils.defineLazyServiceGetter(
  this,
  "externalProtocolService",
  "@mozilla.org/uriloader/external-protocol-service;1",
  "nsIExternalProtocolService"
);

// The "default" protocol handler; getFixupURIInfo compares the handler
// returned for a scheme against this one to decide whether the scheme can be
// handled.
XPCOMUtils.defineLazyServiceGetter(
  this,
  "defaultProtocolHandler",
  "@mozilla.org/network/protocol;1?name=default",
  "nsIProtocolHandler"
);

// Used by fileURIFixup to turn an nsIFile into a file: URL spec.
XPCOMUtils.defineLazyServiceGetter(
  this,
  "fileProtocolHandler",
  "@mozilla.org/network/protocol;1?name=file",
  "nsIFileProtocolHandler"
);

// Lazily-read preferences.
// NOTE(review): the last argument is presumably the value used when the pref
// is not set - confirm against XPCOMUtils.defineLazyPreferenceGetter.
XPCOMUtils.defineLazyPreferenceGetter(
  this,
  "fixupSchemeTypos",
  "browser.fixup.typo.scheme",
  true
);
XPCOMUtils.defineLazyPreferenceGetter(
  this,
  "dnsFirstForSingleWords",
  "browser.fixup.dns_first_for_single_words",
  false
);
XPCOMUtils.defineLazyPreferenceGetter(
  this,
  "keywordEnabled",
  "keyword.enabled",
  true
);
XPCOMUtils.defineLazyPreferenceGetter(
  this,
  "alternateEnabled",
  "browser.fixup.alternate.enabled",
  true
);
XPCOMUtils.defineLazyPreferenceGetter(
  this,
  "alternateProtocol",
  "browser.fixup.alternate.protocol",
  "https"
);

// Local aliases of the nsIURIFixup flag constants.
const {
  FIXUP_FLAG_NONE,
  FIXUP_FLAG_ALLOW_KEYWORD_LOOKUP,
  FIXUP_FLAGS_MAKE_ALTERNATE_URI,
  FIXUP_FLAG_PRIVATE_CONTEXT,
  FIXUP_FLAG_FIX_SCHEME_TYPOS,
} = Ci.nsIURIFixup;

// Schemes that getFixupURIInfo treats as handleable without asking the
// protocol handler services.
const COMMON_PROTOCOLS = ["http", "https", "file"];

// Regex used to identify user:password tokens in url strings.
// This is not a strict valid characters check, because we try to fixup this
// part of the url too.
XPCOMUtils.defineLazyGetter(
  this,
  "userPasswordRegex",
  () => /^([a-z+.-]+:\/{0,3})*([^\/@]+@).+/i
);

// Regex used to identify the string that starts with port expression.
XPCOMUtils.defineLazyGetter(this, "portRegex", () => /^:\d{1,5}([?#/]|$)/);

// Regex used to identify numbers.
XPCOMUtils.defineLazyGetter(this, "numberRegex", () => /^[0-9]+(\.[0-9]+)?$/);

// Regex matching strings that contain at most one tab. A string that does NOT
// match (i.e. it has 2 or more tabs) is considered tab separated content.
XPCOMUtils.defineLazyGetter(this, "maxOneTabRegex", () => /^[^\t]*\t?[^\t]*$/);

// Regex used to test if a string with a protocol might instead be a url
// without a protocol but with a port:
//
//   <hostname>:<port> or
//   <hostname>:<port>/
//
// Where <hostname> is a string of alphanumeric characters and dashes
// separated by dots,
// and <port> is 5 or less digits. This actually breaks the rfc2396
// definition of a scheme which allows dots in schemes.
//
// Note:
//   People expecting this to work with
//   <user>:<password>@<host>:<port>/<url-path> will be disappointed!
//
// Note: Parser could be a lot tighter, tossing out silly hostnames
//       such as those containing consecutive dots and so on.
XPCOMUtils.defineLazyGetter(
  this,
  "possiblyHostPortRegex",
  () => /^[a-z0-9-]+(\.[a-z0-9-]+)*:[0-9]{1,5}([/?#]|$)/i
);

// Regex used to strip newlines.
XPCOMUtils.defineLazyGetter(this, "newLinesRegex", () => /[\r\n]/g);

// Regex used to match a possible protocol.
// This resembles the logic in Services.io.extractScheme, thus \t is admitted
// and stripped later. We don't use Services.io.extractScheme because of
// performance bottleneck caused by crossing XPConnect.
XPCOMUtils.defineLazyGetter(
  this,
  "possibleProtocolRegex",
  () => /^([a-z][a-z0-9.+\t-]*)(:|;)?(\/\/)?/i
);

// Regex used to match IPs. Note that these are not made to validate IPs, but
// just to detect strings that look like an IP. They also skip protocol.
// For IPv4 this also accepts a shorthand format with just 2 dots.
XPCOMUtils.defineLazyGetter(
  this,
  "IPv4LikeRegex",
  () => /^(?:[a-z+.-]+:\/*(?!\/))?(?:\d{1,3}\.){2,3}\d{1,3}(?::\d+|\/)?/i
);
XPCOMUtils.defineLazyGetter(
  this,
  "IPv6LikeRegex",
  () =>
    /^(?:[a-z+.-]+:\/*(?!\/))?\[(?:[0-9a-f]{0,4}:){0,7}[0-9a-f]{0,4}\]?(?::\d+|\/)?/i
);

// Cache of known domains.
// Built from the boolean prefs under "browser.fixup.domainwhitelist." that are
// set to true, and kept up to date through a pref observer.
XPCOMUtils.defineLazyGetter(this, "knownDomains", () => {
  const branch = "browser.fixup.domainwhitelist.";
  let domains = new Set(
    Services.prefs
      .getChildList(branch)
      .filter(p => Services.prefs.getBoolPref(p, false))
      .map(p => p.substring(branch.length))
  );
  // Hold onto the observer to avoid it being GC-ed.
  domains._observer = {
    observe(subject, topic, data) {
      // `data` is the full name of the changed pref.
      let domain = data.substring(branch.length);
      if (Services.prefs.getBoolPref(data, false)) {
        domains.add(domain);
      } else {
        domains.delete(domain);
      }
    },
    QueryInterface: ChromeUtils.generateQI([
      "nsIObserver",
      "nsISupportsWeakReference",
    ]),
  };
  Services.prefs.addObserver(branch, domains._observer, true);
  return domains;
});

// Cache of known suffixes.
// This works differently from the known domains, because when we examine a
// domain we can't tell how many dot-separated parts constitute the suffix.
// We create a Map keyed by the last dotted part, containing a Set of
// all the suffixes ending with that part:
//   "two" => ["two"]
//   "three" => ["some.three", "three"]
// When searching we can restrict the linear scan based on the last part.
// The ideal structure for this would be a Directed Acyclic Word Graph, but
// since we expect this list to be small it's not worth the complication.
XPCOMUtils.defineLazyGetter(this, "knownSuffixes", () => {
  const branch = "browser.fixup.domainsuffixwhitelist.";
  let suffixes = new Map();
  let prefs = Services.prefs
    .getChildList(branch)
    .filter(p => Services.prefs.getBoolPref(p, false));
  for (let pref of prefs) {
    let suffix = pref.substring(branch.length);
    let lastPart = suffix.substring(suffix.lastIndexOf(".") + 1);
    if (lastPart) {
      let entries = suffixes.get(lastPart);
      if (!entries) {
        entries = new Set();
        suffixes.set(lastPart, entries);
      }
      entries.add(suffix);
    }
  }
  // Hold onto the observer to avoid it being GC-ed.
  suffixes._observer = {
    observe(subject, topic, data) {
      // `data` is the full name of the changed pref.
      let suffix = data.substring(branch.length);
      let lastPart = suffix.substring(suffix.lastIndexOf(".") + 1);
      let entries = suffixes.get(lastPart);
      if (Services.prefs.getBoolPref(data, false)) {
        // Add the suffix.
        if (!entries) {
          entries = new Set();
          suffixes.set(lastPart, entries);
        }
        entries.add(suffix);
      } else if (entries) {
        // Remove the suffix, and drop the bucket once it becomes empty.
        entries.delete(suffix);
        if (!entries.size) {
          suffixes.delete(lastPart);
        }
      }
    },
    QueryInterface: ChromeUtils.generateQI([
      "nsIObserver",
      "nsISupportsWeakReference",
    ]),
  };
  Services.prefs.addObserver(branch, suffixes._observer, true);
  return suffixes;
});

function URIFixup() {}

URIFixup.prototype = {
  get FIXUP_FLAG_NONE() {
    return FIXUP_FLAG_NONE;
  },
  get FIXUP_FLAG_ALLOW_KEYWORD_LOOKUP() {
    return FIXUP_FLAG_ALLOW_KEYWORD_LOOKUP;
  },
  get FIXUP_FLAGS_MAKE_ALTERNATE_URI() {
    return FIXUP_FLAGS_MAKE_ALTERNATE_URI;
  },
  get FIXUP_FLAG_PRIVATE_CONTEXT() {
    return FIXUP_FLAG_PRIVATE_CONTEXT;
  },
  get FIXUP_FLAG_FIX_SCHEME_TYPOS() {
    return FIXUP_FLAG_FIX_SCHEME_TYPOS;
  },

  /**
   * Tries to turn a user-typed string into a URI, fixing the scheme, adding a
   * missing one, or falling back to a keyword search.
   * @param {string} uriString The string to fix up. It is trimmed and
   *   embedded newlines are removed before any other processing.
   * @param {integer} [fixupFlags] Bitmask of FIXUP_FLAG_* values.
   * @returns {URIFixupInfo} Information about the fixup; preferredURI is set
   *   on every returned object.
   * @throws NS_ERROR_FAILURE when the cleaned-up input is empty, and
   *   NS_ERROR_MALFORMED_URI when no URI could be built.
   */
  getFixupURIInfo(uriString, fixupFlags = FIXUP_FLAG_NONE) {
    let isPrivateContext = fixupFlags & FIXUP_FLAG_PRIVATE_CONTEXT;

    // Eliminate embedded newlines, which single-line text fields now allow,
    // and cleanup the empty spaces and tabs that might be on each end.
    uriString = uriString.trim().replace(newLinesRegex, "");

    if (!uriString) {
      throw new Components.Exception(
        "Should pass a non-null uri",
        Cr.NS_ERROR_FAILURE
      );
    }

    let info = new URIFixupInfo(uriString);

    const {
      scheme,
      fixedSchemeUriString,
      fixupChangedProtocol,
    } = extractScheme(uriString, fixupFlags);
    uriString = fixedSchemeUriString;
    info.fixupChangedProtocol = fixupChangedProtocol;

    if (scheme == "view-source") {
      let { preferredURI, postData } = fixupViewSource(uriString, fixupFlags);
      info.preferredURI = info.fixedURI = preferredURI;
      info.postData = postData;
      return info;
    }

    if (scheme.length < 2) {
      // Check if it is a file path. We skip most schemes because the only case
      // where a file path may look like having a scheme is "X:" on Windows.
      let fileURI = fileURIFixup(uriString);
      if (fileURI) {
        info.preferredURI = info.fixedURI = fileURI;
        info.fixupChangedProtocol = true;
        return info;
      }
    }

    const isCommonProtocol = COMMON_PROTOCOLS.includes(scheme);

    let canHandleProtocol =
      scheme &&
      (isCommonProtocol ||
        Services.io.getProtocolHandler(scheme) != defaultProtocolHandler ||
        externalProtocolService.externalProtocolHandlerExists(scheme));

    if (
      canHandleProtocol ||
      // If it's an unknown handler and the given URL looks like host:port or
      // has a user:password we can't pass it to the external protocol handler.
      // We'll instead try fixing it with http later.
      (!possiblyHostPortRegex.test(uriString) &&
        !userPasswordRegex.test(uriString))
    ) {
      // Just try to create an URL out of it.
      try {
        info.fixedURI = Services.io.newURI(uriString);
      } catch (ex) {
        // A malformed URI is expected here, further fixups follow below.
        if (ex.result != Cr.NS_ERROR_MALFORMED_URI) {
          throw ex;
        }
      }
    }

    // We're dealing with a theoretically valid URI but we have no idea how to
    // load it. (e.g. "christmas:humbug")
    // It's more likely the user wants to search, and so we chuck this over to
    // their preferred search provider.
    // TODO (Bug 1588118): Should check FIXUP_FLAG_ALLOW_KEYWORD_LOOKUP
    // instead of FIXUP_FLAG_FIX_SCHEME_TYPOS.
    if (
      info.fixedURI &&
      keywordEnabled &&
      fixupFlags & FIXUP_FLAG_FIX_SCHEME_TYPOS &&
      scheme &&
      !canHandleProtocol
    ) {
      tryKeywordFixupForURIInfo(uriString, info, isPrivateContext);
    }

    if (info.fixedURI) {
      if (!info.preferredURI) {
        maybeSetAlternateFixedURI(info, fixupFlags);
        info.preferredURI = info.fixedURI;
      }
      fixupConsecutiveDotsHost(info);
      return info;
    }

    // Fix up protocol string before calling KeywordURIFixup, because
    // it cares about the hostname of such URIs.
    // Prune duff protocol schemes:
    //   ://totallybroken.url.com
    //   //shorthand.url.com
    let inputHadDuffProtocol =
      uriString.startsWith("://") || uriString.startsWith("//");
    if (inputHadDuffProtocol) {
      uriString = uriString.replace(/^:?\/\//, "");
    }

    // Avoid fixing up content that looks like tab-separated values.
    // Assume that 1 tab is accidental, but more than 1 implies this is
    // supposed to be tab-separated content.
    if (!isCommonProtocol && maxOneTabRegex.test(uriString)) {
      let uriWithProtocol = fixupURIProtocol(uriString);
      if (uriWithProtocol) {
        info.fixedURI = uriWithProtocol;
        info.fixupChangedProtocol = true;
        maybeSetAlternateFixedURI(info, fixupFlags);
        info.preferredURI = info.fixedURI;
        // Check if it's a forced visit. The user can enforce a visit by
        // appending a slash, but the string must be in a valid uri format.
        if (uriString.endsWith("/")) {
          fixupConsecutiveDotsHost(info);
          return info;
        }
      }
    }

    // Handle "www.<something>" as a URI.
    const asciiHost = info.fixedURI?.asciiHost;
    if (
      asciiHost?.length > 4 &&
      asciiHost?.startsWith("www.") &&
      asciiHost?.lastIndexOf(".") == 3
    ) {
      return info;
    }

    // Memoize the public suffix check, since it may be expensive and should
    // only run once when necessary.
    let suffixInfo;
    function checkSuffix(info) {
      if (!suffixInfo) {
        suffixInfo = checkAndFixPublicSuffix(info);
      }
      return suffixInfo;
    }

    // See if it is a keyword and whether a keyword must be fixed up.
    if (
      keywordEnabled &&
      fixupFlags & FIXUP_FLAG_ALLOW_KEYWORD_LOOKUP &&
      !inputHadDuffProtocol &&
      !checkSuffix(info).suffix &&
      keywordURIFixup(uriString, info, isPrivateContext)
    ) {
      fixupConsecutiveDotsHost(info);
      return info;
    }

    if (
      info.fixedURI &&
      (!info.fixupChangedProtocol || !checkSuffix(info).hasUnknownSuffix)
    ) {
      fixupConsecutiveDotsHost(info);
      return info;
    }

    // If we still haven't been able to construct a valid URI, try to force a
    // keyword match.
    if (keywordEnabled && fixupFlags & FIXUP_FLAG_ALLOW_KEYWORD_LOOKUP) {
      tryKeywordFixupForURIInfo(info.originalInput, info, isPrivateContext);
    }

    if (!info.preferredURI) {
      // We couldn't salvage anything.
      throw new Components.Exception(
        "Couldn't build a valid uri",
        Cr.NS_ERROR_MALFORMED_URI
      );
    }

    fixupConsecutiveDotsHost(info);
    return info;
  },

  /**
   * Converts nsIWebNavigation load flags into the matching fixup flags.
   * @param {string} href The url being loaded. When it already parses as a
   *   valid URI, third party (keyword) fixup is not requested.
   * @param {integer} navigationFlags Bitmask of nsIWebNavigation LOAD_FLAGS_*.
   * @returns {integer} Bitmask of FIXUP_FLAG_* values.
   */
  webNavigationFlagsToFixupFlags(href, navigationFlags) {
    try {
      Services.io.newURI(href);
      // Remove LOAD_FLAGS_ALLOW_THIRD_PARTY_FIXUP for valid uris.
      navigationFlags &= ~Ci.nsIWebNavigation
        .LOAD_FLAGS_ALLOW_THIRD_PARTY_FIXUP;
    } catch (ex) {
      // Not a valid uri: leave the navigation flags untouched.
    }

    let fixupFlags = FIXUP_FLAG_NONE;
    if (
      navigationFlags & Ci.nsIWebNavigation.LOAD_FLAGS_ALLOW_THIRD_PARTY_FIXUP
    ) {
      fixupFlags |= FIXUP_FLAG_ALLOW_KEYWORD_LOOKUP;
    }
    if (navigationFlags & Ci.nsIWebNavigation.LOAD_FLAGS_FIXUP_SCHEME_TYPOS) {
      fixupFlags |= FIXUP_FLAG_FIX_SCHEME_TYPOS;
    }
    return fixupFlags;
  },

  /**
   * Builds a search submission for the given keyword using the default
   * (or default private) search engine.
   * @param {string} keyword The string to search for. A leading "?" and
   *   surrounding whitespace are stripped.
   * @param {boolean} isPrivateContext Whether to use the private engine.
   * @returns {URIFixupInfo} Info with preferredURI, keywordProviderName,
   *   keywordAsSent and, when the submission has it, postData set.
   * @throws NS_ERROR_NOT_AVAILABLE when invoked from the content process, or
   *   when the engine has no submission or its uri scheme doesn't start with
   *   "http".
   */
  keywordToURI(keyword, isPrivateContext) {
    if (Services.appinfo.processType == Ci.nsIXULRuntime.PROCESS_TYPE_CONTENT) {
      // There's no search service in the content process, thus all the calls
      // from it that care about keywords conversion should go through the
      // parent process.
      throw new Components.Exception(
        "Can't invoke URIFixup in the content process",
        Cr.NS_ERROR_NOT_AVAILABLE
      );
    }
    let info = new URIFixupInfo(keyword);

    // Strip leading "?" and leading/trailing spaces from keyword.
    if (keyword.startsWith("?")) {
      keyword = keyword.substring(1);
    }
    keyword = keyword.trim();

    // Try falling back to the search service's default search engine
    // We must use an appropriate search engine depending on the private
    // context.
    let engine = isPrivateContext
      ? Services.search.defaultPrivateEngine
      : Services.search.defaultEngine;

    // We allow default search plugins to specify alternate parameters that are
    // specific to keyword searches.
    let responseType = null;
    if (engine.supportsResponseType("application/x-moz-keywordsearch")) {
      responseType = "application/x-moz-keywordsearch";
    }
    let submission = engine.getSubmission(keyword, responseType, "keyword");
    if (
      !submission ||
      // For security reasons (avoid redirecting to file, data, or other unsafe
      // protocols) we only allow fixup to http/https search engines.
      !submission.uri.scheme.startsWith("http")
    ) {
      throw new Components.Exception(
        "Invalid search submission uri",
        Cr.NS_ERROR_NOT_AVAILABLE
      );
    }
    let submissionPostDataStream = submission.postData;
    if (submissionPostDataStream) {
      info.postData = submissionPostDataStream;
    }

    info.keywordProviderName = engine.name;
    info.keywordAsSent = keyword;
    info.preferredURI = submission.uri;
    return info;
  },

  isDomainKnown,

  classID: Components.ID("{c6cf88b7-452e-47eb-bdc9-86e3561648ef}"),
  _xpcom_factory: ComponentUtils.generateSingletonFactory(URIFixup),
  QueryInterface: ChromeUtils.generateQI(["nsIURIFixup"]),
};

/**
 * Holds the outcome of a fixup. Every getter normalizes an unset field to
 * null (objects), "" (strings) or false (booleans).
 * @param {string} [originalInput] The string the fixup started from.
 */
function URIFixupInfo(originalInput = "") {
  this._originalInput = originalInput;
}

URIFixupInfo.prototype = {
  set consumer(consumer) {
    this._consumer = consumer || null;
  },
  get consumer() {
    return this._consumer || null;
  },

  set preferredURI(uri) {
    this._preferredURI = uri;
  },
  get preferredURI() {
    return this._preferredURI || null;
  },

  set fixedURI(uri) {
    this._fixedURI = uri;
  },
  get fixedURI() {
    return this._fixedURI || null;
  },

  set keywordProviderName(name) {
    this._keywordProviderName = name;
  },
  get keywordProviderName() {
    return this._keywordProviderName || "";
  },

  set keywordAsSent(keyword) {
    this._keywordAsSent = keyword;
  },
  get keywordAsSent() {
    return this._keywordAsSent || "";
  },

  set fixupChangedProtocol(changed) {
    this._fixupChangedProtocol = changed;
  },
  get fixupChangedProtocol() {
    return !!this._fixupChangedProtocol;
  },

  set fixupCreatedAlternateURI(changed) {
    this._fixupCreatedAlternateURI = changed;
  },
  get fixupCreatedAlternateURI() {
    return !!this._fixupCreatedAlternateURI;
  },

  set originalInput(input) {
    this._originalInput = input;
  },
  get originalInput() {
    return this._originalInput || "";
  },

  set postData(postData) {
    this._postData = postData;
  },
  get postData() {
    return this._postData || null;
  },

  classID: Components.ID("{33d75835-722f-42c0-89cc-44f328e56a86}"),
  QueryInterface: ChromeUtils.generateQI(["nsIURIFixupInfo"]),
};

// Helpers

/**
 * Implementation of isDomainKnown, so we don't have to go through the
 * service.
 * @param {string} asciiHost
 * @returns {boolean} whether the domain is known
 */
function isDomainKnown(asciiHost) {
  if (dnsFirstForSingleWords) {
    return true;
  }
  // Check if this domain is known as an actual
  // domain (which will prevent a keyword query)
  // Note that any processing of the host here should stay in sync with
  // code in the front-end(s) that set the pref.
  let lastDotIndex = asciiHost.lastIndexOf(".");
  if (lastDotIndex == asciiHost.length - 1) {
    // Ignore a single trailing dot.
    asciiHost = asciiHost.substring(0, asciiHost.length - 1);
    lastDotIndex = asciiHost.lastIndexOf(".");
  }
  if (knownDomains.has(asciiHost.toLowerCase())) {
    return true;
  }
  // If there's no dot or only a leading dot we are done, otherwise we'll check
  // against the known suffixes.
  if (lastDotIndex <= 0) {
    return false;
  }
  // Don't use getPublicSuffix here, since the suffix is not in the PSL,
  // thus it couldn't tell if the suffix is made up of one or multiple
  // dot-separated parts.
  let lastPart = asciiHost.substring(lastDotIndex + 1);
  let suffixes = knownSuffixes.get(lastPart);
  if (suffixes) {
    return Array.from(suffixes).some(s => asciiHost.endsWith(s));
  }
  return false;
}

/**
 * Checks the suffix of info.fixedURI against the Public Suffix List.
 * If the suffix is unknown due to a typo this will try to fix it up.
 * @param {URIFixupInfo} info about the uri to check.
 * @note this may modify the public suffix of info.fixedURI.
 * @returns {object} result The lookup result.
 * @returns {string} result.suffix The public suffix if one can be identified.
 * @returns {boolean} result.hasUnknownSuffix True when the suffix is not in the
 *     Public Suffix List and it's not in knownSuffixes. False in the other cases.
 */
function checkAndFixPublicSuffix(info) {
  let uri = info.fixedURI;
  let asciiHost = uri?.asciiHost;
  if (
    !asciiHost ||
    !asciiHost.includes(".") ||
    asciiHost.endsWith(".") ||
    isDomainKnown(asciiHost)
  ) {
    return { suffix: "", hasUnknownSuffix: false };
  }

  // Quick bailouts for most common cases, according to Alexa Top 1 million.
  if (
    /^\w/.test(asciiHost) &&
    (asciiHost.endsWith(".com") ||
      asciiHost.endsWith(".net") ||
      asciiHost.endsWith(".org") ||
      asciiHost.endsWith(".ru") ||
      asciiHost.endsWith(".de"))
  ) {
    return {
      suffix: asciiHost.substring(asciiHost.lastIndexOf(".") + 1),
      hasUnknownSuffix: false,
    };
  }
  try {
    let suffix = Services.eTLD.getKnownPublicSuffix(uri);
    if (suffix) {
      return { suffix, hasUnknownSuffix: false };
    }
  } catch (ex) {
    // The lookup failed, report no suffix rather than an unknown one.
    return { suffix: "", hasUnknownSuffix: false };
  }
  // Suffix is unknown, try to fix most common 3 chars TLDs typos.
  // .com is the most commonly mistyped tld, so it has more cases.
  let suffix = Services.eTLD.getPublicSuffix(uri);
  if (!suffix || numberRegex.test(suffix)) {
    return { suffix: "", hasUnknownSuffix: false };
  }
  for (let [typo, fixed] of [
    ["ocm", "com"],
    ["con", "com"],
    ["cmo", "com"],
    ["xom", "com"],
    ["vom", "com"],
    ["cpm", "com"],
    ["com'", "com"],
    ["ent", "net"],
    ["ner", "net"],
    ["nte", "net"],
    ["met", "net"],
    ["rog", "org"],
    ["ogr", "org"],
    ["prg", "org"],
    ["orh", "org"],
  ]) {
    if (suffix == typo) {
      let host = uri.host.substring(0, uri.host.length - typo.length) + fixed;
      // Keep preferredURI in sync only if it was the same object as fixedURI.
      let updatePreferredURI = info.preferredURI == info.fixedURI;
      info.fixedURI = uri
        .mutate()
        .setHost(host)
        .finalize();
      if (updatePreferredURI) {
        info.preferredURI = info.fixedURI;
      }
      return { suffix: fixed, hasUnknownSuffix: false };
    }
  }
  return { suffix: "", hasUnknownSuffix: true };
}

/**
 * Asks the fixup service for a keyword search and, on success, copies the
 * result into fixupInfo.
 * @param {string} uriString The string to search for.
 * @param {URIFixupInfo} fixupInfo The fixup info object, modified in-place.
 * @param {boolean} isPrivateContext Whether this happens in a private context.
 * @returns {boolean} Whether the keyword fixup was successful.
 */
function tryKeywordFixupForURIInfo(uriString, fixupInfo, isPrivateContext) {
  try {
    let keywordInfo = Services.uriFixup.keywordToURI(
      uriString,
      isPrivateContext
    );
    fixupInfo.keywordProviderName = keywordInfo.keywordProviderName;
    fixupInfo.keywordAsSent = keywordInfo.keywordAsSent;
    fixupInfo.preferredURI = keywordInfo.preferredURI;
    return true;
  } catch (ex) {
    // Best effort: any failure is reported to the caller as `false`.
  }
  return false;
}

/**
 * This generates an alternate fixedURI, by adding a prefix and a suffix to
 * the fixedURI host, if and only if the protocol is http. It should _never_
 * modify URIs with other protocols.
 * @param {URIFixupInfo} info an URIInfo object
 * @param {integer} fixupFlags the fixup flags
 * @returns {boolean} Whether an alternate uri was generated
 */
function maybeSetAlternateFixedURI(info, fixupFlags) {
  let uri = info.fixedURI;
  if (
    !(fixupFlags & FIXUP_FLAGS_MAKE_ALTERNATE_URI) ||
    !alternateEnabled ||
    // Code only works for http. Not for any other protocol including https!
    !uri.schemeIs("http") ||
    // Security - URLs with user / password info should NOT be fixed up
    uri.userPass ||
    // Don't fix up hosts with ports
    uri.port != -1
  ) {
    return false;
  }

  let oldHost = uri.host;
  // Don't create an alternate uri for localhost, because it would be confusing.
  // Ditto for 'http' and 'https' as these are frequently the result of typos, e.g.
  // 'https//foo' (note missing : ).
  if (oldHost == "localhost" || oldHost == "http" || oldHost == "https") {
    return false;
  }

  // Get the prefix and suffix to stick onto the new hostname. By default these
  // are www. & .com but they could be any other value, e.g. www. & .org
  let prefix = Services.prefs.getCharPref(
    "browser.fixup.alternate.prefix",
    "www."
  );
  let suffix = Services.prefs.getCharPref(
    "browser.fixup.alternate.suffix",
    ".com"
  );

  let newHost = "";
  let numDots = (oldHost.match(/\./g) || []).length;
  if (numDots == 0) {
    newHost = prefix + oldHost + suffix;
  } else if (numDots == 1) {
    if (prefix && oldHost == prefix) {
      newHost = oldHost + suffix;
    } else if (suffix && !oldHost.startsWith(prefix)) {
      newHost = prefix + oldHost;
    }
  }
  if (!newHost) {
    return false;
  }

  // Assign the new host string over the old one
  try {
    info.fixedURI = uri
      .mutate()
      .setScheme(alternateProtocol)
      .setHost(newHost)
      .finalize();
  } catch (ex) {
    if (ex.result != Cr.NS_ERROR_MALFORMED_URI) {
      throw ex;
    }
    return false;
  }
  info.fixupCreatedAlternateURI = true;
  return true;
}

/**
 * Try to fixup a file URI.
 * @param {string} uriString The file URI to fix.
 * @returns {nsIURI} a fixed uri or null.
 * @note FileURIFixup only returns a URI if it has to add the file: protocol.
 */
function fileURIFixup(uriString) {
  let attemptFixup = false;
  if (AppConstants.platform == "win") {
    // Check for "\" in the url-string or just a drive (e.g. C:).
    attemptFixup =
      uriString.includes("\\") ||
      (uriString.length == 2 && uriString.endsWith(":"));
  } else {
    // UNIX: Check if it starts with "/".
    attemptFixup = uriString.startsWith("/");
  }
  if (attemptFixup) {
    try {
      // Test if this is a valid path by trying to create a local file
      // object. The URL of that is returned if successful.
      let file = Cc["@mozilla.org/file/local;1"].createInstance(Ci.nsIFile);
      file.initWithPath(uriString);
      return Services.io.newURI(
        fileProtocolHandler.getURLSpecFromActualFile(file)
      );
    } catch (ex) {
      // Not a file uri.
    }
  }
  return null;
}

/**
 * Tries to fixup a string to an nsIURI by adding the default protocol.
 *
 * Should fix things like:
 *    no-scheme.com
 *    ftp.no-scheme.com
 *    ftp4.no-scheme.com
 *    no-scheme.com/query?foo=http://www.foo.com
 *    user:pass@no-scheme.com
 *
 * @param {string} uriString The string to fixup.
 * @returns {nsIURI} an nsIURI built adding the default protocol to the string,
 *          or null if fixing was not possible.
 */
function fixupURIProtocol(uriString) {
  // Prepend "http://" unless "://" appears before any other ":" or "/".
  let schemePos = uriString.indexOf("://");
  if (schemePos == -1 || schemePos > uriString.search(/[:\/]/)) {
    uriString = "http://" + uriString;
  }
  try {
    return Services.io.newURI(uriString);
  } catch (ex) {
    // We generated an invalid uri.
  }
  return null;
}

/**
 * Tries to fixup a string to a search url.
 * @param {string} uriString the string to fixup.
 * @param {URIFixupInfo} fixupInfo The fixup info object, modified in-place.
 * @param {boolean} isPrivateContext Whether this happens in a private context.
 * @returns {boolean} Whether the keyword fixup was successful.
 */
function keywordURIFixup(uriString, fixupInfo, isPrivateContext) {
  // Here are a few examples of strings that should be searched:
  // "what is mozilla"
  // "what is mozilla?"
  // "docshell site:mozilla.org" - has a space in the origin part
  // "?site:mozilla.org - anything that begins with a question mark
  // "mozilla'.org" - Things that have a quote before the first dot/colon
  // "mozilla/test" - unknown host
  // ".mozilla", "mozilla." - starts or ends with a dot
  // "user@nonQualifiedHost"

  // These other strings should not be searched, because they could be URIs:
  // "www.blah.com" - Domain with a standard or known suffix
  // "knowndomain" - known domain
  // "nonQualifiedHost:8888?something" - has a port
  // "user:pass@nonQualifiedHost"
  // "blah.com."

  // We do keyword lookups if the input starts with a question mark.
  if (uriString.startsWith("?")) {
    return tryKeywordFixupForURIInfo(
      fixupInfo.originalInput,
      fixupInfo,
      isPrivateContext
    );
  }

  // Check for IPs. The user:password part, if any, is removed first.
  const userPassword = userPasswordRegex.exec(uriString);
  const ipString = userPassword
    ? uriString.replace(userPassword[2], "")
    : uriString;
  if (IPv4LikeRegex.test(ipString) || IPv6LikeRegex.test(ipString)) {
    return false;
  }

  // Avoid keyword lookup if we can identify a host and it's known, or ends
  // with a dot and has some path.
  // Note that if dnsFirstForSingleWords is true isDomainKnown will always
  // return true, so we can avoid checking dnsFirstForSingleWords after this.
  let asciiHost = fixupInfo.fixedURI?.asciiHost;
  if (
    asciiHost &&
    (isDomainKnown(asciiHost) ||
      (asciiHost.endsWith(".") &&
        asciiHost.indexOf(".") != asciiHost.length - 1))
  ) {
    return false;
  }

  // Avoid keyword lookup if the url seems to have password.
  if (fixupInfo.fixedURI?.password) {
    return false;
  }

  // Even if the host is unknown, avoid keyword lookup if the string has
  // uri-like characteristics, unless it looks like "user@unknownHost".
  // Note we already excluded passwords at this point.
  if (
    !isURILike(uriString, fixupInfo.fixedURI?.displayHost) ||
    (fixupInfo.fixedURI?.userPass && fixupInfo.fixedURI?.pathQueryRef === "/")
  ) {
    return tryKeywordFixupForURIInfo(
      fixupInfo.originalInput,
      fixupInfo,
      isPrivateContext
    );
  }

  return false;
}

/**
 * Mimics the logic in Services.io.extractScheme, but avoids crossing XPConnect.
 * This also tries to fixup the scheme if it was clearly mistyped.
 * @param {string} uriString the string to examine
 * @param {integer} fixupFlags The original fixup flags
 * @returns {object}
 *          scheme: a typo fixed scheme or empty string if one could not be identified
 *          fixedSchemeUriString: uri string with a typo fixed scheme
 *          fixupChangedProtocol: true if the scheme is fixed up
 */
function extractScheme(uriString, fixupFlags = FIXUP_FLAG_NONE) {
  const matches = uriString.match(possibleProtocolRegex);
  const hasColon = matches?.[2] === ":";
  const hasSlash2 = matches?.[3] === "//";

  const isFixupSchemeTypos =
    fixupSchemeTypos && fixupFlags & FIXUP_FLAG_FIX_SCHEME_TYPOS;

  if (
    !matches ||
    (!hasColon && !hasSlash2) ||
    (!hasColon && !isFixupSchemeTypos)
  ) {
    return {
      scheme: "",
      fixedSchemeUriString: uriString,
      fixupChangedProtocol: false,
    };
  }

  // possibleProtocolRegex admits any number of tabs inside the scheme, so
  // all of them must be stripped, not just the first one.
  const rawScheme = matches[1];
  let scheme = rawScheme.replace(/\t/g, "").toLowerCase();
  let fixedSchemeUriString = uriString;
  // How many leading characters of fixedSchemeUriString make up the scheme.
  // This differs from scheme.length while the string still contains tabs.
  let schemeLength = rawScheme.length;

  if (isFixupSchemeTypos && hasSlash2) {
    // Fix up typos for string that user would have intended as protocol.
    const afterProtocol = uriString.substring(matches[0].length);
    fixedSchemeUriString = `${scheme}://${afterProtocol}`;
    schemeLength = scheme.length;
  }

  let fixupChangedProtocol = false;

  if (isFixupSchemeTypos) {
    // Fix up common scheme typos.
    // TODO: Use levenshtein distance here?
    fixupChangedProtocol = [
      ["ttp", "http"],
      ["htp", "http"],
      ["ttps", "https"],
      ["tps", "https"],
      ["ps", "https"],
      ["htps", "https"],
      ["ile", "file"],
      ["le", "file"],
    ].some(([typo, fixed]) => {
      if (scheme === typo) {
        scheme = fixed;
        fixedSchemeUriString =
          scheme + fixedSchemeUriString.substring(schemeLength);
        return true;
      }
      return false;
    });
  }

  return {
    scheme,
    fixedSchemeUriString,
    fixupChangedProtocol,
  };
}

/**
 * View-source is a pseudo scheme. We're interested in fixing up the stuff
 * after it. The easiest way to do that is to call this method again with
 * the "view-source:" lopped off and then prepend it again afterwards.
 * @param {string} uriString The original string to fixup
 * @param {integer} fixupFlags The original fixup flags
 * @returns {object} {preferredURI, postData} The fixed URI and relative postData
 * @throws if it's not possible to fixup the url
 */
function fixupViewSource(uriString, fixupFlags) {
  // We disable keyword lookup and alternate URIs so that small typos don't
  // cause us to look at very different domains.
  let newFixupFlags =
    fixupFlags &
    ~FIXUP_FLAG_ALLOW_KEYWORD_LOOKUP &
    ~FIXUP_FLAGS_MAKE_ALTERNATE_URI;

  let innerURIString = uriString.substring("view-source:".length).trim();

  // Prevent recursion.
  const { scheme: innerScheme } = extractScheme(innerURIString);
  if (innerScheme == "view-source") {
    throw new Components.Exception(
      "Prevent view-source recursion",
      Cr.NS_ERROR_FAILURE
    );
  }

  let info = Services.uriFixup.getFixupURIInfo(innerURIString, newFixupFlags);
  if (!info.preferredURI) {
    throw new Components.Exception(
      "Couldn't build a valid uri",
      Cr.NS_ERROR_MALFORMED_URI
    );
  }
  return {
    preferredURI: Services.io.newURI("view-source:" + info.preferredURI.spec),
    postData: info.postData,
  };
}

/**
 * Fixup the host of fixedURI if it contains consecutive dots.
 * When preferredURI equals fixedURI it is updated as well.
 * @param {URIFixupInfo} fixupInfo an URIInfo object, modified in-place.
 */
function fixupConsecutiveDotsHost(fixupInfo) {
  const uri = fixupInfo.fixedURI;

  try {
    if (!uri?.host.includes("..")) {
      return;
    }
  } catch (e) {
    // Reading the host failed, there is nothing to fix.
    return;
  }

  try {
    const isPreferredEqualsToFixed = fixupInfo.preferredURI?.equals(uri);

    fixupInfo.fixedURI = uri
      .mutate()
      .setHost(uri.host.replace(/\.+/g, "."))
      .finalize();

    if (isPreferredEqualsToFixed) {
      fixupInfo.preferredURI = fixupInfo.fixedURI;
    }
  } catch (e) {
    if (e.result !== Cr.NS_ERROR_MALFORMED_URI) {
      throw e;
    }
  }
}

/**
 * Return whether or not given string is uri like.
 * This function returns true for strings like the following:
 *  - ":8080"
 *  - "localhost:8080" (if given host is "localhost")
 *  - "/foo?bar"
 *  - "/foo#bar"
 * @param {string} uriString The string to examine.
 * @param {string} [host] The host to skip at the start of uriString. It may be
 *   undefined when no URI could be built from the string.
 * @returns {boolean} true if uri like.
 */
function isURILike(uriString, host) {
  const indexOfSlash = uriString.indexOf("/");
  if (
    indexOfSlash >= 0 &&
    (indexOfSlash < uriString.indexOf("?", indexOfSlash) ||
      indexOfSlash < uriString.indexOf("#", indexOfSlash))
  ) {
    return true;
  }

  // Guard against a missing host: startsWith(undefined) would compare against
  // the literal "undefined" and then reading host.length would throw.
  if (host && uriString.startsWith(host)) {
    uriString = uriString.substring(host.length);
  }

  return portRegex.test(uriString);
}