/* -*- indent-tabs-mode: nil; js-indent-level: 2 -*-
 * vim: sw=2 ts=2 sts=2 expandtab
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

/**
 * This component handles fixing up URIs, by correcting obvious typos and adding
 * missing schemes.
 * URI references:
 *   http://www.faqs.org/rfcs/rfc1738.html
 *   http://www.faqs.org/rfcs/rfc2396.html
 */

// TODO (Bug 1641220) getFixupURIInfo has a complex logic, that likely could be
// simplified, but the risk of regressing its behavior is high.
/* eslint complexity: ["error", 43] */

var EXPORTED_SYMBOLS = ["URIFixup", "URIFixupInfo"];

const { ComponentUtils } = ChromeUtils.import(
  "resource://gre/modules/ComponentUtils.jsm"
);
const { XPCOMUtils } = ChromeUtils.import(
  "resource://gre/modules/XPCOMUtils.jsm"
);
const { Services } = ChromeUtils.import("resource://gre/modules/Services.jsm");
const { AppConstants } = ChromeUtils.import(
  "resource://gre/modules/AppConstants.jsm"
);

// Services, resolved lazily on first use.
XPCOMUtils.defineLazyServiceGetter(
  this,
  "externalProtocolService",
  "@mozilla.org/uriloader/external-protocol-service;1",
  "nsIExternalProtocolService"
);

XPCOMUtils.defineLazyServiceGetter(
  this,
  "defaultProtocolHandler",
  "@mozilla.org/network/protocol;1?name=default",
  "nsIProtocolHandler"
);

XPCOMUtils.defineLazyServiceGetter(
  this,
  "fileProtocolHandler",
  "@mozilla.org/network/protocol;1?name=file",
  "nsIFileProtocolHandler"
);

// Preferences, resolved lazily on first use. The last argument is the default
// value used when the preference is not set.
XPCOMUtils.defineLazyPreferenceGetter(
  this,
  "fixupSchemeTypos",
  "browser.fixup.typo.scheme",
  true
);
XPCOMUtils.defineLazyPreferenceGetter(
  this,
  "dnsFirstForSingleWords",
  "browser.fixup.dns_first_for_single_words",
  false
);
XPCOMUtils.defineLazyPreferenceGetter(
  this,
  "keywordEnabled",
  "keyword.enabled",
  true
);
XPCOMUtils.defineLazyPreferenceGetter(
  this,
  "alternateEnabled",
  "browser.fixup.alternate.enabled",
  true
);
XPCOMUtils.defineLazyPreferenceGetter(
  this,
  "alternateProtocol",
  "browser.fixup.alternate.protocol",
  "https"
);

const {
  FIXUP_FLAG_NONE,
  FIXUP_FLAG_ALLOW_KEYWORD_LOOKUP,
  FIXUP_FLAGS_MAKE_ALTERNATE_URI,
  FIXUP_FLAG_PRIVATE_CONTEXT,
  FIXUP_FLAG_FIX_SCHEME_TYPOS,
} = Ci.nsIURIFixup;

// Schemes that are always considered loadable, without asking the protocol
// handlers.
const COMMON_PROTOCOLS = ["http", "https", "file"];

// Regex used to identify user:password tokens in url strings.
// This is not a strict valid characters check, because we try to fixup this
// part of the url too.
XPCOMUtils.defineLazyGetter(
  this,
  "userPasswordRegex",
  () => /^([a-z+.-]+:\/{0,3})*([^\/@]+@).+/i
);

// Regex used to identify specific URI characteristics to disallow searching.
XPCOMUtils.defineLazyGetter(
  this,
  "uriLikeRegex",
  () => /(:\d{1,5}([?#/]|$)|\/.*[?#])/
);

// Regex used to identify numbers.
XPCOMUtils.defineLazyGetter(this, "numberRegex", () => /^[0-9]+(\.[0-9]+)?$/);

// Regex matching strings that contain at most one tab. A string that does NOT
// match has at least 2 tabs and is treated as tab separated content.
XPCOMUtils.defineLazyGetter(this, "maxOneTabRegex", () => /^[^\t]*\t?[^\t]*$/);

// Regex used to test if a string with a protocol might instead be a url
// without a protocol but with a port:
//
//   <hostname>:<port> or
//   <hostname>:<port>/
//
// Where <hostname> is a string of alphanumeric characters and dashes
// separated by dots,
// and <port> is made of 5 or fewer digits. This actually breaks the rfc2396
// definition of a scheme which allows dots in schemes.
//
// Note:
//   People expecting this to work with
//   <user>:<password>@<host>:<port>/<url-path> will be disappointed!
//
// Note: Parser could be a lot tighter, tossing out silly hostnames
//       such as those containing consecutive dots and so on.
XPCOMUtils.defineLazyGetter(
  this,
  "possiblyHostPortRegex",
  () => /^[a-z0-9-]+(\.[a-z0-9-]+)*:[0-9]{1,5}([/?#]|$)/i
);

// Regex used to strip newlines.
XPCOMUtils.defineLazyGetter(this, "newLinesRegex", () => /[\r\n]/g);

// Regex used to match a possible protocol.
// This resembles the logic in Services.io.extractScheme, thus \t is admitted
// and stripped later. We don't use Services.io.extractScheme because of
// performance bottleneck caused by crossing XPConnect.
XPCOMUtils.defineLazyGetter(
  this,
  "possibleProtocolRegex",
  () => /^([a-z][a-z0-9.+\t-]*)(:|;)?(\/\/)?/i
);

// Regex used to match IPs. Note that these are not made to validate IPs, but
// just to detect strings that look like an IP. They also skip protocol.
// For IPv4 this also accepts a shorthand format with just 2 dots.
XPCOMUtils.defineLazyGetter(
  this,
  "IPv4LikeRegex",
  () => /^(?:[a-z+.-]+:\/*(?!\/))?(?:\d{1,3}\.){2,3}\d{1,3}(?::\d+|\/)?/i
);
XPCOMUtils.defineLazyGetter(
  this,
  "IPv6LikeRegex",
  () =>
    /^(?:[a-z+.-]+:\/*(?!\/))?\[(?:[0-9a-f]{0,4}:){0,7}[0-9a-f]{0,4}\]?(?::\d+|\/)?/i
);

// Cache of known domains.
// Built from the boolean prefs under "browser.fixup.domainwhitelist." that are
// set to true, and kept up to date by a preference observer.
XPCOMUtils.defineLazyGetter(this, "knownDomains", () => {
  const branch = "browser.fixup.domainwhitelist.";
  let domains = new Set(
    Services.prefs
      .getChildList(branch)
      .filter(p => Services.prefs.getBoolPref(p, false))
      .map(p => p.substring(branch.length))
  );
  // Hold onto the observer to avoid it being GC-ed.
  domains._observer = {
    observe(subject, topic, data) {
      // `data` is the full name of the changed pref.
      let domain = data.substring(branch.length);
      if (Services.prefs.getBoolPref(data, false)) {
        domains.add(domain);
      } else {
        domains.delete(domain);
      }
    },
    QueryInterface: ChromeUtils.generateQI([
      "nsIObserver",
      "nsISupportsWeakReference",
    ]),
  };
  Services.prefs.addObserver(branch, domains._observer, true);
  return domains;
});

// Cache of known suffixes.
// This works differently from the known domains, because when we examine a
// domain we can't tell how many dot-separated parts constitute the suffix.
// We create a Map keyed by the last dotted part, containing a Set of
// all the suffixes ending with that part:
//   "two" => ["two"]
//   "three" => ["some.three", "three"]
// When searching we can restrict the linear scan based on the last part.
// The ideal structure for this would be a Directed Acyclic Word Graph, but
// since we expect this list to be small it's not worth the complication.
XPCOMUtils.defineLazyGetter(this, "knownSuffixes", () => {
  const branch = "browser.fixup.domainsuffixwhitelist.";
  let suffixes = new Map();
  let prefs = Services.prefs
    .getChildList(branch)
    .filter(p => Services.prefs.getBoolPref(p, false));
  for (let pref of prefs) {
    let suffix = pref.substring(branch.length);
    let lastPart = suffix.substring(suffix.lastIndexOf(".") + 1);
    if (lastPart) {
      let entries = suffixes.get(lastPart);
      if (!entries) {
        entries = new Set();
        suffixes.set(lastPart, entries);
      }
      entries.add(suffix);
    }
  }
  // Hold onto the observer to avoid it being GC-ed.
  suffixes._observer = {
    observe(subject, topic, data) {
      let suffix = data.substring(branch.length);
      let lastPart = suffix.substring(suffix.lastIndexOf(".") + 1);
      if (!lastPart) {
        // Same rule as the initial scan above: a suffix without a last part
        // (empty, or ending with a dot) is never stored.
        return;
      }
      let entries = suffixes.get(lastPart);
      if (Services.prefs.getBoolPref(data, false)) {
        // Add the suffix.
        if (!entries) {
          entries = new Set();
          suffixes.set(lastPart, entries);
        }
        entries.add(suffix);
      } else if (entries) {
        // Remove the suffix, and drop the bucket once it becomes empty.
        entries.delete(suffix);
        if (!entries.size) {
          suffixes.delete(lastPart);
        }
      }
    },
    QueryInterface: ChromeUtils.generateQI([
      "nsIObserver",
      "nsISupportsWeakReference",
    ]),
  };
  Services.prefs.addObserver(branch, suffixes._observer, true);
  return suffixes;
});

/**
 * Implementation of nsIURIFixup. It is registered through a singleton factory
 * (see _xpcom_factory in the prototype).
 */
function URIFixup() {}

URIFixup.prototype = {
  get FIXUP_FLAG_NONE() {
    return FIXUP_FLAG_NONE;
  },
  get FIXUP_FLAG_ALLOW_KEYWORD_LOOKUP() {
    return FIXUP_FLAG_ALLOW_KEYWORD_LOOKUP;
  },
  get FIXUP_FLAGS_MAKE_ALTERNATE_URI() {
    return FIXUP_FLAGS_MAKE_ALTERNATE_URI;
  },
  get FIXUP_FLAG_PRIVATE_CONTEXT() {
    return FIXUP_FLAG_PRIVATE_CONTEXT;
  },
  get FIXUP_FLAG_FIX_SCHEME_TYPOS() {
    return FIXUP_FLAG_FIX_SCHEME_TYPOS;
  },

  /**
   * Tries to convert a user typed string into a loadable URI, optionally
   * falling back to a keyword search.
   *
   * @param {string} uriString The string to fix up. Surrounding whitespace
   *   and embedded newlines are removed before processing.
   * @param {integer} [fixupFlags] A combination of the FIXUP_FLAG_* values.
   * @returns {URIFixupInfo} Information about the fixup; preferredURI is the
   *   URI that should be loaded.
   * @throws NS_ERROR_FAILURE if the string is empty after cleanup.
   * @throws NS_ERROR_MALFORMED_URI if no URI could be built from the string.
   */
  getFixupURIInfo(uriString, fixupFlags = FIXUP_FLAG_NONE) {
    let isPrivateContext = fixupFlags & FIXUP_FLAG_PRIVATE_CONTEXT;

    // Eliminate embedded newlines, which single-line text fields now allow,
    // and cleanup the empty spaces and tabs that might be on each end.
    uriString = uriString.trim().replace(newLinesRegex, "");

    if (!uriString) {
      throw new Components.Exception(
        "Should pass a non-null uri",
        Cr.NS_ERROR_FAILURE
      );
    }

    let info = new URIFixupInfo(uriString);

    const {
      scheme,
      fixedSchemeUriString,
      fixupChangedProtocol,
    } = extractScheme(uriString, fixupFlags);
    uriString = fixedSchemeUriString;
    info.fixupChangedProtocol = fixupChangedProtocol;

    if (scheme == "view-source") {
      let { preferredURI, postData } = fixupViewSource(uriString, fixupFlags);
      info.preferredURI = info.fixedURI = preferredURI;
      info.postData = postData;
      return info;
    }

    if (scheme.length < 2) {
      // Check if it is a file path. We skip most schemes because the only case
      // where a file path may look like having a scheme is "X:" on Windows.
      let fileURI = fileURIFixup(uriString);
      if (fileURI) {
        info.preferredURI = info.fixedURI = fileURI;
        info.fixupChangedProtocol = true;
        return info;
      }
    }

    const isCommonProtocol = COMMON_PROTOCOLS.includes(scheme);

    let canHandleProtocol =
      scheme &&
      (isCommonProtocol ||
        Services.io.getProtocolHandler(scheme) != defaultProtocolHandler ||
        externalProtocolService.externalProtocolHandlerExists(scheme));

    if (
      canHandleProtocol ||
      // If it's an unknown handler and the given URL looks like host:port or
      // has a user:password we can't pass it to the external protocol handler.
      // We'll instead try fixing it with http later.
      (!possiblyHostPortRegex.test(uriString) &&
        !userPasswordRegex.test(uriString))
    ) {
      // Just try to create an URL out of it.
      try {
        info.fixedURI = Services.io.newURI(uriString);
      } catch (ex) {
        // A malformed URI is expected here, the fixup continues below.
        if (ex.result != Cr.NS_ERROR_MALFORMED_URI) {
          throw ex;
        }
      }
    }

    // We're dealing with a theoretically valid URI but we have no idea how to
    // load it. (e.g. "christmas:humbug")
    // It's more likely the user wants to search, and so we chuck this over to
    // their preferred search provider.
    // TODO (Bug 1588118): Should check FIXUP_FLAG_ALLOW_KEYWORD_LOOKUP
    // instead of FIXUP_FLAG_FIX_SCHEME_TYPOS.
    if (
      info.fixedURI &&
      keywordEnabled &&
      fixupFlags & FIXUP_FLAG_FIX_SCHEME_TYPOS &&
      scheme &&
      !canHandleProtocol
    ) {
      tryKeywordFixupForURIInfo(uriString, info, isPrivateContext);
    }

    if (info.fixedURI) {
      if (!info.preferredURI) {
        maybeSetAlternateFixedURI(info, fixupFlags);
        info.preferredURI = info.fixedURI;
      }
      fixupConsecutiveDotsHost(info);
      return info;
    }

    // Fix up protocol string before calling KeywordURIFixup, because
    // it cares about the hostname of such URIs.
    // Prune duff protocol schemes:
    //   ://totallybroken.url.com
    //   //shorthand.url.com
    let inputHadDuffProtocol =
      uriString.startsWith("://") || uriString.startsWith("//");
    if (inputHadDuffProtocol) {
      uriString = uriString.replace(/^:?\/\//, "");
    }

    // Avoid fixing up content that looks like tab-separated values.
    // Assume that 1 tab is accidental, but more than 1 implies this is
    // supposed to be tab-separated content.
    if (!isCommonProtocol && maxOneTabRegex.test(uriString)) {
      let uriWithProtocol = fixupURIProtocol(uriString);
      if (uriWithProtocol) {
        info.fixedURI = uriWithProtocol;
        info.fixupChangedProtocol = true;
        maybeSetAlternateFixedURI(info, fixupFlags);
        info.preferredURI = info.fixedURI;
        // Check if it's a forced visit. The user can enforce a visit by
        // appending a slash, but the string must be in a valid uri format.
        if (uriString.endsWith("/")) {
          fixupConsecutiveDotsHost(info);
          return info;
        }
      }
    }

    // Handle "www.<something>" as a URI.
    const asciiHost = info.fixedURI?.asciiHost;
    if (
      asciiHost?.length > 4 &&
      asciiHost?.startsWith("www.") &&
      asciiHost?.lastIndexOf(".") == 3
    ) {
      return info;
    }

    // Memoize the public suffix check, since it may be expensive and should
    // only run once when necessary.
    let suffixInfo;
    function checkSuffix(info) {
      if (!suffixInfo) {
        suffixInfo = checkAndFixPublicSuffix(info);
      }
      return suffixInfo;
    }

    // See if it is a keyword and whether a keyword must be fixed up.
    if (
      keywordEnabled &&
      fixupFlags & FIXUP_FLAG_ALLOW_KEYWORD_LOOKUP &&
      !inputHadDuffProtocol &&
      !checkSuffix(info).suffix &&
      keywordURIFixup(uriString, info, isPrivateContext)
    ) {
      fixupConsecutiveDotsHost(info);
      return info;
    }

    if (
      info.fixedURI &&
      (!info.fixupChangedProtocol || !checkSuffix(info).hasUnknownSuffix)
    ) {
      fixupConsecutiveDotsHost(info);
      return info;
    }

    // If we still haven't been able to construct a valid URI, try to force a
    // keyword match.
    if (keywordEnabled && fixupFlags & FIXUP_FLAG_ALLOW_KEYWORD_LOOKUP) {
      tryKeywordFixupForURIInfo(info.originalInput, info, isPrivateContext);
    }

    if (!info.preferredURI) {
      // We couldn't salvage anything.
      throw new Components.Exception(
        "Couldn't build a valid uri",
        Cr.NS_ERROR_MALFORMED_URI
      );
    }

    fixupConsecutiveDotsHost(info);
    return info;
  },

  /**
   * Converts nsIWebNavigation load flags into the matching fixup flags.
   *
   * @param {string} href The address being loaded. When it already parses as
   *   a valid URI, LOAD_FLAGS_ALLOW_THIRD_PARTY_FIXUP is ignored.
   * @param {integer} navigationFlags nsIWebNavigation.LOAD_FLAGS_* values.
   * @returns {integer} A combination of FIXUP_FLAG_* values.
   */
  webNavigationFlagsToFixupFlags(href, navigationFlags) {
    try {
      Services.io.newURI(href);
      // Remove LOAD_FLAGS_ALLOW_THIRD_PARTY_FIXUP for valid uris.
      navigationFlags &= ~Ci.nsIWebNavigation
        .LOAD_FLAGS_ALLOW_THIRD_PARTY_FIXUP;
    } catch (ex) {
      // Not a valid uri, leave the navigation flags untouched.
    }

    let fixupFlags = FIXUP_FLAG_NONE;
    if (
      navigationFlags & Ci.nsIWebNavigation.LOAD_FLAGS_ALLOW_THIRD_PARTY_FIXUP
    ) {
      fixupFlags |= FIXUP_FLAG_ALLOW_KEYWORD_LOOKUP;
    }
    if (navigationFlags & Ci.nsIWebNavigation.LOAD_FLAGS_FIXUP_SCHEME_TYPOS) {
      fixupFlags |= FIXUP_FLAG_FIX_SCHEME_TYPOS;
    }
    return fixupFlags;
  },

  /**
   * Converts a keyword into a search URI using the default search engine.
   *
   * @param {string} keyword The string to search for. A leading "?" and
   *   surrounding whitespace are removed.
   * @param {boolean} isPrivateContext Whether to use the default engine for
   *   private browsing.
   * @returns {URIFixupInfo} Info whose preferredURI is the search URI, with
   *   keywordProviderName, keywordAsSent and, when the engine provides it,
   *   postData set.
   * @throws NS_ERROR_NOT_AVAILABLE in the content process, or if the search
   *   engine doesn't return an http(s) submission.
   */
  keywordToURI(keyword, isPrivateContext) {
    if (Services.appinfo.processType == Ci.nsIXULRuntime.PROCESS_TYPE_CONTENT) {
      // There's no search service in the content process, thus all the calls
      // from it that care about keywords conversion should go through the
      // parent process.
      throw new Components.Exception(
        "Can't invoke URIFixup in the content process",
        Cr.NS_ERROR_NOT_AVAILABLE
      );
    }
    let info = new URIFixupInfo(keyword);

    // Strip leading "?" and leading/trailing spaces from the keyword.
    if (keyword.startsWith("?")) {
      keyword = keyword.substring(1);
    }
    keyword = keyword.trim();

    // Try falling back to the search service's default search engine
    // We must use an appropriate search engine depending on the private
    // context.
    let engine = isPrivateContext
      ? Services.search.defaultPrivateEngine
      : Services.search.defaultEngine;

    // We allow default search plugins to specify alternate parameters that are
    // specific to keyword searches.
    let responseType = null;
    if (engine.supportsResponseType("application/x-moz-keywordsearch")) {
      responseType = "application/x-moz-keywordsearch";
    }
    let submission = engine.getSubmission(keyword, responseType, "keyword");
    if (
      !submission ||
      // For security reasons (avoid redirecting to file, data, or other unsafe
      // protocols) we only allow fixup to http/https search engines.
      // Compare the whole scheme: a prefix check would also accept any other
      // scheme that just happens to begin with "http".
      !(submission.uri.schemeIs("http") || submission.uri.schemeIs("https"))
    ) {
      throw new Components.Exception(
        "Invalid search submission uri",
        Cr.NS_ERROR_NOT_AVAILABLE
      );
    }
    let submissionPostDataStream = submission.postData;
    if (submissionPostDataStream) {
      info.postData = submissionPostDataStream;
    }

    info.keywordProviderName = engine.name;
    info.keywordAsSent = keyword;
    info.preferredURI = submission.uri;
    return info;
  },

  isDomainKnown,

  classID: Components.ID("{c6cf88b7-452e-47eb-bdc9-86e3561648ef}"),
  _xpcom_factory: ComponentUtils.generateSingletonFactory(URIFixup),
  QueryInterface: ChromeUtils.generateQI(["nsIURIFixup"]),
};

/**
 * Implementation of nsIURIFixupInfo, a plain holder for the results of a
 * fixup. Getters normalize unset values to null, "" or false.
 *
 * @param {string} [originalInput] The string that was passed to the fixup.
 */
function URIFixupInfo(originalInput = "") {
  this._originalInput = originalInput;
}

URIFixupInfo.prototype = {
  set consumer(consumer) {
    this._consumer = consumer || null;
  },
  get consumer() {
    return this._consumer || null;
  },

  set preferredURI(uri) {
    this._preferredURI = uri;
  },
  get preferredURI() {
    return this._preferredURI || null;
  },

  set fixedURI(uri) {
    this._fixedURI = uri;
  },
  get fixedURI() {
    return this._fixedURI || null;
  },

  set keywordProviderName(name) {
    this._keywordProviderName = name;
  },
  get keywordProviderName() {
    return this._keywordProviderName || "";
  },

  set keywordAsSent(keyword) {
    this._keywordAsSent = keyword;
  },
  get keywordAsSent() {
    return this._keywordAsSent || "";
  },

  set fixupChangedProtocol(changed) {
    this._fixupChangedProtocol = changed;
  },
  get fixupChangedProtocol() {
    return !!this._fixupChangedProtocol;
  },

  set fixupCreatedAlternateURI(changed) {
    this._fixupCreatedAlternateURI = changed;
  },
  get fixupCreatedAlternateURI() {
    return !!this._fixupCreatedAlternateURI;
  },

  set originalInput(input) {
    this._originalInput = input;
  },
  get originalInput() {
    return this._originalInput || "";
  },

  set postData(postData) {
    this._postData = postData;
  },
  get postData() {
    return this._postData || null;
  },

  classID: Components.ID("{33d75835-722f-42c0-89cc-44f328e56a86}"),
  QueryInterface: ChromeUtils.generateQI(["nsIURIFixupInfo"]),
};

// Helpers

/**
 * Implementation of isDomainKnown, so we don't have to go through the
 * service.
 * @param {string} asciiHost
 * @returns {boolean} whether the domain is known
 */
function isDomainKnown(asciiHost) {
  if (dnsFirstForSingleWords) {
    return true;
  }
  // Check if this domain is known as an actual
  // domain (which will prevent a keyword query)
  // Note that any processing of the host here should stay in sync with
  // code in the front-end(s) that set the pref.
  let lastDotIndex = asciiHost.lastIndexOf(".");
  if (lastDotIndex == asciiHost.length - 1) {
    // Ignore a single trailing dot.
    asciiHost = asciiHost.substring(0, asciiHost.length - 1);
    lastDotIndex = asciiHost.lastIndexOf(".");
  }
  if (knownDomains.has(asciiHost.toLowerCase())) {
    return true;
  }
  // If there's no dot or only a leading dot we are done, otherwise we'll check
  // against the known suffixes.
  if (lastDotIndex <= 0) {
    return false;
  }
  // Don't use getPublicSuffix here, since the suffix is not in the PSL,
  // thus it couldn't tell if the suffix is made up of one or multiple
  // dot-separated parts.
  let lastPart = asciiHost.substring(lastDotIndex + 1);
  let suffixes = knownSuffixes.get(lastPart);
  if (suffixes) {
    return Array.from(suffixes).some(s => asciiHost.endsWith(s));
  }
  return false;
}

/**
 * Checks the suffix of info.fixedURI against the Public Suffix List.
 * If the suffix is unknown due to a typo this will try to fix it up.
 * @param {URIFixupInfo} info about the uri to check.
 * @note this may modify the public suffix of info.fixedURI.
 * @returns {object} result The lookup result.
 * @returns {string} result.suffix The public suffix if one can be identified.
 * @returns {boolean} result.hasUnknownSuffix True when the suffix is not in the
 *   Public Suffix List and it's not in knownSuffixes. False in the other cases.
 */
function checkAndFixPublicSuffix(info) {
  let uri = info.fixedURI;
  let asciiHost = uri?.asciiHost;
  if (
    !asciiHost ||
    !asciiHost.includes(".") ||
    asciiHost.endsWith(".") ||
    isDomainKnown(asciiHost)
  ) {
    return { suffix: "", hasUnknownSuffix: false };
  }

  // Quick bailouts for most common cases, according to Alexa Top 1 million.
  if (
    asciiHost.endsWith(".com") ||
    asciiHost.endsWith(".net") ||
    asciiHost.endsWith(".org") ||
    asciiHost.endsWith(".ru") ||
    asciiHost.endsWith(".de")
  ) {
    return {
      suffix: asciiHost.substring(asciiHost.lastIndexOf(".") + 1),
      hasUnknownSuffix: false,
    };
  }
  try {
    let suffix = Services.eTLD.getKnownPublicSuffix(uri);
    if (suffix) {
      return { suffix, hasUnknownSuffix: false };
    }
  } catch (ex) {
    return { suffix: "", hasUnknownSuffix: false };
  }
  // Suffix is unknown, try to fix most common 3 chars TLDs typos.
  // .com is the most commonly mistyped tld, so it has more cases.
  let suffix = Services.eTLD.getPublicSuffix(uri);
  if (!suffix || numberRegex.test(suffix)) {
    return { suffix: "", hasUnknownSuffix: false };
  }
  for (let [typo, fixed] of [
    ["ocm", "com"],
    ["con", "com"],
    ["cmo", "com"],
    ["xom", "com"],
    ["vom", "com"],
    ["cpm", "com"],
    ["com'", "com"],
    ["ent", "net"],
    ["ner", "net"],
    ["nte", "net"],
    ["met", "net"],
    ["rog", "org"],
    ["ogr", "org"],
    ["prg", "org"],
    ["orh", "org"],
  ]) {
    if (suffix == typo) {
      let host = uri.host.substring(0, uri.host.length - typo.length) + fixed;
      // Keep preferredURI in sync, if it was pointing to the same object.
      let updatePreferredURI = info.preferredURI == info.fixedURI;
      info.fixedURI = uri
        .mutate()
        .setHost(host)
        .finalize();
      if (updatePreferredURI) {
        info.preferredURI = info.fixedURI;
      }
      return { suffix: fixed, hasUnknownSuffix: false };
    }
  }
  return { suffix: "", hasUnknownSuffix: true };
}

/**
 * Converts a string to a keyword search and stores the outcome in the given
 * fixup info.
 * @param {string} uriString The string to search for.
 * @param {URIFixupInfo} fixupInfo The fixup info object, modified in-place.
 * @param {boolean} isPrivateContext Whether this happens in a private context.
 * @returns {boolean} Whether the keyword fixup was successful.
 */
function tryKeywordFixupForURIInfo(uriString, fixupInfo, isPrivateContext) {
  try {
    let keywordInfo = Services.uriFixup.keywordToURI(
      uriString,
      isPrivateContext
    );
    fixupInfo.keywordProviderName = keywordInfo.keywordProviderName;
    fixupInfo.keywordAsSent = keywordInfo.keywordAsSent;
    fixupInfo.preferredURI = keywordInfo.preferredURI;
    // keywordToURI sets postData when the search submission has some, it must
    // travel along with the search uri or the submission would be incomplete.
    if (keywordInfo.postData) {
      fixupInfo.postData = keywordInfo.postData;
    }
    return true;
  } catch (ex) {
    // This is a best-effort attempt, a failure is reported to the caller
    // through the return value.
  }
  return false;
}

/**
 * This generates an alternate fixedURI, by adding a prefix and a suffix to
 * the fixedURI host, if and only if the protocol is http. It should _never_
 * modify URIs with other protocols.
 * @param {URIFixupInfo} info an URIInfo object
 * @param {integer} fixupFlags the fixup flags
 * @returns {boolean} Whether an alternate uri was generated
 */
function maybeSetAlternateFixedURI(info, fixupFlags) {
  let uri = info.fixedURI;
  if (
    !(fixupFlags & FIXUP_FLAGS_MAKE_ALTERNATE_URI) ||
    !alternateEnabled ||
    // Code only works for http. Not for any other protocol including https!
    !uri.schemeIs("http") ||
    // Security - URLs with user / password info should NOT be fixed up
    uri.userPass ||
    // Don't fix up hosts with ports
    uri.port != -1
  ) {
    return false;
  }

  let oldHost = uri.host;
  // Don't create an alternate uri for localhost, because it would be confusing.
  // Ditto for 'http' and 'https' as these are frequently the result of typos, e.g.
  // 'https//foo' (note missing : ).
  if (oldHost == "localhost" || oldHost == "http" || oldHost == "https") {
    return false;
  }

  // Get the prefix and suffix to stick onto the new hostname. By default these
  // are www. & .com but they could be any other value, e.g. www. & .org
  let prefix = Services.prefs.getCharPref(
    "browser.fixup.alternate.prefix",
    "www."
  );
  let suffix = Services.prefs.getCharPref(
    "browser.fixup.alternate.suffix",
    ".com"
  );

  let newHost = "";
  let numDots = (oldHost.match(/\./g) || []).length;
  if (numDots == 0) {
    newHost = prefix + oldHost + suffix;
  } else if (numDots == 1) {
    if (prefix && oldHost == prefix) {
      newHost = oldHost + suffix;
    } else if (suffix && !oldHost.startsWith(prefix)) {
      newHost = prefix + oldHost;
    }
  }
  if (!newHost) {
    return false;
  }

  // Assign the new host string over the old one
  try {
    info.fixedURI = uri
      .mutate()
      .setScheme(alternateProtocol)
      .setHost(newHost)
      .finalize();
  } catch (ex) {
    if (ex.result != Cr.NS_ERROR_MALFORMED_URI) {
      throw ex;
    }
    return false;
  }
  info.fixupCreatedAlternateURI = true;
  return true;
}

/**
 * Try to fixup a file URI.
 * @param {string} uriString The file URI to fix.
 * @returns {nsIURI} a fixed uri or null.
 * @note FileURIFixup only returns a URI if it has to add the file: protocol.
 */
function fileURIFixup(uriString) {
  let attemptFixup = false;
  if (AppConstants.platform == "win") {
    // Check for "\\" in the url-string or just a drive (e.g. C:).
    attemptFixup =
      uriString.includes("\\") ||
      (uriString.length == 2 && uriString.endsWith(":"));
  } else {
    // UNIX: Check if it starts with "/".
    attemptFixup = uriString.startsWith("/");
  }
  if (attemptFixup) {
    try {
      // Test if this is a valid path by trying to create a local file
      // object. The URL of that is returned if successful.
      let file = Cc["@mozilla.org/file/local;1"].createInstance(Ci.nsIFile);
      file.initWithPath(uriString);
      return Services.io.newURI(
        fileProtocolHandler.getURLSpecFromActualFile(file)
      );
    } catch (ex) {
      // Not a file uri.
    }
  }
  return null;
}

/**
 * Tries to fixup a string to an nsIURI by adding the default protocol.
 *
 * Should fix things like:
 *    no-scheme.com
 *    ftp.no-scheme.com
 *    ftp4.no-scheme.com
 *    no-scheme.com/query?foo=http://www.foo.com
 *    user:pass@no-scheme.com
 *
 * @param {string} uriString The string to fixup.
 * @returns {nsIURI} an nsIURI built adding the default protocol to the string,
 *          or null if fixing was not possible.
 */
function fixupURIProtocol(uriString) {
  // Add the protocol unless "://" comes before any other ":" or "/".
  let schemePos = uriString.indexOf("://");
  if (schemePos == -1 || schemePos > uriString.search(/[:\/]/)) {
    uriString = "http://" + uriString;
  }
  try {
    return Services.io.newURI(uriString);
  } catch (ex) {
    // We generated an invalid uri.
  }
  return null;
}

/**
 * Tries to fixup a string to a search url.
 * @param {string} uriString the string to fixup.
 * @param {URIFixupInfo} fixupInfo The fixup info object, modified in-place.
 * @param {boolean} isPrivateContext Whether this happens in a private context.
 * @returns {boolean} Whether the keyword fixup was successful.
 */
function keywordURIFixup(uriString, fixupInfo, isPrivateContext) {
  // Here are a few examples of strings that should be searched:
  // "what is mozilla"
  // "what is mozilla?"
  // "docshell site:mozilla.org" - has a space in the origin part
  // "?site:mozilla.org - anything that begins with a question mark
  // "mozilla'.org" - Things that have a quote before the first dot/colon
  // "mozilla/test" - unknown host
  // ".mozilla", "mozilla." - starts or ends with a dot
  // "user@nonQualifiedHost"

  // These other strings should not be searched, because they could be URIs:
  // "www.blah.com" - Domain with a standard or known suffix
  // "knowndomain" - known domain
  // "nonQualifiedHost:8888?something" - has a port
  // "user:pass@nonQualifiedHost"
  // "blah.com."

  // We do keyword lookups if the input starts with a question mark.
  if (uriString.startsWith("?")) {
    return tryKeywordFixupForURIInfo(
      fixupInfo.originalInput,
      fixupInfo,
      isPrivateContext
    );
  }

  // Check for IPs. The user:password part, if any, is removed first, because
  // the IP regexes don't account for it.
  const userPassword = userPasswordRegex.exec(uriString);
  const ipString = userPassword
    ? uriString.replace(userPassword[2], "")
    : uriString;
  if (IPv4LikeRegex.test(ipString) || IPv6LikeRegex.test(ipString)) {
    return false;
  }

  // Avoid keyword lookup if we can identify a host and it's known, or ends
  // with a dot and has some path.
  // Note that if dnsFirstForSingleWords is true isDomainKnown will always
  // return true, so we can avoid checking dnsFirstForSingleWords after this.
  let asciiHost = fixupInfo.fixedURI?.asciiHost;
  if (
    asciiHost &&
    (isDomainKnown(asciiHost) ||
      (asciiHost.endsWith(".") &&
        asciiHost.indexOf(".") != asciiHost.length - 1))
  ) {
    return false;
  }

  // Avoid keyword lookup if the url seems to have password.
  if (fixupInfo.fixedURI?.password) {
    return false;
  }

  // Even if the host is unknown, avoid keyword lookup if the string has
  // uri-like characteristics, unless it looks like "user@unknownHost".
  // Note we already excluded passwords at this point.
  if (
    !uriLikeRegex.test(uriString) ||
    (fixupInfo.fixedURI?.userPass && fixupInfo.fixedURI?.pathQueryRef === "/")
  ) {
    return tryKeywordFixupForURIInfo(
      fixupInfo.originalInput,
      fixupInfo,
      isPrivateContext
    );
  }

  return false;
}

/**
 * Mimics the logic in Services.io.extractScheme, but avoids crossing XPConnect.
 * This also tries to fixup the scheme if it was clearly mistyped.
 * @param {string} uriString the string to examine
 * @param {integer} fixupFlags The original fixup flags
 * @returns {object}
 *          scheme: a typo fixed scheme or empty string if one could not be identified
 *          fixedSchemeUriString: uri string with a typo fixed scheme
 *          fixupChangedProtocol: true if the scheme is fixed up
 */
function extractScheme(uriString, fixupFlags = FIXUP_FLAG_NONE) {
  const matches = uriString.match(possibleProtocolRegex);
  const hasColon = matches?.[2] === ":";
  const hasSlash2 = matches?.[3] === "//";

  const isFixupSchemeTypos =
    fixupSchemeTypos && fixupFlags & FIXUP_FLAG_FIX_SCHEME_TYPOS;

  if (
    !matches ||
    (!hasColon && !hasSlash2) ||
    (!hasColon && !isFixupSchemeTypos)
  ) {
    return {
      scheme: "",
      fixedSchemeUriString: uriString,
      fixupChangedProtocol: false,
    };
  }

  // The regex admits any number of tabs in the scheme, strip all of them.
  // Note a string pattern would only replace the first occurrence.
  const rawScheme = matches[1];
  let scheme = rawScheme.replace(/\t/g, "").toLowerCase();
  let fixedSchemeUriString = uriString;
  // Number of characters taken by the scheme at the start of
  // fixedSchemeUriString. It differs from scheme.length when the raw scheme
  // contains tabs and the string was not rebuilt below.
  let schemeLengthInString = rawScheme.length;

  if (isFixupSchemeTypos && hasSlash2) {
    // Fix up typos for string that user would have intended as protocol.
    const afterProtocol = uriString.substring(matches[0].length);
    fixedSchemeUriString = `${scheme}://${afterProtocol}`;
    schemeLengthInString = scheme.length;
  }

  let fixupChangedProtocol = false;

  if (isFixupSchemeTypos) {
    // Fix up common scheme typos.
    // TODO: Use levenshtein distance here?
    fixupChangedProtocol = [
      ["ttp", "http"],
      ["htp", "http"],
      ["ttps", "https"],
      ["tps", "https"],
      ["ps", "https"],
      ["htps", "https"],
      ["ile", "file"],
      ["le", "file"],
    ].some(([typo, fixed]) => {
      if (scheme === typo) {
        scheme = fixed;
        fixedSchemeUriString =
          scheme + fixedSchemeUriString.substring(schemeLengthInString);
        return true;
      }
      return false;
    });
  }

  return {
    scheme,
    fixedSchemeUriString,
    fixupChangedProtocol,
  };
}

/**
 * View-source is a pseudo scheme. We're interested in fixing up the stuff
 * after it. The easiest way to do that is to call this method again with
 * the "view-source:" lopped off and then prepend it again afterwards.
 * @param {string} uriString The original string to fixup
 * @param {integer} fixupFlags The original fixup flags
 * @returns {object} {preferredURI, postData} The fixed URI and relative postData
 * @throws if it's not possible to fixup the url
 */
function fixupViewSource(uriString, fixupFlags) {
  // We disable keyword lookup and alternate URIs so that small typos don't
  // cause us to look at very different domains.
  let newFixupFlags =
    fixupFlags &
    ~FIXUP_FLAG_ALLOW_KEYWORD_LOOKUP &
    ~FIXUP_FLAGS_MAKE_ALTERNATE_URI;

  let innerURIString = uriString.substring("view-source:".length).trim();

  // Prevent recursion.
  const { scheme: innerScheme } = extractScheme(innerURIString);
  if (innerScheme == "view-source") {
    throw new Components.Exception(
      "Prevent view-source recursion",
      Cr.NS_ERROR_FAILURE
    );
  }

  let info = Services.uriFixup.getFixupURIInfo(innerURIString, newFixupFlags);
  if (!info.preferredURI) {
    throw new Components.Exception(
      "Couldn't build a valid uri",
      Cr.NS_ERROR_MALFORMED_URI
    );
  }
  return {
    preferredURI: Services.io.newURI("view-source:" + info.preferredURI.spec),
    postData: info.postData,
  };
}

/**
 * Fixup the host of fixedURI if it contains consecutive dots.
 * preferredURI is updated too, when it equals fixedURI.
 * @param {URIFixupInfo} fixupInfo an URIInfo object, modified in-place.
 */
function fixupConsecutiveDotsHost(fixupInfo) {
  const uri = fixupInfo.fixedURI;

  try {
    if (!uri?.host.includes("..")) {
      return;
    }
  } catch (e) {
    // Reading the host failed, there's nothing to fix.
    return;
  }

  try {
    const isPreferredEqualsToFixed = fixupInfo.preferredURI?.equals(uri);

    fixupInfo.fixedURI = uri
      .mutate()
      .setHost(uri.host.replace(/\.+/g, "."))
      .finalize();

    if (isPreferredEqualsToFixed) {
      fixupInfo.preferredURI = fixupInfo.fixedURI;
    }
  } catch (e) {
    // If the collapsed host is not valid, keep the uris as they are.
    if (e.result !== Cr.NS_ERROR_MALFORMED_URI) {
      throw e;
    }
  }
}