1/* -*- indent-tabs-mode: nil; js-indent-level: 2 -*-
2 * vim: sw=2 ts=2 sts=2 expandtab
3 * This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6
7/**
8 * This component handles fixing up URIs, by correcting obvious typos and adding
9 * missing schemes.
10 * URI references:
11 *   http://www.faqs.org/rfcs/rfc1738.html
12 *   http://www.faqs.org/rfcs/rfc2396.html
13 */
14
15// TODO (Bug 1641220) getFixupURIInfo has a complex logic, that likely could be
16// simplified, but the risk of regressing its behavior is high.
17/* eslint complexity: ["error", 43] */
18
19var EXPORTED_SYMBOLS = ["URIFixup", "URIFixupInfo"];
20
21const { ComponentUtils } = ChromeUtils.import(
22  "resource://gre/modules/ComponentUtils.jsm"
23);
24const { XPCOMUtils } = ChromeUtils.import(
25  "resource://gre/modules/XPCOMUtils.jsm"
26);
27const { Services } = ChromeUtils.import("resource://gre/modules/Services.jsm");
28const { AppConstants } = ChromeUtils.import(
29  "resource://gre/modules/AppConstants.jsm"
30);
31
32XPCOMUtils.defineLazyServiceGetter(
33  this,
34  "externalProtocolService",
35  "@mozilla.org/uriloader/external-protocol-service;1",
36  "nsIExternalProtocolService"
37);
38
39XPCOMUtils.defineLazyServiceGetter(
40  this,
41  "defaultProtocolHandler",
42  "@mozilla.org/network/protocol;1?name=default",
43  "nsIProtocolHandler"
44);
45
46XPCOMUtils.defineLazyServiceGetter(
47  this,
48  "fileProtocolHandler",
49  "@mozilla.org/network/protocol;1?name=file",
50  "nsIFileProtocolHandler"
51);
52
53XPCOMUtils.defineLazyPreferenceGetter(
54  this,
55  "fixupSchemeTypos",
56  "browser.fixup.typo.scheme",
57  true
58);
59XPCOMUtils.defineLazyPreferenceGetter(
60  this,
61  "dnsFirstForSingleWords",
62  "browser.fixup.dns_first_for_single_words",
63  false
64);
65XPCOMUtils.defineLazyPreferenceGetter(
66  this,
67  "keywordEnabled",
68  "keyword.enabled",
69  true
70);
71XPCOMUtils.defineLazyPreferenceGetter(
72  this,
73  "alternateEnabled",
74  "browser.fixup.alternate.enabled",
75  true
76);
77XPCOMUtils.defineLazyPreferenceGetter(
78  this,
79  "alternateProtocol",
80  "browser.fixup.alternate.protocol",
81  "https"
82);
83
84const {
85  FIXUP_FLAG_NONE,
86  FIXUP_FLAG_ALLOW_KEYWORD_LOOKUP,
87  FIXUP_FLAGS_MAKE_ALTERNATE_URI,
88  FIXUP_FLAG_PRIVATE_CONTEXT,
89  FIXUP_FLAG_FIX_SCHEME_TYPOS,
90} = Ci.nsIURIFixup;
91
92const COMMON_PROTOCOLS = ["http", "https", "file"];
93
94// Regex used to identify user:password tokens in url strings.
95// This is not a strict valid characters check, because we try to fixup this
96// part of the url too.
97XPCOMUtils.defineLazyGetter(
98  this,
99  "userPasswordRegex",
100  () => /^([a-z+.-]+:\/{0,3})*([^\/@]+@).+/i
101);
102
103// Regex used to identify specific URI characteristics to disallow searching.
104XPCOMUtils.defineLazyGetter(
105  this,
106  "uriLikeRegex",
107  () => /(:\d{1,5}([?#/]|$)|\/.*[?#])/
108);
109
110// Regex used to identify numbers.
111XPCOMUtils.defineLazyGetter(this, "numberRegex", () => /^[0-9]+(\.[0-9]+)?$/);
112
// Regex matching strings that contain at most one tab. Input that fails this
// test (2 or more tabs) is considered tab-separated content.
114XPCOMUtils.defineLazyGetter(this, "maxOneTabRegex", () => /^[^\t]*\t?[^\t]*$/);
115
116// Regex used to test if a string with a protocol might instead be a url
117// without a protocol but with a port:
118//
119//   <hostname>:<port> or
120//   <hostname>:<port>/
121//
// Where <hostname> is a string of alphanumeric characters and dashes
// separated by dots, and <port> is 5 or fewer digits. This actually breaks
// the rfc2396 definition of a scheme, which allows dots in schemes.
126//
127// Note:
128//   People expecting this to work with
129//   <user>:<password>@<host>:<port>/<url-path> will be disappointed!
130//
131// Note: Parser could be a lot tighter, tossing out silly hostnames
132//       such as those containing consecutive dots and so on.
133XPCOMUtils.defineLazyGetter(
134  this,
135  "possiblyHostPortRegex",
136  () => /^[a-z0-9-]+(\.[a-z0-9-]+)*:[0-9]{1,5}([/?#]|$)/i
137);
138
139// Regex used to strip newlines.
140XPCOMUtils.defineLazyGetter(this, "newLinesRegex", () => /[\r\n]/g);
141
142// Regex used to match a possible protocol.
143// This resembles the logic in Services.io.extractScheme, thus \t is admitted
144// and stripped later. We don't use Services.io.extractScheme because of
145// performance bottleneck caused by crossing XPConnect.
146XPCOMUtils.defineLazyGetter(
147  this,
148  "possibleProtocolRegex",
149  () => /^([a-z][a-z0-9.+\t-]*)(:|;)?(\/\/)?/i
150);
151
152// Regex used to match IPs. Note that these are not made to validate IPs, but
153// just to detect strings that look like an IP. They also skip protocol.
154// For IPv4 this also accepts a shorthand format with just 2 dots.
155XPCOMUtils.defineLazyGetter(
156  this,
157  "IPv4LikeRegex",
158  () => /^(?:[a-z+.-]+:\/*(?!\/))?(?:\d{1,3}\.){2,3}\d{1,3}(?::\d+|\/)?/i
159);
160XPCOMUtils.defineLazyGetter(
161  this,
162  "IPv6LikeRegex",
163  () =>
164    /^(?:[a-z+.-]+:\/*(?!\/))?\[(?:[0-9a-f]{0,4}:){0,7}[0-9a-f]{0,4}\]?(?::\d+|\/)?/i
165);
166
// Lazily built Set of the hosts flagged as known domains through boolean
// "browser.fixup.domainwhitelist.<host>" prefs. A pref observer keeps the Set
// in sync with later changes to that branch.
XPCOMUtils.defineLazyGetter(this, "knownDomains", () => {
  const PREF_BRANCH = "browser.fixup.domainwhitelist.";
  const hostFromPref = prefName => prefName.substring(PREF_BRANCH.length);

  const known = new Set();
  for (const prefName of Services.prefs.getChildList(PREF_BRANCH)) {
    if (Services.prefs.getBoolPref(prefName, false)) {
      known.add(hostFromPref(prefName));
    }
  }

  // Store the observer on the Set itself, so that it stays referenced and
  // is not GC-ed.
  known._observer = {
    QueryInterface: ChromeUtils.generateQI([
      "nsIObserver",
      "nsISupportsWeakReference",
    ]),
    // `prefName` is the full name of the pref that changed.
    observe(subject, topic, prefName) {
      const host = hostFromPref(prefName);
      if (Services.prefs.getBoolPref(prefName, false)) {
        known.add(host);
      } else {
        known.delete(host);
      }
    },
  };
  Services.prefs.addObserver(PREF_BRANCH, known._observer, true);
  return known;
});
194
// Cache of known suffixes.
// This works differently from the known domains, because when we examine a
// domain we can't tell how many dot-separated parts constitute the suffix.
// We create a Map keyed by the last dotted part, containing a Set of
// all the suffixes ending with that part:
//   "two" => ["two"]
//   "three" => ["some.three", "three"]
// When searching we can restrict the linear scan based on the last part.
// The ideal structure for this would be a Directed Acyclic Word Graph, but
// since we expect this list to be small it's not worth the complication.
XPCOMUtils.defineLazyGetter(this, "knownSuffixes", () => {
  const branch = "browser.fixup.domainsuffixwhitelist.";
  const suffixes = new Map();

  // Adds or removes the suffix encoded in `prefName`, depending on the
  // current boolean value of that pref. Both the initial scan and the pref
  // observer go through here, so they always apply the same rules; notably
  // a suffix without a last part (empty, or ending with a dot) is ignored.
  function syncSuffix(prefName) {
    const suffix = prefName.substring(branch.length);
    const lastPart = suffix.substring(suffix.lastIndexOf(".") + 1);
    if (!lastPart) {
      return;
    }
    let entries = suffixes.get(lastPart);
    if (Services.prefs.getBoolPref(prefName, false)) {
      // Add the suffix.
      if (!entries) {
        entries = new Set();
        suffixes.set(lastPart, entries);
      }
      entries.add(suffix);
    } else if (entries) {
      // Remove the suffix, and drop the bucket once it is empty.
      entries.delete(suffix);
      if (!entries.size) {
        suffixes.delete(lastPart);
      }
    }
  }

  for (const prefName of Services.prefs.getChildList(branch)) {
    syncSuffix(prefName);
  }

  // Hold onto the observer to avoid it being GC-ed.
  suffixes._observer = {
    // `data` is the full name of the pref that changed.
    observe(subject, topic, data) {
      syncSuffix(data);
    },
    QueryInterface: ChromeUtils.generateQI([
      "nsIObserver",
      "nsISupportsWeakReference",
    ]),
  };
  Services.prefs.addObserver(branch, suffixes._observer, true);
  return suffixes;
});
252
/**
 * JS component implementing nsIURIFixup (see the QueryInterface entry on the
 * prototype). It is stateless; all the work happens in the prototype methods
 * and in the module-level helpers they call.
 */
function URIFixup() {}

URIFixup.prototype = {
  // Accessors exposing the nsIURIFixup flag constants on the component.
  get FIXUP_FLAG_NONE() {
    return FIXUP_FLAG_NONE;
  },
  get FIXUP_FLAG_ALLOW_KEYWORD_LOOKUP() {
    return FIXUP_FLAG_ALLOW_KEYWORD_LOOKUP;
  },
  get FIXUP_FLAGS_MAKE_ALTERNATE_URI() {
    return FIXUP_FLAGS_MAKE_ALTERNATE_URI;
  },
  get FIXUP_FLAG_PRIVATE_CONTEXT() {
    return FIXUP_FLAG_PRIVATE_CONTEXT;
  },
  get FIXUP_FLAG_FIX_SCHEME_TYPOS() {
    return FIXUP_FLAG_FIX_SCHEME_TYPOS;
  },

  /**
   * Converts a user typed string into an URIFixupInfo, trying in order:
   * scheme typo fixing, view-source handling, file paths, plain URI parsing,
   * adding a default protocol and finally a keyword search.
   * @param {string} uriString The string to fix up.
   * @param {integer} [fixupFlags] Bitmask of FIXUP_FLAG_* values.
   * @returns {URIFixupInfo} the info object describing the fixed up input.
   * @throws NS_ERROR_FAILURE when the input is empty after cleanup,
   *         NS_ERROR_MALFORMED_URI when nothing could be built out of it.
   *         Unexpected errors from URI creation are rethrown as they are.
   */
  getFixupURIInfo(uriString, fixupFlags = FIXUP_FLAG_NONE) {
    let isPrivateContext = fixupFlags & FIXUP_FLAG_PRIVATE_CONTEXT;

    // Eliminate embedded newlines, which single-line text fields now allow,
    // and cleanup the empty spaces and tabs that might be on each end.
    uriString = uriString.trim().replace(newLinesRegex, "");

    if (!uriString) {
      throw new Components.Exception(
        "Should pass a non-null uri",
        Cr.NS_ERROR_FAILURE
      );
    }

    let info = new URIFixupInfo(uriString);

    const {
      scheme,
      fixedSchemeUriString,
      fixupChangedProtocol,
    } = extractScheme(uriString, fixupFlags);
    uriString = fixedSchemeUriString;
    info.fixupChangedProtocol = fixupChangedProtocol;

    if (scheme == "view-source") {
      let { preferredURI, postData } = fixupViewSource(uriString, fixupFlags);
      info.preferredURI = info.fixedURI = preferredURI;
      info.postData = postData;
      return info;
    }

    if (scheme.length < 2) {
      // Check if it is a file path. We skip most schemes because the only case
      // where a file path may look like having a scheme is "X:" on Windows.
      let fileURI = fileURIFixup(uriString);
      if (fileURI) {
        info.preferredURI = info.fixedURI = fileURI;
        info.fixupChangedProtocol = true;
        return info;
      }
    }

    const isCommonProtocol = COMMON_PROTOCOLS.includes(scheme);

    // The protocol can be handled if it is a common one, if it has a
    // dedicated (non default) protocol handler, or if an external handler
    // exists for it.
    let canHandleProtocol =
      scheme &&
      (isCommonProtocol ||
        Services.io.getProtocolHandler(scheme) != defaultProtocolHandler ||
        externalProtocolService.externalProtocolHandlerExists(scheme));

    if (
      canHandleProtocol ||
      // If it's an unknown handler and the given URL looks like host:port or
      // has a user:password we can't pass it to the external protocol handler.
      // We'll instead try fixing it with http later.
      (!possiblyHostPortRegex.test(uriString) &&
        !userPasswordRegex.test(uriString))
    ) {
      // Just try to create an URL out of it.
      try {
        info.fixedURI = Services.io.newURI(uriString);
      } catch (ex) {
        // A malformed uri is expected here, the fixup continues below.
        if (ex.result != Cr.NS_ERROR_MALFORMED_URI) {
          throw ex;
        }
      }
    }

    // We're dealing with a theoretically valid URI but we have no idea how to
    // load it. (e.g. "christmas:humbug")
    // It's more likely the user wants to search, and so we chuck this over to
    // their preferred search provider.
    // TODO (Bug 1588118): Should check FIXUP_FLAG_ALLOW_KEYWORD_LOOKUP
    // instead of FIXUP_FLAG_FIX_SCHEME_TYPOS.
    if (
      info.fixedURI &&
      keywordEnabled &&
      fixupFlags & FIXUP_FLAG_FIX_SCHEME_TYPOS &&
      scheme &&
      !canHandleProtocol
    ) {
      tryKeywordFixupForURIInfo(uriString, info, isPrivateContext);
    }

    if (info.fixedURI) {
      // When the keyword fixup above did not provide a preferred URI, prefer
      // the (possibly alternate) fixed URI.
      if (!info.preferredURI) {
        maybeSetAlternateFixedURI(info, fixupFlags);
        info.preferredURI = info.fixedURI;
      }
      fixupConsecutiveDotsHost(info);
      return info;
    }

    // Fix up protocol string before calling KeywordURIFixup, because
    // it cares about the hostname of such URIs.
    // Prune duff protocol schemes:
    //   ://totallybroken.url.com
    //   //shorthand.url.com
    let inputHadDuffProtocol =
      uriString.startsWith("://") || uriString.startsWith("//");
    if (inputHadDuffProtocol) {
      uriString = uriString.replace(/^:?\/\//, "");
    }

    // Avoid fixing up content that looks like tab-separated values.
    // Assume that 1 tab is accidental, but more than 1 implies this is
    // supposed to be tab-separated content.
    if (!isCommonProtocol && maxOneTabRegex.test(uriString)) {
      let uriWithProtocol = fixupURIProtocol(uriString);
      if (uriWithProtocol) {
        info.fixedURI = uriWithProtocol;
        info.fixupChangedProtocol = true;
        maybeSetAlternateFixedURI(info, fixupFlags);
        info.preferredURI = info.fixedURI;
        // Check if it's a forced visit. The user can enforce a visit by
        // appending a slash, but the string must be in a valid uri format.
        if (uriString.endsWith("/")) {
          fixupConsecutiveDotsHost(info);
          return info;
        }
      }
    }

    // Handle "www.<something>" as a URI.
    const asciiHost = info.fixedURI?.asciiHost;
    if (
      asciiHost?.length > 4 &&
      asciiHost?.startsWith("www.") &&
      asciiHost?.lastIndexOf(".") == 3
    ) {
      return info;
    }

    // Memoize the public suffix check, since it may be expensive and should
    // only run once when necessary.
    let suffixInfo;
    function checkSuffix(info) {
      if (!suffixInfo) {
        suffixInfo = checkAndFixPublicSuffix(info);
      }
      return suffixInfo;
    }

    // See if it is a keyword and whether a keyword must be fixed up.
    if (
      keywordEnabled &&
      fixupFlags & FIXUP_FLAG_ALLOW_KEYWORD_LOOKUP &&
      !inputHadDuffProtocol &&
      !checkSuffix(info).suffix &&
      keywordURIFixup(uriString, info, isPrivateContext)
    ) {
      fixupConsecutiveDotsHost(info);
      return info;
    }

    // Keep the fixed URI unless we added the protocol ourselves to a host
    // whose suffix is unknown.
    if (
      info.fixedURI &&
      (!info.fixupChangedProtocol || !checkSuffix(info).hasUnknownSuffix)
    ) {
      fixupConsecutiveDotsHost(info);
      return info;
    }

    // If we still haven't been able to construct a valid URI, try to force a
    // keyword match.
    if (keywordEnabled && fixupFlags & FIXUP_FLAG_ALLOW_KEYWORD_LOOKUP) {
      tryKeywordFixupForURIInfo(info.originalInput, info, isPrivateContext);
    }

    if (!info.preferredURI) {
      // We couldn't salvage anything.
      throw new Components.Exception(
        "Couldn't build a valid uri",
        Cr.NS_ERROR_MALFORMED_URI
      );
    }

    fixupConsecutiveDotsHost(info);
    return info;
  },

  /**
   * Translates nsIWebNavigation load flags into nsIURIFixup flags.
   * @param {string} href The url string the navigation is about.
   * @param {integer} navigationFlags Bitmask of nsIWebNavigation LOAD_FLAGS_*.
   * @returns {integer} the corresponding bitmask of FIXUP_FLAG_* values.
   */
  webNavigationFlagsToFixupFlags(href, navigationFlags) {
    try {
      Services.io.newURI(href);
      // Remove LOAD_FLAGS_ALLOW_THIRD_PARTY_FIXUP for valid uris.
      navigationFlags &= ~Ci.nsIWebNavigation
        .LOAD_FLAGS_ALLOW_THIRD_PARTY_FIXUP;
    } catch (ex) {
      // Not a valid uri, keep the navigation flags as they are.
    }

    let fixupFlags = FIXUP_FLAG_NONE;
    if (
      navigationFlags & Ci.nsIWebNavigation.LOAD_FLAGS_ALLOW_THIRD_PARTY_FIXUP
    ) {
      fixupFlags |= FIXUP_FLAG_ALLOW_KEYWORD_LOOKUP;
    }
    if (navigationFlags & Ci.nsIWebNavigation.LOAD_FLAGS_FIXUP_SCHEME_TYPOS) {
      fixupFlags |= FIXUP_FLAG_FIX_SCHEME_TYPOS;
    }
    return fixupFlags;
  },

  /**
   * Converts a keyword into a search url for the default search engine.
   * @param {string} keyword The string to search for; a leading "?" and the
   *        surrounding spaces are stripped.
   * @param {boolean} isPrivateContext Whether to use the default engine for
   *        private contexts.
   * @returns {URIFixupInfo} info whose preferredURI is the search url.
   * @throws NS_ERROR_NOT_AVAILABLE in the content process, or when the engine
   *         doesn't provide an http or https submission url.
   */
  keywordToURI(keyword, isPrivateContext) {
    if (Services.appinfo.processType == Ci.nsIXULRuntime.PROCESS_TYPE_CONTENT) {
      // There's no search service in the content process, thus all the calls
      // from it that care about keywords conversion should go through the
      // parent process.
      throw new Components.Exception(
        "Can't invoke URIFixup in the content process",
        Cr.NS_ERROR_NOT_AVAILABLE
      );
    }
    // The info keeps the keyword as it was passed in, before any stripping.
    let info = new URIFixupInfo(keyword);

    // Strip leading "?" and leading/trailing spaces from aKeyword
    if (keyword.startsWith("?")) {
      keyword = keyword.substring(1);
    }
    keyword = keyword.trim();

    // Try falling back to the search service's default search engine
    // We must use an appropriate search engine depending on the private
    // context.
    let engine = isPrivateContext
      ? Services.search.defaultPrivateEngine
      : Services.search.defaultEngine;

    // We allow default search plugins to specify alternate parameters that are
    // specific to keyword searches.
    let responseType = null;
    if (engine.supportsResponseType("application/x-moz-keywordsearch")) {
      responseType = "application/x-moz-keywordsearch";
    }
    let submission = engine.getSubmission(keyword, responseType, "keyword");
    if (
      !submission ||
      // For security reasons (avoid redirecting to file, data, or other unsafe
      // protocols) we only allow fixup to http/https search engines. The
      // scheme must match exactly, a scheme merely starting with "http" is
      // not good enough.
      !(submission.uri.schemeIs("http") || submission.uri.schemeIs("https"))
    ) {
      throw new Components.Exception(
        "Invalid search submission uri",
        Cr.NS_ERROR_NOT_AVAILABLE
      );
    }
    let submissionPostDataStream = submission.postData;
    if (submissionPostDataStream) {
      info.postData = submissionPostDataStream;
    }

    info.keywordProviderName = engine.name;
    info.keywordAsSent = keyword;
    info.preferredURI = submission.uri;
    return info;
  },

  isDomainKnown,

  classID: Components.ID("{c6cf88b7-452e-47eb-bdc9-86e3561648ef}"),
  _xpcom_factory: ComponentUtils.generateSingletonFactory(URIFixup),
  QueryInterface: ChromeUtils.generateQI(["nsIURIFixup"]),
};
533
/**
 * Container for the outcome of a fixup, implementing nsIURIFixupInfo.
 * Every property is backed by an underscore-prefixed field; getters fall back
 * to a neutral value (null, "" or false) while the field is unset.
 * @param {string} [originalInput] The string that was passed to the fixup.
 */
function URIFixupInfo(originalInput = "") {
  this._originalInput = originalInput;
}

URIFixupInfo.prototype = {
  classID: Components.ID("{33d75835-722f-42c0-89cc-44f328e56a86}"),
  QueryInterface: ChromeUtils.generateQI(["nsIURIFixupInfo"]),

  // Object-valued properties, null when unset.
  get consumer() {
    return this._consumer || null;
  },
  set consumer(value) {
    this._consumer = value || null;
  },

  get preferredURI() {
    return this._preferredURI || null;
  },
  set preferredURI(value) {
    this._preferredURI = value;
  },

  get fixedURI() {
    return this._fixedURI || null;
  },
  set fixedURI(value) {
    this._fixedURI = value;
  },

  get postData() {
    return this._postData || null;
  },
  set postData(value) {
    this._postData = value;
  },

  // String-valued properties, empty string when unset.
  get keywordProviderName() {
    return this._keywordProviderName || "";
  },
  set keywordProviderName(value) {
    this._keywordProviderName = value;
  },

  get keywordAsSent() {
    return this._keywordAsSent || "";
  },
  set keywordAsSent(value) {
    this._keywordAsSent = value;
  },

  get originalInput() {
    return this._originalInput || "";
  },
  set originalInput(value) {
    this._originalInput = value;
  },

  // Boolean-valued properties, always coerced to a real boolean on read.
  get fixupChangedProtocol() {
    return Boolean(this._fixupChangedProtocol);
  },
  set fixupChangedProtocol(value) {
    this._fixupChangedProtocol = value;
  },

  get fixupCreatedAlternateURI() {
    return Boolean(this._fixupCreatedAlternateURI);
  },
  set fixupCreatedAlternateURI(value) {
    this._fixupCreatedAlternateURI = value;
  },
};
605
606// Helpers
607
/**
 * Implementation of isDomainKnown, so we don't have to go through the
 * service.
 * A host is known when dnsFirstForSingleWords is set, when it is listed in
 * knownDomains, or when it is equal to, or a subdomain of, one of the
 * knownSuffixes. One trailing dot in the host is ignored.
 * @param {string} asciiHost
 * @returns {boolean} whether the domain is known
 */
function isDomainKnown(asciiHost) {
  if (dnsFirstForSingleWords) {
    return true;
  }
  // Check if this domain is known as an actual
  // domain (which will prevent a keyword query)
  // Note that any processing of the host here should stay in sync with
  // code in the front-end(s) that set the pref.
  let lastDotIndex = asciiHost.lastIndexOf(".");
  if (lastDotIndex == asciiHost.length - 1) {
    asciiHost = asciiHost.substring(0, asciiHost.length - 1);
    lastDotIndex = asciiHost.lastIndexOf(".");
  }
  if (knownDomains.has(asciiHost.toLowerCase())) {
    return true;
  }
  // If there's no dot or only a leading dot we are done, otherwise we'll check
  // against the known suffixes.
  if (lastDotIndex <= 0) {
    return false;
  }
  // Don't use getPublicSuffix here, since the suffix is not in the PSL,
  // thus it couldn't tell if the suffix is made up of one or multiple
  // dot-separated parts.
  const lastPart = asciiHost.substring(lastDotIndex + 1);
  const suffixes = knownSuffixes.get(lastPart);
  if (!suffixes) {
    return false;
  }
  for (const suffix of suffixes) {
    if (asciiHost === suffix) {
      return true;
    }
    // The suffix must start at a label boundary, otherwise "awesome.three"
    // would wrongly match the known suffix "some.three". Suffixes that
    // already carry a leading dot are used as they are.
    const dottedSuffix = suffix.startsWith(".") ? suffix : `.${suffix}`;
    if (asciiHost.endsWith(dottedSuffix)) {
      return true;
    }
  }
  return false;
}
645
/**
 * Looks up the suffix of info.fixedURI in the Public Suffix List, and repairs
 * it when it is one of a few well-known typos of com, net or org.
 * @param {URIFixupInfo} info about the uri to check.
 * @note this may replace info.fixedURI (and info.preferredURI, when it was
 *       the same object) with an uri having the repaired host.
 * @returns {object} result The lookup result.
 * @returns {string} result.suffix The public suffix if one can be identified.
 * @returns {boolean} result.hasUnknownSuffix True when the suffix is not in the
 *     Public Suffix List and it's not in knownSuffixes. False in the other cases.
 */
function checkAndFixPublicSuffix(info) {
  // Always hand out a fresh result object.
  const makeResult = (suffix = "", hasUnknownSuffix = false) => ({
    suffix,
    hasUnknownSuffix,
  });

  const uri = info.fixedURI;
  const asciiHost = uri?.asciiHost;
  const hasCheckableHost =
    asciiHost &&
    asciiHost.includes(".") &&
    !asciiHost.endsWith(".") &&
    !isDomainKnown(asciiHost);
  if (!hasCheckableHost) {
    return makeResult();
  }

  // Quick bailouts for most common cases, according to Alexa Top 1 million.
  const commonTLDs = [".com", ".net", ".org", ".ru", ".de"];
  if (commonTLDs.some(tld => asciiHost.endsWith(tld))) {
    return makeResult(asciiHost.substring(asciiHost.lastIndexOf(".") + 1));
  }

  try {
    const knownSuffix = Services.eTLD.getKnownPublicSuffix(uri);
    if (knownSuffix) {
      return makeResult(knownSuffix);
    }
  } catch (ex) {
    return makeResult();
  }

  // Suffix is unknown, try to fix most common 3 chars TLDs typos.
  // .com is the most commonly mistyped tld, so it has more cases.
  const suffix = Services.eTLD.getPublicSuffix(uri);
  if (!suffix || numberRegex.test(suffix)) {
    return makeResult();
  }

  const typoFixes = new Map([
    ["ocm", "com"],
    ["con", "com"],
    ["cmo", "com"],
    ["xom", "com"],
    ["vom", "com"],
    ["cpm", "com"],
    ["com'", "com"],
    ["ent", "net"],
    ["ner", "net"],
    ["nte", "net"],
    ["met", "net"],
    ["rog", "org"],
    ["ogr", "org"],
    ["prg", "org"],
    ["orh", "org"],
  ]);
  const fixedSuffix = typoFixes.get(suffix);
  if (!fixedSuffix) {
    return makeResult("", true);
  }

  // Swap the mistyped suffix at the end of the host with the fixed one.
  const fixedHost =
    uri.host.substring(0, uri.host.length - suffix.length) + fixedSuffix;
  // Remember whether preferredURI pointed to fixedURI before replacing it.
  const preferredWasFixed = info.preferredURI == info.fixedURI;
  info.fixedURI = uri
    .mutate()
    .setHost(fixedHost)
    .finalize();
  if (preferredWasFixed) {
    info.preferredURI = info.fixedURI;
  }
  return makeResult(fixedSuffix);
}
727
/**
 * Asks the fixup service to convert a string into a search url, and copies
 * the outcome into the given info object. This is best-effort: any failure
 * of the keyword conversion is swallowed and reported through the return
 * value, leaving the info object untouched.
 * @param {string} uriString The string to search for.
 * @param {URIFixupInfo} fixupInfo The fixup info object, modified in-place.
 * @param {boolean} isPrivateContext Whether this happens in a private context.
 * @returns {boolean} Whether the keyword fixup was successful.
 */
function tryKeywordFixupForURIInfo(uriString, fixupInfo, isPrivateContext) {
  try {
    let keywordInfo = Services.uriFixup.keywordToURI(
      uriString,
      isPrivateContext
    );
    fixupInfo.keywordProviderName = keywordInfo.keywordProviderName;
    fixupInfo.keywordAsSent = keywordInfo.keywordAsSent;
    fixupInfo.preferredURI = keywordInfo.preferredURI;
    // Engines submitting through POST provide the search terms as post data;
    // without it the preferred URI alone would not perform the search.
    let postData = keywordInfo.postData;
    if (postData) {
      fixupInfo.postData = postData;
    }
    return true;
  } catch (ex) {
    // Keyword conversion is not available, e.g. in the content process.
  }
  return false;
}
741
/**
 * Replaces info.fixedURI with an alternate uri, built by adding the
 * configured prefix and/or suffix to the host and switching to the alternate
 * protocol. This happens only for http uris without user info or port; uris
 * with any other protocol are _never_ modified.
 * @param {URIFixupInfo} info an URIInfo object
 * @param {integer} fixupFlags the fixup flags
 * @returns {boolean} Whether an alternate uri was generated
 */
function maybeSetAlternateFixedURI(info, fixupFlags) {
  const currentURI = info.fixedURI;
  const isEligible =
    fixupFlags & FIXUP_FLAGS_MAKE_ALTERNATE_URI &&
    alternateEnabled &&
    // Code only works for http. Not for any other protocol including https!
    currentURI.schemeIs("http") &&
    // Security - URLs with user / password info should NOT be fixed up
    !currentURI.userPass &&
    // Don't fix up hosts with ports
    currentURI.port == -1;
  if (!isEligible) {
    return false;
  }

  // Don't create an alternate uri for localhost, because it would be confusing.
  // Ditto for 'http' and 'https' as these are frequently the result of typos, e.g.
  // 'https//foo' (note missing : ).
  const host = currentURI.host;
  if (["localhost", "http", "https"].includes(host)) {
    return false;
  }

  // Get the prefix and suffix to stick onto the new hostname. By default these
  // are www. & .com but they could be any other value, e.g. www. & .org
  const prefix = Services.prefs.getCharPref(
    "browser.fixup.alternate.prefix",
    "www."
  );
  const suffix = Services.prefs.getCharPref(
    "browser.fixup.alternate.suffix",
    ".com"
  );

  // Hosts with more than one dot are left alone.
  const dotCount = host.split(".").length - 1;
  let alternateHost = "";
  if (dotCount == 0) {
    alternateHost = `${prefix}${host}${suffix}`;
  } else if (dotCount == 1) {
    if (prefix && host == prefix) {
      alternateHost = `${host}${suffix}`;
    } else if (suffix && !host.startsWith(prefix)) {
      alternateHost = `${prefix}${host}`;
    }
  }
  if (!alternateHost) {
    return false;
  }

  // Assign the new host string over the old one
  let alternateURI;
  try {
    alternateURI = currentURI
      .mutate()
      .setScheme(alternateProtocol)
      .setHost(alternateHost)
      .finalize();
  } catch (ex) {
    // Only a malformed result is expected; anything else is a real error.
    if (ex.result != Cr.NS_ERROR_MALFORMED_URI) {
      throw ex;
    }
    return false;
  }
  info.fixedURI = alternateURI;
  info.fixupCreatedAlternateURI = true;
  return true;
}
815
/**
 * Converts a string that looks like a local file path into a file: uri.
 * @param {string} uriString The possible file path.
 * @returns {nsIURI} the file uri, or null when the string doesn't look like
 *          a path or a local file can't be created from it.
 * @note a uri is only returned if the file: protocol had to be added.
 */
function fileURIFixup(uriString) {
  // Windows: the string contains a backslash or is just a drive (e.g. C:).
  // Elsewhere: the string starts with "/".
  const looksLikePath =
    AppConstants.platform == "win"
      ? uriString.includes("\\") ||
        (uriString.length == 2 && uriString.endsWith(":"))
      : uriString.startsWith("/");
  if (!looksLikePath) {
    return null;
  }

  try {
    // Test if this is a valid path by trying to create a local file
    // object. The URL of that is returned if successful.
    const localFile = Cc["@mozilla.org/file/local;1"].createInstance(
      Ci.nsIFile
    );
    localFile.initWithPath(uriString);
    const spec = fileProtocolHandler.getURLSpecFromActualFile(localFile);
    return Services.io.newURI(spec);
  } catch (ex) {
    // Not a file uri.
    return null;
  }
}
848
/**
 * Builds an nsIURI out of a string, prepending "http://" unless the string
 * already starts with a scheme followed by "://".
 *
 * Should fix things like:
 *    no-scheme.com
 *    ftp.no-scheme.com
 *    ftp4.no-scheme.com
 *    no-scheme.com/query?foo=http://www.foo.com
 *    user:pass@no-scheme.com
 *
 * @param {string} uriString The string to fixup.
 * @returns {nsIURI} an nsIURI built adding the default protocol to the string,
 *          or null if fixing was not possible.
 */
function fixupURIProtocol(uriString) {
  // A "://" only denotes a scheme when no other ":" or "/" comes before it,
  // otherwise it belongs to some later part of the string (e.g. the query).
  const schemeSeparatorIndex = uriString.indexOf("://");
  const firstDelimiterIndex = uriString.search(/[:\/]/);
  const hasScheme =
    schemeSeparatorIndex != -1 && schemeSeparatorIndex <= firstDelimiterIndex;
  const candidate = hasScheme ? uriString : "http://" + uriString;

  try {
    return Services.io.newURI(candidate);
  } catch (ex) {
    // We generated an invalid uri.
    return null;
  }
}
875
/**
 * Decides whether a string should be searched rather than visited and, when
 * so, tries to convert the original input into a search url.
 *
 * Examples of strings that should be searched:
 *   "what is mozilla"
 *   "what is mozilla?"
 *   "docshell site:mozilla.org" - has a space in the origin part
 *   "?site:mozilla.org" - anything that begins with a question mark
 *   "mozilla'.org" - things that have a quote before the first dot/colon
 *   "mozilla/test" - unknown host
 *   ".mozilla", "mozilla." - starts or ends with a dot
 *   "user@nonQualifiedHost"
 *
 * Examples of strings that should not be searched, as they could be URIs:
 *   "www.blah.com" - domain with a standard or known suffix
 *   "knowndomain" - known domain
 *   "nonQualifiedHost:8888?something" - has a port
 *   "user:pass@nonQualifiedHost"
 *   "blah.com."
 *
 * @param {string} uriString the string to fixup.
 * @param {URIFixupInfo} fixupInfo The fixup info object, modified in-place.
 * @param {boolean} isPrivateContext Whether this happens in a private context.
 * @returns {boolean} Whether the keyword fixup was successful.
 */
function keywordURIFixup(uriString, fixupInfo, isPrivateContext) {
  // The search always uses the original input, not the processed string.
  const searchOriginalInput = () =>
    tryKeywordFixupForURIInfo(
      fixupInfo.originalInput,
      fixupInfo,
      isPrivateContext
    );

  // We do keyword lookups if the input starts with a question mark.
  if (uriString.startsWith("?")) {
    return searchOriginalInput();
  }

  // Check for IPs, after dropping a possible "user:password@" token.
  const userPasswordMatch = userPasswordRegex.exec(uriString);
  const stringWithoutUserPass = userPasswordMatch
    ? uriString.replace(userPasswordMatch[2], "")
    : uriString;
  if (
    IPv4LikeRegex.test(stringWithoutUserPass) ||
    IPv6LikeRegex.test(stringWithoutUserPass)
  ) {
    return false;
  }

  const fixedURI = fixupInfo.fixedURI;

  // Avoid keyword lookup if we can identify a host and it's known, or ends
  // with a dot and has some path.
  // Note that if dnsFirstForSingleWords is true isDomainKnown will always
  // return true, so we can avoid checking dnsFirstForSingleWords after this.
  const asciiHost = fixedURI?.asciiHost;
  if (asciiHost) {
    if (isDomainKnown(asciiHost)) {
      return false;
    }
    // The trailing dot must not be the only dot in the host.
    if (
      asciiHost.endsWith(".") &&
      asciiHost.indexOf(".") != asciiHost.length - 1
    ) {
      return false;
    }
  }

  // Avoid keyword lookup if the url seems to have password.
  if (fixedURI?.password) {
    return false;
  }

  // Even if the host is unknown, avoid keyword lookup if the string has
  // uri-like characteristics, unless it looks like "user@unknownHost".
  // Note we already excluded passwords at this point.
  if (
    uriLikeRegex.test(uriString) &&
    !(fixedURI?.userPass && fixedURI?.pathQueryRef === "/")
  ) {
    return false;
  }

  return searchOriginalInput();
}
955
956/**
957 * Mimics the logic in Services.io.extractScheme, but avoids crossing XPConnect.
958 * This also tries to fixup the scheme if it was clearly mistyped.
959 * @param {string} uriString the string to examine
960 * @param {integer} fixupFlags The original fixup flags
961 * @returns {object}
962 *          scheme: a typo fixed scheme or empty string if one could not be identified
963 *          fixedSchemeUriString: uri string with a typo fixed scheme
964 *          fixupChangedProtocol: true if the scheme is fixed up
965 */
966function extractScheme(uriString, fixupFlags = FIXUP_FLAG_NONE) {
967  const matches = uriString.match(possibleProtocolRegex);
968  const hasColon = matches?.[2] === ":";
969  const hasSlash2 = matches?.[3] === "//";
970
971  const isFixupSchemeTypos =
972    fixupSchemeTypos && fixupFlags & FIXUP_FLAG_FIX_SCHEME_TYPOS;
973
974  if (
975    !matches ||
976    (!hasColon && !hasSlash2) ||
977    (!hasColon && !isFixupSchemeTypos)
978  ) {
979    return {
980      scheme: "",
981      fixedSchemeUriString: uriString,
982      fixupChangedProtocol: false,
983    };
984  }
985
986  let scheme = matches[1].replace("\t", "").toLowerCase();
987  let fixedSchemeUriString = uriString;
988
989  if (isFixupSchemeTypos && hasSlash2) {
990    // Fix up typos for string that user would have intented as protocol.
991    const afterProtocol = uriString.substring(matches[0].length);
992    fixedSchemeUriString = `${scheme}://${afterProtocol}`;
993  }
994
995  let fixupChangedProtocol = false;
996
997  if (isFixupSchemeTypos) {
998    // Fix up common scheme typos.
999    // TODO: Use levenshtein distance here?
1000    fixupChangedProtocol = [
1001      ["ttp", "http"],
1002      ["htp", "http"],
1003      ["ttps", "https"],
1004      ["tps", "https"],
1005      ["ps", "https"],
1006      ["htps", "https"],
1007      ["ile", "file"],
1008      ["le", "file"],
1009    ].some(([typo, fixed]) => {
1010      if (scheme === typo) {
1011        scheme = fixed;
1012        fixedSchemeUriString =
1013          scheme + fixedSchemeUriString.substring(typo.length);
1014        return true;
1015      }
1016      return false;
1017    });
1018  }
1019
1020  return {
1021    scheme,
1022    fixedSchemeUriString,
1023    fixupChangedProtocol,
1024  };
1025}
1026
1027/**
1028 * View-source is a pseudo scheme. We're interested in fixing up the stuff
1029 * after it. The easiest way to do that is to call this method again with
1030 * the "view-source:" lopped off and then prepend it again afterwards.
1031 * @param {string} uriString The original string to fixup
1032 * @param {integer} fixupFlags The original fixup flags
1033 * @param {nsIInputStream} postData Optional POST data for the search
1034 * @returns {object} {preferredURI, postData} The fixed URI and relative postData
1035 * @throws if it's not possible to fixup the url
1036 */
1037function fixupViewSource(uriString, fixupFlags) {
1038  // We disable keyword lookup and alternate URIs so that small typos don't
1039  // cause us to look at very different domains.
1040  let newFixupFlags =
1041    fixupFlags &
1042    ~FIXUP_FLAG_ALLOW_KEYWORD_LOOKUP &
1043    ~FIXUP_FLAGS_MAKE_ALTERNATE_URI;
1044
1045  let innerURIString = uriString.substring(12).trim();
1046
1047  // Prevent recursion.
1048  const { scheme: innerScheme } = extractScheme(innerURIString);
1049  if (innerScheme == "view-source") {
1050    throw new Components.Exception(
1051      "Prevent view-source recursion",
1052      Cr.NS_ERROR_FAILURE
1053    );
1054  }
1055
1056  let info = Services.uriFixup.getFixupURIInfo(innerURIString, newFixupFlags);
1057  if (!info.preferredURI) {
1058    throw new Components.Exception(
1059      "Couldn't build a valid uri",
1060      Cr.NS_ERROR_MALFORMED_URI
1061    );
1062  }
1063  return {
1064    preferredURI: Services.io.newURI("view-source:" + info.preferredURI.spec),
1065    postData: info.postData,
1066  };
1067}
1068
1069/**
1070 * Fixup the host of fixedURI if it contains consecutive dots.
1071 * @param {URIFixupInfo} info an URIInfo object
1072 */
1073function fixupConsecutiveDotsHost(fixupInfo) {
1074  const uri = fixupInfo.fixedURI;
1075
1076  try {
1077    if (!uri?.host.includes("..")) {
1078      return;
1079    }
1080  } catch (e) {
1081    return;
1082  }
1083
1084  try {
1085    const isPreferredEqualsToFixed = fixupInfo.preferredURI?.equals(uri);
1086
1087    fixupInfo.fixedURI = uri
1088      .mutate()
1089      .setHost(uri.host.replace(/\.+/g, "."))
1090      .finalize();
1091
1092    if (isPreferredEqualsToFixed) {
1093      fixupInfo.preferredURI = fixupInfo.fixedURI;
1094    }
1095  } catch (e) {
1096    if (e.result !== Cr.NS_ERROR_MALFORMED_URI) {
1097      throw e;
1098    }
1099  }
1100}
1101