1/* -*- indent-tabs-mode: nil; js-indent-level: 2 -*-
2 * vim: sw=2 ts=2 sts=2 expandtab
3 * This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6
7/**
8 * This component handles fixing up URIs, by correcting obvious typos and adding
9 * missing schemes.
10 * URI references:
11 *   http://www.faqs.org/rfcs/rfc1738.html
12 *   http://www.faqs.org/rfcs/rfc2396.html
13 */
14
15// TODO (Bug 1641220) getFixupURIInfo has a complex logic, that likely could be
16// simplified, but the risk of regressing its behavior is high.
17/* eslint complexity: ["error", 43] */
18
19var EXPORTED_SYMBOLS = ["URIFixup", "URIFixupInfo"];
20
21const { ComponentUtils } = ChromeUtils.import(
22  "resource://gre/modules/ComponentUtils.jsm"
23);
24const { XPCOMUtils } = ChromeUtils.import(
25  "resource://gre/modules/XPCOMUtils.jsm"
26);
27const { Services } = ChromeUtils.import("resource://gre/modules/Services.jsm");
28const { AppConstants } = ChromeUtils.import(
29  "resource://gre/modules/AppConstants.jsm"
30);
31
32XPCOMUtils.defineLazyServiceGetter(
33  this,
34  "externalProtocolService",
35  "@mozilla.org/uriloader/external-protocol-service;1",
36  "nsIExternalProtocolService"
37);
38
39XPCOMUtils.defineLazyServiceGetter(
40  this,
41  "defaultProtocolHandler",
42  "@mozilla.org/network/protocol;1?name=default",
43  "nsIProtocolHandler"
44);
45
46XPCOMUtils.defineLazyServiceGetter(
47  this,
48  "fileProtocolHandler",
49  "@mozilla.org/network/protocol;1?name=file",
50  "nsIFileProtocolHandler"
51);
52
// Lazily-read preferences. Each getter takes the pref name and the default
// value used when the pref is not set.

// Whether mistyped schemes may be corrected. extractScheme() only fixes typos
// when this is true AND the caller passed FIXUP_FLAG_FIX_SCHEME_TYPOS.
XPCOMUtils.defineLazyPreferenceGetter(
  this,
  "fixupSchemeTypos",
  "browser.fixup.typo.scheme",
  true
);
// When true, isDomainKnown() reports every host as known.
XPCOMUtils.defineLazyPreferenceGetter(
  this,
  "dnsFirstForSingleWords",
  "browser.fixup.dns_first_for_single_words",
  false
);
// Gates every keyword (search) fixup performed by getFixupURIInfo().
XPCOMUtils.defineLazyPreferenceGetter(
  this,
  "keywordEnabled",
  "keyword.enabled",
  true
);
// Whether maybeSetAlternateFixedURI() is allowed to build an alternate URI.
XPCOMUtils.defineLazyPreferenceGetter(
  this,
  "alternateEnabled",
  "browser.fixup.alternate.enabled",
  true
);
// Scheme that maybeSetAlternateFixedURI() sets on the alternate URI.
XPCOMUtils.defineLazyPreferenceGetter(
  this,
  "alternateProtocol",
  "browser.fixup.alternate.protocol",
  "https"
);

// Fixup flag constants, pulled out of the interface once so the rest of the
// module can reference them as plain identifiers.
const {
  FIXUP_FLAG_NONE,
  FIXUP_FLAG_ALLOW_KEYWORD_LOOKUP,
  FIXUP_FLAGS_MAKE_ALTERNATE_URI,
  FIXUP_FLAG_PRIVATE_CONTEXT,
  FIXUP_FLAG_FIX_SCHEME_TYPOS,
} = Ci.nsIURIFixup;

// Schemes that getFixupURIInfo() treats as loadable without asking the
// protocol handlers.
const COMMON_PROTOCOLS = ["http", "https", "file"];
93
// Regex used to identify user:password tokens in url strings.
// This is not a strict valid characters check, because we try to fixup this
// part of the url too.
// Capture group 2 is the "<user[:password]>@" token; keywordURIFixup() strips
// it before testing whether the remainder looks like an IP address.
XPCOMUtils.defineLazyGetter(
  this,
  "userPasswordRegex",
  () => /^([a-z+.-]+:\/{0,3})*([^\/@]+@).+/i
);

// Regex used to identify the string that starts with port expression.
XPCOMUtils.defineLazyGetter(this, "portRegex", () => /^:\d{1,5}([?#/]|$)/);

// Regex used to identify numbers.
XPCOMUtils.defineLazyGetter(this, "numberRegex", () => /^[0-9]+(\.[0-9]+)?$/);

// Regex matching strings that contain at most one tab. A string with 2 or
// more tabs fails this test and is treated as tab-separated content.
XPCOMUtils.defineLazyGetter(this, "maxOneTabRegex", () => /^[^\t]*\t?[^\t]*$/);

// Regex used to test if a string with a protocol might instead be a url
// without a protocol but with a port:
//
//   <hostname>:<port> or
//   <hostname>:<port>/
//
// Where <hostname> is a string of alphanumeric characters and dashes
// separated by dots,
// and <port> is 5 or fewer digits. This actually breaks the rfc2396
// definition of a scheme which allows dots in schemes.
//
// Note:
//   People expecting this to work with
//   <user>:<password>@<host>:<port>/<url-path> will be disappointed!
//
// Note: Parser could be a lot tighter, tossing out silly hostnames
//       such as those containing consecutive dots and so on.
XPCOMUtils.defineLazyGetter(
  this,
  "possiblyHostPortRegex",
  () => /^[a-z0-9-]+(\.[a-z0-9-]+)*:[0-9]{1,5}([/?#]|$)/i
);

// Regex used to strip newlines.
XPCOMUtils.defineLazyGetter(this, "newLinesRegex", () => /[\r\n]/g);

// Regex used to match a possible protocol.
// This resembles the logic in Services.io.extractScheme, thus \t is admitted
// and stripped later. We don't use Services.io.extractScheme because of
// performance bottleneck caused by crossing XPConnect.
// Capture groups: 1 = candidate scheme, 2 = ":" or ";" separator (optional),
// 3 = "//" (optional).
XPCOMUtils.defineLazyGetter(
  this,
  "possibleProtocolRegex",
  () => /^([a-z][a-z0-9.+\t-]*)(:|;)?(\/\/)?/i
);

// Regex used to match IPs. Note that these are not made to validate IPs, but
// just to detect strings that look like an IP. They also skip protocol.
// For IPv4 this also accepts a shorthand format with just 2 dots.
XPCOMUtils.defineLazyGetter(
  this,
  "IPv4LikeRegex",
  () => /^(?:[a-z+.-]+:\/*(?!\/))?(?:\d{1,3}\.){2,3}\d{1,3}(?::\d+|\/)?/i
);
// The closing bracket is optional, so an unterminated "[..." literal is still
// detected as IPv6-like.
XPCOMUtils.defineLazyGetter(
  this,
  "IPv6LikeRegex",
  () =>
    /^(?:[a-z+.-]+:\/*(?!\/))?\[(?:[0-9a-f]{0,4}:){0,7}[0-9a-f]{0,4}\]?(?::\d+|\/)?/i
);
162
// Cache of known domains: a Set holding the name of every enabled pref under
// the domain whitelist branch, with the branch prefix removed. The Set is kept
// up to date by a pref observer.
XPCOMUtils.defineLazyGetter(this, "knownDomains", () => {
  const branch = "browser.fixup.domainwhitelist.";
  const toDomain = prefName => prefName.substring(branch.length);

  // Seed the cache with the prefs that are currently set to true.
  const domains = new Set();
  for (const prefName of Services.prefs.getChildList(branch)) {
    if (Services.prefs.getBoolPref(prefName, false)) {
      domains.add(toDomain(prefName));
    }
  }

  // The observer is registered as a weak reference (third argument of
  // addObserver), so it is stored on the Set to avoid it being GC-ed.
  domains._observer = {
    observe(subject, topic, data) {
      // `data` is the full name of the pref that changed.
      const domain = toDomain(data);
      const isEnabled = Services.prefs.getBoolPref(data, false);
      if (isEnabled) {
        domains.add(domain);
        return;
      }
      domains.delete(domain);
    },
    QueryInterface: ChromeUtils.generateQI([
      "nsIObserver",
      "nsISupportsWeakReference",
    ]),
  };
  Services.prefs.addObserver(branch, domains._observer, true);
  return domains;
});
190
// Cache of known suffixes.
// Unlike the known domains, a host alone doesn't tell us how many of its
// dot-separated parts make up the suffix. The cache is therefore a Map keyed
// by the last dotted part, whose values are Sets of every known suffix ending
// with that part:
//   "two" => ["two"]
//   "three" => ["some.three", "three"]
// A lookup then only has to scan the suffixes sharing the host's last part.
// A Directed Acyclic Word Graph would be the ideal structure, but the list is
// expected to be small, so it's not worth the complication.
XPCOMUtils.defineLazyGetter(this, "knownSuffixes", () => {
  const branch = "browser.fixup.domainsuffixwhitelist.";
  const suffixes = new Map();

  // Returns the text after the last dot, or the whole string without a dot.
  const lastPartOf = suffix => suffix.slice(suffix.lastIndexOf(".") + 1);

  // Adds `suffix` to the bucket for `lastPart`, creating the bucket on demand.
  const register = (lastPart, suffix) => {
    let bucket = suffixes.get(lastPart);
    if (!bucket) {
      bucket = new Set();
      suffixes.set(lastPart, bucket);
    }
    bucket.add(suffix);
  };

  // Seed the cache with the prefs that are currently set to true. Suffixes
  // with an empty last part (i.e. ending with a dot) are ignored here.
  for (const prefName of Services.prefs.getChildList(branch)) {
    if (!Services.prefs.getBoolPref(prefName, false)) {
      continue;
    }
    const suffix = prefName.substring(branch.length);
    const lastPart = lastPartOf(suffix);
    if (lastPart) {
      register(lastPart, suffix);
    }
  }

  // The observer is registered as a weak reference (third argument of
  // addObserver), so it is stored on the Map to avoid it being GC-ed.
  suffixes._observer = {
    observe(subject, topic, data) {
      // `data` is the full name of the pref that changed.
      const suffix = data.substring(branch.length);
      const lastPart = lastPartOf(suffix);
      if (Services.prefs.getBoolPref(data, false)) {
        register(lastPart, suffix);
        return;
      }
      // The pref was disabled or cleared: forget the suffix, and drop the
      // bucket once it is empty.
      const bucket = suffixes.get(lastPart);
      if (!bucket) {
        return;
      }
      bucket.delete(suffix);
      if (!bucket.size) {
        suffixes.delete(lastPart);
      }
    },
    QueryInterface: ChromeUtils.generateQI([
      "nsIObserver",
      "nsISupportsWeakReference",
    ]),
  };
  Services.prefs.addObserver(branch, suffixes._observer, true);
  return suffixes;
});
248
/**
 * Constructor of the nsIURIFixup implementation. It stores no state on the
 * instance; everything is provided by URIFixup.prototype.
 */
function URIFixup() {}
250
251URIFixup.prototype = {
  // Read-only accessors returning the fixup flag constants that this module
  // destructured from Ci.nsIURIFixup, so they are reachable on the instance.
  get FIXUP_FLAG_NONE() {
    return FIXUP_FLAG_NONE;
  },
  get FIXUP_FLAG_ALLOW_KEYWORD_LOOKUP() {
    return FIXUP_FLAG_ALLOW_KEYWORD_LOOKUP;
  },
  get FIXUP_FLAGS_MAKE_ALTERNATE_URI() {
    return FIXUP_FLAGS_MAKE_ALTERNATE_URI;
  },
  get FIXUP_FLAG_PRIVATE_CONTEXT() {
    return FIXUP_FLAG_PRIVATE_CONTEXT;
  },
  get FIXUP_FLAG_FIX_SCHEME_TYPOS() {
    return FIXUP_FLAG_FIX_SCHEME_TYPOS;
  },
267
  /**
   * Converts a (typically user-typed) string into a URIFixupInfo describing
   * the URI to load for it.
   *
   * In order, this: strips surrounding whitespace and embedded newlines;
   * extracts (and possibly typo-fixes) the scheme; special-cases view-source
   * and file paths; tries to parse the string as a URI as-is; otherwise
   * prepends a protocol and/or falls back to a keyword search, depending on
   * fixupFlags and the keyword.enabled pref.
   *
   * @param {string} uriString The string to fix up.
   * @param {integer} [fixupFlags] Bitmask of FIXUP_FLAG_* values, defaults to
   *        FIXUP_FLAG_NONE.
   * @returns {URIFixupInfo} The result of the fixup.
   * @throws NS_ERROR_FAILURE if uriString is empty once cleaned up, and
   *         NS_ERROR_MALFORMED_URI if no URI could be built. Errors other
   *         than NS_ERROR_MALFORMED_URI raised while parsing are rethrown.
   */
  getFixupURIInfo(uriString, fixupFlags = FIXUP_FLAG_NONE) {
    // Note this is the masked flag value (a number), not a strict boolean.
    let isPrivateContext = fixupFlags & FIXUP_FLAG_PRIVATE_CONTEXT;

    // Eliminate embedded newlines, which single-line text fields now allow,
    // and cleanup the empty spaces and tabs that might be on each end.
    uriString = uriString.trim().replace(newLinesRegex, "");

    if (!uriString) {
      throw new Components.Exception(
        "Should pass a non-null uri",
        Cr.NS_ERROR_FAILURE
      );
    }

    // The info object records the cleaned-up input as its originalInput.
    let info = new URIFixupInfo(uriString);

    const {
      scheme,
      fixedSchemeUriString,
      fixupChangedProtocol,
    } = extractScheme(uriString, fixupFlags);
    // From here on work with the string whose scheme may have been fixed.
    uriString = fixedSchemeUriString;
    info.fixupChangedProtocol = fixupChangedProtocol;

    if (scheme == "view-source") {
      let { preferredURI, postData } = fixupViewSource(uriString, fixupFlags);
      info.preferredURI = info.fixedURI = preferredURI;
      info.postData = postData;
      return info;
    }

    if (scheme.length < 2) {
      // Check if it is a file path. We skip most schemes because the only case
      // where a file path may look like having a scheme is "X:" on Windows.
      let fileURI = fileURIFixup(uriString);
      if (fileURI) {
        info.preferredURI = info.fixedURI = fileURI;
        info.fixupChangedProtocol = true;
        return info;
      }
    }

    const isCommonProtocol = COMMON_PROTOCOLS.includes(scheme);

    // A protocol can be handled when it's a common one, when it has a
    // handler other than the default one, or when an external handler exists.
    let canHandleProtocol =
      scheme &&
      (isCommonProtocol ||
        Services.io.getProtocolHandler(scheme) != defaultProtocolHandler ||
        externalProtocolService.externalProtocolHandlerExists(scheme));

    if (
      canHandleProtocol ||
      // If it's an unknown handler and the given URL looks like host:port or
      // has a user:password we can't pass it to the external protocol handler.
      // We'll instead try fixing it with http later.
      (!possiblyHostPortRegex.test(uriString) &&
        !userPasswordRegex.test(uriString))
    ) {
      // Just try to create an URL out of it.
      try {
        info.fixedURI = Services.io.newURI(uriString);
      } catch (ex) {
        // A malformed URI is expected here and handled by the fallbacks
        // below; anything else is a real error.
        if (ex.result != Cr.NS_ERROR_MALFORMED_URI) {
          throw ex;
        }
      }
    }

    // We're dealing with a theoretically valid URI but we have no idea how to
    // load it. (e.g. "christmas:humbug")
    // It's more likely the user wants to search, and so we chuck this over to
    // their preferred search provider.
    // TODO (Bug 1588118): Should check FIXUP_FLAG_ALLOW_KEYWORD_LOOKUP
    // instead of FIXUP_FLAG_FIX_SCHEME_TYPOS.
    if (
      info.fixedURI &&
      keywordEnabled &&
      fixupFlags & FIXUP_FLAG_FIX_SCHEME_TYPOS &&
      scheme &&
      !canHandleProtocol
    ) {
      tryKeywordFixupForURIInfo(uriString, info, isPrivateContext);
    }

    if (info.fixedURI) {
      // If the keyword fixup above didn't provide a preferred URI, use the
      // fixed one (or its alternate, when one can be generated).
      if (!info.preferredURI) {
        maybeSetAlternateFixedURI(info, fixupFlags);
        info.preferredURI = info.fixedURI;
      }
      fixupConsecutiveDotsHost(info);
      return info;
    }

    // Fix up protocol string before calling KeywordURIFixup, because
    // it cares about the hostname of such URIs.
    // Prune duff protocol schemes:
    //   ://totallybroken.url.com
    //   //shorthand.url.com
    let inputHadDuffProtocol =
      uriString.startsWith("://") || uriString.startsWith("//");
    if (inputHadDuffProtocol) {
      uriString = uriString.replace(/^:?\/\//, "");
    }

    // Avoid fixing up content that looks like tab-separated values.
    // Assume that 1 tab is accidental, but more than 1 implies this is
    // supposed to be tab-separated content.
    if (!isCommonProtocol && maxOneTabRegex.test(uriString)) {
      let uriWithProtocol = fixupURIProtocol(uriString);
      if (uriWithProtocol) {
        info.fixedURI = uriWithProtocol;
        info.fixupChangedProtocol = true;
        maybeSetAlternateFixedURI(info, fixupFlags);
        info.preferredURI = info.fixedURI;
        // Check if it's a forced visit. The user can enforce a visit by
        // appending a slash, but the string must be in a valid uri format.
        if (uriString.endsWith("/")) {
          fixupConsecutiveDotsHost(info);
          return info;
        }
      }
    }

    // Handle "www.<something>" as a URI.
    // The last dot being at index 3 means the host is exactly "www." followed
    // by a single dot-less label.
    const asciiHost = info.fixedURI?.asciiHost;
    if (
      asciiHost?.length > 4 &&
      asciiHost?.startsWith("www.") &&
      asciiHost?.lastIndexOf(".") == 3
    ) {
      return info;
    }

    // Memoize the public suffix check, since it may be expensive and should
    // only run once when necessary.
    let suffixInfo;
    function checkSuffix(info) {
      if (!suffixInfo) {
        suffixInfo = checkAndFixPublicSuffix(info);
      }
      return suffixInfo;
    }

    // See if it is a keyword and whether a keyword must be fixed up.
    if (
      keywordEnabled &&
      fixupFlags & FIXUP_FLAG_ALLOW_KEYWORD_LOOKUP &&
      !inputHadDuffProtocol &&
      !checkSuffix(info).suffix &&
      keywordURIFixup(uriString, info, isPrivateContext)
    ) {
      fixupConsecutiveDotsHost(info);
      return info;
    }

    // Keep the fixed URI unless the protocol was changed by the fixup and the
    // host has an unknown public suffix.
    if (
      info.fixedURI &&
      (!info.fixupChangedProtocol || !checkSuffix(info).hasUnknownSuffix)
    ) {
      fixupConsecutiveDotsHost(info);
      return info;
    }

    // If we still haven't been able to construct a valid URI, try to force a
    // keyword match.
    if (keywordEnabled && fixupFlags & FIXUP_FLAG_ALLOW_KEYWORD_LOOKUP) {
      tryKeywordFixupForURIInfo(info.originalInput, info, isPrivateContext);
    }

    if (!info.preferredURI) {
      // We couldn't salvage anything.
      throw new Components.Exception(
        "Couldn't build a valid uri",
        Cr.NS_ERROR_MALFORMED_URI
      );
    }

    fixupConsecutiveDotsHost(info);
    return info;
  },
448
449  webNavigationFlagsToFixupFlags(href, navigationFlags) {
450    try {
451      Services.io.newURI(href);
452      // Remove LOAD_FLAGS_ALLOW_THIRD_PARTY_FIXUP for valid uris.
453      navigationFlags &= ~Ci.nsIWebNavigation
454        .LOAD_FLAGS_ALLOW_THIRD_PARTY_FIXUP;
455    } catch (ex) {}
456
457    let fixupFlags = FIXUP_FLAG_NONE;
458    if (
459      navigationFlags & Ci.nsIWebNavigation.LOAD_FLAGS_ALLOW_THIRD_PARTY_FIXUP
460    ) {
461      fixupFlags |= FIXUP_FLAG_ALLOW_KEYWORD_LOOKUP;
462    }
463    if (navigationFlags & Ci.nsIWebNavigation.LOAD_FLAGS_FIXUP_SCHEME_TYPOS) {
464      fixupFlags |= FIXUP_FLAG_FIX_SCHEME_TYPOS;
465    }
466    return fixupFlags;
467  },
468
469  keywordToURI(keyword, isPrivateContext) {
470    if (Services.appinfo.processType == Ci.nsIXULRuntime.PROCESS_TYPE_CONTENT) {
471      // There's no search service in the content process, thus all the calls
472      // from it that care about keywords conversion should go through the
473      // parent process.
474      throw new Components.Exception(
475        "Can't invoke URIFixup in the content process",
476        Cr.NS_ERROR_NOT_AVAILABLE
477      );
478    }
479    let info = new URIFixupInfo(keyword);
480
481    // Strip leading "?" and leading/trailing spaces from aKeyword
482    if (keyword.startsWith("?")) {
483      keyword = keyword.substring(1);
484    }
485    keyword = keyword.trim();
486
487    // Try falling back to the search service's default search engine
488    // We must use an appropriate search engine depending on the private
489    // context.
490    let engine = isPrivateContext
491      ? Services.search.defaultPrivateEngine
492      : Services.search.defaultEngine;
493
494    // We allow default search plugins to specify alternate parameters that are
495    // specific to keyword searches.
496    let responseType = null;
497    if (engine.supportsResponseType("application/x-moz-keywordsearch")) {
498      responseType = "application/x-moz-keywordsearch";
499    }
500    let submission = engine.getSubmission(keyword, responseType, "keyword");
501    if (
502      !submission ||
503      // For security reasons (avoid redirecting to file, data, or other unsafe
504      // protocols) we only allow fixup to http/https search engines.
505      !submission.uri.scheme.startsWith("http")
506    ) {
507      throw new Components.Exception(
508        "Invalid search submission uri",
509        Cr.NS_ERROR_NOT_AVAILABLE
510      );
511    }
512    let submissionPostDataStream = submission.postData;
513    if (submissionPostDataStream) {
514      info.postData = submissionPostDataStream;
515    }
516
517    info.keywordProviderName = engine.name;
518    info.keywordAsSent = keyword;
519    info.preferredURI = submission.uri;
520    return info;
521  },
522
  // Exposes the module-level isDomainKnown() helper as a method of the
  // service (shorthand property: same function object).
  isDomainKnown,

  // XPCOM plumbing: component id, a factory that hands out a single shared
  // URIFixup instance, and the interface this object can be queried for.
  classID: Components.ID("{c6cf88b7-452e-47eb-bdc9-86e3561648ef}"),
  _xpcom_factory: ComponentUtils.generateSingletonFactory(URIFixup),
  QueryInterface: ChromeUtils.generateQI(["nsIURIFixup"]),
528};
529
/**
 * Result object of a fixup operation (nsIURIFixupInfo).
 *
 * Values are kept in underscore-prefixed backing fields. The setters store
 * what they are given (the consumer setter normalizes falsy values to null),
 * while the getters normalize unset/falsy values to a stable default:
 * null for object-like properties, "" for strings, false for booleans.
 *
 * @param {string} [originalInput] The string the fixup was requested for.
 */
function URIFixupInfo(originalInput = "") {
  this._originalInput = originalInput;
}

URIFixupInfo.prototype = {
  // Object-like properties: default to null.
  get consumer() {
    return this._consumer ? this._consumer : null;
  },
  set consumer(value) {
    this._consumer = value ? value : null;
  },

  get preferredURI() {
    return this._preferredURI ? this._preferredURI : null;
  },
  set preferredURI(value) {
    this._preferredURI = value;
  },

  get fixedURI() {
    return this._fixedURI ? this._fixedURI : null;
  },
  set fixedURI(value) {
    this._fixedURI = value;
  },

  // String properties: default to the empty string.
  get keywordProviderName() {
    return this._keywordProviderName ? this._keywordProviderName : "";
  },
  set keywordProviderName(value) {
    this._keywordProviderName = value;
  },

  get keywordAsSent() {
    return this._keywordAsSent ? this._keywordAsSent : "";
  },
  set keywordAsSent(value) {
    this._keywordAsSent = value;
  },

  // Boolean properties: always read back as a strict boolean.
  get fixupChangedProtocol() {
    return Boolean(this._fixupChangedProtocol);
  },
  set fixupChangedProtocol(value) {
    this._fixupChangedProtocol = value;
  },

  get fixupCreatedAlternateURI() {
    return Boolean(this._fixupCreatedAlternateURI);
  },
  set fixupCreatedAlternateURI(value) {
    this._fixupCreatedAlternateURI = value;
  },

  get originalInput() {
    return this._originalInput ? this._originalInput : "";
  },
  set originalInput(value) {
    this._originalInput = value;
  },

  get postData() {
    return this._postData ? this._postData : null;
  },
  set postData(value) {
    this._postData = value;
  },

  // XPCOM plumbing: component id and the interface this object implements.
  classID: Components.ID("{33d75835-722f-42c0-89cc-44f328e56a86}"),
  QueryInterface: ChromeUtils.generateQI(["nsIURIFixupInfo"]),
};
601
602// Helpers
603
/**
 * Implementation of isDomainKnown, so we don't have to go through the
 * service.
 *
 * A host is known when any of these holds:
 *  - the dnsFirstForSingleWords pref is true (every host is then known);
 *  - the host, lower-cased and without one trailing dot, is in knownDomains;
 *  - the host equals a known suffix or ends with "." followed by one.
 *
 * Suffixes are matched on a label boundary, so a known suffix "some.three"
 * covers "a.some.three" but not "awesome.three".
 *
 * @param {string} asciiHost The host to check.
 * @returns {boolean} whether the domain is known
 */
function isDomainKnown(asciiHost) {
  if (dnsFirstForSingleWords) {
    return true;
  }
  // Check if this domain is known as an actual
  // domain (which will prevent a keyword query)
  // Note that any processing of the host here should stay in sync with
  // code in the front-end(s) that set the pref.
  // Ignore a single trailing dot ("example." is looked up as "example").
  const host = asciiHost.endsWith(".") ? asciiHost.slice(0, -1) : asciiHost;
  if (knownDomains.has(host.toLowerCase())) {
    return true;
  }
  // If there's no dot or only a leading dot we are done, otherwise we'll check
  // against the known suffixes.
  const lastDotIndex = host.lastIndexOf(".");
  if (lastDotIndex <= 0) {
    return false;
  }
  // Don't use getPublicSuffix here, since the suffix is not in the PSL,
  // thus it couldn't tell if the suffix is made up of one or multiple
  // dot-separated parts.
  const lastPart = host.slice(lastDotIndex + 1);
  const suffixes = knownSuffixes.get(lastPart);
  if (!suffixes) {
    return false;
  }
  for (const suffix of suffixes) {
    // A bare endsWith(suffix) would also match in the middle of a label
    // ("awesome.three" vs "some.three"), so require the suffix to start on a
    // dot. Suffixes stored with their own leading dot are used as they are.
    const dottedSuffix = suffix.startsWith(".") ? suffix : `.${suffix}`;
    if (host === suffix || host.endsWith(dottedSuffix)) {
      return true;
    }
  }
  return false;
}
641
/**
 * Looks up the public suffix of info.fixedURI and, when the suffix is unknown
 * because of a common typo of com/net/org, rewrites the host with the
 * corrected suffix.
 * @param {URIFixupInfo} info about the uri to check.
 * @note this may replace info.fixedURI, and info.preferredURI too when both
 *       referred to the same URI.
 * @returns {object} result The lookup result.
 * @returns {string} result.suffix The public suffix if one can be identified.
 * @returns {boolean} result.hasUnknownSuffix True when the suffix is not in the
 *     Public Suffix List and it's not in knownSuffixes. False in the other cases.
 */
function checkAndFixPublicSuffix(info) {
  // Always build a fresh result object, so callers never share one.
  const makeResult = (suffix, hasUnknownSuffix) => ({
    suffix,
    hasUnknownSuffix,
  });

  const uri = info.fixedURI;
  const asciiHost = uri?.asciiHost;

  // Nothing to check without a dotted host, for hosts ending with a dot, or
  // for hosts that are already known.
  if (!asciiHost) {
    return makeResult("", false);
  }
  if (!asciiHost.includes(".") || asciiHost.endsWith(".")) {
    return makeResult("", false);
  }
  if (isDomainKnown(asciiHost)) {
    return makeResult("", false);
  }

  // Quick bailouts for most common cases, according to Alexa Top 1 million.
  const commonSuffixes = [".com", ".net", ".org", ".ru", ".de"];
  if (
    /^\w/.test(asciiHost) &&
    commonSuffixes.some(common => asciiHost.endsWith(common))
  ) {
    const afterLastDot = asciiHost.substring(asciiHost.lastIndexOf(".") + 1);
    return makeResult(afterLastDot, false);
  }

  // Ask the eTLD service; a failing lookup is reported as "no suffix".
  let knownSuffix;
  try {
    knownSuffix = Services.eTLD.getKnownPublicSuffix(uri);
  } catch (ex) {
    return makeResult("", false);
  }
  if (knownSuffix) {
    return makeResult(knownSuffix, false);
  }

  // Suffix is unknown, try to fix most common 3 chars TLDs typos.
  // .com is the most commonly mistyped tld, so it has more cases.
  const suffix = Services.eTLD.getPublicSuffix(uri);
  if (!suffix || numberRegex.test(suffix)) {
    return makeResult("", false);
  }
  const typoFixes = new Map([
    ["ocm", "com"],
    ["con", "com"],
    ["cmo", "com"],
    ["xom", "com"],
    ["vom", "com"],
    ["cpm", "com"],
    ["com'", "com"],
    ["ent", "net"],
    ["ner", "net"],
    ["nte", "net"],
    ["met", "net"],
    ["rog", "org"],
    ["ogr", "org"],
    ["prg", "org"],
    ["orh", "org"],
  ]);
  const fixedSuffix = typoFixes.get(suffix);
  if (!fixedSuffix) {
    return makeResult("", true);
  }

  // Swap the mistyped suffix at the end of the host for the fixed one.
  const hostWithoutSuffix = uri.host.substring(
    0,
    uri.host.length - suffix.length
  );
  // Remember whether preferredURI was tracking fixedURI before replacing it.
  const preferredWasFixed = info.preferredURI == info.fixedURI;
  info.fixedURI = uri
    .mutate()
    .setHost(hostWithoutSuffix + fixedSuffix)
    .finalize();
  if (preferredWasFixed) {
    info.preferredURI = info.fixedURI;
  }
  return makeResult(fixedSuffix, false);
}
724
/**
 * Asks the fixup service for the keyword search URI of a string and copies
 * the keyword related results into fixupInfo.
 * @param {string} uriString The string to search for.
 * @param {URIFixupInfo} fixupInfo The fixup info object, modified in-place.
 * @param {boolean} isPrivateContext Whether this happens in a private context.
 * @returns {boolean} true when the results were copied, false when anything
 *          threw along the way (the error is intentionally swallowed).
 */
function tryKeywordFixupForURIInfo(uriString, fixupInfo, isPrivateContext) {
  let succeeded = false;
  try {
    const result = Services.uriFixup.keywordToURI(uriString, isPrivateContext);
    fixupInfo.keywordProviderName = result.keywordProviderName;
    fixupInfo.keywordAsSent = result.keywordAsSent;
    fixupInfo.preferredURI = result.preferredURI;
    succeeded = true;
  } catch (ex) {
    // Best effort: a failed keyword lookup only means "no keyword fixup".
  }
  return succeeded;
}
738
/**
 * Replaces info.fixedURI with an alternate URI, built by adding a prefix
 * and/or a suffix to the host and switching to the alternate protocol.
 * This only ever happens for http URIs without user/password and without an
 * explicit port; URIs with other protocols are _never_ modified.
 * @param {URIFixupInfo} info an URIInfo object
 * @param {integer} fixupFlags the fixup flags
 * @returns {boolean} Whether an alternate uri was generated
 */
function maybeSetAlternateFixedURI(info, fixupFlags) {
  const uri = info.fixedURI;

  // The caller must ask for an alternate uri and the feature must be enabled.
  if (!(fixupFlags & FIXUP_FLAGS_MAKE_ALTERNATE_URI)) {
    return false;
  }
  if (!alternateEnabled) {
    return false;
  }
  // Code only works for http. Not for any other protocol including https!
  if (!uri.schemeIs("http")) {
    return false;
  }
  // Security - URLs with user / password info should NOT be fixed up
  if (uri.userPass) {
    return false;
  }
  // Don't fix up hosts with ports
  if (uri.port != -1) {
    return false;
  }

  const oldHost = uri.host;
  // Don't create an alternate uri for localhost, because it would be confusing.
  // Ditto for 'http' and 'https' as these are frequently the result of typos, e.g.
  // 'https//foo' (note missing : ).
  if (["localhost", "http", "https"].includes(oldHost)) {
    return false;
  }

  // Get the prefix and suffix to stick onto the new hostname. By default these
  // are www. & .com but they could be any other value, e.g. www. & .org
  const { prefs } = Services;
  const prefix = prefs.getCharPref("browser.fixup.alternate.prefix", "www.");
  const suffix = prefs.getCharPref("browser.fixup.alternate.suffix", ".com");

  // Only hosts with zero or one dot are candidates for an alternate host.
  const dotCount = oldHost.split(".").length - 1;
  let newHost = "";
  switch (dotCount) {
    case 0:
      newHost = prefix + oldHost + suffix;
      break;
    case 1:
      if (prefix && oldHost == prefix) {
        newHost = oldHost + suffix;
      } else if (suffix && !oldHost.startsWith(prefix)) {
        newHost = prefix + oldHost;
      }
      break;
    default:
      break;
  }
  if (!newHost) {
    return false;
  }

  // Build the alternate uri; a malformed result just means "no alternate",
  // any other failure is unexpected and is propagated.
  let alternateURI;
  try {
    const mutator = uri.mutate();
    mutator.setScheme(alternateProtocol);
    alternateURI = mutator.setHost(newHost).finalize();
  } catch (ex) {
    if (ex.result != Cr.NS_ERROR_MALFORMED_URI) {
      throw ex;
    }
    return false;
  }
  info.fixedURI = alternateURI;
  info.fixupCreatedAlternateURI = true;
  return true;
}
812
/**
 * Try to fixup a file URI.
 * The string is only treated as a path when it looks like one for the current
 * platform: on Windows when it contains a backslash or is just a drive
 * (e.g. C:), elsewhere when it starts with "/".
 * @param {string} uriString The file URI to fix.
 * @returns {nsIURI} a fixed uri or null.
 * @note FileURIFixup only returns a URI if it has to add the file: protocol.
 */
function fileURIFixup(uriString) {
  const isWindows = AppConstants.platform == "win";
  const isBareDrive = () => uriString.length == 2 && uriString.endsWith(":");
  const looksLikePath = isWindows
    ? uriString.includes("\\") || isBareDrive()
    : uriString.startsWith("/");
  if (!looksLikePath) {
    return null;
  }

  try {
    // Test if this is a valid path by trying to create a local file
    // object. The URL of that is returned if successful.
    const localFile = Cc["@mozilla.org/file/local;1"].createInstance(
      Ci.nsIFile
    );
    localFile.initWithPath(uriString);
    const fileSpec = fileProtocolHandler.getURLSpecFromActualFile(localFile);
    return Services.io.newURI(fileSpec);
  } catch (ex) {
    // Not a file uri.
    return null;
  }
}
845
/**
 * Tries to fixup a string to an nsIURI by adding the default protocol.
 *
 * "http://" is prepended unless the string already starts with a scheme,
 * i.e. unless its first ":" or "/" is the start of a "://".
 *
 * Should fix things like:
 *    no-scheme.com
 *    ftp.no-scheme.com
 *    ftp4.no-scheme.com
 *    no-scheme.com/query?foo=http://www.foo.com
 *    user:pass@no-scheme.com
 *
 * @param {string} uriString The string to fixup.
 * @returns {nsIURI} an nsIURI built adding the default protocol to the string,
 *          or null if fixing was not possible.
 */
function fixupURIProtocol(uriString) {
  const schemeSeparatorIndex = uriString.indexOf("://");
  const firstDelimiterIndex = uriString.search(/[:\/]/);
  // A "://" that comes after another ":" or "/" belongs to the path or the
  // query, not to a leading scheme.
  const startsWithScheme =
    schemeSeparatorIndex != -1 && schemeSeparatorIndex <= firstDelimiterIndex;
  const candidate = startsWithScheme ? uriString : `http://${uriString}`;
  try {
    return Services.io.newURI(candidate);
  } catch (ex) {
    // We generated an invalid uri.
    return null;
  }
}
872
/**
 * Tries to fixup a string to a search url.
 * @param {string} uriString the string to fixup.
 * @param {URIFixupInfo} fixupInfo The fixup info object, modified in-place.
 * @param {boolean} isPrivateContext Whether this happens in a private context.
 * @returns {boolean} Whether the keyword fixup was successful.
 */
function keywordURIFixup(uriString, fixupInfo, isPrivateContext) {
  // Here are a few examples of strings that should be searched:
  // "what is mozilla"
  // "what is mozilla?"
  // "docshell site:mozilla.org" - has a space in the origin part
  // "?site:mozilla.org - anything that begins with a question mark
  // "mozilla'.org" - Things that have a quote before the first dot/colon
  // "mozilla/test" - unknown host
  // ".mozilla", "mozilla." - starts or ends with a dot
  // "user@nonQualifiedHost"

  // These other strings should not be searched, because they could be URIs:
  // "www.blah.com" - Domain with a standard or known suffix
  // "knowndomain" - known domain
  // "nonQualifiedHost:8888?something" - has a port
  // "user:pass@nonQualifiedHost"
  // "blah.com."

  // Every search performed here uses the original input, not uriString.
  const searchOriginalInput = () =>
    tryKeywordFixupForURIInfo(
      fixupInfo.originalInput,
      fixupInfo,
      isPrivateContext
    );

  // We do keyword lookups if the input starts with a question mark.
  if (uriString.startsWith("?")) {
    return searchOriginalInput();
  }

  // Check for IPs, ignoring a "user:password@" token if there is one.
  const credentialsMatch = userPasswordRegex.exec(uriString);
  let ipCandidate = uriString;
  if (credentialsMatch) {
    ipCandidate = uriString.replace(credentialsMatch[2], "");
  }
  if (IPv4LikeRegex.test(ipCandidate) || IPv6LikeRegex.test(ipCandidate)) {
    return false;
  }

  const fixedURI = fixupInfo.fixedURI;

  // Avoid keyword lookup if we can identify a host and it's known, or ends
  // with a dot and has some path.
  // Note that if dnsFirstForSingleWords is true isDomainKnown will always
  // return true, so we can avoid checking dnsFirstForSingleWords after this.
  const asciiHost = fixedURI?.asciiHost;
  if (asciiHost) {
    if (isDomainKnown(asciiHost)) {
      return false;
    }
    // True when the host ends with a dot that is not its first dot.
    const hasDotBeforeTrailingDot =
      asciiHost.endsWith(".") &&
      asciiHost.indexOf(".") != asciiHost.length - 1;
    if (hasDotBeforeTrailingDot) {
      return false;
    }
  }

  // Avoid keyword lookup if the url seems to have password.
  if (fixedURI?.password) {
    return false;
  }

  // Even if the host is unknown, avoid keyword lookup if the string has
  // uri-like characteristics, unless it looks like "user@unknownHost".
  // Note we already excluded passwords at this point.
  if (isURILike(uriString, fixedURI?.displayHost)) {
    const isUserAtHostOnly =
      fixedURI?.userPass && fixedURI?.pathQueryRef === "/";
    if (!isUserAtHostOnly) {
      return false;
    }
  }
  return searchOriginalInput();
}
952
/**
 * Mimics the logic in Services.io.extractScheme, but avoids crossing XPConnect.
 * This also tries to fixup the scheme if it was clearly mistyped.
 *
 * A scheme is only reported when the protocol-like prefix is followed by ":",
 * or, when scheme typo fixing is enabled (both by preference and by flag),
 * when it is followed by "//" even though the ":" is missing or mistyped.
 *
 * @param {string} uriString the string to examine
 * @param {integer} fixupFlags The original fixup flags
 * @returns {object}
 *          scheme: a typo fixed scheme (lowercase, tabs removed) or empty
 *                  string if one could not be identified
 *          fixedSchemeUriString: uri string with a typo fixed scheme
 *          fixupChangedProtocol: true if the scheme matched one of the known
 *                  typos and was replaced
 */
function extractScheme(uriString, fixupFlags = FIXUP_FLAG_NONE) {
  const matches = uriString.match(possibleProtocolRegex);
  const hasColon = matches?.[2] === ":";
  const hasSlash2 = matches?.[3] === "//";

  const isFixupSchemeTypos =
    fixupSchemeTypos && fixupFlags & FIXUP_FLAG_FIX_SCHEME_TYPOS;

  // Without a colon we only continue when "//" follows the scheme and typo
  // fixing is allowed; everything else has no identifiable scheme.
  if (
    !matches ||
    (!hasColon && !hasSlash2) ||
    (!hasColon && !isFixupSchemeTypos)
  ) {
    return {
      scheme: "",
      fixedSchemeUriString: uriString,
      fixupChangedProtocol: false,
    };
  }

  const rawScheme = matches[1];
  // Strip every tab, not just the first one: a string pattern passed to
  // String.prototype.replace only replaces the first occurrence.
  let scheme = rawScheme.replace(/\t/g, "").toLowerCase();
  let fixedSchemeUriString = uriString;
  // Number of characters the scheme currently occupies at the start of
  // fixedSchemeUriString. It differs from scheme.length when the raw scheme
  // contained tabs and the string was not rebuilt below.
  let schemeLength = rawScheme.length;

  if (isFixupSchemeTypos && hasSlash2) {
    // Fix up typos for a string that the user would have intended as protocol,
    // by rebuilding it as "<scheme>://<rest>".
    const afterProtocol = uriString.substring(matches[0].length);
    fixedSchemeUriString = `${scheme}://${afterProtocol}`;
    schemeLength = scheme.length;
  }

  let fixupChangedProtocol = false;

  if (isFixupSchemeTypos) {
    // Fix up common scheme typos.
    // TODO: Use levenshtein distance here?
    const knownTypo = [
      ["ttp", "http"],
      ["htp", "http"],
      ["ttps", "https"],
      ["tps", "https"],
      ["ps", "https"],
      ["htps", "https"],
      ["ile", "file"],
      ["le", "file"],
    ].find(([typo]) => typo === scheme);

    if (knownTypo) {
      scheme = knownTypo[1];
      fixedSchemeUriString =
        scheme + fixedSchemeUriString.substring(schemeLength);
      fixupChangedProtocol = true;
    }
  }

  return {
    scheme,
    fixedSchemeUriString,
    fixupChangedProtocol,
  };
}
1023
/**
 * Fixes up a "view-source:" string. View-source is a pseudo scheme, so the
 * interesting part is whatever follows it: the prefix is removed, the
 * remainder goes through the regular fixup, and the prefix is put back in
 * front of the resulting URI.
 * @param {string} uriString The original string to fixup, starting with
 *        "view-source:"
 * @param {integer} fixupFlags The original fixup flags
 * @returns {object} {preferredURI, postData} The fixed URI and relative postData
 * @throws NS_ERROR_FAILURE if the inner string is itself a view-source URI,
 *         NS_ERROR_MALFORMED_URI if the inner string cannot be fixed up
 */
function fixupViewSource(uriString, fixupFlags) {
  const VIEW_SOURCE_PREFIX = "view-source:";

  const innerURIString = uriString
    .substring(VIEW_SOURCE_PREFIX.length)
    .trim();

  // Refuse nested view-source URIs, otherwise we would recurse.
  if (extractScheme(innerURIString).scheme === "view-source") {
    throw new Components.Exception(
      "Prevent view-source recursion",
      Cr.NS_ERROR_FAILURE
    );
  }

  // Keyword lookup and alternate URIs are switched off for the inner fixup,
  // so that small typos don't cause us to look at very different domains.
  const disabledFlags =
    FIXUP_FLAG_ALLOW_KEYWORD_LOOKUP | FIXUP_FLAGS_MAKE_ALTERNATE_URI;
  const innerFixupFlags = fixupFlags & ~disabledFlags;

  const innerInfo = Services.uriFixup.getFixupURIInfo(
    innerURIString,
    innerFixupFlags
  );
  if (!innerInfo.preferredURI) {
    throw new Components.Exception(
      "Couldn't build a valid uri",
      Cr.NS_ERROR_MALFORMED_URI
    );
  }

  const preferredURI = Services.io.newURI(
    VIEW_SOURCE_PREFIX + innerInfo.preferredURI.spec
  );
  return { preferredURI, postData: innerInfo.postData };
}
1065
/**
 * Collapses runs of consecutive dots in the host of fixupInfo.fixedURI into a
 * single dot. When preferredURI was equal to the old fixedURI, it is updated
 * to the repaired URI as well. Does nothing if there is no fixedURI, if its
 * host cannot be read, or if the host has no consecutive dots.
 * @param {URIFixupInfo} fixupInfo an URIInfo object, modified in place
 * @throws any error raised while rebuilding the URI, except
 *         NS_ERROR_MALFORMED_URI, which is ignored
 */
function fixupConsecutiveDotsHost(fixupInfo) {
  const currentURI = fixupInfo.fixedURI;

  // Reading the host is guarded: if it throws there is nothing to fix.
  let hasConsecutiveDots = false;
  try {
    hasConsecutiveDots = Boolean(currentURI?.host.includes(".."));
  } catch (ex) {
    return;
  }
  if (!hasConsecutiveDots) {
    return;
  }

  try {
    // Must be evaluated before fixedURI is replaced.
    const preferredWasFixedURI = fixupInfo.preferredURI?.equals(currentURI);

    const mutator = currentURI.mutate();
    const withCollapsedHost = mutator.setHost(
      currentURI.host.replace(/\.+/g, ".")
    );
    fixupInfo.fixedURI = withCollapsedHost.finalize();

    if (preferredWasFixedURI) {
      fixupInfo.preferredURI = fixupInfo.fixedURI;
    }
  } catch (ex) {
    // A host that is still malformed after collapsing dots is left untouched.
    if (ex.result === Cr.NS_ERROR_MALFORMED_URI) {
      return;
    }
    throw ex;
  }
}
1098
/**
 * Return whether or not the given string is uri like.
 * This function returns true for strings like the following:
 * - ":8080"
 * - "localhost:8080" (if given host is "localhost")
 * - "/foo?bar"
 * - "/foo#bar"
 * @param {string} uriString The string to examine.
 * @param {string} [host] Host to strip from the start of uriString before
 *        looking for a port. May be empty, null or undefined when no host is
 *        known, in which case nothing is stripped.
 * @returns {boolean} true if uri like.
 */
function isURILike(uriString, host) {
  // A "?" or "#" somewhere after the first "/" looks like a path followed by
  // a query or a ref.
  const indexOfSlash = uriString.indexOf("/");
  if (
    indexOfSlash >= 0 &&
    (uriString.includes("?", indexOfSlash) ||
      uriString.includes("#", indexOfSlash))
  ) {
    return true;
  }

  // Only strip a real host. startsWith() coerces its argument to a string, so
  // an undefined host would match inputs starting with "undefined" and then
  // fail on host.length.
  const afterHost =
    host && uriString.startsWith(host)
      ? uriString.substring(host.length)
      : uriString;

  return portRegex.test(afterHost);
}
1126