1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3  * License, v. 2.0. If a copy of the MPL was not distributed with this
4  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
5 
6 #include "MainThreadUtils.h"
7 #include "mozilla/Preferences.h"
8 #include "nsIDNService.h"
9 #include "nsReadableUtils.h"
10 #include "nsCRT.h"
11 #include "nsServiceManagerUtils.h"
12 #include "nsUnicharUtils.h"
13 #include "nsUnicodeProperties.h"
14 #include "nsUnicodeScriptCodes.h"
15 #include "harfbuzz/hb.h"
16 #include "punycode.h"
17 #include "mozilla/ArrayUtils.h"
18 #include "mozilla/TextUtils.h"
19 #include "mozilla/Utf8.h"
20 
21 // Currently we use the non-transitional processing option -- see
22 // http://unicode.org/reports/tr46/
23 // To switch to transitional processing, change the value of this flag
24 // and kTransitionalProcessing in netwerk/test/unit/test_idna2008.js to true
25 // (revert bug 1218179).
26 const bool kIDNA2008_TransitionalProcessing = false;
27 
28 #include "ICUUtils.h"
29 #include "unicode/uscript.h"
30 
31 using namespace mozilla;
32 using namespace mozilla::unicode;
33 using namespace mozilla::net;
34 using mozilla::Preferences;
35 
36 //-----------------------------------------------------------------------------
37 // RFC 1034 - 3.1. Name space specifications and terminology
38 static const uint32_t kMaxDNSNodeLen = 63;
39 // RFC 3490 - 5.   ACE prefix
40 static const char kACEPrefix[] = "xn--";
41 #define kACEPrefixLen 4
42 
43 //-----------------------------------------------------------------------------
44 
45 #define NS_NET_PREF_EXTRAALLOWED "network.IDN.extra_allowed_chars"
46 #define NS_NET_PREF_EXTRABLOCKED "network.IDN.extra_blocked_chars"
47 #define NS_NET_PREF_SHOWPUNYCODE "network.IDN_show_punycode"
48 #define NS_NET_PREF_IDNWHITELIST "network.IDN.whitelist."
49 #define NS_NET_PREF_IDNUSEWHITELIST "network.IDN.use_whitelist"
50 #define NS_NET_PREF_IDNRESTRICTION "network.IDN.restriction_profile"
51 
isOnlySafeChars(const nsString & in,const nsTArray<BlocklistRange> & aBlocklist)52 static inline bool isOnlySafeChars(const nsString& in,
53                                    const nsTArray<BlocklistRange>& aBlocklist) {
54   if (aBlocklist.IsEmpty()) {
55     return true;
56   }
57   const char16_t* cur = in.BeginReading();
58   const char16_t* end = in.EndReading();
59 
60   for (; cur < end; ++cur) {
61     if (CharInBlocklist(*cur, aBlocklist)) {
62       return false;
63     }
64   }
65   return true;
66 }
67 
68 //-----------------------------------------------------------------------------
69 // nsIDNService
70 //-----------------------------------------------------------------------------
71 
72 /* Implementation file */
73 NS_IMPL_ISUPPORTS(nsIDNService, nsIIDNService, nsISupportsWeakReference)
74 
// Null-terminated list of preference names handed to
// Preferences::RegisterPrefixCallbacks (in Init) and
// Preferences::UnregisterPrefixCallbacks (in the destructor), so that
// PrefChanged is invoked when any of them changes.
static const char* gCallbackPrefs[] = {
    NS_NET_PREF_EXTRAALLOWED,    NS_NET_PREF_EXTRABLOCKED,
    NS_NET_PREF_SHOWPUNYCODE,    NS_NET_PREF_IDNRESTRICTION,
    NS_NET_PREF_IDNUSEWHITELIST, nullptr,
};
80 
Init()81 nsresult nsIDNService::Init() {
82   MOZ_ASSERT(NS_IsMainThread());
83   MutexAutoLock lock(mLock);
84 
85   nsCOMPtr<nsIPrefService> prefs(do_GetService(NS_PREFSERVICE_CONTRACTID));
86   if (prefs) {
87     prefs->GetBranch(NS_NET_PREF_IDNWHITELIST,
88                      getter_AddRefs(mIDNWhitelistPrefBranch));
89   }
90 
91   Preferences::RegisterPrefixCallbacks(PrefChanged, gCallbackPrefs, this);
92   prefsChanged(nullptr);
93   InitializeBlocklist(mIDNBlocklist);
94 
95   return NS_OK;
96 }
97 
prefsChanged(const char * pref)98 void nsIDNService::prefsChanged(const char* pref) {
99   MOZ_ASSERT(NS_IsMainThread());
100   mLock.AssertCurrentThreadOwns();
101 
102   if (pref && nsLiteralCString(NS_NET_PREF_EXTRAALLOWED).Equals(pref)) {
103     InitializeBlocklist(mIDNBlocklist);
104   }
105   if (pref && nsLiteralCString(NS_NET_PREF_EXTRABLOCKED).Equals(pref)) {
106     InitializeBlocklist(mIDNBlocklist);
107   }
108   if (!pref || nsLiteralCString(NS_NET_PREF_SHOWPUNYCODE).Equals(pref)) {
109     bool val;
110     if (NS_SUCCEEDED(Preferences::GetBool(NS_NET_PREF_SHOWPUNYCODE, &val))) {
111       mShowPunycode = val;
112     }
113   }
114   if (!pref || nsLiteralCString(NS_NET_PREF_IDNUSEWHITELIST).Equals(pref)) {
115     bool val;
116     if (NS_SUCCEEDED(Preferences::GetBool(NS_NET_PREF_IDNUSEWHITELIST, &val))) {
117       mIDNUseWhitelist = val;
118     }
119   }
120   if (!pref || nsLiteralCString(NS_NET_PREF_IDNRESTRICTION).Equals(pref)) {
121     nsAutoCString profile;
122     if (NS_FAILED(
123             Preferences::GetCString(NS_NET_PREF_IDNRESTRICTION, profile))) {
124       profile.Truncate();
125     }
126     if (profile.EqualsLiteral("moderate")) {
127       mRestrictionProfile = eModeratelyRestrictiveProfile;
128     } else if (profile.EqualsLiteral("high")) {
129       mRestrictionProfile = eHighlyRestrictiveProfile;
130     } else {
131       mRestrictionProfile = eASCIIOnlyProfile;
132     }
133   }
134 }
135 
nsIDNService()136 nsIDNService::nsIDNService() {
137   MOZ_ASSERT(NS_IsMainThread());
138 
139   uint32_t IDNAOptions = UIDNA_CHECK_BIDI | UIDNA_CHECK_CONTEXTJ;
140   if (!kIDNA2008_TransitionalProcessing) {
141     IDNAOptions |= UIDNA_NONTRANSITIONAL_TO_UNICODE;
142   }
143   UErrorCode errorCode = U_ZERO_ERROR;
144   mIDNA = uidna_openUTS46(IDNAOptions, &errorCode);
145 }
146 
// Tears the service down on the main thread: stops pref-change
// notifications for gCallbackPrefs, then closes the ICU IDNA instance that
// the constructor opened.
nsIDNService::~nsIDNService() {
  MOZ_ASSERT(NS_IsMainThread());

  Preferences::UnregisterPrefixCallbacks(PrefChanged, gCallbackPrefs, this);

  uidna_close(mIDNA);
}
154 
IDNA2008ToUnicode(const nsACString & input,nsAString & output)155 nsresult nsIDNService::IDNA2008ToUnicode(const nsACString& input,
156                                          nsAString& output) {
157   NS_ConvertUTF8toUTF16 inputStr(input);
158   UIDNAInfo info = UIDNA_INFO_INITIALIZER;
159   UErrorCode errorCode = U_ZERO_ERROR;
160   int32_t inLen = inputStr.Length();
161   int32_t outMaxLen = kMaxDNSNodeLen + 1;
162   UChar outputBuffer[kMaxDNSNodeLen + 1];
163 
164   int32_t outLen =
165       uidna_labelToUnicode(mIDNA, (const UChar*)inputStr.get(), inLen,
166                            outputBuffer, outMaxLen, &info, &errorCode);
167   if (info.errors != 0) {
168     return NS_ERROR_MALFORMED_URI;
169   }
170 
171   if (U_SUCCESS(errorCode)) {
172     ICUUtils::AssignUCharArrayToString(outputBuffer, outLen, output);
173   }
174 
175   nsresult rv = ICUUtils::UErrorToNsResult(errorCode);
176   if (rv == NS_ERROR_FAILURE) {
177     rv = NS_ERROR_MALFORMED_URI;
178   }
179   return rv;
180 }
181 
IDNA2008StringPrep(const nsAString & input,nsAString & output,stringPrepFlag flag)182 nsresult nsIDNService::IDNA2008StringPrep(const nsAString& input,
183                                           nsAString& output,
184                                           stringPrepFlag flag) {
185   UIDNAInfo info = UIDNA_INFO_INITIALIZER;
186   UErrorCode errorCode = U_ZERO_ERROR;
187   int32_t inLen = input.Length();
188   int32_t outMaxLen = kMaxDNSNodeLen + 1;
189   UChar outputBuffer[kMaxDNSNodeLen + 1];
190 
191   int32_t outLen =
192       uidna_labelToUnicode(mIDNA, (const UChar*)PromiseFlatString(input).get(),
193                            inLen, outputBuffer, outMaxLen, &info, &errorCode);
194   nsresult rv = ICUUtils::UErrorToNsResult(errorCode);
195   if (rv == NS_ERROR_FAILURE) {
196     rv = NS_ERROR_MALFORMED_URI;
197   }
198   NS_ENSURE_SUCCESS(rv, rv);
199 
200   // Output the result of nameToUnicode even if there were errors.
201   // But in the case of invalid punycode, the uidna_labelToUnicode result
202   // appears to get an appended U+FFFD REPLACEMENT CHARACTER, which will
203   // confuse our subsequent processing, so we drop that.
204   // (https://bugzilla.mozilla.org/show_bug.cgi?id=1399540#c9)
205   if ((info.errors & UIDNA_ERROR_PUNYCODE) && outLen > 0 &&
206       outputBuffer[outLen - 1] == 0xfffd) {
207     --outLen;
208   }
209   ICUUtils::AssignUCharArrayToString(outputBuffer, outLen, output);
210 
211   if (flag == eStringPrepIgnoreErrors) {
212     return NS_OK;
213   }
214 
215   uint32_t ignoredErrors = 0;
216   if (flag == eStringPrepForDNS) {
217     ignoredErrors = UIDNA_ERROR_LEADING_HYPHEN | UIDNA_ERROR_TRAILING_HYPHEN |
218                     UIDNA_ERROR_HYPHEN_3_4;
219   }
220 
221   if ((info.errors & ~ignoredErrors) != 0) {
222     if (flag == eStringPrepForDNS) {
223       output.Truncate();
224     }
225     rv = NS_ERROR_MALFORMED_URI;
226   }
227 
228   return rv;
229 }
230 
// nsIIDNService entry point: converts the UTF-8 host |input| to ACE in |ace|
// by delegating to UTF8toACE() with the eStringPrepForDNS flag.
NS_IMETHODIMP nsIDNService::ConvertUTF8toACE(const nsACString& input,
                                             nsACString& ace) {
  return UTF8toACE(input, ace, eStringPrepForDNS);
}
235 
UTF8toACE(const nsACString & input,nsACString & ace,stringPrepFlag flag)236 nsresult nsIDNService::UTF8toACE(const nsACString& input, nsACString& ace,
237                                  stringPrepFlag flag) {
238   nsresult rv;
239   NS_ConvertUTF8toUTF16 ustr(input);
240 
241   // map ideographic period to ASCII period etc.
242   normalizeFullStops(ustr);
243 
244   uint32_t len, offset;
245   len = 0;
246   offset = 0;
247   nsAutoCString encodedBuf;
248 
249   nsAString::const_iterator start, end;
250   ustr.BeginReading(start);
251   ustr.EndReading(end);
252   ace.Truncate();
253 
254   // encode nodes if non ASCII
255   while (start != end) {
256     len++;
257     if (*start++ == (char16_t)'.') {
258       rv = stringPrepAndACE(Substring(ustr, offset, len - 1), encodedBuf, flag);
259       NS_ENSURE_SUCCESS(rv, rv);
260 
261       ace.Append(encodedBuf);
262       ace.Append('.');
263       offset += len;
264       len = 0;
265     }
266   }
267 
268   // encode the last node if non ASCII
269   if (len) {
270     rv = stringPrepAndACE(Substring(ustr, offset, len), encodedBuf, flag);
271     NS_ENSURE_SUCCESS(rv, rv);
272 
273     ace.Append(encodedBuf);
274   }
275 
276   return NS_OK;
277 }
278 
// nsIIDNService entry point: converts the ACE host |input| to UTF-8 in
// |_retval| by delegating to ACEtoUTF8() with the eStringPrepForDNS flag.
NS_IMETHODIMP nsIDNService::ConvertACEtoUTF8(const nsACString& input,
                                             nsACString& _retval) {
  return ACEtoUTF8(input, _retval, eStringPrepForDNS);
}
283 
ACEtoUTF8(const nsACString & input,nsACString & _retval,stringPrepFlag flag)284 nsresult nsIDNService::ACEtoUTF8(const nsACString& input, nsACString& _retval,
285                                  stringPrepFlag flag) {
286   // RFC 3490 - 4.2 ToUnicode
287   // ToUnicode never fails.  If any step fails, then the original input
288   // sequence is returned immediately in that step.
289   //
290   // Note that this refers to the decoding of a single label.
291   // ACEtoUTF8 may be called with a sequence of labels separated by dots;
292   // this test applies individually to each label.
293 
294   uint32_t len = 0, offset = 0;
295   nsAutoCString decodedBuf;
296 
297   nsACString::const_iterator start, end;
298   input.BeginReading(start);
299   input.EndReading(end);
300   _retval.Truncate();
301 
302   // loop and decode nodes
303   while (start != end) {
304     len++;
305     if (*start++ == '.') {
306       nsDependentCSubstring origLabel(input, offset, len - 1);
307       if (NS_FAILED(decodeACE(origLabel, decodedBuf, flag))) {
308         // If decoding failed, use the original input sequence
309         // for this label.
310         _retval.Append(origLabel);
311       } else {
312         _retval.Append(decodedBuf);
313       }
314 
315       _retval.Append('.');
316       offset += len;
317       len = 0;
318     }
319   }
320   // decode the last node
321   if (len) {
322     nsDependentCSubstring origLabel(input, offset, len);
323     if (NS_FAILED(decodeACE(origLabel, decodedBuf, flag))) {
324       _retval.Append(origLabel);
325     } else {
326       _retval.Append(decodedBuf);
327     }
328   }
329 
330   return NS_OK;
331 }
332 
IsACE(const nsACString & input,bool * _retval)333 NS_IMETHODIMP nsIDNService::IsACE(const nsACString& input, bool* _retval) {
334   // look for the ACE prefix in the input string.  it may occur
335   // at the beginning of any segment in the domain name.  for
336   // example: "www.xn--ENCODED.com"
337 
338   if (!IsAscii(input)) {
339     *_retval = false;
340     return NS_OK;
341   }
342 
343   auto stringContains = [](const nsACString& haystack,
344                            const nsACString& needle) {
345     return std::search(haystack.BeginReading(), haystack.EndReading(),
346                        needle.BeginReading(),
347                        needle.EndReading()) != haystack.EndReading();
348   };
349 
350   *_retval = StringBeginsWith(input, "xn--"_ns) ||
351              (!input.IsEmpty() && input[0] != '.' &&
352               stringContains(input, ".xn--"_ns));
353   return NS_OK;
354 }
355 
Normalize(const nsACString & input,nsACString & output)356 NS_IMETHODIMP nsIDNService::Normalize(const nsACString& input,
357                                       nsACString& output) {
358   // protect against bogus input
359   NS_ENSURE_TRUE(IsUtf8(input), NS_ERROR_UNEXPECTED);
360 
361   NS_ConvertUTF8toUTF16 inUTF16(input);
362   normalizeFullStops(inUTF16);
363 
364   // pass the domain name to stringprep label by label
365   nsAutoString outUTF16, outLabel;
366 
367   uint32_t len = 0, offset = 0;
368   nsresult rv;
369   nsAString::const_iterator start, end;
370   inUTF16.BeginReading(start);
371   inUTF16.EndReading(end);
372 
373   while (start != end) {
374     len++;
375     if (*start++ == char16_t('.')) {
376       rv = stringPrep(Substring(inUTF16, offset, len - 1), outLabel,
377                       eStringPrepIgnoreErrors);
378       NS_ENSURE_SUCCESS(rv, rv);
379 
380       outUTF16.Append(outLabel);
381       outUTF16.Append(char16_t('.'));
382       offset += len;
383       len = 0;
384     }
385   }
386   if (len) {
387     rv = stringPrep(Substring(inUTF16, offset, len), outLabel,
388                     eStringPrepIgnoreErrors);
389     NS_ENSURE_SUCCESS(rv, rv);
390 
391     outUTF16.Append(outLabel);
392   }
393 
394   CopyUTF16toUTF8(outUTF16, output);
395   return NS_OK;
396 }
397 
398 namespace {
399 
400 class MOZ_STACK_CLASS MutexSettableAutoUnlock final {
401  private:
402   Mutex* mMutex = nullptr;
403 
404  public:
405   MutexSettableAutoUnlock() = default;
406 
Acquire(mozilla::Mutex & aMutex)407   void Acquire(mozilla::Mutex& aMutex) {
408     MOZ_ASSERT(!mMutex);
409     mMutex = &aMutex;
410     mMutex->Lock();
411   }
412 
~MutexSettableAutoUnlock()413   ~MutexSettableAutoUnlock() {
414     if (mMutex) {
415       mMutex->Unlock();
416     }
417   }
418 };
419 
420 }  // anonymous namespace
421 
// Produces in |_retval| the form of the host |input| to be displayed, and
// sets |*_isASCII| to describe whether that result is ASCII.
//
// ASCII input is lowercased; if it contains ACE labels and punycode display
// is not forced, those labels are decoded (all of them for whitelisted
// hosts, otherwise only the ones considered safe for display).
// Non-ASCII input is normalized, then either fully ACE-encoded (punycode
// display forced), kept as-is (ASCII after normalization, or whitelisted),
// or re-encoded label by label according to display safety.
//
// Returns the failure from Normalize() or UTF8toACE() when they fail; in the
// Normalize() failure case |*_isASCII| is not written.
// When called off the main thread, mLock is held for the whole call.
NS_IMETHODIMP nsIDNService::ConvertToDisplayIDN(const nsACString& input,
                                                bool* _isASCII,
                                                nsACString& _retval) {
  // Only take the lock off the main thread; |lock| releases it on scope exit.
  MutexSettableAutoUnlock lock;
  if (!NS_IsMainThread()) {
    lock.Acquire(mLock);
  }

  // If host is ACE, then convert to UTF-8 if the host is in the IDN whitelist.
  // Else, if host is already UTF-8, then make sure it is normalized per IDN.

  nsresult rv = NS_OK;

  // Even if the hostname is not ASCII, individual labels may still be ACE, so
  // test IsACE before testing IsASCII
  bool isACE;
  IsACE(input, &isACE);

  if (IsAscii(input)) {
    // first, canonicalize the host to lowercase, for whitelist lookup
    _retval = input;
    ToLowerCase(_retval);

    if (isACE && !mShowPunycode) {
      // ACEtoUTF8() can't fail, but might return the original ACE string
      nsAutoCString temp(_retval);
      // If the domain is in the whitelist, return the host in UTF-8.
      // Otherwise convert from ACE to UTF8 only those labels which are
      // considered safe for display
      ACEtoUTF8(
          temp, _retval,
          isInWhitelist(temp) ? eStringPrepIgnoreErrors : eStringPrepForUI);
      *_isASCII = IsAscii(_retval);
    } else {
      // Plain ASCII host, or punycode display was requested: keep it as-is.
      *_isASCII = true;
    }
  } else {
    // We have to normalize the hostname before testing against the domain
    // whitelist (see bug 315411), and to ensure the entire string gets
    // normalized.
    //
    // Normalization and the tests for safe display below, assume that the
    // input is Unicode, so first convert any ACE labels to UTF8
    if (isACE) {
      nsAutoCString temp;
      ACEtoUTF8(input, temp, eStringPrepIgnoreErrors);
      rv = Normalize(temp, _retval);
    } else {
      rv = Normalize(input, _retval);
    }
    if (NS_FAILED(rv)) {
      return rv;
    }

    // Punycode display requested: encode every label. If that conversion
    // fails we fall through to the checks below.
    if (mShowPunycode &&
        NS_SUCCEEDED(UTF8toACE(_retval, _retval, eStringPrepIgnoreErrors))) {
      *_isASCII = true;
      return NS_OK;
    }

    // normalization could result in an ASCII-only hostname. alternatively, if
    // the host is converted to ACE by the normalizer, then the host may contain
    // unsafe characters, so leave it ACE encoded. see bug 283016, bug 301694,
    // and bug 309311.
    *_isASCII = IsAscii(_retval);
    if (!*_isASCII && !isInWhitelist(_retval)) {
      // UTF8toACE with eStringPrepForUI may return a domain name where
      // some labels are in UTF-8 and some are in ACE, depending on
      // whether they are considered safe for display
      rv = UTF8toACE(_retval, _retval, eStringPrepForUI);
      *_isASCII = IsAscii(_retval);
      return rv;
    }
  }

  return NS_OK;
}
499 
500 //-----------------------------------------------------------------------------
501 
utf16ToUcs4(const nsAString & in,uint32_t * out,uint32_t outBufLen,uint32_t * outLen)502 static nsresult utf16ToUcs4(const nsAString& in, uint32_t* out,
503                             uint32_t outBufLen, uint32_t* outLen) {
504   uint32_t i = 0;
505   nsAString::const_iterator start, end;
506   in.BeginReading(start);
507   in.EndReading(end);
508 
509   while (start != end) {
510     char16_t curChar;
511 
512     curChar = *start++;
513 
514     if (start != end && NS_IS_SURROGATE_PAIR(curChar, *start)) {
515       out[i] = SURROGATE_TO_UCS4(curChar, *start);
516       ++start;
517     } else {
518       out[i] = curChar;
519     }
520 
521     i++;
522     if (i >= outBufLen) {
523       return NS_ERROR_MALFORMED_URI;
524     }
525   }
526   out[i] = (uint32_t)'\0';
527   *outLen = i;
528   return NS_OK;
529 }
530 
punycode(const nsAString & in,nsACString & out)531 static nsresult punycode(const nsAString& in, nsACString& out) {
532   uint32_t ucs4Buf[kMaxDNSNodeLen + 1];
533   uint32_t ucs4Len = 0u;
534   nsresult rv = utf16ToUcs4(in, ucs4Buf, kMaxDNSNodeLen, &ucs4Len);
535   NS_ENSURE_SUCCESS(rv, rv);
536 
537   // need maximum 20 bits to encode 16 bit Unicode character
538   // (include null terminator)
539   const uint32_t kEncodedBufSize = kMaxDNSNodeLen * 20 / 8 + 1 + 1;
540   char encodedBuf[kEncodedBufSize];
541   punycode_uint encodedLength = kEncodedBufSize;
542 
543   enum punycode_status status =
544       punycode_encode(ucs4Len, ucs4Buf, nullptr, &encodedLength, encodedBuf);
545 
546   if (punycode_success != status || encodedLength >= kEncodedBufSize) {
547     return NS_ERROR_MALFORMED_URI;
548   }
549 
550   encodedBuf[encodedLength] = '\0';
551   out.Assign(nsDependentCString(kACEPrefix) + nsDependentCString(encodedBuf));
552 
553   return rv;
554 }
555 
556 // RFC 3454
557 //
558 // 1) Map -- For each character in the input, check if it has a mapping
559 // and, if so, replace it with its mapping. This is described in section 3.
560 //
561 // 2) Normalize -- Possibly normalize the result of step 1 using Unicode
562 // normalization. This is described in section 4.
563 //
564 // 3) Prohibit -- Check for any characters that are not allowed in the
565 // output. If any are found, return an error. This is described in section
566 // 5.
567 //
568 // 4) Check bidi -- Possibly check for right-to-left characters, and if any
569 // are found, make sure that the whole string satisfies the requirements
570 // for bidirectional strings. If the string does not satisfy the requirements
571 // for bidirectional strings, return an error. This is described in section 6.
572 //
573 // 5) Check unassigned code points -- If allowUnassigned is false, check for
574 // any unassigned Unicode points and if any are found return an error.
575 // This is described in section 7.
576 //
// In this file the preparation of a single label is delegated entirely to
// IDNA2008StringPrep(), which forwards |in|, |out| and |flag| unchanged.
nsresult nsIDNService::stringPrep(const nsAString& in, nsAString& out,
                                  stringPrepFlag flag) {
  return IDNA2008StringPrep(in, out, flag);
}
581 
stringPrepAndACE(const nsAString & in,nsACString & out,stringPrepFlag flag)582 nsresult nsIDNService::stringPrepAndACE(const nsAString& in, nsACString& out,
583                                         stringPrepFlag flag) {
584   nsresult rv = NS_OK;
585 
586   out.Truncate();
587 
588   if (in.Length() > kMaxDNSNodeLen) {
589     NS_WARNING("IDN node too large");
590     return NS_ERROR_MALFORMED_URI;
591   }
592 
593   if (IsAscii(in)) {
594     LossyCopyUTF16toASCII(in, out);
595     return NS_OK;
596   }
597 
598   nsAutoString strPrep;
599   rv = stringPrep(in, strPrep, flag);
600   if (flag == eStringPrepForDNS) {
601     NS_ENSURE_SUCCESS(rv, rv);
602   }
603 
604   if (IsAscii(strPrep)) {
605     LossyCopyUTF16toASCII(strPrep, out);
606     return NS_OK;
607   }
608 
609   if (flag == eStringPrepForUI && NS_SUCCEEDED(rv) && isLabelSafe(in)) {
610     CopyUTF16toUTF8(strPrep, out);
611     return NS_OK;
612   }
613 
614   rv = punycode(strPrep, out);
615   // Check that the encoded output isn't larger than the maximum length
616   // of a DNS node per RFC 1034.
617   // This test isn't necessary in the code paths above where the input
618   // is ASCII (since the output will be the same length as the input) or
619   // where we convert to UTF-8 (since the output is only used for
620   // display in the UI and not passed to DNS and can legitimately be
621   // longer than the limit).
622   if (out.Length() > kMaxDNSNodeLen) {
623     NS_WARNING("IDN node too large");
624     return NS_ERROR_MALFORMED_URI;
625   }
626 
627   return rv;
628 }
629 
630 // RFC 3490
631 // 1) Whenever dots are used as label separators, the following characters
632 //    MUST be recognized as dots: U+002E (full stop), U+3002 (ideographic full
633 //    stop), U+FF0E (fullwidth full stop), U+FF61 (halfwidth ideographic full
634 //    stop).
635 
normalizeFullStops(nsAString & s)636 void nsIDNService::normalizeFullStops(nsAString& s) {
637   nsAString::const_iterator start, end;
638   s.BeginReading(start);
639   s.EndReading(end);
640   int32_t index = 0;
641 
642   while (start != end) {
643     switch (*start) {
644       case 0x3002:
645       case 0xFF0E:
646       case 0xFF61:
647         s.ReplaceLiteral(index, 1, u".");
648         break;
649       default:
650         break;
651     }
652     start++;
653     index++;
654   }
655 }
656 
decodeACE(const nsACString & in,nsACString & out,stringPrepFlag flag)657 nsresult nsIDNService::decodeACE(const nsACString& in, nsACString& out,
658                                  stringPrepFlag flag) {
659   bool isAce;
660   IsACE(in, &isAce);
661   if (!isAce) {
662     out.Assign(in);
663     return NS_OK;
664   }
665 
666   nsAutoString utf16;
667   nsresult result = IDNA2008ToUnicode(in, utf16);
668   NS_ENSURE_SUCCESS(result, result);
669 
670   if (flag != eStringPrepForUI || isLabelSafe(utf16)) {
671     CopyUTF16toUTF8(utf16, out);
672   } else {
673     out.Assign(in);
674     return NS_OK;
675   }
676 
677   // Validation: encode back to ACE and compare the strings
678   nsAutoCString ace;
679   nsresult rv = UTF8toACE(out, ace, flag);
680   NS_ENSURE_SUCCESS(rv, rv);
681 
682   if (flag == eStringPrepForDNS &&
683       !ace.Equals(in, nsCaseInsensitiveCStringComparator)) {
684     return NS_ERROR_MALFORMED_URI;
685   }
686 
687   return NS_OK;
688 }
689 
isInWhitelist(const nsACString & host)690 bool nsIDNService::isInWhitelist(const nsACString& host) {
691   if (!NS_IsMainThread()) {
692     mLock.AssertCurrentThreadOwns();
693   }
694 
695   if (mIDNUseWhitelist && mIDNWhitelistPrefBranch) {
696     nsAutoCString tld(host);
697     // make sure the host is ACE for lookup and check that there are no
698     // unassigned codepoints
699     if (!IsAscii(tld) && NS_FAILED(UTF8toACE(tld, tld, eStringPrepForDNS))) {
700       return false;
701     }
702 
703     // truncate trailing dots first
704     tld.Trim(".");
705     int32_t pos = tld.RFind(".");
706     if (pos == kNotFound) {
707       return false;
708     }
709 
710     tld.Cut(0, pos + 1);
711 
712     bool safe;
713     if (NS_SUCCEEDED(mIDNWhitelistPrefBranch->GetBoolPref(tld.get(), &safe))) {
714       return safe;
715     }
716   }
717 
718   return false;
719 }
720 
// Decides whether the (non-ASCII) |label| may be displayed as Unicode.
// Returns false when:
//   - the label contains a character from mIDNBlocklist;
//   - the ASCII-only restriction profile is active;
//   - any character has identifier type IDTYPE_RESTRICTED;
//   - illegalScriptCombo() rejects the mix of scripts seen so far;
//   - decimal digits from more than one numbering system appear;
//   - the same non-spacing mark appears twice in a row;
//   - a non-spacing mark with specific script extensions follows a character
//     of a script not among them;
//   - U+0131 (dotless i) is followed by one of the marks U+0300..U+0314 or
//     U+031A.
// Off the main thread the caller must hold mLock.
bool nsIDNService::isLabelSafe(const nsAString& label) {
  if (!NS_IsMainThread()) {
    mLock.AssertCurrentThreadOwns();
  }

  if (!isOnlySafeChars(PromiseFlatString(label), mIDNBlocklist)) {
    return false;
  }

  // We should never get here if the label is ASCII
  NS_ASSERTION(!IsAscii(label), "ASCII label in IDN checking");
  if (mRestrictionProfile == eASCIIOnlyProfile) {
    return false;
  }

  nsAString::const_iterator current, end;
  label.BeginReading(current);
  label.EndReading(end);

  // State carried from one character to the next.
  Script lastScript = Script::INVALID;  // last script other than COMMON/INHERITED
  uint32_t previousChar = 0;
  uint32_t baseChar = 0;  // last non-diacritic seen (base char for marks)
  uint32_t savedNumberingSystem = 0;  // zero digit of the first numbering system seen
// Simplified/Traditional Chinese check temporarily disabled -- bug 857481
#if 0
  HanVariantType savedHanVariant = HVT_NotHan;
#endif

  // Script-combination state for illegalScriptCombo(); -1 means no script
  // has been recorded yet.
  int32_t savedScript = -1;

  while (current != end) {
    uint32_t ch = *current++;

    // Combine a surrogate pair into a single code point.
    if (current != end && NS_IS_SURROGATE_PAIR(ch, *current)) {
      ch = SURROGATE_TO_UCS4(ch, *current++);
    }

    IdentifierType idType = GetIdentifierType(ch);
    if (idType == IDTYPE_RESTRICTED) {
      return false;
    }
    MOZ_ASSERT(idType == IDTYPE_ALLOWED);

    // Check for mixed script
    Script script = GetScriptCode(ch);
    if (script != Script::COMMON && script != Script::INHERITED &&
        script != lastScript) {
      if (illegalScriptCombo(script, savedScript)) {
        return false;
      }
    }

    // Check for mixed numbering systems
    auto genCat = GetGeneralCategory(ch);
    if (genCat == HB_UNICODE_GENERAL_CATEGORY_DECIMAL_NUMBER) {
      // Subtracting the digit's value gives the zero digit of its system.
      uint32_t zeroCharacter = ch - GetNumericValue(ch);
      if (savedNumberingSystem == 0) {
        // If we encounter a decimal number, save the zero character from that
        // numbering system.
        savedNumberingSystem = zeroCharacter;
      } else if (zeroCharacter != savedNumberingSystem) {
        return false;
      }
    }

    if (genCat == HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK) {
      // Check for consecutive non-spacing marks.
      // (Only an immediate repeat of the same mark is rejected.)
      if (previousChar != 0 && previousChar == ch) {
        return false;
      }
      // Check for marks whose expected script doesn't match the base script.
      if (lastScript != Script::INVALID) {
        const size_t kMaxScripts = 32;  // more than ample for current values
                                        // of ScriptExtensions property
        UScriptCode scripts[kMaxScripts];
        UErrorCode errorCode = U_ZERO_ERROR;
        int nScripts =
            uscript_getScriptExtensions(ch, scripts, kMaxScripts, &errorCode);
        MOZ_ASSERT(U_SUCCESS(errorCode), "uscript_getScriptExtensions failed");
        if (U_FAILURE(errorCode)) {
          return false;
        }
        // nScripts will always be >= 1, because even for undefined characters
        // uscript_getScriptExtensions will return Script::INVALID.
        // If the mark just has script=COMMON or INHERITED, we can't check any
        // more carefully, but if it has specific scriptExtension codes, then
        // assume those are the only valid scripts to use it with.
        if (nScripts > 1 || (Script(scripts[0]) != Script::COMMON &&
                             Script(scripts[0]) != Script::INHERITED)) {
          // Search backwards for lastScript; nScripts ends at -1 when no
          // entry matched.
          while (--nScripts >= 0) {
            if (Script(scripts[nScripts]) == lastScript) {
              break;
            }
          }
          if (nScripts == -1) {
            return false;
          }
        }
      }
      // Check for diacritics on dotless-i, which would be indistinguishable
      // from normal accented letter i.
      if (baseChar == 0x0131 &&
          ((ch >= 0x0300 && ch <= 0x0314) || ch == 0x031a)) {
        return false;
      }
    } else {
      baseChar = ch;
    }

    if (script != Script::COMMON && script != Script::INHERITED) {
      lastScript = script;
    }

    // Simplified/Traditional Chinese check temporarily disabled -- bug 857481
#if 0

    // Check for both simplified-only and traditional-only Chinese characters
    HanVariantType hanVariant = GetHanVariant(ch);
    if (hanVariant == HVT_SimplifiedOnly || hanVariant == HVT_TraditionalOnly) {
      if (savedHanVariant == HVT_NotHan) {
        savedHanVariant = hanVariant;
      } else if (hanVariant != savedHanVariant)  {
        return false;
      }
    }
#endif

    previousChar = ch;
  }
  return true;
}
852 
// Scripts that we care about in illegalScriptCombo.
// The position of each script in this table is the index returned by
// findScriptIndex() and must match the BOPO..LATN values defined below.
static const Script scriptTable[] = {
    Script::BOPOMOFO, Script::CYRILLIC, Script::GREEK,    Script::HANGUL,
    Script::HAN,      Script::HIRAGANA, Script::KATAKANA, Script::LATIN};

// Row/column indices and cell values for scriptComboTable.
// BOPO..LATN mirror the order of scriptTable; OTHR stands for any script not
// listed there; JPAN..HNLT name the permitted script combinations; FAIL
// marks a combination that is rejected.
#define BOPO 0
#define CYRL 1
#define GREK 2
#define HANG 3
#define HANI 4
#define HIRA 5
#define KATA 6
#define LATN 7
#define OTHR 8
#define JPAN 9   // Latin + Han + Hiragana + Katakana
#define CHNA 10  // Latin + Han + Bopomofo
#define KORE 11  // Latin + Han + Hangul
#define HNLT 12  // Latin + Han (could be any of the above combinations)
#define FAIL 13
872 
findScriptIndex(Script aScript)873 static inline int32_t findScriptIndex(Script aScript) {
874   int32_t tableLength = mozilla::ArrayLength(scriptTable);
875   for (int32_t index = 0; index < tableLength; ++index) {
876     if (aScript == scriptTable[index]) {
877       return index;
878     }
879   }
880   return OTHR;
881 }
882 
// State-transition table used by illegalScriptCombo().
// The row is the combination state accumulated so far (savedScript) and the
// column is the index of the script just encountered (findScriptIndex()).
// Each cell is the new combination state, or FAIL when the mix is rejected.
// There is no row for FAIL, so a FAIL state must not be fed back in as a row.
static const int32_t scriptComboTable[13][9] = {
    /* thisScript: BOPO  CYRL  GREK  HANG  HANI  HIRA  KATA  LATN  OTHR
     * savedScript */
    /* BOPO */ {BOPO, FAIL, FAIL, FAIL, CHNA, FAIL, FAIL, CHNA, FAIL},
    /* CYRL */ {FAIL, CYRL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL},
    /* GREK */ {FAIL, FAIL, GREK, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL},
    /* HANG */ {FAIL, FAIL, FAIL, HANG, KORE, FAIL, FAIL, KORE, FAIL},
    /* HANI */ {CHNA, FAIL, FAIL, KORE, HANI, JPAN, JPAN, HNLT, FAIL},
    /* HIRA */ {FAIL, FAIL, FAIL, FAIL, JPAN, HIRA, JPAN, JPAN, FAIL},
    /* KATA */ {FAIL, FAIL, FAIL, FAIL, JPAN, JPAN, KATA, JPAN, FAIL},
    /* LATN */ {CHNA, FAIL, FAIL, KORE, HNLT, JPAN, JPAN, LATN, OTHR},
    /* OTHR */ {FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, OTHR, FAIL},
    /* JPAN */ {FAIL, FAIL, FAIL, FAIL, JPAN, JPAN, JPAN, JPAN, FAIL},
    /* CHNA */ {CHNA, FAIL, FAIL, FAIL, CHNA, FAIL, FAIL, CHNA, FAIL},
    /* KORE */ {FAIL, FAIL, FAIL, KORE, KORE, FAIL, FAIL, KORE, FAIL},
    /* HNLT */ {CHNA, FAIL, FAIL, KORE, HNLT, JPAN, JPAN, HNLT, FAIL}};
899 
illegalScriptCombo(Script script,int32_t & savedScript)900 bool nsIDNService::illegalScriptCombo(Script script, int32_t& savedScript) {
901   if (!NS_IsMainThread()) {
902     mLock.AssertCurrentThreadOwns();
903   }
904 
905   if (savedScript == -1) {
906     savedScript = findScriptIndex(script);
907     return false;
908   }
909 
910   savedScript = scriptComboTable[savedScript][findScriptIndex(script)];
911   /*
912    * Special case combinations that depend on which profile is in use
913    * In the Highly Restrictive profile Latin is not allowed with any
914    *  other script
915    *
916    * In the Moderately Restrictive profile Latin mixed with any other
917    *  single script is allowed.
918    */
919   return ((savedScript == OTHR &&
920            mRestrictionProfile == eHighlyRestrictiveProfile) ||
921           savedScript == FAIL);
922 }
923 
924 #undef BOPO
925 #undef CYRL
926 #undef GREK
927 #undef HANG
928 #undef HANI
929 #undef HIRA
930 #undef KATA
931 #undef LATN
932 #undef OTHR
933 #undef JPAN
934 #undef CHNA
935 #undef KORE
936 #undef HNLT
937 #undef FAIL
938