1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
5
6 #include "MainThreadUtils.h"
7 #include "mozilla/Preferences.h"
8 #include "nsIDNService.h"
9 #include "nsReadableUtils.h"
10 #include "nsCRT.h"
11 #include "nsServiceManagerUtils.h"
12 #include "nsUnicharUtils.h"
13 #include "nsUnicodeProperties.h"
14 #include "nsUnicodeScriptCodes.h"
15 #include "harfbuzz/hb.h"
16 #include "punycode.h"
17 #include "mozilla/ArrayUtils.h"
18 #include "mozilla/TextUtils.h"
19 #include "mozilla/Utf8.h"
20
// Selects the UTS #46 processing mode (http://unicode.org/reports/tr46/).
// We currently use the non-transitional option. To switch to transitional
// processing, set both this flag and kTransitionalProcessing in
// netwerk/test/unit/test_idna2008.js to true (i.e. revert bug 1218179).
constexpr bool kIDNA2008_TransitionalProcessing = false;
27
28 #include "ICUUtils.h"
29 #include "unicode/uscript.h"
30
31 using namespace mozilla;
32 using namespace mozilla::unicode;
33 using namespace mozilla::net;
34 using mozilla::Preferences;
35
//-----------------------------------------------------------------------------
// Maximum length of a single DNS label
// (RFC 1034 - 3.1. Name space specifications and terminology).
static constexpr uint32_t kMaxDNSNodeLen = 63;
// Prefix that marks an ACE-encoded label (RFC 3490 - 5. ACE prefix).
static const char kACEPrefix[] = "xn--";
// Length of kACEPrefix, not counting the terminating NUL.
#define kACEPrefixLen 4

//-----------------------------------------------------------------------------
// Names of the preferences consulted by this file.

#define NS_NET_PREF_EXTRAALLOWED "network.IDN.extra_allowed_chars"
#define NS_NET_PREF_EXTRABLOCKED "network.IDN.extra_blocked_chars"
#define NS_NET_PREF_SHOWPUNYCODE "network.IDN_show_punycode"
#define NS_NET_PREF_IDNWHITELIST "network.IDN.whitelist."
#define NS_NET_PREF_IDNUSEWHITELIST "network.IDN.use_whitelist"
#define NS_NET_PREF_IDNRESTRICTION "network.IDN.restriction_profile"
51
isOnlySafeChars(const nsString & in,const nsTArray<BlocklistRange> & aBlocklist)52 static inline bool isOnlySafeChars(const nsString& in,
53 const nsTArray<BlocklistRange>& aBlocklist) {
54 if (aBlocklist.IsEmpty()) {
55 return true;
56 }
57 const char16_t* cur = in.BeginReading();
58 const char16_t* end = in.EndReading();
59
60 for (; cur < end; ++cur) {
61 if (CharInBlocklist(*cur, aBlocklist)) {
62 return false;
63 }
64 }
65 return true;
66 }
67
68 //-----------------------------------------------------------------------------
69 // nsIDNService
70 //-----------------------------------------------------------------------------
71
72 /* Implementation file */
73 NS_IMPL_ISUPPORTS(nsIDNService, nsIIDNService, nsISupportsWeakReference)
74
75 static const char* gCallbackPrefs[] = {
76 NS_NET_PREF_EXTRAALLOWED, NS_NET_PREF_EXTRABLOCKED,
77 NS_NET_PREF_SHOWPUNYCODE, NS_NET_PREF_IDNRESTRICTION,
78 NS_NET_PREF_IDNUSEWHITELIST, nullptr,
79 };
80
Init()81 nsresult nsIDNService::Init() {
82 MOZ_ASSERT(NS_IsMainThread());
83 MutexAutoLock lock(mLock);
84
85 nsCOMPtr<nsIPrefService> prefs(do_GetService(NS_PREFSERVICE_CONTRACTID));
86 if (prefs) {
87 prefs->GetBranch(NS_NET_PREF_IDNWHITELIST,
88 getter_AddRefs(mIDNWhitelistPrefBranch));
89 }
90
91 Preferences::RegisterPrefixCallbacks(PrefChanged, gCallbackPrefs, this);
92 prefsChanged(nullptr);
93 InitializeBlocklist(mIDNBlocklist);
94
95 return NS_OK;
96 }
97
prefsChanged(const char * pref)98 void nsIDNService::prefsChanged(const char* pref) {
99 MOZ_ASSERT(NS_IsMainThread());
100 mLock.AssertCurrentThreadOwns();
101
102 if (pref && nsLiteralCString(NS_NET_PREF_EXTRAALLOWED).Equals(pref)) {
103 InitializeBlocklist(mIDNBlocklist);
104 }
105 if (pref && nsLiteralCString(NS_NET_PREF_EXTRABLOCKED).Equals(pref)) {
106 InitializeBlocklist(mIDNBlocklist);
107 }
108 if (!pref || nsLiteralCString(NS_NET_PREF_SHOWPUNYCODE).Equals(pref)) {
109 bool val;
110 if (NS_SUCCEEDED(Preferences::GetBool(NS_NET_PREF_SHOWPUNYCODE, &val))) {
111 mShowPunycode = val;
112 }
113 }
114 if (!pref || nsLiteralCString(NS_NET_PREF_IDNUSEWHITELIST).Equals(pref)) {
115 bool val;
116 if (NS_SUCCEEDED(Preferences::GetBool(NS_NET_PREF_IDNUSEWHITELIST, &val))) {
117 mIDNUseWhitelist = val;
118 }
119 }
120 if (!pref || nsLiteralCString(NS_NET_PREF_IDNRESTRICTION).Equals(pref)) {
121 nsAutoCString profile;
122 if (NS_FAILED(
123 Preferences::GetCString(NS_NET_PREF_IDNRESTRICTION, profile))) {
124 profile.Truncate();
125 }
126 if (profile.EqualsLiteral("moderate")) {
127 mRestrictionProfile = eModeratelyRestrictiveProfile;
128 } else if (profile.EqualsLiteral("high")) {
129 mRestrictionProfile = eHighlyRestrictiveProfile;
130 } else {
131 mRestrictionProfile = eASCIIOnlyProfile;
132 }
133 }
134 }
135
nsIDNService()136 nsIDNService::nsIDNService() {
137 MOZ_ASSERT(NS_IsMainThread());
138
139 uint32_t IDNAOptions = UIDNA_CHECK_BIDI | UIDNA_CHECK_CONTEXTJ;
140 if (!kIDNA2008_TransitionalProcessing) {
141 IDNAOptions |= UIDNA_NONTRANSITIONAL_TO_UNICODE;
142 }
143 UErrorCode errorCode = U_ZERO_ERROR;
144 mIDNA = uidna_openUTS46(IDNAOptions, &errorCode);
145 }
146
~nsIDNService()147 nsIDNService::~nsIDNService() {
148 MOZ_ASSERT(NS_IsMainThread());
149
150 Preferences::UnregisterPrefixCallbacks(PrefChanged, gCallbackPrefs, this);
151
152 uidna_close(mIDNA);
153 }
154
IDNA2008ToUnicode(const nsACString & input,nsAString & output)155 nsresult nsIDNService::IDNA2008ToUnicode(const nsACString& input,
156 nsAString& output) {
157 NS_ConvertUTF8toUTF16 inputStr(input);
158 UIDNAInfo info = UIDNA_INFO_INITIALIZER;
159 UErrorCode errorCode = U_ZERO_ERROR;
160 int32_t inLen = inputStr.Length();
161 int32_t outMaxLen = kMaxDNSNodeLen + 1;
162 UChar outputBuffer[kMaxDNSNodeLen + 1];
163
164 int32_t outLen =
165 uidna_labelToUnicode(mIDNA, (const UChar*)inputStr.get(), inLen,
166 outputBuffer, outMaxLen, &info, &errorCode);
167 if (info.errors != 0) {
168 return NS_ERROR_MALFORMED_URI;
169 }
170
171 if (U_SUCCESS(errorCode)) {
172 ICUUtils::AssignUCharArrayToString(outputBuffer, outLen, output);
173 }
174
175 nsresult rv = ICUUtils::UErrorToNsResult(errorCode);
176 if (rv == NS_ERROR_FAILURE) {
177 rv = NS_ERROR_MALFORMED_URI;
178 }
179 return rv;
180 }
181
IDNA2008StringPrep(const nsAString & input,nsAString & output,stringPrepFlag flag)182 nsresult nsIDNService::IDNA2008StringPrep(const nsAString& input,
183 nsAString& output,
184 stringPrepFlag flag) {
185 UIDNAInfo info = UIDNA_INFO_INITIALIZER;
186 UErrorCode errorCode = U_ZERO_ERROR;
187 int32_t inLen = input.Length();
188 int32_t outMaxLen = kMaxDNSNodeLen + 1;
189 UChar outputBuffer[kMaxDNSNodeLen + 1];
190
191 int32_t outLen =
192 uidna_labelToUnicode(mIDNA, (const UChar*)PromiseFlatString(input).get(),
193 inLen, outputBuffer, outMaxLen, &info, &errorCode);
194 nsresult rv = ICUUtils::UErrorToNsResult(errorCode);
195 if (rv == NS_ERROR_FAILURE) {
196 rv = NS_ERROR_MALFORMED_URI;
197 }
198 NS_ENSURE_SUCCESS(rv, rv);
199
200 // Output the result of nameToUnicode even if there were errors.
201 // But in the case of invalid punycode, the uidna_labelToUnicode result
202 // appears to get an appended U+FFFD REPLACEMENT CHARACTER, which will
203 // confuse our subsequent processing, so we drop that.
204 // (https://bugzilla.mozilla.org/show_bug.cgi?id=1399540#c9)
205 if ((info.errors & UIDNA_ERROR_PUNYCODE) && outLen > 0 &&
206 outputBuffer[outLen - 1] == 0xfffd) {
207 --outLen;
208 }
209 ICUUtils::AssignUCharArrayToString(outputBuffer, outLen, output);
210
211 if (flag == eStringPrepIgnoreErrors) {
212 return NS_OK;
213 }
214
215 uint32_t ignoredErrors = 0;
216 if (flag == eStringPrepForDNS) {
217 ignoredErrors = UIDNA_ERROR_LEADING_HYPHEN | UIDNA_ERROR_TRAILING_HYPHEN |
218 UIDNA_ERROR_HYPHEN_3_4;
219 }
220
221 if ((info.errors & ~ignoredErrors) != 0) {
222 if (flag == eStringPrepForDNS) {
223 output.Truncate();
224 }
225 rv = NS_ERROR_MALFORMED_URI;
226 }
227
228 return rv;
229 }
230
ConvertUTF8toACE(const nsACString & input,nsACString & ace)231 NS_IMETHODIMP nsIDNService::ConvertUTF8toACE(const nsACString& input,
232 nsACString& ace) {
233 return UTF8toACE(input, ace, eStringPrepForDNS);
234 }
235
UTF8toACE(const nsACString & input,nsACString & ace,stringPrepFlag flag)236 nsresult nsIDNService::UTF8toACE(const nsACString& input, nsACString& ace,
237 stringPrepFlag flag) {
238 nsresult rv;
239 NS_ConvertUTF8toUTF16 ustr(input);
240
241 // map ideographic period to ASCII period etc.
242 normalizeFullStops(ustr);
243
244 uint32_t len, offset;
245 len = 0;
246 offset = 0;
247 nsAutoCString encodedBuf;
248
249 nsAString::const_iterator start, end;
250 ustr.BeginReading(start);
251 ustr.EndReading(end);
252 ace.Truncate();
253
254 // encode nodes if non ASCII
255 while (start != end) {
256 len++;
257 if (*start++ == (char16_t)'.') {
258 rv = stringPrepAndACE(Substring(ustr, offset, len - 1), encodedBuf, flag);
259 NS_ENSURE_SUCCESS(rv, rv);
260
261 ace.Append(encodedBuf);
262 ace.Append('.');
263 offset += len;
264 len = 0;
265 }
266 }
267
268 // encode the last node if non ASCII
269 if (len) {
270 rv = stringPrepAndACE(Substring(ustr, offset, len), encodedBuf, flag);
271 NS_ENSURE_SUCCESS(rv, rv);
272
273 ace.Append(encodedBuf);
274 }
275
276 return NS_OK;
277 }
278
ConvertACEtoUTF8(const nsACString & input,nsACString & _retval)279 NS_IMETHODIMP nsIDNService::ConvertACEtoUTF8(const nsACString& input,
280 nsACString& _retval) {
281 return ACEtoUTF8(input, _retval, eStringPrepForDNS);
282 }
283
ACEtoUTF8(const nsACString & input,nsACString & _retval,stringPrepFlag flag)284 nsresult nsIDNService::ACEtoUTF8(const nsACString& input, nsACString& _retval,
285 stringPrepFlag flag) {
286 // RFC 3490 - 4.2 ToUnicode
287 // ToUnicode never fails. If any step fails, then the original input
288 // sequence is returned immediately in that step.
289 //
290 // Note that this refers to the decoding of a single label.
291 // ACEtoUTF8 may be called with a sequence of labels separated by dots;
292 // this test applies individually to each label.
293
294 uint32_t len = 0, offset = 0;
295 nsAutoCString decodedBuf;
296
297 nsACString::const_iterator start, end;
298 input.BeginReading(start);
299 input.EndReading(end);
300 _retval.Truncate();
301
302 // loop and decode nodes
303 while (start != end) {
304 len++;
305 if (*start++ == '.') {
306 nsDependentCSubstring origLabel(input, offset, len - 1);
307 if (NS_FAILED(decodeACE(origLabel, decodedBuf, flag))) {
308 // If decoding failed, use the original input sequence
309 // for this label.
310 _retval.Append(origLabel);
311 } else {
312 _retval.Append(decodedBuf);
313 }
314
315 _retval.Append('.');
316 offset += len;
317 len = 0;
318 }
319 }
320 // decode the last node
321 if (len) {
322 nsDependentCSubstring origLabel(input, offset, len);
323 if (NS_FAILED(decodeACE(origLabel, decodedBuf, flag))) {
324 _retval.Append(origLabel);
325 } else {
326 _retval.Append(decodedBuf);
327 }
328 }
329
330 return NS_OK;
331 }
332
IsACE(const nsACString & input,bool * _retval)333 NS_IMETHODIMP nsIDNService::IsACE(const nsACString& input, bool* _retval) {
334 // look for the ACE prefix in the input string. it may occur
335 // at the beginning of any segment in the domain name. for
336 // example: "www.xn--ENCODED.com"
337
338 if (!IsAscii(input)) {
339 *_retval = false;
340 return NS_OK;
341 }
342
343 auto stringContains = [](const nsACString& haystack,
344 const nsACString& needle) {
345 return std::search(haystack.BeginReading(), haystack.EndReading(),
346 needle.BeginReading(),
347 needle.EndReading()) != haystack.EndReading();
348 };
349
350 *_retval = StringBeginsWith(input, "xn--"_ns) ||
351 (!input.IsEmpty() && input[0] != '.' &&
352 stringContains(input, ".xn--"_ns));
353 return NS_OK;
354 }
355
Normalize(const nsACString & input,nsACString & output)356 NS_IMETHODIMP nsIDNService::Normalize(const nsACString& input,
357 nsACString& output) {
358 // protect against bogus input
359 NS_ENSURE_TRUE(IsUtf8(input), NS_ERROR_UNEXPECTED);
360
361 NS_ConvertUTF8toUTF16 inUTF16(input);
362 normalizeFullStops(inUTF16);
363
364 // pass the domain name to stringprep label by label
365 nsAutoString outUTF16, outLabel;
366
367 uint32_t len = 0, offset = 0;
368 nsresult rv;
369 nsAString::const_iterator start, end;
370 inUTF16.BeginReading(start);
371 inUTF16.EndReading(end);
372
373 while (start != end) {
374 len++;
375 if (*start++ == char16_t('.')) {
376 rv = stringPrep(Substring(inUTF16, offset, len - 1), outLabel,
377 eStringPrepIgnoreErrors);
378 NS_ENSURE_SUCCESS(rv, rv);
379
380 outUTF16.Append(outLabel);
381 outUTF16.Append(char16_t('.'));
382 offset += len;
383 len = 0;
384 }
385 }
386 if (len) {
387 rv = stringPrep(Substring(inUTF16, offset, len), outLabel,
388 eStringPrepIgnoreErrors);
389 NS_ENSURE_SUCCESS(rv, rv);
390
391 outUTF16.Append(outLabel);
392 }
393
394 CopyUTF16toUTF8(outUTF16, output);
395 return NS_OK;
396 }
397
398 namespace {
399
400 class MOZ_STACK_CLASS MutexSettableAutoUnlock final {
401 private:
402 Mutex* mMutex = nullptr;
403
404 public:
405 MutexSettableAutoUnlock() = default;
406
Acquire(mozilla::Mutex & aMutex)407 void Acquire(mozilla::Mutex& aMutex) {
408 MOZ_ASSERT(!mMutex);
409 mMutex = &aMutex;
410 mMutex->Lock();
411 }
412
~MutexSettableAutoUnlock()413 ~MutexSettableAutoUnlock() {
414 if (mMutex) {
415 mMutex->Unlock();
416 }
417 }
418 };
419
420 } // anonymous namespace
421
ConvertToDisplayIDN(const nsACString & input,bool * _isASCII,nsACString & _retval)422 NS_IMETHODIMP nsIDNService::ConvertToDisplayIDN(const nsACString& input,
423 bool* _isASCII,
424 nsACString& _retval) {
425 MutexSettableAutoUnlock lock;
426 if (!NS_IsMainThread()) {
427 lock.Acquire(mLock);
428 }
429
430 // If host is ACE, then convert to UTF-8 if the host is in the IDN whitelist.
431 // Else, if host is already UTF-8, then make sure it is normalized per IDN.
432
433 nsresult rv = NS_OK;
434
435 // Even if the hostname is not ASCII, individual labels may still be ACE, so
436 // test IsACE before testing IsASCII
437 bool isACE;
438 IsACE(input, &isACE);
439
440 if (IsAscii(input)) {
441 // first, canonicalize the host to lowercase, for whitelist lookup
442 _retval = input;
443 ToLowerCase(_retval);
444
445 if (isACE && !mShowPunycode) {
446 // ACEtoUTF8() can't fail, but might return the original ACE string
447 nsAutoCString temp(_retval);
448 // If the domain is in the whitelist, return the host in UTF-8.
449 // Otherwise convert from ACE to UTF8 only those labels which are
450 // considered safe for display
451 ACEtoUTF8(
452 temp, _retval,
453 isInWhitelist(temp) ? eStringPrepIgnoreErrors : eStringPrepForUI);
454 *_isASCII = IsAscii(_retval);
455 } else {
456 *_isASCII = true;
457 }
458 } else {
459 // We have to normalize the hostname before testing against the domain
460 // whitelist (see bug 315411), and to ensure the entire string gets
461 // normalized.
462 //
463 // Normalization and the tests for safe display below, assume that the
464 // input is Unicode, so first convert any ACE labels to UTF8
465 if (isACE) {
466 nsAutoCString temp;
467 ACEtoUTF8(input, temp, eStringPrepIgnoreErrors);
468 rv = Normalize(temp, _retval);
469 } else {
470 rv = Normalize(input, _retval);
471 }
472 if (NS_FAILED(rv)) {
473 return rv;
474 }
475
476 if (mShowPunycode &&
477 NS_SUCCEEDED(UTF8toACE(_retval, _retval, eStringPrepIgnoreErrors))) {
478 *_isASCII = true;
479 return NS_OK;
480 }
481
482 // normalization could result in an ASCII-only hostname. alternatively, if
483 // the host is converted to ACE by the normalizer, then the host may contain
484 // unsafe characters, so leave it ACE encoded. see bug 283016, bug 301694,
485 // and bug 309311.
486 *_isASCII = IsAscii(_retval);
487 if (!*_isASCII && !isInWhitelist(_retval)) {
488 // UTF8toACE with eStringPrepForUI may return a domain name where
489 // some labels are in UTF-8 and some are in ACE, depending on
490 // whether they are considered safe for display
491 rv = UTF8toACE(_retval, _retval, eStringPrepForUI);
492 *_isASCII = IsAscii(_retval);
493 return rv;
494 }
495 }
496
497 return NS_OK;
498 }
499
500 //-----------------------------------------------------------------------------
501
utf16ToUcs4(const nsAString & in,uint32_t * out,uint32_t outBufLen,uint32_t * outLen)502 static nsresult utf16ToUcs4(const nsAString& in, uint32_t* out,
503 uint32_t outBufLen, uint32_t* outLen) {
504 uint32_t i = 0;
505 nsAString::const_iterator start, end;
506 in.BeginReading(start);
507 in.EndReading(end);
508
509 while (start != end) {
510 char16_t curChar;
511
512 curChar = *start++;
513
514 if (start != end && NS_IS_SURROGATE_PAIR(curChar, *start)) {
515 out[i] = SURROGATE_TO_UCS4(curChar, *start);
516 ++start;
517 } else {
518 out[i] = curChar;
519 }
520
521 i++;
522 if (i >= outBufLen) {
523 return NS_ERROR_MALFORMED_URI;
524 }
525 }
526 out[i] = (uint32_t)'\0';
527 *outLen = i;
528 return NS_OK;
529 }
530
punycode(const nsAString & in,nsACString & out)531 static nsresult punycode(const nsAString& in, nsACString& out) {
532 uint32_t ucs4Buf[kMaxDNSNodeLen + 1];
533 uint32_t ucs4Len = 0u;
534 nsresult rv = utf16ToUcs4(in, ucs4Buf, kMaxDNSNodeLen, &ucs4Len);
535 NS_ENSURE_SUCCESS(rv, rv);
536
537 // need maximum 20 bits to encode 16 bit Unicode character
538 // (include null terminator)
539 const uint32_t kEncodedBufSize = kMaxDNSNodeLen * 20 / 8 + 1 + 1;
540 char encodedBuf[kEncodedBufSize];
541 punycode_uint encodedLength = kEncodedBufSize;
542
543 enum punycode_status status =
544 punycode_encode(ucs4Len, ucs4Buf, nullptr, &encodedLength, encodedBuf);
545
546 if (punycode_success != status || encodedLength >= kEncodedBufSize) {
547 return NS_ERROR_MALFORMED_URI;
548 }
549
550 encodedBuf[encodedLength] = '\0';
551 out.Assign(nsDependentCString(kACEPrefix) + nsDependentCString(encodedBuf));
552
553 return rv;
554 }
555
556 // RFC 3454
557 //
558 // 1) Map -- For each character in the input, check if it has a mapping
559 // and, if so, replace it with its mapping. This is described in section 3.
560 //
561 // 2) Normalize -- Possibly normalize the result of step 1 using Unicode
562 // normalization. This is described in section 4.
563 //
564 // 3) Prohibit -- Check for any characters that are not allowed in the
565 // output. If any are found, return an error. This is described in section
566 // 5.
567 //
568 // 4) Check bidi -- Possibly check for right-to-left characters, and if any
569 // are found, make sure that the whole string satisfies the requirements
570 // for bidirectional strings. If the string does not satisfy the requirements
571 // for bidirectional strings, return an error. This is described in section 6.
572 //
573 // 5) Check unassigned code points -- If allowUnassigned is false, check for
574 // any unassigned Unicode points and if any are found return an error.
575 // This is described in section 7.
576 //
stringPrep(const nsAString & in,nsAString & out,stringPrepFlag flag)577 nsresult nsIDNService::stringPrep(const nsAString& in, nsAString& out,
578 stringPrepFlag flag) {
579 return IDNA2008StringPrep(in, out, flag);
580 }
581
stringPrepAndACE(const nsAString & in,nsACString & out,stringPrepFlag flag)582 nsresult nsIDNService::stringPrepAndACE(const nsAString& in, nsACString& out,
583 stringPrepFlag flag) {
584 nsresult rv = NS_OK;
585
586 out.Truncate();
587
588 if (in.Length() > kMaxDNSNodeLen) {
589 NS_WARNING("IDN node too large");
590 return NS_ERROR_MALFORMED_URI;
591 }
592
593 if (IsAscii(in)) {
594 LossyCopyUTF16toASCII(in, out);
595 return NS_OK;
596 }
597
598 nsAutoString strPrep;
599 rv = stringPrep(in, strPrep, flag);
600 if (flag == eStringPrepForDNS) {
601 NS_ENSURE_SUCCESS(rv, rv);
602 }
603
604 if (IsAscii(strPrep)) {
605 LossyCopyUTF16toASCII(strPrep, out);
606 return NS_OK;
607 }
608
609 if (flag == eStringPrepForUI && NS_SUCCEEDED(rv) && isLabelSafe(in)) {
610 CopyUTF16toUTF8(strPrep, out);
611 return NS_OK;
612 }
613
614 rv = punycode(strPrep, out);
615 // Check that the encoded output isn't larger than the maximum length
616 // of a DNS node per RFC 1034.
617 // This test isn't necessary in the code paths above where the input
618 // is ASCII (since the output will be the same length as the input) or
619 // where we convert to UTF-8 (since the output is only used for
620 // display in the UI and not passed to DNS and can legitimately be
621 // longer than the limit).
622 if (out.Length() > kMaxDNSNodeLen) {
623 NS_WARNING("IDN node too large");
624 return NS_ERROR_MALFORMED_URI;
625 }
626
627 return rv;
628 }
629
630 // RFC 3490
631 // 1) Whenever dots are used as label separators, the following characters
632 // MUST be recognized as dots: U+002E (full stop), U+3002 (ideographic full
633 // stop), U+FF0E (fullwidth full stop), U+FF61 (halfwidth ideographic full
634 // stop).
635
normalizeFullStops(nsAString & s)636 void nsIDNService::normalizeFullStops(nsAString& s) {
637 nsAString::const_iterator start, end;
638 s.BeginReading(start);
639 s.EndReading(end);
640 int32_t index = 0;
641
642 while (start != end) {
643 switch (*start) {
644 case 0x3002:
645 case 0xFF0E:
646 case 0xFF61:
647 s.ReplaceLiteral(index, 1, u".");
648 break;
649 default:
650 break;
651 }
652 start++;
653 index++;
654 }
655 }
656
decodeACE(const nsACString & in,nsACString & out,stringPrepFlag flag)657 nsresult nsIDNService::decodeACE(const nsACString& in, nsACString& out,
658 stringPrepFlag flag) {
659 bool isAce;
660 IsACE(in, &isAce);
661 if (!isAce) {
662 out.Assign(in);
663 return NS_OK;
664 }
665
666 nsAutoString utf16;
667 nsresult result = IDNA2008ToUnicode(in, utf16);
668 NS_ENSURE_SUCCESS(result, result);
669
670 if (flag != eStringPrepForUI || isLabelSafe(utf16)) {
671 CopyUTF16toUTF8(utf16, out);
672 } else {
673 out.Assign(in);
674 return NS_OK;
675 }
676
677 // Validation: encode back to ACE and compare the strings
678 nsAutoCString ace;
679 nsresult rv = UTF8toACE(out, ace, flag);
680 NS_ENSURE_SUCCESS(rv, rv);
681
682 if (flag == eStringPrepForDNS &&
683 !ace.Equals(in, nsCaseInsensitiveCStringComparator)) {
684 return NS_ERROR_MALFORMED_URI;
685 }
686
687 return NS_OK;
688 }
689
isInWhitelist(const nsACString & host)690 bool nsIDNService::isInWhitelist(const nsACString& host) {
691 if (!NS_IsMainThread()) {
692 mLock.AssertCurrentThreadOwns();
693 }
694
695 if (mIDNUseWhitelist && mIDNWhitelistPrefBranch) {
696 nsAutoCString tld(host);
697 // make sure the host is ACE for lookup and check that there are no
698 // unassigned codepoints
699 if (!IsAscii(tld) && NS_FAILED(UTF8toACE(tld, tld, eStringPrepForDNS))) {
700 return false;
701 }
702
703 // truncate trailing dots first
704 tld.Trim(".");
705 int32_t pos = tld.RFind(".");
706 if (pos == kNotFound) {
707 return false;
708 }
709
710 tld.Cut(0, pos + 1);
711
712 bool safe;
713 if (NS_SUCCEEDED(mIDNWhitelistPrefBranch->GetBoolPref(tld.get(), &safe))) {
714 return safe;
715 }
716 }
717
718 return false;
719 }
720
isLabelSafe(const nsAString & label)721 bool nsIDNService::isLabelSafe(const nsAString& label) {
722 if (!NS_IsMainThread()) {
723 mLock.AssertCurrentThreadOwns();
724 }
725
726 if (!isOnlySafeChars(PromiseFlatString(label), mIDNBlocklist)) {
727 return false;
728 }
729
730 // We should never get here if the label is ASCII
731 NS_ASSERTION(!IsAscii(label), "ASCII label in IDN checking");
732 if (mRestrictionProfile == eASCIIOnlyProfile) {
733 return false;
734 }
735
736 nsAString::const_iterator current, end;
737 label.BeginReading(current);
738 label.EndReading(end);
739
740 Script lastScript = Script::INVALID;
741 uint32_t previousChar = 0;
742 uint32_t baseChar = 0; // last non-diacritic seen (base char for marks)
743 uint32_t savedNumberingSystem = 0;
744 // Simplified/Traditional Chinese check temporarily disabled -- bug 857481
745 #if 0
746 HanVariantType savedHanVariant = HVT_NotHan;
747 #endif
748
749 int32_t savedScript = -1;
750
751 while (current != end) {
752 uint32_t ch = *current++;
753
754 if (current != end && NS_IS_SURROGATE_PAIR(ch, *current)) {
755 ch = SURROGATE_TO_UCS4(ch, *current++);
756 }
757
758 IdentifierType idType = GetIdentifierType(ch);
759 if (idType == IDTYPE_RESTRICTED) {
760 return false;
761 }
762 MOZ_ASSERT(idType == IDTYPE_ALLOWED);
763
764 // Check for mixed script
765 Script script = GetScriptCode(ch);
766 if (script != Script::COMMON && script != Script::INHERITED &&
767 script != lastScript) {
768 if (illegalScriptCombo(script, savedScript)) {
769 return false;
770 }
771 }
772
773 // Check for mixed numbering systems
774 auto genCat = GetGeneralCategory(ch);
775 if (genCat == HB_UNICODE_GENERAL_CATEGORY_DECIMAL_NUMBER) {
776 uint32_t zeroCharacter = ch - GetNumericValue(ch);
777 if (savedNumberingSystem == 0) {
778 // If we encounter a decimal number, save the zero character from that
779 // numbering system.
780 savedNumberingSystem = zeroCharacter;
781 } else if (zeroCharacter != savedNumberingSystem) {
782 return false;
783 }
784 }
785
786 if (genCat == HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK) {
787 // Check for consecutive non-spacing marks.
788 if (previousChar != 0 && previousChar == ch) {
789 return false;
790 }
791 // Check for marks whose expected script doesn't match the base script.
792 if (lastScript != Script::INVALID) {
793 const size_t kMaxScripts = 32; // more than ample for current values
794 // of ScriptExtensions property
795 UScriptCode scripts[kMaxScripts];
796 UErrorCode errorCode = U_ZERO_ERROR;
797 int nScripts =
798 uscript_getScriptExtensions(ch, scripts, kMaxScripts, &errorCode);
799 MOZ_ASSERT(U_SUCCESS(errorCode), "uscript_getScriptExtensions failed");
800 if (U_FAILURE(errorCode)) {
801 return false;
802 }
803 // nScripts will always be >= 1, because even for undefined characters
804 // uscript_getScriptExtensions will return Script::INVALID.
805 // If the mark just has script=COMMON or INHERITED, we can't check any
806 // more carefully, but if it has specific scriptExtension codes, then
807 // assume those are the only valid scripts to use it with.
808 if (nScripts > 1 || (Script(scripts[0]) != Script::COMMON &&
809 Script(scripts[0]) != Script::INHERITED)) {
810 while (--nScripts >= 0) {
811 if (Script(scripts[nScripts]) == lastScript) {
812 break;
813 }
814 }
815 if (nScripts == -1) {
816 return false;
817 }
818 }
819 }
820 // Check for diacritics on dotless-i, which would be indistinguishable
821 // from normal accented letter i.
822 if (baseChar == 0x0131 &&
823 ((ch >= 0x0300 && ch <= 0x0314) || ch == 0x031a)) {
824 return false;
825 }
826 } else {
827 baseChar = ch;
828 }
829
830 if (script != Script::COMMON && script != Script::INHERITED) {
831 lastScript = script;
832 }
833
834 // Simplified/Traditional Chinese check temporarily disabled -- bug 857481
835 #if 0
836
837 // Check for both simplified-only and traditional-only Chinese characters
838 HanVariantType hanVariant = GetHanVariant(ch);
839 if (hanVariant == HVT_SimplifiedOnly || hanVariant == HVT_TraditionalOnly) {
840 if (savedHanVariant == HVT_NotHan) {
841 savedHanVariant = hanVariant;
842 } else if (hanVariant != savedHanVariant) {
843 return false;
844 }
845 }
846 #endif
847
848 previousChar = ch;
849 }
850 return true;
851 }
852
853 // Scripts that we care about in illegalScriptCombo
854 static const Script scriptTable[] = {
855 Script::BOPOMOFO, Script::CYRILLIC, Script::GREEK, Script::HANGUL,
856 Script::HAN, Script::HIRAGANA, Script::KATAKANA, Script::LATIN};
857
858 #define BOPO 0
859 #define CYRL 1
860 #define GREK 2
861 #define HANG 3
862 #define HANI 4
863 #define HIRA 5
864 #define KATA 6
865 #define LATN 7
866 #define OTHR 8
867 #define JPAN 9 // Latin + Han + Hiragana + Katakana
868 #define CHNA 10 // Latin + Han + Bopomofo
869 #define KORE 11 // Latin + Han + Hangul
870 #define HNLT 12 // Latin + Han (could be any of the above combinations)
871 #define FAIL 13
872
findScriptIndex(Script aScript)873 static inline int32_t findScriptIndex(Script aScript) {
874 int32_t tableLength = mozilla::ArrayLength(scriptTable);
875 for (int32_t index = 0; index < tableLength; ++index) {
876 if (aScript == scriptTable[index]) {
877 return index;
878 }
879 }
880 return OTHR;
881 }
882
883 static const int32_t scriptComboTable[13][9] = {
884 /* thisScript: BOPO CYRL GREK HANG HANI HIRA KATA LATN OTHR
885 * savedScript */
886 /* BOPO */ {BOPO, FAIL, FAIL, FAIL, CHNA, FAIL, FAIL, CHNA, FAIL},
887 /* CYRL */ {FAIL, CYRL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL},
888 /* GREK */ {FAIL, FAIL, GREK, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL},
889 /* HANG */ {FAIL, FAIL, FAIL, HANG, KORE, FAIL, FAIL, KORE, FAIL},
890 /* HANI */ {CHNA, FAIL, FAIL, KORE, HANI, JPAN, JPAN, HNLT, FAIL},
891 /* HIRA */ {FAIL, FAIL, FAIL, FAIL, JPAN, HIRA, JPAN, JPAN, FAIL},
892 /* KATA */ {FAIL, FAIL, FAIL, FAIL, JPAN, JPAN, KATA, JPAN, FAIL},
893 /* LATN */ {CHNA, FAIL, FAIL, KORE, HNLT, JPAN, JPAN, LATN, OTHR},
894 /* OTHR */ {FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, OTHR, FAIL},
895 /* JPAN */ {FAIL, FAIL, FAIL, FAIL, JPAN, JPAN, JPAN, JPAN, FAIL},
896 /* CHNA */ {CHNA, FAIL, FAIL, FAIL, CHNA, FAIL, FAIL, CHNA, FAIL},
897 /* KORE */ {FAIL, FAIL, FAIL, KORE, KORE, FAIL, FAIL, KORE, FAIL},
898 /* HNLT */ {CHNA, FAIL, FAIL, KORE, HNLT, JPAN, JPAN, HNLT, FAIL}};
899
illegalScriptCombo(Script script,int32_t & savedScript)900 bool nsIDNService::illegalScriptCombo(Script script, int32_t& savedScript) {
901 if (!NS_IsMainThread()) {
902 mLock.AssertCurrentThreadOwns();
903 }
904
905 if (savedScript == -1) {
906 savedScript = findScriptIndex(script);
907 return false;
908 }
909
910 savedScript = scriptComboTable[savedScript][findScriptIndex(script)];
911 /*
912 * Special case combinations that depend on which profile is in use
913 * In the Highly Restrictive profile Latin is not allowed with any
914 * other script
915 *
916 * In the Moderately Restrictive profile Latin mixed with any other
917 * single script is allowed.
918 */
919 return ((savedScript == OTHR &&
920 mRestrictionProfile == eHighlyRestrictiveProfile) ||
921 savedScript == FAIL);
922 }
923
// The script index / state macros are only meant for the tables and
// functions above; remove them again.
#undef BOPO
#undef CYRL
#undef GREK
#undef HANG
#undef HANI
#undef HIRA
#undef KATA
#undef LATN
#undef OTHR
#undef JPAN
#undef CHNA
#undef KORE
#undef HNLT
#undef FAIL
938