1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4  * License, v. 2.0. If a copy of the MPL was not distributed with this
5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 
7 #include "nsUnicodeNormalizer.h"
8 #include "ICUUtils.h"
9 #include "unicode/unorm2.h"
10 #include "unicode/utext.h"
11 
// XPCOM boilerplate: supplies the nsISupports methods for
// nsUnicodeNormalizer, with nsIUnicodeNormalizer as its queryable interface.
NS_IMPL_ISUPPORTS(nsUnicodeNormalizer, nsIUnicodeNormalizer)
13 
14 nsUnicodeNormalizer::nsUnicodeNormalizer()
15 {
16 }
17 
~nsUnicodeNormalizer()18 nsUnicodeNormalizer::~nsUnicodeNormalizer()
19 {
20 }
21 
22 static nsresult
DoNormalization(const UNormalizer2 * aNorm,const nsAString & aSrc,nsAString & aDest)23 DoNormalization(const UNormalizer2* aNorm, const nsAString& aSrc,
24                 nsAString& aDest)
25 {
26   UErrorCode errorCode = U_ZERO_ERROR;
27   const int32_t length = aSrc.Length();
28   const UChar* src = reinterpret_cast<const UChar*>(aSrc.BeginReading());
29   // Initial guess for a capacity that is likely to be enough for most cases.
30   int32_t capacity = length + (length >> 8) + 8;
31   do {
32     aDest.SetLength(capacity);
33     UChar* dest = reinterpret_cast<UChar*>(aDest.BeginWriting());
34     int32_t len = unorm2_normalize(aNorm, src, aSrc.Length(), dest, capacity,
35                                    &errorCode);
36     if (U_SUCCESS(errorCode)) {
37       aDest.SetLength(len);
38       break;
39     }
40     if (errorCode == U_BUFFER_OVERFLOW_ERROR) {
41       // Buffer wasn't big enough; adjust to the reported size and try again.
42       capacity = len;
43       errorCode = U_ZERO_ERROR;
44       continue;
45     }
46   } while (false);
47   return ICUUtils::UErrorToNsResult(errorCode);
48 }
49 
50 nsresult
NormalizeUnicodeNFD(const nsAString & aSrc,nsAString & aDest)51 nsUnicodeNormalizer::NormalizeUnicodeNFD(const nsAString& aSrc,
52                                          nsAString& aDest)
53 {
54   // The unorm2_getNF*Instance functions return static singletons that should
55   // not be deleted, so we just get them once on first use.
56   static UErrorCode errorCode = U_ZERO_ERROR;
57   static const UNormalizer2* norm = unorm2_getNFDInstance(&errorCode);
58   if (U_SUCCESS(errorCode)) {
59     return DoNormalization(norm, aSrc, aDest);
60   }
61   return ICUUtils::UErrorToNsResult(errorCode);
62 }
63 
64 nsresult
NormalizeUnicodeNFC(const nsAString & aSrc,nsAString & aDest)65 nsUnicodeNormalizer::NormalizeUnicodeNFC(const nsAString& aSrc,
66                                          nsAString& aDest)
67 {
68   static UErrorCode errorCode = U_ZERO_ERROR;
69   static const UNormalizer2* norm = unorm2_getNFCInstance(&errorCode);
70   if (U_SUCCESS(errorCode)) {
71     return DoNormalization(norm, aSrc, aDest);
72   }
73   return ICUUtils::UErrorToNsResult(errorCode);
74 }
75 
76 nsresult
NormalizeUnicodeNFKD(const nsAString & aSrc,nsAString & aDest)77 nsUnicodeNormalizer::NormalizeUnicodeNFKD(const nsAString& aSrc,
78                                           nsAString& aDest)
79 {
80   static UErrorCode errorCode = U_ZERO_ERROR;
81   static const UNormalizer2* norm = unorm2_getNFKDInstance(&errorCode);
82   if (U_SUCCESS(errorCode)) {
83     return DoNormalization(norm, aSrc, aDest);
84   }
85   return ICUUtils::UErrorToNsResult(errorCode);
86 }
87 
88 nsresult
NormalizeUnicodeNFKC(const nsAString & aSrc,nsAString & aDest)89 nsUnicodeNormalizer::NormalizeUnicodeNFKC(const nsAString& aSrc,
90                                           nsAString& aDest)
91 {
92   static UErrorCode errorCode = U_ZERO_ERROR;
93   static const UNormalizer2* norm = unorm2_getNFKCInstance(&errorCode);
94   if (U_SUCCESS(errorCode)) {
95     return DoNormalization(norm, aSrc, aDest);
96   }
97   return ICUUtils::UErrorToNsResult(errorCode);
98 }
99