1 /* This Source Code Form is subject to the terms of the Mozilla Public
2  * License, v. 2.0. If a copy of the MPL was not distributed with this
3  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
4 
5 #include <algorithm>
6 #include <string.h>
7 #include "mozilla/intl/Collator.h"
8 
9 namespace mozilla::intl {
10 
Collator(UCollator * aCollator)11 Collator::Collator(UCollator* aCollator) : mCollator(aCollator) {
12   MOZ_ASSERT(aCollator);
13 }
14 
~Collator()15 Collator::~Collator() {
16   if (mCollator.GetMut()) {
17     ucol_close(mCollator.GetMut());
18   }
19 }
20 
TryCreate(const char * aLocale)21 Result<UniquePtr<Collator>, ICUError> Collator::TryCreate(const char* aLocale) {
22   UErrorCode status = U_ZERO_ERROR;
23   UCollator* collator = ucol_open(IcuLocale(aLocale), &status);
24   if (U_FAILURE(status)) {
25     return Err(ToICUError(status));
26   }
27   return MakeUnique<Collator>(collator);
28 };
29 
CompareStrings(Span<const char16_t> aSource,Span<const char16_t> aTarget) const30 int32_t Collator::CompareStrings(Span<const char16_t> aSource,
31                                  Span<const char16_t> aTarget) const {
32   switch (ucol_strcoll(mCollator.GetConst(), aSource.data(),
33                        static_cast<int32_t>(aSource.size()), aTarget.data(),
34                        static_cast<int32_t>(aTarget.size()))) {
35     case UCOL_LESS:
36       return -1;
37     case UCOL_EQUAL:
38       return 0;
39     case UCOL_GREATER:
40       return 1;
41   }
42   MOZ_ASSERT_UNREACHABLE("ucol_strcoll returned bad UCollationResult");
43   return 0;
44 }
45 
CompareSortKeys(Span<const uint8_t> aKey1,Span<const uint8_t> aKey2) const46 int32_t Collator::CompareSortKeys(Span<const uint8_t> aKey1,
47                                   Span<const uint8_t> aKey2) const {
48   size_t minLength = std::min(aKey1.Length(), aKey2.Length());
49   int32_t tmpResult = strncmp((const char*)aKey1.Elements(),
50                               (const char*)aKey2.Elements(), minLength);
51   if (tmpResult < 0) {
52     return -1;
53   }
54   if (tmpResult > 0) {
55     return 1;
56   }
57   if (aKey1.Length() > minLength) {
58     // First string contains second one, so comes later, hence return > 0.
59     return 1;
60   }
61   if (aKey2.Length() > minLength) {
62     // First string is a substring of second one, so comes earlier,
63     // hence return < 0.
64     return -1;
65   }
66   return 0;
67 }
68 
CaseFirstToICU(Collator::CaseFirst caseFirst)69 static UColAttributeValue CaseFirstToICU(Collator::CaseFirst caseFirst) {
70   switch (caseFirst) {
71     case Collator::CaseFirst::False:
72       return UCOL_OFF;
73     case Collator::CaseFirst::Upper:
74       return UCOL_UPPER_FIRST;
75     case Collator::CaseFirst::Lower:
76       return UCOL_LOWER_FIRST;
77   }
78 
79   MOZ_ASSERT_UNREACHABLE();
80   return UCOL_DEFAULT;
81 }
82 
// Define this as a macro to work around exposing the UColAttributeValue type to
// the header file. Collator::Feature is private to the class.
//
// Translates the Collator::Feature value `feature` into the matching ICU
// attribute value and stores it in the lvalue `featureICU`. The body is wrapped
// in do { ... } while (0) so that an invocation followed by a semicolon forms
// exactly one statement and composes safely with an unbraced if/else.
#define FEATURE_TO_ICU(featureICU, feature) \
  do {                                      \
    switch (feature) {                      \
      case Collator::Feature::On:           \
        (featureICU) = UCOL_ON;             \
        break;                              \
      case Collator::Feature::Off:          \
        (featureICU) = UCOL_OFF;            \
        break;                              \
      case Collator::Feature::Default:      \
        (featureICU) = UCOL_DEFAULT;        \
        break;                              \
    }                                       \
  } while (0)
97 
SetStrength(Collator::Strength aStrength)98 void Collator::SetStrength(Collator::Strength aStrength) {
99   UColAttributeValue strength;
100   switch (aStrength) {
101     case Collator::Strength::Default:
102       strength = UCOL_DEFAULT_STRENGTH;
103       break;
104     case Collator::Strength::Primary:
105       strength = UCOL_PRIMARY;
106       break;
107     case Collator::Strength::Secondary:
108       strength = UCOL_SECONDARY;
109       break;
110     case Collator::Strength::Tertiary:
111       strength = UCOL_TERTIARY;
112       break;
113     case Collator::Strength::Quaternary:
114       strength = UCOL_QUATERNARY;
115       break;
116     case Collator::Strength::Identical:
117       strength = UCOL_IDENTICAL;
118       break;
119   }
120 
121   ucol_setStrength(mCollator.GetMut(), strength);
122 }
123 
SetCaseLevel(Collator::Feature aFeature)124 ICUResult Collator::SetCaseLevel(Collator::Feature aFeature) {
125   UErrorCode status = U_ZERO_ERROR;
126   UColAttributeValue featureICU;
127   FEATURE_TO_ICU(featureICU, aFeature);
128   ucol_setAttribute(mCollator.GetMut(), UCOL_CASE_LEVEL, featureICU, &status);
129   return ToICUResult(status);
130 }
131 
SetAlternateHandling(Collator::AlternateHandling aAlternateHandling)132 ICUResult Collator::SetAlternateHandling(
133     Collator::AlternateHandling aAlternateHandling) {
134   UErrorCode status = U_ZERO_ERROR;
135   UColAttributeValue handling;
136   switch (aAlternateHandling) {
137     case Collator::AlternateHandling::NonIgnorable:
138       handling = UCOL_NON_IGNORABLE;
139       break;
140     case Collator::AlternateHandling::Shifted:
141       handling = UCOL_SHIFTED;
142       break;
143     case Collator::AlternateHandling::Default:
144       handling = UCOL_DEFAULT;
145       break;
146   }
147 
148   ucol_setAttribute(mCollator.GetMut(), UCOL_ALTERNATE_HANDLING, handling,
149                     &status);
150   return ToICUResult(status);
151 }
152 
SetNumericCollation(Collator::Feature aFeature)153 ICUResult Collator::SetNumericCollation(Collator::Feature aFeature) {
154   UErrorCode status = U_ZERO_ERROR;
155   UColAttributeValue featureICU;
156   FEATURE_TO_ICU(featureICU, aFeature);
157 
158   ucol_setAttribute(mCollator.GetMut(), UCOL_NUMERIC_COLLATION, featureICU,
159                     &status);
160   return ToICUResult(status);
161 }
162 
SetNormalizationMode(Collator::Feature aFeature)163 ICUResult Collator::SetNormalizationMode(Collator::Feature aFeature) {
164   UErrorCode status = U_ZERO_ERROR;
165   UColAttributeValue featureICU;
166   FEATURE_TO_ICU(featureICU, aFeature);
167   ucol_setAttribute(mCollator.GetMut(), UCOL_NORMALIZATION_MODE, featureICU,
168                     &status);
169   return ToICUResult(status);
170 }
171 
SetCaseFirst(Collator::CaseFirst aCaseFirst)172 ICUResult Collator::SetCaseFirst(Collator::CaseFirst aCaseFirst) {
173   UErrorCode status = U_ZERO_ERROR;
174   ucol_setAttribute(mCollator.GetMut(), UCOL_CASE_FIRST,
175                     CaseFirstToICU(aCaseFirst), &status);
176   return ToICUResult(status);
177 }
178 
SetOptions(const Options & aOptions,const Maybe<Options &> aPrevOptions)179 ICUResult Collator::SetOptions(const Options& aOptions,
180                                const Maybe<Options&> aPrevOptions) {
181   if (aPrevOptions &&
182       // Check the equality of the previous options.
183       aPrevOptions->sensitivity == aOptions.sensitivity &&
184       aPrevOptions->caseFirst == aOptions.caseFirst &&
185       aPrevOptions->ignorePunctuation == aOptions.ignorePunctuation &&
186       aPrevOptions->numeric == aOptions.numeric) {
187     return Ok();
188   }
189 
190   Collator::Strength strength = Collator::Strength::Default;
191   Collator::Feature caseLevel = Collator::Feature::Off;
192   switch (aOptions.sensitivity) {
193     case Collator::Sensitivity::Base:
194       strength = Collator::Strength::Primary;
195       break;
196     case Collator::Sensitivity::Accent:
197       strength = Collator::Strength::Secondary;
198       break;
199     case Collator::Sensitivity::Case:
200       caseLevel = Collator::Feature::On;
201       strength = Collator::Strength::Primary;
202       break;
203     case Collator::Sensitivity::Variant:
204       strength = Collator::Strength::Tertiary;
205       break;
206   }
207 
208   SetStrength(strength);
209 
210   ICUResult result = Ok();
211 
212   // According to the ICU team, UCOL_SHIFTED causes punctuation to be
213   // ignored. Looking at Unicode Technical Report 35, Unicode Locale Data
214   // Markup Language, "shifted" causes whitespace and punctuation to be
215   // ignored - that's a bit more than asked for, but there's no way to get
216   // less.
217   result = this->SetAlternateHandling(
218       aOptions.ignorePunctuation ? Collator::AlternateHandling::Shifted
219                                  : Collator::AlternateHandling::Default);
220   if (result.isErr()) {
221     return result;
222   }
223 
224   result = SetCaseLevel(caseLevel);
225   if (result.isErr()) {
226     return result;
227   }
228 
229   result = SetNumericCollation(aOptions.numeric ? Collator::Feature::On
230                                                 : Collator::Feature::Off);
231   if (result.isErr()) {
232     return result;
233   }
234 
235   // Normalization is always on to meet the canonical equivalence requirement.
236   result = SetNormalizationMode(Collator::Feature::On);
237   if (result.isErr()) {
238     return result;
239   }
240 
241   result = SetCaseFirst(aOptions.caseFirst);
242   if (result.isErr()) {
243     return result;
244   }
245   return Ok();
246 }
247 
248 #undef FEATURE_TO_ICU
249 
GetCaseFirst() const250 Result<Collator::CaseFirst, ICUError> Collator::GetCaseFirst() const {
251   UErrorCode status = U_ZERO_ERROR;
252   UColAttributeValue caseFirst =
253       ucol_getAttribute(mCollator.GetConst(), UCOL_CASE_FIRST, &status);
254   if (U_FAILURE(status)) {
255     return Err(ToICUError(status));
256   }
257 
258   if (caseFirst == UCOL_OFF) {
259     return CaseFirst::False;
260   }
261   if (caseFirst == UCOL_UPPER_FIRST) {
262     return CaseFirst::Upper;
263   }
264   MOZ_ASSERT(caseFirst == UCOL_LOWER_FIRST);
265   return CaseFirst::Lower;
266 }
267 
268 /* static */
269 Result<Collator::Bcp47ExtEnumeration, ICUError>
GetBcp47KeywordValuesForLocale(const char * aLocale,CommonlyUsed aCommonlyUsed)270 Collator::GetBcp47KeywordValuesForLocale(const char* aLocale,
271                                          CommonlyUsed aCommonlyUsed) {
272   UErrorCode status = U_ZERO_ERROR;
273   UEnumeration* enumeration = ucol_getKeywordValuesForLocale(
274       "collation", aLocale, static_cast<bool>(aCommonlyUsed), &status);
275 
276   if (U_SUCCESS(status)) {
277     return Bcp47ExtEnumeration(enumeration);
278   }
279 
280   return Err(ToICUError(status));
281 }
282 
283 /* static */
284 Result<Collator::Bcp47ExtEnumeration, ICUError>
GetBcp47KeywordValues()285 Collator::GetBcp47KeywordValues() {
286   UErrorCode status = U_ZERO_ERROR;
287   UEnumeration* enumeration = ucol_getKeywordValues("collation", &status);
288 
289   if (U_SUCCESS(status)) {
290     return Bcp47ExtEnumeration(enumeration);
291   }
292 
293   return Err(ToICUError(status));
294 }
295 
296 /* static */
KeywordValueToBcp47Extension(const char * aKeyword,int32_t aLength)297 SpanResult<char> Collator::KeywordValueToBcp47Extension(const char* aKeyword,
298                                                         int32_t aLength) {
299   if (aKeyword == nullptr) {
300     return Err(InternalError{});
301   }
302   return MakeStringSpan(uloc_toUnicodeLocaleType("co", aKeyword));
303 }
304 
305 }  // namespace mozilla::intl
306