1 /* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
4
5 #include <algorithm>
6 #include <string.h>
7 #include "mozilla/intl/Collator.h"
8
9 namespace mozilla::intl {
10
Collator(UCollator * aCollator)11 Collator::Collator(UCollator* aCollator) : mCollator(aCollator) {
12 MOZ_ASSERT(aCollator);
13 }
14
~Collator()15 Collator::~Collator() {
16 if (mCollator.GetMut()) {
17 ucol_close(mCollator.GetMut());
18 }
19 }
20
TryCreate(const char * aLocale)21 Result<UniquePtr<Collator>, ICUError> Collator::TryCreate(const char* aLocale) {
22 UErrorCode status = U_ZERO_ERROR;
23 UCollator* collator = ucol_open(IcuLocale(aLocale), &status);
24 if (U_FAILURE(status)) {
25 return Err(ToICUError(status));
26 }
27 return MakeUnique<Collator>(collator);
28 };
29
CompareStrings(Span<const char16_t> aSource,Span<const char16_t> aTarget) const30 int32_t Collator::CompareStrings(Span<const char16_t> aSource,
31 Span<const char16_t> aTarget) const {
32 switch (ucol_strcoll(mCollator.GetConst(), aSource.data(),
33 static_cast<int32_t>(aSource.size()), aTarget.data(),
34 static_cast<int32_t>(aTarget.size()))) {
35 case UCOL_LESS:
36 return -1;
37 case UCOL_EQUAL:
38 return 0;
39 case UCOL_GREATER:
40 return 1;
41 }
42 MOZ_ASSERT_UNREACHABLE("ucol_strcoll returned bad UCollationResult");
43 return 0;
44 }
45
CompareSortKeys(Span<const uint8_t> aKey1,Span<const uint8_t> aKey2) const46 int32_t Collator::CompareSortKeys(Span<const uint8_t> aKey1,
47 Span<const uint8_t> aKey2) const {
48 size_t minLength = std::min(aKey1.Length(), aKey2.Length());
49 int32_t tmpResult = strncmp((const char*)aKey1.Elements(),
50 (const char*)aKey2.Elements(), minLength);
51 if (tmpResult < 0) {
52 return -1;
53 }
54 if (tmpResult > 0) {
55 return 1;
56 }
57 if (aKey1.Length() > minLength) {
58 // First string contains second one, so comes later, hence return > 0.
59 return 1;
60 }
61 if (aKey2.Length() > minLength) {
62 // First string is a substring of second one, so comes earlier,
63 // hence return < 0.
64 return -1;
65 }
66 return 0;
67 }
68
CaseFirstToICU(Collator::CaseFirst caseFirst)69 static UColAttributeValue CaseFirstToICU(Collator::CaseFirst caseFirst) {
70 switch (caseFirst) {
71 case Collator::CaseFirst::False:
72 return UCOL_OFF;
73 case Collator::CaseFirst::Upper:
74 return UCOL_UPPER_FIRST;
75 case Collator::CaseFirst::Lower:
76 return UCOL_LOWER_FIRST;
77 }
78
79 MOZ_ASSERT_UNREACHABLE();
80 return UCOL_DEFAULT;
81 }
82
// Define this as a macro to work around exposing the UColAttributeValue type to
// the header file. Collator::Feature is private to the class.
//
// Assigns to `featureICU` the ICU attribute value matching `feature`:
// On -> UCOL_ON, Off -> UCOL_OFF, Default -> UCOL_DEFAULT. `featureICU` is left
// untouched if `feature` is none of those enumerators.
//
// The body is wrapped in do { ... } while (0) so that `FEATURE_TO_ICU(x, y);`
// expands to exactly one statement and stays correct inside an unbraced
// if/else.
#define FEATURE_TO_ICU(featureICU, feature) \
  do {                                      \
    switch (feature) {                      \
      case Collator::Feature::On:           \
        (featureICU) = UCOL_ON;             \
        break;                              \
      case Collator::Feature::Off:          \
        (featureICU) = UCOL_OFF;            \
        break;                              \
      case Collator::Feature::Default:      \
        (featureICU) = UCOL_DEFAULT;        \
        break;                              \
    }                                       \
  } while (0)
97
SetStrength(Collator::Strength aStrength)98 void Collator::SetStrength(Collator::Strength aStrength) {
99 UColAttributeValue strength;
100 switch (aStrength) {
101 case Collator::Strength::Default:
102 strength = UCOL_DEFAULT_STRENGTH;
103 break;
104 case Collator::Strength::Primary:
105 strength = UCOL_PRIMARY;
106 break;
107 case Collator::Strength::Secondary:
108 strength = UCOL_SECONDARY;
109 break;
110 case Collator::Strength::Tertiary:
111 strength = UCOL_TERTIARY;
112 break;
113 case Collator::Strength::Quaternary:
114 strength = UCOL_QUATERNARY;
115 break;
116 case Collator::Strength::Identical:
117 strength = UCOL_IDENTICAL;
118 break;
119 }
120
121 ucol_setStrength(mCollator.GetMut(), strength);
122 }
123
SetCaseLevel(Collator::Feature aFeature)124 ICUResult Collator::SetCaseLevel(Collator::Feature aFeature) {
125 UErrorCode status = U_ZERO_ERROR;
126 UColAttributeValue featureICU;
127 FEATURE_TO_ICU(featureICU, aFeature);
128 ucol_setAttribute(mCollator.GetMut(), UCOL_CASE_LEVEL, featureICU, &status);
129 return ToICUResult(status);
130 }
131
SetAlternateHandling(Collator::AlternateHandling aAlternateHandling)132 ICUResult Collator::SetAlternateHandling(
133 Collator::AlternateHandling aAlternateHandling) {
134 UErrorCode status = U_ZERO_ERROR;
135 UColAttributeValue handling;
136 switch (aAlternateHandling) {
137 case Collator::AlternateHandling::NonIgnorable:
138 handling = UCOL_NON_IGNORABLE;
139 break;
140 case Collator::AlternateHandling::Shifted:
141 handling = UCOL_SHIFTED;
142 break;
143 case Collator::AlternateHandling::Default:
144 handling = UCOL_DEFAULT;
145 break;
146 }
147
148 ucol_setAttribute(mCollator.GetMut(), UCOL_ALTERNATE_HANDLING, handling,
149 &status);
150 return ToICUResult(status);
151 }
152
SetNumericCollation(Collator::Feature aFeature)153 ICUResult Collator::SetNumericCollation(Collator::Feature aFeature) {
154 UErrorCode status = U_ZERO_ERROR;
155 UColAttributeValue featureICU;
156 FEATURE_TO_ICU(featureICU, aFeature);
157
158 ucol_setAttribute(mCollator.GetMut(), UCOL_NUMERIC_COLLATION, featureICU,
159 &status);
160 return ToICUResult(status);
161 }
162
SetNormalizationMode(Collator::Feature aFeature)163 ICUResult Collator::SetNormalizationMode(Collator::Feature aFeature) {
164 UErrorCode status = U_ZERO_ERROR;
165 UColAttributeValue featureICU;
166 FEATURE_TO_ICU(featureICU, aFeature);
167 ucol_setAttribute(mCollator.GetMut(), UCOL_NORMALIZATION_MODE, featureICU,
168 &status);
169 return ToICUResult(status);
170 }
171
SetCaseFirst(Collator::CaseFirst aCaseFirst)172 ICUResult Collator::SetCaseFirst(Collator::CaseFirst aCaseFirst) {
173 UErrorCode status = U_ZERO_ERROR;
174 ucol_setAttribute(mCollator.GetMut(), UCOL_CASE_FIRST,
175 CaseFirstToICU(aCaseFirst), &status);
176 return ToICUResult(status);
177 }
178
SetOptions(const Options & aOptions,const Maybe<Options &> aPrevOptions)179 ICUResult Collator::SetOptions(const Options& aOptions,
180 const Maybe<Options&> aPrevOptions) {
181 if (aPrevOptions &&
182 // Check the equality of the previous options.
183 aPrevOptions->sensitivity == aOptions.sensitivity &&
184 aPrevOptions->caseFirst == aOptions.caseFirst &&
185 aPrevOptions->ignorePunctuation == aOptions.ignorePunctuation &&
186 aPrevOptions->numeric == aOptions.numeric) {
187 return Ok();
188 }
189
190 Collator::Strength strength = Collator::Strength::Default;
191 Collator::Feature caseLevel = Collator::Feature::Off;
192 switch (aOptions.sensitivity) {
193 case Collator::Sensitivity::Base:
194 strength = Collator::Strength::Primary;
195 break;
196 case Collator::Sensitivity::Accent:
197 strength = Collator::Strength::Secondary;
198 break;
199 case Collator::Sensitivity::Case:
200 caseLevel = Collator::Feature::On;
201 strength = Collator::Strength::Primary;
202 break;
203 case Collator::Sensitivity::Variant:
204 strength = Collator::Strength::Tertiary;
205 break;
206 }
207
208 SetStrength(strength);
209
210 ICUResult result = Ok();
211
212 // According to the ICU team, UCOL_SHIFTED causes punctuation to be
213 // ignored. Looking at Unicode Technical Report 35, Unicode Locale Data
214 // Markup Language, "shifted" causes whitespace and punctuation to be
215 // ignored - that's a bit more than asked for, but there's no way to get
216 // less.
217 result = this->SetAlternateHandling(
218 aOptions.ignorePunctuation ? Collator::AlternateHandling::Shifted
219 : Collator::AlternateHandling::Default);
220 if (result.isErr()) {
221 return result;
222 }
223
224 result = SetCaseLevel(caseLevel);
225 if (result.isErr()) {
226 return result;
227 }
228
229 result = SetNumericCollation(aOptions.numeric ? Collator::Feature::On
230 : Collator::Feature::Off);
231 if (result.isErr()) {
232 return result;
233 }
234
235 // Normalization is always on to meet the canonical equivalence requirement.
236 result = SetNormalizationMode(Collator::Feature::On);
237 if (result.isErr()) {
238 return result;
239 }
240
241 result = SetCaseFirst(aOptions.caseFirst);
242 if (result.isErr()) {
243 return result;
244 }
245 return Ok();
246 }
247
248 #undef FEATURE_TO_ICU
249
GetCaseFirst() const250 Result<Collator::CaseFirst, ICUError> Collator::GetCaseFirst() const {
251 UErrorCode status = U_ZERO_ERROR;
252 UColAttributeValue caseFirst =
253 ucol_getAttribute(mCollator.GetConst(), UCOL_CASE_FIRST, &status);
254 if (U_FAILURE(status)) {
255 return Err(ToICUError(status));
256 }
257
258 if (caseFirst == UCOL_OFF) {
259 return CaseFirst::False;
260 }
261 if (caseFirst == UCOL_UPPER_FIRST) {
262 return CaseFirst::Upper;
263 }
264 MOZ_ASSERT(caseFirst == UCOL_LOWER_FIRST);
265 return CaseFirst::Lower;
266 }
267
268 /* static */
269 Result<Collator::Bcp47ExtEnumeration, ICUError>
GetBcp47KeywordValuesForLocale(const char * aLocale,CommonlyUsed aCommonlyUsed)270 Collator::GetBcp47KeywordValuesForLocale(const char* aLocale,
271 CommonlyUsed aCommonlyUsed) {
272 UErrorCode status = U_ZERO_ERROR;
273 UEnumeration* enumeration = ucol_getKeywordValuesForLocale(
274 "collation", aLocale, static_cast<bool>(aCommonlyUsed), &status);
275
276 if (U_SUCCESS(status)) {
277 return Bcp47ExtEnumeration(enumeration);
278 }
279
280 return Err(ToICUError(status));
281 }
282
283 /* static */
284 Result<Collator::Bcp47ExtEnumeration, ICUError>
GetBcp47KeywordValues()285 Collator::GetBcp47KeywordValues() {
286 UErrorCode status = U_ZERO_ERROR;
287 UEnumeration* enumeration = ucol_getKeywordValues("collation", &status);
288
289 if (U_SUCCESS(status)) {
290 return Bcp47ExtEnumeration(enumeration);
291 }
292
293 return Err(ToICUError(status));
294 }
295
296 /* static */
KeywordValueToBcp47Extension(const char * aKeyword,int32_t aLength)297 SpanResult<char> Collator::KeywordValueToBcp47Extension(const char* aKeyword,
298 int32_t aLength) {
299 if (aKeyword == nullptr) {
300 return Err(InternalError{});
301 }
302 return MakeStringSpan(uloc_toUnicodeLocaleType("co", aKeyword));
303 }
304
305 } // namespace mozilla::intl
306