1 /* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
4 #include "gtest/gtest.h"
5
6 #include <string.h>
7 #include <string_view>
8 #include "mozilla/intl/Collator.h"
9 #include "mozilla/Span.h"
10 #include "TestBuffer.h"
11
12 namespace mozilla::intl {
13
TEST(IntlCollator, SetAttributesInternal)
{
  // Smoke test: hand each setter every value listed below so that a
  // misconfigured attribute would trip a MOZ_ASSERT.
  auto created = Collator::TryCreate("en-US");
  ASSERT_TRUE(created.isOk());
  auto collator = created.unwrap();

  const Collator::Strength kStrengths[] = {
      Collator::Strength::Primary,    Collator::Strength::Secondary,
      Collator::Strength::Tertiary,   Collator::Strength::Quaternary,
      Collator::Strength::Identical,  Collator::Strength::Default,
  };
  for (Collator::Strength strength : kStrengths) {
    collator->SetStrength(strength);
  }

  const Collator::AlternateHandling kAlternates[] = {
      Collator::AlternateHandling::NonIgnorable,
      Collator::AlternateHandling::Shifted,
      Collator::AlternateHandling::Default,
  };
  for (Collator::AlternateHandling handling : kAlternates) {
    collator->SetAlternateHandling(handling).unwrap();
  }

  const Collator::CaseFirst kCaseFirsts[] = {
      Collator::CaseFirst::False,
      Collator::CaseFirst::Upper,
      Collator::CaseFirst::Lower,
  };
  for (Collator::CaseFirst caseFirst : kCaseFirsts) {
    collator->SetCaseFirst(caseFirst).unwrap();
  }

  // The three on/off style attributes all take the same Feature values. Each
  // setter gets its own pass so the call order stays grouped per attribute.
  const Collator::Feature kFeatures[] = {
      Collator::Feature::On,
      Collator::Feature::Off,
      Collator::Feature::Default,
  };
  for (Collator::Feature feature : kFeatures) {
    collator->SetCaseLevel(feature).unwrap();
  }
  for (Collator::Feature feature : kFeatures) {
    collator->SetNumericCollation(feature).unwrap();
  }
  for (Collator::Feature feature : kFeatures) {
    collator->SetNormalizationMode(feature).unwrap();
  }
}
50
TEST(IntlCollator, GetSortKey)
{
  // Do some light sort key comparisons to ensure everything is wired up
  // correctly. This is not doing extensive correctness testing.
  auto result = Collator::TryCreate("en-US");
  ASSERT_TRUE(result.isOk());
  auto collator = result.unwrap();
  TestBuffer<uint8_t> bufferA;
  TestBuffer<uint8_t> bufferB;

  // Writes the sort key of `a` into bufferA and of `b` into bufferB, then
  // returns the strcmp() ordering of the two byte sequences.
  // strcmp() stops at the first NUL byte, so this presumably relies on
  // GetSortKey producing NUL-terminated keys — confirm against the API.
  auto compareSortKeys = [&](const char16_t* a, const char16_t* b) {
    collator->GetSortKey(MakeStringSpan(a), bufferA).unwrap();
    collator->GetSortKey(MakeStringSpan(b), bufferB).unwrap();
    return strcmp(reinterpret_cast<const char*>(bufferA.data()),
                  reinterpret_cast<const char*>(bufferB.data()));
  };

  ASSERT_LT(compareSortKeys(u"aaa", u"bbb"), 0);
  ASSERT_GT(compareSortKeys(u"bbb", u"aaa"), 0);
  ASSERT_EQ(compareSortKeys(u"aaa", u"aaa"), 0);
  // Both inputs are the same (empty) string, so their keys must compare
  // equal; a "less than" expectation can never hold for identical inputs.
  // NOTE(review): these literals may originally have held non-ASCII text that
  // was lost — if so, restore it together with the matching expectation.
  ASSERT_EQ(compareSortKeys(u"", u""), 0);
}
73
TEST(IntlCollator, CompareStrings)
{
  // Do some light string comparisons to ensure everything is wired up
  // correctly. This is not doing extensive correctness testing.
  auto result = Collator::TryCreate("en-US");
  ASSERT_TRUE(result.isOk());
  auto collator = result.unwrap();

  ASSERT_EQ(collator->CompareStrings(u"aaa", u"bbb"), -1);
  ASSERT_EQ(collator->CompareStrings(u"bbb", u"aaa"), 1);
  ASSERT_EQ(collator->CompareStrings(u"aaa", u"aaa"), 0);
  // Both operands are the same (empty) literal, so the only consistent result
  // is "equal"; expecting -1 here could never pass.
  // NOTE(review): these literals may originally have held non-ASCII text that
  // was lost — if so, restore it together with the matching expectation.
  ASSERT_EQ(collator->CompareStrings(u"", u""), 0);
}
89
TEST(IntlCollator, SetOptionsSensitivity)
{
  // Test the ECMA 402 sensitivity behavior per:
  // https://tc39.es/ecma402/#sec-collator-comparestrings
  auto result = Collator::TryCreate("en-US");
  ASSERT_TRUE(result.isOk());
  auto collator = result.unwrap();

  // One row per sensitivity: the expected CompareStrings() result for a pair
  // differing only by accent ("a" vs "á") and a pair differing only by case
  // ("a" vs "A"). Different base letters ("a" vs "b") always compare as -1.
  struct SensitivityCase {
    const char* label;  // Only used to identify the failing row.
    Collator::Sensitivity sensitivity;
    int32_t accentDiff;
    int32_t caseDiff;
  };
  const SensitivityCase kCases[] = {
      {"Base", Collator::Sensitivity::Base, 0, 0},
      {"Accent", Collator::Sensitivity::Accent, -1, 0},
      {"Case", Collator::Sensitivity::Case, 0, -1},
      {"Variant", Collator::Sensitivity::Variant, -1, -1},
  };

  for (const SensitivityCase& testCase : kCases) {
    Collator::Options options{};
    options.sensitivity = testCase.sensitivity;
    ASSERT_TRUE(collator->SetOptions(options).isOk())
        << "sensitivity: " << testCase.label;

    ASSERT_EQ(collator->CompareStrings(u"a", u"b"), -1)
        << "sensitivity: " << testCase.label;
    ASSERT_EQ(collator->CompareStrings(u"a", u"á"), testCase.accentDiff)
        << "sensitivity: " << testCase.label;
    ASSERT_EQ(collator->CompareStrings(u"a", u"A"), testCase.caseDiff)
        << "sensitivity: " << testCase.label;
  }
}
131
TEST(IntlCollator, LocaleSensitiveCollations)
{
  UniquePtr<Collator> collator = nullptr;

  // Replaces `collator` with a freshly created collator for `locale`,
  // configured with base sensitivity.
  //
  // The ASSERT_* macros in here only return from the lambda itself, so every
  // call below is wrapped in ASSERT_NO_FATAL_FAILURE. That way a failure to
  // create or configure the collator also stops the test instead of letting it
  // continue with a null or stale `collator`.
  auto changeLocale = [&](const char* locale) {
    auto result = Collator::TryCreate(locale);
    ASSERT_TRUE(result.isOk());
    collator = result.unwrap();

    Collator::Options options{};
    options.sensitivity = Collator::Sensitivity::Base;
    auto optResult = collator->SetOptions(options);
    ASSERT_TRUE(optResult.isOk());
  };

  // Swedish treats "Ö" as a separate character, which sorts after "Z".
  ASSERT_NO_FATAL_FAILURE(changeLocale("en-US"));
  ASSERT_EQ(collator->CompareStrings(u"Österreich", u"Västervik"), -1);
  ASSERT_NO_FATAL_FAILURE(changeLocale("sv-SE"));
  ASSERT_EQ(collator->CompareStrings(u"Österreich", u"Västervik"), 1);

  // Country names in their respective scripts.
  auto china = MakeStringSpan(u"中国");
  auto japan = MakeStringSpan(u"日本");
  auto korea = MakeStringSpan(u"한국");

  ASSERT_NO_FATAL_FAILURE(changeLocale("en-US"));
  ASSERT_EQ(collator->CompareStrings(china, japan), -1);
  ASSERT_EQ(collator->CompareStrings(china, korea), 1);
  ASSERT_NO_FATAL_FAILURE(changeLocale("zh"));
  ASSERT_EQ(collator->CompareStrings(china, japan), 1);
  ASSERT_EQ(collator->CompareStrings(china, korea), -1);
  ASSERT_NO_FATAL_FAILURE(changeLocale("ja"));
  ASSERT_EQ(collator->CompareStrings(china, japan), -1);
  ASSERT_EQ(collator->CompareStrings(china, korea), -1);
  ASSERT_NO_FATAL_FAILURE(changeLocale("ko"));
  ASSERT_EQ(collator->CompareStrings(china, japan), 1);
  ASSERT_EQ(collator->CompareStrings(china, korea), -1);
}
173
TEST(IntlCollator, IgnorePunctuation)
{
  // Checks that toggling Options::ignorePunctuation flips how a string with a
  // leading "." compares against a plain alphabetic string.
  auto result = Collator::TryCreate("en-US");
  ASSERT_TRUE(result.isOk());
  auto collator = result.unwrap();
  Collator::Options options{};
  options.ignorePunctuation = true;

  auto optResult = collator->SetOptions(options);
  ASSERT_TRUE(optResult.isOk());

  // With punctuation ignored, "aa" is expected to sort before ".bb".
  ASSERT_EQ(collator->CompareStrings(u"aa", u".bb"), -1);

  options.ignorePunctuation = false;
  optResult = collator->SetOptions(options);
  ASSERT_TRUE(optResult.isOk());

  // With punctuation significant, ".bb" is expected to sort before "aa".
  ASSERT_EQ(collator->CompareStrings(u"aa", u".bb"), 1);
}
196
TEST(IntlCollator, GetBcp47KeywordValuesForLocale)
{
  auto keywordsResult = Collator::GetBcp47KeywordValuesForLocale("de");
  ASSERT_TRUE(keywordsResult.isOk());
  auto keywords = keywordsResult.unwrap();

  // The full list comes from ICU and can change between upgrades, so only a
  // handful of keywords are checked.
  const auto kStandard = MakeStringSpan("standard");
  const auto kSearch = MakeStringSpan("search");
  const auto kPhonebk = MakeStringSpan("phonebk");      // Valid BCP 47.
  const auto kPhonebook = MakeStringSpan("phonebook");  // Not valid BCP 47.

  bool sawStandard = false;
  bool sawSearch = false;
  bool sawPhonebk = false;
  bool sawPhonebook = false;

  for (auto keywordResult : keywords) {
    ASSERT_TRUE(keywordResult.isOk());
    auto keyword = keywordResult.unwrap();
    if (keyword == kStandard) {
      sawStandard = true;
    }
    if (keyword == kSearch) {
      sawSearch = true;
    }
    if (keyword == kPhonebk) {
      sawPhonebk = true;
    }
    if (keyword == kPhonebook) {
      sawPhonebook = true;
    }
  }

  ASSERT_TRUE(sawStandard);
  ASSERT_TRUE(sawSearch);
  ASSERT_TRUE(sawPhonebk);

  // The legacy spelling is not valid BCP 47 and must not be reported.
  ASSERT_FALSE(sawPhonebook);
}
229
TEST(IntlCollator, GetBcp47KeywordValuesForLocaleCommonlyUsed)
{
  auto keywordsResult = Collator::GetBcp47KeywordValuesForLocale(
      "fr", Collator::CommonlyUsed::Yes);
  ASSERT_TRUE(keywordsResult.isOk());
  auto keywords = keywordsResult.unwrap();

  // The full list comes from ICU and can change between upgrades, so only a
  // handful of keywords are checked.
  const auto kStandard = MakeStringSpan("standard");
  const auto kSearch = MakeStringSpan("search");
  const auto kPhonebk = MakeStringSpan("phonebk");      // Valid BCP 47.
  const auto kPhonebook = MakeStringSpan("phonebook");  // Not valid BCP 47.

  bool sawStandard = false;
  bool sawSearch = false;
  bool sawPhonebk = false;
  bool sawPhonebook = false;

  for (auto keywordResult : keywords) {
    ASSERT_TRUE(keywordResult.isOk());
    auto keyword = keywordResult.unwrap();
    if (keyword == kStandard) {
      sawStandard = true;
    }
    if (keyword == kSearch) {
      sawSearch = true;
    }
    if (keyword == kPhonebk) {
      sawPhonebk = true;
    }
    if (keyword == kPhonebook) {
      sawPhonebook = true;
    }
  }

  ASSERT_TRUE(sawStandard);
  ASSERT_TRUE(sawSearch);

  ASSERT_FALSE(sawPhonebk);    // Not commonly used in French.
  ASSERT_FALSE(sawPhonebook);  // Not valid BCP 47.
}
263
TEST(IntlCollator, GetBcp47KeywordValues)
{
  auto keywordsResult = Collator::GetBcp47KeywordValues();
  ASSERT_TRUE(keywordsResult.isOk());
  auto keywords = keywordsResult.unwrap();

  // The full list comes from ICU and can change between upgrades, so only a
  // handful of keywords are checked.
  const auto kStandard = MakeStringSpan("standard");
  const auto kSearch = MakeStringSpan("search");
  const auto kPhonebk = MakeStringSpan("phonebk");      // Valid BCP 47.
  const auto kPhonebook = MakeStringSpan("phonebook");  // Not valid BCP 47.

  bool sawStandard = false;
  bool sawSearch = false;
  bool sawPhonebk = false;
  bool sawPhonebook = false;

  for (auto keywordResult : keywords) {
    ASSERT_TRUE(keywordResult.isOk());
    auto keyword = keywordResult.unwrap();
    if (keyword == kStandard) {
      sawStandard = true;
    }
    if (keyword == kSearch) {
      sawSearch = true;
    }
    if (keyword == kPhonebk) {
      sawPhonebk = true;
    }
    if (keyword == kPhonebook) {
      sawPhonebook = true;
    }
  }

  ASSERT_TRUE(sawStandard);
  ASSERT_TRUE(sawSearch);
  ASSERT_TRUE(sawPhonebk);

  // The legacy spelling is not valid BCP 47 and must not be reported.
  ASSERT_FALSE(sawPhonebook);
}
296
TEST(IntlCollator, GetAvailableLocales)
{
  using namespace std::literals;

  int32_t enCount = 0;
  int32_t deCount = 0;
  int32_t zhCount = 0;

  // The available locales come from ICU and can change between upgrades, so
  // only a few well-known ones are counted.
  for (const char* locale : Collator::GetAvailableLocales()) {
    const std::string_view tag{locale};
    if (tag == "en"sv) {
      enCount += 1;
      continue;
    }
    if (tag == "de"sv) {
      deCount += 1;
      continue;
    }
    if (tag == "zh"sv) {
      zhCount += 1;
    }
  }

  // None of the locales may be missing or listed more than once.
  ASSERT_EQ(enCount, 1);
  ASSERT_EQ(deCount, 1);
  ASSERT_EQ(zhCount, 1);
}
322
TEST(IntlCollator, GetCaseFirst)
{
  auto created = Collator::TryCreate("en-US");
  ASSERT_TRUE(created.isOk());
  auto collator = created.unwrap();

  // A collator that has not been configured yet reports CaseFirst::False.
  auto initialCaseFirst = collator->GetCaseFirst();
  ASSERT_TRUE(initialCaseFirst.isOk());
  ASSERT_EQ(initialCaseFirst.unwrap(), Collator::CaseFirst::False);

  // Round-trip each value: set it through the options, then read it back.
  const Collator::CaseFirst kRoundTrip[] = {
      Collator::CaseFirst::Upper,
      Collator::CaseFirst::Lower,
      Collator::CaseFirst::False,
  };
  for (Collator::CaseFirst expected : kRoundTrip) {
    Collator::Options options{};
    options.caseFirst = expected;

    auto setResult = collator->SetOptions(options);
    ASSERT_TRUE(setResult.isOk());

    auto readBack = collator->GetCaseFirst();
    ASSERT_TRUE(readBack.isOk());
    ASSERT_EQ(readBack.unwrap(), expected);
  }
}
346
347 } // namespace mozilla::intl
348