1 /* This Source Code Form is subject to the terms of the Mozilla Public
2  * License, v. 2.0. If a copy of the MPL was not distributed with this
3  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
4 #include "gtest/gtest.h"
5 
6 #include <string.h>
7 #include <string_view>
8 #include "mozilla/intl/Collator.h"
9 #include "mozilla/Span.h"
10 #include "TestBuffer.h"
11 
12 namespace mozilla::intl {
13 
TEST(IntlCollator, SetAttributesInternal)
{
  // Smoke test: feed every value of each attribute to its setter so that a
  // MOZ_ASSERT guarding against a misconfigured attribute would be triggered
  // here rather than going unnoticed.
  auto created = Collator::TryCreate("en-US");
  ASSERT_TRUE(created.isOk());
  auto collator = created.unwrap();

  using Strength = Collator::Strength;
  using AlternateHandling = Collator::AlternateHandling;
  using CaseFirst = Collator::CaseFirst;
  using Feature = Collator::Feature;

  // The strength setter's result is not inspected, matching how it is used
  // elsewhere in this test.
  for (auto strength :
       {Strength::Primary, Strength::Secondary, Strength::Tertiary,
        Strength::Quaternary, Strength::Identical, Strength::Default}) {
    collator->SetStrength(strength);
  }

  // Every remaining setter returns a result that is unwrapped immediately.
  for (auto handling :
       {AlternateHandling::NonIgnorable, AlternateHandling::Shifted,
        AlternateHandling::Default}) {
    collator->SetAlternateHandling(handling).unwrap();
  }

  for (auto caseFirst : {CaseFirst::False, CaseFirst::Upper, CaseFirst::Lower}) {
    collator->SetCaseFirst(caseFirst).unwrap();
  }

  // The three feature toggles are exercised one setter at a time, each going
  // through On, Off and Default in that order.
  for (auto feature : {Feature::On, Feature::Off, Feature::Default}) {
    collator->SetCaseLevel(feature).unwrap();
  }

  for (auto feature : {Feature::On, Feature::Off, Feature::Default}) {
    collator->SetNumericCollation(feature).unwrap();
  }

  for (auto feature : {Feature::On, Feature::Off, Feature::Default}) {
    collator->SetNormalizationMode(feature).unwrap();
  }
}
50 
TEST(IntlCollator, GetSortKey)
{
  // Do some light sort key comparisons to ensure everything is wired up
  // correctly. This is not doing extensive correctness testing.
  auto result = Collator::TryCreate("en-US");
  ASSERT_TRUE(result.isOk());
  auto collator = result.unwrap();
  TestBuffer<uint8_t> bufferA;
  TestBuffer<uint8_t> bufferB;

  // Generates the sort key of each input into its own buffer and returns the
  // byte-wise ordering of the two keys (<0, 0, >0).
  // NOTE(review): strcmp assumes the generated key bytes are NUL-terminated
  // inside the buffer -- presumably guaranteed by GetSortKey; confirm.
  auto compareSortKeys = [&](const char16_t* a, const char16_t* b) {
    collator->GetSortKey(MakeStringSpan(a), bufferA).unwrap();
    collator->GetSortKey(MakeStringSpan(b), bufferB).unwrap();
    return strcmp(reinterpret_cast<const char*>(bufferA.data()),
                  reinterpret_cast<const char*>(bufferB.data()));
  };

  ASSERT_TRUE(compareSortKeys(u"aaa", u"bbb") < 0);
  ASSERT_TRUE(compareSortKeys(u"bbb", u"aaa") > 0);
  ASSERT_TRUE(compareSortKeys(u"aaa", u"aaa") == 0);

  // Non-BMP input (encoded as surrogate pairs in UTF-16). The code points are
  // spelled as escapes so the test does not depend on the source file's
  // encoding surviving intact.
  // NOTE(review): the previous literals were unreadable (both were the same
  // pair of U+FFFD replacement characters, which can never compare < 0).
  // U+1F44D / U+1F44E are presumed to be the intended inputs -- confirm.
  ASSERT_TRUE(compareSortKeys(u"\U0001F44D", u"\U0001F44E") < 0);
}
73 
TEST(IntlCollator, CompareStrings)
{
  // Do some light string comparisons to ensure everything is wired up
  // correctly. This is not doing extensive correctness testing.
  auto result = Collator::TryCreate("en-US");
  ASSERT_TRUE(result.isOk());
  auto collator = result.unwrap();

  ASSERT_EQ(collator->CompareStrings(u"aaa", u"bbb"), -1);
  ASSERT_EQ(collator->CompareStrings(u"bbb", u"aaa"), 1);
  ASSERT_EQ(collator->CompareStrings(u"aaa", u"aaa"), 0);

  // Non-BMP input (encoded as surrogate pairs in UTF-16). The code points are
  // spelled as escapes so the test does not depend on the source file's
  // encoding surviving intact.
  // NOTE(review): the previous literals were unreadable (both were the same
  // pair of U+FFFD replacement characters, which can never compare as -1).
  // U+1F44D / U+1F44E are presumed to be the intended inputs -- confirm.
  ASSERT_EQ(collator->CompareStrings(u"\U0001F44D", u"\U0001F44E"), -1);
}
89 
TEST(IntlCollator, SetOptionsSensitivity)
{
  // Test the ECMA 402 sensitivity behavior per:
  // https://tc39.es/ecma402/#sec-collator-comparestrings
  //
  // For each sensitivity the same three pairs are compared: a different base
  // letter ("a" vs "b"), an accent difference ("a" vs "á"), and a case
  // difference ("a" vs "A").
  auto result = Collator::TryCreate("en-US");
  ASSERT_TRUE(result.isOk());
  auto collator = result.unwrap();

  Collator::Options options{};

  // Base: only the base letter distinguishes strings.
  options.sensitivity = Collator::Sensitivity::Base;
  ASSERT_TRUE(collator->SetOptions(options).isOk());
  ASSERT_EQ(collator->CompareStrings(u"a", u"b"), -1);
  ASSERT_EQ(collator->CompareStrings(u"a", u"á"), 0);
  ASSERT_EQ(collator->CompareStrings(u"a", u"A"), 0);

  // Accent: base letters and accents distinguish strings, case does not.
  options.sensitivity = Collator::Sensitivity::Accent;
  ASSERT_TRUE(collator->SetOptions(options).isOk());
  ASSERT_EQ(collator->CompareStrings(u"a", u"b"), -1);
  ASSERT_EQ(collator->CompareStrings(u"a", u"á"), -1);
  ASSERT_EQ(collator->CompareStrings(u"a", u"A"), 0);

  // Case: base letters and case distinguish strings, accents do not.
  options.sensitivity = Collator::Sensitivity::Case;
  ASSERT_TRUE(collator->SetOptions(options).isOk());
  ASSERT_EQ(collator->CompareStrings(u"a", u"b"), -1);
  ASSERT_EQ(collator->CompareStrings(u"a", u"á"), 0);
  ASSERT_EQ(collator->CompareStrings(u"a", u"A"), -1);

  // Variant: base letters, accents and case all distinguish strings.
  options.sensitivity = Collator::Sensitivity::Variant;
  ASSERT_TRUE(collator->SetOptions(options).isOk());
  ASSERT_EQ(collator->CompareStrings(u"a", u"b"), -1);
  ASSERT_EQ(collator->CompareStrings(u"a", u"á"), -1);
  ASSERT_EQ(collator->CompareStrings(u"a", u"A"), -1);
}
131 
TEST(IntlCollator, LocaleSensitiveCollations)
{
  // Verifies that the comparison result for the same pair of strings changes
  // with the collator's locale.
  UniquePtr<Collator> collator = nullptr;

  // Replaces `collator` with a new one for `locale`, configured with base
  // sensitivity. The ASSERT_* macros in here only return from this lambda, so
  // every call site wraps the call in ASSERT_NO_FATAL_FAILURE to abort the
  // test instead of continuing with a stale or null collator.
  auto changeLocale = [&](const char* locale) {
    auto result = Collator::TryCreate(locale);
    ASSERT_TRUE(result.isOk());
    collator = result.unwrap();

    Collator::Options options{};
    options.sensitivity = Collator::Sensitivity::Base;
    auto optResult = collator->SetOptions(options);
    ASSERT_TRUE(optResult.isOk());
  };

  // Swedish treats "Ö" as a separate character, which sorts after "Z".
  ASSERT_NO_FATAL_FAILURE(changeLocale("en-US"));
  ASSERT_EQ(collator->CompareStrings(u"Österreich", u"Västervik"), -1);
  ASSERT_NO_FATAL_FAILURE(changeLocale("sv-SE"));
  ASSERT_EQ(collator->CompareStrings(u"Österreich", u"Västervik"), 1);

  // Country names in their respective scripts.
  auto china = MakeStringSpan(u"中国");
  auto japan = MakeStringSpan(u"日本");
  auto korea = MakeStringSpan(u"한국");

  ASSERT_NO_FATAL_FAILURE(changeLocale("en-US"));
  ASSERT_EQ(collator->CompareStrings(china, japan), -1);
  ASSERT_EQ(collator->CompareStrings(china, korea), 1);
  ASSERT_NO_FATAL_FAILURE(changeLocale("zh"));
  ASSERT_EQ(collator->CompareStrings(china, japan), 1);
  ASSERT_EQ(collator->CompareStrings(china, korea), -1);
  ASSERT_NO_FATAL_FAILURE(changeLocale("ja"));
  ASSERT_EQ(collator->CompareStrings(china, japan), -1);
  ASSERT_EQ(collator->CompareStrings(china, korea), -1);
  ASSERT_NO_FATAL_FAILURE(changeLocale("ko"));
  ASSERT_EQ(collator->CompareStrings(china, japan), 1);
  ASSERT_EQ(collator->CompareStrings(china, korea), -1);
}
173 
TEST(IntlCollator, IgnorePunctuation)
{
  // Checks that toggling Options::ignorePunctuation flips the ordering of a
  // pair of strings where only one of them starts with a punctuation mark.
  auto result = Collator::TryCreate("en-US");
  ASSERT_TRUE(result.isOk());
  auto collator = result.unwrap();

  Collator::Options options{};

  // With punctuation ignored, "aa" is expected to sort before ".bb".
  options.ignorePunctuation = true;
  ASSERT_TRUE(collator->SetOptions(options).isOk());
  ASSERT_EQ(collator->CompareStrings(u"aa", u".bb"), -1);

  // With punctuation significant, "aa" is expected to sort after ".bb".
  options.ignorePunctuation = false;
  ASSERT_TRUE(collator->SetOptions(options).isOk());
  ASSERT_EQ(collator->CompareStrings(u"aa", u".bb"), 1);
}
196 
TEST(IntlCollator, GetBcp47KeywordValuesForLocale)
{
  // Enumerates the collation keyword values reported for "de" and checks a
  // handful of them.
  auto keywordsResult = Collator::GetBcp47KeywordValuesForLocale("de");
  ASSERT_TRUE(keywordsResult.isOk());
  auto keywords = keywordsResult.unwrap();

  // The full list depends on the ICU version and may change between upgrades,
  // so only a subset of the keywords is looked for.
  const auto kStandard = MakeStringSpan("standard");
  const auto kSearch = MakeStringSpan("search");
  const auto kPhonebk = MakeStringSpan("phonebk");      // Valid BCP 47.
  const auto kPhonebook = MakeStringSpan("phonebook");  // Not valid BCP 47.

  bool sawStandard = false;
  bool sawSearch = false;
  bool sawPhonebk = false;
  bool sawPhonebook = false;

  for (auto entry : keywords) {
    ASSERT_TRUE(entry.isOk());
    auto keyword = entry.unwrap();
    sawStandard = sawStandard || keyword == kStandard;
    sawSearch = sawSearch || keyword == kSearch;
    sawPhonebk = sawPhonebk || keyword == kPhonebk;
    sawPhonebook = sawPhonebook || keyword == kPhonebook;
  }

  ASSERT_TRUE(sawStandard);
  ASSERT_TRUE(sawSearch);
  ASSERT_TRUE(sawPhonebk);

  ASSERT_FALSE(sawPhonebook);  // Not valid BCP 47.
}
229 
TEST(IntlCollator, GetBcp47KeywordValuesForLocaleCommonlyUsed)
{
  // Enumerates the collation keyword values reported for "fr" when restricted
  // to commonly used ones, and checks a handful of them.
  auto keywordsResult = Collator::GetBcp47KeywordValuesForLocale(
      "fr", Collator::CommonlyUsed::Yes);
  ASSERT_TRUE(keywordsResult.isOk());
  auto keywords = keywordsResult.unwrap();

  // The full list depends on the ICU version and may change between upgrades,
  // so only a subset of the keywords is looked for.
  const auto kStandard = MakeStringSpan("standard");
  const auto kSearch = MakeStringSpan("search");
  const auto kPhonebk = MakeStringSpan("phonebk");      // Valid BCP 47.
  const auto kPhonebook = MakeStringSpan("phonebook");  // Not valid BCP 47.

  bool sawStandard = false;
  bool sawSearch = false;
  bool sawPhonebk = false;
  bool sawPhonebook = false;

  for (auto entry : keywords) {
    ASSERT_TRUE(entry.isOk());
    auto keyword = entry.unwrap();
    sawStandard = sawStandard || keyword == kStandard;
    sawSearch = sawSearch || keyword == kSearch;
    sawPhonebk = sawPhonebk || keyword == kPhonebk;
    sawPhonebook = sawPhonebook || keyword == kPhonebook;
  }

  ASSERT_TRUE(sawStandard);
  ASSERT_TRUE(sawSearch);

  ASSERT_FALSE(sawPhonebk);    // Not commonly used in French.
  ASSERT_FALSE(sawPhonebook);  // Not valid BCP 47.
}
263 
TEST(IntlCollator, GetBcp47KeywordValues)
{
  // Enumerates the collation keyword values reported without a locale and
  // checks a handful of them.
  auto keywordsResult = Collator::GetBcp47KeywordValues();
  ASSERT_TRUE(keywordsResult.isOk());
  auto keywords = keywordsResult.unwrap();

  // The full list depends on the ICU version and may change between upgrades,
  // so only a subset of the keywords is looked for.
  const auto kStandard = MakeStringSpan("standard");
  const auto kSearch = MakeStringSpan("search");
  const auto kPhonebk = MakeStringSpan("phonebk");      // Valid BCP 47.
  const auto kPhonebook = MakeStringSpan("phonebook");  // Not valid BCP 47.

  bool sawStandard = false;
  bool sawSearch = false;
  bool sawPhonebk = false;
  bool sawPhonebook = false;

  for (auto entry : keywords) {
    ASSERT_TRUE(entry.isOk());
    auto keyword = entry.unwrap();
    sawStandard = sawStandard || keyword == kStandard;
    sawSearch = sawSearch || keyword == kSearch;
    sawPhonebk = sawPhonebk || keyword == kPhonebk;
    sawPhonebook = sawPhonebook || keyword == kPhonebook;
  }

  ASSERT_TRUE(sawStandard);
  ASSERT_TRUE(sawSearch);
  ASSERT_TRUE(sawPhonebk);

  ASSERT_FALSE(sawPhonebook);  // Not valid BCP 47.
}
296 
TEST(IntlCollator, GetAvailableLocales)
{
  // Walks the list of available collator locales and counts how often a few
  // well-known ones show up.
  constexpr std::string_view kEnglish = "en";
  constexpr std::string_view kGerman = "de";
  constexpr std::string_view kChinese = "zh";

  int32_t englishCount = 0;
  int32_t germanCount = 0;
  int32_t chineseCount = 0;

  // The full list depends on the ICU version and may change between upgrades,
  // so only a subset of the available locales is looked for.
  for (const char* locale : Collator::GetAvailableLocales()) {
    const std::string_view name{locale};
    if (name == kEnglish) {
      ++englishCount;
      continue;
    }
    if (name == kGerman) {
      ++germanCount;
      continue;
    }
    if (name == kChinese) {
      ++chineseCount;
    }
  }

  // Each locale should be found exactly once.
  ASSERT_EQ(englishCount, 1);
  ASSERT_EQ(germanCount, 1);
  ASSERT_EQ(chineseCount, 1);
}
322 
TEST(IntlCollator, GetCaseFirst)
{
  // Checks the case-first value reported by a freshly created collator, then
  // round-trips each case-first value through SetOptions / GetCaseFirst.
  auto result = Collator::TryCreate("en-US");
  ASSERT_TRUE(result.isOk());
  auto collator = result.unwrap();

  // A new "en-US" collator is expected to report CaseFirst::False.
  auto initialCaseFirst = collator->GetCaseFirst();
  ASSERT_TRUE(initialCaseFirst.isOk());
  ASSERT_EQ(initialCaseFirst.unwrap(), Collator::CaseFirst::False);

  for (auto kf : {Collator::CaseFirst::Upper, Collator::CaseFirst::Lower,
                  Collator::CaseFirst::False}) {
    Collator::Options options{};
    options.caseFirst = kf;

    auto optResult = collator->SetOptions(options);
    ASSERT_TRUE(optResult.isOk());

    // Named differently from the pre-loop result to avoid shadowing it.
    auto updatedCaseFirst = collator->GetCaseFirst();
    ASSERT_TRUE(updatedCaseFirst.isOk());
    ASSERT_EQ(updatedCaseFirst.unwrap(), kf);
  }
}
346 
347 }  // namespace mozilla::intl
348