1 /*
2  ******************************************************************************************
3  * Copyright (C) 2009-2014, Google, Inc.; International Business Machines Corporation and *
4  * others. All Rights Reserved.                                                           *
5  ******************************************************************************************
6  */
7 
8 package org.unicode.cldr.unittest;
9 
10 import java.util.Set;
11 import java.util.TreeSet;
12 
13 import com.ibm.icu.dev.test.TestFmwk;
14 import com.ibm.icu.util.LocaleMatcher;
15 import com.ibm.icu.util.LocaleMatcher.LanguageMatcherData;
16 import com.ibm.icu.util.LocalePriorityList;
17 import com.ibm.icu.util.ULocale;
18 
19 /**
20  * Test the LocaleMatcher.
21  *
22  * @author markdavis
23  */
24 @SuppressWarnings("deprecation")
25 public class LocaleMatcherTest extends TestFmwk {
26 
27     private static final ULocale ZH_MO = new ULocale("zh_MO");
28     private static final ULocale ZH_HK = new ULocale("zh_HK");
29     static LanguageMatcherData LANGUAGE_MATCHER_DATA = LocaleMatcherShim.load();
30 
31     private LocaleMatcher newLocaleMatcher(LocalePriorityList build) {
32         return new LocaleMatcher(build, LANGUAGE_MATCHER_DATA);
33     }
34 
35     private LocaleMatcher newLocaleMatcher(LocalePriorityList build, LanguageMatcherData data) {
36         return new LocaleMatcher(build, data == null ? LANGUAGE_MATCHER_DATA : data);
37     }
38 
39     private LocaleMatcher newLocaleMatcher(LocalePriorityList lpl, LanguageMatcherData data, double d) {
40         return new LocaleMatcher(lpl, data == null ? LANGUAGE_MATCHER_DATA : data, d);
41     }
42 
43     private LocaleMatcher newLocaleMatcher(String string) {
44         return new LocaleMatcher(LocalePriorityList.add(string).build(), LANGUAGE_MATCHER_DATA);
45     }
46 
47     // public LocaleMatcher(LocalePriorityList languagePriorityList,
48     // LocaleMatcherData matcherData, double threshold)
49 
50     public static void main(String[] args) throws Exception {
51         new LocaleMatcherTest().run(args);
52     }
53 
54     public void testParentLocales() {
55         if (logKnownIssue("ICU-21241", "waiting on LocaleMatcherData update")) {
56             return;
57         }
58 
59         assertCloser("es_AR", "es_419", "es_ES");
60         assertCloser("es_AR", "es_419", "es");
61 
62         assertCloser("es_AR", "es_MX", "es");
63         assertCloser("es_AR", "es_MX", "es");
64 
65         assertCloser("en_AU", "en_GB", "en_US");
66         assertCloser("en_AU", "en_GB", "en");
67 
68         assertCloser("en_AU", "en_NZ", "en_US");
69         assertCloser("en_AU", "en_NZ", "en");
70 
71         assertCloser("pt_AO", "pt_PT", "pt_BR");
72         assertCloser("pt_AO", "pt_PT", "pt");
73 
74         assertCloser("zh_HK", "zh_MO", "zh_TW");
75         assertCloser("zh_HK", "zh_MO", "zh_CN");
76         assertCloser("zh_HK", "zh_MO", "zh");
77     }
78 
79     private void assertCloser(String a, String closer, String further) {
80         LocaleMatcher matcher = newLocaleMatcher(further + ", " + closer);
81         assertEquals("test " + a + " is closer to " + closer + " than to " + further, new ULocale(closer), matcher.getBestMatch(a));
82         matcher = newLocaleMatcher(closer + ", " + further);
83         assertEquals("test " + a + " is closer to " + closer + " than to " + further, new ULocale(closer), matcher.getBestMatch(a));
84     }
85 
86     //    public void testParentLocales() {
87     //        // find all the regions that have a closer relation because of an explicit parent
88     //        Set<String> explicitParents = new HashSet<>(INFO.getExplicitParents());
89     //        explicitParents.remove("root");
90     //        Set<String> otherParents = new HashSet<>(INFO.getExplicitParents());
91     //        for (String locale : explicitParents) {
92     //            while (true) {
93     //                locale = LocaleIDParser.getParent(locale);
94     //                if (locale == null || locale.equals("root")) {
95     //                    break;
96     //                }
97     //                otherParents.add(locale);
98     //            }
99     //        }
100     //        otherParents.remove("root");
101     //
102     //        for (String locale : CONFIG.getCldrFactory().getAvailable()) {
103     //            String parentId = LocaleIDParser.getParent(locale);
104     //            String parentIdSimple = LocaleIDParser.getSimpleParent(locale);
105     //            if (!explicitParents.contains(parentId) && !otherParents.contains(parentIdSimple)) {
106     //                continue;
107     //            }
108     //            System.out.println(locale + "\t" + CONFIG.getEnglish().getName(locale) + "\t" + parentId + "\t" + parentIdSimple);
109     //        }
110     //    }
111 
112     public void testChinese() {
113         if (logKnownIssue("CLDR-14166", "Skip until CLDR updated for new ICU4J LocaleMatcher")) {
114             return;
115         }
116         LocaleMatcher matcher = newLocaleMatcher("zh_CN, zh_TW, iw");
117         ULocale taiwanChinese = new ULocale("zh_TW");
118         ULocale chinaChinese = new ULocale("zh_CN");
119         assertEquals("zh_CN, zh_TW, iw;", taiwanChinese, matcher.getBestMatch("zh_Hant_TW"));
120         assertEquals("zh_CN, zh_TW, iw;", taiwanChinese, matcher.getBestMatch("zh_Hant"));
121         assertEquals("zh_CN, zh_TW, iw;", taiwanChinese, matcher.getBestMatch("zh_TW"));
122         assertEquals("zh_CN, zh_TW, iw;", chinaChinese, matcher.getBestMatch("zh_Hans_CN"));
123         assertEquals("zh_CN, zh_TW, iw;", chinaChinese, matcher.getBestMatch("zh_CN"));
124         assertEquals("zh_CN, zh_TW, iw;", chinaChinese, matcher.getBestMatch("zh"));
125         assertEquals("zh_CN, zh_TW, iw;", taiwanChinese, matcher.getBestMatch("zh_Hant_HK"));
126     }
127 
128     public void testenGB() {
129         if (logKnownIssue("ICU-21241", "waiting on LocaleMatcherData update")) {
130             return;
131         }
132 
133         final LocaleMatcher matcher = newLocaleMatcher("fr, en, en_GB, es_MX, es_419, es");
134         assertEquals("en_GB", matcher.getBestMatch("en_NZ").toString());
135         assertEquals("es", matcher.getBestMatch("es_ES").toString());
136         assertEquals("es_419", matcher.getBestMatch("es_AR").toString());
137         assertEquals("es_MX", matcher.getBestMatch("es_MX").toString());
138     }
139 
140     public void testFallbacks() {
141         if (logKnownIssue("CLDR-14166", "Skip until CLDR updated for new ICU4J LocaleMatcher")) {
142             return;
143         }
144         LocalePriorityList lpl = LocalePriorityList.add("en, hi").build();
145         final LocaleMatcher matcher = newLocaleMatcher(lpl, null, 0.09);
146         assertEquals("hi", matcher.getBestMatch("sa").toString());
147     }
148 
149     public void testOverrideData() {
150         if (logKnownIssue("CLDR-14166", "Skip until CLDR updated for new ICU4J LocaleMatcher")) {
151             return;
152         }
153         double threshold = 0.05;
154         LanguageMatcherData localeMatcherData = new LanguageMatcherData()
155             .addDistance("br", "fr", 10, true)
156             .addDistance("es", "cy", 10, true);
157         logln(localeMatcherData.toString());
158 
159         final LocaleMatcher matcher = newLocaleMatcher(
160             LocalePriorityList
161                 .add(ULocale.ENGLISH)
162                 .add(ULocale.FRENCH)
163                 .add(ULocale.UK)
164                 .build(),
165             localeMatcherData, threshold);
166         logln(matcher.toString());
167 
168         assertEquals(ULocale.FRENCH, matcher.getBestMatch(new ULocale("br")));
169         assertEquals(ULocale.ENGLISH, matcher.getBestMatch(new ULocale("es"))); // one
170         // way
171     }
172 
173     public void testBasics() {
174         if (logKnownIssue("CLDR-14166", "Skip until CLDR updated for new ICU4J LocaleMatcher")) {
175             return;
176         }
177         final LocaleMatcher matcher = newLocaleMatcher(LocalePriorityList.add(ULocale.FRENCH).add(ULocale.UK)
178             .add(ULocale.ENGLISH).build());
179         logln(matcher.toString());
180 
181         assertEquals(ULocale.UK, matcher.getBestMatch(ULocale.UK));
182         assertEquals(ULocale.ENGLISH, matcher.getBestMatch(ULocale.US));
183         assertEquals(ULocale.FRENCH, matcher.getBestMatch(ULocale.FRANCE));
184         assertEquals(ULocale.FRENCH, matcher.getBestMatch(ULocale.JAPAN));
185     }
186 
187     public void testFallback() {
188         if (logKnownIssue("CLDR-14166", "Skip until CLDR updated for new ICU4J LocaleMatcher")) {
189             return;
190         }
191         // check that script fallbacks are handled right
192         final LocaleMatcher matcher = newLocaleMatcher("zh_CN, zh_TW, iw");
193         assertEquals(new ULocale("zh_TW"), matcher.getBestMatch("zh_Hant"));
194         assertEquals(new ULocale("zh_CN"), matcher.getBestMatch("zh"));
195         assertEquals(new ULocale("zh_CN"), matcher.getBestMatch("zh_Hans_CN"));
196         assertEquals(new ULocale("zh_TW"), matcher.getBestMatch("zh_Hant_HK"));
197         assertEquals(new ULocale("he"), matcher.getBestMatch("iw_IT"));
198     }
199 
200     public void testSpecials() {
201         if (logKnownIssue("CLDR-14166", "Skip until CLDR updated for new ICU4J LocaleMatcher")) {
202             return;
203         }
204         // check that nearby languages are handled
205         final LocaleMatcher matcher = newLocaleMatcher("en, fil, ro, nn");
206         assertEquals(new ULocale("fil"), matcher.getBestMatch("tl"));
207         assertEquals(new ULocale("ro"), matcher.getBestMatch("mo"));
208         assertEquals(new ULocale("nn"), matcher.getBestMatch("nb"));
209         // make sure default works
210         assertEquals(new ULocale("en"), matcher.getBestMatch("ja"));
211     }
212 
213     public void testRegionalSpecials() {
214         if (logKnownIssue("ICU-21241", "waiting on LocaleMatcherData update")) {
215             return;
216         }
217 
218         // verify that en_AU is closer to en_GB than to en (which is en_US)
219         final LocaleMatcher matcher = newLocaleMatcher("en, en_GB, es, es_419");
220         assertEquals("es_MX in {en, en_GB, es, es_419}", new ULocale("es_419"), matcher.getBestMatch("es_MX"));
221         assertEquals("en_AU in {en, en_GB, es, es_419}", new ULocale("en_GB"), matcher.getBestMatch("en_AU"));
222         assertEquals("es_ES in {en, en_GB, es, es_419}", new ULocale("es"), matcher.getBestMatch("es_ES"));
223     }
224 
225     public void testHK() {
226         if (logKnownIssue("ICU-21241", "waiting on LocaleMatcherData update")) {
227             return;
228         }
229 
230         // HK and MO are closer to each other for Hant than to TW
231         final LocaleMatcher matcher = newLocaleMatcher("zh, zh_TW, zh_MO");
232         assertEquals("zh_HK in {zh, zh_TW, zh_MO}", ZH_MO, matcher.getBestMatch("zh_HK"));
233         final LocaleMatcher matcher2 = newLocaleMatcher("zh, zh_TW, zh_HK");
234         assertEquals("zh_MO in {zh, zh_TW, zh_HK}", ZH_HK, matcher2.getBestMatch("zh_MO"));
235     }
236 
237     public void TestLocaleMatcherCoverage() {
238         if (logKnownIssue("CLDR-14166", "Skip until CLDR updated for new ICU4J LocaleMatcher")) {
239             return;
240         }
241         // Add tests for better code coverage
242         LocaleMatcher matcher = newLocaleMatcher(LocalePriorityList.add(null, 0).build(), null);
243         logln(matcher.toString());
244 
245         LanguageMatcherData data = new LanguageMatcherData();
246 
247         LanguageMatcherData clone = data.cloneAsThawed();
248 
249         if (clone.equals(data)) {
250             errln("Error cloneAsThawed() is equal.");
251         }
252 
253         if (data.isFrozen()) {
254             errln("Error LocaleMatcherData is frozen!");
255         }
256     }
257 
258     private void assertEquals(Object expected, Object string) {
259         assertEquals("", expected, string);
260     }
261 
262     private void assertNull(Object bestMatch) {
263         assertNull("", bestMatch);
264     }
265 
266     public void testEmpty() {
267         if (logKnownIssue("CLDR-14166", "Skip until CLDR updated for new ICU4J LocaleMatcher")) {
268             return;
269         }
270         final LocaleMatcher matcher = newLocaleMatcher("");
271         assertNull(matcher.getBestMatch(ULocale.FRENCH));
272     }
273 
274     static final ULocale ENGLISH_CANADA = new ULocale("en_CA");
275 
276     public void testMatch_exact() {
277         if (logKnownIssue("CLDR-14166", "Skip until CLDR updated for new ICU4J LocaleMatcher")) {
278             return;
279         }
280         assertEquals(1.0,
281             LocaleMatcher.match(ENGLISH_CANADA, ENGLISH_CANADA));
282     }
283 
284     public void testMatch_none() {
285         if (logKnownIssue("CLDR-14166", "Skip until CLDR updated for new ICU4J LocaleMatcher")) {
286             return;
287         }
288         double match = LocaleMatcher.match(
289             new ULocale("ar_MK"),
290             ENGLISH_CANADA);
291         assertTrue("Actual < 0: " + match, 0 <= match);
292         assertTrue("Actual > 0.15 (~ language + script distance): " + match, 0.2 > match);
293     }
294 
295     public void testMatch_matchOnMazimized() {
296         if (logKnownIssue("CLDR-14166", "Skip until CLDR updated for new ICU4J LocaleMatcher")) {
297             return;
298         }
299         ULocale undTw = new ULocale("und_TW");
300         ULocale zhHant = new ULocale("zh_Hant");
301         double matchZh = LocaleMatcher.match(undTw, new ULocale("zh"));
302         double matchZhHant = LocaleMatcher.match(undTw, zhHant);
303         assertTrue("und_TW should be closer to zh_Hant (" + matchZhHant +
304             ") than to zh (" + matchZh + ")",
305             matchZh < matchZhHant);
306         double matchEnHantTw = LocaleMatcher.match(new ULocale("en_Hant_TW"),
307             zhHant);
308         assertTrue("zh_Hant should be closer to und_TW (" + matchZhHant +
309             ") than to en_Hant_TW (" + matchEnHantTw + ")",
310             matchEnHantTw < matchZhHant);
311         assertTrue("zh should be closer to und_TW (" + matchZh +
312             ") than to en_Hant_TW (" + matchEnHantTw + ")",
313             matchEnHantTw < matchZh);
314     }
315 
316     public void testMatchLegacyCode() {
317         if (logKnownIssue("CLDR-14166", "Skip until CLDR updated for new ICU4J LocaleMatcher")) {
318             return;
319         }
320         final LocaleMatcher matcher = newLocaleMatcher("fr, i_klingon, en_Latn_US");
321         assertEquals("en_Latn_US", matcher.getBestMatch("en_GB_oed").toString());
322         // assertEquals("tlh", matcher.getBestMatch("i_klingon").toString());
323     }
324 
325     public void testGetBestMatchForList_exactMatch() {
326         if (logKnownIssue("CLDR-14166", "Skip until CLDR updated for new ICU4J LocaleMatcher")) {
327             return;
328         }
329         final LocaleMatcher matcher = newLocaleMatcher("fr, en_GB, ja, es_ES, es_MX");
330         assertEquals("ja", matcher.getBestMatch("ja, de").toString());
331     }
332 
333     public void testGetBestMatchForList_simpleVariantMatch() {
334         if (logKnownIssue("CLDR-14166", "Skip until CLDR updated for new ICU4J LocaleMatcher")) {
335             return;
336         }
337         final LocaleMatcher matcher = newLocaleMatcher("fr, en_GB, ja, es_ES, es_MX");
338         // Intentionally avoiding a perfect_match or two candidates for variant
339         // matches.
340         assertEquals("en_GB", matcher.getBestMatch("de, en_US").toString());
341         // Fall back.
342         assertEquals("fr", matcher.getBestMatch("de, zh").toString());
343     }
344 
345     public void testGetBestMatchForList_matchOnMaximized() {
346         if (logKnownIssue("CLDR-14166", "Skip until CLDR updated for new ICU4J LocaleMatcher")) {
347             return;
348         }
349         final LocaleMatcher matcher = newLocaleMatcher("en, ja");
350         // final LocaleMatcher matcher =
351         // newLocaleMatcher("fr, en, ja, es_ES, es_MX");
352         // Check that if the preference is maximized already, it works as well.
353         assertEquals("Match for ja_Jpan_JP (maximized already)",
354             "ja", matcher.getBestMatch("ja_Jpan_JP, en-AU").toString());
355         if (true)
356             return;
357         // ja_JP matches ja on likely subtags, and it's listed first, thus it
358         // wins over
359         // thus it wins over the second preference en_GB.
360         assertEquals("Match for ja_JP, with likely region subtag",
361             "ja", matcher.getBestMatch("ja_JP, en_US").toString());
362         // Check that if the preference is maximized already, it works as well.
363         assertEquals("Match for ja_Jpan_JP (maximized already)",
364             "ja", matcher.getBestMatch("ja_Jpan_JP, en_US").toString());
365     }
366 
367     public void testGetBestMatchForList_noMatchOnMaximized() {
368         if (logKnownIssue("CLDR-14166", "Skip until CLDR updated for new ICU4J LocaleMatcher")) {
369             return;
370         }
371         // Regression test for http://b/5714572 .
372         final LocaleMatcher matcher = newLocaleMatcher("en, de, fr, ja");
373         // de maximizes to de_DE. Pick the exact match for the secondary
374         // language instead.
375         assertEquals("de", matcher.getBestMatch("de_CH, fr").toString());
376     }
377 
378     public void testBestMatchForTraditionalChinese() {
379         if (logKnownIssue("CLDR-14166", "Skip until CLDR updated for new ICU4J LocaleMatcher")) {
380             return;
381         }
382         // Scenario: An application that only supports Simplified Chinese (and
383         // some other languages),
384         // but does not support Traditional Chinese. zh_Hans_CN could be
385         // replaced with zh_CN, zh, or
386         // zh_Hans, it wouldn't make much of a difference.
387         final LocaleMatcher matcher = newLocaleMatcher("fr, zh_Hans_CN, en_US");
388 
389         // The script distance (simplified vs. traditional Han) is considered
390         // small enough
391         // to be an acceptable match. The regional difference is considered
392         // almost insignificant.
393         assertEquals("zh_Hans_CN", matcher.getBestMatch("zh_TW").toString());
394         assertEquals("zh_Hans_CN", matcher.getBestMatch("zh_Hant").toString());
395 
396         // For geo_political reasons, you might want to avoid a zh_Hant ->
397         // zh_Hans match.
398         // In this case, if zh_TW, zh_HK or a tag starting with zh_Hant is
399         // requested, you can
400         // change your call to getBestMatch to include a 2nd language
401         // preference.
402         // "en" is a better match since its distance to "en_US" is closer than
403         // the distance
404         // from "zh_TW" to "zh_CN" (script distance).
405         assertEquals("en_US", matcher.getBestMatch("zh_TW, en").toString());
406         assertEquals("en_US", matcher.getBestMatch("zh_Hant_CN, en").toString());
407         assertEquals("zh_Hans_CN", matcher.getBestMatch("zh_Hans, en").toString());
408     }
409 
410     public void testUndefined() {
411         if (logKnownIssue("CLDR-14166", "Skip until CLDR updated for new ICU4J LocaleMatcher")) {
412             return;
413         }
414         // When the undefined language doesn't match anything in the list,
415         // getBestMatch returns
416         // the default, as usual.
417         LocaleMatcher matcher = newLocaleMatcher("it,fr");
418         assertEquals("it", matcher.getBestMatch("und").toString());
419 
420         // When it *does* occur in the list, BestMatch returns it, as expected.
421         // Note that as of ICU 64, "und" is normalized to ""
422         matcher = newLocaleMatcher("it,und");
423         assertEquals("", matcher.getBestMatch("und").toString());
424 
425         // The unusual part:
426         // max("und") = "en_Latn_US", and since matching is based on maximized
427         // tags, the undefined
428         // language would normally match English. But that would produce the
429         // counterintuitive results
430         // that getBestMatch("und", LocaleMatcher("it,en")) would be "en", and
431         // getBestMatch("en", LocaleMatcher("it,und")) would be "und".
432         //
433         // To avoid that, we change the matcher's definitions of max
434         // (AddLikelySubtagsWithDefaults)
435         // so that max("und")="und". That produces the following, more desirable
436         // results:
437         matcher = newLocaleMatcher("it,en");
438         assertEquals("it", matcher.getBestMatch("und").toString());
439         matcher = newLocaleMatcher("it,und");
440         assertEquals("it", matcher.getBestMatch("en").toString());
441     }
442 
443     // public void testGetBestMatch_emptyList() {
444     // final LocaleMatcher matcher = newLocaleMatcher(
445     // new LocalePriorityList(new HashMap()));
446     // assertNull(matcher.getBestMatch(ULocale.ENGLISH));
447     // }
448 
449     public void testGetBestMatch_googlePseudoLocales() {
450         if (logKnownIssue("CLDR-14166", "Skip until CLDR updated for new ICU4J LocaleMatcher")) {
451             return;
452         }
453         // Google pseudo locales are primarily based on variant subtags.
454         // See http://sites/intl_eng/pseudo_locales.
455         // (See below for the region code based fall back options.)
456         final LocaleMatcher matcher = newLocaleMatcher(
457             "fr, pt");
458         assertEquals("fr", matcher.getBestMatch("de").toString());
459         assertEquals("fr", matcher.getBestMatch("en_US").toString());
460         assertEquals("fr", matcher.getBestMatch("en").toString());
461         assertEquals("pt", matcher.getBestMatch("pt_BR").toString());
462     }
463 
464     public void testGetBestMatch_regionDistance() {
465         if (logKnownIssue("ICU-21241", "waiting on LocaleMatcherData update")) {
466             return;
467         }
468 
469         LocaleMatcher matcher = newLocaleMatcher("es_AR, es");
470         assertEquals("es_AR", matcher.getBestMatch("es_MX").toString());
471 
472         matcher = newLocaleMatcher("fr, en, en_GB");
473         assertEquals("en_GB", matcher.getBestMatch("en_CA").toString());
474 
475         matcher = newLocaleMatcher("de_AT, de_DE, de_CH");
476         assertEquals("de_DE", matcher.getBestMatch("de").toString());
477 
478         showDistance(matcher, "en", "en_CA");
479         showDistance(matcher, "en_CA", "en");
480         showDistance(matcher, "en_US", "en_CA");
481         showDistance(matcher, "en_CA", "en_US");
482         showDistance(matcher, "en_GB", "en_CA");
483         showDistance(matcher, "en_CA", "en_GB");
484         showDistance(matcher, "en", "en_UM");
485         showDistance(matcher, "en_UM", "en");
486     }
487 
488     private void showDistance(LocaleMatcher matcher, String desired, String supported) {
489         ULocale desired2 = new ULocale(desired);
490         ULocale supported2 = new ULocale(supported);
491         double distance = matcher.match(desired2, ULocale.addLikelySubtags(desired2), supported2, ULocale.addLikelySubtags(supported2));
492         logln(desired + " to " + supported + " :\t" + distance);
493     }
494 
495     /**
496      * If all the base languages are the same, then each sublocale matches
497      * itself most closely
498      */
499     public void testExactMatches() {
500         if (logKnownIssue("CLDR-14166", "Skip until CLDR updated for new ICU4J LocaleMatcher")) {
501             return;
502         }
503         String lastBase = "";
504         TreeSet<ULocale> sorted = new TreeSet<>();
505         for (ULocale loc : ULocale.getAvailableLocales()) {
506             String language = loc.getLanguage();
507             if (!lastBase.equals(language)) {
508                 check(sorted);
509                 sorted.clear();
510                 lastBase = language;
511             }
512             sorted.add(loc);
513         }
514         check(sorted);
515     }
516 
517     private void check(Set<ULocale> sorted) {
518         if (sorted.isEmpty()) {
519             return;
520         }
521         check2(sorted);
522         ULocale first = sorted.iterator().next();
523         ULocale max = ULocale.addLikelySubtags(first);
524         sorted.add(max);
525         check2(sorted);
526     }
527 
528     /**
529      * @param sorted
530      */
531     private void check2(Set<ULocale> sorted) {
532         // TODO Auto-generated method stub
533         logln("Checking: " + sorted);
534         LocaleMatcher matcher = newLocaleMatcher(
535             LocalePriorityList.add(
536                 sorted.toArray(new ULocale[sorted.size()]))
537                 .build());
538         for (ULocale loc : sorted) {
539             String stringLoc = loc.toString();
540             assertEquals(stringLoc, matcher.getBestMatch(stringLoc).toString());
541         }
542     }
543 
544     // public void testComputeDistance_monkeyTest() {
545     // RegionCode[] codes = RegionCode.values();
546     // Random random = new Random();
547     // for (int i = 0; i < 1000; ++i) {
548     // RegionCode x = codes[random.nextInt(codes.length)];
549     // RegionCode y = codes[random.nextInt(codes.length)];
550     // double d = LocaleMatcher.getRegionDistance(x, y, null, null);
551     // if (x == RegionCode.ZZ || y == RegionCode.ZZ) {
552     // assertEquals(LocaleMatcher.REGION_DISTANCE, d);
553     // } else if (x == y) {
554     // assertEquals(0.0, d);
555     // } else {
556     // assertTrue(d > 0);
557     // assertTrue(d <= LocaleMatcher.REGION_DISTANCE);
558     // }
559     // }
560     // }
561 
562     public void testGetBestMatchForList_matchOnMaximized2() {
563         if (logKnownIssue("CLDR-14166", "Skip until CLDR updated for new ICU4J LocaleMatcher")) {
564             return;
565         }
566         final LocaleMatcher matcher = newLocaleMatcher("fr, en-GB, ja, es-ES, es-MX");
567         // ja-JP matches ja on likely subtags, and it's listed first, thus it wins over
568         // thus it wins over the second preference en-GB.
569         assertEquals("Match for ja-JP, with likely region subtag",
570             "ja", matcher.getBestMatch("ja-JP, en-GB").toString());
571         // Check that if the preference is maximized already, it works as well.
572         assertEquals("Match for ja-Jpan-JP (maximized already)",
573             "ja", matcher.getBestMatch("ja-Jpan-JP, en-GB").toString());
574     }
575 
576     public void testGetBestMatchForList_closeEnoughMatchOnMaximized() {
577         if (logKnownIssue("CLDR-14166", "Skip until CLDR updated for new ICU4J LocaleMatcher")) {
578             return;
579         }
580         final LocaleMatcher matcher = newLocaleMatcher("en-GB, en, de, fr, ja");
581         assertEquals("de", matcher.getBestMatch("de-CH, fr").toString());
582         assertEquals("en", matcher.getBestMatch("en-US, ar, nl, de, ja").toString());
583     }
584 
585     public void testGetBestMatchForPortuguese() {
586         if (logKnownIssue("ICU-21241", "waiting on LocaleMatcherData update")) {
587             return;
588         }
589         final LocaleMatcher withPTExplicit = newLocaleMatcher("pt_PT, pt_BR, es, es_419");
590         final LocaleMatcher withPTImplicit = newLocaleMatcher("pt_PT, pt, es, es_419");
591         // Could happen because "pt_BR" is a tier_1 language and "pt_PT" is tier_2.
592 
593         final LocaleMatcher withoutPT = newLocaleMatcher("pt_BR, es, es_419");
594         // European user who prefers Spanish over Brazillian Portuguese as a fallback.
595 
596         assertEquals("pt_PT", withPTExplicit.getBestMatch("pt_PT, es, pt").toString());
597         assertEquals("pt_PT", withPTImplicit.getBestMatch("pt_PT, es, pt").toString());
598         assertEquals("es", withoutPT.getBestMatch("pt_PT, es, pt").toString());
599 
600         // Brazillian user who prefers South American Spanish over European Portuguese as a fallback.
601         // The asymmetry between this case and above is because it's "pt_PT" that's missing between the
602         // matchers as "pt_BR" is a much more common language.
603         assertEquals("pt_BR", withPTExplicit.getBestMatch("pt, es_419, pt_PT").toString());
604         assertEquals("pt", withPTImplicit.getBestMatch("pt, es_419, pt_PT").toString());
605         assertEquals("pt_BR", withoutPT.getBestMatch("pt, es_419, pt_PT").toString());
606 
607         // Code that adds the user's country can get "pt_US" for a user's language.
608         // That should fall back to "pt_BR".
609         assertEquals("pt_BR", withPTExplicit.getBestMatch("pt_US, pt_PT").toString());
610         assertEquals("pt", withPTImplicit.getBestMatch("pt_US, pt_PT").toString());
611     }
612 
613     public void testVariantWithScriptMatch() {
614         if (logKnownIssue("CLDR-14166", "Skip until CLDR updated for new ICU4J LocaleMatcher")) {
615             return;
616         }
617         final LocaleMatcher matcher = newLocaleMatcher("fr, en, sv");
618         assertEquals("en", matcher.getBestMatch("en-GB").toString());
619         assertEquals("en", matcher.getBestMatch("en-GB, sv").toString());
620     }
621 
622     public void testVariantWithScriptMatch2() {
623         if (logKnownIssue("CLDR-14166", "Skip until CLDR updated for new ICU4J LocaleMatcher")) {
624             return;
625         }
626         final LocaleMatcher matcher = newLocaleMatcher("en, sv");
627         assertEquals("en", matcher.getBestMatch("en-GB, sv").toString());
628     }
629 
630     public void testPerf() {
631         if (logKnownIssue("CLDR-14166", "Skip until CLDR updated for new ICU4J LocaleMatcher")) {
632             return;
633         }
634         final String desired = "sv, en";
635 
636         final LocaleMatcher matcherShort = newLocaleMatcher(desired);
637         final LocaleMatcher matcherLong = newLocaleMatcher(
638             "af, am, ar, az, be, bg, bn, bs, ca, cs, cy, cy, da, de, el, en, en-GB, es, es-419, et, eu, fa, fi, fil, fr, ga, gl, gu, hi, hr, hu, hy, id, is, it, iw, ja, ka, kk, km, kn, ko, ky, lo, lt, lv, mk, ml, mn, mr, ms, my, ne, nl, no, pa, pl, pt, pt-PT, ro, ru, si, sk, sl, sq, sr, sr-Latn, sv, sw, ta, te, th, tr, uk, ur, uz, vi, zh-CN, zh-TW, zu");
639         final LocaleMatcher matcherVeryLong = newLocaleMatcher(
640             "af, af_NA, af_ZA, agq, agq_CM, ak, ak_GH, am, am_ET, ar, ar_001, ar_AE, ar_BH, ar_DJ, ar_DZ, ar_EG, ar_EH, ar_ER, ar_IL, ar_IQ, ar_JO, ar_KM, ar_KW, ar_LB, ar_LY, ar_MA, ar_MR, ar_OM, ar_PS, ar_QA, ar_SA, ar_SD, ar_SO, ar_SS, ar_SY, ar_TD, ar_TN, ar_YE, as, as_IN, asa, asa_TZ, ast, ast_ES, az, az_Cyrl, az_Cyrl_AZ, az_Latn, az_Latn_AZ, bas, bas_CM, be, be_BY, bem, bem_ZM, bez, bez_TZ, bg, bg_BG, bm, bm_ML, bn, bn_BD, bn_IN, bo, bo_CN, bo_IN, br, br_FR, brx, brx_IN, bs, bs_Cyrl, bs_Cyrl_BA, bs_Latn, bs_Latn_BA, ca, ca_AD, ca_ES, ca_ES_VALENCIA, ca_FR, ca_IT, ce, ce_RU, cgg, cgg_UG, chr, chr_US, ckb, ckb_IQ, ckb_IR, cs, cs_CZ, cu, cu_RU, cy, cy_GB, da, da_DK, da_GL, dav, dav_KE, de, de_AT, de_BE, de_CH, de_DE, de_LI, de_LU, dje, dje_NE, dsb, dsb_DE, dua, dua_CM, dyo, dyo_SN, dz, dz_BT, ebu, ebu_KE, ee, ee_GH, ee_TG, el, el_CY, el_GR, en, en_001, en_150, en_AG, en_AI, en_AS, en_AT, en_AU, en_BB, en_BE, en_BI, en_BM, en_BS, en_BW, en_BZ, en_CA, en_CC, en_CH, en_CK, en_CM, en_CX, en_CY, en_DE, en_DG, en_DK, en_DM, en_ER, en_FI, en_FJ, en_FK, en_FM, en_GB, en_GD, en_GG, en_GH, en_GI, en_GM, en_GU, en_GY, en_HK, en_IE, en_IL, en_IM, en_IN, en_IO, en_JE, en_JM, en_KE, en_KI, en_KN, en_KY, en_LC, en_LR, en_LS, en_MG, en_MH, en_MO, en_MP, en_MS, en_MT, en_MU, en_MW, en_MY, en_NA, en_NF, en_NG, en_NL, en_NR, en_NU, en_NZ, en_PG, en_PH, en_PK, en_PN, en_PR, en_PW, en_RW, en_SB, en_SC, en_SD, en_SE, en_SG, en_SH, en_SI, en_SL, en_SS, en_SX, en_SZ, en_TC, en_TK, en_TO, en_TT, en_TV, en_TZ, en_UG, en_UM, en_US, en_US_POSIX, en_VC, en_VG, en_VI, en_VU, en_WS, en_ZA, en_ZM, en_ZW, eo, eo_001, es, es_419, es_AR, es_BO, es_CL, es_CO, es_CR, es_CU, es_DO, es_EA, es_EC, es_ES, es_GQ, es_GT, es_HN, es_IC, es_MX, es_NI, es_PA, es_PE, es_PH, es_PR, es_PY, es_SV, es_US, es_UY, es_VE, et, et_EE, eu, eu_ES, ewo, ewo_CM, fa, fa_AF, fa_IR, ff, ff_CM, ff_GN, ff_MR, ff_SN, fi, fi_FI, fil, fil_PH, fo, fo_DK, fo_FO, fr, fr_BE, fr_BF, fr_BI, fr_BJ, fr_BL, fr_CA, fr_CD, fr_CF, fr_CG, fr_CH, fr_CI, fr_CM, fr_DJ, fr_DZ, fr_FR, fr_GA, fr_GF, fr_GN, fr_GP, fr_GQ, fr_HT, fr_KM, fr_LU, fr_MA, fr_MC, fr_MF, fr_MG, fr_ML, fr_MQ, fr_MR, fr_MU, fr_NC, fr_NE, fr_PF, fr_PM, fr_RE, fr_RW, fr_SC, fr_SN, fr_SY, fr_TD, fr_TG, fr_TN, fr_VU, fr_WF, fr_YT, fur, fur_IT, fy, fy_NL, ga, ga_IE, gd, gd_GB, gl, gl_ES, gsw, gsw_CH, gsw_FR, gsw_LI, gu, gu_IN, guz, guz_KE, gv, gv_IM, ha, ha_GH, ha_NE, ha_NG, haw, haw_US, he, he_IL, hi, hi_IN, hr, hr_BA, hr_HR, hsb, hsb_DE, hu, hu_HU, hy, hy_AM, id, id_ID, ig, ig_NG, ii, ii_CN, is, is_IS, it, it_CH, it_IT, it_SM, ja, ja_JP, jgo, jgo_CM, jmc, jmc_TZ, ka, ka_GE, kab, kab_DZ, kam, kam_KE, kde, kde_TZ, kea, kea_CV, khq, khq_ML, ki, ki_KE, kk, kk_KZ, kkj, kkj_CM, kl, kl_GL, kln, kln_KE, km, km_KH, kn, kn_IN, ko, ko_KP, ko_KR, kok, kok_IN, ks, ks_IN, ksb, ksb_TZ, ksf, ksf_CM, ksh, ksh_DE, kw, kw_GB, ky, ky_KG, lag, lag_TZ, lb, lb_LU, lg, lg_UG, lkt, lkt_US, ln, ln_AO, ln_CD, ln_CF, ln_CG, lo, lo_LA, lrc, lrc_IQ, lrc_IR, lt, lt_LT, lu, lu_CD, luo, luo_KE, luy, luy_KE, lv, lv_LV, mas, mas_KE, mas_TZ, mer, mer_KE, mfe, mfe_MU, mg, mg_MG, mgh, mgh_MZ, mgo, mgo_CM, mk, mk_MK, ml, ml_IN, mn, mn_MN, mr, mr_IN, ms, ms_BN, ms_MY, ms_SG, mt, mt_MT, mua, mua_CM, my, my_MM, mzn, mzn_IR, naq, naq_NA, nb, nb_NO, nb_SJ, nd, nd_ZW, ne, ne_IN, ne_NP, nl, nl_AW, nl_BE, nl_BQ, nl_CW, nl_NL, nl_SR, nl_SX, nmg, nmg_CM, nn, nn_NO, nnh, nnh_CM, nus, nus_SS, nyn, nyn_UG, om, om_ET, om_KE, or, or_IN, os, os_GE, os_RU, pa, pa_Arab, pa_Arab_PK, pa_Guru, pa_Guru_IN, pl, pl_PL, prg, prg_001, ps, ps_AF, pt, pt_AO, pt_BR, pt_CV, pt_GW, pt_MO, pt_MZ, pt_PT, pt_ST, pt_TL, qu, qu_BO, qu_EC, qu_PE, rm, rm_CH, rn, rn_BI, ro, ro_MD, ro_RO, rof, rof_TZ, root, ru, ru_BY, ru_KG, ru_KZ, ru_MD, ru_RU, ru_UA, rw, rw_RW, rwk, rwk_TZ, sah, sah_RU, saq, saq_KE, sbp, sbp_TZ, se, se_FI, se_NO, se_SE, seh, seh_MZ, ses, ses_ML, sg, sg_CF, shi, shi_Latn, shi_Latn_MA, shi_Tfng, shi_Tfng_MA, si, si_LK, sk, sk_SK, sl, sl_SI, smn, smn_FI, sn, sn_ZW, so, so_DJ, so_ET, so_KE, so_SO, sq, sq_AL, sq_MK, sq_XK, sr, sr_Cyrl, sr_Cyrl_BA, sr_Cyrl_ME, sr_Cyrl_RS, sr_Cyrl_XK, sr_Latn, sr_Latn_BA, sr_Latn_ME, sr_Latn_RS, sr_Latn_XK, sv, sv_AX, sv_FI, sv_SE, sw, sw_CD, sw_KE, sw_TZ, sw_UG, ta, ta_IN, ta_LK, ta_MY, ta_SG, te, te_IN, teo, teo_KE, teo_UG, th, th_TH, ti, ti_ER, ti_ET, tk, tk_TM, to, to_TO, tr, tr_CY, tr_TR, twq, twq_NE, tzm, tzm_MA, ug, ug_CN, uk, uk_UA, ur, ur_IN, ur_PK, uz, uz_Arab, uz_Arab_AF, uz_Cyrl, uz_Cyrl_UZ, uz_Latn, uz_Latn_UZ, vai, vai_Latn, vai_Latn_LR, vai_Vaii, vai_Vaii_LR, vi, vi_VN, vo, vo_001, vun, vun_TZ, wae, wae_CH, xog, xog_UG, yav, yav_CM, yi, yi_001, yo, yo_BJ, yo_NG, zgh, zgh_MA, zh, zh_Hans, zh_Hans_CN, zh_Hans_HK, zh_Hans_MO, zh_Hans_SG, zh_Hant, zh_Hant_HK, zh_Hant_MO, zh_Hant_TW, zu, zu_ZA");
641 
642         //LocaleMatcher.DEBUG = true;
643         ULocale expected = new ULocale("sv");
644         assertEquals(expected, matcherShort.getBestMatch(desired));
645         assertEquals(expected, matcherLong.getBestMatch(desired));
646         assertEquals(expected, matcherVeryLong.getBestMatch(desired));
647         //LocaleMatcher.DEBUG = false;
648 
649         for (int i = 0; i < 2; ++i) {
650             int iterations = i == 0 ? 1000 : 100000;
651             boolean showMessage = i != 0;
652             long timeShort = timeLocaleMatcher("Duration (few  supported):\t", desired, matcherShort, showMessage, iterations, 0);
653             timeLocaleMatcher("Duration (med. supported):\t", desired, matcherLong, showMessage, iterations, timeShort);
654             timeLocaleMatcher("Duration (many supported):\t", desired, matcherVeryLong, showMessage, iterations, timeShort);
655         }
656     }
657 
658     private long timeLocaleMatcher(String title, String desired, LocaleMatcher matcher,
659         boolean showmessage, int iterations, long comparisonTime) {
660         long start = System.nanoTime();
661         for (int i = iterations; i > 0; --i) {
662             matcher.getBestMatch(desired);
663         }
664         long delta = System.nanoTime() - start;
665         if (showmessage) logln(title + (delta / iterations) + " nanos, "
666             + (comparisonTime > 0 ? (delta * 100 / comparisonTime - 100) + "% longer" : ""));
667         return delta;
668     }
669 }
670