1 package org.unicode.cldr.unittest;
2 
3 import java.util.HashSet;
4 import java.util.LinkedHashMap;
5 import java.util.Map;
6 import java.util.Set;
7 
8 import org.unicode.cldr.util.CLDRConfig;
9 import org.unicode.cldr.util.Pair;
10 
11 import com.ibm.icu.dev.test.TestFmwk;
12 import com.ibm.icu.impl.Row.R4;
13 import com.ibm.icu.util.LocaleMatcher;
14 import com.ibm.icu.util.LocaleMatcher.LanguageMatcherData;
15 import com.ibm.icu.util.LocalePriorityList;
16 import com.ibm.icu.util.ULocale;
17 
18 public class LanguageInfoTest extends TestFmwk {
19     static CLDRConfig testInfo = CLDRConfig.getInstance();
20     static LanguageMatcherData data = LocaleMatcherTest.LANGUAGE_MATCHER_DATA;
21     static Map<ULocale, ULocale> FALLBACKS = new LinkedHashMap<>();
22 
23     //	@Override
24     //	protected void init() throws Exception {
25     //		super.init();
26     //		SupplementalDataInfo supp = testInfo.getSupplementalDataInfo();
27     //		List<R4<String, String, Integer, Boolean>> languageData = supp
28     //				.getLanguageMatcherData("written");
29     //		for (R4<String, String, Integer, Boolean> item : languageData) {
30     //			data.addDistance(item.get0().replace('_', '-'), item.get1()
31     //					.replace('_', '-'), item.get2(), item.get3());
32     //			logln(item.get0() + "\t" + getName(item.get0()) + "\t"
33     //					+ item.get1() + "\t" + getName(item.get1()) + "\t"
34     //					+ item.get2() + "\t" + item.get3());
35     //			if (item.get2() == 10) {
36     //				FALLBACKS.put(new ULocale(item.get0()),
37     //						new ULocale(item.get1()));
38     //			}
39     //		}
40     //		data.freeze();
41     //	}
42 
testGetData()43     public void testGetData() {
44         Set<Pair<String, String>> alreadySeen = new HashSet<>();
45         for (R4<String, String, Integer, Boolean> foo : testInfo.getSupplementalDataInfo().getLanguageMatcherData("written_new")) {
46             //            assertTrue("check bounds", foo.get2() >= 0 && foo.get2() <= 100);
47 
48             String desired = foo.get0();
49             String supported = foo.get1();
50             Integer score = foo.get2();
51             Boolean oneway = foo.get3();
52             assertEquals("Same number of fields", count('_', desired), count('_', supported));
53 
54             Pair<String, String> source = Pair.of(desired, supported);
55             if (alreadySeen.contains(source)) {
56                 errln("Duplicate entry for " + source);
57                 continue;
58             }
59             alreadySeen.add(source);
60             logln(score
61                 + "\t" + desired + "\t" + getName(desired)
62                 + "\t" + supported + "\t" + getName(supported)
63                 + "\t" + oneway);
64         }
65     }
66 
count(char c, String string)67     private int count(char c, String string) {
68         int count = 0;
69         int pos = string.indexOf(c);
70         while (pos >= 0) {
71             ++count;
72             pos = string.indexOf(c, pos + 1);
73         }
74         return count;
75     }
76 
getName(String item)77     public static String getName(String item) {
78         return item.contains("*") ? "n/a" : item.contains("$") ? item : testInfo.getEnglish().getName(item);
79     }
80 
main(String[] args)81     public static void main(String[] args) {
82         new LanguageInfoTest().run(args);
83     }
84 
testBasics()85     public void testBasics() {
86         if (logKnownIssue("CLDR-14166", "Skip until CLDR updated for new ICU4J LocaleMatcher")) {
87             return;
88         }
89         final LocaleMatcher matcher = new LocaleMatcher(LocalePriorityList
90             .add(ULocale.FRENCH).add(ULocale.UK).add(ULocale.ENGLISH)
91             .build(), data);
92         logln(matcher.toString());
93 
94         assertEquals("UK in FR, UK, EN", ULocale.UK,
95             matcher.getBestMatch(ULocale.UK));
96         assertEquals("US in FR, UK, EN", ULocale.ENGLISH,
97             matcher.getBestMatch(ULocale.US));
98         assertEquals("FR in FR, UK, EN", ULocale.FRENCH,
99             matcher.getBestMatch(ULocale.FRANCE));
100         assertEquals("JA in FR, UK, EN", ULocale.FRENCH,
101             matcher.getBestMatch(ULocale.JAPAN));
102     }
103 
TestChinese()104     public void TestChinese() {
105         //		if (logKnownIssue("Cldrbug:8169",
106         //				"Problems with language matcher TestChinese.")) {
107         //			return;
108         //		}
109         if (logKnownIssue("CLDR-14166", "Skip until CLDR updated for new ICU4J LocaleMatcher")) {
110             return;
111         }
112         LocaleMatcher matcher = new LocaleMatcher(LocalePriorityList.add(
113             "zh_CN, zh_TW, iw").build(), data);
114         ULocale taiwanChinese = new ULocale("zh_TW");
115         ULocale chinaChinese = new ULocale("zh_CN");
116         assertEquals("zh_CN, zh_TW, iw;", taiwanChinese,
117             matcher.getBestMatch("zh_Hant_HK"));
118 
119         assertEquals("zh_CN, zh_TW, iw;", taiwanChinese,
120             matcher.getBestMatch("zh_Hant_TW"));
121         assertEquals("zh_CN, zh_TW, iw;", taiwanChinese,
122             matcher.getBestMatch("zh_Hant"));
123         assertEquals("zh_CN, zh_TW, iw;", taiwanChinese,
124             matcher.getBestMatch("zh_TW"));
125         assertEquals("zh_CN, zh_TW, iw;", chinaChinese,
126             matcher.getBestMatch("zh_Hans_CN"));
127         assertEquals("zh_CN, zh_TW, iw;", chinaChinese,
128             matcher.getBestMatch("zh_CN"));
129         assertEquals("zh_CN, zh_TW, iw;", chinaChinese,
130             matcher.getBestMatch("zh"));
131     }
132 
133     static final ULocale MUL = new ULocale("mul");
134 
testFallbacks()135     public void testFallbacks() {
136         if (logKnownIssue("ICU-21241", "waiting on LocaleMatcherData update")) {
137             return;
138         }
139         for (R4<String, String, Integer, Boolean> foo : testInfo.getSupplementalDataInfo().getLanguageMatcherData("written_new")) {
140             String rawDesired = foo.get0();
141             if (rawDesired.contains("*")) {
142                 continue;
143             }
144             if (rawDesired.equals("tlh")) {
145                 if (logKnownIssue("cldrbug:8919", "Hack until tlh has likely subtags")) {
146                     continue;
147                 }
148             }
149             ULocale desired = new ULocale(rawDesired);
150             ULocale supported = new ULocale(foo.get1());
151             Integer score = foo.get2();
152             Boolean oneway = foo.get3();
153             if (!oneway) {
154                 continue;
155             }
156 
157             // we put "mul" first in the list, to verify that the fallback works enough to be better than the default.
158 
159             @SuppressWarnings("deprecation")
160             final LocaleMatcher matcher = new LocaleMatcher(
161                 LocalePriorityList
162                     .add(MUL).add(supported)
163                     .build(),
164                 data);
165 
166             ULocale bestMatch = matcher.getBestMatch(desired);
167             if (!assertEquals("fallback for " + desired + ", " + score, supported, bestMatch)) {
168                 ULocale max = ULocale.addLikelySubtags(desired);
169                 warnln("Might be missing something like\n"
170                     + "<languageMatch desired=\""
171                     + desired.getLanguage() + "_" + max.getScript()
172                     + "\" supported=\"en_Latn\" percent=\"90\" oneway=\"true\" />");
173                 bestMatch = matcher.getBestMatch(desired); // for debugging
174             }
175         }
176     }
177 }
178