1 package org.unicode.cldr.unittest; 2 3 import java.util.HashSet; 4 import java.util.LinkedHashMap; 5 import java.util.Map; 6 import java.util.Set; 7 8 import org.unicode.cldr.util.CLDRConfig; 9 import org.unicode.cldr.util.Pair; 10 11 import com.ibm.icu.dev.test.TestFmwk; 12 import com.ibm.icu.impl.Row.R4; 13 import com.ibm.icu.util.LocaleMatcher; 14 import com.ibm.icu.util.LocaleMatcher.LanguageMatcherData; 15 import com.ibm.icu.util.LocalePriorityList; 16 import com.ibm.icu.util.ULocale; 17 18 public class LanguageInfoTest extends TestFmwk { 19 static CLDRConfig testInfo = CLDRConfig.getInstance(); 20 static LanguageMatcherData data = LocaleMatcherTest.LANGUAGE_MATCHER_DATA; 21 static Map<ULocale, ULocale> FALLBACKS = new LinkedHashMap<>(); 22 23 // @Override 24 // protected void init() throws Exception { 25 // super.init(); 26 // SupplementalDataInfo supp = testInfo.getSupplementalDataInfo(); 27 // List<R4<String, String, Integer, Boolean>> languageData = supp 28 // .getLanguageMatcherData("written"); 29 // for (R4<String, String, Integer, Boolean> item : languageData) { 30 // data.addDistance(item.get0().replace('_', '-'), item.get1() 31 // .replace('_', '-'), item.get2(), item.get3()); 32 // logln(item.get0() + "\t" + getName(item.get0()) + "\t" 33 // + item.get1() + "\t" + getName(item.get1()) + "\t" 34 // + item.get2() + "\t" + item.get3()); 35 // if (item.get2() == 10) { 36 // FALLBACKS.put(new ULocale(item.get0()), 37 // new ULocale(item.get1())); 38 // } 39 // } 40 // data.freeze(); 41 // } 42 testGetData()43 public void testGetData() { 44 Set<Pair<String, String>> alreadySeen = new HashSet<>(); 45 for (R4<String, String, Integer, Boolean> foo : testInfo.getSupplementalDataInfo().getLanguageMatcherData("written_new")) { 46 // assertTrue("check bounds", foo.get2() >= 0 && foo.get2() <= 100); 47 48 String desired = foo.get0(); 49 String supported = foo.get1(); 50 Integer score = foo.get2(); 51 Boolean oneway = foo.get3(); 52 assertEquals("Same number of fields", count('_', desired), count('_', supported)); 53 54 Pair<String, String> source = Pair.of(desired, supported); 55 if (alreadySeen.contains(source)) { 56 errln("Duplicate entry for " + source); 57 continue; 58 } 59 alreadySeen.add(source); 60 logln(score 61 + "\t" + desired + "\t" + getName(desired) 62 + "\t" + supported + "\t" + getName(supported) 63 + "\t" + oneway); 64 } 65 } 66 count(char c, String string)67 private int count(char c, String string) { 68 int count = 0; 69 int pos = string.indexOf(c); 70 while (pos >= 0) { 71 ++count; 72 pos = string.indexOf(c, pos + 1); 73 } 74 return count; 75 } 76 getName(String item)77 public static String getName(String item) { 78 return item.contains("*") ? "n/a" : item.contains("$") ? item : testInfo.getEnglish().getName(item); 79 } 80 main(String[] args)81 public static void main(String[] args) { 82 new LanguageInfoTest().run(args); 83 } 84 testBasics()85 public void testBasics() { 86 if (logKnownIssue("CLDR-14166", "Skip until CLDR updated for new ICU4J LocaleMatcher")) { 87 return; 88 } 89 final LocaleMatcher matcher = new LocaleMatcher(LocalePriorityList 90 .add(ULocale.FRENCH).add(ULocale.UK).add(ULocale.ENGLISH) 91 .build(), data); 92 logln(matcher.toString()); 93 94 assertEquals("UK in FR, UK, EN", ULocale.UK, 95 matcher.getBestMatch(ULocale.UK)); 96 assertEquals("US in FR, UK, EN", ULocale.ENGLISH, 97 matcher.getBestMatch(ULocale.US)); 98 assertEquals("FR in FR, UK, EN", ULocale.FRENCH, 99 matcher.getBestMatch(ULocale.FRANCE)); 100 assertEquals("JA in FR, UK, EN", ULocale.FRENCH, 101 matcher.getBestMatch(ULocale.JAPAN)); 102 } 103 TestChinese()104 public void TestChinese() { 105 // if (logKnownIssue("Cldrbug:8169", 106 // "Problems with language matcher TestChinese.")) { 107 // return; 108 // } 109 if (logKnownIssue("CLDR-14166", "Skip until CLDR updated for new ICU4J LocaleMatcher")) { 110 return; 111 } 112 LocaleMatcher matcher = new LocaleMatcher(LocalePriorityList.add( 113 "zh_CN, zh_TW, iw").build(), data); 114 ULocale taiwanChinese = new ULocale("zh_TW"); 115 ULocale chinaChinese = new ULocale("zh_CN"); 116 assertEquals("zh_CN, zh_TW, iw;", taiwanChinese, 117 matcher.getBestMatch("zh_Hant_HK")); 118 119 assertEquals("zh_CN, zh_TW, iw;", taiwanChinese, 120 matcher.getBestMatch("zh_Hant_TW")); 121 assertEquals("zh_CN, zh_TW, iw;", taiwanChinese, 122 matcher.getBestMatch("zh_Hant")); 123 assertEquals("zh_CN, zh_TW, iw;", taiwanChinese, 124 matcher.getBestMatch("zh_TW")); 125 assertEquals("zh_CN, zh_TW, iw;", chinaChinese, 126 matcher.getBestMatch("zh_Hans_CN")); 127 assertEquals("zh_CN, zh_TW, iw;", chinaChinese, 128 matcher.getBestMatch("zh_CN")); 129 assertEquals("zh_CN, zh_TW, iw;", chinaChinese, 130 matcher.getBestMatch("zh")); 131 } 132 133 static final ULocale MUL = new ULocale("mul"); 134 testFallbacks()135 public void testFallbacks() { 136 if (logKnownIssue("ICU-21241", "waiting on LocaleMatcherData update")) { 137 return; 138 } 139 for (R4<String, String, Integer, Boolean> foo : testInfo.getSupplementalDataInfo().getLanguageMatcherData("written_new")) { 140 String rawDesired = foo.get0(); 141 if (rawDesired.contains("*")) { 142 continue; 143 } 144 if (rawDesired.equals("tlh")) { 145 if (logKnownIssue("cldrbug:8919", "Hack until tlh has likely subtags")) { 146 continue; 147 } 148 } 149 ULocale desired = new ULocale(rawDesired); 150 ULocale supported = new ULocale(foo.get1()); 151 Integer score = foo.get2(); 152 Boolean oneway = foo.get3(); 153 if (!oneway) { 154 continue; 155 } 156 157 // we put "mul" first in the list, to verify that the fallback works enough to be better than the default. 158 159 @SuppressWarnings("deprecation") 160 final LocaleMatcher matcher = new LocaleMatcher( 161 LocalePriorityList 162 .add(MUL).add(supported) 163 .build(), 164 data); 165 166 ULocale bestMatch = matcher.getBestMatch(desired); 167 if (!assertEquals("fallback for " + desired + ", " + score, supported, bestMatch)) { 168 ULocale max = ULocale.addLikelySubtags(desired); 169 warnln("Might be missing something like\n" 170 + "<languageMatch desired=\"" 171 + desired.getLanguage() + "_" + max.getScript() 172 + "\" supported=\"en_Latn\" percent=\"90\" oneway=\"true\" />"); 173 bestMatch = matcher.getBestMatch(desired); // for debugging 174 } 175 } 176 } 177 } 178