1 package org.unicode.cldr.unittest; 2 3 import java.util.ArrayList; 4 import java.util.Arrays; 5 import java.util.Collection; 6 import java.util.Collections; 7 import java.util.Date; 8 import java.util.EnumMap; 9 import java.util.EnumSet; 10 import java.util.HashMap; 11 import java.util.HashSet; 12 import java.util.Iterator; 13 import java.util.LinkedHashMap; 14 import java.util.LinkedHashSet; 15 import java.util.List; 16 import java.util.Locale; 17 import java.util.Map; 18 import java.util.Map.Entry; 19 import java.util.Set; 20 import java.util.TreeMap; 21 import java.util.TreeSet; 22 import java.util.regex.Matcher; 23 import java.util.regex.Pattern; 24 25 import org.unicode.cldr.draft.ScriptMetadata; 26 import org.unicode.cldr.test.CoverageLevel2; 27 import org.unicode.cldr.tool.LikelySubtags; 28 import org.unicode.cldr.tool.PluralMinimalPairs; 29 import org.unicode.cldr.tool.PluralRulesFactory; 30 import org.unicode.cldr.util.Builder; 31 import org.unicode.cldr.util.CLDRConfig; 32 import org.unicode.cldr.util.CLDRFile; 33 import org.unicode.cldr.util.CLDRFile.WinningChoice; 34 import org.unicode.cldr.util.CLDRLocale; 35 import org.unicode.cldr.util.CldrUtility; 36 import org.unicode.cldr.util.GrammarInfo; 37 import org.unicode.cldr.util.GrammarInfo.GrammaticalFeature; 38 import org.unicode.cldr.util.GrammarInfo.GrammaticalScope; 39 import org.unicode.cldr.util.GrammarInfo.GrammaticalTarget; 40 import org.unicode.cldr.util.Iso639Data; 41 import org.unicode.cldr.util.Iso639Data.Scope; 42 import org.unicode.cldr.util.IsoCurrencyParser; 43 import org.unicode.cldr.util.LanguageTagCanonicalizer; 44 import org.unicode.cldr.util.LanguageTagParser; 45 import org.unicode.cldr.util.Level; 46 import org.unicode.cldr.util.Organization; 47 import org.unicode.cldr.util.Pair; 48 import org.unicode.cldr.util.PluralRanges; 49 import org.unicode.cldr.util.PreferredAndAllowedHour; 50 import org.unicode.cldr.util.PreferredAndAllowedHour.HourStyle; 51 import 
org.unicode.cldr.util.StandardCodes; 52 import org.unicode.cldr.util.StandardCodes.CodeType; 53 import org.unicode.cldr.util.StandardCodes.LstrType; 54 import org.unicode.cldr.util.SupplementalDataInfo; 55 import org.unicode.cldr.util.SupplementalDataInfo.BasicLanguageData; 56 import org.unicode.cldr.util.SupplementalDataInfo.BasicLanguageData.Type; 57 import org.unicode.cldr.util.SupplementalDataInfo.ContainmentStyle; 58 import org.unicode.cldr.util.SupplementalDataInfo.CurrencyDateInfo; 59 import org.unicode.cldr.util.SupplementalDataInfo.CurrencyNumberInfo; 60 import org.unicode.cldr.util.SupplementalDataInfo.DateRange; 61 import org.unicode.cldr.util.SupplementalDataInfo.MetaZoneRange; 62 import org.unicode.cldr.util.SupplementalDataInfo.OfficialStatus; 63 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo; 64 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo.Count; 65 import org.unicode.cldr.util.SupplementalDataInfo.PluralType; 66 import org.unicode.cldr.util.SupplementalDataInfo.PopulationData; 67 import org.unicode.cldr.util.SupplementalDataInfo.SampleList; 68 import org.unicode.cldr.util.Validity; 69 import org.unicode.cldr.util.Validity.Status; 70 71 import com.google.common.base.Joiner; 72 import com.google.common.collect.ImmutableSet; 73 import com.google.common.collect.Multimap; 74 import com.google.common.collect.TreeMultimap; 75 import com.ibm.icu.impl.Relation; 76 import com.ibm.icu.impl.Row; 77 import com.ibm.icu.impl.Row.R2; 78 import com.ibm.icu.impl.Row.R3; 79 import com.ibm.icu.impl.Utility; 80 import com.ibm.icu.lang.UCharacter; 81 import com.ibm.icu.lang.UCharacterEnums; 82 import com.ibm.icu.lang.UScript; 83 import com.ibm.icu.text.PluralRules; 84 import com.ibm.icu.text.PluralRules.FixedDecimal; 85 import com.ibm.icu.text.PluralRules.FixedDecimalRange; 86 import com.ibm.icu.text.PluralRules.FixedDecimalSamples; 87 import com.ibm.icu.text.PluralRules.Operand; 88 import com.ibm.icu.text.PluralRules.SampleType; 89 import 
com.ibm.icu.text.StringTransform; 90 import com.ibm.icu.text.UnicodeSet; 91 import com.ibm.icu.util.Output; 92 import com.ibm.icu.util.TimeZone; 93 import com.ibm.icu.util.ULocale; 94 95 public class TestSupplementalInfo extends TestFmwkPlus { 96 private static final boolean DEBUG = true; 97 98 static CLDRConfig testInfo = CLDRConfig.getInstance(); 99 100 private static final StandardCodes STANDARD_CODES = testInfo 101 .getStandardCodes(); 102 103 private static final SupplementalDataInfo SUPPLEMENTAL = testInfo 104 .getSupplementalDataInfo(); 105 main(String[] args)106 public static void main(String[] args) { 107 new TestSupplementalInfo().run(args); 108 } 109 TestPluralSampleOrder()110 public void TestPluralSampleOrder() { 111 HashSet<PluralInfo> seen = new HashSet<>(); 112 for (String locale : SUPPLEMENTAL.getPluralLocales()) { 113 if (locale.equals("root")) { 114 continue; 115 } 116 PluralInfo pi = SUPPLEMENTAL.getPlurals(locale); 117 if (seen.contains(pi)) { 118 continue; 119 } 120 seen.add(pi); 121 for (SampleType s : SampleType.values()) { 122 for (Count c : pi.getCounts(s)) { 123 FixedDecimalSamples sSamples = pi.getPluralRules() 124 .getDecimalSamples(c.toString(), s); 125 if (sSamples == null) { 126 errln(locale + " no sample for " + c); 127 continue; 128 } 129 if (s == SampleType.DECIMAL) { 130 continue; // skip 131 } 132 FixedDecimalRange lastSample = null; 133 for (FixedDecimalRange sample : sSamples.samples) { 134 if (lastSample != null) { 135 if (compare(lastSample.start,sample.start) > 0) { 136 errln(locale + ":" + c + ": out of order with " 137 + lastSample + " > " + sample); 138 } else if (false) { 139 logln(locale + ":" + c + ": in order with " 140 + lastSample + " < " + sample); 141 } 142 } 143 lastSample = sample; 144 } 145 } 146 } 147 } 148 } 149 150 /** 151 * Hack until ICU's FixedDecimal is fixed 152 * 153 */ compare(PluralRules.FixedDecimal me, PluralRules.FixedDecimal other)154 public static int compare(PluralRules.FixedDecimal me, 
PluralRules.FixedDecimal other) { 155 if (me.getPluralOperand(Operand.e) != other.getPluralOperand(Operand.e)) { 156 return me.getPluralOperand(Operand.e) < other.getPluralOperand(Operand.e) ? -1 : 1; 157 } 158 if (me.getIntegerValue() != other.getIntegerValue()) { 159 return me.getIntegerValue() < other.getIntegerValue() ? -1 : 1; 160 } 161 if (me.getSource() != other.getSource()) { 162 return me.getSource() < other.getSource() ? -1 : 1; 163 } 164 if (me.getVisibleDecimalDigitCount() != other.getVisibleDecimalDigitCount()) { 165 return me.getVisibleDecimalDigitCount() < other.getVisibleDecimalDigitCount() ? -1 : 1; 166 } 167 long diff = me.getDecimalDigits() - other.getDecimalDigits(); 168 if (diff != 0) { 169 return diff < 0 ? -1 : 1; 170 } 171 return 0; 172 } 173 TestPluralRanges()174 public void TestPluralRanges() { 175 PluralRulesFactory prf = PluralRulesFactory.getInstance(SUPPLEMENTAL); 176 Set<String> localesToTest = new TreeSet<>( 177 SUPPLEMENTAL.getPluralRangesLocales()); 178 for (String locale : StandardCodes.make().getLocaleCoverageLocales( 179 "google")) { // superset 180 if (locale.equals("*") || locale.contains("_")) { 181 continue; 182 } 183 localesToTest.add(locale); 184 } 185 Set<String> modernLocales = testInfo.getStandardCodes() 186 .getLocaleCoverageLocales(Organization.cldr, 187 EnumSet.of(Level.MODERN)); 188 189 Output<FixedDecimal> maxSample = new Output<>(); 190 Output<FixedDecimal> minSample = new Output<>(); 191 192 for (String locale : localesToTest) { 193 final String templateLine = "Template for " + ULocale.getDisplayName(locale, "en") + " (" + locale + ") translators to fix:"; 194 PluralInfo pluralInfo = SUPPLEMENTAL.getPlurals(locale); 195 Set<Count> counts = pluralInfo.getCounts(); 196 197 final PluralMinimalPairs samplePatterns = PluralMinimalPairs.getInstance(new ULocale(locale).toString()); 198 199 // check that there are no null values 200 PluralRanges pluralRanges = SUPPLEMENTAL.getPluralRanges(locale); 201 if (pluralRanges == 
null) { 202 if (!modernLocales.contains(locale)) { 203 logln("Missing plural ranges for " + locale); 204 } else { 205 errOrLog(CoverageIssue.error, locale + "\tMissing plural ranges", "Cldrbug:7839", "Missing plural data for modern locales"); 206 StringBuilder failureCases = new StringBuilder(templateLine); 207 for (Count start : counts) { 208 for (Count end : counts) { 209 pluralInfo.rangeExists(start, end, minSample, maxSample); 210 final String rangeLine = getRangeLine(start, end, null, maxSample, minSample, samplePatterns); 211 failureCases.append("\n" + locale + "\t" + rangeLine); 212 } 213 } 214 errOrLog(CoverageIssue.warn, failureCases.toString()); 215 } 216 continue; 217 } 218 EnumSet<Count> found = EnumSet.noneOf(Count.class); 219 for (Count count : Count.values()) { 220 if (pluralRanges.isExplicitlySet(count) 221 && !counts.contains(count)) { 222 assertTrue( 223 locale 224 + "\t pluralRanges categories must be valid for locale:\t" 225 + count + " must be in " + counts, 226 !pluralRanges.isExplicitlySet(count)); 227 } 228 for (Count end : Count.values()) { 229 Count result = pluralRanges.getExplicit(count, end); 230 if (result != null) { 231 found.add(result); 232 } 233 } 234 } 235 236 // check empty range results 237 if (found.isEmpty()) { 238 errOrLog(CoverageIssue.error, "Empty range results for " + locale, "Cldrbug:7839", "Missing plural data for modern locales"); 239 } else { 240 if (samplePatterns == null) { 241 errOrLog(CoverageIssue.error, locale + "\tMissing sample patterns", "Cldrbug:7839", "Missing plural data for modern locales"); 242 } else { 243 for (Count result : found) { 244 String samplePattern = samplePatterns.get( 245 PluralRules.PluralType.CARDINAL, result); 246 if (samplePattern != null && !samplePattern.contains("{0}")) { 247 errln("Plural Ranges cannot have results that don't use {0} in samples: " 248 + locale 249 + ", " 250 + result 251 + "\t«" 252 + samplePattern + "»"); 253 } 254 } 255 } 256 if (isVerbose()) { 257 logln("Range 
results for " + locale + ":\t" + found); 258 } 259 } 260 261 // check for missing values 262 boolean failure = false; 263 StringBuilder failureCases = new StringBuilder(templateLine); 264 for (Count start : counts) { 265 for (Count end : counts) { 266 boolean needsValue = pluralInfo.rangeExists(start, end, 267 minSample, maxSample); 268 Count explicitValue = pluralRanges.getExplicit(start, end); 269 final String rangeLine = getRangeLine(start, end, explicitValue, maxSample, minSample, samplePatterns); 270 failureCases.append("\n" + locale + "\t" + rangeLine); 271 if (needsValue && explicitValue == null) { 272 errOrLog(CoverageIssue.error, locale + "\tNo explicit value for range: " 273 + rangeLine, 274 "Cldrbug:7839", "Missing plural data for modern locales"); 275 failure = true; 276 failureCases.append("\tError — need explicit result"); 277 } else if (!needsValue && explicitValue != null) { 278 errOrLog(CoverageIssue.error, locale + "\tDoesn't need explicit value, but has one: " 279 + PluralRanges.showRange(start, end, explicitValue), 280 "Cldrbug:7839", "Missing plural data for modern locales"); 281 failureCases.append("\tUnnecessary"); 282 failure = true; 283 } else { 284 failureCases.append("\tOK"); 285 } 286 } 287 } 288 if (failure) { 289 errOrLog(CoverageIssue.warn, failureCases.toString()); 290 } 291 } 292 } 293 getRangeLine(Count start, Count end, Count result, Output<FixedDecimal> maxSample, Output<FixedDecimal> minSample, PluralMinimalPairs samplePatterns)294 private String getRangeLine(Count start, Count end, Count result, 295 Output<FixedDecimal> maxSample, Output<FixedDecimal> minSample, 296 PluralMinimalPairs samplePatterns) { 297 final String range = minSample + "–" + maxSample; 298 String example = range; 299 if (samplePatterns != null) { 300 example = ""; 301 if (result != null) { 302 String pat = samplePatterns.get(PluralRules.PluralType.CARDINAL, result); 303 example += "«" + (pat == null ? 
"MISSING-PATTERN" : pat.replace("{0}", range)) + "»"; 304 } else { 305 for (Count c : new TreeSet<>(Arrays.asList(start, end, Count.other))) { 306 String pat = samplePatterns.get(PluralRules.PluralType.CARDINAL, c); 307 example += c + ":«" + (pat == null ? "MISSING-PATTERN" : pat.replace("{0}", range)) + "»" + "?\tOR "; 308 } 309 example += " …"; 310 } 311 } 312 return start + "\t" + end + "\t" + (result == null ? "?" : result.toString()) + "\t" + example; 313 } 314 getRangeLine(Count count, PluralRules pluralRules, String pattern)315 private String getRangeLine(Count count, PluralRules pluralRules, String pattern) { 316 String sample = "?"; 317 FixedDecimalSamples exampleList = pluralRules.getDecimalSamples(count.toString(), PluralRules.SampleType.INTEGER); 318 if (exampleList == null) { 319 exampleList = pluralRules.getDecimalSamples(count.toString(), PluralRules.SampleType.DECIMAL); 320 } 321 FixedDecimal sampleDecimal = PluralInfo.getNonZeroSampleIfPossible(exampleList); 322 sample = sampleDecimal.toString(); 323 324 String example = pattern == null ? "NO-SAMPLE!" 
: "«" + pattern.replace("{0}", sample) + "»"; 325 return count + "\t" + example; 326 } 327 TestPluralSamples()328 public void TestPluralSamples() { 329 String[][] test = { { "en", "ordinal", "1", "one" }, 330 { "en", "ordinal", "2", "two" }, 331 { "en", "ordinal", "3", "few" }, 332 { "en", "ordinal", "4", "other" }, 333 { "sl", "cardinal", "2", "two" }, }; 334 for (String[] row : test) { 335 checkPluralSamples(row); 336 } 337 } 338 TestPluralSamples2()339 public void TestPluralSamples2() { 340 PluralRulesFactory prf = PluralRulesFactory.getInstance(SUPPLEMENTAL); 341 for (String locale : prf.getLocales()) { 342 if (locale.equals("und")) { 343 continue; 344 } 345 if (locale.equals("pl")) { 346 int debug = 0; 347 } 348 final PluralMinimalPairs samplePatterns = PluralMinimalPairs.getInstance(locale); 349 for (PluralRules.PluralType type : PluralRules.PluralType.values()) { 350 PluralInfo rules = SUPPLEMENTAL.getPlurals( 351 SupplementalDataInfo.PluralType.fromStandardType(type), 352 locale.toString()); 353 if (rules.getCounts().size() == 1) { 354 continue; // don't require rules for unary cases 355 } 356 Multimap<String, Count> sampleToCount = TreeMultimap.create(); 357 358 for (Count count : rules.getCounts()) { 359 String sample = samplePatterns.get(type, count); 360 if (sample == null) { 361 errOrLog(CoverageIssue.error, locale + "\t" + type + " \tmissing samples for " + count, "cldrbug:7075", 362 "Missing ordinal minimal pairs"); 363 } else { 364 sampleToCount.put(sample, count); 365 PluralRules pRules = rules.getPluralRules(); 366 double unique = pRules.getUniqueKeywordValue(count 367 .toString()); 368 if (unique == PluralRules.NO_UNIQUE_VALUE 369 && !sample.contains("{0}")) { 370 errln("Missing {0} in sample: " + locale + ", " + type + ", " + count + " «" + sample + "»"); 371 } 372 } 373 } 374 for (Entry<String, Collection<Count>> entry : sampleToCount.asMap().entrySet()) { 375 if (entry.getValue().size() > 1) { 376 errln("Colliding minimal pair samples: " + 
locale + ", " + type + ", " + entry.getValue() + " «" + entry.getKey() + "»"); 377 } 378 } 379 } 380 } 381 } 382 TestCldrScriptCodes()383 public void TestCldrScriptCodes() { 384 Set<String> codes = SUPPLEMENTAL.getCLDRScriptCodes(); 385 386 Set<String> unicodeScripts = ScriptMetadata.getScripts(); 387 assertRelation("getCLDRScriptCodes contains Unicode Scripts", true, codes, CONTAINS_ALL, unicodeScripts); 388 389 ImmutableSet<String> allSpecials = ImmutableSet.of("Zinh", "Zmth", "Zsye", "Zsym", "Zxxx", "Zyyy", "Zzzz"); 390 assertRelation("getCLDRScriptCodes contains allSpecials", true, codes, CONTAINS_ALL, allSpecials); 391 392 ImmutableSet<String> allCompos = ImmutableSet.of("Hanb", "Hrkt", "Jamo", "Jpan", "Kore"); 393 assertRelation("getCLDRScriptCodes contains allCompos", true, codes, CONTAINS_ALL, allCompos); 394 395 Map<Status, Set<String>> scripts = Validity.getInstance().getStatusToCodes(LstrType.script); 396 for (Entry<Status, Set<String>> e : scripts.entrySet()) { 397 switch (e.getKey()) { 398 case regular: 399 case special: 400 case unknown: 401 assertRelation("getCLDRScriptCodes contains " + e.getKey(), true, codes, CONTAINS_ALL, e.getValue()); 402 break; 403 default: 404 break; // do nothin 405 } 406 } 407 408 ImmutableSet<String> variants = ImmutableSet.of("Cyrs", "Geok", "Latf", "Latg", "Syre", "Syrj", "Syrn"); 409 assertRelation("getCLDRScriptCodes contains variants", false, codes, CONTAINS_SOME, variants); 410 } 411 checkPluralSamples(String... row)412 public void checkPluralSamples(String... 
row) { 413 PluralInfo pluralInfo = SUPPLEMENTAL.getPlurals( 414 PluralType.valueOf(row[1]), row[0]); 415 Count count = pluralInfo.getCount(new FixedDecimal(row[2])); 416 assertEquals(String.join(", ", row), 417 Count.valueOf(row[3]), count); 418 } 419 TestPluralLocales()420 public void TestPluralLocales() { 421 // get the unique rules 422 for (PluralType type : PluralType.values()) { 423 Relation<PluralInfo, String> pluralsToLocale = Relation.of( 424 new HashMap<PluralInfo, Set<String>>(), TreeSet.class); 425 for (String locale : new TreeSet<>( 426 SUPPLEMENTAL.getPluralLocales(type))) { 427 PluralInfo pluralInfo = SUPPLEMENTAL.getPlurals(type, locale); 428 pluralsToLocale.put(pluralInfo, locale); 429 } 430 431 String[][] equivalents = { { "mo", "ro" }, { "tl", "fil" }, 432 { "he", "iw" }, { "in", "id" }, { "jw", "jv" }, 433 { "ji", "yi" }, { "sh", "sr" }, }; 434 for (Entry<PluralInfo, Set<String>> pluralInfoEntry : pluralsToLocale 435 .keyValuesSet()) { 436 PluralInfo pluralInfo2 = pluralInfoEntry.getKey(); 437 Set<String> locales = pluralInfoEntry.getValue(); 438 // check that equivalent locales are either both in or both out 439 for (String[] row : equivalents) { 440 assertEquals( 441 type + " must be equivalent: " + Arrays.asList(row), 442 locales.contains(row[0]), locales.contains(row[1])); 443 } 444 // check that no rules contain 'within' 445 for (Count count : pluralInfo2.getCounts()) { 446 String rule = pluralInfo2.getRule(count); 447 if (rule == null) { 448 continue; 449 } 450 assertFalse( 451 "Rule '" + rule + "' for " + Arrays.asList(locales) 452 + " doesn't contain 'within'", 453 rule.contains("within")); 454 } 455 } 456 } 457 } 458 TestDigitPluralCases()459 public void TestDigitPluralCases() { 460 String[][] tests = { 461 { "en", "one", "1", "1" }, 462 { "en", "one", "2", "" }, 463 { "en", "one", "3", "" }, 464 { "en", "one", "4", "" }, 465 { "en", "other", "1", "0, 2-9, 0.0, 0.1, 0.2, …" }, 466 { "en", "other", "2", "10-99, 10.0, 10.1, 10.2, …" }, 467 
{ "en", "other", "3", "100-999, 100.0, 100.1, 100.2, …" }, 468 { "en", "other", "4", "1000-9999, 1000.0, 1000.1, 1000.2, …" }, 469 { "hr", "one", "1", "1, 0.1, 2.10, 1.1, …" }, 470 { "hr", "one", "2", 471 "21, 31, 41, 51, 61, 71, …, 10.1, 12.10, 11.1, …" }, 472 { "hr", "one", "3", 473 "101, 121, 131, 141, 151, 161, …, 100.1, 102.10, 101.1, …" }, 474 { "hr", "one", "4", 475 "1001, 1021, 1031, 1041, 1051, 1061, …, 1000.1, 1002.10, 1001.1, …" }, 476 { "hr", "few", "1", "2-4, 0.2, 0.3, 0.4, …" }, 477 { "hr", "few", "2", 478 "22-24, 32-34, 42-44, …, 10.2, 10.3, 10.4, …" }, 479 { "hr", "few", "3", 480 "102-104, 122-124, 132-134, …, 100.2, 100.3, 100.4, …" }, 481 { "hr", "few", "4", 482 "1002-1004, 1022-1024, 1032-1034, …, 1000.2, 1000.3, 1000.4, …" }, 483 { "hr", "other", "1", "0, 5-9, 0.0, 0.5, 0.6, …" }, 484 { "hr", "other", "2", 485 "10-20, 25-30, 35-40, …, 10.0, 10.5, 10.6, …" }, 486 { "hr", "other", "3", 487 "100, 105-120, 125-130, 135-140, …, 100.0, 100.5, 100.6, …" }, 488 { "hr", "other", "4", 489 "1000, 1005-1020, 1025-1030, 1035-1040, …, 1000.0, 1000.5, 1000.6, …" }, }; 490 for (String[] row : tests) { 491 PluralInfo plurals = SUPPLEMENTAL.getPlurals(row[0]); 492 SampleList uset = plurals.getSamples9999(Count.valueOf(row[1]), 493 Integer.parseInt(row[2])); 494 assertEquals(row[0] + ", " + row[1] + ", " + row[2], row[3], 495 uset.toString()); 496 } 497 } 498 TestDigitPluralCompleteness()499 public void TestDigitPluralCompleteness() { 500 String[][] exceptionStrings = { 501 // defaults 502 { "*", "zero", "0,00,000,0000" }, 503 { "*", "one", "0" }, 504 { "*", "two", "0,00,000,0000" }, 505 { "*", "few", "0,00,000,0000" }, 506 { "*", "many", "0,00,000,0000" }, 507 { "*", "other", "0,00,000,0000" }, 508 // others 509 { "mo", "other", "00,000,0000" }, // 510 { "ro", "other", "00,000,0000" }, // 511 { "cs", "few", "0" }, // j in 2..4 512 { "sk", "few", "0" }, // j in 2..4 513 { "da", "one", "0" }, // j is 1 or t is not 0 and n within 0..2 514 { "is", "one", 
"0,00,000,0000" }, // j is 1 or f is 1 515 { "sv", "one", "0" }, // j is 1 516 { "he", "two", "0" }, // j is 2 517 { "ru", "one", "0,00,000,0000" }, // j mod 10 is 1 and j mod 100 518 // is not 11 519 { "uk", "one", "0,00,000,0000" }, // j mod 10 is 1 and j mod 100 520 // is not 11 521 { "bs", "one", "0,00,000,0000" }, // j mod 10 is 1 and j mod 100 522 // is not 11 or f mod 10 is 523 // 1 and f mod 100 is not 11 524 { "hr", "one", "0,00,000,0000" }, // j mod 10 is 1 and j mod 100 525 // is not 11 or f mod 10 is 526 // 1 and f mod 100 is not 11 527 { "sh", "one", "0,00,000,0000" }, // j mod 10 is 1 and j mod 100 528 // is not 11 or f mod 10 is 529 // 1 and f mod 100 is not 11 530 { "sr", "one", "0,00,000,0000" }, // j mod 10 is 1 and j mod 100 531 // is not 11 or f mod 10 is 532 // 1 and f mod 100 is not 11 533 { "mk", "one", "0,00,000,0000" }, // j mod 10 is 1 or f mod 10 534 // is 1 535 { "sl", "one", "0,000,0000" }, // j mod 100 is 1 536 { "sl", "two", "0,000,0000" }, // j mod 100 is 2 537 { "he", "many", "00,000,0000" }, // j not in 0..10 and j mod 10 538 // is 0 539 { "tzm", "one", "0,00" }, // n in 0..1 or n in 11..99 540 { "gd", "one", "0,00" }, // n in 1,11 541 { "gd", "two", "0,00" }, // n in 2,12 542 { "shi", "few", "0,00" }, // n in 2..10 543 { "gd", "few", "0,00" }, // n in 3..10,13..19 544 { "ga", "few", "0" }, // n in 3..6 545 { "ga", "many", "0,00" }, // n in 7..10 546 { "ar", "zero", "0" }, // n is 0 547 { "cy", "zero", "0" }, // n is 0 548 { "ksh", "zero", "0" }, // n is 0 549 { "lag", "zero", "0" }, // n is 0 550 { "pt", "one", "0" }, // i = 1 and v = 0 or i = 0 and t = 1 551 { "pt_PT", "one", "0" }, // n = 1 and v = 0 552 { "ar", "two", "0" }, // n is 2 553 { "cy", "two", "0" }, // n is 2 554 { "ga", "two", "0" }, // n is 2 555 { "iu", "two", "0" }, // n is 2 556 { "naq", "two", "0" }, // n is 2 557 { "se", "two", "0" }, // n is 2 558 { "sma", "two", "0" }, // n is 2 559 { "smi", "two", "0" }, // n is 2 560 { "smj", "two", "0" }, // n is 2 561 { 
"smn", "two", "0" }, // n is 2 562 { "sms", "two", "0" }, // n is 2 563 { "cy", "few", "0" }, // n is 3 564 { "cy", "many", "0" }, // n is 6 565 { "br", "many", "" }, // n is not 0 and n mod 1000000 is 0 566 { "gv", "one", "0,00,000,0000" }, // n mod 10 is 1 567 { "be", "one", "0,00,000,0000" }, // n mod 10 is 1 and n mod 100 568 // is not 11 569 { "lv", "one", "0,00,000,0000" }, // n mod 10 is 1 and n mod 100 570 // is not 11 or v is 2 and f 571 // mod 10 is 1 and f mod 100 572 // is not 11 or v is not 2 573 // and f mod 10 is 1 574 { "br", "one", "0,00,000,0000" }, // n mod 10 is 1 and n mod 100 575 // not in 11,71,91 576 { "lt", "one", "0,00,000,0000" }, // n mod 10 is 1 and n mod 100 577 // not in 11..19 578 { "fil", "one", "0,00,000,0000" }, // v = 0 and i = 1,2,3 or v = 579 // 0 and i % 10 != 4,6,9 or 580 // v != 0 and f % 10 != 581 // 4,6,9 582 { "tl", "one", "0,00,000,0000" }, // v = 0 and i = 1,2,3 or v = 583 // 0 and i % 10 != 4,6,9 or 584 // v != 0 and f % 10 != 585 // 4,6,9 586 { "dsb", "one", "0,00,000,0000" }, // v = 0 and i % 100 = 1 or f 587 // % 100 = 1 588 {"kw", "many", "00,000,0000"}, // n != 1 and n % 100 = 1,21,41,61,81 589 {"kw", "zero", "0"}, // n = 0 590 {"fr", "many", ""}, // e is special 591 }; 592 // parse out the exceptions 593 Map<PluralInfo, Relation<Count, Integer>> exceptions = new HashMap<>(); 594 Relation<Count, Integer> fallback = Relation.of( 595 new EnumMap<Count, Set<Integer>>(Count.class), TreeSet.class); 596 for (String[] row : exceptionStrings) { 597 Relation<Count, Integer> countToDigits; 598 if (row[0].equals("*")) { 599 countToDigits = fallback; 600 } else { 601 PluralInfo plurals = SUPPLEMENTAL.getPlurals(row[0]); 602 countToDigits = exceptions.get(plurals); 603 if (countToDigits == null) { 604 exceptions.put( 605 plurals, 606 countToDigits = Relation.of( 607 new EnumMap<Count, Set<Integer>>( 608 Count.class), 609 TreeSet.class)); 610 } 611 } 612 Count c = Count.valueOf(row[1]); 613 for (String digit : 
row[2].split(",")) { 614 // "99" is special, just to have the result be non-empty 615 countToDigits.put(c, digit.length()); 616 } 617 } 618 Set<PluralInfo> seen = new HashSet<>(); 619 Set<String> sorted = new TreeSet<>( 620 SUPPLEMENTAL.getPluralLocales(PluralType.cardinal)); 621 Relation<String, String> ruleToExceptions = Relation.of( 622 new TreeMap<String, Set<String>>(), TreeSet.class); 623 624 for (String locale : sorted) { 625 PluralInfo plurals = SUPPLEMENTAL.getPlurals(locale); 626 if (seen.contains(plurals)) { // skip identicals 627 continue; 628 } 629 Relation<Count, Integer> countToDigits = exceptions.get(plurals); 630 if (countToDigits == null) { 631 countToDigits = fallback; 632 } 633 for (Count c : plurals.getCounts()) { 634 List<String> compose = new ArrayList<>(); 635 boolean needLine = false; 636 Set<Integer> digitSet = countToDigits.get(c); 637 if (digitSet == null) { 638 digitSet = fallback.get(c); 639 } 640 for (int digits = 1; digits < 5; ++digits) { 641 boolean expected = digitSet.contains(digits); 642 boolean hasSamples = plurals.hasSamples(c, digits); 643 if (hasSamples) { 644 compose.add(Utility.repeat("0", digits)); 645 } 646 if (!assertEquals(locale + ", " + digits + ", " + c, 647 expected, hasSamples)) { 648 needLine = true; 649 } 650 } 651 if (needLine) { 652 String countRules = plurals.getPluralRules().getRules( 653 c.toString()); 654 ruleToExceptions.put(countRules == null ? 
"" : countRules, 655 "{\"" + locale + "\", \"" + c + "\", \"" 656 + Joiner.on(",").join(compose) 657 + "\"},"); 658 } 659 } 660 } 661 if (!ruleToExceptions.isEmpty()) { 662 System.out 663 .println("To fix the above, review the following, then replace in TestDigitPluralCompleteness"); 664 for (Entry<String, String> entry : ruleToExceptions.entrySet()) { 665 System.out.println(entry.getValue() + "\t// " + entry.getKey()); 666 } 667 } 668 } 669 TestLikelyCode()670 public void TestLikelyCode() { 671 Map<String, String> likely = SUPPLEMENTAL.getLikelySubtags(); 672 String[][] tests = { { "it_AQ", "it_Latn_AQ" }, 673 { "it_Arab", "it_Arab_IT" }, { "az_Cyrl", "az_Cyrl_AZ" }, }; 674 for (String[] pair : tests) { 675 String newMax = LikelySubtags.maximize(pair[0], likely); 676 assertEquals("Likely", pair[1], newMax); 677 } 678 679 } 680 TestLikelySubtagCompleteness()681 public void TestLikelySubtagCompleteness() { 682 Map<String, String> likely = SUPPLEMENTAL.getLikelySubtags(); 683 684 for (String language : SUPPLEMENTAL.getCLDRLanguageCodes()) { 685 if (!likely.containsKey(language)) { 686 logln("WARNING: No likely subtag for CLDR language code (" 687 + language + ")"); 688 } 689 } 690 for (String script : SUPPLEMENTAL.getCLDRScriptCodes()) { 691 if (!likely.containsKey("und_" + script) 692 && !script.equals("Latn") 693 && !script.equals("Zinh") 694 && !script.equals("Zyyy") 695 && ScriptMetadata.getInfo(script) != null 696 && ScriptMetadata.getInfo(script).idUsage != ScriptMetadata.IdUsage.EXCLUSION 697 && ScriptMetadata.getInfo(script).idUsage != ScriptMetadata.IdUsage.UNKNOWN) { 698 errln("No likely subtag for CLDR script code (und_" + script 699 + ")"); 700 } 701 } 702 703 } 704 TestEquivalentLocales()705 public void TestEquivalentLocales() { 706 Set<Set<String>> seen = new HashSet<>(); 707 Set<String> toTest = new TreeSet<>(testInfo.getCldrFactory() 708 .getAvailable()); 709 toTest.addAll(SUPPLEMENTAL.getLikelySubtags().keySet()); 710 
toTest.addAll(SUPPLEMENTAL.getLikelySubtags().values()); 711 toTest.addAll(SUPPLEMENTAL.getDefaultContentLocales()); 712 LanguageTagParser ltp = new LanguageTagParser(); 713 main: for (String locale : toTest) { 714 if (locale.startsWith("und") || locale.equals("root")) { 715 continue; 716 } 717 Set<String> s = SUPPLEMENTAL.getEquivalentsForLocale(locale); 718 if (seen.contains(s)) { 719 continue; 720 } 721 // System.out.println(s + " => " + VettingViewer.gatherCodes(s)); 722 723 List<String> ss = new ArrayList<>(s); 724 String last = ss.get(ss.size() - 1); 725 ltp.set(last); 726 if (!ltp.getVariants().isEmpty() || !ltp.getExtensions().isEmpty()) { 727 continue; // skip variants for now. 728 } 729 String language = ltp.getLanguage(); 730 String script = ltp.getScript(); 731 String region = ltp.getRegion(); 732 if (!script.isEmpty() && !region.isEmpty()) { 733 String noScript = ltp.setScript("").toString(); 734 String noRegion = ltp.setScript(script).setRegion("") 735 .toString(); 736 switch (s.size()) { 737 case 1: // ok if already maximized and strange script/country, 738 // eg it_Arab_JA 739 continue main; 740 case 2: // ok if adds default country/script, eg {en_Cyrl, 741 // en_Cyrl_US} or {en_GB, en_Latn_GB} 742 String first = ss.get(0); 743 if (first.equals(noScript) || first.equals(noRegion)) { 744 continue main; 745 } 746 break; 747 case 3: // ok if different script in different country, eg 748 // {az_IR, az_Arab, az_Arab_IR} 749 if (noScript.equals(ss.get(0)) 750 && noRegion.equals(ss.get(1))) { 751 continue main; 752 } 753 break; 754 case 4: // ok if all combinations, eg {en, en_US, en_Latn, 755 // en_Latn_US} 756 if (language.equals(ss.get(0)) 757 && noScript.equals(ss.get(1)) 758 && noRegion.equals(ss.get(2))) { 759 continue main; 760 } 761 break; 762 } 763 } 764 errln("Strange size or composition:\t" + s + " \t" 765 + showLocaleParts(s)); 766 seen.add(s); 767 } 768 } 769 showLocaleParts(Set<String> s)770 private String showLocaleParts(Set<String> s) { 771 
LanguageTagParser ltp = new LanguageTagParser(); 772 Set<String> b = new LinkedHashSet<>(); 773 for (String ss : s) { 774 ltp.set(ss); 775 addName(CLDRFile.LANGUAGE_NAME, ltp.getLanguage(), b); 776 addName(CLDRFile.SCRIPT_NAME, ltp.getScript(), b); 777 addName(CLDRFile.TERRITORY_NAME, ltp.getRegion(), b); 778 } 779 return Joiner.on("; ").join(b); 780 } 781 addName(int languageName, String code, Set<String> b)782 private void addName(int languageName, String code, Set<String> b) { 783 if (code.isEmpty()) { 784 return; 785 } 786 String name = testInfo.getEnglish().getName(languageName, code); 787 if (!code.equals(name)) { 788 b.add(code + "=" + name); 789 } 790 } 791 TestDefaultScriptCompleteness()792 public void TestDefaultScriptCompleteness() { 793 Relation<String, String> scriptToBase = Relation.of( 794 new LinkedHashMap<String, Set<String>>(), TreeSet.class); 795 main: for (String locale : testInfo.getCldrFactory() 796 .getAvailableLanguages()) { 797 if (!locale.contains("_") && !"root".equals(locale)) { 798 String defaultScript = SUPPLEMENTAL.getDefaultScript(locale); 799 if (defaultScript != null) { 800 continue; 801 } 802 CLDRFile cldrFile = testInfo.getCLDRFile(locale, 803 false); 804 UnicodeSet set = cldrFile.getExemplarSet("", 805 WinningChoice.NORMAL); 806 for (String s : set) { 807 int script = UScript.getScript(s.codePointAt(0)); 808 if (script != UScript.UNKNOWN && script != UScript.COMMON 809 && script != UScript.INHERITED) { 810 scriptToBase.put(UScript.getShortName(script), locale); 811 continue main; 812 } 813 } 814 scriptToBase.put(UScript.getShortName(UScript.UNKNOWN), locale); 815 } 816 } 817 if (scriptToBase.size() != 0) { 818 for (Entry<String, Set<String>> entry : scriptToBase.keyValuesSet()) { 819 errln("Default Scripts missing:\t" + entry.getKey() + "\t" 820 + entry.getValue()); 821 } 822 } 823 } 824 TestTimeData()825 public void TestTimeData() { 826 Map<String, PreferredAndAllowedHour> timeData = SUPPLEMENTAL 827 .getTimeData(); 828 
Set<String> regionsSoFar = new HashSet<>(); 829 Set<String> current24only = new HashSet<>(); 830 Set<String> current12preferred = new HashSet<>(); 831 832 boolean haveWorld = false; 833 834 ImmutableSet<HourStyle> oldSchool = ImmutableSet.copyOf(EnumSet.of(HourStyle.H, HourStyle.h, HourStyle.K, HourStyle.k)); 835 836 for (Entry<String, PreferredAndAllowedHour> e : timeData.entrySet()) { 837 String region = e.getKey(); 838 if (region.equals("001")) { 839 haveWorld = true; 840 } 841 regionsSoFar.add(region); 842 PreferredAndAllowedHour preferredAndAllowedHour = e.getValue(); 843 assertNotNull("Preferred must not be null", preferredAndAllowedHour.preferred); 844 845 // find first h or H 846 HourStyle found = null; 847 848 for (HourStyle item : preferredAndAllowedHour.allowed) { 849 if (oldSchool.contains(item)) { 850 found = item; 851 if (item != preferredAndAllowedHour.preferred) { 852 String message = "Inconsistent values for " + region + ": preferred=" + preferredAndAllowedHour.preferred 853 + " but that isn't the first " + oldSchool + " in allowed: " + preferredAndAllowedHour.allowed; 854 //if (!logKnownIssue("cldrbug:11448", message)) { 855 errln(message); 856 //} 857 } 858 break; 859 } 860 } 861 if (found == null) { 862 errln(region + ": preferred " + preferredAndAllowedHour.preferred 863 + " not in " + preferredAndAllowedHour.allowed); 864 } 865 // final HourStyle firstAllowed = preferredAndAllowedHour.allowed.iterator().next(); 866 // if (preferredAndAllowedHour.preferred == HourStyle.H && firstAllowed == HourStyle.h 867 // || preferredAndAllowedHour.preferred == HourStyle.H && firstAllowed == HourStyle.hb 868 // || preferredAndAllowedHour.preferred == HourStyle.h && firstAllowed == HourStyle.H) { 869 // errln(region + ": allowed " + preferredAndAllowedHour.allowed 870 // + " starts with preferred " + preferredAndAllowedHour.preferred); 871 // } else if (isVerbose()) { 872 // logln(region + ": allowed " + preferredAndAllowedHour.allowed 873 // + " starts with 
preferred " + preferredAndAllowedHour.preferred); 874 // } 875 // for (HourStyle c : preferredAndAllowedHour.allowed) { 876 // if (!PreferredAndAllowedHour.HOURS.contains(c)) { 877 // errln(region + ": illegal character in " + 878 // preferredAndAllowedHour.allowed + ". It contains " + c 879 // + " which is not in " + PreferredAndAllowedHour.HOURS); 880 // } 881 // } 882 if (!preferredAndAllowedHour.allowed.contains(HourStyle.h) 883 && !preferredAndAllowedHour.allowed.contains(HourStyle.hb)) { 884 current24only.add(region); 885 } 886 if (preferredAndAllowedHour.preferred == HourStyle.h) { 887 current12preferred.add(region); 888 } 889 } 890 Set<String> missing = new TreeSet<>( 891 STANDARD_CODES.getGoodAvailableCodes(CodeType.territory)); 892 missing.removeAll(regionsSoFar); 893 for (Iterator<String> it = missing.iterator(); it.hasNext();) { 894 if (!StandardCodes.isCountry(it.next())) { 895 it.remove(); 896 } 897 } 898 899 // if we don't have 001, then we can't miss any regions 900 if (!missing.isEmpty()) { 901 if (haveWorld) { 902 logln("Implicit regions: " + missing); 903 } else { 904 errln("Missing regions: " + missing); 905 } 906 } 907 908 // The feedback gathered from our translators is that the following use 909 // 24 hour time ONLY: 910 Set<String> only24lang = new TreeSet<>( 911 Arrays.asList(("sq, br, bu, ca, hr, cs, da, de, nl, et, eu, fi, " 912 + "fr, gl, he, is, id, it, nb, pt, ro, ru, sr, sk, sl, sv, tr, hy") 913 .split(",\\s*"))); 914 // With the new preferences, this is changed 915 Set<String> only24region = new TreeSet<>(); 916 Set<String> either24or12region = new TreeSet<>(); 917 918 // get all countries where official or de-facto official 919 // add them two one of two lists, based on the above list of languages 920 for (String language : SUPPLEMENTAL 921 .getLanguagesForTerritoriesPopulationData()) { 922 boolean a24lang = only24lang.contains(language); 923 for (String region : SUPPLEMENTAL 924 .getTerritoriesForPopulationData(language)) { 925 
PopulationData pop = SUPPLEMENTAL 926 .getLanguageAndTerritoryPopulationData(language, region); 927 if (pop.getOfficialStatus().compareTo( 928 OfficialStatus.de_facto_official) < 0) { 929 continue; 930 } 931 if (a24lang) { 932 only24region.add(region); 933 } else { 934 either24or12region.add(region); 935 } 936 } 937 } 938 // if we have a case like CA, where en uses 12/24 but fr uses 24, remove 939 // it for safety 940 only24region.removeAll(either24or12region); 941 // There are always exceptions... Remove SM (San Marino) and VA (Vatican), 942 // since they allows 12/24 but the de facto langauge is Italian. 943 only24region.remove("SM"); 944 only24region.remove("VA"); 945 // also remove all the regions where 'h' is preferred 946 only24region.removeAll(current12preferred); 947 // now verify 948 if (!current24only.containsAll(only24region)) { 949 Set<String> missing24only = new TreeSet<>(only24region); 950 missing24only.removeAll(current24only); 951 952 errln("24-hour-only doesn't include needed items:\n" 953 + " add " 954 + CldrUtility.join(missing24only, " ") 955 + "\n\t\t" 956 + CldrUtility.join(missing24only, "\n\t\t", 957 new NameCodeTransform(testInfo.getEnglish(), 958 CLDRFile.TERRITORY_NAME))); 959 } 960 } 961 962 public static class NameCodeTransform implements StringTransform { 963 private final CLDRFile file; 964 private final int codeType; 965 NameCodeTransform(CLDRFile file, int code)966 public NameCodeTransform(CLDRFile file, int code) { 967 this.file = file; 968 this.codeType = code; 969 } 970 971 @Override transform(String code)972 public String transform(String code) { 973 return file.getName(codeType, code) + " [" + code + "]"; 974 } 975 } 976 TestAliases()977 public void TestAliases() { 978 testInfo.getStandardCodes(); 979 Map<String, Map<String, Map<String, String>>> bcp47Data = StandardCodes 980 .getLStreg(); 981 Map<String, Map<String, R2<List<String>, String>>> aliases = SUPPLEMENTAL 982 .getLocaleAliasInfo(); 983 984 for (Entry<String, 
Map<String, R2<List<String>, String>>> typeMap : aliases 985 .entrySet()) { 986 String type = typeMap.getKey(); 987 Map<String, R2<List<String>, String>> codeReplacement = typeMap 988 .getValue(); 989 990 Map<String, Map<String, String>> bcp47DataTypeData = bcp47Data 991 .get(type.equals("territory") ? "region" : type); 992 if (bcp47DataTypeData == null) { 993 logln("skipping BCP47 test for " + type); 994 } else { 995 for (Entry<String, Map<String, String>> codeData : bcp47DataTypeData 996 .entrySet()) { 997 String code = codeData.getKey(); 998 if (codeReplacement.containsKey(code) 999 || codeReplacement.containsKey(code 1000 .toUpperCase(Locale.ENGLISH))) { 1001 continue; 1002 // TODO, check the value 1003 } 1004 Map<String, String> data = codeData.getValue(); 1005 if (data.containsKey("Deprecated") 1006 && SUPPLEMENTAL.getCLDRLanguageCodes().contains( 1007 code)) { 1008 errln("supplementalMetadata.xml: alias is missing <languageAlias type=\"" 1009 + code + "\" ... /> " + "\t" + data); 1010 } 1011 } 1012 } 1013 1014 Set<R3<String, List<String>, List<String>>> failures = new LinkedHashSet<>(); 1015 Set<String> nullReplacements = new TreeSet<>(); 1016 for (Entry<String, R2<List<String>, String>> codeRep : codeReplacement 1017 .entrySet()) { 1018 String code = codeRep.getKey(); 1019 List<String> replacements = codeRep.getValue().get0(); 1020 if (replacements == null) { 1021 nullReplacements.add(code); 1022 continue; 1023 } 1024 Set<String> fixedReplacements = new LinkedHashSet<>(); 1025 for (String replacement : replacements) { 1026 R2<List<String>, String> newReplacement = codeReplacement 1027 .get(replacement); 1028 if (newReplacement != null) { 1029 List<String> list = newReplacement.get0(); 1030 if (list != null) { 1031 fixedReplacements.addAll(list); 1032 } 1033 } else { 1034 fixedReplacements.add(replacement); 1035 } 1036 } 1037 List<String> fixedList = new ArrayList<>( 1038 fixedReplacements); 1039 if (!replacements.equals(fixedList)) { 1040 R3<String, 
List<String>, List<String>> row = Row.of(code, 1041 replacements, fixedList); 1042 System.out.println(row.toString()); 1043 failures.add(row); 1044 } 1045 } 1046 1047 if (failures.size() != 0) { 1048 for (R3<String, List<String>, List<String>> item : failures) { 1049 String code = item.get0(); 1050 List<String> oldReplacement = item.get1(); 1051 List<String> newReplacement = item.get2(); 1052 1053 errln(code + "\t=>\t" + oldReplacement + "\tshould be:\n\t" 1054 + "<" + type + "Alias type=\"" + code 1055 + "\" replacement=\"" 1056 + Joiner.on(" ").join(newReplacement) 1057 + "\" reason=\"XXX\"/> <!-- YYY -->\n"); 1058 } 1059 } 1060 if (nullReplacements.size() != 0) { 1061 logln("No Replacements\t" + type + "\t" + nullReplacements); 1062 } 1063 } 1064 } 1065 1066 static final List<String> oldRegions = Arrays 1067 .asList("NT, YD, QU, SU, DD, FX, ZR, AN, BU, TP, CS, YU" 1068 .split(", ")); 1069 TestTerritoryContainment()1070 public void TestTerritoryContainment() { 1071 Relation<String, String> map = SUPPLEMENTAL 1072 .getTerritoryToContained(ContainmentStyle.all); 1073 Relation<String, String> mapCore = SUPPLEMENTAL.getContainmentCore(); 1074 Set<String> mapItems = new LinkedHashSet<>(); 1075 // get all the items 1076 for (String item : map.keySet()) { 1077 mapItems.add(item); 1078 mapItems.addAll(map.getAll(item)); 1079 } 1080 Map<String, Map<String, String>> bcp47RegionData = StandardCodes 1081 .getLStreg().get("region"); 1082 1083 // verify that all regions are covered 1084 Set<String> bcp47Regions = new LinkedHashSet<>( 1085 bcp47RegionData.keySet()); 1086 bcp47Regions.remove("ZZ"); // We don't care about ZZ since it is the 1087 // unknown region... 
1088 for (Iterator<String> it = bcp47Regions.iterator(); it.hasNext();) { 1089 String region = it.next(); 1090 Map<String, String> data = bcp47RegionData.get(region); 1091 if (data.containsKey("Deprecated")) { 1092 logln("Removing deprecated " + region); 1093 it.remove(); 1094 } 1095 if ("Private use".equals(data.get("Description"))) { 1096 it.remove(); 1097 } 1098 } 1099 1100 if (!mapItems.equals(bcp47Regions)) { 1101 mapItems.removeAll(oldRegions); 1102 errlnDiff("containment items not in bcp47 regions: ", mapItems, 1103 bcp47Regions); 1104 errlnDiff("bcp47 regions not in containment items: ", bcp47Regions, 1105 mapItems); 1106 } 1107 1108 // verify that everything in the containment core can be reached 1109 // downwards from 001. 1110 1111 Map<String, Integer> from001 = getRecursiveContainment("001", map, 1112 new LinkedHashMap<String, Integer>(), 1); 1113 from001.put("001", 0); 1114 Set<String> keySet = from001.keySet(); 1115 for (String region : keySet) { 1116 logln(Utility.repeat("\t", from001.get(region)) + "\t" + region 1117 + "\t" + getRegionName(region)); 1118 } 1119 1120 // Populate mapItems with the core containment 1121 mapItems.clear(); 1122 for (String item : mapCore.keySet()) { 1123 mapItems.add(item); 1124 mapItems.addAll(mapCore.getAll(item)); 1125 } 1126 1127 if (!mapItems.equals(keySet)) { 1128 errlnDiff( 1129 "containment core items that can't be reached from 001: ", 1130 mapItems, keySet); 1131 } 1132 } 1133 errlnDiff(String title, Set<String> mapItems, Set<String> keySet)1134 private void errlnDiff(String title, Set<String> mapItems, 1135 Set<String> keySet) { 1136 Set<String> diff = new LinkedHashSet<>(mapItems); 1137 diff.removeAll(keySet); 1138 if (diff.size() != 0) { 1139 errln(title + diff); 1140 } 1141 } 1142 getRegionName(String region)1143 private String getRegionName(String region) { 1144 return testInfo.getEnglish().getName(CLDRFile.TERRITORY_NAME, region); 1145 } 1146 getRecursiveContainment(String region, Relation<String, String> 
map, Map<String, Integer> result, int depth)1147 private Map<String, Integer> getRecursiveContainment(String region, 1148 Relation<String, String> map, Map<String, Integer> result, int depth) { 1149 Set<String> contained = map.getAll(region); 1150 if (contained == null) { 1151 return result; 1152 } 1153 for (String item : contained) { 1154 if (result.containsKey(item)) { 1155 logln("Duplicate containment " + item + "\t" 1156 + getRegionName(item)); 1157 continue; 1158 } 1159 result.put(item, depth); 1160 getRecursiveContainment(item, map, result, depth + 1); 1161 } 1162 return result; 1163 } 1164 TestMacrolanguages()1165 public void TestMacrolanguages() { 1166 Set<String> languageCodes = STANDARD_CODES 1167 .getAvailableCodes("language"); 1168 Map<String, Map<String, R2<List<String>, String>>> typeToTagToReplacement = SUPPLEMENTAL 1169 .getLocaleAliasInfo(); 1170 Map<String, R2<List<String>, String>> tagToReplacement = typeToTagToReplacement 1171 .get("language"); 1172 1173 Relation<String, String> replacementToReplaced = Relation.of( 1174 new TreeMap<String, Set<String>>(), TreeSet.class); 1175 for (String language : tagToReplacement.keySet()) { 1176 List<String> replacements = tagToReplacement.get(language).get0(); 1177 if (replacements != null) { 1178 replacementToReplaced.putAll(replacements, language); 1179 } 1180 } 1181 replacementToReplaced.freeze(); 1182 1183 Map<String, Map<String, Map<String, String>>> lstreg = StandardCodes 1184 .getLStreg(); 1185 Map<String, Map<String, String>> lstregLanguageInfo = lstreg 1186 .get("language"); 1187 1188 Relation<Scope, String> scopeToCodes = Relation.of( 1189 new TreeMap<Scope, Set<String>>(), TreeSet.class); 1190 // the invariant is that every macrolanguage has exactly 1 encompassed 1191 // language that maps to it 1192 1193 main: for (String language : Builder.with(new TreeSet<String>()) 1194 .addAll(languageCodes).addAll(Iso639Data.getAvailable()).get()) { 1195 if (language.equals("no") || language.equals("sh")) 
1196 continue; // special cases 1197 Scope languageScope = getScope(language, lstregLanguageInfo); 1198 if (languageScope == Scope.Macrolanguage) { 1199 if (Iso639Data.getHeirarchy(language) != null) { 1200 continue main; // is real family 1201 } 1202 Set<String> replacements = replacementToReplaced 1203 .getAll(language); 1204 if (replacements == null || replacements.size() == 0) { 1205 scopeToCodes.put(languageScope, language); 1206 } else { 1207 // it still might be bad, if we don't have a mapping to a 1208 // regular language 1209 for (String replacement : replacements) { 1210 Scope replacementScope = getScope(replacement, 1211 lstregLanguageInfo); 1212 if (replacementScope == Scope.Individual) { 1213 continue main; 1214 } 1215 } 1216 scopeToCodes.put(languageScope, language); 1217 } 1218 } 1219 } 1220 // now show the items we found 1221 for (Scope scope : scopeToCodes.keySet()) { 1222 for (String language : scopeToCodes.getAll(scope)) { 1223 String name = testInfo.getEnglish().getName(language); 1224 if (name == null || name.equals(language)) { 1225 Set<String> set = Iso639Data.getNames(language); 1226 if (set != null) { 1227 name = set.iterator().next(); 1228 } else { 1229 Map<String, String> languageInfo = lstregLanguageInfo 1230 .get(language); 1231 if (languageInfo != null) { 1232 name = languageInfo.get("Description"); 1233 } 1234 } 1235 } 1236 errln(scope + "\t" + language + "\t" + name + "\t" 1237 + Iso639Data.getType(language)); 1238 } 1239 } 1240 } 1241 getScope(String language, Map<String, Map<String, String>> lstregLanguageInfo)1242 private Scope getScope(String language, 1243 Map<String, Map<String, String>> lstregLanguageInfo) { 1244 Scope languageScope = Iso639Data.getScope(language); 1245 Map<String, String> languageInfo = lstregLanguageInfo.get(language); 1246 if (languageInfo == null) { 1247 // System.out.println("Couldn't get lstreg info for " + language); 1248 } else { 1249 String lstregScope = languageInfo.get("Scope"); 1250 if (lstregScope 
!= null) { 1251 Scope scope2 = Scope.fromString(lstregScope); 1252 if (languageScope != scope2) { 1253 // System.out.println("Mismatch in scope between LSTR and ISO 639:\t" 1254 // + scope2 + "\t" + 1255 // languageScope); 1256 languageScope = scope2; 1257 } 1258 } 1259 } 1260 return languageScope; 1261 } 1262 1263 static final boolean LOCALES_FIXED = true; 1264 TestPopulation()1265 public void TestPopulation() { 1266 Set<String> languages = SUPPLEMENTAL 1267 .getLanguagesForTerritoriesPopulationData(); 1268 Relation<String, String> baseToLanguages = Relation.of( 1269 new TreeMap<String, Set<String>>(), TreeSet.class); 1270 LanguageTagParser ltp = new LanguageTagParser(); 1271 LanguageTagCanonicalizer ltc = new LanguageTagCanonicalizer(false); 1272 1273 for (String language : languages) { 1274 if (LOCALES_FIXED) { 1275 String canonicalForm = ltc.transform(language); 1276 if (!assertEquals("Canonical form", canonicalForm, language)) { 1277 int debug = 0; 1278 } 1279 } 1280 1281 String base = ltp.set(language).getLanguage(); 1282 String script = ltp.getScript(); 1283 baseToLanguages.put(base, language); 1284 1285 // add basic data, basically just for wo! 
1286 // if there are primary scripts, they must include script (if not 1287 // empty) 1288 Set<String> primaryScripts = Collections.emptySet(); 1289 Set<String> secondaryScripts = Collections.emptySet(); 1290 Map<Type, BasicLanguageData> basicData = SUPPLEMENTAL 1291 .getBasicLanguageDataMap(base); 1292 if (basicData != null) { 1293 BasicLanguageData s = basicData 1294 .get(BasicLanguageData.Type.primary); 1295 if (s != null) { 1296 primaryScripts = s.getScripts(); 1297 } 1298 s = basicData.get(BasicLanguageData.Type.secondary); 1299 if (s != null) { 1300 secondaryScripts = s.getScripts(); 1301 } 1302 } 1303 1304 // do some consistency tests; if there is a script, it must be in 1305 // primaryScripts or secondaryScripts 1306 if (!script.isEmpty() && !primaryScripts.contains(script) && !secondaryScripts.contains(script)) { 1307 errln(base + ": Script found in territory data (" + script 1308 + ") is not in primary scripts :\t" + primaryScripts 1309 + " and not in secondary scripts :\t" + secondaryScripts); 1310 } 1311 1312 // if there are multiple primary scripts, they will be in 1313 // baseToLanguages 1314 if (primaryScripts.size() > 1) { 1315 for (String script2 : primaryScripts) { 1316 baseToLanguages.put(base, base + "_" + script2); 1317 } 1318 } 1319 } 1320 1321 if (!LOCALES_FIXED) { 1322 // the invariants are that if we have a base, we must not have a script. 
1323 // and if we don't have a base, we must have two items 1324 for (String base : baseToLanguages.keySet()) { 1325 Set<String> languagesForBase = baseToLanguages.getAll(base); 1326 if (languagesForBase.contains(base)) { 1327 if (languagesForBase.size() > 1) { 1328 errln("Cannot have base alone with other scripts:\t" 1329 + languagesForBase); 1330 } 1331 } else { 1332 if (languagesForBase.size() == 1) { 1333 errln("Cannot have only one script for language:\t" 1334 + languagesForBase); 1335 } 1336 } 1337 } 1338 } 1339 } 1340 TestCompleteness()1341 public void TestCompleteness() { 1342 if (SUPPLEMENTAL.getSkippedElements().size() > 0) { 1343 logln("SupplementalDataInfo API doesn't support: " 1344 + SUPPLEMENTAL.getSkippedElements().toString()); 1345 } 1346 } 1347 1348 // these are settings for exceptional cases we want to allow 1349 private static final Set<String> EXCEPTION_CURRENCIES_WITH_NEW = new TreeSet<>( 1350 Arrays.asList("ILS", "NZD", "PGK", "TWD")); 1351 1352 // ok since there is no problem with confusion 1353 private static final Set<String> OK_TO_NOT_HAVE_OLD = new TreeSet<>( 1354 Arrays.asList("ADP", "ATS", "BEF", "CYP", "DEM", "ESP", "FIM", 1355 "FRF", "GRD", "IEP", "ITL", "LUF", "MTL", "MTP", "NLG", 1356 "PTE", "YUM", "ARA", "BAD", "BGL", "BOP", "BRC", "BRN", 1357 "BRR", "BUK", "CSK", "ECS", "GEK", "GNS", "GQE", "HRD", 1358 "ILP", "LTT", "LVR", "MGF", "MLF", "MZE", "NIC", "PEI", 1359 "PES", "SIT", "SRG", "SUR", "TJR", "TPE", "UAK", "YUD", 1360 "YUN", "ZRZ", "GWE")); 1361 1362 private static final Date LIMIT_FOR_NEW_CURRENCY = new Date( 1363 new Date().getYear() - 5, 1, 1); 1364 private static final Date NOW = new Date(); 1365 1366 private Matcher oldMatcher = Pattern.compile( 1367 "\\bold\\b|\\([0-9]{4}-[0-9]{4}\\)", Pattern.CASE_INSENSITIVE) 1368 .matcher(""); 1369 private Matcher newMatcher = Pattern.compile("\\bnew\\b", 1370 Pattern.CASE_INSENSITIVE).matcher(""); 1371 1372 /** 1373 * Test that access to currency info in supplemental data is ok. 
At this 1374 * point just a simple test. 1375 * 1376 * @param args 1377 */ TestCurrency()1378 public void TestCurrency() { 1379 IsoCurrencyParser isoCodes = IsoCurrencyParser.getInstance(); 1380 Set<String> currencyCodes = STANDARD_CODES 1381 .getGoodAvailableCodes("currency"); 1382 Relation<String, Pair<String, CurrencyDateInfo>> nonModernCurrencyCodes = Relation 1383 .of(new TreeMap<String, Set<Pair<String, CurrencyDateInfo>>>(), 1384 TreeSet.class); 1385 Relation<String, Pair<String, CurrencyDateInfo>> modernCurrencyCodes = Relation 1386 .of(new TreeMap<String, Set<Pair<String, CurrencyDateInfo>>>(), 1387 TreeSet.class); 1388 Set<String> territoriesWithoutModernCurrencies = new TreeSet<>( 1389 STANDARD_CODES.getGoodAvailableCodes("territory")); 1390 Map<String, Date> currencyFirstValid = new TreeMap<>(); 1391 Map<String, Date> currencyLastValid = new TreeMap<>(); 1392 territoriesWithoutModernCurrencies.remove("ZZ"); 1393 1394 for (String territory : STANDARD_CODES 1395 .getGoodAvailableCodes("territory")) { 1396 /* "EU" behaves like a country for purposes of this test */ 1397 if ((SUPPLEMENTAL.getContained(territory) != null) 1398 && !territory.equals("EU")) { 1399 territoriesWithoutModernCurrencies.remove(territory); 1400 continue; 1401 } 1402 Set<CurrencyDateInfo> currencyInfo = SUPPLEMENTAL 1403 .getCurrencyDateInfo(territory); 1404 if (currencyInfo == null) { 1405 continue; // error, but will pick up below. 
1406 } 1407 for (CurrencyDateInfo dateInfo : currencyInfo) { 1408 final String currency = dateInfo.getCurrency(); 1409 final Date start = dateInfo.getStart(); 1410 final Date end = dateInfo.getEnd(); 1411 if (dateInfo.getErrors().length() != 0) { 1412 logln("parsing " + territory + "\t" + dateInfo.toString() 1413 + "\t" + dateInfo.getErrors()); 1414 } 1415 Date firstValue = currencyFirstValid.get(currency); 1416 if (firstValue == null || firstValue.compareTo(start) < 0) { 1417 currencyFirstValid.put(currency, start); 1418 } 1419 Date lastValue = currencyLastValid.get(currency); 1420 if (lastValue == null || lastValue.compareTo(end) > 0) { 1421 currencyLastValid.put(currency, end); 1422 } 1423 if (start.compareTo(NOW) < 0 && end.compareTo(NOW) >= 0) { // Non-tender 1424 // is 1425 // OK... 1426 modernCurrencyCodes.put(currency, 1427 new Pair<>(territory, 1428 dateInfo)); 1429 territoriesWithoutModernCurrencies.remove(territory); 1430 } else { 1431 nonModernCurrencyCodes.put(currency, 1432 new Pair<>(territory, 1433 dateInfo)); 1434 } 1435 logln(territory 1436 + "\t" 1437 + dateInfo.toString() 1438 + "\t" 1439 + testInfo.getEnglish().getName(CLDRFile.CURRENCY_NAME, 1440 currency)); 1441 } 1442 } 1443 // fix up 1444 nonModernCurrencyCodes.removeAll(modernCurrencyCodes.keySet()); 1445 Relation<String, String> isoCurrenciesToCountries = Relation.of( 1446 new TreeMap<String, Set<String>>(), TreeSet.class) 1447 .addAllInverted(isoCodes.getCountryToCodes()); 1448 // now print error messages 1449 logln("Modern Codes: " + modernCurrencyCodes.size() + "\t" 1450 + modernCurrencyCodes); 1451 Set<String> missing = new TreeSet<>( 1452 isoCurrenciesToCountries.keySet()); 1453 missing.removeAll(modernCurrencyCodes.keySet()); 1454 if (missing.size() != 0) { 1455 errln("Missing codes compared to ISO: " + missing.toString()); 1456 } 1457 1458 for (String currency : modernCurrencyCodes.keySet()) { 1459 Set<Pair<String, CurrencyDateInfo>> data = modernCurrencyCodes 1460 
.getAll(currency); 1461 final String name = testInfo.getEnglish().getName( 1462 CLDRFile.CURRENCY_NAME, currency); 1463 1464 Set<String> isoCountries = isoCurrenciesToCountries 1465 .getAll(currency); 1466 if (isoCountries == null) { 1467 isoCountries = new TreeSet<>(); 1468 } 1469 1470 TreeSet<String> cldrCountries = new TreeSet<>(); 1471 for (Pair<String, CurrencyDateInfo> x : data) { 1472 cldrCountries.add(x.getFirst()); 1473 } 1474 if (!isoCountries.equals(cldrCountries)) { 1475 if (!logKnownIssue("cldrbug:10765", "Missing codes compared to ISO: " + missing.toString())) { 1476 1477 errln("Mismatch between ISO and Cldr modern currencies for " 1478 + currency + "\tISO:" + isoCountries + "\tCLDR:" 1479 + cldrCountries); 1480 showCountries("iso-cldr", isoCountries, cldrCountries, missing); 1481 showCountries("cldr-iso", cldrCountries, isoCountries, missing); 1482 } 1483 } 1484 1485 if (oldMatcher.reset(name).find()) { 1486 errln("Has 'old' in name but still used " + "\t" + currency 1487 + "\t" + name + "\t" + data); 1488 } 1489 if (newMatcher.reset(name).find() 1490 && !EXCEPTION_CURRENCIES_WITH_NEW.contains(currency)) { 1491 // find the first use. 
If older than 5 years, flag as error 1492 if (currencyFirstValid.get(currency).compareTo( 1493 LIMIT_FOR_NEW_CURRENCY) < 0) { 1494 errln("Has 'new' in name but used since " 1495 + CurrencyDateInfo.formatDate(currencyFirstValid 1496 .get(currency)) 1497 + "\t" + currency + "\t" 1498 + name + "\t" + data); 1499 } else { 1500 logln("Has 'new' in name but used since " 1501 + CurrencyDateInfo.formatDate(currencyFirstValid 1502 .get(currency)) 1503 + "\t" + currency + "\t" 1504 + name + "\t" + data); 1505 } 1506 } 1507 } 1508 logln("Non-Modern Codes (with dates): " + nonModernCurrencyCodes.size() 1509 + "\t" + nonModernCurrencyCodes); 1510 for (String currency : nonModernCurrencyCodes.keySet()) { 1511 final String name = testInfo.getEnglish().getName( 1512 CLDRFile.CURRENCY_NAME, currency); 1513 if (newMatcher.reset(name).find() 1514 && !EXCEPTION_CURRENCIES_WITH_NEW.contains(currency)) { 1515 logln("Has 'new' in name but NOT used since " 1516 + CurrencyDateInfo.formatDate(currencyLastValid 1517 .get(currency)) 1518 + "\t" + currency + "\t" + name 1519 + "\t" + nonModernCurrencyCodes.getAll(currency)); 1520 } else if (!oldMatcher.reset(name).find() 1521 && !OK_TO_NOT_HAVE_OLD.contains(currency)) { 1522 logln("Doesn't have 'old' or date range in name but NOT used since " 1523 + CurrencyDateInfo.formatDate(currencyLastValid 1524 .get(currency)) 1525 + "\t" 1526 + currency 1527 + "\t" 1528 + name 1529 + "\t" + nonModernCurrencyCodes.getAll(currency)); 1530 for (Pair<String, CurrencyDateInfo> pair : nonModernCurrencyCodes 1531 .getAll(currency)) { 1532 final String territory = pair.getFirst(); 1533 Set<CurrencyDateInfo> currencyInfo = SUPPLEMENTAL 1534 .getCurrencyDateInfo(territory); 1535 for (CurrencyDateInfo dateInfo : currencyInfo) { 1536 if (dateInfo.getEnd().compareTo(NOW) < 0) { 1537 continue; 1538 } 1539 logln("\tCurrencies used instead: " 1540 + territory 1541 + "\t" 1542 + dateInfo 1543 + "\t" 1544 + testInfo.getEnglish().getName( 1545 CLDRFile.CURRENCY_NAME, 1546 
dateInfo.getCurrency())); 1547 1548 } 1549 } 1550 1551 } 1552 } 1553 Set<String> remainder = new TreeSet<>(); 1554 remainder.addAll(currencyCodes); 1555 remainder.removeAll(nonModernCurrencyCodes.keySet()); 1556 // TODO make this an error, except for allowed exceptions. 1557 logln("Currencies without Territories: " + remainder); 1558 if (territoriesWithoutModernCurrencies.size() != 0) { 1559 errln("Modern territory missing currency: " 1560 + territoriesWithoutModernCurrencies); 1561 } 1562 } 1563 showCountries(final String title, Set<String> isoCountries, Set<String> cldrCountries, Set<String> missing)1564 private void showCountries(final String title, Set<String> isoCountries, 1565 Set<String> cldrCountries, Set<String> missing) { 1566 missing.clear(); 1567 missing.addAll(isoCountries); 1568 missing.removeAll(cldrCountries); 1569 for (String country : missing) { 1570 logln("\t\tExtra in " + title + "\t" + country + " - " 1571 + getRegionName(country)); 1572 } 1573 } 1574 TestCurrencyDecimalPlaces()1575 public void TestCurrencyDecimalPlaces() { 1576 IsoCurrencyParser isoCodes = IsoCurrencyParser.getInstance(); 1577 Relation<String, IsoCurrencyParser.Data> codeList = isoCodes 1578 .getCodeList(); 1579 Set<String> currencyCodes = STANDARD_CODES 1580 .getGoodAvailableCodes("currency"); 1581 for (String cc : currencyCodes) { 1582 Set<IsoCurrencyParser.Data> d = codeList.get(cc); 1583 if (d != null) { 1584 for (IsoCurrencyParser.Data x : d) { 1585 CurrencyNumberInfo cni = SUPPLEMENTAL.getCurrencyNumberInfo(cc); 1586 if (cni.digits != x.getMinorUnit()) { 1587 logln("Mismatch between ISO/CLDR for decimal places for currency => " + cc + 1588 ". 
ISO = " + x.getMinorUnit() + " CLDR = " + cni.digits); 1589 } 1590 } 1591 } 1592 } 1593 } 1594 1595 /** 1596 * Verify that we have a default script for every CLDR base language 1597 */ TestDefaultScripts()1598 public void TestDefaultScripts() { 1599 SupplementalDataInfo supp = SUPPLEMENTAL; 1600 Map<String, String> likelyData = supp.getLikelySubtags(); 1601 Map<String, String> baseToDefaultContentScript = new HashMap<>(); 1602 for (CLDRLocale locale : supp.getDefaultContentCLDRLocales()) { 1603 String script = locale.getScript(); 1604 if (!script.isEmpty() && locale.getCountry().isEmpty()) { 1605 baseToDefaultContentScript.put(locale.getLanguage(), script); 1606 } 1607 } 1608 for (String locale : testInfo.getCldrFactory().getAvailableLanguages()) { 1609 if ("root".equals(locale)) { 1610 continue; 1611 } 1612 CLDRLocale loc = CLDRLocale.getInstance(locale); 1613 String baseLanguage = loc.getLanguage(); 1614 String defaultScript = supp.getDefaultScript(baseLanguage); 1615 1616 String defaultContentScript = baseToDefaultContentScript 1617 .get(baseLanguage); 1618 if (defaultContentScript != null) { 1619 assertEquals(loc + " defaultContentScript = default", 1620 defaultScript, defaultContentScript); 1621 } 1622 String likely = likelyData.get(baseLanguage); 1623 String likelyScript = likely == null ? 
null : CLDRLocale 1624 .getInstance(likely).getScript(); 1625 Map<Type, BasicLanguageData> scriptInfo = supp 1626 .getBasicLanguageDataMap(baseLanguage); 1627 if (scriptInfo == null) { 1628 errln(loc + ": has no BasicLanguageData"); 1629 } else { 1630 BasicLanguageData data = scriptInfo.get(Type.primary); 1631 if (data == null) { 1632 data = scriptInfo.get(Type.secondary); 1633 } 1634 if (data == null) { 1635 errln(loc + ": has no scripts in BasicLanguageData"); 1636 } else if (!data.getScripts().contains(defaultScript)) { 1637 errln(loc + ": " + defaultScript 1638 + " not in BasicLanguageData " + data.getScripts()); 1639 } 1640 } 1641 1642 assertEquals(loc + " likely = default", defaultScript, likelyScript); 1643 1644 assertNotNull(loc + ": needs default script", defaultScript); 1645 1646 if (!loc.getScript().isEmpty()) { 1647 if (!loc.getScript().equals(defaultScript)) { 1648 assertNotEquals(locale 1649 + ": only include script if not default", 1650 loc.getScript(), defaultScript); 1651 } 1652 } 1653 1654 } 1655 } 1656 1657 enum CoverageIssue { 1658 log, warn, error 1659 } 1660 TestPluralCompleteness()1661 public void TestPluralCompleteness() { 1662 // Set<String> cardinalLocales = new 1663 // TreeSet<String>(SUPPLEMENTAL.getPluralLocales(PluralType.cardinal)); 1664 // Set<String> ordinalLocales = new 1665 // TreeSet<String>(SUPPLEMENTAL.getPluralLocales(PluralType.ordinal)); 1666 // Map<ULocale, PluralRulesFactory.SamplePatterns> sampleCardinals = 1667 // PluralRulesFactory.getLocaleToSamplePatterns(); 1668 // Set<ULocale> sampleCardinalLocales = PluralRulesFactory.getLocales(); 1669 // // new HashSet(PluralRulesFactory.getSampleCounts(uLocale, 1670 // type).keySet()); 1671 // Map<ULocale, PluralRules> overrideCardinals = 1672 // PluralRulesFactory.getPluralOverrides(); 1673 // Set<ULocale> overrideCardinalLocales = new 1674 // HashSet<ULocale>(overrideCardinals.keySet()); 1675 1676 Set<String> testLocales = STANDARD_CODES.getLocaleCoverageLocales( 1677 
Organization.google, EnumSet.of(Level.MODERN)); 1678 Set<String> allLocales = testInfo.getCldrFactory().getAvailable(); 1679 LanguageTagParser ltp = new LanguageTagParser(); 1680 for (String locale : allLocales) { 1681 // the only known case where plural rules depend on region or script 1682 // is pt_PT 1683 if (locale.equals("root")) { 1684 continue; 1685 } 1686 ltp.set(locale); 1687 if (!ltp.getRegion().isEmpty() || !ltp.getScript().isEmpty()) { 1688 continue; 1689 } 1690 CoverageIssue needsCoverage = testLocales.contains(locale) 1691 ? CoverageIssue.error 1692 : CoverageIssue.log; 1693 CoverageIssue needsCoverage2 = needsCoverage == CoverageIssue.error ? CoverageIssue.warn : needsCoverage; 1694 1695 // if (logKnownIssue("Cldrbug:8809", "Missing plural rules/samples be and ga locales")) { 1696 // if (locale.equals("be") || locale.equals("ga")) { 1697 // needsCoverage = CoverageIssue.warn; 1698 // } 1699 // } 1700 PluralRulesFactory prf = PluralRulesFactory 1701 .getInstance(CLDRConfig.getInstance() 1702 .getSupplementalDataInfo()); 1703 1704 for (PluralType type : PluralType.values()) { 1705 PluralInfo pluralInfo = SUPPLEMENTAL.getPlurals(type, locale, 1706 false); 1707 if (pluralInfo == null) { 1708 errOrLog(needsCoverage, locale + "\t" + type + " \tmissing plural rules", "Cldrbug:7839", "Missing plural data for modern locales"); 1709 continue; 1710 } 1711 Set<Count> counts = pluralInfo.getCounts(); 1712 // if (counts.size() == 1) { 1713 // continue; // skip checking samples 1714 // } 1715 HashSet<String> samples = new HashSet<>(); 1716 EnumSet<Count> countsWithNoSamples = EnumSet 1717 .noneOf(Count.class); 1718 Relation<String, Count> samplesToCounts = Relation.of( 1719 new HashMap(), LinkedHashSet.class); 1720 Set<Count> countsFound = prf.getSampleCounts(locale, 1721 type.standardType); 1722 StringBuilder failureCases = new StringBuilder(); 1723 for (Count count : counts) { 1724 String pattern = PluralRulesFactory.getSamplePattern(locale, type.standardType, 
count);
                    final String rangeLine = getRangeLine(count, pluralInfo.getPluralRules(), pattern);
                    failureCases.append('\n').append(locale).append('\t').append(type).append('\t').append(rangeLine);
                    if (countsFound == null || !countsFound.contains(count)) {
                        countsWithNoSamples.add(count);
                    } else {
                        samplesToCounts.put(pattern, count);
                        logln(locale + "\t" + type + "\t" + count + "\t"
                            + pattern);
                    }
                }
                if (!countsWithNoSamples.isEmpty()) {
                    errOrLog(needsCoverage, locale + "\t" + type + "\t missing samples:\t" + countsWithNoSamples,
                        "cldrbug:7075", "Missing ordinal minimal pairs");
                    errOrLog(needsCoverage2, failureCases.toString());
                }
                for (Entry<String, Set<Count>> entry : samplesToCounts
                    .keyValuesSet()) {
                    if (entry.getValue().size() != 1) {
                        errOrLog(needsCoverage, locale + "\t" + type + "\t duplicate samples: " + entry.getValue()
                            + " => «" + entry.getKey() + "»", "cldrbug:7119", "Some duplicate minimal pairs");
                        errOrLog(needsCoverage2, failureCases.toString());
                    }
                }
            }
        }
    }

    /**
     * Reports {@code message} at the severity selected by {@code causeError}.
     * <ul>
     * <li>{@code error} with a {@code null} ticket: reported through {@code errln}.</li>
     * <li>{@code error} with a ticket: the ticket is passed to {@code logKnownIssue} and the
     * message is then reported through {@code warnln} (the result of {@code logKnownIssue}
     * is not consulted).</li>
     * <li>{@code warn}: reported through {@code warnln}.</li>
     * <li>{@code log}: reported through {@code logln}.</li>
     * </ul>
     *
     * @param causeError severity selector
     * @param message    text to report
     * @param logTicket  known-issue ticket id, or {@code null} if there is none
     * @param logComment comment passed along with the ticket; only used when
     *                   {@code logTicket} is non-null
     */
    public void errOrLog(CoverageIssue causeError, String message, String logTicket, String logComment) {
        switch (causeError) {
        case error:
            if (logTicket == null) {
                errln(message);
                break;
            }
            logKnownIssue(logTicket, logComment);
            // fall through: an error covered by a ticket is reported as a warning
        case warn:
            warnln(message);
            break;
        case log:
            logln(message);
            break;
        }
    }

    /**
     * Convenience overload of
     * {@link #errOrLog(CoverageIssue, String, String, String)} with no ticket and no comment.
     *
     * @param causeError severity selector
     * @param message    text to report
     */
    public void errOrLog(CoverageIssue causeError, String message) {
        errOrLog(causeError, message, null, null);
    }

    /**
     * Checks that the digits of each numeric numbering system are consecutive code points,
     * skipping a fixed list of known exceptions. Only the first out-of-order digit of a
     * numbering system is reported.
     */
    public void TestNumberingSystemDigits() {

        // Don't worry about digits from supplemental planes yet ( ICU can't
        // handle them anyways )
        // hanidec is the only known non codepoint order numbering system
        // TODO: Fix so that it works properly on non-BMP digit strings.
        List<String> knownExceptionList = Arrays.asList(
            "brah", "cakm", "hanidec", "osma", "shrd", "sora", "takr");
        for (String ns : SUPPLEMENTAL.getNumericNumberingSystems()) {
            if (knownExceptionList.contains(ns)) {
                continue;
            }
            String digits = SUPPLEMENTAL.getDigits(ns);
            int previousChar = 0;
            int ch;

            // Step by code point, not by char, so surrogate pairs count as one digit.
            for (int i = 0; i < digits.length(); i += Character.charCount(ch)) {
                ch = digits.codePointAt(i);
                if (i > 0 && ch != previousChar + 1) {
                    errln("Digits for numbering system "
                        + ns
                        + " are not in code point order. Previous char = U+"
                        + Utility.hex(previousChar, 4)
                        + " Current char = U+" + Utility.hex(ch, 4));
                    break;
                }
                previousChar = ch;
            }
        }
    }

    /**
     * Checks that every code point whose general category is DECIMAL_DIGIT_NUMBER appears
     * in the digit string of at least one numeric numbering system, and reports each one
     * that does not.
     */
    public void TestNumberingSystemDigitCompleteness() {
        // A LinkedHashSet keeps the ascending insertion order for reporting while making
        // each removal constant-time (the previous List.remove(Object) was a linear scan).
        Set<Integer> unicodeDigits = new LinkedHashSet<>();
        for (int cp = UCharacter.MIN_CODE_POINT; cp <= UCharacter.MAX_CODE_POINT; cp++) {
            if (UCharacter.getType(cp) == UCharacterEnums.ECharacterCategory.DECIMAL_DIGIT_NUMBER) {
                unicodeDigits.add(Integer.valueOf(cp));
            }
        }

        for (String ns : SUPPLEMENTAL.getNumericNumberingSystems()) {
            String digits = SUPPLEMENTAL.getDigits(ns);
            int ch;

            for (int i = 0; i < digits.length(); i += Character.charCount(ch)) {
                ch = digits.codePointAt(i);
                unicodeDigits.remove(Integer.valueOf(ch));
            }
        }

        // Whatever is left was not claimed by any numbering system.
        for (Integer i : unicodeDigits) {
            errln("Unicode digit: " + UCharacter.getName(i) + " is not in any numbering system. Script = "
                + UScript.getShortName(UScript.getScript(i)));
        }
    }

    /**
     * For each canonical time zone id whose region is not "001" and which is not in the
     * known-exception list, checks that metazone ranges exist and compares the earliest
     * bounded range start and the latest bounded range end against two goal dates.
     */
    public void TestMetazones() {
        // NOTE(review): Date(int, int, int) is deprecated and interprets its arguments in the
        // default time zone (year is an offset from 1900). Kept as-is so the compared instants
        // do not change.
        Date goalMin = new Date(70, 0, 1);
        Date goalMax = new Date(300, 0, 2);
        ImmutableSet<String> knownTZWithoutMetazone = ImmutableSet.of("America/Montreal", "Asia/Barnaul", "Asia/Tomsk", "Europe/Kirov");
        for (String timezoneRaw : TimeZone.getAvailableIDs()) {
            String timezone = TimeZone.getCanonicalID(timezoneRaw);
            String region = TimeZone.getRegion(timezone);
            // Only test ids that are already canonical and that belong to a real region.
            if (!timezone.equals(timezoneRaw) || "001".equals(region)) {
                continue;
            }
            if (knownTZWithoutMetazone.contains(timezone)) {
                continue;
            }
            final Set<MetaZoneRange> ranges = SUPPLEMENTAL
                .getMetaZoneRanges(timezone);

            if (assertNotNull("metazones for " + timezone, ranges)) {
                long min = Long.MAX_VALUE;
                long max = Long.MIN_VALUE;
                // Unbounded ends (START_OF_TIME / END_OF_TIME) are left out of the min/max.
                for (MetaZoneRange range : ranges) {
                    if (range.dateRange.from != DateRange.START_OF_TIME) {
                        min = Math.min(min, range.dateRange.from);
                    }
                    if (range.dateRange.to != DateRange.END_OF_TIME) {
                        max = Math.max(max, range.dateRange.to);
                    }
                }
                assertRelation(timezone + " has metazone before 1970?", true,
                    goalMin, LEQ, new Date(min));
                assertRelation(timezone
                    + " has metazone until way in the future?", true,
                    goalMax, GEQ, new Date(max));
            }
        }
    }

    /**
     * Checks that the population recorded for yue_Hans in CN is smaller than the one
     * recorded for zh (or zh_Hans when {@code LOCALES_FIXED} is false) in CN.
     */
    public void Test9924() {
        String zhLocale = LOCALES_FIXED ? "zh" : "zh_Hans";
        PopulationData zhCNData = SUPPLEMENTAL.getLanguageAndTerritoryPopulationData(zhLocale, "CN");
        PopulationData yueCNData = SUPPLEMENTAL.getLanguageAndTerritoryPopulationData("yue_Hans", "CN");
        // Report missing data as a test failure instead of a NullPointerException.
        if (!assertNotNull("population data for " + zhLocale + " in CN", zhCNData)
            || !assertNotNull("population data for yue_Hans in CN", yueCNData)) {
            return;
        }
        // The message now states the relation that is actually checked.
        assertTrue("yue < zh", yueCNData.getPopulation() < zhCNData.getPopulation());
    }

    /**
     * Compares the language codes returned by {@code SUPPLEMENTAL.getCLDRLanguageCodes()}
     * with the languages of the locales available from the CLDR factory (plus und, mul, zxx).
     * Codes missing from the available locales are reported as a warning or a log line
     * depending on their coverage level; languages of available locales that are missing
     * from the code list are reported as an assertion failure.
     */
    public void Test10765() { //
        Set<String> surveyToolLanguages = SUPPLEMENTAL.getCLDRLanguageCodes(); // codes that show up in Survey Tool
        Set<String> mainLanguages = new TreeSet<>();
        LanguageTagParser ltp = new LanguageTagParser();
        for (String locale : testInfo.getCldrFactory().getAvailableLanguages()) {
            mainLanguages.add(ltp.set(locale).getLanguage());
        }
        // add special codes we want to see anyway
        mainLanguages.add("und");
        mainLanguages.add("mul");
        mainLanguages.add("zxx");

        if (!mainLanguages.containsAll(surveyToolLanguages)) {
            CoverageLevel2 coverageLevel = CoverageLevel2.getInstance(SUPPLEMENTAL, "ja"); // pick "neutral" locale
            Set<String> temp = new TreeSet<>(surveyToolLanguages);
            temp.removeAll(mainLanguages);
            Set<String> modern = new TreeSet<>();
            Set<String> comprehensive = new TreeSet<>();
            for (String lang : temp) {
                Level level = coverageLevel.getLevel(CLDRFile.getKey(CLDRFile.LANGUAGE_NAME, lang));
                if (level.compareTo(Level.MODERN) <= 0) {
                    modern.add(lang);
                } else {
                    comprehensive.add(lang);
                }
            }
            // Only report a bucket that has something in it; an empty "[]" warning is noise.
            if (!modern.isEmpty()) {
                warnln("«Modern» Languages in <variable id='$language' type='choice'> that aren't in main/* : " + getNames(modern));
            }
            if (!comprehensive.isEmpty()) {
                logln("«Comprehensive» Languages in <variable id='$language' type='choice'> that aren't in main/* : " + getNames(comprehensive));
            }
        }
        if (!surveyToolLanguages.containsAll(mainLanguages)) {
            // Reduce mainLanguages to the offending codes so the failure message lists them.
            mainLanguages.removeAll(surveyToolLanguages);
            assertEquals("No main/* languages are missing from Survey Tool:language names (eg <variable id='$language' type='choice'>) ",
                Collections.emptySet(), mainLanguages);
        }
    }

    /**
     * Builds a sorted set of display strings of the form {@code "<English name> (<code>)"}
     * for the given language codes.
     *
     * @param temp language codes to describe
     * @return a new sorted set with one entry per distinct display string
     */
    private Set<String> getNames(Set<String> temp) {
        Set<String> tempNames = new TreeSet<>();
        for (String langCode : temp) {
            tempNames.add(testInfo.getEnglish().getName(CLDRFile.LANGUAGE_NAME, langCode) + " (" + langCode + ")");
        }
        return tempNames;
    }

    /**
     * For every locale that has grammar info, checks that the values of each non-general
     * scope are contained in the values of the general scope for the same target and
     * feature. When {@code DEBUG} is set, also prints the grammar info per locale and the
     * values collected per target/feature/scope.
     */
    public void TestGrammarInfo() {
        Multimap<String,String> allValues = TreeMultimap.create();
        for (String locale : SUPPLEMENTAL.hasGrammarInfo()) {
            GrammarInfo grammarInfo = SUPPLEMENTAL.getGrammarInfo(locale);
            for (GrammaticalTarget target : GrammaticalTarget.values()) {
                for (GrammaticalFeature feature : GrammaticalFeature.values()) {
                    Collection<String> general = grammarInfo.get(target, feature, GrammaticalScope.general);
                    for (GrammaticalScope scope : GrammaticalScope.values()) {
                        Collection<String> units = grammarInfo.get(target, feature, scope);
                        allValues.putAll(target + "/" + feature + "/" + scope, units);
                        if (scope != GrammaticalScope.general) {
                            assertTrue(general + " > " + scope + " " + units, general.containsAll(units));
                        }
                    }
                }
            }
            if (DEBUG) {
                System.out.println(grammarInfo.toString("\n" + locale + "\t"));
            }
        }
        if (DEBUG) {
            System.out.println();
            for (Entry<String, Collection<String>> entry : allValues.asMap().entrySet()) {
                System.out.println(entry.getKey() + "\t" + Joiner.on(", ").join(entry.getValue()));
            }
        }
    }
}