1 package org.unicode.cldr.unittest;
2 
3 import java.util.ArrayList;
4 import java.util.Arrays;
5 import java.util.Collection;
6 import java.util.Collections;
7 import java.util.Date;
8 import java.util.EnumMap;
9 import java.util.EnumSet;
10 import java.util.HashMap;
11 import java.util.HashSet;
12 import java.util.Iterator;
13 import java.util.LinkedHashMap;
14 import java.util.LinkedHashSet;
15 import java.util.List;
16 import java.util.Locale;
17 import java.util.Map;
18 import java.util.Map.Entry;
19 import java.util.Set;
20 import java.util.TreeMap;
21 import java.util.TreeSet;
22 import java.util.regex.Matcher;
23 import java.util.regex.Pattern;
24 
25 import org.unicode.cldr.draft.ScriptMetadata;
26 import org.unicode.cldr.test.CoverageLevel2;
27 import org.unicode.cldr.tool.LikelySubtags;
28 import org.unicode.cldr.tool.PluralMinimalPairs;
29 import org.unicode.cldr.tool.PluralRulesFactory;
30 import org.unicode.cldr.util.Builder;
31 import org.unicode.cldr.util.CLDRConfig;
32 import org.unicode.cldr.util.CLDRFile;
33 import org.unicode.cldr.util.CLDRFile.WinningChoice;
34 import org.unicode.cldr.util.CLDRLocale;
35 import org.unicode.cldr.util.CldrUtility;
36 import org.unicode.cldr.util.GrammarInfo;
37 import org.unicode.cldr.util.GrammarInfo.GrammaticalFeature;
38 import org.unicode.cldr.util.GrammarInfo.GrammaticalScope;
39 import org.unicode.cldr.util.GrammarInfo.GrammaticalTarget;
40 import org.unicode.cldr.util.Iso639Data;
41 import org.unicode.cldr.util.Iso639Data.Scope;
42 import org.unicode.cldr.util.IsoCurrencyParser;
43 import org.unicode.cldr.util.LanguageTagCanonicalizer;
44 import org.unicode.cldr.util.LanguageTagParser;
45 import org.unicode.cldr.util.Level;
46 import org.unicode.cldr.util.Organization;
47 import org.unicode.cldr.util.Pair;
48 import org.unicode.cldr.util.PluralRanges;
49 import org.unicode.cldr.util.PreferredAndAllowedHour;
50 import org.unicode.cldr.util.PreferredAndAllowedHour.HourStyle;
51 import org.unicode.cldr.util.StandardCodes;
52 import org.unicode.cldr.util.StandardCodes.CodeType;
53 import org.unicode.cldr.util.StandardCodes.LstrType;
54 import org.unicode.cldr.util.SupplementalDataInfo;
55 import org.unicode.cldr.util.SupplementalDataInfo.BasicLanguageData;
56 import org.unicode.cldr.util.SupplementalDataInfo.BasicLanguageData.Type;
57 import org.unicode.cldr.util.SupplementalDataInfo.ContainmentStyle;
58 import org.unicode.cldr.util.SupplementalDataInfo.CurrencyDateInfo;
59 import org.unicode.cldr.util.SupplementalDataInfo.CurrencyNumberInfo;
60 import org.unicode.cldr.util.SupplementalDataInfo.DateRange;
61 import org.unicode.cldr.util.SupplementalDataInfo.MetaZoneRange;
62 import org.unicode.cldr.util.SupplementalDataInfo.OfficialStatus;
63 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo;
64 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo.Count;
65 import org.unicode.cldr.util.SupplementalDataInfo.PluralType;
66 import org.unicode.cldr.util.SupplementalDataInfo.PopulationData;
67 import org.unicode.cldr.util.SupplementalDataInfo.SampleList;
68 import org.unicode.cldr.util.Validity;
69 import org.unicode.cldr.util.Validity.Status;
70 
71 import com.google.common.base.Joiner;
72 import com.google.common.collect.ImmutableSet;
73 import com.google.common.collect.Multimap;
74 import com.google.common.collect.TreeMultimap;
75 import com.ibm.icu.impl.Relation;
76 import com.ibm.icu.impl.Row;
77 import com.ibm.icu.impl.Row.R2;
78 import com.ibm.icu.impl.Row.R3;
79 import com.ibm.icu.impl.Utility;
80 import com.ibm.icu.lang.UCharacter;
81 import com.ibm.icu.lang.UCharacterEnums;
82 import com.ibm.icu.lang.UScript;
83 import com.ibm.icu.text.PluralRules;
84 import com.ibm.icu.text.PluralRules.FixedDecimal;
85 import com.ibm.icu.text.PluralRules.FixedDecimalRange;
86 import com.ibm.icu.text.PluralRules.FixedDecimalSamples;
87 import com.ibm.icu.text.PluralRules.Operand;
88 import com.ibm.icu.text.PluralRules.SampleType;
89 import com.ibm.icu.text.StringTransform;
90 import com.ibm.icu.text.UnicodeSet;
91 import com.ibm.icu.util.Output;
92 import com.ibm.icu.util.TimeZone;
93 import com.ibm.icu.util.ULocale;
94 
95 public class TestSupplementalInfo extends TestFmwkPlus {
96     private static final boolean DEBUG = true;
97 
98     static CLDRConfig testInfo = CLDRConfig.getInstance();
99 
100     private static final StandardCodes STANDARD_CODES = testInfo
101         .getStandardCodes();
102 
103     private static final SupplementalDataInfo SUPPLEMENTAL = testInfo
104         .getSupplementalDataInfo();
105 
main(String[] args)106     public static void main(String[] args) {
107         new TestSupplementalInfo().run(args);
108     }
109 
TestPluralSampleOrder()110     public void TestPluralSampleOrder() {
111         HashSet<PluralInfo> seen = new HashSet<>();
112         for (String locale : SUPPLEMENTAL.getPluralLocales()) {
113             if (locale.equals("root")) {
114                 continue;
115             }
116             PluralInfo pi = SUPPLEMENTAL.getPlurals(locale);
117             if (seen.contains(pi)) {
118                 continue;
119             }
120             seen.add(pi);
121             for (SampleType s : SampleType.values()) {
122                 for (Count c : pi.getCounts(s)) {
123                     FixedDecimalSamples sSamples = pi.getPluralRules()
124                         .getDecimalSamples(c.toString(), s);
125                     if (sSamples == null) {
126                         errln(locale + " no sample for " + c);
127                         continue;
128                     }
129                     if (s == SampleType.DECIMAL) {
130                         continue; // skip
131                     }
132                     FixedDecimalRange lastSample = null;
133                     for (FixedDecimalRange sample : sSamples.samples) {
134                         if (lastSample != null) {
135                             if (compare(lastSample.start,sample.start) > 0) {
136                                 errln(locale + ":" + c + ": out of order with "
137                                     + lastSample + " > " + sample);
138                             } else if (false) {
139                                 logln(locale + ":" + c + ": in order with "
140                                     + lastSample + " < " + sample);
141                             }
142                         }
143                         lastSample = sample;
144                     }
145                 }
146             }
147         }
148     }
149 
150     /**
151      * Hack until ICU's FixedDecimal is fixed
152      *
153      */
compare(PluralRules.FixedDecimal me, PluralRules.FixedDecimal other)154     public static int compare(PluralRules.FixedDecimal me, PluralRules.FixedDecimal other) {
155         if (me.getPluralOperand(Operand.e) != other.getPluralOperand(Operand.e)) {
156             return me.getPluralOperand(Operand.e) < other.getPluralOperand(Operand.e) ? -1 : 1;
157         }
158         if (me.getIntegerValue() != other.getIntegerValue()) {
159             return me.getIntegerValue() < other.getIntegerValue() ? -1 : 1;
160         }
161         if (me.getSource() != other.getSource()) {
162             return me.getSource() < other.getSource() ? -1 : 1;
163         }
164         if (me.getVisibleDecimalDigitCount() != other.getVisibleDecimalDigitCount()) {
165             return me.getVisibleDecimalDigitCount() < other.getVisibleDecimalDigitCount() ? -1 : 1;
166         }
167         long diff = me.getDecimalDigits() - other.getDecimalDigits();
168         if (diff != 0) {
169             return diff < 0 ? -1 : 1;
170         }
171         return 0;
172     }
173 
TestPluralRanges()174     public void TestPluralRanges() {
175         PluralRulesFactory prf = PluralRulesFactory.getInstance(SUPPLEMENTAL);
176         Set<String> localesToTest = new TreeSet<>(
177             SUPPLEMENTAL.getPluralRangesLocales());
178         for (String locale : StandardCodes.make().getLocaleCoverageLocales(
179             "google")) { // superset
180             if (locale.equals("*") || locale.contains("_")) {
181                 continue;
182             }
183             localesToTest.add(locale);
184         }
185         Set<String> modernLocales = testInfo.getStandardCodes()
186             .getLocaleCoverageLocales(Organization.cldr,
187                 EnumSet.of(Level.MODERN));
188 
189         Output<FixedDecimal> maxSample = new Output<>();
190         Output<FixedDecimal> minSample = new Output<>();
191 
192         for (String locale : localesToTest) {
193             final String templateLine = "Template for " + ULocale.getDisplayName(locale, "en") + " (" + locale + ") translators to fix:";
194             PluralInfo pluralInfo = SUPPLEMENTAL.getPlurals(locale);
195             Set<Count> counts = pluralInfo.getCounts();
196 
197             final PluralMinimalPairs samplePatterns = PluralMinimalPairs.getInstance(new ULocale(locale).toString());
198 
199             // check that there are no null values
200             PluralRanges pluralRanges = SUPPLEMENTAL.getPluralRanges(locale);
201             if (pluralRanges == null) {
202                 if (!modernLocales.contains(locale)) {
203                     logln("Missing plural ranges for " + locale);
204                 } else {
205                     errOrLog(CoverageIssue.error, locale + "\tMissing plural ranges", "Cldrbug:7839", "Missing plural data for modern locales");
206                     StringBuilder failureCases = new StringBuilder(templateLine);
207                     for (Count start : counts) {
208                         for (Count end : counts) {
209                             pluralInfo.rangeExists(start, end, minSample, maxSample);
210                             final String rangeLine = getRangeLine(start, end, null, maxSample, minSample, samplePatterns);
211                             failureCases.append("\n" + locale + "\t" + rangeLine);
212                         }
213                     }
214                     errOrLog(CoverageIssue.warn, failureCases.toString());
215                 }
216                 continue;
217             }
218             EnumSet<Count> found = EnumSet.noneOf(Count.class);
219             for (Count count : Count.values()) {
220                 if (pluralRanges.isExplicitlySet(count)
221                     && !counts.contains(count)) {
222                     assertTrue(
223                         locale
224                         + "\t pluralRanges categories must be valid for locale:\t"
225                         + count + " must be in " + counts,
226                         !pluralRanges.isExplicitlySet(count));
227                 }
228                 for (Count end : Count.values()) {
229                     Count result = pluralRanges.getExplicit(count, end);
230                     if (result != null) {
231                         found.add(result);
232                     }
233                 }
234             }
235 
236             // check empty range results
237             if (found.isEmpty()) {
238                 errOrLog(CoverageIssue.error, "Empty range results for " + locale, "Cldrbug:7839", "Missing plural data for modern locales");
239             } else {
240                 if (samplePatterns == null) {
241                     errOrLog(CoverageIssue.error, locale + "\tMissing sample patterns", "Cldrbug:7839", "Missing plural data for modern locales");
242                 } else {
243                     for (Count result : found) {
244                         String samplePattern = samplePatterns.get(
245                             PluralRules.PluralType.CARDINAL, result);
246                         if (samplePattern != null && !samplePattern.contains("{0}")) {
247                             errln("Plural Ranges cannot have results that don't use {0} in samples: "
248                                 + locale
249                                 + ", "
250                                 + result
251                                 + "\t«"
252                                 + samplePattern + "»");
253                         }
254                     }
255                 }
256                 if (isVerbose()) {
257                     logln("Range results for " + locale + ":\t" + found);
258                 }
259             }
260 
261             // check for missing values
262             boolean failure = false;
263             StringBuilder failureCases = new StringBuilder(templateLine);
264             for (Count start : counts) {
265                 for (Count end : counts) {
266                     boolean needsValue = pluralInfo.rangeExists(start, end,
267                         minSample, maxSample);
268                     Count explicitValue = pluralRanges.getExplicit(start, end);
269                     final String rangeLine = getRangeLine(start, end, explicitValue, maxSample, minSample, samplePatterns);
270                     failureCases.append("\n" + locale + "\t" + rangeLine);
271                     if (needsValue && explicitValue == null) {
272                         errOrLog(CoverageIssue.error, locale + "\tNo explicit value for range: "
273                             + rangeLine,
274                             "Cldrbug:7839", "Missing plural data for modern locales");
275                         failure = true;
276                         failureCases.append("\tError — need explicit result");
277                     } else if (!needsValue && explicitValue != null) {
278                         errOrLog(CoverageIssue.error, locale + "\tDoesn't need explicit value, but has one: "
279                             + PluralRanges.showRange(start, end, explicitValue),
280                             "Cldrbug:7839", "Missing plural data for modern locales");
281                         failureCases.append("\tUnnecessary");
282                         failure = true;
283                     } else {
284                         failureCases.append("\tOK");
285                     }
286                 }
287             }
288             if (failure) {
289                 errOrLog(CoverageIssue.warn, failureCases.toString());
290             }
291         }
292     }
293 
getRangeLine(Count start, Count end, Count result, Output<FixedDecimal> maxSample, Output<FixedDecimal> minSample, PluralMinimalPairs samplePatterns)294     private String getRangeLine(Count start, Count end, Count result,
295         Output<FixedDecimal> maxSample, Output<FixedDecimal> minSample,
296         PluralMinimalPairs samplePatterns) {
297         final String range = minSample + "–" + maxSample;
298         String example = range;
299         if (samplePatterns != null) {
300             example = "";
301             if (result != null) {
302                 String pat = samplePatterns.get(PluralRules.PluralType.CARDINAL, result);
303                 example += "«" + (pat == null ? "MISSING-PATTERN" : pat.replace("{0}", range)) + "»";
304             } else {
305                 for (Count c : new TreeSet<>(Arrays.asList(start, end, Count.other))) {
306                     String pat = samplePatterns.get(PluralRules.PluralType.CARDINAL, c);
307                     example += c + ":«" + (pat == null ? "MISSING-PATTERN" : pat.replace("{0}", range)) + "»" + "?\tOR ";
308                 }
309                 example += " …";
310             }
311         }
312         return start + "\t" + end + "\t" + (result == null ? "?" : result.toString()) + "\t" + example;
313     }
314 
getRangeLine(Count count, PluralRules pluralRules, String pattern)315     private String getRangeLine(Count count, PluralRules pluralRules, String pattern) {
316         String sample = "?";
317         FixedDecimalSamples exampleList = pluralRules.getDecimalSamples(count.toString(), PluralRules.SampleType.INTEGER);
318         if (exampleList == null) {
319             exampleList = pluralRules.getDecimalSamples(count.toString(), PluralRules.SampleType.DECIMAL);
320         }
321         FixedDecimal sampleDecimal = PluralInfo.getNonZeroSampleIfPossible(exampleList);
322         sample = sampleDecimal.toString();
323 
324         String example = pattern == null ? "NO-SAMPLE!" : "«" + pattern.replace("{0}", sample) + "»";
325         return count + "\t" + example;
326     }
327 
TestPluralSamples()328     public void TestPluralSamples() {
329         String[][] test = { { "en", "ordinal", "1", "one" },
330             { "en", "ordinal", "2", "two" },
331             { "en", "ordinal", "3", "few" },
332             { "en", "ordinal", "4", "other" },
333             { "sl", "cardinal", "2", "two" }, };
334         for (String[] row : test) {
335             checkPluralSamples(row);
336         }
337     }
338 
TestPluralSamples2()339     public void TestPluralSamples2() {
340         PluralRulesFactory prf = PluralRulesFactory.getInstance(SUPPLEMENTAL);
341         for (String locale : prf.getLocales()) {
342             if (locale.equals("und")) {
343                 continue;
344             }
345             if (locale.equals("pl")) {
346                 int debug = 0;
347             }
348             final PluralMinimalPairs samplePatterns = PluralMinimalPairs.getInstance(locale);
349             for (PluralRules.PluralType type : PluralRules.PluralType.values()) {
350                 PluralInfo rules = SUPPLEMENTAL.getPlurals(
351                     SupplementalDataInfo.PluralType.fromStandardType(type),
352                     locale.toString());
353                 if (rules.getCounts().size() == 1) {
354                     continue; // don't require rules for unary cases
355                 }
356                 Multimap<String, Count> sampleToCount = TreeMultimap.create();
357 
358                 for (Count count : rules.getCounts()) {
359                     String sample = samplePatterns.get(type, count);
360                     if (sample == null) {
361                         errOrLog(CoverageIssue.error, locale + "\t" + type + " \tmissing samples for " + count, "cldrbug:7075",
362                             "Missing ordinal minimal pairs");
363                     } else {
364                         sampleToCount.put(sample, count);
365                         PluralRules pRules = rules.getPluralRules();
366                         double unique = pRules.getUniqueKeywordValue(count
367                             .toString());
368                         if (unique == PluralRules.NO_UNIQUE_VALUE
369                             && !sample.contains("{0}")) {
370                             errln("Missing {0} in sample: " + locale + ", " + type + ", " + count + " «" + sample + "»");
371                         }
372                     }
373                 }
374                 for (Entry<String, Collection<Count>> entry : sampleToCount.asMap().entrySet()) {
375                     if (entry.getValue().size() > 1) {
376                         errln("Colliding minimal pair samples: " + locale + ", " + type + ", " + entry.getValue() + " «" + entry.getKey() + "»");
377                     }
378                 }
379             }
380         }
381     }
382 
TestCldrScriptCodes()383     public void TestCldrScriptCodes() {
384         Set<String> codes = SUPPLEMENTAL.getCLDRScriptCodes();
385 
386         Set<String> unicodeScripts = ScriptMetadata.getScripts();
387         assertRelation("getCLDRScriptCodes contains Unicode Scripts", true, codes, CONTAINS_ALL, unicodeScripts);
388 
389         ImmutableSet<String> allSpecials = ImmutableSet.of("Zinh", "Zmth", "Zsye", "Zsym", "Zxxx", "Zyyy", "Zzzz");
390         assertRelation("getCLDRScriptCodes contains allSpecials", true, codes, CONTAINS_ALL, allSpecials);
391 
392         ImmutableSet<String> allCompos = ImmutableSet.of("Hanb", "Hrkt", "Jamo", "Jpan", "Kore");
393         assertRelation("getCLDRScriptCodes contains allCompos", true, codes, CONTAINS_ALL, allCompos);
394 
395         Map<Status, Set<String>> scripts = Validity.getInstance().getStatusToCodes(LstrType.script);
396         for (Entry<Status, Set<String>> e : scripts.entrySet()) {
397             switch (e.getKey()) {
398             case regular:
399             case special:
400             case unknown:
401                 assertRelation("getCLDRScriptCodes contains " + e.getKey(), true, codes, CONTAINS_ALL, e.getValue());
402                 break;
403             default:
404                 break; // do nothin
405             }
406         }
407 
408         ImmutableSet<String> variants = ImmutableSet.of("Cyrs", "Geok", "Latf", "Latg", "Syre", "Syrj", "Syrn");
409         assertRelation("getCLDRScriptCodes contains variants", false, codes, CONTAINS_SOME, variants);
410     }
411 
checkPluralSamples(String... row)412     public void checkPluralSamples(String... row) {
413         PluralInfo pluralInfo = SUPPLEMENTAL.getPlurals(
414             PluralType.valueOf(row[1]), row[0]);
415         Count count = pluralInfo.getCount(new FixedDecimal(row[2]));
416         assertEquals(String.join(", ", row),
417             Count.valueOf(row[3]), count);
418     }
419 
TestPluralLocales()420     public void TestPluralLocales() {
421         // get the unique rules
422         for (PluralType type : PluralType.values()) {
423             Relation<PluralInfo, String> pluralsToLocale = Relation.of(
424                 new HashMap<PluralInfo, Set<String>>(), TreeSet.class);
425             for (String locale : new TreeSet<>(
426                 SUPPLEMENTAL.getPluralLocales(type))) {
427                 PluralInfo pluralInfo = SUPPLEMENTAL.getPlurals(type, locale);
428                 pluralsToLocale.put(pluralInfo, locale);
429             }
430 
431             String[][] equivalents = { { "mo", "ro" }, { "tl", "fil" },
432                 { "he", "iw" }, { "in", "id" }, { "jw", "jv" },
433                 { "ji", "yi" }, { "sh", "sr" }, };
434             for (Entry<PluralInfo, Set<String>> pluralInfoEntry : pluralsToLocale
435                 .keyValuesSet()) {
436                 PluralInfo pluralInfo2 = pluralInfoEntry.getKey();
437                 Set<String> locales = pluralInfoEntry.getValue();
438                 // check that equivalent locales are either both in or both out
439                 for (String[] row : equivalents) {
440                     assertEquals(
441                         type + " must be equivalent: " + Arrays.asList(row),
442                         locales.contains(row[0]), locales.contains(row[1]));
443                 }
444                 // check that no rules contain 'within'
445                 for (Count count : pluralInfo2.getCounts()) {
446                     String rule = pluralInfo2.getRule(count);
447                     if (rule == null) {
448                         continue;
449                     }
450                     assertFalse(
451                         "Rule '" + rule + "' for " + Arrays.asList(locales)
452                         + " doesn't contain 'within'",
453                         rule.contains("within"));
454                 }
455             }
456         }
457     }
458 
TestDigitPluralCases()459     public void TestDigitPluralCases() {
460         String[][] tests = {
461             { "en", "one", "1", "1" },
462             { "en", "one", "2", "" },
463             { "en", "one", "3", "" },
464             { "en", "one", "4", "" },
465             { "en", "other", "1", "0, 2-9, 0.0, 0.1, 0.2, …" },
466             { "en", "other", "2", "10-99, 10.0, 10.1, 10.2, …" },
467             { "en", "other", "3", "100-999, 100.0, 100.1, 100.2, …" },
468             { "en", "other", "4", "1000-9999, 1000.0, 1000.1, 1000.2, …" },
469             { "hr", "one", "1", "1, 0.1, 2.10, 1.1, …" },
470             { "hr", "one", "2",
471             "21, 31, 41, 51, 61, 71, …, 10.1, 12.10, 11.1, …" },
472             { "hr", "one", "3",
473             "101, 121, 131, 141, 151, 161, …, 100.1, 102.10, 101.1, …" },
474             { "hr", "one", "4",
475             "1001, 1021, 1031, 1041, 1051, 1061, …, 1000.1, 1002.10, 1001.1, …" },
476             { "hr", "few", "1", "2-4, 0.2, 0.3, 0.4, …" },
477             { "hr", "few", "2",
478             "22-24, 32-34, 42-44, …, 10.2, 10.3, 10.4, …" },
479             { "hr", "few", "3",
480             "102-104, 122-124, 132-134, …, 100.2, 100.3, 100.4, …" },
481             { "hr", "few", "4",
482             "1002-1004, 1022-1024, 1032-1034, …, 1000.2, 1000.3, 1000.4, …" },
483             { "hr", "other", "1", "0, 5-9, 0.0, 0.5, 0.6, …" },
484             { "hr", "other", "2",
485             "10-20, 25-30, 35-40, …, 10.0, 10.5, 10.6, …" },
486             { "hr", "other", "3",
487             "100, 105-120, 125-130, 135-140, …, 100.0, 100.5, 100.6, …" },
488             { "hr", "other", "4",
489             "1000, 1005-1020, 1025-1030, 1035-1040, …, 1000.0, 1000.5, 1000.6, …" }, };
490         for (String[] row : tests) {
491             PluralInfo plurals = SUPPLEMENTAL.getPlurals(row[0]);
492             SampleList uset = plurals.getSamples9999(Count.valueOf(row[1]),
493                 Integer.parseInt(row[2]));
494             assertEquals(row[0] + ", " + row[1] + ", " + row[2], row[3],
495                 uset.toString());
496         }
497     }
498 
TestDigitPluralCompleteness()499     public void TestDigitPluralCompleteness() {
500         String[][] exceptionStrings = {
501             // defaults
502             { "*", "zero", "0,00,000,0000" },
503             { "*", "one", "0" },
504             { "*", "two", "0,00,000,0000" },
505             { "*", "few", "0,00,000,0000" },
506             { "*", "many", "0,00,000,0000" },
507             { "*", "other", "0,00,000,0000" },
508             // others
509             { "mo", "other", "00,000,0000" }, //
510             { "ro", "other", "00,000,0000" }, //
511             { "cs", "few", "0" }, // j in 2..4
512             { "sk", "few", "0" }, // j in 2..4
513             { "da", "one", "0" }, // j is 1 or t is not 0 and n within 0..2
514             { "is", "one", "0,00,000,0000" }, // j is 1 or f is 1
515             { "sv", "one", "0" }, // j is 1
516             { "he", "two", "0" }, // j is 2
517             { "ru", "one", "0,00,000,0000" }, // j mod 10 is 1 and j mod 100
518             // is not 11
519             { "uk", "one", "0,00,000,0000" }, // j mod 10 is 1 and j mod 100
520             // is not 11
521             { "bs", "one", "0,00,000,0000" }, // j mod 10 is 1 and j mod 100
522             // is not 11 or f mod 10 is
523             // 1 and f mod 100 is not 11
524             { "hr", "one", "0,00,000,0000" }, // j mod 10 is 1 and j mod 100
525             // is not 11 or f mod 10 is
526             // 1 and f mod 100 is not 11
527             { "sh", "one", "0,00,000,0000" }, // j mod 10 is 1 and j mod 100
528             // is not 11 or f mod 10 is
529             // 1 and f mod 100 is not 11
530             { "sr", "one", "0,00,000,0000" }, // j mod 10 is 1 and j mod 100
531             // is not 11 or f mod 10 is
532             // 1 and f mod 100 is not 11
533             { "mk", "one", "0,00,000,0000" }, // j mod 10 is 1 or f mod 10
534             // is 1
535             { "sl", "one", "0,000,0000" }, // j mod 100 is 1
536             { "sl", "two", "0,000,0000" }, // j mod 100 is 2
537             { "he", "many", "00,000,0000" }, // j not in 0..10 and j mod 10
538             // is 0
539             { "tzm", "one", "0,00" }, // n in 0..1 or n in 11..99
540             { "gd", "one", "0,00" }, // n in 1,11
541             { "gd", "two", "0,00" }, // n in 2,12
542             { "shi", "few", "0,00" }, // n in 2..10
543             { "gd", "few", "0,00" }, // n in 3..10,13..19
544             { "ga", "few", "0" }, // n in 3..6
545             { "ga", "many", "0,00" }, // n in 7..10
546             { "ar", "zero", "0" }, // n is 0
547             { "cy", "zero", "0" }, // n is 0
548             { "ksh", "zero", "0" }, // n is 0
549             { "lag", "zero", "0" }, // n is 0
550             { "pt", "one", "0" }, // i = 1 and v = 0 or i = 0 and t = 1
551             { "pt_PT", "one", "0" }, // n = 1 and v = 0
552             { "ar", "two", "0" }, // n is 2
553             { "cy", "two", "0" }, // n is 2
554             { "ga", "two", "0" }, // n is 2
555             { "iu", "two", "0" }, // n is 2
556             { "naq", "two", "0" }, // n is 2
557             { "se", "two", "0" }, // n is 2
558             { "sma", "two", "0" }, // n is 2
559             { "smi", "two", "0" }, // n is 2
560             { "smj", "two", "0" }, // n is 2
561             { "smn", "two", "0" }, // n is 2
562             { "sms", "two", "0" }, // n is 2
563             { "cy", "few", "0" }, // n is 3
564             { "cy", "many", "0" }, // n is 6
565             { "br", "many", "" }, // n is not 0 and n mod 1000000 is 0
566             { "gv", "one", "0,00,000,0000" }, // n mod 10 is 1
567             { "be", "one", "0,00,000,0000" }, // n mod 10 is 1 and n mod 100
568             // is not 11
569             { "lv", "one", "0,00,000,0000" }, // n mod 10 is 1 and n mod 100
570             // is not 11 or v is 2 and f
571             // mod 10 is 1 and f mod 100
572             // is not 11 or v is not 2
573             // and f mod 10 is 1
574             { "br", "one", "0,00,000,0000" }, // n mod 10 is 1 and n mod 100
575             // not in 11,71,91
576             { "lt", "one", "0,00,000,0000" }, // n mod 10 is 1 and n mod 100
577             // not in 11..19
578             { "fil", "one", "0,00,000,0000" }, // v = 0 and i = 1,2,3 or v =
579             // 0 and i % 10 != 4,6,9 or
580             // v != 0 and f % 10 !=
581             // 4,6,9
582             { "tl", "one", "0,00,000,0000" }, // v = 0 and i = 1,2,3 or v =
583             // 0 and i % 10 != 4,6,9 or
584             // v != 0 and f % 10 !=
585             // 4,6,9
586             { "dsb", "one", "0,00,000,0000" }, // v = 0 and i % 100 = 1 or f
587             // % 100 = 1
588             {"kw", "many", "00,000,0000"},  // n != 1 and n % 100 = 1,21,41,61,81
589             {"kw", "zero", "0"},    // n = 0
590             {"fr", "many", ""},    // e is special
591         };
592         // parse out the exceptions
593         Map<PluralInfo, Relation<Count, Integer>> exceptions = new HashMap<>();
594         Relation<Count, Integer> fallback = Relation.of(
595             new EnumMap<Count, Set<Integer>>(Count.class), TreeSet.class);
596         for (String[] row : exceptionStrings) {
597             Relation<Count, Integer> countToDigits;
598             if (row[0].equals("*")) {
599                 countToDigits = fallback;
600             } else {
601                 PluralInfo plurals = SUPPLEMENTAL.getPlurals(row[0]);
602                 countToDigits = exceptions.get(plurals);
603                 if (countToDigits == null) {
604                     exceptions.put(
605                         plurals,
606                         countToDigits = Relation.of(
607                             new EnumMap<Count, Set<Integer>>(
608                                 Count.class),
609                             TreeSet.class));
610                 }
611             }
612             Count c = Count.valueOf(row[1]);
613             for (String digit : row[2].split(",")) {
614                 // "99" is special, just to have the result be non-empty
615                 countToDigits.put(c, digit.length());
616             }
617         }
618         Set<PluralInfo> seen = new HashSet<>();
619         Set<String> sorted = new TreeSet<>(
620             SUPPLEMENTAL.getPluralLocales(PluralType.cardinal));
621         Relation<String, String> ruleToExceptions = Relation.of(
622             new TreeMap<String, Set<String>>(), TreeSet.class);
623 
624         for (String locale : sorted) {
625             PluralInfo plurals = SUPPLEMENTAL.getPlurals(locale);
626             if (seen.contains(plurals)) { // skip identicals
627                 continue;
628             }
629             Relation<Count, Integer> countToDigits = exceptions.get(plurals);
630             if (countToDigits == null) {
631                 countToDigits = fallback;
632             }
633             for (Count c : plurals.getCounts()) {
634                 List<String> compose = new ArrayList<>();
635                 boolean needLine = false;
636                 Set<Integer> digitSet = countToDigits.get(c);
637                 if (digitSet == null) {
638                     digitSet = fallback.get(c);
639                 }
640                 for (int digits = 1; digits < 5; ++digits) {
641                     boolean expected = digitSet.contains(digits);
642                     boolean hasSamples = plurals.hasSamples(c, digits);
643                     if (hasSamples) {
644                         compose.add(Utility.repeat("0", digits));
645                     }
646                     if (!assertEquals(locale + ", " + digits + ", " + c,
647                         expected, hasSamples)) {
648                         needLine = true;
649                     }
650                 }
651                 if (needLine) {
652                     String countRules = plurals.getPluralRules().getRules(
653                         c.toString());
654                     ruleToExceptions.put(countRules == null ? "" : countRules,
655                         "{\"" + locale + "\", \"" + c + "\", \""
656                             + Joiner.on(",").join(compose)
657                             + "\"},");
658                 }
659             }
660         }
661         if (!ruleToExceptions.isEmpty()) {
662             System.out
663             .println("To fix the above, review the following, then replace in TestDigitPluralCompleteness");
664             for (Entry<String, String> entry : ruleToExceptions.entrySet()) {
665                 System.out.println(entry.getValue() + "\t// " + entry.getKey());
666             }
667         }
668     }
669 
TestLikelyCode()670     public void TestLikelyCode() {
671         Map<String, String> likely = SUPPLEMENTAL.getLikelySubtags();
672         String[][] tests = { { "it_AQ", "it_Latn_AQ" },
673             { "it_Arab", "it_Arab_IT" }, { "az_Cyrl", "az_Cyrl_AZ" }, };
674         for (String[] pair : tests) {
675             String newMax = LikelySubtags.maximize(pair[0], likely);
676             assertEquals("Likely", pair[1], newMax);
677         }
678 
679     }
680 
TestLikelySubtagCompleteness()681     public void TestLikelySubtagCompleteness() {
682         Map<String, String> likely = SUPPLEMENTAL.getLikelySubtags();
683 
684         for (String language : SUPPLEMENTAL.getCLDRLanguageCodes()) {
685             if (!likely.containsKey(language)) {
686                 logln("WARNING: No likely subtag for CLDR language code ("
687                     + language + ")");
688             }
689         }
690         for (String script : SUPPLEMENTAL.getCLDRScriptCodes()) {
691             if (!likely.containsKey("und_" + script)
692                 && !script.equals("Latn")
693                 && !script.equals("Zinh")
694                 && !script.equals("Zyyy")
695                 && ScriptMetadata.getInfo(script) != null
696                 && ScriptMetadata.getInfo(script).idUsage != ScriptMetadata.IdUsage.EXCLUSION
697                 && ScriptMetadata.getInfo(script).idUsage != ScriptMetadata.IdUsage.UNKNOWN) {
698                 errln("No likely subtag for CLDR script code (und_" + script
699                     + ")");
700             }
701         }
702 
703     }
704 
TestEquivalentLocales()705     public void TestEquivalentLocales() {
706         Set<Set<String>> seen = new HashSet<>();
707         Set<String> toTest = new TreeSet<>(testInfo.getCldrFactory()
708             .getAvailable());
709         toTest.addAll(SUPPLEMENTAL.getLikelySubtags().keySet());
710         toTest.addAll(SUPPLEMENTAL.getLikelySubtags().values());
711         toTest.addAll(SUPPLEMENTAL.getDefaultContentLocales());
712         LanguageTagParser ltp = new LanguageTagParser();
713         main: for (String locale : toTest) {
714             if (locale.startsWith("und") || locale.equals("root")) {
715                 continue;
716             }
717             Set<String> s = SUPPLEMENTAL.getEquivalentsForLocale(locale);
718             if (seen.contains(s)) {
719                 continue;
720             }
721             // System.out.println(s + " => " + VettingViewer.gatherCodes(s));
722 
723             List<String> ss = new ArrayList<>(s);
724             String last = ss.get(ss.size() - 1);
725             ltp.set(last);
726             if (!ltp.getVariants().isEmpty() || !ltp.getExtensions().isEmpty()) {
727                 continue; // skip variants for now.
728             }
729             String language = ltp.getLanguage();
730             String script = ltp.getScript();
731             String region = ltp.getRegion();
732             if (!script.isEmpty() && !region.isEmpty()) {
733                 String noScript = ltp.setScript("").toString();
734                 String noRegion = ltp.setScript(script).setRegion("")
735                     .toString();
736                 switch (s.size()) {
737                 case 1: // ok if already maximized and strange script/country,
738                     // eg it_Arab_JA
739                     continue main;
740                 case 2: // ok if adds default country/script, eg {en_Cyrl,
741                     // en_Cyrl_US} or {en_GB, en_Latn_GB}
742                     String first = ss.get(0);
743                     if (first.equals(noScript) || first.equals(noRegion)) {
744                         continue main;
745                     }
746                     break;
747                 case 3: // ok if different script in different country, eg
748                     // {az_IR, az_Arab, az_Arab_IR}
749                     if (noScript.equals(ss.get(0))
750                         && noRegion.equals(ss.get(1))) {
751                         continue main;
752                     }
753                     break;
754                 case 4: // ok if all combinations, eg {en, en_US, en_Latn,
755                     // en_Latn_US}
756                     if (language.equals(ss.get(0))
757                         && noScript.equals(ss.get(1))
758                         && noRegion.equals(ss.get(2))) {
759                         continue main;
760                     }
761                     break;
762                 }
763             }
764             errln("Strange size or composition:\t" + s + " \t"
765                 + showLocaleParts(s));
766             seen.add(s);
767         }
768     }
769 
showLocaleParts(Set<String> s)770     private String showLocaleParts(Set<String> s) {
771         LanguageTagParser ltp = new LanguageTagParser();
772         Set<String> b = new LinkedHashSet<>();
773         for (String ss : s) {
774             ltp.set(ss);
775             addName(CLDRFile.LANGUAGE_NAME, ltp.getLanguage(), b);
776             addName(CLDRFile.SCRIPT_NAME, ltp.getScript(), b);
777             addName(CLDRFile.TERRITORY_NAME, ltp.getRegion(), b);
778         }
779         return Joiner.on("; ").join(b);
780     }
781 
addName(int languageName, String code, Set<String> b)782     private void addName(int languageName, String code, Set<String> b) {
783         if (code.isEmpty()) {
784             return;
785         }
786         String name = testInfo.getEnglish().getName(languageName, code);
787         if (!code.equals(name)) {
788             b.add(code + "=" + name);
789         }
790     }
791 
TestDefaultScriptCompleteness()792     public void TestDefaultScriptCompleteness() {
793         Relation<String, String> scriptToBase = Relation.of(
794             new LinkedHashMap<String, Set<String>>(), TreeSet.class);
795         main: for (String locale : testInfo.getCldrFactory()
796             .getAvailableLanguages()) {
797             if (!locale.contains("_") && !"root".equals(locale)) {
798                 String defaultScript = SUPPLEMENTAL.getDefaultScript(locale);
799                 if (defaultScript != null) {
800                     continue;
801                 }
802                 CLDRFile cldrFile = testInfo.getCLDRFile(locale,
803                     false);
804                 UnicodeSet set = cldrFile.getExemplarSet("",
805                     WinningChoice.NORMAL);
806                 for (String s : set) {
807                     int script = UScript.getScript(s.codePointAt(0));
808                     if (script != UScript.UNKNOWN && script != UScript.COMMON
809                         && script != UScript.INHERITED) {
810                         scriptToBase.put(UScript.getShortName(script), locale);
811                         continue main;
812                     }
813                 }
814                 scriptToBase.put(UScript.getShortName(UScript.UNKNOWN), locale);
815             }
816         }
817         if (scriptToBase.size() != 0) {
818             for (Entry<String, Set<String>> entry : scriptToBase.keyValuesSet()) {
819                 errln("Default Scripts missing:\t" + entry.getKey() + "\t"
820                     + entry.getValue());
821             }
822         }
823     }
824 
    /**
     * Checks the supplemental time data (preferred and allowed hour styles
     * per region).
     * <p>
     * Per region entry:
     * <ul>
     * <li>the preferred style must be non-null;</li>
     * <li>the first of H, h, K, k appearing in the allowed list must be the
     * preferred style;</li>
     * <li>at least one of H, h, K, k must appear in the allowed list.</li>
     * </ul>
     * Country codes without their own entry are only logged when a "001"
     * entry exists, and reported as errors otherwise.
     * <p>
     * Finally, regions whose languages with an official status of at least
     * de_facto_official are all in a fixed 24-hour-only language list (minus
     * SM, VA and regions that prefer h) must not allow h or hb.
     */
    public void TestTimeData() {
        Map<String, PreferredAndAllowedHour> timeData = SUPPLEMENTAL
            .getTimeData();
        // regions that have an explicit entry in the time data
        Set<String> regionsSoFar = new HashSet<>();
        // regions whose allowed list contains neither h nor hb
        Set<String> current24only = new HashSet<>();
        // regions whose preferred style is h
        Set<String> current12preferred = new HashSet<>();

        boolean haveWorld = false;

        // the four plain hour styles; the first one found in 'allowed' is compared with 'preferred'
        ImmutableSet<HourStyle> oldSchool = ImmutableSet.copyOf(EnumSet.of(HourStyle.H, HourStyle.h, HourStyle.K, HourStyle.k));

        for (Entry<String, PreferredAndAllowedHour> e : timeData.entrySet()) {
            String region = e.getKey();
            if (region.equals("001")) {
                haveWorld = true;
            }
            regionsSoFar.add(region);
            PreferredAndAllowedHour preferredAndAllowedHour = e.getValue();
            assertNotNull("Preferred must not be null", preferredAndAllowedHour.preferred);

            // find the first of H, h, K, k in the allowed list
            HourStyle found = null;

            for (HourStyle item : preferredAndAllowedHour.allowed) {
                if (oldSchool.contains(item)) {
                    found = item;
                    if (item != preferredAndAllowedHour.preferred) {
                        String message = "Inconsistent values for " + region + ": preferred=" + preferredAndAllowedHour.preferred
                            + " but that isn't the first " + oldSchool + " in allowed: " + preferredAndAllowedHour.allowed;
                        //if (!logKnownIssue("cldrbug:11448", message)) {
                        errln(message);
                        //}
                    }
                    break;
                }
            }
            // NOTE(review): this fires when none of H, h, K, k is in the
            // allowed list; the message wording assumes the preferred style
            // is one of those four - confirm.
            if (found == null) {
                errln(region + ": preferred " + preferredAndAllowedHour.preferred
                    + " not in " + preferredAndAllowedHour.allowed);
            }
//            final HourStyle firstAllowed = preferredAndAllowedHour.allowed.iterator().next();
//            if (preferredAndAllowedHour.preferred == HourStyle.H && firstAllowed == HourStyle.h
//                || preferredAndAllowedHour.preferred == HourStyle.H && firstAllowed == HourStyle.hb
//                || preferredAndAllowedHour.preferred == HourStyle.h && firstAllowed == HourStyle.H) {
//                errln(region + ": allowed " + preferredAndAllowedHour.allowed
//                    + " starts with preferred " + preferredAndAllowedHour.preferred);
//            } else if (isVerbose()) {
//                logln(region + ": allowed " + preferredAndAllowedHour.allowed
//                    + " starts with preferred " + preferredAndAllowedHour.preferred);
//            }
            // for (HourStyle c : preferredAndAllowedHour.allowed) {
            // if (!PreferredAndAllowedHour.HOURS.contains(c)) {
            // errln(region + ": illegal character in " +
            // preferredAndAllowedHour.allowed + ". It contains " + c
            // + " which is not in " + PreferredAndAllowedHour.HOURS);
            // }
            // }
            if (!preferredAndAllowedHour.allowed.contains(HourStyle.h)
                && !preferredAndAllowedHour.allowed.contains(HourStyle.hb)) {
                current24only.add(region);
            }
            if (preferredAndAllowedHour.preferred == HourStyle.h) {
                current12preferred.add(region);
            }
        }
        // territory codes with no explicit entry, restricted to countries
        Set<String> missing = new TreeSet<>(
            STANDARD_CODES.getGoodAvailableCodes(CodeType.territory));
        missing.removeAll(regionsSoFar);
        for (Iterator<String> it = missing.iterator(); it.hasNext();) {
            if (!StandardCodes.isCountry(it.next())) {
                it.remove();
            }
        }

        // With a 001 (world) entry, countries without their own entry are
        // only logged as implicit; without one they are reported as errors.
        if (!missing.isEmpty()) {
            if (haveWorld) {
                logln("Implicit regions: " + missing);
            } else {
                errln("Missing regions: " + missing);
            }
        }

        // The feedback gathered from our translators is that the following use
        // 24 hour time ONLY:
        // NOTE(review): "bu" does not look like a language code used
        // elsewhere in this list's style (possibly meant "bg") - confirm.
        Set<String> only24lang = new TreeSet<>(
            Arrays.asList(("sq, br, bu, ca, hr, cs, da, de, nl, et, eu, fi, "
                + "fr, gl, he, is, id, it, nb, pt, ro, ru, sr, sk, sl, sv, tr, hy")
                .split(",\\s*")));
        // With the new preferences, this is changed
        Set<String> only24region = new TreeSet<>();
        Set<String> either24or12region = new TreeSet<>();

        // get all countries where the language is official or de-facto official;
        // add them to one of two lists, based on the above list of languages
        for (String language : SUPPLEMENTAL
            .getLanguagesForTerritoriesPopulationData()) {
            boolean a24lang = only24lang.contains(language);
            for (String region : SUPPLEMENTAL
                .getTerritoriesForPopulationData(language)) {
                PopulationData pop = SUPPLEMENTAL
                    .getLanguageAndTerritoryPopulationData(language, region);
                // skip statuses that compare below de_facto_official
                if (pop.getOfficialStatus().compareTo(
                    OfficialStatus.de_facto_official) < 0) {
                    continue;
                }
                if (a24lang) {
                    only24region.add(region);
                } else {
                    either24or12region.add(region);
                }
            }
        }
        // if we have a case like CA, where en uses 12/24 but fr uses 24, remove
        // it for safety
        only24region.removeAll(either24or12region);
        // There are always exceptions... Remove SM (San Marino) and VA (Vatican),
        // since they allow 12/24 but the de facto language is Italian.
        only24region.remove("SM");
        only24region.remove("VA");
        // also remove all the regions where 'h' is preferred
        only24region.removeAll(current12preferred);
        // now verify: every expected 24-hour-only region must currently be 24-hour-only
        if (!current24only.containsAll(only24region)) {
            Set<String> missing24only = new TreeSet<>(only24region);
            missing24only.removeAll(current24only);

            errln("24-hour-only doesn't include needed items:\n"
                + " add "
                + CldrUtility.join(missing24only, " ")
                + "\n\t\t"
                + CldrUtility.join(missing24only, "\n\t\t",
                    new NameCodeTransform(testInfo.getEnglish(),
                        CLDRFile.TERRITORY_NAME)));
        }
    }
961 
962     public static class NameCodeTransform implements StringTransform {
963         private final CLDRFile file;
964         private final int codeType;
965 
NameCodeTransform(CLDRFile file, int code)966         public NameCodeTransform(CLDRFile file, int code) {
967             this.file = file;
968             this.codeType = code;
969         }
970 
971         @Override
transform(String code)972         public String transform(String code) {
973             return file.getName(codeType, code) + " [" + code + "]";
974         }
975     }
976 
    /**
     * Checks the locale alias data in the supplemental metadata, one alias
     * type at a time.
     * <ol>
     * <li>If the BCP 47 registry data has a section for the type (with
     * "territory" looked up as "region"), every registry code that has no
     * alias entry (by code or upper-cased code), is marked "Deprecated" and
     * is a CLDR language code is reported as a missing alias.</li>
     * <li>Each alias's replacement list is recomputed by substituting any
     * replacement that is itself aliased with that alias's replacements. If
     * the recomputed list differs from the original, the alias is printed
     * and reported with a suggested corrected element.</li>
     * <li>Aliases whose replacement list is null are collected and
     * logged.</li>
     * </ol>
     */
    public void TestAliases() {
        // NOTE(review): the return value is unused; presumably called for its
        // initialization side effects - confirm.
        testInfo.getStandardCodes();
        Map<String, Map<String, Map<String, String>>> bcp47Data = StandardCodes
            .getLStreg();
        // alias type -> aliased code -> (replacement list, second component not used here)
        Map<String, Map<String, R2<List<String>, String>>> aliases = SUPPLEMENTAL
            .getLocaleAliasInfo();

        for (Entry<String, Map<String, R2<List<String>, String>>> typeMap : aliases
            .entrySet()) {
            String type = typeMap.getKey();
            Map<String, R2<List<String>, String>> codeReplacement = typeMap
                .getValue();

            // the registry calls the territory section "region"
            Map<String, Map<String, String>> bcp47DataTypeData = bcp47Data
                .get(type.equals("territory") ? "region" : type);
            if (bcp47DataTypeData == null) {
                logln("skipping BCP47 test for " + type);
            } else {
                for (Entry<String, Map<String, String>> codeData : bcp47DataTypeData
                    .entrySet()) {
                    String code = codeData.getKey();
                    // already aliased (as-is or upper-cased): nothing to report
                    if (codeReplacement.containsKey(code)
                        || codeReplacement.containsKey(code
                            .toUpperCase(Locale.ENGLISH))) {
                        continue;
                        // TODO, check the value
                    }
                    Map<String, String> data = codeData.getValue();
                    // NOTE(review): the membership test and the message refer
                    // to language codes/languageAlias for every alias type -
                    // confirm that this is intended.
                    if (data.containsKey("Deprecated")
                        && SUPPLEMENTAL.getCLDRLanguageCodes().contains(
                            code)) {
                        errln("supplementalMetadata.xml: alias is missing <languageAlias type=\""
                            + code + "\" ... /> " + "\t" + data);
                    }
                }
            }

            // (code, current replacements, recomputed replacements) for aliases that need fixing
            Set<R3<String, List<String>, List<String>>> failures = new LinkedHashSet<>();
            Set<String> nullReplacements = new TreeSet<>();
            for (Entry<String, R2<List<String>, String>> codeRep : codeReplacement
                .entrySet()) {
                String code = codeRep.getKey();
                List<String> replacements = codeRep.getValue().get0();
                if (replacements == null) {
                    nullReplacements.add(code);
                    continue;
                }
                // Expand one level: a replacement that is itself aliased is
                // substituted by that alias's replacements. A replacement
                // whose own alias has a null list contributes nothing.
                Set<String> fixedReplacements = new LinkedHashSet<>();
                for (String replacement : replacements) {
                    R2<List<String>, String> newReplacement = codeReplacement
                        .get(replacement);
                    if (newReplacement != null) {
                        List<String> list = newReplacement.get0();
                        if (list != null) {
                            fixedReplacements.addAll(list);
                        }
                    } else {
                        fixedReplacements.add(replacement);
                    }
                }
                List<String> fixedList = new ArrayList<>(
                    fixedReplacements);
                if (!replacements.equals(fixedList)) {
                    R3<String, List<String>, List<String>> row = Row.of(code,
                        replacements, fixedList);
                    // printed to stdout in addition to the errln below
                    System.out.println(row.toString());
                    failures.add(row);
                }
            }

            if (failures.size() != 0) {
                for (R3<String, List<String>, List<String>> item : failures) {
                    String code = item.get0();
                    List<String> oldReplacement = item.get1();
                    List<String> newReplacement = item.get2();

                    // suggest a corrected alias element; XXX/YYY are placeholders to fill in by hand
                    errln(code + "\t=>\t" + oldReplacement + "\tshould be:\n\t"
                        + "<" + type + "Alias type=\"" + code
                        + "\" replacement=\""
                        + Joiner.on(" ").join(newReplacement)
                        + "\" reason=\"XXX\"/> <!-- YYY -->\n");
                }
            }
            if (nullReplacements.size() != 0) {
                logln("No Replacements\t" + type + "\t" + nullReplacements);
            }
        }
    }
1065 
1066     static final List<String> oldRegions = Arrays
1067         .asList("NT, YD, QU, SU, DD, FX, ZR, AN, BU, TP, CS, YU"
1068             .split(", "));
1069 
TestTerritoryContainment()1070     public void TestTerritoryContainment() {
1071         Relation<String, String> map = SUPPLEMENTAL
1072             .getTerritoryToContained(ContainmentStyle.all);
1073         Relation<String, String> mapCore = SUPPLEMENTAL.getContainmentCore();
1074         Set<String> mapItems = new LinkedHashSet<>();
1075         // get all the items
1076         for (String item : map.keySet()) {
1077             mapItems.add(item);
1078             mapItems.addAll(map.getAll(item));
1079         }
1080         Map<String, Map<String, String>> bcp47RegionData = StandardCodes
1081             .getLStreg().get("region");
1082 
1083         // verify that all regions are covered
1084         Set<String> bcp47Regions = new LinkedHashSet<>(
1085             bcp47RegionData.keySet());
1086         bcp47Regions.remove("ZZ"); // We don't care about ZZ since it is the
1087         // unknown region...
1088         for (Iterator<String> it = bcp47Regions.iterator(); it.hasNext();) {
1089             String region = it.next();
1090             Map<String, String> data = bcp47RegionData.get(region);
1091             if (data.containsKey("Deprecated")) {
1092                 logln("Removing deprecated " + region);
1093                 it.remove();
1094             }
1095             if ("Private use".equals(data.get("Description"))) {
1096                 it.remove();
1097             }
1098         }
1099 
1100         if (!mapItems.equals(bcp47Regions)) {
1101             mapItems.removeAll(oldRegions);
1102             errlnDiff("containment items not in bcp47 regions: ", mapItems,
1103                 bcp47Regions);
1104             errlnDiff("bcp47 regions not in containment items: ", bcp47Regions,
1105                 mapItems);
1106         }
1107 
1108         // verify that everything in the containment core can be reached
1109         // downwards from 001.
1110 
1111         Map<String, Integer> from001 = getRecursiveContainment("001", map,
1112             new LinkedHashMap<String, Integer>(), 1);
1113         from001.put("001", 0);
1114         Set<String> keySet = from001.keySet();
1115         for (String region : keySet) {
1116             logln(Utility.repeat("\t", from001.get(region)) + "\t" + region
1117                 + "\t" + getRegionName(region));
1118         }
1119 
1120         // Populate mapItems with the core containment
1121         mapItems.clear();
1122         for (String item : mapCore.keySet()) {
1123             mapItems.add(item);
1124             mapItems.addAll(mapCore.getAll(item));
1125         }
1126 
1127         if (!mapItems.equals(keySet)) {
1128             errlnDiff(
1129                 "containment core items that can't be reached from 001: ",
1130                 mapItems, keySet);
1131         }
1132     }
1133 
errlnDiff(String title, Set<String> mapItems, Set<String> keySet)1134     private void errlnDiff(String title, Set<String> mapItems,
1135         Set<String> keySet) {
1136         Set<String> diff = new LinkedHashSet<>(mapItems);
1137         diff.removeAll(keySet);
1138         if (diff.size() != 0) {
1139             errln(title + diff);
1140         }
1141     }
1142 
getRegionName(String region)1143     private String getRegionName(String region) {
1144         return testInfo.getEnglish().getName(CLDRFile.TERRITORY_NAME, region);
1145     }
1146 
getRecursiveContainment(String region, Relation<String, String> map, Map<String, Integer> result, int depth)1147     private Map<String, Integer> getRecursiveContainment(String region,
1148         Relation<String, String> map, Map<String, Integer> result, int depth) {
1149         Set<String> contained = map.getAll(region);
1150         if (contained == null) {
1151             return result;
1152         }
1153         for (String item : contained) {
1154             if (result.containsKey(item)) {
1155                 logln("Duplicate containment " + item + "\t"
1156                     + getRegionName(item));
1157                 continue;
1158             }
1159             result.put(item, depth);
1160             getRecursiveContainment(item, map, result, depth + 1);
1161         }
1162         return result;
1163     }
1164 
TestMacrolanguages()1165     public void TestMacrolanguages() {
1166         Set<String> languageCodes = STANDARD_CODES
1167             .getAvailableCodes("language");
1168         Map<String, Map<String, R2<List<String>, String>>> typeToTagToReplacement = SUPPLEMENTAL
1169             .getLocaleAliasInfo();
1170         Map<String, R2<List<String>, String>> tagToReplacement = typeToTagToReplacement
1171             .get("language");
1172 
1173         Relation<String, String> replacementToReplaced = Relation.of(
1174             new TreeMap<String, Set<String>>(), TreeSet.class);
1175         for (String language : tagToReplacement.keySet()) {
1176             List<String> replacements = tagToReplacement.get(language).get0();
1177             if (replacements != null) {
1178                 replacementToReplaced.putAll(replacements, language);
1179             }
1180         }
1181         replacementToReplaced.freeze();
1182 
1183         Map<String, Map<String, Map<String, String>>> lstreg = StandardCodes
1184             .getLStreg();
1185         Map<String, Map<String, String>> lstregLanguageInfo = lstreg
1186             .get("language");
1187 
1188         Relation<Scope, String> scopeToCodes = Relation.of(
1189             new TreeMap<Scope, Set<String>>(), TreeSet.class);
1190         // the invariant is that every macrolanguage has exactly 1 encompassed
1191         // language that maps to it
1192 
1193         main: for (String language : Builder.with(new TreeSet<String>())
1194             .addAll(languageCodes).addAll(Iso639Data.getAvailable()).get()) {
1195             if (language.equals("no") || language.equals("sh"))
1196                 continue; // special cases
1197             Scope languageScope = getScope(language, lstregLanguageInfo);
1198             if (languageScope == Scope.Macrolanguage) {
1199                 if (Iso639Data.getHeirarchy(language) != null) {
1200                     continue main; // is real family
1201                 }
1202                 Set<String> replacements = replacementToReplaced
1203                     .getAll(language);
1204                 if (replacements == null || replacements.size() == 0) {
1205                     scopeToCodes.put(languageScope, language);
1206                 } else {
1207                     // it still might be bad, if we don't have a mapping to a
1208                     // regular language
1209                     for (String replacement : replacements) {
1210                         Scope replacementScope = getScope(replacement,
1211                             lstregLanguageInfo);
1212                         if (replacementScope == Scope.Individual) {
1213                             continue main;
1214                         }
1215                     }
1216                     scopeToCodes.put(languageScope, language);
1217                 }
1218             }
1219         }
1220         // now show the items we found
1221         for (Scope scope : scopeToCodes.keySet()) {
1222             for (String language : scopeToCodes.getAll(scope)) {
1223                 String name = testInfo.getEnglish().getName(language);
1224                 if (name == null || name.equals(language)) {
1225                     Set<String> set = Iso639Data.getNames(language);
1226                     if (set != null) {
1227                         name = set.iterator().next();
1228                     } else {
1229                         Map<String, String> languageInfo = lstregLanguageInfo
1230                             .get(language);
1231                         if (languageInfo != null) {
1232                             name = languageInfo.get("Description");
1233                         }
1234                     }
1235                 }
1236                 errln(scope + "\t" + language + "\t" + name + "\t"
1237                     + Iso639Data.getType(language));
1238             }
1239         }
1240     }
1241 
getScope(String language, Map<String, Map<String, String>> lstregLanguageInfo)1242     private Scope getScope(String language,
1243         Map<String, Map<String, String>> lstregLanguageInfo) {
1244         Scope languageScope = Iso639Data.getScope(language);
1245         Map<String, String> languageInfo = lstregLanguageInfo.get(language);
1246         if (languageInfo == null) {
1247             // System.out.println("Couldn't get lstreg info for " + language);
1248         } else {
1249             String lstregScope = languageInfo.get("Scope");
1250             if (lstregScope != null) {
1251                 Scope scope2 = Scope.fromString(lstregScope);
1252                 if (languageScope != scope2) {
1253                     // System.out.println("Mismatch in scope between LSTR and ISO 639:\t"
1254                     // + scope2 + "\t" +
1255                     // languageScope);
1256                     languageScope = scope2;
1257                 }
1258             }
1259         }
1260         return languageScope;
1261     }
1262 
    /**
     * Switch read by TestPopulation: when true, each language ID in the
     * territory population data is asserted to equal its canonical form;
     * when false, the checks guarded by {@code !LOCALES_FIXED} run.
     */
    static final boolean LOCALES_FIXED = true;
1264 
TestPopulation()1265     public void TestPopulation() {
1266         Set<String> languages = SUPPLEMENTAL
1267             .getLanguagesForTerritoriesPopulationData();
1268         Relation<String, String> baseToLanguages = Relation.of(
1269             new TreeMap<String, Set<String>>(), TreeSet.class);
1270         LanguageTagParser ltp = new LanguageTagParser();
1271         LanguageTagCanonicalizer ltc = new LanguageTagCanonicalizer(false);
1272 
1273         for (String language : languages) {
1274             if (LOCALES_FIXED) {
1275                 String canonicalForm = ltc.transform(language);
1276                 if (!assertEquals("Canonical form", canonicalForm, language)) {
1277                     int debug = 0;
1278                 }
1279             }
1280 
1281             String base = ltp.set(language).getLanguage();
1282             String script = ltp.getScript();
1283             baseToLanguages.put(base, language);
1284 
1285             // add basic data, basically just for wo!
1286             // if there are primary scripts, they must include script (if not
1287             // empty)
1288             Set<String> primaryScripts = Collections.emptySet();
1289             Set<String> secondaryScripts = Collections.emptySet();
1290             Map<Type, BasicLanguageData> basicData = SUPPLEMENTAL
1291                 .getBasicLanguageDataMap(base);
1292             if (basicData != null) {
1293                 BasicLanguageData s = basicData
1294                     .get(BasicLanguageData.Type.primary);
1295                 if (s != null) {
1296                     primaryScripts = s.getScripts();
1297                 }
1298                 s = basicData.get(BasicLanguageData.Type.secondary);
1299                 if (s != null) {
1300                     secondaryScripts = s.getScripts();
1301                 }
1302             }
1303 
1304             // do some consistency tests; if there is a script, it must be in
1305             // primaryScripts or secondaryScripts
1306             if (!script.isEmpty() && !primaryScripts.contains(script) && !secondaryScripts.contains(script)) {
1307                 errln(base + ": Script found in territory data (" + script
1308                     + ") is not in primary scripts :\t" + primaryScripts
1309                     + " and not in secondary scripts :\t" + secondaryScripts);
1310             }
1311 
1312             // if there are multiple primary scripts, they will be in
1313             // baseToLanguages
1314             if (primaryScripts.size() > 1) {
1315                 for (String script2 : primaryScripts) {
1316                     baseToLanguages.put(base, base + "_" + script2);
1317                 }
1318             }
1319         }
1320 
1321         if (!LOCALES_FIXED) {
1322             // the invariants are that if we have a base, we must not have a script.
1323             // and if we don't have a base, we must have two items
1324             for (String base : baseToLanguages.keySet()) {
1325                 Set<String> languagesForBase = baseToLanguages.getAll(base);
1326                 if (languagesForBase.contains(base)) {
1327                     if (languagesForBase.size() > 1) {
1328                         errln("Cannot have base alone with other scripts:\t"
1329                             + languagesForBase);
1330                     }
1331                 } else {
1332                     if (languagesForBase.size() == 1) {
1333                         errln("Cannot have only one script for language:\t"
1334                             + languagesForBase);
1335                     }
1336                 }
1337             }
1338         }
1339     }
1340 
TestCompleteness()1341     public void TestCompleteness() {
1342         if (SUPPLEMENTAL.getSkippedElements().size() > 0) {
1343             logln("SupplementalDataInfo API doesn't support: "
1344                 + SUPPLEMENTAL.getSkippedElements().toString());
1345         }
1346     }
1347 
// ---- Fixtures used by TestCurrency ----

// Currency codes that TestCurrency exempts from its checks on names containing the word "new".
1348     // these are settings for exceptional cases we want to allow
1349     private static final Set<String> EXCEPTION_CURRENCIES_WITH_NEW = new TreeSet<>(
1350         Arrays.asList("ILS", "NZD", "PGK", "TWD"));
1351 
// Non-modern currency codes that TestCurrency does not report when their English name has
// neither the word "old" nor a year range.
1352     // ok since there is no problem with confusion
1353     private static final Set<String> OK_TO_NOT_HAVE_OLD = new TreeSet<>(
1354         Arrays.asList("ADP", "ATS", "BEF", "CYP", "DEM", "ESP", "FIM",
1355             "FRF", "GRD", "IEP", "ITL", "LUF", "MTL", "MTP", "NLG",
1356             "PTE", "YUM", "ARA", "BAD", "BGL", "BOP", "BRC", "BRN",
1357             "BRR", "BUK", "CSK", "ECS", "GEK", "GNS", "GQE", "HRD",
1358             "ILP", "LTT", "LVR", "MGF", "MLF", "MZE", "NIC", "PEI",
1359             "PES", "SIT", "SRG", "SUR", "TJR", "TPE", "UAK", "YUD",
1360             "YUN", "ZRZ", "GWE"));
1361 
// Cut-off date for the "new" check in TestCurrency: a currency first used before this date is
// reported as an error. Built with the deprecated Date(year, month, date) constructor, whose
// month argument is zero-based, so (year - 5, 1, 1) is 1 February of the year five years back,
// in the default time zone, evaluated once at class initialization.
// NOTE(review): month 1 is February; if 1 January was intended the argument should be 0 - confirm.
1362     private static final Date LIMIT_FOR_NEW_CURRENCY = new Date(
1363         new Date().getYear() - 5, 1, 1);
// The instant this class was initialized; TestCurrency uses it to split currency date ranges
// into "modern" (in effect now) and "non-modern".
1364     private static final Date NOW = new Date();
1365 
// Case-insensitive matcher for the whole word "old" or a parenthesised range of two four-digit
// numbers joined by an ASCII hyphen, e.g. "(1990-1999)". Reused via reset(name).
// NOTE(review): a name that joins the years with an en dash would not match - verify against
// the English currency names.
1366     private Matcher oldMatcher = Pattern.compile(
1367         "\\bold\\b|\\([0-9]{4}-[0-9]{4}\\)", Pattern.CASE_INSENSITIVE)
1368         .matcher("");
// Case-insensitive matcher for the whole word "new". Reused via reset(name).
1369     private Matcher newMatcher = Pattern.compile("\\bnew\\b",
1370         Pattern.CASE_INSENSITIVE).matcher("");
1371 
1372     /**
1373      * Test that access to currency info in supplemental data is ok. At this
1374      * point just a simple test.
1375      *
1376      * @param args
1377      */
TestCurrency()1378     public void TestCurrency() {
1379         IsoCurrencyParser isoCodes = IsoCurrencyParser.getInstance();
1380         Set<String> currencyCodes = STANDARD_CODES
1381             .getGoodAvailableCodes("currency");
1382         Relation<String, Pair<String, CurrencyDateInfo>> nonModernCurrencyCodes = Relation
1383             .of(new TreeMap<String, Set<Pair<String, CurrencyDateInfo>>>(),
1384                 TreeSet.class);
1385         Relation<String, Pair<String, CurrencyDateInfo>> modernCurrencyCodes = Relation
1386             .of(new TreeMap<String, Set<Pair<String, CurrencyDateInfo>>>(),
1387                 TreeSet.class);
1388         Set<String> territoriesWithoutModernCurrencies = new TreeSet<>(
1389             STANDARD_CODES.getGoodAvailableCodes("territory"));
1390         Map<String, Date> currencyFirstValid = new TreeMap<>();
1391         Map<String, Date> currencyLastValid = new TreeMap<>();
1392         territoriesWithoutModernCurrencies.remove("ZZ");
1393 
1394         for (String territory : STANDARD_CODES
1395             .getGoodAvailableCodes("territory")) {
1396             /* "EU" behaves like a country for purposes of this test */
1397             if ((SUPPLEMENTAL.getContained(territory) != null)
1398                 && !territory.equals("EU")) {
1399                 territoriesWithoutModernCurrencies.remove(territory);
1400                 continue;
1401             }
1402             Set<CurrencyDateInfo> currencyInfo = SUPPLEMENTAL
1403                 .getCurrencyDateInfo(territory);
1404             if (currencyInfo == null) {
1405                 continue; // error, but will pick up below.
1406             }
1407             for (CurrencyDateInfo dateInfo : currencyInfo) {
1408                 final String currency = dateInfo.getCurrency();
1409                 final Date start = dateInfo.getStart();
1410                 final Date end = dateInfo.getEnd();
1411                 if (dateInfo.getErrors().length() != 0) {
1412                     logln("parsing " + territory + "\t" + dateInfo.toString()
1413                     + "\t" + dateInfo.getErrors());
1414                 }
1415                 Date firstValue = currencyFirstValid.get(currency);
1416                 if (firstValue == null || firstValue.compareTo(start) < 0) {
1417                     currencyFirstValid.put(currency, start);
1418                 }
1419                 Date lastValue = currencyLastValid.get(currency);
1420                 if (lastValue == null || lastValue.compareTo(end) > 0) {
1421                     currencyLastValid.put(currency, end);
1422                 }
1423                 if (start.compareTo(NOW) < 0 && end.compareTo(NOW) >= 0) { // Non-tender
1424                     // is
1425                     // OK...
1426                     modernCurrencyCodes.put(currency,
1427                         new Pair<>(territory,
1428                             dateInfo));
1429                     territoriesWithoutModernCurrencies.remove(territory);
1430                 } else {
1431                     nonModernCurrencyCodes.put(currency,
1432                         new Pair<>(territory,
1433                             dateInfo));
1434                 }
1435                 logln(territory
1436                     + "\t"
1437                     + dateInfo.toString()
1438                     + "\t"
1439                     + testInfo.getEnglish().getName(CLDRFile.CURRENCY_NAME,
1440                         currency));
1441             }
1442         }
1443         // fix up
1444         nonModernCurrencyCodes.removeAll(modernCurrencyCodes.keySet());
1445         Relation<String, String> isoCurrenciesToCountries = Relation.of(
1446             new TreeMap<String, Set<String>>(), TreeSet.class)
1447             .addAllInverted(isoCodes.getCountryToCodes());
1448         // now print error messages
1449         logln("Modern Codes: " + modernCurrencyCodes.size() + "\t"
1450             + modernCurrencyCodes);
1451         Set<String> missing = new TreeSet<>(
1452             isoCurrenciesToCountries.keySet());
1453         missing.removeAll(modernCurrencyCodes.keySet());
1454         if (missing.size() != 0) {
1455             errln("Missing codes compared to ISO: " + missing.toString());
1456         }
1457 
1458         for (String currency : modernCurrencyCodes.keySet()) {
1459             Set<Pair<String, CurrencyDateInfo>> data = modernCurrencyCodes
1460                 .getAll(currency);
1461             final String name = testInfo.getEnglish().getName(
1462                 CLDRFile.CURRENCY_NAME, currency);
1463 
1464             Set<String> isoCountries = isoCurrenciesToCountries
1465                 .getAll(currency);
1466             if (isoCountries == null) {
1467                 isoCountries = new TreeSet<>();
1468             }
1469 
1470             TreeSet<String> cldrCountries = new TreeSet<>();
1471             for (Pair<String, CurrencyDateInfo> x : data) {
1472                 cldrCountries.add(x.getFirst());
1473             }
1474             if (!isoCountries.equals(cldrCountries)) {
1475                 if (!logKnownIssue("cldrbug:10765", "Missing codes compared to ISO: " + missing.toString())) {
1476 
1477                     errln("Mismatch between ISO and Cldr modern currencies for "
1478                         + currency + "\tISO:" + isoCountries + "\tCLDR:"
1479                         + cldrCountries);
1480                     showCountries("iso-cldr", isoCountries, cldrCountries, missing);
1481                     showCountries("cldr-iso", cldrCountries, isoCountries, missing);
1482                 }
1483             }
1484 
1485             if (oldMatcher.reset(name).find()) {
1486                 errln("Has 'old' in name but still used " + "\t" + currency
1487                     + "\t" + name + "\t" + data);
1488             }
1489             if (newMatcher.reset(name).find()
1490                 && !EXCEPTION_CURRENCIES_WITH_NEW.contains(currency)) {
1491                 // find the first use. If older than 5 years, flag as error
1492                 if (currencyFirstValid.get(currency).compareTo(
1493                     LIMIT_FOR_NEW_CURRENCY) < 0) {
1494                     errln("Has 'new' in name but used since "
1495                         + CurrencyDateInfo.formatDate(currencyFirstValid
1496                             .get(currency))
1497                         + "\t" + currency + "\t"
1498                         + name + "\t" + data);
1499                 } else {
1500                     logln("Has 'new' in name but used since "
1501                         + CurrencyDateInfo.formatDate(currencyFirstValid
1502                             .get(currency))
1503                         + "\t" + currency + "\t"
1504                         + name + "\t" + data);
1505                 }
1506             }
1507         }
1508         logln("Non-Modern Codes (with dates): " + nonModernCurrencyCodes.size()
1509         + "\t" + nonModernCurrencyCodes);
1510         for (String currency : nonModernCurrencyCodes.keySet()) {
1511             final String name = testInfo.getEnglish().getName(
1512                 CLDRFile.CURRENCY_NAME, currency);
1513             if (newMatcher.reset(name).find()
1514                 && !EXCEPTION_CURRENCIES_WITH_NEW.contains(currency)) {
1515                 logln("Has 'new' in name but NOT used since "
1516                     + CurrencyDateInfo.formatDate(currencyLastValid
1517                         .get(currency))
1518                     + "\t" + currency + "\t" + name
1519                     + "\t" + nonModernCurrencyCodes.getAll(currency));
1520             } else if (!oldMatcher.reset(name).find()
1521                 && !OK_TO_NOT_HAVE_OLD.contains(currency)) {
1522                 logln("Doesn't have 'old' or date range in name but NOT used since "
1523                     + CurrencyDateInfo.formatDate(currencyLastValid
1524                         .get(currency))
1525                     + "\t"
1526                     + currency
1527                     + "\t"
1528                     + name
1529                     + "\t" + nonModernCurrencyCodes.getAll(currency));
1530                 for (Pair<String, CurrencyDateInfo> pair : nonModernCurrencyCodes
1531                     .getAll(currency)) {
1532                     final String territory = pair.getFirst();
1533                     Set<CurrencyDateInfo> currencyInfo = SUPPLEMENTAL
1534                         .getCurrencyDateInfo(territory);
1535                     for (CurrencyDateInfo dateInfo : currencyInfo) {
1536                         if (dateInfo.getEnd().compareTo(NOW) < 0) {
1537                             continue;
1538                         }
1539                         logln("\tCurrencies used instead: "
1540                             + territory
1541                             + "\t"
1542                             + dateInfo
1543                             + "\t"
1544                             + testInfo.getEnglish().getName(
1545                                 CLDRFile.CURRENCY_NAME,
1546                                 dateInfo.getCurrency()));
1547 
1548                     }
1549                 }
1550 
1551             }
1552         }
1553         Set<String> remainder = new TreeSet<>();
1554         remainder.addAll(currencyCodes);
1555         remainder.removeAll(nonModernCurrencyCodes.keySet());
1556         // TODO make this an error, except for allowed exceptions.
1557         logln("Currencies without Territories: " + remainder);
1558         if (territoriesWithoutModernCurrencies.size() != 0) {
1559             errln("Modern territory missing currency: "
1560                 + territoriesWithoutModernCurrencies);
1561         }
1562     }
1563 
showCountries(final String title, Set<String> isoCountries, Set<String> cldrCountries, Set<String> missing)1564     private void showCountries(final String title, Set<String> isoCountries,
1565         Set<String> cldrCountries, Set<String> missing) {
1566         missing.clear();
1567         missing.addAll(isoCountries);
1568         missing.removeAll(cldrCountries);
1569         for (String country : missing) {
1570             logln("\t\tExtra in " + title + "\t" + country + " - "
1571                 + getRegionName(country));
1572         }
1573     }
1574 
TestCurrencyDecimalPlaces()1575     public void TestCurrencyDecimalPlaces() {
1576         IsoCurrencyParser isoCodes = IsoCurrencyParser.getInstance();
1577         Relation<String, IsoCurrencyParser.Data> codeList = isoCodes
1578             .getCodeList();
1579         Set<String> currencyCodes = STANDARD_CODES
1580             .getGoodAvailableCodes("currency");
1581         for (String cc : currencyCodes) {
1582             Set<IsoCurrencyParser.Data> d = codeList.get(cc);
1583             if (d != null) {
1584                 for (IsoCurrencyParser.Data x : d) {
1585                     CurrencyNumberInfo cni = SUPPLEMENTAL.getCurrencyNumberInfo(cc);
1586                     if (cni.digits != x.getMinorUnit()) {
1587                         logln("Mismatch between ISO/CLDR for decimal places for currency => " + cc +
1588                             ". ISO = " + x.getMinorUnit() + " CLDR = " + cni.digits);
1589                     }
1590                 }
1591             }
1592         }
1593     }
1594 
1595     /**
1596      * Verify that we have a default script for every CLDR base language
1597      */
TestDefaultScripts()1598     public void TestDefaultScripts() {
1599         SupplementalDataInfo supp = SUPPLEMENTAL;
1600         Map<String, String> likelyData = supp.getLikelySubtags();
1601         Map<String, String> baseToDefaultContentScript = new HashMap<>();
1602         for (CLDRLocale locale : supp.getDefaultContentCLDRLocales()) {
1603             String script = locale.getScript();
1604             if (!script.isEmpty() && locale.getCountry().isEmpty()) {
1605                 baseToDefaultContentScript.put(locale.getLanguage(), script);
1606             }
1607         }
1608         for (String locale : testInfo.getCldrFactory().getAvailableLanguages()) {
1609             if ("root".equals(locale)) {
1610                 continue;
1611             }
1612             CLDRLocale loc = CLDRLocale.getInstance(locale);
1613             String baseLanguage = loc.getLanguage();
1614             String defaultScript = supp.getDefaultScript(baseLanguage);
1615 
1616             String defaultContentScript = baseToDefaultContentScript
1617                 .get(baseLanguage);
1618             if (defaultContentScript != null) {
1619                 assertEquals(loc + " defaultContentScript = default",
1620                     defaultScript, defaultContentScript);
1621             }
1622             String likely = likelyData.get(baseLanguage);
1623             String likelyScript = likely == null ? null : CLDRLocale
1624                 .getInstance(likely).getScript();
1625             Map<Type, BasicLanguageData> scriptInfo = supp
1626                 .getBasicLanguageDataMap(baseLanguage);
1627             if (scriptInfo == null) {
1628                 errln(loc + ": has no BasicLanguageData");
1629             } else {
1630                 BasicLanguageData data = scriptInfo.get(Type.primary);
1631                 if (data == null) {
1632                     data = scriptInfo.get(Type.secondary);
1633                 }
1634                 if (data == null) {
1635                     errln(loc + ": has no scripts in BasicLanguageData");
1636                 } else if (!data.getScripts().contains(defaultScript)) {
1637                     errln(loc + ": " + defaultScript
1638                         + " not in BasicLanguageData " + data.getScripts());
1639                 }
1640             }
1641 
1642             assertEquals(loc + " likely = default", defaultScript, likelyScript);
1643 
1644             assertNotNull(loc + ": needs default script", defaultScript);
1645 
1646             if (!loc.getScript().isEmpty()) {
1647                 if (!loc.getScript().equals(defaultScript)) {
1648                     assertNotEquals(locale
1649                         + ": only include script if not default",
1650                         loc.getScript(), defaultScript);
1651                 }
1652             }
1653 
1654         }
1655     }
1656 
/**
 * Severity with which {@code errOrLog} reports a coverage problem.
 */
enum CoverageIssue {
    /** Reported through {@code logln}. */
    log,
    /** Reported through {@code warnln}. */
    warn,
    /**
     * Reported through {@code errln}, or through {@code logKnownIssue} followed by
     * {@code warnln} when a ticket is supplied.
     */
    error
}
1660 
TestPluralCompleteness()1661     public void TestPluralCompleteness() {
1662         // Set<String> cardinalLocales = new
1663         // TreeSet<String>(SUPPLEMENTAL.getPluralLocales(PluralType.cardinal));
1664         // Set<String> ordinalLocales = new
1665         // TreeSet<String>(SUPPLEMENTAL.getPluralLocales(PluralType.ordinal));
1666         // Map<ULocale, PluralRulesFactory.SamplePatterns> sampleCardinals =
1667         // PluralRulesFactory.getLocaleToSamplePatterns();
1668         // Set<ULocale> sampleCardinalLocales = PluralRulesFactory.getLocales();
1669         // // new HashSet(PluralRulesFactory.getSampleCounts(uLocale,
1670         // type).keySet());
1671         // Map<ULocale, PluralRules> overrideCardinals =
1672         // PluralRulesFactory.getPluralOverrides();
1673         // Set<ULocale> overrideCardinalLocales = new
1674         // HashSet<ULocale>(overrideCardinals.keySet());
1675 
1676         Set<String> testLocales = STANDARD_CODES.getLocaleCoverageLocales(
1677             Organization.google, EnumSet.of(Level.MODERN));
1678         Set<String> allLocales = testInfo.getCldrFactory().getAvailable();
1679         LanguageTagParser ltp = new LanguageTagParser();
1680         for (String locale : allLocales) {
1681             // the only known case where plural rules depend on region or script
1682             // is pt_PT
1683             if (locale.equals("root")) {
1684                 continue;
1685             }
1686             ltp.set(locale);
1687             if (!ltp.getRegion().isEmpty() || !ltp.getScript().isEmpty()) {
1688                 continue;
1689             }
1690             CoverageIssue needsCoverage = testLocales.contains(locale)
1691                 ? CoverageIssue.error
1692                     : CoverageIssue.log;
1693             CoverageIssue needsCoverage2 = needsCoverage == CoverageIssue.error ? CoverageIssue.warn : needsCoverage;
1694 
1695             //            if (logKnownIssue("Cldrbug:8809", "Missing plural rules/samples be and ga locales")) {
1696             //                if (locale.equals("be") || locale.equals("ga")) {
1697             //                    needsCoverage = CoverageIssue.warn;
1698             //                }
1699             //            }
1700             PluralRulesFactory prf = PluralRulesFactory
1701                 .getInstance(CLDRConfig.getInstance()
1702                     .getSupplementalDataInfo());
1703 
1704             for (PluralType type : PluralType.values()) {
1705                 PluralInfo pluralInfo = SUPPLEMENTAL.getPlurals(type, locale,
1706                     false);
1707                 if (pluralInfo == null) {
1708                     errOrLog(needsCoverage, locale + "\t" + type + " \tmissing plural rules", "Cldrbug:7839", "Missing plural data for modern locales");
1709                     continue;
1710                 }
1711                 Set<Count> counts = pluralInfo.getCounts();
1712                 // if (counts.size() == 1) {
1713                 // continue; // skip checking samples
1714                 // }
1715                 HashSet<String> samples = new HashSet<>();
1716                 EnumSet<Count> countsWithNoSamples = EnumSet
1717                     .noneOf(Count.class);
1718                 Relation<String, Count> samplesToCounts = Relation.of(
1719                     new HashMap(), LinkedHashSet.class);
1720                 Set<Count> countsFound = prf.getSampleCounts(locale,
1721                     type.standardType);
1722                 StringBuilder failureCases = new StringBuilder();
1723                 for (Count count : counts) {
1724                     String pattern = PluralRulesFactory.getSamplePattern(locale, type.standardType, count);
1725                     final String rangeLine = getRangeLine(count, pluralInfo.getPluralRules(), pattern);
1726                     failureCases.append('\n').append(locale).append('\t').append(type).append('\t').append(rangeLine);
1727                     if (countsFound == null || !countsFound.contains(count)) {
1728                         countsWithNoSamples.add(count);
1729                     } else {
1730                         samplesToCounts.put(pattern, count);
1731                         logln(locale + "\t" + type + "\t" + count + "\t"
1732                             + pattern);
1733                     }
1734                 }
1735                 if (!countsWithNoSamples.isEmpty()) {
1736                     errOrLog(needsCoverage, locale + "\t" + type + "\t missing samples:\t" + countsWithNoSamples,
1737                         "cldrbug:7075", "Missing ordinal minimal pairs");
1738                     errOrLog(needsCoverage2, failureCases.toString());
1739                 }
1740                 for (Entry<String, Set<Count>> entry : samplesToCounts
1741                     .keyValuesSet()) {
1742                     if (entry.getValue().size() != 1) {
1743                         errOrLog(needsCoverage, locale + "\t" + type + "\t duplicate samples: " + entry.getValue()
1744                         + " => «" + entry.getKey() + "»", "cldrbug:7119", "Some duplicate minimal pairs");
1745                         errOrLog(needsCoverage2, failureCases.toString());
1746                     }
1747                 }
1748             }
1749         }
1750     }
1751 
errOrLog(CoverageIssue causeError, String message, String logTicket, String logComment)1752     public void errOrLog(CoverageIssue causeError, String message, String logTicket, String logComment) {
1753         switch (causeError) {
1754         case error:
1755             if (logTicket == null) {
1756                 errln(message);
1757                 break;
1758             }
1759             logKnownIssue(logTicket, logComment);
1760             // fall through
1761         case warn:
1762             warnln(message);
1763             break;
1764         case log:
1765             logln(message);
1766             break;
1767         }
1768     }
1769 
errOrLog(CoverageIssue causeError, String message)1770     public void errOrLog(CoverageIssue causeError, String message) {
1771         errOrLog(causeError, message, null, null);
1772     }
1773 
TestNumberingSystemDigits()1774     public void TestNumberingSystemDigits() {
1775 
1776         // Don't worry about digits from supplemental planes yet ( ICU can't
1777         // handle them anyways )
1778         // hanidec is the only known non codepoint order numbering system
1779         // TODO: Fix so that it works properly on non-BMP digit strings.
1780         String[] knownExceptions = { "brah", "cakm", "hanidec", "osma", "shrd",
1781             "sora", "takr" };
1782         List<String> knownExceptionList = Arrays.asList(knownExceptions);
1783         for (String ns : SUPPLEMENTAL.getNumericNumberingSystems()) {
1784             if (knownExceptionList.contains(ns)) {
1785                 continue;
1786             }
1787             String digits = SUPPLEMENTAL.getDigits(ns);
1788             int previousChar = 0;
1789             int ch;
1790 
1791             for (int i = 0; i < digits.length(); i += Character.charCount(ch)) {
1792                 ch = digits.codePointAt(i);
1793                 if (i > 0 && ch != previousChar + 1) {
1794                     errln("Digits for numbering system "
1795                         + ns
1796                         + " are not in code point order. Previous char = U+"
1797                         + Utility.hex(previousChar, 4)
1798                         + " Current char = U+" + Utility.hex(ch, 4));
1799                     break;
1800                 }
1801                 previousChar = ch;
1802             }
1803         }
1804     }
1805 
TestNumberingSystemDigitCompleteness()1806     public void TestNumberingSystemDigitCompleteness() {
1807         List<Integer> unicodeDigits = new ArrayList<>();
1808         for (int cp = UCharacter.MIN_CODE_POINT; cp <= UCharacter.MAX_CODE_POINT; cp++) {
1809             if (UCharacter.getType(cp) == UCharacterEnums.ECharacterCategory.DECIMAL_DIGIT_NUMBER) {
1810                 unicodeDigits.add(Integer.valueOf(cp));
1811             }
1812         }
1813 
1814         for (String ns : SUPPLEMENTAL.getNumericNumberingSystems()) {
1815             String digits = SUPPLEMENTAL.getDigits(ns);
1816             int ch;
1817 
1818             for (int i = 0; i < digits.length(); i += Character.charCount(ch)) {
1819                 ch = digits.codePointAt(i);
1820                 unicodeDigits.remove(Integer.valueOf(ch));
1821             }
1822         }
1823 
1824         if (unicodeDigits.size() > 0) {
1825             for (Integer i : unicodeDigits) {
1826                 errln("Unicode digit: " + UCharacter.getName(i) + " is not in any numbering system. Script = "
1827                     + UScript.getShortName(UScript.getScript(i)));
1828             }
1829         }
1830     }
1831 
TestMetazones()1832     public void TestMetazones() {
1833         Date goalMin = new Date(70, 0, 1);
1834         Date goalMax = new Date(300, 0, 2);
1835         ImmutableSet<String> knownTZWithoutMetazone = ImmutableSet.of("America/Montreal", "Asia/Barnaul", "Asia/Tomsk", "Europe/Kirov");
1836         for (String timezoneRaw : TimeZone.getAvailableIDs()) {
1837             String timezone = TimeZone.getCanonicalID(timezoneRaw);
1838             String region = TimeZone.getRegion(timezone);
1839             if (!timezone.equals(timezoneRaw) || "001".equals(region)) {
1840                 continue;
1841             }
1842             if (knownTZWithoutMetazone.contains(timezone)) {
1843                 continue;
1844             }
1845             final Set<MetaZoneRange> ranges = SUPPLEMENTAL
1846                 .getMetaZoneRanges(timezone);
1847 
1848             if (assertNotNull("metazones for " + timezone, ranges)) {
1849                 long min = Long.MAX_VALUE;
1850                 long max = Long.MIN_VALUE;
1851                 for (MetaZoneRange range : ranges) {
1852                     if (range.dateRange.from != DateRange.START_OF_TIME) {
1853                         min = Math.min(min, range.dateRange.from);
1854                     }
1855                     if (range.dateRange.to != DateRange.END_OF_TIME) {
1856                         max = Math.max(max, range.dateRange.to);
1857                     }
1858                 }
1859                 assertRelation(timezone + " has metazone before 1970?", true,
1860                     goalMin, LEQ, new Date(min));
1861                 assertRelation(timezone
1862                     + " has metazone until way in the future?", true,
1863                     goalMax, GEQ, new Date(max));
1864             }
1865         }
1866         com.google.common.collect.Interners i;
1867     }
1868 
Test9924()1869     public void Test9924() {
1870         PopulationData zhCNData = SUPPLEMENTAL.getLanguageAndTerritoryPopulationData(LOCALES_FIXED ? "zh" : "zh_Hans", "CN");
1871         PopulationData yueCNData = SUPPLEMENTAL.getLanguageAndTerritoryPopulationData("yue_Hans", "CN");
1872         assertTrue("yue*10 < zh", yueCNData.getPopulation() < zhCNData.getPopulation());
1873     }
1874 
Test10765()1875     public void Test10765() { //
1876         Set<String> surveyToolLanguages = SUPPLEMENTAL.getCLDRLanguageCodes(); // codes that show up in Survey Tool
1877         Set<String> mainLanguages = new TreeSet<>();
1878         LanguageTagParser ltp = new LanguageTagParser();
1879         for (String locale : testInfo.getCldrFactory().getAvailableLanguages()) {
1880             mainLanguages.add(ltp.set(locale).getLanguage());
1881         }
1882         // add special codes we want to see anyway
1883         mainLanguages.add("und");
1884         mainLanguages.add("mul");
1885         mainLanguages.add("zxx");
1886 
1887         if (!mainLanguages.containsAll(surveyToolLanguages)) {
1888             CoverageLevel2 coverageLevel = CoverageLevel2.getInstance(SUPPLEMENTAL, "ja"); // pick "neutral" locale
1889             Set<String> temp = new TreeSet<>(surveyToolLanguages);
1890             temp.removeAll(mainLanguages);
1891             Set<String> modern = new TreeSet<>();
1892             Set<String> comprehensive = new TreeSet<>();
1893             for (String lang : temp) {
1894                 Level level = coverageLevel.getLevel(CLDRFile.getKey(CLDRFile.LANGUAGE_NAME, lang));
1895                 if (level.compareTo(Level.MODERN) <= 0) {
1896                     modern.add(lang);
1897                 } else {
1898                     comprehensive.add(lang);
1899                 }
1900             }
1901             warnln("«Modern» Languages in <variable id='$language' type='choice'> that aren't in main/* : " + getNames(modern));
1902             logln("«Comprehensive» Languages in <variable id='$language' type='choice'> that aren't in main/* : " + getNames(comprehensive));
1903         }
1904         if (!surveyToolLanguages.containsAll(mainLanguages)) {
1905             mainLanguages.removeAll(surveyToolLanguages);
1906             assertEquals("No main/* languages are missing from Survey Tool:language names (eg <variable id='$language' type='choice'>) ",
1907                 Collections.EMPTY_SET, mainLanguages);
1908         }
1909     }
1910 
getNames(Set<String> temp)1911     private Set<String> getNames(Set<String> temp) {
1912         Set<String> tempNames = new TreeSet<>();
1913         for (String langCode : temp) {
1914             tempNames.add(testInfo.getEnglish().getName(CLDRFile.LANGUAGE_NAME, langCode) + " (" + langCode + ")");
1915         }
1916         return tempNames;
1917     }
1918 
TestGrammarInfo()1919     public void TestGrammarInfo() {
1920         Multimap<String,String> allValues = TreeMultimap.create();
1921         for (String locale : SUPPLEMENTAL.hasGrammarInfo()) {
1922             if (locale.contentEquals("tr")) {
1923                 int debug = 0;
1924             }
1925             GrammarInfo grammarInfo = SUPPLEMENTAL.getGrammarInfo(locale);
1926             for (GrammaticalTarget target : GrammaticalTarget.values()) {
1927                 for (GrammaticalFeature feature : GrammaticalFeature.values()) {
1928                     Collection<String> general = grammarInfo.get(target, feature, GrammaticalScope.general);
1929                     for (GrammaticalScope scope : GrammaticalScope.values()) {
1930                         Collection<String> units = grammarInfo.get(target, feature, scope);
1931                         allValues.putAll(target + "/" + feature + "/" + scope, units);
1932                         if (scope != GrammaticalScope.general) {
1933                             assertTrue(general + " > " + scope + " " + units, general.containsAll(units));
1934                         }
1935                     }
1936                 }
1937             }
1938             if (DEBUG) {
1939                 System.out.println(grammarInfo.toString("\n" + locale + "\t"));
1940             }
1941         }
1942         if (DEBUG) {
1943             System.out.println();
1944             for (Entry<String, Collection<String>> entry : allValues.asMap().entrySet()) {
1945                 System.out.println(entry.getKey() + "\t" + Joiner.on(", ").join(entry.getValue()));
1946             }
1947         }
1948     }
1949 }
1950