1 package org.unicode.cldr.tool;
2 
3 import java.io.File;
4 import java.io.IOException;
5 import java.io.PrintWriter;
6 import java.util.ArrayList;
7 import java.util.Collection;
8 import java.util.Collections;
9 import java.util.EnumMap;
10 import java.util.EnumSet;
11 import java.util.HashMap;
12 import java.util.HashSet;
13 import java.util.Iterator;
14 import java.util.LinkedHashMap;
15 import java.util.LinkedHashSet;
16 import java.util.List;
17 import java.util.Locale;
18 import java.util.Map;
19 import java.util.Map.Entry;
20 import java.util.Set;
21 import java.util.TreeMap;
22 import java.util.TreeSet;
23 import java.util.regex.Matcher;
24 
25 import org.unicode.cldr.draft.FileUtilities;
26 import org.unicode.cldr.test.CheckCLDR.InputMethod;
27 import org.unicode.cldr.test.CheckCLDR.Phase;
28 import org.unicode.cldr.test.CheckCLDR.StatusAction;
29 import org.unicode.cldr.tool.FormattedFileWriter.Anchors;
30 import org.unicode.cldr.tool.Option.Options;
31 import org.unicode.cldr.util.Annotations;
32 import org.unicode.cldr.util.CLDRConfig;
33 import org.unicode.cldr.util.CLDRFile;
34 import org.unicode.cldr.util.CLDRFile.DraftStatus;
35 import org.unicode.cldr.util.CLDRFile.Status;
36 import org.unicode.cldr.util.CLDRInfo.CandidateInfo;
37 import org.unicode.cldr.util.CLDRInfo.PathValueInfo;
38 import org.unicode.cldr.util.CLDRInfo.UserInfo;
39 import org.unicode.cldr.util.CLDRLocale;
40 import org.unicode.cldr.util.CLDRPaths;
41 import org.unicode.cldr.util.CLDRURLS;
42 import org.unicode.cldr.util.CldrUtility;
43 import org.unicode.cldr.util.CoreCoverageInfo;
44 import org.unicode.cldr.util.CoreCoverageInfo.CoreItems;
45 import org.unicode.cldr.util.Counter;
46 import org.unicode.cldr.util.Counter2;
47 import org.unicode.cldr.util.CoverageInfo;
48 import org.unicode.cldr.util.DtdType;
49 import org.unicode.cldr.util.LanguageTagCanonicalizer;
50 import org.unicode.cldr.util.LanguageTagParser;
51 import org.unicode.cldr.util.Level;
52 import org.unicode.cldr.util.Organization;
53 import org.unicode.cldr.util.PathHeader;
54 import org.unicode.cldr.util.PathHeader.Factory;
55 import org.unicode.cldr.util.PathHeader.SurveyToolStatus;
56 import org.unicode.cldr.util.PathStarrer;
57 import org.unicode.cldr.util.PatternCache;
58 import org.unicode.cldr.util.RegexLookup;
59 import org.unicode.cldr.util.RegexLookup.LookupType;
60 import org.unicode.cldr.util.SimpleFactory;
61 import org.unicode.cldr.util.StandardCodes;
62 import org.unicode.cldr.util.StringId;
63 import org.unicode.cldr.util.SupplementalDataInfo;
64 import org.unicode.cldr.util.VettingViewer;
65 import org.unicode.cldr.util.VettingViewer.MissingStatus;
66 import org.unicode.cldr.util.VoteResolver.VoterInfo;
67 import org.unicode.cldr.util.XPathParts;
68 
69 import com.google.common.base.Joiner;
70 import com.google.common.collect.HashMultimap;
71 import com.google.common.collect.ImmutableList;
72 import com.google.common.collect.ImmutableSet;
73 import com.google.common.collect.LinkedHashMultimap;
74 import com.google.common.collect.Multimap;
75 import com.google.common.collect.Ordering;
76 import com.google.common.collect.TreeMultimap;
77 import com.ibm.icu.impl.Relation;
78 import com.ibm.icu.impl.Row.R2;
79 import com.ibm.icu.lang.UCharacter;
80 import com.ibm.icu.text.NumberFormat;
81 import com.ibm.icu.text.UnicodeSet;
82 import com.ibm.icu.util.ULocale;
83 import com.ibm.icu.util.VersionInfo;
84 
85 public class ShowLocaleCoverage {
    /** Path assembled from the aux directory: {@code voting/<GEN_VERSION>/vxml/common/}. */
    private static final String VXML_CONSTANT = CLDRPaths.AUX_DIRECTORY + "voting/" + CLDRFile.GEN_VERSION + "/vxml/common/";
    /** Shared CLDR configuration instance; main() reads the CLDR base directory from it. */
    private static final CLDRConfig CONFIG = CLDRConfig.getInstance();
    /** Tab-separated header row; showCoverage writes it as the first line of locale-missing-summary.tsv. */
    private static final String TSV_MISSING_SUMMARY_HEADER =
        "#Path Level"
            + "\t#Locales"
            + "\tLocales"
            + "\tSection"
            + "\tPage"
            + "\tHeader"
            + "\tCode"
            ;
    /** Tab-separated header row; showCoverage writes it as the first line of locale-coverage.tsv. */
    private static final String TSV_LOCALE_COVERAGE_HEADER =
        "#Dir"
            + "\tCode"
            + "\tEnglish Name"
            + "\tNative Name"
            + "\tScript"
            + "\tCLDR Target"
            + "\tSublocales"
            + "\tFields\tUC\tMissing"
            + "\tModern\tMiss +UC"
            + "\tModerate\tMiss +UC"
            + "\tBasic\tMiss +UC"
            + "\tCore\tMiss +UC"
            + "\tCore-Missing";

    /**
     * Tab-separated header row; showCoverage writes it as the first line of locale-missing.tsv.
     * The commented-out pieces are columns that are currently not emitted.
     */
    private static final String TSV_MISSING_HEADER =
        "#LCode"
            + "\tEnglish Name"
            + "\tScript"
            //        + "\tEnglish Value"
            //        + "\tNative Value"
            + "\tLocale Level"
            + "\tPath Level"
            //        + "\tStatus"
            //        + "\tAction"
            + "\tSTStatus"
            + "\tBailey"
            + "\tVxml"
            + "\tVStatus"
            //        + "\tST Link"
            + "\tSection"
            + "\tPage"
            + "\tHeader"
            + "\tCode"
            + "\tST Link"
            + "\tConfig Action"
            ;
    /** Header for locale-missing-basic.tsv; it is empty, so showCoverage starts that file with a blank line. */
    private static final String TSV_MISSING_BASIC_HEADER = "";
135 
    /** When true, doGrowth additionally prints each release's found/total data to stdout. */
    private static final boolean DEBUG = true;
    // When non-zero, addGrowth skips every locale whose first character differs from this one.
    private static final char DEBUG_FILTER = 0; // use letter to only load locales starting with that letter

    /** Chart version from ToolConstants; used as the default value of the "version" option. */
    private static final String LATEST = ToolConstants.CHART_VERSION;
    /** Number of CoreItems values minus the number in CoreItems.ONLY_RECOMMENDED, held as a double. */
    private static final double CORE_SIZE = CoreItems.values().length - CoreItems.ONLY_RECOMMENDED.size();
    /** Tool configuration that the standard codes, supplemental data and default factory below are taken from. */
    public static CLDRConfig testInfo = ToolConfig.getToolInstance();
    private static final StandardCodes SC = testInfo.getStandardCodes();
    private static final SupplementalDataInfo SUPPLEMENTAL_DATA_INFO = testInfo.getSupplementalDataInfo();
    /** Alias of {@link #SC}; both names refer to the same StandardCodes object. */
    private static final StandardCodes STANDARD_CODES = SC;

    /**
     * Factory used to load locale files. Not final: main() replaces it when the
     * "version" or "directories" option is given.
     */
    static org.unicode.cldr.util.Factory factory = testInfo.getCommonAndSeedAndMainAndAnnotationsFactory();
    /** The "en" file from the initial factory (made with the boolean argument true — presumably "resolved"; confirm). */
    private static final CLDRFile ENGLISH = factory.make("en", true);

    /** Key set of the English annotation data. */
    private static UnicodeSet ENG_ANN = Annotations.getData("en").keySet();

    // added info using pattern in VettingViewer.

    /**
     * Path patterns that addGrowth treats specially: a path reported as ABSENT that
     * matches one of these is removed from the missing set and counted as found
     * at the MODERN level instead.
     */
    static final RegexLookup<Boolean> HACK = RegexLookup.<Boolean> of(LookupType.STANDARD, RegexLookup.RegexFinderTransformPath)
        .add("//ldml/localeDisplayNames/keys/key[@type=\"(d0|em|fw|i0|k0|lw|m0|rg|s0|ss|t0|x0)\"]", true)
        .add("//ldml/localeDisplayNames/types/type[@key=\"(em|fw|kr|lw|ss)\"].*", true)
        .add("//ldml/localeDisplayNames/languages/language[@type=\".*_.*\"]", true)
        .add("//ldml/localeDisplayNames/languages/language[@type=\".*\"][@alt=\".*\"]", true)
        .add("//ldml/localeDisplayNames/territories/territory[@type=\".*\"][@alt=\".*\"]", true)
        .add("//ldml/localeDisplayNames/territories/territory[@type=\"EZ\"]", true);

    //private static final String OUT_DIRECTORY = CLDRPaths.GEN_DIRECTORY + "/coverage/"; // CldrUtility.MAIN_DIRECTORY;

    /** Option registry; each MyOptions constant adds itself here, and main() parses the command line through it. */
    final static Options myOptions = new Options();
164 
165     enum MyOptions {
166         filter(".+", ".*", "Filter the information based on id, using a regex argument."),
167         //        draftStatus(".+", "unconfirmed", "Filter the information to a minimum draft status."),
168         chart(null, null, "chart only"),
169         growth("true", "true", "Compute growth data"),
170         organization(".+", null, "Only locales for organization"),
171         version(".+",
172             LATEST, "To get different versions"),
173         rawData(null, null, "Output the raw data from all coverage levels"),
174         targetDir(".*",
175             CLDRPaths.GEN_DIRECTORY + "/statistics/", "target output file."),
176         directories("(.*:)?[a-z]+(,[a-z]+)*", "common",
177             "Space-delimited list of main source directories: common,seed,exemplar.\n" +
178             "Optional, <baseDir>:common,seed"),;
179 
180         // targetDirectory(".+", CldrUtility.CHART_DIRECTORY + "keyboards/", "The target directory."),
181         // layouts(null, null, "Only create html files for keyboard layouts"),
182         // repertoire(null, null, "Only create html files for repertoire"), ;
183         // boilerplate
184         final Option option;
185 
MyOptions(String argumentPattern, String defaultArgument, String helpText)186         MyOptions(String argumentPattern, String defaultArgument, String helpText) {
187             option = myOptions.add(this, argumentPattern, defaultArgument, helpText);
188         }
189     }
190 
    /**
     * Regex lookup mapping a handful of path patterns (alt=accounting/variant,
     * short territory names, language codes containing '_', currency symbols,
     * exemplar characters) to true.
     * NOTE(review): judging by the name these are paths that may legitimately be
     * empty and should be suppressed from reports — confirm against the use site.
     */
    static final RegexLookup<Boolean> SUPPRESS_PATHS_CAN_BE_EMPTY = new RegexLookup<Boolean>()
        .add("\\[@alt=\"accounting\"]", true)
        .add("\\[@alt=\"variant\"]", true)
        .add("^//ldml/localeDisplayNames/territories/territory.*@alt=\"short", true)
        .add("^//ldml/localeDisplayNames/languages/language.*_", true)
        .add("^//ldml/numbers/currencies/currency.*/symbol", true)
        .add("^//ldml/characters/exemplarCharacters", true);

    /** Draft status threshold; initialised to {@code unconfirmed}. */
    static DraftStatus minimumDraftStatus = DraftStatus.unconfirmed;
    /** PathHeader factory built from the English file; addGrowth passes it to VettingViewer.getStatus. */
    static final Factory pathHeaderFactory = PathHeader.getFactory(ENGLISH);

    /** Initially true; main() overwrites it with whether the "rawData" option occurred. */
    static boolean RAW_DATA = true;
    /**
     * Languages available under "common". Set by main() when the "directories"
     * option is used; otherwise filled lazily by fixCommonLocales() from the
     * current factory.
     */
    private static Set<String> COMMON_LOCALES;
204 
main(String[] args)205     public static void main(String[] args) throws IOException {
206         myOptions.parse(MyOptions.filter, args, true);
207 
208         Matcher matcher = PatternCache.get(MyOptions.filter.option.getValue()).matcher("");
209 
210         if (MyOptions.chart.option.doesOccur()) {
211             showCoverage(null, matcher);
212             return;
213         }
214 
215 
216         if (MyOptions.growth.option.doesOccur()) {
217             try (PrintWriter out = FileUtilities.openUTF8Writer(CLDRPaths.CHART_DIRECTORY + "tsv/", "locale-growth.tsv")) {
218                 doGrowth(matcher, out);
219                 return;
220             }
221         }
222 
223         Set<String> locales = null;
224         String organization = MyOptions.organization.option.getValue();
225         boolean useOrgLevel = MyOptions.organization.option.doesOccur();
226         if (useOrgLevel) {
227             locales = STANDARD_CODES.getLocaleCoverageLocales(organization);
228         }
229 
230         if (MyOptions.version.option.doesOccur()) {
231             String number = MyOptions.version.option.getValue().trim();
232             if (!number.contains(".")) {
233                 number += ".0";
234             }
235             factory = org.unicode.cldr.util.Factory.make(
236                 CLDRPaths.ARCHIVE_DIRECTORY + "cldr-" + number + "/common/main/", ".*");
237         } else {
238             if (MyOptions.directories.option.doesOccur()) {
239                 String directories = MyOptions.directories.option.getValue().trim();
240                 CLDRConfig cldrConfig = CONFIG;
241                 String base = null;
242                 int colonPos = directories.indexOf(':');
243                 if (colonPos >= 0) {
244                     base = directories.substring(0, colonPos).trim();
245                     directories = directories.substring(colonPos + 1).trim();
246                 } else {
247                     base = cldrConfig.getCldrBaseDirectory().toString();
248                 }
249                 String[] items = directories.split(",\\s*");
250                 File[] fullDirectories = new File[items.length];
251                 int i = 0;
252                 for (String item : items) {
253                     fullDirectories[i++] = new File(base + "/" + item + "/main");
254                 }
255                 factory = SimpleFactory.make(fullDirectories, ".*");
256                 COMMON_LOCALES = SimpleFactory.make(base + "/" + "common" + "/main", ".*").getAvailableLanguages();
257             }
258         }
259         fixCommonLocales();
260 
261         RAW_DATA = MyOptions.rawData.option.doesOccur();
262 
263         //showEnglish();
264 
265         showCoverage(null, matcher, locales, useOrgLevel);
266     }
267 
fixCommonLocales()268     public static void fixCommonLocales() {
269         if (COMMON_LOCALES == null) {
270             COMMON_LOCALES = factory.getAvailableLanguages();
271         }
272     }
273 
doGrowth(Matcher matcher, PrintWriter out)274     private static void doGrowth(Matcher matcher, PrintWriter out) {
275         TreeMap<String, List<Double>> growthData = new TreeMap<>(Ordering.natural().reverse()); // sort by version, descending
276 //        if (DEBUG) {
277 //            for (String dir : new File(CLDRPaths.ARCHIVE_DIRECTORY).list()) {
278 //                if (!dir.startsWith("cldr")) {
279 //                    continue;
280 //                }
281 //                String version = getNormalizedVersion(dir);
282 //                if (version == null) {
283 //                    continue;
284 //                }
285 //                org.unicode.cldr.util.Factory newFactory = org.unicode.cldr.util.Factory.make(
286 //                    CLDRPaths.ARCHIVE_DIRECTORY + "/" + dir + "/common/main/", ".*");
287 //                System.out.println("Reading: " + version);
288 //                Map<String, FoundAndTotal> currentData = addGrowth(newFactory, matcher);
289 //                System.out.println("Read: " + version + "\t" + currentData);
290 //                break;
291 //            }
292 //        }
293         Map<String, FoundAndTotal> latestData = null;
294         for (ReleaseInfo versionNormalizedVersionAndYear : versionToYear) {
295             VersionInfo version = versionNormalizedVersionAndYear.version;
296             int year = versionNormalizedVersionAndYear.year;
297             String dir = ToolConstants.getBaseDirectory(version.getVersionString(2, 3));
298             Map<String, FoundAndTotal> currentData = addGrowth(factory, dir, matcher, false);
299             System.out.println("year: " + year + "; version: " + version + "; size: " + currentData);
300             if (latestData == null) {
301                 latestData = currentData;
302             }
303             Counter2<String> completionData = getCompletion(latestData, currentData);
304             addCompletionList(year+"", completionData, growthData);
305             if (DEBUG) System.out.println(currentData);
306         }
307 //        Map<String, FoundAndTotal> latestData = addGrowth(factory, null, matcher, false);
308 //        addCompletionList(getYearFromVersion(LATEST, false), getCompletion(latestData, latestData), growthData);
309 //        if (DEBUG) System.out.println(latestData);
310 //        //System.out.println(growthData);
311 //        List<String> dirs = new ArrayList<>(Arrays.asList(new File(CLDRPaths.ARCHIVE_DIRECTORY).list()));
312 //        Collections.reverse(dirs);
313 //        for (String dir : dirs) {
314 //            if (!dir.startsWith("cldr")) {
315 //                continue;
316 //            }
317 //            String version = getNormalizedVersion(dir);
318 //            if (version == null) {
319 //                continue;
320 //            }
321 ////            if (version.compareTo("12") < 0) {
322 ////                continue;
323 ////            }
324 //            System.out.println("Reading: " + version);
325 //            if (version.equals("2008")) {
326 //                int debug = 0;
327 //            }
328 //            Map<String, FoundAndTotal> currentData = addGrowth(factory, dir, matcher, false);
329 //            System.out.println("Read: " + version + "\t" + currentData);
330 //            Counter2<String> completionData = getCompletion(latestData, currentData);
331 //            //System.out.println(version + "\t" + completionData);
332 //            addCompletionList(version, completionData, growthData);
333 //            if (DEBUG) System.out.println(currentData);
334 //        }
335         boolean first = true;
336         for (Entry<String, List<Double>> entry : growthData.entrySet()) {
337             if (first) {
338                 for (int i = 0; i < entry.getValue().size(); ++i) {
339                     out.print("\t" + i);
340                 }
341                 out.println();
342                 first = false;
343             }
344             out.println(entry.getKey() + "\t" + Joiner.on("\t").join(entry.getValue()));
345         }
346     }
347 
348     static final class ReleaseInfo {
ReleaseInfo(VersionInfo versionInfo, int year)349         public ReleaseInfo(VersionInfo versionInfo, int year) {
350             this.version = versionInfo;
351             this.year = year;
352         }
353         VersionInfo version;
354         int year;
355     }
356 
357     // TODO merge this into ToolConstants, and have the version expressed as VersionInfo.
358     static final List<ReleaseInfo> versionToYear;
359     static {
360         Object[][] mapping = {
361             { VersionInfo.getInstance(37), 2020 },
362             { VersionInfo.getInstance(36), 2019 },
363             { VersionInfo.getInstance(34), 2018 },
364             { VersionInfo.getInstance(32), 2017 },
365             { VersionInfo.getInstance(30), 2016 },
366             { VersionInfo.getInstance(28), 2015 },
367             { VersionInfo.getInstance(26), 2014 },
368             { VersionInfo.getInstance(24), 2013 },
369             { VersionInfo.getInstance(22,1), 2012 },
370             { VersionInfo.getInstance(2,0,1), 2011 },
371             { VersionInfo.getInstance(1,9,1), 2010 },
372             { VersionInfo.getInstance(1,7,2), 2009 },
373             { VersionInfo.getInstance(1,6,1), 2008 },
374             { VersionInfo.getInstance(1,5,1), 2007 },
375             { VersionInfo.getInstance(1,4,1), 2006 },
376             { VersionInfo.getInstance(1,3), 2005 },
377             { VersionInfo.getInstance(1,2), 2004 },
378             { VersionInfo.getInstance(1,1,1), 2003 },
379         };
380         List<ReleaseInfo> _versionToYear = new ArrayList<>();
381         for (Object[] row : mapping) {
_versionToYear.add(new ReleaseInfo((VersionInfo)row[0], (int)row[1]))382             _versionToYear.add(new ReleaseInfo((VersionInfo)row[0], (int)row[1]));
383         }
384         versionToYear = ImmutableList.copyOf(_versionToYear);
385     }
386 
387 //    public static String getNormalizedVersion(String dir) {
388 //        String rawVersion = dir.substring(dir.indexOf('-') + 1);
389 //        int firstDot = rawVersion.indexOf('.');
390 //        int secondDot = rawVersion.indexOf('.', firstDot + 1);
391 //        if (secondDot > 0) {
392 //            rawVersion = rawVersion.substring(0, firstDot) + rawVersion.substring(firstDot + 1, secondDot);
393 //        } else {
394 //            rawVersion = rawVersion.substring(0, firstDot);
395 //        }
396 //        String result = getYearFromVersion(rawVersion, true);
397 //        return result == null ? null : result.toString();
398 //    }
399 
400 //    private static String getYearFromVersion(String version, boolean allowNull) {
401 //        String result = versionToYear.get(version);
402 //        if (!allowNull && result == null) {
403 //            throw new IllegalArgumentException("No year for version: " + version);
404 //        }
405 //        return result;
406 //    }
407 //
408 //    private static String getVersionFromYear(String year, boolean allowNull) {
409 //        String result = versionToYear.inverse().get(year);
410 //        if (!allowNull && result == null) {
411 //            throw new IllegalArgumentException("No version for year: " + year);
412 //        }
413 //        return result;
414 //    }
415 
addCompletionList(String version, Counter2<String> completionData, TreeMap<String, List<Double>> growthData)416     public static void addCompletionList(String version, Counter2<String> completionData, TreeMap<String, List<Double>> growthData) {
417         List<Double> x = new ArrayList<>();
418         for (String key : completionData.getKeysetSortedByCount(false)) {
419             x.add(completionData.getCount(key));
420         }
421         growthData.put(version, x);
422         System.out.println(version + "\t" + x.size());
423     }
424 
getCompletion(Map<String, FoundAndTotal> latestData, Map<String, FoundAndTotal> currentData)425     public static Counter2<String> getCompletion(Map<String, FoundAndTotal> latestData, Map<String, FoundAndTotal> currentData) {
426         Counter2<String> completionData = new Counter2<>();
427         for (Entry<String, FoundAndTotal> entry : latestData.entrySet()) {
428             final String locale = entry.getKey();
429             final FoundAndTotal currentRecord = currentData.get(locale);
430             if (currentRecord == null) {
431                 continue;
432             }
433             double total = entry.getValue().total;
434             if (total == 0) {
435                 continue;
436             }
437             double completion = currentRecord.found / total;
438             completionData.add(locale, completion);
439         }
440         return completionData;
441     }
442 
443     static class FoundAndTotal {
444         final int found;
445         final int total;
446 
FoundAndTotal(Counter<Level>.... counters)447         public FoundAndTotal(Counter<Level>... counters) {
448             final int[] count = { 0, 0, 0 };
449             for (Level level : Level.values()) {
450                 if (level == Level.COMPREHENSIVE || level == Level.OPTIONAL) {
451                     continue;
452                 }
453                 int i = 0;
454                 for (Counter<Level> counter : counters) {
455                     count[i++] += counter.get(level);
456                 }
457             }
458             found = count[0];
459             total = found + count[1] + count[2];
460         }
461 
462         @Override
toString()463         public String toString() {
464             return found + "/" + total;
465         }
466     }
467 
addGrowth(org.unicode.cldr.util.Factory latestFactory, String dir, Matcher matcher, boolean showMissing)468     private static Map<String, FoundAndTotal> addGrowth(org.unicode.cldr.util.Factory latestFactory, String dir, Matcher matcher, boolean showMissing) {
469         final File mainDir = new File(dir + "/common/main/");
470         final File annotationDir = new File(dir + "/common/annotations/");
471         File[] paths = annotationDir.exists() ? new File[] {mainDir, annotationDir} : new File[] {mainDir};
472         org.unicode.cldr.util.Factory newFactory = SimpleFactory.make(paths, ".*");
473         Map<String, FoundAndTotal> data = new HashMap<>();
474         char c = 0;
475         Set<String> latestAvailable = newFactory.getAvailableLanguages();
476         for (String locale : newFactory.getAvailableLanguages()) {
477             if (!matcher.reset(locale).matches()) {
478                 continue;
479             }
480             if (!latestAvailable.contains(locale)) {
481                 continue;
482             }
483             if (SUPPLEMENTAL_DATA_INFO.getDefaultContentLocales().contains(locale)
484                 || locale.equals("root")
485                 || locale.equals("supplementalData")) {
486                 continue;
487             }
488             char nc = locale.charAt(0);
489             if (nc != c) {
490                 System.out.println("\t" + locale);
491                 c = nc;
492             }
493             if (DEBUG_FILTER != 0 && DEBUG_FILTER != nc) {
494                 continue;
495             }
496             CLDRFile latestFile = null;
497             try {
498                 latestFile = latestFactory.make(locale, true);
499             } catch (Exception e2) {
500                 continue;
501             }
502             final CLDRFile file = newFactory.make(locale, true);
503             // HACK check bogus
504 //            Collection<String> extra = file.getExtraPaths();
505 //
506 //            final Iterable<String> fullIterable = file.fullIterable();
507 //            for (String path : fullIterable) {
508 //                if (path.contains("\"one[@")) {
509 //                    boolean inside = extra.contains(path);
510 //                    Status status = new Status();
511 //                    String loc = file.getSourceLocaleID(path, status );
512 //                    int debug = 0;
513 //                }
514 //            }
515             // END HACK
516             Counter<Level> foundCounter = new Counter<>();
517             Counter<Level> unconfirmedCounter = new Counter<>();
518             Counter<Level> missingCounter = new Counter<>();
519             Set<String> unconfirmedPaths = null;
520             Relation<MissingStatus, String> missingPaths = null;
521             unconfirmedPaths = new LinkedHashSet<>();
522             missingPaths = Relation.of(new LinkedHashMap(), LinkedHashSet.class);
523             VettingViewer.getStatus(latestFile.fullIterable(), file,
524                 pathHeaderFactory, foundCounter, unconfirmedCounter,
525                 missingCounter, missingPaths, unconfirmedPaths);
526 
527             // HACK
528             Set<Entry<MissingStatus, String>> missingRemovals = new HashSet<>();
529             for (Entry<MissingStatus, String> e : missingPaths.keyValueSet()) {
530                 if (e.getKey() == MissingStatus.ABSENT) {
531                     final String path = e.getValue();
532                     if (HACK.get(path) != null) {
533                         missingRemovals.add(e);
534                         missingCounter.add(Level.MODERN, -1);
535                         foundCounter.add(Level.MODERN, 1);
536                     } else {
537                         Status status = new Status();
538                         String loc = file.getSourceLocaleID(path, status);
539                         int debug = 0;
540                     }
541                 }
542             }
543             for (Entry<MissingStatus, String> e : missingRemovals) {
544                 missingPaths.remove(e.getKey(), e.getValue());
545             }
546             // END HACK
547 
548             if (showMissing) {
549                 int count = 0;
550                 for (String s : unconfirmedPaths) {
551                     System.out.println(++count + "\t" + locale + "\tunconfirmed\t" + s);
552                 }
553                 for (Entry<MissingStatus, String> e : missingPaths.keyValueSet()) {
554                     String path = e.getValue();
555                     Status status = new Status();
556                     String loc = file.getSourceLocaleID(path, status);
557                     int debug = 0;
558 
559                     System.out.println(++count + "\t" + locale + "\t" + CldrUtility.toString(e));
560                 }
561                 int debug = 0;
562             }
563 
564             data.put(locale, new FoundAndTotal(foundCounter, unconfirmedCounter, missingCounter));
565         }
566         return Collections.unmodifiableMap(data);
567     }
568 
showCoverage(Anchors anchors, Matcher matcher)569     public static void showCoverage(Anchors anchors, Matcher matcher) throws IOException {
570         showCoverage(anchors, matcher, null, false);
571     }
572 
showCoverage(Anchors anchors, Matcher matcher, Set<String> locales, boolean useOrgLevel)573     public static void showCoverage(Anchors anchors, Matcher matcher, Set<String> locales, boolean useOrgLevel) throws IOException {
574         final String title = "Locale Coverage";
575         try (PrintWriter pw = new PrintWriter(new FormattedFileWriter(null, title, null, anchors));
576             PrintWriter tsv_summary = FileUtilities.openUTF8Writer(CLDRPaths.CHART_DIRECTORY + "tsv/", "locale-coverage.tsv");
577             PrintWriter tsv_missing = FileUtilities.openUTF8Writer(CLDRPaths.CHART_DIRECTORY + "tsv/", "locale-missing.tsv");
578             PrintWriter tsv_missing_summary = FileUtilities.openUTF8Writer(CLDRPaths.CHART_DIRECTORY + "tsv/", "locale-missing-summary.tsv");
579             PrintWriter tsv_missing_basic = FileUtilities.openUTF8Writer(CLDRPaths.CHART_DIRECTORY + "tsv/", "locale-missing-basic.tsv");
580             ){
581             tsv_summary.println(TSV_LOCALE_COVERAGE_HEADER);
582             tsv_missing_summary.println(TSV_MISSING_SUMMARY_HEADER);
583             tsv_missing.println(TSV_MISSING_HEADER);
584             tsv_missing_basic.println(TSV_MISSING_BASIC_HEADER);
585 
586             Set<String> checkModernLocales = STANDARD_CODES.getLocaleCoverageLocales(Organization.cldr, EnumSet.of(Level.MODERN));
587             Set<String> availableLanguages = new TreeSet<>(factory.getAvailableLanguages());
588             availableLanguages.addAll(checkModernLocales);
589             Relation<String, String> languageToRegion = Relation.of(new TreeMap(), TreeSet.class);
590             LanguageTagParser ltp = new LanguageTagParser();
591             LanguageTagCanonicalizer ltc = new LanguageTagCanonicalizer(true);
592             for (String locale : factory.getAvailable()) {
593                 String country = ltp.set(locale).getRegion();
594                 if (!country.isEmpty()) {
595                     languageToRegion.put(ltc.transform(ltp.getLanguageScript()), country);
596                 }
597             }
598 
599             fixCommonLocales();
600 
601             System.out.println(Joiner.on("\n").join(languageToRegion.keyValuesSet()));
602 
603             System.out.println("# Checking: " + availableLanguages);
604 
605             pw.println("<p style='text-align: left'>This chart shows the coverage levels for this release. </p>" +
606                 "<ol>"
607                 + "<li>Fields = fields found at a modern level</li>"
608                 + "<li>UC = unconfirmed values: typically treated as missing by implementations</li>"
609                 + "<li>Miss = missing values</li>"
610                 + "<li>Modern%, etc = fields/(fields + missing + unconfirmed) — at that level</li>"
611                 + "<li>Core Missing = missing core fields — optionals marked with *</li></ol>"
612                 + "<p>A high-level summary of the meaning of the coverage values are at " +
613                 "<a target='_blank' href='http://www.unicode.org/reports/tr35/tr35-info.html#Coverage_Levels'>Coverage Levels</a>. " +
614                 "The Core values are described on " +
615                 "<a target='_blank' href='http://cldr.unicode.org/index/cldr-spec/minimaldata'>Core Data</a>." +
616                 "</p>");
617 
618             Relation<MissingStatus, String> missingPaths = Relation.of(new EnumMap<MissingStatus, Set<String>>(
619                 MissingStatus.class), TreeSet.class, CLDRFile.getComparator(DtdType.ldml));
620             Set<String> unconfirmed = new TreeSet<>(CLDRFile.getComparator(DtdType.ldml));
621 
622             //Map<String, String> likely = testInfo.getSupplementalDataInfo().getLikelySubtags();
623             Set<String> defaultContents = SUPPLEMENTAL_DATA_INFO.getDefaultContentLocales();
624 
625             // Map<String,Counter<Level>> counts = new HashMap();
626             //        System.out.print("Script\tEnglish\tNative\tCode\tCode*");
627             //        for (Level level : Level.values()) {
628             //            if (skipPrintingLevels.contains(level)) {
629             //                continue;
630             //            }
631             //            System.out.print("\t≤" + level + " (f)\t(u)\t(m)");
632             //        }
633             //        System.out.println();
634             // Factory pathHeaderFactory = PathHeader.getFactory(testInfo.getCldrFactory().make("en", true));
635 
636             Counter<Level> foundCounter = new Counter<>();
637             Counter<Level> unconfirmedCounter = new Counter<>();
638             Counter<Level> missingCounter = new Counter<>();
639 
640             List<Level> levelsToShow = new ArrayList<>(EnumSet.allOf(Level.class));
641             levelsToShow.remove(Level.COMPREHENSIVE);
642             levelsToShow.remove(Level.UNDETERMINED);
643             levelsToShow = ImmutableList.copyOf(levelsToShow);
644             List<Level> reversedLevels = new ArrayList<>(levelsToShow);
645             Collections.reverse(reversedLevels);
646             reversedLevels = ImmutableList.copyOf(reversedLevels);
647 
648 
649             //        PrintWriter out2;
650             //        try {
651             //            out2 = FileUtilities.openUTF8Writer(CLDRPaths.CHART_DIRECTORY + "tsv/", "showLocaleCoverage.tsv");
652             //        } catch (IOException e1) {
653             //            throw new ICUUncheckedIOException(e1);
654             //        }
655             //
656             //        out2.print("Code\tCom?\tEnglish Name\tNative Name\tScript\tSublocales\tStrings");
657             //        for (Level level : reversedLevels) {
658             //            out2.print("\t" + level + " %\t" + level + " UC%");
659             //        }
660             //        out2.println();
661             //System.out.println("\tCore*\nCore* Missing");
662             int localeCount = 0;
663 
664             final TablePrinter tablePrinter = new TablePrinter()
665                 .addColumn("Direct.", "class='source'", null, "class='source'", true)
666                 .setBreakSpans(true).setSpanRows(false)
667                 .addColumn("Code", "class='source'", CldrUtility.getDoubleLinkMsg(), "class='source'", true).setBreakSpans(true)
668                 .addColumn("English Name", "class='source'", null, "class='source'", true).setBreakSpans(true)
669                 .addColumn("Native Name", "class='source'", null, "class='source'", true).setBreakSpans(true)
670                 .addColumn("Script", "class='source'", null, "class='source'", true).setBreakSpans(true)
671                 .addColumn("CLDR target", "class='source'", null, "class='source'", true).setBreakSpans(true).setSortPriority(0).setSortAscending(false)
672                 .addColumn("ICU", "class='source'", null, "class='source'", true).setBreakSpans(true).setSortPriority(1).setSortAscending(false)
673                 .addColumn("Sublocales", "class='target'", null, "class='targetRight'", true).setBreakSpans(true)
674                 .setCellPattern("{0,number}")
675                 .addColumn("Fields", "class='target'", null, "class='targetRight'", true).setBreakSpans(true)
676                 .setCellPattern("{0,number}")
677                 .addColumn("UC", "class='target'", null, "class='targetRight'", true).setBreakSpans(true)
678                 .setCellPattern("{0,number}")
679                 .addColumn("Miss", "class='target'", null, "class='targetRight'", true).setBreakSpans(true)
680                 .setCellPattern("{0,number}")
681                 //.addColumn("Target Level", "class='target'", null, "class='target'", true).setBreakSpans(true)
682                 ;
683 
684             NumberFormat tsvPercent = NumberFormat.getPercentInstance(Locale.ENGLISH);
685             tsvPercent.setMaximumFractionDigits(2);
686 
687             for (Level level : reversedLevels) {
688                 String titleLevel = level.toString();
689                 tablePrinter.addColumn(UCharacter.toTitleCase(titleLevel, null) + "%", "class='target'", null, "class='targetRight'", true)
690                 .setCellPattern("{0,number,0.0%}")
691                 .setBreakSpans(true);
692                 switch(level) {
693                 case CORE:
694                     tablePrinter.setSortPriority(5).setSortAscending(false);
695                     break;
696                 case BASIC:
697                     tablePrinter.setSortPriority(4).setSortAscending(false);
698                     break;
699                 case MODERATE:
700                     tablePrinter.setSortPriority(3).setSortAscending(false);
701                     break;
702                 case MODERN:
703                     tablePrinter.setSortPriority(2).setSortAscending(false);
704                     break;
705                 }
706                 //            tablePrinter
707                 //            .addColumn("∪ UC%", "class='target'", null, "class='targetRight'", true)
708                 //            .setCellPattern("{0,number,0.0%}")
709                 //            .setBreakSpans(true)
710 
711             }
712             tablePrinter.addColumn("Core Missing", "class='target'", null, "class='targetRight'", true)
713             .setBreakSpans(true);
714 
715             long start = System.currentTimeMillis();
716             LikelySubtags likelySubtags = new LikelySubtags();
717 
718             EnumMap<Level, Double> targetLevel = new EnumMap<>(Level.class);
719             targetLevel.put(Level.CORE, 2 / 100d);
720             targetLevel.put(Level.BASIC, 16 / 100d);
721             targetLevel.put(Level.MODERATE, 33 / 100d);
722             targetLevel.put(Level.MODERN, 100 / 100d);
723 
724             //        NumberFormat percentFormat = NumberFormat.getPercentInstance(ULocale.ENGLISH);
725             //        percentFormat.setMaximumFractionDigits(2);
726             //        percentFormat.setMinimumFractionDigits(2);
727             //        NumberFormat intFormat = NumberFormat.getIntegerInstance(ULocale.ENGLISH);
728 
729             Multimap<String, String> pathToLocale = TreeMultimap.create();
730 
731             int counter = 0;
732             for (String locale : availableLanguages) {
733                 try {
734                     if (locale.contains("supplemental") // for old versions
735                         || locale.startsWith("sr_Latn")) {
736                         continue;
737                     }
738                     if (locales != null && !locales.contains(locale)) {
739                         String base = CLDRLocale.getInstance(locale).getLanguage();
740                         if (!locales.contains(base)) {
741                             continue;
742                         }
743                     }
744                     if (matcher != null && !matcher.reset(locale).matches()) {
745                         continue;
746                     }
747                     if (defaultContents.contains(locale) || "root".equals(locale) || "und".equals(locale)) {
748                         continue;
749                     }
750 
751                     CLDRFile vxmlCldrFile2 = null; // getVxmlCldrFile(locale); TODO clean this up
752 
753                     tsv_summary.flush();
754                     tsv_missing_summary.flush();
755                     tsv_missing.flush();
756                     tsv_missing_basic.flush();
757 
758                     boolean isSeed = new File(CLDRPaths.SEED_DIRECTORY, locale + ".xml").exists();
759 
760                     //boolean capture = locale.equals("en");
761                     String region = ltp.set(locale).getRegion();
762                     if (!region.isEmpty()) continue; // skip regions
763 
764                     final Level cldrLocaleLevelGoal = SC.getLocaleCoverageLevel(Organization.cldr.toString(), locale);
765                     final boolean cldrLevelGoalBasicToModern = Level.CORE_TO_MODERN.contains(cldrLocaleLevelGoal);
766 
767                     String isCommonLocale = Level.MODERN == cldrLocaleLevelGoal ? "C*"
768                         : COMMON_LOCALES.contains(locale) ? "C"
769                             : "";
770 
771                     String max = likelySubtags.maximize(locale);
772                     String script = ltp.set(max).getScript();
773 
774                     String language = likelySubtags.minimize(locale);
775                     //                Level otherLevel = STANDARD_CODES.getLocaleCoverageLevel("apple", locale);
776                     //                if (otherLevel.compareTo(currentLevel) > 0
777                     //                    && otherLevel.compareTo(Level.MODERN) <= 0) {
778                     //                    currentLevel = otherLevel;
779                     //                }
780 
781                     missingPaths.clear();
782                     unconfirmed.clear();
783 
784                     final CLDRFile file = factory.make(locale, true, minimumDraftStatus);
785 
786                     if (locale.equals("af")) {
787                         int debug = 0;
788                     }
789 
790                     Iterable<String> pathSource = new IterableFilter(file.fullIterable());
791 
792                     VettingViewer.getStatus(pathSource, file,
793                         pathHeaderFactory, foundCounter, unconfirmedCounter,
794                         missingCounter, missingPaths, unconfirmed);
795 
796                     // HACK Fix up missing items. Remove once vxml is ok.
797                     if (vxmlCldrFile != null) {
798                         Multimap<MissingStatus,String> toRemove = HashMultimap.create();
799                         for (Entry<MissingStatus, String> entry : missingPaths.entrySet()) {
800                             String mPath = entry.getValue();
801                             String vxmlValue = vxmlCldrFile.getStringValue(mPath);
802                             if (vxmlValue != null) {
803                                 if (vxmlValue.equals(CldrUtility.INHERITANCE_MARKER)) {
804                                     vxmlValue = vxmlCldrFile.getBaileyValue(mPath, null, null);
805                                 }
806                                 if (vxmlValue != null) {
807                                     String bailey = file.getStringValue(mPath);
808                                     if (vxmlValue.equals(bailey)) {
809                                         String fullPath = vxmlCldrFile.getFullXPath(mPath);
810                                         if (!fullPath.contains("provisional") && !fullPath.contains("unconfirmed")) {
811                                             toRemove.put(entry.getKey(), mPath);
812                                             Level level = coverageInfo.getCoverageLevel(mPath, locale);
813                                             missingCounter.add(level, -1);
814                                         }
815                                     }
816                                 }
817                             }
818                         }
819                         for (Entry<MissingStatus, String> entry : toRemove.entries()) {
820                             missingPaths.remove(entry.getKey(), entry.getValue());
821                         }
822                     }
823 
824                     Set<String> sublocales = languageToRegion.get(language);
825                     if (sublocales == null) {
826                         //System.err.println("No Sublocales: " + language);
827                         sublocales = Collections.EMPTY_SET;
828                     }
829 
830                     //                List s = Lists.newArrayList(file.fullIterable());
831 
832                     String seedString = isSeed ? "seed" : "common";
833                     tablePrinter.addRow()
834                     .addCell(seedString)
835                     .addCell(language)
836                     .addCell(ENGLISH.getName(language))
837                     .addCell(file.getName(language))
838                     .addCell(script)
839                     .addCell(cldrLocaleLevelGoal == Level.UNDETERMINED ? "" : cldrLocaleLevelGoal.toString())
840                     .addCell(getIcuValue(language))
841                     .addCell(sublocales.size());
842 
843                     String s = TSV_LOCALE_COVERAGE_HEADER; // make sure sync'ed (and below)
844                     if (cldrLevelGoalBasicToModern) {
845                         tsv_summary
846                         .append(seedString)
847                         .append('\t').append(language)
848                         .append('\t').append(ENGLISH.getName(language))
849                         .append('\t').append(file.getName(language))
850                         .append('\t').append(script)
851                         .append('\t').append(cldrLocaleLevelGoal.toString())
852                         .append('\t').append(sublocales.size()+"");
853 
854                     }
855 
856                     //                String header = language
857                     //                    + "\t" + isCommonLocale
858                     //                    + "\t" + ENGLISH.getName(language)
859                     //                    + "\t" + file.getName(language)
860                     //                    + "\t" + script
861                     //                    + "\t" + sublocales.size()
862                     //                    //+ "\t" + currentLevel
863                     //                    ;
864 
865                     int sumFound = 0;
866                     int sumMissing = 0;
867                     int sumUnconfirmed = 0;
868 
869                     // get the totals
870 
871                     EnumMap<Level, Integer> totals = new EnumMap<>(Level.class);
872                     EnumMap<Level, Integer> confirmed = new EnumMap<>(Level.class);
873                     //                EnumMap<Level, Integer> unconfirmedByLevel = new EnumMap<>(Level.class);
874                     Set<String> coreMissing = new LinkedHashSet<>();
875 
876                     if (locale.equals("af")) {
877                         int debug = 0;
878                     }
879 
880                     Counter<String> starredCounter = new Counter<>();
881 
882                     { // CORE
883                         long missingExemplarCount = missingCounter.get(Level.CORE);
884                         if (missingExemplarCount > 0) {
885                             for (Entry<MissingStatus, String> statusAndPath : missingPaths.entrySet()) {
886                                 String path = statusAndPath.getValue();
887                                 if (path.startsWith("//ldml/characters/exemplarCharacters")) {
888                                     PathHeader ph = pathHeaderFactory.fromPath(path);
889                                     String problem = ph.getCode().replaceAll("Others: ","").replaceAll("Main Letters", "main-letters");
890                                     coreMissing.add(problem);
891                                     // String line = spreadsheetLine(locale, script, language, cldrLevelGoal, foundLevel, missingStatus.toString(), path, file.getStringValue(path));
892                                     if (cldrLevelGoalBasicToModern) {
893                                         String line = spreadsheetLine(locale, language, script, "«No " + problem + "»", cldrLocaleLevelGoal, Level.CORE, "ABSENT", path, file, vxmlCldrFile2, pathToLocale);
894                                         tsv_missing.println(line);
895                                     } else {
896                                         gatherStarred(path, starredCounter);
897                                     }
898                                 }
899                             }
900                         }
901                         Multimap<CoreItems, String> detailedErrors = LinkedHashMultimap.create();
902                         Set<CoreItems> coverage = new TreeSet<>(
903                             CoreCoverageInfo.getCoreCoverageInfo(file, detailedErrors));
904                         Set<CoreItems> missing = EnumSet.allOf(CoreItems.class);
905                         missing.removeAll(coverage);
906                         for (Entry<CoreItems, String> entry : detailedErrors.entries()) {
907                             CoreItems coreItem = entry.getKey();
908                             String path = entry.getValue();
909                             coreMissing.add(coreItem.toString());
910                             //String line = spreadsheetLine(language, script, "n/a", detailedErrors.get(entry).toString(), level, "ABSENT", "n/a", "n/a", "n/a");
911                             if (cldrLevelGoalBasicToModern) {
912                                 String line = spreadsheetLine(locale, language, script, "«No " + coreItem + "»", cldrLocaleLevelGoal, coreItem.desiredLevel, "ABSENT", path, null, vxmlCldrFile2, pathToLocale);
913                                 tsv_missing.println(line);
914                             } else {
915                                 gatherStarred(path, starredCounter);
916                             }
917                         }
918                         missing.removeAll(CoreItems.ONLY_RECOMMENDED);
919                         foundCounter.add(Level.CORE, coverage.size());
920                         missingCounter.add(Level.CORE, missing.size());
921 
922                         //                    sumFound += coverage.size();
923                         //                    sumMissing += missing.size();
924 
925                         //                    confirmed.put(Level.CORE, (int) coverage.size());
926                         ////                    unconfirmedByLevel.put(level, (int)(foundCount + unconfirmedCount));
927                         //                    totals.put(Level.CORE, (int)(coverage.size() + missing.size()));
928 
929                     }
930 
931                     if (cldrLevelGoalBasicToModern) {
932                         Level goalLevel = cldrLocaleLevelGoal;
933                         for (Entry<MissingStatus, String> entry : missingPaths.entrySet()) {
934                             String path = entry.getValue();
935                             String status = entry.getKey().toString();
936                             Level foundLevel = coverageInfo.getCoverageLevel(path, locale);
937                             if (goalLevel.compareTo(foundLevel) >= 0) {
938                                 String line = spreadsheetLine(locale, language, script, file.getStringValue(path), goalLevel, foundLevel, status, path, file, vxmlCldrFile2, pathToLocale);
939                                 tsv_missing.println(line);
940                             }
941                         }
942                         for (String path : unconfirmed) {
943                             Level foundLevel = coverageInfo.getCoverageLevel(path, locale);
944                             if (goalLevel.compareTo(foundLevel) >= 0) {
945                                 String line = spreadsheetLine(locale, language, script, file.getStringValue(path), goalLevel, foundLevel, "n/a", path, file, vxmlCldrFile2, pathToLocale);
946                                 tsv_missing.println(line);
947                             }
948                         }
949                     } else {
950                         Level goalLevel = Level.BASIC;
951                         for (Entry<MissingStatus, String> entry : missingPaths.entrySet()) {
952                             String path = entry.getValue();
953                             String status = entry.getKey().toString();
954                             Level foundLevel = coverageInfo.getCoverageLevel(path, locale);
955                             if (goalLevel.compareTo(foundLevel) >= 0) {
956                                 gatherStarred(path, starredCounter);
957                             }
958                         }
959                         for (String path : unconfirmed) {
960                             Level foundLevel = coverageInfo.getCoverageLevel(path, locale);
961                             if (goalLevel.compareTo(foundLevel) >= 0) {
962                                 gatherStarred(path, starredCounter);
963                             }
964                         }
965                     }
966 
967                     tsv_missing_basic.println(TSV_MISSING_BASIC_HEADER);
968                     for (R2<Long, String> starred : starredCounter.getEntrySetSortedByCount(false, null)) {
969                         // PathHeader ph = pathHeaderFactory.fromPath(starred.get1());
970                         tsv_missing_basic.println(locale + "\t" + starred.get0() + "\t" + starred.get1().replace("\"*\"", "'*'"));
971                     }
972 
973                     for (Level level : levelsToShow) {
974                         long foundCount = foundCounter.get(level);
975                         long unconfirmedCount = unconfirmedCounter.get(level);
976                         long missingCount = missingCounter.get(level);
977 
978                         sumFound += foundCount;
979                         sumUnconfirmed += unconfirmedCount;
980                         sumMissing += missingCount;
981 
982                         confirmed.put(level, sumFound);
983                         //                    unconfirmedByLevel.put(level, (int)(foundCount + unconfirmedCount));
984                         totals.put(level, sumFound + sumUnconfirmed + sumMissing);
985                     }
986 
987                     double modernTotal = totals.get(Level.MODERN);
988 
989                     tablePrinter
990                     .addCell(sumFound)
991                     .addCell(sumUnconfirmed)
992                     .addCell(sumMissing)
993                     ;
994 
995                     if (cldrLevelGoalBasicToModern) {
996                         tsv_summary
997                         .append('\t').append(sumFound+"")
998                         .append('\t').append(sumUnconfirmed+"")
999                         .append('\t').append(sumMissing+"")
1000                         ;
1001                     }
1002 
1003                     //                header += "\t" + sumFound;
1004                     //                header += "\t" + (sumFound + sumUnconfirmed);
1005 
1006                     // print the totals
1007 
1008                     for (Level level : reversedLevels) {
1009                         if (useOrgLevel && cldrLocaleLevelGoal != level) {
1010                             continue;
1011                         }
1012                         int confirmedCoverage = confirmed.get(level);
1013                         //                    int unconfirmedCoverage = unconfirmedByLevel.get(level);
1014                         double total = totals.get(level);
1015 
1016                         tablePrinter
1017                         .addCell(confirmedCoverage / total)
1018                         //                    .addCell(unconfirmedCoverage / total)
1019                         ;
1020 
1021                         if (cldrLevelGoalBasicToModern) {
1022                             tsv_summary
1023                             .append('\t').append(String.valueOf(confirmedCoverage))
1024                             .append('\t').append(String.valueOf((int)total - confirmedCoverage))
1025                             ;
1026                         }
1027 
1028                         //                    if (RAW_DATA) {
1029                         //                        header += "\t" + confirmedCoverage / total
1030                         //                            + "\t" + unconfirmedCoverage / total;
1031                         //                    } else {
1032                         //                        Double factor = targetLevel.get(level) / (total / modernTotal);
1033                         //                        header += "\t" + factor * confirmedCoverage / modernTotal
1034                         ////                            + "\t" + factor * unconfirmedCoverage / modernTotal
1035                         //                            ;
1036                         //                    }
1037                     }
1038                     String coreMissingString =
1039                         Joiner.on(", ").join(coreMissing);
1040 
1041                     tablePrinter
1042                     .addCell(coreMissingString)
1043                     .finishRow();
1044 
1045                     if (cldrLevelGoalBasicToModern) {
1046                         tsv_summary
1047                         .append('\t')
1048                         .append(coreMissingString)
1049                         .append('\n');
1050                     }
1051 
1052                     // Write missing paths (for >99% and specials)
1053 
1054                     //                if (false) { // checkModernLocales.contains(locale)
1055                     //                    CoverageLevel2 coverageLevel2 = CoverageLevel2.getInstance(locale);
1056                     //
1057                     //                    for (String path : unconfirmed) {
1058                     //                        Level level = coverageLevel2.getLevel(path);
1059                     //                        if (level.compareTo(cldrLocaleLevelGoal) > 0) {
1060                     //                            continue;
1061                     //                        }
1062                     //                        String line = spreadsheetLine(locale, language, script, file.getStringValue(path), cldrLocaleLevelGoal, level, "UNCONFIRMED", path, pathToLocale);
1063                     //                        if (SUPPRESS_PATHS_CAN_BE_EMPTY.get(path) != null) {
1064                     //                            //System.out.println("\nSKIP: " + line);
1065                     //                        } else {
1066                     //                            tsv_missing.println(line);
1067                     //                        }
1068                     //                    }
1069                     //                    for (Entry<MissingStatus, String> entry : missingPaths.entrySet()) {
1070                     //                        String path = entry.getValue();
1071                     //                        Level level = coverageLevel2.getLevel(path);
1072                     //                        if (level.compareTo(cldrLocaleLevelGoal) > 0) {
1073                     //                            continue;
1074                     //                        }
1075                     //                        MissingStatus missingStatus = entry.getKey();
1076                     //                        String line = spreadsheetLine(locale, language, script, "???", cldrLocaleLevelGoal, level, missingStatus.toString(), path, pathToLocale);
1077                     //                        if (SUPPRESS_PATHS_CAN_BE_EMPTY.get(path) != null) {
1078                     //                            //System.out.println("\nSKIP: " + line);
1079                     //                        } else {
1080                     //                            tsv_missing.println(line);
1081                     //                        }
1082                     //                    }
1083                     //                }
1084 
1085                     localeCount++;
1086                 } catch (Exception e) {
1087                     throw new IllegalArgumentException(e);
1088                 }
1089             }
1090             pw.println(tablePrinter.toTable());
1091 
1092             Multimap<Level, String> levelToLocales = TreeMultimap.create();
1093 
1094             for ( Entry<String, Collection<String>> entry : pathToLocale.asMap().entrySet()) {
1095                 String path = entry.getKey();
1096                 Collection<String> localeSet = entry.getValue();
1097                 levelToLocales.clear();
1098                 for (String locale : localeSet) {
1099                     Level foundLevel = coverageInfo.getCoverageLevel(path, locale);
1100                     levelToLocales.put(foundLevel, locale);
1101                 }
1102                 String phString = "n/a\tn/a\tn/a\tn/a";
1103                 try {
1104                     PathHeader ph = pathHeaderFactory.fromPath(path);
1105                     phString = ph.toString();
1106                 } catch (Exception e) {
1107                 }
1108                 for (Entry<Level, Collection<String>> entry2 : levelToLocales.asMap().entrySet()) {
1109                     Level level = entry2.getKey();
1110                     localeSet = entry2.getValue();
1111                     String s = TSV_MISSING_SUMMARY_HEADER; // check for changes
1112                     tsv_missing_summary.println(
1113                         level
1114                         + "\t" + localeSet.size()
1115                         + "\t" + Joiner.on(" ").join(localeSet)
1116                         + "\t" + phString
1117                         );
1118                 }
1119             }
1120             //        out2.close();
1121 
1122             long end = System.currentTimeMillis();
1123             System.out.println((end - start) + " millis = "
1124                 + ((end - start) / localeCount) + " millis/locale");
1125             ShowPlurals.appendBlanksForScrolling(pw);
1126         }
1127     }
1128 
1129 //    public static void showEnglish() {
1130 //        Map<PathHeader,String> sorted = new TreeMap<>();
1131 //        CoverageInfo coverageInfo=CLDRConfig.getInstance().getCoverageInfo();
1132 //        for (String path : ENGLISH) {
1133 ////            Level currentLevel = SUPPLEMENTAL_DATA_INFO.getCoverageLevel(path, "en");
1134 //            Level currentLevel=coverageInfo.getCoverageLevel(path, "en");
1135 //            if (currentLevel.compareTo(Level.MINIMAL) <= 0) {
1136 //                PathHeader ph = pathHeaderFactory.fromPath(path);
1137 //                sorted.put(ph, currentLevel + "\t" + ENGLISH.getStringValue(path));
1138 //            }
1139 //        }
1140 //        for (Entry<PathHeader, String> entry : sorted.entrySet()) {
1141 //            System.out.println(entry.getKey() + "\t" + entry.getValue());
1142 //        }
1143 //    }
1144 
1145     static class IterableFilter implements Iterable<String> {
1146         private Iterable<String> source;
1147 
IterableFilter(Iterable<String> source)1148         IterableFilter(Iterable<String> source) { // package-private constructor (no access modifier)
1149             this.source = source; // keeps a reference to the caller's iterable; nothing is copied or iterated here
1150         }
1151 
1152         /**
1153          * When some paths are defined after submission, we need to change them to COMPREHENSIVE in computing the vetting status.
1154          */
1155 
1156         static final Set<String> SUPPRESS_PATHS_AFTER_SUBMISSION = ImmutableSet.of(
1157             "//ldml/localeDisplayNames/languages/language[@type=\"ccp\"]",
1158             "//ldml/localeDisplayNames/territories/territory[@type=\"XA\"]",
1159             "//ldml/localeDisplayNames/territories/territory[@type=\"XB\"]",
1160             "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"Gy\"]/greatestDifference[@id=\"G\"]",
1161             "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"Gy\"]/greatestDifference[@id=\"y\"]",
1162             "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyM\"]/greatestDifference[@id=\"G\"]",
1163             "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyM\"]/greatestDifference[@id=\"M\"]",
1164             "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyM\"]/greatestDifference[@id=\"y\"]",
1165             "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMd\"]/greatestDifference[@id=\"d\"]",
1166             "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMd\"]/greatestDifference[@id=\"G\"]",
1167             "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMd\"]/greatestDifference[@id=\"M\"]",
1168             "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMd\"]/greatestDifference[@id=\"y\"]",
1169             "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMEd\"]/greatestDifference[@id=\"d\"]",
1170             "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMEd\"]/greatestDifference[@id=\"G\"]",
1171             "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMEd\"]/greatestDifference[@id=\"M\"]",
1172             "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMEd\"]/greatestDifference[@id=\"y\"]",
1173             "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMMM\"]/greatestDifference[@id=\"G\"]",
1174             "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMMM\"]/greatestDifference[@id=\"M\"]",
1175             "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMMM\"]/greatestDifference[@id=\"y\"]",
1176             "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMMMd\"]/greatestDifference[@id=\"d\"]",
1177             "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMMMd\"]/greatestDifference[@id=\"G\"]",
1178             "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMMMd\"]/greatestDifference[@id=\"M\"]",
1179             "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMMMd\"]/greatestDifference[@id=\"y\"]",
1180             "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMMMEd\"]/greatestDifference[@id=\"d\"]",
1181             "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMMMEd\"]/greatestDifference[@id=\"G\"]",
1182             "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMMMEd\"]/greatestDifference[@id=\"M\"]",
1183             "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMMMEd\"]/greatestDifference[@id=\"y\"]",
1184             "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"Gy\"]/greatestDifference[@id=\"G\"]",
1185             "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"Gy\"]/greatestDifference[@id=\"y\"]",
1186             "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyM\"]/greatestDifference[@id=\"G\"]",
1187             "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyM\"]/greatestDifference[@id=\"M\"]",
1188             "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyM\"]/greatestDifference[@id=\"y\"]",
1189             "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMd\"]/greatestDifference[@id=\"d\"]",
1190             "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMd\"]/greatestDifference[@id=\"G\"]",
1191             "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMd\"]/greatestDifference[@id=\"M\"]",
1192             "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMd\"]/greatestDifference[@id=\"y\"]",
1193             "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMEd\"]/greatestDifference[@id=\"d\"]",
1194             "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMEd\"]/greatestDifference[@id=\"G\"]",
1195             "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMEd\"]/greatestDifference[@id=\"M\"]",
1196             "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMEd\"]/greatestDifference[@id=\"y\"]",
1197             "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMMM\"]/greatestDifference[@id=\"G\"]",
1198             "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMMM\"]/greatestDifference[@id=\"M\"]",
1199             "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMMM\"]/greatestDifference[@id=\"y\"]",
1200             "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMMMd\"]/greatestDifference[@id=\"d\"]",
1201             "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMMMd\"]/greatestDifference[@id=\"G\"]",
1202             "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMMMd\"]/greatestDifference[@id=\"M\"]",
1203             "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMMMd\"]/greatestDifference[@id=\"y\"]",
1204             "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMMMEd\"]/greatestDifference[@id=\"d\"]",
1205             "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMMMEd\"]/greatestDifference[@id=\"G\"]",
1206             "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMMMEd\"]/greatestDifference[@id=\"M\"]",
1207             "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"GyMMMEd\"]/greatestDifference[@id=\"y\"]"
1208             );
1209         @Override
iterator()1210         public Iterator<String> iterator() {
1211             return new IteratorFilter(source.iterator());
1212         }
1213 
1214         static class IteratorFilter implements Iterator<String> {
1215             Iterator<String> source;
1216             String peek;
1217 
IteratorFilter(Iterator<String> source)1218             public IteratorFilter(Iterator<String> source) {
1219                 this.source = source;
1220                 fillPeek();
1221             }
1222             @Override
hasNext()1223             public boolean hasNext() {
1224                 return peek != null;
1225             }
1226             @Override
next()1227             public String next() {
1228                 String result = peek;
1229                 fillPeek();
1230                 return result;
1231             }
1232 
fillPeek()1233             private void fillPeek() {
1234                 peek = null;
1235                 while (source.hasNext()) {
1236                     peek = source.next();
1237                     // if it is ok to assess, then break
1238                     if (!SUPPRESS_PATHS_AFTER_SUBMISSION.contains(peek)
1239                         && SUPPRESS_PATHS_CAN_BE_EMPTY.get(peek) != Boolean.TRUE) {
1240                         break;
1241                     }
1242                     peek = null;
1243                 }
1244             }
1245         }
1246 
1247     }
    /** Shared {@code CoverageInfo} instance, built once from {@code SUPPLEMENTAL_DATA_INFO}. */
    static final CoverageInfo coverageInfo = new CoverageInfo(SUPPLEMENTAL_DATA_INFO);
1249 
1250 // userInfo.getVoterInfo().getLevel().compareTo(VoteResolver.Level.tc)
    // Placeholder voter returned by dummyUserInfo.getVoterInfo(): organization cldr, level vetter.
    // NOTE(review): the third constructor argument is presumably the voter's name -- confirm against VoterInfo.
    static final VoterInfo dummyVoterInfo = new VoterInfo(Organization.cldr, org.unicode.cldr.util.VoteResolver.Level.vetter, "somename");
1252 
    /**
     * Stand-in {@code UserInfo} for calls that need a user but have no real one;
     * it always reports {@link #dummyVoterInfo} as its voter info.
     */
    static final UserInfo dummyUserInfo = new UserInfo() {
        @Override
        public VoterInfo getVoterInfo() {
            return dummyVoterInfo;
        }
    };
    /**
     * Stand-in {@code PathValueInfo} that only answers {@link #getCoverageLevel()} (always
     * {@code Level.MODERN}); every other accessor throws {@code UnsupportedOperationException},
     * so it is only usable with code that consults nothing but the coverage level.
     */
    static final PathValueInfo dummyPathValueInfo = new PathValueInfo() {
        // The expression this object is meant to satisfy:
        // pathValueInfo.getCoverageLevel().compareTo(Level.COMPREHENSIVE)
        @Override
        public Collection<? extends CandidateInfo> getValues() {
            throw new UnsupportedOperationException();
        }
        @Override
        public CandidateInfo getCurrentItem() {
            throw new UnsupportedOperationException();
        }
        @Override
        public String getBaselineValue() {
            throw new UnsupportedOperationException();
        }
        // The single supported accessor: a fixed coverage level.
        @Override
        public Level getCoverageLevel() {
            return Level.MODERN;
        }
        @Override
        public boolean hadVotesSometimeThisRelease() {
            throw new UnsupportedOperationException();
        }
        @Override
        public CLDRLocale getLocale() {
            throw new UnsupportedOperationException();
        }
        @Override
        public String getXpath() {
            throw new UnsupportedOperationException();
        }
    };
1290 
1291 
1292 //    static org.unicode.cldr.util.Factory VXML_FACTORY = SimpleFactory.make(new File[] {
1293 //        new File(VXML_CONSTANT + "main"),
1294 //        new File(VXML_CONSTANT + "annotations") }, ".*");
        // Remnant of the commented-out VXML factory code around it; initialised to null here.
        // NOTE(review): spreadsheetLine declares a parameter with the same name, which shadows
        // this field there -- check whether anything still reads or assigns the field.
        static CLDRFile vxmlCldrFile = null;
1296 //    static String vxmlLocale = "";
1297 
1298 //    private static CLDRFile getVxmlCldrFile(String locale) {
1299 //        if (!vxmlLocale.equals(locale)) {
1300 //            try {
1301 //                vxmlCldrFile = VXML_FACTORY.make(locale, false);
1302 //            } catch (Exception e) {
1303 //                vxmlCldrFile = null;
1304 //            }
1305 //            vxmlLocale = locale;
1306 //        }
1307 //        return vxmlCldrFile;
1308 //    }
1309 
gatherStarred(String path, Counter<String> starredCounter)1310     public static void gatherStarred(String path, Counter<String> starredCounter) {
1311         starredCounter.add(new PathStarrer().setSubstitutionPattern("*").set(path), 1);
1312     }
1313 
spreadsheetLine(String locale, String language, String script, String nativeValue, Level cldrLocaleLevelGoal, Level itemLevel, String status, String path, CLDRFile resolvedFile, CLDRFile vxmlCldrFile, Multimap<String, String> pathToLocale)1314     public static String spreadsheetLine(String locale, String language, String script, String nativeValue, Level cldrLocaleLevelGoal,
1315         Level itemLevel, String status, String path, CLDRFile resolvedFile, CLDRFile vxmlCldrFile,
1316         Multimap<String, String> pathToLocale) {
1317         if (pathToLocale != null) {
1318             pathToLocale.put(path, locale);
1319         }
1320         String stLink = "n/a";
1321         String englishValue = "n/a";
1322         StatusAction action = null;
1323         SurveyToolStatus surveyToolStatus = null;
1324         String icuValue = getIcuValue(locale);
1325 
1326         String bailey = resolvedFile == null ? "" : resolvedFile.getStringValue(path);
1327         String vxmlValue = "";
1328         String vxmlDraftStatus = "";
1329         if (vxmlCldrFile != null) {
1330             try {
1331                 vxmlValue = vxmlCldrFile.getStringValue(path);
1332                 if (vxmlValue == null) {
1333                     vxmlValue = "";
1334                 } else {
1335                     String fullXPath = vxmlCldrFile.getFullXPath(path);
1336                     XPathParts parts = XPathParts.getFrozenInstance(fullXPath);
1337                     vxmlDraftStatus = parts.getAttributeValue(-1, "draft");
1338                     if (vxmlDraftStatus == null) {
1339                         vxmlDraftStatus = "";
1340                     }
1341                 }
1342             } catch (Exception e) {
1343             }
1344         }
1345 
1346         String phString = "na\tn/a\tn/a\t" + path;
1347         try {
1348             PathHeader ph = pathHeaderFactory.fromPath(path);
1349             phString = ph.toString();
1350             surveyToolStatus = ph.getSurveyToolStatus();
1351             stLink = URLS.forXpath(locale, path);
1352             englishValue = ENGLISH.getStringValue(path);
1353             action = Phase.SUBMISSION.getShowRowAction(dummyPathValueInfo, InputMethod.DIRECT, surveyToolStatus, dummyUserInfo);
1354         } catch (Exception e) {
1355             int debug = 0;
1356         }
1357 
1358         String config_text = vxmlValue.isEmpty() ? "" :
1359             "locale=" + locale
1360             + " ; action=add"
1361             + " ; new_path=" + StringId.getHexId(path)
1362             + " ; new_value=" + vxmlValue;
1363 
1364 
1365 
1366         String s = TSV_MISSING_HEADER; // make sure in sync
1367         String line =
1368             language
1369             + "\t" + ENGLISH.getName(language)
1370             + "\t" + ENGLISH.getName("script", script)
1371             //+ "\t" + englishValue
1372             //+ "\t" + nativeValue
1373             + "\t" + cldrLocaleLevelGoal
1374             //+ "\t" + icuValue
1375             + "\t" + itemLevel
1376             //+ "\t" + status
1377             //+ "\t" + (action == null ? "?" : action.toString())
1378             + "\t" + (surveyToolStatus == null ? "?" : surveyToolStatus.toString())
1379             //+ "\t" + stLink
1380             + "\t" + bailey
1381             + "\t" + vxmlValue
1382             + "\t" + vxmlDraftStatus
1383             + "\t" + phString
1384             + "\t" + PathHeader.getUrlForLocalePath(locale, path)
1385             + "\t" + config_text
1386             ;
1387         return line;
1388     }
1389 
1390 
1391 
getIcuValue(String locale)1392     private static String getIcuValue(String locale) {
1393         return ICU_Locales.contains(new ULocale(locale)) ? "ICU" : "";
1394     }
1395 
    /** Immutable snapshot of {@code ULocale.getAvailableLocales()}, taken when this class is initialised. */
    static final Set<ULocale> ICU_Locales = ImmutableSet.copyOf(ULocale.getAvailableLocales());
    /** URL helper obtained from {@code CONFIG.urls()}. NOTE(review): not declared final -- confirm nothing reassigns it. */
    private static CLDRURLS URLS = CONFIG.urls();
1398 
1399 }
1400