1 /*
2  * Copyright (C) 2004-2011, Unicode, Inc., Google, Inc., and others.
3  * For terms of use, see http://www.unicode.org/terms_of_use.html
4  */
5 package org.unicode.cldr.tool.resolver;
6 
import java.io.File;
import java.io.FileNotFoundException;
import java.io.PrintWriter;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import java.util.Set;
import java.util.TreeSet;
import java.util.regex.Pattern;

import org.unicode.cldr.tool.FilterFactory;
import org.unicode.cldr.tool.Option;
import org.unicode.cldr.tool.Option.Options;
import org.unicode.cldr.util.CLDRFile;
import org.unicode.cldr.util.CLDRFile.DraftStatus;
import org.unicode.cldr.util.CLDRPaths;
import org.unicode.cldr.util.CldrUtility;
import org.unicode.cldr.util.Factory;
import org.unicode.cldr.util.LocaleIDParser;
import org.unicode.cldr.util.SimpleXMLSource;

import com.google.common.cache.Cache;
import com.google.common.cache.CacheBuilder;
29 
30 /**
31  * Class designed for the resolution of CLDR XML Files (e.g., removing aliases
32  * but leaving the inheritance structure intact).
33  *
34  * Instances of this class are not thread-safe. Any attempts to use an object of
35  * this class in multiple threads must be externally synchronized.
36  *
37  * @author ryanmentley@google.com (Ryan Mentley), jchye@google.com (Jennifer Chye)
38  *
39  */
40 public class CldrResolver {
41     /**
42      * The name of the code-fallback locale
43      */
44     public static final String CODE_FALLBACK = "code-fallback";
45 
46     /**
47      * The name of the root locale
48      */
49     public static final String ROOT = "root";
50 
51     /* The command-line options. */
52     private static final Options options = new Options(
53         "This program is used to convert CLDR XML files into their resolved versions.\n" +
54             "Please refer to the following options. Options are not case sensitive.\n" +
55             "\texample: org.unicode.cldr.tool.resolver.CldrResolver -s xxx -d yyy -l en")
56                 .add("locale", 'l', ".*", ".*", "The locales to generate resolved files for")
57                 .add("sourcedir", ".*", "Source directory for CLDR files")
58                 .add("destdir", ".*", "Destination directory for output files")
59                 .add("resolutiontype", 'r', "\\w+", "simple", "The resolution type to be used")
60                 .add("mindraftstatus", 'm', ".*", "unconfirmed", "The minimum draft status")
61                 .add("verbosity", 'v', "\\d", "2", "The verbosity level for comments during generation")
62                 .add("usealtvalues", 'a', null, null, "Use alternate values in FilterFactory for the locale data to be resolved.")
63                 .add("organization", 'o', ".*", null, "Filter by this organization's coverage level");
64 
65     /* Private instance variables */
66     private Factory cldrFactory;
67     private ResolutionType resolutionType;
68     // Cache for resolved CLDRFiles.
69     // This is most useful for simple resolution, where the resolved locales are
70     // required to resolve their children.
71     //private Map<String, CLDRFile> resolvedCache = new LruMap<String, CLDRFile>(5);
72 
73     /**
74      * The initial size of the resolved cache
75      */
76     private final int INITIAL_RESOLVED_CACHE_SIZE = 10;
77     private Cache<String, CLDRFile> resolvedCache = CacheBuilder.newBuilder().initialCapacity(INITIAL_RESOLVED_CACHE_SIZE).build();
78 
79     public static void main(String[] args) {
80         options.parse(args, true);
81 
82         // Parse the options
83         ResolutionType resolutionType = ResolutionType.SIMPLE;
84         Option option = options.get("resolutiontype");
85         if (option.doesOccur()) {
86             try {
87                 resolutionType = ResolutionType.forString(option.getValue());
88             } catch (IllegalArgumentException e) {
89                 ResolverUtils.debugPrintln("Warning: " + e.getMessage(), 1);
90                 ResolverUtils.debugPrintln("Using default resolution type " + resolutionType.toString(), 1);
91             }
92         }
93 
94         String srcDir = null;
95         option = options.get("sourcedir");
96         if (option.doesOccur()) {
97             srcDir = option.getValue();
98         } else {
99             srcDir = CLDRPaths.MAIN_DIRECTORY;
100         }
101 
102         option = options.get("destdir");
103         File dest;
104         if (option.doesOccur()) {
105             dest = new File(option.getValue());
106         } else {
107             dest = new File(CLDRPaths.GEN_DIRECTORY, "resolver");
108         }
109         if (!dest.exists()) {
110             dest.mkdir();
111         }
112         String destDir = dest.getAbsolutePath();
113 
114         int verbosityParsed = Integer.parseInt(options.get("verbosity").getValue());
115         if (verbosityParsed < 0 || verbosityParsed > 5) {
116             ResolverUtils.debugPrintln(
117                 "Warning: Verbosity must be between 0 and 5, inclusive.  Using default value "
118                     + ResolverUtils.verbosity,
119                 1);
120         } else {
121             ResolverUtils.verbosity = verbosityParsed;
122         }
123 
124         option = options.get("mindraftstatus");
125         DraftStatus minDraftStatus = option.doesOccur() ? DraftStatus.forString(option.getValue()) : DraftStatus.unconfirmed;
126         Factory factory = Factory.make(srcDir, ".*", minDraftStatus);
127         boolean useAltValues = options.get("usealtvalues").doesOccur();
128         String org = options.get("organization").getValue();
129         if (useAltValues || org != null) {
130             factory = FilterFactory.load(factory, org, useAltValues);
131         }
132         CldrResolver resolver = new CldrResolver(factory, resolutionType);
133 
134         // Perform the resolution
135         String localeRegex = options.get("locale").getValue();
136         resolver.resolve(localeRegex, destDir);
137         ResolverUtils.debugPrintln("Execution complete.", 3);
138     }
139 
140     /**
141      * Constructs a CLDR partial resolver given the path to a directory of XML
142      * files.
143      *
144      * @param factory the factory containing the files to be resolved
145      * @param resolutionType the resolution type of the resolver.
146      */
147     public CldrResolver(Factory factory, ResolutionType resolutionType) {
148         /*
149          * We don't do the regex filter here so that we can still resolve parent
150          * files that don't match the regex
151          */
152         cldrFactory = factory;
153         this.resolutionType = resolutionType;
154     }
155 
156     /**
157      * Resolves all locales that match the given regular expression and outputs
158      * their XML files to the given directory.
159      *
160      * @param localeRegex a regular expression that will be matched against the
161      *        names of locales
162      * @param outputDir the directory to which to output the partially-resolved
163      *        XML files
164      * @param resolutionType the type of resolution to perform
165      * @throws IllegalArgumentException if outputDir is not a directory
166      */
167     public void resolve(String localeRegex, File outputDir) {
168         if (!outputDir.isDirectory()) {
169             throw new IllegalArgumentException(outputDir.getPath() + " is not a directory");
170         }
171 
172         // Iterate through all the locales
173         for (String locale : getLocaleNames(localeRegex)) {
174             // Resolve the file
175             ResolverUtils.debugPrintln("Processing locale " + locale + "...", 2);
176             CLDRFile resolved = resolveLocale(locale);
177 
178             // Output the file to disk
179             printToFile(resolved, outputDir);
180         }
181     }
182 
183     /**
184      * Returns the locale names from the resolver that match a given regular
185      * expression.
186      *
187      * @param localeRegex a regular expression to match against
188      * @return all of the locales that will be resolved by a call to resolve()
189      *         with the same localeRegex
190      */
191     public Set<String> getLocaleNames(String localeRegex) {
192         ResolverUtils.debugPrint("Getting list of locales...", 3);
193         Set<String> allLocales = cldrFactory.getAvailable();
194         Set<String> locales = new TreeSet<>();
195         // Iterate through all the locales
196         for (String locale : allLocales) {
197             // Check if the locale name matches the regex
198             if (locale.matches(localeRegex)) {
199                 locales.add(locale);
200             } else {
201                 ResolverUtils.debugPrintln("Locale " + locale
202                     + "does not match the pattern.  Skipping...\n", 4);
203             }
204 
205         }
206         ResolverUtils.debugPrintln("done.\n", 3);
207         return locales;
208     }
209 
210     /**
211      * Resolves a locale to a {@link CLDRFile} object
212      *
213      * @param locale the name of the locale to resolve
214      * @param resolutionType the type of resolution to perform
215      * @return a {@link CLDRFile} containing the resolved data
216      */
217     public CLDRFile resolveLocale(String locale) {
218         // Create CLDRFile for current (base) locale
219         CLDRFile base = cldrFactory.make(locale, true);
220         CLDRFile resolved = resolvedCache.getIfPresent(locale);
221         if (resolved != null) return resolved;
222 
223         ResolverUtils.debugPrintln("Processing " + locale + "...", 2);
224         resolved = resolveLocaleInternal(base, resolutionType);
225         resolvedCache.put(locale, resolved);
226         return resolved;
227     }
228 
229     private CLDRFile resolveLocaleInternal(CLDRFile file, ResolutionType resolutionType) {
230         String locale = file.getLocaleID();
231         // Make parent files for simple resolution.
232         List<CLDRFile> ancestors = new ArrayList<>();
233         if (resolutionType == ResolutionType.SIMPLE && !locale.equals(ROOT)) {
234             String parentLocale = locale;
235             do {
236                 parentLocale = LocaleIDParser.getSimpleParent(parentLocale);
237                 ancestors.add(resolveLocale(parentLocale));
238             } while (!parentLocale.equals(ROOT));
239         }
240 
241         // Create empty file to hold (partially or fully) resolved data.
242         CLDRFile resolved = new CLDRFile(new SimpleXMLSource(locale));
243 
244         // Go through the XPaths, filter out appropriate values based on the
245         // inheritance model,
246         // then copy to the new CLDRFile.
247         Set<String> basePaths = ResolverUtils.getAllPaths(file);
248         for (String distinguishedPath : basePaths) {
249             ResolverUtils.debugPrintln("Distinguished path: " + distinguishedPath, 5);
250 
251             if (distinguishedPath.endsWith("/alias")) {
252                 // Ignore any aliases.
253                 ResolverUtils.debugPrintln("This path is an alias.  Dropping...", 5);
254                 continue;
255             }
256 
257             /*
258              * If we're fully resolving the locale (and, if code-fallback suppression
259              * is enabled, if the value is not from code-fallback) or the values
260              * aren't equal, add it to the resolved file.
261              */
262             if (resolutionType == ResolutionType.NO_CODE_FALLBACK && file.getSourceLocaleID(
263                 distinguishedPath, null).equals(CODE_FALLBACK)) {
264                 continue;
265             }
266 
267             // For simple resolution, don't add paths to child locales if the parent
268             // locale contains the same path with the same value.
269             String baseValue = file.getStringValue(distinguishedPath);
270             if (resolutionType == ResolutionType.SIMPLE) {
271                 String parentValue = null;
272                 for (CLDRFile ancestor : ancestors) {
273                     parentValue = ancestor.getStringValue(distinguishedPath);
274                     if (parentValue != null) break;
275                 }
276                 ResolverUtils.debugPrintln(
277                     "    Parent value : " + ResolverUtils.strRep(parentValue), 5);
278                 if (areEqual(parentValue, baseValue)) continue;
279             }
280 
281             ResolverUtils.debugPrintln("  Adding to resolved file.", 5);
282             // Suppress non-distinguishing attributes in simple inheritance
283             String path = resolutionType == ResolutionType.SIMPLE ? distinguishedPath : file.getFullXPath(distinguishedPath);
284             ResolverUtils.debugPrintln("Path to be saved: " + path, 5);
285             resolved.add(path, baseValue);
286         }
287 
288         // Sanity check in simple resolution to make sure that all paths in the parent are also in the child.
289         if (ancestors.size() > 0) {
290             CLDRFile ancestor = ancestors.get(0);
291             ResolverUtils.debugPrintln(
292                 "Adding UNDEFINED values based on ancestor: " + ancestor.getLocaleID(), 3);
293             for (String distinguishedPath : ResolverUtils.getAllPaths(ancestor)) {
294                 // Do the comparison with distinguished paths to prevent errors
295                 // resulting from duplicate full paths but the same distinguished path
296                 if (!basePaths.contains(distinguishedPath) &&
297                     !ancestor.getStringValue(distinguishedPath).equals(CldrUtility.NO_INHERITANCE_MARKER)) {
298                     ResolverUtils.debugPrintln(
299                         "Added UNDEFINED value for path: " + distinguishedPath, 4);
300                     resolved.add(distinguishedPath, CldrUtility.NO_INHERITANCE_MARKER);
301                 }
302             }
303         }
304         return resolved;
305     }
306 
307     /**
308      * Resolves all locales that match the given regular expression and outputs
309      * their XML files to the given directory.
310      *
311      * @param localeRegex a regular expression that will be matched against the
312      *        names of locales
313      * @param outputDir the directory to which to output the partially-resolved
314      *        XML files
315      * @param resolutionType the type of resolution to perform
316      * @throws IllegalArgumentException if outputDir is not a directory
317      */
318     public void resolve(String localeRegex, String outputDir) {
319         resolve(localeRegex, new File(outputDir));
320     }
321 
322     /**
323      * Writes out the given CLDRFile in XML form to the given directory
324      *
325      * @param cldrFile the CLDRFile to print to XML
326      * @param directory the directory to which to add the file
327      */
328     private static void printToFile(CLDRFile cldrFile, File directory) {
329         ResolverUtils.debugPrint("Printing file...", 2);
330         try {
331             PrintWriter pw = new PrintWriter(new File(directory, cldrFile.getLocaleID() + ".xml"), "UTF-8");
332             cldrFile.write(pw);
333             pw.close();
334             ResolverUtils.debugPrintln("done.\n", 2);
335         } catch (FileNotFoundException e) {
336             ResolverUtils.debugPrintln("\nFile not found: " + e.getMessage(), 1);
337             System.exit(1);
338             return;
339         } catch (UnsupportedEncodingException e) {
340             // This should never ever happen.
341             ResolverUtils.debugPrintln("Your system does not support UTF-8 encoding: " + e.getMessage(),
342                 1);
343             System.exit(1);
344             return;
345         }
346     }
347 
348     /**
349      * Convenience method to compare objects that works with nulls
350      *
351      * @param o1 the first object
352      * @param o2 the second object
353      * @return true if objects o1 == o2 or o1.equals(o2); false otherwise
354      */
355     private static boolean areEqual(Object o1, Object o2) {
356         if (o1 == o2) {
357             return true;
358         } else if (o1 == null) {
359             return false;
360         } else {
361             return o1.equals(o2);
362         }
363     }
364 }
365