1 package org.unicode.cldr.tool;
2 
3 import java.io.BufferedReader;
4 import java.io.IOException;
5 import java.io.PrintWriter;
6 import java.util.ArrayList;
7 import java.util.Collection;
8 import java.util.Comparator;
9 import java.util.HashMap;
10 import java.util.HashSet;
11 import java.util.Iterator;
12 import java.util.List;
13 import java.util.Map;
14 import java.util.Set;
15 import java.util.TreeSet;
16 
17 import org.unicode.cldr.draft.FileUtilities;
18 import org.unicode.cldr.util.Builder;
19 import org.unicode.cldr.util.CLDRFile;
20 import org.unicode.cldr.util.CLDRPaths;
21 import org.unicode.cldr.util.Factory;
22 import org.unicode.cldr.util.FileReaders;
23 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo.Count;
24 
25 import com.ibm.icu.text.DecimalFormat;
26 import com.ibm.icu.text.PluralRules;
27 import com.ibm.icu.util.ULocale;
28 
29 public class GeneratePluralList {
30     static final String stock = "km|lo|ne|br|dz|nl|si|en|ar|de|es|fr|it|ja|ko|nl|pl|ru|th|tr|pt|zh|zh_Hant|bg|ca|cs|da|el|fa|fi|fil|hi|hr|hu|id|lt|lv|ro|sk|sl|sr|sv|uk|vi|he|nb|et|ms|am|bn|gu|is|kn|ml|mr|sw|ta|te|ur|eu|gl|af|zu|en_GB|es_419|pt_PT|fr_CA|zh_Hant_HK";
31     private static final Map<String, Integer> keywordIndex = Builder.with(new HashMap<String, Integer>())
32         .put("zero", 0)
33         .put("one", 1)
34         .put("two", 2)
35         .put("few", 3)
36         .put("many", 4)
37         .put("other", 5)
38         .get();
39 
40     private DecimalFormat format = new DecimalFormat();
41     private PrintWriter out;
42     private PluralRules rules;
43 
GeneratePluralList(PrintWriter out)44     private GeneratePluralList(PrintWriter out) {
45         if (out == null) {
46             out = new PrintWriter(System.out);
47         }
48         this.out = out;
49     }
50 
51     private Map<String, Map<String, String>> localesToNouns = new HashMap<>();
52 
loadNouns()53     private void loadNouns() throws IOException {
54         BufferedReader reader = FileReaders.openFile(GeneratePluralList.class, "fractionnum.csv");
55         for (String line = reader.readLine(); line != null; line = reader.readLine()) {
56             String[] fields = line.split(",");
57             String locale = fields[0];
58             String count = fields[1];
59             String format = fields[5];
60             Map<String, String> nouns = localesToNouns.get(locale);
61             if (nouns == null) {
62                 localesToNouns.put(locale, nouns = new HashMap<>());
63             }
64             nouns.put(count, format);
65         }
66     }
67 
68     private class ExampleManager implements Iterable<String> {
69         // integer, fraction and last digit are 3 different types
70         private Set<String> list3;
71 
ExampleManager()72         public ExampleManager() {
73             list3 = new HashSet<>();
74         }
75 
add(String example)76         public void add(String example) {
77             list3.add(example);
78         }
79 
80         @Override
toString()81         public String toString() {
82             return list3.toString();
83         }
84 
85         @Override
iterator()86         public Iterator<String> iterator() {
87             return list3.iterator();
88         }
89 
getAll()90         public Set<String> getAll() {
91             return list3;
92         }
93     }
94 
build(PrintWriter out)95     public static GeneratePluralList build(PrintWriter out) {
96         GeneratePluralList generator = new GeneratePluralList(out);
97         return generator;
98     }
99 
getExamples(String locale)100     private void getExamples(String locale) {
101         rules = PluralRules.forLocale(new ULocale(locale));
102         // Setup.
103         Count[] digits = new Count[1000];
104         // 0 is always considered a plural type even if the plural rules say otherwise.
105         Set<Count> missingTypes = new HashSet<>();
106         for (String keyword : rules.getKeywords()) {
107             missingTypes.add(Count.valueOf(keyword));
108         }
109         Map<String, List<Integer>> integerMap = new HashMap<>();
110         digits[0] = Count.zero;
111         missingTypes.remove(Count.zero);
112         put(integerMap, "zero", 0);
113         put(integerMap, "zero|zero", 0);
114         for (int i = 1; i < digits.length; i++) {
115             Count type = Count.valueOf(rules.select(i));
116             digits[i] = type;
117             missingTypes.remove(type);
118             put(integerMap, type.toString(), i);
119             Count digitType = (i < 10) ? type : digits[i % 10];
120             String key = type.toString() + '|' + digitType;
121             put(integerMap, key, i);
122         }
123 
124         missingTypes.remove(Count.other);
125 
126         if (missingTypes.size() > 0) {
127             System.out.println("WARNING: the following plural types may not be represented fully for " + locale + ": " + missingTypes);
128             for (Count type : missingTypes) {
129                 Collection<Double> values = rules.getSamples(type.toString());
130                 if (values != null) {
131                     int value = values.iterator().next().intValue();
132                     put(integerMap, type.toString(), value);
133                 }
134             }
135         }
136 
137         for (int i = 1; i <= 3; i++) {
138             getExamples(locale, integerMap, i);
139         }
140     }
141 
getExamples(String locale, Map<String, List<Integer>> integerMap, int numDigits)142     private void getExamples(String locale, Map<String, List<Integer>> integerMap, int numDigits) {
143         Map<String, String> nouns = localesToNouns.get(locale);
144         if (nouns == null) return;
145 
146         // Load fractions as whole numbers.
147         int limit = (int) Math.pow(10, numDigits);
148 
149         // Generate all examples.
150         Map<String, String> exampleMap = new HashMap<>();
151         Map<String, ExampleManager> positionedExamples = new HashMap<>();
152         Set<String> allKeywords = new HashSet<>(rules.getKeywords());
153         allKeywords.add("zero");
154         allKeywords.retainAll(integerMap.keySet());
155         List<Integer> values;
156         format.setMinimumIntegerDigits(numDigits);
157         for (String x : allKeywords) {
158             values = integerMap.get(x);
159             int integer = values.get(values.size() > 1 ? 1 : 0); // get new set of examples if possible
160             for (String y : integerMap.keySet()) {
161                 if (!y.contains("|")) continue;
162                 values = integerMap.get(y);
163                 int fraction = values.get(values.size() > 1 ? 1 : 0);
164                 if (fraction >= limit) continue; // TODO: handle bg other
165                 String key = x + '|' + y;
166                 String[] keywords = key.split("\\|");
167                 if (!keywords[0].equals("zero") && keywords[0].equals(keywords[1])) {
168                     continue;
169                 }
170                 for (int i = 0; i < keywords.length; i++) {
171                     String position = i + keywords[i];
172                     ExampleManager manager = positionedExamples.get(position);
173                     if (manager == null) {
174                         positionedExamples.put(position, manager = new ExampleManager());
175                     }
176                     manager.add(key);
177                 }
178                 String example = integer + "." + format.format(fraction);
179                 exampleMap.put(key, example);
180             }
181         }
182 
183         // Output examples to file.
184         Set<String> finalExamples = new TreeSet<>(new Comparator<String>() {
185             @Override
186             public int compare(String arg0, String arg1) {
187                 String[] forms1 = arg1.split("\\|");
188                 String[] forms0 = arg0.split("\\|");
189                 for (int i = 0; i < forms0.length; i++) {
190                     int compare = keywordIndex.get(forms0[i]) - keywordIndex.get(forms1[i]);
191                     if (compare != 0) return compare;
192                 }
193                 return 0;
194             }
195         });
196 
197         for (ExampleManager manager : positionedExamples.values()) {
198             finalExamples.addAll(manager.getAll());
199         }
200         String realZeroType = rules.select(0);
201         for (String category : finalExamples) {
202             String exampleValue = exampleMap.get(category);
203             //String overallCategory = rules.select(Double.valueOf(exampleValue));
204             //String exampleFormat = nouns.get(overallCategory);
205 
206             out.println(locale + "\t" + exampleValue + "\t" +
207                 category.replace("zero", realZeroType).replace('|', '\t'));
208         }
209         out.flush();
210     }
211 
put(Map<A, List<B>> map, A key, B value)212     private static <A, B> void put(Map<A, List<B>> map, A key, B value) {
213         List<B> list = map.get(key);
214         if (list == null) {
215             map.put(key, list = new ArrayList<>());
216         }
217         list.add(value);
218     }
219 
220     static String[] units = { "second", "minute", "hour", "day", "month", "year" };
221 
getForms(CLDRFile file)222     private void getForms(CLDRFile file) {
223         rules = PluralRules.forLocale(new ULocale(file.getLocaleID()));
224         System.out.println(file.getLocaleID());
225         for (String plural : rules.getKeywords()) {
226             out.print(file.getLocaleID() + '\t' + plural + '\t' +
227                 rules.getSamples(plural).iterator().next());
228             for (String unit : units) {
229                 printUnit(file, unit, plural);
230                 printUnit(file, unit + "-past", plural);
231                 printUnit(file, unit + "-future", plural);
232             }
233             out.println();
234             out.flush();
235         }
236     }
237 
printUnit(CLDRFile file, String unit, String plural)238     private void printUnit(CLDRFile file, String unit, String plural) {
239         String path = "//ldml/units/unit[@type=\"" + unit + "\"]/unitPattern[@count=\"" + plural + "\"]";
240         String value = file.getStringValue(path);
241         out.print('\t');
242         if (value == null) {
243             System.out.println(file.getLocaleID() + " has no example for " + plural + " " + unit);
244         } else {
245             out.print(value);
246         }
247     }
248 
249     /**
250      * @param args
251      */
main(String[] args)252     public static void main(String[] args) throws Exception {
253         PrintWriter out = FileUtilities.openUTF8Writer("/Users/jchye/Desktop", "plurals.tsv");
254         GeneratePluralList generator = new GeneratePluralList(out);
255         generator.loadNouns();
256 
257         Factory factory = Factory.make(CLDRPaths.MAIN_DIRECTORY, stock);
258         for (String locale : factory.getAvailable()) {
259             generator.getExamples(locale);
260             //generator.getForms(factory.make(locale, true));
261         }
262         out.close();
263     }
264 }
265