1 package org.unicode.cldr.tool; 2 3 import java.io.BufferedReader; 4 import java.io.IOException; 5 import java.io.PrintWriter; 6 import java.util.ArrayList; 7 import java.util.Collection; 8 import java.util.Comparator; 9 import java.util.HashMap; 10 import java.util.HashSet; 11 import java.util.Iterator; 12 import java.util.List; 13 import java.util.Map; 14 import java.util.Set; 15 import java.util.TreeSet; 16 17 import org.unicode.cldr.draft.FileUtilities; 18 import org.unicode.cldr.util.Builder; 19 import org.unicode.cldr.util.CLDRFile; 20 import org.unicode.cldr.util.CLDRPaths; 21 import org.unicode.cldr.util.Factory; 22 import org.unicode.cldr.util.FileReaders; 23 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo.Count; 24 25 import com.ibm.icu.text.DecimalFormat; 26 import com.ibm.icu.text.PluralRules; 27 import com.ibm.icu.util.ULocale; 28 29 public class GeneratePluralList { 30 static final String stock = "km|lo|ne|br|dz|nl|si|en|ar|de|es|fr|it|ja|ko|nl|pl|ru|th|tr|pt|zh|zh_Hant|bg|ca|cs|da|el|fa|fi|fil|hi|hr|hu|id|lt|lv|ro|sk|sl|sr|sv|uk|vi|he|nb|et|ms|am|bn|gu|is|kn|ml|mr|sw|ta|te|ur|eu|gl|af|zu|en_GB|es_419|pt_PT|fr_CA|zh_Hant_HK"; 31 private static final Map<String, Integer> keywordIndex = Builder.with(new HashMap<String, Integer>()) 32 .put("zero", 0) 33 .put("one", 1) 34 .put("two", 2) 35 .put("few", 3) 36 .put("many", 4) 37 .put("other", 5) 38 .get(); 39 40 private DecimalFormat format = new DecimalFormat(); 41 private PrintWriter out; 42 private PluralRules rules; 43 GeneratePluralList(PrintWriter out)44 private GeneratePluralList(PrintWriter out) { 45 if (out == null) { 46 out = new PrintWriter(System.out); 47 } 48 this.out = out; 49 } 50 51 private Map<String, Map<String, String>> localesToNouns = new HashMap<>(); 52 loadNouns()53 private void loadNouns() throws IOException { 54 BufferedReader reader = FileReaders.openFile(GeneratePluralList.class, "fractionnum.csv"); 55 for (String line = reader.readLine(); line != null; line = reader.readLine()) { 56 String[] fields = line.split(","); 57 String locale = fields[0]; 58 String count = fields[1]; 59 String format = fields[5]; 60 Map<String, String> nouns = localesToNouns.get(locale); 61 if (nouns == null) { 62 localesToNouns.put(locale, nouns = new HashMap<>()); 63 } 64 nouns.put(count, format); 65 } 66 } 67 68 private class ExampleManager implements Iterable<String> { 69 // integer, fraction and last digit are 3 different types 70 private Set<String> list3; 71 ExampleManager()72 public ExampleManager() { 73 list3 = new HashSet<>(); 74 } 75 add(String example)76 public void add(String example) { 77 list3.add(example); 78 } 79 80 @Override toString()81 public String toString() { 82 return list3.toString(); 83 } 84 85 @Override iterator()86 public Iterator<String> iterator() { 87 return list3.iterator(); 88 } 89 getAll()90 public Set<String> getAll() { 91 return list3; 92 } 93 } 94 build(PrintWriter out)95 public static GeneratePluralList build(PrintWriter out) { 96 GeneratePluralList generator = new GeneratePluralList(out); 97 return generator; 98 } 99 getExamples(String locale)100 private void getExamples(String locale) { 101 rules = PluralRules.forLocale(new ULocale(locale)); 102 // Setup. 103 Count[] digits = new Count[1000]; 104 // 0 is always considered a plural type even if the plural rules say otherwise. 105 Set<Count> missingTypes = new HashSet<>(); 106 for (String keyword : rules.getKeywords()) { 107 missingTypes.add(Count.valueOf(keyword)); 108 } 109 Map<String, List<Integer>> integerMap = new HashMap<>(); 110 digits[0] = Count.zero; 111 missingTypes.remove(Count.zero); 112 put(integerMap, "zero", 0); 113 put(integerMap, "zero|zero", 0); 114 for (int i = 1; i < digits.length; i++) { 115 Count type = Count.valueOf(rules.select(i)); 116 digits[i] = type; 117 missingTypes.remove(type); 118 put(integerMap, type.toString(), i); 119 Count digitType = (i < 10) ? type : digits[i % 10]; 120 String key = type.toString() + '|' + digitType; 121 put(integerMap, key, i); 122 } 123 124 missingTypes.remove(Count.other); 125 126 if (missingTypes.size() > 0) { 127 System.out.println("WARNING: the following plural types may not be represented fully for " + locale + ": " + missingTypes); 128 for (Count type : missingTypes) { 129 Collection<Double> values = rules.getSamples(type.toString()); 130 if (values != null) { 131 int value = values.iterator().next().intValue(); 132 put(integerMap, type.toString(), value); 133 } 134 } 135 } 136 137 for (int i = 1; i <= 3; i++) { 138 getExamples(locale, integerMap, i); 139 } 140 } 141 getExamples(String locale, Map<String, List<Integer>> integerMap, int numDigits)142 private void getExamples(String locale, Map<String, List<Integer>> integerMap, int numDigits) { 143 Map<String, String> nouns = localesToNouns.get(locale); 144 if (nouns == null) return; 145 146 // Load fractions as whole numbers. 147 int limit = (int) Math.pow(10, numDigits); 148 149 // Generate all examples. 150 Map<String, String> exampleMap = new HashMap<>(); 151 Map<String, ExampleManager> positionedExamples = new HashMap<>(); 152 Set<String> allKeywords = new HashSet<>(rules.getKeywords()); 153 allKeywords.add("zero"); 154 allKeywords.retainAll(integerMap.keySet()); 155 List<Integer> values; 156 format.setMinimumIntegerDigits(numDigits); 157 for (String x : allKeywords) { 158 values = integerMap.get(x); 159 int integer = values.get(values.size() > 1 ? 1 : 0); // get new set of examples if possible 160 for (String y : integerMap.keySet()) { 161 if (!y.contains("|")) continue; 162 values = integerMap.get(y); 163 int fraction = values.get(values.size() > 1 ? 1 : 0); 164 if (fraction >= limit) continue; // TODO: handle bg other 165 String key = x + '|' + y; 166 String[] keywords = key.split("\\|"); 167 if (!keywords[0].equals("zero") && keywords[0].equals(keywords[1])) { 168 continue; 169 } 170 for (int i = 0; i < keywords.length; i++) { 171 String position = i + keywords[i]; 172 ExampleManager manager = positionedExamples.get(position); 173 if (manager == null) { 174 positionedExamples.put(position, manager = new ExampleManager()); 175 } 176 manager.add(key); 177 } 178 String example = integer + "." + format.format(fraction); 179 exampleMap.put(key, example); 180 } 181 } 182 183 // Output examples to file. 184 Set<String> finalExamples = new TreeSet<>(new Comparator<String>() { 185 @Override 186 public int compare(String arg0, String arg1) { 187 String[] forms1 = arg1.split("\\|"); 188 String[] forms0 = arg0.split("\\|"); 189 for (int i = 0; i < forms0.length; i++) { 190 int compare = keywordIndex.get(forms0[i]) - keywordIndex.get(forms1[i]); 191 if (compare != 0) return compare; 192 } 193 return 0; 194 } 195 }); 196 197 for (ExampleManager manager : positionedExamples.values()) { 198 finalExamples.addAll(manager.getAll()); 199 } 200 String realZeroType = rules.select(0); 201 for (String category : finalExamples) { 202 String exampleValue = exampleMap.get(category); 203 //String overallCategory = rules.select(Double.valueOf(exampleValue)); 204 //String exampleFormat = nouns.get(overallCategory); 205 206 out.println(locale + "\t" + exampleValue + "\t" + 207 category.replace("zero", realZeroType).replace('|', '\t')); 208 } 209 out.flush(); 210 } 211 put(Map<A, List<B>> map, A key, B value)212 private static <A, B> void put(Map<A, List<B>> map, A key, B value) { 213 List<B> list = map.get(key); 214 if (list == null) { 215 map.put(key, list = new ArrayList<>()); 216 } 217 list.add(value); 218 } 219 220 static String[] units = { "second", "minute", "hour", "day", "month", "year" }; 221 getForms(CLDRFile file)222 private void getForms(CLDRFile file) { 223 rules = PluralRules.forLocale(new ULocale(file.getLocaleID())); 224 System.out.println(file.getLocaleID()); 225 for (String plural : rules.getKeywords()) { 226 out.print(file.getLocaleID() + '\t' + plural + '\t' + 227 rules.getSamples(plural).iterator().next()); 228 for (String unit : units) { 229 printUnit(file, unit, plural); 230 printUnit(file, unit + "-past", plural); 231 printUnit(file, unit + "-future", plural); 232 } 233 out.println(); 234 out.flush(); 235 } 236 } 237 printUnit(CLDRFile file, String unit, String plural)238 private void printUnit(CLDRFile file, String unit, String plural) { 239 String path = "//ldml/units/unit[@type=\"" + unit + "\"]/unitPattern[@count=\"" + plural + "\"]"; 240 String value = file.getStringValue(path); 241 out.print('\t'); 242 if (value == null) { 243 System.out.println(file.getLocaleID() + " has no example for " + plural + " " + unit); 244 } else { 245 out.print(value); 246 } 247 } 248 249 /** 250 * @param args 251 */ main(String[] args)252 public static void main(String[] args) throws Exception { 253 PrintWriter out = FileUtilities.openUTF8Writer("/Users/jchye/Desktop", "plurals.tsv"); 254 GeneratePluralList generator = new GeneratePluralList(out); 255 generator.loadNouns(); 256 257 Factory factory = Factory.make(CLDRPaths.MAIN_DIRECTORY, stock); 258 for (String locale : factory.getAvailable()) { 259 generator.getExamples(locale); 260 //generator.getForms(factory.make(locale, true)); 261 } 262 out.close(); 263 } 264 } 265