1 package org.unicode.cldr.util;
2 
3 import java.util.Arrays;
4 import java.util.Collection;
5 import java.util.Collections;
6 import java.util.List;
7 import java.util.Map;
8 import java.util.Map.Entry;
9 import java.util.Set;
10 import java.util.TreeMap;
11 import java.util.TreeSet;
12 import java.util.regex.Matcher;
13 import java.util.regex.Pattern;
14 import java.util.stream.Collectors;
15 
16 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo;
17 
18 import com.google.common.base.Joiner;
19 import com.google.common.collect.ImmutableListMultimap;
20 import com.google.common.collect.ImmutableMap;
21 import com.google.common.collect.ImmutableMultimap;
22 import com.google.common.collect.ImmutableSet;
23 import com.ibm.icu.util.Freezable;
24 import com.ibm.icu.util.Output;
25 
26 /**
27  * Get the info from supplemental data, eg CLDRConfig.getInstance().getSupplementalDataInfo().getGrammarInfo("fr"); Use hasGrammarInfo() to see which locales have it.
28  * @author markdavis
29  *
30  */
31 public class GrammarInfo implements Freezable<GrammarInfo>{
32 
33     public enum GrammaticalTarget {nominal}
34 
35     public enum GrammaticalFeature {
36         grammaticalNumber("plural", "Ⓟ", "other"),
37         grammaticalCase("case", "Ⓒ", "nominative"),
38         grammaticalDefiniteness("definiteness", "Ⓓ", "indefinite"),
39         grammaticalGender("gender", "Ⓖ", "neuter");
40 
41         private final String shortName;
42         private final String symbol;
43         private final String defaultValue;
44 
45         public static final Pattern PATH_HAS_FEATURE = Pattern.compile("\\[@(count|case|gender|definiteness)=");
46 
GrammaticalFeature(String shortName, String symbol, String defaultValue)47         GrammaticalFeature(String shortName, String symbol, String defaultValue) {
48             this.shortName = shortName;
49             this.symbol = symbol;
50             this.defaultValue = defaultValue;
51         }
getShortName()52         public String getShortName() {
53             return shortName;
54         }
getSymbol()55         public CharSequence getSymbol() {
56             return symbol;
57         }
getDefault(Collection<String> values)58         public String getDefault(Collection<String> values) {
59             return this == grammaticalGender && values != null && !values.contains("neuter") ? "masculine" : defaultValue;
60         }
pathHasFeature(String path)61         public static Matcher pathHasFeature(String path) {
62             Matcher result = PATH_HAS_FEATURE.matcher(path);
63             return result.find() ? result : null;
64         }
65         static final Map<String, GrammaticalFeature> shortNameToEnum =
66             ImmutableMap.copyOf(Arrays.asList(GrammaticalFeature.values())
67             .stream()
68             .collect(Collectors.toMap(e -> e.shortName, e -> e)));
69 
fromName(String name)70         public static GrammaticalFeature fromName(String name) {
71             GrammaticalFeature result = shortNameToEnum.get(name);
72             return result != null ? result : valueOf(name);
73         }
74     }
75 
76     public enum GrammaticalScope {general, units}
77 
    /**
     * Nested lookup: target → feature → usage (scope) → set of values.
     * Starts as a mutable TreeMap and is replaced by the result of
     * CldrUtility.protectCollection when freeze() is called.
     */
    private Map<GrammaticalTarget, Map<GrammaticalFeature, Map<GrammaticalScope,Set<String>>>> targetToFeatureToUsageToValues = new TreeMap<>();
    /** Set to true by freeze(); reported by isFrozen(). */
    private boolean frozen = false;
80 
add(GrammaticalTarget target, GrammaticalFeature feature, GrammaticalScope usage, String value)81     public void add(GrammaticalTarget target, GrammaticalFeature feature, GrammaticalScope usage, String value) {
82         Map<GrammaticalFeature, Map<GrammaticalScope,Set<String>>> featureToUsageToValues = targetToFeatureToUsageToValues.get(target);
83         if (featureToUsageToValues == null) {
84             targetToFeatureToUsageToValues.put(target, featureToUsageToValues = new TreeMap<>());
85         }
86         if (feature != null) {
87             Map<GrammaticalScope,Set<String>> usageToValues = featureToUsageToValues.get(feature);
88             if (usageToValues == null) {
89                 featureToUsageToValues.put(feature, usageToValues = new TreeMap<>());
90             }
91             Set<String> values = usageToValues.get(usage);
92             if (values == null) {
93                 usageToValues.put(usage, values = new TreeSet<>());
94             }
95             if (value != null) {
96                 values.add(value);
97             } else {
98                 int debug = 0;
99             }
100         }
101     }
102 
add(GrammaticalTarget target, GrammaticalFeature feature, GrammaticalScope usage, Collection<String> valueSet)103     public void add(GrammaticalTarget target, GrammaticalFeature feature, GrammaticalScope usage, Collection<String> valueSet) {
104         Map<GrammaticalFeature, Map<GrammaticalScope,Set<String>>> featureToUsageToValues = targetToFeatureToUsageToValues.get(target);
105         if (featureToUsageToValues == null) {
106             targetToFeatureToUsageToValues.put(target, featureToUsageToValues = new TreeMap<>());
107         }
108         if (feature != null) {
109             Map<GrammaticalScope,Set<String>> usageToValues = featureToUsageToValues.get(feature);
110             if (usageToValues == null) {
111                 featureToUsageToValues.put(feature, usageToValues = new TreeMap<>());
112             }
113             Set<String> values = usageToValues.get(usage);
114             if (values == null) {
115                 usageToValues.put(usage, values = new TreeSet<>());
116             }
117             values.addAll(valueSet);
118         }
119     }
120 
121 
122     /**
123      * Note: when there is known to be no features, the featureRaw will be null
124      */
add(String targetsRaw, String featureRaw, String usagesRaw, String valuesRaw)125     public void add(String targetsRaw, String featureRaw, String usagesRaw, String valuesRaw) {
126         for (String targetString : SupplementalDataInfo.split_space.split(targetsRaw)) {
127             GrammaticalTarget target = GrammaticalTarget.valueOf(targetString);
128             if (featureRaw == null) {
129                 add(target, null, null, (String)null);
130             } else {
131                 final GrammaticalFeature feature = GrammaticalFeature.valueOf(featureRaw);
132 
133                 List<String> usages = usagesRaw == null ? Collections.singletonList(GrammaticalScope.general.toString()) : SupplementalDataInfo.split_space.splitToList(usagesRaw);
134 
135                 List<String> values = valuesRaw == null ? Collections.emptyList() : SupplementalDataInfo.split_space.splitToList(valuesRaw);
136                 for (String usageRaw : usages) {
137                     GrammaticalScope usage = GrammaticalScope.valueOf(usageRaw);
138                     add(target, feature, usage, values);
139                 }
140             }
141         }
142     }
143 
144     @Override
isFrozen()145     public boolean isFrozen() {
146         return frozen;
147     }
148 
149     @Override
freeze()150     public GrammarInfo freeze() {
151         if (!frozen) {
152             Map<GrammaticalTarget, Map<GrammaticalFeature, Map<GrammaticalScope, Set<String>>>> temp = CldrUtility.protectCollection(targetToFeatureToUsageToValues);
153             if (!temp.equals(targetToFeatureToUsageToValues)) {
154                 throw new IllegalArgumentException();
155             }
156             targetToFeatureToUsageToValues = temp;
157             frozen = true;
158         }
159         return this;
160     }
161 
162     @Override
cloneAsThawed()163     public GrammarInfo cloneAsThawed() {
164         GrammarInfo result = new GrammarInfo();
165         this.forEach3((t,f,u,v) -> result.add(t,f,u,v));
166         return result;
167     }
168 
    /**
     * Callback taking four arguments and returning nothing; used by forEach, which calls it
     * once per individual value.
     */
    static interface Handler4<T,F,U,V> {
        void apply(T t, F f, U u, V v);
    }
172 
forEach(Handler4<GrammaticalTarget, GrammaticalFeature, GrammaticalScope, String> handler)173     public void forEach(Handler4<GrammaticalTarget, GrammaticalFeature, GrammaticalScope, String> handler) {
174         for (Entry<GrammaticalTarget, Map<GrammaticalFeature, Map<GrammaticalScope,Set<String>>>> entry1 : targetToFeatureToUsageToValues.entrySet()) {
175             GrammaticalTarget target = entry1.getKey();
176             final Map<GrammaticalFeature, Map<GrammaticalScope,Set<String>>> featureToUsageToValues = entry1.getValue();
177             if (featureToUsageToValues.isEmpty()) {
178                 handler.apply(target, null, null, null);
179             } else
180                 for (Entry<GrammaticalFeature, Map<GrammaticalScope,Set<String>>> entry2 : featureToUsageToValues.entrySet()) {
181                     GrammaticalFeature feature = entry2.getKey();
182                     for (Entry<GrammaticalScope, Set<String>> entry3 : entry2.getValue().entrySet()) {
183                         final GrammaticalScope usage = entry3.getKey();
184                         for (String value : entry3.getValue()) {
185                             handler.apply(target, feature, usage, value);
186                         }
187                     }
188                 }
189         }
190     }
191 
    /**
     * Callback taking four arguments and returning nothing; used by forEach3, which calls it
     * once per (target, feature, usage) with the whole value collection as the fourth argument.
     * Despite the name, it declares four type parameters.
     */
    static interface Handler3<T,F,U, V> {
        void apply(T t, F f, U u, V v);
    }
195 
forEach3(Handler3<GrammaticalTarget, GrammaticalFeature, GrammaticalScope, Collection<String>> handler)196     public void forEach3(Handler3<GrammaticalTarget, GrammaticalFeature, GrammaticalScope, Collection<String>> handler) {
197         for (Entry<GrammaticalTarget, Map<GrammaticalFeature, Map<GrammaticalScope,Set<String>>>> entry1 : targetToFeatureToUsageToValues.entrySet()) {
198             GrammaticalTarget target = entry1.getKey();
199             final Map<GrammaticalFeature, Map<GrammaticalScope,Set<String>>> featureToUsageToValues = entry1.getValue();
200             if (featureToUsageToValues.isEmpty()) {
201                 handler.apply(target, null, null, null);
202             } else
203                 for (Entry<GrammaticalFeature, Map<GrammaticalScope,Set<String>>> entry2 : featureToUsageToValues.entrySet()) {
204                     GrammaticalFeature feature = entry2.getKey();
205                     for (Entry<GrammaticalScope, Set<String>> entry3 : entry2.getValue().entrySet()) {
206                         final GrammaticalScope usage = entry3.getKey();
207                         final Collection<String> values = entry3.getValue();
208                         handler.apply(target, feature, usage, values);
209                     }
210                 }
211         }
212     }
213 
214     /** Returns null if there is no known information. Otherwise returns the information for the locale (which may be empty if there are no variants) */
get(GrammaticalTarget target, GrammaticalFeature feature, GrammaticalScope usage)215     public Collection<String> get(GrammaticalTarget target, GrammaticalFeature feature, GrammaticalScope usage) {
216         Map<GrammaticalFeature, Map<GrammaticalScope,Set<String>>> featureToUsageToValues = targetToFeatureToUsageToValues.get(target);
217         if (featureToUsageToValues == null) {
218             return Collections.emptySet();
219         }
220         Map<GrammaticalScope,Set<String>> usageToValues = featureToUsageToValues.get(feature);
221         if (usageToValues == null) {
222             return Collections.emptySet();
223         }
224         Collection<String> result = usageToValues.get(usage);
225         return result == null
226             ? usageToValues.get(GrammaticalScope.general)
227                 : result;
228     }
229 
hasInfo(GrammaticalTarget target)230     public boolean hasInfo(GrammaticalTarget target) {
231         return targetToFeatureToUsageToValues.containsKey(target);
232     }
233 
234     @Override
toString()235     public String toString() {
236         return toString("\n");
237     }
toString(String lineSep)238     public String toString(String lineSep) {
239         StringBuilder result = new StringBuilder();
240         this.forEach3((t,f,u, v) ->
241         {
242             result.append(lineSep);
243             result.append("{" + (t == null ? "" : t.toString()) + "}"
244                 + "\t{" + (f == null ? "" : f.toString()) + "}"
245                 + "\t{" +  (u == null ? "" : u.toString()) + "}"
246                 + "\t{" +  (v == null ? "" : Joiner.on(' ').join(v)) + "}");
247         });
248         return result.toString();
249     }
250 
getGrammaticalInfoAttributes(GrammarInfo grammarInfo, UnitPathType pathType, String plural, String gender, String caseVariant)251     static public String getGrammaticalInfoAttributes(GrammarInfo grammarInfo, UnitPathType pathType, String plural, String gender, String caseVariant) {
252         String grammaticalAttributes = "";
253         if (pathType.features.contains(GrammaticalFeature.grammaticalNumber)) { // count is special
254             grammaticalAttributes += "[@count=\"" + (plural == null ? "other" : plural) + "\"]";
255         }
256         if (grammarInfo != null && gender != null
257             && pathType.features.contains(GrammaticalFeature.grammaticalGender)
258             ) {
259             Collection<String> genders = grammarInfo.get(GrammaticalTarget.nominal, GrammaticalFeature.grammaticalGender, GrammaticalScope.units);
260             if (!gender.equals(GrammaticalFeature.grammaticalGender.getDefault(genders))) {
261                 grammaticalAttributes += "[@gender=\"" + gender + "\"]";
262             }
263         }
264         if (grammarInfo != null && caseVariant != null
265             && pathType.features.contains(GrammaticalFeature.grammaticalCase)
266             && !caseVariant.equals(GrammaticalFeature.grammaticalCase.getDefault(null))) {
267             grammaticalAttributes += "[@case=\"" + caseVariant + "\"]";
268         }
269         return grammaticalAttributes;
270     }
271 
    /**
     * Maps a locale ID to plural counts; lists {@code one} and {@code other} for both "pl" and "ru".
     * NOTE(review): presumably these are the plural forms that cannot be derived by the
     * getSourceCaseAndPlural rules — confirm against callers.
     */
    public static final ImmutableMultimap<String,PluralInfo.Count> NON_COMPUTABLE_PLURALS = ImmutableListMultimap.of(
        "pl", PluralInfo.Count.one,
        "pl", PluralInfo.Count.other,
        "ru", PluralInfo.Count.one,
        "ru", PluralInfo.Count.other);
    /**
     * Hard-coded set of locale IDs.
     * NOTE(review): presumably the locales seeded with grammatical information — confirm against callers.
     * TODO: change this to be data-file driven
     */
    public static final Set<String> SEED_LOCALES = ImmutableSet.of("pl", "ru", "da", "de", "nb", "sv", "hi", "id", "es", "fr", "it", "nl", "pt", "en", "ja", "th", "vi", "zh", "zh_TW", "ko", "yue");
281 
282     /**
283      * TODO: change this to be data-file driven
284      */
285     public static final Set<String> SPECIAL_TRANSLATION_UNITS = ImmutableSet.of(
286         // new in v38
287         "mass-grain",
288         "volume-dessert-spoon",
289         "volume-dessert-spoon-imperial",
290         "volume-drop",
291         "volume-dram",
292         "volume-jigger",
293         "volume-pinch",
294         "volume-quart-imperial",
295         // "volume-pint-imperial",
296 
297         "acceleration-meter-per-square-second", "area-acre", "area-hectare",
298         "area-square-centimeter", "area-square-foot", "area-square-kilometer", "area-square-mile", "concentr-percent", "consumption-mile-per-gallon",
299         "consumption-mile-per-gallon-imperial", "duration-day", "duration-hour", "duration-minute", "duration-month", "duration-second", "duration-week",
300         "duration-year", "energy-foodcalorie", "energy-kilocalorie", "length-centimeter", "length-foot", "length-inch", "length-kilometer", "length-meter",
301         "length-mile", "length-millimeter", "length-parsec", "length-picometer", "length-solar-radius", "length-yard", "light-solar-luminosity", "mass-dalton",
302         "mass-earth-mass", "mass-milligram", "mass-solar-mass", "pressure-kilopascal", "speed-kilometer-per-hour", "speed-meter-per-second", "speed-mile-per-hour",
303         "temperature-celsius", "temperature-fahrenheit", "temperature-generic", "temperature-kelvin", "acceleration-g-force", "consumption-liter-per-100-kilometer",
304         "mass-gram", "mass-kilogram", "mass-ounce", "mass-pound", "volume-centiliter", "volume-cubic-centimeter", "volume-cubic-foot", "volume-cubic-mile",
305         "volume-cup", "volume-deciliter", "volume-fluid-ounce", "volume-fluid-ounce-imperial", "volume-gallon", "volume-gallon", "volume-gallon-imperial",
306         "volume-liter", "volume-milliliter", "volume-pint", "volume-quart", "volume-tablespoon", "volume-teaspoon");
307     // compounds
308     // "kilogram-per-cubic-meter", "kilometer-per-liter", "concentr-gram-per-mole", "speed-mile-per-second", "volumetricflow-cubic-foot-per-second",
309     // "volumetricflow-cubic-meter-per-second", "gram-per-cubic-centimeter",
310 
311 
getSourceCaseAndPlural(String locale, String gender, String value, String desiredCase, String desiredPlural, Output<String> sourceCase, Output<String> sourcePlural)312     public void getSourceCaseAndPlural(String locale, String gender, String value, String desiredCase, String desiredPlural,
313         Output<String> sourceCase, Output<String> sourcePlural) {
314         switch(locale) {
315         case "pl":
316             getSourceCaseAndPluralPolish(gender, value, desiredCase, desiredPlural, sourceCase, sourcePlural);
317             break;
318         case "ru":
319             getSourceCaseAndPluralRussian(gender, value, desiredCase, desiredPlural, sourceCase, sourcePlural);
320             break;
321         default:
322             throw new UnsupportedOperationException(locale);
323         }
324     }
325 
326     /** Russian rules for paucal (few) and fractional (other)
327      * <pre>
328      * plural = other
329      * Nominative ⇒ genitive singular
330      * Accusative + masculine ⇒ genitive singular
331      * All other combinations of gender + case ⇒ same-case, plural
332      *
333      * Other
334      * genitive singular
335      *
336      * Plurals:
337      *   one,
338      *   few (2~4),
339      *   many, = plural
340      *   other (where other is 0.0~1.5, 10.0, 100.0, 1000.0, 10000.0, 100000.0, 1000000.0)
341      * </pre>
342      */
getSourceCaseAndPluralRussian(String gender, String value, String desiredCase, String desiredPlural, Output<String> sourceCase, Output<String> sourcePlural)343     private void getSourceCaseAndPluralRussian(String gender, String value,
344         String desiredCase, String desiredPlural,
345         Output<String> sourceCase, Output<String> sourcePlural) {
346         switch (desiredPlural) {
347         case "few":
348             // default source
349             sourceCase.value = desiredCase;
350             sourcePlural.value = "many";
351             // special cases
352             switch (desiredCase) {
353             case "nominative":
354                 sourceCase.value = "genitive";
355                 sourcePlural.value = "one";
356                 break;
357             case "accusative":
358                 switch (gender) {
359                 case "masculine":
360                     sourceCase.value = "genitive";
361                     sourcePlural.value = "one";
362                     break;
363                 }
364                 break;
365             }
366         case "other":
367             sourceCase.value = "genitive";
368             sourcePlural.value = "one";
369             return;
370         }
371     }
372 
    /** Polish rules
     * <pre>
     * plural = few
     *
     * neuter + ending in -um + (nominative, accusative) ⇒ vocative plural
     * Feminine||neuter + (nominative, accusative) ⇒ genitive singular
     * Animate||inanimate + (nominative, accusative) ⇒ vocative plural
     * Personal + nominative ⇒ vocative plural
     * Personal + accusative ⇒ genitive plural
     * All other combinations of gender + case ⇒ same-case, plural
     *
     * plural = other
     * genitive singular
     *
     * Plurals:
     *   one,
     *   few (2~4),
     *   many, = plural
     *   other (where other is 0.0~1.5, 10.0, 100.0, 1000.0, 10000.0, 100000.0, 1000000.0)
     * </pre>
     * For any desiredPlural other than "few" or "other" the outputs are left untouched.
     * <p>
     * NOTE(review): the code below differs from the table above in two places — confirm which is intended:
     * (1) for feminine (and neuter not ending in "um") it sets nominative + "few", not genitive singular;
     * (2) a desired case of "vocative" is handled together with nominative and accusative.
     *
     * @param gender        the gender of the unit; consulted only for nominative, vocative and accusative
     * @param value         the unit's text; only its "um" ending is checked, for neuter
     * @param desiredCase   the case being asked for
     * @param desiredPlural the plural category being asked for
     * @param sourceCase    output: the case to take the form from
     * @param sourcePlural  output: the plural category to take the form from
     */
    private void getSourceCaseAndPluralPolish(String gender, String value,
        String desiredCase, String desiredPlural,
        Output<String> sourceCase, Output<String> sourcePlural) {
        switch (desiredPlural) {
        case "few":
            // default: same case, plural ("many") form
            sourceCase.value = desiredCase;
            sourcePlural.value = "many";
            // special cases
            boolean isNominative = false;
            switch (desiredCase) {
            case "nominative":
                isNominative = true;
                // intentional fall-through: nominative shares the gender rules below
            case "vocative":
            case "accusative":
                switch (gender) {
                case "neuter":
                    if (value.endsWith("um")) {
                        sourceCase.value = "vocative";
                        break;
                    }
                    // otherwise fall through to feminine
                case "feminine":
                    sourceCase.value = "nominative";
                    sourcePlural.value = "few";
                    break;
                case "animate":
                case "inanimate":
                    sourceCase.value = "vocative";
                    break;
                case "personal":
                    // isNominative is true only when desiredCase was "nominative"
                    sourceCase.value = isNominative ? "vocative" : "genitive";
                    break;
                }
                break;
            }
            return;
        case "other":
            sourceCase.value = "genitive";
            sourcePlural.value = "one";
            return;
        }
    }
437 }