1 // © 2017 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 
4 #include "unicode/utypes.h"
5 
6 #if !UCONFIG_NO_FORMATTING
7 
8 #include <cstdlib>
9 
10 #include "unicode/simpleformatter.h"
11 #include "unicode/ures.h"
12 #include "ureslocs.h"
13 #include "charstr.h"
14 #include "uresimp.h"
15 #include "measunit_impl.h"
16 #include "number_longnames.h"
17 #include "number_microprops.h"
18 #include <algorithm>
19 #include "cstring.h"
20 #include "util.h"
21 
22 using namespace icu;
23 using namespace icu::number;
24 using namespace icu::number::impl;
25 
26 namespace {
27 
28 /**
29  * Display Name (this format has no placeholder).
30  *
31  * Used as an index into the LongNameHandler::simpleFormats array. Units
32  * resources cover the normal set of PluralRules keys, as well as `dnam` and
33  * `per` forms.
34  */
35 constexpr int32_t DNAM_INDEX = StandardPlural::Form::COUNT;
36 /**
37  * "per" form (e.g. "{0} per day" is day's "per" form).
38  *
39  * Used as an index into the LongNameHandler::simpleFormats array. Units
40  * resources cover the normal set of PluralRules keys, as well as `dnam` and
41  * `per` forms.
42  */
43 constexpr int32_t PER_INDEX = StandardPlural::Form::COUNT + 1;
44 /**
45  * Gender of the word, in languages with grammatical gender.
46  */
47 constexpr int32_t GENDER_INDEX = StandardPlural::Form::COUNT + 2;
48 // Number of keys in the array populated by PluralTableSink.
49 constexpr int32_t ARRAY_LENGTH = StandardPlural::Form::COUNT + 3;
50 
// TODO(icu-units#28): load this list from resources, after creating a "&set"
// function for use in ldml2icu rules.
//
// Known grammatical gender names. The entries must stay in ascending strcmp
// order because getGenderString() binary-searches this table.
const int32_t GENDER_COUNT = 7;
const char *gGenders[GENDER_COUNT] = {
    "animate",
    "common",
    "feminine",
    "inanimate",
    "masculine",
    "neuter",
    "personal",
};
56 
57 // Converts a UnicodeString to a const char*, either pointing to a string in
58 // gGenders, or pointing to an empty string if an appropriate string was not
59 // found.
getGenderString(UnicodeString uGender,UErrorCode status)60 const char *getGenderString(UnicodeString uGender, UErrorCode status) {
61     if (uGender.length() == 0) {
62         return "";
63     }
64     CharString gender;
65     gender.appendInvariantChars(uGender, status);
66     if (U_FAILURE(status)) {
67         return "";
68     }
69     int32_t first = 0;
70     int32_t last = GENDER_COUNT;
71     while (first < last) {
72         int32_t mid = (first + last) / 2;
73         int32_t cmp = uprv_strcmp(gender.data(), gGenders[mid]);
74         if (cmp == 0) {
75             return gGenders[mid];
76         } else if (cmp > 0) {
77             first = mid + 1;
78         } else if (cmp < 0) {
79             last = mid;
80         }
81     }
82     // We don't return an error in case our gGenders list is incomplete in
83     // production.
84     //
85     // TODO(icu-units#28): a unit test checking all locales' genders are covered
86     // by gGenders? Else load a complete list of genders found in
87     // grammaticalFeatures in an initOnce.
88     return "";
89 }
90 
91 // Returns the array index that corresponds to the given pluralKeyword.
getIndex(const char * pluralKeyword,UErrorCode & status)92 static int32_t getIndex(const char* pluralKeyword, UErrorCode& status) {
93     // pluralKeyword can also be "dnam", "per", or "gender"
94     switch (*pluralKeyword) {
95     case 'd':
96         if (uprv_strcmp(pluralKeyword + 1, "nam") == 0) {
97             return DNAM_INDEX;
98         }
99         break;
100     case 'g':
101         if (uprv_strcmp(pluralKeyword + 1, "ender") == 0) {
102             return GENDER_INDEX;
103         }
104         break;
105     case 'p':
106         if (uprv_strcmp(pluralKeyword + 1, "er") == 0) {
107             return PER_INDEX;
108         }
109         break;
110     default:
111         break;
112     }
113     StandardPlural::Form plural = StandardPlural::fromString(pluralKeyword, status);
114     return plural;
115 }
116 
117 // Selects a string out of the `strings` array which corresponds to the
118 // specified plural form, with fallback to the OTHER form.
119 //
120 // The `strings` array must have ARRAY_LENGTH items: one corresponding to each
121 // of the plural forms, plus a display name ("dnam") and a "per" form.
getWithPlural(const UnicodeString * strings,StandardPlural::Form plural,UErrorCode & status)122 static UnicodeString getWithPlural(
123         const UnicodeString* strings,
124         StandardPlural::Form plural,
125         UErrorCode& status) {
126     UnicodeString result = strings[plural];
127     if (result.isBogus()) {
128         result = strings[StandardPlural::Form::OTHER];
129     }
130     if (result.isBogus()) {
131         // There should always be data in the "other" plural variant.
132         status = U_INTERNAL_PROGRAM_ERROR;
133     }
134     return result;
135 }
136 
// Where the "{0}" placeholder sits within a unit pattern, as reported by
// extractCorePattern().
enum PlaceholderPosition {
    // NOTE(review): never assigned in the code visible here; presumably marks
    // an absent/empty pattern -- confirm against the callers.
    PH_EMPTY,
    // The pattern contains no "{0}" placeholder.
    PH_NONE,
    // The pattern starts with "{0}".
    PH_BEGINNING,
    // "{0}" occurs inside the pattern, neither at its start nor at its end.
    PH_MIDDLE,
    // The pattern ends with "{0}".
    PH_END
};
138 
139 /**
140  * Returns three outputs extracted from pattern.
141  *
142  * @param coreUnit is extracted as per Extract(...) in the spec:
143  *   https://unicode.org/reports/tr35/tr35-general.html#compound-units
144  * @param PlaceholderPosition indicates where in the string the placeholder was
145  *   found.
146  * @param joinerChar Iff the placeholder was at the beginning or end, joinerChar
147  *   contains the space character (if any) that separated the placeholder from
148  *   the rest of the pattern. Otherwise, joinerChar is set to NUL. Only one
149  *   space character is considered.
150  */
extractCorePattern(const UnicodeString & pattern,UnicodeString & coreUnit,PlaceholderPosition & placeholderPosition,UChar & joinerChar)151 void extractCorePattern(const UnicodeString &pattern,
152                         UnicodeString &coreUnit,
153                         PlaceholderPosition &placeholderPosition,
154                         UChar &joinerChar) {
155     joinerChar = 0;
156     int32_t len = pattern.length();
157     if (pattern.startsWith(u"{0}", 3)) {
158         placeholderPosition = PH_BEGINNING;
159         if (u_isJavaSpaceChar(pattern[3])) {
160             joinerChar = pattern[3];
161             coreUnit.setTo(pattern, 4, len - 4);
162         } else {
163             coreUnit.setTo(pattern, 3, len - 3);
164         }
165     } else if (pattern.endsWith(u"{0}", 3)) {
166         placeholderPosition = PH_END;
167         if (u_isJavaSpaceChar(pattern[len - 4])) {
168             coreUnit.setTo(pattern, 0, len - 4);
169             joinerChar = pattern[len - 4];
170         } else {
171             coreUnit.setTo(pattern, 0, len - 3);
172         }
173     } else if (pattern.indexOf(u"{0}", 3, 1, len - 2) == -1) {
174         placeholderPosition = PH_NONE;
175         coreUnit = pattern;
176     } else {
177         placeholderPosition = PH_MIDDLE;
178         coreUnit = pattern;
179     }
180 }
181 
182 //////////////////////////
183 /// BEGIN DATA LOADING ///
184 //////////////////////////
185 
186 // Gets the gender of a built-in unit: unit must be a built-in. Returns an empty
187 // string both in case of unknown gender and in case of unknown unit.
188 UnicodeString
getGenderForBuiltin(const Locale & locale,const MeasureUnit & builtinUnit,UErrorCode & status)189 getGenderForBuiltin(const Locale &locale, const MeasureUnit &builtinUnit, UErrorCode &status) {
190     LocalUResourceBundlePointer unitsBundle(ures_open(U_ICUDATA_UNIT, locale.getName(), &status));
191     if (U_FAILURE(status)) { return {}; }
192 
193     // Map duration-year-person, duration-week-person, etc. to duration-year, duration-week, ...
194     // TODO(ICU-20400): Get duration-*-person data properly with aliases.
195     StringPiece subtypeForResource;
196     int32_t subtypeLen = static_cast<int32_t>(uprv_strlen(builtinUnit.getSubtype()));
197     if (subtypeLen > 7 && uprv_strcmp(builtinUnit.getSubtype() + subtypeLen - 7, "-person") == 0) {
198         subtypeForResource = {builtinUnit.getSubtype(), subtypeLen - 7};
199     } else {
200         subtypeForResource = builtinUnit.getSubtype();
201     }
202 
203     CharString key;
204     key.append("units/", status);
205     key.append(builtinUnit.getType(), status);
206     key.append("/", status);
207     key.append(subtypeForResource, status);
208     key.append("/gender", status);
209 
210     UErrorCode localStatus = status;
211     int32_t resultLen = 0;
212     const UChar *result =
213         ures_getStringByKeyWithFallback(unitsBundle.getAlias(), key.data(), &resultLen, &localStatus);
214     if (U_SUCCESS(localStatus)) {
215         status = localStatus;
216         return UnicodeString(true, result, resultLen);
217     } else {
218         // TODO(icu-units#28): "$unitRes/gender" does not exist. Do we want to
219         // check whether the parent "$unitRes" exists? Then we could return
220         // U_MISSING_RESOURCE_ERROR for incorrect usage (e.g. builtinUnit not
221         // being a builtin).
222         return {};
223     }
224 }
225 
226 // Loads data from a resource tree with paths matching
227 // $key/$pluralForm/$gender/$case, with lateral inheritance for missing cases
228 // and genders.
229 //
230 // An InflectedPluralSink is configured to load data for a specific gender and
231 // case. It loads all plural forms, because selection between plural forms is
232 // dependent upon the value being formatted.
233 //
234 // See data/unit/de.txt and data/unit/fr.txt for examples - take a look at
235 // units/compound/power2: German has case, French has differences for gender,
236 // but no case.
237 //
238 // TODO(icu-units#138): Conceptually similar to PluralTableSink, however the
239 // tree structures are different. After homogenizing the structures, we may be
240 // able to unify the two classes.
241 //
242 // TODO: Spec violation: expects presence of "count" - does not fallback to an
243 // absent "count"! If this fallback were added, getCompoundValue could be
244 // superseded?
245 class InflectedPluralSink : public ResourceSink {
246   public:
247     // Accepts `char*` rather than StringPiece because
248     // ResourceTable::findValue(...) requires a null-terminated `char*`.
249     //
250     // NOTE: outArray MUST have a length of at least ARRAY_LENGTH. No bounds
251     // checking is performed.
InflectedPluralSink(const char * gender,const char * caseVariant,UnicodeString * outArray)252     explicit InflectedPluralSink(const char *gender, const char *caseVariant, UnicodeString *outArray)
253         : gender(gender), caseVariant(caseVariant), outArray(outArray) {
254         // Initialize the array to bogus strings.
255         for (int32_t i = 0; i < ARRAY_LENGTH; i++) {
256             outArray[i].setToBogus();
257         }
258     }
259 
260     // See ResourceSink::put().
put(const char * key,ResourceValue & value,UBool,UErrorCode & status)261     void put(const char *key, ResourceValue &value, UBool /*noFallback*/, UErrorCode &status) U_OVERRIDE {
262         ResourceTable pluralsTable = value.getTable(status);
263         if (U_FAILURE(status)) { return; }
264         for (int32_t i = 0; pluralsTable.getKeyAndValue(i, key, value); ++i) {
265             int32_t pluralIndex = getIndex(key, status);
266             if (U_FAILURE(status)) { return; }
267             if (!outArray[pluralIndex].isBogus()) {
268                 // We already have a pattern
269                 continue;
270             }
271             ResourceTable genderTable = value.getTable(status);
272             ResourceTable caseTable; // This instance has to outlive `value`
273             if (loadForPluralForm(genderTable, caseTable, value, status)) {
274                 outArray[pluralIndex] = value.getUnicodeString(status);
275             }
276         }
277     }
278 
279   private:
280     // Tries to load data for the configured gender from `genderTable`. Returns
281     // true if found, returning the data in `value`. The returned data will be
282     // for the configured gender if found, falling back to "neuter" and
283     // no-gender if not. The caseTable parameter holds the intermediate
284     // ResourceTable for the sake of lifetime management.
loadForPluralForm(const ResourceTable & genderTable,ResourceTable & caseTable,ResourceValue & value,UErrorCode & status)285     bool loadForPluralForm(const ResourceTable &genderTable,
286                            ResourceTable &caseTable,
287                            ResourceValue &value,
288                            UErrorCode &status) {
289         if (uprv_strcmp(gender, "") != 0) {
290             if (loadForGender(genderTable, gender, caseTable, value, status)) {
291                 return true;
292             }
293             if (uprv_strcmp(gender, "neuter") != 0 &&
294                 loadForGender(genderTable, "neuter", caseTable, value, status)) {
295                 return true;
296             }
297         }
298         if (loadForGender(genderTable, "_", caseTable, value, status)) {
299             return true;
300         }
301         return false;
302     }
303 
304     // Tries to load data for the given gender from `genderTable`. Returns true
305     // if found, returning the data in `value`. The returned data will be for
306     // the configured case if found, falling back to "nominative" and no-case if
307     // not.
loadForGender(const ResourceTable & genderTable,const char * genderVal,ResourceTable & caseTable,ResourceValue & value,UErrorCode & status)308     bool loadForGender(const ResourceTable &genderTable,
309                        const char *genderVal,
310                        ResourceTable &caseTable,
311                        ResourceValue &value,
312                        UErrorCode &status) {
313         if (!genderTable.findValue(genderVal, value)) {
314             return false;
315         }
316         caseTable = value.getTable(status);
317         if (uprv_strcmp(caseVariant, "") != 0) {
318             if (loadForCase(caseTable, caseVariant, value)) {
319                 return true;
320             }
321             if (uprv_strcmp(caseVariant, "nominative") != 0 &&
322                 loadForCase(caseTable, "nominative", value)) {
323                 return true;
324             }
325         }
326         if (loadForCase(caseTable, "_", value)) {
327             return true;
328         }
329         return false;
330     }
331 
332     // Tries to load data for the given case from `caseTable`. Returns true if
333     // found, returning the data in `value`.
loadForCase(const ResourceTable & caseTable,const char * caseValue,ResourceValue & value)334     bool loadForCase(const ResourceTable &caseTable, const char *caseValue, ResourceValue &value) {
335         if (!caseTable.findValue(caseValue, value)) {
336             return false;
337         }
338         return true;
339     }
340 
341     const char *gender;
342     const char *caseVariant;
343     UnicodeString *outArray;
344 };
345 
346 // Fetches localised formatting patterns for the given subKey. See documentation
347 // for InflectedPluralSink for details.
348 //
349 // Data is loaded for the appropriate unit width, with missing data filled in
350 // from unitsShort.
getInflectedMeasureData(StringPiece subKey,const Locale & locale,const UNumberUnitWidth & width,const char * gender,const char * caseVariant,UnicodeString * outArray,UErrorCode & status)351 void getInflectedMeasureData(StringPiece subKey,
352                              const Locale &locale,
353                              const UNumberUnitWidth &width,
354                              const char *gender,
355                              const char *caseVariant,
356                              UnicodeString *outArray,
357                              UErrorCode &status) {
358     InflectedPluralSink sink(gender, caseVariant, outArray);
359     LocalUResourceBundlePointer unitsBundle(ures_open(U_ICUDATA_UNIT, locale.getName(), &status));
360     if (U_FAILURE(status)) { return; }
361 
362     CharString key;
363     key.append("units", status);
364     if (width == UNUM_UNIT_WIDTH_NARROW) {
365         key.append("Narrow", status);
366     } else if (width == UNUM_UNIT_WIDTH_SHORT) {
367         key.append("Short", status);
368     }
369     key.append("/", status);
370     key.append(subKey, status);
371 
372     UErrorCode localStatus = status;
373     ures_getAllItemsWithFallback(unitsBundle.getAlias(), key.data(), sink, localStatus);
374     if (width == UNUM_UNIT_WIDTH_SHORT) {
375         status = localStatus;
376         return;
377     }
378 
379     // TODO(ICU-13353): The fallback to short does not work in ICU4C.
380     // Manually fall back to short (this is done automatically in Java).
381     key.clear();
382     key.append("unitsShort/", status);
383     key.append(subKey, status);
384     ures_getAllItemsWithFallback(unitsBundle.getAlias(), key.data(), sink, status);
385 }
386 
387 class PluralTableSink : public ResourceSink {
388   public:
389     // NOTE: outArray MUST have a length of at least ARRAY_LENGTH. No bounds
390     // checking is performed.
PluralTableSink(UnicodeString * outArray)391     explicit PluralTableSink(UnicodeString *outArray) : outArray(outArray) {
392         // Initialize the array to bogus strings.
393         for (int32_t i = 0; i < ARRAY_LENGTH; i++) {
394             outArray[i].setToBogus();
395         }
396     }
397 
put(const char * key,ResourceValue & value,UBool,UErrorCode & status)398     void put(const char *key, ResourceValue &value, UBool /*noFallback*/, UErrorCode &status) U_OVERRIDE {
399         ResourceTable pluralsTable = value.getTable(status);
400         if (U_FAILURE(status)) { return; }
401         for (int32_t i = 0; pluralsTable.getKeyAndValue(i, key, value); ++i) {
402             if (uprv_strcmp(key, "case") == 0) {
403                 continue;
404             }
405             int32_t index = getIndex(key, status);
406             if (U_FAILURE(status)) { return; }
407             if (!outArray[index].isBogus()) {
408                 continue;
409             }
410             outArray[index] = value.getUnicodeString(status);
411             if (U_FAILURE(status)) { return; }
412         }
413     }
414 
415   private:
416     UnicodeString *outArray;
417 };
418 
419 /**
420  * Populates outArray with `locale`-specific values for `unit` through use of
421  * PluralTableSink. Only the set of basic units are supported!
422  *
423  * Reading from resources *unitsNarrow* and *unitsShort* (for width
424  * UNUM_UNIT_WIDTH_NARROW), or just *unitsShort* (for width
425  * UNUM_UNIT_WIDTH_SHORT). For other widths, it reads just "units".
426  *
427  * @param unit must be a built-in unit, i.e. must have a type and subtype,
428  *     listed in gTypes and gSubTypes in measunit.cpp.
429  * @param unitDisplayCase the empty string and "nominative" are treated the
430  *     same. For other cases, strings for the requested case are used if found.
431  *     (For any missing case-specific data, we fall back to nominative.)
432  * @param outArray must be of fixed length ARRAY_LENGTH.
433  */
getMeasureData(const Locale & locale,const MeasureUnit & unit,const UNumberUnitWidth & width,const char * unitDisplayCase,UnicodeString * outArray,UErrorCode & status)434 void getMeasureData(const Locale &locale,
435                     const MeasureUnit &unit,
436                     const UNumberUnitWidth &width,
437                     const char *unitDisplayCase,
438                     UnicodeString *outArray,
439                     UErrorCode &status) {
440     PluralTableSink sink(outArray);
441     LocalUResourceBundlePointer unitsBundle(ures_open(U_ICUDATA_UNIT, locale.getName(), &status));
442     if (U_FAILURE(status)) { return; }
443 
444     CharString subKey;
445     subKey.append("/", status);
446     subKey.append(unit.getType(), status);
447     subKey.append("/", status);
448 
449     // Map duration-year-person, duration-week-person, etc. to duration-year, duration-week, ...
450     // TODO(ICU-20400): Get duration-*-person data properly with aliases.
451     int32_t subtypeLen = static_cast<int32_t>(uprv_strlen(unit.getSubtype()));
452     if (subtypeLen > 7 && uprv_strcmp(unit.getSubtype() + subtypeLen - 7, "-person") == 0) {
453         subKey.append({unit.getSubtype(), subtypeLen - 7}, status);
454     } else {
455         subKey.append({unit.getSubtype(), subtypeLen}, status);
456     }
457 
458     if (width != UNUM_UNIT_WIDTH_FULL_NAME) {
459         UErrorCode localStatus = status;
460         CharString genderKey;
461         genderKey.append("units", localStatus);
462         genderKey.append(subKey, localStatus);
463         genderKey.append("/gender", localStatus);
464         StackUResourceBundle fillIn;
465         ures_getByKeyWithFallback(unitsBundle.getAlias(), genderKey.data(), fillIn.getAlias(),
466                                   &localStatus);
467         outArray[GENDER_INDEX] = ures_getUnicodeString(fillIn.getAlias(), &localStatus);
468     }
469 
470     CharString key;
471     key.append("units", status);
472     if (width == UNUM_UNIT_WIDTH_NARROW) {
473         key.append("Narrow", status);
474     } else if (width == UNUM_UNIT_WIDTH_SHORT) {
475         key.append("Short", status);
476     }
477     key.append(subKey, status);
478 
479     // Grab desired case first, if available. Then grab no-case data to fill in
480     // the gaps.
481     if (width == UNUM_UNIT_WIDTH_FULL_NAME && unitDisplayCase[0] != 0) {
482         CharString caseKey;
483         caseKey.append(key, status);
484         caseKey.append("/case/", status);
485         caseKey.append(unitDisplayCase, status);
486 
487         UErrorCode localStatus = U_ZERO_ERROR;
488         // TODO(icu-units#138): our fallback logic is not spec-compliant:
489         // lateral fallback should happen before locale fallback. Switch to
490         // getInflectedMeasureData after homogenizing data format? Find a unit
491         // test case that demonstrates the incorrect fallback logic (via
492         // regional variant of an inflected language?)
493         ures_getAllItemsWithFallback(unitsBundle.getAlias(), caseKey.data(), sink, localStatus);
494     }
495 
496     // TODO(icu-units#138): our fallback logic is not spec-compliant: we
497     // check the given case, then go straight to the no-case data. The spec
498     // states we should first look for case="nominative". As part of #138,
499     // either get the spec changed, or add unit tests that warn us if
500     // case="nominative" data differs from no-case data?
501     UErrorCode localStatus = U_ZERO_ERROR;
502     ures_getAllItemsWithFallback(unitsBundle.getAlias(), key.data(), sink, localStatus);
503     if (width == UNUM_UNIT_WIDTH_SHORT) {
504         if (U_FAILURE(localStatus)) {
505             status = localStatus;
506         }
507         return;
508     }
509 
510     // TODO(ICU-13353): The fallback to short does not work in ICU4C.
511     // Manually fall back to short (this is done automatically in Java).
512     key.clear();
513     key.append("unitsShort", status);
514     key.append(subKey, status);
515     ures_getAllItemsWithFallback(unitsBundle.getAlias(), key.data(), sink, status);
516 }
517 
518 // NOTE: outArray MUST have a length of at least ARRAY_LENGTH.
getCurrencyLongNameData(const Locale & locale,const CurrencyUnit & currency,UnicodeString * outArray,UErrorCode & status)519 void getCurrencyLongNameData(const Locale &locale, const CurrencyUnit &currency, UnicodeString *outArray,
520                              UErrorCode &status) {
521     // In ICU4J, this method gets a CurrencyData from CurrencyData.provider.
522     // TODO(ICU4J): Implement this without going through CurrencyData, like in ICU4C?
523     PluralTableSink sink(outArray);
524     LocalUResourceBundlePointer unitsBundle(ures_open(U_ICUDATA_CURR, locale.getName(), &status));
525     if (U_FAILURE(status)) { return; }
526     ures_getAllItemsWithFallback(unitsBundle.getAlias(), "CurrencyUnitPatterns", sink, status);
527     if (U_FAILURE(status)) { return; }
528     for (int32_t i = 0; i < StandardPlural::Form::COUNT; i++) {
529         UnicodeString &pattern = outArray[i];
530         if (pattern.isBogus()) {
531             continue;
532         }
533         int32_t longNameLen = 0;
534         const char16_t *longName = ucurr_getPluralName(
535                 currency.getISOCurrency(),
536                 locale.getName(),
537                 nullptr /* isChoiceFormat */,
538                 StandardPlural::getKeyword(static_cast<StandardPlural::Form>(i)),
539                 &longNameLen,
540                 &status);
541         // Example pattern from data: "{0} {1}"
542         // Example output after find-and-replace: "{0} US dollars"
543         pattern.findAndReplace(UnicodeString(u"{1}"), UnicodeString(longName, longNameLen));
544     }
545 }
546 
getCompoundValue(StringPiece compoundKey,const Locale & locale,const UNumberUnitWidth & width,UErrorCode & status)547 UnicodeString getCompoundValue(StringPiece compoundKey,
548                                const Locale &locale,
549                                const UNumberUnitWidth &width,
550                                UErrorCode &status) {
551     LocalUResourceBundlePointer unitsBundle(ures_open(U_ICUDATA_UNIT, locale.getName(), &status));
552     if (U_FAILURE(status)) { return {}; }
553     CharString key;
554     key.append("units", status);
555     if (width == UNUM_UNIT_WIDTH_NARROW) {
556         key.append("Narrow", status);
557     } else if (width == UNUM_UNIT_WIDTH_SHORT) {
558         key.append("Short", status);
559     }
560     key.append("/compound/", status);
561     key.append(compoundKey, status);
562 
563     UErrorCode localStatus = status;
564     int32_t len = 0;
565     const UChar *ptr =
566         ures_getStringByKeyWithFallback(unitsBundle.getAlias(), key.data(), &len, &localStatus);
567     if (U_FAILURE(localStatus) && width != UNUM_UNIT_WIDTH_SHORT) {
568         // Fall back to short, which contains more compound data
569         key.clear();
570         key.append("unitsShort/compound/", status);
571         key.append(compoundKey, status);
572         ptr = ures_getStringByKeyWithFallback(unitsBundle.getAlias(), key.data(), &len, &status);
573     } else {
574         status = localStatus;
575     }
576     if (U_FAILURE(status)) {
577         return {};
578     }
579     return UnicodeString(ptr, len);
580 }
581 
582 /**
583  * Loads and applies deriveComponent rules from CLDR's grammaticalFeatures.xml.
584  *
585  * Consider a deriveComponent rule that looks like this:
586  *
587  *     <deriveComponent feature="case" structure="per" value0="compound" value1="nominative"/>
588  *
589  * Instantiating an instance as follows:
590  *
591  *     DerivedComponents d(loc, "case", "per");
592  *
593  * Applying the rule in the XML element above, `d.value0("foo")` will be "foo",
594  * and `d.value1("foo")` will be "nominative".
595  *
596  * The values returned by value0(...) and value1(...) are valid only while the
597  * instance exists. In case of any kind of failure, value0(...) and value1(...)
598  * will return "".
599  */
600 class DerivedComponents {
601   public:
602     /**
603      * Constructor.
604      *
605      * The feature and structure parameters must be null-terminated. The string
606      * referenced by compoundValue must exist for longer than the
607      * DerivedComponents instance.
608      */
DerivedComponents(const Locale & locale,const char * feature,const char * structure)609     DerivedComponents(const Locale &locale, const char *feature, const char *structure) {
610         StackUResourceBundle derivationsBundle, stackBundle;
611         ures_openDirectFillIn(derivationsBundle.getAlias(), NULL, "grammaticalFeatures", &status);
612         ures_getByKey(derivationsBundle.getAlias(), "grammaticalData", derivationsBundle.getAlias(),
613                       &status);
614         ures_getByKey(derivationsBundle.getAlias(), "derivations", derivationsBundle.getAlias(),
615                       &status);
616         if (U_FAILURE(status)) {
617             return;
618         }
619         UErrorCode localStatus = U_ZERO_ERROR;
620         // TODO(icu-units#28): use standard normal locale resolution algorithms
621         // rather than just grabbing language:
622         ures_getByKey(derivationsBundle.getAlias(), locale.getLanguage(), stackBundle.getAlias(),
623                       &localStatus);
624         // TODO(icu-units#28):
625         // - code currently assumes if the locale exists, the rules are there -
626         //   instead of falling back to root when the requested rule is missing.
627         // - investigate ures.h functions, see if one that uses res_findResource()
628         //   might be better (or use res_findResource directly), or maybe help
629         //   improve ures documentation to guide function selection?
630         if (localStatus == U_MISSING_RESOURCE_ERROR) {
631             ures_getByKey(derivationsBundle.getAlias(), "root", stackBundle.getAlias(), &status);
632         } else {
633             status = localStatus;
634         }
635         ures_getByKey(stackBundle.getAlias(), "component", stackBundle.getAlias(), &status);
636         ures_getByKey(stackBundle.getAlias(), feature, stackBundle.getAlias(), &status);
637         ures_getByKey(stackBundle.getAlias(), structure, stackBundle.getAlias(), &status);
638         UnicodeString val0 = ures_getUnicodeStringByIndex(stackBundle.getAlias(), 0, &status);
639         UnicodeString val1 = ures_getUnicodeStringByIndex(stackBundle.getAlias(), 1, &status);
640         if (U_SUCCESS(status)) {
641             if (val0.compare(UnicodeString(u"compound")) == 0) {
642                 compound0_ = true;
643             } else {
644                 compound0_ = false;
645                 value0_.appendInvariantChars(val0, status);
646             }
647             if (val1.compare(UnicodeString(u"compound")) == 0) {
648                 compound1_ = true;
649             } else {
650                 compound1_ = false;
651                 value1_.appendInvariantChars(val1, status);
652             }
653         }
654     }
655 
656     // Returns a StringPiece that is only valid as long as the instance exists.
value0(const StringPiece compoundValue) const657     StringPiece value0(const StringPiece compoundValue) const {
658         return compound0_ ? compoundValue : value0_.toStringPiece();
659     }
660 
661     // Returns a StringPiece that is only valid as long as the instance exists.
value1(const StringPiece compoundValue) const662     StringPiece value1(const StringPiece compoundValue) const {
663         return compound1_ ? compoundValue : value1_.toStringPiece();
664     }
665 
666     // Returns a char* that is only valid as long as the instance exists.
value0(const char * compoundValue) const667     const char *value0(const char *compoundValue) const {
668         return compound0_ ? compoundValue : value0_.data();
669     }
670 
671     // Returns a char* that is only valid as long as the instance exists.
value1(const char * compoundValue) const672     const char *value1(const char *compoundValue) const {
673         return compound1_ ? compoundValue : value1_.data();
674     }
675 
676   private:
677     UErrorCode status = U_ZERO_ERROR;
678 
679     // Holds strings referred to by value0 and value1;
680     bool compound0_ = false, compound1_ = false;
681     CharString value0_, value1_;
682 };
683 
684 // TODO(icu-units#28): test somehow? Associate with an ICU ticket for adding
685 // testsuite support for testing with synthetic data?
686 /**
687  * Loads and returns the value in rules that look like these:
688  *
689  * <deriveCompound feature="gender" structure="per" value="0"/>
690  * <deriveCompound feature="gender" structure="times" value="1"/>
691  *
692  * Currently a fake example, but spec compliant:
693  * <deriveCompound feature="gender" structure="power" value="feminine"/>
694  *
695  * NOTE: If U_FAILURE(status), returns an empty string.
696  */
697 UnicodeString
getDeriveCompoundRule(Locale locale,const char * feature,const char * structure,UErrorCode & status)698 getDeriveCompoundRule(Locale locale, const char *feature, const char *structure, UErrorCode &status) {
699     StackUResourceBundle derivationsBundle, stackBundle;
700     ures_openDirectFillIn(derivationsBundle.getAlias(), NULL, "grammaticalFeatures", &status);
701     ures_getByKey(derivationsBundle.getAlias(), "grammaticalData", derivationsBundle.getAlias(),
702                   &status);
703     ures_getByKey(derivationsBundle.getAlias(), "derivations", derivationsBundle.getAlias(), &status);
704     // TODO: use standard normal locale resolution algorithms rather than just grabbing language:
705     ures_getByKey(derivationsBundle.getAlias(), locale.getLanguage(), stackBundle.getAlias(), &status);
706     // TODO:
707     // - code currently assumes if the locale exists, the rules are there -
708     //   instead of falling back to root when the requested rule is missing.
709     // - investigate ures.h functions, see if one that uses res_findResource()
710     //   might be better (or use res_findResource directly), or maybe help
711     //   improve ures documentation to guide function selection?
712     if (status == U_MISSING_RESOURCE_ERROR) {
713         status = U_ZERO_ERROR;
714         ures_getByKey(derivationsBundle.getAlias(), "root", stackBundle.getAlias(), &status);
715     }
716     ures_getByKey(stackBundle.getAlias(), "compound", stackBundle.getAlias(), &status);
717     ures_getByKey(stackBundle.getAlias(), feature, stackBundle.getAlias(), &status);
718     UnicodeString uVal = ures_getUnicodeStringByKey(stackBundle.getAlias(), structure, &status);
719     if (U_FAILURE(status)) {
720         return {};
721     }
722     U_ASSERT(!uVal.isBogus());
723     return uVal;
724 }
725 
726 // Returns the gender string for structures following these rules:
727 //
728 // <deriveCompound feature="gender" structure="per" value="0"/>
729 // <deriveCompound feature="gender" structure="times" value="1"/>
730 //
731 // Fake example:
732 // <deriveCompound feature="gender" structure="power" value="feminine"/>
733 //
// data0 and data1 should be pattern arrays (UnicodeString[ARRAY_LENGTH]) that
735 // correspond to value="0" and value="1".
736 //
737 // Pass a nullptr to data1 if the structure has no concept of value="1" (e.g.
738 // "prefix" doesn't).
getDerivedGender(Locale locale,const char * structure,UnicodeString * data0,UnicodeString * data1,UErrorCode & status)739 UnicodeString getDerivedGender(Locale locale,
740                                const char *structure,
741                                UnicodeString *data0,
742                                UnicodeString *data1,
743                                UErrorCode &status) {
744     UnicodeString val = getDeriveCompoundRule(locale, "gender", structure, status);
745     if (val.length() == 1) {
746         switch (val[0]) {
747         case u'0':
748             return data0[GENDER_INDEX];
749         case u'1':
750             if (data1 == nullptr) {
751                 return {};
752             }
753             return data1[GENDER_INDEX];
754         }
755     }
756     return val;
757 }
758 
759 ////////////////////////
760 /// END DATA LOADING ///
761 ////////////////////////
762 
// TODO: promote this somewhere? It's based on patternprops.cpp's trimWhitespace
// Strips leading and trailing characters for which u_isJavaSpaceChar() is true.
// Returns a pointer to the first kept character (inside the caller's buffer)
// and shrinks `length` to the number of kept characters. Nothing is copied.
const UChar *trimSpaceChars(const UChar *s, int32_t &length) {
    // Fast paths: nothing to look at, or neither end needs trimming.
    if (length <= 0) {
        return s;
    }
    if (!u_isJavaSpaceChar(s[0]) && !u_isJavaSpaceChar(s[length - 1])) {
        return s;
    }

    const UChar *const end = s + length;
    const UChar *first = s;
    while (first < end && u_isJavaSpaceChar(*first)) {
        ++first;
    }

    const UChar *last = end;
    if (first < last) {
        // *first is not a space character, so walking backwards is guaranteed
        // to stop before crossing `first`; no bounds test is needed here.
        while (u_isJavaSpaceChar(*(last - 1))) {
            --last;
        }
    }

    length = static_cast<int32_t>(last - first);
    return first;
}
783 
784 /**
785  * Calculates the gender of an arbitrary unit: this is the *second*
786  * implementation of an algorithm to do this:
787  *
788  * Gender is also calculated in "processPatternTimes": that code path is "bottom
789  * up", loading the gender for every component of a compound unit (at the same
790  * time as loading the Long Names formatting patterns), even if the gender is
791  * unneeded, then combining the single units' genders into the compound unit's
792  * gender, according to the rules. This algorithm does a lazier "top-down"
793  * evaluation, starting with the compound unit, calculating which single unit's
794  * gender is needed by breaking it down according to the rules, and then loading
 * only the gender of the one single unit whose gender is needed.
796  *
797  * For future refactorings:
798  * 1. we could drop processPatternTimes' gender calculation and just call this
799  *    function: for UNUM_UNIT_WIDTH_FULL_NAME, the unit gender is in the very
800  *    same table as the formatting patterns, so loading it then may be
801  *    efficient. For other unit widths however, it needs to be explicitly looked
802  *    up anyway.
803  * 2. alternatively, if CLDR is providing all the genders we need such that we
804  *    don't need to calculate them in ICU anymore, we could drop this function
805  *    and keep only processPatternTimes' calculation. (And optimise it a bit?)
806  *
807  * @param locale The desired locale.
808  * @param unit The measure unit to calculate the gender for.
809  * @return The gender string for the unit, or an empty string if unknown or
810  *     ungendered.
811  */
calculateGenderForUnit(const Locale & locale,const MeasureUnit & unit,UErrorCode & status)812 UnicodeString calculateGenderForUnit(const Locale &locale, const MeasureUnit &unit, UErrorCode &status) {
813     MeasureUnitImpl impl;
814     const MeasureUnitImpl& mui = MeasureUnitImpl::forMeasureUnit(unit, impl, status);
815     int32_t singleUnitIndex = 0;
816     if (mui.complexity == UMEASURE_UNIT_COMPOUND) {
817         int32_t startSlice = 0;
818         // inclusive
819         int32_t endSlice = mui.singleUnits.length()-1;
820         U_ASSERT(endSlice > 0); // Else it would not be COMPOUND
821         if (mui.singleUnits[endSlice]->dimensionality < 0) {
822             // We have a -per- construct
823             UnicodeString perRule = getDeriveCompoundRule(locale, "gender", "per", status);
824             if (perRule.length() != 1) {
825                 // Fixed gender for -per- units
826                 return perRule;
827             }
828             if (perRule[0] == u'1') {
829                 // Find the start of the denominator. We already know there is one.
830                 while (mui.singleUnits[startSlice]->dimensionality >= 0) {
831                     startSlice++;
832                 }
833             } else {
834                 // Find the end of the numerator
835                 while (endSlice >= 0 && mui.singleUnits[endSlice]->dimensionality < 0) {
836                     endSlice--;
837                 }
838                 if (endSlice < 0) {
839                     // We have only a denominator, e.g. "per-second".
840                     // TODO(icu-units#28): find out what gender to use in the
841                     // absence of a first value - mentioned in CLDR-14253.
842                     return {};
843                 }
844             }
845         }
846         if (endSlice > startSlice) {
847             // We have a -times- construct
848             UnicodeString timesRule = getDeriveCompoundRule(locale, "gender", "times", status);
849             if (timesRule.length() != 1) {
850                 // Fixed gender for -times- units
851                 return timesRule;
852             }
853             if (timesRule[0] == u'0') {
854                 endSlice = startSlice;
855             } else {
856                 // We assume timesRule[0] == u'1'
857                 startSlice = endSlice;
858             }
859         }
860         U_ASSERT(startSlice == endSlice);
861         singleUnitIndex = startSlice;
862     } else if (mui.complexity == UMEASURE_UNIT_MIXED) {
863         status = U_INTERNAL_PROGRAM_ERROR;
864         return {};
865     } else {
866         U_ASSERT(mui.complexity == UMEASURE_UNIT_SINGLE);
867         U_ASSERT(mui.singleUnits.length() == 1);
868     }
869 
870     // Now we know which singleUnit's gender we want
871     const SingleUnitImpl *singleUnit = mui.singleUnits[singleUnitIndex];
872     // Check for any power-prefix gender override:
873     if (std::abs(singleUnit->dimensionality) != 1) {
874         UnicodeString powerRule = getDeriveCompoundRule(locale, "gender", "power", status);
875         if (powerRule.length() != 1) {
876             // Fixed gender for -powN- units
877             return powerRule;
878         }
879         // powerRule[0] == u'0'; u'1' not currently in spec.
880     }
881     // Check for any SI and binary prefix gender override:
882     if (std::abs(singleUnit->dimensionality) != 1) {
883         UnicodeString prefixRule = getDeriveCompoundRule(locale, "gender", "prefix", status);
884         if (prefixRule.length() != 1) {
885             // Fixed gender for -powN- units
886             return prefixRule;
887         }
888         // prefixRule[0] == u'0'; u'1' not currently in spec.
889     }
890     // Now we've boiled it down to the gender of one simple unit identifier:
891     return getGenderForBuiltin(locale, MeasureUnit::forIdentifier(singleUnit->getSimpleUnitID(), status),
892                                status);
893 }
894 
maybeCalculateGender(const Locale & locale,const MeasureUnit & unitRef,UnicodeString * outArray,UErrorCode & status)895 void maybeCalculateGender(const Locale &locale,
896                           const MeasureUnit &unitRef,
897                           UnicodeString *outArray,
898                           UErrorCode &status) {
899     if (outArray[GENDER_INDEX].isBogus()) {
900         UnicodeString meterGender = getGenderForBuiltin(locale, MeasureUnit::getMeter(), status);
901         if (meterGender.isEmpty()) {
902             // No gender for meter: assume ungendered language
903             return;
904         }
905         // We have a gendered language, but are lacking gender for unitRef.
906         outArray[GENDER_INDEX] = calculateGenderForUnit(locale, unitRef, status);
907     }
908 }
909 
910 } // namespace
911 
forMeasureUnit(const Locale & loc,const MeasureUnit & unitRef,const UNumberUnitWidth & width,const char * unitDisplayCase,const PluralRules * rules,const MicroPropsGenerator * parent,LongNameHandler * fillIn,UErrorCode & status)912 void LongNameHandler::forMeasureUnit(const Locale &loc,
913                                      const MeasureUnit &unitRef,
914                                      const UNumberUnitWidth &width,
915                                      const char *unitDisplayCase,
916                                      const PluralRules *rules,
917                                      const MicroPropsGenerator *parent,
918                                      LongNameHandler *fillIn,
919                                      UErrorCode &status) {
920     // From https://unicode.org/reports/tr35/tr35-general.html#compound-units -
921     // Points 1 and 2 are mostly handled by MeasureUnit:
922     //
923     // 1. If the unitId is empty or invalid, fail
924     // 2. Put the unitId into normalized order
925     U_ASSERT(fillIn != nullptr);
926 
927     if (uprv_strcmp(unitRef.getType(), "") != 0) {
928         // Handling built-in units:
929         //
930         // 3. Set result to be getValue(unitId with length, pluralCategory, caseVariant)
931         //    - If result is not empty, return it
932         UnicodeString simpleFormats[ARRAY_LENGTH];
933         getMeasureData(loc, unitRef, width, unitDisplayCase, simpleFormats, status);
934         maybeCalculateGender(loc, unitRef, simpleFormats, status);
935         if (U_FAILURE(status)) {
936             return;
937         }
938         fillIn->rules = rules;
939         fillIn->parent = parent;
940         fillIn->simpleFormatsToModifiers(simpleFormats,
941                                          {UFIELD_CATEGORY_NUMBER, UNUM_MEASURE_UNIT_FIELD}, status);
942         if (!simpleFormats[GENDER_INDEX].isBogus()) {
943             fillIn->gender = getGenderString(simpleFormats[GENDER_INDEX], status);
944         }
945         return;
946 
947         // TODO(icu-units#145): figure out why this causes a failure in
948         // format/MeasureFormatTest/TestIndividualPluralFallback and other
949         // tests, when it should have been an alternative for the lines above:
950 
951         // forArbitraryUnit(loc, unitRef, width, unitDisplayCase, fillIn, status);
952         // fillIn->rules = rules;
953         // fillIn->parent = parent;
954         // return;
955     } else {
956         // Check if it is a MeasureUnit this constructor handles: this
957         // constructor does not handle mixed units
958         U_ASSERT(unitRef.getComplexity(status) != UMEASURE_UNIT_MIXED);
959         forArbitraryUnit(loc, unitRef, width, unitDisplayCase, fillIn, status);
960         fillIn->rules = rules;
961         fillIn->parent = parent;
962         return;
963     }
964 }
965 
forArbitraryUnit(const Locale & loc,const MeasureUnit & unitRef,const UNumberUnitWidth & width,const char * unitDisplayCase,LongNameHandler * fillIn,UErrorCode & status)966 void LongNameHandler::forArbitraryUnit(const Locale &loc,
967                                        const MeasureUnit &unitRef,
968                                        const UNumberUnitWidth &width,
969                                        const char *unitDisplayCase,
970                                        LongNameHandler *fillIn,
971                                        UErrorCode &status) {
972     if (U_FAILURE(status)) {
973         return;
974     }
975     if (fillIn == nullptr) {
976         status = U_INTERNAL_PROGRAM_ERROR;
977         return;
978     }
979 
980     // Numbered list items are from the algorithms at
981     // https://unicode.org/reports/tr35/tr35-general.html#compound-units:
982     //
983     // 4. Divide the unitId into numerator (the part before the "-per-") and
984     //    denominator (the part after the "-per-). If both are empty, fail
985     MeasureUnitImpl unit;
986     MeasureUnitImpl perUnit;
987     {
988         MeasureUnitImpl fullUnit = MeasureUnitImpl::forMeasureUnitMaybeCopy(unitRef, status);
989         if (U_FAILURE(status)) {
990             return;
991         }
992         for (int32_t i = 0; i < fullUnit.singleUnits.length(); i++) {
993             SingleUnitImpl *subUnit = fullUnit.singleUnits[i];
994             if (subUnit->dimensionality > 0) {
995                 unit.appendSingleUnit(*subUnit, status);
996             } else {
997                 subUnit->dimensionality *= -1;
998                 perUnit.appendSingleUnit(*subUnit, status);
999             }
1000         }
1001     }
1002 
1003     // TODO(icu-units#28): check placeholder logic, see if it needs to be
1004     // present here instead of only in processPatternTimes:
1005     //
1006     // 5. Set both globalPlaceholder and globalPlaceholderPosition to be empty
1007 
1008     DerivedComponents derivedPerCases(loc, "case", "per");
1009 
1010     // 6. numeratorUnitString
1011     UnicodeString numeratorUnitData[ARRAY_LENGTH];
1012     processPatternTimes(std::move(unit), loc, width, derivedPerCases.value0(unitDisplayCase),
1013                         numeratorUnitData, status);
1014 
1015     // 7. denominatorUnitString
1016     UnicodeString denominatorUnitData[ARRAY_LENGTH];
1017     processPatternTimes(std::move(perUnit), loc, width, derivedPerCases.value1(unitDisplayCase),
1018                         denominatorUnitData, status);
1019 
1020     // TODO(icu-units#139):
1021     // - implement DerivedComponents for "plural/times" and "plural/power":
1022     //   French has different rules, we'll be producing the wrong results
1023     //   currently. (Prove via tests!)
1024     // - implement DerivedComponents for "plural/per", "plural/prefix",
1025     //   "case/times", "case/power", and "case/prefix" - although they're
1026     //   currently hardcoded. Languages with different rules are surely on the
1027     //   way.
1028     //
1029     // Currently we only use "case/per", "plural/times", "case/times", and
1030     // "case/power".
1031     //
1032     // This may have impact on multiSimpleFormatsToModifiers(...) below too?
1033     // These rules are currently (ICU 69) all the same and hard-coded below.
1034     UnicodeString perUnitPattern;
1035     if (!denominatorUnitData[PER_INDEX].isBogus()) {
1036         // If we have no denominator, we obtain the empty string:
1037         perUnitPattern = denominatorUnitData[PER_INDEX];
1038     } else {
1039         // 8. Set perPattern to be getValue([per], locale, length)
1040         UnicodeString rawPerUnitFormat = getCompoundValue("per", loc, width, status);
1041         // rawPerUnitFormat is something like "{0} per {1}"; we need to substitute in the secondary unit.
1042         SimpleFormatter perPatternFormatter(rawPerUnitFormat, 2, 2, status);
1043         if (U_FAILURE(status)) {
1044             return;
1045         }
1046         // Plural and placeholder handling for 7. denominatorUnitString:
1047         // TODO(icu-units#139): hardcoded:
1048         // <deriveComponent feature="plural" structure="per" value0="compound" value1="one"/>
1049         UnicodeString denominatorFormat =
1050             getWithPlural(denominatorUnitData, StandardPlural::Form::ONE, status);
1051         // Some "one" pattern may not contain "{0}". For example in "ar" or "ne" locale.
1052         SimpleFormatter denominatorFormatter(denominatorFormat, 0, 1, status);
1053         if (U_FAILURE(status)) {
1054             return;
1055         }
1056         UnicodeString denominatorPattern = denominatorFormatter.getTextWithNoArguments();
1057         int32_t trimmedLen = denominatorPattern.length();
1058         const UChar *trimmed = trimSpaceChars(denominatorPattern.getBuffer(), trimmedLen);
1059         UnicodeString denominatorString(false, trimmed, trimmedLen);
1060         // 9. If the denominatorString is empty, set result to
1061         //    [numeratorString], otherwise set result to format(perPattern,
1062         //    numeratorString, denominatorString)
1063         //
1064         // TODO(icu-units#28): Why does UnicodeString need to be explicit in the
1065         // following line?
1066         perPatternFormatter.format(UnicodeString(u"{0}"), denominatorString, perUnitPattern, status);
1067         if (U_FAILURE(status)) {
1068             return;
1069         }
1070     }
1071     if (perUnitPattern.length() == 0) {
1072         fillIn->simpleFormatsToModifiers(numeratorUnitData,
1073                                          {UFIELD_CATEGORY_NUMBER, UNUM_MEASURE_UNIT_FIELD}, status);
1074     } else {
1075         fillIn->multiSimpleFormatsToModifiers(numeratorUnitData, perUnitPattern,
1076                                               {UFIELD_CATEGORY_NUMBER, UNUM_MEASURE_UNIT_FIELD}, status);
1077     }
1078 
1079     // Gender
1080     //
1081     // TODO(icu-units#28): find out what gender to use in the absence of a first
1082     // value - e.g. what's the gender of "per-second"? Mentioned in CLDR-14253.
1083     //
1084     // gender/per deriveCompound rules don't say:
1085     // <deriveCompound feature="gender" structure="per" value="0"/> <!-- gender(gram-per-meter) ←  gender(gram) -->
1086     fillIn->gender = getGenderString(
1087         getDerivedGender(loc, "per", numeratorUnitData, denominatorUnitData, status), status);
1088 }
1089 
processPatternTimes(MeasureUnitImpl && productUnit,Locale loc,const UNumberUnitWidth & width,const char * caseVariant,UnicodeString * outArray,UErrorCode & status)1090 void LongNameHandler::processPatternTimes(MeasureUnitImpl &&productUnit,
1091                                           Locale loc,
1092                                           const UNumberUnitWidth &width,
1093                                           const char *caseVariant,
1094                                           UnicodeString *outArray,
1095                                           UErrorCode &status) {
1096     if (U_FAILURE(status)) {
1097         return;
1098     }
1099     if (productUnit.complexity == UMEASURE_UNIT_MIXED) {
1100         // These are handled by MixedUnitLongNameHandler
1101         status = U_UNSUPPORTED_ERROR;
1102         return;
1103     }
1104 
1105 #if U_DEBUG
1106     for (int32_t pluralIndex = 0; pluralIndex < ARRAY_LENGTH; pluralIndex++) {
1107         U_ASSERT(outArray[pluralIndex].length() == 0);
1108         U_ASSERT(!outArray[pluralIndex].isBogus());
1109     }
1110 #endif
1111 
1112     if (productUnit.identifier.isEmpty()) {
1113         // TODO(icu-units#28): consider when serialize should be called.
1114         // identifier might also be empty for MeasureUnit().
1115         productUnit.serialize(status);
1116     }
1117     if (U_FAILURE(status)) {
1118         return;
1119     }
1120     if (productUnit.identifier.length() == 0) {
1121         // MeasureUnit(): no units: return empty strings.
1122         return;
1123     }
1124 
1125     MeasureUnit builtinUnit;
1126     if (MeasureUnit::findBySubType(productUnit.identifier.toStringPiece(), &builtinUnit)) {
1127         // TODO(icu-units#145): spec doesn't cover builtin-per-builtin, it
1128         // breaks them all down. Do we want to drop this?
1129         // - findBySubType isn't super efficient, if we skip it and go to basic
1130         //   singles, we don't have to construct MeasureUnit's anymore.
1131         // - Check all the existing unit tests that fail without this: is it due
1132         //   to incorrect fallback via getMeasureData?
1133         // - Do those unit tests cover this code path representatively?
1134         if (builtinUnit != MeasureUnit()) {
1135             getMeasureData(loc, builtinUnit, width, caseVariant, outArray, status);
1136             maybeCalculateGender(loc, builtinUnit, outArray, status);
1137         }
1138         return;
1139     }
1140 
1141     // 2. Set timesPattern to be getValue(times, locale, length)
1142     UnicodeString timesPattern = getCompoundValue("times", loc, width, status);
1143     SimpleFormatter timesPatternFormatter(timesPattern, 2, 2, status);
1144     if (U_FAILURE(status)) {
1145         return;
1146     }
1147 
1148     PlaceholderPosition globalPlaceholder[ARRAY_LENGTH];
1149     UChar globalJoinerChar = 0;
1150     // Numbered list items are from the algorithms at
1151     // https://unicode.org/reports/tr35/tr35-general.html#compound-units:
1152     //
1153     // pattern(...) point 5:
1154     // - Set both globalPlaceholder and globalPlaceholderPosition to be empty
1155     //
1156     // 3. Set result to be empty
1157     for (int32_t pluralIndex = 0; pluralIndex < ARRAY_LENGTH; pluralIndex++) {
1158         // Initial state: empty string pattern, via all falling back to OTHER:
1159         if (pluralIndex == StandardPlural::Form::OTHER) {
1160             outArray[pluralIndex].remove();
1161         } else {
1162             outArray[pluralIndex].setToBogus();
1163         }
1164         globalPlaceholder[pluralIndex] = PH_EMPTY;
1165     }
1166 
1167     // Empty string represents "compound" (propagate the plural form).
1168     const char *pluralCategory = "";
1169     DerivedComponents derivedTimesPlurals(loc, "plural", "times");
1170     DerivedComponents derivedTimesCases(loc, "case", "times");
1171     DerivedComponents derivedPowerCases(loc, "case", "power");
1172 
1173     // 4. For each single_unit in product_unit
1174     for (int32_t singleUnitIndex = 0; singleUnitIndex < productUnit.singleUnits.length();
1175          singleUnitIndex++) {
1176         SingleUnitImpl *singleUnit = productUnit.singleUnits[singleUnitIndex];
1177         const char *singlePluralCategory;
1178         const char *singleCaseVariant;
1179         // TODO(icu-units#28): ensure we have unit tests that change/fail if we
1180         // assign incorrect case variants here:
1181         if (singleUnitIndex < productUnit.singleUnits.length() - 1) {
1182             // 4.1. If hasMultiple
1183             singlePluralCategory = derivedTimesPlurals.value0(pluralCategory);
1184             singleCaseVariant = derivedTimesCases.value0(caseVariant);
1185             pluralCategory = derivedTimesPlurals.value1(pluralCategory);
1186             caseVariant = derivedTimesCases.value1(caseVariant);
1187         } else {
1188             singlePluralCategory = derivedTimesPlurals.value1(pluralCategory);
1189             singleCaseVariant = derivedTimesCases.value1(caseVariant);
1190         }
1191 
1192         // 4.2. Get the gender of that single_unit
1193         MeasureUnit simpleUnit;
1194         if (!MeasureUnit::findBySubType(singleUnit->getSimpleUnitID(), &simpleUnit)) {
1195             // Ideally all simple units should be known, but they're not:
1196             // 100-kilometer is internally treated as a simple unit, but it is
1197             // not a built-in unit and does not have formatting data in CLDR 39.
1198             //
1199             // TODO(icu-units#28): test (desirable) invariants in unit tests.
1200             status = U_UNSUPPORTED_ERROR;
1201             return;
1202         }
1203         const char *gender = getGenderString(getGenderForBuiltin(loc, simpleUnit, status), status);
1204 
1205         // 4.3. If singleUnit starts with a dimensionality_prefix, such as 'square-'
1206         U_ASSERT(singleUnit->dimensionality > 0);
1207         int32_t dimensionality = singleUnit->dimensionality;
1208         UnicodeString dimensionalityPrefixPatterns[ARRAY_LENGTH];
1209         if (dimensionality != 1) {
1210             // 4.3.1. set dimensionalityPrefixPattern to be
1211             //   getValue(that dimensionality_prefix, locale, length, singlePluralCategory, singleCaseVariant, gender),
1212             //   such as "{0} kwadratowym"
1213             CharString dimensionalityKey("compound/power", status);
1214             dimensionalityKey.appendNumber(dimensionality, status);
1215             getInflectedMeasureData(dimensionalityKey.toStringPiece(), loc, width, gender,
1216                                     singleCaseVariant, dimensionalityPrefixPatterns, status);
1217             if (U_FAILURE(status)) {
1218                 // At the time of writing, only pow2 and pow3 are supported.
1219                 // Attempting to format other powers results in a
1220                 // U_RESOURCE_TYPE_MISMATCH. We convert the error if we
1221                 // understand it:
1222                 if (status == U_RESOURCE_TYPE_MISMATCH && dimensionality > 3) {
1223                     status = U_UNSUPPORTED_ERROR;
1224                 }
1225                 return;
1226             }
1227 
1228             // TODO(icu-units#139):
1229             // 4.3.2. set singlePluralCategory to be power0(singlePluralCategory)
1230 
1231             // 4.3.3. set singleCaseVariant to be power0(singleCaseVariant)
1232             singleCaseVariant = derivedPowerCases.value0(singleCaseVariant);
1233             // 4.3.4. remove the dimensionality_prefix from singleUnit
1234             singleUnit->dimensionality = 1;
1235         }
1236 
1237         // 4.4. if singleUnit starts with an si_prefix, such as 'centi'
1238         UMeasurePrefix prefix = singleUnit->unitPrefix;
1239         UnicodeString prefixPattern;
1240         if (prefix != UMEASURE_PREFIX_ONE) {
1241             // 4.4.1. set siPrefixPattern to be getValue(that si_prefix, locale,
1242             //        length), such as "centy{0}"
1243             CharString prefixKey;
1244             // prefixKey looks like "1024p3" or "10p-2":
1245             prefixKey.appendNumber(umeas_getPrefixBase(prefix), status);
1246             prefixKey.append('p', status);
1247             prefixKey.appendNumber(umeas_getPrefixPower(prefix), status);
1248             // Contains a pattern like "centy{0}".
1249             prefixPattern = getCompoundValue(prefixKey.toStringPiece(), loc, width, status);
1250 
1251             // 4.4.2. set singlePluralCategory to be prefix0(singlePluralCategory)
1252             //
1253             // TODO(icu-units#139): that refers to these rules:
1254             // <deriveComponent feature="plural" structure="prefix" value0="one" value1="compound"/>
1255             // though I'm not sure what other value they might end up having.
1256             //
1257             // 4.4.3. set singleCaseVariant to be prefix0(singleCaseVariant)
1258             //
1259             // TODO(icu-units#139): that refers to:
1260             // <deriveComponent feature="case" structure="prefix" value0="nominative"
1261             // value1="compound"/> but the prefix (value0) doesn't have case, the rest simply
1262             // propagates.
1263 
1264             // 4.4.4. remove the si_prefix from singleUnit
1265             singleUnit->unitPrefix = UMEASURE_PREFIX_ONE;
1266         }
1267 
1268         // 4.5. Set corePattern to be the getValue(singleUnit, locale, length,
1269         //      singlePluralCategory, singleCaseVariant), such as "{0} metrem"
1270         UnicodeString singleUnitArray[ARRAY_LENGTH];
1271         // At this point we are left with a Simple Unit:
1272         U_ASSERT(uprv_strcmp(singleUnit->build(status).getIdentifier(), singleUnit->getSimpleUnitID()) ==
1273                  0);
1274         getMeasureData(loc, singleUnit->build(status), width, singleCaseVariant, singleUnitArray,
1275                        status);
1276         if (U_FAILURE(status)) {
1277             // Shouldn't happen if we have data for all single units
1278             return;
1279         }
1280 
1281         // Calculate output gender
1282         if (!singleUnitArray[GENDER_INDEX].isBogus()) {
1283             U_ASSERT(!singleUnitArray[GENDER_INDEX].isEmpty());
1284             UnicodeString uVal;
1285 
1286             if (prefix != UMEASURE_PREFIX_ONE) {
1287                 singleUnitArray[GENDER_INDEX] =
1288                     getDerivedGender(loc, "prefix", singleUnitArray, nullptr, status);
1289             }
1290 
1291             if (dimensionality != 1) {
1292                 singleUnitArray[GENDER_INDEX] =
1293                     getDerivedGender(loc, "power", singleUnitArray, nullptr, status);
1294             }
1295 
1296             UnicodeString timesGenderRule = getDeriveCompoundRule(loc, "gender", "times", status);
1297             if (timesGenderRule.length() == 1) {
1298                 switch (timesGenderRule[0]) {
1299                 case u'0':
1300                     if (singleUnitIndex == 0) {
1301                         U_ASSERT(outArray[GENDER_INDEX].isBogus());
1302                         outArray[GENDER_INDEX] = singleUnitArray[GENDER_INDEX];
1303                     }
1304                     break;
1305                 case u'1':
1306                     if (singleUnitIndex == productUnit.singleUnits.length() - 1) {
1307                         U_ASSERT(outArray[GENDER_INDEX].isBogus());
1308                         outArray[GENDER_INDEX] = singleUnitArray[GENDER_INDEX];
1309                     }
1310                 }
1311             } else {
1312                 if (outArray[GENDER_INDEX].isBogus()) {
1313                     outArray[GENDER_INDEX] = timesGenderRule;
1314                 }
1315             }
1316         }
1317 
1318         // Calculate resulting patterns for each plural form
1319         for (int32_t pluralIndex = 0; pluralIndex < StandardPlural::Form::COUNT; pluralIndex++) {
1320             StandardPlural::Form plural = static_cast<StandardPlural::Form>(pluralIndex);
1321 
1322             // singleUnitArray[pluralIndex] looks something like "{0} Meter"
1323             if (outArray[pluralIndex].isBogus()) {
1324                 if (singleUnitArray[pluralIndex].isBogus()) {
1325                     // Let the usual plural fallback mechanism take care of this
1326                     // plural form
1327                     continue;
1328                 } else {
1329                     // Since our singleUnit can have a plural form that outArray
1330                     // doesn't yet have (relying on fallback to OTHER), we start
1331                     // by grabbing it with the normal plural fallback mechanism
1332                     outArray[pluralIndex] = getWithPlural(outArray, plural, status);
1333                     if (U_FAILURE(status)) {
1334                         return;
1335                     }
1336                 }
1337             }
1338 
1339             if (uprv_strcmp(singlePluralCategory, "") != 0) {
1340                 plural = static_cast<StandardPlural::Form>(getIndex(singlePluralCategory, status));
1341             }
1342 
1343             // 4.6. Extract(corePattern, coreUnit, placeholder, placeholderPosition) from that pattern.
1344             UnicodeString coreUnit;
1345             PlaceholderPosition placeholderPosition;
1346             UChar joinerChar;
1347             extractCorePattern(getWithPlural(singleUnitArray, plural, status), coreUnit,
1348                                placeholderPosition, joinerChar);
1349 
1350             // 4.7 If the position is middle, then fail
1351             if (placeholderPosition == PH_MIDDLE) {
1352                 status = U_UNSUPPORTED_ERROR;
1353                 return;
1354             }
1355 
1356             // 4.8. If globalPlaceholder is empty
1357             if (globalPlaceholder[pluralIndex] == PH_EMPTY) {
1358                 globalPlaceholder[pluralIndex] = placeholderPosition;
1359                 globalJoinerChar = joinerChar;
1360             } else {
1361                 // Expect all units involved to have the same placeholder position
1362                 U_ASSERT(globalPlaceholder[pluralIndex] == placeholderPosition);
1363                 // TODO(icu-units#28): Do we want to add a unit test that checks
1364                 // for consistent joiner chars? Probably not, given how
1365                 // inconsistent they are. File a CLDR ticket with examples?
1366             }
1367             // Now coreUnit would be just "Meter"
1368 
1369             // 4.9. If siPrefixPattern is not empty
1370             if (prefix != UMEASURE_PREFIX_ONE) {
1371                 SimpleFormatter prefixCompiled(prefixPattern, 1, 1, status);
1372                 if (U_FAILURE(status)) {
1373                     return;
1374                 }
1375 
1376                 // 4.9.1. Set coreUnit to be the combineLowercasing(locale, length, siPrefixPattern,
1377                 //        coreUnit)
1378                 UnicodeString tmp;
1379                 // combineLowercasing(locale, length, prefixPattern, coreUnit)
1380                 //
1381                 // TODO(icu-units#28): run this only if prefixPattern does not
1382                 // contain space characters - do languages "as", "bn", "hi",
1383                 // "kk", etc have concepts of upper and lower case?:
1384                 if (width == UNUM_UNIT_WIDTH_FULL_NAME) {
1385                     coreUnit.toLower(loc);
1386                 }
1387                 prefixCompiled.format(coreUnit, tmp, status);
1388                 if (U_FAILURE(status)) {
1389                     return;
1390                 }
1391                 coreUnit = tmp;
1392             }
1393 
1394             // 4.10. If dimensionalityPrefixPattern is not empty
1395             if (dimensionality != 1) {
1396                 SimpleFormatter dimensionalityCompiled(
1397                     getWithPlural(dimensionalityPrefixPatterns, plural, status), 1, 1, status);
1398                 if (U_FAILURE(status)) {
1399                     return;
1400                 }
1401 
1402                 // 4.10.1. Set coreUnit to be the combineLowercasing(locale, length,
1403                 //         dimensionalityPrefixPattern, coreUnit)
1404                 UnicodeString tmp;
1405                 // combineLowercasing(locale, length, prefixPattern, coreUnit)
1406                 //
1407                 // TODO(icu-units#28): run this only if prefixPattern does not
1408                 // contain space characters - do languages "as", "bn", "hi",
1409                 // "kk", etc have concepts of upper and lower case?:
1410                 if (width == UNUM_UNIT_WIDTH_FULL_NAME) {
1411                     coreUnit.toLower(loc);
1412                 }
1413                 dimensionalityCompiled.format(coreUnit, tmp, status);
1414                 if (U_FAILURE(status)) {
1415                     return;
1416                 }
1417                 coreUnit = tmp;
1418             }
1419 
1420             if (outArray[pluralIndex].length() == 0) {
1421                 // 4.11. If the result is empty, set result to be coreUnit
1422                 outArray[pluralIndex] = coreUnit;
1423             } else {
1424                 // 4.12. Otherwise set result to be format(timesPattern, result, coreUnit)
1425                 UnicodeString tmp;
1426                 timesPatternFormatter.format(outArray[pluralIndex], coreUnit, tmp, status);
1427                 outArray[pluralIndex] = tmp;
1428             }
1429         }
1430     }
1431     for (int32_t pluralIndex = 0; pluralIndex < StandardPlural::Form::COUNT; pluralIndex++) {
1432         if (globalPlaceholder[pluralIndex] == PH_BEGINNING) {
1433             UnicodeString tmp;
1434             tmp.append(u"{0}", 3);
1435             if (globalJoinerChar != 0) {
1436                 tmp.append(globalJoinerChar);
1437             }
1438             tmp.append(outArray[pluralIndex]);
1439             outArray[pluralIndex] = tmp;
1440         } else if (globalPlaceholder[pluralIndex] == PH_END) {
1441             if (globalJoinerChar != 0) {
1442                 outArray[pluralIndex].append(globalJoinerChar);
1443             }
1444             outArray[pluralIndex].append(u"{0}", 3);
1445         }
1446     }
1447 }
1448 
getUnitDisplayName(const Locale & loc,const MeasureUnit & unit,UNumberUnitWidth width,UErrorCode & status)1449 UnicodeString LongNameHandler::getUnitDisplayName(
1450         const Locale& loc,
1451         const MeasureUnit& unit,
1452         UNumberUnitWidth width,
1453         UErrorCode& status) {
1454     if (U_FAILURE(status)) {
1455         return ICU_Utility::makeBogusString();
1456     }
1457     UnicodeString simpleFormats[ARRAY_LENGTH];
1458     getMeasureData(loc, unit, width, "", simpleFormats, status);
1459     return simpleFormats[DNAM_INDEX];
1460 }
1461 
getUnitPattern(const Locale & loc,const MeasureUnit & unit,UNumberUnitWidth width,StandardPlural::Form pluralForm,UErrorCode & status)1462 UnicodeString LongNameHandler::getUnitPattern(
1463         const Locale& loc,
1464         const MeasureUnit& unit,
1465         UNumberUnitWidth width,
1466         StandardPlural::Form pluralForm,
1467         UErrorCode& status) {
1468     if (U_FAILURE(status)) {
1469         return ICU_Utility::makeBogusString();
1470     }
1471     UnicodeString simpleFormats[ARRAY_LENGTH];
1472     getMeasureData(loc, unit, width, "", simpleFormats, status);
1473     // The above already handles fallback from other widths to short
1474     if (U_FAILURE(status)) {
1475         return ICU_Utility::makeBogusString();
1476     }
1477     // Now handle fallback from other plural forms to OTHER
1478     return (!(simpleFormats[pluralForm]).isBogus())? simpleFormats[pluralForm]:
1479             simpleFormats[StandardPlural::Form::OTHER];
1480 }
1481 
forCurrencyLongNames(const Locale & loc,const CurrencyUnit & currency,const PluralRules * rules,const MicroPropsGenerator * parent,UErrorCode & status)1482 LongNameHandler* LongNameHandler::forCurrencyLongNames(const Locale &loc, const CurrencyUnit &currency,
1483                                                       const PluralRules *rules,
1484                                                       const MicroPropsGenerator *parent,
1485                                                       UErrorCode &status) {
1486     auto* result = new LongNameHandler(rules, parent);
1487     if (result == nullptr) {
1488         status = U_MEMORY_ALLOCATION_ERROR;
1489         return nullptr;
1490     }
1491     UnicodeString simpleFormats[ARRAY_LENGTH];
1492     getCurrencyLongNameData(loc, currency, simpleFormats, status);
1493     if (U_FAILURE(status)) { return nullptr; }
1494     result->simpleFormatsToModifiers(simpleFormats, {UFIELD_CATEGORY_NUMBER, UNUM_CURRENCY_FIELD}, status);
1495     // TODO(icu-units#28): currency gender?
1496     return result;
1497 }
1498 
simpleFormatsToModifiers(const UnicodeString * simpleFormats,Field field,UErrorCode & status)1499 void LongNameHandler::simpleFormatsToModifiers(const UnicodeString *simpleFormats, Field field,
1500                                                UErrorCode &status) {
1501     for (int32_t i = 0; i < StandardPlural::Form::COUNT; i++) {
1502         StandardPlural::Form plural = static_cast<StandardPlural::Form>(i);
1503         UnicodeString simpleFormat = getWithPlural(simpleFormats, plural, status);
1504         if (U_FAILURE(status)) { return; }
1505         SimpleFormatter compiledFormatter(simpleFormat, 0, 1, status);
1506         if (U_FAILURE(status)) { return; }
1507         fModifiers[i] = SimpleModifier(compiledFormatter, field, false, {this, SIGNUM_POS_ZERO, plural});
1508     }
1509 }
1510 
multiSimpleFormatsToModifiers(const UnicodeString * leadFormats,UnicodeString trailFormat,Field field,UErrorCode & status)1511 void LongNameHandler::multiSimpleFormatsToModifiers(const UnicodeString *leadFormats, UnicodeString trailFormat,
1512                                                     Field field, UErrorCode &status) {
1513     SimpleFormatter trailCompiled(trailFormat, 1, 1, status);
1514     if (U_FAILURE(status)) { return; }
1515     for (int32_t i = 0; i < StandardPlural::Form::COUNT; i++) {
1516         StandardPlural::Form plural = static_cast<StandardPlural::Form>(i);
1517         UnicodeString leadFormat = getWithPlural(leadFormats, plural, status);
1518         if (U_FAILURE(status)) { return; }
1519         UnicodeString compoundFormat;
1520         if (leadFormat.length() == 0) {
1521             compoundFormat = trailFormat;
1522         } else {
1523             trailCompiled.format(leadFormat, compoundFormat, status);
1524             if (U_FAILURE(status)) { return; }
1525         }
1526         SimpleFormatter compoundCompiled(compoundFormat, 0, 1, status);
1527         if (U_FAILURE(status)) { return; }
1528         fModifiers[i] = SimpleModifier(compoundCompiled, field, false, {this, SIGNUM_POS_ZERO, plural});
1529     }
1530 }
1531 
processQuantity(DecimalQuantity & quantity,MicroProps & micros,UErrorCode & status) const1532 void LongNameHandler::processQuantity(DecimalQuantity &quantity, MicroProps &micros,
1533                                       UErrorCode &status) const {
1534     if (parent != NULL) {
1535         parent->processQuantity(quantity, micros, status);
1536     }
1537     StandardPlural::Form pluralForm = utils::getPluralSafe(micros.rounder, rules, quantity, status);
1538     micros.modOuter = &fModifiers[pluralForm];
1539     micros.gender = gender;
1540 }
1541 
getModifier(Signum,StandardPlural::Form plural) const1542 const Modifier* LongNameHandler::getModifier(Signum /*signum*/, StandardPlural::Form plural) const {
1543     return &fModifiers[plural];
1544 }
1545 
forMeasureUnit(const Locale & loc,const MeasureUnit & mixedUnit,const UNumberUnitWidth & width,const char * unitDisplayCase,const PluralRules * rules,const MicroPropsGenerator * parent,MixedUnitLongNameHandler * fillIn,UErrorCode & status)1546 void MixedUnitLongNameHandler::forMeasureUnit(const Locale &loc,
1547                                               const MeasureUnit &mixedUnit,
1548                                               const UNumberUnitWidth &width,
1549                                               const char *unitDisplayCase,
1550                                               const PluralRules *rules,
1551                                               const MicroPropsGenerator *parent,
1552                                               MixedUnitLongNameHandler *fillIn,
1553                                               UErrorCode &status) {
1554     U_ASSERT(mixedUnit.getComplexity(status) == UMEASURE_UNIT_MIXED);
1555     U_ASSERT(fillIn != nullptr);
1556     if (U_FAILURE(status)) {
1557         return;
1558     }
1559 
1560     MeasureUnitImpl temp;
1561     const MeasureUnitImpl &impl = MeasureUnitImpl::forMeasureUnit(mixedUnit, temp, status);
1562     // Defensive, for production code:
1563     if (impl.complexity != UMEASURE_UNIT_MIXED) {
1564         // Should be using the normal LongNameHandler
1565         status = U_UNSUPPORTED_ERROR;
1566         return;
1567     }
1568 
1569     fillIn->fMixedUnitCount = impl.singleUnits.length();
1570     fillIn->fMixedUnitData.adoptInstead(new UnicodeString[fillIn->fMixedUnitCount * ARRAY_LENGTH]);
1571     for (int32_t i = 0; i < fillIn->fMixedUnitCount; i++) {
1572         // Grab data for each of the components.
1573         UnicodeString *unitData = &fillIn->fMixedUnitData[i * ARRAY_LENGTH];
1574         // TODO(CLDR-14502): check from the CLDR-14502 ticket whether this
1575         // propagation of unitDisplayCase is correct:
1576         getMeasureData(loc, impl.singleUnits[i]->build(status), width, unitDisplayCase, unitData,
1577                        status);
1578         // TODO(ICU-21494): if we add support for gender for mixed units, we may
1579         // need maybeCalculateGender() here.
1580     }
1581 
1582     // TODO(icu-units#120): Make sure ICU doesn't output zero-valued
1583     // high-magnitude fields
1584     // * for mixed units count N, produce N listFormatters, one for each subset
1585     //   that might be formatted.
1586     UListFormatterWidth listWidth = ULISTFMT_WIDTH_SHORT;
1587     if (width == UNUM_UNIT_WIDTH_NARROW) {
1588         listWidth = ULISTFMT_WIDTH_NARROW;
1589     } else if (width == UNUM_UNIT_WIDTH_FULL_NAME) {
1590         // This might be the same as SHORT in most languages:
1591         listWidth = ULISTFMT_WIDTH_WIDE;
1592     }
1593     fillIn->fListFormatter.adoptInsteadAndCheckErrorCode(
1594         ListFormatter::createInstance(loc, ULISTFMT_TYPE_UNITS, listWidth, status), status);
1595     // TODO(ICU-21494): grab gender of each unit, calculate the gender
1596     // associated with this list formatter, save it for later.
1597     fillIn->rules = rules;
1598     fillIn->parent = parent;
1599 
1600     // We need a localised NumberFormatter for the numbers of the bigger units
1601     // (providing Arabic numerals, for example).
1602     fillIn->fNumberFormatter = NumberFormatter::withLocale(loc);
1603 }
1604 
processQuantity(DecimalQuantity & quantity,MicroProps & micros,UErrorCode & status) const1605 void MixedUnitLongNameHandler::processQuantity(DecimalQuantity &quantity, MicroProps &micros,
1606                                                UErrorCode &status) const {
1607     U_ASSERT(fMixedUnitCount > 1);
1608     if (parent != nullptr) {
1609         parent->processQuantity(quantity, micros, status);
1610     }
1611     micros.modOuter = getMixedUnitModifier(quantity, micros, status);
1612 }
1613 
getMixedUnitModifier(DecimalQuantity & quantity,MicroProps & micros,UErrorCode & status) const1614 const Modifier *MixedUnitLongNameHandler::getMixedUnitModifier(DecimalQuantity &quantity,
1615                                                                MicroProps &micros,
1616                                                                UErrorCode &status) const {
1617     if (micros.mixedMeasuresCount == 0) {
1618         U_ASSERT(micros.mixedMeasuresCount > 0); // Mixed unit: we must have more than one unit value
1619         status = U_UNSUPPORTED_ERROR;
1620         return &micros.helpers.emptyWeakModifier;
1621     }
1622 
1623     // Algorithm:
1624     //
1625     // For the mixed-units measurement of: "3 yard, 1 foot, 2.6 inch", we should
1626     // find "3 yard" and "1 foot" in micros.mixedMeasures.
1627     //
1628     // Obtain long-names with plural forms corresponding to measure values:
1629     //   * {0} yards, {0} foot, {0} inches
1630     //
1631     // Format the integer values appropriately and modify with the format
1632     // strings:
1633     //   - 3 yards, 1 foot
1634     //
1635     // Use ListFormatter to combine, with one placeholder:
1636     //   - 3 yards, 1 foot and {0} inches
1637     //
1638     // Return a SimpleModifier for this pattern, letting the rest of the
1639     // pipeline take care of the remaining inches.
1640 
1641     LocalArray<UnicodeString> outputMeasuresList(new UnicodeString[fMixedUnitCount], status);
1642     if (U_FAILURE(status)) {
1643         return &micros.helpers.emptyWeakModifier;
1644     }
1645 
1646     StandardPlural::Form quantityPlural = StandardPlural::Form::OTHER;
1647     for (int32_t i = 0; i < micros.mixedMeasuresCount; i++) {
1648         DecimalQuantity fdec;
1649 
1650         // If numbers are negative, only the first number needs to have its
1651         // negative sign formatted.
1652         int64_t number = i > 0 ? std::abs(micros.mixedMeasures[i]) : micros.mixedMeasures[i];
1653 
1654         if (micros.indexOfQuantity == i) { // Insert placeholder for `quantity`
1655             // If quantity is not the first value and quantity is negative
1656             if (micros.indexOfQuantity > 0 && quantity.isNegative()) {
1657                 quantity.negate();
1658             }
1659 
1660             StandardPlural::Form quantityPlural =
1661                 utils::getPluralSafe(micros.rounder, rules, quantity, status);
1662             UnicodeString quantityFormatWithPlural =
1663                 getWithPlural(&fMixedUnitData[i * ARRAY_LENGTH], quantityPlural, status);
1664             SimpleFormatter quantityFormatter(quantityFormatWithPlural, 0, 1, status);
1665             quantityFormatter.format(UnicodeString(u"{0}"), outputMeasuresList[i], status);
1666         } else {
1667             fdec.setToLong(number);
1668             StandardPlural::Form pluralForm = utils::getStandardPlural(rules, fdec);
1669             UnicodeString simpleFormat =
1670                 getWithPlural(&fMixedUnitData[i * ARRAY_LENGTH], pluralForm, status);
1671             SimpleFormatter compiledFormatter(simpleFormat, 0, 1, status);
1672             UnicodeString num;
1673             auto appendable = UnicodeStringAppendable(num);
1674 
1675             fNumberFormatter.formatDecimalQuantity(fdec, status).appendTo(appendable, status);
1676             compiledFormatter.format(num, outputMeasuresList[i], status);
1677         }
1678     }
1679 
1680     // TODO(ICU-21494): implement gender for lists of mixed units. Presumably we
1681     // can set micros.gender to the gender associated with the list formatter in
1682     // use below (once we have correct support for that). And then document this
1683     // appropriately? "getMixedUnitModifier" doesn't sound like it would do
1684     // something like this.
1685 
1686     // Combine list into a "premixed" pattern
1687     UnicodeString premixedFormatPattern;
1688     fListFormatter->format(outputMeasuresList.getAlias(), fMixedUnitCount, premixedFormatPattern,
1689                            status);
1690     SimpleFormatter premixedCompiled(premixedFormatPattern, 0, 1, status);
1691     if (U_FAILURE(status)) {
1692         return &micros.helpers.emptyWeakModifier;
1693     }
1694 
1695     micros.helpers.mixedUnitModifier =
1696         SimpleModifier(premixedCompiled, kUndefinedField, false, {this, SIGNUM_POS_ZERO, quantityPlural});
1697     return &micros.helpers.mixedUnitModifier;
1698 }
1699 
// Modifier lookup by signum and plural form is not implemented for mixed
// units: both arguments are ignored and the body is marked unreachable. The
// trailing `return nullptr` follows the UPRV_UNREACHABLE marker.
const Modifier *MixedUnitLongNameHandler::getModifier(Signum /*signum*/,
                                                      StandardPlural::Form /*plural*/) const {
    // TODO(icu-units#28): investigate this method when investigating where
    // ModifierStore::getModifier() gets used. To be sure it remains
    // unreachable:
    UPRV_UNREACHABLE;
    return nullptr;
}
1708 
forMeasureUnits(const Locale & loc,const MaybeStackVector<MeasureUnit> & units,const UNumberUnitWidth & width,const char * unitDisplayCase,const PluralRules * rules,const MicroPropsGenerator * parent,UErrorCode & status)1709 LongNameMultiplexer *LongNameMultiplexer::forMeasureUnits(const Locale &loc,
1710                                                           const MaybeStackVector<MeasureUnit> &units,
1711                                                           const UNumberUnitWidth &width,
1712                                                           const char *unitDisplayCase,
1713                                                           const PluralRules *rules,
1714                                                           const MicroPropsGenerator *parent,
1715                                                           UErrorCode &status) {
1716     LocalPointer<LongNameMultiplexer> result(new LongNameMultiplexer(parent), status);
1717     if (U_FAILURE(status)) {
1718         return nullptr;
1719     }
1720     U_ASSERT(units.length() > 0);
1721     if (result->fHandlers.resize(units.length()) == nullptr) {
1722         status = U_MEMORY_ALLOCATION_ERROR;
1723         return nullptr;
1724     }
1725     result->fMeasureUnits.adoptInstead(new MeasureUnit[units.length()]);
1726     for (int32_t i = 0, length = units.length(); i < length; i++) {
1727         const MeasureUnit &unit = *units[i];
1728         result->fMeasureUnits[i] = unit;
1729         if (unit.getComplexity(status) == UMEASURE_UNIT_MIXED) {
1730             MixedUnitLongNameHandler *mlnh = result->fMixedUnitHandlers.createAndCheckErrorCode(status);
1731             MixedUnitLongNameHandler::forMeasureUnit(loc, unit, width, unitDisplayCase, rules, NULL,
1732                                                      mlnh, status);
1733             result->fHandlers[i] = mlnh;
1734         } else {
1735             LongNameHandler *lnh = result->fLongNameHandlers.createAndCheckErrorCode(status);
1736             LongNameHandler::forMeasureUnit(loc, unit, width, unitDisplayCase, rules, NULL, lnh, status);
1737             result->fHandlers[i] = lnh;
1738         }
1739         if (U_FAILURE(status)) {
1740             return nullptr;
1741         }
1742     }
1743     return result.orphan();
1744 }
1745 
processQuantity(DecimalQuantity & quantity,MicroProps & micros,UErrorCode & status) const1746 void LongNameMultiplexer::processQuantity(DecimalQuantity &quantity, MicroProps &micros,
1747                                           UErrorCode &status) const {
1748     // We call parent->processQuantity() from the Multiplexer, instead of
1749     // letting LongNameHandler handle it: we don't know which LongNameHandler to
1750     // call until we've called the parent!
1751     fParent->processQuantity(quantity, micros, status);
1752 
1753     // Call the correct LongNameHandler based on outputUnit
1754     for (int i = 0; i < fHandlers.getCapacity(); i++) {
1755         if (fMeasureUnits[i] == micros.outputUnit) {
1756             fHandlers[i]->processQuantity(quantity, micros, status);
1757             return;
1758         }
1759     }
1760     if (U_FAILURE(status)) {
1761         return;
1762     }
1763     // We shouldn't receive any outputUnit for which we haven't already got a
1764     // LongNameHandler:
1765     status = U_INTERNAL_PROGRAM_ERROR;
1766 }
1767 
1768 #endif /* #if !UCONFIG_NO_FORMATTING */
1769