1 // © 2017 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3
4 #include "unicode/utypes.h"
5
6 #if !UCONFIG_NO_FORMATTING
7
8 #include <cstdlib>
9
10 #include "unicode/simpleformatter.h"
11 #include "unicode/ures.h"
12 #include "ureslocs.h"
13 #include "charstr.h"
14 #include "uresimp.h"
15 #include "measunit_impl.h"
16 #include "number_longnames.h"
17 #include "number_microprops.h"
18 #include <algorithm>
19 #include "cstring.h"
20 #include "util.h"
21
22 using namespace icu;
23 using namespace icu::number;
24 using namespace icu::number::impl;
25
26 namespace {
27
28 /**
29 * Display Name (this format has no placeholder).
30 *
31 * Used as an index into the LongNameHandler::simpleFormats array. Units
32 * resources cover the normal set of PluralRules keys, as well as `dnam` and
33 * `per` forms.
34 */
35 constexpr int32_t DNAM_INDEX = StandardPlural::Form::COUNT;
36 /**
37 * "per" form (e.g. "{0} per day" is day's "per" form).
38 *
39 * Used as an index into the LongNameHandler::simpleFormats array. Units
40 * resources cover the normal set of PluralRules keys, as well as `dnam` and
41 * `per` forms.
42 */
43 constexpr int32_t PER_INDEX = StandardPlural::Form::COUNT + 1;
44 /**
45 * Gender of the word, in languages with grammatical gender.
46 */
47 constexpr int32_t GENDER_INDEX = StandardPlural::Form::COUNT + 2;
48 // Number of keys in the array populated by PluralTableSink.
49 constexpr int32_t ARRAY_LENGTH = StandardPlural::Form::COUNT + 3;
50
// Known grammatical gender names.
//
// The entries are listed in ascending uprv_strcmp order: getGenderString()
// binary-searches this table, so any addition must keep it sorted.
//
// TODO(icu-units#28): load this list from resources, after creating a "&set"
// function for use in ldml2icu rules.
const int32_t GENDER_COUNT = 7;
const char *gGenders[GENDER_COUNT] = {
    "animate",
    "common",
    "feminine",
    "inanimate",
    "masculine",
    "neuter",
    "personal",
};
56
57 // Converts a UnicodeString to a const char*, either pointing to a string in
58 // gGenders, or pointing to an empty string if an appropriate string was not
59 // found.
getGenderString(UnicodeString uGender,UErrorCode status)60 const char *getGenderString(UnicodeString uGender, UErrorCode status) {
61 if (uGender.length() == 0) {
62 return "";
63 }
64 CharString gender;
65 gender.appendInvariantChars(uGender, status);
66 if (U_FAILURE(status)) {
67 return "";
68 }
69 int32_t first = 0;
70 int32_t last = GENDER_COUNT;
71 while (first < last) {
72 int32_t mid = (first + last) / 2;
73 int32_t cmp = uprv_strcmp(gender.data(), gGenders[mid]);
74 if (cmp == 0) {
75 return gGenders[mid];
76 } else if (cmp > 0) {
77 first = mid + 1;
78 } else if (cmp < 0) {
79 last = mid;
80 }
81 }
82 // We don't return an error in case our gGenders list is incomplete in
83 // production.
84 //
85 // TODO(icu-units#28): a unit test checking all locales' genders are covered
86 // by gGenders? Else load a complete list of genders found in
87 // grammaticalFeatures in an initOnce.
88 return "";
89 }
90
91 // Returns the array index that corresponds to the given pluralKeyword.
getIndex(const char * pluralKeyword,UErrorCode & status)92 static int32_t getIndex(const char* pluralKeyword, UErrorCode& status) {
93 // pluralKeyword can also be "dnam", "per", or "gender"
94 switch (*pluralKeyword) {
95 case 'd':
96 if (uprv_strcmp(pluralKeyword + 1, "nam") == 0) {
97 return DNAM_INDEX;
98 }
99 break;
100 case 'g':
101 if (uprv_strcmp(pluralKeyword + 1, "ender") == 0) {
102 return GENDER_INDEX;
103 }
104 break;
105 case 'p':
106 if (uprv_strcmp(pluralKeyword + 1, "er") == 0) {
107 return PER_INDEX;
108 }
109 break;
110 default:
111 break;
112 }
113 StandardPlural::Form plural = StandardPlural::fromString(pluralKeyword, status);
114 return plural;
115 }
116
117 // Selects a string out of the `strings` array which corresponds to the
118 // specified plural form, with fallback to the OTHER form.
119 //
120 // The `strings` array must have ARRAY_LENGTH items: one corresponding to each
121 // of the plural forms, plus a display name ("dnam") and a "per" form.
getWithPlural(const UnicodeString * strings,StandardPlural::Form plural,UErrorCode & status)122 static UnicodeString getWithPlural(
123 const UnicodeString* strings,
124 StandardPlural::Form plural,
125 UErrorCode& status) {
126 UnicodeString result = strings[plural];
127 if (result.isBogus()) {
128 result = strings[StandardPlural::Form::OTHER];
129 }
130 if (result.isBogus()) {
131 // There should always be data in the "other" plural variant.
132 status = U_INTERNAL_PROGRAM_ERROR;
133 }
134 return result;
135 }
136
// Where the "{0}" placeholder sits within a unit pattern, as reported by
// extractCorePattern().
enum PlaceholderPosition {
    PH_EMPTY,     // not assigned by extractCorePattern(); presumably "no pattern yet" - confirm at use sites
    PH_NONE,      // the pattern contains no "{0}"
    PH_BEGINNING, // the pattern starts with "{0}"
    PH_MIDDLE,    // "{0}" occurs, but neither at the very start nor at the very end
    PH_END        // the pattern ends with "{0}"
};
138
139 /**
140 * Returns three outputs extracted from pattern.
141 *
142 * @param coreUnit is extracted as per Extract(...) in the spec:
143 * https://unicode.org/reports/tr35/tr35-general.html#compound-units
144 * @param PlaceholderPosition indicates where in the string the placeholder was
145 * found.
146 * @param joinerChar Iff the placeholder was at the beginning or end, joinerChar
147 * contains the space character (if any) that separated the placeholder from
148 * the rest of the pattern. Otherwise, joinerChar is set to NUL. Only one
149 * space character is considered.
150 */
extractCorePattern(const UnicodeString & pattern,UnicodeString & coreUnit,PlaceholderPosition & placeholderPosition,UChar & joinerChar)151 void extractCorePattern(const UnicodeString &pattern,
152 UnicodeString &coreUnit,
153 PlaceholderPosition &placeholderPosition,
154 UChar &joinerChar) {
155 joinerChar = 0;
156 int32_t len = pattern.length();
157 if (pattern.startsWith(u"{0}", 3)) {
158 placeholderPosition = PH_BEGINNING;
159 if (u_isJavaSpaceChar(pattern[3])) {
160 joinerChar = pattern[3];
161 coreUnit.setTo(pattern, 4, len - 4);
162 } else {
163 coreUnit.setTo(pattern, 3, len - 3);
164 }
165 } else if (pattern.endsWith(u"{0}", 3)) {
166 placeholderPosition = PH_END;
167 if (u_isJavaSpaceChar(pattern[len - 4])) {
168 coreUnit.setTo(pattern, 0, len - 4);
169 joinerChar = pattern[len - 4];
170 } else {
171 coreUnit.setTo(pattern, 0, len - 3);
172 }
173 } else if (pattern.indexOf(u"{0}", 3, 1, len - 2) == -1) {
174 placeholderPosition = PH_NONE;
175 coreUnit = pattern;
176 } else {
177 placeholderPosition = PH_MIDDLE;
178 coreUnit = pattern;
179 }
180 }
181
182 //////////////////////////
183 /// BEGIN DATA LOADING ///
184 //////////////////////////
185
186 // Gets the gender of a built-in unit: unit must be a built-in. Returns an empty
187 // string both in case of unknown gender and in case of unknown unit.
188 UnicodeString
getGenderForBuiltin(const Locale & locale,const MeasureUnit & builtinUnit,UErrorCode & status)189 getGenderForBuiltin(const Locale &locale, const MeasureUnit &builtinUnit, UErrorCode &status) {
190 LocalUResourceBundlePointer unitsBundle(ures_open(U_ICUDATA_UNIT, locale.getName(), &status));
191 if (U_FAILURE(status)) { return {}; }
192
193 // Map duration-year-person, duration-week-person, etc. to duration-year, duration-week, ...
194 // TODO(ICU-20400): Get duration-*-person data properly with aliases.
195 StringPiece subtypeForResource;
196 int32_t subtypeLen = static_cast<int32_t>(uprv_strlen(builtinUnit.getSubtype()));
197 if (subtypeLen > 7 && uprv_strcmp(builtinUnit.getSubtype() + subtypeLen - 7, "-person") == 0) {
198 subtypeForResource = {builtinUnit.getSubtype(), subtypeLen - 7};
199 } else {
200 subtypeForResource = builtinUnit.getSubtype();
201 }
202
203 CharString key;
204 key.append("units/", status);
205 key.append(builtinUnit.getType(), status);
206 key.append("/", status);
207 key.append(subtypeForResource, status);
208 key.append("/gender", status);
209
210 UErrorCode localStatus = status;
211 int32_t resultLen = 0;
212 const UChar *result =
213 ures_getStringByKeyWithFallback(unitsBundle.getAlias(), key.data(), &resultLen, &localStatus);
214 if (U_SUCCESS(localStatus)) {
215 status = localStatus;
216 return UnicodeString(true, result, resultLen);
217 } else {
218 // TODO(icu-units#28): "$unitRes/gender" does not exist. Do we want to
219 // check whether the parent "$unitRes" exists? Then we could return
220 // U_MISSING_RESOURCE_ERROR for incorrect usage (e.g. builtinUnit not
221 // being a builtin).
222 return {};
223 }
224 }
225
226 // Loads data from a resource tree with paths matching
227 // $key/$pluralForm/$gender/$case, with lateral inheritance for missing cases
228 // and genders.
229 //
230 // An InflectedPluralSink is configured to load data for a specific gender and
231 // case. It loads all plural forms, because selection between plural forms is
232 // dependent upon the value being formatted.
233 //
234 // See data/unit/de.txt and data/unit/fr.txt for examples - take a look at
235 // units/compound/power2: German has case, French has differences for gender,
236 // but no case.
237 //
238 // TODO(icu-units#138): Conceptually similar to PluralTableSink, however the
239 // tree structures are different. After homogenizing the structures, we may be
240 // able to unify the two classes.
241 //
242 // TODO: Spec violation: expects presence of "count" - does not fallback to an
243 // absent "count"! If this fallback were added, getCompoundValue could be
244 // superseded?
245 class InflectedPluralSink : public ResourceSink {
246 public:
247 // Accepts `char*` rather than StringPiece because
248 // ResourceTable::findValue(...) requires a null-terminated `char*`.
249 //
250 // NOTE: outArray MUST have a length of at least ARRAY_LENGTH. No bounds
251 // checking is performed.
InflectedPluralSink(const char * gender,const char * caseVariant,UnicodeString * outArray)252 explicit InflectedPluralSink(const char *gender, const char *caseVariant, UnicodeString *outArray)
253 : gender(gender), caseVariant(caseVariant), outArray(outArray) {
254 // Initialize the array to bogus strings.
255 for (int32_t i = 0; i < ARRAY_LENGTH; i++) {
256 outArray[i].setToBogus();
257 }
258 }
259
260 // See ResourceSink::put().
put(const char * key,ResourceValue & value,UBool,UErrorCode & status)261 void put(const char *key, ResourceValue &value, UBool /*noFallback*/, UErrorCode &status) U_OVERRIDE {
262 ResourceTable pluralsTable = value.getTable(status);
263 if (U_FAILURE(status)) { return; }
264 for (int32_t i = 0; pluralsTable.getKeyAndValue(i, key, value); ++i) {
265 int32_t pluralIndex = getIndex(key, status);
266 if (U_FAILURE(status)) { return; }
267 if (!outArray[pluralIndex].isBogus()) {
268 // We already have a pattern
269 continue;
270 }
271 ResourceTable genderTable = value.getTable(status);
272 ResourceTable caseTable; // This instance has to outlive `value`
273 if (loadForPluralForm(genderTable, caseTable, value, status)) {
274 outArray[pluralIndex] = value.getUnicodeString(status);
275 }
276 }
277 }
278
279 private:
280 // Tries to load data for the configured gender from `genderTable`. Returns
281 // true if found, returning the data in `value`. The returned data will be
282 // for the configured gender if found, falling back to "neuter" and
283 // no-gender if not. The caseTable parameter holds the intermediate
284 // ResourceTable for the sake of lifetime management.
loadForPluralForm(const ResourceTable & genderTable,ResourceTable & caseTable,ResourceValue & value,UErrorCode & status)285 bool loadForPluralForm(const ResourceTable &genderTable,
286 ResourceTable &caseTable,
287 ResourceValue &value,
288 UErrorCode &status) {
289 if (uprv_strcmp(gender, "") != 0) {
290 if (loadForGender(genderTable, gender, caseTable, value, status)) {
291 return true;
292 }
293 if (uprv_strcmp(gender, "neuter") != 0 &&
294 loadForGender(genderTable, "neuter", caseTable, value, status)) {
295 return true;
296 }
297 }
298 if (loadForGender(genderTable, "_", caseTable, value, status)) {
299 return true;
300 }
301 return false;
302 }
303
304 // Tries to load data for the given gender from `genderTable`. Returns true
305 // if found, returning the data in `value`. The returned data will be for
306 // the configured case if found, falling back to "nominative" and no-case if
307 // not.
loadForGender(const ResourceTable & genderTable,const char * genderVal,ResourceTable & caseTable,ResourceValue & value,UErrorCode & status)308 bool loadForGender(const ResourceTable &genderTable,
309 const char *genderVal,
310 ResourceTable &caseTable,
311 ResourceValue &value,
312 UErrorCode &status) {
313 if (!genderTable.findValue(genderVal, value)) {
314 return false;
315 }
316 caseTable = value.getTable(status);
317 if (uprv_strcmp(caseVariant, "") != 0) {
318 if (loadForCase(caseTable, caseVariant, value)) {
319 return true;
320 }
321 if (uprv_strcmp(caseVariant, "nominative") != 0 &&
322 loadForCase(caseTable, "nominative", value)) {
323 return true;
324 }
325 }
326 if (loadForCase(caseTable, "_", value)) {
327 return true;
328 }
329 return false;
330 }
331
332 // Tries to load data for the given case from `caseTable`. Returns true if
333 // found, returning the data in `value`.
loadForCase(const ResourceTable & caseTable,const char * caseValue,ResourceValue & value)334 bool loadForCase(const ResourceTable &caseTable, const char *caseValue, ResourceValue &value) {
335 if (!caseTable.findValue(caseValue, value)) {
336 return false;
337 }
338 return true;
339 }
340
341 const char *gender;
342 const char *caseVariant;
343 UnicodeString *outArray;
344 };
345
346 // Fetches localised formatting patterns for the given subKey. See documentation
347 // for InflectedPluralSink for details.
348 //
349 // Data is loaded for the appropriate unit width, with missing data filled in
350 // from unitsShort.
getInflectedMeasureData(StringPiece subKey,const Locale & locale,const UNumberUnitWidth & width,const char * gender,const char * caseVariant,UnicodeString * outArray,UErrorCode & status)351 void getInflectedMeasureData(StringPiece subKey,
352 const Locale &locale,
353 const UNumberUnitWidth &width,
354 const char *gender,
355 const char *caseVariant,
356 UnicodeString *outArray,
357 UErrorCode &status) {
358 InflectedPluralSink sink(gender, caseVariant, outArray);
359 LocalUResourceBundlePointer unitsBundle(ures_open(U_ICUDATA_UNIT, locale.getName(), &status));
360 if (U_FAILURE(status)) { return; }
361
362 CharString key;
363 key.append("units", status);
364 if (width == UNUM_UNIT_WIDTH_NARROW) {
365 key.append("Narrow", status);
366 } else if (width == UNUM_UNIT_WIDTH_SHORT) {
367 key.append("Short", status);
368 }
369 key.append("/", status);
370 key.append(subKey, status);
371
372 UErrorCode localStatus = status;
373 ures_getAllItemsWithFallback(unitsBundle.getAlias(), key.data(), sink, localStatus);
374 if (width == UNUM_UNIT_WIDTH_SHORT) {
375 status = localStatus;
376 return;
377 }
378
379 // TODO(ICU-13353): The fallback to short does not work in ICU4C.
380 // Manually fall back to short (this is done automatically in Java).
381 key.clear();
382 key.append("unitsShort/", status);
383 key.append(subKey, status);
384 ures_getAllItemsWithFallback(unitsBundle.getAlias(), key.data(), sink, status);
385 }
386
387 class PluralTableSink : public ResourceSink {
388 public:
389 // NOTE: outArray MUST have a length of at least ARRAY_LENGTH. No bounds
390 // checking is performed.
PluralTableSink(UnicodeString * outArray)391 explicit PluralTableSink(UnicodeString *outArray) : outArray(outArray) {
392 // Initialize the array to bogus strings.
393 for (int32_t i = 0; i < ARRAY_LENGTH; i++) {
394 outArray[i].setToBogus();
395 }
396 }
397
put(const char * key,ResourceValue & value,UBool,UErrorCode & status)398 void put(const char *key, ResourceValue &value, UBool /*noFallback*/, UErrorCode &status) U_OVERRIDE {
399 ResourceTable pluralsTable = value.getTable(status);
400 if (U_FAILURE(status)) { return; }
401 for (int32_t i = 0; pluralsTable.getKeyAndValue(i, key, value); ++i) {
402 if (uprv_strcmp(key, "case") == 0) {
403 continue;
404 }
405 int32_t index = getIndex(key, status);
406 if (U_FAILURE(status)) { return; }
407 if (!outArray[index].isBogus()) {
408 continue;
409 }
410 outArray[index] = value.getUnicodeString(status);
411 if (U_FAILURE(status)) { return; }
412 }
413 }
414
415 private:
416 UnicodeString *outArray;
417 };
418
419 /**
420 * Populates outArray with `locale`-specific values for `unit` through use of
421 * PluralTableSink. Only the set of basic units are supported!
422 *
423 * Reading from resources *unitsNarrow* and *unitsShort* (for width
424 * UNUM_UNIT_WIDTH_NARROW), or just *unitsShort* (for width
425 * UNUM_UNIT_WIDTH_SHORT). For other widths, it reads just "units".
426 *
427 * @param unit must be a built-in unit, i.e. must have a type and subtype,
428 * listed in gTypes and gSubTypes in measunit.cpp.
429 * @param unitDisplayCase the empty string and "nominative" are treated the
430 * same. For other cases, strings for the requested case are used if found.
431 * (For any missing case-specific data, we fall back to nominative.)
432 * @param outArray must be of fixed length ARRAY_LENGTH.
433 */
getMeasureData(const Locale & locale,const MeasureUnit & unit,const UNumberUnitWidth & width,const char * unitDisplayCase,UnicodeString * outArray,UErrorCode & status)434 void getMeasureData(const Locale &locale,
435 const MeasureUnit &unit,
436 const UNumberUnitWidth &width,
437 const char *unitDisplayCase,
438 UnicodeString *outArray,
439 UErrorCode &status) {
440 PluralTableSink sink(outArray);
441 LocalUResourceBundlePointer unitsBundle(ures_open(U_ICUDATA_UNIT, locale.getName(), &status));
442 if (U_FAILURE(status)) { return; }
443
444 CharString subKey;
445 subKey.append("/", status);
446 subKey.append(unit.getType(), status);
447 subKey.append("/", status);
448
449 // Map duration-year-person, duration-week-person, etc. to duration-year, duration-week, ...
450 // TODO(ICU-20400): Get duration-*-person data properly with aliases.
451 int32_t subtypeLen = static_cast<int32_t>(uprv_strlen(unit.getSubtype()));
452 if (subtypeLen > 7 && uprv_strcmp(unit.getSubtype() + subtypeLen - 7, "-person") == 0) {
453 subKey.append({unit.getSubtype(), subtypeLen - 7}, status);
454 } else {
455 subKey.append({unit.getSubtype(), subtypeLen}, status);
456 }
457
458 if (width != UNUM_UNIT_WIDTH_FULL_NAME) {
459 UErrorCode localStatus = status;
460 CharString genderKey;
461 genderKey.append("units", localStatus);
462 genderKey.append(subKey, localStatus);
463 genderKey.append("/gender", localStatus);
464 StackUResourceBundle fillIn;
465 ures_getByKeyWithFallback(unitsBundle.getAlias(), genderKey.data(), fillIn.getAlias(),
466 &localStatus);
467 outArray[GENDER_INDEX] = ures_getUnicodeString(fillIn.getAlias(), &localStatus);
468 }
469
470 CharString key;
471 key.append("units", status);
472 if (width == UNUM_UNIT_WIDTH_NARROW) {
473 key.append("Narrow", status);
474 } else if (width == UNUM_UNIT_WIDTH_SHORT) {
475 key.append("Short", status);
476 }
477 key.append(subKey, status);
478
479 // Grab desired case first, if available. Then grab no-case data to fill in
480 // the gaps.
481 if (width == UNUM_UNIT_WIDTH_FULL_NAME && unitDisplayCase[0] != 0) {
482 CharString caseKey;
483 caseKey.append(key, status);
484 caseKey.append("/case/", status);
485 caseKey.append(unitDisplayCase, status);
486
487 UErrorCode localStatus = U_ZERO_ERROR;
488 // TODO(icu-units#138): our fallback logic is not spec-compliant:
489 // lateral fallback should happen before locale fallback. Switch to
490 // getInflectedMeasureData after homogenizing data format? Find a unit
491 // test case that demonstrates the incorrect fallback logic (via
492 // regional variant of an inflected language?)
493 ures_getAllItemsWithFallback(unitsBundle.getAlias(), caseKey.data(), sink, localStatus);
494 }
495
496 // TODO(icu-units#138): our fallback logic is not spec-compliant: we
497 // check the given case, then go straight to the no-case data. The spec
498 // states we should first look for case="nominative". As part of #138,
499 // either get the spec changed, or add unit tests that warn us if
500 // case="nominative" data differs from no-case data?
501 UErrorCode localStatus = U_ZERO_ERROR;
502 ures_getAllItemsWithFallback(unitsBundle.getAlias(), key.data(), sink, localStatus);
503 if (width == UNUM_UNIT_WIDTH_SHORT) {
504 if (U_FAILURE(localStatus)) {
505 status = localStatus;
506 }
507 return;
508 }
509
510 // TODO(ICU-13353): The fallback to short does not work in ICU4C.
511 // Manually fall back to short (this is done automatically in Java).
512 key.clear();
513 key.append("unitsShort", status);
514 key.append(subKey, status);
515 ures_getAllItemsWithFallback(unitsBundle.getAlias(), key.data(), sink, status);
516 }
517
518 // NOTE: outArray MUST have a length of at least ARRAY_LENGTH.
getCurrencyLongNameData(const Locale & locale,const CurrencyUnit & currency,UnicodeString * outArray,UErrorCode & status)519 void getCurrencyLongNameData(const Locale &locale, const CurrencyUnit ¤cy, UnicodeString *outArray,
520 UErrorCode &status) {
521 // In ICU4J, this method gets a CurrencyData from CurrencyData.provider.
522 // TODO(ICU4J): Implement this without going through CurrencyData, like in ICU4C?
523 PluralTableSink sink(outArray);
524 LocalUResourceBundlePointer unitsBundle(ures_open(U_ICUDATA_CURR, locale.getName(), &status));
525 if (U_FAILURE(status)) { return; }
526 ures_getAllItemsWithFallback(unitsBundle.getAlias(), "CurrencyUnitPatterns", sink, status);
527 if (U_FAILURE(status)) { return; }
528 for (int32_t i = 0; i < StandardPlural::Form::COUNT; i++) {
529 UnicodeString &pattern = outArray[i];
530 if (pattern.isBogus()) {
531 continue;
532 }
533 int32_t longNameLen = 0;
534 const char16_t *longName = ucurr_getPluralName(
535 currency.getISOCurrency(),
536 locale.getName(),
537 nullptr /* isChoiceFormat */,
538 StandardPlural::getKeyword(static_cast<StandardPlural::Form>(i)),
539 &longNameLen,
540 &status);
541 // Example pattern from data: "{0} {1}"
542 // Example output after find-and-replace: "{0} US dollars"
543 pattern.findAndReplace(UnicodeString(u"{1}"), UnicodeString(longName, longNameLen));
544 }
545 }
546
getCompoundValue(StringPiece compoundKey,const Locale & locale,const UNumberUnitWidth & width,UErrorCode & status)547 UnicodeString getCompoundValue(StringPiece compoundKey,
548 const Locale &locale,
549 const UNumberUnitWidth &width,
550 UErrorCode &status) {
551 LocalUResourceBundlePointer unitsBundle(ures_open(U_ICUDATA_UNIT, locale.getName(), &status));
552 if (U_FAILURE(status)) { return {}; }
553 CharString key;
554 key.append("units", status);
555 if (width == UNUM_UNIT_WIDTH_NARROW) {
556 key.append("Narrow", status);
557 } else if (width == UNUM_UNIT_WIDTH_SHORT) {
558 key.append("Short", status);
559 }
560 key.append("/compound/", status);
561 key.append(compoundKey, status);
562
563 UErrorCode localStatus = status;
564 int32_t len = 0;
565 const UChar *ptr =
566 ures_getStringByKeyWithFallback(unitsBundle.getAlias(), key.data(), &len, &localStatus);
567 if (U_FAILURE(localStatus) && width != UNUM_UNIT_WIDTH_SHORT) {
568 // Fall back to short, which contains more compound data
569 key.clear();
570 key.append("unitsShort/compound/", status);
571 key.append(compoundKey, status);
572 ptr = ures_getStringByKeyWithFallback(unitsBundle.getAlias(), key.data(), &len, &status);
573 } else {
574 status = localStatus;
575 }
576 if (U_FAILURE(status)) {
577 return {};
578 }
579 return UnicodeString(ptr, len);
580 }
581
582 /**
583 * Loads and applies deriveComponent rules from CLDR's grammaticalFeatures.xml.
584 *
585 * Consider a deriveComponent rule that looks like this:
586 *
587 * <deriveComponent feature="case" structure="per" value0="compound" value1="nominative"/>
588 *
589 * Instantiating an instance as follows:
590 *
591 * DerivedComponents d(loc, "case", "per");
592 *
593 * Applying the rule in the XML element above, `d.value0("foo")` will be "foo",
594 * and `d.value1("foo")` will be "nominative".
595 *
596 * The values returned by value0(...) and value1(...) are valid only while the
597 * instance exists. In case of any kind of failure, value0(...) and value1(...)
598 * will return "".
599 */
600 class DerivedComponents {
601 public:
602 /**
603 * Constructor.
604 *
605 * The feature and structure parameters must be null-terminated. The string
606 * referenced by compoundValue must exist for longer than the
607 * DerivedComponents instance.
608 */
DerivedComponents(const Locale & locale,const char * feature,const char * structure)609 DerivedComponents(const Locale &locale, const char *feature, const char *structure) {
610 StackUResourceBundle derivationsBundle, stackBundle;
611 ures_openDirectFillIn(derivationsBundle.getAlias(), NULL, "grammaticalFeatures", &status);
612 ures_getByKey(derivationsBundle.getAlias(), "grammaticalData", derivationsBundle.getAlias(),
613 &status);
614 ures_getByKey(derivationsBundle.getAlias(), "derivations", derivationsBundle.getAlias(),
615 &status);
616 if (U_FAILURE(status)) {
617 return;
618 }
619 UErrorCode localStatus = U_ZERO_ERROR;
620 // TODO(icu-units#28): use standard normal locale resolution algorithms
621 // rather than just grabbing language:
622 ures_getByKey(derivationsBundle.getAlias(), locale.getLanguage(), stackBundle.getAlias(),
623 &localStatus);
624 // TODO(icu-units#28):
625 // - code currently assumes if the locale exists, the rules are there -
626 // instead of falling back to root when the requested rule is missing.
627 // - investigate ures.h functions, see if one that uses res_findResource()
628 // might be better (or use res_findResource directly), or maybe help
629 // improve ures documentation to guide function selection?
630 if (localStatus == U_MISSING_RESOURCE_ERROR) {
631 ures_getByKey(derivationsBundle.getAlias(), "root", stackBundle.getAlias(), &status);
632 } else {
633 status = localStatus;
634 }
635 ures_getByKey(stackBundle.getAlias(), "component", stackBundle.getAlias(), &status);
636 ures_getByKey(stackBundle.getAlias(), feature, stackBundle.getAlias(), &status);
637 ures_getByKey(stackBundle.getAlias(), structure, stackBundle.getAlias(), &status);
638 UnicodeString val0 = ures_getUnicodeStringByIndex(stackBundle.getAlias(), 0, &status);
639 UnicodeString val1 = ures_getUnicodeStringByIndex(stackBundle.getAlias(), 1, &status);
640 if (U_SUCCESS(status)) {
641 if (val0.compare(UnicodeString(u"compound")) == 0) {
642 compound0_ = true;
643 } else {
644 compound0_ = false;
645 value0_.appendInvariantChars(val0, status);
646 }
647 if (val1.compare(UnicodeString(u"compound")) == 0) {
648 compound1_ = true;
649 } else {
650 compound1_ = false;
651 value1_.appendInvariantChars(val1, status);
652 }
653 }
654 }
655
656 // Returns a StringPiece that is only valid as long as the instance exists.
value0(const StringPiece compoundValue) const657 StringPiece value0(const StringPiece compoundValue) const {
658 return compound0_ ? compoundValue : value0_.toStringPiece();
659 }
660
661 // Returns a StringPiece that is only valid as long as the instance exists.
value1(const StringPiece compoundValue) const662 StringPiece value1(const StringPiece compoundValue) const {
663 return compound1_ ? compoundValue : value1_.toStringPiece();
664 }
665
666 // Returns a char* that is only valid as long as the instance exists.
value0(const char * compoundValue) const667 const char *value0(const char *compoundValue) const {
668 return compound0_ ? compoundValue : value0_.data();
669 }
670
671 // Returns a char* that is only valid as long as the instance exists.
value1(const char * compoundValue) const672 const char *value1(const char *compoundValue) const {
673 return compound1_ ? compoundValue : value1_.data();
674 }
675
676 private:
677 UErrorCode status = U_ZERO_ERROR;
678
679 // Holds strings referred to by value0 and value1;
680 bool compound0_ = false, compound1_ = false;
681 CharString value0_, value1_;
682 };
683
684 // TODO(icu-units#28): test somehow? Associate with an ICU ticket for adding
685 // testsuite support for testing with synthetic data?
686 /**
687 * Loads and returns the value in rules that look like these:
688 *
689 * <deriveCompound feature="gender" structure="per" value="0"/>
690 * <deriveCompound feature="gender" structure="times" value="1"/>
691 *
692 * Currently a fake example, but spec compliant:
693 * <deriveCompound feature="gender" structure="power" value="feminine"/>
694 *
695 * NOTE: If U_FAILURE(status), returns an empty string.
696 */
697 UnicodeString
getDeriveCompoundRule(Locale locale,const char * feature,const char * structure,UErrorCode & status)698 getDeriveCompoundRule(Locale locale, const char *feature, const char *structure, UErrorCode &status) {
699 StackUResourceBundle derivationsBundle, stackBundle;
700 ures_openDirectFillIn(derivationsBundle.getAlias(), NULL, "grammaticalFeatures", &status);
701 ures_getByKey(derivationsBundle.getAlias(), "grammaticalData", derivationsBundle.getAlias(),
702 &status);
703 ures_getByKey(derivationsBundle.getAlias(), "derivations", derivationsBundle.getAlias(), &status);
704 // TODO: use standard normal locale resolution algorithms rather than just grabbing language:
705 ures_getByKey(derivationsBundle.getAlias(), locale.getLanguage(), stackBundle.getAlias(), &status);
706 // TODO:
707 // - code currently assumes if the locale exists, the rules are there -
708 // instead of falling back to root when the requested rule is missing.
709 // - investigate ures.h functions, see if one that uses res_findResource()
710 // might be better (or use res_findResource directly), or maybe help
711 // improve ures documentation to guide function selection?
712 if (status == U_MISSING_RESOURCE_ERROR) {
713 status = U_ZERO_ERROR;
714 ures_getByKey(derivationsBundle.getAlias(), "root", stackBundle.getAlias(), &status);
715 }
716 ures_getByKey(stackBundle.getAlias(), "compound", stackBundle.getAlias(), &status);
717 ures_getByKey(stackBundle.getAlias(), feature, stackBundle.getAlias(), &status);
718 UnicodeString uVal = ures_getUnicodeStringByKey(stackBundle.getAlias(), structure, &status);
719 if (U_FAILURE(status)) {
720 return {};
721 }
722 U_ASSERT(!uVal.isBogus());
723 return uVal;
724 }
725
726 // Returns the gender string for structures following these rules:
727 //
728 // <deriveCompound feature="gender" structure="per" value="0"/>
729 // <deriveCompound feature="gender" structure="times" value="1"/>
730 //
731 // Fake example:
732 // <deriveCompound feature="gender" structure="power" value="feminine"/>
733 //
734 // data0 and data1 should be pattern arrays (UnicodeString[ARRAY_SIZE]) that
735 // correspond to value="0" and value="1".
736 //
737 // Pass a nullptr to data1 if the structure has no concept of value="1" (e.g.
738 // "prefix" doesn't).
getDerivedGender(Locale locale,const char * structure,UnicodeString * data0,UnicodeString * data1,UErrorCode & status)739 UnicodeString getDerivedGender(Locale locale,
740 const char *structure,
741 UnicodeString *data0,
742 UnicodeString *data1,
743 UErrorCode &status) {
744 UnicodeString val = getDeriveCompoundRule(locale, "gender", structure, status);
745 if (val.length() == 1) {
746 switch (val[0]) {
747 case u'0':
748 return data0[GENDER_INDEX];
749 case u'1':
750 if (data1 == nullptr) {
751 return {};
752 }
753 return data1[GENDER_INDEX];
754 }
755 }
756 return val;
757 }
758
759 ////////////////////////
760 /// END DATA LOADING ///
761 ////////////////////////
762
763 // TODO: promote this somewhere? It's based on patternprops.cpp' trimWhitespace
trimSpaceChars(const UChar * s,int32_t & length)764 const UChar *trimSpaceChars(const UChar *s, int32_t &length) {
765 if (length <= 0 || (!u_isJavaSpaceChar(s[0]) && !u_isJavaSpaceChar(s[length - 1]))) {
766 return s;
767 }
768 int32_t start = 0;
769 int32_t limit = length;
770 while (start < limit && u_isJavaSpaceChar(s[start])) {
771 ++start;
772 }
773 if (start < limit) {
774 // There is non-white space at start; we will not move limit below that,
775 // so we need not test start<limit in the loop.
776 while (u_isJavaSpaceChar(s[limit - 1])) {
777 --limit;
778 }
779 }
780 length = limit - start;
781 return s + start;
782 }
783
784 /**
785 * Calculates the gender of an arbitrary unit: this is the *second*
786 * implementation of an algorithm to do this:
787 *
788 * Gender is also calculated in "processPatternTimes": that code path is "bottom
789 * up", loading the gender for every component of a compound unit (at the same
790 * time as loading the Long Names formatting patterns), even if the gender is
791 * unneeded, then combining the single units' genders into the compound unit's
792 * gender, according to the rules. This algorithm does a lazier "top-down"
793 * evaluation, starting with the compound unit, calculating which single unit's
794 * gender is needed by breaking it down according to the rules, and then loading
795 * only the gender of the one single unit who's gender is needed.
796 *
797 * For future refactorings:
798 * 1. we could drop processPatternTimes' gender calculation and just call this
799 * function: for UNUM_UNIT_WIDTH_FULL_NAME, the unit gender is in the very
800 * same table as the formatting patterns, so loading it then may be
801 * efficient. For other unit widths however, it needs to be explicitly looked
802 * up anyway.
803 * 2. alternatively, if CLDR is providing all the genders we need such that we
804 * don't need to calculate them in ICU anymore, we could drop this function
805 * and keep only processPatternTimes' calculation. (And optimise it a bit?)
806 *
807 * @param locale The desired locale.
808 * @param unit The measure unit to calculate the gender for.
809 * @return The gender string for the unit, or an empty string if unknown or
810 * ungendered.
811 */
calculateGenderForUnit(const Locale & locale,const MeasureUnit & unit,UErrorCode & status)812 UnicodeString calculateGenderForUnit(const Locale &locale, const MeasureUnit &unit, UErrorCode &status) {
813 MeasureUnitImpl impl;
814 const MeasureUnitImpl& mui = MeasureUnitImpl::forMeasureUnit(unit, impl, status);
815 int32_t singleUnitIndex = 0;
816 if (mui.complexity == UMEASURE_UNIT_COMPOUND) {
817 int32_t startSlice = 0;
818 // inclusive
819 int32_t endSlice = mui.singleUnits.length()-1;
820 U_ASSERT(endSlice > 0); // Else it would not be COMPOUND
821 if (mui.singleUnits[endSlice]->dimensionality < 0) {
822 // We have a -per- construct
823 UnicodeString perRule = getDeriveCompoundRule(locale, "gender", "per", status);
824 if (perRule.length() != 1) {
825 // Fixed gender for -per- units
826 return perRule;
827 }
828 if (perRule[0] == u'1') {
829 // Find the start of the denominator. We already know there is one.
830 while (mui.singleUnits[startSlice]->dimensionality >= 0) {
831 startSlice++;
832 }
833 } else {
834 // Find the end of the numerator
835 while (endSlice >= 0 && mui.singleUnits[endSlice]->dimensionality < 0) {
836 endSlice--;
837 }
838 if (endSlice < 0) {
839 // We have only a denominator, e.g. "per-second".
840 // TODO(icu-units#28): find out what gender to use in the
841 // absence of a first value - mentioned in CLDR-14253.
842 return {};
843 }
844 }
845 }
846 if (endSlice > startSlice) {
847 // We have a -times- construct
848 UnicodeString timesRule = getDeriveCompoundRule(locale, "gender", "times", status);
849 if (timesRule.length() != 1) {
850 // Fixed gender for -times- units
851 return timesRule;
852 }
853 if (timesRule[0] == u'0') {
854 endSlice = startSlice;
855 } else {
856 // We assume timesRule[0] == u'1'
857 startSlice = endSlice;
858 }
859 }
860 U_ASSERT(startSlice == endSlice);
861 singleUnitIndex = startSlice;
862 } else if (mui.complexity == UMEASURE_UNIT_MIXED) {
863 status = U_INTERNAL_PROGRAM_ERROR;
864 return {};
865 } else {
866 U_ASSERT(mui.complexity == UMEASURE_UNIT_SINGLE);
867 U_ASSERT(mui.singleUnits.length() == 1);
868 }
869
870 // Now we know which singleUnit's gender we want
871 const SingleUnitImpl *singleUnit = mui.singleUnits[singleUnitIndex];
872 // Check for any power-prefix gender override:
873 if (std::abs(singleUnit->dimensionality) != 1) {
874 UnicodeString powerRule = getDeriveCompoundRule(locale, "gender", "power", status);
875 if (powerRule.length() != 1) {
876 // Fixed gender for -powN- units
877 return powerRule;
878 }
879 // powerRule[0] == u'0'; u'1' not currently in spec.
880 }
881 // Check for any SI and binary prefix gender override:
882 if (std::abs(singleUnit->dimensionality) != 1) {
883 UnicodeString prefixRule = getDeriveCompoundRule(locale, "gender", "prefix", status);
884 if (prefixRule.length() != 1) {
885 // Fixed gender for -powN- units
886 return prefixRule;
887 }
888 // prefixRule[0] == u'0'; u'1' not currently in spec.
889 }
890 // Now we've boiled it down to the gender of one simple unit identifier:
891 return getGenderForBuiltin(locale, MeasureUnit::forIdentifier(singleUnit->getSimpleUnitID(), status),
892 status);
893 }
894
maybeCalculateGender(const Locale & locale,const MeasureUnit & unitRef,UnicodeString * outArray,UErrorCode & status)895 void maybeCalculateGender(const Locale &locale,
896 const MeasureUnit &unitRef,
897 UnicodeString *outArray,
898 UErrorCode &status) {
899 if (outArray[GENDER_INDEX].isBogus()) {
900 UnicodeString meterGender = getGenderForBuiltin(locale, MeasureUnit::getMeter(), status);
901 if (meterGender.isEmpty()) {
902 // No gender for meter: assume ungendered language
903 return;
904 }
905 // We have a gendered language, but are lacking gender for unitRef.
906 outArray[GENDER_INDEX] = calculateGenderForUnit(locale, unitRef, status);
907 }
908 }
909
910 } // namespace
911
forMeasureUnit(const Locale & loc,const MeasureUnit & unitRef,const UNumberUnitWidth & width,const char * unitDisplayCase,const PluralRules * rules,const MicroPropsGenerator * parent,LongNameHandler * fillIn,UErrorCode & status)912 void LongNameHandler::forMeasureUnit(const Locale &loc,
913 const MeasureUnit &unitRef,
914 const UNumberUnitWidth &width,
915 const char *unitDisplayCase,
916 const PluralRules *rules,
917 const MicroPropsGenerator *parent,
918 LongNameHandler *fillIn,
919 UErrorCode &status) {
920 // From https://unicode.org/reports/tr35/tr35-general.html#compound-units -
921 // Points 1 and 2 are mostly handled by MeasureUnit:
922 //
923 // 1. If the unitId is empty or invalid, fail
924 // 2. Put the unitId into normalized order
925 U_ASSERT(fillIn != nullptr);
926
927 if (uprv_strcmp(unitRef.getType(), "") != 0) {
928 // Handling built-in units:
929 //
930 // 3. Set result to be getValue(unitId with length, pluralCategory, caseVariant)
931 // - If result is not empty, return it
932 UnicodeString simpleFormats[ARRAY_LENGTH];
933 getMeasureData(loc, unitRef, width, unitDisplayCase, simpleFormats, status);
934 maybeCalculateGender(loc, unitRef, simpleFormats, status);
935 if (U_FAILURE(status)) {
936 return;
937 }
938 fillIn->rules = rules;
939 fillIn->parent = parent;
940 fillIn->simpleFormatsToModifiers(simpleFormats,
941 {UFIELD_CATEGORY_NUMBER, UNUM_MEASURE_UNIT_FIELD}, status);
942 if (!simpleFormats[GENDER_INDEX].isBogus()) {
943 fillIn->gender = getGenderString(simpleFormats[GENDER_INDEX], status);
944 }
945 return;
946
947 // TODO(icu-units#145): figure out why this causes a failure in
948 // format/MeasureFormatTest/TestIndividualPluralFallback and other
949 // tests, when it should have been an alternative for the lines above:
950
951 // forArbitraryUnit(loc, unitRef, width, unitDisplayCase, fillIn, status);
952 // fillIn->rules = rules;
953 // fillIn->parent = parent;
954 // return;
955 } else {
956 // Check if it is a MeasureUnit this constructor handles: this
957 // constructor does not handle mixed units
958 U_ASSERT(unitRef.getComplexity(status) != UMEASURE_UNIT_MIXED);
959 forArbitraryUnit(loc, unitRef, width, unitDisplayCase, fillIn, status);
960 fillIn->rules = rules;
961 fillIn->parent = parent;
962 return;
963 }
964 }
965
forArbitraryUnit(const Locale & loc,const MeasureUnit & unitRef,const UNumberUnitWidth & width,const char * unitDisplayCase,LongNameHandler * fillIn,UErrorCode & status)966 void LongNameHandler::forArbitraryUnit(const Locale &loc,
967 const MeasureUnit &unitRef,
968 const UNumberUnitWidth &width,
969 const char *unitDisplayCase,
970 LongNameHandler *fillIn,
971 UErrorCode &status) {
972 if (U_FAILURE(status)) {
973 return;
974 }
975 if (fillIn == nullptr) {
976 status = U_INTERNAL_PROGRAM_ERROR;
977 return;
978 }
979
980 // Numbered list items are from the algorithms at
981 // https://unicode.org/reports/tr35/tr35-general.html#compound-units:
982 //
983 // 4. Divide the unitId into numerator (the part before the "-per-") and
984 // denominator (the part after the "-per-). If both are empty, fail
985 MeasureUnitImpl unit;
986 MeasureUnitImpl perUnit;
987 {
988 MeasureUnitImpl fullUnit = MeasureUnitImpl::forMeasureUnitMaybeCopy(unitRef, status);
989 if (U_FAILURE(status)) {
990 return;
991 }
992 for (int32_t i = 0; i < fullUnit.singleUnits.length(); i++) {
993 SingleUnitImpl *subUnit = fullUnit.singleUnits[i];
994 if (subUnit->dimensionality > 0) {
995 unit.appendSingleUnit(*subUnit, status);
996 } else {
997 subUnit->dimensionality *= -1;
998 perUnit.appendSingleUnit(*subUnit, status);
999 }
1000 }
1001 }
1002
1003 // TODO(icu-units#28): check placeholder logic, see if it needs to be
1004 // present here instead of only in processPatternTimes:
1005 //
1006 // 5. Set both globalPlaceholder and globalPlaceholderPosition to be empty
1007
1008 DerivedComponents derivedPerCases(loc, "case", "per");
1009
1010 // 6. numeratorUnitString
1011 UnicodeString numeratorUnitData[ARRAY_LENGTH];
1012 processPatternTimes(std::move(unit), loc, width, derivedPerCases.value0(unitDisplayCase),
1013 numeratorUnitData, status);
1014
1015 // 7. denominatorUnitString
1016 UnicodeString denominatorUnitData[ARRAY_LENGTH];
1017 processPatternTimes(std::move(perUnit), loc, width, derivedPerCases.value1(unitDisplayCase),
1018 denominatorUnitData, status);
1019
1020 // TODO(icu-units#139):
1021 // - implement DerivedComponents for "plural/times" and "plural/power":
1022 // French has different rules, we'll be producing the wrong results
1023 // currently. (Prove via tests!)
1024 // - implement DerivedComponents for "plural/per", "plural/prefix",
1025 // "case/times", "case/power", and "case/prefix" - although they're
1026 // currently hardcoded. Languages with different rules are surely on the
1027 // way.
1028 //
1029 // Currently we only use "case/per", "plural/times", "case/times", and
1030 // "case/power".
1031 //
1032 // This may have impact on multiSimpleFormatsToModifiers(...) below too?
1033 // These rules are currently (ICU 69) all the same and hard-coded below.
1034 UnicodeString perUnitPattern;
1035 if (!denominatorUnitData[PER_INDEX].isBogus()) {
1036 // If we have no denominator, we obtain the empty string:
1037 perUnitPattern = denominatorUnitData[PER_INDEX];
1038 } else {
1039 // 8. Set perPattern to be getValue([per], locale, length)
1040 UnicodeString rawPerUnitFormat = getCompoundValue("per", loc, width, status);
1041 // rawPerUnitFormat is something like "{0} per {1}"; we need to substitute in the secondary unit.
1042 SimpleFormatter perPatternFormatter(rawPerUnitFormat, 2, 2, status);
1043 if (U_FAILURE(status)) {
1044 return;
1045 }
1046 // Plural and placeholder handling for 7. denominatorUnitString:
1047 // TODO(icu-units#139): hardcoded:
1048 // <deriveComponent feature="plural" structure="per" value0="compound" value1="one"/>
1049 UnicodeString denominatorFormat =
1050 getWithPlural(denominatorUnitData, StandardPlural::Form::ONE, status);
1051 // Some "one" pattern may not contain "{0}". For example in "ar" or "ne" locale.
1052 SimpleFormatter denominatorFormatter(denominatorFormat, 0, 1, status);
1053 if (U_FAILURE(status)) {
1054 return;
1055 }
1056 UnicodeString denominatorPattern = denominatorFormatter.getTextWithNoArguments();
1057 int32_t trimmedLen = denominatorPattern.length();
1058 const UChar *trimmed = trimSpaceChars(denominatorPattern.getBuffer(), trimmedLen);
1059 UnicodeString denominatorString(false, trimmed, trimmedLen);
1060 // 9. If the denominatorString is empty, set result to
1061 // [numeratorString], otherwise set result to format(perPattern,
1062 // numeratorString, denominatorString)
1063 //
1064 // TODO(icu-units#28): Why does UnicodeString need to be explicit in the
1065 // following line?
1066 perPatternFormatter.format(UnicodeString(u"{0}"), denominatorString, perUnitPattern, status);
1067 if (U_FAILURE(status)) {
1068 return;
1069 }
1070 }
1071 if (perUnitPattern.length() == 0) {
1072 fillIn->simpleFormatsToModifiers(numeratorUnitData,
1073 {UFIELD_CATEGORY_NUMBER, UNUM_MEASURE_UNIT_FIELD}, status);
1074 } else {
1075 fillIn->multiSimpleFormatsToModifiers(numeratorUnitData, perUnitPattern,
1076 {UFIELD_CATEGORY_NUMBER, UNUM_MEASURE_UNIT_FIELD}, status);
1077 }
1078
1079 // Gender
1080 //
1081 // TODO(icu-units#28): find out what gender to use in the absence of a first
1082 // value - e.g. what's the gender of "per-second"? Mentioned in CLDR-14253.
1083 //
1084 // gender/per deriveCompound rules don't say:
1085 // <deriveCompound feature="gender" structure="per" value="0"/> <!-- gender(gram-per-meter) ← gender(gram) -->
1086 fillIn->gender = getGenderString(
1087 getDerivedGender(loc, "per", numeratorUnitData, denominatorUnitData, status), status);
1088 }
1089
processPatternTimes(MeasureUnitImpl && productUnit,Locale loc,const UNumberUnitWidth & width,const char * caseVariant,UnicodeString * outArray,UErrorCode & status)1090 void LongNameHandler::processPatternTimes(MeasureUnitImpl &&productUnit,
1091 Locale loc,
1092 const UNumberUnitWidth &width,
1093 const char *caseVariant,
1094 UnicodeString *outArray,
1095 UErrorCode &status) {
1096 if (U_FAILURE(status)) {
1097 return;
1098 }
1099 if (productUnit.complexity == UMEASURE_UNIT_MIXED) {
1100 // These are handled by MixedUnitLongNameHandler
1101 status = U_UNSUPPORTED_ERROR;
1102 return;
1103 }
1104
1105 #if U_DEBUG
1106 for (int32_t pluralIndex = 0; pluralIndex < ARRAY_LENGTH; pluralIndex++) {
1107 U_ASSERT(outArray[pluralIndex].length() == 0);
1108 U_ASSERT(!outArray[pluralIndex].isBogus());
1109 }
1110 #endif
1111
1112 if (productUnit.identifier.isEmpty()) {
1113 // TODO(icu-units#28): consider when serialize should be called.
1114 // identifier might also be empty for MeasureUnit().
1115 productUnit.serialize(status);
1116 }
1117 if (U_FAILURE(status)) {
1118 return;
1119 }
1120 if (productUnit.identifier.length() == 0) {
1121 // MeasureUnit(): no units: return empty strings.
1122 return;
1123 }
1124
1125 MeasureUnit builtinUnit;
1126 if (MeasureUnit::findBySubType(productUnit.identifier.toStringPiece(), &builtinUnit)) {
1127 // TODO(icu-units#145): spec doesn't cover builtin-per-builtin, it
1128 // breaks them all down. Do we want to drop this?
1129 // - findBySubType isn't super efficient, if we skip it and go to basic
1130 // singles, we don't have to construct MeasureUnit's anymore.
1131 // - Check all the existing unit tests that fail without this: is it due
1132 // to incorrect fallback via getMeasureData?
1133 // - Do those unit tests cover this code path representatively?
1134 if (builtinUnit != MeasureUnit()) {
1135 getMeasureData(loc, builtinUnit, width, caseVariant, outArray, status);
1136 maybeCalculateGender(loc, builtinUnit, outArray, status);
1137 }
1138 return;
1139 }
1140
1141 // 2. Set timesPattern to be getValue(times, locale, length)
1142 UnicodeString timesPattern = getCompoundValue("times", loc, width, status);
1143 SimpleFormatter timesPatternFormatter(timesPattern, 2, 2, status);
1144 if (U_FAILURE(status)) {
1145 return;
1146 }
1147
1148 PlaceholderPosition globalPlaceholder[ARRAY_LENGTH];
1149 UChar globalJoinerChar = 0;
1150 // Numbered list items are from the algorithms at
1151 // https://unicode.org/reports/tr35/tr35-general.html#compound-units:
1152 //
1153 // pattern(...) point 5:
1154 // - Set both globalPlaceholder and globalPlaceholderPosition to be empty
1155 //
1156 // 3. Set result to be empty
1157 for (int32_t pluralIndex = 0; pluralIndex < ARRAY_LENGTH; pluralIndex++) {
1158 // Initial state: empty string pattern, via all falling back to OTHER:
1159 if (pluralIndex == StandardPlural::Form::OTHER) {
1160 outArray[pluralIndex].remove();
1161 } else {
1162 outArray[pluralIndex].setToBogus();
1163 }
1164 globalPlaceholder[pluralIndex] = PH_EMPTY;
1165 }
1166
1167 // Empty string represents "compound" (propagate the plural form).
1168 const char *pluralCategory = "";
1169 DerivedComponents derivedTimesPlurals(loc, "plural", "times");
1170 DerivedComponents derivedTimesCases(loc, "case", "times");
1171 DerivedComponents derivedPowerCases(loc, "case", "power");
1172
1173 // 4. For each single_unit in product_unit
1174 for (int32_t singleUnitIndex = 0; singleUnitIndex < productUnit.singleUnits.length();
1175 singleUnitIndex++) {
1176 SingleUnitImpl *singleUnit = productUnit.singleUnits[singleUnitIndex];
1177 const char *singlePluralCategory;
1178 const char *singleCaseVariant;
1179 // TODO(icu-units#28): ensure we have unit tests that change/fail if we
1180 // assign incorrect case variants here:
1181 if (singleUnitIndex < productUnit.singleUnits.length() - 1) {
1182 // 4.1. If hasMultiple
1183 singlePluralCategory = derivedTimesPlurals.value0(pluralCategory);
1184 singleCaseVariant = derivedTimesCases.value0(caseVariant);
1185 pluralCategory = derivedTimesPlurals.value1(pluralCategory);
1186 caseVariant = derivedTimesCases.value1(caseVariant);
1187 } else {
1188 singlePluralCategory = derivedTimesPlurals.value1(pluralCategory);
1189 singleCaseVariant = derivedTimesCases.value1(caseVariant);
1190 }
1191
1192 // 4.2. Get the gender of that single_unit
1193 MeasureUnit simpleUnit;
1194 if (!MeasureUnit::findBySubType(singleUnit->getSimpleUnitID(), &simpleUnit)) {
1195 // Ideally all simple units should be known, but they're not:
1196 // 100-kilometer is internally treated as a simple unit, but it is
1197 // not a built-in unit and does not have formatting data in CLDR 39.
1198 //
1199 // TODO(icu-units#28): test (desirable) invariants in unit tests.
1200 status = U_UNSUPPORTED_ERROR;
1201 return;
1202 }
1203 const char *gender = getGenderString(getGenderForBuiltin(loc, simpleUnit, status), status);
1204
1205 // 4.3. If singleUnit starts with a dimensionality_prefix, such as 'square-'
1206 U_ASSERT(singleUnit->dimensionality > 0);
1207 int32_t dimensionality = singleUnit->dimensionality;
1208 UnicodeString dimensionalityPrefixPatterns[ARRAY_LENGTH];
1209 if (dimensionality != 1) {
1210 // 4.3.1. set dimensionalityPrefixPattern to be
1211 // getValue(that dimensionality_prefix, locale, length, singlePluralCategory, singleCaseVariant, gender),
1212 // such as "{0} kwadratowym"
1213 CharString dimensionalityKey("compound/power", status);
1214 dimensionalityKey.appendNumber(dimensionality, status);
1215 getInflectedMeasureData(dimensionalityKey.toStringPiece(), loc, width, gender,
1216 singleCaseVariant, dimensionalityPrefixPatterns, status);
1217 if (U_FAILURE(status)) {
1218 // At the time of writing, only pow2 and pow3 are supported.
1219 // Attempting to format other powers results in a
1220 // U_RESOURCE_TYPE_MISMATCH. We convert the error if we
1221 // understand it:
1222 if (status == U_RESOURCE_TYPE_MISMATCH && dimensionality > 3) {
1223 status = U_UNSUPPORTED_ERROR;
1224 }
1225 return;
1226 }
1227
1228 // TODO(icu-units#139):
1229 // 4.3.2. set singlePluralCategory to be power0(singlePluralCategory)
1230
1231 // 4.3.3. set singleCaseVariant to be power0(singleCaseVariant)
1232 singleCaseVariant = derivedPowerCases.value0(singleCaseVariant);
1233 // 4.3.4. remove the dimensionality_prefix from singleUnit
1234 singleUnit->dimensionality = 1;
1235 }
1236
1237 // 4.4. if singleUnit starts with an si_prefix, such as 'centi'
1238 UMeasurePrefix prefix = singleUnit->unitPrefix;
1239 UnicodeString prefixPattern;
1240 if (prefix != UMEASURE_PREFIX_ONE) {
1241 // 4.4.1. set siPrefixPattern to be getValue(that si_prefix, locale,
1242 // length), such as "centy{0}"
1243 CharString prefixKey;
1244 // prefixKey looks like "1024p3" or "10p-2":
1245 prefixKey.appendNumber(umeas_getPrefixBase(prefix), status);
1246 prefixKey.append('p', status);
1247 prefixKey.appendNumber(umeas_getPrefixPower(prefix), status);
1248 // Contains a pattern like "centy{0}".
1249 prefixPattern = getCompoundValue(prefixKey.toStringPiece(), loc, width, status);
1250
1251 // 4.4.2. set singlePluralCategory to be prefix0(singlePluralCategory)
1252 //
1253 // TODO(icu-units#139): that refers to these rules:
1254 // <deriveComponent feature="plural" structure="prefix" value0="one" value1="compound"/>
1255 // though I'm not sure what other value they might end up having.
1256 //
1257 // 4.4.3. set singleCaseVariant to be prefix0(singleCaseVariant)
1258 //
1259 // TODO(icu-units#139): that refers to:
1260 // <deriveComponent feature="case" structure="prefix" value0="nominative"
1261 // value1="compound"/> but the prefix (value0) doesn't have case, the rest simply
1262 // propagates.
1263
1264 // 4.4.4. remove the si_prefix from singleUnit
1265 singleUnit->unitPrefix = UMEASURE_PREFIX_ONE;
1266 }
1267
1268 // 4.5. Set corePattern to be the getValue(singleUnit, locale, length,
1269 // singlePluralCategory, singleCaseVariant), such as "{0} metrem"
1270 UnicodeString singleUnitArray[ARRAY_LENGTH];
1271 // At this point we are left with a Simple Unit:
1272 U_ASSERT(uprv_strcmp(singleUnit->build(status).getIdentifier(), singleUnit->getSimpleUnitID()) ==
1273 0);
1274 getMeasureData(loc, singleUnit->build(status), width, singleCaseVariant, singleUnitArray,
1275 status);
1276 if (U_FAILURE(status)) {
1277 // Shouldn't happen if we have data for all single units
1278 return;
1279 }
1280
1281 // Calculate output gender
1282 if (!singleUnitArray[GENDER_INDEX].isBogus()) {
1283 U_ASSERT(!singleUnitArray[GENDER_INDEX].isEmpty());
1284 UnicodeString uVal;
1285
1286 if (prefix != UMEASURE_PREFIX_ONE) {
1287 singleUnitArray[GENDER_INDEX] =
1288 getDerivedGender(loc, "prefix", singleUnitArray, nullptr, status);
1289 }
1290
1291 if (dimensionality != 1) {
1292 singleUnitArray[GENDER_INDEX] =
1293 getDerivedGender(loc, "power", singleUnitArray, nullptr, status);
1294 }
1295
1296 UnicodeString timesGenderRule = getDeriveCompoundRule(loc, "gender", "times", status);
1297 if (timesGenderRule.length() == 1) {
1298 switch (timesGenderRule[0]) {
1299 case u'0':
1300 if (singleUnitIndex == 0) {
1301 U_ASSERT(outArray[GENDER_INDEX].isBogus());
1302 outArray[GENDER_INDEX] = singleUnitArray[GENDER_INDEX];
1303 }
1304 break;
1305 case u'1':
1306 if (singleUnitIndex == productUnit.singleUnits.length() - 1) {
1307 U_ASSERT(outArray[GENDER_INDEX].isBogus());
1308 outArray[GENDER_INDEX] = singleUnitArray[GENDER_INDEX];
1309 }
1310 }
1311 } else {
1312 if (outArray[GENDER_INDEX].isBogus()) {
1313 outArray[GENDER_INDEX] = timesGenderRule;
1314 }
1315 }
1316 }
1317
1318 // Calculate resulting patterns for each plural form
1319 for (int32_t pluralIndex = 0; pluralIndex < StandardPlural::Form::COUNT; pluralIndex++) {
1320 StandardPlural::Form plural = static_cast<StandardPlural::Form>(pluralIndex);
1321
1322 // singleUnitArray[pluralIndex] looks something like "{0} Meter"
1323 if (outArray[pluralIndex].isBogus()) {
1324 if (singleUnitArray[pluralIndex].isBogus()) {
1325 // Let the usual plural fallback mechanism take care of this
1326 // plural form
1327 continue;
1328 } else {
1329 // Since our singleUnit can have a plural form that outArray
1330 // doesn't yet have (relying on fallback to OTHER), we start
1331 // by grabbing it with the normal plural fallback mechanism
1332 outArray[pluralIndex] = getWithPlural(outArray, plural, status);
1333 if (U_FAILURE(status)) {
1334 return;
1335 }
1336 }
1337 }
1338
1339 if (uprv_strcmp(singlePluralCategory, "") != 0) {
1340 plural = static_cast<StandardPlural::Form>(getIndex(singlePluralCategory, status));
1341 }
1342
1343 // 4.6. Extract(corePattern, coreUnit, placeholder, placeholderPosition) from that pattern.
1344 UnicodeString coreUnit;
1345 PlaceholderPosition placeholderPosition;
1346 UChar joinerChar;
1347 extractCorePattern(getWithPlural(singleUnitArray, plural, status), coreUnit,
1348 placeholderPosition, joinerChar);
1349
1350 // 4.7 If the position is middle, then fail
1351 if (placeholderPosition == PH_MIDDLE) {
1352 status = U_UNSUPPORTED_ERROR;
1353 return;
1354 }
1355
1356 // 4.8. If globalPlaceholder is empty
1357 if (globalPlaceholder[pluralIndex] == PH_EMPTY) {
1358 globalPlaceholder[pluralIndex] = placeholderPosition;
1359 globalJoinerChar = joinerChar;
1360 } else {
1361 // Expect all units involved to have the same placeholder position
1362 U_ASSERT(globalPlaceholder[pluralIndex] == placeholderPosition);
1363 // TODO(icu-units#28): Do we want to add a unit test that checks
1364 // for consistent joiner chars? Probably not, given how
1365 // inconsistent they are. File a CLDR ticket with examples?
1366 }
1367 // Now coreUnit would be just "Meter"
1368
1369 // 4.9. If siPrefixPattern is not empty
1370 if (prefix != UMEASURE_PREFIX_ONE) {
1371 SimpleFormatter prefixCompiled(prefixPattern, 1, 1, status);
1372 if (U_FAILURE(status)) {
1373 return;
1374 }
1375
1376 // 4.9.1. Set coreUnit to be the combineLowercasing(locale, length, siPrefixPattern,
1377 // coreUnit)
1378 UnicodeString tmp;
1379 // combineLowercasing(locale, length, prefixPattern, coreUnit)
1380 //
1381 // TODO(icu-units#28): run this only if prefixPattern does not
1382 // contain space characters - do languages "as", "bn", "hi",
1383 // "kk", etc have concepts of upper and lower case?:
1384 if (width == UNUM_UNIT_WIDTH_FULL_NAME) {
1385 coreUnit.toLower(loc);
1386 }
1387 prefixCompiled.format(coreUnit, tmp, status);
1388 if (U_FAILURE(status)) {
1389 return;
1390 }
1391 coreUnit = tmp;
1392 }
1393
1394 // 4.10. If dimensionalityPrefixPattern is not empty
1395 if (dimensionality != 1) {
1396 SimpleFormatter dimensionalityCompiled(
1397 getWithPlural(dimensionalityPrefixPatterns, plural, status), 1, 1, status);
1398 if (U_FAILURE(status)) {
1399 return;
1400 }
1401
1402 // 4.10.1. Set coreUnit to be the combineLowercasing(locale, length,
1403 // dimensionalityPrefixPattern, coreUnit)
1404 UnicodeString tmp;
1405 // combineLowercasing(locale, length, prefixPattern, coreUnit)
1406 //
1407 // TODO(icu-units#28): run this only if prefixPattern does not
1408 // contain space characters - do languages "as", "bn", "hi",
1409 // "kk", etc have concepts of upper and lower case?:
1410 if (width == UNUM_UNIT_WIDTH_FULL_NAME) {
1411 coreUnit.toLower(loc);
1412 }
1413 dimensionalityCompiled.format(coreUnit, tmp, status);
1414 if (U_FAILURE(status)) {
1415 return;
1416 }
1417 coreUnit = tmp;
1418 }
1419
1420 if (outArray[pluralIndex].length() == 0) {
1421 // 4.11. If the result is empty, set result to be coreUnit
1422 outArray[pluralIndex] = coreUnit;
1423 } else {
1424 // 4.12. Otherwise set result to be format(timesPattern, result, coreUnit)
1425 UnicodeString tmp;
1426 timesPatternFormatter.format(outArray[pluralIndex], coreUnit, tmp, status);
1427 outArray[pluralIndex] = tmp;
1428 }
1429 }
1430 }
1431 for (int32_t pluralIndex = 0; pluralIndex < StandardPlural::Form::COUNT; pluralIndex++) {
1432 if (globalPlaceholder[pluralIndex] == PH_BEGINNING) {
1433 UnicodeString tmp;
1434 tmp.append(u"{0}", 3);
1435 if (globalJoinerChar != 0) {
1436 tmp.append(globalJoinerChar);
1437 }
1438 tmp.append(outArray[pluralIndex]);
1439 outArray[pluralIndex] = tmp;
1440 } else if (globalPlaceholder[pluralIndex] == PH_END) {
1441 if (globalJoinerChar != 0) {
1442 outArray[pluralIndex].append(globalJoinerChar);
1443 }
1444 outArray[pluralIndex].append(u"{0}", 3);
1445 }
1446 }
1447 }
1448
getUnitDisplayName(const Locale & loc,const MeasureUnit & unit,UNumberUnitWidth width,UErrorCode & status)1449 UnicodeString LongNameHandler::getUnitDisplayName(
1450 const Locale& loc,
1451 const MeasureUnit& unit,
1452 UNumberUnitWidth width,
1453 UErrorCode& status) {
1454 if (U_FAILURE(status)) {
1455 return ICU_Utility::makeBogusString();
1456 }
1457 UnicodeString simpleFormats[ARRAY_LENGTH];
1458 getMeasureData(loc, unit, width, "", simpleFormats, status);
1459 return simpleFormats[DNAM_INDEX];
1460 }
1461
getUnitPattern(const Locale & loc,const MeasureUnit & unit,UNumberUnitWidth width,StandardPlural::Form pluralForm,UErrorCode & status)1462 UnicodeString LongNameHandler::getUnitPattern(
1463 const Locale& loc,
1464 const MeasureUnit& unit,
1465 UNumberUnitWidth width,
1466 StandardPlural::Form pluralForm,
1467 UErrorCode& status) {
1468 if (U_FAILURE(status)) {
1469 return ICU_Utility::makeBogusString();
1470 }
1471 UnicodeString simpleFormats[ARRAY_LENGTH];
1472 getMeasureData(loc, unit, width, "", simpleFormats, status);
1473 // The above already handles fallback from other widths to short
1474 if (U_FAILURE(status)) {
1475 return ICU_Utility::makeBogusString();
1476 }
1477 // Now handle fallback from other plural forms to OTHER
1478 return (!(simpleFormats[pluralForm]).isBogus())? simpleFormats[pluralForm]:
1479 simpleFormats[StandardPlural::Form::OTHER];
1480 }
1481
forCurrencyLongNames(const Locale & loc,const CurrencyUnit & currency,const PluralRules * rules,const MicroPropsGenerator * parent,UErrorCode & status)1482 LongNameHandler* LongNameHandler::forCurrencyLongNames(const Locale &loc, const CurrencyUnit ¤cy,
1483 const PluralRules *rules,
1484 const MicroPropsGenerator *parent,
1485 UErrorCode &status) {
1486 auto* result = new LongNameHandler(rules, parent);
1487 if (result == nullptr) {
1488 status = U_MEMORY_ALLOCATION_ERROR;
1489 return nullptr;
1490 }
1491 UnicodeString simpleFormats[ARRAY_LENGTH];
1492 getCurrencyLongNameData(loc, currency, simpleFormats, status);
1493 if (U_FAILURE(status)) { return nullptr; }
1494 result->simpleFormatsToModifiers(simpleFormats, {UFIELD_CATEGORY_NUMBER, UNUM_CURRENCY_FIELD}, status);
1495 // TODO(icu-units#28): currency gender?
1496 return result;
1497 }
1498
simpleFormatsToModifiers(const UnicodeString * simpleFormats,Field field,UErrorCode & status)1499 void LongNameHandler::simpleFormatsToModifiers(const UnicodeString *simpleFormats, Field field,
1500 UErrorCode &status) {
1501 for (int32_t i = 0; i < StandardPlural::Form::COUNT; i++) {
1502 StandardPlural::Form plural = static_cast<StandardPlural::Form>(i);
1503 UnicodeString simpleFormat = getWithPlural(simpleFormats, plural, status);
1504 if (U_FAILURE(status)) { return; }
1505 SimpleFormatter compiledFormatter(simpleFormat, 0, 1, status);
1506 if (U_FAILURE(status)) { return; }
1507 fModifiers[i] = SimpleModifier(compiledFormatter, field, false, {this, SIGNUM_POS_ZERO, plural});
1508 }
1509 }
1510
multiSimpleFormatsToModifiers(const UnicodeString * leadFormats,UnicodeString trailFormat,Field field,UErrorCode & status)1511 void LongNameHandler::multiSimpleFormatsToModifiers(const UnicodeString *leadFormats, UnicodeString trailFormat,
1512 Field field, UErrorCode &status) {
1513 SimpleFormatter trailCompiled(trailFormat, 1, 1, status);
1514 if (U_FAILURE(status)) { return; }
1515 for (int32_t i = 0; i < StandardPlural::Form::COUNT; i++) {
1516 StandardPlural::Form plural = static_cast<StandardPlural::Form>(i);
1517 UnicodeString leadFormat = getWithPlural(leadFormats, plural, status);
1518 if (U_FAILURE(status)) { return; }
1519 UnicodeString compoundFormat;
1520 if (leadFormat.length() == 0) {
1521 compoundFormat = trailFormat;
1522 } else {
1523 trailCompiled.format(leadFormat, compoundFormat, status);
1524 if (U_FAILURE(status)) { return; }
1525 }
1526 SimpleFormatter compoundCompiled(compoundFormat, 0, 1, status);
1527 if (U_FAILURE(status)) { return; }
1528 fModifiers[i] = SimpleModifier(compoundCompiled, field, false, {this, SIGNUM_POS_ZERO, plural});
1529 }
1530 }
1531
processQuantity(DecimalQuantity & quantity,MicroProps & micros,UErrorCode & status) const1532 void LongNameHandler::processQuantity(DecimalQuantity &quantity, MicroProps µs,
1533 UErrorCode &status) const {
1534 if (parent != NULL) {
1535 parent->processQuantity(quantity, micros, status);
1536 }
1537 StandardPlural::Form pluralForm = utils::getPluralSafe(micros.rounder, rules, quantity, status);
1538 micros.modOuter = &fModifiers[pluralForm];
1539 micros.gender = gender;
1540 }
1541
getModifier(Signum,StandardPlural::Form plural) const1542 const Modifier* LongNameHandler::getModifier(Signum /*signum*/, StandardPlural::Form plural) const {
1543 return &fModifiers[plural];
1544 }
1545
forMeasureUnit(const Locale & loc,const MeasureUnit & mixedUnit,const UNumberUnitWidth & width,const char * unitDisplayCase,const PluralRules * rules,const MicroPropsGenerator * parent,MixedUnitLongNameHandler * fillIn,UErrorCode & status)1546 void MixedUnitLongNameHandler::forMeasureUnit(const Locale &loc,
1547 const MeasureUnit &mixedUnit,
1548 const UNumberUnitWidth &width,
1549 const char *unitDisplayCase,
1550 const PluralRules *rules,
1551 const MicroPropsGenerator *parent,
1552 MixedUnitLongNameHandler *fillIn,
1553 UErrorCode &status) {
1554 U_ASSERT(mixedUnit.getComplexity(status) == UMEASURE_UNIT_MIXED);
1555 U_ASSERT(fillIn != nullptr);
1556 if (U_FAILURE(status)) {
1557 return;
1558 }
1559
1560 MeasureUnitImpl temp;
1561 const MeasureUnitImpl &impl = MeasureUnitImpl::forMeasureUnit(mixedUnit, temp, status);
1562 // Defensive, for production code:
1563 if (impl.complexity != UMEASURE_UNIT_MIXED) {
1564 // Should be using the normal LongNameHandler
1565 status = U_UNSUPPORTED_ERROR;
1566 return;
1567 }
1568
1569 fillIn->fMixedUnitCount = impl.singleUnits.length();
1570 fillIn->fMixedUnitData.adoptInstead(new UnicodeString[fillIn->fMixedUnitCount * ARRAY_LENGTH]);
1571 for (int32_t i = 0; i < fillIn->fMixedUnitCount; i++) {
1572 // Grab data for each of the components.
1573 UnicodeString *unitData = &fillIn->fMixedUnitData[i * ARRAY_LENGTH];
1574 // TODO(CLDR-14502): check from the CLDR-14502 ticket whether this
1575 // propagation of unitDisplayCase is correct:
1576 getMeasureData(loc, impl.singleUnits[i]->build(status), width, unitDisplayCase, unitData,
1577 status);
1578 // TODO(ICU-21494): if we add support for gender for mixed units, we may
1579 // need maybeCalculateGender() here.
1580 }
1581
1582 // TODO(icu-units#120): Make sure ICU doesn't output zero-valued
1583 // high-magnitude fields
1584 // * for mixed units count N, produce N listFormatters, one for each subset
1585 // that might be formatted.
1586 UListFormatterWidth listWidth = ULISTFMT_WIDTH_SHORT;
1587 if (width == UNUM_UNIT_WIDTH_NARROW) {
1588 listWidth = ULISTFMT_WIDTH_NARROW;
1589 } else if (width == UNUM_UNIT_WIDTH_FULL_NAME) {
1590 // This might be the same as SHORT in most languages:
1591 listWidth = ULISTFMT_WIDTH_WIDE;
1592 }
1593 fillIn->fListFormatter.adoptInsteadAndCheckErrorCode(
1594 ListFormatter::createInstance(loc, ULISTFMT_TYPE_UNITS, listWidth, status), status);
1595 // TODO(ICU-21494): grab gender of each unit, calculate the gender
1596 // associated with this list formatter, save it for later.
1597 fillIn->rules = rules;
1598 fillIn->parent = parent;
1599
1600 // We need a localised NumberFormatter for the numbers of the bigger units
1601 // (providing Arabic numerals, for example).
1602 fillIn->fNumberFormatter = NumberFormatter::withLocale(loc);
1603 }
1604
processQuantity(DecimalQuantity & quantity,MicroProps & micros,UErrorCode & status) const1605 void MixedUnitLongNameHandler::processQuantity(DecimalQuantity &quantity, MicroProps µs,
1606 UErrorCode &status) const {
1607 U_ASSERT(fMixedUnitCount > 1);
1608 if (parent != nullptr) {
1609 parent->processQuantity(quantity, micros, status);
1610 }
1611 micros.modOuter = getMixedUnitModifier(quantity, micros, status);
1612 }
1613
getMixedUnitModifier(DecimalQuantity & quantity,MicroProps & micros,UErrorCode & status) const1614 const Modifier *MixedUnitLongNameHandler::getMixedUnitModifier(DecimalQuantity &quantity,
1615 MicroProps µs,
1616 UErrorCode &status) const {
1617 if (micros.mixedMeasuresCount == 0) {
1618 U_ASSERT(micros.mixedMeasuresCount > 0); // Mixed unit: we must have more than one unit value
1619 status = U_UNSUPPORTED_ERROR;
1620 return µs.helpers.emptyWeakModifier;
1621 }
1622
1623 // Algorithm:
1624 //
1625 // For the mixed-units measurement of: "3 yard, 1 foot, 2.6 inch", we should
1626 // find "3 yard" and "1 foot" in micros.mixedMeasures.
1627 //
1628 // Obtain long-names with plural forms corresponding to measure values:
1629 // * {0} yards, {0} foot, {0} inches
1630 //
1631 // Format the integer values appropriately and modify with the format
1632 // strings:
1633 // - 3 yards, 1 foot
1634 //
1635 // Use ListFormatter to combine, with one placeholder:
1636 // - 3 yards, 1 foot and {0} inches
1637 //
1638 // Return a SimpleModifier for this pattern, letting the rest of the
1639 // pipeline take care of the remaining inches.
1640
1641 LocalArray<UnicodeString> outputMeasuresList(new UnicodeString[fMixedUnitCount], status);
1642 if (U_FAILURE(status)) {
1643 return µs.helpers.emptyWeakModifier;
1644 }
1645
1646 StandardPlural::Form quantityPlural = StandardPlural::Form::OTHER;
1647 for (int32_t i = 0; i < micros.mixedMeasuresCount; i++) {
1648 DecimalQuantity fdec;
1649
1650 // If numbers are negative, only the first number needs to have its
1651 // negative sign formatted.
1652 int64_t number = i > 0 ? std::abs(micros.mixedMeasures[i]) : micros.mixedMeasures[i];
1653
1654 if (micros.indexOfQuantity == i) { // Insert placeholder for `quantity`
1655 // If quantity is not the first value and quantity is negative
1656 if (micros.indexOfQuantity > 0 && quantity.isNegative()) {
1657 quantity.negate();
1658 }
1659
1660 StandardPlural::Form quantityPlural =
1661 utils::getPluralSafe(micros.rounder, rules, quantity, status);
1662 UnicodeString quantityFormatWithPlural =
1663 getWithPlural(&fMixedUnitData[i * ARRAY_LENGTH], quantityPlural, status);
1664 SimpleFormatter quantityFormatter(quantityFormatWithPlural, 0, 1, status);
1665 quantityFormatter.format(UnicodeString(u"{0}"), outputMeasuresList[i], status);
1666 } else {
1667 fdec.setToLong(number);
1668 StandardPlural::Form pluralForm = utils::getStandardPlural(rules, fdec);
1669 UnicodeString simpleFormat =
1670 getWithPlural(&fMixedUnitData[i * ARRAY_LENGTH], pluralForm, status);
1671 SimpleFormatter compiledFormatter(simpleFormat, 0, 1, status);
1672 UnicodeString num;
1673 auto appendable = UnicodeStringAppendable(num);
1674
1675 fNumberFormatter.formatDecimalQuantity(fdec, status).appendTo(appendable, status);
1676 compiledFormatter.format(num, outputMeasuresList[i], status);
1677 }
1678 }
1679
1680 // TODO(ICU-21494): implement gender for lists of mixed units. Presumably we
1681 // can set micros.gender to the gender associated with the list formatter in
1682 // use below (once we have correct support for that). And then document this
1683 // appropriately? "getMixedUnitModifier" doesn't sound like it would do
1684 // something like this.
1685
1686 // Combine list into a "premixed" pattern
1687 UnicodeString premixedFormatPattern;
1688 fListFormatter->format(outputMeasuresList.getAlias(), fMixedUnitCount, premixedFormatPattern,
1689 status);
1690 SimpleFormatter premixedCompiled(premixedFormatPattern, 0, 1, status);
1691 if (U_FAILURE(status)) {
1692 return µs.helpers.emptyWeakModifier;
1693 }
1694
1695 micros.helpers.mixedUnitModifier =
1696 SimpleModifier(premixedCompiled, kUndefinedField, false, {this, SIGNUM_POS_ZERO, quantityPlural});
1697 return µs.helpers.mixedUnitModifier;
1698 }
1699
getModifier(Signum,StandardPlural::Form) const1700 const Modifier *MixedUnitLongNameHandler::getModifier(Signum /*signum*/,
1701 StandardPlural::Form /*plural*/) const {
1702 // TODO(icu-units#28): investigate this method when investigating where
1703 // ModifierStore::getModifier() gets used. To be sure it remains
1704 // unreachable:
1705 UPRV_UNREACHABLE;
1706 return nullptr;
1707 }
1708
forMeasureUnits(const Locale & loc,const MaybeStackVector<MeasureUnit> & units,const UNumberUnitWidth & width,const char * unitDisplayCase,const PluralRules * rules,const MicroPropsGenerator * parent,UErrorCode & status)1709 LongNameMultiplexer *LongNameMultiplexer::forMeasureUnits(const Locale &loc,
1710 const MaybeStackVector<MeasureUnit> &units,
1711 const UNumberUnitWidth &width,
1712 const char *unitDisplayCase,
1713 const PluralRules *rules,
1714 const MicroPropsGenerator *parent,
1715 UErrorCode &status) {
1716 LocalPointer<LongNameMultiplexer> result(new LongNameMultiplexer(parent), status);
1717 if (U_FAILURE(status)) {
1718 return nullptr;
1719 }
1720 U_ASSERT(units.length() > 0);
1721 if (result->fHandlers.resize(units.length()) == nullptr) {
1722 status = U_MEMORY_ALLOCATION_ERROR;
1723 return nullptr;
1724 }
1725 result->fMeasureUnits.adoptInstead(new MeasureUnit[units.length()]);
1726 for (int32_t i = 0, length = units.length(); i < length; i++) {
1727 const MeasureUnit &unit = *units[i];
1728 result->fMeasureUnits[i] = unit;
1729 if (unit.getComplexity(status) == UMEASURE_UNIT_MIXED) {
1730 MixedUnitLongNameHandler *mlnh = result->fMixedUnitHandlers.createAndCheckErrorCode(status);
1731 MixedUnitLongNameHandler::forMeasureUnit(loc, unit, width, unitDisplayCase, rules, NULL,
1732 mlnh, status);
1733 result->fHandlers[i] = mlnh;
1734 } else {
1735 LongNameHandler *lnh = result->fLongNameHandlers.createAndCheckErrorCode(status);
1736 LongNameHandler::forMeasureUnit(loc, unit, width, unitDisplayCase, rules, NULL, lnh, status);
1737 result->fHandlers[i] = lnh;
1738 }
1739 if (U_FAILURE(status)) {
1740 return nullptr;
1741 }
1742 }
1743 return result.orphan();
1744 }
1745
processQuantity(DecimalQuantity & quantity,MicroProps & micros,UErrorCode & status) const1746 void LongNameMultiplexer::processQuantity(DecimalQuantity &quantity, MicroProps µs,
1747 UErrorCode &status) const {
1748 // We call parent->processQuantity() from the Multiplexer, instead of
1749 // letting LongNameHandler handle it: we don't know which LongNameHandler to
1750 // call until we've called the parent!
1751 fParent->processQuantity(quantity, micros, status);
1752
1753 // Call the correct LongNameHandler based on outputUnit
1754 for (int i = 0; i < fHandlers.getCapacity(); i++) {
1755 if (fMeasureUnits[i] == micros.outputUnit) {
1756 fHandlers[i]->processQuantity(quantity, micros, status);
1757 return;
1758 }
1759 }
1760 if (U_FAILURE(status)) {
1761 return;
1762 }
1763 // We shouldn't receive any outputUnit for which we haven't already got a
1764 // LongNameHandler:
1765 status = U_INTERNAL_PROGRAM_ERROR;
1766 }
1767
1768 #endif /* #if !UCONFIG_NO_FORMATTING */
1769