1 // © 2017 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3
4 #include "unicode/utypes.h"
5
6 #if !UCONFIG_NO_FORMATTING
7
8 #include <cstdlib>
9
10 #include "unicode/simpleformatter.h"
11 #include "unicode/ures.h"
12 #include "ureslocs.h"
13 #include "charstr.h"
14 #include "uresimp.h"
15 #include "measunit_impl.h"
16 #include "number_longnames.h"
17 #include "number_microprops.h"
18 #include <algorithm>
19 #include "cstring.h"
20 #include "util.h"
21
22 using namespace icu;
23 using namespace icu::number;
24 using namespace icu::number::impl;
25
26 namespace {
27
28 /**
29 * Display Name (this format has no placeholder).
30 *
31 * Used as an index into the LongNameHandler::simpleFormats array. Units
32 * resources cover the normal set of PluralRules keys, as well as `dnam` and
33 * `per` forms.
34 */
35 constexpr int32_t DNAM_INDEX = StandardPlural::Form::COUNT;
36 /**
37 * "per" form (e.g. "{0} per day" is day's "per" form).
38 *
39 * Used as an index into the LongNameHandler::simpleFormats array. Units
40 * resources cover the normal set of PluralRules keys, as well as `dnam` and
41 * `per` forms.
42 */
43 constexpr int32_t PER_INDEX = StandardPlural::Form::COUNT + 1;
44 /**
45 * Gender of the word, in languages with grammatical gender.
46 */
47 constexpr int32_t GENDER_INDEX = StandardPlural::Form::COUNT + 2;
48 // Number of keys in the array populated by PluralTableSink.
49 constexpr int32_t ARRAY_LENGTH = StandardPlural::Form::COUNT + 3;
50
51 // TODO(icu-units#28): load this list from resources, after creating a "&set"
52 // function for use in ldml2icu rules.
53 const int32_t GENDER_COUNT = 7;
54 const char *gGenders[GENDER_COUNT] = {"animate", "common", "feminine", "inanimate",
55 "masculine", "neuter", "personal"};
56
57 // Converts a UnicodeString to a const char*, either pointing to a string in
58 // gGenders, or pointing to an empty string if an appropriate string was not
59 // found.
getGenderString(UnicodeString uGender,UErrorCode status)60 const char *getGenderString(UnicodeString uGender, UErrorCode status) {
61 if (uGender.length() == 0) {
62 return "";
63 }
64 CharString gender;
65 gender.appendInvariantChars(uGender, status);
66 if (U_FAILURE(status)) {
67 return "";
68 }
69 int32_t first = 0;
70 int32_t last = GENDER_COUNT;
71 while (first < last) {
72 int32_t mid = (first + last) / 2;
73 int32_t cmp = uprv_strcmp(gender.data(), gGenders[mid]);
74 if (cmp == 0) {
75 return gGenders[mid];
76 } else if (cmp > 0) {
77 first = mid + 1;
78 } else if (cmp < 0) {
79 last = mid;
80 }
81 }
82 // We don't return an error in case our gGenders list is incomplete in
83 // production.
84 //
85 // TODO(icu-units#28): a unit test checking all locales' genders are covered
86 // by gGenders? Else load a complete list of genders found in
87 // grammaticalFeatures in an initOnce.
88 return "";
89 }
90
91 // Returns the array index that corresponds to the given pluralKeyword.
getIndex(const char * pluralKeyword,UErrorCode & status)92 static int32_t getIndex(const char* pluralKeyword, UErrorCode& status) {
93 // pluralKeyword can also be "dnam", "per", or "gender"
94 switch (*pluralKeyword) {
95 case 'd':
96 if (uprv_strcmp(pluralKeyword + 1, "nam") == 0) {
97 return DNAM_INDEX;
98 }
99 break;
100 case 'g':
101 if (uprv_strcmp(pluralKeyword + 1, "ender") == 0) {
102 return GENDER_INDEX;
103 }
104 break;
105 case 'p':
106 if (uprv_strcmp(pluralKeyword + 1, "er") == 0) {
107 return PER_INDEX;
108 }
109 break;
110 default:
111 break;
112 }
113 StandardPlural::Form plural = StandardPlural::fromString(pluralKeyword, status);
114 return plural;
115 }
116
117 // Selects a string out of the `strings` array which corresponds to the
118 // specified plural form, with fallback to the OTHER form.
119 //
120 // The `strings` array must have ARRAY_LENGTH items: one corresponding to each
121 // of the plural forms, plus a display name ("dnam") and a "per" form.
getWithPlural(const UnicodeString * strings,StandardPlural::Form plural,UErrorCode & status)122 static UnicodeString getWithPlural(
123 const UnicodeString* strings,
124 StandardPlural::Form plural,
125 UErrorCode& status) {
126 UnicodeString result = strings[plural];
127 if (result.isBogus()) {
128 result = strings[StandardPlural::Form::OTHER];
129 }
130 if (result.isBogus()) {
131 // There should always be data in the "other" plural variant.
132 status = U_INTERNAL_PROGRAM_ERROR;
133 }
134 return result;
135 }
136
137 enum PlaceholderPosition { PH_EMPTY, PH_NONE, PH_BEGINNING, PH_MIDDLE, PH_END };
138
139 /**
140 * Returns three outputs extracted from pattern.
141 *
142 * @param coreUnit is extracted as per Extract(...) in the spec:
143 * https://unicode.org/reports/tr35/tr35-general.html#compound-units
144 * @param PlaceholderPosition indicates where in the string the placeholder was
145 * found.
146 * @param joinerChar Iff the placeholder was at the beginning or end, joinerChar
147 * contains the space character (if any) that separated the placeholder from
148 * the rest of the pattern. Otherwise, joinerChar is set to NUL. Only one
149 * space character is considered.
150 */
extractCorePattern(const UnicodeString & pattern,UnicodeString & coreUnit,PlaceholderPosition & placeholderPosition,UChar & joinerChar)151 void extractCorePattern(const UnicodeString &pattern,
152 UnicodeString &coreUnit,
153 PlaceholderPosition &placeholderPosition,
154 UChar &joinerChar) {
155 joinerChar = 0;
156 int32_t len = pattern.length();
157 if (pattern.startsWith(u"{0}", 3)) {
158 placeholderPosition = PH_BEGINNING;
159 if (u_isJavaSpaceChar(pattern[3])) {
160 joinerChar = pattern[3];
161 coreUnit.setTo(pattern, 4, len - 4);
162 } else {
163 coreUnit.setTo(pattern, 3, len - 3);
164 }
165 } else if (pattern.endsWith(u"{0}", 3)) {
166 placeholderPosition = PH_END;
167 if (u_isJavaSpaceChar(pattern[len - 4])) {
168 coreUnit.setTo(pattern, 0, len - 4);
169 joinerChar = pattern[len - 4];
170 } else {
171 coreUnit.setTo(pattern, 0, len - 3);
172 }
173 } else if (pattern.indexOf(u"{0}", 3, 1, len - 2) == -1) {
174 placeholderPosition = PH_NONE;
175 coreUnit = pattern;
176 } else {
177 placeholderPosition = PH_MIDDLE;
178 coreUnit = pattern;
179 }
180 }
181
182 //////////////////////////
183 /// BEGIN DATA LOADING ///
184 //////////////////////////
185
186 // Gets the gender of a built-in unit: unit must be a built-in. Returns an empty
187 // string both in case of unknown gender and in case of unknown unit.
188 UnicodeString
getGenderForBuiltin(const Locale & locale,const MeasureUnit & builtinUnit,UErrorCode & status)189 getGenderForBuiltin(const Locale &locale, const MeasureUnit &builtinUnit, UErrorCode &status) {
190 LocalUResourceBundlePointer unitsBundle(ures_open(U_ICUDATA_UNIT, locale.getName(), &status));
191 if (U_FAILURE(status)) { return {}; }
192
193 // Map duration-year-person, duration-week-person, etc. to duration-year, duration-week, ...
194 // TODO(ICU-20400): Get duration-*-person data properly with aliases.
195 StringPiece subtypeForResource;
196 int32_t subtypeLen = static_cast<int32_t>(uprv_strlen(builtinUnit.getSubtype()));
197 if (subtypeLen > 7 && uprv_strcmp(builtinUnit.getSubtype() + subtypeLen - 7, "-person") == 0) {
198 subtypeForResource = {builtinUnit.getSubtype(), subtypeLen - 7};
199 } else {
200 subtypeForResource = builtinUnit.getSubtype();
201 }
202
203 CharString key;
204 key.append("units/", status);
205 key.append(builtinUnit.getType(), status);
206 key.append("/", status);
207 key.append(subtypeForResource, status);
208 key.append("/gender", status);
209
210 UErrorCode localStatus = status;
211 int32_t resultLen = 0;
212 const UChar *result =
213 ures_getStringByKeyWithFallback(unitsBundle.getAlias(), key.data(), &resultLen, &localStatus);
214 if (U_SUCCESS(localStatus)) {
215 status = localStatus;
216 return UnicodeString(true, result, resultLen);
217 } else {
218 // TODO(icu-units#28): "$unitRes/gender" does not exist. Do we want to
219 // check whether the parent "$unitRes" exists? Then we could return
220 // U_MISSING_RESOURCE_ERROR for incorrect usage (e.g. builtinUnit not
221 // being a builtin).
222 return {};
223 }
224 }
225
226 // Loads data from a resource tree with paths matching
227 // $key/$pluralForm/$gender/$case, with lateral inheritance for missing cases
228 // and genders.
229 //
230 // An InflectedPluralSink is configured to load data for a specific gender and
231 // case. It loads all plural forms, because selection between plural forms is
232 // dependent upon the value being formatted.
233 //
234 // See data/unit/de.txt and data/unit/fr.txt for examples - take a look at
235 // units/compound/power2: German has case, French has differences for gender,
236 // but no case.
237 //
238 // TODO(icu-units#138): Conceptually similar to PluralTableSink, however the
239 // tree structures are different. After homogenizing the structures, we may be
240 // able to unify the two classes.
241 //
242 // TODO: Spec violation: expects presence of "count" - does not fallback to an
243 // absent "count"! If this fallback were added, getCompoundValue could be
244 // superseded?
245 class InflectedPluralSink : public ResourceSink {
246 public:
247 // Accepts `char*` rather than StringPiece because
248 // ResourceTable::findValue(...) requires a null-terminated `char*`.
249 //
250 // NOTE: outArray MUST have a length of at least ARRAY_LENGTH. No bounds
251 // checking is performed.
InflectedPluralSink(const char * gender,const char * caseVariant,UnicodeString * outArray)252 explicit InflectedPluralSink(const char *gender, const char *caseVariant, UnicodeString *outArray)
253 : gender(gender), caseVariant(caseVariant), outArray(outArray) {
254 // Initialize the array to bogus strings.
255 for (int32_t i = 0; i < ARRAY_LENGTH; i++) {
256 outArray[i].setToBogus();
257 }
258 }
259
260 // See ResourceSink::put().
put(const char * key,ResourceValue & value,UBool,UErrorCode & status)261 void put(const char *key, ResourceValue &value, UBool /*noFallback*/, UErrorCode &status) U_OVERRIDE {
262 int32_t pluralIndex = getIndex(key, status);
263 if (U_FAILURE(status)) { return; }
264 if (!outArray[pluralIndex].isBogus()) {
265 // We already have a pattern
266 return;
267 }
268 ResourceTable genderTable = value.getTable(status);
269 ResourceTable caseTable; // This instance has to outlive `value`
270 if (loadForPluralForm(genderTable, caseTable, value, status)) {
271 outArray[pluralIndex] = value.getUnicodeString(status);
272 }
273 }
274
275 private:
276 // Tries to load data for the configured gender from `genderTable`. Returns
277 // true if found, returning the data in `value`. The returned data will be
278 // for the configured gender if found, falling back to "neuter" and
279 // no-gender if not. The caseTable parameter holds the intermediate
280 // ResourceTable for the sake of lifetime management.
loadForPluralForm(const ResourceTable & genderTable,ResourceTable & caseTable,ResourceValue & value,UErrorCode & status)281 bool loadForPluralForm(const ResourceTable &genderTable,
282 ResourceTable &caseTable,
283 ResourceValue &value,
284 UErrorCode &status) {
285 if (uprv_strcmp(gender, "") != 0) {
286 if (loadForGender(genderTable, gender, caseTable, value, status)) {
287 return true;
288 }
289 if (uprv_strcmp(gender, "neuter") != 0 &&
290 loadForGender(genderTable, "neuter", caseTable, value, status)) {
291 return true;
292 }
293 }
294 if (loadForGender(genderTable, "_", caseTable, value, status)) {
295 return true;
296 }
297 return false;
298 }
299
300 // Tries to load data for the given gender from `genderTable`. Returns true
301 // if found, returning the data in `value`. The returned data will be for
302 // the configured case if found, falling back to "nominative" and no-case if
303 // not.
loadForGender(const ResourceTable & genderTable,const char * genderVal,ResourceTable & caseTable,ResourceValue & value,UErrorCode & status)304 bool loadForGender(const ResourceTable &genderTable,
305 const char *genderVal,
306 ResourceTable &caseTable,
307 ResourceValue &value,
308 UErrorCode &status) {
309 if (!genderTable.findValue(genderVal, value)) {
310 return false;
311 }
312 caseTable = value.getTable(status);
313 if (uprv_strcmp(caseVariant, "") != 0) {
314 if (loadForCase(caseTable, caseVariant, value)) {
315 return true;
316 }
317 if (uprv_strcmp(caseVariant, "nominative") != 0 &&
318 loadForCase(caseTable, "nominative", value)) {
319 return true;
320 }
321 }
322 if (loadForCase(caseTable, "_", value)) {
323 return true;
324 }
325 return false;
326 }
327
328 // Tries to load data for the given case from `caseTable`. Returns true if
329 // found, returning the data in `value`.
loadForCase(const ResourceTable & caseTable,const char * caseValue,ResourceValue & value)330 bool loadForCase(const ResourceTable &caseTable, const char *caseValue, ResourceValue &value) {
331 if (!caseTable.findValue(caseValue, value)) {
332 return false;
333 }
334 return true;
335 }
336
337 const char *gender;
338 const char *caseVariant;
339 UnicodeString *outArray;
340 };
341
342 // Fetches localised formatting patterns for the given subKey. See documentation
343 // for InflectedPluralSink for details.
344 //
345 // Data is loaded for the appropriate unit width, with missing data filled in
346 // from unitsShort.
getInflectedMeasureData(StringPiece subKey,const Locale & locale,const UNumberUnitWidth & width,const char * gender,const char * caseVariant,UnicodeString * outArray,UErrorCode & status)347 void getInflectedMeasureData(StringPiece subKey,
348 const Locale &locale,
349 const UNumberUnitWidth &width,
350 const char *gender,
351 const char *caseVariant,
352 UnicodeString *outArray,
353 UErrorCode &status) {
354 InflectedPluralSink sink(gender, caseVariant, outArray);
355 LocalUResourceBundlePointer unitsBundle(ures_open(U_ICUDATA_UNIT, locale.getName(), &status));
356 if (U_FAILURE(status)) { return; }
357
358 CharString key;
359 key.append("units", status);
360 if (width == UNUM_UNIT_WIDTH_NARROW) {
361 key.append("Narrow", status);
362 } else if (width == UNUM_UNIT_WIDTH_SHORT) {
363 key.append("Short", status);
364 }
365 key.append("/", status);
366 key.append(subKey, status);
367
368 UErrorCode localStatus = status;
369 ures_getAllChildrenWithFallback(unitsBundle.getAlias(), key.data(), sink, localStatus);
370 if (width == UNUM_UNIT_WIDTH_SHORT) {
371 status = localStatus;
372 return;
373 }
374 }
375
376 class PluralTableSink : public ResourceSink {
377 public:
378 // NOTE: outArray MUST have a length of at least ARRAY_LENGTH. No bounds
379 // checking is performed.
PluralTableSink(UnicodeString * outArray)380 explicit PluralTableSink(UnicodeString *outArray) : outArray(outArray) {
381 // Initialize the array to bogus strings.
382 for (int32_t i = 0; i < ARRAY_LENGTH; i++) {
383 outArray[i].setToBogus();
384 }
385 }
386
put(const char * key,ResourceValue & value,UBool,UErrorCode & status)387 void put(const char *key, ResourceValue &value, UBool /*noFallback*/, UErrorCode &status) U_OVERRIDE {
388 if (uprv_strcmp(key, "case") == 0) {
389 return;
390 }
391 int32_t index = getIndex(key, status);
392 if (U_FAILURE(status)) { return; }
393 if (!outArray[index].isBogus()) {
394 return;
395 }
396 outArray[index] = value.getUnicodeString(status);
397 if (U_FAILURE(status)) { return; }
398 }
399
400 private:
401 UnicodeString *outArray;
402 };
403
404 /**
405 * Populates outArray with `locale`-specific values for `unit` through use of
406 * PluralTableSink. Only the set of basic units are supported!
407 *
408 * Reading from resources *unitsNarrow* and *unitsShort* (for width
409 * UNUM_UNIT_WIDTH_NARROW), or just *unitsShort* (for width
410 * UNUM_UNIT_WIDTH_SHORT). For other widths, it reads just "units".
411 *
412 * @param unit must be a built-in unit, i.e. must have a type and subtype,
413 * listed in gTypes and gSubTypes in measunit.cpp.
414 * @param unitDisplayCase the empty string and "nominative" are treated the
415 * same. For other cases, strings for the requested case are used if found.
416 * (For any missing case-specific data, we fall back to nominative.)
417 * @param outArray must be of fixed length ARRAY_LENGTH.
418 */
getMeasureData(const Locale & locale,const MeasureUnit & unit,const UNumberUnitWidth & width,const char * unitDisplayCase,UnicodeString * outArray,UErrorCode & status)419 void getMeasureData(const Locale &locale,
420 const MeasureUnit &unit,
421 const UNumberUnitWidth &width,
422 const char *unitDisplayCase,
423 UnicodeString *outArray,
424 UErrorCode &status) {
425 PluralTableSink sink(outArray);
426 LocalUResourceBundlePointer unitsBundle(ures_open(U_ICUDATA_UNIT, locale.getName(), &status));
427 if (U_FAILURE(status)) { return; }
428
429 CharString subKey;
430 subKey.append("/", status);
431 subKey.append(unit.getType(), status);
432 subKey.append("/", status);
433
434 // Map duration-year-person, duration-week-person, etc. to duration-year, duration-week, ...
435 // TODO(ICU-20400): Get duration-*-person data properly with aliases.
436 int32_t subtypeLen = static_cast<int32_t>(uprv_strlen(unit.getSubtype()));
437 if (subtypeLen > 7 && uprv_strcmp(unit.getSubtype() + subtypeLen - 7, "-person") == 0) {
438 subKey.append({unit.getSubtype(), subtypeLen - 7}, status);
439 } else {
440 subKey.append({unit.getSubtype(), subtypeLen}, status);
441 }
442
443 if (width != UNUM_UNIT_WIDTH_FULL_NAME) {
444 UErrorCode localStatus = status;
445 CharString genderKey;
446 genderKey.append("units", localStatus);
447 genderKey.append(subKey, localStatus);
448 genderKey.append("/gender", localStatus);
449 StackUResourceBundle fillIn;
450 ures_getByKeyWithFallback(unitsBundle.getAlias(), genderKey.data(), fillIn.getAlias(),
451 &localStatus);
452 outArray[GENDER_INDEX] = ures_getUnicodeString(fillIn.getAlias(), &localStatus);
453 }
454
455 CharString key;
456 key.append("units", status);
457 if (width == UNUM_UNIT_WIDTH_NARROW) {
458 key.append("Narrow", status);
459 } else if (width == UNUM_UNIT_WIDTH_SHORT) {
460 key.append("Short", status);
461 }
462 key.append(subKey, status);
463
464 // Grab desired case first, if available. Then grab no-case data to fill in
465 // the gaps.
466 if (width == UNUM_UNIT_WIDTH_FULL_NAME && unitDisplayCase[0] != 0) {
467 CharString caseKey;
468 caseKey.append(key, status);
469 caseKey.append("/case/", status);
470 caseKey.append(unitDisplayCase, status);
471
472 UErrorCode localStatus = U_ZERO_ERROR;
473 // TODO(icu-units#138): our fallback logic is not spec-compliant:
474 // lateral fallback should happen before locale fallback. Switch to
475 // getInflectedMeasureData after homogenizing data format? Find a unit
476 // test case that demonstrates the incorrect fallback logic (via
477 // regional variant of an inflected language?)
478 ures_getAllChildrenWithFallback(unitsBundle.getAlias(), caseKey.data(), sink, localStatus);
479 }
480
481 // TODO(icu-units#138): our fallback logic is not spec-compliant: we
482 // check the given case, then go straight to the no-case data. The spec
483 // states we should first look for case="nominative". As part of #138,
484 // either get the spec changed, or add unit tests that warn us if
485 // case="nominative" data differs from no-case data?
486 UErrorCode localStatus = U_ZERO_ERROR;
487 ures_getAllChildrenWithFallback(unitsBundle.getAlias(), key.data(), sink, localStatus);
488 if (width == UNUM_UNIT_WIDTH_SHORT) {
489 if (U_FAILURE(localStatus)) {
490 status = localStatus;
491 }
492 return;
493 }
494 }
495
496 // NOTE: outArray MUST have a length of at least ARRAY_LENGTH.
getCurrencyLongNameData(const Locale & locale,const CurrencyUnit & currency,UnicodeString * outArray,UErrorCode & status)497 void getCurrencyLongNameData(const Locale &locale, const CurrencyUnit ¤cy, UnicodeString *outArray,
498 UErrorCode &status) {
499 // In ICU4J, this method gets a CurrencyData from CurrencyData.provider.
500 // TODO(ICU4J): Implement this without going through CurrencyData, like in ICU4C?
501 PluralTableSink sink(outArray);
502 LocalUResourceBundlePointer unitsBundle(ures_open(U_ICUDATA_CURR, locale.getName(), &status));
503 if (U_FAILURE(status)) { return; }
504 ures_getAllChildrenWithFallback(unitsBundle.getAlias(), "CurrencyUnitPatterns", sink, status);
505 if (U_FAILURE(status)) { return; }
506 for (int32_t i = 0; i < StandardPlural::Form::COUNT; i++) {
507 UnicodeString &pattern = outArray[i];
508 if (pattern.isBogus()) {
509 continue;
510 }
511 int32_t longNameLen = 0;
512 const char16_t *longName = ucurr_getPluralName(
513 currency.getISOCurrency(),
514 locale.getName(),
515 nullptr /* isChoiceFormat */,
516 StandardPlural::getKeyword(static_cast<StandardPlural::Form>(i)),
517 &longNameLen,
518 &status);
519 // Example pattern from data: "{0} {1}"
520 // Example output after find-and-replace: "{0} US dollars"
521 pattern.findAndReplace(UnicodeString(u"{1}"), UnicodeString(longName, longNameLen));
522 }
523 }
524
getCompoundValue(StringPiece compoundKey,const Locale & locale,const UNumberUnitWidth & width,UErrorCode & status)525 UnicodeString getCompoundValue(StringPiece compoundKey,
526 const Locale &locale,
527 const UNumberUnitWidth &width,
528 UErrorCode &status) {
529 LocalUResourceBundlePointer unitsBundle(ures_open(U_ICUDATA_UNIT, locale.getName(), &status));
530 if (U_FAILURE(status)) { return {}; }
531 CharString key;
532 key.append("units", status);
533 if (width == UNUM_UNIT_WIDTH_NARROW) {
534 key.append("Narrow", status);
535 } else if (width == UNUM_UNIT_WIDTH_SHORT) {
536 key.append("Short", status);
537 }
538 key.append("/compound/", status);
539 key.append(compoundKey, status);
540
541 UErrorCode localStatus = status;
542 int32_t len = 0;
543 const UChar *ptr =
544 ures_getStringByKeyWithFallback(unitsBundle.getAlias(), key.data(), &len, &localStatus);
545 if (U_FAILURE(localStatus) && width != UNUM_UNIT_WIDTH_SHORT) {
546 // Fall back to short, which contains more compound data
547 key.clear();
548 key.append("unitsShort/compound/", status);
549 key.append(compoundKey, status);
550 ptr = ures_getStringByKeyWithFallback(unitsBundle.getAlias(), key.data(), &len, &status);
551 } else {
552 status = localStatus;
553 }
554 if (U_FAILURE(status)) {
555 return {};
556 }
557 return UnicodeString(ptr, len);
558 }
559
560 /**
561 * Loads and applies deriveComponent rules from CLDR's grammaticalFeatures.xml.
562 *
563 * Consider a deriveComponent rule that looks like this:
564 *
565 * <deriveComponent feature="case" structure="per" value0="compound" value1="nominative"/>
566 *
567 * Instantiating an instance as follows:
568 *
569 * DerivedComponents d(loc, "case", "per");
570 *
571 * Applying the rule in the XML element above, `d.value0("foo")` will be "foo",
572 * and `d.value1("foo")` will be "nominative".
573 *
574 * The values returned by value0(...) and value1(...) are valid only while the
575 * instance exists. In case of any kind of failure, value0(...) and value1(...)
576 * will return "".
577 */
578 class DerivedComponents {
579 public:
580 /**
581 * Constructor.
582 *
583 * The feature and structure parameters must be null-terminated. The string
584 * referenced by compoundValue must exist for longer than the
585 * DerivedComponents instance.
586 */
DerivedComponents(const Locale & locale,const char * feature,const char * structure)587 DerivedComponents(const Locale &locale, const char *feature, const char *structure) {
588 StackUResourceBundle derivationsBundle, stackBundle;
589 ures_openDirectFillIn(derivationsBundle.getAlias(), NULL, "grammaticalFeatures", &status);
590 ures_getByKey(derivationsBundle.getAlias(), "grammaticalData", derivationsBundle.getAlias(),
591 &status);
592 ures_getByKey(derivationsBundle.getAlias(), "derivations", derivationsBundle.getAlias(),
593 &status);
594 if (U_FAILURE(status)) {
595 return;
596 }
597 UErrorCode localStatus = U_ZERO_ERROR;
598 // TODO(icu-units#28): use standard normal locale resolution algorithms
599 // rather than just grabbing language:
600 ures_getByKey(derivationsBundle.getAlias(), locale.getLanguage(), stackBundle.getAlias(),
601 &localStatus);
602 // TODO(icu-units#28):
603 // - code currently assumes if the locale exists, the rules are there -
604 // instead of falling back to root when the requested rule is missing.
605 // - investigate ures.h functions, see if one that uses res_findResource()
606 // might be better (or use res_findResource directly), or maybe help
607 // improve ures documentation to guide function selection?
608 if (localStatus == U_MISSING_RESOURCE_ERROR) {
609 ures_getByKey(derivationsBundle.getAlias(), "root", stackBundle.getAlias(), &status);
610 } else {
611 status = localStatus;
612 }
613 ures_getByKey(stackBundle.getAlias(), "component", stackBundle.getAlias(), &status);
614 ures_getByKey(stackBundle.getAlias(), feature, stackBundle.getAlias(), &status);
615 ures_getByKey(stackBundle.getAlias(), structure, stackBundle.getAlias(), &status);
616 UnicodeString val0 = ures_getUnicodeStringByIndex(stackBundle.getAlias(), 0, &status);
617 UnicodeString val1 = ures_getUnicodeStringByIndex(stackBundle.getAlias(), 1, &status);
618 if (U_SUCCESS(status)) {
619 if (val0.compare(UnicodeString(u"compound")) == 0) {
620 compound0_ = true;
621 } else {
622 compound0_ = false;
623 value0_.appendInvariantChars(val0, status);
624 }
625 if (val1.compare(UnicodeString(u"compound")) == 0) {
626 compound1_ = true;
627 } else {
628 compound1_ = false;
629 value1_.appendInvariantChars(val1, status);
630 }
631 }
632 }
633
634 // Returns a StringPiece that is only valid as long as the instance exists.
value0(const StringPiece compoundValue) const635 StringPiece value0(const StringPiece compoundValue) const {
636 return compound0_ ? compoundValue : value0_.toStringPiece();
637 }
638
639 // Returns a StringPiece that is only valid as long as the instance exists.
value1(const StringPiece compoundValue) const640 StringPiece value1(const StringPiece compoundValue) const {
641 return compound1_ ? compoundValue : value1_.toStringPiece();
642 }
643
644 // Returns a char* that is only valid as long as the instance exists.
value0(const char * compoundValue) const645 const char *value0(const char *compoundValue) const {
646 return compound0_ ? compoundValue : value0_.data();
647 }
648
649 // Returns a char* that is only valid as long as the instance exists.
value1(const char * compoundValue) const650 const char *value1(const char *compoundValue) const {
651 return compound1_ ? compoundValue : value1_.data();
652 }
653
654 private:
655 UErrorCode status = U_ZERO_ERROR;
656
657 // Holds strings referred to by value0 and value1;
658 bool compound0_ = false, compound1_ = false;
659 CharString value0_, value1_;
660 };
661
662 // TODO(icu-units#28): test somehow? Associate with an ICU ticket for adding
663 // testsuite support for testing with synthetic data?
664 /**
665 * Loads and returns the value in rules that look like these:
666 *
667 * <deriveCompound feature="gender" structure="per" value="0"/>
668 * <deriveCompound feature="gender" structure="times" value="1"/>
669 *
670 * Currently a fake example, but spec compliant:
671 * <deriveCompound feature="gender" structure="power" value="feminine"/>
672 *
673 * NOTE: If U_FAILURE(status), returns an empty string.
674 */
675 UnicodeString
getDeriveCompoundRule(Locale locale,const char * feature,const char * structure,UErrorCode & status)676 getDeriveCompoundRule(Locale locale, const char *feature, const char *structure, UErrorCode &status) {
677 StackUResourceBundle derivationsBundle, stackBundle;
678 ures_openDirectFillIn(derivationsBundle.getAlias(), NULL, "grammaticalFeatures", &status);
679 ures_getByKey(derivationsBundle.getAlias(), "grammaticalData", derivationsBundle.getAlias(),
680 &status);
681 ures_getByKey(derivationsBundle.getAlias(), "derivations", derivationsBundle.getAlias(), &status);
682 // TODO: use standard normal locale resolution algorithms rather than just grabbing language:
683 ures_getByKey(derivationsBundle.getAlias(), locale.getLanguage(), stackBundle.getAlias(), &status);
684 // TODO:
685 // - code currently assumes if the locale exists, the rules are there -
686 // instead of falling back to root when the requested rule is missing.
687 // - investigate ures.h functions, see if one that uses res_findResource()
688 // might be better (or use res_findResource directly), or maybe help
689 // improve ures documentation to guide function selection?
690 if (status == U_MISSING_RESOURCE_ERROR) {
691 status = U_ZERO_ERROR;
692 ures_getByKey(derivationsBundle.getAlias(), "root", stackBundle.getAlias(), &status);
693 }
694 ures_getByKey(stackBundle.getAlias(), "compound", stackBundle.getAlias(), &status);
695 ures_getByKey(stackBundle.getAlias(), feature, stackBundle.getAlias(), &status);
696 UnicodeString uVal = ures_getUnicodeStringByKey(stackBundle.getAlias(), structure, &status);
697 if (U_FAILURE(status)) {
698 return {};
699 }
700 U_ASSERT(!uVal.isBogus());
701 return uVal;
702 }
703
704 // Returns the gender string for structures following these rules:
705 //
706 // <deriveCompound feature="gender" structure="per" value="0"/>
707 // <deriveCompound feature="gender" structure="times" value="1"/>
708 //
709 // Fake example:
710 // <deriveCompound feature="gender" structure="power" value="feminine"/>
711 //
712 // data0 and data1 should be pattern arrays (UnicodeString[ARRAY_SIZE]) that
713 // correspond to value="0" and value="1".
714 //
715 // Pass a nullptr to data1 if the structure has no concept of value="1" (e.g.
716 // "prefix" doesn't).
getDerivedGender(Locale locale,const char * structure,UnicodeString * data0,UnicodeString * data1,UErrorCode & status)717 UnicodeString getDerivedGender(Locale locale,
718 const char *structure,
719 UnicodeString *data0,
720 UnicodeString *data1,
721 UErrorCode &status) {
722 UnicodeString val = getDeriveCompoundRule(locale, "gender", structure, status);
723 if (val.length() == 1) {
724 switch (val[0]) {
725 case u'0':
726 return data0[GENDER_INDEX];
727 case u'1':
728 if (data1 == nullptr) {
729 return {};
730 }
731 return data1[GENDER_INDEX];
732 }
733 }
734 return val;
735 }
736
737 ////////////////////////
738 /// END DATA LOADING ///
739 ////////////////////////
740
741 // TODO: promote this somewhere? It's based on patternprops.cpp' trimWhitespace
trimSpaceChars(const UChar * s,int32_t & length)742 const UChar *trimSpaceChars(const UChar *s, int32_t &length) {
743 if (length <= 0 || (!u_isJavaSpaceChar(s[0]) && !u_isJavaSpaceChar(s[length - 1]))) {
744 return s;
745 }
746 int32_t start = 0;
747 int32_t limit = length;
748 while (start < limit && u_isJavaSpaceChar(s[start])) {
749 ++start;
750 }
751 if (start < limit) {
752 // There is non-white space at start; we will not move limit below that,
753 // so we need not test start<limit in the loop.
754 while (u_isJavaSpaceChar(s[limit - 1])) {
755 --limit;
756 }
757 }
758 length = limit - start;
759 return s + start;
760 }
761
762 /**
763 * Calculates the gender of an arbitrary unit: this is the *second*
764 * implementation of an algorithm to do this:
765 *
766 * Gender is also calculated in "processPatternTimes": that code path is "bottom
767 * up", loading the gender for every component of a compound unit (at the same
768 * time as loading the Long Names formatting patterns), even if the gender is
769 * unneeded, then combining the single units' genders into the compound unit's
770 * gender, according to the rules. This algorithm does a lazier "top-down"
771 * evaluation, starting with the compound unit, calculating which single unit's
772 * gender is needed by breaking it down according to the rules, and then loading
 * only the gender of the one single unit whose gender is needed.
774 *
775 * For future refactorings:
776 * 1. we could drop processPatternTimes' gender calculation and just call this
777 * function: for UNUM_UNIT_WIDTH_FULL_NAME, the unit gender is in the very
778 * same table as the formatting patterns, so loading it then may be
779 * efficient. For other unit widths however, it needs to be explicitly looked
780 * up anyway.
781 * 2. alternatively, if CLDR is providing all the genders we need such that we
782 * don't need to calculate them in ICU anymore, we could drop this function
783 * and keep only processPatternTimes' calculation. (And optimise it a bit?)
784 *
785 * @param locale The desired locale.
786 * @param unit The measure unit to calculate the gender for.
787 * @return The gender string for the unit, or an empty string if unknown or
788 * ungendered.
789 */
calculateGenderForUnit(const Locale & locale,const MeasureUnit & unit,UErrorCode & status)790 UnicodeString calculateGenderForUnit(const Locale &locale, const MeasureUnit &unit, UErrorCode &status) {
791 MeasureUnitImpl impl;
792 const MeasureUnitImpl& mui = MeasureUnitImpl::forMeasureUnit(unit, impl, status);
793 int32_t singleUnitIndex = 0;
794 if (mui.complexity == UMEASURE_UNIT_COMPOUND) {
795 int32_t startSlice = 0;
796 // inclusive
797 int32_t endSlice = mui.singleUnits.length()-1;
798 U_ASSERT(endSlice > 0); // Else it would not be COMPOUND
799 if (mui.singleUnits[endSlice]->dimensionality < 0) {
800 // We have a -per- construct
801 UnicodeString perRule = getDeriveCompoundRule(locale, "gender", "per", status);
802 if (perRule.length() != 1) {
803 // Fixed gender for -per- units
804 return perRule;
805 }
806 if (perRule[0] == u'1') {
807 // Find the start of the denominator. We already know there is one.
808 while (mui.singleUnits[startSlice]->dimensionality >= 0) {
809 startSlice++;
810 }
811 } else {
812 // Find the end of the numerator
813 while (endSlice >= 0 && mui.singleUnits[endSlice]->dimensionality < 0) {
814 endSlice--;
815 }
816 if (endSlice < 0) {
817 // We have only a denominator, e.g. "per-second".
818 // TODO(icu-units#28): find out what gender to use in the
819 // absence of a first value - mentioned in CLDR-14253.
820 return {};
821 }
822 }
823 }
824 if (endSlice > startSlice) {
825 // We have a -times- construct
826 UnicodeString timesRule = getDeriveCompoundRule(locale, "gender", "times", status);
827 if (timesRule.length() != 1) {
828 // Fixed gender for -times- units
829 return timesRule;
830 }
831 if (timesRule[0] == u'0') {
832 endSlice = startSlice;
833 } else {
834 // We assume timesRule[0] == u'1'
835 startSlice = endSlice;
836 }
837 }
838 U_ASSERT(startSlice == endSlice);
839 singleUnitIndex = startSlice;
840 } else if (mui.complexity == UMEASURE_UNIT_MIXED) {
841 status = U_INTERNAL_PROGRAM_ERROR;
842 return {};
843 } else {
844 U_ASSERT(mui.complexity == UMEASURE_UNIT_SINGLE);
845 U_ASSERT(mui.singleUnits.length() == 1);
846 }
847
848 // Now we know which singleUnit's gender we want
849 const SingleUnitImpl *singleUnit = mui.singleUnits[singleUnitIndex];
850 // Check for any power-prefix gender override:
851 if (std::abs(singleUnit->dimensionality) != 1) {
852 UnicodeString powerRule = getDeriveCompoundRule(locale, "gender", "power", status);
853 if (powerRule.length() != 1) {
854 // Fixed gender for -powN- units
855 return powerRule;
856 }
857 // powerRule[0] == u'0'; u'1' not currently in spec.
858 }
859 // Check for any SI and binary prefix gender override:
860 if (std::abs(singleUnit->dimensionality) != 1) {
861 UnicodeString prefixRule = getDeriveCompoundRule(locale, "gender", "prefix", status);
862 if (prefixRule.length() != 1) {
863 // Fixed gender for -powN- units
864 return prefixRule;
865 }
866 // prefixRule[0] == u'0'; u'1' not currently in spec.
867 }
868 // Now we've boiled it down to the gender of one simple unit identifier:
869 return getGenderForBuiltin(locale, MeasureUnit::forIdentifier(singleUnit->getSimpleUnitID(), status),
870 status);
871 }
872
maybeCalculateGender(const Locale & locale,const MeasureUnit & unitRef,UnicodeString * outArray,UErrorCode & status)873 void maybeCalculateGender(const Locale &locale,
874 const MeasureUnit &unitRef,
875 UnicodeString *outArray,
876 UErrorCode &status) {
877 if (outArray[GENDER_INDEX].isBogus()) {
878 UnicodeString meterGender = getGenderForBuiltin(locale, MeasureUnit::getMeter(), status);
879 if (meterGender.isEmpty()) {
880 // No gender for meter: assume ungendered language
881 return;
882 }
883 // We have a gendered language, but are lacking gender for unitRef.
884 outArray[GENDER_INDEX] = calculateGenderForUnit(locale, unitRef, status);
885 }
886 }
887
888 } // namespace
889
forMeasureUnit(const Locale & loc,const MeasureUnit & unitRef,const UNumberUnitWidth & width,const char * unitDisplayCase,const PluralRules * rules,const MicroPropsGenerator * parent,LongNameHandler * fillIn,UErrorCode & status)890 void LongNameHandler::forMeasureUnit(const Locale &loc,
891 const MeasureUnit &unitRef,
892 const UNumberUnitWidth &width,
893 const char *unitDisplayCase,
894 const PluralRules *rules,
895 const MicroPropsGenerator *parent,
896 LongNameHandler *fillIn,
897 UErrorCode &status) {
898 // From https://unicode.org/reports/tr35/tr35-general.html#compound-units -
899 // Points 1 and 2 are mostly handled by MeasureUnit:
900 //
901 // 1. If the unitId is empty or invalid, fail
902 // 2. Put the unitId into normalized order
903 U_ASSERT(fillIn != nullptr);
904
905 if (uprv_strcmp(unitRef.getType(), "") != 0) {
906 // Handling built-in units:
907 //
908 // 3. Set result to be getValue(unitId with length, pluralCategory, caseVariant)
909 // - If result is not empty, return it
910 UnicodeString simpleFormats[ARRAY_LENGTH];
911 getMeasureData(loc, unitRef, width, unitDisplayCase, simpleFormats, status);
912 maybeCalculateGender(loc, unitRef, simpleFormats, status);
913 if (U_FAILURE(status)) {
914 return;
915 }
916 fillIn->rules = rules;
917 fillIn->parent = parent;
918 fillIn->simpleFormatsToModifiers(simpleFormats,
919 {UFIELD_CATEGORY_NUMBER, UNUM_MEASURE_UNIT_FIELD}, status);
920 if (!simpleFormats[GENDER_INDEX].isBogus()) {
921 fillIn->gender = getGenderString(simpleFormats[GENDER_INDEX], status);
922 }
923 return;
924
925 // TODO(icu-units#145): figure out why this causes a failure in
926 // format/MeasureFormatTest/TestIndividualPluralFallback and other
927 // tests, when it should have been an alternative for the lines above:
928
929 // forArbitraryUnit(loc, unitRef, width, unitDisplayCase, fillIn, status);
930 // fillIn->rules = rules;
931 // fillIn->parent = parent;
932 // return;
933 } else {
934 // Check if it is a MeasureUnit this constructor handles: this
935 // constructor does not handle mixed units
936 U_ASSERT(unitRef.getComplexity(status) != UMEASURE_UNIT_MIXED);
937 forArbitraryUnit(loc, unitRef, width, unitDisplayCase, fillIn, status);
938 fillIn->rules = rules;
939 fillIn->parent = parent;
940 return;
941 }
942 }
943
forArbitraryUnit(const Locale & loc,const MeasureUnit & unitRef,const UNumberUnitWidth & width,const char * unitDisplayCase,LongNameHandler * fillIn,UErrorCode & status)944 void LongNameHandler::forArbitraryUnit(const Locale &loc,
945 const MeasureUnit &unitRef,
946 const UNumberUnitWidth &width,
947 const char *unitDisplayCase,
948 LongNameHandler *fillIn,
949 UErrorCode &status) {
950 if (U_FAILURE(status)) {
951 return;
952 }
953 if (fillIn == nullptr) {
954 status = U_INTERNAL_PROGRAM_ERROR;
955 return;
956 }
957
958 // Numbered list items are from the algorithms at
959 // https://unicode.org/reports/tr35/tr35-general.html#compound-units:
960 //
961 // 4. Divide the unitId into numerator (the part before the "-per-") and
962 // denominator (the part after the "-per-). If both are empty, fail
963 MeasureUnitImpl unit;
964 MeasureUnitImpl perUnit;
965 {
966 MeasureUnitImpl fullUnit = MeasureUnitImpl::forMeasureUnitMaybeCopy(unitRef, status);
967 if (U_FAILURE(status)) {
968 return;
969 }
970 for (int32_t i = 0; i < fullUnit.singleUnits.length(); i++) {
971 SingleUnitImpl *subUnit = fullUnit.singleUnits[i];
972 if (subUnit->dimensionality > 0) {
973 unit.appendSingleUnit(*subUnit, status);
974 } else {
975 subUnit->dimensionality *= -1;
976 perUnit.appendSingleUnit(*subUnit, status);
977 }
978 }
979 }
980
981 // TODO(icu-units#28): check placeholder logic, see if it needs to be
982 // present here instead of only in processPatternTimes:
983 //
984 // 5. Set both globalPlaceholder and globalPlaceholderPosition to be empty
985
986 DerivedComponents derivedPerCases(loc, "case", "per");
987
988 // 6. numeratorUnitString
989 UnicodeString numeratorUnitData[ARRAY_LENGTH];
990 processPatternTimes(std::move(unit), loc, width, derivedPerCases.value0(unitDisplayCase),
991 numeratorUnitData, status);
992
993 // 7. denominatorUnitString
994 UnicodeString denominatorUnitData[ARRAY_LENGTH];
995 processPatternTimes(std::move(perUnit), loc, width, derivedPerCases.value1(unitDisplayCase),
996 denominatorUnitData, status);
997
998 // TODO(icu-units#139):
999 // - implement DerivedComponents for "plural/times" and "plural/power":
1000 // French has different rules, we'll be producing the wrong results
1001 // currently. (Prove via tests!)
1002 // - implement DerivedComponents for "plural/per", "plural/prefix",
1003 // "case/times", "case/power", and "case/prefix" - although they're
1004 // currently hardcoded. Languages with different rules are surely on the
1005 // way.
1006 //
1007 // Currently we only use "case/per", "plural/times", "case/times", and
1008 // "case/power".
1009 //
1010 // This may have impact on multiSimpleFormatsToModifiers(...) below too?
1011 // These rules are currently (ICU 69) all the same and hard-coded below.
1012 UnicodeString perUnitPattern;
1013 if (!denominatorUnitData[PER_INDEX].isBogus()) {
1014 // If we have no denominator, we obtain the empty string:
1015 perUnitPattern = denominatorUnitData[PER_INDEX];
1016 } else {
1017 // 8. Set perPattern to be getValue([per], locale, length)
1018 UnicodeString rawPerUnitFormat = getCompoundValue("per", loc, width, status);
1019 // rawPerUnitFormat is something like "{0} per {1}"; we need to substitute in the secondary unit.
1020 SimpleFormatter perPatternFormatter(rawPerUnitFormat, 2, 2, status);
1021 if (U_FAILURE(status)) {
1022 return;
1023 }
1024 // Plural and placeholder handling for 7. denominatorUnitString:
1025 // TODO(icu-units#139): hardcoded:
1026 // <deriveComponent feature="plural" structure="per" value0="compound" value1="one"/>
1027 UnicodeString denominatorFormat =
1028 getWithPlural(denominatorUnitData, StandardPlural::Form::ONE, status);
1029 // Some "one" pattern may not contain "{0}". For example in "ar" or "ne" locale.
1030 SimpleFormatter denominatorFormatter(denominatorFormat, 0, 1, status);
1031 if (U_FAILURE(status)) {
1032 return;
1033 }
1034 UnicodeString denominatorPattern = denominatorFormatter.getTextWithNoArguments();
1035 int32_t trimmedLen = denominatorPattern.length();
1036 const UChar *trimmed = trimSpaceChars(denominatorPattern.getBuffer(), trimmedLen);
1037 UnicodeString denominatorString(false, trimmed, trimmedLen);
1038 // 9. If the denominatorString is empty, set result to
1039 // [numeratorString], otherwise set result to format(perPattern,
1040 // numeratorString, denominatorString)
1041 //
1042 // TODO(icu-units#28): Why does UnicodeString need to be explicit in the
1043 // following line?
1044 perPatternFormatter.format(UnicodeString(u"{0}"), denominatorString, perUnitPattern, status);
1045 if (U_FAILURE(status)) {
1046 return;
1047 }
1048 }
1049 if (perUnitPattern.length() == 0) {
1050 fillIn->simpleFormatsToModifiers(numeratorUnitData,
1051 {UFIELD_CATEGORY_NUMBER, UNUM_MEASURE_UNIT_FIELD}, status);
1052 } else {
1053 fillIn->multiSimpleFormatsToModifiers(numeratorUnitData, perUnitPattern,
1054 {UFIELD_CATEGORY_NUMBER, UNUM_MEASURE_UNIT_FIELD}, status);
1055 }
1056
1057 // Gender
1058 //
1059 // TODO(icu-units#28): find out what gender to use in the absence of a first
1060 // value - e.g. what's the gender of "per-second"? Mentioned in CLDR-14253.
1061 //
1062 // gender/per deriveCompound rules don't say:
1063 // <deriveCompound feature="gender" structure="per" value="0"/> <!-- gender(gram-per-meter) ← gender(gram) -->
1064 fillIn->gender = getGenderString(
1065 getDerivedGender(loc, "per", numeratorUnitData, denominatorUnitData, status), status);
1066 }
1067
processPatternTimes(MeasureUnitImpl && productUnit,Locale loc,const UNumberUnitWidth & width,const char * caseVariant,UnicodeString * outArray,UErrorCode & status)1068 void LongNameHandler::processPatternTimes(MeasureUnitImpl &&productUnit,
1069 Locale loc,
1070 const UNumberUnitWidth &width,
1071 const char *caseVariant,
1072 UnicodeString *outArray,
1073 UErrorCode &status) {
1074 if (U_FAILURE(status)) {
1075 return;
1076 }
1077 if (productUnit.complexity == UMEASURE_UNIT_MIXED) {
1078 // These are handled by MixedUnitLongNameHandler
1079 status = U_UNSUPPORTED_ERROR;
1080 return;
1081 }
1082
1083 #if U_DEBUG
1084 for (int32_t pluralIndex = 0; pluralIndex < ARRAY_LENGTH; pluralIndex++) {
1085 U_ASSERT(outArray[pluralIndex].length() == 0);
1086 U_ASSERT(!outArray[pluralIndex].isBogus());
1087 }
1088 #endif
1089
1090 if (productUnit.identifier.isEmpty()) {
1091 // TODO(icu-units#28): consider when serialize should be called.
1092 // identifier might also be empty for MeasureUnit().
1093 productUnit.serialize(status);
1094 }
1095 if (U_FAILURE(status)) {
1096 return;
1097 }
1098 if (productUnit.identifier.length() == 0) {
1099 // MeasureUnit(): no units: return empty strings.
1100 return;
1101 }
1102
1103 MeasureUnit builtinUnit;
1104 if (MeasureUnit::findBySubType(productUnit.identifier.toStringPiece(), &builtinUnit)) {
1105 // TODO(icu-units#145): spec doesn't cover builtin-per-builtin, it
1106 // breaks them all down. Do we want to drop this?
1107 // - findBySubType isn't super efficient, if we skip it and go to basic
1108 // singles, we don't have to construct MeasureUnit's anymore.
1109 // - Check all the existing unit tests that fail without this: is it due
1110 // to incorrect fallback via getMeasureData?
1111 // - Do those unit tests cover this code path representatively?
1112 if (builtinUnit != MeasureUnit()) {
1113 getMeasureData(loc, builtinUnit, width, caseVariant, outArray, status);
1114 maybeCalculateGender(loc, builtinUnit, outArray, status);
1115 }
1116 return;
1117 }
1118
1119 // 2. Set timesPattern to be getValue(times, locale, length)
1120 UnicodeString timesPattern = getCompoundValue("times", loc, width, status);
1121 SimpleFormatter timesPatternFormatter(timesPattern, 2, 2, status);
1122 if (U_FAILURE(status)) {
1123 return;
1124 }
1125
1126 PlaceholderPosition globalPlaceholder[ARRAY_LENGTH];
1127 UChar globalJoinerChar = 0;
1128 // Numbered list items are from the algorithms at
1129 // https://unicode.org/reports/tr35/tr35-general.html#compound-units:
1130 //
1131 // pattern(...) point 5:
1132 // - Set both globalPlaceholder and globalPlaceholderPosition to be empty
1133 //
1134 // 3. Set result to be empty
1135 for (int32_t pluralIndex = 0; pluralIndex < ARRAY_LENGTH; pluralIndex++) {
1136 // Initial state: empty string pattern, via all falling back to OTHER:
1137 if (pluralIndex == StandardPlural::Form::OTHER) {
1138 outArray[pluralIndex].remove();
1139 } else {
1140 outArray[pluralIndex].setToBogus();
1141 }
1142 globalPlaceholder[pluralIndex] = PH_EMPTY;
1143 }
1144
1145 // Empty string represents "compound" (propagate the plural form).
1146 const char *pluralCategory = "";
1147 DerivedComponents derivedTimesPlurals(loc, "plural", "times");
1148 DerivedComponents derivedTimesCases(loc, "case", "times");
1149 DerivedComponents derivedPowerCases(loc, "case", "power");
1150
1151 // 4. For each single_unit in product_unit
1152 for (int32_t singleUnitIndex = 0; singleUnitIndex < productUnit.singleUnits.length();
1153 singleUnitIndex++) {
1154 SingleUnitImpl *singleUnit = productUnit.singleUnits[singleUnitIndex];
1155 const char *singlePluralCategory;
1156 const char *singleCaseVariant;
1157 // TODO(icu-units#28): ensure we have unit tests that change/fail if we
1158 // assign incorrect case variants here:
1159 if (singleUnitIndex < productUnit.singleUnits.length() - 1) {
1160 // 4.1. If hasMultiple
1161 singlePluralCategory = derivedTimesPlurals.value0(pluralCategory);
1162 singleCaseVariant = derivedTimesCases.value0(caseVariant);
1163 pluralCategory = derivedTimesPlurals.value1(pluralCategory);
1164 caseVariant = derivedTimesCases.value1(caseVariant);
1165 } else {
1166 singlePluralCategory = derivedTimesPlurals.value1(pluralCategory);
1167 singleCaseVariant = derivedTimesCases.value1(caseVariant);
1168 }
1169
1170 // 4.2. Get the gender of that single_unit
1171 MeasureUnit simpleUnit;
1172 if (!MeasureUnit::findBySubType(singleUnit->getSimpleUnitID(), &simpleUnit)) {
1173 // Ideally all simple units should be known, but they're not:
1174 // 100-kilometer is internally treated as a simple unit, but it is
1175 // not a built-in unit and does not have formatting data in CLDR 39.
1176 //
1177 // TODO(icu-units#28): test (desirable) invariants in unit tests.
1178 status = U_UNSUPPORTED_ERROR;
1179 return;
1180 }
1181 const char *gender = getGenderString(getGenderForBuiltin(loc, simpleUnit, status), status);
1182
1183 // 4.3. If singleUnit starts with a dimensionality_prefix, such as 'square-'
1184 U_ASSERT(singleUnit->dimensionality > 0);
1185 int32_t dimensionality = singleUnit->dimensionality;
1186 UnicodeString dimensionalityPrefixPatterns[ARRAY_LENGTH];
1187 if (dimensionality != 1) {
1188 // 4.3.1. set dimensionalityPrefixPattern to be
1189 // getValue(that dimensionality_prefix, locale, length, singlePluralCategory, singleCaseVariant, gender),
1190 // such as "{0} kwadratowym"
1191 CharString dimensionalityKey("compound/power", status);
1192 dimensionalityKey.appendNumber(dimensionality, status);
1193 getInflectedMeasureData(dimensionalityKey.toStringPiece(), loc, width, gender,
1194 singleCaseVariant, dimensionalityPrefixPatterns, status);
1195 if (U_FAILURE(status)) {
1196 // At the time of writing, only pow2 and pow3 are supported.
1197 // Attempting to format other powers results in a
1198 // U_RESOURCE_TYPE_MISMATCH. We convert the error if we
1199 // understand it:
1200 if (status == U_RESOURCE_TYPE_MISMATCH && dimensionality > 3) {
1201 status = U_UNSUPPORTED_ERROR;
1202 }
1203 return;
1204 }
1205
1206 // TODO(icu-units#139):
1207 // 4.3.2. set singlePluralCategory to be power0(singlePluralCategory)
1208
1209 // 4.3.3. set singleCaseVariant to be power0(singleCaseVariant)
1210 singleCaseVariant = derivedPowerCases.value0(singleCaseVariant);
1211 // 4.3.4. remove the dimensionality_prefix from singleUnit
1212 singleUnit->dimensionality = 1;
1213 }
1214
1215 // 4.4. if singleUnit starts with an si_prefix, such as 'centi'
1216 UMeasurePrefix prefix = singleUnit->unitPrefix;
1217 UnicodeString prefixPattern;
1218 if (prefix != UMEASURE_PREFIX_ONE) {
1219 // 4.4.1. set siPrefixPattern to be getValue(that si_prefix, locale,
1220 // length), such as "centy{0}"
1221 CharString prefixKey;
1222 // prefixKey looks like "1024p3" or "10p-2":
1223 prefixKey.appendNumber(umeas_getPrefixBase(prefix), status);
1224 prefixKey.append('p', status);
1225 prefixKey.appendNumber(umeas_getPrefixPower(prefix), status);
1226 // Contains a pattern like "centy{0}".
1227 prefixPattern = getCompoundValue(prefixKey.toStringPiece(), loc, width, status);
1228
1229 // 4.4.2. set singlePluralCategory to be prefix0(singlePluralCategory)
1230 //
1231 // TODO(icu-units#139): that refers to these rules:
1232 // <deriveComponent feature="plural" structure="prefix" value0="one" value1="compound"/>
1233 // though I'm not sure what other value they might end up having.
1234 //
1235 // 4.4.3. set singleCaseVariant to be prefix0(singleCaseVariant)
1236 //
1237 // TODO(icu-units#139): that refers to:
1238 // <deriveComponent feature="case" structure="prefix" value0="nominative"
1239 // value1="compound"/> but the prefix (value0) doesn't have case, the rest simply
1240 // propagates.
1241
1242 // 4.4.4. remove the si_prefix from singleUnit
1243 singleUnit->unitPrefix = UMEASURE_PREFIX_ONE;
1244 }
1245
1246 // 4.5. Set corePattern to be the getValue(singleUnit, locale, length,
1247 // singlePluralCategory, singleCaseVariant), such as "{0} metrem"
1248 UnicodeString singleUnitArray[ARRAY_LENGTH];
1249 // At this point we are left with a Simple Unit:
1250 U_ASSERT(uprv_strcmp(singleUnit->build(status).getIdentifier(), singleUnit->getSimpleUnitID()) ==
1251 0);
1252 getMeasureData(loc, singleUnit->build(status), width, singleCaseVariant, singleUnitArray,
1253 status);
1254 if (U_FAILURE(status)) {
1255 // Shouldn't happen if we have data for all single units
1256 return;
1257 }
1258
1259 // Calculate output gender
1260 if (!singleUnitArray[GENDER_INDEX].isBogus()) {
1261 U_ASSERT(!singleUnitArray[GENDER_INDEX].isEmpty());
1262 UnicodeString uVal;
1263
1264 if (prefix != UMEASURE_PREFIX_ONE) {
1265 singleUnitArray[GENDER_INDEX] =
1266 getDerivedGender(loc, "prefix", singleUnitArray, nullptr, status);
1267 }
1268
1269 if (dimensionality != 1) {
1270 singleUnitArray[GENDER_INDEX] =
1271 getDerivedGender(loc, "power", singleUnitArray, nullptr, status);
1272 }
1273
1274 UnicodeString timesGenderRule = getDeriveCompoundRule(loc, "gender", "times", status);
1275 if (timesGenderRule.length() == 1) {
1276 switch (timesGenderRule[0]) {
1277 case u'0':
1278 if (singleUnitIndex == 0) {
1279 U_ASSERT(outArray[GENDER_INDEX].isBogus());
1280 outArray[GENDER_INDEX] = singleUnitArray[GENDER_INDEX];
1281 }
1282 break;
1283 case u'1':
1284 if (singleUnitIndex == productUnit.singleUnits.length() - 1) {
1285 U_ASSERT(outArray[GENDER_INDEX].isBogus());
1286 outArray[GENDER_INDEX] = singleUnitArray[GENDER_INDEX];
1287 }
1288 }
1289 } else {
1290 if (outArray[GENDER_INDEX].isBogus()) {
1291 outArray[GENDER_INDEX] = timesGenderRule;
1292 }
1293 }
1294 }
1295
1296 // Calculate resulting patterns for each plural form
1297 for (int32_t pluralIndex = 0; pluralIndex < StandardPlural::Form::COUNT; pluralIndex++) {
1298 StandardPlural::Form plural = static_cast<StandardPlural::Form>(pluralIndex);
1299
1300 // singleUnitArray[pluralIndex] looks something like "{0} Meter"
1301 if (outArray[pluralIndex].isBogus()) {
1302 if (singleUnitArray[pluralIndex].isBogus()) {
1303 // Let the usual plural fallback mechanism take care of this
1304 // plural form
1305 continue;
1306 } else {
1307 // Since our singleUnit can have a plural form that outArray
1308 // doesn't yet have (relying on fallback to OTHER), we start
1309 // by grabbing it with the normal plural fallback mechanism
1310 outArray[pluralIndex] = getWithPlural(outArray, plural, status);
1311 if (U_FAILURE(status)) {
1312 return;
1313 }
1314 }
1315 }
1316
1317 if (uprv_strcmp(singlePluralCategory, "") != 0) {
1318 plural = static_cast<StandardPlural::Form>(getIndex(singlePluralCategory, status));
1319 }
1320
1321 // 4.6. Extract(corePattern, coreUnit, placeholder, placeholderPosition) from that pattern.
1322 UnicodeString coreUnit;
1323 PlaceholderPosition placeholderPosition;
1324 UChar joinerChar;
1325 extractCorePattern(getWithPlural(singleUnitArray, plural, status), coreUnit,
1326 placeholderPosition, joinerChar);
1327
1328 // 4.7 If the position is middle, then fail
1329 if (placeholderPosition == PH_MIDDLE) {
1330 status = U_UNSUPPORTED_ERROR;
1331 return;
1332 }
1333
1334 // 4.8. If globalPlaceholder is empty
1335 if (globalPlaceholder[pluralIndex] == PH_EMPTY) {
1336 globalPlaceholder[pluralIndex] = placeholderPosition;
1337 globalJoinerChar = joinerChar;
1338 } else {
1339 // Expect all units involved to have the same placeholder position
1340 U_ASSERT(globalPlaceholder[pluralIndex] == placeholderPosition);
1341 // TODO(icu-units#28): Do we want to add a unit test that checks
1342 // for consistent joiner chars? Probably not, given how
1343 // inconsistent they are. File a CLDR ticket with examples?
1344 }
1345 // Now coreUnit would be just "Meter"
1346
1347 // 4.9. If siPrefixPattern is not empty
1348 if (prefix != UMEASURE_PREFIX_ONE) {
1349 SimpleFormatter prefixCompiled(prefixPattern, 1, 1, status);
1350 if (U_FAILURE(status)) {
1351 return;
1352 }
1353
1354 // 4.9.1. Set coreUnit to be the combineLowercasing(locale, length, siPrefixPattern,
1355 // coreUnit)
1356 UnicodeString tmp;
1357 // combineLowercasing(locale, length, prefixPattern, coreUnit)
1358 //
1359 // TODO(icu-units#28): run this only if prefixPattern does not
1360 // contain space characters - do languages "as", "bn", "hi",
1361 // "kk", etc have concepts of upper and lower case?:
1362 if (width == UNUM_UNIT_WIDTH_FULL_NAME) {
1363 coreUnit.toLower(loc);
1364 }
1365 prefixCompiled.format(coreUnit, tmp, status);
1366 if (U_FAILURE(status)) {
1367 return;
1368 }
1369 coreUnit = tmp;
1370 }
1371
1372 // 4.10. If dimensionalityPrefixPattern is not empty
1373 if (dimensionality != 1) {
1374 SimpleFormatter dimensionalityCompiled(
1375 getWithPlural(dimensionalityPrefixPatterns, plural, status), 1, 1, status);
1376 if (U_FAILURE(status)) {
1377 return;
1378 }
1379
1380 // 4.10.1. Set coreUnit to be the combineLowercasing(locale, length,
1381 // dimensionalityPrefixPattern, coreUnit)
1382 UnicodeString tmp;
1383 // combineLowercasing(locale, length, prefixPattern, coreUnit)
1384 //
1385 // TODO(icu-units#28): run this only if prefixPattern does not
1386 // contain space characters - do languages "as", "bn", "hi",
1387 // "kk", etc have concepts of upper and lower case?:
1388 if (width == UNUM_UNIT_WIDTH_FULL_NAME) {
1389 coreUnit.toLower(loc);
1390 }
1391 dimensionalityCompiled.format(coreUnit, tmp, status);
1392 if (U_FAILURE(status)) {
1393 return;
1394 }
1395 coreUnit = tmp;
1396 }
1397
1398 if (outArray[pluralIndex].length() == 0) {
1399 // 4.11. If the result is empty, set result to be coreUnit
1400 outArray[pluralIndex] = coreUnit;
1401 } else {
1402 // 4.12. Otherwise set result to be format(timesPattern, result, coreUnit)
1403 UnicodeString tmp;
1404 timesPatternFormatter.format(outArray[pluralIndex], coreUnit, tmp, status);
1405 outArray[pluralIndex] = tmp;
1406 }
1407 }
1408 }
1409 for (int32_t pluralIndex = 0; pluralIndex < StandardPlural::Form::COUNT; pluralIndex++) {
1410 if (globalPlaceholder[pluralIndex] == PH_BEGINNING) {
1411 UnicodeString tmp;
1412 tmp.append(u"{0}", 3);
1413 if (globalJoinerChar != 0) {
1414 tmp.append(globalJoinerChar);
1415 }
1416 tmp.append(outArray[pluralIndex]);
1417 outArray[pluralIndex] = tmp;
1418 } else if (globalPlaceholder[pluralIndex] == PH_END) {
1419 if (globalJoinerChar != 0) {
1420 outArray[pluralIndex].append(globalJoinerChar);
1421 }
1422 outArray[pluralIndex].append(u"{0}", 3);
1423 }
1424 }
1425 }
1426
getUnitDisplayName(const Locale & loc,const MeasureUnit & unit,UNumberUnitWidth width,UErrorCode & status)1427 UnicodeString LongNameHandler::getUnitDisplayName(
1428 const Locale& loc,
1429 const MeasureUnit& unit,
1430 UNumberUnitWidth width,
1431 UErrorCode& status) {
1432 if (U_FAILURE(status)) {
1433 return ICU_Utility::makeBogusString();
1434 }
1435 UnicodeString simpleFormats[ARRAY_LENGTH];
1436 getMeasureData(loc, unit, width, "", simpleFormats, status);
1437 return simpleFormats[DNAM_INDEX];
1438 }
1439
getUnitPattern(const Locale & loc,const MeasureUnit & unit,UNumberUnitWidth width,StandardPlural::Form pluralForm,UErrorCode & status)1440 UnicodeString LongNameHandler::getUnitPattern(
1441 const Locale& loc,
1442 const MeasureUnit& unit,
1443 UNumberUnitWidth width,
1444 StandardPlural::Form pluralForm,
1445 UErrorCode& status) {
1446 if (U_FAILURE(status)) {
1447 return ICU_Utility::makeBogusString();
1448 }
1449 UnicodeString simpleFormats[ARRAY_LENGTH];
1450 getMeasureData(loc, unit, width, "", simpleFormats, status);
1451 // The above already handles fallback from other widths to short
1452 if (U_FAILURE(status)) {
1453 return ICU_Utility::makeBogusString();
1454 }
1455 // Now handle fallback from other plural forms to OTHER
1456 return (!(simpleFormats[pluralForm]).isBogus())? simpleFormats[pluralForm]:
1457 simpleFormats[StandardPlural::Form::OTHER];
1458 }
1459
forCurrencyLongNames(const Locale & loc,const CurrencyUnit & currency,const PluralRules * rules,const MicroPropsGenerator * parent,UErrorCode & status)1460 LongNameHandler* LongNameHandler::forCurrencyLongNames(const Locale &loc, const CurrencyUnit ¤cy,
1461 const PluralRules *rules,
1462 const MicroPropsGenerator *parent,
1463 UErrorCode &status) {
1464 auto* result = new LongNameHandler(rules, parent);
1465 if (result == nullptr) {
1466 status = U_MEMORY_ALLOCATION_ERROR;
1467 return nullptr;
1468 }
1469 UnicodeString simpleFormats[ARRAY_LENGTH];
1470 getCurrencyLongNameData(loc, currency, simpleFormats, status);
1471 if (U_FAILURE(status)) { return nullptr; }
1472 result->simpleFormatsToModifiers(simpleFormats, {UFIELD_CATEGORY_NUMBER, UNUM_CURRENCY_FIELD}, status);
1473 // TODO(icu-units#28): currency gender?
1474 return result;
1475 }
1476
simpleFormatsToModifiers(const UnicodeString * simpleFormats,Field field,UErrorCode & status)1477 void LongNameHandler::simpleFormatsToModifiers(const UnicodeString *simpleFormats, Field field,
1478 UErrorCode &status) {
1479 for (int32_t i = 0; i < StandardPlural::Form::COUNT; i++) {
1480 StandardPlural::Form plural = static_cast<StandardPlural::Form>(i);
1481 UnicodeString simpleFormat = getWithPlural(simpleFormats, plural, status);
1482 if (U_FAILURE(status)) { return; }
1483 SimpleFormatter compiledFormatter(simpleFormat, 0, 1, status);
1484 if (U_FAILURE(status)) { return; }
1485 fModifiers[i] = SimpleModifier(compiledFormatter, field, false, {this, SIGNUM_POS_ZERO, plural});
1486 }
1487 }
1488
multiSimpleFormatsToModifiers(const UnicodeString * leadFormats,UnicodeString trailFormat,Field field,UErrorCode & status)1489 void LongNameHandler::multiSimpleFormatsToModifiers(const UnicodeString *leadFormats, UnicodeString trailFormat,
1490 Field field, UErrorCode &status) {
1491 SimpleFormatter trailCompiled(trailFormat, 1, 1, status);
1492 if (U_FAILURE(status)) { return; }
1493 for (int32_t i = 0; i < StandardPlural::Form::COUNT; i++) {
1494 StandardPlural::Form plural = static_cast<StandardPlural::Form>(i);
1495 UnicodeString leadFormat = getWithPlural(leadFormats, plural, status);
1496 if (U_FAILURE(status)) { return; }
1497 UnicodeString compoundFormat;
1498 if (leadFormat.length() == 0) {
1499 compoundFormat = trailFormat;
1500 } else {
1501 trailCompiled.format(leadFormat, compoundFormat, status);
1502 if (U_FAILURE(status)) { return; }
1503 }
1504 SimpleFormatter compoundCompiled(compoundFormat, 0, 1, status);
1505 if (U_FAILURE(status)) { return; }
1506 fModifiers[i] = SimpleModifier(compoundCompiled, field, false, {this, SIGNUM_POS_ZERO, plural});
1507 }
1508 }
1509
processQuantity(DecimalQuantity & quantity,MicroProps & micros,UErrorCode & status) const1510 void LongNameHandler::processQuantity(DecimalQuantity &quantity, MicroProps µs,
1511 UErrorCode &status) const {
1512 if (parent != NULL) {
1513 parent->processQuantity(quantity, micros, status);
1514 }
1515 StandardPlural::Form pluralForm = utils::getPluralSafe(micros.rounder, rules, quantity, status);
1516 micros.modOuter = &fModifiers[pluralForm];
1517 micros.gender = gender;
1518 }
1519
getModifier(Signum,StandardPlural::Form plural) const1520 const Modifier* LongNameHandler::getModifier(Signum /*signum*/, StandardPlural::Form plural) const {
1521 return &fModifiers[plural];
1522 }
1523
forMeasureUnit(const Locale & loc,const MeasureUnit & mixedUnit,const UNumberUnitWidth & width,const char * unitDisplayCase,const PluralRules * rules,const MicroPropsGenerator * parent,MixedUnitLongNameHandler * fillIn,UErrorCode & status)1524 void MixedUnitLongNameHandler::forMeasureUnit(const Locale &loc,
1525 const MeasureUnit &mixedUnit,
1526 const UNumberUnitWidth &width,
1527 const char *unitDisplayCase,
1528 const PluralRules *rules,
1529 const MicroPropsGenerator *parent,
1530 MixedUnitLongNameHandler *fillIn,
1531 UErrorCode &status) {
1532 U_ASSERT(mixedUnit.getComplexity(status) == UMEASURE_UNIT_MIXED);
1533 U_ASSERT(fillIn != nullptr);
1534 if (U_FAILURE(status)) {
1535 return;
1536 }
1537
1538 MeasureUnitImpl temp;
1539 const MeasureUnitImpl &impl = MeasureUnitImpl::forMeasureUnit(mixedUnit, temp, status);
1540 // Defensive, for production code:
1541 if (impl.complexity != UMEASURE_UNIT_MIXED) {
1542 // Should be using the normal LongNameHandler
1543 status = U_UNSUPPORTED_ERROR;
1544 return;
1545 }
1546
1547 fillIn->fMixedUnitCount = impl.singleUnits.length();
1548 fillIn->fMixedUnitData.adoptInstead(new UnicodeString[fillIn->fMixedUnitCount * ARRAY_LENGTH]);
1549 for (int32_t i = 0; i < fillIn->fMixedUnitCount; i++) {
1550 // Grab data for each of the components.
1551 UnicodeString *unitData = &fillIn->fMixedUnitData[i * ARRAY_LENGTH];
1552 // TODO(CLDR-14502): check from the CLDR-14502 ticket whether this
1553 // propagation of unitDisplayCase is correct:
1554 getMeasureData(loc, impl.singleUnits[i]->build(status), width, unitDisplayCase, unitData,
1555 status);
1556 // TODO(ICU-21494): if we add support for gender for mixed units, we may
1557 // need maybeCalculateGender() here.
1558 }
1559
1560 // TODO(icu-units#120): Make sure ICU doesn't output zero-valued
1561 // high-magnitude fields
1562 // * for mixed units count N, produce N listFormatters, one for each subset
1563 // that might be formatted.
1564 UListFormatterWidth listWidth = ULISTFMT_WIDTH_SHORT;
1565 if (width == UNUM_UNIT_WIDTH_NARROW) {
1566 listWidth = ULISTFMT_WIDTH_NARROW;
1567 } else if (width == UNUM_UNIT_WIDTH_FULL_NAME) {
1568 // This might be the same as SHORT in most languages:
1569 listWidth = ULISTFMT_WIDTH_WIDE;
1570 }
1571 fillIn->fListFormatter.adoptInsteadAndCheckErrorCode(
1572 ListFormatter::createInstance(loc, ULISTFMT_TYPE_UNITS, listWidth, status), status);
1573 // TODO(ICU-21494): grab gender of each unit, calculate the gender
1574 // associated with this list formatter, save it for later.
1575 fillIn->rules = rules;
1576 fillIn->parent = parent;
1577
1578 // We need a localised NumberFormatter for the numbers of the bigger units
1579 // (providing Arabic numerals, for example).
1580 fillIn->fNumberFormatter = NumberFormatter::withLocale(loc);
1581 }
1582
processQuantity(DecimalQuantity & quantity,MicroProps & micros,UErrorCode & status) const1583 void MixedUnitLongNameHandler::processQuantity(DecimalQuantity &quantity, MicroProps µs,
1584 UErrorCode &status) const {
1585 U_ASSERT(fMixedUnitCount > 1);
1586 if (parent != nullptr) {
1587 parent->processQuantity(quantity, micros, status);
1588 }
1589 micros.modOuter = getMixedUnitModifier(quantity, micros, status);
1590 }
1591
getMixedUnitModifier(DecimalQuantity & quantity,MicroProps & micros,UErrorCode & status) const1592 const Modifier *MixedUnitLongNameHandler::getMixedUnitModifier(DecimalQuantity &quantity,
1593 MicroProps µs,
1594 UErrorCode &status) const {
1595 if (micros.mixedMeasuresCount == 0) {
1596 U_ASSERT(micros.mixedMeasuresCount > 0); // Mixed unit: we must have more than one unit value
1597 status = U_UNSUPPORTED_ERROR;
1598 return µs.helpers.emptyWeakModifier;
1599 }
1600
1601 // Algorithm:
1602 //
1603 // For the mixed-units measurement of: "3 yard, 1 foot, 2.6 inch", we should
1604 // find "3 yard" and "1 foot" in micros.mixedMeasures.
1605 //
1606 // Obtain long-names with plural forms corresponding to measure values:
1607 // * {0} yards, {0} foot, {0} inches
1608 //
1609 // Format the integer values appropriately and modify with the format
1610 // strings:
1611 // - 3 yards, 1 foot
1612 //
1613 // Use ListFormatter to combine, with one placeholder:
1614 // - 3 yards, 1 foot and {0} inches
1615 //
1616 // Return a SimpleModifier for this pattern, letting the rest of the
1617 // pipeline take care of the remaining inches.
1618
1619 LocalArray<UnicodeString> outputMeasuresList(new UnicodeString[fMixedUnitCount], status);
1620 if (U_FAILURE(status)) {
1621 return µs.helpers.emptyWeakModifier;
1622 }
1623
1624 StandardPlural::Form quantityPlural = StandardPlural::Form::OTHER;
1625 for (int32_t i = 0; i < micros.mixedMeasuresCount; i++) {
1626 DecimalQuantity fdec;
1627
1628 // If numbers are negative, only the first number needs to have its
1629 // negative sign formatted.
1630 int64_t number = i > 0 ? std::abs(micros.mixedMeasures[i]) : micros.mixedMeasures[i];
1631
1632 if (micros.indexOfQuantity == i) { // Insert placeholder for `quantity`
1633 // If quantity is not the first value and quantity is negative
1634 if (micros.indexOfQuantity > 0 && quantity.isNegative()) {
1635 quantity.negate();
1636 }
1637
1638 StandardPlural::Form quantityPlural =
1639 utils::getPluralSafe(micros.rounder, rules, quantity, status);
1640 UnicodeString quantityFormatWithPlural =
1641 getWithPlural(&fMixedUnitData[i * ARRAY_LENGTH], quantityPlural, status);
1642 SimpleFormatter quantityFormatter(quantityFormatWithPlural, 0, 1, status);
1643 quantityFormatter.format(UnicodeString(u"{0}"), outputMeasuresList[i], status);
1644 } else {
1645 fdec.setToLong(number);
1646 StandardPlural::Form pluralForm = utils::getStandardPlural(rules, fdec);
1647 UnicodeString simpleFormat =
1648 getWithPlural(&fMixedUnitData[i * ARRAY_LENGTH], pluralForm, status);
1649 SimpleFormatter compiledFormatter(simpleFormat, 0, 1, status);
1650 UnicodeString num;
1651 auto appendable = UnicodeStringAppendable(num);
1652
1653 fNumberFormatter.formatDecimalQuantity(fdec, status).appendTo(appendable, status);
1654 compiledFormatter.format(num, outputMeasuresList[i], status);
1655 }
1656 }
1657
1658 // TODO(ICU-21494): implement gender for lists of mixed units. Presumably we
1659 // can set micros.gender to the gender associated with the list formatter in
1660 // use below (once we have correct support for that). And then document this
1661 // appropriately? "getMixedUnitModifier" doesn't sound like it would do
1662 // something like this.
1663
1664 // Combine list into a "premixed" pattern
1665 UnicodeString premixedFormatPattern;
1666 fListFormatter->format(outputMeasuresList.getAlias(), fMixedUnitCount, premixedFormatPattern,
1667 status);
1668 SimpleFormatter premixedCompiled(premixedFormatPattern, 0, 1, status);
1669 if (U_FAILURE(status)) {
1670 return µs.helpers.emptyWeakModifier;
1671 }
1672
1673 micros.helpers.mixedUnitModifier =
1674 SimpleModifier(premixedCompiled, kUndefinedField, false, {this, SIGNUM_POS_ZERO, quantityPlural});
1675 return µs.helpers.mixedUnitModifier;
1676 }
1677
getModifier(Signum,StandardPlural::Form) const1678 const Modifier *MixedUnitLongNameHandler::getModifier(Signum /*signum*/,
1679 StandardPlural::Form /*plural*/) const {
1680 // TODO(icu-units#28): investigate this method when investigating where
1681 // ModifierStore::getModifier() gets used. To be sure it remains
1682 // unreachable:
1683 UPRV_UNREACHABLE_EXIT;
1684 return nullptr;
1685 }
1686
forMeasureUnits(const Locale & loc,const MaybeStackVector<MeasureUnit> & units,const UNumberUnitWidth & width,const char * unitDisplayCase,const PluralRules * rules,const MicroPropsGenerator * parent,UErrorCode & status)1687 LongNameMultiplexer *LongNameMultiplexer::forMeasureUnits(const Locale &loc,
1688 const MaybeStackVector<MeasureUnit> &units,
1689 const UNumberUnitWidth &width,
1690 const char *unitDisplayCase,
1691 const PluralRules *rules,
1692 const MicroPropsGenerator *parent,
1693 UErrorCode &status) {
1694 LocalPointer<LongNameMultiplexer> result(new LongNameMultiplexer(parent), status);
1695 if (U_FAILURE(status)) {
1696 return nullptr;
1697 }
1698 U_ASSERT(units.length() > 0);
1699 if (result->fHandlers.resize(units.length()) == nullptr) {
1700 status = U_MEMORY_ALLOCATION_ERROR;
1701 return nullptr;
1702 }
1703 result->fMeasureUnits.adoptInstead(new MeasureUnit[units.length()]);
1704 for (int32_t i = 0, length = units.length(); i < length; i++) {
1705 const MeasureUnit &unit = *units[i];
1706 result->fMeasureUnits[i] = unit;
1707 if (unit.getComplexity(status) == UMEASURE_UNIT_MIXED) {
1708 MixedUnitLongNameHandler *mlnh = result->fMixedUnitHandlers.createAndCheckErrorCode(status);
1709 MixedUnitLongNameHandler::forMeasureUnit(loc, unit, width, unitDisplayCase, rules, NULL,
1710 mlnh, status);
1711 result->fHandlers[i] = mlnh;
1712 } else {
1713 LongNameHandler *lnh = result->fLongNameHandlers.createAndCheckErrorCode(status);
1714 LongNameHandler::forMeasureUnit(loc, unit, width, unitDisplayCase, rules, NULL, lnh, status);
1715 result->fHandlers[i] = lnh;
1716 }
1717 if (U_FAILURE(status)) {
1718 return nullptr;
1719 }
1720 }
1721 return result.orphan();
1722 }
1723
processQuantity(DecimalQuantity & quantity,MicroProps & micros,UErrorCode & status) const1724 void LongNameMultiplexer::processQuantity(DecimalQuantity &quantity, MicroProps µs,
1725 UErrorCode &status) const {
1726 // We call parent->processQuantity() from the Multiplexer, instead of
1727 // letting LongNameHandler handle it: we don't know which LongNameHandler to
1728 // call until we've called the parent!
1729 fParent->processQuantity(quantity, micros, status);
1730
1731 // Call the correct LongNameHandler based on outputUnit
1732 for (int i = 0; i < fHandlers.getCapacity(); i++) {
1733 if (fMeasureUnits[i] == micros.outputUnit) {
1734 fHandlers[i]->processQuantity(quantity, micros, status);
1735 return;
1736 }
1737 }
1738 if (U_FAILURE(status)) {
1739 return;
1740 }
1741 // We shouldn't receive any outputUnit for which we haven't already got a
1742 // LongNameHandler:
1743 status = U_INTERNAL_PROGRAM_ERROR;
1744 }
1745
1746 #endif /* #if !UCONFIG_NO_FORMATTING */
1747