1 // © 2017 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 
4 #include "unicode/utypes.h"
5 
6 #if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT
7 
8 #include "cstring.h"
9 #include "unicode/ures.h"
10 #include "uresimp.h"
11 #include "charstr.h"
12 #include "number_formatimpl.h"
13 #include "unicode/numfmt.h"
14 #include "number_patternstring.h"
15 #include "number_utils.h"
16 #include "unicode/numberformatter.h"
17 #include "unicode/dcfmtsym.h"
18 #include "number_scientific.h"
19 #include "number_compact.h"
20 
21 using namespace icu;
22 using namespace icu::number;
23 using namespace icu::number::impl;
24 
25 namespace {
26 
27 // NOTE: In Java, the method to get a pattern from the resource bundle exists in NumberFormat.
28 // In C++, we have to implement that logic here.
29 // TODO: Make Java and C++ consistent?
30 
// Identifies which CLDR number pattern to load from the locale's resource bundle.
// getPatternForStyle() maps each value onto a key under "NumberElements/<ns>/patterns/".
enum CldrPatternStyle {
    CLDR_PATTERN_STYLE_DECIMAL,     // looked up as "decimalFormat"
    CLDR_PATTERN_STYLE_CURRENCY,    // looked up as "currencyFormat"
    CLDR_PATTERN_STYLE_ACCOUNTING,  // looked up as "accountingFormat"
    CLDR_PATTERN_STYLE_PERCENT      // looked up as "percentFormat"
    // TODO: Consider scientific format.
};
38 
39 const char16_t *
doGetPattern(UResourceBundle * res,const char * nsName,const char * patternKey,UErrorCode & publicStatus,UErrorCode & localStatus)40 doGetPattern(UResourceBundle *res, const char *nsName, const char *patternKey, UErrorCode &publicStatus,
41              UErrorCode &localStatus) {
42     // Construct the path into the resource bundle
43     CharString key;
44     key.append("NumberElements/", publicStatus);
45     key.append(nsName, publicStatus);
46     key.append("/patterns/", publicStatus);
47     key.append(patternKey, publicStatus);
48     if (U_FAILURE(publicStatus)) {
49         return u"";
50     }
51     return ures_getStringByKeyWithFallback(res, key.data(), nullptr, &localStatus);
52 }
53 
getPatternForStyle(const Locale & locale,const char * nsName,CldrPatternStyle style,UErrorCode & status)54 const char16_t *getPatternForStyle(const Locale &locale, const char *nsName, CldrPatternStyle style,
55                                    UErrorCode &status) {
56     const char *patternKey;
57     switch (style) {
58         case CLDR_PATTERN_STYLE_DECIMAL:
59             patternKey = "decimalFormat";
60             break;
61         case CLDR_PATTERN_STYLE_CURRENCY:
62             patternKey = "currencyFormat";
63             break;
64         case CLDR_PATTERN_STYLE_ACCOUNTING:
65             patternKey = "accountingFormat";
66             break;
67         case CLDR_PATTERN_STYLE_PERCENT:
68         default:
69             patternKey = "percentFormat";
70             break;
71     }
72     LocalUResourceBundlePointer res(ures_open(nullptr, locale.getName(), &status));
73     if (U_FAILURE(status)) { return u""; }
74 
75     // Attempt to get the pattern with the native numbering system.
76     UErrorCode localStatus = U_ZERO_ERROR;
77     const char16_t *pattern;
78     pattern = doGetPattern(res.getAlias(), nsName, patternKey, status, localStatus);
79     if (U_FAILURE(status)) { return u""; }
80 
81     // Fall back to latn if native numbering system does not have the right pattern
82     if (U_FAILURE(localStatus) && uprv_strcmp("latn", nsName) != 0) {
83         localStatus = U_ZERO_ERROR;
84         pattern = doGetPattern(res.getAlias(), "latn", patternKey, status, localStatus);
85         if (U_FAILURE(status)) { return u""; }
86     }
87 
88     return pattern;
89 }
90 
unitIsCurrency(const MeasureUnit & unit)91 inline bool unitIsCurrency(const MeasureUnit &unit) {
92     return uprv_strcmp("currency", unit.getType()) == 0;
93 }
94 
unitIsNoUnit(const MeasureUnit & unit)95 inline bool unitIsNoUnit(const MeasureUnit &unit) {
96     return uprv_strcmp("none", unit.getType()) == 0;
97 }
98 
unitIsPercent(const MeasureUnit & unit)99 inline bool unitIsPercent(const MeasureUnit &unit) {
100     return uprv_strcmp("percent", unit.getSubtype()) == 0;
101 }
102 
unitIsPermille(const MeasureUnit & unit)103 inline bool unitIsPermille(const MeasureUnit &unit) {
104     return uprv_strcmp("permille", unit.getSubtype()) == 0;
105 }
106 
107 }  // namespace
108 
fromMacros(const MacroProps & macros,UErrorCode & status)109 NumberFormatterImpl *NumberFormatterImpl::fromMacros(const MacroProps &macros, UErrorCode &status) {
110     return new NumberFormatterImpl(macros, true, status);
111 }
112 
applyStatic(const MacroProps & macros,DecimalQuantity & inValue,NumberStringBuilder & outString,UErrorCode & status)113 void NumberFormatterImpl::applyStatic(const MacroProps &macros, DecimalQuantity &inValue,
114                                       NumberStringBuilder &outString, UErrorCode &status) {
115     NumberFormatterImpl impl(macros, false, status);
116     impl.applyUnsafe(inValue, outString, status);
117 }
118 
119 // NOTE: C++ SPECIFIC DIFFERENCE FROM JAVA:
120 // The "safe" apply method uses a new MicroProps. In the MicroPropsGenerator, fMicros is copied into the new instance.
121 // The "unsafe" method simply re-uses fMicros, eliminating the extra copy operation.
122 // See MicroProps::processQuantity() for details.
123 
apply(DecimalQuantity & inValue,NumberStringBuilder & outString,UErrorCode & status) const124 void NumberFormatterImpl::apply(DecimalQuantity &inValue, NumberStringBuilder &outString,
125                                 UErrorCode &status) const {
126     if (U_FAILURE(status)) { return; }
127     MicroProps micros;
128     fMicroPropsGenerator->processQuantity(inValue, micros, status);
129     if (U_FAILURE(status)) { return; }
130     microsToString(micros, inValue, outString, status);
131 }
132 
applyUnsafe(DecimalQuantity & inValue,NumberStringBuilder & outString,UErrorCode & status)133 void NumberFormatterImpl::applyUnsafe(DecimalQuantity &inValue, NumberStringBuilder &outString,
134                                       UErrorCode &status) {
135     if (U_FAILURE(status)) { return; }
136     fMicroPropsGenerator->processQuantity(inValue, fMicros, status);
137     if (U_FAILURE(status)) { return; }
138     microsToString(fMicros, inValue, outString, status);
139 }
140 
NumberFormatterImpl(const MacroProps & macros,bool safe,UErrorCode & status)141 NumberFormatterImpl::NumberFormatterImpl(const MacroProps &macros, bool safe, UErrorCode &status) {
142     fMicroPropsGenerator = macrosToMicroGenerator(macros, safe, status);
143 }
144 
145 //////////
146 
147 const MicroPropsGenerator *
macrosToMicroGenerator(const MacroProps & macros,bool safe,UErrorCode & status)148 NumberFormatterImpl::macrosToMicroGenerator(const MacroProps &macros, bool safe, UErrorCode &status) {
149     const MicroPropsGenerator *chain = &fMicros;
150 
151     // Check that macros is error-free before continuing.
152     if (macros.copyErrorTo(status)) {
153         return nullptr;
154     }
155 
156     // TODO: Accept currency symbols from DecimalFormatSymbols?
157 
158     // Pre-compute a few values for efficiency.
159     bool isCurrency = unitIsCurrency(macros.unit);
160     bool isNoUnit = unitIsNoUnit(macros.unit);
161     bool isPercent = isNoUnit && unitIsPercent(macros.unit);
162     bool isPermille = isNoUnit && unitIsPermille(macros.unit);
163     bool isCldrUnit = !isCurrency && !isNoUnit;
164     bool isAccounting =
165             macros.sign == UNUM_SIGN_ACCOUNTING || macros.sign == UNUM_SIGN_ACCOUNTING_ALWAYS;
166     CurrencyUnit currency(kDefaultCurrency, status);
167     if (isCurrency) {
168         currency = CurrencyUnit(macros.unit, status); // Restore CurrencyUnit from MeasureUnit
169     }
170     UNumberUnitWidth unitWidth = UNUM_UNIT_WIDTH_SHORT;
171     if (macros.unitWidth != UNUM_UNIT_WIDTH_COUNT) {
172         unitWidth = macros.unitWidth;
173     }
174 
175     // Select the numbering system.
176     LocalPointer<const NumberingSystem> nsLocal;
177     const NumberingSystem *ns;
178     if (macros.symbols.isNumberingSystem()) {
179         ns = macros.symbols.getNumberingSystem();
180     } else {
181         // TODO: Is there a way to avoid creating the NumberingSystem object?
182         ns = NumberingSystem::createInstance(macros.locale, status);
183         // Give ownership to the function scope.
184         nsLocal.adoptInstead(ns);
185     }
186     const char *nsName = U_SUCCESS(status) ? ns->getName() : "latn";
187 
188     // Load and parse the pattern string.  It is used for grouping sizes and affixes only.
189     CldrPatternStyle patternStyle;
190     if (isPercent || isPermille) {
191         patternStyle = CLDR_PATTERN_STYLE_PERCENT;
192     } else if (!isCurrency || unitWidth == UNUM_UNIT_WIDTH_FULL_NAME) {
193         patternStyle = CLDR_PATTERN_STYLE_DECIMAL;
194     } else if (isAccounting) {
195         // NOTE: Although ACCOUNTING and ACCOUNTING_ALWAYS are only supported in currencies right now,
196         // the API contract allows us to add support to other units in the future.
197         patternStyle = CLDR_PATTERN_STYLE_ACCOUNTING;
198     } else {
199         patternStyle = CLDR_PATTERN_STYLE_CURRENCY;
200     }
201     const char16_t *pattern = getPatternForStyle(macros.locale, nsName, patternStyle, status);
202     auto patternInfo = new ParsedPatternInfo();
203     fPatternInfo.adoptInstead(patternInfo);
204     PatternParser::parseToPatternInfo(UnicodeString(pattern), *patternInfo, status);
205 
206     /////////////////////////////////////////////////////////////////////////////////////
207     /// START POPULATING THE DEFAULT MICROPROPS AND BUILDING THE MICROPROPS GENERATOR ///
208     /////////////////////////////////////////////////////////////////////////////////////
209 
210     // Symbols
211     if (macros.symbols.isDecimalFormatSymbols()) {
212         fMicros.symbols = macros.symbols.getDecimalFormatSymbols();
213     } else {
214         fMicros.symbols = new DecimalFormatSymbols(macros.locale, *ns, status);
215         // Give ownership to the NumberFormatterImpl.
216         fSymbols.adoptInstead(fMicros.symbols);
217     }
218 
219     // Rounding strategy
220     if (!macros.rounder.isBogus()) {
221         fMicros.rounding = macros.rounder;
222     } else if (macros.notation.fType == Notation::NTN_COMPACT) {
223         fMicros.rounding = Rounder::integer().withMinDigits(2);
224     } else if (isCurrency) {
225         fMicros.rounding = Rounder::currency(UCURR_USAGE_STANDARD);
226     } else {
227         fMicros.rounding = Rounder::maxFraction(6);
228     }
229     fMicros.rounding.setLocaleData(currency, status);
230 
231     // Grouping strategy
232     if (!macros.grouper.isBogus()) {
233         fMicros.grouping = macros.grouper;
234     } else if (macros.notation.fType == Notation::NTN_COMPACT) {
235         // Compact notation uses minGrouping by default since ICU 59
236         fMicros.grouping = Grouper::minTwoDigits();
237     } else {
238         fMicros.grouping = Grouper::defaults();
239     }
240     fMicros.grouping.setLocaleData(*fPatternInfo);
241 
242     // Padding strategy
243     if (!macros.padder.isBogus()) {
244         fMicros.padding = macros.padder;
245     } else {
246         fMicros.padding = Padder::none();
247     }
248 
249     // Integer width
250     if (!macros.integerWidth.isBogus()) {
251         fMicros.integerWidth = macros.integerWidth;
252     } else {
253         fMicros.integerWidth = IntegerWidth::zeroFillTo(1);
254     }
255 
256     // Sign display
257     if (macros.sign != UNUM_SIGN_COUNT) {
258         fMicros.sign = macros.sign;
259     } else {
260         fMicros.sign = UNUM_SIGN_AUTO;
261     }
262 
263     // Decimal mark display
264     if (macros.decimal != UNUM_DECIMAL_SEPARATOR_COUNT) {
265         fMicros.decimal = macros.decimal;
266     } else {
267         fMicros.decimal = UNUM_DECIMAL_SEPARATOR_AUTO;
268     }
269 
270     // Use monetary separator symbols
271     fMicros.useCurrency = isCurrency;
272 
273     // Inner modifier (scientific notation)
274     if (macros.notation.fType == Notation::NTN_SCIENTIFIC) {
275         fScientificHandler.adoptInstead(new ScientificHandler(&macros.notation, fMicros.symbols, chain));
276         chain = fScientificHandler.getAlias();
277     } else {
278         // No inner modifier required
279         fMicros.modInner = &fMicros.helpers.emptyStrongModifier;
280     }
281 
282     // Middle modifier (patterns, positive/negative, currency symbols, percent)
283     auto patternModifier = new MutablePatternModifier(false);
284     fPatternModifier.adoptInstead(patternModifier);
285     patternModifier->setPatternInfo(fPatternInfo.getAlias());
286     patternModifier->setPatternAttributes(fMicros.sign, isPermille);
287     if (patternModifier->needsPlurals()) {
288         patternModifier->setSymbols(
289                 fMicros.symbols,
290                 currency,
291                 unitWidth,
292                 resolvePluralRules(macros.rules, macros.locale, status));
293     } else {
294         patternModifier->setSymbols(fMicros.symbols, currency, unitWidth, nullptr);
295     }
296     if (safe) {
297         fImmutablePatternModifier.adoptInstead(patternModifier->createImmutableAndChain(chain, status));
298         chain = fImmutablePatternModifier.getAlias();
299     } else {
300         patternModifier->addToChain(chain);
301         chain = patternModifier;
302     }
303 
304     // Outer modifier (CLDR units and currency long names)
305     if (isCldrUnit) {
306         fLongNameHandler.adoptInstead(
307                 new LongNameHandler(
308                         LongNameHandler::forMeasureUnit(
309                                 macros.locale,
310                                 macros.unit,
311                                 unitWidth,
312                                 resolvePluralRules(macros.rules, macros.locale, status),
313                                 chain,
314                                 status)));
315         chain = fLongNameHandler.getAlias();
316     } else if (isCurrency && unitWidth == UNUM_UNIT_WIDTH_FULL_NAME) {
317         fLongNameHandler.adoptInstead(
318                 new LongNameHandler(
319                         LongNameHandler::forCurrencyLongNames(
320                                 macros.locale,
321                                 currency,
322                                 resolvePluralRules(macros.rules, macros.locale, status),
323                                 chain,
324                                 status)));
325         chain = fLongNameHandler.getAlias();
326     } else {
327         // No outer modifier required
328         fMicros.modOuter = &fMicros.helpers.emptyWeakModifier;
329     }
330 
331     // Compact notation
332     // NOTE: Compact notation can (but might not) override the middle modifier and rounding.
333     // It therefore needs to go at the end of the chain.
334     if (macros.notation.fType == Notation::NTN_COMPACT) {
335         CompactType compactType = (isCurrency && unitWidth != UNUM_UNIT_WIDTH_FULL_NAME)
336                                   ? CompactType::TYPE_CURRENCY : CompactType::TYPE_DECIMAL;
337         fCompactHandler.adoptInstead(
338                 new CompactHandler(
339                         macros.notation.fUnion.compactStyle,
340                         macros.locale,
341                         nsName,
342                         compactType,
343                         resolvePluralRules(macros.rules, macros.locale, status),
344                         safe ? patternModifier : nullptr,
345                         chain,
346                         status));
347         chain = fCompactHandler.getAlias();
348     }
349 
350     return chain;
351 }
352 
353 const PluralRules *
resolvePluralRules(const PluralRules * rulesPtr,const Locale & locale,UErrorCode & status)354 NumberFormatterImpl::resolvePluralRules(const PluralRules *rulesPtr, const Locale &locale,
355                                         UErrorCode &status) {
356     if (rulesPtr != nullptr) {
357         return rulesPtr;
358     }
359     // Lazily create PluralRules
360     if (fRules.isNull()) {
361         fRules.adoptInstead(PluralRules::forLocale(locale, status));
362     }
363     return fRules.getAlias();
364 }
365 
microsToString(const MicroProps & micros,DecimalQuantity & quantity,NumberStringBuilder & string,UErrorCode & status)366 int32_t NumberFormatterImpl::microsToString(const MicroProps &micros, DecimalQuantity &quantity,
367                                             NumberStringBuilder &string, UErrorCode &status) {
368     micros.rounding.apply(quantity, status);
369     micros.integerWidth.apply(quantity, status);
370     int32_t length = writeNumber(micros, quantity, string, status);
371     // NOTE: When range formatting is added, these modifiers can bubble up.
372     // For now, apply them all here at once.
373     // Always apply the inner modifier (which is "strong").
374     length += micros.modInner->apply(string, 0, length, status);
375     if (micros.padding.isValid()) {
376         length += micros.padding
377                 .padAndApply(*micros.modMiddle, *micros.modOuter, string, 0, length, status);
378     } else {
379         length += micros.modMiddle->apply(string, 0, length, status);
380         length += micros.modOuter->apply(string, 0, length, status);
381     }
382     return length;
383 }
384 
writeNumber(const MicroProps & micros,DecimalQuantity & quantity,NumberStringBuilder & string,UErrorCode & status)385 int32_t NumberFormatterImpl::writeNumber(const MicroProps &micros, DecimalQuantity &quantity,
386                                          NumberStringBuilder &string, UErrorCode &status) {
387     int32_t length = 0;
388     if (quantity.isInfinite()) {
389         length += string.insert(
390                 length,
391                 micros.symbols->getSymbol(DecimalFormatSymbols::ENumberFormatSymbol::kInfinitySymbol),
392                 UNUM_INTEGER_FIELD,
393                 status);
394 
395     } else if (quantity.isNaN()) {
396         length += string.insert(
397                 length,
398                 micros.symbols->getSymbol(DecimalFormatSymbols::ENumberFormatSymbol::kNaNSymbol),
399                 UNUM_INTEGER_FIELD,
400                 status);
401 
402     } else {
403         // Add the integer digits
404         length += writeIntegerDigits(micros, quantity, string, status);
405 
406         // Add the decimal point
407         if (quantity.getLowerDisplayMagnitude() < 0 || micros.decimal == UNUM_DECIMAL_SEPARATOR_ALWAYS) {
408             length += string.insert(
409                     length,
410                     micros.useCurrency ? micros.symbols->getSymbol(
411                             DecimalFormatSymbols::ENumberFormatSymbol::kMonetarySeparatorSymbol) : micros
412                             .symbols
413                             ->getSymbol(
414                                     DecimalFormatSymbols::ENumberFormatSymbol::kDecimalSeparatorSymbol),
415                     UNUM_DECIMAL_SEPARATOR_FIELD,
416                     status);
417         }
418 
419         // Add the fraction digits
420         length += writeFractionDigits(micros, quantity, string, status);
421     }
422 
423     return length;
424 }
425 
writeIntegerDigits(const MicroProps & micros,DecimalQuantity & quantity,NumberStringBuilder & string,UErrorCode & status)426 int32_t NumberFormatterImpl::writeIntegerDigits(const MicroProps &micros, DecimalQuantity &quantity,
427                                                 NumberStringBuilder &string, UErrorCode &status) {
428     int length = 0;
429     int integerCount = quantity.getUpperDisplayMagnitude() + 1;
430     for (int i = 0; i < integerCount; i++) {
431         // Add grouping separator
432         if (micros.grouping.groupAtPosition(i, quantity)) {
433             length += string.insert(
434                     0,
435                     micros.useCurrency ? micros.symbols->getSymbol(
436                             DecimalFormatSymbols::ENumberFormatSymbol::kMonetaryGroupingSeparatorSymbol)
437                                        : micros.symbols->getSymbol(
438                             DecimalFormatSymbols::ENumberFormatSymbol::kGroupingSeparatorSymbol),
439                     UNUM_GROUPING_SEPARATOR_FIELD,
440                     status);
441         }
442 
443         // Get and append the next digit value
444         int8_t nextDigit = quantity.getDigit(i);
445         length += string.insert(
446                 0, getDigitFromSymbols(nextDigit, *micros.symbols), UNUM_INTEGER_FIELD, status);
447     }
448     return length;
449 }
450 
writeFractionDigits(const MicroProps & micros,DecimalQuantity & quantity,NumberStringBuilder & string,UErrorCode & status)451 int32_t NumberFormatterImpl::writeFractionDigits(const MicroProps &micros, DecimalQuantity &quantity,
452                                                  NumberStringBuilder &string, UErrorCode &status) {
453     int length = 0;
454     int fractionCount = -quantity.getLowerDisplayMagnitude();
455     for (int i = 0; i < fractionCount; i++) {
456         // Get and append the next digit value
457         int8_t nextDigit = quantity.getDigit(-i - 1);
458         length += string.append(
459                 getDigitFromSymbols(nextDigit, *micros.symbols), UNUM_FRACTION_FIELD, status);
460     }
461     return length;
462 }
463 
464 #endif /* #if !UCONFIG_NO_FORMATTING */
465