1 // © 2017 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 
4 #include "unicode/utypes.h"
5 
6 #if !UCONFIG_NO_FORMATTING
7 
8 #include "umutex.h"
9 #include "ucln_cmn.h"
10 #include "ucln_in.h"
11 #include "number_modifiers.h"
12 
13 using namespace icu;
14 using namespace icu::number;
15 using namespace icu::number::impl;
16 
17 namespace {
18 
// TODO: This is copied from simpleformatter.cpp
// Code units of a compiled pattern that are below this limit are treated by the code in this
// file as argument markers; code units at or above it carry a text-segment length offset by
// this value (the callers below subtract ARG_NUM_LIMIT to recover the length).
const int32_t ARG_NUM_LIMIT = 0x100;

// These are the default currency spacing UnicodeSets in CLDR.
// Pre-compute them for performance.
// The Java unit test testCurrencySpacingPatternStability() will start failing if these change in CLDR.
// Init-once guard for the two cached sets below; reset by cleanupDefaultCurrencySpacing().
icu::UInitOnce gDefaultCurrencySpacingInitOnce = U_INITONCE_INITIALIZER;

// Cached set for the pattern "[:digit:]"; allocated in initDefaultCurrencySpacing().
UnicodeSet *UNISET_DIGIT = nullptr;
// Cached set for the pattern "[[:^S:]&[:^Z:]]"; allocated in initDefaultCurrencySpacing().
UnicodeSet *UNISET_NOTSZ = nullptr;
29 
cleanupDefaultCurrencySpacing()30 UBool U_CALLCONV cleanupDefaultCurrencySpacing() {
31     delete UNISET_DIGIT;
32     UNISET_DIGIT = nullptr;
33     delete UNISET_NOTSZ;
34     UNISET_NOTSZ = nullptr;
35     gDefaultCurrencySpacingInitOnce.reset();
36     return TRUE;
37 }
38 
initDefaultCurrencySpacing(UErrorCode & status)39 void U_CALLCONV initDefaultCurrencySpacing(UErrorCode &status) {
40     ucln_i18n_registerCleanup(UCLN_I18N_CURRENCY_SPACING, cleanupDefaultCurrencySpacing);
41     UNISET_DIGIT = new UnicodeSet(UnicodeString(u"[:digit:]"), status);
42     UNISET_NOTSZ = new UnicodeSet(UnicodeString(u"[[:^S:]&[:^Z:]]"), status);
43     if (UNISET_DIGIT == nullptr || UNISET_NOTSZ == nullptr) {
44         status = U_MEMORY_ALLOCATION_ERROR;
45         return;
46     }
47     UNISET_DIGIT->freeze();
48     UNISET_NOTSZ->freeze();
49 }
50 
51 }  // namespace
52 
53 
// Out-of-line definition of the Modifier destructor; uses the compiler-generated body.
Modifier::~Modifier() = default;
55 
Parameters()56 Modifier::Parameters::Parameters()
57         : obj(nullptr) {}
58 
Parameters(const ModifierStore * _obj,Signum _signum,StandardPlural::Form _plural)59 Modifier::Parameters::Parameters(
60     const ModifierStore* _obj, Signum _signum, StandardPlural::Form _plural)
61         : obj(_obj), signum(_signum), plural(_plural) {}
62 
// Out-of-line definition of the ModifierStore destructor; uses the compiler-generated body.
ModifierStore::~ModifierStore() = default;
64 
~AdoptingModifierStore()65 AdoptingModifierStore::~AdoptingModifierStore()  {
66     for (const Modifier *mod : mods) {
67         delete mod;
68     }
69 }
70 
71 
apply(FormattedStringBuilder & output,int leftIndex,int rightIndex,UErrorCode & status) const72 int32_t ConstantAffixModifier::apply(FormattedStringBuilder &output, int leftIndex, int rightIndex,
73                                      UErrorCode &status) const {
74     // Insert the suffix first since inserting the prefix will change the rightIndex
75     int length = output.insert(rightIndex, fSuffix, fField, status);
76     length += output.insert(leftIndex, fPrefix, fField, status);
77     return length;
78 }
79 
getPrefixLength() const80 int32_t ConstantAffixModifier::getPrefixLength() const {
81     return fPrefix.length();
82 }
83 
getCodePointCount() const84 int32_t ConstantAffixModifier::getCodePointCount() const {
85     return fPrefix.countChar32() + fSuffix.countChar32();
86 }
87 
// Returns the "strong" flag stored on this modifier.
bool ConstantAffixModifier::isStrong() const {
    return fStrong;
}
91 
containsField(Field field) const92 bool ConstantAffixModifier::containsField(Field field) const {
93     (void)field;
94     // This method is not currently used.
95     UPRV_UNREACHABLE;
96 }
97 
getParameters(Parameters & output) const98 void ConstantAffixModifier::getParameters(Parameters& output) const {
99     (void)output;
100     // This method is not currently used.
101     UPRV_UNREACHABLE;
102 }
103 
semanticallyEquivalent(const Modifier & other) const104 bool ConstantAffixModifier::semanticallyEquivalent(const Modifier& other) const {
105     auto* _other = dynamic_cast<const ConstantAffixModifier*>(&other);
106     if (_other == nullptr) {
107         return false;
108     }
109     return fPrefix == _other->fPrefix
110         && fSuffix == _other->fSuffix
111         && fField == _other->fField
112         && fStrong == _other->fStrong;
113 }
114 
115 
SimpleModifier(const SimpleFormatter & simpleFormatter,Field field,bool strong)116 SimpleModifier::SimpleModifier(const SimpleFormatter &simpleFormatter, Field field, bool strong)
117         : SimpleModifier(simpleFormatter, field, strong, {}) {}
118 
SimpleModifier(const SimpleFormatter & simpleFormatter,Field field,bool strong,const Modifier::Parameters parameters)119 SimpleModifier::SimpleModifier(const SimpleFormatter &simpleFormatter, Field field, bool strong,
120                                const Modifier::Parameters parameters)
121         : fCompiledPattern(simpleFormatter.compiledPattern), fField(field), fStrong(strong),
122           fParameters(parameters) {
123     int32_t argLimit = SimpleFormatter::getArgumentLimit(
124             fCompiledPattern.getBuffer(), fCompiledPattern.length());
125     if (argLimit == 0) {
126         // No arguments in compiled pattern
127         fPrefixLength = fCompiledPattern.charAt(1) - ARG_NUM_LIMIT;
128         U_ASSERT(2 + fPrefixLength == fCompiledPattern.length());
129         // Set suffixOffset = -1 to indicate no arguments in compiled pattern.
130         fSuffixOffset = -1;
131         fSuffixLength = 0;
132     } else {
133         U_ASSERT(argLimit == 1);
134         if (fCompiledPattern.charAt(1) != 0) {
135             // Found prefix
136             fPrefixLength = fCompiledPattern.charAt(1) - ARG_NUM_LIMIT;
137             fSuffixOffset = 3 + fPrefixLength;
138         } else {
139             // No prefix
140             fPrefixLength = 0;
141             fSuffixOffset = 2;
142         }
143         if (3 + fPrefixLength < fCompiledPattern.length()) {
144             // Found suffix
145             fSuffixLength = fCompiledPattern.charAt(fSuffixOffset) - ARG_NUM_LIMIT;
146         } else {
147             // No suffix
148             fSuffixLength = 0;
149         }
150     }
151 }
152 
SimpleModifier()153 SimpleModifier::SimpleModifier()
154         : fField(kUndefinedField), fStrong(false), fPrefixLength(0), fSuffixLength(0) {
155 }
156 
apply(FormattedStringBuilder & output,int leftIndex,int rightIndex,UErrorCode & status) const157 int32_t SimpleModifier::apply(FormattedStringBuilder &output, int leftIndex, int rightIndex,
158                               UErrorCode &status) const {
159     return formatAsPrefixSuffix(output, leftIndex, rightIndex, status);
160 }
161 
// Returns the stored prefix length (set by whichever constructor built this modifier).
int32_t SimpleModifier::getPrefixLength() const {
    return fPrefixLength;
}
165 
getCodePointCount() const166 int32_t SimpleModifier::getCodePointCount() const {
167     int32_t count = 0;
168     if (fPrefixLength > 0) {
169         count += fCompiledPattern.countChar32(2, fPrefixLength);
170     }
171     if (fSuffixLength > 0) {
172         count += fCompiledPattern.countChar32(1 + fSuffixOffset, fSuffixLength);
173     }
174     return count;
175 }
176 
// Returns the "strong" flag stored on this modifier.
bool SimpleModifier::isStrong() const {
    return fStrong;
}
180 
containsField(Field field) const181 bool SimpleModifier::containsField(Field field) const {
182     (void)field;
183     // This method is not currently used.
184     UPRV_UNREACHABLE;
185 }
186 
// Copies the Parameters stored on this modifier into `output`.
void SimpleModifier::getParameters(Parameters& output) const {
    output = fParameters;
}
190 
semanticallyEquivalent(const Modifier & other) const191 bool SimpleModifier::semanticallyEquivalent(const Modifier& other) const {
192     auto* _other = dynamic_cast<const SimpleModifier*>(&other);
193     if (_other == nullptr) {
194         return false;
195     }
196     if (fParameters.obj != nullptr) {
197         return fParameters.obj == _other->fParameters.obj;
198     }
199     return fCompiledPattern == _other->fCompiledPattern
200         && fField == _other->fField
201         && fStrong == _other->fStrong;
202 }
203 
204 
205 int32_t
formatAsPrefixSuffix(FormattedStringBuilder & result,int32_t startIndex,int32_t endIndex,UErrorCode & status) const206 SimpleModifier::formatAsPrefixSuffix(FormattedStringBuilder &result, int32_t startIndex, int32_t endIndex,
207                                      UErrorCode &status) const {
208     if (fSuffixOffset == -1 && fPrefixLength + fSuffixLength > 0) {
209         // There is no argument for the inner number; overwrite the entire segment with our string.
210         return result.splice(startIndex, endIndex, fCompiledPattern, 2, 2 + fPrefixLength, fField, status);
211     } else {
212         if (fPrefixLength > 0) {
213             result.insert(startIndex, fCompiledPattern, 2, 2 + fPrefixLength, fField, status);
214         }
215         if (fSuffixLength > 0) {
216             result.insert(
217                     endIndex + fPrefixLength,
218                     fCompiledPattern,
219                     1 + fSuffixOffset,
220                     1 + fSuffixOffset + fSuffixLength,
221                     fField,
222                     status);
223         }
224         return fPrefixLength + fSuffixLength;
225     }
226 }
227 
228 
229 int32_t
formatTwoArgPattern(const SimpleFormatter & compiled,FormattedStringBuilder & result,int32_t index,int32_t * outPrefixLength,int32_t * outSuffixLength,Field field,UErrorCode & status)230 SimpleModifier::formatTwoArgPattern(const SimpleFormatter& compiled, FormattedStringBuilder& result,
231                                     int32_t index, int32_t* outPrefixLength, int32_t* outSuffixLength,
232                                     Field field, UErrorCode& status) {
233     const UnicodeString& compiledPattern = compiled.compiledPattern;
234     int32_t argLimit = SimpleFormatter::getArgumentLimit(
235             compiledPattern.getBuffer(), compiledPattern.length());
236     if (argLimit != 2) {
237         status = U_INTERNAL_PROGRAM_ERROR;
238         return 0;
239     }
240     int32_t offset = 1; // offset into compiledPattern
241     int32_t length = 0; // chars added to result
242 
243     int32_t prefixLength = compiledPattern.charAt(offset);
244     offset++;
245     if (prefixLength < ARG_NUM_LIMIT) {
246         // No prefix
247         prefixLength = 0;
248     } else {
249         prefixLength -= ARG_NUM_LIMIT;
250         result.insert(index + length, compiledPattern, offset, offset + prefixLength, field, status);
251         offset += prefixLength;
252         length += prefixLength;
253         offset++;
254     }
255 
256     int32_t infixLength = compiledPattern.charAt(offset);
257     offset++;
258     if (infixLength < ARG_NUM_LIMIT) {
259         // No infix
260         infixLength = 0;
261     } else {
262         infixLength -= ARG_NUM_LIMIT;
263         result.insert(index + length, compiledPattern, offset, offset + infixLength, field, status);
264         offset += infixLength;
265         length += infixLength;
266         offset++;
267     }
268 
269     int32_t suffixLength;
270     if (offset == compiledPattern.length()) {
271         // No suffix
272         suffixLength = 0;
273     } else {
274         suffixLength = compiledPattern.charAt(offset) -  ARG_NUM_LIMIT;
275         offset++;
276         result.insert(index + length, compiledPattern, offset, offset + suffixLength, field, status);
277         length += suffixLength;
278     }
279 
280     *outPrefixLength = prefixLength;
281     *outSuffixLength = suffixLength;
282 
283     return length;
284 }
285 
286 
apply(FormattedStringBuilder & output,int leftIndex,int rightIndex,UErrorCode & status) const287 int32_t ConstantMultiFieldModifier::apply(FormattedStringBuilder &output, int leftIndex, int rightIndex,
288                                           UErrorCode &status) const {
289     int32_t length = output.insert(leftIndex, fPrefix, status);
290     if (fOverwrite) {
291         length += output.splice(
292             leftIndex + length,
293             rightIndex + length,
294             UnicodeString(), 0, 0,
295             kUndefinedField, status);
296     }
297     length += output.insert(rightIndex + length, fSuffix, status);
298     return length;
299 }
300 
getPrefixLength() const301 int32_t ConstantMultiFieldModifier::getPrefixLength() const {
302     return fPrefix.length();
303 }
304 
getCodePointCount() const305 int32_t ConstantMultiFieldModifier::getCodePointCount() const {
306     return fPrefix.codePointCount() + fSuffix.codePointCount();
307 }
308 
// Returns the "strong" flag stored on this modifier.
bool ConstantMultiFieldModifier::isStrong() const {
    return fStrong;
}
312 
containsField(Field field) const313 bool ConstantMultiFieldModifier::containsField(Field field) const {
314     return fPrefix.containsField(field) || fSuffix.containsField(field);
315 }
316 
// Copies the Parameters stored on this modifier into `output`.
void ConstantMultiFieldModifier::getParameters(Parameters& output) const {
    output = fParameters;
}
320 
semanticallyEquivalent(const Modifier & other) const321 bool ConstantMultiFieldModifier::semanticallyEquivalent(const Modifier& other) const {
322     auto* _other = dynamic_cast<const ConstantMultiFieldModifier*>(&other);
323     if (_other == nullptr) {
324         return false;
325     }
326     if (fParameters.obj != nullptr) {
327         return fParameters.obj == _other->fParameters.obj;
328     }
329     return fPrefix.contentEquals(_other->fPrefix)
330         && fSuffix.contentEquals(_other->fSuffix)
331         && fOverwrite == _other->fOverwrite
332         && fStrong == _other->fStrong;
333 }
334 
335 
CurrencySpacingEnabledModifier(const FormattedStringBuilder & prefix,const FormattedStringBuilder & suffix,bool overwrite,bool strong,const DecimalFormatSymbols & symbols,UErrorCode & status)336 CurrencySpacingEnabledModifier::CurrencySpacingEnabledModifier(const FormattedStringBuilder &prefix,
337                                                                const FormattedStringBuilder &suffix,
338                                                                bool overwrite,
339                                                                bool strong,
340                                                                const DecimalFormatSymbols &symbols,
341                                                                UErrorCode &status)
342         : ConstantMultiFieldModifier(prefix, suffix, overwrite, strong) {
343     // Check for currency spacing. Do not build the UnicodeSets unless there is
344     // a currency code point at a boundary.
345     if (prefix.length() > 0 && prefix.fieldAt(prefix.length() - 1) == Field(UFIELD_CATEGORY_NUMBER, UNUM_CURRENCY_FIELD)) {
346         int prefixCp = prefix.getLastCodePoint();
347         UnicodeSet prefixUnicodeSet = getUnicodeSet(symbols, IN_CURRENCY, PREFIX, status);
348         if (prefixUnicodeSet.contains(prefixCp)) {
349             fAfterPrefixUnicodeSet = getUnicodeSet(symbols, IN_NUMBER, PREFIX, status);
350             fAfterPrefixUnicodeSet.freeze();
351             fAfterPrefixInsert = getInsertString(symbols, PREFIX, status);
352         } else {
353             fAfterPrefixUnicodeSet.setToBogus();
354             fAfterPrefixInsert.setToBogus();
355         }
356     } else {
357         fAfterPrefixUnicodeSet.setToBogus();
358         fAfterPrefixInsert.setToBogus();
359     }
360     if (suffix.length() > 0 && suffix.fieldAt(0) == Field(UFIELD_CATEGORY_NUMBER, UNUM_CURRENCY_FIELD)) {
361         int suffixCp = suffix.getFirstCodePoint();
362         UnicodeSet suffixUnicodeSet = getUnicodeSet(symbols, IN_CURRENCY, SUFFIX, status);
363         if (suffixUnicodeSet.contains(suffixCp)) {
364             fBeforeSuffixUnicodeSet = getUnicodeSet(symbols, IN_NUMBER, SUFFIX, status);
365             fBeforeSuffixUnicodeSet.freeze();
366             fBeforeSuffixInsert = getInsertString(symbols, SUFFIX, status);
367         } else {
368             fBeforeSuffixUnicodeSet.setToBogus();
369             fBeforeSuffixInsert.setToBogus();
370         }
371     } else {
372         fBeforeSuffixUnicodeSet.setToBogus();
373         fBeforeSuffixInsert.setToBogus();
374     }
375 }
376 
apply(FormattedStringBuilder & output,int leftIndex,int rightIndex,UErrorCode & status) const377 int32_t CurrencySpacingEnabledModifier::apply(FormattedStringBuilder &output, int leftIndex, int rightIndex,
378                                               UErrorCode &status) const {
379     // Currency spacing logic
380     int length = 0;
381     if (rightIndex - leftIndex > 0 && !fAfterPrefixUnicodeSet.isBogus() &&
382         fAfterPrefixUnicodeSet.contains(output.codePointAt(leftIndex))) {
383         // TODO: Should we use the CURRENCY field here?
384         length += output.insert(
385             leftIndex,
386             fAfterPrefixInsert,
387             kUndefinedField,
388             status);
389     }
390     if (rightIndex - leftIndex > 0 && !fBeforeSuffixUnicodeSet.isBogus() &&
391         fBeforeSuffixUnicodeSet.contains(output.codePointBefore(rightIndex))) {
392         // TODO: Should we use the CURRENCY field here?
393         length += output.insert(
394             rightIndex + length,
395             fBeforeSuffixInsert,
396             kUndefinedField,
397             status);
398     }
399 
400     // Call super for the remaining logic
401     length += ConstantMultiFieldModifier::apply(output, leftIndex, rightIndex + length, status);
402     return length;
403 }
404 
405 int32_t
applyCurrencySpacing(FormattedStringBuilder & output,int32_t prefixStart,int32_t prefixLen,int32_t suffixStart,int32_t suffixLen,const DecimalFormatSymbols & symbols,UErrorCode & status)406 CurrencySpacingEnabledModifier::applyCurrencySpacing(FormattedStringBuilder &output, int32_t prefixStart,
407                                                      int32_t prefixLen, int32_t suffixStart,
408                                                      int32_t suffixLen,
409                                                      const DecimalFormatSymbols &symbols,
410                                                      UErrorCode &status) {
411     int length = 0;
412     bool hasPrefix = (prefixLen > 0);
413     bool hasSuffix = (suffixLen > 0);
414     bool hasNumber = (suffixStart - prefixStart - prefixLen > 0); // could be empty string
415     if (hasPrefix && hasNumber) {
416         length += applyCurrencySpacingAffix(output, prefixStart + prefixLen, PREFIX, symbols, status);
417     }
418     if (hasSuffix && hasNumber) {
419         length += applyCurrencySpacingAffix(output, suffixStart + length, SUFFIX, symbols, status);
420     }
421     return length;
422 }
423 
424 int32_t
applyCurrencySpacingAffix(FormattedStringBuilder & output,int32_t index,EAffix affix,const DecimalFormatSymbols & symbols,UErrorCode & status)425 CurrencySpacingEnabledModifier::applyCurrencySpacingAffix(FormattedStringBuilder &output, int32_t index,
426                                                           EAffix affix,
427                                                           const DecimalFormatSymbols &symbols,
428                                                           UErrorCode &status) {
429     // NOTE: For prefix, output.fieldAt(index-1) gets the last field type in the prefix.
430     // This works even if the last code point in the prefix is 2 code units because the
431     // field value gets populated to both indices in the field array.
432     Field affixField = (affix == PREFIX) ? output.fieldAt(index - 1) : output.fieldAt(index);
433     if (affixField != Field(UFIELD_CATEGORY_NUMBER, UNUM_CURRENCY_FIELD)) {
434         return 0;
435     }
436     int affixCp = (affix == PREFIX) ? output.codePointBefore(index) : output.codePointAt(index);
437     UnicodeSet affixUniset = getUnicodeSet(symbols, IN_CURRENCY, affix, status);
438     if (!affixUniset.contains(affixCp)) {
439         return 0;
440     }
441     int numberCp = (affix == PREFIX) ? output.codePointAt(index) : output.codePointBefore(index);
442     UnicodeSet numberUniset = getUnicodeSet(symbols, IN_NUMBER, affix, status);
443     if (!numberUniset.contains(numberCp)) {
444         return 0;
445     }
446     UnicodeString spacingString = getInsertString(symbols, affix, status);
447 
448     // NOTE: This next line *inserts* the spacing string, triggering an arraycopy.
449     // It would be more efficient if this could be done before affixes were attached,
450     // so that it could be prepended/appended instead of inserted.
451     // However, the build code path is more efficient, and this is the most natural
452     // place to put currency spacing in the non-build code path.
453     // TODO: Should we use the CURRENCY field here?
454     return output.insert(index, spacingString, kUndefinedField, status);
455 }
456 
457 UnicodeSet
getUnicodeSet(const DecimalFormatSymbols & symbols,EPosition position,EAffix affix,UErrorCode & status)458 CurrencySpacingEnabledModifier::getUnicodeSet(const DecimalFormatSymbols &symbols, EPosition position,
459                                               EAffix affix, UErrorCode &status) {
460     // Ensure the static defaults are initialized:
461     umtx_initOnce(gDefaultCurrencySpacingInitOnce, &initDefaultCurrencySpacing, status);
462     if (U_FAILURE(status)) {
463         return UnicodeSet();
464     }
465 
466     const UnicodeString& pattern = symbols.getPatternForCurrencySpacing(
467             position == IN_CURRENCY ? UNUM_CURRENCY_MATCH : UNUM_CURRENCY_SURROUNDING_MATCH,
468             affix == SUFFIX,
469             status);
470     if (pattern.compare(u"[:digit:]", -1) == 0) {
471         return *UNISET_DIGIT;
472     } else if (pattern.compare(u"[[:^S:]&[:^Z:]]", -1) == 0) {
473         return *UNISET_NOTSZ;
474     } else {
475         return UnicodeSet(pattern, status);
476     }
477 }
478 
479 UnicodeString
getInsertString(const DecimalFormatSymbols & symbols,EAffix affix,UErrorCode & status)480 CurrencySpacingEnabledModifier::getInsertString(const DecimalFormatSymbols &symbols, EAffix affix,
481                                                 UErrorCode &status) {
482     return symbols.getPatternForCurrencySpacing(UNUM_CURRENCY_INSERT, affix == SUFFIX, status);
483 }
484 
485 #endif /* #if !UCONFIG_NO_FORMATTING */
486