1 // © 2017 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3
4 #include "unicode/utypes.h"
5
6 #if !UCONFIG_NO_FORMATTING
7
8 #include "umutex.h"
9 #include "ucln_cmn.h"
10 #include "ucln_in.h"
11 #include "number_modifiers.h"
12
13 using namespace icu;
14 using namespace icu::number;
15 using namespace icu::number::impl;
16
17 namespace {
18
// Threshold used when decoding compiled SimpleFormatter patterns in this file:
// a pattern slot holding a value at or above this limit is treated as a literal
// segment whose length is (value - ARG_NUM_LIMIT).
// TODO: This is copied from simpleformatter.cpp
constexpr int32_t ARG_NUM_LIMIT = 0x100;
21
22 // These are the default currency spacing UnicodeSets in CLDR.
23 // Pre-compute them for performance.
24 // The Java unit test testCurrencySpacingPatternStability() will start failing if these change in CLDR.
25 icu::UInitOnce gDefaultCurrencySpacingInitOnce = U_INITONCE_INITIALIZER;
26
27 UnicodeSet *UNISET_DIGIT = nullptr;
28 UnicodeSet *UNISET_NOTSZ = nullptr;
29
cleanupDefaultCurrencySpacing()30 UBool U_CALLCONV cleanupDefaultCurrencySpacing() {
31 delete UNISET_DIGIT;
32 UNISET_DIGIT = nullptr;
33 delete UNISET_NOTSZ;
34 UNISET_NOTSZ = nullptr;
35 gDefaultCurrencySpacingInitOnce.reset();
36 return TRUE;
37 }
38
initDefaultCurrencySpacing(UErrorCode & status)39 void U_CALLCONV initDefaultCurrencySpacing(UErrorCode &status) {
40 ucln_i18n_registerCleanup(UCLN_I18N_CURRENCY_SPACING, cleanupDefaultCurrencySpacing);
41 UNISET_DIGIT = new UnicodeSet(UnicodeString(u"[:digit:]"), status);
42 UNISET_NOTSZ = new UnicodeSet(UnicodeString(u"[[:^S:]&[:^Z:]]"), status);
43 if (UNISET_DIGIT == nullptr || UNISET_NOTSZ == nullptr) {
44 status = U_MEMORY_ALLOCATION_ERROR;
45 return;
46 }
47 UNISET_DIGIT->freeze();
48 UNISET_NOTSZ->freeze();
49 }
50
51 } // namespace
52
53
54 Modifier::~Modifier() = default;
55
Parameters()56 Modifier::Parameters::Parameters()
57 : obj(nullptr) {}
58
Parameters(const ModifierStore * _obj,Signum _signum,StandardPlural::Form _plural)59 Modifier::Parameters::Parameters(
60 const ModifierStore* _obj, Signum _signum, StandardPlural::Form _plural)
61 : obj(_obj), signum(_signum), plural(_plural) {}
62
63 ModifierStore::~ModifierStore() = default;
64
~AdoptingModifierStore()65 AdoptingModifierStore::~AdoptingModifierStore() {
66 for (const Modifier *mod : mods) {
67 delete mod;
68 }
69 }
70
71
apply(FormattedStringBuilder & output,int leftIndex,int rightIndex,UErrorCode & status) const72 int32_t ConstantAffixModifier::apply(FormattedStringBuilder &output, int leftIndex, int rightIndex,
73 UErrorCode &status) const {
74 // Insert the suffix first since inserting the prefix will change the rightIndex
75 int length = output.insert(rightIndex, fSuffix, fField, status);
76 length += output.insert(leftIndex, fPrefix, fField, status);
77 return length;
78 }
79
getPrefixLength() const80 int32_t ConstantAffixModifier::getPrefixLength() const {
81 return fPrefix.length();
82 }
83
getCodePointCount() const84 int32_t ConstantAffixModifier::getCodePointCount() const {
85 return fPrefix.countChar32() + fSuffix.countChar32();
86 }
87
isStrong() const88 bool ConstantAffixModifier::isStrong() const {
89 return fStrong;
90 }
91
containsField(Field field) const92 bool ConstantAffixModifier::containsField(Field field) const {
93 (void)field;
94 // This method is not currently used.
95 UPRV_UNREACHABLE;
96 #ifdef U_STRINGI_PATCHES
97 return false;
98 #endif
99 }
100
getParameters(Parameters & output) const101 void ConstantAffixModifier::getParameters(Parameters& output) const {
102 (void)output;
103 // This method is not currently used.
104 UPRV_UNREACHABLE;
105 }
106
semanticallyEquivalent(const Modifier & other) const107 bool ConstantAffixModifier::semanticallyEquivalent(const Modifier& other) const {
108 auto* _other = dynamic_cast<const ConstantAffixModifier*>(&other);
109 if (_other == nullptr) {
110 return false;
111 }
112 return fPrefix == _other->fPrefix
113 && fSuffix == _other->fSuffix
114 && fField == _other->fField
115 && fStrong == _other->fStrong;
116 }
117
118
SimpleModifier(const SimpleFormatter & simpleFormatter,Field field,bool strong)119 SimpleModifier::SimpleModifier(const SimpleFormatter &simpleFormatter, Field field, bool strong)
120 : SimpleModifier(simpleFormatter, field, strong, {}) {}
121
SimpleModifier(const SimpleFormatter & simpleFormatter,Field field,bool strong,const Modifier::Parameters parameters)122 SimpleModifier::SimpleModifier(const SimpleFormatter &simpleFormatter, Field field, bool strong,
123 const Modifier::Parameters parameters)
124 : fCompiledPattern(simpleFormatter.compiledPattern), fField(field), fStrong(strong),
125 fParameters(parameters) {
126 int32_t argLimit = SimpleFormatter::getArgumentLimit(
127 fCompiledPattern.getBuffer(), fCompiledPattern.length());
128 if (argLimit == 0) {
129 // No arguments in compiled pattern
130 fPrefixLength = fCompiledPattern.charAt(1) - ARG_NUM_LIMIT;
131 U_ASSERT(2 + fPrefixLength == fCompiledPattern.length());
132 // Set suffixOffset = -1 to indicate no arguments in compiled pattern.
133 fSuffixOffset = -1;
134 fSuffixLength = 0;
135 } else {
136 U_ASSERT(argLimit == 1);
137 if (fCompiledPattern.charAt(1) != 0) {
138 // Found prefix
139 fPrefixLength = fCompiledPattern.charAt(1) - ARG_NUM_LIMIT;
140 fSuffixOffset = 3 + fPrefixLength;
141 } else {
142 // No prefix
143 fPrefixLength = 0;
144 fSuffixOffset = 2;
145 }
146 if (3 + fPrefixLength < fCompiledPattern.length()) {
147 // Found suffix
148 fSuffixLength = fCompiledPattern.charAt(fSuffixOffset) - ARG_NUM_LIMIT;
149 } else {
150 // No suffix
151 fSuffixLength = 0;
152 }
153 }
154 }
155
SimpleModifier()156 SimpleModifier::SimpleModifier()
157 : fField(kUndefinedField), fStrong(false), fPrefixLength(0), fSuffixLength(0) {
158 }
159
apply(FormattedStringBuilder & output,int leftIndex,int rightIndex,UErrorCode & status) const160 int32_t SimpleModifier::apply(FormattedStringBuilder &output, int leftIndex, int rightIndex,
161 UErrorCode &status) const {
162 return formatAsPrefixSuffix(output, leftIndex, rightIndex, status);
163 }
164
getPrefixLength() const165 int32_t SimpleModifier::getPrefixLength() const {
166 return fPrefixLength;
167 }
168
getCodePointCount() const169 int32_t SimpleModifier::getCodePointCount() const {
170 int32_t count = 0;
171 if (fPrefixLength > 0) {
172 count += fCompiledPattern.countChar32(2, fPrefixLength);
173 }
174 if (fSuffixLength > 0) {
175 count += fCompiledPattern.countChar32(1 + fSuffixOffset, fSuffixLength);
176 }
177 return count;
178 }
179
isStrong() const180 bool SimpleModifier::isStrong() const {
181 return fStrong;
182 }
183
containsField(Field field) const184 bool SimpleModifier::containsField(Field field) const {
185 (void)field;
186 // This method is not currently used.
187 UPRV_UNREACHABLE;
188 #ifdef U_STRINGI_PATCHES
189 return false;
190 #endif
191 }
192
getParameters(Parameters & output) const193 void SimpleModifier::getParameters(Parameters& output) const {
194 output = fParameters;
195 }
196
semanticallyEquivalent(const Modifier & other) const197 bool SimpleModifier::semanticallyEquivalent(const Modifier& other) const {
198 auto* _other = dynamic_cast<const SimpleModifier*>(&other);
199 if (_other == nullptr) {
200 return false;
201 }
202 if (fParameters.obj != nullptr) {
203 return fParameters.obj == _other->fParameters.obj;
204 }
205 return fCompiledPattern == _other->fCompiledPattern
206 && fField == _other->fField
207 && fStrong == _other->fStrong;
208 }
209
210
211 int32_t
formatAsPrefixSuffix(FormattedStringBuilder & result,int32_t startIndex,int32_t endIndex,UErrorCode & status) const212 SimpleModifier::formatAsPrefixSuffix(FormattedStringBuilder &result, int32_t startIndex, int32_t endIndex,
213 UErrorCode &status) const {
214 if (fSuffixOffset == -1 && fPrefixLength + fSuffixLength > 0) {
215 // There is no argument for the inner number; overwrite the entire segment with our string.
216 return result.splice(startIndex, endIndex, fCompiledPattern, 2, 2 + fPrefixLength, fField, status);
217 } else {
218 if (fPrefixLength > 0) {
219 result.insert(startIndex, fCompiledPattern, 2, 2 + fPrefixLength, fField, status);
220 }
221 if (fSuffixLength > 0) {
222 result.insert(
223 endIndex + fPrefixLength,
224 fCompiledPattern,
225 1 + fSuffixOffset,
226 1 + fSuffixOffset + fSuffixLength,
227 fField,
228 status);
229 }
230 return fPrefixLength + fSuffixLength;
231 }
232 }
233
234
235 int32_t
formatTwoArgPattern(const SimpleFormatter & compiled,FormattedStringBuilder & result,int32_t index,int32_t * outPrefixLength,int32_t * outSuffixLength,Field field,UErrorCode & status)236 SimpleModifier::formatTwoArgPattern(const SimpleFormatter& compiled, FormattedStringBuilder& result,
237 int32_t index, int32_t* outPrefixLength, int32_t* outSuffixLength,
238 Field field, UErrorCode& status) {
239 const UnicodeString& compiledPattern = compiled.compiledPattern;
240 int32_t argLimit = SimpleFormatter::getArgumentLimit(
241 compiledPattern.getBuffer(), compiledPattern.length());
242 if (argLimit != 2) {
243 status = U_INTERNAL_PROGRAM_ERROR;
244 return 0;
245 }
246 int32_t offset = 1; // offset into compiledPattern
247 int32_t length = 0; // chars added to result
248
249 int32_t prefixLength = compiledPattern.charAt(offset);
250 offset++;
251 if (prefixLength < ARG_NUM_LIMIT) {
252 // No prefix
253 prefixLength = 0;
254 } else {
255 prefixLength -= ARG_NUM_LIMIT;
256 result.insert(index + length, compiledPattern, offset, offset + prefixLength, field, status);
257 offset += prefixLength;
258 length += prefixLength;
259 offset++;
260 }
261
262 int32_t infixLength = compiledPattern.charAt(offset);
263 offset++;
264 if (infixLength < ARG_NUM_LIMIT) {
265 // No infix
266 infixLength = 0;
267 } else {
268 infixLength -= ARG_NUM_LIMIT;
269 result.insert(index + length, compiledPattern, offset, offset + infixLength, field, status);
270 offset += infixLength;
271 length += infixLength;
272 offset++;
273 }
274
275 int32_t suffixLength;
276 if (offset == compiledPattern.length()) {
277 // No suffix
278 suffixLength = 0;
279 } else {
280 suffixLength = compiledPattern.charAt(offset) - ARG_NUM_LIMIT;
281 offset++;
282 result.insert(index + length, compiledPattern, offset, offset + suffixLength, field, status);
283 length += suffixLength;
284 }
285
286 *outPrefixLength = prefixLength;
287 *outSuffixLength = suffixLength;
288
289 return length;
290 }
291
292
apply(FormattedStringBuilder & output,int leftIndex,int rightIndex,UErrorCode & status) const293 int32_t ConstantMultiFieldModifier::apply(FormattedStringBuilder &output, int leftIndex, int rightIndex,
294 UErrorCode &status) const {
295 int32_t length = output.insert(leftIndex, fPrefix, status);
296 if (fOverwrite) {
297 length += output.splice(
298 leftIndex + length,
299 rightIndex + length,
300 UnicodeString(), 0, 0,
301 kUndefinedField, status);
302 }
303 length += output.insert(rightIndex + length, fSuffix, status);
304 return length;
305 }
306
getPrefixLength() const307 int32_t ConstantMultiFieldModifier::getPrefixLength() const {
308 return fPrefix.length();
309 }
310
getCodePointCount() const311 int32_t ConstantMultiFieldModifier::getCodePointCount() const {
312 return fPrefix.codePointCount() + fSuffix.codePointCount();
313 }
314
isStrong() const315 bool ConstantMultiFieldModifier::isStrong() const {
316 return fStrong;
317 }
318
containsField(Field field) const319 bool ConstantMultiFieldModifier::containsField(Field field) const {
320 return fPrefix.containsField(field) || fSuffix.containsField(field);
321 }
322
getParameters(Parameters & output) const323 void ConstantMultiFieldModifier::getParameters(Parameters& output) const {
324 output = fParameters;
325 }
326
semanticallyEquivalent(const Modifier & other) const327 bool ConstantMultiFieldModifier::semanticallyEquivalent(const Modifier& other) const {
328 auto* _other = dynamic_cast<const ConstantMultiFieldModifier*>(&other);
329 if (_other == nullptr) {
330 return false;
331 }
332 if (fParameters.obj != nullptr) {
333 return fParameters.obj == _other->fParameters.obj;
334 }
335 return fPrefix.contentEquals(_other->fPrefix)
336 && fSuffix.contentEquals(_other->fSuffix)
337 && fOverwrite == _other->fOverwrite
338 && fStrong == _other->fStrong;
339 }
340
341
CurrencySpacingEnabledModifier(const FormattedStringBuilder & prefix,const FormattedStringBuilder & suffix,bool overwrite,bool strong,const DecimalFormatSymbols & symbols,UErrorCode & status)342 CurrencySpacingEnabledModifier::CurrencySpacingEnabledModifier(const FormattedStringBuilder &prefix,
343 const FormattedStringBuilder &suffix,
344 bool overwrite,
345 bool strong,
346 const DecimalFormatSymbols &symbols,
347 UErrorCode &status)
348 : ConstantMultiFieldModifier(prefix, suffix, overwrite, strong) {
349 // Check for currency spacing. Do not build the UnicodeSets unless there is
350 // a currency code point at a boundary.
351 if (prefix.length() > 0 && prefix.fieldAt(prefix.length() - 1) == Field(UFIELD_CATEGORY_NUMBER, UNUM_CURRENCY_FIELD)) {
352 int prefixCp = prefix.getLastCodePoint();
353 UnicodeSet prefixUnicodeSet = getUnicodeSet(symbols, IN_CURRENCY, PREFIX, status);
354 if (prefixUnicodeSet.contains(prefixCp)) {
355 fAfterPrefixUnicodeSet = getUnicodeSet(symbols, IN_NUMBER, PREFIX, status);
356 fAfterPrefixUnicodeSet.freeze();
357 fAfterPrefixInsert = getInsertString(symbols, PREFIX, status);
358 } else {
359 fAfterPrefixUnicodeSet.setToBogus();
360 fAfterPrefixInsert.setToBogus();
361 }
362 } else {
363 fAfterPrefixUnicodeSet.setToBogus();
364 fAfterPrefixInsert.setToBogus();
365 }
366 if (suffix.length() > 0 && suffix.fieldAt(0) == Field(UFIELD_CATEGORY_NUMBER, UNUM_CURRENCY_FIELD)) {
367 int suffixCp = suffix.getFirstCodePoint();
368 UnicodeSet suffixUnicodeSet = getUnicodeSet(symbols, IN_CURRENCY, SUFFIX, status);
369 if (suffixUnicodeSet.contains(suffixCp)) {
370 fBeforeSuffixUnicodeSet = getUnicodeSet(symbols, IN_NUMBER, SUFFIX, status);
371 fBeforeSuffixUnicodeSet.freeze();
372 fBeforeSuffixInsert = getInsertString(symbols, SUFFIX, status);
373 } else {
374 fBeforeSuffixUnicodeSet.setToBogus();
375 fBeforeSuffixInsert.setToBogus();
376 }
377 } else {
378 fBeforeSuffixUnicodeSet.setToBogus();
379 fBeforeSuffixInsert.setToBogus();
380 }
381 }
382
apply(FormattedStringBuilder & output,int leftIndex,int rightIndex,UErrorCode & status) const383 int32_t CurrencySpacingEnabledModifier::apply(FormattedStringBuilder &output, int leftIndex, int rightIndex,
384 UErrorCode &status) const {
385 // Currency spacing logic
386 int length = 0;
387 if (rightIndex - leftIndex > 0 && !fAfterPrefixUnicodeSet.isBogus() &&
388 fAfterPrefixUnicodeSet.contains(output.codePointAt(leftIndex))) {
389 // TODO: Should we use the CURRENCY field here?
390 length += output.insert(
391 leftIndex,
392 fAfterPrefixInsert,
393 kUndefinedField,
394 status);
395 }
396 if (rightIndex - leftIndex > 0 && !fBeforeSuffixUnicodeSet.isBogus() &&
397 fBeforeSuffixUnicodeSet.contains(output.codePointBefore(rightIndex))) {
398 // TODO: Should we use the CURRENCY field here?
399 length += output.insert(
400 rightIndex + length,
401 fBeforeSuffixInsert,
402 kUndefinedField,
403 status);
404 }
405
406 // Call super for the remaining logic
407 length += ConstantMultiFieldModifier::apply(output, leftIndex, rightIndex + length, status);
408 return length;
409 }
410
411 int32_t
applyCurrencySpacing(FormattedStringBuilder & output,int32_t prefixStart,int32_t prefixLen,int32_t suffixStart,int32_t suffixLen,const DecimalFormatSymbols & symbols,UErrorCode & status)412 CurrencySpacingEnabledModifier::applyCurrencySpacing(FormattedStringBuilder &output, int32_t prefixStart,
413 int32_t prefixLen, int32_t suffixStart,
414 int32_t suffixLen,
415 const DecimalFormatSymbols &symbols,
416 UErrorCode &status) {
417 int length = 0;
418 bool hasPrefix = (prefixLen > 0);
419 bool hasSuffix = (suffixLen > 0);
420 bool hasNumber = (suffixStart - prefixStart - prefixLen > 0); // could be empty string
421 if (hasPrefix && hasNumber) {
422 length += applyCurrencySpacingAffix(output, prefixStart + prefixLen, PREFIX, symbols, status);
423 }
424 if (hasSuffix && hasNumber) {
425 length += applyCurrencySpacingAffix(output, suffixStart + length, SUFFIX, symbols, status);
426 }
427 return length;
428 }
429
430 int32_t
applyCurrencySpacingAffix(FormattedStringBuilder & output,int32_t index,EAffix affix,const DecimalFormatSymbols & symbols,UErrorCode & status)431 CurrencySpacingEnabledModifier::applyCurrencySpacingAffix(FormattedStringBuilder &output, int32_t index,
432 EAffix affix,
433 const DecimalFormatSymbols &symbols,
434 UErrorCode &status) {
435 // NOTE: For prefix, output.fieldAt(index-1) gets the last field type in the prefix.
436 // This works even if the last code point in the prefix is 2 code units because the
437 // field value gets populated to both indices in the field array.
438 Field affixField = (affix == PREFIX) ? output.fieldAt(index - 1) : output.fieldAt(index);
439 if (affixField != Field(UFIELD_CATEGORY_NUMBER, UNUM_CURRENCY_FIELD)) {
440 return 0;
441 }
442 int affixCp = (affix == PREFIX) ? output.codePointBefore(index) : output.codePointAt(index);
443 UnicodeSet affixUniset = getUnicodeSet(symbols, IN_CURRENCY, affix, status);
444 if (!affixUniset.contains(affixCp)) {
445 return 0;
446 }
447 int numberCp = (affix == PREFIX) ? output.codePointAt(index) : output.codePointBefore(index);
448 UnicodeSet numberUniset = getUnicodeSet(symbols, IN_NUMBER, affix, status);
449 if (!numberUniset.contains(numberCp)) {
450 return 0;
451 }
452 UnicodeString spacingString = getInsertString(symbols, affix, status);
453
454 // NOTE: This next line *inserts* the spacing string, triggering an arraycopy.
455 // It would be more efficient if this could be done before affixes were attached,
456 // so that it could be prepended/appended instead of inserted.
457 // However, the build code path is more efficient, and this is the most natural
458 // place to put currency spacing in the non-build code path.
459 // TODO: Should we use the CURRENCY field here?
460 return output.insert(index, spacingString, kUndefinedField, status);
461 }
462
463 UnicodeSet
getUnicodeSet(const DecimalFormatSymbols & symbols,EPosition position,EAffix affix,UErrorCode & status)464 CurrencySpacingEnabledModifier::getUnicodeSet(const DecimalFormatSymbols &symbols, EPosition position,
465 EAffix affix, UErrorCode &status) {
466 // Ensure the static defaults are initialized:
467 umtx_initOnce(gDefaultCurrencySpacingInitOnce, &initDefaultCurrencySpacing, status);
468 if (U_FAILURE(status)) {
469 return UnicodeSet();
470 }
471
472 const UnicodeString& pattern = symbols.getPatternForCurrencySpacing(
473 position == IN_CURRENCY ? UNUM_CURRENCY_MATCH : UNUM_CURRENCY_SURROUNDING_MATCH,
474 affix == SUFFIX,
475 status);
476 if (pattern.compare(u"[:digit:]", -1) == 0) {
477 return *UNISET_DIGIT;
478 } else if (pattern.compare(u"[[:^S:]&[:^Z:]]", -1) == 0) {
479 return *UNISET_NOTSZ;
480 } else {
481 return UnicodeSet(pattern, status);
482 }
483 }
484
485 UnicodeString
getInsertString(const DecimalFormatSymbols & symbols,EAffix affix,UErrorCode & status)486 CurrencySpacingEnabledModifier::getInsertString(const DecimalFormatSymbols &symbols, EAffix affix,
487 UErrorCode &status) {
488 return symbols.getPatternForCurrencySpacing(UNUM_CURRENCY_INSERT, affix == SUFFIX, status);
489 }
490
491 #endif /* #if !UCONFIG_NO_FORMATTING */
492