1 // © 2017 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 
4 #include "unicode/utypes.h"
5 
6 #if !UCONFIG_NO_FORMATTING
7 
8 // Allow implicit conversion from char16_t* to UnicodeString for this file:
9 // Helpful in toString methods and elsewhere.
10 #define UNISTR_FROM_STRING_EXPLICIT
11 #define UNISTR_FROM_CHAR_EXPLICIT
12 
13 #include "uassert.h"
14 #include "number_patternstring.h"
15 #include "unicode/utf16.h"
16 #include "number_utils.h"
17 #include "number_roundingutils.h"
18 #include "number_mapper.h"
19 
20 using namespace icu;
21 using namespace icu::number;
22 using namespace icu::number::impl;
23 
24 
parseToPatternInfo(const UnicodeString & patternString,ParsedPatternInfo & patternInfo,UErrorCode & status)25 void PatternParser::parseToPatternInfo(const UnicodeString& patternString, ParsedPatternInfo& patternInfo,
26                                        UErrorCode& status) {
27     patternInfo.consumePattern(patternString, status);
28 }
29 
30 DecimalFormatProperties
parseToProperties(const UnicodeString & pattern,IgnoreRounding ignoreRounding,UErrorCode & status)31 PatternParser::parseToProperties(const UnicodeString& pattern, IgnoreRounding ignoreRounding,
32                                  UErrorCode& status) {
33     DecimalFormatProperties properties;
34     parseToExistingPropertiesImpl(pattern, properties, ignoreRounding, status);
35     return properties;
36 }
37 
parseToProperties(const UnicodeString & pattern,UErrorCode & status)38 DecimalFormatProperties PatternParser::parseToProperties(const UnicodeString& pattern,
39                                                          UErrorCode& status) {
40     return parseToProperties(pattern, IGNORE_ROUNDING_NEVER, status);
41 }
42 
43 void
parseToExistingProperties(const UnicodeString & pattern,DecimalFormatProperties & properties,IgnoreRounding ignoreRounding,UErrorCode & status)44 PatternParser::parseToExistingProperties(const UnicodeString& pattern, DecimalFormatProperties& properties,
45                                          IgnoreRounding ignoreRounding, UErrorCode& status) {
46     parseToExistingPropertiesImpl(pattern, properties, ignoreRounding, status);
47 }
48 
49 
charAt(int32_t flags,int32_t index) const50 char16_t ParsedPatternInfo::charAt(int32_t flags, int32_t index) const {
51     const Endpoints& endpoints = getEndpoints(flags);
52     if (index < 0 || index >= endpoints.end - endpoints.start) {
53         UPRV_UNREACHABLE;
54     }
55     return pattern.charAt(endpoints.start + index);
56 }
57 
length(int32_t flags) const58 int32_t ParsedPatternInfo::length(int32_t flags) const {
59     return getLengthFromEndpoints(getEndpoints(flags));
60 }
61 
getLengthFromEndpoints(const Endpoints & endpoints)62 int32_t ParsedPatternInfo::getLengthFromEndpoints(const Endpoints& endpoints) {
63     return endpoints.end - endpoints.start;
64 }
65 
getString(int32_t flags) const66 UnicodeString ParsedPatternInfo::getString(int32_t flags) const {
67     const Endpoints& endpoints = getEndpoints(flags);
68     if (endpoints.start == endpoints.end) {
69         return UnicodeString();
70     }
71     // Create a new UnicodeString
72     return UnicodeString(pattern, endpoints.start, endpoints.end - endpoints.start);
73 }
74 
getEndpoints(int32_t flags) const75 const Endpoints& ParsedPatternInfo::getEndpoints(int32_t flags) const {
76     bool prefix = (flags & AFFIX_PREFIX) != 0;
77     bool isNegative = (flags & AFFIX_NEGATIVE_SUBPATTERN) != 0;
78     bool padding = (flags & AFFIX_PADDING) != 0;
79     if (isNegative && padding) {
80         return negative.paddingEndpoints;
81     } else if (padding) {
82         return positive.paddingEndpoints;
83     } else if (prefix && isNegative) {
84         return negative.prefixEndpoints;
85     } else if (prefix) {
86         return positive.prefixEndpoints;
87     } else if (isNegative) {
88         return negative.suffixEndpoints;
89     } else {
90         return positive.suffixEndpoints;
91     }
92 }
93 
positiveHasPlusSign() const94 bool ParsedPatternInfo::positiveHasPlusSign() const {
95     return positive.hasPlusSign;
96 }
97 
hasNegativeSubpattern() const98 bool ParsedPatternInfo::hasNegativeSubpattern() const {
99     return fHasNegativeSubpattern;
100 }
101 
negativeHasMinusSign() const102 bool ParsedPatternInfo::negativeHasMinusSign() const {
103     return negative.hasMinusSign;
104 }
105 
hasCurrencySign() const106 bool ParsedPatternInfo::hasCurrencySign() const {
107     return positive.hasCurrencySign || (fHasNegativeSubpattern && negative.hasCurrencySign);
108 }
109 
containsSymbolType(AffixPatternType type,UErrorCode & status) const110 bool ParsedPatternInfo::containsSymbolType(AffixPatternType type, UErrorCode& status) const {
111     return AffixUtils::containsType(pattern, type, status);
112 }
113 
hasBody() const114 bool ParsedPatternInfo::hasBody() const {
115     return positive.integerTotal > 0;
116 }
117 
118 /////////////////////////////////////////////////////
119 /// BEGIN RECURSIVE DESCENT PARSER IMPLEMENTATION ///
120 /////////////////////////////////////////////////////
121 
peek()122 UChar32 ParsedPatternInfo::ParserState::peek() {
123     if (offset == pattern.length()) {
124         return -1;
125     } else {
126         return pattern.char32At(offset);
127     }
128 }
129 
next()130 UChar32 ParsedPatternInfo::ParserState::next() {
131     int codePoint = peek();
132     offset += U16_LENGTH(codePoint);
133     return codePoint;
134 }
135 
consumePattern(const UnicodeString & patternString,UErrorCode & status)136 void ParsedPatternInfo::consumePattern(const UnicodeString& patternString, UErrorCode& status) {
137     if (U_FAILURE(status)) { return; }
138     this->pattern = patternString;
139 
140     // This class is not intended for writing twice!
141     // Use move assignment to overwrite instead.
142     U_ASSERT(state.offset == 0);
143 
144     // pattern := subpattern (';' subpattern)?
145     currentSubpattern = &positive;
146     consumeSubpattern(status);
147     if (U_FAILURE(status)) { return; }
148     if (state.peek() == u';') {
149         state.next(); // consume the ';'
150         // Don't consume the negative subpattern if it is empty (trailing ';')
151         if (state.peek() != -1) {
152             fHasNegativeSubpattern = true;
153             currentSubpattern = &negative;
154             consumeSubpattern(status);
155             if (U_FAILURE(status)) { return; }
156         }
157     }
158     if (state.peek() != -1) {
159         state.toParseException(u"Found unquoted special character");
160         status = U_UNQUOTED_SPECIAL;
161     }
162 }
163 
consumeSubpattern(UErrorCode & status)164 void ParsedPatternInfo::consumeSubpattern(UErrorCode& status) {
165     // subpattern := literals? number exponent? literals?
166     consumePadding(PadPosition::UNUM_PAD_BEFORE_PREFIX, status);
167     if (U_FAILURE(status)) { return; }
168     consumeAffix(currentSubpattern->prefixEndpoints, status);
169     if (U_FAILURE(status)) { return; }
170     consumePadding(PadPosition::UNUM_PAD_AFTER_PREFIX, status);
171     if (U_FAILURE(status)) { return; }
172     consumeFormat(status);
173     if (U_FAILURE(status)) { return; }
174     consumeExponent(status);
175     if (U_FAILURE(status)) { return; }
176     consumePadding(PadPosition::UNUM_PAD_BEFORE_SUFFIX, status);
177     if (U_FAILURE(status)) { return; }
178     consumeAffix(currentSubpattern->suffixEndpoints, status);
179     if (U_FAILURE(status)) { return; }
180     consumePadding(PadPosition::UNUM_PAD_AFTER_SUFFIX, status);
181     if (U_FAILURE(status)) { return; }
182 }
183 
consumePadding(PadPosition paddingLocation,UErrorCode & status)184 void ParsedPatternInfo::consumePadding(PadPosition paddingLocation, UErrorCode& status) {
185     if (state.peek() != u'*') {
186         return;
187     }
188     if (currentSubpattern->hasPadding) {
189         state.toParseException(u"Cannot have multiple pad specifiers");
190         status = U_MULTIPLE_PAD_SPECIFIERS;
191         return;
192     }
193     currentSubpattern->paddingLocation = paddingLocation;
194     currentSubpattern->hasPadding = true;
195     state.next(); // consume the '*'
196     currentSubpattern->paddingEndpoints.start = state.offset;
197     consumeLiteral(status);
198     currentSubpattern->paddingEndpoints.end = state.offset;
199 }
200 
consumeAffix(Endpoints & endpoints,UErrorCode & status)201 void ParsedPatternInfo::consumeAffix(Endpoints& endpoints, UErrorCode& status) {
202     // literals := { literal }
203     endpoints.start = state.offset;
204     while (true) {
205         switch (state.peek()) {
206             case u'#':
207             case u'@':
208             case u';':
209             case u'*':
210             case u'.':
211             case u',':
212             case u'0':
213             case u'1':
214             case u'2':
215             case u'3':
216             case u'4':
217             case u'5':
218             case u'6':
219             case u'7':
220             case u'8':
221             case u'9':
222             case -1:
223                 // Characters that cannot appear unquoted in a literal
224                 // break outer;
225                 goto after_outer;
226 
227             case u'%':
228                 currentSubpattern->hasPercentSign = true;
229                 break;
230 
231             case u'‰':
232                 currentSubpattern->hasPerMilleSign = true;
233                 break;
234 
235             case u'¤':
236                 currentSubpattern->hasCurrencySign = true;
237                 break;
238 
239             case u'-':
240                 currentSubpattern->hasMinusSign = true;
241                 break;
242 
243             case u'+':
244                 currentSubpattern->hasPlusSign = true;
245                 break;
246 
247             default:
248                 break;
249         }
250         consumeLiteral(status);
251         if (U_FAILURE(status)) { return; }
252     }
253     after_outer:
254     endpoints.end = state.offset;
255 }
256 
consumeLiteral(UErrorCode & status)257 void ParsedPatternInfo::consumeLiteral(UErrorCode& status) {
258     if (state.peek() == -1) {
259         state.toParseException(u"Expected unquoted literal but found EOL");
260         status = U_PATTERN_SYNTAX_ERROR;
261         return;
262     } else if (state.peek() == u'\'') {
263         state.next(); // consume the starting quote
264         while (state.peek() != u'\'') {
265             if (state.peek() == -1) {
266                 state.toParseException(u"Expected quoted literal but found EOL");
267                 status = U_PATTERN_SYNTAX_ERROR;
268                 return;
269             } else {
270                 state.next(); // consume a quoted character
271             }
272         }
273         state.next(); // consume the ending quote
274     } else {
275         // consume a non-quoted literal character
276         state.next();
277     }
278 }
279 
consumeFormat(UErrorCode & status)280 void ParsedPatternInfo::consumeFormat(UErrorCode& status) {
281     consumeIntegerFormat(status);
282     if (U_FAILURE(status)) { return; }
283     if (state.peek() == u'.') {
284         state.next(); // consume the decimal point
285         currentSubpattern->hasDecimal = true;
286         currentSubpattern->widthExceptAffixes += 1;
287         consumeFractionFormat(status);
288         if (U_FAILURE(status)) { return; }
289     }
290 }
291 
consumeIntegerFormat(UErrorCode & status)292 void ParsedPatternInfo::consumeIntegerFormat(UErrorCode& status) {
293     // Convenience reference:
294     ParsedSubpatternInfo& result = *currentSubpattern;
295 
296     while (true) {
297         switch (state.peek()) {
298             case u',':
299                 result.widthExceptAffixes += 1;
300                 result.groupingSizes <<= 16;
301                 break;
302 
303             case u'#':
304                 if (result.integerNumerals > 0) {
305                     state.toParseException(u"# cannot follow 0 before decimal point");
306                     status = U_UNEXPECTED_TOKEN;
307                     return;
308                 }
309                 result.widthExceptAffixes += 1;
310                 result.groupingSizes += 1;
311                 if (result.integerAtSigns > 0) {
312                     result.integerTrailingHashSigns += 1;
313                 } else {
314                     result.integerLeadingHashSigns += 1;
315                 }
316                 result.integerTotal += 1;
317                 break;
318 
319             case u'@':
320                 if (result.integerNumerals > 0) {
321                     state.toParseException(u"Cannot mix 0 and @");
322                     status = U_UNEXPECTED_TOKEN;
323                     return;
324                 }
325                 if (result.integerTrailingHashSigns > 0) {
326                     state.toParseException(u"Cannot nest # inside of a run of @");
327                     status = U_UNEXPECTED_TOKEN;
328                     return;
329                 }
330                 result.widthExceptAffixes += 1;
331                 result.groupingSizes += 1;
332                 result.integerAtSigns += 1;
333                 result.integerTotal += 1;
334                 break;
335 
336             case u'0':
337             case u'1':
338             case u'2':
339             case u'3':
340             case u'4':
341             case u'5':
342             case u'6':
343             case u'7':
344             case u'8':
345             case u'9':
346                 if (result.integerAtSigns > 0) {
347                     state.toParseException(u"Cannot mix @ and 0");
348                     status = U_UNEXPECTED_TOKEN;
349                     return;
350                 }
351                 result.widthExceptAffixes += 1;
352                 result.groupingSizes += 1;
353                 result.integerNumerals += 1;
354                 result.integerTotal += 1;
355                 if (!result.rounding.isZeroish() || state.peek() != u'0') {
356                     result.rounding.appendDigit(static_cast<int8_t>(state.peek() - u'0'), 0, true);
357                 }
358                 break;
359 
360             default:
361                 goto after_outer;
362         }
363         state.next(); // consume the symbol
364     }
365 
366     after_outer:
367     // Disallow patterns with a trailing ',' or with two ',' next to each other
368     auto grouping1 = static_cast<int16_t> (result.groupingSizes & 0xffff);
369     auto grouping2 = static_cast<int16_t> ((result.groupingSizes >> 16) & 0xffff);
370     auto grouping3 = static_cast<int16_t> ((result.groupingSizes >> 32) & 0xffff);
371     if (grouping1 == 0 && grouping2 != -1) {
372         state.toParseException(u"Trailing grouping separator is invalid");
373         status = U_UNEXPECTED_TOKEN;
374         return;
375     }
376     if (grouping2 == 0 && grouping3 != -1) {
377         state.toParseException(u"Grouping width of zero is invalid");
378         status = U_PATTERN_SYNTAX_ERROR;
379         return;
380     }
381 }
382 
consumeFractionFormat(UErrorCode & status)383 void ParsedPatternInfo::consumeFractionFormat(UErrorCode& status) {
384     // Convenience reference:
385     ParsedSubpatternInfo& result = *currentSubpattern;
386 
387     int32_t zeroCounter = 0;
388     while (true) {
389         switch (state.peek()) {
390             case u'#':
391                 result.widthExceptAffixes += 1;
392                 result.fractionHashSigns += 1;
393                 result.fractionTotal += 1;
394                 zeroCounter++;
395                 break;
396 
397             case u'0':
398             case u'1':
399             case u'2':
400             case u'3':
401             case u'4':
402             case u'5':
403             case u'6':
404             case u'7':
405             case u'8':
406             case u'9':
407                 if (result.fractionHashSigns > 0) {
408                     state.toParseException(u"0 cannot follow # after decimal point");
409                     status = U_UNEXPECTED_TOKEN;
410                     return;
411                 }
412                 result.widthExceptAffixes += 1;
413                 result.fractionNumerals += 1;
414                 result.fractionTotal += 1;
415                 if (state.peek() == u'0') {
416                     zeroCounter++;
417                 } else {
418                     result.rounding
419                             .appendDigit(static_cast<int8_t>(state.peek() - u'0'), zeroCounter, false);
420                     zeroCounter = 0;
421                 }
422                 break;
423 
424             default:
425                 return;
426         }
427         state.next(); // consume the symbol
428     }
429 }
430 
consumeExponent(UErrorCode & status)431 void ParsedPatternInfo::consumeExponent(UErrorCode& status) {
432     // Convenience reference:
433     ParsedSubpatternInfo& result = *currentSubpattern;
434 
435     if (state.peek() != u'E') {
436         return;
437     }
438     if ((result.groupingSizes & 0xffff0000L) != 0xffff0000L) {
439         state.toParseException(u"Cannot have grouping separator in scientific notation");
440         status = U_MALFORMED_EXPONENTIAL_PATTERN;
441         return;
442     }
443     state.next(); // consume the E
444     result.widthExceptAffixes++;
445     if (state.peek() == u'+') {
446         state.next(); // consume the +
447         result.exponentHasPlusSign = true;
448         result.widthExceptAffixes++;
449     }
450     while (state.peek() == u'0') {
451         state.next(); // consume the 0
452         result.exponentZeros += 1;
453         result.widthExceptAffixes++;
454     }
455 }
456 
457 ///////////////////////////////////////////////////
458 /// END RECURSIVE DESCENT PARSER IMPLEMENTATION ///
459 ///////////////////////////////////////////////////
460 
parseToExistingPropertiesImpl(const UnicodeString & pattern,DecimalFormatProperties & properties,IgnoreRounding ignoreRounding,UErrorCode & status)461 void PatternParser::parseToExistingPropertiesImpl(const UnicodeString& pattern,
462                                                   DecimalFormatProperties& properties,
463                                                   IgnoreRounding ignoreRounding, UErrorCode& status) {
464     if (pattern.length() == 0) {
465         // Backwards compatibility requires that we reset to the default values.
466         // TODO: Only overwrite the properties that "saveToProperties" normally touches?
467         properties.clear();
468         return;
469     }
470 
471     ParsedPatternInfo patternInfo;
472     parseToPatternInfo(pattern, patternInfo, status);
473     if (U_FAILURE(status)) { return; }
474     patternInfoToProperties(properties, patternInfo, ignoreRounding, status);
475 }
476 
477 void
patternInfoToProperties(DecimalFormatProperties & properties,ParsedPatternInfo & patternInfo,IgnoreRounding _ignoreRounding,UErrorCode & status)478 PatternParser::patternInfoToProperties(DecimalFormatProperties& properties, ParsedPatternInfo& patternInfo,
479                                        IgnoreRounding _ignoreRounding, UErrorCode& status) {
480     // Translate from PatternParseResult to Properties.
481     // Note that most data from "negative" is ignored per the specification of DecimalFormat.
482 
483     const ParsedSubpatternInfo& positive = patternInfo.positive;
484 
485     bool ignoreRounding;
486     if (_ignoreRounding == IGNORE_ROUNDING_NEVER) {
487         ignoreRounding = false;
488     } else if (_ignoreRounding == IGNORE_ROUNDING_IF_CURRENCY) {
489         ignoreRounding = positive.hasCurrencySign;
490     } else {
491         U_ASSERT(_ignoreRounding == IGNORE_ROUNDING_ALWAYS);
492         ignoreRounding = true;
493     }
494 
495     // Grouping settings
496     auto grouping1 = static_cast<int16_t> (positive.groupingSizes & 0xffff);
497     auto grouping2 = static_cast<int16_t> ((positive.groupingSizes >> 16) & 0xffff);
498     auto grouping3 = static_cast<int16_t> ((positive.groupingSizes >> 32) & 0xffff);
499     if (grouping2 != -1) {
500         properties.groupingSize = grouping1;
501         properties.groupingUsed = true;
502     } else {
503         properties.groupingSize = -1;
504         properties.groupingUsed = false;
505     }
506     if (grouping3 != -1) {
507         properties.secondaryGroupingSize = grouping2;
508     } else {
509         properties.secondaryGroupingSize = -1;
510     }
511 
512     // For backwards compatibility, require that the pattern emit at least one min digit.
513     int minInt, minFrac;
514     if (positive.integerTotal == 0 && positive.fractionTotal > 0) {
515         // patterns like ".##"
516         minInt = 0;
517         minFrac = uprv_max(1, positive.fractionNumerals);
518     } else if (positive.integerNumerals == 0 && positive.fractionNumerals == 0) {
519         // patterns like "#.##"
520         minInt = 1;
521         minFrac = 0;
522     } else {
523         minInt = positive.integerNumerals;
524         minFrac = positive.fractionNumerals;
525     }
526 
527     // Rounding settings
528     // Don't set basic rounding when there is a currency sign; defer to CurrencyUsage
529     if (positive.integerAtSigns > 0) {
530         properties.minimumFractionDigits = -1;
531         properties.maximumFractionDigits = -1;
532         properties.roundingIncrement = 0.0;
533         properties.minimumSignificantDigits = positive.integerAtSigns;
534         properties.maximumSignificantDigits = positive.integerAtSigns + positive.integerTrailingHashSigns;
535     } else if (!positive.rounding.isZeroish()) {
536         if (!ignoreRounding) {
537             properties.minimumFractionDigits = minFrac;
538             properties.maximumFractionDigits = positive.fractionTotal;
539             properties.roundingIncrement = positive.rounding.toDouble();
540         } else {
541             properties.minimumFractionDigits = -1;
542             properties.maximumFractionDigits = -1;
543             properties.roundingIncrement = 0.0;
544         }
545         properties.minimumSignificantDigits = -1;
546         properties.maximumSignificantDigits = -1;
547     } else {
548         if (!ignoreRounding) {
549             properties.minimumFractionDigits = minFrac;
550             properties.maximumFractionDigits = positive.fractionTotal;
551             properties.roundingIncrement = 0.0;
552         } else {
553             properties.minimumFractionDigits = -1;
554             properties.maximumFractionDigits = -1;
555             properties.roundingIncrement = 0.0;
556         }
557         properties.minimumSignificantDigits = -1;
558         properties.maximumSignificantDigits = -1;
559     }
560 
561     // If the pattern ends with a '.' then force the decimal point.
562     if (positive.hasDecimal && positive.fractionTotal == 0) {
563         properties.decimalSeparatorAlwaysShown = true;
564     } else {
565         properties.decimalSeparatorAlwaysShown = false;
566     }
567 
568     // Scientific notation settings
569     if (positive.exponentZeros > 0) {
570         properties.exponentSignAlwaysShown = positive.exponentHasPlusSign;
571         properties.minimumExponentDigits = positive.exponentZeros;
572         if (positive.integerAtSigns == 0) {
573             // patterns without '@' can define max integer digits, used for engineering notation
574             properties.minimumIntegerDigits = positive.integerNumerals;
575             properties.maximumIntegerDigits = positive.integerTotal;
576         } else {
577             // patterns with '@' cannot define max integer digits
578             properties.minimumIntegerDigits = 1;
579             properties.maximumIntegerDigits = -1;
580         }
581     } else {
582         properties.exponentSignAlwaysShown = false;
583         properties.minimumExponentDigits = -1;
584         properties.minimumIntegerDigits = minInt;
585         properties.maximumIntegerDigits = -1;
586     }
587 
588     // Compute the affix patterns (required for both padding and affixes)
589     UnicodeString posPrefix = patternInfo.getString(AffixPatternProvider::AFFIX_PREFIX);
590     UnicodeString posSuffix = patternInfo.getString(0);
591 
592     // Padding settings
593     if (positive.hasPadding) {
594         // The width of the positive prefix and suffix templates are included in the padding
595         int paddingWidth = positive.widthExceptAffixes +
596                            AffixUtils::estimateLength(posPrefix, status) +
597                            AffixUtils::estimateLength(posSuffix, status);
598         properties.formatWidth = paddingWidth;
599         UnicodeString rawPaddingString = patternInfo.getString(AffixPatternProvider::AFFIX_PADDING);
600         if (rawPaddingString.length() == 1) {
601             properties.padString = rawPaddingString;
602         } else if (rawPaddingString.length() == 2) {
603             if (rawPaddingString.charAt(0) == u'\'') {
604                 properties.padString.setTo(u"'", -1);
605             } else {
606                 properties.padString = rawPaddingString;
607             }
608         } else {
609             properties.padString = UnicodeString(rawPaddingString, 1, rawPaddingString.length() - 2);
610         }
611         properties.padPosition = positive.paddingLocation;
612     } else {
613         properties.formatWidth = -1;
614         properties.padString.setToBogus();
615         properties.padPosition.nullify();
616     }
617 
618     // Set the affixes
619     // Always call the setter, even if the prefixes are empty, especially in the case of the
620     // negative prefix pattern, to prevent default values from overriding the pattern.
621     properties.positivePrefixPattern = posPrefix;
622     properties.positiveSuffixPattern = posSuffix;
623     if (patternInfo.fHasNegativeSubpattern) {
624         properties.negativePrefixPattern = patternInfo.getString(
625                 AffixPatternProvider::AFFIX_NEGATIVE_SUBPATTERN | AffixPatternProvider::AFFIX_PREFIX);
626         properties.negativeSuffixPattern = patternInfo.getString(
627                 AffixPatternProvider::AFFIX_NEGATIVE_SUBPATTERN);
628     } else {
629         properties.negativePrefixPattern.setToBogus();
630         properties.negativeSuffixPattern.setToBogus();
631     }
632 
633     // Set the magnitude multiplier
634     if (positive.hasPercentSign) {
635         properties.magnitudeMultiplier = 2;
636     } else if (positive.hasPerMilleSign) {
637         properties.magnitudeMultiplier = 3;
638     } else {
639         properties.magnitudeMultiplier = 0;
640     }
641 }
642 
643 ///////////////////////////////////////////////////////////////////
644 /// End PatternStringParser.java; begin PatternStringUtils.java ///
645 ///////////////////////////////////////////////////////////////////
646 
647 // Determine whether a given roundingIncrement should be ignored for formatting
648 // based on the current maxFrac value (maximum fraction digits). For example a
649 // roundingIncrement of 0.01 should be ignored if maxFrac is 1, but not if maxFrac
650 // is 2 or more. Note that roundingIncrements are rounded in significance, so
651 // a roundingIncrement of 0.006 is treated like 0.01 for this determination, i.e.
652 // it should not be ignored if maxFrac is 2 or more (but a roundingIncrement of
653 // 0.005 is treated like 0.001 for significance). This is the reason for the
654 // initial doubling below.
655 // roundIncr must be non-zero.
ignoreRoundingIncrement(double roundIncr,int32_t maxFrac)656 bool PatternStringUtils::ignoreRoundingIncrement(double roundIncr, int32_t maxFrac) {
657     if (maxFrac < 0) {
658         return false;
659     }
660     int32_t frac = 0;
661     roundIncr *= 2.0;
662     for (frac = 0; frac <= maxFrac && roundIncr <= 1.0; frac++, roundIncr *= 10.0);
663     return (frac > maxFrac);
664 }
665 
propertiesToPatternString(const DecimalFormatProperties & properties,UErrorCode & status)666 UnicodeString PatternStringUtils::propertiesToPatternString(const DecimalFormatProperties& properties,
667                                                             UErrorCode& status) {
668     UnicodeString sb;
669 
670     // Convenience references
671     // The uprv_min() calls prevent DoS
672     int32_t dosMax = 100;
673     int32_t grouping1 = uprv_max(0, uprv_min(properties.groupingSize, dosMax));
674     int32_t grouping2 = uprv_max(0, uprv_min(properties.secondaryGroupingSize, dosMax));
675     bool useGrouping = properties.groupingUsed;
676     int32_t paddingWidth = uprv_min(properties.formatWidth, dosMax);
677     NullableValue<PadPosition> paddingLocation = properties.padPosition;
678     UnicodeString paddingString = properties.padString;
679     int32_t minInt = uprv_max(0, uprv_min(properties.minimumIntegerDigits, dosMax));
680     int32_t maxInt = uprv_min(properties.maximumIntegerDigits, dosMax);
681     int32_t minFrac = uprv_max(0, uprv_min(properties.minimumFractionDigits, dosMax));
682     int32_t maxFrac = uprv_min(properties.maximumFractionDigits, dosMax);
683     int32_t minSig = uprv_min(properties.minimumSignificantDigits, dosMax);
684     int32_t maxSig = uprv_min(properties.maximumSignificantDigits, dosMax);
685     bool alwaysShowDecimal = properties.decimalSeparatorAlwaysShown;
686     int32_t exponentDigits = uprv_min(properties.minimumExponentDigits, dosMax);
687     bool exponentShowPlusSign = properties.exponentSignAlwaysShown;
688 
689     AutoAffixPatternProvider affixProvider(properties, status);
690 
691     // Prefixes
692     sb.append(affixProvider.get().getString(AffixPatternProvider::AFFIX_POS_PREFIX));
693     int32_t afterPrefixPos = sb.length();
694 
695     // Figure out the grouping sizes.
696     if (!useGrouping) {
697         grouping1 = 0;
698         grouping2 = 0;
699     } else if (grouping1 == grouping2) {
700         grouping1 = 0;
701     }
702     int32_t groupingLength = grouping1 + grouping2 + 1;
703 
704     // Figure out the digits we need to put in the pattern.
705     double roundingInterval = properties.roundingIncrement;
706     UnicodeString digitsString;
707     int32_t digitsStringScale = 0;
708     if (maxSig != uprv_min(dosMax, -1)) {
709         // Significant Digits.
710         while (digitsString.length() < minSig) {
711             digitsString.append(u'@');
712         }
713         while (digitsString.length() < maxSig) {
714             digitsString.append(u'#');
715         }
716     } else if (roundingInterval != 0.0 && !ignoreRoundingIncrement(roundingInterval,maxFrac)) {
717         // Rounding Interval.
718         digitsStringScale = -roundingutils::doubleFractionLength(roundingInterval, nullptr);
719         // TODO: Check for DoS here?
720         DecimalQuantity incrementQuantity;
721         incrementQuantity.setToDouble(roundingInterval);
722         incrementQuantity.adjustMagnitude(-digitsStringScale);
723         incrementQuantity.roundToMagnitude(0, kDefaultMode, status);
724         UnicodeString str = incrementQuantity.toPlainString();
725         if (str.charAt(0) == u'-') {
726             // TODO: Unsupported operation exception or fail silently?
727             digitsString.append(str, 1, str.length() - 1);
728         } else {
729             digitsString.append(str);
730         }
731     }
732     while (digitsString.length() + digitsStringScale < minInt) {
733         digitsString.insert(0, u'0');
734     }
735     while (-digitsStringScale < minFrac) {
736         digitsString.append(u'0');
737         digitsStringScale--;
738     }
739 
740     // Write the digits to the string builder
741     int32_t m0 = uprv_max(groupingLength, digitsString.length() + digitsStringScale);
742     m0 = (maxInt != dosMax) ? uprv_max(maxInt, m0) - 1 : m0 - 1;
743     int32_t mN = (maxFrac != dosMax) ? uprv_min(-maxFrac, digitsStringScale) : digitsStringScale;
744     for (int32_t magnitude = m0; magnitude >= mN; magnitude--) {
745         int32_t di = digitsString.length() + digitsStringScale - magnitude - 1;
746         if (di < 0 || di >= digitsString.length()) {
747             sb.append(u'#');
748         } else {
749             sb.append(digitsString.charAt(di));
750         }
751         // Decimal separator
752         if (magnitude == 0 && (alwaysShowDecimal || mN < 0)) {
753             sb.append(u'.');
754         }
755         if (!useGrouping) {
756             continue;
757         }
758         // Least-significant grouping separator
759         if (magnitude > 0 && magnitude == grouping1) {
760             sb.append(u',');
761         }
762         // All other grouping separators
763         if (magnitude > grouping1 && grouping2 > 0 && (magnitude - grouping1) % grouping2 == 0) {
764             sb.append(u',');
765         }
766     }
767 
768     // Exponential notation
769     if (exponentDigits != uprv_min(dosMax, -1)) {
770         sb.append(u'E');
771         if (exponentShowPlusSign) {
772             sb.append(u'+');
773         }
774         for (int32_t i = 0; i < exponentDigits; i++) {
775             sb.append(u'0');
776         }
777     }
778 
779     // Suffixes
780     int32_t beforeSuffixPos = sb.length();
781     sb.append(affixProvider.get().getString(AffixPatternProvider::AFFIX_POS_SUFFIX));
782 
783     // Resolve Padding
784     if (paddingWidth > 0 && !paddingLocation.isNull()) {
785         while (paddingWidth - sb.length() > 0) {
786             sb.insert(afterPrefixPos, u'#');
787             beforeSuffixPos++;
788         }
789         int32_t addedLength;
790         switch (paddingLocation.get(status)) {
791             case PadPosition::UNUM_PAD_BEFORE_PREFIX:
792                 addedLength = escapePaddingString(paddingString, sb, 0, status);
793                 sb.insert(0, u'*');
794                 afterPrefixPos += addedLength + 1;
795                 beforeSuffixPos += addedLength + 1;
796                 break;
797             case PadPosition::UNUM_PAD_AFTER_PREFIX:
798                 addedLength = escapePaddingString(paddingString, sb, afterPrefixPos, status);
799                 sb.insert(afterPrefixPos, u'*');
800                 afterPrefixPos += addedLength + 1;
801                 beforeSuffixPos += addedLength + 1;
802                 break;
803             case PadPosition::UNUM_PAD_BEFORE_SUFFIX:
804                 escapePaddingString(paddingString, sb, beforeSuffixPos, status);
805                 sb.insert(beforeSuffixPos, u'*');
806                 break;
807             case PadPosition::UNUM_PAD_AFTER_SUFFIX:
808                 sb.append(u'*');
809                 escapePaddingString(paddingString, sb, sb.length(), status);
810                 break;
811         }
812         if (U_FAILURE(status)) { return sb; }
813     }
814 
815     // Negative affixes
816     // Ignore if the negative prefix pattern is "-" and the negative suffix is empty
817     if (affixProvider.get().hasNegativeSubpattern()) {
818         sb.append(u';');
819         sb.append(affixProvider.get().getString(AffixPatternProvider::AFFIX_NEG_PREFIX));
820         // Copy the positive digit format into the negative.
821         // This is optional; the pattern is the same as if '#' were appended here instead.
822         // NOTE: It is not safe to append the UnicodeString to itself, so we need to copy.
823         // See http://bugs.icu-project.org/trac/ticket/13707
824         UnicodeString copy(sb);
825         sb.append(copy, afterPrefixPos, beforeSuffixPos - afterPrefixPos);
826         sb.append(affixProvider.get().getString(AffixPatternProvider::AFFIX_NEG_SUFFIX));
827     }
828 
829     return sb;
830 }
831 
escapePaddingString(UnicodeString input,UnicodeString & output,int startIndex,UErrorCode & status)832 int PatternStringUtils::escapePaddingString(UnicodeString input, UnicodeString& output, int startIndex,
833                                             UErrorCode& status) {
834     (void) status;
835     if (input.length() == 0) {
836         input.setTo(kFallbackPaddingString, -1);
837     }
838     int startLength = output.length();
839     if (input.length() == 1) {
840         if (input.compare(u"'", -1) == 0) {
841             output.insert(startIndex, u"''", -1);
842         } else {
843             output.insert(startIndex, input);
844         }
845     } else {
846         output.insert(startIndex, u'\'');
847         int offset = 1;
848         for (int i = 0; i < input.length(); i++) {
849             // it's okay to deal in chars here because the quote mark is the only interesting thing.
850             char16_t ch = input.charAt(i);
851             if (ch == u'\'') {
852                 output.insert(startIndex + offset, u"''", -1);
853                 offset += 2;
854             } else {
855                 output.insert(startIndex + offset, ch);
856                 offset += 1;
857             }
858         }
859         output.insert(startIndex + offset, u'\'');
860     }
861     return output.length() - startLength;
862 }
863 
864 UnicodeString
convertLocalized(const UnicodeString & input,const DecimalFormatSymbols & symbols,bool toLocalized,UErrorCode & status)865 PatternStringUtils::convertLocalized(const UnicodeString& input, const DecimalFormatSymbols& symbols,
866                                      bool toLocalized, UErrorCode& status) {
867     // Construct a table of strings to be converted between localized and standard.
868     static constexpr int32_t LEN = 21;
869     UnicodeString table[LEN][2];
870     int standIdx = toLocalized ? 0 : 1;
871     int localIdx = toLocalized ? 1 : 0;
872     table[0][standIdx] = u"%";
873     table[0][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kPercentSymbol);
874     table[1][standIdx] = u"‰";
875     table[1][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kPerMillSymbol);
876     table[2][standIdx] = u".";
877     table[2][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kDecimalSeparatorSymbol);
878     table[3][standIdx] = u",";
879     table[3][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kGroupingSeparatorSymbol);
880     table[4][standIdx] = u"-";
881     table[4][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kMinusSignSymbol);
882     table[5][standIdx] = u"+";
883     table[5][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kPlusSignSymbol);
884     table[6][standIdx] = u";";
885     table[6][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kPatternSeparatorSymbol);
886     table[7][standIdx] = u"@";
887     table[7][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kSignificantDigitSymbol);
888     table[8][standIdx] = u"E";
889     table[8][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kExponentialSymbol);
890     table[9][standIdx] = u"*";
891     table[9][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kPadEscapeSymbol);
892     table[10][standIdx] = u"#";
893     table[10][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kDigitSymbol);
894     for (int i = 0; i < 10; i++) {
895         table[11 + i][standIdx] = u'0' + i;
896         table[11 + i][localIdx] = symbols.getConstDigitSymbol(i);
897     }
898 
899     // Special case: quotes are NOT allowed to be in any localIdx strings.
900     // Substitute them with '’' instead.
901     for (int32_t i = 0; i < LEN; i++) {
902         table[i][localIdx].findAndReplace(u'\'', u'’');
903     }
904 
905     // Iterate through the string and convert.
906     // State table:
907     // 0 => base state
908     // 1 => first char inside a quoted sequence in input and output string
909     // 2 => inside a quoted sequence in input and output string
910     // 3 => first char after a close quote in input string;
911     // close quote still needs to be written to output string
912     // 4 => base state in input string; inside quoted sequence in output string
913     // 5 => first char inside a quoted sequence in input string;
914     // inside quoted sequence in output string
915     UnicodeString result;
916     int state = 0;
917     for (int offset = 0; offset < input.length(); offset++) {
918         UChar ch = input.charAt(offset);
919 
920         // Handle a quote character (state shift)
921         if (ch == u'\'') {
922             if (state == 0) {
923                 result.append(u'\'');
924                 state = 1;
925                 continue;
926             } else if (state == 1) {
927                 result.append(u'\'');
928                 state = 0;
929                 continue;
930             } else if (state == 2) {
931                 state = 3;
932                 continue;
933             } else if (state == 3) {
934                 result.append(u'\'');
935                 result.append(u'\'');
936                 state = 1;
937                 continue;
938             } else if (state == 4) {
939                 state = 5;
940                 continue;
941             } else {
942                 U_ASSERT(state == 5);
943                 result.append(u'\'');
944                 result.append(u'\'');
945                 state = 4;
946                 continue;
947             }
948         }
949 
950         if (state == 0 || state == 3 || state == 4) {
951             for (auto& pair : table) {
952                 // Perform a greedy match on this symbol string
953                 UnicodeString temp = input.tempSubString(offset, pair[0].length());
954                 if (temp == pair[0]) {
955                     // Skip ahead past this region for the next iteration
956                     offset += pair[0].length() - 1;
957                     if (state == 3 || state == 4) {
958                         result.append(u'\'');
959                         state = 0;
960                     }
961                     result.append(pair[1]);
962                     goto continue_outer;
963                 }
964             }
965             // No replacement found. Check if a special quote is necessary
966             for (auto& pair : table) {
967                 UnicodeString temp = input.tempSubString(offset, pair[1].length());
968                 if (temp == pair[1]) {
969                     if (state == 0) {
970                         result.append(u'\'');
971                         state = 4;
972                     }
973                     result.append(ch);
974                     goto continue_outer;
975                 }
976             }
977             // Still nothing. Copy the char verbatim. (Add a close quote if necessary)
978             if (state == 3 || state == 4) {
979                 result.append(u'\'');
980                 state = 0;
981             }
982             result.append(ch);
983         } else {
984             U_ASSERT(state == 1 || state == 2 || state == 5);
985             result.append(ch);
986             state = 2;
987         }
988         continue_outer:;
989     }
990     // Resolve final quotes
991     if (state == 3 || state == 4) {
992         result.append(u'\'');
993         state = 0;
994     }
995     if (state != 0) {
996         // Malformed localized pattern: unterminated quote
997         status = U_PATTERN_SYNTAX_ERROR;
998     }
999     return result;
1000 }
1001 
patternInfoToStringBuilder(const AffixPatternProvider & patternInfo,bool isPrefix,PatternSignType patternSignType,StandardPlural::Form plural,bool perMilleReplacesPercent,UnicodeString & output)1002 void PatternStringUtils::patternInfoToStringBuilder(const AffixPatternProvider& patternInfo, bool isPrefix,
1003                                                     PatternSignType patternSignType,
1004                                                     StandardPlural::Form plural,
1005                                                     bool perMilleReplacesPercent, UnicodeString& output) {
1006 
1007     // Should the output render '+' where '-' would normally appear in the pattern?
1008     bool plusReplacesMinusSign = (patternSignType == PATTERN_SIGN_TYPE_POS_SIGN)
1009         && !patternInfo.positiveHasPlusSign();
1010 
1011     // Should we use the affix from the negative subpattern?
1012     // (If not, we will use the positive subpattern.)
1013     bool useNegativeAffixPattern = patternInfo.hasNegativeSubpattern()
1014         && (patternSignType == PATTERN_SIGN_TYPE_NEG
1015             || (patternInfo.negativeHasMinusSign() && plusReplacesMinusSign));
1016 
1017     // Resolve the flags for the affix pattern.
1018     int flags = 0;
1019     if (useNegativeAffixPattern) {
1020         flags |= AffixPatternProvider::AFFIX_NEGATIVE_SUBPATTERN;
1021     }
1022     if (isPrefix) {
1023         flags |= AffixPatternProvider::AFFIX_PREFIX;
1024     }
1025     if (plural != StandardPlural::Form::COUNT) {
1026         U_ASSERT(plural == (AffixPatternProvider::AFFIX_PLURAL_MASK & plural));
1027         flags |= plural;
1028     }
1029 
1030     // Should we prepend a sign to the pattern?
1031     bool prependSign;
1032     if (!isPrefix || useNegativeAffixPattern) {
1033         prependSign = false;
1034     } else if (patternSignType == PATTERN_SIGN_TYPE_NEG) {
1035         prependSign = true;
1036     } else {
1037         prependSign = plusReplacesMinusSign;
1038     }
1039 
1040     // Compute the length of the affix pattern.
1041     int length = patternInfo.length(flags) + (prependSign ? 1 : 0);
1042 
1043     // Finally, set the result into the StringBuilder.
1044     output.remove();
1045     for (int index = 0; index < length; index++) {
1046         char16_t candidate;
1047         if (prependSign && index == 0) {
1048             candidate = u'-';
1049         } else if (prependSign) {
1050             candidate = patternInfo.charAt(flags, index - 1);
1051         } else {
1052             candidate = patternInfo.charAt(flags, index);
1053         }
1054         if (plusReplacesMinusSign && candidate == u'-') {
1055             candidate = u'+';
1056         }
1057         if (perMilleReplacesPercent && candidate == u'%') {
1058             candidate = u'‰';
1059         }
1060         output.append(candidate);
1061     }
1062 }
1063 
resolveSignDisplay(UNumberSignDisplay signDisplay,Signum signum)1064 PatternSignType PatternStringUtils::resolveSignDisplay(UNumberSignDisplay signDisplay, Signum signum) {
1065     switch (signDisplay) {
1066         case UNUM_SIGN_AUTO:
1067         case UNUM_SIGN_ACCOUNTING:
1068             switch (signum) {
1069                 case SIGNUM_NEG:
1070                 case SIGNUM_NEG_ZERO:
1071                     return PATTERN_SIGN_TYPE_NEG;
1072                 case SIGNUM_POS_ZERO:
1073                 case SIGNUM_POS:
1074                     return PATTERN_SIGN_TYPE_POS;
1075                 default:
1076                     break;
1077             }
1078             break;
1079 
1080         case UNUM_SIGN_ALWAYS:
1081         case UNUM_SIGN_ACCOUNTING_ALWAYS:
1082             switch (signum) {
1083                 case SIGNUM_NEG:
1084                 case SIGNUM_NEG_ZERO:
1085                     return PATTERN_SIGN_TYPE_NEG;
1086                 case SIGNUM_POS_ZERO:
1087                 case SIGNUM_POS:
1088                     return PATTERN_SIGN_TYPE_POS_SIGN;
1089                 default:
1090                     break;
1091             }
1092             break;
1093 
1094         case UNUM_SIGN_EXCEPT_ZERO:
1095         case UNUM_SIGN_ACCOUNTING_EXCEPT_ZERO:
1096             switch (signum) {
1097                 case SIGNUM_NEG:
1098                     return PATTERN_SIGN_TYPE_NEG;
1099                 case SIGNUM_NEG_ZERO:
1100                 case SIGNUM_POS_ZERO:
1101                     return PATTERN_SIGN_TYPE_POS;
1102                 case SIGNUM_POS:
1103                     return PATTERN_SIGN_TYPE_POS_SIGN;
1104                 default:
1105                     break;
1106             }
1107             break;
1108 
1109         case UNUM_SIGN_NEGATIVE:
1110         case UNUM_SIGN_ACCOUNTING_NEGATIVE:
1111             switch (signum) {
1112                 case SIGNUM_NEG:
1113                     return PATTERN_SIGN_TYPE_NEG;
1114                 case SIGNUM_NEG_ZERO:
1115                 case SIGNUM_POS_ZERO:
1116                 case SIGNUM_POS:
1117                     return PATTERN_SIGN_TYPE_POS;
1118                 default:
1119                     break;
1120             }
1121             break;
1122 
1123         case UNUM_SIGN_NEVER:
1124             return PATTERN_SIGN_TYPE_POS;
1125 
1126         default:
1127             break;
1128     }
1129 
1130     UPRV_UNREACHABLE;
1131     return PATTERN_SIGN_TYPE_POS;
1132 }
1133 
1134 #endif /* #if !UCONFIG_NO_FORMATTING */
1135