1 // © 2018 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 
4 #include "unicode/utypes.h"
5 
6 #if !UCONFIG_NO_FORMATTING
7 
8 // Allow implicit conversion from char16_t* to UnicodeString for this file:
9 // Helpful in toString methods and elsewhere.
10 #define UNISTR_FROM_STRING_EXPLICIT
11 
12 #include "number_decnum.h"
13 #include "number_skeletons.h"
14 #include "umutex.h"
15 #include "ucln_in.h"
16 #include "patternprops.h"
17 #include "unicode/ucharstriebuilder.h"
18 #include "number_utils.h"
19 #include "number_decimalquantity.h"
20 #include "unicode/numberformatter.h"
21 #include "uinvchar.h"
22 #include "charstr.h"
23 #include "string_segment.h"
24 
25 using namespace icu;
26 using namespace icu::number;
27 using namespace icu::number::impl;
28 using namespace icu::number::impl::skeleton;
29 
30 namespace {
31 
// Init-once guard passed to umtx_initOnce() together with initNumberSkeletons();
// it is reset again by cleanupNumberSkeletons().
icu::UInitOnce gNumberSkeletonsInitOnce = U_INITONCE_INITIALIZER;

// Serialized form of the stem trie (stem string -> StemEnum value).
// Allocated and filled by initNumberSkeletons(), released by cleanupNumberSkeletons().
char16_t* kSerializedStemTrie = nullptr;
35 
cleanupNumberSkeletons()36 UBool U_CALLCONV cleanupNumberSkeletons() {
37     uprv_free(kSerializedStemTrie);
38     kSerializedStemTrie = nullptr;
39     gNumberSkeletonsInitOnce.reset();
40     return TRUE;
41 }
42 
initNumberSkeletons(UErrorCode & status)43 void U_CALLCONV initNumberSkeletons(UErrorCode& status) {
44     ucln_i18n_registerCleanup(UCLN_I18N_NUMBER_SKELETONS, cleanupNumberSkeletons);
45 
46     UCharsTrieBuilder b(status);
47     if (U_FAILURE(status)) { return; }
48 
49     // Section 1:
50     b.add(u"compact-short", STEM_COMPACT_SHORT, status);
51     b.add(u"compact-long", STEM_COMPACT_LONG, status);
52     b.add(u"scientific", STEM_SCIENTIFIC, status);
53     b.add(u"engineering", STEM_ENGINEERING, status);
54     b.add(u"notation-simple", STEM_NOTATION_SIMPLE, status);
55     b.add(u"base-unit", STEM_BASE_UNIT, status);
56     b.add(u"percent", STEM_PERCENT, status);
57     b.add(u"permille", STEM_PERMILLE, status);
58     b.add(u"precision-integer", STEM_PRECISION_INTEGER, status);
59     b.add(u"precision-unlimited", STEM_PRECISION_UNLIMITED, status);
60     b.add(u"precision-currency-standard", STEM_PRECISION_CURRENCY_STANDARD, status);
61     b.add(u"precision-currency-cash", STEM_PRECISION_CURRENCY_CASH, status);
62     b.add(u"rounding-mode-ceiling", STEM_ROUNDING_MODE_CEILING, status);
63     b.add(u"rounding-mode-floor", STEM_ROUNDING_MODE_FLOOR, status);
64     b.add(u"rounding-mode-down", STEM_ROUNDING_MODE_DOWN, status);
65     b.add(u"rounding-mode-up", STEM_ROUNDING_MODE_UP, status);
66     b.add(u"rounding-mode-half-even", STEM_ROUNDING_MODE_HALF_EVEN, status);
67     b.add(u"rounding-mode-half-down", STEM_ROUNDING_MODE_HALF_DOWN, status);
68     b.add(u"rounding-mode-half-up", STEM_ROUNDING_MODE_HALF_UP, status);
69     b.add(u"rounding-mode-unnecessary", STEM_ROUNDING_MODE_UNNECESSARY, status);
70     b.add(u"group-off", STEM_GROUP_OFF, status);
71     b.add(u"group-min2", STEM_GROUP_MIN2, status);
72     b.add(u"group-auto", STEM_GROUP_AUTO, status);
73     b.add(u"group-on-aligned", STEM_GROUP_ON_ALIGNED, status);
74     b.add(u"group-thousands", STEM_GROUP_THOUSANDS, status);
75     b.add(u"latin", STEM_LATIN, status);
76     b.add(u"unit-width-narrow", STEM_UNIT_WIDTH_NARROW, status);
77     b.add(u"unit-width-short", STEM_UNIT_WIDTH_SHORT, status);
78     b.add(u"unit-width-full-name", STEM_UNIT_WIDTH_FULL_NAME, status);
79     b.add(u"unit-width-iso-code", STEM_UNIT_WIDTH_ISO_CODE, status);
80     b.add(u"unit-width-hidden", STEM_UNIT_WIDTH_HIDDEN, status);
81     b.add(u"sign-auto", STEM_SIGN_AUTO, status);
82     b.add(u"sign-always", STEM_SIGN_ALWAYS, status);
83     b.add(u"sign-never", STEM_SIGN_NEVER, status);
84     b.add(u"sign-accounting", STEM_SIGN_ACCOUNTING, status);
85     b.add(u"sign-accounting-always", STEM_SIGN_ACCOUNTING_ALWAYS, status);
86     b.add(u"sign-except-zero", STEM_SIGN_EXCEPT_ZERO, status);
87     b.add(u"sign-accounting-except-zero", STEM_SIGN_ACCOUNTING_EXCEPT_ZERO, status);
88     b.add(u"decimal-auto", STEM_DECIMAL_AUTO, status);
89     b.add(u"decimal-always", STEM_DECIMAL_ALWAYS, status);
90     if (U_FAILURE(status)) { return; }
91 
92     // Section 2:
93     b.add(u"precision-increment", STEM_PRECISION_INCREMENT, status);
94     b.add(u"measure-unit", STEM_MEASURE_UNIT, status);
95     b.add(u"per-measure-unit", STEM_PER_MEASURE_UNIT, status);
96     b.add(u"currency", STEM_CURRENCY, status);
97     b.add(u"integer-width", STEM_INTEGER_WIDTH, status);
98     b.add(u"numbering-system", STEM_NUMBERING_SYSTEM, status);
99     b.add(u"scale", STEM_SCALE, status);
100     if (U_FAILURE(status)) { return; }
101 
102     // Build the CharsTrie
103     // TODO: Use SLOW or FAST here?
104     UnicodeString result;
105     b.buildUnicodeString(USTRINGTRIE_BUILD_FAST, result, status);
106     if (U_FAILURE(status)) { return; }
107 
108     // Copy the result into the global constant pointer
109     size_t numBytes = result.length() * sizeof(char16_t);
110     kSerializedStemTrie = static_cast<char16_t*>(uprv_malloc(numBytes));
111     uprv_memcpy(kSerializedStemTrie, result.getBuffer(), numBytes);
112 }
113 
114 
appendMultiple(UnicodeString & sb,UChar32 cp,int32_t count)115 inline void appendMultiple(UnicodeString& sb, UChar32 cp, int32_t count) {
116     for (int i = 0; i < count; i++) {
117         sb.append(cp);
118     }
119 }
120 
121 
// Rejects a second occurrence of the same setting within one skeleton.
// If (seen).field is already set, stores U_NUMBER_SKELETON_SYNTAX_ERROR in `status`
// and returns STATE_NULL from the enclosing function; otherwise marks the field as seen.
#define CHECK_NULL(seen, field, status) (void)(seen); /* for auto-format line wrapping */ \
UPRV_BLOCK_MACRO_BEGIN { \
    if (!(seen).field) { \
        (seen).field = true; \
    } else { \
        (status) = U_NUMBER_SKELETON_SYNTAX_ERROR; \
        return STATE_NULL; \
    } \
} UPRV_BLOCK_MACRO_END
130 
131 
// Appends the UTF-16 code units [start, end) of `src` to `dest` as invariant characters.
// On failure, stores an error in `status` and returns from the enclosing (void) function.
#define SKELETON_UCHAR_TO_CHAR(dest, src, start, end, status) (void)(dest); \
UPRV_BLOCK_MACRO_BEGIN { \
    UErrorCode conversionStatus = U_ZERO_ERROR; \
    (dest).appendInvariantChars({FALSE, (src).getBuffer() + (start), (end) - (start)}, conversionStatus); \
    if (U_FAILURE(conversionStatus)) { \
        /* Don't propagate the invariant conversion error; it is a skeleton syntax error. */ \
        /* Every other failure code is passed through unchanged. */ \
        (status) = (conversionStatus == U_INVARIANT_CONVERSION_ERROR) \
                ? U_NUMBER_SKELETON_SYNTAX_ERROR \
                : conversionStatus; \
        return; \
    } \
} UPRV_BLOCK_MACRO_END
145 
146 
147 } // anonymous namespace
148 
149 
notation(skeleton::StemEnum stem)150 Notation stem_to_object::notation(skeleton::StemEnum stem) {
151     switch (stem) {
152         case STEM_COMPACT_SHORT:
153             return Notation::compactShort();
154         case STEM_COMPACT_LONG:
155             return Notation::compactLong();
156         case STEM_SCIENTIFIC:
157             return Notation::scientific();
158         case STEM_ENGINEERING:
159             return Notation::engineering();
160         case STEM_NOTATION_SIMPLE:
161             return Notation::simple();
162         default:
163             UPRV_UNREACHABLE;
164     }
165 }
166 
unit(skeleton::StemEnum stem)167 MeasureUnit stem_to_object::unit(skeleton::StemEnum stem) {
168     switch (stem) {
169         case STEM_BASE_UNIT:
170             // Slicing is okay
171             return NoUnit::base(); // NOLINT
172         case STEM_PERCENT:
173             // Slicing is okay
174             return NoUnit::percent(); // NOLINT
175         case STEM_PERMILLE:
176             // Slicing is okay
177             return NoUnit::permille(); // NOLINT
178         default:
179             UPRV_UNREACHABLE;
180     }
181 }
182 
precision(skeleton::StemEnum stem)183 Precision stem_to_object::precision(skeleton::StemEnum stem) {
184     switch (stem) {
185         case STEM_PRECISION_INTEGER:
186             return Precision::integer();
187         case STEM_PRECISION_UNLIMITED:
188             return Precision::unlimited();
189         case STEM_PRECISION_CURRENCY_STANDARD:
190             return Precision::currency(UCURR_USAGE_STANDARD);
191         case STEM_PRECISION_CURRENCY_CASH:
192             return Precision::currency(UCURR_USAGE_CASH);
193         default:
194             UPRV_UNREACHABLE;
195     }
196 }
197 
roundingMode(skeleton::StemEnum stem)198 UNumberFormatRoundingMode stem_to_object::roundingMode(skeleton::StemEnum stem) {
199     switch (stem) {
200         case STEM_ROUNDING_MODE_CEILING:
201             return UNUM_ROUND_CEILING;
202         case STEM_ROUNDING_MODE_FLOOR:
203             return UNUM_ROUND_FLOOR;
204         case STEM_ROUNDING_MODE_DOWN:
205             return UNUM_ROUND_DOWN;
206         case STEM_ROUNDING_MODE_UP:
207             return UNUM_ROUND_UP;
208         case STEM_ROUNDING_MODE_HALF_EVEN:
209             return UNUM_ROUND_HALFEVEN;
210         case STEM_ROUNDING_MODE_HALF_DOWN:
211             return UNUM_ROUND_HALFDOWN;
212         case STEM_ROUNDING_MODE_HALF_UP:
213             return UNUM_ROUND_HALFUP;
214         case STEM_ROUNDING_MODE_UNNECESSARY:
215             return UNUM_ROUND_UNNECESSARY;
216         default:
217             UPRV_UNREACHABLE;
218     }
219 }
220 
groupingStrategy(skeleton::StemEnum stem)221 UNumberGroupingStrategy stem_to_object::groupingStrategy(skeleton::StemEnum stem) {
222     switch (stem) {
223         case STEM_GROUP_OFF:
224             return UNUM_GROUPING_OFF;
225         case STEM_GROUP_MIN2:
226             return UNUM_GROUPING_MIN2;
227         case STEM_GROUP_AUTO:
228             return UNUM_GROUPING_AUTO;
229         case STEM_GROUP_ON_ALIGNED:
230             return UNUM_GROUPING_ON_ALIGNED;
231         case STEM_GROUP_THOUSANDS:
232             return UNUM_GROUPING_THOUSANDS;
233         default:
234             return UNUM_GROUPING_COUNT; // for objects, throw; for enums, return COUNT
235     }
236 }
237 
unitWidth(skeleton::StemEnum stem)238 UNumberUnitWidth stem_to_object::unitWidth(skeleton::StemEnum stem) {
239     switch (stem) {
240         case STEM_UNIT_WIDTH_NARROW:
241             return UNUM_UNIT_WIDTH_NARROW;
242         case STEM_UNIT_WIDTH_SHORT:
243             return UNUM_UNIT_WIDTH_SHORT;
244         case STEM_UNIT_WIDTH_FULL_NAME:
245             return UNUM_UNIT_WIDTH_FULL_NAME;
246         case STEM_UNIT_WIDTH_ISO_CODE:
247             return UNUM_UNIT_WIDTH_ISO_CODE;
248         case STEM_UNIT_WIDTH_HIDDEN:
249             return UNUM_UNIT_WIDTH_HIDDEN;
250         default:
251             return UNUM_UNIT_WIDTH_COUNT; // for objects, throw; for enums, return COUNT
252     }
253 }
254 
signDisplay(skeleton::StemEnum stem)255 UNumberSignDisplay stem_to_object::signDisplay(skeleton::StemEnum stem) {
256     switch (stem) {
257         case STEM_SIGN_AUTO:
258             return UNUM_SIGN_AUTO;
259         case STEM_SIGN_ALWAYS:
260             return UNUM_SIGN_ALWAYS;
261         case STEM_SIGN_NEVER:
262             return UNUM_SIGN_NEVER;
263         case STEM_SIGN_ACCOUNTING:
264             return UNUM_SIGN_ACCOUNTING;
265         case STEM_SIGN_ACCOUNTING_ALWAYS:
266             return UNUM_SIGN_ACCOUNTING_ALWAYS;
267         case STEM_SIGN_EXCEPT_ZERO:
268             return UNUM_SIGN_EXCEPT_ZERO;
269         case STEM_SIGN_ACCOUNTING_EXCEPT_ZERO:
270             return UNUM_SIGN_ACCOUNTING_EXCEPT_ZERO;
271         default:
272             return UNUM_SIGN_COUNT; // for objects, throw; for enums, return COUNT
273     }
274 }
275 
decimalSeparatorDisplay(skeleton::StemEnum stem)276 UNumberDecimalSeparatorDisplay stem_to_object::decimalSeparatorDisplay(skeleton::StemEnum stem) {
277     switch (stem) {
278         case STEM_DECIMAL_AUTO:
279             return UNUM_DECIMAL_SEPARATOR_AUTO;
280         case STEM_DECIMAL_ALWAYS:
281             return UNUM_DECIMAL_SEPARATOR_ALWAYS;
282         default:
283             return UNUM_DECIMAL_SEPARATOR_COUNT; // for objects, throw; for enums, return COUNT
284     }
285 }
286 
287 
roundingMode(UNumberFormatRoundingMode value,UnicodeString & sb)288 void enum_to_stem_string::roundingMode(UNumberFormatRoundingMode value, UnicodeString& sb) {
289     switch (value) {
290         case UNUM_ROUND_CEILING:
291             sb.append(u"rounding-mode-ceiling", -1);
292             break;
293         case UNUM_ROUND_FLOOR:
294             sb.append(u"rounding-mode-floor", -1);
295             break;
296         case UNUM_ROUND_DOWN:
297             sb.append(u"rounding-mode-down", -1);
298             break;
299         case UNUM_ROUND_UP:
300             sb.append(u"rounding-mode-up", -1);
301             break;
302         case UNUM_ROUND_HALFEVEN:
303             sb.append(u"rounding-mode-half-even", -1);
304             break;
305         case UNUM_ROUND_HALFDOWN:
306             sb.append(u"rounding-mode-half-down", -1);
307             break;
308         case UNUM_ROUND_HALFUP:
309             sb.append(u"rounding-mode-half-up", -1);
310             break;
311         case UNUM_ROUND_UNNECESSARY:
312             sb.append(u"rounding-mode-unnecessary", -1);
313             break;
314         default:
315             UPRV_UNREACHABLE;
316     }
317 }
318 
groupingStrategy(UNumberGroupingStrategy value,UnicodeString & sb)319 void enum_to_stem_string::groupingStrategy(UNumberGroupingStrategy value, UnicodeString& sb) {
320     switch (value) {
321         case UNUM_GROUPING_OFF:
322             sb.append(u"group-off", -1);
323             break;
324         case UNUM_GROUPING_MIN2:
325             sb.append(u"group-min2", -1);
326             break;
327         case UNUM_GROUPING_AUTO:
328             sb.append(u"group-auto", -1);
329             break;
330         case UNUM_GROUPING_ON_ALIGNED:
331             sb.append(u"group-on-aligned", -1);
332             break;
333         case UNUM_GROUPING_THOUSANDS:
334             sb.append(u"group-thousands", -1);
335             break;
336         default:
337             UPRV_UNREACHABLE;
338     }
339 }
340 
unitWidth(UNumberUnitWidth value,UnicodeString & sb)341 void enum_to_stem_string::unitWidth(UNumberUnitWidth value, UnicodeString& sb) {
342     switch (value) {
343         case UNUM_UNIT_WIDTH_NARROW:
344             sb.append(u"unit-width-narrow", -1);
345             break;
346         case UNUM_UNIT_WIDTH_SHORT:
347             sb.append(u"unit-width-short", -1);
348             break;
349         case UNUM_UNIT_WIDTH_FULL_NAME:
350             sb.append(u"unit-width-full-name", -1);
351             break;
352         case UNUM_UNIT_WIDTH_ISO_CODE:
353             sb.append(u"unit-width-iso-code", -1);
354             break;
355         case UNUM_UNIT_WIDTH_HIDDEN:
356             sb.append(u"unit-width-hidden", -1);
357             break;
358         default:
359             UPRV_UNREACHABLE;
360     }
361 }
362 
signDisplay(UNumberSignDisplay value,UnicodeString & sb)363 void enum_to_stem_string::signDisplay(UNumberSignDisplay value, UnicodeString& sb) {
364     switch (value) {
365         case UNUM_SIGN_AUTO:
366             sb.append(u"sign-auto", -1);
367             break;
368         case UNUM_SIGN_ALWAYS:
369             sb.append(u"sign-always", -1);
370             break;
371         case UNUM_SIGN_NEVER:
372             sb.append(u"sign-never", -1);
373             break;
374         case UNUM_SIGN_ACCOUNTING:
375             sb.append(u"sign-accounting", -1);
376             break;
377         case UNUM_SIGN_ACCOUNTING_ALWAYS:
378             sb.append(u"sign-accounting-always", -1);
379             break;
380         case UNUM_SIGN_EXCEPT_ZERO:
381             sb.append(u"sign-except-zero", -1);
382             break;
383         case UNUM_SIGN_ACCOUNTING_EXCEPT_ZERO:
384             sb.append(u"sign-accounting-except-zero", -1);
385             break;
386         default:
387             UPRV_UNREACHABLE;
388     }
389 }
390 
391 void
decimalSeparatorDisplay(UNumberDecimalSeparatorDisplay value,UnicodeString & sb)392 enum_to_stem_string::decimalSeparatorDisplay(UNumberDecimalSeparatorDisplay value, UnicodeString& sb) {
393     switch (value) {
394         case UNUM_DECIMAL_SEPARATOR_AUTO:
395             sb.append(u"decimal-auto", -1);
396             break;
397         case UNUM_DECIMAL_SEPARATOR_ALWAYS:
398             sb.append(u"decimal-always", -1);
399             break;
400         default:
401             UPRV_UNREACHABLE;
402     }
403 }
404 
405 
create(const UnicodeString & skeletonString,UParseError * perror,UErrorCode & status)406 UnlocalizedNumberFormatter skeleton::create(
407         const UnicodeString& skeletonString, UParseError* perror, UErrorCode& status) {
408 
409     // Initialize perror
410     if (perror != nullptr) {
411         perror->line = 0;
412         perror->offset = -1;
413         perror->preContext[0] = 0;
414         perror->postContext[0] = 0;
415     }
416 
417     umtx_initOnce(gNumberSkeletonsInitOnce, &initNumberSkeletons, status);
418     if (U_FAILURE(status)) {
419         return {};
420     }
421 
422     int32_t errOffset;
423     MacroProps macros = parseSkeleton(skeletonString, errOffset, status);
424     if (U_SUCCESS(status)) {
425         return NumberFormatter::with().macros(macros);
426     }
427 
428     if (perror == nullptr) {
429         return {};
430     }
431 
432     // Populate the UParseError with the error location
433     perror->offset = errOffset;
434     int32_t contextStart = uprv_max(0, errOffset - U_PARSE_CONTEXT_LEN + 1);
435     int32_t contextEnd = uprv_min(skeletonString.length(), errOffset + U_PARSE_CONTEXT_LEN - 1);
436     skeletonString.extract(contextStart, errOffset - contextStart, perror->preContext, 0);
437     perror->preContext[errOffset - contextStart] = 0;
438     skeletonString.extract(errOffset, contextEnd - errOffset, perror->postContext, 0);
439     perror->postContext[contextEnd - errOffset] = 0;
440     return {};
441 }
442 
generate(const MacroProps & macros,UErrorCode & status)443 UnicodeString skeleton::generate(const MacroProps& macros, UErrorCode& status) {
444     umtx_initOnce(gNumberSkeletonsInitOnce, &initNumberSkeletons, status);
445     UnicodeString sb;
446     GeneratorHelpers::generateSkeleton(macros, sb, status);
447     return sb;
448 }
449 
parseSkeleton(const UnicodeString & skeletonString,int32_t & errOffset,UErrorCode & status)450 MacroProps skeleton::parseSkeleton(
451         const UnicodeString& skeletonString, int32_t& errOffset, UErrorCode& status) {
452     U_ASSERT(U_SUCCESS(status));
453 
454     // Add a trailing whitespace to the end of the skeleton string to make code cleaner.
455     UnicodeString tempSkeletonString(skeletonString);
456     tempSkeletonString.append(u' ');
457 
458     SeenMacroProps seen;
459     MacroProps macros;
460     StringSegment segment(tempSkeletonString, false);
461     UCharsTrie stemTrie(kSerializedStemTrie);
462     ParseState stem = STATE_NULL;
463     int32_t offset = 0;
464 
465     // Primary skeleton parse loop:
466     while (offset < segment.length()) {
467         UChar32 cp = segment.codePointAt(offset);
468         bool isTokenSeparator = PatternProps::isWhiteSpace(cp);
469         bool isOptionSeparator = (cp == u'/');
470 
471         if (!isTokenSeparator && !isOptionSeparator) {
472             // Non-separator token; consume it.
473             offset += U16_LENGTH(cp);
474             if (stem == STATE_NULL) {
475                 // We are currently consuming a stem.
476                 // Go to the next state in the stem trie.
477                 stemTrie.nextForCodePoint(cp);
478             }
479             continue;
480         }
481 
482         // We are looking at a token or option separator.
483         // If the segment is nonempty, parse it and reset the segment.
484         // Otherwise, make sure it is a valid repeating separator.
485         if (offset != 0) {
486             segment.setLength(offset);
487             if (stem == STATE_NULL) {
488                 // The first separator after the start of a token. Parse it as a stem.
489                 stem = parseStem(segment, stemTrie, seen, macros, status);
490                 stemTrie.reset();
491             } else {
492                 // A separator after the first separator of a token. Parse it as an option.
493                 stem = parseOption(stem, segment, macros, status);
494             }
495             segment.resetLength();
496             if (U_FAILURE(status)) {
497                 errOffset = segment.getOffset();
498                 return macros;
499             }
500 
501             // Consume the segment:
502             segment.adjustOffset(offset);
503             offset = 0;
504 
505         } else if (stem != STATE_NULL) {
506             // A separator ('/' or whitespace) following an option separator ('/')
507             // segment.setLength(U16_LENGTH(cp)); // for error message
508             // throw new SkeletonSyntaxException("Unexpected separator character", segment);
509             status = U_NUMBER_SKELETON_SYNTAX_ERROR;
510             errOffset = segment.getOffset();
511             return macros;
512 
513         } else {
514             // Two spaces in a row; this is OK.
515         }
516 
517         // Does the current stem forbid options?
518         if (isOptionSeparator && stem == STATE_NULL) {
519             // segment.setLength(U16_LENGTH(cp)); // for error message
520             // throw new SkeletonSyntaxException("Unexpected option separator", segment);
521             status = U_NUMBER_SKELETON_SYNTAX_ERROR;
522             errOffset = segment.getOffset();
523             return macros;
524         }
525 
526         // Does the current stem require an option?
527         if (isTokenSeparator && stem != STATE_NULL) {
528             switch (stem) {
529                 case STATE_INCREMENT_PRECISION:
530                 case STATE_MEASURE_UNIT:
531                 case STATE_PER_MEASURE_UNIT:
532                 case STATE_CURRENCY_UNIT:
533                 case STATE_INTEGER_WIDTH:
534                 case STATE_NUMBERING_SYSTEM:
535                 case STATE_SCALE:
536                     // segment.setLength(U16_LENGTH(cp)); // for error message
537                     // throw new SkeletonSyntaxException("Stem requires an option", segment);
538                     status = U_NUMBER_SKELETON_SYNTAX_ERROR;
539                     errOffset = segment.getOffset();
540                     return macros;
541                 default:
542                     break;
543             }
544             stem = STATE_NULL;
545         }
546 
547         // Consume the separator:
548         segment.adjustOffset(U16_LENGTH(cp));
549     }
550     U_ASSERT(stem == STATE_NULL);
551     return macros;
552 }
553 
554 ParseState
parseStem(const StringSegment & segment,const UCharsTrie & stemTrie,SeenMacroProps & seen,MacroProps & macros,UErrorCode & status)555 skeleton::parseStem(const StringSegment& segment, const UCharsTrie& stemTrie, SeenMacroProps& seen,
556                     MacroProps& macros, UErrorCode& status) {
557     // First check for "blueprint" stems, which start with a "signal char"
558     switch (segment.charAt(0)) {
559         case u'.':
560         CHECK_NULL(seen, precision, status);
561             blueprint_helpers::parseFractionStem(segment, macros, status);
562             return STATE_FRACTION_PRECISION;
563         case u'@':
564         CHECK_NULL(seen, precision, status);
565             blueprint_helpers::parseDigitsStem(segment, macros, status);
566             return STATE_NULL;
567         default:
568             break;
569     }
570 
571     // Now look at the stemsTrie, which is already be pointing at our stem.
572     UStringTrieResult stemResult = stemTrie.current();
573 
574     if (stemResult != USTRINGTRIE_INTERMEDIATE_VALUE && stemResult != USTRINGTRIE_FINAL_VALUE) {
575         // throw new SkeletonSyntaxException("Unknown stem", segment);
576         status = U_NUMBER_SKELETON_SYNTAX_ERROR;
577         return STATE_NULL;
578     }
579 
580     auto stem = static_cast<StemEnum>(stemTrie.getValue());
581     switch (stem) {
582 
583         // Stems with meaning on their own, not requiring an option:
584 
585         case STEM_COMPACT_SHORT:
586         case STEM_COMPACT_LONG:
587         case STEM_SCIENTIFIC:
588         case STEM_ENGINEERING:
589         case STEM_NOTATION_SIMPLE:
590         CHECK_NULL(seen, notation, status);
591             macros.notation = stem_to_object::notation(stem);
592             switch (stem) {
593                 case STEM_SCIENTIFIC:
594                 case STEM_ENGINEERING:
595                     return STATE_SCIENTIFIC; // allows for scientific options
596                 default:
597                     return STATE_NULL;
598             }
599 
600         case STEM_BASE_UNIT:
601         case STEM_PERCENT:
602         case STEM_PERMILLE:
603         CHECK_NULL(seen, unit, status);
604             macros.unit = stem_to_object::unit(stem);
605             return STATE_NULL;
606 
607         case STEM_PRECISION_INTEGER:
608         case STEM_PRECISION_UNLIMITED:
609         case STEM_PRECISION_CURRENCY_STANDARD:
610         case STEM_PRECISION_CURRENCY_CASH:
611         CHECK_NULL(seen, precision, status);
612             macros.precision = stem_to_object::precision(stem);
613             switch (stem) {
614                 case STEM_PRECISION_INTEGER:
615                     return STATE_FRACTION_PRECISION; // allows for "precision-integer/@##"
616                 default:
617                     return STATE_NULL;
618             }
619 
620         case STEM_ROUNDING_MODE_CEILING:
621         case STEM_ROUNDING_MODE_FLOOR:
622         case STEM_ROUNDING_MODE_DOWN:
623         case STEM_ROUNDING_MODE_UP:
624         case STEM_ROUNDING_MODE_HALF_EVEN:
625         case STEM_ROUNDING_MODE_HALF_DOWN:
626         case STEM_ROUNDING_MODE_HALF_UP:
627         case STEM_ROUNDING_MODE_UNNECESSARY:
628         CHECK_NULL(seen, roundingMode, status);
629             macros.roundingMode = stem_to_object::roundingMode(stem);
630             return STATE_NULL;
631 
632         case STEM_GROUP_OFF:
633         case STEM_GROUP_MIN2:
634         case STEM_GROUP_AUTO:
635         case STEM_GROUP_ON_ALIGNED:
636         case STEM_GROUP_THOUSANDS:
637         CHECK_NULL(seen, grouper, status);
638             macros.grouper = Grouper::forStrategy(stem_to_object::groupingStrategy(stem));
639             return STATE_NULL;
640 
641         case STEM_LATIN:
642         CHECK_NULL(seen, symbols, status);
643             macros.symbols.setTo(NumberingSystem::createInstanceByName("latn", status));
644             return STATE_NULL;
645 
646         case STEM_UNIT_WIDTH_NARROW:
647         case STEM_UNIT_WIDTH_SHORT:
648         case STEM_UNIT_WIDTH_FULL_NAME:
649         case STEM_UNIT_WIDTH_ISO_CODE:
650         case STEM_UNIT_WIDTH_HIDDEN:
651         CHECK_NULL(seen, unitWidth, status);
652             macros.unitWidth = stem_to_object::unitWidth(stem);
653             return STATE_NULL;
654 
655         case STEM_SIGN_AUTO:
656         case STEM_SIGN_ALWAYS:
657         case STEM_SIGN_NEVER:
658         case STEM_SIGN_ACCOUNTING:
659         case STEM_SIGN_ACCOUNTING_ALWAYS:
660         case STEM_SIGN_EXCEPT_ZERO:
661         case STEM_SIGN_ACCOUNTING_EXCEPT_ZERO:
662         CHECK_NULL(seen, sign, status);
663             macros.sign = stem_to_object::signDisplay(stem);
664             return STATE_NULL;
665 
666         case STEM_DECIMAL_AUTO:
667         case STEM_DECIMAL_ALWAYS:
668         CHECK_NULL(seen, decimal, status);
669             macros.decimal = stem_to_object::decimalSeparatorDisplay(stem);
670             return STATE_NULL;
671 
672             // Stems requiring an option:
673 
674         case STEM_PRECISION_INCREMENT:
675         CHECK_NULL(seen, precision, status);
676             return STATE_INCREMENT_PRECISION;
677 
678         case STEM_MEASURE_UNIT:
679         CHECK_NULL(seen, unit, status);
680             return STATE_MEASURE_UNIT;
681 
682         case STEM_PER_MEASURE_UNIT:
683         CHECK_NULL(seen, perUnit, status);
684             return STATE_PER_MEASURE_UNIT;
685 
686         case STEM_CURRENCY:
687         CHECK_NULL(seen, unit, status);
688             return STATE_CURRENCY_UNIT;
689 
690         case STEM_INTEGER_WIDTH:
691         CHECK_NULL(seen, integerWidth, status);
692             return STATE_INTEGER_WIDTH;
693 
694         case STEM_NUMBERING_SYSTEM:
695         CHECK_NULL(seen, symbols, status);
696             return STATE_NUMBERING_SYSTEM;
697 
698         case STEM_SCALE:
699         CHECK_NULL(seen, scale, status);
700             return STATE_SCALE;
701 
702         default:
703             UPRV_UNREACHABLE;
704     }
705 }
706 
parseOption(ParseState stem,const StringSegment & segment,MacroProps & macros,UErrorCode & status)707 ParseState skeleton::parseOption(ParseState stem, const StringSegment& segment, MacroProps& macros,
708                                  UErrorCode& status) {
709 
710     ///// Required options: /////
711 
712     switch (stem) {
713         case STATE_CURRENCY_UNIT:
714             blueprint_helpers::parseCurrencyOption(segment, macros, status);
715             return STATE_NULL;
716         case STATE_MEASURE_UNIT:
717             blueprint_helpers::parseMeasureUnitOption(segment, macros, status);
718             return STATE_NULL;
719         case STATE_PER_MEASURE_UNIT:
720             blueprint_helpers::parseMeasurePerUnitOption(segment, macros, status);
721             return STATE_NULL;
722         case STATE_INCREMENT_PRECISION:
723             blueprint_helpers::parseIncrementOption(segment, macros, status);
724             return STATE_NULL;
725         case STATE_INTEGER_WIDTH:
726             blueprint_helpers::parseIntegerWidthOption(segment, macros, status);
727             return STATE_NULL;
728         case STATE_NUMBERING_SYSTEM:
729             blueprint_helpers::parseNumberingSystemOption(segment, macros, status);
730             return STATE_NULL;
731         case STATE_SCALE:
732             blueprint_helpers::parseScaleOption(segment, macros, status);
733             return STATE_NULL;
734         default:
735             break;
736     }
737 
738     ///// Non-required options: /////
739 
740     // Scientific options
741     switch (stem) {
742         case STATE_SCIENTIFIC:
743             if (blueprint_helpers::parseExponentWidthOption(segment, macros, status)) {
744                 return STATE_SCIENTIFIC;
745             }
746             if (U_FAILURE(status)) {
747                 return {};
748             }
749             if (blueprint_helpers::parseExponentSignOption(segment, macros, status)) {
750                 return STATE_SCIENTIFIC;
751             }
752             if (U_FAILURE(status)) {
753                 return {};
754             }
755             break;
756         default:
757             break;
758     }
759 
760     // Frac-sig option
761     switch (stem) {
762         case STATE_FRACTION_PRECISION:
763             if (blueprint_helpers::parseFracSigOption(segment, macros, status)) {
764                 return STATE_NULL;
765             }
766             if (U_FAILURE(status)) {
767                 return {};
768             }
769             break;
770         default:
771             break;
772     }
773 
774     // Unknown option
775     // throw new SkeletonSyntaxException("Invalid option", segment);
776     status = U_NUMBER_SKELETON_SYNTAX_ERROR;
777     return STATE_NULL;
778 }
779 
generateSkeleton(const MacroProps & macros,UnicodeString & sb,UErrorCode & status)780 void GeneratorHelpers::generateSkeleton(const MacroProps& macros, UnicodeString& sb, UErrorCode& status) {
781     if (U_FAILURE(status)) { return; }
782 
783     // Supported options
784     if (GeneratorHelpers::notation(macros, sb, status)) {
785         sb.append(u' ');
786     }
787     if (U_FAILURE(status)) { return; }
788     if (GeneratorHelpers::unit(macros, sb, status)) {
789         sb.append(u' ');
790     }
791     if (U_FAILURE(status)) { return; }
792     if (GeneratorHelpers::perUnit(macros, sb, status)) {
793         sb.append(u' ');
794     }
795     if (U_FAILURE(status)) { return; }
796     if (GeneratorHelpers::precision(macros, sb, status)) {
797         sb.append(u' ');
798     }
799     if (U_FAILURE(status)) { return; }
800     if (GeneratorHelpers::roundingMode(macros, sb, status)) {
801         sb.append(u' ');
802     }
803     if (U_FAILURE(status)) { return; }
804     if (GeneratorHelpers::grouping(macros, sb, status)) {
805         sb.append(u' ');
806     }
807     if (U_FAILURE(status)) { return; }
808     if (GeneratorHelpers::integerWidth(macros, sb, status)) {
809         sb.append(u' ');
810     }
811     if (U_FAILURE(status)) { return; }
812     if (GeneratorHelpers::symbols(macros, sb, status)) {
813         sb.append(u' ');
814     }
815     if (U_FAILURE(status)) { return; }
816     if (GeneratorHelpers::unitWidth(macros, sb, status)) {
817         sb.append(u' ');
818     }
819     if (U_FAILURE(status)) { return; }
820     if (GeneratorHelpers::sign(macros, sb, status)) {
821         sb.append(u' ');
822     }
823     if (U_FAILURE(status)) { return; }
824     if (GeneratorHelpers::decimal(macros, sb, status)) {
825         sb.append(u' ');
826     }
827     if (U_FAILURE(status)) { return; }
828     if (GeneratorHelpers::scale(macros, sb, status)) {
829         sb.append(u' ');
830     }
831     if (U_FAILURE(status)) { return; }
832 
833     // Unsupported options
834     if (!macros.padder.isBogus()) {
835         status = U_UNSUPPORTED_ERROR;
836         return;
837     }
838     if (macros.affixProvider != nullptr) {
839         status = U_UNSUPPORTED_ERROR;
840         return;
841     }
842     if (macros.rules != nullptr) {
843         status = U_UNSUPPORTED_ERROR;
844         return;
845     }
846     if (macros.currencySymbols != nullptr) {
847         status = U_UNSUPPORTED_ERROR;
848         return;
849     }
850 
851     // Remove the trailing space
852     if (sb.length() > 0) {
853         sb.truncate(sb.length() - 1);
854     }
855 }
856 
857 
parseExponentWidthOption(const StringSegment & segment,MacroProps & macros,UErrorCode &)858 bool blueprint_helpers::parseExponentWidthOption(const StringSegment& segment, MacroProps& macros,
859                                                  UErrorCode&) {
860     if (segment.charAt(0) != u'+') {
861         return false;
862     }
863     int32_t offset = 1;
864     int32_t minExp = 0;
865     for (; offset < segment.length(); offset++) {
866         if (segment.charAt(offset) == u'e') {
867             minExp++;
868         } else {
869             break;
870         }
871     }
872     if (offset < segment.length()) {
873         return false;
874     }
875     // Use the public APIs to enforce bounds checking
876     macros.notation = static_cast<ScientificNotation&>(macros.notation).withMinExponentDigits(minExp);
877     return true;
878 }
879 
880 void
generateExponentWidthOption(int32_t minExponentDigits,UnicodeString & sb,UErrorCode &)881 blueprint_helpers::generateExponentWidthOption(int32_t minExponentDigits, UnicodeString& sb, UErrorCode&) {
882     sb.append(u'+');
883     appendMultiple(sb, u'e', minExponentDigits);
884 }
885 
886 bool
parseExponentSignOption(const StringSegment & segment,MacroProps & macros,UErrorCode &)887 blueprint_helpers::parseExponentSignOption(const StringSegment& segment, MacroProps& macros, UErrorCode&) {
888     // Get the sign display type out of the CharsTrie data structure.
889     UCharsTrie tempStemTrie(kSerializedStemTrie);
890     UStringTrieResult result = tempStemTrie.next(
891             segment.toTempUnicodeString().getBuffer(),
892             segment.length());
893     if (result != USTRINGTRIE_INTERMEDIATE_VALUE && result != USTRINGTRIE_FINAL_VALUE) {
894         return false;
895     }
896     auto sign = stem_to_object::signDisplay(static_cast<StemEnum>(tempStemTrie.getValue()));
897     if (sign == UNUM_SIGN_COUNT) {
898         return false;
899     }
900     macros.notation = static_cast<ScientificNotation&>(macros.notation).withExponentSignDisplay(sign);
901     return true;
902 }
903 
parseCurrencyOption(const StringSegment & segment,MacroProps & macros,UErrorCode & status)904 void blueprint_helpers::parseCurrencyOption(const StringSegment& segment, MacroProps& macros,
905                                             UErrorCode& status) {
906     // Unlike ICU4J, have to check length manually because ICU4C CurrencyUnit does not check it for us
907     if (segment.length() != 3) {
908         status = U_NUMBER_SKELETON_SYNTAX_ERROR;
909         return;
910     }
911     const UChar* currencyCode = segment.toTempUnicodeString().getBuffer();
912     UErrorCode localStatus = U_ZERO_ERROR;
913     CurrencyUnit currency(currencyCode, localStatus);
914     if (U_FAILURE(localStatus)) {
915         // Not 3 ascii chars
916         // throw new SkeletonSyntaxException("Invalid currency", segment);
917         status = U_NUMBER_SKELETON_SYNTAX_ERROR;
918         return;
919     }
920     // Slicing is OK
921     macros.unit = currency; // NOLINT
922 }
923 
924 void
generateCurrencyOption(const CurrencyUnit & currency,UnicodeString & sb,UErrorCode &)925 blueprint_helpers::generateCurrencyOption(const CurrencyUnit& currency, UnicodeString& sb, UErrorCode&) {
926     sb.append(currency.getISOCurrency(), -1);
927 }
928 
parseMeasureUnitOption(const StringSegment & segment,MacroProps & macros,UErrorCode & status)929 void blueprint_helpers::parseMeasureUnitOption(const StringSegment& segment, MacroProps& macros,
930                                                UErrorCode& status) {
931     const UnicodeString stemString = segment.toTempUnicodeString();
932 
933     // NOTE: The category (type) of the unit is guaranteed to be a valid subtag (alphanumeric)
934     // http://unicode.org/reports/tr35/#Validity_Data
935     int firstHyphen = 0;
936     while (firstHyphen < stemString.length() && stemString.charAt(firstHyphen) != '-') {
937         firstHyphen++;
938     }
939     if (firstHyphen == stemString.length()) {
940         // throw new SkeletonSyntaxException("Invalid measure unit option", segment);
941         status = U_NUMBER_SKELETON_SYNTAX_ERROR;
942         return;
943     }
944 
945     // Need to do char <-> UChar conversion...
946     U_ASSERT(U_SUCCESS(status));
947     CharString type;
948     SKELETON_UCHAR_TO_CHAR(type, stemString, 0, firstHyphen, status);
949     CharString subType;
950     SKELETON_UCHAR_TO_CHAR(subType, stemString, firstHyphen + 1, stemString.length(), status);
951 
952     // Note: the largest type as of this writing (March 2018) is "volume", which has 24 units.
953     static constexpr int32_t CAPACITY = 30;
954     MeasureUnit units[CAPACITY];
955     UErrorCode localStatus = U_ZERO_ERROR;
956     int32_t numUnits = MeasureUnit::getAvailable(type.data(), units, CAPACITY, localStatus);
957     if (U_FAILURE(localStatus)) {
958         // More than 30 units in this type?
959         status = U_INTERNAL_PROGRAM_ERROR;
960         return;
961     }
962     for (int32_t i = 0; i < numUnits; i++) {
963         auto& unit = units[i];
964         if (uprv_strcmp(subType.data(), unit.getSubtype()) == 0) {
965             macros.unit = unit;
966             return;
967         }
968     }
969 
970     // throw new SkeletonSyntaxException("Unknown measure unit", segment);
971     status = U_NUMBER_SKELETON_SYNTAX_ERROR;
972 }
973 
generateMeasureUnitOption(const MeasureUnit & measureUnit,UnicodeString & sb,UErrorCode &)974 void blueprint_helpers::generateMeasureUnitOption(const MeasureUnit& measureUnit, UnicodeString& sb,
975                                                   UErrorCode&) {
976     // Need to do char <-> UChar conversion...
977     sb.append(UnicodeString(measureUnit.getType(), -1, US_INV));
978     sb.append(u'-');
979     sb.append(UnicodeString(measureUnit.getSubtype(), -1, US_INV));
980 }
981 
parseMeasurePerUnitOption(const StringSegment & segment,MacroProps & macros,UErrorCode & status)982 void blueprint_helpers::parseMeasurePerUnitOption(const StringSegment& segment, MacroProps& macros,
983                                                   UErrorCode& status) {
984     // A little bit of a hack: safe the current unit (numerator), call the main measure unit
985     // parsing code, put back the numerator unit, and put the new unit into per-unit.
986     MeasureUnit numerator = macros.unit;
987     parseMeasureUnitOption(segment, macros, status);
988     if (U_FAILURE(status)) { return; }
989     macros.perUnit = macros.unit;
990     macros.unit = numerator;
991 }
992 
parseFractionStem(const StringSegment & segment,MacroProps & macros,UErrorCode & status)993 void blueprint_helpers::parseFractionStem(const StringSegment& segment, MacroProps& macros,
994                                           UErrorCode& status) {
995     U_ASSERT(segment.charAt(0) == u'.');
996     int32_t offset = 1;
997     int32_t minFrac = 0;
998     int32_t maxFrac;
999     for (; offset < segment.length(); offset++) {
1000         if (segment.charAt(offset) == u'0') {
1001             minFrac++;
1002         } else {
1003             break;
1004         }
1005     }
1006     if (offset < segment.length()) {
1007         if (segment.charAt(offset) == u'+') {
1008             maxFrac = -1;
1009             offset++;
1010         } else {
1011             maxFrac = minFrac;
1012             for (; offset < segment.length(); offset++) {
1013                 if (segment.charAt(offset) == u'#') {
1014                     maxFrac++;
1015                 } else {
1016                     break;
1017                 }
1018             }
1019         }
1020     } else {
1021         maxFrac = minFrac;
1022     }
1023     if (offset < segment.length()) {
1024         // throw new SkeletonSyntaxException("Invalid fraction stem", segment);
1025         status = U_NUMBER_SKELETON_SYNTAX_ERROR;
1026         return;
1027     }
1028     // Use the public APIs to enforce bounds checking
1029     if (maxFrac == -1) {
1030         macros.precision = Precision::minFraction(minFrac);
1031     } else {
1032         macros.precision = Precision::minMaxFraction(minFrac, maxFrac);
1033     }
1034 }
1035 
1036 void
generateFractionStem(int32_t minFrac,int32_t maxFrac,UnicodeString & sb,UErrorCode &)1037 blueprint_helpers::generateFractionStem(int32_t minFrac, int32_t maxFrac, UnicodeString& sb, UErrorCode&) {
1038     if (minFrac == 0 && maxFrac == 0) {
1039         sb.append(u"precision-integer", -1);
1040         return;
1041     }
1042     sb.append(u'.');
1043     appendMultiple(sb, u'0', minFrac);
1044     if (maxFrac == -1) {
1045         sb.append(u'+');
1046     } else {
1047         appendMultiple(sb, u'#', maxFrac - minFrac);
1048     }
1049 }
1050 
1051 void
parseDigitsStem(const StringSegment & segment,MacroProps & macros,UErrorCode & status)1052 blueprint_helpers::parseDigitsStem(const StringSegment& segment, MacroProps& macros, UErrorCode& status) {
1053     U_ASSERT(segment.charAt(0) == u'@');
1054     int offset = 0;
1055     int minSig = 0;
1056     int maxSig;
1057     for (; offset < segment.length(); offset++) {
1058         if (segment.charAt(offset) == u'@') {
1059             minSig++;
1060         } else {
1061             break;
1062         }
1063     }
1064     if (offset < segment.length()) {
1065         if (segment.charAt(offset) == u'+') {
1066             maxSig = -1;
1067             offset++;
1068         } else {
1069             maxSig = minSig;
1070             for (; offset < segment.length(); offset++) {
1071                 if (segment.charAt(offset) == u'#') {
1072                     maxSig++;
1073                 } else {
1074                     break;
1075                 }
1076             }
1077         }
1078     } else {
1079         maxSig = minSig;
1080     }
1081     if (offset < segment.length()) {
1082         // throw new SkeletonSyntaxException("Invalid significant digits stem", segment);
1083         status = U_NUMBER_SKELETON_SYNTAX_ERROR;
1084         return;
1085     }
1086     // Use the public APIs to enforce bounds checking
1087     if (maxSig == -1) {
1088         macros.precision = Precision::minSignificantDigits(minSig);
1089     } else {
1090         macros.precision = Precision::minMaxSignificantDigits(minSig, maxSig);
1091     }
1092 }
1093 
1094 void
generateDigitsStem(int32_t minSig,int32_t maxSig,UnicodeString & sb,UErrorCode &)1095 blueprint_helpers::generateDigitsStem(int32_t minSig, int32_t maxSig, UnicodeString& sb, UErrorCode&) {
1096     appendMultiple(sb, u'@', minSig);
1097     if (maxSig == -1) {
1098         sb.append(u'+');
1099     } else {
1100         appendMultiple(sb, u'#', maxSig - minSig);
1101     }
1102 }
1103 
parseFracSigOption(const StringSegment & segment,MacroProps & macros,UErrorCode & status)1104 bool blueprint_helpers::parseFracSigOption(const StringSegment& segment, MacroProps& macros,
1105                                            UErrorCode& status) {
1106     if (segment.charAt(0) != u'@') {
1107         return false;
1108     }
1109     int offset = 0;
1110     int minSig = 0;
1111     int maxSig;
1112     for (; offset < segment.length(); offset++) {
1113         if (segment.charAt(offset) == u'@') {
1114             minSig++;
1115         } else {
1116             break;
1117         }
1118     }
1119     // For the frac-sig option, there must be minSig or maxSig but not both.
1120     // Valid: @+, @@+, @@@+
1121     // Valid: @#, @##, @###
1122     // Invalid: @, @@, @@@
1123     // Invalid: @@#, @@##, @@@#
1124     if (offset < segment.length()) {
1125         if (segment.charAt(offset) == u'+') {
1126             maxSig = -1;
1127             offset++;
1128         } else if (minSig > 1) {
1129             // @@#, @@##, @@@#
1130             // throw new SkeletonSyntaxException("Invalid digits option for fraction rounder", segment);
1131             status = U_NUMBER_SKELETON_SYNTAX_ERROR;
1132             return false;
1133         } else {
1134             maxSig = minSig;
1135             for (; offset < segment.length(); offset++) {
1136                 if (segment.charAt(offset) == u'#') {
1137                     maxSig++;
1138                 } else {
1139                     break;
1140                 }
1141             }
1142         }
1143     } else {
1144         // @, @@, @@@
1145         // throw new SkeletonSyntaxException("Invalid digits option for fraction rounder", segment);
1146         status = U_NUMBER_SKELETON_SYNTAX_ERROR;
1147         return false;
1148     }
1149     if (offset < segment.length()) {
1150         // throw new SkeletonSyntaxException("Invalid digits option for fraction rounder", segment);
1151         status = U_NUMBER_SKELETON_SYNTAX_ERROR;
1152         return false;
1153     }
1154 
1155     auto& oldPrecision = static_cast<const FractionPrecision&>(macros.precision);
1156     if (maxSig == -1) {
1157         macros.precision = oldPrecision.withMinDigits(minSig);
1158     } else {
1159         macros.precision = oldPrecision.withMaxDigits(maxSig);
1160     }
1161     return true;
1162 }
1163 
parseIncrementOption(const StringSegment & segment,MacroProps & macros,UErrorCode & status)1164 void blueprint_helpers::parseIncrementOption(const StringSegment& segment, MacroProps& macros,
1165                                              UErrorCode& status) {
1166     // Need to do char <-> UChar conversion...
1167     U_ASSERT(U_SUCCESS(status));
1168     CharString buffer;
1169     SKELETON_UCHAR_TO_CHAR(buffer, segment.toTempUnicodeString(), 0, segment.length(), status);
1170 
1171     // Utilize DecimalQuantity/decNumber to parse this for us.
1172     DecimalQuantity dq;
1173     UErrorCode localStatus = U_ZERO_ERROR;
1174     dq.setToDecNumber({buffer.data(), buffer.length()}, localStatus);
1175     if (U_FAILURE(localStatus)) {
1176         // throw new SkeletonSyntaxException("Invalid rounding increment", segment, e);
1177         status = U_NUMBER_SKELETON_SYNTAX_ERROR;
1178         return;
1179     }
1180     double increment = dq.toDouble();
1181 
1182     // We also need to figure out how many digits. Do a brute force string operation.
1183     int decimalOffset = 0;
1184     while (decimalOffset < segment.length() && segment.charAt(decimalOffset) != '.') {
1185         decimalOffset++;
1186     }
1187     if (decimalOffset == segment.length()) {
1188         macros.precision = Precision::increment(increment);
1189     } else {
1190         int32_t fractionLength = segment.length() - decimalOffset - 1;
1191         macros.precision = Precision::increment(increment).withMinFraction(fractionLength);
1192     }
1193 }
1194 
generateIncrementOption(double increment,int32_t trailingZeros,UnicodeString & sb,UErrorCode &)1195 void blueprint_helpers::generateIncrementOption(double increment, int32_t trailingZeros, UnicodeString& sb,
1196                                                 UErrorCode&) {
1197     // Utilize DecimalQuantity/double_conversion to format this for us.
1198     DecimalQuantity dq;
1199     dq.setToDouble(increment);
1200     dq.roundToInfinity();
1201     sb.append(dq.toPlainString());
1202 
1203     // We might need to append extra trailing zeros for min fraction...
1204     if (trailingZeros > 0) {
1205         appendMultiple(sb, u'0', trailingZeros);
1206     }
1207 }
1208 
parseIntegerWidthOption(const StringSegment & segment,MacroProps & macros,UErrorCode & status)1209 void blueprint_helpers::parseIntegerWidthOption(const StringSegment& segment, MacroProps& macros,
1210                                                 UErrorCode& status) {
1211     int32_t offset = 0;
1212     int32_t minInt = 0;
1213     int32_t maxInt;
1214     if (segment.charAt(0) == u'+') {
1215         maxInt = -1;
1216         offset++;
1217     } else {
1218         maxInt = 0;
1219     }
1220     for (; offset < segment.length(); offset++) {
1221         if (maxInt != -1 && segment.charAt(offset) == u'#') {
1222             maxInt++;
1223         } else {
1224             break;
1225         }
1226     }
1227     if (offset < segment.length()) {
1228         for (; offset < segment.length(); offset++) {
1229             if (segment.charAt(offset) == u'0') {
1230                 minInt++;
1231             } else {
1232                 break;
1233             }
1234         }
1235     }
1236     if (maxInt != -1) {
1237         maxInt += minInt;
1238     }
1239     if (offset < segment.length()) {
1240         // throw new SkeletonSyntaxException("Invalid integer width stem", segment);
1241         status = U_NUMBER_SKELETON_SYNTAX_ERROR;
1242         return;
1243     }
1244     // Use the public APIs to enforce bounds checking
1245     if (maxInt == -1) {
1246         macros.integerWidth = IntegerWidth::zeroFillTo(minInt);
1247     } else {
1248         macros.integerWidth = IntegerWidth::zeroFillTo(minInt).truncateAt(maxInt);
1249     }
1250 }
1251 
generateIntegerWidthOption(int32_t minInt,int32_t maxInt,UnicodeString & sb,UErrorCode &)1252 void blueprint_helpers::generateIntegerWidthOption(int32_t minInt, int32_t maxInt, UnicodeString& sb,
1253                                                    UErrorCode&) {
1254     if (maxInt == -1) {
1255         sb.append(u'+');
1256     } else {
1257         appendMultiple(sb, u'#', maxInt - minInt);
1258     }
1259     appendMultiple(sb, u'0', minInt);
1260 }
1261 
parseNumberingSystemOption(const StringSegment & segment,MacroProps & macros,UErrorCode & status)1262 void blueprint_helpers::parseNumberingSystemOption(const StringSegment& segment, MacroProps& macros,
1263                                                    UErrorCode& status) {
1264     // Need to do char <-> UChar conversion...
1265     U_ASSERT(U_SUCCESS(status));
1266     CharString buffer;
1267     SKELETON_UCHAR_TO_CHAR(buffer, segment.toTempUnicodeString(), 0, segment.length(), status);
1268 
1269     NumberingSystem* ns = NumberingSystem::createInstanceByName(buffer.data(), status);
1270     if (ns == nullptr || U_FAILURE(status)) {
1271         // This is a skeleton syntax error; don't bubble up the low-level NumberingSystem error
1272         // throw new SkeletonSyntaxException("Unknown numbering system", segment);
1273         status = U_NUMBER_SKELETON_SYNTAX_ERROR;
1274         return;
1275     }
1276     macros.symbols.setTo(ns);
1277 }
1278 
generateNumberingSystemOption(const NumberingSystem & ns,UnicodeString & sb,UErrorCode &)1279 void blueprint_helpers::generateNumberingSystemOption(const NumberingSystem& ns, UnicodeString& sb,
1280                                                       UErrorCode&) {
1281     // Need to do char <-> UChar conversion...
1282     sb.append(UnicodeString(ns.getName(), -1, US_INV));
1283 }
1284 
parseScaleOption(const StringSegment & segment,MacroProps & macros,UErrorCode & status)1285 void blueprint_helpers::parseScaleOption(const StringSegment& segment, MacroProps& macros,
1286                                               UErrorCode& status) {
1287     // Need to do char <-> UChar conversion...
1288     U_ASSERT(U_SUCCESS(status));
1289     CharString buffer;
1290     SKELETON_UCHAR_TO_CHAR(buffer, segment.toTempUnicodeString(), 0, segment.length(), status);
1291 
1292     LocalPointer<DecNum> decnum(new DecNum(), status);
1293     if (U_FAILURE(status)) { return; }
1294     decnum->setTo({buffer.data(), buffer.length()}, status);
1295     if (U_FAILURE(status)) {
1296         // This is a skeleton syntax error; don't let the low-level decnum error bubble up
1297         status = U_NUMBER_SKELETON_SYNTAX_ERROR;
1298         return;
1299     }
1300 
1301     // NOTE: The constructor will optimize the decnum for us if possible.
1302     macros.scale = {0, decnum.orphan()};
1303 }
1304 
generateScaleOption(int32_t magnitude,const DecNum * arbitrary,UnicodeString & sb,UErrorCode & status)1305 void blueprint_helpers::generateScaleOption(int32_t magnitude, const DecNum* arbitrary, UnicodeString& sb,
1306                                             UErrorCode& status) {
1307     // Utilize DecimalQuantity/double_conversion to format this for us.
1308     DecimalQuantity dq;
1309     if (arbitrary != nullptr) {
1310         dq.setToDecNum(*arbitrary, status);
1311         if (U_FAILURE(status)) { return; }
1312     } else {
1313         dq.setToInt(1);
1314     }
1315     dq.adjustMagnitude(magnitude);
1316     dq.roundToInfinity();
1317     sb.append(dq.toPlainString());
1318 }
1319 
1320 
notation(const MacroProps & macros,UnicodeString & sb,UErrorCode & status)1321 bool GeneratorHelpers::notation(const MacroProps& macros, UnicodeString& sb, UErrorCode& status) {
1322     if (macros.notation.fType == Notation::NTN_COMPACT) {
1323         UNumberCompactStyle style = macros.notation.fUnion.compactStyle;
1324         if (style == UNumberCompactStyle::UNUM_LONG) {
1325             sb.append(u"compact-long", -1);
1326             return true;
1327         } else if (style == UNumberCompactStyle::UNUM_SHORT) {
1328             sb.append(u"compact-short", -1);
1329             return true;
1330         } else {
1331             // Compact notation generated from custom data (not supported in skeleton)
1332             // The other compact notations are literals
1333             status = U_UNSUPPORTED_ERROR;
1334             return false;
1335         }
1336     } else if (macros.notation.fType == Notation::NTN_SCIENTIFIC) {
1337         const Notation::ScientificSettings& impl = macros.notation.fUnion.scientific;
1338         if (impl.fEngineeringInterval == 3) {
1339             sb.append(u"engineering", -1);
1340         } else {
1341             sb.append(u"scientific", -1);
1342         }
1343         if (impl.fMinExponentDigits > 1) {
1344             sb.append(u'/');
1345             blueprint_helpers::generateExponentWidthOption(impl.fMinExponentDigits, sb, status);
1346             if (U_FAILURE(status)) {
1347                 return false;
1348             }
1349         }
1350         if (impl.fExponentSignDisplay != UNUM_SIGN_AUTO) {
1351             sb.append(u'/');
1352             enum_to_stem_string::signDisplay(impl.fExponentSignDisplay, sb);
1353         }
1354         return true;
1355     } else {
1356         // Default value is not shown in normalized form
1357         return false;
1358     }
1359 }
1360 
unit(const MacroProps & macros,UnicodeString & sb,UErrorCode & status)1361 bool GeneratorHelpers::unit(const MacroProps& macros, UnicodeString& sb, UErrorCode& status) {
1362     if (utils::unitIsCurrency(macros.unit)) {
1363         sb.append(u"currency/", -1);
1364         CurrencyUnit currency(macros.unit, status);
1365         if (U_FAILURE(status)) {
1366             return false;
1367         }
1368         blueprint_helpers::generateCurrencyOption(currency, sb, status);
1369         return true;
1370     } else if (utils::unitIsNoUnit(macros.unit)) {
1371         if (utils::unitIsPercent(macros.unit)) {
1372             sb.append(u"percent", -1);
1373             return true;
1374         } else if (utils::unitIsPermille(macros.unit)) {
1375             sb.append(u"permille", -1);
1376             return true;
1377         } else {
1378             // Default value is not shown in normalized form
1379             return false;
1380         }
1381     } else {
1382         sb.append(u"measure-unit/", -1);
1383         blueprint_helpers::generateMeasureUnitOption(macros.unit, sb, status);
1384         return true;
1385     }
1386 }
1387 
perUnit(const MacroProps & macros,UnicodeString & sb,UErrorCode & status)1388 bool GeneratorHelpers::perUnit(const MacroProps& macros, UnicodeString& sb, UErrorCode& status) {
1389     // Per-units are currently expected to be only MeasureUnits.
1390     if (utils::unitIsNoUnit(macros.perUnit)) {
1391         if (utils::unitIsPercent(macros.perUnit) || utils::unitIsPermille(macros.perUnit)) {
1392             status = U_UNSUPPORTED_ERROR;
1393             return false;
1394         } else {
1395             // Default value: ok to ignore
1396             return false;
1397         }
1398     } else if (utils::unitIsCurrency(macros.perUnit)) {
1399         status = U_UNSUPPORTED_ERROR;
1400         return false;
1401     } else {
1402         sb.append(u"per-measure-unit/", -1);
1403         blueprint_helpers::generateMeasureUnitOption(macros.perUnit, sb, status);
1404         return true;
1405     }
1406 }
1407 
precision(const MacroProps & macros,UnicodeString & sb,UErrorCode & status)1408 bool GeneratorHelpers::precision(const MacroProps& macros, UnicodeString& sb, UErrorCode& status) {
1409     if (macros.precision.fType == Precision::RND_NONE) {
1410         sb.append(u"precision-unlimited", -1);
1411     } else if (macros.precision.fType == Precision::RND_FRACTION) {
1412         const Precision::FractionSignificantSettings& impl = macros.precision.fUnion.fracSig;
1413         blueprint_helpers::generateFractionStem(impl.fMinFrac, impl.fMaxFrac, sb, status);
1414     } else if (macros.precision.fType == Precision::RND_SIGNIFICANT) {
1415         const Precision::FractionSignificantSettings& impl = macros.precision.fUnion.fracSig;
1416         blueprint_helpers::generateDigitsStem(impl.fMinSig, impl.fMaxSig, sb, status);
1417     } else if (macros.precision.fType == Precision::RND_FRACTION_SIGNIFICANT) {
1418         const Precision::FractionSignificantSettings& impl = macros.precision.fUnion.fracSig;
1419         blueprint_helpers::generateFractionStem(impl.fMinFrac, impl.fMaxFrac, sb, status);
1420         sb.append(u'/');
1421         if (impl.fMinSig == -1) {
1422             blueprint_helpers::generateDigitsStem(1, impl.fMaxSig, sb, status);
1423         } else {
1424             blueprint_helpers::generateDigitsStem(impl.fMinSig, -1, sb, status);
1425         }
1426     } else if (macros.precision.fType == Precision::RND_INCREMENT
1427             || macros.precision.fType == Precision::RND_INCREMENT_ONE
1428             || macros.precision.fType == Precision::RND_INCREMENT_FIVE) {
1429         const Precision::IncrementSettings& impl = macros.precision.fUnion.increment;
1430         sb.append(u"precision-increment/", -1);
1431         blueprint_helpers::generateIncrementOption(
1432                 impl.fIncrement,
1433                 impl.fMinFrac - impl.fMaxFrac,
1434                 sb,
1435                 status);
1436     } else if (macros.precision.fType == Precision::RND_CURRENCY) {
1437         UCurrencyUsage usage = macros.precision.fUnion.currencyUsage;
1438         if (usage == UCURR_USAGE_STANDARD) {
1439             sb.append(u"precision-currency-standard", -1);
1440         } else {
1441             sb.append(u"precision-currency-cash", -1);
1442         }
1443     } else {
1444         // Bogus or Error
1445         return false;
1446     }
1447 
1448     // NOTE: Always return true for rounding because the default value depends on other options.
1449     return true;
1450 }
1451 
roundingMode(const MacroProps & macros,UnicodeString & sb,UErrorCode &)1452 bool GeneratorHelpers::roundingMode(const MacroProps& macros, UnicodeString& sb, UErrorCode&) {
1453     if (macros.roundingMode == kDefaultMode) {
1454         return false; // Default
1455     }
1456     enum_to_stem_string::roundingMode(macros.roundingMode, sb);
1457     return true;
1458 }
1459 
grouping(const MacroProps & macros,UnicodeString & sb,UErrorCode & status)1460 bool GeneratorHelpers::grouping(const MacroProps& macros, UnicodeString& sb, UErrorCode& status) {
1461     if (macros.grouper.isBogus()) {
1462         return false; // No value
1463     } else if (macros.grouper.fStrategy == UNUM_GROUPING_COUNT) {
1464         status = U_UNSUPPORTED_ERROR;
1465         return false;
1466     } else if (macros.grouper.fStrategy == UNUM_GROUPING_AUTO) {
1467         return false; // Default value
1468     } else {
1469         enum_to_stem_string::groupingStrategy(macros.grouper.fStrategy, sb);
1470         return true;
1471     }
1472 }
1473 
integerWidth(const MacroProps & macros,UnicodeString & sb,UErrorCode & status)1474 bool GeneratorHelpers::integerWidth(const MacroProps& macros, UnicodeString& sb, UErrorCode& status) {
1475     if (macros.integerWidth.fHasError || macros.integerWidth.isBogus() ||
1476         macros.integerWidth == IntegerWidth::standard()) {
1477         // Error or Default
1478         return false;
1479     }
1480     sb.append(u"integer-width/", -1);
1481     blueprint_helpers::generateIntegerWidthOption(
1482             macros.integerWidth.fUnion.minMaxInt.fMinInt,
1483             macros.integerWidth.fUnion.minMaxInt.fMaxInt,
1484             sb,
1485             status);
1486     return true;
1487 }
1488 
symbols(const MacroProps & macros,UnicodeString & sb,UErrorCode & status)1489 bool GeneratorHelpers::symbols(const MacroProps& macros, UnicodeString& sb, UErrorCode& status) {
1490     if (macros.symbols.isNumberingSystem()) {
1491         const NumberingSystem& ns = *macros.symbols.getNumberingSystem();
1492         if (uprv_strcmp(ns.getName(), "latn") == 0) {
1493             sb.append(u"latin", -1);
1494         } else {
1495             sb.append(u"numbering-system/", -1);
1496             blueprint_helpers::generateNumberingSystemOption(ns, sb, status);
1497         }
1498         return true;
1499     } else if (macros.symbols.isDecimalFormatSymbols()) {
1500         status = U_UNSUPPORTED_ERROR;
1501         return false;
1502     } else {
1503         // No custom symbols
1504         return false;
1505     }
1506 }
1507 
unitWidth(const MacroProps & macros,UnicodeString & sb,UErrorCode &)1508 bool GeneratorHelpers::unitWidth(const MacroProps& macros, UnicodeString& sb, UErrorCode&) {
1509     if (macros.unitWidth == UNUM_UNIT_WIDTH_SHORT || macros.unitWidth == UNUM_UNIT_WIDTH_COUNT) {
1510         return false; // Default or Bogus
1511     }
1512     enum_to_stem_string::unitWidth(macros.unitWidth, sb);
1513     return true;
1514 }
1515 
sign(const MacroProps & macros,UnicodeString & sb,UErrorCode &)1516 bool GeneratorHelpers::sign(const MacroProps& macros, UnicodeString& sb, UErrorCode&) {
1517     if (macros.sign == UNUM_SIGN_AUTO || macros.sign == UNUM_SIGN_COUNT) {
1518         return false; // Default or Bogus
1519     }
1520     enum_to_stem_string::signDisplay(macros.sign, sb);
1521     return true;
1522 }
1523 
decimal(const MacroProps & macros,UnicodeString & sb,UErrorCode &)1524 bool GeneratorHelpers::decimal(const MacroProps& macros, UnicodeString& sb, UErrorCode&) {
1525     if (macros.decimal == UNUM_DECIMAL_SEPARATOR_AUTO || macros.decimal == UNUM_DECIMAL_SEPARATOR_COUNT) {
1526         return false; // Default or Bogus
1527     }
1528     enum_to_stem_string::decimalSeparatorDisplay(macros.decimal, sb);
1529     return true;
1530 }
1531 
scale(const MacroProps & macros,UnicodeString & sb,UErrorCode & status)1532 bool GeneratorHelpers::scale(const MacroProps& macros, UnicodeString& sb, UErrorCode& status) {
1533     if (!macros.scale.isValid()) {
1534         return false; // Default or Bogus
1535     }
1536     sb.append(u"scale/", -1);
1537     blueprint_helpers::generateScaleOption(
1538             macros.scale.fMagnitude,
1539             macros.scale.fArbitrary,
1540             sb,
1541             status);
1542     return true;
1543 }
1544 
1545 
1546 #endif /* #if !UCONFIG_NO_FORMATTING */
1547