1 // © 2018 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3
4 #include "unicode/utypes.h"
5
6 #if !UCONFIG_NO_FORMATTING
7 #ifndef __SOURCE_NUMBER_SKELETONS_H__
8 #define __SOURCE_NUMBER_SKELETONS_H__
9
10 #include "number_types.h"
11 #include "numparse_types.h"
12 #include "unicode/ucharstrie.h"
13 #include "string_segment.h"
14
15 U_NAMESPACE_BEGIN
16 namespace number {
17 namespace impl {
18
19 // Forward-declaration
20 struct SeenMacroProps;
21
22 // namespace for enums and entrypoint functions
23 namespace skeleton {
24
25 ////////////////////////////////////////////////////////////////////////////////////////
26 // NOTE: For examples of how to add a new stem to the number skeleton parser, see: //
27 // https://github.com/unicode-org/icu/commit/a2a7982216b2348070dc71093775ac7195793d73 //
28 // and //
29 // https://github.com/unicode-org/icu/commit/6fe86f3934a8a5701034f648a8f7c5087e84aa28 //
30 ////////////////////////////////////////////////////////////////////////////////////////
31
/**
 * While parsing a skeleton, this enum records what type of option we expect to find next.
 *
 * The enumerators are grouped into three sections (see the section comments below) according to
 * whether an option is forbidden, optional, or required after the token that produced the state.
 *
 * NOTE(review): the enumerators have no explicit values, so their declaration order defines their
 * numeric values. Confirm that no code depends on that order before inserting or reordering entries.
 */
enum ParseState {

    // Section 0: We expect whitespace or a stem, but not an option:

    STATE_NULL,

    // Section 1: We might accept an option, but it is not required:

    STATE_SCIENTIFIC,
    STATE_FRACTION_PRECISION,
    STATE_PRECISION,

    // Section 2: An option is required:

    STATE_INCREMENT_PRECISION,
    STATE_MEASURE_UNIT,
    STATE_PER_MEASURE_UNIT,
    STATE_IDENTIFIER_UNIT,
    STATE_UNIT_USAGE,
    STATE_CURRENCY_UNIT,
    STATE_INTEGER_WIDTH,
    STATE_NUMBERING_SYSTEM,
    STATE_SCALE,
};
59
/**
 * All possible stem literals have an entry in the StemEnum. The enum name is the kebab case stem
 * string literal written in upper snake case.
 *
 * The enumerators are split into two sections: stems that are complete on their own, and stems
 * that must be followed by an option.
 *
 * NOTE(review): the enumerators have no explicit values, so their declaration order defines their
 * numeric values; presumably these values are what the serialized stem trie stores -- confirm
 * before inserting or reordering entries.
 *
 * @see stem_to_object
 * @see #SERIALIZED_STEM_TRIE
 */
enum StemEnum {

    // Section 1: Stems that do not require an option:

    STEM_COMPACT_SHORT,
    STEM_COMPACT_LONG,
    STEM_SCIENTIFIC,
    STEM_ENGINEERING,
    STEM_NOTATION_SIMPLE,
    STEM_BASE_UNIT,
    STEM_PERCENT,
    STEM_PERMILLE,
    STEM_PERCENT_100, // concise-only
    STEM_PRECISION_INTEGER,
    STEM_PRECISION_UNLIMITED,
    STEM_PRECISION_CURRENCY_STANDARD,
    STEM_PRECISION_CURRENCY_CASH,
    STEM_ROUNDING_MODE_CEILING,
    STEM_ROUNDING_MODE_FLOOR,
    STEM_ROUNDING_MODE_DOWN,
    STEM_ROUNDING_MODE_UP,
    STEM_ROUNDING_MODE_HALF_EVEN,
    STEM_ROUNDING_MODE_HALF_ODD,
    STEM_ROUNDING_MODE_HALF_CEILING,
    STEM_ROUNDING_MODE_HALF_FLOOR,
    STEM_ROUNDING_MODE_HALF_DOWN,
    STEM_ROUNDING_MODE_HALF_UP,
    STEM_ROUNDING_MODE_UNNECESSARY,
    STEM_GROUP_OFF,
    STEM_GROUP_MIN2,
    STEM_GROUP_AUTO,
    STEM_GROUP_ON_ALIGNED,
    STEM_GROUP_THOUSANDS,
    STEM_LATIN,
    STEM_UNIT_WIDTH_NARROW,
    STEM_UNIT_WIDTH_SHORT,
    STEM_UNIT_WIDTH_FULL_NAME,
    STEM_UNIT_WIDTH_ISO_CODE,
    STEM_UNIT_WIDTH_FORMAL,
    STEM_UNIT_WIDTH_VARIANT,
    STEM_UNIT_WIDTH_HIDDEN,
    STEM_SIGN_AUTO,
    STEM_SIGN_ALWAYS,
    STEM_SIGN_NEVER,
    STEM_SIGN_ACCOUNTING,
    STEM_SIGN_ACCOUNTING_ALWAYS,
    STEM_SIGN_EXCEPT_ZERO,
    STEM_SIGN_ACCOUNTING_EXCEPT_ZERO,
    STEM_SIGN_NEGATIVE,
    STEM_SIGN_ACCOUNTING_NEGATIVE,
    STEM_DECIMAL_AUTO,
    STEM_DECIMAL_ALWAYS,

    // Section 2: Stems that DO require an option:

    STEM_PRECISION_INCREMENT,
    STEM_MEASURE_UNIT,
    STEM_PER_MEASURE_UNIT,
    STEM_UNIT,
    STEM_UNIT_USAGE,
    STEM_CURRENCY,
    STEM_INTEGER_WIDTH,
    STEM_NUMBERING_SYSTEM,
    STEM_SCALE,
};
132
/** Default wildcard char, accepted on input and printed in output */
constexpr char16_t kWildcardChar = u'*';

/** Alternative wildcard char, accepted on input but not printed in output */
constexpr char16_t kAltWildcardChar = u'+';

/**
 * Checks whether the char is a wildcard on input.
 *
 * Both the default wildcard (kWildcardChar) and the alternative wildcard (kAltWildcardChar)
 * are accepted. The function is constexpr so that it can also be used in constant expressions.
 *
 * @param c The UTF-16 code unit to test.
 * @return true if c equals kWildcardChar or kAltWildcardChar; false otherwise.
 */
constexpr bool isWildcardChar(char16_t c) {
    return c == kWildcardChar || c == kAltWildcardChar;
}
143
/**
 * Creates a NumberFormatter corresponding to the given skeleton string.
 *
 * @param skeletonString
 *            A number skeleton string, possibly not in its shortest form.
 * @param perror
 *            Pointer to a UParseError structure.
 *            NOTE(review): presumably optional (may be null) and filled in with the error
 *            location when the skeleton is malformed -- confirm against the implementation.
 * @param status
 *            ICU error code, passed by reference.
 *            NOTE(review): presumably set to a failure code when the skeleton cannot be
 *            parsed -- confirm against the implementation.
 * @return An UnlocalizedNumberFormatter with behavior defined by the given skeleton string.
 */
UnlocalizedNumberFormatter create(
        const UnicodeString& skeletonString, UParseError* perror, UErrorCode& status);
153
/**
 * Create a skeleton string corresponding to the given NumberFormatter.
 *
 * @param macros
 *            The NumberFormatter options object.
 * @param status
 *            ICU error code, passed by reference.
 *            NOTE(review): presumably set to a failure code when the options cannot be
 *            represented as a skeleton -- confirm against the implementation.
 * @return A skeleton string in normalized form.
 */
UnicodeString generate(const MacroProps& macros, UErrorCode& status);
162
/**
 * Converts from a skeleton string to a MacroProps. This method contains the primary parse loop.
 *
 * Internal: use the create() endpoint instead of this function.
 *
 * @param skeletonString
 *            The skeleton string to parse.
 * @param errOffset
 *            Integer passed by non-const reference.
 *            NOTE(review): presumably receives the offset into skeletonString at which parsing
 *            failed -- confirm against the implementation.
 * @param status
 *            ICU error code, passed by reference.
 * @return The MacroProps produced from the skeleton string.
 */
MacroProps parseSkeleton(const UnicodeString& skeletonString, int32_t& errOffset, UErrorCode& status);
169
/**
 * Given that the current segment represents a stem, parse it and save the result.
 *
 * @param segment
 *            The portion of the skeleton string holding the stem.
 * @param stemTrie
 *            Trie used to look up the stem.
 *            NOTE(review): presumably maps stem literals to StemEnum values -- confirm.
 * @param seen
 *            Flags recording which macro-props have been encountered so far; passed by
 *            non-const reference.
 * @param macros
 *            The MacroProps that receives the parsed result.
 * @param status
 *            ICU error code, passed by reference.
 * @return The next state after parsing this stem, corresponding to what subset of options to expect.
 */
ParseState parseStem(const StringSegment& segment, const UCharsTrie& stemTrie, SeenMacroProps& seen,
                     MacroProps& macros, UErrorCode& status);
177
/**
 * Given that the current segment represents an option, parse it and save the result.
 *
 * @param stem
 *            The current parse state. Note that despite its name this is a ParseState, not a
 *            StemEnum. NOTE(review): presumably the state returned by the preceding
 *            parseStem()/parseOption() call -- confirm against the caller.
 * @param segment
 *            The portion of the skeleton string holding the option.
 * @param macros
 *            The MacroProps that receives the parsed result.
 * @param status
 *            ICU error code, passed by reference.
 * @return The next state after parsing this option, corresponding to what subset of options to
 *         expect next.
 */
ParseState
parseOption(ParseState stem, const StringSegment& segment, MacroProps& macros, UErrorCode& status);
186
187 } // namespace skeleton
188
189
/**
 * Namespace for utility methods that convert from StemEnum to corresponding objects or enums. This
 * applies to only the "Section 1" stems, those that are well-defined without an option.
 *
 * Each function takes a single StemEnum and returns the corresponding object or enum by value.
 * NOTE(review): the behavior for a stem outside the group a function handles is not visible in
 * this header -- check the implementation before relying on it.
 */
namespace stem_to_object {

/** Converts a notation stem to a Notation object. */
Notation notation(skeleton::StemEnum stem);

/** Converts a unit stem to a MeasureUnit object. */
MeasureUnit unit(skeleton::StemEnum stem);

/** Converts a precision stem to a Precision object. */
Precision precision(skeleton::StemEnum stem);

/** Converts a rounding-mode stem to a UNumberFormatRoundingMode value. */
UNumberFormatRoundingMode roundingMode(skeleton::StemEnum stem);

/** Converts a grouping stem to a UNumberGroupingStrategy value. */
UNumberGroupingStrategy groupingStrategy(skeleton::StemEnum stem);

/** Converts a unit-width stem to a UNumberUnitWidth value. */
UNumberUnitWidth unitWidth(skeleton::StemEnum stem);

/** Converts a sign stem to a UNumberSignDisplay value. */
UNumberSignDisplay signDisplay(skeleton::StemEnum stem);

/** Converts a decimal stem to a UNumberDecimalSeparatorDisplay value. */
UNumberDecimalSeparatorDisplay decimalSeparatorDisplay(skeleton::StemEnum stem);

} // namespace stem_to_object
213
/**
 * Namespace for utility methods that convert from enums to stem strings. More complex object conversions
 * take place in the object_to_stem_string namespace.
 *
 * Each function takes the enum value plus a UnicodeString passed by non-const reference.
 * NOTE(review): presumably the stem string is appended to sb rather than replacing its
 * contents -- confirm against the implementation.
 */
namespace enum_to_stem_string {

/** Writes the stem string for the given rounding mode to sb. */
void roundingMode(UNumberFormatRoundingMode value, UnicodeString& sb);

/** Writes the stem string for the given grouping strategy to sb. */
void groupingStrategy(UNumberGroupingStrategy value, UnicodeString& sb);

/** Writes the stem string for the given unit width to sb. */
void unitWidth(UNumberUnitWidth value, UnicodeString& sb);

/** Writes the stem string for the given sign display to sb. */
void signDisplay(UNumberSignDisplay value, UnicodeString& sb);

/** Writes the stem string for the given decimal separator display to sb. */
void decimalSeparatorDisplay(UNumberDecimalSeparatorDisplay value, UnicodeString& sb);

} // namespace enum_to_stem_string
231
/**
 * Namespace for utility methods for processing stems and options that cannot be interpreted literally.
 *
 * Naming convention visible in the signatures below:
 *  - parse* functions take the current StringSegment and a mutable MacroProps; those returning
 *    bool report whether the option was found and parsed (see the individual comments).
 *  - generate* functions take the value(s) to serialize and a mutable UnicodeString "sb".
 * All functions take a UErrorCode by reference.
 */
namespace blueprint_helpers {

/** @return Whether we successfully found and parsed an exponent width option. */
bool parseExponentWidthOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);

void generateExponentWidthOption(int32_t minExponentDigits, UnicodeString& sb, UErrorCode& status);

/** @return Whether we successfully found and parsed an exponent sign option. */
bool parseExponentSignOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);

void parseCurrencyOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);

void generateCurrencyOption(const CurrencyUnit& currency, UnicodeString& sb, UErrorCode& status);

// "measure-unit/" is deprecated in favour of "unit/".
void parseMeasureUnitOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);

// "per-measure-unit/" is deprecated in favour of "unit/".
void parseMeasurePerUnitOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);

/**
 * Parses unit identifiers like "meter-per-second" and "foot-and-inch", as
 * specified via a "unit/" concise skeleton.
 */
void parseIdentifierUnitOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);

void parseUnitUsageOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);

void parseFractionStem(const StringSegment& segment, MacroProps& macros, UErrorCode& status);

void generateFractionStem(int32_t minFrac, int32_t maxFrac, UnicodeString& sb, UErrorCode& status);

void parseDigitsStem(const StringSegment& segment, MacroProps& macros, UErrorCode& status);

void generateDigitsStem(int32_t minSig, int32_t maxSig, UnicodeString& sb, UErrorCode& status);

void parseScientificStem(const StringSegment& segment, MacroProps& macros, UErrorCode& status);

// Note: no generateScientificStem since this syntax was added later in ICU 67

void parseIntegerStem(const StringSegment& segment, MacroProps& macros, UErrorCode& status);

// Note: no generateIntegerStem since this syntax was added later in ICU 67

/** @return Whether we successfully found and parsed a frac-sig option. */
bool parseFracSigOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);

/** @return Whether we successfully found and parsed a trailing zero option. */
bool parseTrailingZeroOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);

void parseIncrementOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);

void
generateIncrementOption(double increment, int32_t trailingZeros, UnicodeString& sb, UErrorCode& status);

void parseIntegerWidthOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);

void generateIntegerWidthOption(int32_t minInt, int32_t maxInt, UnicodeString& sb, UErrorCode& status);

void parseNumberingSystemOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);

void generateNumberingSystemOption(const NumberingSystem& ns, UnicodeString& sb, UErrorCode& status);

void parseScaleOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);

void generateScaleOption(int32_t magnitude, const DecNum* arbitrary, UnicodeString& sb,
                         UErrorCode& status);

} // namespace blueprint_helpers
304
/**
 * Class for utility methods for generating a token corresponding to each macro-prop. Each private
 * method returns whether or not a token was written to the output string (sb).
 *
 * All members are static; the class only serves as a scope for the helpers.
 *
 * This needs to be a class, not a namespace, so it can be friended.
 */
class GeneratorHelpers {
  public:
    /**
     * Main skeleton generator function. Appends the normalized skeleton for the MacroProps to the given
     * UnicodeString.
     *
     * Internal: use a public endpoint instead of calling this function directly.
     * NOTE(review): the original comment named create() as that endpoint; skeleton::generate(),
     * which returns a skeleton string for a MacroProps, looks like the matching one -- confirm.
     *
     * @param macros The NumberFormatter options object to serialize.
     * @param sb     The string that receives the skeleton.
     * @param status ICU error code, passed by reference.
     */
    static void generateSkeleton(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);

  private:
    // One helper per macro-prop. Each returns true if a token was written to sb.

    static bool notation(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);

    static bool unit(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);

    static bool usage(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);

    static bool precision(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);

    static bool roundingMode(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);

    static bool grouping(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);

    static bool integerWidth(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);

    static bool symbols(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);

    static bool unitWidth(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);

    static bool sign(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);

    static bool decimal(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);

    static bool scale(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);

};
347
/**
 * Bookkeeping flags, one per macro-prop, used in place of null-checking.
 *
 * The Java implementation can test an object reference against null; C++ has no equivalent
 * for value members, so a separate boolean is kept for each property instead.
 * Every flag starts out false.
 */
struct SeenMacroProps {
    bool notation{false};
    bool unit{false};
    bool perUnit{false};
    bool usage{false};
    bool precision{false};
    bool roundingMode{false};
    bool grouper{false};
    bool padder{false};
    bool integerWidth{false};
    bool symbols{false};
    bool unitWidth{false};
    bool sign{false};
    bool decimal{false};
    bool scale{false};
};
368
namespace {

/*
 * SKELETON_UCHAR_TO_CHAR(dest, src, start, end, status)
 *
 * Appends the code units [start, end) of `src` to `dest` via appendInvariantChars().
 *
 *   dest   - object providing appendInvariantChars(<string>, UErrorCode&)
 *   src    - object providing getBuffer(); the range is taken relative to that buffer
 *   start  - offset of the first code unit to convert
 *   end    - offset one past the last code unit to convert
 *   status - UErrorCode lvalue that receives any failure
 *
 * On failure the macro assigns `status` and executes `return;`, i.e. it leaves the *calling*
 * function, so it can only be used inside functions returning void. An invariant conversion
 * error is reported as U_NUMBER_SKELETON_SYNTAX_ERROR; any other failure is passed through.
 *
 * The whole expansion is a single UPRV_BLOCK_MACRO_BEGIN/END statement. The `(void)(dest);`
 * used to sit in front of that block, which made the macro expand to two statements and
 * broke it under an unbraced `if`/`else`.
 *
 * Macros are not scoped by namespaces; the enclosing anonymous namespace has no effect on it.
 */
#define SKELETON_UCHAR_TO_CHAR(dest, src, start, end, status) \
UPRV_BLOCK_MACRO_BEGIN { \
    (void)(dest); \
    UErrorCode conversionStatus = U_ZERO_ERROR; \
    (dest).appendInvariantChars({false, (src).getBuffer() + (start), (end) - (start)}, conversionStatus); \
    if (conversionStatus == U_INVARIANT_CONVERSION_ERROR) { \
        /* Don't propagate the invariant conversion error; it is a skeleton syntax error */ \
        (status) = U_NUMBER_SKELETON_SYNTAX_ERROR; \
        return; \
    } else if (U_FAILURE(conversionStatus)) { \
        (status) = conversionStatus; \
        return; \
    } \
} UPRV_BLOCK_MACRO_END

} // namespace
386
387 } // namespace impl
388 } // namespace number
389 U_NAMESPACE_END
390
391 #endif //__SOURCE_NUMBER_SKELETONS_H__
392 #endif /* #if !UCONFIG_NO_FORMATTING */
393