1 // © 2018 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 
4 #include "unicode/utypes.h"
5 
6 #if !UCONFIG_NO_FORMATTING
7 
8 // Allow implicit conversion from char16_t* to UnicodeString for this file:
9 // Helpful in toString methods and elsewhere.
10 #define UNISTR_FROM_STRING_EXPLICIT
11 
12 #include "unicode/numberrangeformatter.h"
13 #include "numrange_impl.h"
14 #include "patternprops.h"
15 #include "uresimp.h"
16 #include "util.h"
17 
18 using namespace icu;
19 using namespace icu::number;
20 using namespace icu::number::impl;
21 
22 namespace {
23 
24 // Helper function for 2-dimensional switch statement
identity2d(UNumberRangeIdentityFallback a,UNumberRangeIdentityResult b)25 constexpr int8_t identity2d(UNumberRangeIdentityFallback a, UNumberRangeIdentityResult b) {
26     return static_cast<int8_t>(a) | (static_cast<int8_t>(b) << 4);
27 }
28 
29 
// Holds the two compiled patterns that getNumberRangeData() loads for a locale.
struct NumberRangeData {
    // Pattern stored under the "range" key of miscPatterns.
    SimpleFormatter rangePattern;
    // Pattern stored under the "approximately" key of miscPatterns.
    SimpleFormatter approximatelyPattern;
};
34 
35 class NumberRangeDataSink : public ResourceSink {
36   public:
NumberRangeDataSink(NumberRangeData & data)37     NumberRangeDataSink(NumberRangeData& data) : fData(data) {}
38 
put(const char * key,ResourceValue & value,UBool,UErrorCode & status)39     void put(const char* key, ResourceValue& value, UBool /*noFallback*/, UErrorCode& status) U_OVERRIDE {
40         ResourceTable miscTable = value.getTable(status);
41         if (U_FAILURE(status)) { return; }
42         for (int i = 0; miscTable.getKeyAndValue(i, key, value); i++) {
43             if (uprv_strcmp(key, "range") == 0) {
44                 if (hasRangeData()) {
45                     continue; // have already seen this pattern
46                 }
47                 fData.rangePattern = {value.getUnicodeString(status), status};
48             } else if (uprv_strcmp(key, "approximately") == 0) {
49                 if (hasApproxData()) {
50                     continue; // have already seen this pattern
51                 }
52                 fData.approximatelyPattern = {value.getUnicodeString(status), status};
53             }
54         }
55     }
56 
hasRangeData()57     bool hasRangeData() {
58         return fData.rangePattern.getArgumentLimit() != 0;
59     }
60 
hasApproxData()61     bool hasApproxData() {
62         return fData.approximatelyPattern.getArgumentLimit() != 0;
63     }
64 
isComplete()65     bool isComplete() {
66         return hasRangeData() && hasApproxData();
67     }
68 
fillInDefaults(UErrorCode & status)69     void fillInDefaults(UErrorCode& status) {
70         if (!hasRangeData()) {
71             fData.rangePattern = {u"{0}–{1}", status};
72         }
73         if (!hasApproxData()) {
74             fData.approximatelyPattern = {u"~{0}", status};
75         }
76     }
77 
78   private:
79     NumberRangeData& fData;
80 };
81 
getNumberRangeData(const char * localeName,const char * nsName,NumberRangeData & data,UErrorCode & status)82 void getNumberRangeData(const char* localeName, const char* nsName, NumberRangeData& data, UErrorCode& status) {
83     if (U_FAILURE(status)) { return; }
84     LocalUResourceBundlePointer rb(ures_open(NULL, localeName, &status));
85     if (U_FAILURE(status)) { return; }
86     NumberRangeDataSink sink(data);
87 
88     CharString dataPath;
89     dataPath.append("NumberElements/", -1, status);
90     dataPath.append(nsName, -1, status);
91     dataPath.append("/miscPatterns", -1, status);
92     if (U_FAILURE(status)) { return; }
93 
94     UErrorCode localStatus = U_ZERO_ERROR;
95     ures_getAllItemsWithFallback(rb.getAlias(), dataPath.data(), sink, localStatus);
96     if (U_FAILURE(localStatus) && localStatus != U_MISSING_RESOURCE_ERROR) {
97         status = localStatus;
98         return;
99     }
100 
101     // Fall back to latn if necessary
102     if (!sink.isComplete()) {
103         ures_getAllItemsWithFallback(rb.getAlias(), "NumberElements/latn/miscPatterns", sink, status);
104     }
105 
106     sink.fillInDefaults(status);
107 }
108 
109 class PluralRangesDataSink : public ResourceSink {
110   public:
PluralRangesDataSink(StandardPluralRanges & output)111     PluralRangesDataSink(StandardPluralRanges& output) : fOutput(output) {}
112 
put(const char *,ResourceValue & value,UBool,UErrorCode & status)113     void put(const char* /*key*/, ResourceValue& value, UBool /*noFallback*/, UErrorCode& status) U_OVERRIDE {
114         ResourceArray entriesArray = value.getArray(status);
115         if (U_FAILURE(status)) { return; }
116         fOutput.setCapacity(entriesArray.getSize());
117         for (int i = 0; entriesArray.getValue(i, value); i++) {
118             ResourceArray pluralFormsArray = value.getArray(status);
119             if (U_FAILURE(status)) { return; }
120             pluralFormsArray.getValue(0, value);
121             StandardPlural::Form first = StandardPlural::fromString(value.getUnicodeString(status), status);
122             if (U_FAILURE(status)) { return; }
123             pluralFormsArray.getValue(1, value);
124             StandardPlural::Form second = StandardPlural::fromString(value.getUnicodeString(status), status);
125             if (U_FAILURE(status)) { return; }
126             pluralFormsArray.getValue(2, value);
127             StandardPlural::Form result = StandardPlural::fromString(value.getUnicodeString(status), status);
128             if (U_FAILURE(status)) { return; }
129             fOutput.addPluralRange(first, second, result);
130         }
131     }
132 
133   private:
134     StandardPluralRanges& fOutput;
135 };
136 
getPluralRangesData(const Locale & locale,StandardPluralRanges & output,UErrorCode & status)137 void getPluralRangesData(const Locale& locale, StandardPluralRanges& output, UErrorCode& status) {
138     if (U_FAILURE(status)) { return; }
139     LocalUResourceBundlePointer rb(ures_openDirect(nullptr, "pluralRanges", &status));
140     if (U_FAILURE(status)) { return; }
141 
142     CharString dataPath;
143     dataPath.append("locales/", -1, status);
144     dataPath.append(locale.getLanguage(), -1, status);
145     if (U_FAILURE(status)) { return; }
146     int32_t setLen;
147     // Not all languages are covered: fail gracefully
148     UErrorCode internalStatus = U_ZERO_ERROR;
149     const UChar* set = ures_getStringByKeyWithFallback(rb.getAlias(), dataPath.data(), &setLen, &internalStatus);
150     if (U_FAILURE(internalStatus)) { return; }
151 
152     dataPath.clear();
153     dataPath.append("rules/", -1, status);
154     dataPath.appendInvariantChars(set, setLen, status);
155     if (U_FAILURE(status)) { return; }
156     PluralRangesDataSink sink(output);
157     ures_getAllItemsWithFallback(rb.getAlias(), dataPath.data(), sink, status);
158     if (U_FAILURE(status)) { return; }
159 }
160 
161 } // namespace
162 
163 
// Populates this object from the pluralRanges resource for `locale` by
// delegating to the file-local getPluralRangesData(); errors are reported
// through `status`.
void StandardPluralRanges::initialize(const Locale& locale, UErrorCode& status) {
    getPluralRangesData(locale, *this, status);
}
167 
addPluralRange(StandardPlural::Form first,StandardPlural::Form second,StandardPlural::Form result)168 void StandardPluralRanges::addPluralRange(
169         StandardPlural::Form first,
170         StandardPlural::Form second,
171         StandardPlural::Form result) {
172     U_ASSERT(fTriplesLen < fTriples.getCapacity());
173     fTriples[fTriplesLen] = {first, second, result};
174     fTriplesLen++;
175 }
176 
setCapacity(int32_t length)177 void StandardPluralRanges::setCapacity(int32_t length) {
178     if (length > fTriples.getCapacity()) {
179         fTriples.resize(length, 0);
180     }
181 }
182 
183 StandardPlural::Form
resolve(StandardPlural::Form first,StandardPlural::Form second) const184 StandardPluralRanges::resolve(StandardPlural::Form first, StandardPlural::Form second) const {
185     for (int32_t i=0; i<fTriplesLen; i++) {
186         const auto& triple = fTriples[i];
187         if (triple.first == first && triple.second == second) {
188             return triple.result;
189         }
190     }
191     // Default fallback
192     return StandardPlural::OTHER;
193 }
194 
195 
NumberRangeFormatterImpl(const RangeMacroProps & macros,UErrorCode & status)196 NumberRangeFormatterImpl::NumberRangeFormatterImpl(const RangeMacroProps& macros, UErrorCode& status)
197     : formatterImpl1(macros.formatter1.fMacros, status),
198       formatterImpl2(macros.formatter2.fMacros, status),
199       fSameFormatters(macros.singleFormatter),
200       fCollapse(macros.collapse),
201       fIdentityFallback(macros.identityFallback) {
202 
203     const char* nsName = formatterImpl1.getRawMicroProps().nsName;
204     if (uprv_strcmp(nsName, formatterImpl2.getRawMicroProps().nsName) != 0) {
205         status = U_ILLEGAL_ARGUMENT_ERROR;
206         return;
207     }
208 
209     NumberRangeData data;
210     getNumberRangeData(macros.locale.getName(), nsName, data, status);
211     if (U_FAILURE(status)) { return; }
212     fRangeFormatter = data.rangePattern;
213     fApproximatelyModifier = {data.approximatelyPattern, UNUM_FIELD_COUNT, false};
214 
215     // TODO: Get locale from PluralRules instead?
216     fPluralRanges.initialize(macros.locale, status);
217     if (U_FAILURE(status)) { return; }
218 }
219 
format(UFormattedNumberRangeData & data,bool equalBeforeRounding,UErrorCode & status) const220 void NumberRangeFormatterImpl::format(UFormattedNumberRangeData& data, bool equalBeforeRounding, UErrorCode& status) const {
221     if (U_FAILURE(status)) {
222         return;
223     }
224 
225     MicroProps micros1;
226     MicroProps micros2;
227     formatterImpl1.preProcess(data.quantity1, micros1, status);
228     if (fSameFormatters) {
229         formatterImpl1.preProcess(data.quantity2, micros2, status);
230     } else {
231         formatterImpl2.preProcess(data.quantity2, micros2, status);
232     }
233     if (U_FAILURE(status)) {
234         return;
235     }
236 
237     // If any of the affixes are different, an identity is not possible
238     // and we must use formatRange().
239     // TODO: Write this as MicroProps operator==() ?
240     // TODO: Avoid the redundancy of these equality operations with the
241     // ones in formatRange?
242     if (!micros1.modInner->semanticallyEquivalent(*micros2.modInner)
243             || !micros1.modMiddle->semanticallyEquivalent(*micros2.modMiddle)
244             || !micros1.modOuter->semanticallyEquivalent(*micros2.modOuter)) {
245         formatRange(data, micros1, micros2, status);
246         data.identityResult = UNUM_IDENTITY_RESULT_NOT_EQUAL;
247         return;
248     }
249 
250     // Check for identity
251     if (equalBeforeRounding) {
252         data.identityResult = UNUM_IDENTITY_RESULT_EQUAL_BEFORE_ROUNDING;
253     } else if (data.quantity1 == data.quantity2) {
254         data.identityResult = UNUM_IDENTITY_RESULT_EQUAL_AFTER_ROUNDING;
255     } else {
256         data.identityResult = UNUM_IDENTITY_RESULT_NOT_EQUAL;
257     }
258 
259     switch (identity2d(fIdentityFallback, data.identityResult)) {
260         case identity2d(UNUM_IDENTITY_FALLBACK_RANGE,
261                         UNUM_IDENTITY_RESULT_NOT_EQUAL):
262         case identity2d(UNUM_IDENTITY_FALLBACK_RANGE,
263                         UNUM_IDENTITY_RESULT_EQUAL_AFTER_ROUNDING):
264         case identity2d(UNUM_IDENTITY_FALLBACK_RANGE,
265                         UNUM_IDENTITY_RESULT_EQUAL_BEFORE_ROUNDING):
266         case identity2d(UNUM_IDENTITY_FALLBACK_APPROXIMATELY,
267                         UNUM_IDENTITY_RESULT_NOT_EQUAL):
268         case identity2d(UNUM_IDENTITY_FALLBACK_APPROXIMATELY_OR_SINGLE_VALUE,
269                         UNUM_IDENTITY_RESULT_NOT_EQUAL):
270         case identity2d(UNUM_IDENTITY_FALLBACK_SINGLE_VALUE,
271                         UNUM_IDENTITY_RESULT_NOT_EQUAL):
272             formatRange(data, micros1, micros2, status);
273             break;
274 
275         case identity2d(UNUM_IDENTITY_FALLBACK_APPROXIMATELY,
276                         UNUM_IDENTITY_RESULT_EQUAL_AFTER_ROUNDING):
277         case identity2d(UNUM_IDENTITY_FALLBACK_APPROXIMATELY,
278                         UNUM_IDENTITY_RESULT_EQUAL_BEFORE_ROUNDING):
279         case identity2d(UNUM_IDENTITY_FALLBACK_APPROXIMATELY_OR_SINGLE_VALUE,
280                         UNUM_IDENTITY_RESULT_EQUAL_AFTER_ROUNDING):
281             formatApproximately(data, micros1, micros2, status);
282             break;
283 
284         case identity2d(UNUM_IDENTITY_FALLBACK_APPROXIMATELY_OR_SINGLE_VALUE,
285                         UNUM_IDENTITY_RESULT_EQUAL_BEFORE_ROUNDING):
286         case identity2d(UNUM_IDENTITY_FALLBACK_SINGLE_VALUE,
287                         UNUM_IDENTITY_RESULT_EQUAL_AFTER_ROUNDING):
288         case identity2d(UNUM_IDENTITY_FALLBACK_SINGLE_VALUE,
289                         UNUM_IDENTITY_RESULT_EQUAL_BEFORE_ROUNDING):
290             formatSingleValue(data, micros1, micros2, status);
291             break;
292 
293         default:
294             UPRV_UNREACHABLE;
295     }
296 }
297 
298 
formatSingleValue(UFormattedNumberRangeData & data,MicroProps & micros1,MicroProps & micros2,UErrorCode & status) const299 void NumberRangeFormatterImpl::formatSingleValue(UFormattedNumberRangeData& data,
300                                                  MicroProps& micros1, MicroProps& micros2,
301                                                  UErrorCode& status) const {
302     if (U_FAILURE(status)) { return; }
303     if (fSameFormatters) {
304         int32_t length = NumberFormatterImpl::writeNumber(micros1, data.quantity1, data.getStringRef(), 0, status);
305         NumberFormatterImpl::writeAffixes(micros1, data.getStringRef(), 0, length, status);
306     } else {
307         formatRange(data, micros1, micros2, status);
308     }
309 }
310 
311 
formatApproximately(UFormattedNumberRangeData & data,MicroProps & micros1,MicroProps & micros2,UErrorCode & status) const312 void NumberRangeFormatterImpl::formatApproximately (UFormattedNumberRangeData& data,
313                                                     MicroProps& micros1, MicroProps& micros2,
314                                                     UErrorCode& status) const {
315     if (U_FAILURE(status)) { return; }
316     if (fSameFormatters) {
317         int32_t length = NumberFormatterImpl::writeNumber(micros1, data.quantity1, data.getStringRef(), 0, status);
318         // HEURISTIC: Desired modifier order: inner, middle, approximately, outer.
319         length += micros1.modInner->apply(data.getStringRef(), 0, length, status);
320         length += micros1.modMiddle->apply(data.getStringRef(), 0, length, status);
321         length += fApproximatelyModifier.apply(data.getStringRef(), 0, length, status);
322         micros1.modOuter->apply(data.getStringRef(), 0, length, status);
323     } else {
324         formatRange(data, micros1, micros2, status);
325     }
326 }
327 
328 
/**
 * Writes both quantities into the output string, joined by the range pattern.
 *
 * Steps, in order:
 *   1. Decide, from fCollapse and the modifiers of the two endpoints, which of
 *      the outer/middle/inner modifiers are "collapsed" (applied once around
 *      the whole range rather than once per number).
 *   2. Insert the range pattern and measure its prefix, infix and suffix.
 *   3. If any non-empty modifier will be repeated, make sure the infix starts
 *      and ends with whitespace.
 *   4. Write the two numbers, then apply the inner, middle and outer modifiers,
 *      each either collapsed or per number.
 *
 * The length variables are updated after every insertion so that the
 * UPRV_INDEX_* macros always yield current positions in the string.
 */
void NumberRangeFormatterImpl::formatRange(UFormattedNumberRangeData& data,
                                           MicroProps& micros1, MicroProps& micros2,
                                           UErrorCode& status) const {
    if (U_FAILURE(status)) { return; }

    // modInner is always notation (scientific); collapsable in ALL.
    // modOuter is always units; collapsable in ALL, AUTO, and UNIT.
    // modMiddle could be either; collapsable in ALL and sometimes AUTO and UNIT.
    // Never collapse an outer mod but not an inner mod.
    bool collapseOuter, collapseMiddle, collapseInner;
    switch (fCollapse) {
        case UNUM_RANGE_COLLAPSE_ALL:
        case UNUM_RANGE_COLLAPSE_AUTO:
        case UNUM_RANGE_COLLAPSE_UNIT:
        {
            // OUTER MODIFIER
            collapseOuter = micros1.modOuter->semanticallyEquivalent(*micros2.modOuter);

            if (!collapseOuter) {
                // Never collapse inner mods if outer mods are not collapsable
                collapseMiddle = false;
                collapseInner = false;
                break;
            }

            // MIDDLE MODIFIER
            collapseMiddle = micros1.modMiddle->semanticallyEquivalent(*micros2.modMiddle);

            if (!collapseMiddle) {
                // Never collapse inner mods if middle mods are not collapsable
                collapseInner = false;
                break;
            }

            // MIDDLE MODIFIER HEURISTICS
            // (could disable collapsing of the middle modifier)
            // The modifiers are equal by this point, so we can look at just one of them.
            const Modifier* mm = micros1.modMiddle;
            if (fCollapse == UNUM_RANGE_COLLAPSE_UNIT) {
                // Only collapse if the modifier is a unit.
                // TODO: Make a better way to check for a unit?
                // TODO: Handle case where the modifier has both notation and unit (compact currency)?
                if (!mm->containsField(UNUM_CURRENCY_FIELD) && !mm->containsField(UNUM_PERCENT_FIELD)) {
                    collapseMiddle = false;
                }
            } else if (fCollapse == UNUM_RANGE_COLLAPSE_AUTO) {
                // Heuristic as of ICU 63: collapse only if the modifier is more than one code point.
                if (mm->getCodePointCount() <= 1) {
                    collapseMiddle = false;
                }
            }

            // The inner modifier is only considered for collapsing in ALL mode,
            // and only when the middle modifier is collapsed as well.
            if (!collapseMiddle || fCollapse != UNUM_RANGE_COLLAPSE_ALL) {
                collapseInner = false;
                break;
            }

            // INNER MODIFIER
            collapseInner = micros1.modInner->semanticallyEquivalent(*micros2.modInner);

            // All done checking for collapsability.
            break;
        }

        default:
            // Any other collapse setting: repeat every modifier on both numbers.
            collapseOuter = false;
            collapseMiddle = false;
            collapseInner = false;
            break;
    }

    FormattedStringBuilder& string = data.getStringRef();
    // Lengths of the five consecutive segments of the output:
    // pattern prefix, first number, pattern infix, second number, pattern suffix.
    int32_t lengthPrefix = 0;
    int32_t length1 = 0;
    int32_t lengthInfix = 0;
    int32_t length2 = 0;
    int32_t lengthSuffix = 0;

    // Use #define so that these are evaluated at the call site.
    // INDEX_0: start of first number; INDEX_1: end of first number / start of infix;
    // INDEX_2: end of infix / start of second number; INDEX_3: end of second number.
    #define UPRV_INDEX_0 (lengthPrefix)
    #define UPRV_INDEX_1 (lengthPrefix + length1)
    #define UPRV_INDEX_2 (lengthPrefix + length1 + lengthInfix)
    #define UPRV_INDEX_3 (lengthPrefix + length1 + lengthInfix + length2)

    // Insert the range pattern at index 0; the prefix and suffix lengths are
    // reported through the out-parameters and the infix is what remains.
    int32_t lengthRange = SimpleModifier::formatTwoArgPattern(
        fRangeFormatter,
        string,
        0,
        &lengthPrefix,
        &lengthSuffix,
        UNUM_FIELD_COUNT,
        status);
    if (U_FAILURE(status)) { return; }
    lengthInfix = lengthRange - lengthPrefix - lengthSuffix;
    U_ASSERT(lengthInfix > 0);

    // SPACING HEURISTIC
    // Add spacing unless all modifiers are collapsed.
    // TODO: add API to control this?
    // TODO: Use a data-driven heuristic like currency spacing?
    // TODO: Use Unicode [:whitespace:] instead of PatternProps whitespace? (consider speed implications)
    {
        bool repeatInner = !collapseInner && micros1.modInner->getCodePointCount() > 0;
        bool repeatMiddle = !collapseMiddle && micros1.modMiddle->getCodePointCount() > 0;
        bool repeatOuter = !collapseOuter && micros1.modOuter->getCodePointCount() > 0;
        if (repeatInner || repeatMiddle || repeatOuter) {
            // Add spacing if there is not already spacing
            // (first check: leading character of the infix; second check: its trailing character)
            if (!PatternProps::isWhiteSpace(string.charAt(UPRV_INDEX_1))) {
                lengthInfix += string.insertCodePoint(UPRV_INDEX_1, u'\u0020', UNUM_FIELD_COUNT, status);
            }
            if (!PatternProps::isWhiteSpace(string.charAt(UPRV_INDEX_2 - 1))) {
                lengthInfix += string.insertCodePoint(UPRV_INDEX_2, u'\u0020', UNUM_FIELD_COUNT, status);
            }
        }
    }

    // Write the first number before the infix and the second number after it.
    length1 += NumberFormatterImpl::writeNumber(micros1, data.quantity1, string, UPRV_INDEX_0, status);
    length2 += NumberFormatterImpl::writeNumber(micros2, data.quantity2, string, UPRV_INDEX_2, status);

    // TODO: Support padding?

    // Apply the modifiers from the inside out. A collapsed modifier wraps the
    // whole [INDEX_0, INDEX_3) span; otherwise each number gets its own copy.
    if (collapseInner) {
        // Note: this is actually a mix of prefix and suffix, but adding to infix length works
        const Modifier& mod = resolveModifierPlurals(*micros1.modInner, *micros2.modInner);
        lengthInfix += mod.apply(string, UPRV_INDEX_0, UPRV_INDEX_3, status);
    } else {
        length1 += micros1.modInner->apply(string, UPRV_INDEX_0, UPRV_INDEX_1, status);
        length2 += micros2.modInner->apply(string, UPRV_INDEX_2, UPRV_INDEX_3, status);
    }

    if (collapseMiddle) {
        // Note: this is actually a mix of prefix and suffix, but adding to infix length works
        const Modifier& mod = resolveModifierPlurals(*micros1.modMiddle, *micros2.modMiddle);
        lengthInfix += mod.apply(string, UPRV_INDEX_0, UPRV_INDEX_3, status);
    } else {
        length1 += micros1.modMiddle->apply(string, UPRV_INDEX_0, UPRV_INDEX_1, status);
        length2 += micros2.modMiddle->apply(string, UPRV_INDEX_2, UPRV_INDEX_3, status);
    }

    if (collapseOuter) {
        // Note: this is actually a mix of prefix and suffix, but adding to infix length works
        const Modifier& mod = resolveModifierPlurals(*micros1.modOuter, *micros2.modOuter);
        lengthInfix += mod.apply(string, UPRV_INDEX_0, UPRV_INDEX_3, status);
    } else {
        length1 += micros1.modOuter->apply(string, UPRV_INDEX_0, UPRV_INDEX_1, status);
        length2 += micros2.modOuter->apply(string, UPRV_INDEX_2, UPRV_INDEX_3, status);
    }
}
477 
478 
479 const Modifier&
resolveModifierPlurals(const Modifier & first,const Modifier & second) const480 NumberRangeFormatterImpl::resolveModifierPlurals(const Modifier& first, const Modifier& second) const {
481     Modifier::Parameters parameters;
482     first.getParameters(parameters);
483     if (parameters.obj == nullptr) {
484         // No plural form; return a fallback (e.g., the first)
485         return first;
486     }
487     StandardPlural::Form firstPlural = parameters.plural;
488 
489     second.getParameters(parameters);
490     if (parameters.obj == nullptr) {
491         // No plural form; return a fallback (e.g., the first)
492         return first;
493     }
494     StandardPlural::Form secondPlural = parameters.plural;
495 
496     // Get the required plural form from data
497     StandardPlural::Form resultPlural = fPluralRanges.resolve(firstPlural, secondPlural);
498 
499     // Get and return the new Modifier
500     const Modifier* mod = parameters.obj->getModifier(parameters.signum, resultPlural);
501     U_ASSERT(mod != nullptr);
502     return *mod;
503 }
504 
505 
506 
507 #endif /* #if !UCONFIG_NO_FORMATTING */
508