1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 * Copyright (C) 1997-2015, International Business Machines Corporation and    *
6 * others. All Rights Reserved.                                                *
7 *******************************************************************************
8 *
9 * File COMPACTDECIMALFORMAT.CPP
10 *
11 ********************************************************************************
12 */
13 #include "unicode/utypes.h"
14 
15 #if !UCONFIG_NO_FORMATTING
16 
17 #include "charstr.h"
18 #include "cstring.h"
19 #include "digitlst.h"
20 #include "mutex.h"
21 #include "unicode/compactdecimalformat.h"
22 #include "unicode/numsys.h"
23 #include "unicode/plurrule.h"
24 #include "unicode/ures.h"
25 #include "ucln_in.h"
26 #include "uhash.h"
27 #include "umutex.h"
28 #include "unicode/ures.h"
29 #include "uresimp.h"
30 
// Process-wide cache that maps a locale name (char* key) to its CDFLocaleData
// struct. It is created lazily by getCDFLocaleStyleData and closed by
// cdf_cleanup.
static UHashtable* gCompactDecimalData = NULL;
// Held by getCDFLocaleStyleData while gCompactDecimalData is created, queried
// or modified.
static UMutex gCompactDecimalMetaLock = U_MUTEX_INITIALIZER;
34 
35 U_NAMESPACE_BEGIN
36 
// Number of log10 slots kept per style; sizes CDFLocaleStyleData::divisors and
// is the upper bound applied to log10 values read from the resource data.
static const int32_t MAX_DIGITS = 15;
// Name of the "other" plural variant.
static const char gOther[] = "other";
// Name of the Latin numbering system, compared against the locale's
// numbering system name in load().
static const char gLatnTag[] = "latn";
// Resource keys used when walking the number-elements data.
static const char gNumberElementsTag[] = "NumberElements";
static const char gDecimalFormatTag[] = "decimalFormat";
static const char gPatternsShort[] = "patternsShort";
static const char gPatternsLong[] = "patternsLong";
// Resource path of the Latin numbering system data.
static const char gLatnPath[] = "NumberElements/latn";

// U+0030 DIGIT ZERO and U+0027 APOSTROPHE.
static const UChar u_0 = 0x30;
static const UChar u_apos = 0x27;

// One-character search string holding a single '0'; used to locate the zeros
// of a pattern in populatePrefixSuffix.
static const UChar kZero[] = {u_0};
50 
// States of the small state machine that fixQuotes uses to unescape
// single quotes.
enum QuoteState {
  OUTSIDE = 0,       // not inside a quoted section
  INSIDE_EMPTY = 1,  // just after an opening quote, nothing quoted yet
  INSIDE_FULL = 2    // inside a quoted section with at least one character
};
57 
// Bit flags describing fallback requirements.
enum FallbackFlags {
  ANY = 0x0,
  MUST = 0x1,
  NOT_ROOT = 0x2
  // Further flags continue the bit sequence: 4, then 8, etc.
};
64 
65 
66 // CDFUnit represents a prefix-suffix pair for a particular variant
67 // and log10 value.
68 struct CDFUnit : public UMemory {
69   UnicodeString prefix;
70   UnicodeString suffix;
CDFUnitCDFUnit71   inline CDFUnit() : prefix(), suffix() {
72     prefix.setToBogus();
73   }
~CDFUnitCDFUnit74   inline ~CDFUnit() {}
isSetCDFUnit75   inline UBool isSet() const {
76     return !prefix.isBogus();
77   }
markAsSetCDFUnit78   inline void markAsSet() {
79     prefix.remove();
80   }
81 };
82 
83 // CDFLocaleStyleData contains formatting data for a particular locale
84 // and style.
85 class CDFLocaleStyleData : public UMemory {
86  public:
87   // What to divide by for each log10 value when formatting. These values
88   // will be powers of 10. For English, would be:
89   // 1, 1, 1, 1000, 1000, 1000, 1000000, 1000000, 1000000, 1000000000 ...
90   double divisors[MAX_DIGITS];
91   // Maps plural variants to CDFUnit[MAX_DIGITS] arrays.
92   // To format a number x,
93   // first compute log10(x). Compute displayNum = (x / divisors[log10(x)]).
94   // Compute the plural variant for displayNum
95   // (e.g zero, one, two, few, many, other).
96   // Compute cdfUnits = unitsByVariant[pluralVariant].
97   // Prefix and suffix to use at cdfUnits[log10(x)]
98   UHashtable* unitsByVariant;
99   // A flag for whether or not this CDFLocaleStyleData was loaded from the
100   // Latin numbering system as a fallback from the locale numbering system.
101   // This value is meaningless if the object is bogus or empty.
102   UBool fromFallback;
CDFLocaleStyleData()103   inline CDFLocaleStyleData() : unitsByVariant(NULL), fromFallback(FALSE) {
104     uprv_memset(divisors, 0, sizeof(divisors));
105   }
106   ~CDFLocaleStyleData();
107   // Init initializes this object.
108   void Init(UErrorCode& status);
isBogus() const109   inline UBool isBogus() const {
110     return unitsByVariant == NULL;
111   }
112   void setToBogus();
isEmpty()113   UBool isEmpty() {
114     return unitsByVariant == NULL || unitsByVariant->count == 0;
115   }
116  private:
117   CDFLocaleStyleData(const CDFLocaleStyleData&);
118   CDFLocaleStyleData& operator=(const CDFLocaleStyleData&);
119 };
120 
121 // CDFLocaleData contains formatting data for a particular locale.
122 struct CDFLocaleData : public UMemory {
123   CDFLocaleStyleData shortData;
124   CDFLocaleStyleData longData;
CDFLocaleDataCDFLocaleData125   inline CDFLocaleData() : shortData(), longData() { }
~CDFLocaleDataCDFLocaleData126   inline ~CDFLocaleData() { }
127   // Init initializes this object.
128   void Init(UErrorCode& status);
129 };
130 
131 U_NAMESPACE_END
132 
133 U_CDECL_BEGIN
134 
cdf_cleanup(void)135 static UBool U_CALLCONV cdf_cleanup(void) {
136   if (gCompactDecimalData != NULL) {
137     uhash_close(gCompactDecimalData);
138     gCompactDecimalData = NULL;
139   }
140   return TRUE;
141 }
142 
deleteCDFUnits(void * ptr)143 static void U_CALLCONV deleteCDFUnits(void* ptr) {
144   delete [] (icu::CDFUnit*) ptr;
145 }
146 
deleteCDFLocaleData(void * ptr)147 static void U_CALLCONV deleteCDFLocaleData(void* ptr) {
148   delete (icu::CDFLocaleData*) ptr;
149 }
150 
151 U_CDECL_END
152 
153 U_NAMESPACE_BEGIN
154 
// Forward declarations of the file-local helpers defined further down.

// Element-wise comparison of two divisor arrays of MAX_DIGITS entries.
static UBool divisors_equal(const double* lhs, const double* rhs);
// Cache lookup (with load on miss) of the style data for a locale.
static const CDFLocaleStyleData* getCDFLocaleStyleData(const Locale& inLocale, UNumberCompactStyle style, UErrorCode& status);

// Selects the short or long member of a CDFLocaleData.
static const CDFLocaleStyleData* extractDataByStyleEnum(const CDFLocaleData& data, UNumberCompactStyle style, UErrorCode& status);
// Allocates and loads a CDFLocaleData; the caller owns the result.
static CDFLocaleData* loadCDFLocaleData(const Locale& inLocale, UErrorCode& status);
// Fills result from the resource bundles of inLocale.
static void load(const Locale& inLocale, CDFLocaleData* result, UErrorCode& status);
// Stores the prefix/suffix of one pattern and returns its zero count.
static int32_t populatePrefixSuffix(const char* variant, int32_t log10Value, const UnicodeString& formatStr, UHashtable* result, UBool overwrite, UErrorCode& status);
// Derives the divisor from a power of ten and a zero count.
static double calculateDivisor(double power10, int32_t numZeros);
// TRUE when the string is empty after trimming.
static UBool onlySpaces(UnicodeString u);
// Unescapes single quotes in place.
static void fixQuotes(UnicodeString& s);
static void checkForOtherVariants(CDFLocaleStyleData* result, UErrorCode& status);
static void fillInMissing(CDFLocaleStyleData* result);
static int32_t computeLog10(double x, UBool inRange);
static CDFUnit* createCDFUnit(const char* variant, int32_t log10Value, UHashtable* table, UErrorCode& status);
static const CDFUnit* getCDFUnitFallback(const UHashtable* table, const UnicodeString& variant, int32_t log10Value);
170 
// Expands the ICU RTTI boilerplate for CompactDecimalFormat.
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CompactDecimalFormat)
172 
173 CompactDecimalFormat::CompactDecimalFormat(
174     const DecimalFormat& decimalFormat,
175     const UHashtable* unitsByVariant,
176     const double* divisors,
177     PluralRules* pluralRules)
178   : DecimalFormat(decimalFormat), _unitsByVariant(unitsByVariant), _divisors(divisors), _pluralRules(pluralRules) {
179 }
180 
// Copy constructor: shares the unit table and divisor pointers with source
// and takes its own clone of source's plural rules.
CompactDecimalFormat::CompactDecimalFormat(const CompactDecimalFormat& source)
    : DecimalFormat(source),
      _unitsByVariant(source._unitsByVariant),
      _divisors(source._divisors),
      _pluralRules(source._pluralRules->clone()) {
}
184 
185 CompactDecimalFormat* U_EXPORT2
createInstance(const Locale & inLocale,UNumberCompactStyle style,UErrorCode & status)186 CompactDecimalFormat::createInstance(
187     const Locale& inLocale, UNumberCompactStyle style, UErrorCode& status) {
188   LocalPointer<DecimalFormat> decfmt((DecimalFormat*) NumberFormat::makeInstance(inLocale, UNUM_DECIMAL, TRUE, status));
189   if (U_FAILURE(status)) {
190     return NULL;
191   }
192   LocalPointer<PluralRules> pluralRules(PluralRules::forLocale(inLocale, status));
193   if (U_FAILURE(status)) {
194     return NULL;
195   }
196   const CDFLocaleStyleData* data = getCDFLocaleStyleData(inLocale, style, status);
197   if (U_FAILURE(status)) {
198     return NULL;
199   }
200   CompactDecimalFormat* result =
201       new CompactDecimalFormat(*decfmt, data->unitsByVariant, data->divisors, pluralRules.getAlias());
202   if (result == NULL) {
203     status = U_MEMORY_ALLOCATION_ERROR;
204     return NULL;
205   }
206   pluralRules.orphan();
207   result->setMaximumSignificantDigits(3);
208   result->setSignificantDigitsUsed(TRUE);
209   result->setGroupingUsed(FALSE);
210   return result;
211 }
212 
213 CompactDecimalFormat&
operator =(const CompactDecimalFormat & rhs)214 CompactDecimalFormat::operator=(const CompactDecimalFormat& rhs) {
215   if (this != &rhs) {
216     DecimalFormat::operator=(rhs);
217     _unitsByVariant = rhs._unitsByVariant;
218     _divisors = rhs._divisors;
219     delete _pluralRules;
220     _pluralRules = rhs._pluralRules->clone();
221   }
222   return *this;
223 }
224 
~CompactDecimalFormat()225 CompactDecimalFormat::~CompactDecimalFormat() {
226   delete _pluralRules;
227 }
228 
229 
230 Format*
clone(void) const231 CompactDecimalFormat::clone(void) const {
232   return new CompactDecimalFormat(*this);
233 }
234 
235 UBool
operator ==(const Format & that) const236 CompactDecimalFormat::operator==(const Format& that) const {
237   if (this == &that) {
238     return TRUE;
239   }
240   return (DecimalFormat::operator==(that) && eqHelper((const CompactDecimalFormat&) that));
241 }
242 
243 UBool
eqHelper(const CompactDecimalFormat & that) const244 CompactDecimalFormat::eqHelper(const CompactDecimalFormat& that) const {
245   return uhash_equals(_unitsByVariant, that._unitsByVariant) && divisors_equal(_divisors, that._divisors) && (*_pluralRules == *that._pluralRules);
246 }
247 
248 UnicodeString&
format(double number,UnicodeString & appendTo,FieldPosition & pos) const249 CompactDecimalFormat::format(
250     double number,
251     UnicodeString& appendTo,
252     FieldPosition& pos) const {
253   UErrorCode status = U_ZERO_ERROR;
254   return format(number, appendTo, pos, status);
255 }
256 
257 UnicodeString&
format(double number,UnicodeString & appendTo,FieldPosition & pos,UErrorCode & status) const258 CompactDecimalFormat::format(
259     double number,
260     UnicodeString& appendTo,
261     FieldPosition& pos,
262     UErrorCode &status) const {
263   if (U_FAILURE(status)) {
264     return appendTo;
265   }
266   DigitList orig, rounded;
267   orig.set(number);
268   UBool isNegative;
269   _round(orig, rounded, isNegative, status);
270   if (U_FAILURE(status)) {
271     return appendTo;
272   }
273   double roundedDouble = rounded.getDouble();
274   if (isNegative) {
275     roundedDouble = -roundedDouble;
276   }
277   int32_t baseIdx = computeLog10(roundedDouble, TRUE);
278   double numberToFormat = roundedDouble / _divisors[baseIdx];
279   UnicodeString variant = _pluralRules->select(numberToFormat);
280   if (isNegative) {
281     numberToFormat = -numberToFormat;
282   }
283   const CDFUnit* unit = getCDFUnitFallback(_unitsByVariant, variant, baseIdx);
284   appendTo += unit->prefix;
285   DecimalFormat::format(numberToFormat, appendTo, pos);
286   appendTo += unit->suffix;
287   return appendTo;
288 }
289 
290 UnicodeString&
format(double,UnicodeString & appendTo,FieldPositionIterator *,UErrorCode & status) const291 CompactDecimalFormat::format(
292     double /* number */,
293     UnicodeString& appendTo,
294     FieldPositionIterator* /* posIter */,
295     UErrorCode& status) const {
296   status = U_UNSUPPORTED_ERROR;
297   return appendTo;
298 }
299 
300 UnicodeString&
format(int32_t number,UnicodeString & appendTo,FieldPosition & pos) const301 CompactDecimalFormat::format(
302     int32_t number,
303     UnicodeString& appendTo,
304     FieldPosition& pos) const {
305   return format((double) number, appendTo, pos);
306 }
307 
308 UnicodeString&
format(int32_t number,UnicodeString & appendTo,FieldPosition & pos,UErrorCode & status) const309 CompactDecimalFormat::format(
310     int32_t number,
311     UnicodeString& appendTo,
312     FieldPosition& pos,
313     UErrorCode &status) const {
314   return format((double) number, appendTo, pos, status);
315 }
316 
317 UnicodeString&
format(int32_t,UnicodeString & appendTo,FieldPositionIterator *,UErrorCode & status) const318 CompactDecimalFormat::format(
319     int32_t /* number */,
320     UnicodeString& appendTo,
321     FieldPositionIterator* /* posIter */,
322     UErrorCode& status) const {
323   status = U_UNSUPPORTED_ERROR;
324   return appendTo;
325 }
326 
327 UnicodeString&
format(int64_t number,UnicodeString & appendTo,FieldPosition & pos) const328 CompactDecimalFormat::format(
329     int64_t number,
330     UnicodeString& appendTo,
331     FieldPosition& pos) const {
332   return format((double) number, appendTo, pos);
333 }
334 
335 UnicodeString&
format(int64_t number,UnicodeString & appendTo,FieldPosition & pos,UErrorCode & status) const336 CompactDecimalFormat::format(
337     int64_t number,
338     UnicodeString& appendTo,
339     FieldPosition& pos,
340     UErrorCode &status) const {
341   return format((double) number, appendTo, pos, status);
342 }
343 
344 UnicodeString&
format(int64_t,UnicodeString & appendTo,FieldPositionIterator *,UErrorCode & status) const345 CompactDecimalFormat::format(
346     int64_t /* number */,
347     UnicodeString& appendTo,
348     FieldPositionIterator* /* posIter */,
349     UErrorCode& status) const {
350   status = U_UNSUPPORTED_ERROR;
351   return appendTo;
352 }
353 
354 UnicodeString&
format(StringPiece,UnicodeString & appendTo,FieldPositionIterator *,UErrorCode & status) const355 CompactDecimalFormat::format(
356     StringPiece /* number */,
357     UnicodeString& appendTo,
358     FieldPositionIterator* /* posIter */,
359     UErrorCode& status) const {
360   status = U_UNSUPPORTED_ERROR;
361   return appendTo;
362 }
363 
364 UnicodeString&
format(const DigitList &,UnicodeString & appendTo,FieldPositionIterator *,UErrorCode & status) const365 CompactDecimalFormat::format(
366     const DigitList& /* number */,
367     UnicodeString& appendTo,
368     FieldPositionIterator* /* posIter */,
369     UErrorCode& status) const {
370   status = U_UNSUPPORTED_ERROR;
371   return appendTo;
372 }
373 
374 UnicodeString&
format(const DigitList &,UnicodeString & appendTo,FieldPosition &,UErrorCode & status) const375 CompactDecimalFormat::format(const DigitList& /* number */,
376                              UnicodeString& appendTo,
377                              FieldPosition& /* pos */,
378                              UErrorCode& status) const {
379   status = U_UNSUPPORTED_ERROR;
380   return appendTo;
381 }
382 
383 void
parse(const UnicodeString &,Formattable &,ParsePosition &) const384 CompactDecimalFormat::parse(
385     const UnicodeString& /* text */,
386     Formattable& /* result */,
387     ParsePosition& /* parsePosition */) const {
388 }
389 
390 void
parse(const UnicodeString &,Formattable &,UErrorCode & status) const391 CompactDecimalFormat::parse(
392     const UnicodeString& /* text */,
393     Formattable& /* result */,
394     UErrorCode& status) const {
395   status = U_UNSUPPORTED_ERROR;
396 }
397 
398 CurrencyAmount*
parseCurrency(const UnicodeString &,ParsePosition &) const399 CompactDecimalFormat::parseCurrency(
400     const UnicodeString& /* text */,
401     ParsePosition& /* pos */) const {
402   return NULL;
403 }
404 
Init(UErrorCode & status)405 void CDFLocaleStyleData::Init(UErrorCode& status) {
406   if (unitsByVariant != NULL) {
407     return;
408   }
409   unitsByVariant = uhash_open(uhash_hashChars, uhash_compareChars, NULL, &status);
410   if (U_FAILURE(status)) {
411     return;
412   }
413   uhash_setKeyDeleter(unitsByVariant, uprv_free);
414   uhash_setValueDeleter(unitsByVariant, deleteCDFUnits);
415 }
416 
~CDFLocaleStyleData()417 CDFLocaleStyleData::~CDFLocaleStyleData() {
418   setToBogus();
419 }
420 
setToBogus()421 void CDFLocaleStyleData::setToBogus() {
422   if (unitsByVariant != NULL) {
423     uhash_close(unitsByVariant);
424     unitsByVariant = NULL;
425   }
426 }
427 
Init(UErrorCode & status)428 void CDFLocaleData::Init(UErrorCode& status) {
429   shortData.Init(status);
430   if (U_FAILURE(status)) {
431     return;
432   }
433   longData.Init(status);
434 }
435 
436 // Helper method for operator=
divisors_equal(const double * lhs,const double * rhs)437 static UBool divisors_equal(const double* lhs, const double* rhs) {
438   for (int32_t i = 0; i < MAX_DIGITS; ++i) {
439     if (lhs[i] != rhs[i]) {
440       return FALSE;
441     }
442   }
443   return TRUE;
444 }
445 
446 // getCDFLocaleStyleData returns pointer to formatting data for given locale and
447 // style within the global cache. On cache miss, getCDFLocaleStyleData loads
448 // the data from CLDR into the global cache before returning the pointer. If a
449 // UNUM_LONG data is requested for a locale, and that locale does not have
450 // UNUM_LONG data, getCDFLocaleStyleData will fall back to UNUM_SHORT data for
451 // that locale.
getCDFLocaleStyleData(const Locale & inLocale,UNumberCompactStyle style,UErrorCode & status)452 static const CDFLocaleStyleData* getCDFLocaleStyleData(const Locale& inLocale, UNumberCompactStyle style, UErrorCode& status) {
453   if (U_FAILURE(status)) {
454     return NULL;
455   }
456   CDFLocaleData* result = NULL;
457   const char* key = inLocale.getName();
458   {
459     Mutex lock(&gCompactDecimalMetaLock);
460     if (gCompactDecimalData == NULL) {
461       gCompactDecimalData = uhash_open(uhash_hashChars, uhash_compareChars, NULL, &status);
462       if (U_FAILURE(status)) {
463         return NULL;
464       }
465       uhash_setKeyDeleter(gCompactDecimalData, uprv_free);
466       uhash_setValueDeleter(gCompactDecimalData, deleteCDFLocaleData);
467       ucln_i18n_registerCleanup(UCLN_I18N_CDFINFO, cdf_cleanup);
468     } else {
469       result = (CDFLocaleData*) uhash_get(gCompactDecimalData, key);
470     }
471   }
472   if (result != NULL) {
473     return extractDataByStyleEnum(*result, style, status);
474   }
475 
476   result = loadCDFLocaleData(inLocale, status);
477   if (U_FAILURE(status)) {
478     return NULL;
479   }
480 
481   {
482     Mutex lock(&gCompactDecimalMetaLock);
483     CDFLocaleData* temp = (CDFLocaleData*) uhash_get(gCompactDecimalData, key);
484     if (temp != NULL) {
485       delete result;
486       result = temp;
487     } else {
488       uhash_put(gCompactDecimalData, uprv_strdup(key), (void*) result, &status);
489       if (U_FAILURE(status)) {
490         return NULL;
491       }
492     }
493   }
494   return extractDataByStyleEnum(*result, style, status);
495 }
496 
extractDataByStyleEnum(const CDFLocaleData & data,UNumberCompactStyle style,UErrorCode & status)497 static const CDFLocaleStyleData* extractDataByStyleEnum(const CDFLocaleData& data, UNumberCompactStyle style, UErrorCode& status) {
498   switch (style) {
499     case UNUM_SHORT:
500       return &data.shortData;
501     case UNUM_LONG:
502       if (!data.longData.isBogus()) {
503         return &data.longData;
504       }
505       return &data.shortData;
506     default:
507       status = U_ILLEGAL_ARGUMENT_ERROR;
508       return NULL;
509   }
510 }
511 
512 // loadCDFLocaleData loads formatting data from CLDR for a given locale. The
513 // caller owns the returned pointer.
loadCDFLocaleData(const Locale & inLocale,UErrorCode & status)514 static CDFLocaleData* loadCDFLocaleData(const Locale& inLocale, UErrorCode& status) {
515   if (U_FAILURE(status)) {
516     return NULL;
517   }
518   CDFLocaleData* result = new CDFLocaleData;
519   if (result == NULL) {
520     status = U_MEMORY_ALLOCATION_ERROR;
521     return NULL;
522   }
523   result->Init(status);
524   if (U_FAILURE(status)) {
525     delete result;
526     return NULL;
527   }
528 
529   load(inLocale, result, status);
530 
531   if (U_FAILURE(status)) {
532     delete result;
533     return NULL;
534   }
535   return result;
536 }
537 
538 namespace {
539 
540 struct CmptDecDataSink : public ResourceSink {
541 
542   CDFLocaleData& dataBundle; // Where to save values when they are read
543   UBool isLatin; // Whether or not we are traversing the Latin tree
544   UBool isFallback; // Whether or not we are traversing the Latin tree as fallback
545 
546   enum EPatternsTableKey { PATTERNS_SHORT, PATTERNS_LONG };
547   enum EFormatsTableKey { DECIMAL_FORMAT, CURRENCY_FORMAT };
548 
549   /*
550    * NumberElements{              <-- top (numbering system table)
551    *  latn{                       <-- patternsTable (one per numbering system)
552    *    patternsLong{             <-- formatsTable (one per pattern)
553    *      decimalFormat{          <-- powersOfTenTable (one per format)
554    *        1000{                 <-- pluralVariantsTable (one per power of ten)
555    *          one{"0 thousand"}   <-- plural variant and template
556    */
557 
CmptDecDataSink__anon85f999190111::CmptDecDataSink558   CmptDecDataSink(CDFLocaleData& _dataBundle)
559     : dataBundle(_dataBundle), isLatin(FALSE), isFallback(FALSE) {}
560   virtual ~CmptDecDataSink();
561 
put__anon85f999190111::CmptDecDataSink562   virtual void put(const char *key, ResourceValue &value, UBool isRoot, UErrorCode &errorCode) {
563     // SPECIAL CASE: Don't consume root in the non-Latin numbering system
564     if (isRoot && !isLatin) { return; }
565 
566     ResourceTable patternsTable = value.getTable(errorCode);
567     if (U_FAILURE(errorCode)) { return; }
568     for (int i1 = 0; patternsTable.getKeyAndValue(i1, key, value); ++i1) {
569 
570       // Check for patternsShort or patternsLong
571       EPatternsTableKey patternsTableKey;
572       if (uprv_strcmp(key, gPatternsShort) == 0) {
573         patternsTableKey = PATTERNS_SHORT;
574       } else if (uprv_strcmp(key, gPatternsLong) == 0) {
575         patternsTableKey = PATTERNS_LONG;
576       } else {
577         continue;
578       }
579 
580       // Traverse into the formats table
581       ResourceTable formatsTable = value.getTable(errorCode);
582       if (U_FAILURE(errorCode)) { return; }
583       for (int i2 = 0; formatsTable.getKeyAndValue(i2, key, value); ++i2) {
584 
585         // Check for decimalFormat or currencyFormat
586         EFormatsTableKey formatsTableKey;
587         if (uprv_strcmp(key, gDecimalFormatTag) == 0) {
588           formatsTableKey = DECIMAL_FORMAT;
589         // TODO: Enable this statement when currency support is added
590         // } else if (uprv_strcmp(key, gCurrencyFormat) == 0) {
591         //   formatsTableKey = CURRENCY_FORMAT;
592         } else {
593           continue;
594         }
595 
596         // Set the current style and destination based on the two keys
597         UNumberCompactStyle style;
598         CDFLocaleStyleData* destination = NULL;
599         if (patternsTableKey == PATTERNS_LONG
600             && formatsTableKey == DECIMAL_FORMAT) {
601           style = UNUM_LONG;
602           destination = &dataBundle.longData;
603         } else if (patternsTableKey == PATTERNS_SHORT
604             && formatsTableKey == DECIMAL_FORMAT) {
605           style = UNUM_SHORT;
606           destination = &dataBundle.shortData;
607         // TODO: Enable the following statements when currency support is added
608         // } else if (patternsTableKey == PATTERNS_SHORT
609         //     && formatsTableKey == CURRENCY_FORMAT) {
610         //   style = UNUM_SHORT_CURRENCY; // or whatever the enum gets named
611         //   destination = &dataBundle.shortCurrencyData;
612         // } else {
613         //   // Silently ignore this case
614         //   continue;
615         }
616 
617         // SPECIAL CASE: RULES FOR WHETHER OR NOT TO CONSUME THIS TABLE:
618         //   1) Don't consume longData if shortData was consumed from the non-Latin
619         //      locale numbering system
620         //   2) Don't consume longData for the first time if this is the root bundle and
621         //      shortData is already populated from a more specific locale. Note that if
622         //      both longData and shortData are both only in root, longData will be
623         //      consumed since it is alphabetically before shortData in the bundle.
624         if (isFallback
625                 && style == UNUM_LONG
626                 && !dataBundle.shortData.isEmpty()
627                 && !dataBundle.shortData.fromFallback) {
628             continue;
629         }
630         if (isRoot
631                 && style == UNUM_LONG
632                 && dataBundle.longData.isEmpty()
633                 && !dataBundle.shortData.isEmpty()) {
634             continue;
635         }
636 
637         // Set the "fromFallback" flag on the data object
638         destination->fromFallback = isFallback;
639 
640         // Traverse into the powers of ten table
641         ResourceTable powersOfTenTable = value.getTable(errorCode);
642         if (U_FAILURE(errorCode)) { return; }
643         for (int i3 = 0; powersOfTenTable.getKeyAndValue(i3, key, value); ++i3) {
644 
645           // The key will always be some even power of 10. e.g 10000.
646           char* endPtr = NULL;
647           double power10 = uprv_strtod(key, &endPtr);
648           if (*endPtr != 0) {
649             errorCode = U_INTERNAL_PROGRAM_ERROR;
650             return;
651           }
652           int32_t log10Value = computeLog10(power10, FALSE);
653 
654           // Silently ignore divisors that are too big.
655           if (log10Value >= MAX_DIGITS) continue;
656 
657           // Iterate over the plural variants ("one", "other", etc)
658           ResourceTable pluralVariantsTable = value.getTable(errorCode);
659           if (U_FAILURE(errorCode)) { return; }
660           for (int i4 = 0; pluralVariantsTable.getKeyAndValue(i4, key, value); ++i4) {
661             const char* pluralVariant = key;
662             const UnicodeString formatStr = value.getUnicodeString(errorCode);
663 
664             // Copy the data into the in-memory data bundle (do not overwrite
665             // existing values)
666             int32_t numZeros = populatePrefixSuffix(
667                 pluralVariant, log10Value, formatStr,
668                 destination->unitsByVariant, FALSE, errorCode);
669 
670             // If populatePrefixSuffix returns -1, it means that this key has been
671             // encountered already.
672             if (numZeros < 0) {
673               continue;
674             }
675 
676             // Set the divisor, which is based on the number of zeros in the template
677             // string.  If the divisor from here is different from the one previously
678             // stored, it means that the number of zeros in different plural variants
679             // differs; throw an exception.
680             // TODO: How should I check for floating-point errors here?
681             //       Is there a good reason why "divisor" is double and not long like Java?
682             double divisor = calculateDivisor(power10, numZeros);
683             if (destination->divisors[log10Value] != 0.0
684                 && destination->divisors[log10Value] != divisor) {
685               errorCode = U_INTERNAL_PROGRAM_ERROR;
686               return;
687             }
688             destination->divisors[log10Value] = divisor;
689           }
690         }
691       }
692     }
693   }
694 };
695 
696 // Virtual destructors must be defined out of line.
~CmptDecDataSink()697 CmptDecDataSink::~CmptDecDataSink() {}
698 
699 } // namespace
700 
load(const Locale & inLocale,CDFLocaleData * result,UErrorCode & status)701 static void load(const Locale& inLocale, CDFLocaleData* result, UErrorCode& status) {
702   LocalPointer<NumberingSystem> ns(NumberingSystem::createInstance(inLocale, status));
703   if (U_FAILURE(status)) {
704     return;
705   }
706   const char* nsName = ns->getName();
707 
708   LocalUResourceBundlePointer resource(ures_open(NULL, inLocale.getName(), &status));
709   if (U_FAILURE(status)) {
710     return;
711   }
712   CmptDecDataSink sink(*result);
713   sink.isFallback = FALSE;
714 
715   // First load the number elements data if nsName is not Latin.
716   if (uprv_strcmp(nsName, gLatnTag) != 0) {
717     sink.isLatin = FALSE;
718     CharString path;
719     path.append(gNumberElementsTag, status)
720         .append('/', status)
721         .append(nsName, status);
722     ures_getAllItemsWithFallback(resource.getAlias(), path.data(), sink, status);
723     if (status == U_MISSING_RESOURCE_ERROR) {
724       // Silently ignore and use Latin
725       status = U_ZERO_ERROR;
726     } else if  (U_FAILURE(status)) {
727       return;
728     }
729     sink.isFallback = TRUE;
730   }
731 
732   // Now load Latin.
733   sink.isLatin = TRUE;
734   ures_getAllItemsWithFallback(resource.getAlias(), gLatnPath, sink, status);
735   if (U_FAILURE(status)) return;
736 
737   // If longData is empty, default it to be equal to shortData
738   if (result->longData.isEmpty()) {
739     result->longData.setToBogus();
740   }
741 
742   // Check for "other" variants in each of the three data classes, and resolve missing elements.
743 
744   if (!result->longData.isBogus()) {
745     checkForOtherVariants(&result->longData, status);
746     if (U_FAILURE(status)) return;
747     fillInMissing(&result->longData);
748   }
749 
750   checkForOtherVariants(&result->shortData, status);
751   if (U_FAILURE(status)) return;
752   fillInMissing(&result->shortData);
753 
754   // TODO: Enable this statement when currency support is added
755   // checkForOtherVariants(&result->shortCurrencyData, status);
756   // if (U_FAILURE(status)) return;
757   // fillInMissing(&result->shortCurrencyData);
758 }
759 
760 // populatePrefixSuffix Adds a specific prefix-suffix pair to result for a
761 // given variant and log10 value.
762 // variant is 'zero', 'one', 'two', 'few', 'many', or 'other'.
763 // formatStr is the format string from which the prefix and suffix are
764 // extracted. It is usually of form 'Pefix 000 suffix'.
765 // populatePrefixSuffix returns the number of 0's found in formatStr
766 // before the decimal point.
767 // In the special case that formatStr contains only spaces for prefix
768 // and suffix, populatePrefixSuffix returns log10Value + 1.
populatePrefixSuffix(const char * variant,int32_t log10Value,const UnicodeString & formatStr,UHashtable * result,UBool overwrite,UErrorCode & status)769 static int32_t populatePrefixSuffix(
770     const char* variant, int32_t log10Value, const UnicodeString& formatStr, UHashtable* result, UBool overwrite, UErrorCode& status) {
771   if (U_FAILURE(status)) {
772     return 0;
773   }
774 
775   // ICU 59 HACK: Ignore negative part of format string, mimicking ICU 58 behavior.
776   // TODO(sffc): Make sure this is fixed during the overhaul port in ICU 60.
777   int32_t semiPos = formatStr.indexOf(';', 0);
778   if (semiPos == -1) {
779     semiPos = formatStr.length();
780   }
781   UnicodeString positivePart = formatStr.tempSubString(0, semiPos);
782 
783   int32_t firstIdx = positivePart.indexOf(kZero, UPRV_LENGTHOF(kZero), 0);
784   // We must have 0's in format string.
785   if (firstIdx == -1) {
786     status = U_INTERNAL_PROGRAM_ERROR;
787     return 0;
788   }
789   int32_t lastIdx = positivePart.lastIndexOf(kZero, UPRV_LENGTHOF(kZero), firstIdx);
790   CDFUnit* unit = createCDFUnit(variant, log10Value, result, status);
791   if (U_FAILURE(status)) {
792     return 0;
793   }
794 
795   // Return -1 if we are not overwriting an existing value
796   if (unit->isSet() && !overwrite) {
797     return -1;
798   }
799   unit->markAsSet();
800 
801   // Everything up to first 0 is the prefix
802   unit->prefix = positivePart.tempSubString(0, firstIdx);
803   fixQuotes(unit->prefix);
804   // Everything beyond the last 0 is the suffix
805   unit->suffix = positivePart.tempSubString(lastIdx + 1);
806   fixQuotes(unit->suffix);
807 
808   // If there is effectively no prefix or suffix, ignore the actual number of
809   // 0's and act as if the number of 0's matches the size of the number.
810   if (onlySpaces(unit->prefix) && onlySpaces(unit->suffix)) {
811     return log10Value + 1;
812   }
813 
814   // Calculate number of zeros before decimal point
815   int32_t idx = firstIdx + 1;
816   while (idx <= lastIdx && positivePart.charAt(idx) == u_0) {
817     ++idx;
818   }
819   return (idx - firstIdx);
820 }
821 
// Derives the divisor for a template from its magnitude (power10) and the
// number of zeros it contains: power10 is divided by 10 once for every zero
// beyond the first. With numZeros <= 1 the result is power10 itself.
static double calculateDivisor(double power10, int32_t numZeros) {
  double quotient = power10;
  for (int32_t zerosLeft = numZeros; zerosLeft > 1; --zerosLeft) {
    quotient /= 10.0;
  }
  return quotient;
}
831 
onlySpaces(UnicodeString u)832 static UBool onlySpaces(UnicodeString u) {
833   return u.trim().length() == 0;
834 }
835 
836 // fixQuotes unescapes single quotes. Don''t -> Don't. Letter 'j' -> Letter j.
837 // Modifies s in place.
fixQuotes(UnicodeString & s)838 static void fixQuotes(UnicodeString& s) {
839   QuoteState state = OUTSIDE;
840   int32_t len = s.length();
841   int32_t dest = 0;
842   for (int32_t i = 0; i < len; ++i) {
843     UChar ch = s.charAt(i);
844     if (ch == u_apos) {
845       if (state == INSIDE_EMPTY) {
846         s.setCharAt(dest, ch);
847         ++dest;
848       }
849     } else {
850       s.setCharAt(dest, ch);
851       ++dest;
852     }
853 
854     // Update state
855     switch (state) {
856       case OUTSIDE:
857         state = ch == u_apos ? INSIDE_EMPTY : OUTSIDE;
858         break;
859       case INSIDE_EMPTY:
860       case INSIDE_FULL:
861         state = ch == u_apos ? OUTSIDE : INSIDE_FULL;
862         break;
863       default:
864         break;
865     }
866   }
867   s.truncate(dest);
868 }
869 
870 // Checks to make sure that an "other" variant is present in all
871 // powers of 10.
checkForOtherVariants(CDFLocaleStyleData * result,UErrorCode & status)872 static void checkForOtherVariants(CDFLocaleStyleData* result,
873     UErrorCode& status) {
874   if (result == NULL || result->unitsByVariant == NULL) {
875     return;
876   }
877 
878   const CDFUnit* otherByBase =
879       (const CDFUnit*) uhash_get(result->unitsByVariant, gOther);
880   if (otherByBase == NULL) {
881     status = U_INTERNAL_PROGRAM_ERROR;
882     return;
883   }
884 
885   // Check all other plural variants, and make sure that if
886   // any of them are populated, then other is also populated
887   int32_t pos = UHASH_FIRST;
888   const UHashElement* element;
889   while ((element = uhash_nextElement(result->unitsByVariant, &pos)) != NULL) {
890     CDFUnit* variantsByBase = (CDFUnit*) element->value.pointer;
891     if (variantsByBase == otherByBase) continue;
892     for (int32_t log10Value = 0; log10Value < MAX_DIGITS; ++log10Value) {
893       if (variantsByBase[log10Value].isSet()
894           && !otherByBase[log10Value].isSet()) {
895         status = U_INTERNAL_PROGRAM_ERROR;
896         return;
897       }
898     }
899   }
900 }
901 
902 // fillInMissing ensures that the data in result is complete.
903 // result data is complete if for each variant in result, there exists
904 // a prefix-suffix pair for each log10 value and there also exists
905 // a divisor for each log10 value.
906 //
907 // First this function figures out for which log10 values, the other
908 // variant already had data. These are the same log10 values defined
909 // in CLDR.
910 //
911 // For each log10 value not defined in CLDR, it uses the divisor for
912 // the last defined log10 value or 1.
913 //
914 // Then for each variant, it does the following. For each log10
915 // value not defined in CLDR, copy the prefix-suffix pair from the
916 // previous log10 value. If log10 value is defined in CLDR but is
917 // missing from given variant, copy the prefix-suffix pair for that
918 // log10 value from the 'other' variant.
fillInMissing(CDFLocaleStyleData * result)919 static void fillInMissing(CDFLocaleStyleData* result) {
920   const CDFUnit* otherUnits =
921       (const CDFUnit*) uhash_get(result->unitsByVariant, gOther);
922   UBool definedInCLDR[MAX_DIGITS];
923   double lastDivisor = 1.0;
924   for (int32_t i = 0; i < MAX_DIGITS; ++i) {
925     if (!otherUnits[i].isSet()) {
926       result->divisors[i] = lastDivisor;
927       definedInCLDR[i] = FALSE;
928     } else {
929       lastDivisor = result->divisors[i];
930       definedInCLDR[i] = TRUE;
931     }
932   }
933   // Iterate over each variant.
934   int32_t pos = UHASH_FIRST;
935   const UHashElement* element = uhash_nextElement(result->unitsByVariant, &pos);
936   for (;element != NULL; element = uhash_nextElement(result->unitsByVariant, &pos)) {
937     CDFUnit* units = (CDFUnit*) element->value.pointer;
938     for (int32_t i = 0; i < MAX_DIGITS; ++i) {
939       if (definedInCLDR[i]) {
940         if (!units[i].isSet()) {
941           units[i] = otherUnits[i];
942         }
943       } else {
944         if (i == 0) {
945           units[0].markAsSet();
946         } else {
947           units[i] = units[i - 1];
948         }
949       }
950     }
951   }
952 }
953 
954 // computeLog10 computes floor(log10(x)). If inRange is TRUE, the biggest
955 // value computeLog10 will return MAX_DIGITS -1 even for
956 // numbers > 10^MAX_DIGITS. If inRange is FALSE, computeLog10 will return
957 // up to MAX_DIGITS.
computeLog10(double x,UBool inRange)958 static int32_t computeLog10(double x, UBool inRange) {
959   int32_t result = 0;
960   int32_t max = inRange ? MAX_DIGITS - 1 : MAX_DIGITS;
961   while (x >= 10.0) {
962     x /= 10.0;
963     ++result;
964     if (result == max) {
965       break;
966     }
967   }
968   return result;
969 }
970 
971 // createCDFUnit returns a pointer to the prefix-suffix pair for a given
972 // variant and log10 value within table. If no such prefix-suffix pair is
973 // stored in table, one is created within table before returning pointer.
createCDFUnit(const char * variant,int32_t log10Value,UHashtable * table,UErrorCode & status)974 static CDFUnit* createCDFUnit(const char* variant, int32_t log10Value, UHashtable* table, UErrorCode& status) {
975   if (U_FAILURE(status)) {
976     return NULL;
977   }
978   CDFUnit *cdfUnit = (CDFUnit*) uhash_get(table, variant);
979   if (cdfUnit == NULL) {
980     cdfUnit = new CDFUnit[MAX_DIGITS];
981     if (cdfUnit == NULL) {
982       status = U_MEMORY_ALLOCATION_ERROR;
983       return NULL;
984     }
985     uhash_put(table, uprv_strdup(variant), cdfUnit, &status);
986     if (U_FAILURE(status)) {
987       return NULL;
988     }
989   }
990   CDFUnit* result = &cdfUnit[log10Value];
991   return result;
992 }
993 
994 // getCDFUnitFallback returns a pointer to the prefix-suffix pair for a given
995 // variant and log10 value within table. If the given variant doesn't exist, it
996 // falls back to the OTHER variant. Therefore, this method will always return
997 // some non-NULL value.
getCDFUnitFallback(const UHashtable * table,const UnicodeString & variant,int32_t log10Value)998 static const CDFUnit* getCDFUnitFallback(const UHashtable* table, const UnicodeString& variant, int32_t log10Value) {
999   CharString cvariant;
1000   UErrorCode status = U_ZERO_ERROR;
1001   const CDFUnit *cdfUnit = NULL;
1002   cvariant.appendInvariantChars(variant, status);
1003   if (!U_FAILURE(status)) {
1004     cdfUnit = (const CDFUnit*) uhash_get(table, cvariant.data());
1005   }
1006   if (cdfUnit == NULL) {
1007     cdfUnit = (const CDFUnit*) uhash_get(table, gOther);
1008   }
1009   return &cdfUnit[log10Value];
1010 }
1011 
1012 U_NAMESPACE_END
1013 #endif
1014