1 /*
2 *******************************************************************************
3 * Copyright (C) 1997-2015, International Business Machines Corporation and    *
4 * others. All Rights Reserved.                                                *
5 *******************************************************************************
6 *
7 * File COMPACTDECIMALFORMAT.CPP
8 *
9 ********************************************************************************
10 */
11 #include "unicode/utypes.h"
12 
13 #if !UCONFIG_NO_FORMATTING
14 
15 #include "charstr.h"
16 #include "cstring.h"
17 #include "digitlst.h"
18 #include "mutex.h"
19 #include "unicode/compactdecimalformat.h"
20 #include "unicode/numsys.h"
21 #include "unicode/plurrule.h"
22 #include "unicode/ures.h"
23 #include "ucln_in.h"
24 #include "uhash.h"
25 #include "umutex.h"
26 #include "unicode/ures.h"
27 #include "uresimp.h"
28 
29 // Maps locale name to CDFLocaleData struct.
30 static UHashtable* gCompactDecimalData = NULL;
31 static UMutex gCompactDecimalMetaLock = U_MUTEX_INITIALIZER;
32 
33 U_NAMESPACE_BEGIN
34 
35 static const int32_t MAX_DIGITS = 15;
36 static const char gOther[] = "other";
37 static const char gLatnTag[] = "latn";
38 static const char gNumberElementsTag[] = "NumberElements";
39 static const char gDecimalFormatTag[] = "decimalFormat";
40 static const char gPatternsShort[] = "patternsShort";
41 static const char gPatternsLong[] = "patternsLong";
42 static const char gRoot[] = "root";
43 
44 static const UChar u_0 = 0x30;
45 static const UChar u_apos = 0x27;
46 
47 static const UChar kZero[] = {u_0};
48 
// Scanner states used when unescaping single quotes.
enum QuoteState {
  OUTSIDE = 0,
  INSIDE_EMPTY = 1,
  INSIDE_FULL = 2
};
55 
// Flags accepted by tryGetByKeyWithFallback() and tryGetDecimalFallback().
// Bit 0 (MUST) may be OR-ed onto ANY or NOT_ROOT; the remaining values are
// mutually exclusive and therefore stay even.
enum FallbackFlags {
  ANY = 0x0,
  MUST = 0x1,
  NOT_ROOT = 0x2
  // Next one will be 4 then 6 etc.
};
62 
63 
64 // CDFUnit represents a prefix-suffix pair for a particular variant
65 // and log10 value.
66 struct CDFUnit : public UMemory {
67   UnicodeString prefix;
68   UnicodeString suffix;
CDFUnitCDFUnit69   inline CDFUnit() : prefix(), suffix() {
70     prefix.setToBogus();
71   }
~CDFUnitCDFUnit72   inline ~CDFUnit() {}
isSetCDFUnit73   inline UBool isSet() const {
74     return !prefix.isBogus();
75   }
markAsSetCDFUnit76   inline void markAsSet() {
77     prefix.remove();
78   }
79 };
80 
81 // CDFLocaleStyleData contains formatting data for a particular locale
82 // and style.
83 class CDFLocaleStyleData : public UMemory {
84  public:
85   // What to divide by for each log10 value when formatting. These values
86   // will be powers of 10. For English, would be:
87   // 1, 1, 1, 1000, 1000, 1000, 1000000, 1000000, 1000000, 1000000000 ...
88   double divisors[MAX_DIGITS];
89   // Maps plural variants to CDFUnit[MAX_DIGITS] arrays.
90   // To format a number x,
91   // first compute log10(x). Compute displayNum = (x / divisors[log10(x)]).
92   // Compute the plural variant for displayNum
93   // (e.g zero, one, two, few, many, other).
94   // Compute cdfUnits = unitsByVariant[pluralVariant].
95   // Prefix and suffix to use at cdfUnits[log10(x)]
96   UHashtable* unitsByVariant;
CDFLocaleStyleData()97   inline CDFLocaleStyleData() : unitsByVariant(NULL) {}
98   ~CDFLocaleStyleData();
99   // Init initializes this object.
100   void Init(UErrorCode& status);
isBogus() const101   inline UBool isBogus() const {
102     return unitsByVariant == NULL;
103   }
104   void setToBogus();
105  private:
106   CDFLocaleStyleData(const CDFLocaleStyleData&);
107   CDFLocaleStyleData& operator=(const CDFLocaleStyleData&);
108 };
109 
110 // CDFLocaleData contains formatting data for a particular locale.
111 struct CDFLocaleData : public UMemory {
112   CDFLocaleStyleData shortData;
113   CDFLocaleStyleData longData;
CDFLocaleDataCDFLocaleData114   inline CDFLocaleData() : shortData(), longData() { }
~CDFLocaleDataCDFLocaleData115   inline ~CDFLocaleData() { }
116   // Init initializes this object.
117   void Init(UErrorCode& status);
118 };
119 
120 U_NAMESPACE_END
121 
122 U_CDECL_BEGIN
123 
cdf_cleanup(void)124 static UBool U_CALLCONV cdf_cleanup(void) {
125   if (gCompactDecimalData != NULL) {
126     uhash_close(gCompactDecimalData);
127     gCompactDecimalData = NULL;
128   }
129   return TRUE;
130 }
131 
deleteCDFUnits(void * ptr)132 static void U_CALLCONV deleteCDFUnits(void* ptr) {
133   delete [] (icu::CDFUnit*) ptr;
134 }
135 
deleteCDFLocaleData(void * ptr)136 static void U_CALLCONV deleteCDFLocaleData(void* ptr) {
137   delete (icu::CDFLocaleData*) ptr;
138 }
139 
140 U_CDECL_END
141 
142 U_NAMESPACE_BEGIN
143 
144 static UBool divisors_equal(const double* lhs, const double* rhs);
145 static const CDFLocaleStyleData* getCDFLocaleStyleData(const Locale& inLocale, UNumberCompactStyle style, UErrorCode& status);
146 
147 static const CDFLocaleStyleData* extractDataByStyleEnum(const CDFLocaleData& data, UNumberCompactStyle style, UErrorCode& status);
148 static CDFLocaleData* loadCDFLocaleData(const Locale& inLocale, UErrorCode& status);
149 static void initCDFLocaleData(const Locale& inLocale, CDFLocaleData* result, UErrorCode& status);
150 static UResourceBundle* tryGetDecimalFallback(const UResourceBundle* numberSystemResource, const char* style, UResourceBundle** fillIn, FallbackFlags flags, UErrorCode& status);
151 static UResourceBundle* tryGetByKeyWithFallback(const UResourceBundle* rb, const char* path, UResourceBundle** fillIn, FallbackFlags flags, UErrorCode& status);
152 static UBool isRoot(const UResourceBundle* rb, UErrorCode& status);
153 static void initCDFLocaleStyleData(const UResourceBundle* decimalFormatBundle, CDFLocaleStyleData* result, UErrorCode& status);
154 static void populatePower10(const UResourceBundle* power10Bundle, CDFLocaleStyleData* result, UErrorCode& status);
155 static int32_t populatePrefixSuffix(const char* variant, int32_t log10Value, const UnicodeString& formatStr, UHashtable* result, UErrorCode& status);
156 static UBool onlySpaces(UnicodeString u);
157 static void fixQuotes(UnicodeString& s);
158 static void fillInMissing(CDFLocaleStyleData* result);
159 static int32_t computeLog10(double x, UBool inRange);
160 static CDFUnit* createCDFUnit(const char* variant, int32_t log10Value, UHashtable* table, UErrorCode& status);
161 static const CDFUnit* getCDFUnitFallback(const UHashtable* table, const UnicodeString& variant, int32_t log10Value);
162 
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CompactDecimalFormat)163 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CompactDecimalFormat)
164 
165 CompactDecimalFormat::CompactDecimalFormat(
166     const DecimalFormat& decimalFormat,
167     const UHashtable* unitsByVariant,
168     const double* divisors,
169     PluralRules* pluralRules)
170   : DecimalFormat(decimalFormat), _unitsByVariant(unitsByVariant), _divisors(divisors), _pluralRules(pluralRules) {
171 }
172 
// Copy constructor. The units table and divisors pointers are shared with
// source; the plural rules are cloned so each object has its own.
CompactDecimalFormat::CompactDecimalFormat(const CompactDecimalFormat& source)
    : DecimalFormat(source),
      _unitsByVariant(source._unitsByVariant),
      _divisors(source._divisors),
      _pluralRules(source._pluralRules->clone()) {
}
176 
177 CompactDecimalFormat* U_EXPORT2
createInstance(const Locale & inLocale,UNumberCompactStyle style,UErrorCode & status)178 CompactDecimalFormat::createInstance(
179     const Locale& inLocale, UNumberCompactStyle style, UErrorCode& status) {
180   LocalPointer<DecimalFormat> decfmt((DecimalFormat*) NumberFormat::makeInstance(inLocale, UNUM_DECIMAL, TRUE, status));
181   if (U_FAILURE(status)) {
182     return NULL;
183   }
184   LocalPointer<PluralRules> pluralRules(PluralRules::forLocale(inLocale, status));
185   if (U_FAILURE(status)) {
186     return NULL;
187   }
188   const CDFLocaleStyleData* data = getCDFLocaleStyleData(inLocale, style, status);
189   if (U_FAILURE(status)) {
190     return NULL;
191   }
192   CompactDecimalFormat* result =
193       new CompactDecimalFormat(*decfmt, data->unitsByVariant, data->divisors, pluralRules.getAlias());
194   if (result == NULL) {
195     status = U_MEMORY_ALLOCATION_ERROR;
196     return NULL;
197   }
198   pluralRules.orphan();
199   result->setMaximumSignificantDigits(3);
200   result->setSignificantDigitsUsed(TRUE);
201   result->setGroupingUsed(FALSE);
202   return result;
203 }
204 
205 CompactDecimalFormat&
operator =(const CompactDecimalFormat & rhs)206 CompactDecimalFormat::operator=(const CompactDecimalFormat& rhs) {
207   if (this != &rhs) {
208     DecimalFormat::operator=(rhs);
209     _unitsByVariant = rhs._unitsByVariant;
210     _divisors = rhs._divisors;
211     delete _pluralRules;
212     _pluralRules = rhs._pluralRules->clone();
213   }
214   return *this;
215 }
216 
// Deletes the PluralRules object held in _pluralRules. The units table and
// the divisors are not released here.
CompactDecimalFormat::~CompactDecimalFormat() {
  delete _pluralRules;
}
220 
221 
222 Format*
clone(void) const223 CompactDecimalFormat::clone(void) const {
224   return new CompactDecimalFormat(*this);
225 }
226 
227 UBool
operator ==(const Format & that) const228 CompactDecimalFormat::operator==(const Format& that) const {
229   if (this == &that) {
230     return TRUE;
231   }
232   return (DecimalFormat::operator==(that) && eqHelper((const CompactDecimalFormat&) that));
233 }
234 
235 UBool
eqHelper(const CompactDecimalFormat & that) const236 CompactDecimalFormat::eqHelper(const CompactDecimalFormat& that) const {
237   return uhash_equals(_unitsByVariant, that._unitsByVariant) && divisors_equal(_divisors, that._divisors) && (*_pluralRules == *that._pluralRules);
238 }
239 
240 UnicodeString&
format(double number,UnicodeString & appendTo,FieldPosition & pos) const241 CompactDecimalFormat::format(
242     double number,
243     UnicodeString& appendTo,
244     FieldPosition& pos) const {
245   UErrorCode status = U_ZERO_ERROR;
246   return format(number, appendTo, pos, status);
247 }
248 
249 UnicodeString&
format(double number,UnicodeString & appendTo,FieldPosition & pos,UErrorCode & status) const250 CompactDecimalFormat::format(
251     double number,
252     UnicodeString& appendTo,
253     FieldPosition& pos,
254     UErrorCode &status) const {
255   if (U_FAILURE(status)) {
256     return appendTo;
257   }
258   DigitList orig, rounded;
259   orig.set(number);
260   UBool isNegative;
261   _round(orig, rounded, isNegative, status);
262   if (U_FAILURE(status)) {
263     return appendTo;
264   }
265   double roundedDouble = rounded.getDouble();
266   if (isNegative) {
267     roundedDouble = -roundedDouble;
268   }
269   int32_t baseIdx = computeLog10(roundedDouble, TRUE);
270   double numberToFormat = roundedDouble / _divisors[baseIdx];
271   UnicodeString variant = _pluralRules->select(numberToFormat);
272   if (isNegative) {
273     numberToFormat = -numberToFormat;
274   }
275   const CDFUnit* unit = getCDFUnitFallback(_unitsByVariant, variant, baseIdx);
276   appendTo += unit->prefix;
277   DecimalFormat::format(numberToFormat, appendTo, pos);
278   appendTo += unit->suffix;
279   return appendTo;
280 }
281 
// Not supported for a double with a FieldPositionIterator: unconditionally
// sets status to U_UNSUPPORTED_ERROR and returns appendTo unmodified.
UnicodeString&
CompactDecimalFormat::format(
    double /* number */,
    UnicodeString& appendTo,
    FieldPositionIterator* /* posIter */,
    UErrorCode& status) const {
  status = U_UNSUPPORTED_ERROR;
  return appendTo;
}
291 
292 UnicodeString&
format(int32_t number,UnicodeString & appendTo,FieldPosition & pos) const293 CompactDecimalFormat::format(
294     int32_t number,
295     UnicodeString& appendTo,
296     FieldPosition& pos) const {
297   return format((double) number, appendTo, pos);
298 }
299 
300 UnicodeString&
format(int32_t number,UnicodeString & appendTo,FieldPosition & pos,UErrorCode & status) const301 CompactDecimalFormat::format(
302     int32_t number,
303     UnicodeString& appendTo,
304     FieldPosition& pos,
305     UErrorCode &status) const {
306   return format((double) number, appendTo, pos, status);
307 }
308 
// Not supported for an int32_t with a FieldPositionIterator: unconditionally
// sets status to U_UNSUPPORTED_ERROR and returns appendTo unmodified.
UnicodeString&
CompactDecimalFormat::format(
    int32_t /* number */,
    UnicodeString& appendTo,
    FieldPositionIterator* /* posIter */,
    UErrorCode& status) const {
  status = U_UNSUPPORTED_ERROR;
  return appendTo;
}
318 
319 UnicodeString&
format(int64_t number,UnicodeString & appendTo,FieldPosition & pos) const320 CompactDecimalFormat::format(
321     int64_t number,
322     UnicodeString& appendTo,
323     FieldPosition& pos) const {
324   return format((double) number, appendTo, pos);
325 }
326 
327 UnicodeString&
format(int64_t number,UnicodeString & appendTo,FieldPosition & pos,UErrorCode & status) const328 CompactDecimalFormat::format(
329     int64_t number,
330     UnicodeString& appendTo,
331     FieldPosition& pos,
332     UErrorCode &status) const {
333   return format((double) number, appendTo, pos, status);
334 }
335 
// Not supported for an int64_t with a FieldPositionIterator: unconditionally
// sets status to U_UNSUPPORTED_ERROR and returns appendTo unmodified.
UnicodeString&
CompactDecimalFormat::format(
    int64_t /* number */,
    UnicodeString& appendTo,
    FieldPositionIterator* /* posIter */,
    UErrorCode& status) const {
  status = U_UNSUPPORTED_ERROR;
  return appendTo;
}
345 
// Not supported for a StringPiece number: unconditionally sets status to
// U_UNSUPPORTED_ERROR and returns appendTo unmodified.
UnicodeString&
CompactDecimalFormat::format(
    const StringPiece& /* number */,
    UnicodeString& appendTo,
    FieldPositionIterator* /* posIter */,
    UErrorCode& status) const {
  status = U_UNSUPPORTED_ERROR;
  return appendTo;
}
355 
// Not supported for a DigitList with a FieldPositionIterator:
// unconditionally sets status to U_UNSUPPORTED_ERROR and returns appendTo
// unmodified.
UnicodeString&
CompactDecimalFormat::format(
    const DigitList& /* number */,
    UnicodeString& appendTo,
    FieldPositionIterator* /* posIter */,
    UErrorCode& status) const {
  status = U_UNSUPPORTED_ERROR;
  return appendTo;
}
365 
// Not supported for a DigitList with a FieldPosition: unconditionally sets
// status to U_UNSUPPORTED_ERROR and returns appendTo unmodified.
UnicodeString&
CompactDecimalFormat::format(const DigitList& /* number */,
                             UnicodeString& appendTo,
                             FieldPosition& /* pos */,
                             UErrorCode& status) const {
  status = U_UNSUPPORTED_ERROR;
  return appendTo;
}
374 
// Empty implementation: none of the arguments is read or modified, so
// result and parsePosition are left exactly as the caller passed them.
void
CompactDecimalFormat::parse(
    const UnicodeString& /* text */,
    Formattable& /* result */,
    ParsePosition& /* parsePosition */) const {
}
381 
// Parsing is not supported: unconditionally sets status to
// U_UNSUPPORTED_ERROR and leaves result untouched.
void
CompactDecimalFormat::parse(
    const UnicodeString& /* text */,
    Formattable& /* result */,
    UErrorCode& status) const {
  status = U_UNSUPPORTED_ERROR;
}
389 
// Always returns NULL; neither text nor pos is read or modified.
CurrencyAmount*
CompactDecimalFormat::parseCurrency(
    const UnicodeString& /* text */,
    ParsePosition& /* pos */) const {
  return NULL;
}
396 
Init(UErrorCode & status)397 void CDFLocaleStyleData::Init(UErrorCode& status) {
398   if (unitsByVariant != NULL) {
399     return;
400   }
401   unitsByVariant = uhash_open(uhash_hashChars, uhash_compareChars, NULL, &status);
402   if (U_FAILURE(status)) {
403     return;
404   }
405   uhash_setKeyDeleter(unitsByVariant, uprv_free);
406   uhash_setValueDeleter(unitsByVariant, deleteCDFUnits);
407 }
408 
// Releases the unitsByVariant table, if there is one, through setToBogus().
CDFLocaleStyleData::~CDFLocaleStyleData() {
  setToBogus();
}
412 
setToBogus()413 void CDFLocaleStyleData::setToBogus() {
414   if (unitsByVariant != NULL) {
415     uhash_close(unitsByVariant);
416     unitsByVariant = NULL;
417   }
418 }
419 
Init(UErrorCode & status)420 void CDFLocaleData::Init(UErrorCode& status) {
421   shortData.Init(status);
422   if (U_FAILURE(status)) {
423     return;
424   }
425   longData.Init(status);
426 }
427 
428 // Helper method for operator=
divisors_equal(const double * lhs,const double * rhs)429 static UBool divisors_equal(const double* lhs, const double* rhs) {
430   for (int32_t i = 0; i < MAX_DIGITS; ++i) {
431     if (lhs[i] != rhs[i]) {
432       return FALSE;
433     }
434   }
435   return TRUE;
436 }
437 
438 // getCDFLocaleStyleData returns pointer to formatting data for given locale and
439 // style within the global cache. On cache miss, getCDFLocaleStyleData loads
440 // the data from CLDR into the global cache before returning the pointer. If a
441 // UNUM_LONG data is requested for a locale, and that locale does not have
442 // UNUM_LONG data, getCDFLocaleStyleData will fall back to UNUM_SHORT data for
443 // that locale.
getCDFLocaleStyleData(const Locale & inLocale,UNumberCompactStyle style,UErrorCode & status)444 static const CDFLocaleStyleData* getCDFLocaleStyleData(const Locale& inLocale, UNumberCompactStyle style, UErrorCode& status) {
445   if (U_FAILURE(status)) {
446     return NULL;
447   }
448   CDFLocaleData* result = NULL;
449   const char* key = inLocale.getName();
450   {
451     Mutex lock(&gCompactDecimalMetaLock);
452     if (gCompactDecimalData == NULL) {
453       gCompactDecimalData = uhash_open(uhash_hashChars, uhash_compareChars, NULL, &status);
454       if (U_FAILURE(status)) {
455         return NULL;
456       }
457       uhash_setKeyDeleter(gCompactDecimalData, uprv_free);
458       uhash_setValueDeleter(gCompactDecimalData, deleteCDFLocaleData);
459       ucln_i18n_registerCleanup(UCLN_I18N_CDFINFO, cdf_cleanup);
460     } else {
461       result = (CDFLocaleData*) uhash_get(gCompactDecimalData, key);
462     }
463   }
464   if (result != NULL) {
465     return extractDataByStyleEnum(*result, style, status);
466   }
467 
468   result = loadCDFLocaleData(inLocale, status);
469   if (U_FAILURE(status)) {
470     return NULL;
471   }
472 
473   {
474     Mutex lock(&gCompactDecimalMetaLock);
475     CDFLocaleData* temp = (CDFLocaleData*) uhash_get(gCompactDecimalData, key);
476     if (temp != NULL) {
477       delete result;
478       result = temp;
479     } else {
480       uhash_put(gCompactDecimalData, uprv_strdup(key), (void*) result, &status);
481       if (U_FAILURE(status)) {
482         return NULL;
483       }
484     }
485   }
486   return extractDataByStyleEnum(*result, style, status);
487 }
488 
extractDataByStyleEnum(const CDFLocaleData & data,UNumberCompactStyle style,UErrorCode & status)489 static const CDFLocaleStyleData* extractDataByStyleEnum(const CDFLocaleData& data, UNumberCompactStyle style, UErrorCode& status) {
490   switch (style) {
491     case UNUM_SHORT:
492       return &data.shortData;
493     case UNUM_LONG:
494       if (!data.longData.isBogus()) {
495         return &data.longData;
496       }
497       return &data.shortData;
498     default:
499       status = U_ILLEGAL_ARGUMENT_ERROR;
500       return NULL;
501   }
502 }
503 
504 // loadCDFLocaleData loads formatting data from CLDR for a given locale. The
505 // caller owns the returned pointer.
loadCDFLocaleData(const Locale & inLocale,UErrorCode & status)506 static CDFLocaleData* loadCDFLocaleData(const Locale& inLocale, UErrorCode& status) {
507   if (U_FAILURE(status)) {
508     return NULL;
509   }
510   CDFLocaleData* result = new CDFLocaleData;
511   if (result == NULL) {
512     status = U_MEMORY_ALLOCATION_ERROR;
513     return NULL;
514   }
515   result->Init(status);
516   if (U_FAILURE(status)) {
517     delete result;
518     return NULL;
519   }
520 
521   initCDFLocaleData(inLocale, result, status);
522   if (U_FAILURE(status)) {
523     delete result;
524     return NULL;
525   }
526   return result;
527 }
528 
529 // initCDFLocaleData initializes result with data from CLDR.
530 // inLocale is the locale, the CLDR data is stored in result.
531 // We load the UNUM_SHORT  and UNUM_LONG data looking first in local numbering
532 // system and not including root locale in fallback. Next we try in the latn
533 // numbering system where we fallback all the way to root. If we don't find
534 // UNUM_SHORT data in these three places, we report an error. If we find
535 // UNUM_SHORT data before finding UNUM_LONG data we make UNUM_LONG data fall
536 // back to UNUM_SHORT data.
initCDFLocaleData(const Locale & inLocale,CDFLocaleData * result,UErrorCode & status)537 static void initCDFLocaleData(const Locale& inLocale, CDFLocaleData* result, UErrorCode& status) {
538   LocalPointer<NumberingSystem> ns(NumberingSystem::createInstance(inLocale, status));
539   if (U_FAILURE(status)) {
540     return;
541   }
542   const char* numberingSystemName = ns->getName();
543   UResourceBundle* rb = ures_open(NULL, inLocale.getName(), &status);
544   rb = ures_getByKeyWithFallback(rb, gNumberElementsTag, rb, &status);
545   if (U_FAILURE(status)) {
546     ures_close(rb);
547     return;
548   }
549   UResourceBundle* shortDataFillIn = NULL;
550   UResourceBundle* longDataFillIn = NULL;
551   UResourceBundle* shortData = NULL;
552   UResourceBundle* longData = NULL;
553 
554   if (uprv_strcmp(numberingSystemName, gLatnTag) != 0) {
555     LocalUResourceBundlePointer localResource(
556         tryGetByKeyWithFallback(rb, numberingSystemName, NULL, NOT_ROOT, status));
557     shortData = tryGetDecimalFallback(
558         localResource.getAlias(), gPatternsShort, &shortDataFillIn, NOT_ROOT, status);
559     longData = tryGetDecimalFallback(
560         localResource.getAlias(), gPatternsLong, &longDataFillIn, NOT_ROOT, status);
561   }
562   if (U_FAILURE(status)) {
563     ures_close(shortDataFillIn);
564     ures_close(longDataFillIn);
565     ures_close(rb);
566     return;
567   }
568 
569   // If we haven't found UNUM_SHORT look in latn numbering system. We must
570   // succeed at finding UNUM_SHORT here.
571   if (shortData == NULL) {
572     LocalUResourceBundlePointer latnResource(tryGetByKeyWithFallback(rb, gLatnTag, NULL, MUST, status));
573     shortData = tryGetDecimalFallback(latnResource.getAlias(), gPatternsShort, &shortDataFillIn, MUST, status);
574     if (longData == NULL) {
575       longData = tryGetDecimalFallback(latnResource.getAlias(), gPatternsLong, &longDataFillIn, ANY, status);
576       if (longData != NULL && isRoot(longData, status) && !isRoot(shortData, status)) {
577         longData = NULL;
578       }
579     }
580   }
581   initCDFLocaleStyleData(shortData, &result->shortData, status);
582   ures_close(shortDataFillIn);
583   if (U_FAILURE(status)) {
584     ures_close(longDataFillIn);
585     ures_close(rb);
586   }
587 
588   if (longData == NULL) {
589     result->longData.setToBogus();
590   } else {
591     initCDFLocaleStyleData(longData, &result->longData, status);
592   }
593   ures_close(longDataFillIn);
594   ures_close(rb);
595 }
596 
597 /**
598  * tryGetDecimalFallback attempts to fetch the "decimalFormat" resource bundle
599  * with a particular style. style is either "patternsShort" or "patternsLong."
600  * FillIn, flags, and status work in the same way as in tryGetByKeyWithFallback.
601  */
tryGetDecimalFallback(const UResourceBundle * numberSystemResource,const char * style,UResourceBundle ** fillIn,FallbackFlags flags,UErrorCode & status)602 static UResourceBundle* tryGetDecimalFallback(const UResourceBundle* numberSystemResource, const char* style, UResourceBundle** fillIn, FallbackFlags flags, UErrorCode& status) {
603   UResourceBundle* first = tryGetByKeyWithFallback(numberSystemResource, style, fillIn, flags, status);
604   UResourceBundle* second = tryGetByKeyWithFallback(first, gDecimalFormatTag, fillIn, flags, status);
605   if (fillIn == NULL) {
606     ures_close(first);
607   }
608   return second;
609 }
610 
611 // tryGetByKeyWithFallback returns a sub-resource bundle that matches given
612 // criteria or NULL if none found. rb is the resource bundle that we are
613 // searching. If rb == NULL then this function behaves as if no sub-resource
614 // is found; path is the key of the sub-resource,
615 // (i.e "foo" but not "foo/bar"); If fillIn is NULL, caller must always call
616 // ures_close() on returned resource. See below for example when fillIn is
617 // not NULL. flags is ANY or NOT_ROOT. Optionally, these values
618 // can be ored with MUST. MUST by itself is the same as ANY | MUST.
619 // The locale of the returned sub-resource will either match the
620 // flags or the returned sub-resouce will be NULL. If MUST is included in
621 // flags, and not suitable sub-resource is found then in addition to returning
622 // NULL, this function also sets status to U_MISSING_RESOURCE_ERROR. If MUST
623 // is not included in flags, then this function just returns NULL if no
624 // such sub-resource is found and will never set status to
625 // U_MISSING_RESOURCE_ERROR.
626 //
627 // Example: This code first searches for "foo/bar" sub-resource without falling
628 // back to ROOT. Then searches for "baz" sub-resource as last resort.
629 //
630 // UResourcebundle* fillIn = NULL;
631 // UResourceBundle* data = tryGetByKeyWithFallback(rb, "foo", &fillIn, NON_ROOT, status);
632 // data = tryGetByKeyWithFallback(data, "bar", &fillIn, NON_ROOT, status);
633 // if (!data) {
634 //   data = tryGetbyKeyWithFallback(rb, "baz", &fillIn, MUST,  status);
635 // }
636 // if (U_FAILURE(status)) {
637 //   ures_close(fillIn);
638 //   return;
639 // }
640 // doStuffWithNonNullSubresource(data);
641 //
642 // /* Wrong! don't do the following as it can leak memory if fillIn gets set
643 // to NULL. */
644 // fillIn = tryGetByKeyWithFallback(rb, "wrong", &fillIn, ANY, status);
645 //
646 // ures_close(fillIn);
647 //
tryGetByKeyWithFallback(const UResourceBundle * rb,const char * path,UResourceBundle ** fillIn,FallbackFlags flags,UErrorCode & status)648 static UResourceBundle* tryGetByKeyWithFallback(const UResourceBundle* rb, const char* path, UResourceBundle** fillIn, FallbackFlags flags, UErrorCode& status) {
649   if (U_FAILURE(status)) {
650     return NULL;
651   }
652   UBool must = (flags & MUST);
653   if (rb == NULL) {
654     if (must) {
655       status = U_MISSING_RESOURCE_ERROR;
656     }
657     return NULL;
658   }
659   UResourceBundle* result = NULL;
660   UResourceBundle* ownedByUs = NULL;
661   if (fillIn == NULL) {
662     ownedByUs = ures_getByKeyWithFallback(rb, path, NULL, &status);
663     result = ownedByUs;
664   } else {
665     *fillIn = ures_getByKeyWithFallback(rb, path, *fillIn, &status);
666     result = *fillIn;
667   }
668   if (U_FAILURE(status)) {
669     ures_close(ownedByUs);
670     if (status == U_MISSING_RESOURCE_ERROR && !must) {
671       status = U_ZERO_ERROR;
672     }
673     return NULL;
674   }
675   flags = (FallbackFlags) (flags & ~MUST);
676   switch (flags) {
677     case NOT_ROOT:
678       {
679         UBool bRoot = isRoot(result, status);
680         if (bRoot || U_FAILURE(status)) {
681           ures_close(ownedByUs);
682           if (must && (status == U_ZERO_ERROR)) {
683             status = U_MISSING_RESOURCE_ERROR;
684           }
685           return NULL;
686         }
687         return result;
688       }
689     case ANY:
690       return result;
691     default:
692       ures_close(ownedByUs);
693       status = U_ILLEGAL_ARGUMENT_ERROR;
694       return NULL;
695   }
696 }
697 
isRoot(const UResourceBundle * rb,UErrorCode & status)698 static UBool isRoot(const UResourceBundle* rb, UErrorCode& status) {
699   const char* actualLocale = ures_getLocaleByType(
700       rb, ULOC_ACTUAL_LOCALE, &status);
701   if (U_FAILURE(status)) {
702     return FALSE;
703   }
704   return uprv_strcmp(actualLocale, gRoot) == 0;
705 }
706 
707 
708 // initCDFLocaleStyleData loads formatting data for a particular style.
709 // decimalFormatBundle is the "decimalFormat" resource bundle in CLDR.
710 // Loaded data stored in result.
initCDFLocaleStyleData(const UResourceBundle * decimalFormatBundle,CDFLocaleStyleData * result,UErrorCode & status)711 static void initCDFLocaleStyleData(const UResourceBundle* decimalFormatBundle, CDFLocaleStyleData* result, UErrorCode& status) {
712   if (U_FAILURE(status)) {
713     return;
714   }
715   // Iterate through all the powers of 10.
716   int32_t size = ures_getSize(decimalFormatBundle);
717   UResourceBundle* power10 = NULL;
718   for (int32_t i = 0; i < size; ++i) {
719     power10 = ures_getByIndex(decimalFormatBundle, i, power10, &status);
720     if (U_FAILURE(status)) {
721       ures_close(power10);
722       return;
723     }
724     populatePower10(power10, result, status);
725     if (U_FAILURE(status)) {
726       ures_close(power10);
727       return;
728     }
729   }
730   ures_close(power10);
731   fillInMissing(result);
732 }
733 
734 // populatePower10 grabs data for a particular power of 10 from CLDR.
735 // The loaded data is stored in result.
populatePower10(const UResourceBundle * power10Bundle,CDFLocaleStyleData * result,UErrorCode & status)736 static void populatePower10(const UResourceBundle* power10Bundle, CDFLocaleStyleData* result, UErrorCode& status) {
737   if (U_FAILURE(status)) {
738     return;
739   }
740   char* endPtr = NULL;
741   double power10 = uprv_strtod(ures_getKey(power10Bundle), &endPtr);
742   if (*endPtr != 0) {
743     status = U_INTERNAL_PROGRAM_ERROR;
744     return;
745   }
746   int32_t log10Value = computeLog10(power10, FALSE);
747   // Silently ignore divisors that are too big.
748   if (log10Value == MAX_DIGITS) {
749     return;
750   }
751   int32_t size = ures_getSize(power10Bundle);
752   int32_t numZeros = 0;
753   UBool otherVariantDefined = FALSE;
754   UResourceBundle* variantBundle = NULL;
755   // Iterate over all the plural variants for the power of 10
756   for (int32_t i = 0; i < size; ++i) {
757     variantBundle = ures_getByIndex(power10Bundle, i, variantBundle, &status);
758     if (U_FAILURE(status)) {
759       ures_close(variantBundle);
760       return;
761     }
762     const char* variant = ures_getKey(variantBundle);
763     int32_t resLen;
764     const UChar* formatStrP = ures_getString(variantBundle, &resLen, &status);
765     if (U_FAILURE(status)) {
766       ures_close(variantBundle);
767       return;
768     }
769     UnicodeString formatStr(false, formatStrP, resLen);
770     if (uprv_strcmp(variant, gOther) == 0) {
771       otherVariantDefined = TRUE;
772     }
773     int32_t nz = populatePrefixSuffix(
774         variant, log10Value, formatStr, result->unitsByVariant, status);
775     if (U_FAILURE(status)) {
776       ures_close(variantBundle);
777       return;
778     }
779     if (nz != numZeros) {
780       // We expect all format strings to have the same number of 0's
781       // left of the decimal point.
782       if (numZeros != 0) {
783         status = U_INTERNAL_PROGRAM_ERROR;
784         ures_close(variantBundle);
785         return;
786       }
787       numZeros = nz;
788     }
789   }
790   ures_close(variantBundle);
791   // We expect to find an OTHER variant for each power of 10.
792   if (!otherVariantDefined) {
793     status = U_INTERNAL_PROGRAM_ERROR;
794     return;
795   }
796   double divisor = power10;
797   for (int32_t i = 1; i < numZeros; ++i) {
798     divisor /= 10.0;
799   }
800   result->divisors[log10Value] = divisor;
801 }
802 
803 // populatePrefixSuffix Adds a specific prefix-suffix pair to result for a
804 // given variant and log10 value.
805 // variant is 'zero', 'one', 'two', 'few', 'many', or 'other'.
806 // formatStr is the format string from which the prefix and suffix are
807 // extracted. It is usually of form 'Pefix 000 suffix'.
808 // populatePrefixSuffix returns the number of 0's found in formatStr
809 // before the decimal point.
810 // In the special case that formatStr contains only spaces for prefix
811 // and suffix, populatePrefixSuffix returns log10Value + 1.
populatePrefixSuffix(const char * variant,int32_t log10Value,const UnicodeString & formatStr,UHashtable * result,UErrorCode & status)812 static int32_t populatePrefixSuffix(
813     const char* variant, int32_t log10Value, const UnicodeString& formatStr, UHashtable* result, UErrorCode& status) {
814   if (U_FAILURE(status)) {
815     return 0;
816   }
817   int32_t firstIdx = formatStr.indexOf(kZero, UPRV_LENGTHOF(kZero), 0);
818   // We must have 0's in format string.
819   if (firstIdx == -1) {
820     status = U_INTERNAL_PROGRAM_ERROR;
821     return 0;
822   }
823   int32_t lastIdx = formatStr.lastIndexOf(kZero, UPRV_LENGTHOF(kZero), firstIdx);
824   CDFUnit* unit = createCDFUnit(variant, log10Value, result, status);
825   if (U_FAILURE(status)) {
826     return 0;
827   }
828   // Everything up to first 0 is the prefix
829   unit->prefix = formatStr.tempSubString(0, firstIdx);
830   fixQuotes(unit->prefix);
831   // Everything beyond the last 0 is the suffix
832   unit->suffix = formatStr.tempSubString(lastIdx + 1);
833   fixQuotes(unit->suffix);
834 
835   // If there is effectively no prefix or suffix, ignore the actual number of
836   // 0's and act as if the number of 0's matches the size of the number.
837   if (onlySpaces(unit->prefix) && onlySpaces(unit->suffix)) {
838     return log10Value + 1;
839   }
840 
841   // Calculate number of zeros before decimal point
842   int32_t idx = firstIdx + 1;
843   while (idx <= lastIdx && formatStr.charAt(idx) == u_0) {
844     ++idx;
845   }
846   return (idx - firstIdx);
847 }
848 
onlySpaces(UnicodeString u)849 static UBool onlySpaces(UnicodeString u) {
850   return u.trim().length() == 0;
851 }
852 
853 // fixQuotes unescapes single quotes. Don''t -> Don't. Letter 'j' -> Letter j.
854 // Modifies s in place.
fixQuotes(UnicodeString & s)855 static void fixQuotes(UnicodeString& s) {
856   QuoteState state = OUTSIDE;
857   int32_t len = s.length();
858   int32_t dest = 0;
859   for (int32_t i = 0; i < len; ++i) {
860     UChar ch = s.charAt(i);
861     if (ch == u_apos) {
862       if (state == INSIDE_EMPTY) {
863         s.setCharAt(dest, ch);
864         ++dest;
865       }
866     } else {
867       s.setCharAt(dest, ch);
868       ++dest;
869     }
870 
871     // Update state
872     switch (state) {
873       case OUTSIDE:
874         state = ch == u_apos ? INSIDE_EMPTY : OUTSIDE;
875         break;
876       case INSIDE_EMPTY:
877       case INSIDE_FULL:
878         state = ch == u_apos ? OUTSIDE : INSIDE_FULL;
879         break;
880       default:
881         break;
882     }
883   }
884   s.truncate(dest);
885 }
886 
887 // fillInMissing ensures that the data in result is complete.
888 // result data is complete if for each variant in result, there exists
889 // a prefix-suffix pair for each log10 value and there also exists
890 // a divisor for each log10 value.
891 //
892 // First this function figures out for which log10 values, the other
893 // variant already had data. These are the same log10 values defined
894 // in CLDR.
895 //
896 // For each log10 value not defined in CLDR, it uses the divisor for
897 // the last defined log10 value or 1.
898 //
899 // Then for each variant, it does the following. For each log10
900 // value not defined in CLDR, copy the prefix-suffix pair from the
901 // previous log10 value. If log10 value is defined in CLDR but is
902 // missing from given variant, copy the prefix-suffix pair for that
903 // log10 value from the 'other' variant.
fillInMissing(CDFLocaleStyleData * result)904 static void fillInMissing(CDFLocaleStyleData* result) {
905   const CDFUnit* otherUnits =
906       (const CDFUnit*) uhash_get(result->unitsByVariant, gOther);
907   UBool definedInCLDR[MAX_DIGITS];
908   double lastDivisor = 1.0;
909   for (int32_t i = 0; i < MAX_DIGITS; ++i) {
910     if (!otherUnits[i].isSet()) {
911       result->divisors[i] = lastDivisor;
912       definedInCLDR[i] = FALSE;
913     } else {
914       lastDivisor = result->divisors[i];
915       definedInCLDR[i] = TRUE;
916     }
917   }
918   // Iterate over each variant.
919   int32_t pos = UHASH_FIRST;
920   const UHashElement* element = uhash_nextElement(result->unitsByVariant, &pos);
921   for (;element != NULL; element = uhash_nextElement(result->unitsByVariant, &pos)) {
922     CDFUnit* units = (CDFUnit*) element->value.pointer;
923     for (int32_t i = 0; i < MAX_DIGITS; ++i) {
924       if (definedInCLDR[i]) {
925         if (!units[i].isSet()) {
926           units[i] = otherUnits[i];
927         }
928       } else {
929         if (i == 0) {
930           units[0].markAsSet();
931         } else {
932           units[i] = units[i - 1];
933         }
934       }
935     }
936   }
937 }
938 
939 // computeLog10 computes floor(log10(x)). If inRange is TRUE, the biggest
940 // value computeLog10 will return MAX_DIGITS -1 even for
941 // numbers > 10^MAX_DIGITS. If inRange is FALSE, computeLog10 will return
942 // up to MAX_DIGITS.
computeLog10(double x,UBool inRange)943 static int32_t computeLog10(double x, UBool inRange) {
944   int32_t result = 0;
945   int32_t max = inRange ? MAX_DIGITS - 1 : MAX_DIGITS;
946   while (x >= 10.0) {
947     x /= 10.0;
948     ++result;
949     if (result == max) {
950       break;
951     }
952   }
953   return result;
954 }
955 
956 // createCDFUnit returns a pointer to the prefix-suffix pair for a given
957 // variant and log10 value within table. If no such prefix-suffix pair is
958 // stored in table, one is created within table before returning pointer.
createCDFUnit(const char * variant,int32_t log10Value,UHashtable * table,UErrorCode & status)959 static CDFUnit* createCDFUnit(const char* variant, int32_t log10Value, UHashtable* table, UErrorCode& status) {
960   if (U_FAILURE(status)) {
961     return NULL;
962   }
963   CDFUnit *cdfUnit = (CDFUnit*) uhash_get(table, variant);
964   if (cdfUnit == NULL) {
965     cdfUnit = new CDFUnit[MAX_DIGITS];
966     if (cdfUnit == NULL) {
967       status = U_MEMORY_ALLOCATION_ERROR;
968       return NULL;
969     }
970     uhash_put(table, uprv_strdup(variant), cdfUnit, &status);
971     if (U_FAILURE(status)) {
972       return NULL;
973     }
974   }
975   CDFUnit* result = &cdfUnit[log10Value];
976   result->markAsSet();
977   return result;
978 }
979 
980 // getCDFUnitFallback returns a pointer to the prefix-suffix pair for a given
981 // variant and log10 value within table. If the given variant doesn't exist, it
982 // falls back to the OTHER variant. Therefore, this method will always return
983 // some non-NULL value.
getCDFUnitFallback(const UHashtable * table,const UnicodeString & variant,int32_t log10Value)984 static const CDFUnit* getCDFUnitFallback(const UHashtable* table, const UnicodeString& variant, int32_t log10Value) {
985   CharString cvariant;
986   UErrorCode status = U_ZERO_ERROR;
987   const CDFUnit *cdfUnit = NULL;
988   cvariant.appendInvariantChars(variant, status);
989   if (!U_FAILURE(status)) {
990     cdfUnit = (const CDFUnit*) uhash_get(table, cvariant.data());
991   }
992   if (cdfUnit == NULL) {
993     cdfUnit = (const CDFUnit*) uhash_get(table, gOther);
994   }
995   return &cdfUnit[log10Value];
996 }
997 
998 U_NAMESPACE_END
999 #endif
1000