1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 * Copyright (C) 2007-2016, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 *******************************************************************************
8 *
9 * File plurrule.cpp
10 */
11 
12 #include <math.h>
13 #include <stdio.h>
14 
15 #include "unicode/utypes.h"
16 #include "unicode/localpointer.h"
17 #include "unicode/plurrule.h"
18 #include "unicode/upluralrules.h"
19 #include "unicode/ures.h"
20 #include "unicode/numfmt.h"
21 #include "unicode/decimfmt.h"
22 #include "charstr.h"
23 #include "cmemory.h"
24 #include "cstring.h"
25 #include "hash.h"
26 #include "locutil.h"
27 #include "mutex.h"
28 #include "patternprops.h"
29 #include "plurrule_impl.h"
30 #include "putilimp.h"
31 #include "ucln_in.h"
32 #include "ustrfmt.h"
33 #include "uassert.h"
34 #include "uvectr32.h"
35 #include "sharedpluralrules.h"
36 #include "unifiedcache.h"
37 #include "number_decimalquantity.h"
38 #include "util.h"
39 
40 #if !UCONFIG_NO_FORMATTING
41 
42 U_NAMESPACE_BEGIN
43 
44 using namespace icu::pluralimpl;
45 using icu::number::impl::DecimalQuantity;
46 
47 static const UChar PLURAL_KEYWORD_OTHER[]={LOW_O,LOW_T,LOW_H,LOW_E,LOW_R,0};
48 static const UChar PLURAL_DEFAULT_RULE[]={LOW_O,LOW_T,LOW_H,LOW_E,LOW_R,COLON,SPACE,LOW_N,0};
49 static const UChar PK_IN[]={LOW_I,LOW_N,0};
50 static const UChar PK_NOT[]={LOW_N,LOW_O,LOW_T,0};
51 static const UChar PK_IS[]={LOW_I,LOW_S,0};
52 static const UChar PK_MOD[]={LOW_M,LOW_O,LOW_D,0};
53 static const UChar PK_AND[]={LOW_A,LOW_N,LOW_D,0};
54 static const UChar PK_OR[]={LOW_O,LOW_R,0};
55 static const UChar PK_VAR_N[]={LOW_N,0};
56 static const UChar PK_VAR_I[]={LOW_I,0};
57 static const UChar PK_VAR_F[]={LOW_F,0};
58 static const UChar PK_VAR_T[]={LOW_T,0};
59 static const UChar PK_VAR_V[]={LOW_V,0};
60 static const UChar PK_WITHIN[]={LOW_W,LOW_I,LOW_T,LOW_H,LOW_I,LOW_N,0};
61 static const UChar PK_DECIMAL[]={LOW_D,LOW_E,LOW_C,LOW_I,LOW_M,LOW_A,LOW_L,0};
62 static const UChar PK_INTEGER[]={LOW_I,LOW_N,LOW_T,LOW_E,LOW_G,LOW_E,LOW_R,0};
63 
// Macro-generated definitions for the two classes named below; judging by the
// macro name these are presumably the ICU run-time type identification members
// (the macro itself is defined outside this file).
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralRules)
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralKeywordEnumeration)
66 
67 PluralRules::PluralRules(UErrorCode& /*status*/)
68 :   UObject(),
69     mRules(nullptr),
70     mInternalStatus(U_ZERO_ERROR)
71 {
72 }
73 
/**
 * Copy constructor. Starts from an empty object and then lets the assignment
 * operator perform the deep copy, so any copy failure ends up recorded in
 * mInternalStatus.
 */
PluralRules::PluralRules(const PluralRules& other)
    : UObject(other), mRules(nullptr), mInternalStatus(U_ZERO_ERROR)
{
    operator=(other);
}
81 
~PluralRules()82 PluralRules::~PluralRules() {
83     delete mRules;
84 }
85 
~SharedPluralRules()86 SharedPluralRules::~SharedPluralRules() {
87     delete ptr;
88 }
89 
90 PluralRules*
clone() const91 PluralRules::clone() const {
92     PluralRules* newObj = new PluralRules(*this);
93     // Since clone doesn't have a 'status' parameter, the best we can do is return nullptr if
94     // the newly created object was not fully constructed properly (an error occurred).
95     if (newObj != nullptr && U_FAILURE(newObj->mInternalStatus)) {
96         delete newObj;
97         newObj = nullptr;
98     }
99     return newObj;
100 }
101 
102 PluralRules&
operator =(const PluralRules & other)103 PluralRules::operator=(const PluralRules& other) {
104     if (this != &other) {
105         delete mRules;
106         mRules = nullptr;
107         mInternalStatus = other.mInternalStatus;
108         if (U_FAILURE(mInternalStatus)) {
109             // bail out early if the object we were copying from was already 'invalid'.
110             return *this;
111         }
112         if (other.mRules != nullptr) {
113             mRules = new RuleChain(*other.mRules);
114             if (mRules == nullptr) {
115                 mInternalStatus = U_MEMORY_ALLOCATION_ERROR;
116             }
117             else if (U_FAILURE(mRules->fInternalStatus)) {
118                 // If the RuleChain wasn't fully copied, then set our status to failure as well.
119                 mInternalStatus = mRules->fInternalStatus;
120             }
121         }
122     }
123     return *this;
124 }
125 
getAvailableLocales(UErrorCode & status)126 StringEnumeration* PluralRules::getAvailableLocales(UErrorCode &status) {
127     if (U_FAILURE(status)) {
128         return nullptr;
129     }
130     LocalPointer<StringEnumeration> result(new PluralAvailableLocalesEnumeration(status), status);
131     if (U_FAILURE(status)) {
132         return nullptr;
133     }
134     return result.orphan();
135 }
136 
137 
138 PluralRules* U_EXPORT2
createRules(const UnicodeString & description,UErrorCode & status)139 PluralRules::createRules(const UnicodeString& description, UErrorCode& status) {
140     if (U_FAILURE(status)) {
141         return nullptr;
142     }
143     PluralRuleParser parser;
144     LocalPointer<PluralRules> newRules(new PluralRules(status), status);
145     if (U_FAILURE(status)) {
146         return nullptr;
147     }
148     parser.parse(description, newRules.getAlias(), status);
149     if (U_FAILURE(status)) {
150         newRules.adoptInstead(nullptr);
151     }
152     return newRules.orphan();
153 }
154 
155 
156 PluralRules* U_EXPORT2
createDefaultRules(UErrorCode & status)157 PluralRules::createDefaultRules(UErrorCode& status) {
158     return createRules(UnicodeString(TRUE, PLURAL_DEFAULT_RULE, -1), status);
159 }
160 
161 /******************************************************************************/
162 /* Create PluralRules cache */
163 
164 template<> U_I18N_API
createObject(const void *,UErrorCode & status) const165 const SharedPluralRules *LocaleCacheKey<SharedPluralRules>::createObject(
166         const void * /*unused*/, UErrorCode &status) const {
167     const char *localeId = fLoc.getName();
168     LocalPointer<PluralRules> pr(PluralRules::internalForLocale(localeId, UPLURAL_TYPE_CARDINAL, status), status);
169     if (U_FAILURE(status)) {
170         return nullptr;
171     }
172     LocalPointer<SharedPluralRules> result(new SharedPluralRules(pr.getAlias()), status);
173     if (U_FAILURE(status)) {
174         return nullptr;
175     }
176     pr.orphan(); // result was successfully created so it nows pr.
177     result->addRef();
178     return result.orphan();
179 }
180 
181 /* end plural rules cache */
182 /******************************************************************************/
183 
184 const SharedPluralRules* U_EXPORT2
createSharedInstance(const Locale & locale,UPluralType type,UErrorCode & status)185 PluralRules::createSharedInstance(
186         const Locale& locale, UPluralType type, UErrorCode& status) {
187     if (U_FAILURE(status)) {
188         return nullptr;
189     }
190     if (type != UPLURAL_TYPE_CARDINAL) {
191         status = U_UNSUPPORTED_ERROR;
192         return nullptr;
193     }
194     const SharedPluralRules *result = nullptr;
195     UnifiedCache::getByLocale(locale, result, status);
196     return result;
197 }
198 
199 PluralRules* U_EXPORT2
forLocale(const Locale & locale,UErrorCode & status)200 PluralRules::forLocale(const Locale& locale, UErrorCode& status) {
201     return forLocale(locale, UPLURAL_TYPE_CARDINAL, status);
202 }
203 
204 PluralRules* U_EXPORT2
forLocale(const Locale & locale,UPluralType type,UErrorCode & status)205 PluralRules::forLocale(const Locale& locale, UPluralType type, UErrorCode& status) {
206     if (type != UPLURAL_TYPE_CARDINAL) {
207         return internalForLocale(locale, type, status);
208     }
209     const SharedPluralRules *shared = createSharedInstance(
210             locale, type, status);
211     if (U_FAILURE(status)) {
212         return nullptr;
213     }
214     PluralRules *result = (*shared)->clone();
215     shared->removeRef();
216     if (result == nullptr) {
217         status = U_MEMORY_ALLOCATION_ERROR;
218     }
219     return result;
220 }
221 
222 PluralRules* U_EXPORT2
internalForLocale(const Locale & locale,UPluralType type,UErrorCode & status)223 PluralRules::internalForLocale(const Locale& locale, UPluralType type, UErrorCode& status) {
224     if (U_FAILURE(status)) {
225         return nullptr;
226     }
227     if (type >= UPLURAL_TYPE_COUNT) {
228         status = U_ILLEGAL_ARGUMENT_ERROR;
229         return nullptr;
230     }
231     LocalPointer<PluralRules> newObj(new PluralRules(status), status);
232     if (U_FAILURE(status)) {
233         return nullptr;
234     }
235     UnicodeString locRule = newObj->getRuleFromResource(locale, type, status);
236     // TODO: which other errors, if any, should be returned?
237     if (locRule.length() == 0) {
238         // If an out-of-memory error occurred, then stop and report the failure.
239         if (status == U_MEMORY_ALLOCATION_ERROR) {
240             return nullptr;
241         }
242         // Locales with no specific rules (all numbers have the "other" category
243         //   will return a U_MISSING_RESOURCE_ERROR at this point. This is not
244         //   an error.
245         locRule =  UnicodeString(PLURAL_DEFAULT_RULE);
246         status = U_ZERO_ERROR;
247     }
248     PluralRuleParser parser;
249     parser.parse(locRule, newObj.getAlias(), status);
250         //  TODO: should rule parse errors be returned, or
251         //        should we silently use default rules?
252         //        Original impl used default rules.
253         //        Ask the question to ICU Core.
254 
255     return newObj.orphan();
256 }
257 
258 UnicodeString
select(int32_t number) const259 PluralRules::select(int32_t number) const {
260     return select(FixedDecimal(number));
261 }
262 
263 UnicodeString
select(double number) const264 PluralRules::select(double number) const {
265     return select(FixedDecimal(number));
266 }
267 
268 UnicodeString
select(const number::FormattedNumber & number,UErrorCode & status) const269 PluralRules::select(const number::FormattedNumber& number, UErrorCode& status) const {
270     DecimalQuantity dq;
271     number.getDecimalQuantity(dq, status);
272     if (U_FAILURE(status)) {
273         return ICU_Utility::makeBogusString();
274     }
275     return select(dq);
276 }
277 
278 UnicodeString
select(const IFixedDecimal & number) const279 PluralRules::select(const IFixedDecimal &number) const {
280     if (mRules == nullptr) {
281         return UnicodeString(TRUE, PLURAL_DEFAULT_RULE, -1);
282     }
283     else {
284         return mRules->select(number);
285     }
286 }
287 
288 
289 
290 StringEnumeration*
getKeywords(UErrorCode & status) const291 PluralRules::getKeywords(UErrorCode& status) const {
292     if (U_FAILURE(status)) {
293         return nullptr;
294     }
295     if (U_FAILURE(mInternalStatus)) {
296         status = mInternalStatus;
297         return nullptr;
298     }
299     LocalPointer<StringEnumeration> nameEnumerator(new PluralKeywordEnumeration(mRules, status), status);
300     if (U_FAILURE(status)) {
301         return nullptr;
302     }
303     return nameEnumerator.orphan();
304 }
305 
306 double
getUniqueKeywordValue(const UnicodeString &)307 PluralRules::getUniqueKeywordValue(const UnicodeString& /* keyword */) {
308   // Not Implemented.
309   return UPLRULES_NO_UNIQUE_VALUE;
310 }
311 
312 int32_t
getAllKeywordValues(const UnicodeString &,double *,int32_t,UErrorCode & error)313 PluralRules::getAllKeywordValues(const UnicodeString & /* keyword */, double * /* dest */,
314                                  int32_t /* destCapacity */, UErrorCode& error) {
315     error = U_UNSUPPORTED_ERROR;
316     return 0;
317 }
318 
319 
/**
 * Returns the power of ten by which d has to be multiplied so that the product
 * has no fractional part (1 for values that are already integral).
 *
 * NaN yields 1: NaN never compares equal to its own floor, so without the
 * guard below the scaling loop would never terminate.
 */
static double scaleForInt(double d) {
    double scale = 1.0;
    if (d != d) {
        // d is NaN; there is nothing meaningful to scale.
        return scale;
    }
    while (d != floor(d)) {
        d = d * 10.0;
        scale = scale * 10.0;
    }
    return scale;
}
328 
329 static int32_t
getSamplesFromString(const UnicodeString & samples,double * dest,int32_t destCapacity,UErrorCode & status)330 getSamplesFromString(const UnicodeString &samples, double *dest,
331                         int32_t destCapacity, UErrorCode& status) {
332     int32_t sampleCount = 0;
333     int32_t sampleStartIdx = 0;
334     int32_t sampleEndIdx = 0;
335 
336     //std::string ss;  // TODO: debugging.
337     // std::cout << "PluralRules::getSamples(), samples = \"" << samples.toUTF8String(ss) << "\"\n";
338     for (sampleCount = 0; sampleCount < destCapacity && sampleStartIdx < samples.length(); ) {
339         sampleEndIdx = samples.indexOf(COMMA, sampleStartIdx);
340         if (sampleEndIdx == -1) {
341             sampleEndIdx = samples.length();
342         }
343         const UnicodeString &sampleRange = samples.tempSubStringBetween(sampleStartIdx, sampleEndIdx);
344         // ss.erase();
345         // std::cout << "PluralRules::getSamples(), samplesRange = \"" << sampleRange.toUTF8String(ss) << "\"\n";
346         int32_t tildeIndex = sampleRange.indexOf(TILDE);
347         if (tildeIndex < 0) {
348             FixedDecimal fixed(sampleRange, status);
349             double sampleValue = fixed.source;
350             if (fixed.visibleDecimalDigitCount == 0 || sampleValue != floor(sampleValue)) {
351                 dest[sampleCount++] = sampleValue;
352             }
353         } else {
354 
355             FixedDecimal fixedLo(sampleRange.tempSubStringBetween(0, tildeIndex), status);
356             FixedDecimal fixedHi(sampleRange.tempSubStringBetween(tildeIndex+1), status);
357             double rangeLo = fixedLo.source;
358             double rangeHi = fixedHi.source;
359             if (U_FAILURE(status)) {
360                 break;
361             }
362             if (rangeHi < rangeLo) {
363                 status = U_INVALID_FORMAT_ERROR;
364                 break;
365             }
366 
367             // For ranges of samples with fraction decimal digits, scale the number up so that we
368             //   are adding one in the units place. Avoids roundoffs from repetitive adds of tenths.
369 
370             double scale = scaleForInt(rangeLo);
371             double t = scaleForInt(rangeHi);
372             if (t > scale) {
373                 scale = t;
374             }
375             rangeLo *= scale;
376             rangeHi *= scale;
377             for (double n=rangeLo; n<=rangeHi; n+=1) {
378                 // Hack Alert: don't return any decimal samples with integer values that
379                 //    originated from a format with trailing decimals.
380                 //    This API is returning doubles, which can't distinguish having displayed
381                 //    zeros to the right of the decimal.
382                 //    This results in test failures with values mapping back to a different keyword.
383                 double sampleValue = n/scale;
384                 if (!(sampleValue == floor(sampleValue) && fixedLo.visibleDecimalDigitCount > 0)) {
385                     dest[sampleCount++] = sampleValue;
386                 }
387                 if (sampleCount >= destCapacity) {
388                     break;
389                 }
390             }
391         }
392         sampleStartIdx = sampleEndIdx + 1;
393     }
394     return sampleCount;
395 }
396 
397 
398 int32_t
getSamples(const UnicodeString & keyword,double * dest,int32_t destCapacity,UErrorCode & status)399 PluralRules::getSamples(const UnicodeString &keyword, double *dest,
400                         int32_t destCapacity, UErrorCode& status) {
401     if (destCapacity == 0 || U_FAILURE(status)) {
402         return 0;
403     }
404     if (U_FAILURE(mInternalStatus)) {
405         status = mInternalStatus;
406         return 0;
407     }
408     RuleChain *rc = rulesForKeyword(keyword);
409     if (rc == nullptr) {
410         return 0;
411     }
412     int32_t numSamples = getSamplesFromString(rc->fIntegerSamples, dest, destCapacity, status);
413     if (numSamples == 0) {
414         numSamples = getSamplesFromString(rc->fDecimalSamples, dest, destCapacity, status);
415     }
416     return numSamples;
417 }
418 
419 
rulesForKeyword(const UnicodeString & keyword) const420 RuleChain *PluralRules::rulesForKeyword(const UnicodeString &keyword) const {
421     RuleChain *rc;
422     for (rc = mRules; rc != nullptr; rc = rc->fNext) {
423         if (rc->fKeyword == keyword) {
424             break;
425         }
426     }
427     return rc;
428 }
429 
430 
431 UBool
isKeyword(const UnicodeString & keyword) const432 PluralRules::isKeyword(const UnicodeString& keyword) const {
433     if (0 == keyword.compare(PLURAL_KEYWORD_OTHER, 5)) {
434         return true;
435     }
436     return rulesForKeyword(keyword) != nullptr;
437 }
438 
439 UnicodeString
getKeywordOther() const440 PluralRules::getKeywordOther() const {
441     return UnicodeString(TRUE, PLURAL_KEYWORD_OTHER, 5);
442 }
443 
444 UBool
operator ==(const PluralRules & other) const445 PluralRules::operator==(const PluralRules& other) const  {
446     const UnicodeString *ptrKeyword;
447     UErrorCode status= U_ZERO_ERROR;
448 
449     if ( this == &other ) {
450         return TRUE;
451     }
452     LocalPointer<StringEnumeration> myKeywordList(getKeywords(status));
453     LocalPointer<StringEnumeration> otherKeywordList(other.getKeywords(status));
454     if (U_FAILURE(status)) {
455         return FALSE;
456     }
457 
458     if (myKeywordList->count(status)!=otherKeywordList->count(status)) {
459         return FALSE;
460     }
461     myKeywordList->reset(status);
462     while ((ptrKeyword=myKeywordList->snext(status))!=nullptr) {
463         if (!other.isKeyword(*ptrKeyword)) {
464             return FALSE;
465         }
466     }
467     otherKeywordList->reset(status);
468     while ((ptrKeyword=otherKeywordList->snext(status))!=nullptr) {
469         if (!this->isKeyword(*ptrKeyword)) {
470             return FALSE;
471         }
472     }
473     if (U_FAILURE(status)) {
474         return FALSE;
475     }
476 
477     return TRUE;
478 }
479 
480 
481 void
parse(const UnicodeString & ruleData,PluralRules * prules,UErrorCode & status)482 PluralRuleParser::parse(const UnicodeString& ruleData, PluralRules *prules, UErrorCode &status)
483 {
484     if (U_FAILURE(status)) {
485         return;
486     }
487     U_ASSERT(ruleIndex == 0);    // Parsers are good for a single use only!
488     ruleSrc = &ruleData;
489 
490     while (ruleIndex< ruleSrc->length()) {
491         getNextToken(status);
492         if (U_FAILURE(status)) {
493             return;
494         }
495         checkSyntax(status);
496         if (U_FAILURE(status)) {
497             return;
498         }
499         switch (type) {
500         case tAnd:
501             U_ASSERT(curAndConstraint != nullptr);
502             curAndConstraint = curAndConstraint->add(status);
503             break;
504         case tOr:
505             {
506                 U_ASSERT(currentChain != nullptr);
507                 OrConstraint *orNode=currentChain->ruleHeader;
508                 while (orNode->next != nullptr) {
509                     orNode = orNode->next;
510                 }
511                 orNode->next= new OrConstraint();
512                 if (orNode->next == nullptr) {
513                     status = U_MEMORY_ALLOCATION_ERROR;
514                     break;
515                 }
516                 orNode=orNode->next;
517                 orNode->next=nullptr;
518                 curAndConstraint = orNode->add(status);
519             }
520             break;
521         case tIs:
522             U_ASSERT(curAndConstraint != nullptr);
523             U_ASSERT(curAndConstraint->value == -1);
524             U_ASSERT(curAndConstraint->rangeList == nullptr);
525             break;
526         case tNot:
527             U_ASSERT(curAndConstraint != nullptr);
528             curAndConstraint->negated=TRUE;
529             break;
530 
531         case tNotEqual:
532             curAndConstraint->negated=TRUE;
533             U_FALLTHROUGH;
534         case tIn:
535         case tWithin:
536         case tEqual:
537             {
538                 U_ASSERT(curAndConstraint != nullptr);
539                 LocalPointer<UVector32> newRangeList(new UVector32(status), status);
540                 if (U_FAILURE(status)) {
541                     break;
542                 }
543                 curAndConstraint->rangeList = newRangeList.orphan();
544                 curAndConstraint->rangeList->addElement(-1, status);  // range Low
545                 curAndConstraint->rangeList->addElement(-1, status);  // range Hi
546                 rangeLowIdx = 0;
547                 rangeHiIdx  = 1;
548                 curAndConstraint->value=PLURAL_RANGE_HIGH;
549                 curAndConstraint->integerOnly = (type != tWithin);
550             }
551             break;
552         case tNumber:
553             U_ASSERT(curAndConstraint != nullptr);
554             if ( (curAndConstraint->op==AndConstraint::MOD)&&
555                  (curAndConstraint->opNum == -1 ) ) {
556                 curAndConstraint->opNum=getNumberValue(token);
557             }
558             else {
559                 if (curAndConstraint->rangeList == nullptr) {
560                     // this is for an 'is' rule
561                     curAndConstraint->value = getNumberValue(token);
562                 } else {
563                     // this is for an 'in' or 'within' rule
564                     if (curAndConstraint->rangeList->elementAti(rangeLowIdx) == -1) {
565                         curAndConstraint->rangeList->setElementAt(getNumberValue(token), rangeLowIdx);
566                         curAndConstraint->rangeList->setElementAt(getNumberValue(token), rangeHiIdx);
567                     }
568                     else {
569                         curAndConstraint->rangeList->setElementAt(getNumberValue(token), rangeHiIdx);
570                         if (curAndConstraint->rangeList->elementAti(rangeLowIdx) >
571                                 curAndConstraint->rangeList->elementAti(rangeHiIdx)) {
572                             // Range Lower bound > Range Upper bound.
573                             // U_UNEXPECTED_TOKEN seems a little funny, but it is consistently
574                             // used for all plural rule parse errors.
575                             status = U_UNEXPECTED_TOKEN;
576                             break;
577                         }
578                     }
579                 }
580             }
581             break;
582         case tComma:
583             // TODO: rule syntax checking is inadequate, can happen with badly formed rules.
584             //       Catch cases like "n mod 10, is 1" here instead.
585             if (curAndConstraint == nullptr || curAndConstraint->rangeList == nullptr) {
586                 status = U_UNEXPECTED_TOKEN;
587                 break;
588             }
589             U_ASSERT(curAndConstraint->rangeList->size() >= 2);
590             rangeLowIdx = curAndConstraint->rangeList->size();
591             curAndConstraint->rangeList->addElement(-1, status);  // range Low
592             rangeHiIdx = curAndConstraint->rangeList->size();
593             curAndConstraint->rangeList->addElement(-1, status);  // range Hi
594             break;
595         case tMod:
596             U_ASSERT(curAndConstraint != nullptr);
597             curAndConstraint->op=AndConstraint::MOD;
598             break;
599         case tVariableN:
600         case tVariableI:
601         case tVariableF:
602         case tVariableT:
603         case tVariableV:
604             U_ASSERT(curAndConstraint != nullptr);
605             curAndConstraint->digitsType = type;
606             break;
607         case tKeyword:
608             {
609             RuleChain *newChain = new RuleChain;
610             if (newChain == nullptr) {
611                 status = U_MEMORY_ALLOCATION_ERROR;
612                 break;
613             }
614             newChain->fKeyword = token;
615             if (prules->mRules == nullptr) {
616                 prules->mRules = newChain;
617             } else {
618                 // The new rule chain goes at the end of the linked list of rule chains,
619                 //   unless there is an "other" keyword & chain. "other" must remain last.
620                 RuleChain *insertAfter = prules->mRules;
621                 while (insertAfter->fNext!=nullptr &&
622                        insertAfter->fNext->fKeyword.compare(PLURAL_KEYWORD_OTHER, 5) != 0 ){
623                     insertAfter=insertAfter->fNext;
624                 }
625                 newChain->fNext = insertAfter->fNext;
626                 insertAfter->fNext = newChain;
627             }
628             OrConstraint *orNode = new OrConstraint();
629             if (orNode == nullptr) {
630                 status = U_MEMORY_ALLOCATION_ERROR;
631                 break;
632             }
633             newChain->ruleHeader = orNode;
634             curAndConstraint = orNode->add(status);
635             currentChain = newChain;
636             }
637             break;
638 
639         case tInteger:
640             for (;;) {
641                 getNextToken(status);
642                 if (U_FAILURE(status) || type == tSemiColon || type == tEOF || type == tAt) {
643                     break;
644                 }
645                 if (type == tEllipsis) {
646                     currentChain->fIntegerSamplesUnbounded = TRUE;
647                     continue;
648                 }
649                 currentChain->fIntegerSamples.append(token);
650             }
651             break;
652 
653         case tDecimal:
654             for (;;) {
655                 getNextToken(status);
656                 if (U_FAILURE(status) || type == tSemiColon || type == tEOF || type == tAt) {
657                     break;
658                 }
659                 if (type == tEllipsis) {
660                     currentChain->fDecimalSamplesUnbounded = TRUE;
661                     continue;
662                 }
663                 currentChain->fDecimalSamples.append(token);
664             }
665             break;
666 
667         default:
668             break;
669         }
670         prevType=type;
671         if (U_FAILURE(status)) {
672             break;
673         }
674     }
675 }
676 
677 UnicodeString
getRuleFromResource(const Locale & locale,UPluralType type,UErrorCode & errCode)678 PluralRules::getRuleFromResource(const Locale& locale, UPluralType type, UErrorCode& errCode) {
679     UnicodeString emptyStr;
680 
681     if (U_FAILURE(errCode)) {
682         return emptyStr;
683     }
684     LocalUResourceBundlePointer rb(ures_openDirect(nullptr, "plurals", &errCode));
685     if(U_FAILURE(errCode)) {
686         return emptyStr;
687     }
688     const char *typeKey;
689     switch (type) {
690     case UPLURAL_TYPE_CARDINAL:
691         typeKey = "locales";
692         break;
693     case UPLURAL_TYPE_ORDINAL:
694         typeKey = "locales_ordinals";
695         break;
696     default:
697         // Must not occur: The caller should have checked for valid types.
698         errCode = U_ILLEGAL_ARGUMENT_ERROR;
699         return emptyStr;
700     }
701     LocalUResourceBundlePointer locRes(ures_getByKey(rb.getAlias(), typeKey, nullptr, &errCode));
702     if(U_FAILURE(errCode)) {
703         return emptyStr;
704     }
705     int32_t resLen=0;
706     const char *curLocaleName=locale.getBaseName();
707     const UChar* s = ures_getStringByKey(locRes.getAlias(), curLocaleName, &resLen, &errCode);
708 
709     if (s == nullptr) {
710         // Check parent locales.
711         UErrorCode status = U_ZERO_ERROR;
712         char parentLocaleName[ULOC_FULLNAME_CAPACITY];
713         const char *curLocaleName2=locale.getBaseName();
714         uprv_strcpy(parentLocaleName, curLocaleName2);
715 
716         while (uloc_getParent(parentLocaleName, parentLocaleName,
717                                        ULOC_FULLNAME_CAPACITY, &status) > 0) {
718             resLen=0;
719             s = ures_getStringByKey(locRes.getAlias(), parentLocaleName, &resLen, &status);
720             if (s != nullptr) {
721                 errCode = U_ZERO_ERROR;
722                 break;
723             }
724             status = U_ZERO_ERROR;
725         }
726     }
727     if (s==nullptr) {
728         return emptyStr;
729     }
730 
731     char setKey[256];
732     u_UCharsToChars(s, setKey, resLen + 1);
733     // printf("\n PluralRule: %s\n", setKey);
734 
735     LocalUResourceBundlePointer ruleRes(ures_getByKey(rb.getAlias(), "rules", nullptr, &errCode));
736     if(U_FAILURE(errCode)) {
737         return emptyStr;
738     }
739     LocalUResourceBundlePointer setRes(ures_getByKey(ruleRes.getAlias(), setKey, nullptr, &errCode));
740     if (U_FAILURE(errCode)) {
741         return emptyStr;
742     }
743 
744     int32_t numberKeys = ures_getSize(setRes.getAlias());
745     UnicodeString result;
746     const char *key=nullptr;
747     for(int32_t i=0; i<numberKeys; ++i) {   // Keys are zero, one, few, ...
748         UnicodeString rules = ures_getNextUnicodeString(setRes.getAlias(), &key, &errCode);
749         UnicodeString uKey(key, -1, US_INV);
750         result.append(uKey);
751         result.append(COLON);
752         result.append(rules);
753         result.append(SEMI_COLON);
754     }
755     return result;
756 }
757 
758 
759 UnicodeString
getRules() const760 PluralRules::getRules() const {
761     UnicodeString rules;
762     if (mRules != nullptr) {
763         mRules->dumpRules(rules);
764     }
765     return rules;
766 }
767 
/**
 * Copy constructor: deep-copies the constraint, its range list and the rest of
 * the AND chain hanging off next.
 *
 * Any failure (invalid source, failed allocation, failed range list copy, or
 * a failure while copying a later link of the chain) is recorded in
 * fInternalStatus, so the owner can tell that the copy is incomplete.
 */
AndConstraint::AndConstraint(const AndConstraint& other) {
    this->fInternalStatus = other.fInternalStatus;
    if (U_FAILURE(fInternalStatus)) {
        return; // stop early if the object we are copying from is invalid.
    }
    this->op = other.op;
    this->opNum=other.opNum;
    this->value=other.value;
    if (other.rangeList != nullptr) {
        LocalPointer<UVector32> newRangeList(new UVector32(fInternalStatus), fInternalStatus);
        if (U_FAILURE(fInternalStatus)) {
            return;
        }
        this->rangeList = newRangeList.orphan();
        this->rangeList->assign(*other.rangeList, fInternalStatus);
    }
    this->integerOnly=other.integerOnly;
    this->negated=other.negated;
    this->digitsType = other.digitsType;
    if (other.next != nullptr) {
        this->next = new AndConstraint(*other.next);
        if (this->next == nullptr) {
            fInternalStatus = U_MEMORY_ALLOCATION_ERROR;
        }
        else if (U_FAILURE(this->next->fInternalStatus)) {
            // The rest of the chain was not fully copied; report that here too.
            fInternalStatus = this->next->fInternalStatus;
        }
    }
}
794 
~AndConstraint()795 AndConstraint::~AndConstraint() {
796     delete rangeList;
797     rangeList = nullptr;
798     delete next;
799     next = nullptr;
800 }
801 
802 UBool
isFulfilled(const IFixedDecimal & number)803 AndConstraint::isFulfilled(const IFixedDecimal &number) {
804     UBool result = TRUE;
805     if (digitsType == none) {
806         // An empty AndConstraint, created by a rule with a keyword but no following expression.
807         return TRUE;
808     }
809 
810     PluralOperand operand = tokenTypeToPluralOperand(digitsType);
811     double n = number.getPluralOperand(operand);     // pulls n | i | v | f value for the number.
812                                                      // Will always be positive.
813                                                      // May be non-integer (n option only)
814     do {
815         if (integerOnly && n != uprv_floor(n)) {
816             result = FALSE;
817             break;
818         }
819 
820         if (op == MOD) {
821             n = fmod(n, opNum);
822         }
823         if (rangeList == nullptr) {
824             result = value == -1 ||    // empty rule
825                      n == value;       //  'is' rule
826             break;
827         }
828         result = FALSE;                // 'in' or 'within' rule
829         for (int32_t r=0; r<rangeList->size(); r+=2) {
830             if (rangeList->elementAti(r) <= n && n <= rangeList->elementAti(r+1)) {
831                 result = TRUE;
832                 break;
833             }
834         }
835     } while (FALSE);
836 
837     if (negated) {
838         result = !result;
839     }
840     return result;
841 }
842 
843 AndConstraint*
add(UErrorCode & status)844 AndConstraint::add(UErrorCode& status) {
845     if (U_FAILURE(fInternalStatus)) {
846         status = fInternalStatus;
847         return nullptr;
848     }
849     this->next = new AndConstraint();
850     if (this->next == nullptr) {
851         status = U_MEMORY_ALLOCATION_ERROR;
852     }
853     return this->next;
854 }
855 
856 
// Copy constructor: deep-copies the child AND-chain and the remaining OR-chain.
//
// Constructors cannot report errors directly, so any failure is recorded in
// fInternalStatus for the owner to inspect:
//   - if `other` is already in a failed state, nothing is copied;
//   - an allocation failure yields U_MEMORY_ALLOCATION_ERROR;
//   - a failure recorded inside the copied child or inside the copied `next`
//     node is propagated to this node, so a partially copied tree is never
//     reported as a success.
OrConstraint::OrConstraint(const OrConstraint& other) {
    this->fInternalStatus = other.fInternalStatus;
    if (U_FAILURE(fInternalStatus)) {
        return; // stop early if the object we are copying from is invalid.
    }
    if ( other.childNode != nullptr ) {
        this->childNode = new AndConstraint(*(other.childNode));
        if (this->childNode == nullptr) {
            fInternalStatus = U_MEMORY_ALLOCATION_ERROR;
            return;
        }
        if (U_FAILURE(this->childNode->fInternalStatus)) {
            // The AndConstraint copy was incomplete; do not pretend this copy succeeded.
            fInternalStatus = this->childNode->fInternalStatus;
            return;
        }
    }
    if (other.next != nullptr ) {
        this->next = new OrConstraint(*(other.next));
        if (this->next == nullptr) {
            fInternalStatus = U_MEMORY_ALLOCATION_ERROR;
            return;
        }
        if (U_FAILURE(this->next->fInternalStatus)) {
            this->fInternalStatus = this->next->fInternalStatus;
        }
    }
}
880 
~OrConstraint()881 OrConstraint::~OrConstraint() {
882     delete childNode;
883     childNode = nullptr;
884     delete next;
885     next = nullptr;
886 }
887 
888 AndConstraint*
add(UErrorCode & status)889 OrConstraint::add(UErrorCode& status) {
890     if (U_FAILURE(fInternalStatus)) {
891         status = fInternalStatus;
892         return nullptr;
893     }
894     OrConstraint *curOrConstraint=this;
895     {
896         while (curOrConstraint->next!=nullptr) {
897             curOrConstraint = curOrConstraint->next;
898         }
899         U_ASSERT(curOrConstraint->childNode == nullptr);
900         curOrConstraint->childNode = new AndConstraint();
901         if (curOrConstraint->childNode == nullptr) {
902             status = U_MEMORY_ALLOCATION_ERROR;
903         }
904     }
905     return curOrConstraint->childNode;
906 }
907 
908 UBool
isFulfilled(const IFixedDecimal & number)909 OrConstraint::isFulfilled(const IFixedDecimal &number) {
910     OrConstraint* orRule=this;
911     UBool result=FALSE;
912 
913     while (orRule!=nullptr && !result) {
914         result=TRUE;
915         AndConstraint* andRule = orRule->childNode;
916         while (andRule!=nullptr && result) {
917             result = andRule->isFulfilled(number);
918             andRule=andRule->next;
919         }
920         orRule = orRule->next;
921     }
922 
923     return result;
924 }
925 
926 
// Copy constructor: copies the keyword and sample data, then deep-copies the
// rule header and the remainder of the chain. Failures are recorded in
// fInternalStatus because a constructor cannot return an error:
//   - a failed `other` is not copied any further;
//   - allocation failures yield U_MEMORY_ALLOCATION_ERROR;
//   - a failure inside the copied rule header or the copied next link is
//     adopted as this link's status.
RuleChain::RuleChain(const RuleChain& other) :
        fKeyword(other.fKeyword),
        fDecimalSamples(other.fDecimalSamples),
        fIntegerSamples(other.fIntegerSamples),
        fDecimalSamplesUnbounded(other.fDecimalSamplesUnbounded),
        fIntegerSamplesUnbounded(other.fIntegerSamplesUnbounded),
        fInternalStatus(other.fInternalStatus) {
    if (U_FAILURE(fInternalStatus)) {
        return; // the source object is invalid; nothing more to copy.
    }

    if (other.ruleHeader != nullptr) {
        ruleHeader = new OrConstraint(*(other.ruleHeader));
        if (ruleHeader == nullptr) {
            fInternalStatus = U_MEMORY_ALLOCATION_ERROR;
        } else if (U_FAILURE(ruleHeader->fInternalStatus)) {
            // The OrConstraint was not fully copied, so this link failed too.
            fInternalStatus = ruleHeader->fInternalStatus;
            return;
        }
    }

    if (other.fNext == nullptr) {
        return;
    }
    fNext = new RuleChain(*other.fNext);
    if (fNext == nullptr) {
        fInternalStatus = U_MEMORY_ALLOCATION_ERROR;
    } else if (U_FAILURE(fNext->fInternalStatus)) {
        // The rest of the chain was not fully copied, so this link failed too.
        fInternalStatus = fNext->fInternalStatus;
    }
}
956 
~RuleChain()957 RuleChain::~RuleChain() {
958     delete fNext;
959     delete ruleHeader;
960 }
961 
962 UnicodeString
select(const IFixedDecimal & number) const963 RuleChain::select(const IFixedDecimal &number) const {
964     if (!number.isNaN() && !number.isInfinite()) {
965         for (const RuleChain *rules = this; rules != nullptr; rules = rules->fNext) {
966              if (rules->ruleHeader->isFulfilled(number)) {
967                  return rules->fKeyword;
968              }
969         }
970     }
971     return UnicodeString(TRUE, PLURAL_KEYWORD_OTHER, 5);
972 }
973 
tokenString(tokenType tok)974 static UnicodeString tokenString(tokenType tok) {
975     UnicodeString s;
976     switch (tok) {
977       case tVariableN:
978         s.append(LOW_N); break;
979       case tVariableI:
980         s.append(LOW_I); break;
981       case tVariableF:
982         s.append(LOW_F); break;
983       case tVariableV:
984         s.append(LOW_V); break;
985       case tVariableT:
986         s.append(LOW_T); break;
987       default:
988         s.append(TILDE);
989     }
990     return s;
991 }
992 
993 void
dumpRules(UnicodeString & result)994 RuleChain::dumpRules(UnicodeString& result) {
995     UChar digitString[16];
996 
997     if ( ruleHeader != nullptr ) {
998         result +=  fKeyword;
999         result += COLON;
1000         result += SPACE;
1001         OrConstraint* orRule=ruleHeader;
1002         while ( orRule != nullptr ) {
1003             AndConstraint* andRule=orRule->childNode;
1004             while ( andRule != nullptr ) {
1005                 if ((andRule->op==AndConstraint::NONE) &&  (andRule->rangeList==nullptr) && (andRule->value == -1)) {
1006                     // Empty Rules.
1007                 } else if ( (andRule->op==AndConstraint::NONE) && (andRule->rangeList==nullptr) ) {
1008                     result += tokenString(andRule->digitsType);
1009                     result += UNICODE_STRING_SIMPLE(" is ");
1010                     if (andRule->negated) {
1011                         result += UNICODE_STRING_SIMPLE("not ");
1012                     }
1013                     uprv_itou(digitString,16, andRule->value,10,0);
1014                     result += UnicodeString(digitString);
1015                 }
1016                 else {
1017                     result += tokenString(andRule->digitsType);
1018                     result += SPACE;
1019                     if (andRule->op==AndConstraint::MOD) {
1020                         result += UNICODE_STRING_SIMPLE("mod ");
1021                         uprv_itou(digitString,16, andRule->opNum,10,0);
1022                         result += UnicodeString(digitString);
1023                     }
1024                     if (andRule->rangeList==nullptr) {
1025                         if (andRule->negated) {
1026                             result += UNICODE_STRING_SIMPLE(" is not ");
1027                             uprv_itou(digitString,16, andRule->value,10,0);
1028                             result += UnicodeString(digitString);
1029                         }
1030                         else {
1031                             result += UNICODE_STRING_SIMPLE(" is ");
1032                             uprv_itou(digitString,16, andRule->value,10,0);
1033                             result += UnicodeString(digitString);
1034                         }
1035                     }
1036                     else {
1037                         if (andRule->negated) {
1038                             if ( andRule->integerOnly ) {
1039                                 result += UNICODE_STRING_SIMPLE(" not in ");
1040                             }
1041                             else {
1042                                 result += UNICODE_STRING_SIMPLE(" not within ");
1043                             }
1044                         }
1045                         else {
1046                             if ( andRule->integerOnly ) {
1047                                 result += UNICODE_STRING_SIMPLE(" in ");
1048                             }
1049                             else {
1050                                 result += UNICODE_STRING_SIMPLE(" within ");
1051                             }
1052                         }
1053                         for (int32_t r=0; r<andRule->rangeList->size(); r+=2) {
1054                             int32_t rangeLo = andRule->rangeList->elementAti(r);
1055                             int32_t rangeHi = andRule->rangeList->elementAti(r+1);
1056                             uprv_itou(digitString,16, rangeLo, 10, 0);
1057                             result += UnicodeString(digitString);
1058                             result += UNICODE_STRING_SIMPLE("..");
1059                             uprv_itou(digitString,16, rangeHi, 10,0);
1060                             result += UnicodeString(digitString);
1061                             if (r+2 < andRule->rangeList->size()) {
1062                                 result += UNICODE_STRING_SIMPLE(", ");
1063                             }
1064                         }
1065                     }
1066                 }
1067                 if ( (andRule=andRule->next) != nullptr) {
1068                     result += UNICODE_STRING_SIMPLE(" and ");
1069                 }
1070             }
1071             if ( (orRule = orRule->next) != nullptr ) {
1072                 result += UNICODE_STRING_SIMPLE(" or ");
1073             }
1074         }
1075     }
1076     if ( fNext != nullptr ) {
1077         result += UNICODE_STRING_SIMPLE("; ");
1078         fNext->dumpRules(result);
1079     }
1080 }
1081 
1082 
1083 UErrorCode
getKeywords(int32_t capacityOfKeywords,UnicodeString * keywords,int32_t & arraySize) const1084 RuleChain::getKeywords(int32_t capacityOfKeywords, UnicodeString* keywords, int32_t& arraySize) const {
1085     if (U_FAILURE(fInternalStatus)) {
1086         return fInternalStatus;
1087     }
1088     if ( arraySize < capacityOfKeywords-1 ) {
1089         keywords[arraySize++]=fKeyword;
1090     }
1091     else {
1092         return U_BUFFER_OVERFLOW_ERROR;
1093     }
1094 
1095     if ( fNext != nullptr ) {
1096         return fNext->getKeywords(capacityOfKeywords, keywords, arraySize);
1097     }
1098     else {
1099         return U_ZERO_ERROR;
1100     }
1101 }
1102 
1103 UBool
isKeyword(const UnicodeString & keywordParam) const1104 RuleChain::isKeyword(const UnicodeString& keywordParam) const {
1105     if ( fKeyword == keywordParam ) {
1106         return TRUE;
1107     }
1108 
1109     if ( fNext != nullptr ) {
1110         return fNext->isKeyword(keywordParam);
1111     }
1112     else {
1113         return FALSE;
1114     }
1115 }
1116 
1117 
PluralRuleParser()1118 PluralRuleParser::PluralRuleParser() :
1119         ruleIndex(0), token(), type(none), prevType(none),
1120         curAndConstraint(nullptr), currentChain(nullptr), rangeLowIdx(-1), rangeHiIdx(-1)
1121 {
1122 }
1123 
~PluralRuleParser()1124 PluralRuleParser::~PluralRuleParser() {
1125 }
1126 
1127 
1128 int32_t
getNumberValue(const UnicodeString & token)1129 PluralRuleParser::getNumberValue(const UnicodeString& token) {
1130     int32_t i;
1131     char digits[128];
1132 
1133     i = token.extract(0, token.length(), digits, UPRV_LENGTHOF(digits), US_INV);
1134     digits[i]='\0';
1135 
1136     return((int32_t)atoi(digits));
1137 }
1138 
1139 
1140 void
checkSyntax(UErrorCode & status)1141 PluralRuleParser::checkSyntax(UErrorCode &status)
1142 {
1143     if (U_FAILURE(status)) {
1144         return;
1145     }
1146     if (!(prevType==none || prevType==tSemiColon)) {
1147         type = getKeyType(token, type);  // Switch token type from tKeyword if we scanned a reserved word,
1148                                                //   and we are not at the start of a rule, where a
1149                                                //   keyword is expected.
1150     }
1151 
1152     switch(prevType) {
1153     case none:
1154     case tSemiColon:
1155         if (type!=tKeyword && type != tEOF) {
1156             status = U_UNEXPECTED_TOKEN;
1157         }
1158         break;
1159     case tVariableN:
1160     case tVariableI:
1161     case tVariableF:
1162     case tVariableT:
1163     case tVariableV:
1164         if (type != tIs && type != tMod && type != tIn &&
1165             type != tNot && type != tWithin && type != tEqual && type != tNotEqual) {
1166             status = U_UNEXPECTED_TOKEN;
1167         }
1168         break;
1169     case tKeyword:
1170         if (type != tColon) {
1171             status = U_UNEXPECTED_TOKEN;
1172         }
1173         break;
1174     case tColon:
1175         if (!(type == tVariableN ||
1176               type == tVariableI ||
1177               type == tVariableF ||
1178               type == tVariableT ||
1179               type == tVariableV ||
1180               type == tAt)) {
1181             status = U_UNEXPECTED_TOKEN;
1182         }
1183         break;
1184     case tIs:
1185         if ( type != tNumber && type != tNot) {
1186             status = U_UNEXPECTED_TOKEN;
1187         }
1188         break;
1189     case tNot:
1190         if (type != tNumber && type != tIn && type != tWithin) {
1191             status = U_UNEXPECTED_TOKEN;
1192         }
1193         break;
1194     case tMod:
1195     case tDot2:
1196     case tIn:
1197     case tWithin:
1198     case tEqual:
1199     case tNotEqual:
1200         if (type != tNumber) {
1201             status = U_UNEXPECTED_TOKEN;
1202         }
1203         break;
1204     case tAnd:
1205     case tOr:
1206         if ( type != tVariableN &&
1207              type != tVariableI &&
1208              type != tVariableF &&
1209              type != tVariableT &&
1210              type != tVariableV) {
1211             status = U_UNEXPECTED_TOKEN;
1212         }
1213         break;
1214     case tComma:
1215         if (type != tNumber) {
1216             status = U_UNEXPECTED_TOKEN;
1217         }
1218         break;
1219     case tNumber:
1220         if (type != tDot2  && type != tSemiColon && type != tIs       && type != tNot    &&
1221             type != tIn    && type != tEqual     && type != tNotEqual && type != tWithin &&
1222             type != tAnd   && type != tOr        && type != tComma    && type != tAt     &&
1223             type != tEOF)
1224         {
1225             status = U_UNEXPECTED_TOKEN;
1226         }
1227         // TODO: a comma following a number that is not part of a range will be allowed.
1228         //       It's not the only case of this sort of thing. Parser needs a re-write.
1229         break;
1230     case tAt:
1231         if (type != tDecimal && type != tInteger) {
1232             status = U_UNEXPECTED_TOKEN;
1233         }
1234         break;
1235     default:
1236         status = U_UNEXPECTED_TOKEN;
1237         break;
1238     }
1239 }
1240 
1241 
1242 /*
1243  *  Scan the next token from the input rules.
1244  *     rules and returned token type are in the parser state variables.
1245  */
1246 void
getNextToken(UErrorCode & status)1247 PluralRuleParser::getNextToken(UErrorCode &status)
1248 {
1249     if (U_FAILURE(status)) {
1250         return;
1251     }
1252 
1253     UChar ch;
1254     while (ruleIndex < ruleSrc->length()) {
1255         ch = ruleSrc->charAt(ruleIndex);
1256         type = charType(ch);
1257         if (type != tSpace) {
1258             break;
1259         }
1260         ++(ruleIndex);
1261     }
1262     if (ruleIndex >= ruleSrc->length()) {
1263         type = tEOF;
1264         return;
1265     }
1266     int32_t curIndex= ruleIndex;
1267 
1268     switch (type) {
1269       case tColon:
1270       case tSemiColon:
1271       case tComma:
1272       case tEllipsis:
1273       case tTilde:   // scanned '~'
1274       case tAt:      // scanned '@'
1275       case tEqual:   // scanned '='
1276       case tMod:     // scanned '%'
1277         // Single character tokens.
1278         ++curIndex;
1279         break;
1280 
1281       case tNotEqual:  // scanned '!'
1282         if (ruleSrc->charAt(curIndex+1) == EQUALS) {
1283             curIndex += 2;
1284         } else {
1285             type = none;
1286             curIndex += 1;
1287         }
1288         break;
1289 
1290       case tKeyword:
1291          while (type == tKeyword && ++curIndex < ruleSrc->length()) {
1292              ch = ruleSrc->charAt(curIndex);
1293              type = charType(ch);
1294          }
1295          type = tKeyword;
1296          break;
1297 
1298       case tNumber:
1299          while (type == tNumber && ++curIndex < ruleSrc->length()) {
1300              ch = ruleSrc->charAt(curIndex);
1301              type = charType(ch);
1302          }
1303          type = tNumber;
1304          break;
1305 
1306        case tDot:
1307          // We could be looking at either ".." in a range, or "..." at the end of a sample.
1308          if (curIndex+1 >= ruleSrc->length() || ruleSrc->charAt(curIndex+1) != DOT) {
1309              ++curIndex;
1310              break; // Single dot
1311          }
1312          if (curIndex+2 >= ruleSrc->length() || ruleSrc->charAt(curIndex+2) != DOT) {
1313              curIndex += 2;
1314              type = tDot2;
1315              break; // double dot
1316          }
1317          type = tEllipsis;
1318          curIndex += 3;
1319          break;     // triple dot
1320 
1321        default:
1322          status = U_UNEXPECTED_TOKEN;
1323          ++curIndex;
1324          break;
1325     }
1326 
1327     U_ASSERT(ruleIndex <= ruleSrc->length());
1328     U_ASSERT(curIndex <= ruleSrc->length());
1329     token=UnicodeString(*ruleSrc, ruleIndex, curIndex-ruleIndex);
1330     ruleIndex = curIndex;
1331 }
1332 
1333 tokenType
charType(UChar ch)1334 PluralRuleParser::charType(UChar ch) {
1335     if ((ch>=U_ZERO) && (ch<=U_NINE)) {
1336         return tNumber;
1337     }
1338     if (ch>=LOW_A && ch<=LOW_Z) {
1339         return tKeyword;
1340     }
1341     switch (ch) {
1342     case COLON:
1343         return tColon;
1344     case SPACE:
1345         return tSpace;
1346     case SEMI_COLON:
1347         return tSemiColon;
1348     case DOT:
1349         return tDot;
1350     case COMMA:
1351         return tComma;
1352     case EXCLAMATION:
1353         return tNotEqual;
1354     case EQUALS:
1355         return tEqual;
1356     case PERCENT_SIGN:
1357         return tMod;
1358     case AT:
1359         return tAt;
1360     case ELLIPSIS:
1361         return tEllipsis;
1362     case TILDE:
1363         return tTilde;
1364     default :
1365         return none;
1366     }
1367 }
1368 
1369 
1370 //  Set token type for reserved words in the Plural Rule syntax.
1371 
1372 tokenType
getKeyType(const UnicodeString & token,tokenType keyType)1373 PluralRuleParser::getKeyType(const UnicodeString &token, tokenType keyType)
1374 {
1375     if (keyType != tKeyword) {
1376         return keyType;
1377     }
1378 
1379     if (0 == token.compare(PK_VAR_N, 1)) {
1380         keyType = tVariableN;
1381     } else if (0 == token.compare(PK_VAR_I, 1)) {
1382         keyType = tVariableI;
1383     } else if (0 == token.compare(PK_VAR_F, 1)) {
1384         keyType = tVariableF;
1385     } else if (0 == token.compare(PK_VAR_T, 1)) {
1386         keyType = tVariableT;
1387     } else if (0 == token.compare(PK_VAR_V, 1)) {
1388         keyType = tVariableV;
1389     } else if (0 == token.compare(PK_IS, 2)) {
1390         keyType = tIs;
1391     } else if (0 == token.compare(PK_AND, 3)) {
1392         keyType = tAnd;
1393     } else if (0 == token.compare(PK_IN, 2)) {
1394         keyType = tIn;
1395     } else if (0 == token.compare(PK_WITHIN, 6)) {
1396         keyType = tWithin;
1397     } else if (0 == token.compare(PK_NOT, 3)) {
1398         keyType = tNot;
1399     } else if (0 == token.compare(PK_MOD, 3)) {
1400         keyType = tMod;
1401     } else if (0 == token.compare(PK_OR, 2)) {
1402         keyType = tOr;
1403     } else if (0 == token.compare(PK_DECIMAL, 7)) {
1404         keyType = tDecimal;
1405     } else if (0 == token.compare(PK_INTEGER, 7)) {
1406         keyType = tInteger;
1407     }
1408     return keyType;
1409 }
1410 
1411 
PluralKeywordEnumeration(RuleChain * header,UErrorCode & status)1412 PluralKeywordEnumeration::PluralKeywordEnumeration(RuleChain *header, UErrorCode& status)
1413         : pos(0), fKeywordNames(status) {
1414     if (U_FAILURE(status)) {
1415         return;
1416     }
1417     fKeywordNames.setDeleter(uprv_deleteUObject);
1418     UBool  addKeywordOther = TRUE;
1419     RuleChain *node = header;
1420     while (node != nullptr) {
1421         auto newElem = new UnicodeString(node->fKeyword);
1422         if (newElem == nullptr) {
1423             status = U_MEMORY_ALLOCATION_ERROR;
1424             return;
1425         }
1426         fKeywordNames.addElement(newElem, status);
1427         if (U_FAILURE(status)) {
1428             delete newElem;
1429             return;
1430         }
1431         if (0 == node->fKeyword.compare(PLURAL_KEYWORD_OTHER, 5)) {
1432             addKeywordOther = FALSE;
1433         }
1434         node = node->fNext;
1435     }
1436 
1437     if (addKeywordOther) {
1438         auto newElem = new UnicodeString(PLURAL_KEYWORD_OTHER);
1439         if (newElem == nullptr) {
1440             status = U_MEMORY_ALLOCATION_ERROR;
1441             return;
1442         }
1443         fKeywordNames.addElement(newElem, status);
1444         if (U_FAILURE(status)) {
1445             delete newElem;
1446             return;
1447         }
1448     }
1449 }
1450 
1451 const UnicodeString*
snext(UErrorCode & status)1452 PluralKeywordEnumeration::snext(UErrorCode& status) {
1453     if (U_SUCCESS(status) && pos < fKeywordNames.size()) {
1454         return (const UnicodeString*)fKeywordNames.elementAt(pos++);
1455     }
1456     return nullptr;
1457 }
1458 
1459 void
reset(UErrorCode &)1460 PluralKeywordEnumeration::reset(UErrorCode& /*status*/) {
1461     pos=0;
1462 }
1463 
1464 int32_t
count(UErrorCode &) const1465 PluralKeywordEnumeration::count(UErrorCode& /*status*/) const {
1466     return fKeywordNames.size();
1467 }
1468 
~PluralKeywordEnumeration()1469 PluralKeywordEnumeration::~PluralKeywordEnumeration() {
1470 }
1471 
tokenTypeToPluralOperand(tokenType tt)1472 PluralOperand tokenTypeToPluralOperand(tokenType tt) {
1473     switch(tt) {
1474     case tVariableN:
1475         return PLURAL_OPERAND_N;
1476     case tVariableI:
1477         return PLURAL_OPERAND_I;
1478     case tVariableF:
1479         return PLURAL_OPERAND_F;
1480     case tVariableV:
1481         return PLURAL_OPERAND_V;
1482     case tVariableT:
1483         return PLURAL_OPERAND_T;
1484     default:
1485         UPRV_UNREACHABLE;  // unexpected.
1486     }
1487 }
1488 
FixedDecimal(double n,int32_t v,int64_t f)1489 FixedDecimal::FixedDecimal(double n, int32_t v, int64_t f) {
1490     init(n, v, f);
1491     // check values. TODO make into unit test.
1492     //
1493     //            long visiblePower = (int) Math.pow(10, v);
1494     //            if (decimalDigits > visiblePower) {
1495     //                throw new IllegalArgumentException();
1496     //            }
1497     //            double fraction = intValue + (decimalDigits / (double) visiblePower);
1498     //            if (fraction != source) {
1499     //                double diff = Math.abs(fraction - source)/(Math.abs(fraction) + Math.abs(source));
1500     //                if (diff > 0.00000001d) {
1501     //                    throw new IllegalArgumentException();
1502     //                }
1503     //            }
1504 }
1505 
FixedDecimal(double n,int32_t v)1506 FixedDecimal::FixedDecimal(double n, int32_t v) {
1507     // Ugly, but for samples we don't care.
1508     init(n, v, getFractionalDigits(n, v));
1509 }
1510 
FixedDecimal(double n)1511 FixedDecimal::FixedDecimal(double n) {
1512     init(n);
1513 }
1514 
FixedDecimal()1515 FixedDecimal::FixedDecimal() {
1516     init(0, 0, 0);
1517 }
1518 
1519 
1520 // Create a FixedDecimal from a UnicodeString containing a number.
1521 //    Inefficient, but only used for samples, so simplicity trumps efficiency.
1522 
FixedDecimal(const UnicodeString & num,UErrorCode & status)1523 FixedDecimal::FixedDecimal(const UnicodeString &num, UErrorCode &status) {
1524     CharString cs;
1525     cs.appendInvariantChars(num, status);
1526     DecimalQuantity dl;
1527     dl.setToDecNumber(cs.toStringPiece(), status);
1528     if (U_FAILURE(status)) {
1529         init(0, 0, 0);
1530         return;
1531     }
1532     int32_t decimalPoint = num.indexOf(DOT);
1533     double n = dl.toDouble();
1534     if (decimalPoint == -1) {
1535         init(n, 0, 0);
1536     } else {
1537         int32_t v = num.length() - decimalPoint - 1;
1538         init(n, v, getFractionalDigits(n, v));
1539     }
1540 }
1541 
1542 
// Copy constructor: copies the listed fields one by one from `other`.
FixedDecimal::FixedDecimal(const FixedDecimal &other) {
    // The value and its sign/classification flags.
    this->source = other.source;
    this->isNegative = other.isNegative;
    this->_isNaN = other._isNaN;
    this->_isInfinite = other._isInfinite;

    // Integer part.
    this->intValue = other.intValue;
    this->_hasIntegerValue = other._hasIntegerValue;

    // Fraction part.
    this->visibleDecimalDigitCount = other.visibleDecimalDigitCount;
    this->decimalDigits = other.decimalDigits;
    this->decimalDigitsWithoutTrailingZeros = other.decimalDigitsWithoutTrailingZeros;
}
1554 
// Compiler-generated destructor, defined out of line in this file.
FixedDecimal::~FixedDecimal() = default;
1556 
1557 
init(double n)1558 void FixedDecimal::init(double n) {
1559     int32_t numFractionDigits = decimals(n);
1560     init(n, numFractionDigits, getFractionalDigits(n, numFractionDigits));
1561 }
1562 
1563 
init(double n,int32_t v,int64_t f)1564 void FixedDecimal::init(double n, int32_t v, int64_t f) {
1565     isNegative = n < 0.0;
1566     source = fabs(n);
1567     _isNaN = uprv_isNaN(source);
1568     _isInfinite = uprv_isInfinite(source);
1569     if (_isNaN || _isInfinite) {
1570         v = 0;
1571         f = 0;
1572         intValue = 0;
1573         _hasIntegerValue = FALSE;
1574     } else {
1575         intValue = (int64_t)source;
1576         _hasIntegerValue = (source == intValue);
1577     }
1578 
1579     visibleDecimalDigitCount = v;
1580     decimalDigits = f;
1581     if (f == 0) {
1582          decimalDigitsWithoutTrailingZeros = 0;
1583     } else {
1584         int64_t fdwtz = f;
1585         while ((fdwtz%10) == 0) {
1586             fdwtz /= 10;
1587         }
1588         decimalDigitsWithoutTrailingZeros = fdwtz;
1589     }
1590 }
1591 
1592 
1593 //  Fast path only exact initialization. Return true if successful.
1594 //     Note: Do not multiply by 10 each time through loop, rounding cruft can build
1595 //           up that makes the check for an integer result fail.
1596 //           A single multiply of the original number works more reliably.
1597 static int32_t p10[] = {1, 10, 100, 1000, 10000};
quickInit(double n)1598 UBool FixedDecimal::quickInit(double n) {
1599     UBool success = FALSE;
1600     n = fabs(n);
1601     int32_t numFractionDigits;
1602     for (numFractionDigits = 0; numFractionDigits <= 3; numFractionDigits++) {
1603         double scaledN = n * p10[numFractionDigits];
1604         if (scaledN == floor(scaledN)) {
1605             success = TRUE;
1606             break;
1607         }
1608     }
1609     if (success) {
1610         init(n, numFractionDigits, getFractionalDigits(n, numFractionDigits));
1611     }
1612     return success;
1613 }
1614 
1615 
1616 
decimals(double n)1617 int32_t FixedDecimal::decimals(double n) {
1618     // Count the number of decimal digits in the fraction part of the number, excluding trailing zeros.
1619     // fastpath the common cases, integers or fractions with 3 or fewer digits
1620     n = fabs(n);
1621     for (int ndigits=0; ndigits<=3; ndigits++) {
1622         double scaledN = n * p10[ndigits];
1623         if (scaledN == floor(scaledN)) {
1624             return ndigits;
1625         }
1626     }
1627 
1628     // Slow path, convert with sprintf, parse converted output.
1629     char  buf[30] = {0};
1630     sprintf(buf, "%1.15e", n);
1631     // formatted number looks like this: 1.234567890123457e-01
1632     int exponent = atoi(buf+18);
1633     int numFractionDigits = 15;
1634     for (int i=16; ; --i) {
1635         if (buf[i] != '0') {
1636             break;
1637         }
1638         --numFractionDigits;
1639     }
1640     numFractionDigits -= exponent;   // Fraction part of fixed point representation.
1641     return numFractionDigits;
1642 }
1643 
1644 
1645 // Get the fraction digits of a double, represented as an integer.
1646 //    v is the number of visible fraction digits in the displayed form of the number.
1647 //       Example: n = 1001.234, v = 6, result = 234000
1648 //    TODO: need to think through how this is used in the plural rule context.
1649 //          This function can easily encounter integer overflow,
1650 //          and can easily return noise digits when the precision of a double is exceeded.
1651 
getFractionalDigits(double n,int32_t v)1652 int64_t FixedDecimal::getFractionalDigits(double n, int32_t v) {
1653     if (v == 0 || n == floor(n) || uprv_isNaN(n) || uprv_isPositiveInfinity(n)) {
1654         return 0;
1655     }
1656     n = fabs(n);
1657     double fract = n - floor(n);
1658     switch (v) {
1659       case 1: return (int64_t)(fract*10.0 + 0.5);
1660       case 2: return (int64_t)(fract*100.0 + 0.5);
1661       case 3: return (int64_t)(fract*1000.0 + 0.5);
1662       default:
1663           double scaled = floor(fract * pow(10.0, (double)v) + 0.5);
1664           if (scaled > U_INT64_MAX) {
1665               return U_INT64_MAX;
1666           } else {
1667               return (int64_t)scaled;
1668           }
1669       }
1670 }
1671 
1672 
adjustForMinFractionDigits(int32_t minFractionDigits)1673 void FixedDecimal::adjustForMinFractionDigits(int32_t minFractionDigits) {
1674     int32_t numTrailingFractionZeros = minFractionDigits - visibleDecimalDigitCount;
1675     if (numTrailingFractionZeros > 0) {
1676         for (int32_t i=0; i<numTrailingFractionZeros; i++) {
1677             // Do not let the decimalDigits value overflow if there are many trailing zeros.
1678             // Limit the value to 18 digits, the most that a 64 bit int can fully represent.
1679             if (decimalDigits >= 100000000000000000LL) {
1680                 break;
1681             }
1682             decimalDigits *= 10;
1683         }
1684         visibleDecimalDigitCount += numTrailingFractionZeros;
1685     }
1686 }
1687 
1688 
getPluralOperand(PluralOperand operand) const1689 double FixedDecimal::getPluralOperand(PluralOperand operand) const {
1690     switch(operand) {
1691         case PLURAL_OPERAND_N: return source;
1692         case PLURAL_OPERAND_I: return static_cast<double>(intValue);
1693         case PLURAL_OPERAND_F: return static_cast<double>(decimalDigits);
1694         case PLURAL_OPERAND_T: return static_cast<double>(decimalDigitsWithoutTrailingZeros);
1695         case PLURAL_OPERAND_V: return visibleDecimalDigitCount;
1696         default:
1697              UPRV_UNREACHABLE;  // unexpected.
1698     }
1699 }
1700 
isNaN() const1701 bool FixedDecimal::isNaN() const {
1702     return _isNaN;
1703 }
1704 
isInfinite() const1705 bool FixedDecimal::isInfinite() const {
1706     return _isInfinite;
1707 }
1708 
hasIntegerValue() const1709 bool FixedDecimal::hasIntegerValue() const {
1710     return _hasIntegerValue;
1711 }
1712 
isNanOrInfinity() const1713 bool FixedDecimal::isNanOrInfinity() const {
1714     return _isNaN || _isInfinite;
1715 }
1716 
getVisibleFractionDigitCount() const1717 int32_t FixedDecimal::getVisibleFractionDigitCount() const {
1718     return visibleDecimalDigitCount;
1719 }
1720 
1721 
1722 
PluralAvailableLocalesEnumeration(UErrorCode & status)1723 PluralAvailableLocalesEnumeration::PluralAvailableLocalesEnumeration(UErrorCode &status) {
1724     fOpenStatus = status;
1725     if (U_FAILURE(status)) {
1726         return;
1727     }
1728     fOpenStatus = U_ZERO_ERROR; // clear any warnings.
1729     LocalUResourceBundlePointer rb(ures_openDirect(nullptr, "plurals", &fOpenStatus));
1730     fLocales = ures_getByKey(rb.getAlias(), "locales", nullptr, &fOpenStatus);
1731 }
1732 
~PluralAvailableLocalesEnumeration()1733 PluralAvailableLocalesEnumeration::~PluralAvailableLocalesEnumeration() {
1734     ures_close(fLocales);
1735     ures_close(fRes);
1736     fLocales = nullptr;
1737     fRes = nullptr;
1738 }
1739 
next(int32_t * resultLength,UErrorCode & status)1740 const char *PluralAvailableLocalesEnumeration::next(int32_t *resultLength, UErrorCode &status) {
1741     if (U_FAILURE(status)) {
1742         return nullptr;
1743     }
1744     if (U_FAILURE(fOpenStatus)) {
1745         status = fOpenStatus;
1746         return nullptr;
1747     }
1748     fRes = ures_getNextResource(fLocales, fRes, &status);
1749     if (fRes == nullptr || U_FAILURE(status)) {
1750         if (status == U_INDEX_OUTOFBOUNDS_ERROR) {
1751             status = U_ZERO_ERROR;
1752         }
1753         return nullptr;
1754     }
1755     const char *result = ures_getKey(fRes);
1756     if (resultLength != nullptr) {
1757         *resultLength = static_cast<int32_t>(uprv_strlen(result));
1758     }
1759     return result;
1760 }
1761 
1762 
reset(UErrorCode & status)1763 void PluralAvailableLocalesEnumeration::reset(UErrorCode &status) {
1764     if (U_FAILURE(status)) {
1765        return;
1766     }
1767     if (U_FAILURE(fOpenStatus)) {
1768         status = fOpenStatus;
1769         return;
1770     }
1771     ures_resetIterator(fLocales);
1772 }
1773 
count(UErrorCode & status) const1774 int32_t PluralAvailableLocalesEnumeration::count(UErrorCode &status) const {
1775     if (U_FAILURE(status)) {
1776         return 0;
1777     }
1778     if (U_FAILURE(fOpenStatus)) {
1779         status = fOpenStatus;
1780         return 0;
1781     }
1782     return ures_getSize(fLocales);
1783 }
1784 
1785 U_NAMESPACE_END
1786 
1787 
1788 #endif /* #if !UCONFIG_NO_FORMATTING */
1789 
1790 //eof
1791