1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 ******************************************************************************
5 *   Copyright (C) 1997-2015, International Business Machines
6 *   Corporation and others.  All Rights Reserved.
7 ******************************************************************************
8 *   file name:  nfrs.cpp
9 *   encoding:   UTF-8
10 *   tab size:   8 (not used)
11 *   indentation:4
12 *
13 * Modification history
14 * Date        Name      Comments
15 * 10/11/2001  Doug      Ported from ICU4J
16 */
17 
18 #include "nfrs.h"
19 
20 #if U_HAVE_RBNF
21 
22 #include "unicode/uchar.h"
23 #include "nfrule.h"
24 #include "nfrlist.h"
25 #include "patternprops.h"
26 #include "putilimp.h"
27 
28 #ifdef RBNF_DEBUG
29 #include "cmemory.h"
30 #endif
31 
32 enum {
33     /** -x */
34     NEGATIVE_RULE_INDEX = 0,
35     /** x.x */
36     IMPROPER_FRACTION_RULE_INDEX = 1,
37     /** 0.x */
38     PROPER_FRACTION_RULE_INDEX = 2,
39     /** x.0 */
40     DEFAULT_RULE_INDEX = 3,
41     /** Inf */
42     INFINITY_RULE_INDEX = 4,
43     /** NaN */
44     NAN_RULE_INDEX = 5,
45     NON_NUMERICAL_RULE_LENGTH = 6
46 };
47 
48 U_NAMESPACE_BEGIN
49 
50 #if 0
51 // euclid's algorithm works with doubles
52 // note, doubles only get us up to one quadrillion or so, which
53 // isn't as much range as we get with longs.  We probably still
54 // want either 64-bit math, or BigInteger.
55 
56 static int64_t
57 util_lcm(int64_t x, int64_t y)
58 {
59     x.abs();
60     y.abs();
61 
62     if (x == 0 || y == 0) {
63         return 0;
64     } else {
65         do {
66             if (x < y) {
67                 int64_t t = x; x = y; y = t;
68             }
69             x -= y * (x/y);
70         } while (x != 0);
71 
72         return y;
73     }
74 }
75 
76 #else
77 /**
78  * Calculates the least common multiple of x and y.
79  */
80 static int64_t
util_lcm(int64_t x,int64_t y)81 util_lcm(int64_t x, int64_t y)
82 {
83     // binary gcd algorithm from Knuth, "The Art of Computer Programming,"
84     // vol. 2, 1st ed., pp. 298-299
85     int64_t x1 = x;
86     int64_t y1 = y;
87 
88     int p2 = 0;
89     while ((x1 & 1) == 0 && (y1 & 1) == 0) {
90         ++p2;
91         x1 >>= 1;
92         y1 >>= 1;
93     }
94 
95     int64_t t;
96     if ((x1 & 1) == 1) {
97         t = -y1;
98     } else {
99         t = x1;
100     }
101 
102     while (t != 0) {
103         while ((t & 1) == 0) {
104             t = t >> 1;
105         }
106         if (t > 0) {
107             x1 = t;
108         } else {
109             y1 = -t;
110         }
111         t = x1 - y1;
112     }
113 
114     int64_t gcd = x1 << p2;
115 
116     // x * y == gcd(x, y) * lcm(x, y)
117     return x / gcd * y;
118 }
119 #endif
120 
121 static const UChar gPercent = 0x0025;
122 static const UChar gColon = 0x003a;
123 static const UChar gSemicolon = 0x003b;
124 static const UChar gLineFeed = 0x000a;
125 
126 static const UChar gPercentPercent[] =
127 {
128     0x25, 0x25, 0
129 }; /* "%%" */
130 
131 static const UChar gNoparse[] =
132 {
133     0x40, 0x6E, 0x6F, 0x70, 0x61, 0x72, 0x73, 0x65, 0
134 }; /* "@noparse" */
135 
NFRuleSet(RuleBasedNumberFormat * _owner,UnicodeString * descriptions,int32_t index,UErrorCode & status)136 NFRuleSet::NFRuleSet(RuleBasedNumberFormat *_owner, UnicodeString* descriptions, int32_t index, UErrorCode& status)
137   : name()
138   , rules(0)
139   , owner(_owner)
140   , fractionRules()
141   , fIsFractionRuleSet(FALSE)
142   , fIsPublic(FALSE)
143   , fIsParseable(TRUE)
144 {
145     for (int32_t i = 0; i < NON_NUMERICAL_RULE_LENGTH; ++i) {
146         nonNumericalRules[i] = NULL;
147     }
148 
149     if (U_FAILURE(status)) {
150         return;
151     }
152 
153     UnicodeString& description = descriptions[index]; // !!! make sure index is valid
154 
155     if (description.length() == 0) {
156         // throw new IllegalArgumentException("Empty rule set description");
157         status = U_PARSE_ERROR;
158         return;
159     }
160 
161     // if the description begins with a rule set name (the rule set
162     // name can be omitted in formatter descriptions that consist
163     // of only one rule set), copy it out into our "name" member
164     // and delete it from the description
165     if (description.charAt(0) == gPercent) {
166         int32_t pos = description.indexOf(gColon);
167         if (pos == -1) {
168             // throw new IllegalArgumentException("Rule set name doesn't end in colon");
169             status = U_PARSE_ERROR;
170         } else {
171             name.setTo(description, 0, pos);
172             while (pos < description.length() && PatternProps::isWhiteSpace(description.charAt(++pos))) {
173             }
174             description.remove(0, pos);
175         }
176     } else {
177         name.setTo(UNICODE_STRING_SIMPLE("%default"));
178     }
179 
180     if (description.length() == 0) {
181         // throw new IllegalArgumentException("Empty rule set description");
182         status = U_PARSE_ERROR;
183     }
184 
185     fIsPublic = name.indexOf(gPercentPercent, 2, 0) != 0;
186 
187     if ( name.endsWith(gNoparse,8) ) {
188         fIsParseable = FALSE;
189         name.truncate(name.length()-8); // remove the @noparse from the name
190     }
191 
192     // all of the other members of NFRuleSet are initialized
193     // by parseRules()
194 }
195 
196 void
parseRules(UnicodeString & description,UErrorCode & status)197 NFRuleSet::parseRules(UnicodeString& description, UErrorCode& status)
198 {
199     // start by creating a Vector whose elements are Strings containing
200     // the descriptions of the rules (one rule per element).  The rules
201     // are separated by semicolons (there's no escape facility: ALL
202     // semicolons are rule delimiters)
203 
204     if (U_FAILURE(status)) {
205         return;
206     }
207 
208     // ensure we are starting with an empty rule list
209     rules.deleteAll();
210 
211     // dlf - the original code kept a separate description array for no reason,
212     // so I got rid of it.  The loop was too complex so I simplified it.
213 
214     UnicodeString currentDescription;
215     int32_t oldP = 0;
216     while (oldP < description.length()) {
217         int32_t p = description.indexOf(gSemicolon, oldP);
218         if (p == -1) {
219             p = description.length();
220         }
221         currentDescription.setTo(description, oldP, p - oldP);
222         NFRule::makeRules(currentDescription, this, rules.last(), owner, rules, status);
223         oldP = p + 1;
224     }
225 
226     // for rules that didn't specify a base value, their base values
227     // were initialized to 0.  Make another pass through the list and
228     // set all those rules' base values.  We also remove any special
229     // rules from the list and put them into their own member variables
230     int64_t defaultBaseValue = 0;
231 
232     // (this isn't a for loop because we might be deleting items from
233     // the vector-- we want to make sure we only increment i when
234     // we _didn't_ delete anything from the vector)
235     int32_t rulesSize = rules.size();
236     for (int32_t i = 0; i < rulesSize; i++) {
237         NFRule* rule = rules[i];
238         int64_t baseValue = rule->getBaseValue();
239 
240         if (baseValue == 0) {
241             // if the rule's base value is 0, fill in a default
242             // base value (this will be 1 plus the preceding
243             // rule's base value for regular rule sets, and the
244             // same as the preceding rule's base value in fraction
245             // rule sets)
246             rule->setBaseValue(defaultBaseValue, status);
247         }
248         else {
249             // if it's a regular rule that already knows its base value,
250             // check to make sure the rules are in order, and update
251             // the default base value for the next rule
252             if (baseValue < defaultBaseValue) {
253                 // throw new IllegalArgumentException("Rules are not in order");
254                 status = U_PARSE_ERROR;
255                 return;
256             }
257             defaultBaseValue = baseValue;
258         }
259         if (!fIsFractionRuleSet) {
260             ++defaultBaseValue;
261         }
262     }
263 }
264 
265 /**
266  * Set one of the non-numerical rules.
267  * @param rule The rule to set.
268  */
setNonNumericalRule(NFRule * rule)269 void NFRuleSet::setNonNumericalRule(NFRule *rule) {
270     int64_t baseValue = rule->getBaseValue();
271     if (baseValue == NFRule::kNegativeNumberRule) {
272         delete nonNumericalRules[NEGATIVE_RULE_INDEX];
273         nonNumericalRules[NEGATIVE_RULE_INDEX] = rule;
274     }
275     else if (baseValue == NFRule::kImproperFractionRule) {
276         setBestFractionRule(IMPROPER_FRACTION_RULE_INDEX, rule, TRUE);
277     }
278     else if (baseValue == NFRule::kProperFractionRule) {
279         setBestFractionRule(PROPER_FRACTION_RULE_INDEX, rule, TRUE);
280     }
281     else if (baseValue == NFRule::kDefaultRule) {
282         setBestFractionRule(DEFAULT_RULE_INDEX, rule, TRUE);
283     }
284     else if (baseValue == NFRule::kInfinityRule) {
285         delete nonNumericalRules[INFINITY_RULE_INDEX];
286         nonNumericalRules[INFINITY_RULE_INDEX] = rule;
287     }
288     else if (baseValue == NFRule::kNaNRule) {
289         delete nonNumericalRules[NAN_RULE_INDEX];
290         nonNumericalRules[NAN_RULE_INDEX] = rule;
291     }
292 }
293 
294 /**
295  * Determine the best fraction rule to use. Rules matching the decimal point from
296  * DecimalFormatSymbols become the main set of rules to use.
297  * @param originalIndex The index into nonNumericalRules
298  * @param newRule The new rule to consider
299  * @param rememberRule Should the new rule be added to fractionRules.
300  */
setBestFractionRule(int32_t originalIndex,NFRule * newRule,UBool rememberRule)301 void NFRuleSet::setBestFractionRule(int32_t originalIndex, NFRule *newRule, UBool rememberRule) {
302     if (rememberRule) {
303         fractionRules.add(newRule);
304     }
305     NFRule *bestResult = nonNumericalRules[originalIndex];
306     if (bestResult == NULL) {
307         nonNumericalRules[originalIndex] = newRule;
308     }
309     else {
310         // We have more than one. Which one is better?
311         const DecimalFormatSymbols *decimalFormatSymbols = owner->getDecimalFormatSymbols();
312         if (decimalFormatSymbols->getSymbol(DecimalFormatSymbols::kDecimalSeparatorSymbol).charAt(0)
313             == newRule->getDecimalPoint())
314         {
315             nonNumericalRules[originalIndex] = newRule;
316         }
317         // else leave it alone
318     }
319 }
320 
~NFRuleSet()321 NFRuleSet::~NFRuleSet()
322 {
323     for (int i = 0; i < NON_NUMERICAL_RULE_LENGTH; i++) {
324         if (i != IMPROPER_FRACTION_RULE_INDEX
325             && i != PROPER_FRACTION_RULE_INDEX
326             && i != DEFAULT_RULE_INDEX)
327         {
328             delete nonNumericalRules[i];
329         }
330         // else it will be deleted via NFRuleList fractionRules
331     }
332 }
333 
334 static UBool
util_equalRules(const NFRule * rule1,const NFRule * rule2)335 util_equalRules(const NFRule* rule1, const NFRule* rule2)
336 {
337     if (rule1) {
338         if (rule2) {
339             return *rule1 == *rule2;
340         }
341     } else if (!rule2) {
342         return TRUE;
343     }
344     return FALSE;
345 }
346 
347 bool
operator ==(const NFRuleSet & rhs) const348 NFRuleSet::operator==(const NFRuleSet& rhs) const
349 {
350     if (rules.size() == rhs.rules.size() &&
351         fIsFractionRuleSet == rhs.fIsFractionRuleSet &&
352         name == rhs.name) {
353 
354         // ...then compare the non-numerical rule lists...
355         for (int i = 0; i < NON_NUMERICAL_RULE_LENGTH; i++) {
356             if (!util_equalRules(nonNumericalRules[i], rhs.nonNumericalRules[i])) {
357                 return false;
358             }
359         }
360 
361         // ...then compare the rule lists...
362         for (uint32_t i = 0; i < rules.size(); ++i) {
363             if (*rules[i] != *rhs.rules[i]) {
364                 return false;
365             }
366         }
367         return true;
368     }
369     return false;
370 }
371 
372 void
setDecimalFormatSymbols(const DecimalFormatSymbols & newSymbols,UErrorCode & status)373 NFRuleSet::setDecimalFormatSymbols(const DecimalFormatSymbols &newSymbols, UErrorCode& status) {
374     for (uint32_t i = 0; i < rules.size(); ++i) {
375         rules[i]->setDecimalFormatSymbols(newSymbols, status);
376     }
377     // Switch the fraction rules to mirror the DecimalFormatSymbols.
378     for (int32_t nonNumericalIdx = IMPROPER_FRACTION_RULE_INDEX; nonNumericalIdx <= DEFAULT_RULE_INDEX; nonNumericalIdx++) {
379         if (nonNumericalRules[nonNumericalIdx]) {
380             for (uint32_t fIdx = 0; fIdx < fractionRules.size(); fIdx++) {
381                 NFRule *fractionRule = fractionRules[fIdx];
382                 if (nonNumericalRules[nonNumericalIdx]->getBaseValue() == fractionRule->getBaseValue()) {
383                     setBestFractionRule(nonNumericalIdx, fractionRule, FALSE);
384                 }
385             }
386         }
387     }
388 
389     for (uint32_t nnrIdx = 0; nnrIdx < NON_NUMERICAL_RULE_LENGTH; nnrIdx++) {
390         NFRule *rule = nonNumericalRules[nnrIdx];
391         if (rule) {
392             rule->setDecimalFormatSymbols(newSymbols, status);
393         }
394     }
395 }
396 
397 #define RECURSION_LIMIT 64
398 
399 void
format(int64_t number,UnicodeString & toAppendTo,int32_t pos,int32_t recursionCount,UErrorCode & status) const400 NFRuleSet::format(int64_t number, UnicodeString& toAppendTo, int32_t pos, int32_t recursionCount, UErrorCode& status) const
401 {
402     if (recursionCount >= RECURSION_LIMIT) {
403         // stop recursion
404         status = U_INVALID_STATE_ERROR;
405         return;
406     }
407     const NFRule *rule = findNormalRule(number);
408     if (rule) { // else error, but can't report it
409         rule->doFormat(number, toAppendTo, pos, ++recursionCount, status);
410     }
411 }
412 
413 void
format(double number,UnicodeString & toAppendTo,int32_t pos,int32_t recursionCount,UErrorCode & status) const414 NFRuleSet::format(double number, UnicodeString& toAppendTo, int32_t pos, int32_t recursionCount, UErrorCode& status) const
415 {
416     if (recursionCount >= RECURSION_LIMIT) {
417         // stop recursion
418         status = U_INVALID_STATE_ERROR;
419         return;
420     }
421     const NFRule *rule = findDoubleRule(number);
422     if (rule) { // else error, but can't report it
423         rule->doFormat(number, toAppendTo, pos, ++recursionCount, status);
424     }
425 }
426 
427 const NFRule*
findDoubleRule(double number) const428 NFRuleSet::findDoubleRule(double number) const
429 {
430     // if this is a fraction rule set, use findFractionRuleSetRule()
431     if (isFractionRuleSet()) {
432         return findFractionRuleSetRule(number);
433     }
434 
435     if (uprv_isNaN(number)) {
436         const NFRule *rule = nonNumericalRules[NAN_RULE_INDEX];
437         if (!rule) {
438             rule = owner->getDefaultNaNRule();
439         }
440         return rule;
441     }
442 
443     // if the number is negative, return the negative number rule
444     // (if there isn't a negative-number rule, we pretend it's a
445     // positive number)
446     if (number < 0) {
447         if (nonNumericalRules[NEGATIVE_RULE_INDEX]) {
448             return  nonNumericalRules[NEGATIVE_RULE_INDEX];
449         } else {
450             number = -number;
451         }
452     }
453 
454     if (uprv_isInfinite(number)) {
455         const NFRule *rule = nonNumericalRules[INFINITY_RULE_INDEX];
456         if (!rule) {
457             rule = owner->getDefaultInfinityRule();
458         }
459         return rule;
460     }
461 
462     // if the number isn't an integer, we use one of the fraction rules...
463     if (number != uprv_floor(number)) {
464         // if the number is between 0 and 1, return the proper
465         // fraction rule
466         if (number < 1 && nonNumericalRules[PROPER_FRACTION_RULE_INDEX]) {
467             return nonNumericalRules[PROPER_FRACTION_RULE_INDEX];
468         }
469         // otherwise, return the improper fraction rule
470         else if (nonNumericalRules[IMPROPER_FRACTION_RULE_INDEX]) {
471             return nonNumericalRules[IMPROPER_FRACTION_RULE_INDEX];
472         }
473     }
474 
475     // if there's a default rule, use it to format the number
476     if (nonNumericalRules[DEFAULT_RULE_INDEX]) {
477         return nonNumericalRules[DEFAULT_RULE_INDEX];
478     }
479 
480     // and if we haven't yet returned a rule, use findNormalRule()
481     // to find the applicable rule
482     int64_t r = util64_fromDouble(number + 0.5);
483     return findNormalRule(r);
484 }
485 
486 const NFRule *
findNormalRule(int64_t number) const487 NFRuleSet::findNormalRule(int64_t number) const
488 {
489     // if this is a fraction rule set, use findFractionRuleSetRule()
490     // to find the rule (we should only go into this clause if the
491     // value is 0)
492     if (fIsFractionRuleSet) {
493         return findFractionRuleSetRule((double)number);
494     }
495 
496     // if the number is negative, return the negative-number rule
497     // (if there isn't one, pretend the number is positive)
498     if (number < 0) {
499         if (nonNumericalRules[NEGATIVE_RULE_INDEX]) {
500             return nonNumericalRules[NEGATIVE_RULE_INDEX];
501         } else {
502             number = -number;
503         }
504     }
505 
506     // we have to repeat the preceding two checks, even though we
507     // do them in findRule(), because the version of format() that
508     // takes a long bypasses findRule() and goes straight to this
509     // function.  This function does skip the fraction rules since
510     // we know the value is an integer (it also skips the default
511     // rule, since it's considered a fraction rule.  Skipping the
512     // default rule in this function is also how we avoid infinite
513     // recursion)
514 
515     // {dlf} unfortunately this fails if there are no rules except
516     // special rules.  If there are no rules, use the default rule.
517 
518     // binary-search the rule list for the applicable rule
519     // (a rule is used for all values from its base value to
520     // the next rule's base value)
521     int32_t hi = rules.size();
522     if (hi > 0) {
523         int32_t lo = 0;
524 
525         while (lo < hi) {
526             int32_t mid = (lo + hi) / 2;
527             if (rules[mid]->getBaseValue() == number) {
528                 return rules[mid];
529             }
530             else if (rules[mid]->getBaseValue() > number) {
531                 hi = mid;
532             }
533             else {
534                 lo = mid + 1;
535             }
536         }
537         if (hi == 0) { // bad rule set, minimum base > 0
538             return NULL; // want to throw exception here
539         }
540 
541         NFRule *result = rules[hi - 1];
542 
543         // use shouldRollBack() to see whether we need to invoke the
544         // rollback rule (see shouldRollBack()'s documentation for
545         // an explanation of the rollback rule).  If we do, roll back
546         // one rule and return that one instead of the one we'd normally
547         // return
548         if (result->shouldRollBack(number)) {
549             if (hi == 1) { // bad rule set, no prior rule to rollback to from this base
550                 return NULL;
551             }
552             result = rules[hi - 2];
553         }
554         return result;
555     }
556     // else use the default rule
557     return nonNumericalRules[DEFAULT_RULE_INDEX];
558 }
559 
560 /**
561  * If this rule is a fraction rule set, this function is used by
562  * findRule() to select the most appropriate rule for formatting
563  * the number.  Basically, the base value of each rule in the rule
564  * set is treated as the denominator of a fraction.  Whichever
565  * denominator can produce the fraction closest in value to the
566  * number passed in is the result.  If there's a tie, the earlier
567  * one in the list wins.  (If there are two rules in a row with the
568  * same base value, the first one is used when the numerator of the
569  * fraction would be 1, and the second rule is used the rest of the
570  * time.
571  * @param number The number being formatted (which will always be
572  * a number between 0 and 1)
573  * @return The rule to use to format this number
574  */
575 const NFRule*
findFractionRuleSetRule(double number) const576 NFRuleSet::findFractionRuleSetRule(double number) const
577 {
578     // the obvious way to do this (multiply the value being formatted
579     // by each rule's base value until you get an integral result)
580     // doesn't work because of rounding error.  This method is more
581     // accurate
582 
583     // find the least common multiple of the rules' base values
584     // and multiply this by the number being formatted.  This is
585     // all the precision we need, and we can do all of the rest
586     // of the math using integer arithmetic
587     int64_t leastCommonMultiple = rules[0]->getBaseValue();
588     int64_t numerator;
589     {
590         for (uint32_t i = 1; i < rules.size(); ++i) {
591             leastCommonMultiple = util_lcm(leastCommonMultiple, rules[i]->getBaseValue());
592         }
593         numerator = util64_fromDouble(number * (double)leastCommonMultiple + 0.5);
594     }
595     // for each rule, do the following...
596     int64_t tempDifference;
597     int64_t difference = util64_fromDouble(uprv_maxMantissa());
598     int32_t winner = 0;
599     for (uint32_t i = 0; i < rules.size(); ++i) {
600         // "numerator" is the numerator of the fraction if the
601         // denominator is the LCD.  The numerator if the rule's
602         // base value is the denominator is "numerator" times the
603         // base value divided bythe LCD.  Here we check to see if
604         // that's an integer, and if not, how close it is to being
605         // an integer.
606         tempDifference = numerator * rules[i]->getBaseValue() % leastCommonMultiple;
607 
608 
609         // normalize the result of the above calculation: we want
610         // the numerator's distance from the CLOSEST multiple
611         // of the LCD
612         if (leastCommonMultiple - tempDifference < tempDifference) {
613             tempDifference = leastCommonMultiple - tempDifference;
614         }
615 
616         // if this is as close as we've come, keep track of how close
617         // that is, and the line number of the rule that did it.  If
618         // we've scored a direct hit, we don't have to look at any more
619         // rules
620         if (tempDifference < difference) {
621             difference = tempDifference;
622             winner = i;
623             if (difference == 0) {
624                 break;
625             }
626         }
627     }
628 
629     // if we have two successive rules that both have the winning base
630     // value, then the first one (the one we found above) is used if
631     // the numerator of the fraction is 1 and the second one is used if
632     // the numerator of the fraction is anything else (this lets us
633     // do things like "one third"/"two thirds" without having to define
634     // a whole bunch of extra rule sets)
635     if ((unsigned)(winner + 1) < rules.size() &&
636         rules[winner + 1]->getBaseValue() == rules[winner]->getBaseValue()) {
637         double n = ((double)rules[winner]->getBaseValue()) * number;
638         if (n < 0.5 || n >= 2) {
639             ++winner;
640         }
641     }
642 
643     // finally, return the winning rule
644     return rules[winner];
645 }
646 
647 /**
648  * Parses a string.  Matches the string to be parsed against each
649  * of its rules (with a base value less than upperBound) and returns
650  * the value produced by the rule that matched the most characters
651  * in the source string.
652  * @param text The string to parse
653  * @param parsePosition The initial position is ignored and assumed
654  * to be 0.  On exit, this object has been updated to point to the
655  * first character position this rule set didn't consume.
656  * @param upperBound Limits the rules that can be allowed to match.
657  * Only rules whose base values are strictly less than upperBound
658  * are considered.
659  * @return The numerical result of parsing this string.  This will
660  * be the matching rule's base value, composed appropriately with
661  * the results of matching any of its substitutions.  The object
662  * will be an instance of Long if it's an integral value; otherwise,
663  * it will be an instance of Double.  This function always returns
664  * a valid object: If nothing matched the input string at all,
665  * this function returns new Long(0), and the parse position is
666  * left unchanged.
667  */
668 #ifdef RBNF_DEBUG
669 #include <stdio.h>
670 
dumpUS(FILE * f,const UnicodeString & us)671 static void dumpUS(FILE* f, const UnicodeString& us) {
672   int len = us.length();
673   char* buf = (char *)uprv_malloc((len+1)*sizeof(char)); //new char[len+1];
674   if (buf != NULL) {
675 	  us.extract(0, len, buf);
676 	  buf[len] = 0;
677 	  fprintf(f, "%s", buf);
678 	  uprv_free(buf); //delete[] buf;
679   }
680 }
681 #endif
682 
683 UBool
parse(const UnicodeString & text,ParsePosition & pos,double upperBound,uint32_t nonNumericalExecutedRuleMask,Formattable & result) const684 NFRuleSet::parse(const UnicodeString& text, ParsePosition& pos, double upperBound, uint32_t nonNumericalExecutedRuleMask, Formattable& result) const
685 {
686     // try matching each rule in the rule set against the text being
687     // parsed.  Whichever one matches the most characters is the one
688     // that determines the value we return.
689 
690     result.setLong(0);
691 
692     // dump out if there's no text to parse
693     if (text.length() == 0) {
694         return 0;
695     }
696 
697     ParsePosition highWaterMark;
698     ParsePosition workingPos = pos;
699 
700 #ifdef RBNF_DEBUG
701     fprintf(stderr, "<nfrs> %x '", this);
702     dumpUS(stderr, name);
703     fprintf(stderr, "' text '");
704     dumpUS(stderr, text);
705     fprintf(stderr, "'\n");
706     fprintf(stderr, "  parse negative: %d\n", this, negativeNumberRule != 0);
707 #endif
708     // Try each of the negative rules, fraction rules, infinity rules and NaN rules
709     for (int i = 0; i < NON_NUMERICAL_RULE_LENGTH; i++) {
710         if (nonNumericalRules[i] && ((nonNumericalExecutedRuleMask >> i) & 1) == 0) {
711             // Mark this rule as being executed so that we don't try to execute it again.
712             nonNumericalExecutedRuleMask |= 1 << i;
713 
714             Formattable tempResult;
715             UBool success = nonNumericalRules[i]->doParse(text, workingPos, 0, upperBound, nonNumericalExecutedRuleMask, tempResult);
716             if (success && (workingPos.getIndex() > highWaterMark.getIndex())) {
717                 result = tempResult;
718                 highWaterMark = workingPos;
719             }
720             workingPos = pos;
721         }
722     }
723 #ifdef RBNF_DEBUG
724     fprintf(stderr, "<nfrs> continue other with text '");
725     dumpUS(stderr, text);
726     fprintf(stderr, "' hwm: %d\n", highWaterMark.getIndex());
727 #endif
728 
729     // finally, go through the regular rules one at a time.  We start
730     // at the end of the list because we want to try matching the most
731     // sigificant rule first (this helps ensure that we parse
732     // "five thousand three hundred six" as
733     // "(five thousand) (three hundred) (six)" rather than
734     // "((five thousand three) hundred) (six)").  Skip rules whose
735     // base values are higher than the upper bound (again, this helps
736     // limit ambiguity by making sure the rules that match a rule's
737     // are less significant than the rule containing the substitutions)/
738     {
739         int64_t ub = util64_fromDouble(upperBound);
740 #ifdef RBNF_DEBUG
741         {
742             char ubstr[64];
743             util64_toa(ub, ubstr, 64);
744             char ubstrhex[64];
745             util64_toa(ub, ubstrhex, 64, 16);
746             fprintf(stderr, "ub: %g, i64: %s (%s)\n", upperBound, ubstr, ubstrhex);
747         }
748 #endif
749         for (int32_t i = rules.size(); --i >= 0 && highWaterMark.getIndex() < text.length();) {
750             if ((!fIsFractionRuleSet) && (rules[i]->getBaseValue() >= ub)) {
751                 continue;
752             }
753             Formattable tempResult;
754             UBool success = rules[i]->doParse(text, workingPos, fIsFractionRuleSet, upperBound, nonNumericalExecutedRuleMask, tempResult);
755             if (success && workingPos.getIndex() > highWaterMark.getIndex()) {
756                 result = tempResult;
757                 highWaterMark = workingPos;
758             }
759             workingPos = pos;
760         }
761     }
762 #ifdef RBNF_DEBUG
763     fprintf(stderr, "<nfrs> exit\n");
764 #endif
765     // finally, update the parse position we were passed to point to the
766     // first character we didn't use, and return the result that
767     // corresponds to that string of characters
768     pos = highWaterMark;
769 
770     return 1;
771 }
772 
773 void
appendRules(UnicodeString & result) const774 NFRuleSet::appendRules(UnicodeString& result) const
775 {
776     uint32_t i;
777 
778     // the rule set name goes first...
779     result.append(name);
780     result.append(gColon);
781     result.append(gLineFeed);
782 
783     // followed by the regular rules...
784     for (i = 0; i < rules.size(); i++) {
785         rules[i]->_appendRuleText(result);
786         result.append(gLineFeed);
787     }
788 
789     // followed by the special rules (if they exist)
790     for (i = 0; i < NON_NUMERICAL_RULE_LENGTH; ++i) {
791         NFRule *rule = nonNumericalRules[i];
792         if (nonNumericalRules[i]) {
793             if (rule->getBaseValue() == NFRule::kImproperFractionRule
794                 || rule->getBaseValue() == NFRule::kProperFractionRule
795                 || rule->getBaseValue() == NFRule::kDefaultRule)
796             {
797                 for (uint32_t fIdx = 0; fIdx < fractionRules.size(); fIdx++) {
798                     NFRule *fractionRule = fractionRules[fIdx];
799                     if (fractionRule->getBaseValue() == rule->getBaseValue()) {
800                         fractionRule->_appendRuleText(result);
801                         result.append(gLineFeed);
802                     }
803                 }
804             }
805             else {
806                 rule->_appendRuleText(result);
807                 result.append(gLineFeed);
808             }
809         }
810     }
811 }
812 
813 // utility functions
814 
util64_fromDouble(double d)815 int64_t util64_fromDouble(double d) {
816     int64_t result = 0;
817     if (!uprv_isNaN(d)) {
818         double mant = uprv_maxMantissa();
819         if (d < -mant) {
820             d = -mant;
821         } else if (d > mant) {
822             d = mant;
823         }
824         UBool neg = d < 0;
825         if (neg) {
826             d = -d;
827         }
828         result = (int64_t)uprv_floor(d);
829         if (neg) {
830             result = -result;
831         }
832     }
833     return result;
834 }
835 
util64_pow(uint32_t base,uint16_t exponent)836 uint64_t util64_pow(uint32_t base, uint16_t exponent)  {
837     if (base == 0) {
838         return 0;
839     }
840     uint64_t result = 1;
841     uint64_t pow = base;
842     while (true) {
843         if ((exponent & 1) == 1) {
844             result *= pow;
845         }
846         exponent >>= 1;
847         if (exponent == 0) {
848             break;
849         }
850         pow *= pow;
851     }
852     return result;
853 }
854 
855 static const uint8_t asciiDigits[] = {
856     0x30u, 0x31u, 0x32u, 0x33u, 0x34u, 0x35u, 0x36u, 0x37u,
857     0x38u, 0x39u, 0x61u, 0x62u, 0x63u, 0x64u, 0x65u, 0x66u,
858     0x67u, 0x68u, 0x69u, 0x6au, 0x6bu, 0x6cu, 0x6du, 0x6eu,
859     0x6fu, 0x70u, 0x71u, 0x72u, 0x73u, 0x74u, 0x75u, 0x76u,
860     0x77u, 0x78u, 0x79u, 0x7au,
861 };
862 
863 static const UChar kUMinus = (UChar)0x002d;
864 
865 #ifdef RBNF_DEBUG
866 static const char kMinus = '-';
867 
868 static const uint8_t digitInfo[] = {
869         0,     0,     0,     0,     0,     0,     0,     0,
870         0,     0,     0,     0,     0,     0,     0,     0,
871         0,     0,     0,     0,     0,     0,     0,     0,
872         0,     0,     0,     0,     0,     0,     0,     0,
873         0,     0,     0,     0,     0,     0,     0,     0,
874         0,     0,     0,     0,     0,     0,     0,     0,
875     0x80u, 0x81u, 0x82u, 0x83u, 0x84u, 0x85u, 0x86u, 0x87u,
876     0x88u, 0x89u,     0,     0,     0,     0,     0,     0,
877         0, 0x8au, 0x8bu, 0x8cu, 0x8du, 0x8eu, 0x8fu, 0x90u,
878     0x91u, 0x92u, 0x93u, 0x94u, 0x95u, 0x96u, 0x97u, 0x98u,
879     0x99u, 0x9au, 0x9bu, 0x9cu, 0x9du, 0x9eu, 0x9fu, 0xa0u,
880     0xa1u, 0xa2u, 0xa3u,     0,     0,     0,     0,     0,
881         0, 0x8au, 0x8bu, 0x8cu, 0x8du, 0x8eu, 0x8fu, 0x90u,
882     0x91u, 0x92u, 0x93u, 0x94u, 0x95u, 0x96u, 0x97u, 0x98u,
883     0x99u, 0x9au, 0x9bu, 0x9cu, 0x9du, 0x9eu, 0x9fu, 0xa0u,
884     0xa1u, 0xa2u, 0xa3u,     0,     0,     0,     0,     0,
885 };
886 
util64_atoi(const char * str,uint32_t radix)887 int64_t util64_atoi(const char* str, uint32_t radix)
888 {
889     if (radix > 36) {
890         radix = 36;
891     } else if (radix < 2) {
892         radix = 2;
893     }
894     int64_t lradix = radix;
895 
896     int neg = 0;
897     if (*str == kMinus) {
898         ++str;
899         neg = 1;
900     }
901     int64_t result = 0;
902     uint8_t b;
903     while ((b = digitInfo[*str++]) && ((b &= 0x7f) < radix)) {
904         result *= lradix;
905         result += (int32_t)b;
906     }
907     if (neg) {
908         result = -result;
909     }
910     return result;
911 }
912 
util64_utoi(const UChar * str,uint32_t radix)913 int64_t util64_utoi(const UChar* str, uint32_t radix)
914 {
915     if (radix > 36) {
916         radix = 36;
917     } else if (radix < 2) {
918         radix = 2;
919     }
920     int64_t lradix = radix;
921 
922     int neg = 0;
923     if (*str == kUMinus) {
924         ++str;
925         neg = 1;
926     }
927     int64_t result = 0;
928     UChar c;
929     uint8_t b;
930     while (((c = *str++) < 0x0080) && (b = digitInfo[c]) && ((b &= 0x7f) < radix)) {
931         result *= lradix;
932         result += (int32_t)b;
933     }
934     if (neg) {
935         result = -result;
936     }
937     return result;
938 }
939 
util64_toa(int64_t w,char * buf,uint32_t len,uint32_t radix,UBool raw)940 uint32_t util64_toa(int64_t w, char* buf, uint32_t len, uint32_t radix, UBool raw)
941 {
942     if (radix > 36) {
943         radix = 36;
944     } else if (radix < 2) {
945         radix = 2;
946     }
947     int64_t base = radix;
948 
949     char* p = buf;
950     if (len && (w < 0) && (radix == 10) && !raw) {
951         w = -w;
952         *p++ = kMinus;
953         --len;
954     } else if (len && (w == 0)) {
955         *p++ = (char)raw ? 0 : asciiDigits[0];
956         --len;
957     }
958 
959     while (len && w != 0) {
960         int64_t n = w / base;
961         int64_t m = n * base;
962         int32_t d = (int32_t)(w-m);
963         *p++ = raw ? (char)d : asciiDigits[d];
964         w = n;
965         --len;
966     }
967     if (len) {
968         *p = 0; // null terminate if room for caller convenience
969     }
970 
971     len = p - buf;
972     if (*buf == kMinus) {
973         ++buf;
974     }
975     while (--p > buf) {
976         char c = *p;
977         *p = *buf;
978         *buf = c;
979         ++buf;
980     }
981 
982     return len;
983 }
984 #endif
985 
util64_tou(int64_t w,UChar * buf,uint32_t len,uint32_t radix,UBool raw)986 uint32_t util64_tou(int64_t w, UChar* buf, uint32_t len, uint32_t radix, UBool raw)
987 {
988     if (radix > 36) {
989         radix = 36;
990     } else if (radix < 2) {
991         radix = 2;
992     }
993     int64_t base = radix;
994 
995     UChar* p = buf;
996     if (len && (w < 0) && (radix == 10) && !raw) {
997         w = -w;
998         *p++ = kUMinus;
999         --len;
1000     } else if (len && (w == 0)) {
1001         *p++ = (UChar)raw ? 0 : asciiDigits[0];
1002         --len;
1003     }
1004 
1005     while (len && (w != 0)) {
1006         int64_t n = w / base;
1007         int64_t m = n * base;
1008         int32_t d = (int32_t)(w-m);
1009         *p++ = (UChar)(raw ? d : asciiDigits[d]);
1010         w = n;
1011         --len;
1012     }
1013     if (len) {
1014         *p = 0; // null terminate if room for caller convenience
1015     }
1016 
1017     len = (uint32_t)(p - buf);
1018     if (*buf == kUMinus) {
1019         ++buf;
1020     }
1021     while (--p > buf) {
1022         UChar c = *p;
1023         *p = *buf;
1024         *buf = c;
1025         ++buf;
1026     }
1027 
1028     return len;
1029 }
1030 
1031 
1032 U_NAMESPACE_END
1033 
1034 /* U_HAVE_RBNF */
1035 #endif
1036