1 /* 2 ******************************************************************************* 3 * Copyright (C) 2007-2016, International Business Machines Corporation and 4 * others. All Rights Reserved. 5 ******************************************************************************* 6 * 7 * File PLURRULE_IMPL.H 8 * 9 ******************************************************************************* 10 */ 11 12 13 #ifndef PLURRULE_IMPL 14 #define PLURRULE_IMPL 15 16 // Internal definitions for the PluralRules implementation. 17 18 #include "unicode/utypes.h" 19 20 #if !UCONFIG_NO_FORMATTING 21 22 #include "unicode/format.h" 23 #include "unicode/locid.h" 24 #include "unicode/parseerr.h" 25 #include "unicode/ures.h" 26 #include "uvector.h" 27 #include "hash.h" 28 29 class PluralRulesTest; 30 31 U_NAMESPACE_BEGIN 32 33 class AndConstraint; 34 class RuleChain; 35 class DigitInterval; 36 class PluralRules; 37 class VisibleDigits; 38 39 static const UChar DOT = ((UChar)0x002E); 40 static const UChar SINGLE_QUOTE = ((UChar)0x0027); 41 static const UChar SLASH = ((UChar)0x002F); 42 static const UChar BACKSLASH = ((UChar)0x005C); 43 static const UChar SPACE = ((UChar)0x0020); 44 static const UChar EXCLAMATION = ((UChar)0x0021); 45 static const UChar QUOTATION_MARK = ((UChar)0x0022); 46 static const UChar NUMBER_SIGN = ((UChar)0x0023); 47 static const UChar PERCENT_SIGN = ((UChar)0x0025); 48 static const UChar ASTERISK = ((UChar)0x002A); 49 static const UChar COMMA = ((UChar)0x002C); 50 static const UChar HYPHEN = ((UChar)0x002D); 51 static const UChar U_ZERO = ((UChar)0x0030); 52 static const UChar U_ONE = ((UChar)0x0031); 53 static const UChar U_TWO = ((UChar)0x0032); 54 static const UChar U_THREE = ((UChar)0x0033); 55 static const UChar U_FOUR = ((UChar)0x0034); 56 static const UChar U_FIVE = ((UChar)0x0035); 57 static const UChar U_SIX = ((UChar)0x0036); 58 static const UChar U_SEVEN = ((UChar)0x0037); 59 static const UChar U_EIGHT = ((UChar)0x0038); 60 static const UChar U_NINE = ((UChar)0x0039); 61 static const UChar COLON = ((UChar)0x003A); 62 static const UChar SEMI_COLON = ((UChar)0x003B); 63 static const UChar EQUALS = ((UChar)0x003D); 64 static const UChar AT = ((UChar)0x0040); 65 static const UChar CAP_A = ((UChar)0x0041); 66 static const UChar CAP_B = ((UChar)0x0042); 67 static const UChar CAP_R = ((UChar)0x0052); 68 static const UChar CAP_Z = ((UChar)0x005A); 69 static const UChar LOWLINE = ((UChar)0x005F); 70 static const UChar LEFTBRACE = ((UChar)0x007B); 71 static const UChar RIGHTBRACE = ((UChar)0x007D); 72 static const UChar TILDE = ((UChar)0x007E); 73 static const UChar ELLIPSIS = ((UChar)0x2026); 74 75 static const UChar LOW_A = ((UChar)0x0061); 76 static const UChar LOW_B = ((UChar)0x0062); 77 static const UChar LOW_C = ((UChar)0x0063); 78 static const UChar LOW_D = ((UChar)0x0064); 79 static const UChar LOW_E = ((UChar)0x0065); 80 static const UChar LOW_F = ((UChar)0x0066); 81 static const UChar LOW_G = ((UChar)0x0067); 82 static const UChar LOW_H = ((UChar)0x0068); 83 static const UChar LOW_I = ((UChar)0x0069); 84 static const UChar LOW_J = ((UChar)0x006a); 85 static const UChar LOW_K = ((UChar)0x006B); 86 static const UChar LOW_L = ((UChar)0x006C); 87 static const UChar LOW_M = ((UChar)0x006D); 88 static const UChar LOW_N = ((UChar)0x006E); 89 static const UChar LOW_O = ((UChar)0x006F); 90 static const UChar LOW_P = ((UChar)0x0070); 91 static const UChar LOW_Q = ((UChar)0x0071); 92 static const UChar LOW_R = ((UChar)0x0072); 93 static const UChar LOW_S = ((UChar)0x0073); 94 static const UChar LOW_T = ((UChar)0x0074); 95 static const UChar LOW_U = ((UChar)0x0075); 96 static const UChar LOW_V = ((UChar)0x0076); 97 static const UChar LOW_W = ((UChar)0x0077); 98 static const UChar LOW_Y = ((UChar)0x0079); 99 static const UChar LOW_Z = ((UChar)0x007A); 100 101 102 static const int32_t PLURAL_RANGE_HIGH = 0x7fffffff; 103 104 enum tokenType { 105 none, 106 tNumber, 107 tComma, 108 tSemiColon, 109 tSpace, 110 tColon, 111 tAt, // '@' 112 tDot, 113 tDot2, 114 tEllipsis, 115 tKeyword, 116 tAnd, 117 tOr, 118 tMod, // 'mod' or '%' 119 tNot, // 'not' only. 120 tIn, // 'in' only. 121 tEqual, // '=' only. 122 tNotEqual, // '!=' 123 tTilde, 124 tWithin, 125 tIs, 126 tVariableN, 127 tVariableI, 128 tVariableF, 129 tVariableV, 130 tVariableT, 131 tDecimal, 132 tInteger, 133 tEOF 134 }; 135 136 137 class PluralRuleParser: public UMemory { 138 public: 139 PluralRuleParser(); 140 virtual ~PluralRuleParser(); 141 142 void parse(const UnicodeString &rules, PluralRules *dest, UErrorCode &status); 143 void getNextToken(UErrorCode &status); 144 void checkSyntax(UErrorCode &status); 145 static int32_t getNumberValue(const UnicodeString &token); 146 147 private: 148 static tokenType getKeyType(const UnicodeString& token, tokenType type); 149 static tokenType charType(UChar ch); 150 static UBool isValidKeyword(const UnicodeString& token); 151 152 const UnicodeString *ruleSrc; // The rules string. 153 int32_t ruleIndex; // String index in the input rules, the current parse position. 154 UnicodeString token; // Token most recently scanned. 155 tokenType type; 156 tokenType prevType; 157 158 // The items currently being parsed & built. 159 // Note: currentChain may not be the last RuleChain in the 160 // list because the "other" chain is forced to the end. 161 AndConstraint *curAndConstraint; 162 RuleChain *currentChain; 163 164 int32_t rangeLowIdx; // Indices in the UVector of ranges of the 165 int32_t rangeHiIdx; // low and hi values currently being parsed. 166 167 enum EParseState { 168 kKeyword, 169 kExpr, 170 kValue, 171 kRangeList, 172 kSamples 173 }; 174 175 }; 176 177 /** 178 * class FixedDecimal serves to communicate the properties 179 * of a formatted number from a decimal formatter to PluralRules::select() 180 * 181 * see DecimalFormat::getFixedDecimal() 182 * @internal 183 */ 184 class U_I18N_API FixedDecimal: public UMemory { 185 public: 186 /** 187 * @param n the number, e.g. 12.345 188 * @param v The number of visible fraction digits, e.g. 3 189 * @param f The fraction digits, e.g. 345 190 */ 191 FixedDecimal(double n, int32_t v, int64_t f); 192 FixedDecimal(double n, int32_t); 193 explicit FixedDecimal(double n); 194 explicit FixedDecimal(const VisibleDigits &n); 195 FixedDecimal(); 196 FixedDecimal(const UnicodeString &s, UErrorCode &ec); 197 FixedDecimal(const FixedDecimal &other); 198 199 double get(tokenType operand) const; 200 int32_t getVisibleFractionDigitCount() const; 201 202 void init(double n, int32_t v, int64_t f); 203 void init(double n); 204 UBool quickInit(double n); // Try a fast-path only initialization, 205 // return TRUE if successful. 206 void adjustForMinFractionDigits(int32_t min); 207 static int64_t getFractionalDigits(double n, int32_t v); 208 static int32_t decimals(double n); 209 210 double source; 211 int32_t visibleDecimalDigitCount; 212 int64_t decimalDigits; 213 int64_t decimalDigitsWithoutTrailingZeros; 214 int64_t intValue; 215 UBool hasIntegerValue; 216 UBool isNegative; 217 UBool isNanOrInfinity; 218 }; 219 220 class AndConstraint : public UMemory { 221 public: 222 typedef enum RuleOp { 223 NONE, 224 MOD 225 } RuleOp; 226 RuleOp op; 227 int32_t opNum; // for mod expressions, the right operand of the mod. 228 int32_t value; // valid for 'is' rules only. 229 UVector32 *rangeList; // for 'in', 'within' rules. Null otherwise. 230 UBool negated; // TRUE for negated rules. 231 UBool integerOnly; // TRUE for 'within' rules. 232 tokenType digitsType; // n | i | v | f constraint. 233 AndConstraint *next; 234 235 AndConstraint(); 236 AndConstraint(const AndConstraint& other); 237 virtual ~AndConstraint(); 238 AndConstraint* add(); 239 // UBool isFulfilled(double number); 240 UBool isFulfilled(const FixedDecimal &number); 241 }; 242 243 class OrConstraint : public UMemory { 244 public: 245 AndConstraint *childNode; 246 OrConstraint *next; 247 OrConstraint(); 248 249 OrConstraint(const OrConstraint& other); 250 virtual ~OrConstraint(); 251 AndConstraint* add(); 252 // UBool isFulfilled(double number); 253 UBool isFulfilled(const FixedDecimal &number); 254 }; 255 256 class RuleChain : public UMemory { 257 public: 258 UnicodeString fKeyword; 259 RuleChain *fNext; 260 OrConstraint *ruleHeader; 261 UnicodeString fDecimalSamples; // Samples strings from rule source 262 UnicodeString fIntegerSamples; // without @decimal or @integer, otherwise unprocessed. 263 UBool fDecimalSamplesUnbounded; 264 UBool fIntegerSamplesUnbounded; 265 266 267 RuleChain(); 268 RuleChain(const RuleChain& other); 269 virtual ~RuleChain(); 270 271 UnicodeString select(const FixedDecimal &number) const; 272 void dumpRules(UnicodeString& result); 273 UErrorCode getKeywords(int32_t maxArraySize, UnicodeString *keywords, int32_t& arraySize) const; 274 UBool isKeyword(const UnicodeString& keyword) const; 275 }; 276 277 class PluralKeywordEnumeration : public StringEnumeration { 278 public: 279 PluralKeywordEnumeration(RuleChain *header, UErrorCode& status); 280 virtual ~PluralKeywordEnumeration(); 281 static UClassID U_EXPORT2 getStaticClassID(void); 282 virtual UClassID getDynamicClassID(void) const; 283 virtual const UnicodeString* snext(UErrorCode& status); 284 virtual void reset(UErrorCode& status); 285 virtual int32_t count(UErrorCode& status) const; 286 private: 287 int32_t pos; 288 UVector fKeywordNames; 289 }; 290 291 292 class U_I18N_API PluralAvailableLocalesEnumeration: public StringEnumeration { 293 public: 294 PluralAvailableLocalesEnumeration(UErrorCode &status); 295 virtual ~PluralAvailableLocalesEnumeration(); 296 virtual const char* next(int32_t *resultLength, UErrorCode& status); 297 virtual void reset(UErrorCode& status); 298 virtual int32_t count(UErrorCode& status) const; 299 private: 300 UErrorCode fOpenStatus; 301 UResourceBundle *fLocales; 302 UResourceBundle *fRes; 303 }; 304 305 U_NAMESPACE_END 306 307 #endif /* #if !UCONFIG_NO_FORMATTING */ 308 309 #endif // _PLURRULE_IMPL 310 //eof 311