1 // Copyright 2010-2018, Google Inc. 2 // All rights reserved. 3 // 4 // Redistribution and use in source and binary forms, with or without 5 // modification, are permitted provided that the following conditions are 6 // met: 7 // 8 // * Redistributions of source code must retain the above copyright 9 // notice, this list of conditions and the following disclaimer. 10 // * Redistributions in binary form must reproduce the above 11 // copyright notice, this list of conditions and the following disclaimer 12 // in the documentation and/or other materials provided with the 13 // distribution. 14 // * Neither the name of Google Inc. nor the names of its 15 // contributors may be used to endorse or promote products derived from 16 // this software without specific prior written permission. 17 // 18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 30 #ifndef MOZC_BASE_NUMBER_UTIL_H_ 31 #define MOZC_BASE_NUMBER_UTIL_H_ 32 33 #include <string> 34 #include <vector> 35 36 #include "base/port.h" 37 #include "base/string_piece.h" 38 39 namespace mozc { 40 41 // This class sets up utilities to manage strings including numbers like 42 // Arabic numbers, Roman numbers, Kanji numbers, and so on. 43 class NumberUtil { 44 public: 45 // Converts the string to a number and return it. 46 static int SimpleAtoi(StringPiece str); 47 48 // Returns true if the given input_string contains only number characters 49 // (regardless of halfwidth or fullwidth). 50 // False for empty string. 51 static bool IsArabicNumber(StringPiece input_string); 52 53 // Returns true if the given str consists of only ASCII digits. 54 // False for empty string. 55 static bool IsDecimalInteger(StringPiece str); 56 57 struct NumberString { 58 public: 59 enum Style { 60 DEFAULT_STYLE = 0, 61 // 123,456,789 62 NUMBER_SEPARATED_ARABIC_HALFWIDTH, 63 // "123,456,789" 64 NUMBER_SEPARATED_ARABIC_FULLWIDTH, 65 // "123億456万7890" 66 NUMBER_ARABIC_AND_KANJI_HALFWIDTH, 67 // "123億456万7890" 68 NUMBER_ARABIC_AND_KANJI_FULLWIDTH, 69 // "一億二千三百四十五万六千七百八十九" 70 NUMBER_KANJI, 71 // "壱億弐千参百四拾五万六千七百八拾九" 72 NUMBER_OLD_KANJI, 73 // "ⅠⅡⅢ" 74 NUMBER_ROMAN_CAPITAL, 75 // "ⅰⅱⅲ" 76 NUMBER_ROMAN_SMALL, 77 // "①②③" 78 NUMBER_CIRCLED, 79 // "ニ〇〇" 80 NUMBER_KANJI_ARABIC, 81 // "0x4d2" (1234 in decimal) 82 NUMBER_HEX, 83 // "02322" (1234 in decimal) 84 NUMBER_OCT, 85 // "0b10011010010" (1234 in decimal) 86 NUMBER_BIN, 87 }; 88 NumberStringNumberString89 NumberString(StringPiece value, StringPiece description, Style style) 90 : value(value.as_string()), 91 description(description.as_string()), 92 style(style) {} 93 94 // Converted string 95 string value; 96 97 // Description of Converted String 98 string description; 99 100 // Converted Number Style 101 Style style; 102 }; 103 104 // Following five functions are main functions to convert number strings. 105 // They receive two arguments: 106 // - input_num: a string consisting of Arabic numeric characters. 107 // - output: a vector consists of conveted results. 108 // If |input_num| is invalid or cannot represent as the form, these 109 // functions do nothing. If a method finds more than one representations, 110 // it pushes all candidates into the output. 111 112 // Converts half-width Arabic number string to Kan-su-ji string. 113 // - input_num: a string which *must* be half-width number string. 114 // - output: function appends new representation into output vector. 115 // value, desc and style are stored same size and same order. 116 // if invalid string is set, this function do nothing. 117 static bool ArabicToKanji(StringPiece input_num, 118 std::vector<NumberString> *output); 119 120 // Converts half-width Arabic number string to Separated Arabic string. 121 // (e.g. 1234567890 are converted to 1,234,567,890) 122 // Arguments are same as ArabicToKanji (above). 123 static bool ArabicToSeparatedArabic(StringPiece input_num, 124 std::vector<NumberString> *output); 125 126 // Converts half-width Arabic number string to full-width Arabic number 127 // string. 128 // Arguments are same as ArabicToKanji (above). 129 static bool ArabicToWideArabic(StringPiece input_num, 130 std::vector<NumberString> *output); 131 132 // Converts half-width Arabic number to various styles. 133 // Arguments are same as ArabicToKanji (above). 134 // - Roman style (i) (ii) ... 135 static bool ArabicToOtherForms(StringPiece input_num, 136 std::vector<NumberString> *output); 137 138 // Converts half-width Arabic number to various radices (2,8,16). 139 // Arguments are same as ArabicToKanji (above). 140 // Excepted number of input digits is smaller than 20, but it can be 141 // converted only if it can be stored in an unsigned 64-bit integer. 142 static bool ArabicToOtherRadixes(StringPiece input_num, 143 std::vector<NumberString> *output); 144 145 // Converts the string to a 32-/64-bit signed/unsigned int. Returns true if 146 // success or false if the string is in the wrong format. 147 static bool SafeStrToInt16(StringPiece str, int16 *value); 148 static bool SafeStrToInt32(StringPiece str, int32 *value); 149 static bool SafeStrToInt64(StringPiece str, int64 *value); 150 static bool SafeStrToUInt16(StringPiece str, uint16 *value); 151 static bool SafeStrToUInt32(StringPiece str, uint32 *value); 152 static bool SafeStrToUInt64(StringPiece str, uint64 *value); 153 static bool SafeHexStrToUInt32(StringPiece str, uint32 *value); 154 static bool SafeOctStrToUInt32(StringPiece str, uint32 *value); 155 156 // Converts the string to a double. Returns true if success or false if the 157 // string is in the wrong format. 158 // If |str| is a hexadecimal number like "0x1234", the result depends on 159 // compiler. It returns false when compiled by VisualC++. On the other hand 160 // it returns true and sets correct value when compiled by gcc. 161 static bool SafeStrToDouble(StringPiece str, double *value); 162 163 // Convert Kanji numeric into Arabic numeric. 164 // When the trim_leading_zeros is true, leading zeros for arabic_output 165 // are trimmed off. 166 // TODO(toshiyuki): This parameter is only applied for arabic_output now. 167 // 168 // Input: "2千五百" 169 // kanji_output: "二千五百" 170 // arabic output: 2500 171 // 172 // NormalizeNumbers() returns false if it finds non-number characters. 173 // NormalizeNumbersWithSuffix() skips trailing non-number characters and 174 // return them in "suffix". 175 static bool NormalizeNumbers(StringPiece input, 176 bool trim_leading_zeros, 177 string *kanji_output, 178 string *arabic_output); 179 180 static bool NormalizeNumbersWithSuffix(StringPiece input, 181 bool trim_leading_zeros, 182 string *kanji_output, 183 string *arabic_output, 184 string *suffix); 185 186 // Note: this function just does charcter-by-character conversion 187 // "百二十" -> 10020 188 static void KanjiNumberToArabicNumber(StringPiece input, string *output); 189 190 private: 191 DISALLOW_IMPLICIT_CONSTRUCTORS(NumberUtil); 192 }; 193 194 } // namespace mozc 195 196 #endif // MOZC_BASE_NUMBER_UTIL_H_ 197