1 // © 2017 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 4 #include "unicode/utypes.h" 5 6 #if !UCONFIG_NO_FORMATTING 7 #ifndef __NUMBER_STRINGBUILDER_H__ 8 #define __NUMBER_STRINGBUILDER_H__ 9 10 11 #include <cstdint> 12 #include "unicode/unum.h" // for UNUM_FIELD_COUNT 13 #include "cstring.h" 14 #include "uassert.h" 15 #include "fphdlimp.h" 16 17 U_NAMESPACE_BEGIN 18 19 class FormattedValueStringBuilderImpl; 20 21 /** 22 * A StringBuilder optimized for formatting. It implements the following key 23 * features beyond a UnicodeString: 24 * 25 * <ol> 26 * <li>Efficient prepend as well as append. 27 * <li>Keeps tracks of Fields in an efficient manner. 28 * </ol> 29 * 30 * See also FormattedValueStringBuilderImpl. 31 * 32 * @author sffc (Shane Carr) 33 */ 34 class U_I18N_API FormattedStringBuilder : public UMemory { 35 private: 36 static const int32_t DEFAULT_CAPACITY = 40; 37 38 template<typename T> 39 union ValueOrHeapArray { 40 T value[DEFAULT_CAPACITY]; 41 struct { 42 T *ptr; 43 int32_t capacity; 44 } heap; 45 }; 46 47 public: 48 FormattedStringBuilder(); 49 50 ~FormattedStringBuilder(); 51 52 FormattedStringBuilder(const FormattedStringBuilder &other); 53 54 // Convention: bottom 4 bits for field, top 4 bits for field category. 55 // Field category 0 implies the number category so that the number field 56 // literals can be directly passed as a Field type. 57 // See the helper functions in "StringBuilderFieldUtils" below. 58 typedef uint8_t Field; 59 60 FormattedStringBuilder &operator=(const FormattedStringBuilder &other); 61 62 int32_t length() const; 63 64 int32_t codePointCount() const; 65 charAt(int32_t index)66 inline char16_t charAt(int32_t index) const { 67 U_ASSERT(index >= 0); 68 U_ASSERT(index < fLength); 69 return getCharPtr()[fZero + index]; 70 } 71 fieldAt(int32_t index)72 inline Field fieldAt(int32_t index) const { 73 U_ASSERT(index >= 0); 74 U_ASSERT(index < fLength); 75 return getFieldPtr()[fZero + index]; 76 } 77 78 UChar32 getFirstCodePoint() const; 79 80 UChar32 getLastCodePoint() const; 81 82 UChar32 codePointAt(int32_t index) const; 83 84 UChar32 codePointBefore(int32_t index) const; 85 86 FormattedStringBuilder &clear(); 87 88 /** Appends a UTF-16 code unit. */ appendChar16(char16_t codeUnit,Field field,UErrorCode & status)89 inline int32_t appendChar16(char16_t codeUnit, Field field, UErrorCode& status) { 90 // appendCodePoint handles both code units and code points. 91 return insertCodePoint(fLength, codeUnit, field, status); 92 } 93 94 /** Inserts a UTF-16 code unit. Note: insert at index 0 is very efficient. */ insertChar16(int32_t index,char16_t codeUnit,Field field,UErrorCode & status)95 inline int32_t insertChar16(int32_t index, char16_t codeUnit, Field field, UErrorCode& status) { 96 // insertCodePoint handles both code units and code points. 97 return insertCodePoint(index, codeUnit, field, status); 98 } 99 100 /** Appends a Unicode code point. */ appendCodePoint(UChar32 codePoint,Field field,UErrorCode & status)101 inline int32_t appendCodePoint(UChar32 codePoint, Field field, UErrorCode &status) { 102 return insertCodePoint(fLength, codePoint, field, status); 103 } 104 105 /** Inserts a Unicode code point. Note: insert at index 0 is very efficient. */ 106 int32_t insertCodePoint(int32_t index, UChar32 codePoint, Field field, UErrorCode &status); 107 108 /** Appends a string. */ append(const UnicodeString & unistr,Field field,UErrorCode & status)109 inline int32_t append(const UnicodeString &unistr, Field field, UErrorCode &status) { 110 return insert(fLength, unistr, field, status); 111 } 112 113 /** Inserts a string. Note: insert at index 0 is very efficient. */ 114 int32_t insert(int32_t index, const UnicodeString &unistr, Field field, UErrorCode &status); 115 116 /** Inserts a substring. Note: insert at index 0 is very efficient. 117 * 118 * @param start Start index of the substring of unistr to be inserted. 119 * @param end End index of the substring of unistr to be inserted (exclusive). 120 */ 121 int32_t insert(int32_t index, const UnicodeString &unistr, int32_t start, int32_t end, Field field, 122 UErrorCode &status); 123 124 /** Deletes a substring and then inserts a string at that same position. 125 * Similar to JavaScript Array.prototype.splice(). 126 * 127 * @param startThis Start of the span to delete. 128 * @param endThis End of the span to delete (exclusive). 129 * @param unistr The string to insert at the deletion position. 130 * @param startOther Start index of the substring of unistr to be inserted. 131 * @param endOther End index of the substring of unistr to be inserted (exclusive). 132 */ 133 int32_t splice(int32_t startThis, int32_t endThis, const UnicodeString &unistr, 134 int32_t startOther, int32_t endOther, Field field, UErrorCode& status); 135 136 /** Appends a formatted string. */ 137 int32_t append(const FormattedStringBuilder &other, UErrorCode &status); 138 139 /** Inserts a formatted string. Note: insert at index 0 is very efficient. */ 140 int32_t insert(int32_t index, const FormattedStringBuilder &other, UErrorCode &status); 141 142 /** 143 * Ensures that the string buffer contains a NUL terminator. The NUL terminator does 144 * not count toward the string length. Any further changes to the string (insert or 145 * append) may invalidate the NUL terminator. 146 * 147 * You should call this method after the formatted string is completely built if you 148 * plan to return a pointer to the string from a C API. 149 */ 150 void writeTerminator(UErrorCode& status); 151 152 /** 153 * Gets a "safe" UnicodeString that can be used even after the FormattedStringBuilder is destructed. 154 */ 155 UnicodeString toUnicodeString() const; 156 157 /** 158 * Gets an "unsafe" UnicodeString that is valid only as long as the FormattedStringBuilder is alive and 159 * unchanged. Slightly faster than toUnicodeString(). 160 */ 161 const UnicodeString toTempUnicodeString() const; 162 163 UnicodeString toDebugString() const; 164 165 const char16_t *chars() const; 166 167 bool contentEquals(const FormattedStringBuilder &other) const; 168 169 bool containsField(Field field) const; 170 171 private: 172 bool fUsingHeap = false; 173 ValueOrHeapArray<char16_t> fChars; 174 ValueOrHeapArray<Field> fFields; 175 int32_t fZero = DEFAULT_CAPACITY / 2; 176 int32_t fLength = 0; 177 getCharPtr()178 inline char16_t *getCharPtr() { 179 return fUsingHeap ? fChars.heap.ptr : fChars.value; 180 } 181 getCharPtr()182 inline const char16_t *getCharPtr() const { 183 return fUsingHeap ? fChars.heap.ptr : fChars.value; 184 } 185 getFieldPtr()186 inline Field *getFieldPtr() { 187 return fUsingHeap ? fFields.heap.ptr : fFields.value; 188 } 189 getFieldPtr()190 inline const Field *getFieldPtr() const { 191 return fUsingHeap ? fFields.heap.ptr : fFields.value; 192 } 193 getCapacity()194 inline int32_t getCapacity() const { 195 return fUsingHeap ? fChars.heap.capacity : DEFAULT_CAPACITY; 196 } 197 198 int32_t prepareForInsert(int32_t index, int32_t count, UErrorCode &status); 199 200 int32_t prepareForInsertHelper(int32_t index, int32_t count, UErrorCode &status); 201 202 int32_t remove(int32_t index, int32_t count); 203 204 friend class FormattedValueStringBuilderImpl; 205 }; 206 207 /** 208 * Helper functions for dealing with the Field typedef, which stores fields 209 * in a compressed format. 210 */ 211 class StringBuilderFieldUtils { 212 public: 213 struct CategoryFieldPair { 214 int32_t category; 215 int32_t field; 216 }; 217 218 /** Compile-time function to construct a Field from a category and a field */ 219 template <int32_t category, int32_t field> compress()220 static constexpr FormattedStringBuilder::Field compress() { 221 static_assert(category != 0, "cannot use Undefined category in FieldUtils"); 222 static_assert(category <= 0xf, "only 4 bits for category"); 223 static_assert(field <= 0xf, "only 4 bits for field"); 224 return static_cast<int8_t>((category << 4) | field); 225 } 226 227 /** Runtime inline function to unpack the category and field from the Field */ expand(FormattedStringBuilder::Field field)228 static inline CategoryFieldPair expand(FormattedStringBuilder::Field field) { 229 if (field == UNUM_FIELD_COUNT) { 230 return {UFIELD_CATEGORY_UNDEFINED, 0}; 231 } 232 CategoryFieldPair ret = { 233 (field >> 4), 234 (field & 0xf) 235 }; 236 if (ret.category == 0) { 237 ret.category = UFIELD_CATEGORY_NUMBER; 238 } 239 return ret; 240 } 241 isNumericField(FormattedStringBuilder::Field field)242 static inline bool isNumericField(FormattedStringBuilder::Field field) { 243 int8_t category = field >> 4; 244 return category == 0 || category == UFIELD_CATEGORY_NUMBER; 245 } 246 }; 247 248 U_NAMESPACE_END 249 250 251 #endif //__NUMBER_STRINGBUILDER_H__ 252 253 #endif /* #if !UCONFIG_NO_FORMATTING */ 254