1 // © 2017 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 
4 #include "unicode/utypes.h"
5 
6 #if !UCONFIG_NO_FORMATTING
7 #ifndef __NUMBER_STRINGBUILDER_H__
8 #define __NUMBER_STRINGBUILDER_H__
9 
10 
11 #include <cstdint>
12 #include <type_traits>
13 
14 #include "cstring.h"
15 #include "uassert.h"
16 #include "fphdlimp.h"
17 
18 U_NAMESPACE_BEGIN
19 
20 class FormattedValueStringBuilderImpl;
21 
22 /**
23  * A StringBuilder optimized for formatting. It implements the following key
24  * features beyond a UnicodeString:
25  *
26  * <ol>
27  * <li>Efficient prepend as well as append.
28  * <li>Keeps track of Fields in an efficient manner.
29  * </ol>
30  *
31  * See also FormattedValueStringBuilderImpl.
32  *
33  * @author sffc (Shane Carr)
34  */
35 class U_I18N_API FormattedStringBuilder : public UMemory {
36   private:
37     static const int32_t DEFAULT_CAPACITY = 40;
38 
39     template<typename T>
40     union ValueOrHeapArray {
41         T value[DEFAULT_CAPACITY];
42         struct {
43             T *ptr;
44             int32_t capacity;
45         } heap;
46     };
47 
48   public:
49     FormattedStringBuilder();
50 
51     ~FormattedStringBuilder();
52 
53     FormattedStringBuilder(const FormattedStringBuilder &other);
54 
55     // Convention: bottom 4 bits for field, top 4 bits for field category.
56     // Field category 0 implies the number category so that the number field
57     // literals can be directly passed as a Field type.
58     // Exported as U_I18N_API so it can be used by other exports on Windows.
59     struct U_I18N_API Field {
60         uint8_t bits;
61 
62         Field() = default;
63         constexpr Field(uint8_t category, uint8_t field);
64 
65         inline UFieldCategory getCategory() const;
66         inline int32_t getField() const;
67         inline bool isNumeric() const;
68         inline bool isUndefined() const;
69         inline bool operator==(const Field& other) const;
70         inline bool operator!=(const Field& other) const;
71     };
72 
73     FormattedStringBuilder &operator=(const FormattedStringBuilder &other);
74 
75     int32_t length() const;
76 
77     int32_t codePointCount() const;
78 
charAt(int32_t index)79     inline char16_t charAt(int32_t index) const {
80         U_ASSERT(index >= 0);
81         U_ASSERT(index < fLength);
82         return getCharPtr()[fZero + index];
83     }
84 
fieldAt(int32_t index)85     inline Field fieldAt(int32_t index) const {
86         U_ASSERT(index >= 0);
87         U_ASSERT(index < fLength);
88         return getFieldPtr()[fZero + index];
89     }
90 
91     UChar32 getFirstCodePoint() const;
92 
93     UChar32 getLastCodePoint() const;
94 
95     UChar32 codePointAt(int32_t index) const;
96 
97     UChar32 codePointBefore(int32_t index) const;
98 
99     FormattedStringBuilder &clear();
100 
101     /** Appends a UTF-16 code unit. */
appendChar16(char16_t codeUnit,Field field,UErrorCode & status)102     inline int32_t appendChar16(char16_t codeUnit, Field field, UErrorCode& status) {
103         // appendCodePoint handles both code units and code points.
104         return insertCodePoint(fLength, codeUnit, field, status);
105     }
106 
107     /** Inserts a UTF-16 code unit. Note: insert at index 0 is very efficient. */
insertChar16(int32_t index,char16_t codeUnit,Field field,UErrorCode & status)108     inline int32_t insertChar16(int32_t index, char16_t codeUnit, Field field, UErrorCode& status) {
109         // insertCodePoint handles both code units and code points.
110         return insertCodePoint(index, codeUnit, field, status);
111     }
112 
113     /** Appends a Unicode code point. */
appendCodePoint(UChar32 codePoint,Field field,UErrorCode & status)114     inline int32_t appendCodePoint(UChar32 codePoint, Field field, UErrorCode &status) {
115         return insertCodePoint(fLength, codePoint, field, status);
116     }
117 
118     /** Inserts a Unicode code point. Note: insert at index 0 is very efficient. */
119     int32_t insertCodePoint(int32_t index, UChar32 codePoint, Field field, UErrorCode &status);
120 
121     /** Appends a string. */
append(const UnicodeString & unistr,Field field,UErrorCode & status)122     inline int32_t append(const UnicodeString &unistr, Field field, UErrorCode &status) {
123         return insert(fLength, unistr, field, status);
124     }
125 
126     /** Inserts a string. Note: insert at index 0 is very efficient. */
127     int32_t insert(int32_t index, const UnicodeString &unistr, Field field, UErrorCode &status);
128 
129     /** Inserts a substring. Note: insert at index 0 is very efficient.
130      *
131      * @param start Start index of the substring of unistr to be inserted.
132      * @param end End index of the substring of unistr to be inserted (exclusive).
133      */
134     int32_t insert(int32_t index, const UnicodeString &unistr, int32_t start, int32_t end, Field field,
135                    UErrorCode &status);
136 
137     /** Deletes a substring and then inserts a string at that same position.
138      * Similar to JavaScript Array.prototype.splice().
139      *
140      * @param startThis Start of the span to delete.
141      * @param endThis End of the span to delete (exclusive).
142      * @param unistr The string to insert at the deletion position.
143      * @param startOther Start index of the substring of unistr to be inserted.
144      * @param endOther End index of the substring of unistr to be inserted (exclusive).
145      */
146     int32_t splice(int32_t startThis, int32_t endThis,  const UnicodeString &unistr,
147                    int32_t startOther, int32_t endOther, Field field, UErrorCode& status);
148 
149     /** Appends a formatted string. */
150     int32_t append(const FormattedStringBuilder &other, UErrorCode &status);
151 
152     /** Inserts a formatted string. Note: insert at index 0 is very efficient. */
153     int32_t insert(int32_t index, const FormattedStringBuilder &other, UErrorCode &status);
154 
155     /**
156      * Ensures that the string buffer contains a NUL terminator. The NUL terminator does
157      * not count toward the string length. Any further changes to the string (insert or
158      * append) may invalidate the NUL terminator.
159      *
160      * You should call this method after the formatted string is completely built if you
161      * plan to return a pointer to the string from a C API.
162      */
163     void writeTerminator(UErrorCode& status);
164 
165     /**
166      * Gets a "safe" UnicodeString that can be used even after the FormattedStringBuilder is destructed.
167      */
168     UnicodeString toUnicodeString() const;
169 
170     /**
171      * Gets an "unsafe" UnicodeString that is valid only as long as the FormattedStringBuilder is alive and
172      * unchanged. Slightly faster than toUnicodeString().
173      */
174     const UnicodeString toTempUnicodeString() const;
175 
176     UnicodeString toDebugString() const;
177 
178     const char16_t *chars() const;
179 
180     bool contentEquals(const FormattedStringBuilder &other) const;
181 
182     bool containsField(Field field) const;
183 
184   private:
185     bool fUsingHeap = false;
186     ValueOrHeapArray<char16_t> fChars;
187     ValueOrHeapArray<Field> fFields;
188     int32_t fZero = DEFAULT_CAPACITY / 2;
189     int32_t fLength = 0;
190 
getCharPtr()191     inline char16_t *getCharPtr() {
192         return fUsingHeap ? fChars.heap.ptr : fChars.value;
193     }
194 
getCharPtr()195     inline const char16_t *getCharPtr() const {
196         return fUsingHeap ? fChars.heap.ptr : fChars.value;
197     }
198 
getFieldPtr()199     inline Field *getFieldPtr() {
200         return fUsingHeap ? fFields.heap.ptr : fFields.value;
201     }
202 
getFieldPtr()203     inline const Field *getFieldPtr() const {
204         return fUsingHeap ? fFields.heap.ptr : fFields.value;
205     }
206 
getCapacity()207     inline int32_t getCapacity() const {
208         return fUsingHeap ? fChars.heap.capacity : DEFAULT_CAPACITY;
209     }
210 
211     int32_t prepareForInsert(int32_t index, int32_t count, UErrorCode &status);
212 
213     int32_t prepareForInsertHelper(int32_t index, int32_t count, UErrorCode &status);
214 
215     int32_t remove(int32_t index, int32_t count);
216 
217     friend class FormattedValueStringBuilderImpl;
218 };
219 
220 static_assert(
221     std::is_pod<FormattedStringBuilder::Field>::value,
222     "Field should be a POD type for efficient initialization");
223 
Field(uint8_t category,uint8_t field)224 constexpr FormattedStringBuilder::Field::Field(uint8_t category, uint8_t field)
225     : bits((
226         U_ASSERT(category <= 0xf),
227         U_ASSERT(field <= 0xf),
228         static_cast<uint8_t>((category << 4) | field)
229     )) {}
230 
231 /**
232  * Internal constant for the undefined field for use in FormattedStringBuilder.
233  */
234 constexpr FormattedStringBuilder::Field kUndefinedField = {UFIELD_CATEGORY_UNDEFINED, 0};
235 
236 /**
237  * Internal field to signal "numeric" when fields are not supported in NumberFormat.
238  */
239 constexpr FormattedStringBuilder::Field kGeneralNumericField = {UFIELD_CATEGORY_UNDEFINED, 1};
240 
getCategory()241 inline UFieldCategory FormattedStringBuilder::Field::getCategory() const {
242     return static_cast<UFieldCategory>(bits >> 4);
243 }
244 
getField()245 inline int32_t FormattedStringBuilder::Field::getField() const {
246     return bits & 0xf;
247 }
248 
isNumeric()249 inline bool FormattedStringBuilder::Field::isNumeric() const {
250     return getCategory() == UFIELD_CATEGORY_NUMBER || *this == kGeneralNumericField;
251 }
252 
isUndefined()253 inline bool FormattedStringBuilder::Field::isUndefined() const {
254     return getCategory() == UFIELD_CATEGORY_UNDEFINED;
255 }
256 
257 inline bool FormattedStringBuilder::Field::operator==(const Field& other) const {
258     return bits == other.bits;
259 }
260 
261 inline bool FormattedStringBuilder::Field::operator!=(const Field& other) const {
262     return bits != other.bits;
263 }
264 
265 U_NAMESPACE_END
266 
267 
268 #endif //__NUMBER_STRINGBUILDER_H__
269 
270 #endif /* #if !UCONFIG_NO_FORMATTING */
271