1 // © 2017 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 
4 #include "unicode/utypes.h"
5 
6 #if !UCONFIG_NO_FORMATTING
7 #ifndef __NUMBER_STRINGBUILDER_H__
8 #define __NUMBER_STRINGBUILDER_H__
9 
10 
11 #include <cstdint>
12 #include "unicode/unum.h" // for UNUM_FIELD_COUNT
13 #include "cstring.h"
14 #include "uassert.h"
15 #include "fphdlimp.h"
16 
17 U_NAMESPACE_BEGIN
18 
19 class FormattedValueStringBuilderImpl;
20 
21 /**
22  * A StringBuilder optimized for formatting. It implements the following key
23  * features beyond a UnicodeString:
24  *
25  * <ol>
26  * <li>Efficient prepend as well as append.
 * <li>Keeps track of Fields in an efficient manner.
28  * </ol>
29  *
30  * See also FormattedValueStringBuilderImpl.
31  *
32  * @author sffc (Shane Carr)
33  */
34 class U_I18N_API FormattedStringBuilder : public UMemory {
35   private:
36     static const int32_t DEFAULT_CAPACITY = 40;
37 
38     template<typename T>
39     union ValueOrHeapArray {
40         T value[DEFAULT_CAPACITY];
41         struct {
42             T *ptr;
43             int32_t capacity;
44         } heap;
45     };
46 
47   public:
48     FormattedStringBuilder();
49 
50     ~FormattedStringBuilder();
51 
52     FormattedStringBuilder(const FormattedStringBuilder &other);
53 
54     // Convention: bottom 4 bits for field, top 4 bits for field category.
55     // Field category 0 implies the number category so that the number field
56     // literals can be directly passed as a Field type.
57     // See the helper functions in "StringBuilderFieldUtils" below.
58     typedef uint8_t Field;
59 
60     FormattedStringBuilder &operator=(const FormattedStringBuilder &other);
61 
62     int32_t length() const;
63 
64     int32_t codePointCount() const;
65 
charAt(int32_t index)66     inline char16_t charAt(int32_t index) const {
67         U_ASSERT(index >= 0);
68         U_ASSERT(index < fLength);
69         return getCharPtr()[fZero + index];
70     }
71 
fieldAt(int32_t index)72     inline Field fieldAt(int32_t index) const {
73         U_ASSERT(index >= 0);
74         U_ASSERT(index < fLength);
75         return getFieldPtr()[fZero + index];
76     }
77 
78     UChar32 getFirstCodePoint() const;
79 
80     UChar32 getLastCodePoint() const;
81 
82     UChar32 codePointAt(int32_t index) const;
83 
84     UChar32 codePointBefore(int32_t index) const;
85 
86     FormattedStringBuilder &clear();
87 
88     /** Appends a UTF-16 code unit. */
appendChar16(char16_t codeUnit,Field field,UErrorCode & status)89     inline int32_t appendChar16(char16_t codeUnit, Field field, UErrorCode& status) {
90         // appendCodePoint handles both code units and code points.
91         return insertCodePoint(fLength, codeUnit, field, status);
92     }
93 
94     /** Inserts a UTF-16 code unit. Note: insert at index 0 is very efficient. */
insertChar16(int32_t index,char16_t codeUnit,Field field,UErrorCode & status)95     inline int32_t insertChar16(int32_t index, char16_t codeUnit, Field field, UErrorCode& status) {
96         // insertCodePoint handles both code units and code points.
97         return insertCodePoint(index, codeUnit, field, status);
98     }
99 
100     /** Appends a Unicode code point. */
appendCodePoint(UChar32 codePoint,Field field,UErrorCode & status)101     inline int32_t appendCodePoint(UChar32 codePoint, Field field, UErrorCode &status) {
102         return insertCodePoint(fLength, codePoint, field, status);
103     }
104 
105     /** Inserts a Unicode code point. Note: insert at index 0 is very efficient. */
106     int32_t insertCodePoint(int32_t index, UChar32 codePoint, Field field, UErrorCode &status);
107 
108     /** Appends a string. */
append(const UnicodeString & unistr,Field field,UErrorCode & status)109     inline int32_t append(const UnicodeString &unistr, Field field, UErrorCode &status) {
110         return insert(fLength, unistr, field, status);
111     }
112 
113     /** Inserts a string. Note: insert at index 0 is very efficient. */
114     int32_t insert(int32_t index, const UnicodeString &unistr, Field field, UErrorCode &status);
115 
116     /** Inserts a substring. Note: insert at index 0 is very efficient.
117      *
118      * @param start Start index of the substring of unistr to be inserted.
119      * @param end End index of the substring of unistr to be inserted (exclusive).
120      */
121     int32_t insert(int32_t index, const UnicodeString &unistr, int32_t start, int32_t end, Field field,
122                    UErrorCode &status);
123 
124     /** Deletes a substring and then inserts a string at that same position.
125      * Similar to JavaScript Array.prototype.splice().
126      *
127      * @param startThis Start of the span to delete.
128      * @param endThis End of the span to delete (exclusive).
129      * @param unistr The string to insert at the deletion position.
130      * @param startOther Start index of the substring of unistr to be inserted.
131      * @param endOther End index of the substring of unistr to be inserted (exclusive).
132      */
133     int32_t splice(int32_t startThis, int32_t endThis,  const UnicodeString &unistr,
134                    int32_t startOther, int32_t endOther, Field field, UErrorCode& status);
135 
136     /** Appends a formatted string. */
137     int32_t append(const FormattedStringBuilder &other, UErrorCode &status);
138 
139     /** Inserts a formatted string. Note: insert at index 0 is very efficient. */
140     int32_t insert(int32_t index, const FormattedStringBuilder &other, UErrorCode &status);
141 
142     /**
143      * Ensures that the string buffer contains a NUL terminator. The NUL terminator does
144      * not count toward the string length. Any further changes to the string (insert or
145      * append) may invalidate the NUL terminator.
146      *
147      * You should call this method after the formatted string is completely built if you
148      * plan to return a pointer to the string from a C API.
149      */
150     void writeTerminator(UErrorCode& status);
151 
152     /**
153      * Gets a "safe" UnicodeString that can be used even after the FormattedStringBuilder is destructed.
154      */
155     UnicodeString toUnicodeString() const;
156 
157     /**
158      * Gets an "unsafe" UnicodeString that is valid only as long as the FormattedStringBuilder is alive and
159      * unchanged. Slightly faster than toUnicodeString().
160      */
161     const UnicodeString toTempUnicodeString() const;
162 
163     UnicodeString toDebugString() const;
164 
165     const char16_t *chars() const;
166 
167     bool contentEquals(const FormattedStringBuilder &other) const;
168 
169     bool containsField(Field field) const;
170 
171   private:
172     bool fUsingHeap = false;
173     ValueOrHeapArray<char16_t> fChars;
174     ValueOrHeapArray<Field> fFields;
175     int32_t fZero = DEFAULT_CAPACITY / 2;
176     int32_t fLength = 0;
177 
getCharPtr()178     inline char16_t *getCharPtr() {
179         return fUsingHeap ? fChars.heap.ptr : fChars.value;
180     }
181 
getCharPtr()182     inline const char16_t *getCharPtr() const {
183         return fUsingHeap ? fChars.heap.ptr : fChars.value;
184     }
185 
getFieldPtr()186     inline Field *getFieldPtr() {
187         return fUsingHeap ? fFields.heap.ptr : fFields.value;
188     }
189 
getFieldPtr()190     inline const Field *getFieldPtr() const {
191         return fUsingHeap ? fFields.heap.ptr : fFields.value;
192     }
193 
getCapacity()194     inline int32_t getCapacity() const {
195         return fUsingHeap ? fChars.heap.capacity : DEFAULT_CAPACITY;
196     }
197 
198     int32_t prepareForInsert(int32_t index, int32_t count, UErrorCode &status);
199 
200     int32_t prepareForInsertHelper(int32_t index, int32_t count, UErrorCode &status);
201 
202     int32_t remove(int32_t index, int32_t count);
203 
204     friend class FormattedValueStringBuilderImpl;
205 };
206 
207 /**
208  * Helper functions for dealing with the Field typedef, which stores fields
209  * in a compressed format.
210  */
211 class StringBuilderFieldUtils {
212 public:
213     struct CategoryFieldPair {
214         int32_t category;
215         int32_t field;
216     };
217 
218     /** Compile-time function to construct a Field from a category and a field */
219     template <int32_t category, int32_t field>
compress()220     static constexpr FormattedStringBuilder::Field compress() {
221         static_assert(category != 0, "cannot use Undefined category in FieldUtils");
222         static_assert(category <= 0xf, "only 4 bits for category");
223         static_assert(field <= 0xf, "only 4 bits for field");
224         return static_cast<int8_t>((category << 4) | field);
225     }
226 
227     /** Runtime inline function to unpack the category and field from the Field */
expand(FormattedStringBuilder::Field field)228     static inline CategoryFieldPair expand(FormattedStringBuilder::Field field) {
229         if (field == UNUM_FIELD_COUNT) {
230             return {UFIELD_CATEGORY_UNDEFINED, 0};
231         }
232         CategoryFieldPair ret = {
233             (field >> 4),
234             (field & 0xf)
235         };
236         if (ret.category == 0) {
237             ret.category = UFIELD_CATEGORY_NUMBER;
238         }
239         return ret;
240     }
241 
isNumericField(FormattedStringBuilder::Field field)242     static inline bool isNumericField(FormattedStringBuilder::Field field) {
243         int8_t category = field >> 4;
244         return category == 0 || category == UFIELD_CATEGORY_NUMBER;
245     }
246 };
247 
248 U_NAMESPACE_END
249 
250 
251 #endif //__NUMBER_STRINGBUILDER_H__
252 
253 #endif /* #if !UCONFIG_NO_FORMATTING */
254