1 // © 2017 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 
4 #include "unicode/utypes.h"
5 
6 #if !UCONFIG_NO_FORMATTING
7 
8 #include "formatted_string_builder.h"
9 #include "unicode/ustring.h"
10 #include "unicode/utf16.h"
11 
12 namespace {
13 
14 // A version of uprv_memcpy that checks for length 0.
15 // By default, uprv_memcpy requires a length of at least 1.
uprv_memcpy2(void * dest,const void * src,size_t len)16 inline void uprv_memcpy2(void* dest, const void* src, size_t len) {
17     if (len > 0) {
18         uprv_memcpy(dest, src, len);
19     }
20 }
21 
22 // A version of uprv_memmove that checks for length 0.
23 // By default, uprv_memmove requires a length of at least 1.
uprv_memmove2(void * dest,const void * src,size_t len)24 inline void uprv_memmove2(void* dest, const void* src, size_t len) {
25     if (len > 0) {
26         uprv_memmove(dest, src, len);
27     }
28 }
29 
30 } // namespace
31 
32 
33 U_NAMESPACE_BEGIN
34 
FormattedStringBuilder()35 FormattedStringBuilder::FormattedStringBuilder() {
36 #if U_DEBUG
37     // Initializing the memory to non-zero helps catch some bugs that involve
38     // reading from an improperly terminated string.
39     for (int32_t i=0; i<getCapacity(); i++) {
40         getCharPtr()[i] = 1;
41     }
42 #endif
43 }
44 
~FormattedStringBuilder()45 FormattedStringBuilder::~FormattedStringBuilder() {
46     if (fUsingHeap) {
47         uprv_free(fChars.heap.ptr);
48         uprv_free(fFields.heap.ptr);
49     }
50 }
51 
// Copy constructor. All of the copying logic lives in the copy-assignment
// operator, so simply delegate to it.
FormattedStringBuilder::FormattedStringBuilder(const FormattedStringBuilder &other) {
    operator=(other);
}
55 
// Copy assignment. Makes this builder hold a copy of other's chars, fields,
// zero offset and length. No UErrorCode is available here, so if heap
// allocation fails the builder is silently reset to a default-constructed state.
FormattedStringBuilder &FormattedStringBuilder::operator=(const FormattedStringBuilder &other) {
    // Assigning an object to itself is a no-op.
    if (&other == this) {
        return *this;
    }

    // Release any heap buffers currently owned; from here on getCharPtr() and
    // getFieldPtr() refer to the non-heap storage until new buffers are installed.
    if (fUsingHeap) {
        uprv_free(fChars.heap.ptr);
        uprv_free(fFields.heap.ptr);
        fUsingHeap = false;
    }

    const int32_t otherCapacity = other.getCapacity();
    const bool needsHeap = otherCapacity > DEFAULT_CAPACITY;
    if (needsHeap) {
        // C++ note: malloc appears in two places: here and in prepareForInsertHelper.
        char16_t *charBuffer =
                static_cast<char16_t *>(uprv_malloc(sizeof(char16_t) * otherCapacity));
        Field *fieldBuffer =
                static_cast<Field *>(uprv_malloc(sizeof(Field) * otherCapacity));
        if (charBuffer == nullptr || fieldBuffer == nullptr) {
            // UErrorCode is not available; fail silently.
            uprv_free(charBuffer);
            uprv_free(fieldBuffer);
            *this = FormattedStringBuilder();  // can't fail
            return *this;
        }

        // Install the new buffers before copying so the accessors below see them.
        fUsingHeap = true;
        fChars.heap.capacity = otherCapacity;
        fChars.heap.ptr = charBuffer;
        fFields.heap.capacity = otherCapacity;
        fFields.heap.ptr = fieldBuffer;
    }

    // Copy the whole buffers (full capacity), not just the used range.
    uprv_memcpy2(getCharPtr(), other.getCharPtr(), sizeof(char16_t) * otherCapacity);
    uprv_memcpy2(getFieldPtr(), other.getFieldPtr(), sizeof(Field) * otherCapacity);

    fZero = other.fZero;
    fLength = other.fLength;
    return *this;
}
97 
length() const98 int32_t FormattedStringBuilder::length() const {
99     return fLength;
100 }
101 
codePointCount() const102 int32_t FormattedStringBuilder::codePointCount() const {
103     return u_countChar32(getCharPtr() + fZero, fLength);
104 }
105 
getFirstCodePoint() const106 UChar32 FormattedStringBuilder::getFirstCodePoint() const {
107     if (fLength == 0) {
108         return -1;
109     }
110     UChar32 cp;
111     U16_GET(getCharPtr() + fZero, 0, 0, fLength, cp);
112     return cp;
113 }
114 
getLastCodePoint() const115 UChar32 FormattedStringBuilder::getLastCodePoint() const {
116     if (fLength == 0) {
117         return -1;
118     }
119     int32_t offset = fLength;
120     U16_BACK_1(getCharPtr() + fZero, 0, offset);
121     UChar32 cp;
122     U16_GET(getCharPtr() + fZero, 0, offset, fLength, cp);
123     return cp;
124 }
125 
codePointAt(int32_t index) const126 UChar32 FormattedStringBuilder::codePointAt(int32_t index) const {
127     UChar32 cp;
128     U16_GET(getCharPtr() + fZero, 0, index, fLength, cp);
129     return cp;
130 }
131 
codePointBefore(int32_t index) const132 UChar32 FormattedStringBuilder::codePointBefore(int32_t index) const {
133     int32_t offset = index;
134     U16_BACK_1(getCharPtr() + fZero, 0, offset);
135     UChar32 cp;
136     U16_GET(getCharPtr() + fZero, 0, offset, fLength, cp);
137     return cp;
138 }
139 
clear()140 FormattedStringBuilder &FormattedStringBuilder::clear() {
141     // TODO: Reset the heap here?
142     fZero = getCapacity() / 2;
143     fLength = 0;
144     return *this;
145 }
146 
147 int32_t
insertCodePoint(int32_t index,UChar32 codePoint,Field field,UErrorCode & status)148 FormattedStringBuilder::insertCodePoint(int32_t index, UChar32 codePoint, Field field, UErrorCode &status) {
149     int32_t count = U16_LENGTH(codePoint);
150     int32_t position = prepareForInsert(index, count, status);
151     if (U_FAILURE(status)) {
152         return count;
153     }
154     if (count == 1) {
155         getCharPtr()[position] = (char16_t) codePoint;
156         getFieldPtr()[position] = field;
157     } else {
158         getCharPtr()[position] = U16_LEAD(codePoint);
159         getCharPtr()[position + 1] = U16_TRAIL(codePoint);
160         getFieldPtr()[position] = getFieldPtr()[position + 1] = field;
161     }
162     return count;
163 }
164 
insert(int32_t index,const UnicodeString & unistr,Field field,UErrorCode & status)165 int32_t FormattedStringBuilder::insert(int32_t index, const UnicodeString &unistr, Field field,
166                                     UErrorCode &status) {
167     if (unistr.length() == 0) {
168         // Nothing to insert.
169         return 0;
170     } else if (unistr.length() == 1) {
171         // Fast path: insert using insertCodePoint.
172         return insertCodePoint(index, unistr.charAt(0), field, status);
173     } else {
174         return insert(index, unistr, 0, unistr.length(), field, status);
175     }
176 }
177 
178 int32_t
insert(int32_t index,const UnicodeString & unistr,int32_t start,int32_t end,Field field,UErrorCode & status)179 FormattedStringBuilder::insert(int32_t index, const UnicodeString &unistr, int32_t start, int32_t end,
180                             Field field, UErrorCode &status) {
181     int32_t count = end - start;
182     int32_t position = prepareForInsert(index, count, status);
183     if (U_FAILURE(status)) {
184         return count;
185     }
186     for (int32_t i = 0; i < count; i++) {
187         getCharPtr()[position + i] = unistr.charAt(start + i);
188         getFieldPtr()[position + i] = field;
189     }
190     return count;
191 }
192 
193 int32_t
splice(int32_t startThis,int32_t endThis,const UnicodeString & unistr,int32_t startOther,int32_t endOther,Field field,UErrorCode & status)194 FormattedStringBuilder::splice(int32_t startThis, int32_t endThis,  const UnicodeString &unistr,
195                             int32_t startOther, int32_t endOther, Field field, UErrorCode& status) {
196     int32_t thisLength = endThis - startThis;
197     int32_t otherLength = endOther - startOther;
198     int32_t count = otherLength - thisLength;
199     int32_t position;
200     if (count > 0) {
201         // Overall, chars need to be added.
202         position = prepareForInsert(startThis, count, status);
203     } else {
204         // Overall, chars need to be removed or kept the same.
205         position = remove(startThis, -count);
206     }
207     if (U_FAILURE(status)) {
208         return count;
209     }
210     for (int32_t i = 0; i < otherLength; i++) {
211         getCharPtr()[position + i] = unistr.charAt(startOther + i);
212         getFieldPtr()[position + i] = field;
213     }
214     return count;
215 }
216 
append(const FormattedStringBuilder & other,UErrorCode & status)217 int32_t FormattedStringBuilder::append(const FormattedStringBuilder &other, UErrorCode &status) {
218     return insert(fLength, other, status);
219 }
220 
221 int32_t
insert(int32_t index,const FormattedStringBuilder & other,UErrorCode & status)222 FormattedStringBuilder::insert(int32_t index, const FormattedStringBuilder &other, UErrorCode &status) {
223     if (this == &other) {
224         status = U_ILLEGAL_ARGUMENT_ERROR;
225         return 0;
226     }
227     int32_t count = other.fLength;
228     if (count == 0) {
229         // Nothing to insert.
230         return 0;
231     }
232     int32_t position = prepareForInsert(index, count, status);
233     if (U_FAILURE(status)) {
234         return count;
235     }
236     for (int32_t i = 0; i < count; i++) {
237         getCharPtr()[position + i] = other.charAt(i);
238         getFieldPtr()[position + i] = other.fieldAt(i);
239     }
240     return count;
241 }
242 
writeTerminator(UErrorCode & status)243 void FormattedStringBuilder::writeTerminator(UErrorCode& status) {
244     int32_t position = prepareForInsert(fLength, 1, status);
245     if (U_FAILURE(status)) {
246         return;
247     }
248     getCharPtr()[position] = 0;
249     getFieldPtr()[position] = UNUM_FIELD_COUNT;
250     fLength--;
251 }
252 
prepareForInsert(int32_t index,int32_t count,UErrorCode & status)253 int32_t FormattedStringBuilder::prepareForInsert(int32_t index, int32_t count, UErrorCode &status) {
254     U_ASSERT(index >= 0);
255     U_ASSERT(index <= fLength);
256     U_ASSERT(count >= 0);
257     if (index == 0 && fZero - count >= 0) {
258         // Append to start
259         fZero -= count;
260         fLength += count;
261         return fZero;
262     } else if (index == fLength && fZero + fLength + count < getCapacity()) {
263         // Append to end
264         fLength += count;
265         return fZero + fLength - count;
266     } else {
267         // Move chars around and/or allocate more space
268         return prepareForInsertHelper(index, count, status);
269     }
270 }
271 
prepareForInsertHelper(int32_t index,int32_t count,UErrorCode & status)272 int32_t FormattedStringBuilder::prepareForInsertHelper(int32_t index, int32_t count, UErrorCode &status) {
273     int32_t oldCapacity = getCapacity();
274     int32_t oldZero = fZero;
275     char16_t *oldChars = getCharPtr();
276     Field *oldFields = getFieldPtr();
277     if (fLength + count > oldCapacity) {
278         int32_t newCapacity = (fLength + count) * 2;
279         int32_t newZero = newCapacity / 2 - (fLength + count) / 2;
280 
281         // C++ note: malloc appears in two places: here and in the assignment operator.
282         auto newChars = static_cast<char16_t *> (uprv_malloc(sizeof(char16_t) * newCapacity));
283         auto newFields = static_cast<Field *>(uprv_malloc(sizeof(Field) * newCapacity));
284         if (newChars == nullptr || newFields == nullptr) {
285             uprv_free(newChars);
286             uprv_free(newFields);
287             status = U_MEMORY_ALLOCATION_ERROR;
288             return -1;
289         }
290 
291         // First copy the prefix and then the suffix, leaving room for the new chars that the
292         // caller wants to insert.
293         // C++ note: memcpy is OK because the src and dest do not overlap.
294         uprv_memcpy2(newChars + newZero, oldChars + oldZero, sizeof(char16_t) * index);
295         uprv_memcpy2(newChars + newZero + index + count,
296                 oldChars + oldZero + index,
297                 sizeof(char16_t) * (fLength - index));
298         uprv_memcpy2(newFields + newZero, oldFields + oldZero, sizeof(Field) * index);
299         uprv_memcpy2(newFields + newZero + index + count,
300                 oldFields + oldZero + index,
301                 sizeof(Field) * (fLength - index));
302 
303         if (fUsingHeap) {
304             uprv_free(oldChars);
305             uprv_free(oldFields);
306         }
307         fUsingHeap = true;
308         fChars.heap.ptr = newChars;
309         fChars.heap.capacity = newCapacity;
310         fFields.heap.ptr = newFields;
311         fFields.heap.capacity = newCapacity;
312         fZero = newZero;
313         fLength += count;
314     } else {
315         int32_t newZero = oldCapacity / 2 - (fLength + count) / 2;
316 
317         // C++ note: memmove is required because src and dest may overlap.
318         // First copy the entire string to the location of the prefix, and then move the suffix
319         // to make room for the new chars that the caller wants to insert.
320         uprv_memmove2(oldChars + newZero, oldChars + oldZero, sizeof(char16_t) * fLength);
321         uprv_memmove2(oldChars + newZero + index + count,
322                 oldChars + newZero + index,
323                 sizeof(char16_t) * (fLength - index));
324         uprv_memmove2(oldFields + newZero, oldFields + oldZero, sizeof(Field) * fLength);
325         uprv_memmove2(oldFields + newZero + index + count,
326                 oldFields + newZero + index,
327                 sizeof(Field) * (fLength - index));
328 
329         fZero = newZero;
330         fLength += count;
331     }
332     return fZero + index;
333 }
334 
remove(int32_t index,int32_t count)335 int32_t FormattedStringBuilder::remove(int32_t index, int32_t count) {
336     // TODO: Reset the heap here?  (If the string after removal can fit on stack?)
337     int32_t position = index + fZero;
338     uprv_memmove2(getCharPtr() + position,
339             getCharPtr() + position + count,
340             sizeof(char16_t) * (fLength - index - count));
341     uprv_memmove2(getFieldPtr() + position,
342             getFieldPtr() + position + count,
343             sizeof(Field) * (fLength - index - count));
344     fLength -= count;
345     return position;
346 }
347 
toUnicodeString() const348 UnicodeString FormattedStringBuilder::toUnicodeString() const {
349     return UnicodeString(getCharPtr() + fZero, fLength);
350 }
351 
toTempUnicodeString() const352 const UnicodeString FormattedStringBuilder::toTempUnicodeString() const {
353     // Readonly-alias constructor:
354     return UnicodeString(FALSE, getCharPtr() + fZero, fLength);
355 }
356 
toDebugString() const357 UnicodeString FormattedStringBuilder::toDebugString() const {
358     UnicodeString sb;
359     sb.append(u"<FormattedStringBuilder [", -1);
360     sb.append(toUnicodeString());
361     sb.append(u"] [", -1);
362     for (int i = 0; i < fLength; i++) {
363         if (fieldAt(i) == UNUM_FIELD_COUNT) {
364             sb.append(u'n');
365         } else {
366             char16_t c;
367             switch (fieldAt(i)) {
368                 case UNUM_SIGN_FIELD:
369                     c = u'-';
370                     break;
371                 case UNUM_INTEGER_FIELD:
372                     c = u'i';
373                     break;
374                 case UNUM_FRACTION_FIELD:
375                     c = u'f';
376                     break;
377                 case UNUM_EXPONENT_FIELD:
378                     c = u'e';
379                     break;
380                 case UNUM_EXPONENT_SIGN_FIELD:
381                     c = u'+';
382                     break;
383                 case UNUM_EXPONENT_SYMBOL_FIELD:
384                     c = u'E';
385                     break;
386                 case UNUM_DECIMAL_SEPARATOR_FIELD:
387                     c = u'.';
388                     break;
389                 case UNUM_GROUPING_SEPARATOR_FIELD:
390                     c = u',';
391                     break;
392                 case UNUM_PERCENT_FIELD:
393                     c = u'%';
394                     break;
395                 case UNUM_PERMILL_FIELD:
396                     c = u'‰';
397                     break;
398                 case UNUM_CURRENCY_FIELD:
399                     c = u'$';
400                     break;
401                 default:
402                     c = u'?';
403                     break;
404             }
405             sb.append(c);
406         }
407     }
408     sb.append(u"]>", -1);
409     return sb;
410 }
411 
chars() const412 const char16_t *FormattedStringBuilder::chars() const {
413     return getCharPtr() + fZero;
414 }
415 
contentEquals(const FormattedStringBuilder & other) const416 bool FormattedStringBuilder::contentEquals(const FormattedStringBuilder &other) const {
417     if (fLength != other.fLength) {
418         return false;
419     }
420     for (int32_t i = 0; i < fLength; i++) {
421         if (charAt(i) != other.charAt(i) || fieldAt(i) != other.fieldAt(i)) {
422             return false;
423         }
424     }
425     return true;
426 }
427 
containsField(Field field) const428 bool FormattedStringBuilder::containsField(Field field) const {
429     for (int32_t i = 0; i < fLength; i++) {
430         if (field == fieldAt(i)) {
431             return true;
432         }
433     }
434     return false;
435 }
436 
437 U_NAMESPACE_END
438 
439 #endif /* #if !UCONFIG_NO_FORMATTING */
440