1 // © 2017 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 
4 #include "unicode/utypes.h"
5 
6 #if !UCONFIG_NO_FORMATTING
7 
8 #include "formatted_string_builder.h"
9 #include "unicode/ustring.h"
10 #include "unicode/utf16.h"
11 #include "unicode/unum.h" // for UNumberFormatFields literals
12 
13 namespace {
14 
15 // A version of uprv_memcpy that checks for length 0.
16 // By default, uprv_memcpy requires a length of at least 1.
uprv_memcpy2(void * dest,const void * src,size_t len)17 inline void uprv_memcpy2(void* dest, const void* src, size_t len) {
18     if (len > 0) {
19         uprv_memcpy(dest, src, len);
20     }
21 }
22 
23 // A version of uprv_memmove that checks for length 0.
24 // By default, uprv_memmove requires a length of at least 1.
uprv_memmove2(void * dest,const void * src,size_t len)25 inline void uprv_memmove2(void* dest, const void* src, size_t len) {
26     if (len > 0) {
27         uprv_memmove(dest, src, len);
28     }
29 }
30 
31 } // namespace
32 
33 
34 U_NAMESPACE_BEGIN
35 
FormattedStringBuilder()36 FormattedStringBuilder::FormattedStringBuilder() {
37 #if U_DEBUG
38     // Initializing the memory to non-zero helps catch some bugs that involve
39     // reading from an improperly terminated string.
40     for (int32_t i=0; i<getCapacity(); i++) {
41         getCharPtr()[i] = 1;
42     }
43 #endif
44 }
45 
~FormattedStringBuilder()46 FormattedStringBuilder::~FormattedStringBuilder() {
47     if (fUsingHeap) {
48         uprv_free(fChars.heap.ptr);
49         uprv_free(fFields.heap.ptr);
50     }
51 }
52 
// Copy constructor: delegates all of the copying work to the copy-assignment
// operator.
FormattedStringBuilder::FormattedStringBuilder(const FormattedStringBuilder &other) {
    operator=(other);
}
56 
// Copy assignment.
//
// Frees any heap buffers this object currently owns, allocates new heap
// buffers when the source capacity exceeds DEFAULT_CAPACITY, and then copies
// the source's entire character and field buffers (full capacity, not just
// the used range) together with its fZero and fLength.
//
// No UErrorCode is available here, so an allocation failure is handled
// silently by resetting this object to a default-constructed builder.
FormattedStringBuilder &FormattedStringBuilder::operator=(const FormattedStringBuilder &other) {
    // Assigning an object to itself must leave it untouched.
    if (&other == this) {
        return *this;
    }

    // Drop the buffers we own before taking on the source's contents.
    if (fUsingHeap) {
        uprv_free(fChars.heap.ptr);
        uprv_free(fFields.heap.ptr);
        fUsingHeap = false;
    }

    const int32_t srcCapacity = other.getCapacity();
    const bool needsHeap = srcCapacity > DEFAULT_CAPACITY;
    if (needsHeap) {
        // FIXME: uprv_malloc
        // C++ note: malloc appears in two places: here and in prepareForInsertHelper.
        char16_t *charBuffer = static_cast<char16_t *>(uprv_malloc(sizeof(char16_t) * srcCapacity));
        Field *fieldBuffer = static_cast<Field *>(uprv_malloc(sizeof(Field) * srcCapacity));
        if (charBuffer == nullptr || fieldBuffer == nullptr) {
            // UErrorCode is not available; fail silently.
            uprv_free(charBuffer);
            uprv_free(fieldBuffer);
            *this = FormattedStringBuilder();  // can't fail
            return *this;
        }

        fChars.heap.ptr = charBuffer;
        fChars.heap.capacity = srcCapacity;
        fFields.heap.ptr = fieldBuffer;
        fFields.heap.capacity = srcCapacity;
        fUsingHeap = true;
    }

    // Copy both parallel buffers in full.
    uprv_memcpy2(getCharPtr(), other.getCharPtr(), sizeof(char16_t) * srcCapacity);
    uprv_memcpy2(getFieldPtr(), other.getFieldPtr(), sizeof(Field) * srcCapacity);

    fLength = other.fLength;
    fZero = other.fZero;
    return *this;
}
98 
length() const99 int32_t FormattedStringBuilder::length() const {
100     return fLength;
101 }
102 
codePointCount() const103 int32_t FormattedStringBuilder::codePointCount() const {
104     return u_countChar32(getCharPtr() + fZero, fLength);
105 }
106 
getFirstCodePoint() const107 UChar32 FormattedStringBuilder::getFirstCodePoint() const {
108     if (fLength == 0) {
109         return -1;
110     }
111     UChar32 cp;
112     U16_GET(getCharPtr() + fZero, 0, 0, fLength, cp);
113     return cp;
114 }
115 
getLastCodePoint() const116 UChar32 FormattedStringBuilder::getLastCodePoint() const {
117     if (fLength == 0) {
118         return -1;
119     }
120     int32_t offset = fLength;
121     U16_BACK_1(getCharPtr() + fZero, 0, offset);
122     UChar32 cp;
123     U16_GET(getCharPtr() + fZero, 0, offset, fLength, cp);
124     return cp;
125 }
126 
codePointAt(int32_t index) const127 UChar32 FormattedStringBuilder::codePointAt(int32_t index) const {
128     UChar32 cp;
129     U16_GET(getCharPtr() + fZero, 0, index, fLength, cp);
130     return cp;
131 }
132 
codePointBefore(int32_t index) const133 UChar32 FormattedStringBuilder::codePointBefore(int32_t index) const {
134     int32_t offset = index;
135     U16_BACK_1(getCharPtr() + fZero, 0, offset);
136     UChar32 cp;
137     U16_GET(getCharPtr() + fZero, 0, offset, fLength, cp);
138     return cp;
139 }
140 
clear()141 FormattedStringBuilder &FormattedStringBuilder::clear() {
142     // TODO: Reset the heap here?
143     fZero = getCapacity() / 2;
144     fLength = 0;
145     return *this;
146 }
147 
148 int32_t
insertCodePoint(int32_t index,UChar32 codePoint,Field field,UErrorCode & status)149 FormattedStringBuilder::insertCodePoint(int32_t index, UChar32 codePoint, Field field, UErrorCode &status) {
150     int32_t count = U16_LENGTH(codePoint);
151     int32_t position = prepareForInsert(index, count, status);
152     if (U_FAILURE(status)) {
153         return count;
154     }
155     if (count == 1) {
156         getCharPtr()[position] = (char16_t) codePoint;
157         getFieldPtr()[position] = field;
158     } else {
159         getCharPtr()[position] = U16_LEAD(codePoint);
160         getCharPtr()[position + 1] = U16_TRAIL(codePoint);
161         getFieldPtr()[position] = getFieldPtr()[position + 1] = field;
162     }
163     return count;
164 }
165 
insert(int32_t index,const UnicodeString & unistr,Field field,UErrorCode & status)166 int32_t FormattedStringBuilder::insert(int32_t index, const UnicodeString &unistr, Field field,
167                                     UErrorCode &status) {
168     if (unistr.length() == 0) {
169         // Nothing to insert.
170         return 0;
171     } else if (unistr.length() == 1) {
172         // Fast path: insert using insertCodePoint.
173         return insertCodePoint(index, unistr.charAt(0), field, status);
174     } else {
175         return insert(index, unistr, 0, unistr.length(), field, status);
176     }
177 }
178 
179 int32_t
insert(int32_t index,const UnicodeString & unistr,int32_t start,int32_t end,Field field,UErrorCode & status)180 FormattedStringBuilder::insert(int32_t index, const UnicodeString &unistr, int32_t start, int32_t end,
181                             Field field, UErrorCode &status) {
182     int32_t count = end - start;
183     int32_t position = prepareForInsert(index, count, status);
184     if (U_FAILURE(status)) {
185         return count;
186     }
187     for (int32_t i = 0; i < count; i++) {
188         getCharPtr()[position + i] = unistr.charAt(start + i);
189         getFieldPtr()[position + i] = field;
190     }
191     return count;
192 }
193 
194 int32_t
splice(int32_t startThis,int32_t endThis,const UnicodeString & unistr,int32_t startOther,int32_t endOther,Field field,UErrorCode & status)195 FormattedStringBuilder::splice(int32_t startThis, int32_t endThis,  const UnicodeString &unistr,
196                             int32_t startOther, int32_t endOther, Field field, UErrorCode& status) {
197     int32_t thisLength = endThis - startThis;
198     int32_t otherLength = endOther - startOther;
199     int32_t count = otherLength - thisLength;
200     int32_t position;
201     if (count > 0) {
202         // Overall, chars need to be added.
203         position = prepareForInsert(startThis, count, status);
204     } else {
205         // Overall, chars need to be removed or kept the same.
206         position = remove(startThis, -count);
207     }
208     if (U_FAILURE(status)) {
209         return count;
210     }
211     for (int32_t i = 0; i < otherLength; i++) {
212         getCharPtr()[position + i] = unistr.charAt(startOther + i);
213         getFieldPtr()[position + i] = field;
214     }
215     return count;
216 }
217 
append(const FormattedStringBuilder & other,UErrorCode & status)218 int32_t FormattedStringBuilder::append(const FormattedStringBuilder &other, UErrorCode &status) {
219     return insert(fLength, other, status);
220 }
221 
222 int32_t
insert(int32_t index,const FormattedStringBuilder & other,UErrorCode & status)223 FormattedStringBuilder::insert(int32_t index, const FormattedStringBuilder &other, UErrorCode &status) {
224     if (this == &other) {
225         status = U_ILLEGAL_ARGUMENT_ERROR;
226         return 0;
227     }
228     int32_t count = other.fLength;
229     if (count == 0) {
230         // Nothing to insert.
231         return 0;
232     }
233     int32_t position = prepareForInsert(index, count, status);
234     if (U_FAILURE(status)) {
235         return count;
236     }
237     for (int32_t i = 0; i < count; i++) {
238         getCharPtr()[position + i] = other.charAt(i);
239         getFieldPtr()[position + i] = other.fieldAt(i);
240     }
241     return count;
242 }
243 
writeTerminator(UErrorCode & status)244 void FormattedStringBuilder::writeTerminator(UErrorCode& status) {
245     int32_t position = prepareForInsert(fLength, 1, status);
246     if (U_FAILURE(status)) {
247         return;
248     }
249     getCharPtr()[position] = 0;
250     getFieldPtr()[position] = kUndefinedField;
251     fLength--;
252 }
253 
prepareForInsert(int32_t index,int32_t count,UErrorCode & status)254 int32_t FormattedStringBuilder::prepareForInsert(int32_t index, int32_t count, UErrorCode &status) {
255     U_ASSERT(index >= 0);
256     U_ASSERT(index <= fLength);
257     U_ASSERT(count >= 0);
258     if (index == 0 && fZero - count >= 0) {
259         // Append to start
260         fZero -= count;
261         fLength += count;
262         return fZero;
263     } else if (index == fLength && fZero + fLength + count < getCapacity()) {
264         // Append to end
265         fLength += count;
266         return fZero + fLength - count;
267     } else {
268         // Move chars around and/or allocate more space
269         return prepareForInsertHelper(index, count, status);
270     }
271 }
272 
prepareForInsertHelper(int32_t index,int32_t count,UErrorCode & status)273 int32_t FormattedStringBuilder::prepareForInsertHelper(int32_t index, int32_t count, UErrorCode &status) {
274     int32_t oldCapacity = getCapacity();
275     int32_t oldZero = fZero;
276     char16_t *oldChars = getCharPtr();
277     Field *oldFields = getFieldPtr();
278     if (fLength + count > oldCapacity) {
279         if ((fLength + count) > INT32_MAX / 2) {
280             // If we continue, then newCapacity will overlow int32_t in the next line.
281             status = U_INPUT_TOO_LONG_ERROR;
282             return -1;
283         }
284         int32_t newCapacity = (fLength + count) * 2;
285         int32_t newZero = newCapacity / 2 - (fLength + count) / 2;
286 
287         // C++ note: malloc appears in two places: here and in the assignment operator.
288         auto newChars = static_cast<char16_t *> (uprv_malloc(sizeof(char16_t) * newCapacity));
289         auto newFields = static_cast<Field *>(uprv_malloc(sizeof(Field) * newCapacity));
290         if (newChars == nullptr || newFields == nullptr) {
291             uprv_free(newChars);
292             uprv_free(newFields);
293             status = U_MEMORY_ALLOCATION_ERROR;
294             return -1;
295         }
296 
297         // First copy the prefix and then the suffix, leaving room for the new chars that the
298         // caller wants to insert.
299         // C++ note: memcpy is OK because the src and dest do not overlap.
300         uprv_memcpy2(newChars + newZero, oldChars + oldZero, sizeof(char16_t) * index);
301         uprv_memcpy2(newChars + newZero + index + count,
302                 oldChars + oldZero + index,
303                 sizeof(char16_t) * (fLength - index));
304         uprv_memcpy2(newFields + newZero, oldFields + oldZero, sizeof(Field) * index);
305         uprv_memcpy2(newFields + newZero + index + count,
306                 oldFields + oldZero + index,
307                 sizeof(Field) * (fLength - index));
308 
309         if (fUsingHeap) {
310             uprv_free(oldChars);
311             uprv_free(oldFields);
312         }
313         fUsingHeap = true;
314         fChars.heap.ptr = newChars;
315         fChars.heap.capacity = newCapacity;
316         fFields.heap.ptr = newFields;
317         fFields.heap.capacity = newCapacity;
318         fZero = newZero;
319         fLength += count;
320     } else {
321         int32_t newZero = oldCapacity / 2 - (fLength + count) / 2;
322 
323         // C++ note: memmove is required because src and dest may overlap.
324         // First copy the entire string to the location of the prefix, and then move the suffix
325         // to make room for the new chars that the caller wants to insert.
326         uprv_memmove2(oldChars + newZero, oldChars + oldZero, sizeof(char16_t) * fLength);
327         uprv_memmove2(oldChars + newZero + index + count,
328                 oldChars + newZero + index,
329                 sizeof(char16_t) * (fLength - index));
330         uprv_memmove2(oldFields + newZero, oldFields + oldZero, sizeof(Field) * fLength);
331         uprv_memmove2(oldFields + newZero + index + count,
332                 oldFields + newZero + index,
333                 sizeof(Field) * (fLength - index));
334 
335         fZero = newZero;
336         fLength += count;
337     }
338     U_ASSERT((fZero + index) >= 0);
339     return fZero + index;
340 }
341 
remove(int32_t index,int32_t count)342 int32_t FormattedStringBuilder::remove(int32_t index, int32_t count) {
343     // TODO: Reset the heap here?  (If the string after removal can fit on stack?)
344     int32_t position = index + fZero;
345     U_ASSERT(position >= 0);
346     uprv_memmove2(getCharPtr() + position,
347             getCharPtr() + position + count,
348             sizeof(char16_t) * (fLength - index - count));
349     uprv_memmove2(getFieldPtr() + position,
350             getFieldPtr() + position + count,
351             sizeof(Field) * (fLength - index - count));
352     fLength -= count;
353     return position;
354 }
355 
toUnicodeString() const356 UnicodeString FormattedStringBuilder::toUnicodeString() const {
357     return UnicodeString(getCharPtr() + fZero, fLength);
358 }
359 
toTempUnicodeString() const360 const UnicodeString FormattedStringBuilder::toTempUnicodeString() const {
361     // Readonly-alias constructor:
362     return UnicodeString(FALSE, getCharPtr() + fZero, fLength);
363 }
364 
toDebugString() const365 UnicodeString FormattedStringBuilder::toDebugString() const {
366     UnicodeString sb;
367     sb.append(u"<FormattedStringBuilder [", -1);
368     sb.append(toUnicodeString());
369     sb.append(u"] [", -1);
370     for (int i = 0; i < fLength; i++) {
371         if (fieldAt(i) == kUndefinedField) {
372             sb.append(u'n');
373         } else if (fieldAt(i).getCategory() == UFIELD_CATEGORY_NUMBER) {
374             char16_t c;
375             switch (fieldAt(i).getField()) {
376                 case UNUM_SIGN_FIELD:
377                     c = u'-';
378                     break;
379                 case UNUM_INTEGER_FIELD:
380                     c = u'i';
381                     break;
382                 case UNUM_FRACTION_FIELD:
383                     c = u'f';
384                     break;
385                 case UNUM_EXPONENT_FIELD:
386                     c = u'e';
387                     break;
388                 case UNUM_EXPONENT_SIGN_FIELD:
389                     c = u'+';
390                     break;
391                 case UNUM_EXPONENT_SYMBOL_FIELD:
392                     c = u'E';
393                     break;
394                 case UNUM_DECIMAL_SEPARATOR_FIELD:
395                     c = u'.';
396                     break;
397                 case UNUM_GROUPING_SEPARATOR_FIELD:
398                     c = u',';
399                     break;
400                 case UNUM_PERCENT_FIELD:
401                     c = u'%';
402                     break;
403                 case UNUM_PERMILL_FIELD:
404                     c = u'‰';
405                     break;
406                 case UNUM_CURRENCY_FIELD:
407                     c = u'$';
408                     break;
409                 default:
410                     c = u'0' + fieldAt(i).getField();
411                     break;
412             }
413             sb.append(c);
414         } else {
415             sb.append(u'0' + fieldAt(i).getCategory());
416         }
417     }
418     sb.append(u"]>", -1);
419     return sb;
420 }
421 
chars() const422 const char16_t *FormattedStringBuilder::chars() const {
423     return getCharPtr() + fZero;
424 }
425 
contentEquals(const FormattedStringBuilder & other) const426 bool FormattedStringBuilder::contentEquals(const FormattedStringBuilder &other) const {
427     if (fLength != other.fLength) {
428         return false;
429     }
430     for (int32_t i = 0; i < fLength; i++) {
431         if (charAt(i) != other.charAt(i) || fieldAt(i) != other.fieldAt(i)) {
432             return false;
433         }
434     }
435     return true;
436 }
437 
containsField(Field field) const438 bool FormattedStringBuilder::containsField(Field field) const {
439     for (int32_t i = 0; i < fLength; i++) {
440         if (field == fieldAt(i)) {
441             return true;
442         }
443     }
444     return false;
445 }
446 
447 U_NAMESPACE_END
448 
449 #endif /* #if !UCONFIG_NO_FORMATTING */
450