1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 ******************************************************************************
5 * Copyright (C) 2014-2016, International Business Machines
6 * Corporation and others.  All Rights Reserved.
7 ******************************************************************************
8 * simpleformatter.cpp
9 */
10 
11 #include "unicode/utypes.h"
12 #include "unicode/simpleformatter.h"
13 #include "unicode/unistr.h"
14 #include "uassert.h"
15 
16 U_NAMESPACE_BEGIN
17 
18 namespace {
19 
20 /**
21  * Argument numbers must be smaller than this limit.
22  * Text segment lengths are offset by this much.
23  * This is currently the only unused char value in compiled patterns,
24  * except it is the maximum value of the first unit (max arg +1).
25  */
26 const int32_t ARG_NUM_LIMIT = 0x100;
27 /**
28  * Initial and maximum char/UChar value set for a text segment.
29  * Segment length char values are from ARG_NUM_LIMIT+1 to this value here.
30  * Normally 0xffff, but can be as small as ARG_NUM_LIMIT+1 for testing.
31  */
32 const UChar SEGMENT_LENGTH_PLACEHOLDER_CHAR = 0xffff;
33 /**
34  * Maximum length of a text segment. Longer segments are split into shorter ones.
35  */
36 const int32_t MAX_SEGMENT_LENGTH = SEGMENT_LENGTH_PLACEHOLDER_CHAR - ARG_NUM_LIMIT;
37 
38 enum {
39     APOS = 0x27,
40     DIGIT_ZERO = 0x30,
41     DIGIT_ONE = 0x31,
42     DIGIT_NINE = 0x39,
43     OPEN_BRACE = 0x7b,
44     CLOSE_BRACE = 0x7d
45 };
46 
isInvalidArray(const void * array,int32_t length)47 inline UBool isInvalidArray(const void *array, int32_t length) {
48    return (length < 0 || (array == NULL && length != 0));
49 }
50 
51 }  // namespace
52 
/**
 * Copy assignment: takes over the compiled pattern of another formatter.
 *
 * @param other The formatter to copy from.
 * @return *this
 */
SimpleFormatter &SimpleFormatter::operator=(const SimpleFormatter& other) {
    // Assigning an object to itself needs no work.
    if (this != &other) {
        compiledPattern = other.compiledPattern;
    }
    return *this;
}
60 
~SimpleFormatter()61 SimpleFormatter::~SimpleFormatter() {}
62 
applyPatternMinMaxArguments(const UnicodeString & pattern,int32_t min,int32_t max,UErrorCode & errorCode)63 UBool SimpleFormatter::applyPatternMinMaxArguments(
64         const UnicodeString &pattern,
65         int32_t min, int32_t max,
66         UErrorCode &errorCode) {
67     if (U_FAILURE(errorCode)) {
68         return FALSE;
69     }
70     // Parse consistent with MessagePattern, but
71     // - support only simple numbered arguments
72     // - build a simple binary structure into the result string
73     const UChar *patternBuffer = pattern.getBuffer();
74     int32_t patternLength = pattern.length();
75     // Reserve the first char for the number of arguments.
76     compiledPattern.setTo((UChar)0);
77     int32_t textLength = 0;
78     int32_t maxArg = -1;
79     UBool inQuote = FALSE;
80     for (int32_t i = 0; i < patternLength;) {
81         UChar c = patternBuffer[i++];
82         if (c == APOS) {
83             if (i < patternLength && (c = patternBuffer[i]) == APOS) {
84                 // double apostrophe, skip the second one
85                 ++i;
86             } else if (inQuote) {
87                 // skip the quote-ending apostrophe
88                 inQuote = FALSE;
89                 continue;
90             } else if (c == OPEN_BRACE || c == CLOSE_BRACE) {
91                 // Skip the quote-starting apostrophe, find the end of the quoted literal text.
92                 ++i;
93                 inQuote = TRUE;
94             } else {
95                 // The apostrophe is part of literal text.
96                 c = APOS;
97             }
98         } else if (!inQuote && c == OPEN_BRACE) {
99             if (textLength > 0) {
100                 compiledPattern.setCharAt(compiledPattern.length() - textLength - 1,
101                                           (UChar)(ARG_NUM_LIMIT + textLength));
102                 textLength = 0;
103             }
104             int32_t argNumber;
105             if ((i + 1) < patternLength &&
106                     0 <= (argNumber = patternBuffer[i] - DIGIT_ZERO) && argNumber <= 9 &&
107                     patternBuffer[i + 1] == CLOSE_BRACE) {
108                 i += 2;
109             } else {
110                 // Multi-digit argument number (no leading zero) or syntax error.
111                 // MessagePattern permits PatternProps.skipWhiteSpace(pattern, index)
112                 // around the number, but this class does not.
113                 argNumber = -1;
114                 if (i < patternLength && DIGIT_ONE <= (c = patternBuffer[i++]) && c <= DIGIT_NINE) {
115                     argNumber = c - DIGIT_ZERO;
116                     while (i < patternLength &&
117                             DIGIT_ZERO <= (c = patternBuffer[i++]) && c <= DIGIT_NINE) {
118                         argNumber = argNumber * 10 + (c - DIGIT_ZERO);
119                         if (argNumber >= ARG_NUM_LIMIT) {
120                             break;
121                         }
122                     }
123                 }
124                 if (argNumber < 0 || c != CLOSE_BRACE) {
125                     errorCode = U_ILLEGAL_ARGUMENT_ERROR;
126                     return FALSE;
127                 }
128             }
129             if (argNumber > maxArg) {
130                 maxArg = argNumber;
131             }
132             compiledPattern.append((UChar)argNumber);
133             continue;
134         }  // else: c is part of literal text
135         // Append c and track the literal-text segment length.
136         if (textLength == 0) {
137             // Reserve a char for the length of a new text segment, preset the maximum length.
138             compiledPattern.append(SEGMENT_LENGTH_PLACEHOLDER_CHAR);
139         }
140         compiledPattern.append(c);
141         if (++textLength == MAX_SEGMENT_LENGTH) {
142             textLength = 0;
143         }
144     }
145     if (textLength > 0) {
146         compiledPattern.setCharAt(compiledPattern.length() - textLength - 1,
147                                   (UChar)(ARG_NUM_LIMIT + textLength));
148     }
149     int32_t argCount = maxArg + 1;
150     if (argCount < min || max < argCount) {
151         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
152         return FALSE;
153     }
154     compiledPattern.setCharAt(0, (UChar)argCount);
155     return TRUE;
156 }
157 
/**
 * Formats the pattern with one argument value and appends the text to appendTo.
 *
 * @param value0    Value for argument {0}.
 * @param appendTo  Receives the formatted text, appended.
 * @param errorCode In/out error code, passed on to formatAndAppend().
 * @return appendTo
 */
UnicodeString& SimpleFormatter::format(
        const UnicodeString &value0,
        UnicodeString &appendTo, UErrorCode &errorCode) const {
    // Wrap the value in a one-element pointer array; no offsets are requested.
    const UnicodeString *const argv[1] = { &value0 };
    return formatAndAppend(argv, 1, appendTo, NULL, 0, errorCode);
}
164 
/**
 * Formats the pattern with two argument values and appends the text to appendTo.
 *
 * @param value0    Value for argument {0}.
 * @param value1    Value for argument {1}.
 * @param appendTo  Receives the formatted text, appended.
 * @param errorCode In/out error code, passed on to formatAndAppend().
 * @return appendTo
 */
UnicodeString& SimpleFormatter::format(
        const UnicodeString &value0,
        const UnicodeString &value1,
        UnicodeString &appendTo, UErrorCode &errorCode) const {
    // Wrap the values in a two-element pointer array; no offsets are requested.
    const UnicodeString *const argv[2] = { &value0, &value1 };
    return formatAndAppend(argv, 2, appendTo, NULL, 0, errorCode);
}
172 
/**
 * Formats the pattern with three argument values and appends the text to appendTo.
 *
 * @param value0    Value for argument {0}.
 * @param value1    Value for argument {1}.
 * @param value2    Value for argument {2}.
 * @param appendTo  Receives the formatted text, appended.
 * @param errorCode In/out error code, passed on to formatAndAppend().
 * @return appendTo
 */
UnicodeString& SimpleFormatter::format(
        const UnicodeString &value0,
        const UnicodeString &value1,
        const UnicodeString &value2,
        UnicodeString &appendTo, UErrorCode &errorCode) const {
    // Wrap the values in a three-element pointer array; no offsets are requested.
    const UnicodeString *const argv[3] = { &value0, &value1, &value2 };
    return formatAndAppend(argv, 3, appendTo, NULL, 0, errorCode);
}
181 
/**
 * Formats the compiled pattern with the given values and appends the text
 * to appendTo. appendTo itself must not be one of the values.
 *
 * @param values        Array of pointers to the argument values.
 * @param valuesLength  Number of entries in values; must be at least
 *                      getArgumentLimit().
 * @param appendTo      Receives the formatted text, appended.
 * @param offsets       Optional array; entry n receives the position in
 *                      appendTo where the value of argument n was inserted,
 *                      or -1 if argument n does not occur.
 * @param offsetsLength Number of entries in offsets.
 * @param errorCode     In/out error code. Set to U_ILLEGAL_ARGUMENT_ERROR if
 *                      an array/length pair is invalid or too few values
 *                      are supplied.
 * @return appendTo
 */
UnicodeString& SimpleFormatter::formatAndAppend(
        const UnicodeString *const *values, int32_t valuesLength,
        UnicodeString &appendTo,
        int32_t *offsets, int32_t offsetsLength, UErrorCode &errorCode) const {
    if (U_FAILURE(errorCode)) {
        return appendTo;
    }
    // Both arrays must be well-formed and there must be a value for every argument.
    const UBool argumentsValid =
            !isInvalidArray(values, valuesLength) &&
            !isInvalidArray(offsets, offsetsLength) &&
            getArgumentLimit() <= valuesLength;
    if (!argumentsValid) {
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return appendTo;
    }
    const UChar *cp = compiledPattern.getBuffer();
    const int32_t cpLength = compiledPattern.length();
    // No result copy is needed: using appendTo as a value is forbidden (TRUE).
    return format(cp, cpLength, values,
                  appendTo, NULL, TRUE,
                  offsets, offsetsLength, errorCode);
}
198 
/**
 * Formats the compiled pattern with the given values and stores the text in
 * result, replacing its previous contents. result may itself be one of the
 * values.
 *
 * @param values        Array of pointers to the argument values.
 * @param valuesLength  Number of entries in values; must be at least the
 *                      argument limit of the compiled pattern.
 * @param result        Receives the formatted text.
 * @param offsets       Optional array; entry n receives the position in
 *                      result where the value of argument n was inserted,
 *                      or -1 if argument n does not occur.
 * @param offsetsLength Number of entries in offsets.
 * @param errorCode     In/out error code. Set to U_ILLEGAL_ARGUMENT_ERROR if
 *                      an array/length pair is invalid or too few values
 *                      are supplied.
 * @return result
 */
UnicodeString &SimpleFormatter::formatAndReplace(
        const UnicodeString *const *values, int32_t valuesLength,
        UnicodeString &result,
        int32_t *offsets, int32_t offsetsLength, UErrorCode &errorCode) const {
    if (U_FAILURE(errorCode)) {
        return result;
    }
    if (isInvalidArray(values, valuesLength) || isInvalidArray(offsets, offsetsLength)) {
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return result;
    }
    const UChar *cp = compiledPattern.getBuffer();
    const int32_t cpLength = compiledPattern.length();
    const int32_t argLimit = getArgumentLimit(cp, cpLength);
    if (valuesLength < argLimit) {
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return result;
    }

    // TRUE if the pattern begins with an argument whose value is the result
    // object itself: the result contents are then kept and appended to.
    // In every other case the result is emptied before formatting.
    UBool keepResultContents = FALSE;
    // Snapshot of result, taken only if result is also used as the value of
    // an argument that is not at the very start of the pattern.
    UnicodeString resultCopy;
    if (argLimit > 0) {
        int32_t pos = 1;
        while (pos < cpLength) {
            const int32_t unit = cp[pos++];
            if (unit >= ARG_NUM_LIMIT) {
                // Text segment: skip over its literal text.
                pos += unit - ARG_NUM_LIMIT;
                continue;
            }
            if (values[unit] != &result) {
                continue;
            }
            if (pos == 2) {
                // The argument is the first unit after the argument count.
                keepResultContents = TRUE;
            } else if (resultCopy.isEmpty() && !result.isEmpty()) {
                resultCopy = result;
            }
        }
    }
    if (!keepResultContents) {
        result.remove();
    }
    return format(cp, cpLength, values,
                  result, &resultCopy, FALSE,
                  offsets, offsetsLength, errorCode);
}
247 
getTextWithNoArguments(const UChar * compiledPattern,int32_t compiledPatternLength,int32_t * offsets,int32_t offsetsLength)248 UnicodeString SimpleFormatter::getTextWithNoArguments(
249         const UChar *compiledPattern,
250         int32_t compiledPatternLength,
251         int32_t* offsets,
252         int32_t offsetsLength) {
253     for (int32_t i = 0; i < offsetsLength; i++) {
254         offsets[i] = -1;
255     }
256     int32_t capacity = compiledPatternLength - 1 -
257             getArgumentLimit(compiledPattern, compiledPatternLength);
258     UnicodeString sb(capacity, 0, 0);  // Java: StringBuilder
259     for (int32_t i = 1; i < compiledPatternLength;) {
260         int32_t n = compiledPattern[i++];
261         if (n > ARG_NUM_LIMIT) {
262             n -= ARG_NUM_LIMIT;
263             sb.append(compiledPattern + i, n);
264             i += n;
265         } else if (n < offsetsLength) {
266             // TODO(ICU-20406): This does not distinguish between "{0}{1}" and "{1}{0}".
267             // Consider removing this function and replacing it with an iterator interface.
268             offsets[n] = sb.length();
269         }
270     }
271     return sb;
272 }
273 
/**
 * Worker shared by formatAndAppend() and formatAndReplace(): walks the
 * compiled pattern and appends text segments and argument values to result.
 *
 * @param compiledPattern       Compiled form of a pattern.
 * @param compiledPatternLength Number of units in compiledPattern.
 * @param values                Array of pointers to the argument values,
 *                              indexed by argument number.
 * @param result                Receives the formatted text, appended.
 * @param resultCopy            Used in place of result when result occurs as
 *                              a value anywhere but at the pattern start.
 * @param forbidResultAsValue   If TRUE, result occurring as a value is an error.
 * @param offsets               Array; entry n receives the position in
 *                              result where the value of argument n was
 *                              inserted, or -1 if argument n does not occur.
 * @param offsetsLength         Number of entries in offsets.
 * @param errorCode             In/out error code. Set to
 *                              U_ILLEGAL_ARGUMENT_ERROR for a NULL value
 *                              pointer or a forbidden use of result as a value.
 * @return result
 */
UnicodeString &SimpleFormatter::format(
        const UChar *compiledPattern, int32_t compiledPatternLength,
        const UnicodeString *const *values,
        UnicodeString &result, const UnicodeString *resultCopy, UBool forbidResultAsValue,
        int32_t *offsets, int32_t offsetsLength,
        UErrorCode &errorCode) {
    if (U_FAILURE(errorCode)) {
        return result;
    }
    // Start with "argument does not occur" for every requested offset.
    for (int32_t k = 0; k < offsetsLength; ++k) {
        offsets[k] = -1;
    }
    int32_t pos = 1;
    while (pos < compiledPatternLength) {
        const int32_t unit = compiledPattern[pos++];
        if (unit >= ARG_NUM_LIMIT) {
            // Text segment: append its literal text and step over it.
            const int32_t segmentLength = unit - ARG_NUM_LIMIT;
            result.append(compiledPattern + pos, segmentLength);
            pos += segmentLength;
            continue;
        }

        // Otherwise the unit is an argument number.
        const UnicodeString *value = values[unit];
        if (value == NULL) {
            errorCode = U_ILLEGAL_ARGUMENT_ERROR;
            return result;
        }
        const UBool offsetWanted = unit < offsetsLength;

        if (value != &result) {
            // Ordinary value: record where it starts, then append it.
            if (offsetWanted) {
                offsets[unit] = result.length();
            }
            result.append(*value);
            continue;
        }

        // The value is the result object itself.
        if (forbidResultAsValue) {
            errorCode = U_ILLEGAL_ARGUMENT_ERROR;
            return result;
        }
        if (pos == 2) {
            // We are appending to result which is also the first value object.
            if (offsetWanted) {
                offsets[unit] = 0;
            }
        } else {
            // Insert the copy of result instead of result itself.
            if (offsetWanted) {
                offsets[unit] = result.length();
            }
            result.append(*resultCopy);
        }
    }
    return result;
}
324 
325 U_NAMESPACE_END
326