// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
******************************************************************************
* Copyright (C) 2014-2016, International Business Machines
* Corporation and others.  All Rights Reserved.
******************************************************************************
* simpleformatter.h
*/

11 #ifndef __SIMPLEFORMATTER_H__
12 #define __SIMPLEFORMATTER_H__
13 
14 /**
15  * \file
16  * \brief C++ API: Simple formatter, minimal subset of MessageFormat.
17  */
18 
19 #include "unicode/utypes.h"
20 #include "unicode/unistr.h"
21 
22 U_NAMESPACE_BEGIN
23 
// Forward declaration of the number-formatting helper class that is named
// as a friend of SimpleFormatter further down in this header.
namespace number {
namespace impl {
class SimpleModifier;
}  // namespace impl
}  // namespace number

/**
 * Formats simple patterns like "{1} was born in {0}".
 * Minimal subset of MessageFormat; fast, simple, minimal dependencies.
 * Supports only numbered arguments with neither type nor style parameters,
 * and formats only string values.
 * Quoting via ASCII apostrophe is compatible with ICU MessageFormat default behavior.
 *
 * Factory methods set error codes for syntax errors
 * and for too few or too many arguments/placeholders.
 *
 * SimpleFormatter objects are thread-safe except for assignment and applying new patterns.
 *
 * Example:
 * <pre>
 * UErrorCode errorCode = U_ZERO_ERROR;
 * SimpleFormatter fmt("{1} '{born}' in {0}", errorCode);
 * UnicodeString result;
 *
 * // Output: "paul {born} in england"
 * fmt.format("england", "paul", result, errorCode);
 * </pre>
 *
 * This class is not intended for public subclassing.
 *
 * @see MessageFormat
 * @see UMessagePatternApostropheMode
 * @stable ICU 57
 */
59 class U_COMMON_API SimpleFormatter U_FINAL : public UMemory {
60 public:
61     /**
62      * Default constructor.
63      * @stable ICU 57
64      */
SimpleFormatter()65     SimpleFormatter() : compiledPattern((char16_t)0) {}
66 
67     /**
68      * Constructs a formatter from the pattern string.
69      *
70      * @param pattern The pattern string.
71      * @param errorCode ICU error code in/out parameter.
72      *                  Must fulfill U_SUCCESS before the function call.
73      *                  Set to U_ILLEGAL_ARGUMENT_ERROR for bad argument syntax.
74      * @stable ICU 57
75      */
SimpleFormatter(const UnicodeString & pattern,UErrorCode & errorCode)76     SimpleFormatter(const UnicodeString& pattern, UErrorCode &errorCode) {
77         applyPattern(pattern, errorCode);
78     }
79 
80     /**
81      * Constructs a formatter from the pattern string.
82      * The number of arguments checked against the given limits is the
83      * highest argument number plus one, not the number of occurrences of arguments.
84      *
85      * @param pattern The pattern string.
86      * @param min The pattern must have at least this many arguments.
87      * @param max The pattern must have at most this many arguments.
88      * @param errorCode ICU error code in/out parameter.
89      *                  Must fulfill U_SUCCESS before the function call.
90      *                  Set to U_ILLEGAL_ARGUMENT_ERROR for bad argument syntax and
91      *                  too few or too many arguments.
92      * @stable ICU 57
93      */
SimpleFormatter(const UnicodeString & pattern,int32_t min,int32_t max,UErrorCode & errorCode)94     SimpleFormatter(const UnicodeString& pattern, int32_t min, int32_t max,
95                     UErrorCode &errorCode) {
96         applyPatternMinMaxArguments(pattern, min, max, errorCode);
97     }
98 
99     /**
100      * Copy constructor.
101      * @stable ICU 57
102      */
SimpleFormatter(const SimpleFormatter & other)103     SimpleFormatter(const SimpleFormatter& other)
104             : compiledPattern(other.compiledPattern) {}
105 
106     /**
107      * Assignment operator.
108      * @stable ICU 57
109      */
110     SimpleFormatter &operator=(const SimpleFormatter& other);
111 
112     /**
113      * Destructor.
114      * @stable ICU 57
115      */
116     ~SimpleFormatter();
117 
118     /**
119      * Changes this object according to the new pattern.
120      *
121      * @param pattern The pattern string.
122      * @param errorCode ICU error code in/out parameter.
123      *                  Must fulfill U_SUCCESS before the function call.
124      *                  Set to U_ILLEGAL_ARGUMENT_ERROR for bad argument syntax.
125      * @return TRUE if U_SUCCESS(errorCode).
126      * @stable ICU 57
127      */
applyPattern(const UnicodeString & pattern,UErrorCode & errorCode)128     UBool applyPattern(const UnicodeString &pattern, UErrorCode &errorCode) {
129         return applyPatternMinMaxArguments(pattern, 0, INT32_MAX, errorCode);
130     }
131 
132     /**
133      * Changes this object according to the new pattern.
134      * The number of arguments checked against the given limits is the
135      * highest argument number plus one, not the number of occurrences of arguments.
136      *
137      * @param pattern The pattern string.
138      * @param min The pattern must have at least this many arguments.
139      * @param max The pattern must have at most this many arguments.
140      * @param errorCode ICU error code in/out parameter.
141      *                  Must fulfill U_SUCCESS before the function call.
142      *                  Set to U_ILLEGAL_ARGUMENT_ERROR for bad argument syntax and
143      *                  too few or too many arguments.
144      * @return TRUE if U_SUCCESS(errorCode).
145      * @stable ICU 57
146      */
147     UBool applyPatternMinMaxArguments(const UnicodeString &pattern,
148                                       int32_t min, int32_t max, UErrorCode &errorCode);
149 
150     /**
151      * @return The max argument number + 1.
152      * @stable ICU 57
153      */
getArgumentLimit()154     int32_t getArgumentLimit() const {
155         return getArgumentLimit(compiledPattern.getBuffer(), compiledPattern.length());
156     }
157 
158     /**
159      * Formats the given value, appending to the appendTo builder.
160      * The argument value must not be the same object as appendTo.
161      * getArgumentLimit() must be at most 1.
162      *
163      * @param value0 Value for argument {0}.
164      * @param appendTo Gets the formatted pattern and value appended.
165      * @param errorCode ICU error code in/out parameter.
166      *                  Must fulfill U_SUCCESS before the function call.
167      * @return appendTo
168      * @stable ICU 57
169      */
170     UnicodeString &format(
171             const UnicodeString &value0,
172             UnicodeString &appendTo, UErrorCode &errorCode) const;
173 
174     /**
175      * Formats the given values, appending to the appendTo builder.
176      * An argument value must not be the same object as appendTo.
177      * getArgumentLimit() must be at most 2.
178      *
179      * @param value0 Value for argument {0}.
180      * @param value1 Value for argument {1}.
181      * @param appendTo Gets the formatted pattern and values appended.
182      * @param errorCode ICU error code in/out parameter.
183      *                  Must fulfill U_SUCCESS before the function call.
184      * @return appendTo
185      * @stable ICU 57
186      */
187     UnicodeString &format(
188             const UnicodeString &value0,
189             const UnicodeString &value1,
190             UnicodeString &appendTo, UErrorCode &errorCode) const;
191 
192     /**
193      * Formats the given values, appending to the appendTo builder.
194      * An argument value must not be the same object as appendTo.
195      * getArgumentLimit() must be at most 3.
196      *
197      * @param value0 Value for argument {0}.
198      * @param value1 Value for argument {1}.
199      * @param value2 Value for argument {2}.
200      * @param appendTo Gets the formatted pattern and values appended.
201      * @param errorCode ICU error code in/out parameter.
202      *                  Must fulfill U_SUCCESS before the function call.
203      * @return appendTo
204      * @stable ICU 57
205      */
206     UnicodeString &format(
207             const UnicodeString &value0,
208             const UnicodeString &value1,
209             const UnicodeString &value2,
210             UnicodeString &appendTo, UErrorCode &errorCode) const;
211 
212     /**
213      * Formats the given values, appending to the appendTo string.
214      *
215      * @param values The argument values.
216      *               An argument value must not be the same object as appendTo.
217      *               Can be NULL if valuesLength==getArgumentLimit()==0.
218      * @param valuesLength The length of the values array.
219      *                     Must be at least getArgumentLimit().
220      * @param appendTo Gets the formatted pattern and values appended.
221      * @param offsets offsets[i] receives the offset of where
222      *                values[i] replaced pattern argument {i}.
223      *                Can be shorter or longer than values. Can be NULL if offsetsLength==0.
224      *                If there is no {i} in the pattern, then offsets[i] is set to -1.
225      * @param offsetsLength The length of the offsets array.
226      * @param errorCode ICU error code in/out parameter.
227      *                  Must fulfill U_SUCCESS before the function call.
228      * @return appendTo
229      * @stable ICU 57
230      */
231     UnicodeString &formatAndAppend(
232             const UnicodeString *const *values, int32_t valuesLength,
233             UnicodeString &appendTo,
234             int32_t *offsets, int32_t offsetsLength, UErrorCode &errorCode) const;
235 
236     /**
237      * Formats the given values, replacing the contents of the result string.
238      * May optimize by actually appending to the result if it is the same object
239      * as the value corresponding to the initial argument in the pattern.
240      *
241      * @param values The argument values.
242      *               An argument value may be the same object as result.
243      *               Can be NULL if valuesLength==getArgumentLimit()==0.
244      * @param valuesLength The length of the values array.
245      *                     Must be at least getArgumentLimit().
246      * @param result Gets its contents replaced by the formatted pattern and values.
247      * @param offsets offsets[i] receives the offset of where
248      *                values[i] replaced pattern argument {i}.
249      *                Can be shorter or longer than values. Can be NULL if offsetsLength==0.
250      *                If there is no {i} in the pattern, then offsets[i] is set to -1.
251      * @param offsetsLength The length of the offsets array.
252      * @param errorCode ICU error code in/out parameter.
253      *                  Must fulfill U_SUCCESS before the function call.
254      * @return result
255      * @stable ICU 57
256      */
257     UnicodeString &formatAndReplace(
258             const UnicodeString *const *values, int32_t valuesLength,
259             UnicodeString &result,
260             int32_t *offsets, int32_t offsetsLength, UErrorCode &errorCode) const;
261 
262     /**
263      * Returns the pattern text with none of the arguments.
264      * Like formatting with all-empty string values.
265      * @stable ICU 57
266      */
getTextWithNoArguments()267     UnicodeString getTextWithNoArguments() const {
268         return getTextWithNoArguments(
269             compiledPattern.getBuffer(),
270             compiledPattern.length(),
271             nullptr,
272             0);
273     }
274 
275 #ifndef U_HIDE_INTERNAL_API
276     /**
277      * Returns the pattern text with none of the arguments.
278      * Like formatting with all-empty string values.
279      *
280      * TODO(ICU-20406): Replace this with an Iterator interface.
281      *
282      * @param offsets offsets[i] receives the offset of where {i} was located
283      *                before it was replaced by an empty string.
284      *                For example, "a{0}b{1}" produces offset 1 for i=0 and 2 for i=1.
285      *                Can be nullptr if offsetsLength==0.
286      *                If there is no {i} in the pattern, then offsets[i] is set to -1.
287      * @param offsetsLength The length of the offsets array.
288      *
289      * @internal
290      */
getTextWithNoArguments(int32_t * offsets,int32_t offsetsLength)291     UnicodeString getTextWithNoArguments(int32_t *offsets, int32_t offsetsLength) const {
292         return getTextWithNoArguments(
293             compiledPattern.getBuffer(),
294             compiledPattern.length(),
295             offsets,
296             offsetsLength);
297     }
298 #endif // U_HIDE_INTERNAL_API
299 
300 private:
301     /**
302      * Binary representation of the compiled pattern.
303      * Index 0: One more than the highest argument number.
304      * Followed by zero or more arguments or literal-text segments.
305      *
306      * An argument is stored as its number, less than ARG_NUM_LIMIT.
307      * A literal-text segment is stored as its length (at least 1) offset by ARG_NUM_LIMIT,
308      * followed by that many chars.
309      */
310     UnicodeString compiledPattern;
311 
getArgumentLimit(const char16_t * compiledPattern,int32_t compiledPatternLength)312     static inline int32_t getArgumentLimit(const char16_t *compiledPattern,
313                                               int32_t compiledPatternLength) {
314         return compiledPatternLength == 0 ? 0 : compiledPattern[0];
315     }
316 
317     static UnicodeString getTextWithNoArguments(
318         const char16_t *compiledPattern,
319         int32_t compiledPatternLength,
320         int32_t *offsets,
321         int32_t offsetsLength);
322 
323     static UnicodeString &format(
324             const char16_t *compiledPattern, int32_t compiledPatternLength,
325             const UnicodeString *const *values,
326             UnicodeString &result, const UnicodeString *resultCopy, UBool forbidResultAsValue,
327             int32_t *offsets, int32_t offsetsLength,
328             UErrorCode &errorCode);
329 
330     // Give access to internals to SimpleModifier for number formatting
331     friend class number::impl::SimpleModifier;
332 };
333 
334 U_NAMESPACE_END
335 
336 #endif  // __SIMPLEFORMATTER_H__
337