1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *   Copyright (C) 2011-2013, International Business Machines
6 *   Corporation and others.  All Rights Reserved.
7 *******************************************************************************
8 *   file name:  messagepattern.h
9 *   encoding:   UTF-8
10 *   tab size:   8 (not used)
11 *   indentation:4
12 *
13 *   created on: 2011mar14
14 *   created by: Markus W. Scherer
15 */
16 
17 #ifndef __MESSAGEPATTERN_H__
18 #define __MESSAGEPATTERN_H__
19 
20 /**
21  * \file
22  * \brief C++ API: MessagePattern class: Parses and represents ICU MessageFormat patterns.
23  */
24 
25 #include "unicode/utypes.h"
26 
27 #if U_SHOW_CPLUSPLUS_API
28 
29 #if !UCONFIG_NO_FORMATTING
30 
31 #include "unicode/parseerr.h"
32 #include "unicode/unistr.h"
33 
34 /**
35  * Mode for when an apostrophe starts quoted literal text for MessageFormat output.
36  * The default is DOUBLE_OPTIONAL unless overridden via uconfig.h
37  * (UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE).
38  * <p>
39  * A pair of adjacent apostrophes always results in a single apostrophe in the output,
40  * even when the pair is between two single, text-quoting apostrophes.
41  * <p>
42  * The following table shows examples of desired MessageFormat.format() output
43  * with the pattern strings that yield that output.
44  * <p>
45  * <table>
46  *   <tr>
47  *     <th>Desired output</th>
48  *     <th>DOUBLE_OPTIONAL</th>
49  *     <th>DOUBLE_REQUIRED</th>
50  *   </tr>
51  *   <tr>
52  *     <td>I see {many}</td>
53  *     <td>I see '{many}'</td>
54  *     <td>(same)</td>
55  *   </tr>
56  *   <tr>
57  *     <td>I said {'Wow!'}</td>
58  *     <td>I said '{''Wow!''}'</td>
59  *     <td>(same)</td>
60  *   </tr>
61  *   <tr>
62  *     <td>I don't know</td>
63  *     <td>I don't know OR<br> I don''t know</td>
64  *     <td>I don''t know</td>
65  *   </tr>
66  * </table>
67  * @stable ICU 4.8
68  * @see UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE
69  */
enum UMessagePatternApostropheMode {
    /**
     * Doubling is optional: a literal apostrophe may be written in the pattern
     * as one apostrophe or as two.
     * In a MessageFormat pattern, a lone apostrophe begins quoted literal text only
     * when it comes directly before a curly brace {},
     * or before a pipe symbol | inside a choice format,
     * or before a pound symbol # inside a plural format.
     * <p>
     * Default behavior from ICU 4.8 onward.
     * @stable ICU 4.8
     */
    UMSGPAT_APOS_DOUBLE_OPTIONAL = 0,
    /**
     * Doubling is required: a literal apostrophe has to be written in the pattern
     * as two apostrophes.
     * A lone apostrophe always begins quoted literal text.
     * <p>
     * Behavior of ICU 4.6 and earlier, and of the JDK.
     * @stable ICU 4.8
     */
    UMSGPAT_APOS_DOUBLE_REQUIRED = 1
};
/**
 * @stable ICU 4.8
 */
typedef enum UMessagePatternApostropheMode UMessagePatternApostropheMode;
97 
98 /**
99  * MessagePattern::Part type constants.
100  * @stable ICU 4.8
101  */
enum UMessagePatternPartType {
    /**
     * Marks where a message pattern begins (the main message or a nested one).
     * Length: 0 for the top-level message and for a choice argument sub-message;
     * otherwise 1, covering the '{'.
     * Value: the nesting level, where the main message is level 0.
     * <p>
     * A MSG_LIMIT part always follows later.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_MSG_START = 0,
    /**
     * Marks where a message pattern ends (the main message or a nested one).
     * Length: 0 for the top-level message and for
     * the last sub-message of a choice argument;
     * otherwise 1, covering the '}' or (in a choice argument style) the '|'.
     * Value: the nesting level, where the main message is level 0.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_MSG_LIMIT = 1,
    /**
     * Marks a piece of the pattern string that formatting must skip.
     * An apostrophe that opens or closes quoted text, for instance,
     * is reported with a part of this type.
     * Value: undefined, currently always 0.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_SKIP_SYNTAX = 2,
    /**
     * Marks a position where auto-quoting needs a syntax character inserted.
     * Length: 0.
     * Value: the character code of the character to insert. (U+0027=APOSTROPHE)
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_INSERT_CHAR = 3,
    /**
     * Marks a syntactic (non-escaped) # symbol in a plural variant.
     * During formatting, this part's substring is replaced with the
     * (value-offset) for the plural argument value.
     * Value: undefined, currently always 0.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_REPLACE_NUMBER = 4,
    /**
     * Marks where an argument begins.
     * Length: 1, covering the '{'.
     * Value: the ordinal value of the ArgType. Use getArgType().
     * <p>
     * Next comes either an ARG_NUMBER or an ARG_NAME part,
     * then optional argument sub-parts (see UMessagePatternArgType constants),
     * and at the end an ARG_LIMIT part.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_ARG_START = 5,
    /**
     * Marks where an argument ends.
     * Length: 1, covering the '}'.
     * Value: the ordinal value of the ArgType. Use getArgType().
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_ARG_LIMIT = 6,
    /**
     * The number of the argument; the part value provides it.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_ARG_NUMBER = 7,
    /**
     * The name of the argument.
     * Value: undefined, currently always 0.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_ARG_NAME = 8,
    /**
     * The type of the argument.
     * Value: undefined, currently always 0.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_ARG_TYPE = 9,
    /**
     * The style text of the argument.
     * Value: undefined, currently always 0.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_ARG_STYLE = 10,
    /**
     * A selector substring inside a "complex" argument style.
     * Value: undefined, currently always 0.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_ARG_SELECTOR = 11,
    /**
     * An integer value, such as the offset or an explicit selector value
     * in a PluralFormat style.
     * Value: the integer value itself.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_ARG_INT = 12,
    /**
     * A numeric value, such as the offset or an explicit selector value
     * in a PluralFormat style.
     * Value: an index into an internal array of numeric values;
     * use getNumericValue().
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_ARG_DOUBLE = 13
};
/**
 * @stable ICU 4.8
 */
typedef enum UMessagePatternPartType UMessagePatternPartType;
212 
213 /**
214  * Argument type constants.
215  * Returned by Part.getArgType() for ARG_START and ARG_LIMIT parts.
216  *
217  * Messages nested inside an argument are each delimited by MSG_START and MSG_LIMIT,
218  * with a nesting level one greater than the surrounding message.
219  * @stable ICU 4.8
220  */
enum UMessagePatternArgType {
    /**
     * No type was specified for the argument.
     * @stable ICU 4.8
     */
    UMSGPAT_ARG_TYPE_NONE = 0,
    /**
     * The argument has a "simple" type, which the ARG_TYPE part provides.
     * That part may be followed by an ARG_STYLE part.
     * @stable ICU 4.8
     */
    UMSGPAT_ARG_TYPE_SIMPLE = 1,
    /**
     * The argument is a ChoiceFormat made of one or more
     * ((ARG_INT | ARG_DOUBLE), ARG_SELECTOR, message) tuples.
     * @stable ICU 4.8
     */
    UMSGPAT_ARG_TYPE_CHOICE = 2,
    /**
     * The argument is a cardinal-number PluralFormat. It has an optional
     * ARG_INT or ARG_DOUBLE offset (e.g., offset:1)
     * followed by one or more (ARG_SELECTOR [explicit-value] message) tuples.
     * When a selector carries an explicit value (e.g., =2),
     * the ARG_INT or ARG_DOUBLE part preceding the message provides that value.
     * When it does not, the message comes immediately after the ARG_SELECTOR.
     * @stable ICU 4.8
     */
    UMSGPAT_ARG_TYPE_PLURAL = 3,
    /**
     * The argument is a SelectFormat made of one or more (ARG_SELECTOR, message) pairs.
     * @stable ICU 4.8
     */
    UMSGPAT_ARG_TYPE_SELECT = 4,
    /**
     * The argument is an ordinal-number PluralFormat; its style parts sequence
     * and semantics are the same as for UMSGPAT_ARG_TYPE_PLURAL.
     * @stable ICU 50
     */
    UMSGPAT_ARG_TYPE_SELECTORDINAL = 5
};
/**
 * @stable ICU 4.8
 */
typedef enum UMessagePatternArgType UMessagePatternArgType;
265 
/**
 * \def UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE
 * Tests whether an argument type uses the plural style part sequence and semantics,
 * which is the case for UMSGPAT_ARG_TYPE_PLURAL and UMSGPAT_ARG_TYPE_SELECTORDINAL.
 * The argument appears twice in the expansion, so pass an expression
 * that has no side effects.
 * @stable ICU 50
 */
#define UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(argType) \
    (((argType)==UMSGPAT_ARG_TYPE_PLURAL) || \
     ((argType)==UMSGPAT_ARG_TYPE_SELECTORDINAL))
274 
enum {
    /**
     * Result of MessagePattern.validateArgumentName() when the string
     * is a valid "pattern identifier" that is not a number.
     * @stable ICU 4.8
     */
    UMSGPAT_ARG_NAME_NOT_NUMBER = -1,

    /**
     * Result of MessagePattern.validateArgumentName() when the string is invalid:
     * either it is not a valid "pattern identifier",
     * or it consists only of ASCII digits but has a leading zero
     * or denotes a number that is too large.
     * @stable ICU 4.8
     */
    UMSGPAT_ARG_NAME_NOT_VALID = -2
};
292 
/**
 * Sentinel that getNumericValue(Part) returns for a part
 * that has no numeric value defined.
 * @see MessagePattern.getNumericValue()
 * @stable ICU 4.8
 */
#define UMSGPAT_NO_NUMERIC_VALUE (-123456789.0)
300 
301 U_NAMESPACE_BEGIN
302 
303 class MessagePatternDoubleList;
304 class MessagePatternPartsList;
305 
306 /**
307  * Parses and represents ICU MessageFormat patterns.
308  * Also handles patterns for ChoiceFormat, PluralFormat and SelectFormat.
309  * Used in the implementations of those classes as well as in tools
310  * for message validation, translation and format conversion.
311  * <p>
312  * The parser handles all syntax relevant for identifying message arguments.
313  * This includes "complex" arguments whose style strings contain
314  * nested MessageFormat pattern substrings.
315  * For "simple" arguments (with no nested MessageFormat pattern substrings),
316  * the argument style is not parsed any further.
317  * <p>
318  * The parser handles named and numbered message arguments and allows both in one message.
319  * <p>
320  * Once a pattern has been parsed successfully, iterate through the parsed data
321  * with countParts(), getPart() and related methods.
322  * <p>
323  * The data logically represents a parse tree, but is stored and accessed
324  * as a list of "parts" for fast and simple parsing and to minimize object allocations.
325  * Arguments and nested messages are best handled via recursion.
326  * For every _START "part", MessagePattern.getLimitPartIndex() efficiently returns
327  * the index of the corresponding _LIMIT "part".
328  * <p>
329  * List of "parts":
330  * <pre>
331  * message = MSG_START (SKIP_SYNTAX | INSERT_CHAR | REPLACE_NUMBER | argument)* MSG_LIMIT
332  * argument = noneArg | simpleArg | complexArg
333  * complexArg = choiceArg | pluralArg | selectArg
334  *
335  * noneArg = ARG_START.NONE (ARG_NAME | ARG_NUMBER) ARG_LIMIT.NONE
336  * simpleArg = ARG_START.SIMPLE (ARG_NAME | ARG_NUMBER) ARG_TYPE [ARG_STYLE] ARG_LIMIT.SIMPLE
337  * choiceArg = ARG_START.CHOICE (ARG_NAME | ARG_NUMBER) choiceStyle ARG_LIMIT.CHOICE
338  * pluralArg = ARG_START.PLURAL (ARG_NAME | ARG_NUMBER) pluralStyle ARG_LIMIT.PLURAL
339  * selectArg = ARG_START.SELECT (ARG_NAME | ARG_NUMBER) selectStyle ARG_LIMIT.SELECT
340  *
341  * choiceStyle = ((ARG_INT | ARG_DOUBLE) ARG_SELECTOR message)+
342  * pluralStyle = [ARG_INT | ARG_DOUBLE] (ARG_SELECTOR [ARG_INT | ARG_DOUBLE] message)+
343  * selectStyle = (ARG_SELECTOR message)+
344  * </pre>
345  * <ul>
346  *   <li>Literal output text is not represented directly by "parts" but accessed
347  *       between parts of a message, from one part's getLimit() to the next part's getIndex().
348  *   <li><code>ARG_START.CHOICE</code> stands for an ARG_START Part with ArgType CHOICE.
349  *   <li>In the choiceStyle, the ARG_SELECTOR has the '<', the '#' or
350  *       the less-than-or-equal-to sign (U+2264).
351  *   <li>In the pluralStyle, the first, optional numeric Part has the "offset:" value.
352  *       The optional numeric Part between each (ARG_SELECTOR, message) pair
353  *       is the value of an explicit-number selector like "=2",
354  *       otherwise the selector is a non-numeric identifier.
355  *   <li>The REPLACE_NUMBER Part can occur only in an immediate sub-message of the pluralStyle.
356  * </ul>
357  * <p>
358  * This class is not intended for public subclassing.
359  *
360  * @stable ICU 4.8
361  */
362 class U_COMMON_API MessagePattern : public UObject {
363 public:
364     /**
365      * Constructs an empty MessagePattern with default UMessagePatternApostropheMode.
366      * @param errorCode Standard ICU error code. Its input value must
367      *                  pass the U_SUCCESS() test, or else the function returns
368      *                  immediately. Check for U_FAILURE() on output or use with
369      *                  function chaining. (See User Guide for details.)
370      * @stable ICU 4.8
371      */
372     MessagePattern(UErrorCode &errorCode);
373 
374     /**
375      * Constructs an empty MessagePattern.
376      * @param mode Explicit UMessagePatternApostropheMode.
377      * @param errorCode Standard ICU error code. Its input value must
378      *                  pass the U_SUCCESS() test, or else the function returns
379      *                  immediately. Check for U_FAILURE() on output or use with
380      *                  function chaining. (See User Guide for details.)
381      * @stable ICU 4.8
382      */
383     MessagePattern(UMessagePatternApostropheMode mode, UErrorCode &errorCode);
384 
385     /**
386      * Constructs a MessagePattern with default UMessagePatternApostropheMode and
387      * parses the MessageFormat pattern string.
388      * @param pattern a MessageFormat pattern string
389      * @param parseError Struct to receive information on the position
390      *                   of an error within the pattern.
391      *                   Can be NULL.
392      * @param errorCode Standard ICU error code. Its input value must
393      *                  pass the U_SUCCESS() test, or else the function returns
394      *                  immediately. Check for U_FAILURE() on output or use with
395      *                  function chaining. (See User Guide for details.)
396      * TODO: turn @throws into UErrorCode specifics?
397      * @throws IllegalArgumentException for syntax errors in the pattern string
398      * @throws IndexOutOfBoundsException if certain limits are exceeded
399      *         (e.g., argument number too high, argument name too long, etc.)
400      * @throws NumberFormatException if a number could not be parsed
401      * @stable ICU 4.8
402      */
403     MessagePattern(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode);
404 
405     /**
406      * Copy constructor.
407      * @param other Object to copy.
408      * @stable ICU 4.8
409      */
410     MessagePattern(const MessagePattern &other);
411 
412     /**
413      * Assignment operator.
414      * @param other Object to copy.
415      * @return *this=other
416      * @stable ICU 4.8
417      */
418     MessagePattern &operator=(const MessagePattern &other);
419 
420     /**
421      * Destructor.
422      * @stable ICU 4.8
423      */
424     virtual ~MessagePattern();
425 
426     /**
427      * Parses a MessageFormat pattern string.
428      * @param pattern a MessageFormat pattern string
429      * @param parseError Struct to receive information on the position
430      *                   of an error within the pattern.
431      *                   Can be NULL.
432      * @param errorCode Standard ICU error code. Its input value must
433      *                  pass the U_SUCCESS() test, or else the function returns
434      *                  immediately. Check for U_FAILURE() on output or use with
435      *                  function chaining. (See User Guide for details.)
436      * @return *this
437      * @throws IllegalArgumentException for syntax errors in the pattern string
438      * @throws IndexOutOfBoundsException if certain limits are exceeded
439      *         (e.g., argument number too high, argument name too long, etc.)
440      * @throws NumberFormatException if a number could not be parsed
441      * @stable ICU 4.8
442      */
443     MessagePattern &parse(const UnicodeString &pattern,
444                           UParseError *parseError, UErrorCode &errorCode);
445 
446     /**
447      * Parses a ChoiceFormat pattern string.
448      * @param pattern a ChoiceFormat pattern string
449      * @param parseError Struct to receive information on the position
450      *                   of an error within the pattern.
451      *                   Can be NULL.
452      * @param errorCode Standard ICU error code. Its input value must
453      *                  pass the U_SUCCESS() test, or else the function returns
454      *                  immediately. Check for U_FAILURE() on output or use with
455      *                  function chaining. (See User Guide for details.)
456      * @return *this
457      * @throws IllegalArgumentException for syntax errors in the pattern string
458      * @throws IndexOutOfBoundsException if certain limits are exceeded
459      *         (e.g., argument number too high, argument name too long, etc.)
460      * @throws NumberFormatException if a number could not be parsed
461      * @stable ICU 4.8
462      */
463     MessagePattern &parseChoiceStyle(const UnicodeString &pattern,
464                                      UParseError *parseError, UErrorCode &errorCode);
465 
466     /**
467      * Parses a PluralFormat pattern string.
468      * @param pattern a PluralFormat pattern string
469      * @param parseError Struct to receive information on the position
470      *                   of an error within the pattern.
471      *                   Can be NULL.
472      * @param errorCode Standard ICU error code. Its input value must
473      *                  pass the U_SUCCESS() test, or else the function returns
474      *                  immediately. Check for U_FAILURE() on output or use with
475      *                  function chaining. (See User Guide for details.)
476      * @return *this
477      * @throws IllegalArgumentException for syntax errors in the pattern string
478      * @throws IndexOutOfBoundsException if certain limits are exceeded
479      *         (e.g., argument number too high, argument name too long, etc.)
480      * @throws NumberFormatException if a number could not be parsed
481      * @stable ICU 4.8
482      */
483     MessagePattern &parsePluralStyle(const UnicodeString &pattern,
484                                      UParseError *parseError, UErrorCode &errorCode);
485 
486     /**
487      * Parses a SelectFormat pattern string.
488      * @param pattern a SelectFormat pattern string
489      * @param parseError Struct to receive information on the position
490      *                   of an error within the pattern.
491      *                   Can be NULL.
492      * @param errorCode Standard ICU error code. Its input value must
493      *                  pass the U_SUCCESS() test, or else the function returns
494      *                  immediately. Check for U_FAILURE() on output or use with
495      *                  function chaining. (See User Guide for details.)
496      * @return *this
497      * @throws IllegalArgumentException for syntax errors in the pattern string
498      * @throws IndexOutOfBoundsException if certain limits are exceeded
499      *         (e.g., argument number too high, argument name too long, etc.)
500      * @throws NumberFormatException if a number could not be parsed
501      * @stable ICU 4.8
502      */
503     MessagePattern &parseSelectStyle(const UnicodeString &pattern,
504                                      UParseError *parseError, UErrorCode &errorCode);
505 
506     /**
507      * Clears this MessagePattern.
508      * countParts() will return 0.
509      * @stable ICU 4.8
510      */
511     void clear();
512 
513     /**
514      * Clears this MessagePattern and sets the UMessagePatternApostropheMode.
515      * countParts() will return 0.
516      * @param mode The new UMessagePatternApostropheMode.
517      * @stable ICU 4.8
518      */
clearPatternAndSetApostropheMode(UMessagePatternApostropheMode mode)519     void clearPatternAndSetApostropheMode(UMessagePatternApostropheMode mode) {
520         clear();
521         aposMode=mode;
522     }
523 
524     /**
525      * @param other another object to compare with.
526      * @return true if this object is equivalent to the other one.
527      * @stable ICU 4.8
528      */
529     UBool operator==(const MessagePattern &other) const;
530 
531     /**
532      * @param other another object to compare with.
533      * @return false if this object is equivalent to the other one.
534      * @stable ICU 4.8
535      */
536     inline UBool operator!=(const MessagePattern &other) const {
537         return !operator==(other);
538     }
539 
540     /**
541      * @return A hash code for this object.
542      * @stable ICU 4.8
543      */
544     int32_t hashCode() const;
545 
546     /**
547      * @return this instance's UMessagePatternApostropheMode.
548      * @stable ICU 4.8
549      */
getApostropheMode()550     UMessagePatternApostropheMode getApostropheMode() const {
551         return aposMode;
552     }
553 
554     // Java has package-private jdkAposMode() here.
555     // In C++, this is declared in the MessageImpl class.
556 
557     /**
     * @return the parsed pattern string, by reference. A reference cannot be null,
     *         so the string is presumably empty if no pattern was parsed.
559      * @stable ICU 4.8
560      */
getPatternString()561     const UnicodeString &getPatternString() const {
562         return msg;
563     }
564 
565     /**
566      * Does the parsed pattern have named arguments like {first_name}?
567      * @return true if the parsed pattern has at least one named argument.
568      * @stable ICU 4.8
569      */
hasNamedArguments()570     UBool hasNamedArguments() const {
571         return hasArgNames;
572     }
573 
574     /**
575      * Does the parsed pattern have numbered arguments like {2}?
576      * @return true if the parsed pattern has at least one numbered argument.
577      * @stable ICU 4.8
578      */
hasNumberedArguments()579     UBool hasNumberedArguments() const {
580         return hasArgNumbers;
581     }
582 
583     /**
584      * Validates and parses an argument name or argument number string.
585      * An argument name must be a "pattern identifier", that is, it must contain
586      * no Unicode Pattern_Syntax or Pattern_White_Space characters.
587      * If it only contains ASCII digits, then it must be a small integer with no leading zero.
588      * @param name Input string.
589      * @return &gt;=0 if the name is a valid number,
590      *         ARG_NAME_NOT_NUMBER (-1) if it is a "pattern identifier" but not all ASCII digits,
591      *         ARG_NAME_NOT_VALID (-2) if it is neither.
592      * @stable ICU 4.8
593      */
594     static int32_t validateArgumentName(const UnicodeString &name);
595 
596     /**
597      * Returns a version of the parsed pattern string where each ASCII apostrophe
598      * is doubled (escaped) if it is not already, and if it is not interpreted as quoting syntax.
599      * <p>
600      * For example, this turns "I don't '{know}' {gender,select,female{h''er}other{h'im}}."
601      * into "I don''t '{know}' {gender,select,female{h''er}other{h''im}}."
602      * @return the deep-auto-quoted version of the parsed pattern string.
603      * @see MessageFormat.autoQuoteApostrophe()
604      * @stable ICU 4.8
605      */
606     UnicodeString autoQuoteApostropheDeep() const;
607 
608     class Part;
609 
610     /**
611      * Returns the number of "parts" created by parsing the pattern string.
612      * Returns 0 if no pattern has been parsed or clear() was called.
613      * @return the number of pattern parts.
614      * @stable ICU 4.8
615      */
countParts()616     int32_t countParts() const {
617         return partsLength;
618     }
619 
620     /**
621      * Gets the i-th pattern "part".
622      * @param i The index of the Part data. (0..countParts()-1)
623      * @return the i-th pattern "part".
624      * @stable ICU 4.8
625      */
getPart(int32_t i)626     const Part &getPart(int32_t i) const {
627         return parts[i];
628     }
629 
630     /**
631      * Returns the UMessagePatternPartType of the i-th pattern "part".
632      * Convenience method for getPart(i).getType().
633      * @param i The index of the Part data. (0..countParts()-1)
634      * @return The UMessagePatternPartType of the i-th Part.
635      * @stable ICU 4.8
636      */
getPartType(int32_t i)637     UMessagePatternPartType getPartType(int32_t i) const {
638         return getPart(i).type;
639     }
640 
641     /**
642      * Returns the pattern index of the specified pattern "part".
643      * Convenience method for getPart(partIndex).getIndex().
644      * @param partIndex The index of the Part data. (0..countParts()-1)
645      * @return The pattern index of this Part.
646      * @stable ICU 4.8
647      */
getPatternIndex(int32_t partIndex)648     int32_t getPatternIndex(int32_t partIndex) const {
649         return getPart(partIndex).index;
650     }
651 
652     /**
653      * Returns the substring of the pattern string indicated by the Part.
654      * Convenience method for getPatternString().substring(part.getIndex(), part.getLimit()).
655      * @param part a part of this MessagePattern.
656      * @return the substring associated with part.
657      * @stable ICU 4.8
658      */
getSubstring(const Part & part)659     UnicodeString getSubstring(const Part &part) const {
660         return msg.tempSubString(part.index, part.length);
661     }
662 
663     /**
664      * Compares the part's substring with the input string s.
665      * @param part a part of this MessagePattern.
666      * @param s a string.
667      * @return true if getSubstring(part).equals(s).
668      * @stable ICU 4.8
669      */
partSubstringMatches(const Part & part,const UnicodeString & s)670     UBool partSubstringMatches(const Part &part, const UnicodeString &s) const {
671         return 0==msg.compare(part.index, part.length, s);
672     }
673 
674     /**
675      * Returns the numeric value associated with an ARG_INT or ARG_DOUBLE.
676      * @param part a part of this MessagePattern.
677      * @return the part's numeric value, or UMSGPAT_NO_NUMERIC_VALUE if this is not a numeric part.
678      * @stable ICU 4.8
679      */
680     double getNumericValue(const Part &part) const;
681 
682     /**
683      * Returns the "offset:" value of a PluralFormat argument, or 0 if none is specified.
684      * @param pluralStart the index of the first PluralFormat argument style part. (0..countParts()-1)
685      * @return the "offset:" value.
686      * @stable ICU 4.8
687      */
688     double getPluralOffset(int32_t pluralStart) const;
689 
690     /**
691      * Returns the index of the ARG|MSG_LIMIT part corresponding to the ARG|MSG_START at start.
692      * @param start The index of some Part data (0..countParts()-1);
693      *        this Part should be of Type ARG_START or MSG_START.
     * @return The first i>start where getPart(i).getType()==ARG|MSG_LIMIT at the same nesting level,
     *         or start itself if getPartType(start)!=ARG|MSG_START.
696      * @stable ICU 4.8
697      */
getLimitPartIndex(int32_t start)698     int32_t getLimitPartIndex(int32_t start) const {
699         int32_t limit=getPart(start).limitPartIndex;
700         if(limit<start) {
701             return start;
702         }
703         return limit;
704     }
705 
706     /**
707      * A message pattern "part", representing a pattern parsing event.
708      * There is a part for the start and end of a message or argument,
709      * for quoting and escaping of and with ASCII apostrophes,
710      * and for syntax elements of "complex" arguments.
711      * @stable ICU 4.8
712      */
713     class Part : public UMemory {
714     public:
715         /**
716          * Default constructor, do not use.
717          * @internal
718          */
Part()719         Part() {}
720 
721         /**
722          * Returns the type of this part.
723          * @return the part type.
724          * @stable ICU 4.8
725          */
getType()726         UMessagePatternPartType getType() const {
727             return type;
728         }
729 
730         /**
731          * Returns the pattern string index associated with this Part.
732          * @return this part's pattern string index.
733          * @stable ICU 4.8
734          */
getIndex()735         int32_t getIndex() const {
736             return index;
737         }
738 
739         /**
740          * Returns the length of the pattern substring associated with this Part.
741          * This is 0 for some parts.
742          * @return this part's pattern substring length.
743          * @stable ICU 4.8
744          */
getLength()745         int32_t getLength() const {
746             return length;
747         }
748 
749         /**
750          * Returns the pattern string limit (exclusive-end) index associated with this Part.
751          * Convenience method for getIndex()+getLength().
752          * @return this part's pattern string limit index, same as getIndex()+getLength().
753          * @stable ICU 4.8
754          */
getLimit()755         int32_t getLimit() const {
756             return index+length;
757         }
758 
759         /**
760          * Returns a value associated with this part.
761          * See the documentation of each part type for details.
762          * @return the part value.
763          * @stable ICU 4.8
764          */
getValue()765         int32_t getValue() const {
766             return value;
767         }
768 
769         /**
770          * Returns the argument type if this part is of type ARG_START or ARG_LIMIT,
771          * otherwise UMSGPAT_ARG_TYPE_NONE.
772          * @return the argument type for this part.
773          * @stable ICU 4.8
774          */
getArgType()775         UMessagePatternArgType getArgType() const {
776             UMessagePatternPartType msgType=getType();
777             if(msgType ==UMSGPAT_PART_TYPE_ARG_START || msgType ==UMSGPAT_PART_TYPE_ARG_LIMIT) {
778                 return (UMessagePatternArgType)value;
779             } else {
780                 return UMSGPAT_ARG_TYPE_NONE;
781             }
782         }
783 
784         /**
785          * Indicates whether the Part type has a numeric value.
786          * If so, then that numeric value can be retrieved via MessagePattern.getNumericValue().
787          * @param type The Part type to be tested.
788          * @return true if the Part type has a numeric value.
789          * @stable ICU 4.8
790          */
hasNumericValue(UMessagePatternPartType type)791         static UBool hasNumericValue(UMessagePatternPartType type) {
792             return type==UMSGPAT_PART_TYPE_ARG_INT || type==UMSGPAT_PART_TYPE_ARG_DOUBLE;
793         }
794 
        /**
         * Equality comparison between two Parts.
         * Only the declaration is present here; the definition lives outside
         * this class body, so the exact set of fields compared is not visible
         * at this point.
         * @param other another object to compare with.
         * @return true if this object is equivalent to the other one.
         * @stable ICU 4.8
         */
        UBool operator==(const Part &other) const;
801 
802         /**
803          * @param other another object to compare with.
804          * @return false if this object is equivalent to the other one.
805          * @stable ICU 4.8
806          */
807         inline UBool operator!=(const Part &other) const {
808             return !operator==(other);
809         }
810 
811         /**
812          * @return A hash code for this object.
813          * @stable ICU 4.8
814          */
hashCode()815         int32_t hashCode() const {
816             return ((type*37+index)*37+length)*37+value;
817         }
818 
    private:
        // MessagePattern gets direct access to the private fields below.
        friend class MessagePattern;

        // 0xffff is the largest value that the uint16_t length field can hold.
        static const int32_t MAX_LENGTH=0xffff;
        // 0x7fff is the largest value that the int16_t value field can hold.
        static const int32_t MAX_VALUE=0x7fff;

        // Some fields are not const because they are modified during pattern parsing.
        // After pattern parsing, the parts are effectively immutable.

        // Part type; enters hashCode().
        UMessagePatternPartType type;
        // Pattern string index, returned by getIndex().
        int32_t index;
        // Pattern substring length, returned by getLength(); may be 0.
        uint16_t length;
        // Part value, returned by getValue(); getArgType() reinterprets it as a
        // UMessagePatternArgType for ARG_START and ARG_LIMIT parts.
        int16_t value;
        // Not included in hashCode().
        // NOTE(review): presumably the index of the matching limit part for a
        // start part -- confirm against the MessagePattern implementation.
        int32_t limitPartIndex;
832     };
833 
834 private:
    // Internal parsing helpers. Only the declarations are visible in this
    // header; the per-function notes below are inferred from names and
    // signatures -- NOTE(review): confirm against the implementation.

    // Presumably prepares this object for parsing the given pattern string.
    void preParse(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode);

    // Presumably finalizes this object's state once parsing has finished.
    void postParse();

    // Presumably parses a message (sub-)pattern starting at index and returns
    // a pattern string index; nestingLevel and parentType describe the context.
    int32_t parseMessage(int32_t index, int32_t msgStartLength,
                         int32_t nestingLevel, UMessagePatternArgType parentType,
                         UParseError *parseError, UErrorCode &errorCode);

    // Presumably parses one argument starting at index and returns a pattern string index.
    int32_t parseArg(int32_t index, int32_t argStartLength, int32_t nestingLevel,
                     UParseError *parseError, UErrorCode &errorCode);

    // Presumably parses the style text of a simple argument starting at index.
    int32_t parseSimpleStyle(int32_t index, UParseError *parseError, UErrorCode &errorCode);

    // Presumably parses the style of a choice argument starting at index.
    int32_t parseChoiceStyle(int32_t index, int32_t nestingLevel,
                             UParseError *parseError, UErrorCode &errorCode);

    // Presumably parses the style of a plural or select argument; argType
    // tells which kind of argument is being parsed.
    int32_t parsePluralOrSelectStyle(UMessagePatternArgType argType, int32_t index, int32_t nestingLevel,
                                     UParseError *parseError, UErrorCode &errorCode);
853 
    /**
     * Validates and parses an argument name or argument number string.
     * This internal method assumes that the input substring is a "pattern identifier".
     * The non-static overload parseArgNumber(start, limit) calls this one
     * with the msg member as the string.
     * @param s the string that contains the substring to be parsed
     * @param start presumably the start index of the substring in s (TODO confirm)
     * @param limit presumably the limit (exclusive-end) index of the substring in s (TODO confirm)
     * @return &gt;=0 if the name is a valid number,
     *         ARG_NAME_NOT_NUMBER (-1) if it is a "pattern identifier" but not all ASCII digits,
     *         ARG_NAME_NOT_VALID (-2) if it is neither.
     * @see #validateArgumentName(String)
     */
    static int32_t parseArgNumber(const UnicodeString &s, int32_t start, int32_t limit);
863 
parseArgNumber(int32_t start,int32_t limit)864     int32_t parseArgNumber(int32_t start, int32_t limit) {
865         return parseArgNumber(msg, start, limit);
866     }
867 
    /**
     * Parses a number from the specified message substring.
     * The function returns void, so the parsed result is not handed back
     * directly to the caller; where it is recorded is not visible in this header.
     * @param start start index into the message string
     * @param limit limit index into the message string, must be start<limit
     * @param allowInfinity true if U+221E is allowed (for ChoiceFormat)
     * @param parseError presumably receives parse error details on failure (TODO confirm)
     * @param errorCode presumably set to a failure code if the substring is
     *                  not a valid number (TODO confirm)
     */
    void parseDouble(int32_t start, int32_t limit, UBool allowInfinity,
                     UParseError *parseError, UErrorCode &errorCode);
878 
879     // Java has package-private appendReducedApostrophes() here.
880     // In C++, this is declared in the MessageImpl class.
881 
    // Scanning helpers. Only the declarations are visible in this header; the
    // undocumented ones are described from their names and signatures only --
    // NOTE(review): confirm against the implementation.

    // Presumably returns the index after any white space starting at index.
    int32_t skipWhiteSpace(int32_t index);

    // Presumably returns the index after an identifier starting at index.
    int32_t skipIdentifier(int32_t index);

    /**
     * Skips a sequence of characters that could occur in a double value.
     * Does not fully parse or validate the value.
     * @param index presumably the index at which skipping starts (TODO confirm)
     * @return presumably the index after the skipped characters (TODO confirm)
     */
    int32_t skipDouble(int32_t index);

    // Presumably tests whether c may occur in an argument type keyword.
    static UBool isArgTypeChar(UChar32 c);

    // The following four presumably test whether the text at index is the
    // choice, plural, select or ordinal argument type keyword, respectively.
    UBool isChoice(int32_t index);

    UBool isPlural(int32_t index);

    UBool isSelect(int32_t index);

    UBool isOrdinal(int32_t index);
901 
    /**
     * Context test used during parsing.
     * @param nestingLevel presumably the current nesting depth of the
     *                     (sub-)pattern being parsed (TODO confirm)
     * @return true if we are inside a MessageFormat (sub-)pattern,
     *         as opposed to inside a top-level choice/plural/select pattern.
     */
    UBool inMessageFormatPattern(int32_t nestingLevel);

    /**
     * Context test used during parsing.
     * @param nestingLevel presumably the current nesting depth of the
     *                     (sub-)pattern being parsed (TODO confirm)
     * @param parentType presumably the argument type of the enclosing
     *                   argument (TODO confirm)
     * @return true if we are in a MessageFormat sub-pattern
     *         of a top-level ChoiceFormat pattern.
     */
    UBool inTopLevelChoiceMessage(int32_t nestingLevel, UMessagePatternArgType parentType);
913 
    // Helpers for building the parts list and managing storage. Only the
    // declarations are visible in this header; the notes below are inferred
    // from names and signatures -- NOTE(review): confirm against the implementation.

    // Presumably appends a Part with the given type, index, length and value.
    void addPart(UMessagePatternPartType type, int32_t index, int32_t length,
                 int32_t value, UErrorCode &errorCode);

    // Presumably appends a limit Part and links it with the part at position start.
    void addLimitPart(int32_t start,
                      UMessagePatternPartType type, int32_t index, int32_t length,
                      int32_t value, UErrorCode &errorCode);

    // Presumably records numericValue and appends a corresponding Part for the
    // pattern substring given by start and length.
    void addArgDoublePart(double numericValue, int32_t start, int32_t length, UErrorCode &errorCode);

    // Presumably fills in *parseError for an error at the given pattern index.
    void setParseError(UParseError *parseError, int32_t index);

    // Presumably sets up internal storage; the UBool result likely signals success.
    UBool init(UErrorCode &errorCode);
    // Presumably copies the internal storage of other into this object;
    // the UBool result likely signals success.
    UBool copyStorage(const MessagePattern &other, UErrorCode &errorCode);
927 
    // Apostrophe mode of this pattern object (see UMessagePatternApostropheMode).
    UMessagePatternApostropheMode aposMode;
    // The string that parseArgNumber(start, limit) passes to the static
    // parseArgNumber() overload.
    UnicodeString msg;
    // Java equivalent: ArrayList<Part> parts=new ArrayList<Part>();
    // NOTE(review): partsList presumably owns the storage that parts points
    // into, and partsLength is presumably the number of parts in use; confirm.
    MessagePatternPartsList *partsList;
    Part *parts;
    int32_t partsLength;
    // Java equivalent: ArrayList<Double> numericValues;
    // NOTE(review): same presumed arrangement as for the parts above, for the
    // double values; confirm.
    MessagePatternDoubleList *numericValuesList;
    double *numericValues;
    int32_t numericValuesLength;
    // NOTE(review): the flags below presumably record whether the pattern
    // uses named arguments, uses numbered arguments, and needs auto-quoting;
    // confirm where they are set.
    UBool hasArgNames;
    UBool hasArgNumbers;
    UBool needsAutoQuoting;
941 };
942 
943 U_NAMESPACE_END
944 
945 #endif  // !UCONFIG_NO_FORMATTING
946 
947 #endif /* U_SHOW_CPLUSPLUS_API */
948 
949 #endif  // __MESSAGEPATTERN_H__
950