1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 * Copyright (C) 1997-2013, International Business Machines Corporation and    *
6 * others. All Rights Reserved.                                                *
7 *******************************************************************************
8 *
9 * File CHOICFMT.CPP
10 *
11 * Modification History:
12 *
13 *   Date        Name        Description
14 *   02/19/97    aliu        Converted from java.
15 *   03/20/97    helena      Finished first cut of implementation and got rid
16 *                           of nextDouble/previousDouble and replaced with
17 *                           boolean array.
18 *   4/10/97     aliu        Clean up.  Modified to work on AIX.
19 *   06/04/97    helena      Fixed applyPattern(), toPattern() and not to include
20 *                           wchar.h.
21 *   07/09/97    helena      Made ParsePosition into a class.
22 *   08/06/97    nos         removed overloaded constructor, fixed 'format(array)'
23 *   07/22/98    stephen     JDK 1.2 Sync - removed UBool array (doubleFlags)
24 *   02/22/99    stephen     Removed character literals for EBCDIC safety
25 ********************************************************************************
26 */
27 
28 #include "unicode/utypes.h"
29 
30 #if !UCONFIG_NO_FORMATTING
31 
32 #include "unicode/choicfmt.h"
33 #include "unicode/numfmt.h"
34 #include "unicode/locid.h"
35 #include "cpputils.h"
36 #include "cstring.h"
37 #include "messageimpl.h"
38 #include "putilimp.h"
39 #include "uassert.h"
40 #include <stdio.h>
41 #include <float.h>
42 
43 // *****************************************************************************
44 // class ChoiceFormat
45 // *****************************************************************************
46 
47 U_NAMESPACE_BEGIN
48 
49 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(ChoiceFormat)
50 
// Special characters used by ChoiceFormat.  There are two characters
// used interchangeably to indicate <=.  Either is parsed, but only
// LESS_EQUAL is written into the pattern that setChoices() builds;
// toPattern() returns the applied pattern string unchanged.
54 #define SINGLE_QUOTE ((UChar)0x0027)   /*'*/
55 #define LESS_THAN    ((UChar)0x003C)   /*<*/
56 #define LESS_EQUAL   ((UChar)0x0023)   /*#*/
57 #define LESS_EQUAL2  ((UChar)0x2264)
58 #define VERTICAL_BAR ((UChar)0x007C)   /*|*/
59 #define MINUS        ((UChar)0x002D)   /*-*/
60 
61 static const UChar LEFT_CURLY_BRACE = 0x7B;     /*{*/
62 static const UChar RIGHT_CURLY_BRACE = 0x7D;    /*}*/
63 
64 #ifdef INFINITY
65 #undef INFINITY
66 #endif
67 #define INFINITY     ((UChar)0x221E)
68 
69 //static const UChar gPositiveInfinity[] = {INFINITY, 0};
70 //static const UChar gNegativeInfinity[] = {MINUS, INFINITY, 0};
71 #define POSITIVE_INF_STRLEN 1
72 #define NEGATIVE_INF_STRLEN 2
73 
74 // -------------------------------------
75 // Creates a ChoiceFormat instance based on the pattern.
76 
ChoiceFormat(const UnicodeString & newPattern,UErrorCode & status)77 ChoiceFormat::ChoiceFormat(const UnicodeString& newPattern,
78                            UErrorCode& status)
79 : constructorErrorCode(status),
80   msgPattern(status)
81 {
82     applyPattern(newPattern, status);
83 }
84 
85 // -------------------------------------
86 // Creates a ChoiceFormat instance with the limit array and
87 // format strings for each limit.
88 
ChoiceFormat(const double * limits,const UnicodeString * formats,int32_t cnt)89 ChoiceFormat::ChoiceFormat(const double* limits,
90                            const UnicodeString* formats,
91                            int32_t cnt )
92 : constructorErrorCode(U_ZERO_ERROR),
93   msgPattern(constructorErrorCode)
94 {
95     setChoices(limits, NULL, formats, cnt, constructorErrorCode);
96 }
97 
98 // -------------------------------------
99 
ChoiceFormat(const double * limits,const UBool * closures,const UnicodeString * formats,int32_t cnt)100 ChoiceFormat::ChoiceFormat(const double* limits,
101                            const UBool* closures,
102                            const UnicodeString* formats,
103                            int32_t cnt )
104 : constructorErrorCode(U_ZERO_ERROR),
105   msgPattern(constructorErrorCode)
106 {
107     setChoices(limits, closures, formats, cnt, constructorErrorCode);
108 }
109 
110 // -------------------------------------
111 // copy constructor
112 
ChoiceFormat(const ChoiceFormat & that)113 ChoiceFormat::ChoiceFormat(const    ChoiceFormat&   that)
114 : NumberFormat(that),
115   constructorErrorCode(that.constructorErrorCode),
116   msgPattern(that.msgPattern)
117 {
118 }
119 
120 // -------------------------------------
121 // Private constructor that creates a
122 // ChoiceFormat instance based on the
123 // pattern and populates UParseError
124 
ChoiceFormat(const UnicodeString & newPattern,UParseError & parseError,UErrorCode & status)125 ChoiceFormat::ChoiceFormat(const UnicodeString& newPattern,
126                            UParseError& parseError,
127                            UErrorCode& status)
128 : constructorErrorCode(status),
129   msgPattern(status)
130 {
131     applyPattern(newPattern,parseError, status);
132 }
133 // -------------------------------------
134 
135 UBool
operator ==(const Format & that) const136 ChoiceFormat::operator==(const Format& that) const
137 {
138     if (this == &that) return TRUE;
139     if (!NumberFormat::operator==(that)) return FALSE;
140     ChoiceFormat& thatAlias = (ChoiceFormat&)that;
141     return msgPattern == thatAlias.msgPattern;
142 }
143 
144 // -------------------------------------
145 // copy constructor
146 
147 const ChoiceFormat&
operator =(const ChoiceFormat & that)148 ChoiceFormat::operator=(const   ChoiceFormat& that)
149 {
150     if (this != &that) {
151         NumberFormat::operator=(that);
152         constructorErrorCode = that.constructorErrorCode;
153         msgPattern = that.msgPattern;
154     }
155     return *this;
156 }
157 
158 // -------------------------------------
159 
~ChoiceFormat()160 ChoiceFormat::~ChoiceFormat()
161 {
162 }
163 
164 // -------------------------------------
165 
166 /**
167  * Convert a double value to a string without the overhead of NumberFormat.
168  */
169 UnicodeString&
dtos(double value,UnicodeString & string)170 ChoiceFormat::dtos(double value,
171                    UnicodeString& string)
172 {
173     /* Buffer to contain the digits and any extra formatting stuff. */
174     char temp[DBL_DIG + 16];
175     char *itrPtr = temp;
176     char *expPtr;
177 
178     sprintf(temp, "%.*g", DBL_DIG, value);
179 
180     /* Find and convert the decimal point.
181        Using setlocale on some machines will cause sprintf to use a comma for certain locales.
182     */
183     while (*itrPtr && (*itrPtr == '-' || isdigit(*itrPtr))) {
184         itrPtr++;
185     }
186     if (*itrPtr != 0 && *itrPtr != 'e') {
187         /* We reached something that looks like a decimal point.
188         In case someone used setlocale(), which changes the decimal point. */
189         *itrPtr = '.';
190         itrPtr++;
191     }
192     /* Search for the exponent */
193     while (*itrPtr && *itrPtr != 'e') {
194         itrPtr++;
195     }
196     if (*itrPtr == 'e') {
197         itrPtr++;
198         /* Verify the exponent sign */
199         if (*itrPtr == '+' || *itrPtr == '-') {
200             itrPtr++;
201         }
202         /* Remove leading zeros. You will see this on Windows machines. */
203         expPtr = itrPtr;
204         while (*itrPtr == '0') {
205             itrPtr++;
206         }
207         if (*itrPtr && expPtr != itrPtr) {
208             /* Shift the exponent without zeros. */
209             while (*itrPtr) {
210                 *(expPtr++)  = *(itrPtr++);
211             }
212             // NULL terminate
213             *expPtr = 0;
214         }
215     }
216 
217     string = UnicodeString(temp, -1, US_INV);    /* invariant codepage */
218     return string;
219 }
220 
221 // -------------------------------------
222 // calls the overloaded applyPattern method.
223 
224 void
applyPattern(const UnicodeString & pattern,UErrorCode & status)225 ChoiceFormat::applyPattern(const UnicodeString& pattern,
226                            UErrorCode& status)
227 {
228     msgPattern.parseChoiceStyle(pattern, NULL, status);
229     constructorErrorCode = status;
230 }
231 
232 // -------------------------------------
233 // Applies the pattern to this ChoiceFormat instance.
234 
235 void
applyPattern(const UnicodeString & pattern,UParseError & parseError,UErrorCode & status)236 ChoiceFormat::applyPattern(const UnicodeString& pattern,
237                            UParseError& parseError,
238                            UErrorCode& status)
239 {
240     msgPattern.parseChoiceStyle(pattern, &parseError, status);
241     constructorErrorCode = status;
242 }
243 // -------------------------------------
244 // Returns the input pattern string.
245 
246 UnicodeString&
toPattern(UnicodeString & result) const247 ChoiceFormat::toPattern(UnicodeString& result) const
248 {
249     return result = msgPattern.getPatternString();
250 }
251 
252 // -------------------------------------
253 // Sets the limit and format arrays.
254 void
setChoices(const double * limits,const UnicodeString * formats,int32_t cnt)255 ChoiceFormat::setChoices(  const double* limits,
256                            const UnicodeString* formats,
257                            int32_t cnt )
258 {
259     UErrorCode errorCode = U_ZERO_ERROR;
260     setChoices(limits, NULL, formats, cnt, errorCode);
261 }
262 
263 // -------------------------------------
264 // Sets the limit and format arrays.
265 void
setChoices(const double * limits,const UBool * closures,const UnicodeString * formats,int32_t cnt)266 ChoiceFormat::setChoices(  const double* limits,
267                            const UBool* closures,
268                            const UnicodeString* formats,
269                            int32_t cnt )
270 {
271     UErrorCode errorCode = U_ZERO_ERROR;
272     setChoices(limits, closures, formats, cnt, errorCode);
273 }
274 
275 void
setChoices(const double * limits,const UBool * closures,const UnicodeString * formats,int32_t count,UErrorCode & errorCode)276 ChoiceFormat::setChoices(const double* limits,
277                          const UBool* closures,
278                          const UnicodeString* formats,
279                          int32_t count,
280                          UErrorCode &errorCode) {
281     if (U_FAILURE(errorCode)) {
282         return;
283     }
284     if (limits == NULL || formats == NULL) {
285         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
286         return;
287     }
288     // Reconstruct the original input pattern.
289     // Modified version of the pre-ICU 4.8 toPattern() implementation.
290     UnicodeString result;
291     for (int32_t i = 0; i < count; ++i) {
292         if (i != 0) {
293             result += VERTICAL_BAR;
294         }
295         UnicodeString buf;
296         if (uprv_isPositiveInfinity(limits[i])) {
297             result += INFINITY;
298         } else if (uprv_isNegativeInfinity(limits[i])) {
299             result += MINUS;
300             result += INFINITY;
301         } else {
302             result += dtos(limits[i], buf);
303         }
304         if (closures != NULL && closures[i]) {
305             result += LESS_THAN;
306         } else {
307             result += LESS_EQUAL;
308         }
309         // Append formats[i], using quotes if there are special
310         // characters.  Single quotes themselves must be escaped in
311         // either case.
312         const UnicodeString& text = formats[i];
313         int32_t textLength = text.length();
314         int32_t nestingLevel = 0;
315         for (int32_t j = 0; j < textLength; ++j) {
316             UChar c = text[j];
317             if (c == SINGLE_QUOTE && nestingLevel == 0) {
318                 // Double each top-level apostrophe.
319                 result.append(c);
320             } else if (c == VERTICAL_BAR && nestingLevel == 0) {
321                 // Surround each pipe symbol with apostrophes for quoting.
322                 // If the next character is an apostrophe, then that will be doubled,
323                 // and although the parser will see the apostrophe pairs beginning
324                 // and ending one character earlier than our doubling, the result
325                 // is as desired.
326                 //   | -> '|'
327                 //   |' -> '|'''
328                 //   |'' -> '|''''' etc.
329                 result.append(SINGLE_QUOTE).append(c).append(SINGLE_QUOTE);
330                 continue;  // Skip the append(c) at the end of the loop body.
331             } else if (c == LEFT_CURLY_BRACE) {
332                 ++nestingLevel;
333             } else if (c == RIGHT_CURLY_BRACE && nestingLevel > 0) {
334                 --nestingLevel;
335             }
336             result.append(c);
337         }
338     }
339     // Apply the reconstructed pattern.
340     applyPattern(result, errorCode);
341 }
342 
343 // -------------------------------------
344 // Gets the limit array.
345 
346 const double*
getLimits(int32_t & cnt) const347 ChoiceFormat::getLimits(int32_t& cnt) const
348 {
349     cnt = 0;
350     return NULL;
351 }
352 
353 // -------------------------------------
354 // Gets the closures array.
355 
356 const UBool*
getClosures(int32_t & cnt) const357 ChoiceFormat::getClosures(int32_t& cnt) const
358 {
359     cnt = 0;
360     return NULL;
361 }
362 
363 // -------------------------------------
364 // Gets the format array.
365 
366 const UnicodeString*
getFormats(int32_t & cnt) const367 ChoiceFormat::getFormats(int32_t& cnt) const
368 {
369     cnt = 0;
370     return NULL;
371 }
372 
373 // -------------------------------------
374 // Formats an int64 number, it's actually formatted as
375 // a double.  The returned format string may differ
376 // from the input number because of this.
377 
378 UnicodeString&
format(int64_t number,UnicodeString & appendTo,FieldPosition & status) const379 ChoiceFormat::format(int64_t number,
380                      UnicodeString& appendTo,
381                      FieldPosition& status) const
382 {
383     return format((double) number, appendTo, status);
384 }
385 
386 // -------------------------------------
387 // Formats an int32_t number, it's actually formatted as
388 // a double.
389 
390 UnicodeString&
format(int32_t number,UnicodeString & appendTo,FieldPosition & status) const391 ChoiceFormat::format(int32_t number,
392                      UnicodeString& appendTo,
393                      FieldPosition& status) const
394 {
395     return format((double) number, appendTo, status);
396 }
397 
398 // -------------------------------------
399 // Formats a double number.
400 
401 UnicodeString&
format(double number,UnicodeString & appendTo,FieldPosition &) const402 ChoiceFormat::format(double number,
403                      UnicodeString& appendTo,
404                      FieldPosition& /*pos*/) const
405 {
406     if (msgPattern.countParts() == 0) {
407         // No pattern was applied, or it failed.
408         return appendTo;
409     }
410     // Get the appropriate sub-message.
411     int32_t msgStart = findSubMessage(msgPattern, 0, number);
412     if (!MessageImpl::jdkAposMode(msgPattern)) {
413         int32_t patternStart = msgPattern.getPart(msgStart).getLimit();
414         int32_t msgLimit = msgPattern.getLimitPartIndex(msgStart);
415         appendTo.append(msgPattern.getPatternString(),
416                         patternStart,
417                         msgPattern.getPatternIndex(msgLimit) - patternStart);
418         return appendTo;
419     }
420     // JDK compatibility mode: Remove SKIP_SYNTAX.
421     return MessageImpl::appendSubMessageWithoutSkipSyntax(msgPattern, msgStart, appendTo);
422 }
423 
424 int32_t
findSubMessage(const MessagePattern & pattern,int32_t partIndex,double number)425 ChoiceFormat::findSubMessage(const MessagePattern &pattern, int32_t partIndex, double number) {
426     int32_t count = pattern.countParts();
427     int32_t msgStart;
428     // Iterate over (ARG_INT|DOUBLE, ARG_SELECTOR, message) tuples
429     // until ARG_LIMIT or end of choice-only pattern.
430     // Ignore the first number and selector and start the loop on the first message.
431     partIndex += 2;
432     for (;;) {
433         // Skip but remember the current sub-message.
434         msgStart = partIndex;
435         partIndex = pattern.getLimitPartIndex(partIndex);
436         if (++partIndex >= count) {
437             // Reached the end of the choice-only pattern.
438             // Return with the last sub-message.
439             break;
440         }
441         const MessagePattern::Part &part = pattern.getPart(partIndex++);
442         UMessagePatternPartType type = part.getType();
443         if (type == UMSGPAT_PART_TYPE_ARG_LIMIT) {
444             // Reached the end of the ChoiceFormat style.
445             // Return with the last sub-message.
446             break;
447         }
448         // part is an ARG_INT or ARG_DOUBLE
449         U_ASSERT(MessagePattern::Part::hasNumericValue(type));
450         double boundary = pattern.getNumericValue(part);
451         // Fetch the ARG_SELECTOR character.
452         int32_t selectorIndex = pattern.getPatternIndex(partIndex++);
453         UChar boundaryChar = pattern.getPatternString().charAt(selectorIndex);
454         if (boundaryChar == LESS_THAN ? !(number > boundary) : !(number >= boundary)) {
455             // The number is in the interval between the previous boundary and the current one.
456             // Return with the sub-message between them.
457             // The !(a>b) and !(a>=b) comparisons are equivalent to
458             // (a<=b) and (a<b) except they "catch" NaN.
459             break;
460         }
461     }
462     return msgStart;
463 }
464 
465 // -------------------------------------
466 // Formats an array of objects. Checks if the data type of the objects
467 // to get the right value for formatting.
468 
469 UnicodeString&
format(const Formattable * objs,int32_t cnt,UnicodeString & appendTo,FieldPosition & pos,UErrorCode & status) const470 ChoiceFormat::format(const Formattable* objs,
471                      int32_t cnt,
472                      UnicodeString& appendTo,
473                      FieldPosition& pos,
474                      UErrorCode& status) const
475 {
476     if(cnt < 0) {
477         status = U_ILLEGAL_ARGUMENT_ERROR;
478         return appendTo;
479     }
480     if (msgPattern.countParts() == 0) {
481         status = U_INVALID_STATE_ERROR;
482         return appendTo;
483     }
484 
485     for (int32_t i = 0; i < cnt; i++) {
486         double objDouble = objs[i].getDouble(status);
487         if (U_SUCCESS(status)) {
488             format(objDouble, appendTo, pos);
489         }
490     }
491 
492     return appendTo;
493 }
494 
495 // -------------------------------------
496 
497 void
parse(const UnicodeString & text,Formattable & result,ParsePosition & pos) const498 ChoiceFormat::parse(const UnicodeString& text,
499                     Formattable& result,
500                     ParsePosition& pos) const
501 {
502     result.setDouble(parseArgument(msgPattern, 0, text, pos));
503 }
504 
505 double
parseArgument(const MessagePattern & pattern,int32_t partIndex,const UnicodeString & source,ParsePosition & pos)506 ChoiceFormat::parseArgument(
507         const MessagePattern &pattern, int32_t partIndex,
508         const UnicodeString &source, ParsePosition &pos) {
509     // find the best number (defined as the one with the longest parse)
510     int32_t start = pos.getIndex();
511     int32_t furthest = start;
512     double bestNumber = uprv_getNaN();
513     double tempNumber = 0.0;
514     int32_t count = pattern.countParts();
515     while (partIndex < count && pattern.getPartType(partIndex) != UMSGPAT_PART_TYPE_ARG_LIMIT) {
516         tempNumber = pattern.getNumericValue(pattern.getPart(partIndex));
517         partIndex += 2;  // skip the numeric part and ignore the ARG_SELECTOR
518         int32_t msgLimit = pattern.getLimitPartIndex(partIndex);
519         int32_t len = matchStringUntilLimitPart(pattern, partIndex, msgLimit, source, start);
520         if (len >= 0) {
521             int32_t newIndex = start + len;
522             if (newIndex > furthest) {
523                 furthest = newIndex;
524                 bestNumber = tempNumber;
525                 if (furthest == source.length()) {
526                     break;
527                 }
528             }
529         }
530         partIndex = msgLimit + 1;
531     }
532     if (furthest == start) {
533         pos.setErrorIndex(start);
534     } else {
535         pos.setIndex(furthest);
536     }
537     return bestNumber;
538 }
539 
540 int32_t
matchStringUntilLimitPart(const MessagePattern & pattern,int32_t partIndex,int32_t limitPartIndex,const UnicodeString & source,int32_t sourceOffset)541 ChoiceFormat::matchStringUntilLimitPart(
542         const MessagePattern &pattern, int32_t partIndex, int32_t limitPartIndex,
543         const UnicodeString &source, int32_t sourceOffset) {
544     int32_t matchingSourceLength = 0;
545     const UnicodeString &msgString = pattern.getPatternString();
546     int32_t prevIndex = pattern.getPart(partIndex).getLimit();
547     for (;;) {
548         const MessagePattern::Part &part = pattern.getPart(++partIndex);
549         if (partIndex == limitPartIndex || part.getType() == UMSGPAT_PART_TYPE_SKIP_SYNTAX) {
550             int32_t index = part.getIndex();
551             int32_t length = index - prevIndex;
552             if (length != 0 && 0 != source.compare(sourceOffset, length, msgString, prevIndex, length)) {
553                 return -1;  // mismatch
554             }
555             matchingSourceLength += length;
556             if (partIndex == limitPartIndex) {
557                 return matchingSourceLength;
558             }
559             prevIndex = part.getLimit();  // SKIP_SYNTAX
560         }
561     }
562 }
563 
564 // -------------------------------------
565 
566 ChoiceFormat*
clone() const567 ChoiceFormat::clone() const
568 {
569     ChoiceFormat *aCopy = new ChoiceFormat(*this);
570     return aCopy;
571 }
572 
573 U_NAMESPACE_END
574 
575 #endif /* #if !UCONFIG_NO_FORMATTING */
576 
577 //eof
578