1 // © 2018 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 
4 #include "unicode/utypes.h"
5 
6 #if !UCONFIG_NO_FORMATTING
7 
8 // Allow implicit conversion from char16_t* to UnicodeString for this file:
9 // Helpful in toString methods and elsewhere.
10 #define UNISTR_FROM_STRING_EXPLICIT
11 
12 #include "numparse_types.h"
13 #include "numparse_currency.h"
14 #include "ucurrimp.h"
15 #include "unicode/errorcode.h"
16 #include "numparse_utils.h"
17 #include "string_segment.h"
18 
19 using namespace icu;
20 using namespace icu::numparse;
21 using namespace icu::numparse::impl;
22 
23 
CombinedCurrencyMatcher(const CurrencySymbols & currencySymbols,const DecimalFormatSymbols & dfs,parse_flags_t parseFlags,UErrorCode & status)24 CombinedCurrencyMatcher::CombinedCurrencyMatcher(const CurrencySymbols& currencySymbols, const DecimalFormatSymbols& dfs,
25                                                  parse_flags_t parseFlags, UErrorCode& status)
26         : fCurrency1(currencySymbols.getCurrencySymbol(status)),
27           fCurrency2(currencySymbols.getIntlCurrencySymbol(status)),
28           fUseFullCurrencyData(0 == (parseFlags & PARSE_FLAG_NO_FOREIGN_CURRENCY)),
29           afterPrefixInsert(dfs.getPatternForCurrencySpacing(UNUM_CURRENCY_INSERT, false, status)),
30           beforeSuffixInsert(dfs.getPatternForCurrencySpacing(UNUM_CURRENCY_INSERT, true, status)),
31           fLocaleName(dfs.getLocale().getName(), -1, status) {
32     utils::copyCurrencyCode(fCurrencyCode, currencySymbols.getIsoCode());
33 
34     // Pre-load the long names for the current locale and currency
35     // if we are parsing without the full currency data.
36     if (!fUseFullCurrencyData) {
37         for (int32_t i=0; i<StandardPlural::COUNT; i++) {
38             auto plural = static_cast<StandardPlural::Form>(i);
39             fLocalLongNames[i] = currencySymbols.getPluralName(plural, status);
40         }
41     }
42 
43     // TODO: Figure out how to make this faster and re-enable.
44     // Computing the "lead code points" set for fastpathing is too slow to use in production.
45     // See http://bugs.icu-project.org/trac/ticket/13584
46 //    // Compute the full set of characters that could be the first in a currency to allow for
47 //    // efficient smoke test.
48 //    fLeadCodePoints.add(fCurrency1.char32At(0));
49 //    fLeadCodePoints.add(fCurrency2.char32At(0));
50 //    fLeadCodePoints.add(beforeSuffixInsert.char32At(0));
51 //    uprv_currencyLeads(fLocaleName.data(), fLeadCodePoints, status);
52 //    // Always apply case mapping closure for currencies
53 //    fLeadCodePoints.closeOver(USET_ADD_CASE_MAPPINGS);
54 //    fLeadCodePoints.freeze();
55 }
56 
57 bool
match(StringSegment & segment,ParsedNumber & result,UErrorCode & status) const58 CombinedCurrencyMatcher::match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const {
59     if (result.currencyCode[0] != 0) {
60         return false;
61     }
62 
63     // Try to match a currency spacing separator.
64     int32_t initialOffset = segment.getOffset();
65     bool maybeMore = false;
66     if (result.seenNumber() && !beforeSuffixInsert.isEmpty()) {
67         int32_t overlap = segment.getCommonPrefixLength(beforeSuffixInsert);
68         if (overlap == beforeSuffixInsert.length()) {
69             segment.adjustOffset(overlap);
70             // Note: let currency spacing be a weak match. Don't update chars consumed.
71         }
72         maybeMore = maybeMore || overlap == segment.length();
73     }
74 
75     // Match the currency string, and reset if we didn't find one.
76     maybeMore = maybeMore || matchCurrency(segment, result, status);
77     if (result.currencyCode[0] == 0) {
78         segment.setOffset(initialOffset);
79         return maybeMore;
80     }
81 
82     // Try to match a currency spacing separator.
83     if (!result.seenNumber() && !afterPrefixInsert.isEmpty()) {
84         int32_t overlap = segment.getCommonPrefixLength(afterPrefixInsert);
85         if (overlap == afterPrefixInsert.length()) {
86             segment.adjustOffset(overlap);
87             // Note: let currency spacing be a weak match. Don't update chars consumed.
88         }
89         maybeMore = maybeMore || overlap == segment.length();
90     }
91 
92     return maybeMore;
93 }
94 
matchCurrency(StringSegment & segment,ParsedNumber & result,UErrorCode & status) const95 bool CombinedCurrencyMatcher::matchCurrency(StringSegment& segment, ParsedNumber& result,
96                                             UErrorCode& status) const {
97     bool maybeMore = false;
98 
99     int32_t overlap1;
100     if (!fCurrency1.isEmpty()) {
101         overlap1 = segment.getCaseSensitivePrefixLength(fCurrency1);
102     } else {
103         overlap1 = -1;
104     }
105     maybeMore = maybeMore || overlap1 == segment.length();
106     if (overlap1 == fCurrency1.length()) {
107         utils::copyCurrencyCode(result.currencyCode, fCurrencyCode);
108         segment.adjustOffset(overlap1);
109         result.setCharsConsumed(segment);
110         return maybeMore;
111     }
112 
113     int32_t overlap2;
114     if (!fCurrency2.isEmpty()) {
115         // ISO codes should be accepted case-insensitive.
116         // https://unicode-org.atlassian.net/browse/ICU-13696
117         overlap2 = segment.getCommonPrefixLength(fCurrency2);
118     } else {
119         overlap2 = -1;
120     }
121     maybeMore = maybeMore || overlap2 == segment.length();
122     if (overlap2 == fCurrency2.length()) {
123         utils::copyCurrencyCode(result.currencyCode, fCurrencyCode);
124         segment.adjustOffset(overlap2);
125         result.setCharsConsumed(segment);
126         return maybeMore;
127     }
128 
129     if (fUseFullCurrencyData) {
130         // Use the full currency data.
131         // NOTE: This call site should be improved with #13584.
132         const UnicodeString segmentString = segment.toTempUnicodeString();
133 
134         // Try to parse the currency
135         ParsePosition ppos(0);
136         int32_t partialMatchLen = 0;
137         uprv_parseCurrency(
138                 fLocaleName.data(),
139                 segmentString,
140                 ppos,
141                 UCURR_SYMBOL_NAME, // checks for both UCURR_SYMBOL_NAME and UCURR_LONG_NAME
142                 &partialMatchLen,
143                 result.currencyCode,
144                 status);
145         maybeMore = maybeMore || partialMatchLen == segment.length();
146 
147         if (U_SUCCESS(status) && ppos.getIndex() != 0) {
148             // Complete match.
149             // NOTE: The currency code should already be saved in the ParsedNumber.
150             segment.adjustOffset(ppos.getIndex());
151             result.setCharsConsumed(segment);
152             return maybeMore;
153         }
154 
155     } else {
156         // Use the locale long names.
157         int32_t longestFullMatch = 0;
158         for (int32_t i=0; i<StandardPlural::COUNT; i++) {
159             const UnicodeString& name = fLocalLongNames[i];
160             int32_t overlap = segment.getCommonPrefixLength(name);
161             if (overlap == name.length() && name.length() > longestFullMatch) {
162                 longestFullMatch = name.length();
163             }
164             maybeMore = maybeMore || overlap > 0;
165         }
166         if (longestFullMatch > 0) {
167             utils::copyCurrencyCode(result.currencyCode, fCurrencyCode);
168             segment.adjustOffset(longestFullMatch);
169             result.setCharsConsumed(segment);
170             return maybeMore;
171         }
172     }
173 
174     // No match found.
175     return maybeMore;
176 }
177 
smokeTest(const StringSegment &) const178 bool CombinedCurrencyMatcher::smokeTest(const StringSegment&) const {
179     // TODO: See constructor
180     return true;
181     //return segment.startsWith(fLeadCodePoints);
182 }
183 
toString() const184 UnicodeString CombinedCurrencyMatcher::toString() const {
185     return u"<CombinedCurrencyMatcher>";
186 }
187 
188 
189 #endif /* #if !UCONFIG_NO_FORMATTING */
190