1 // © 2018 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 
4 #include "unicode/utypes.h"
5 
6 #if !UCONFIG_NO_FORMATTING
7 
8 // Allow implicit conversion from char16_t* to UnicodeString for this file:
9 // Helpful in toString methods and elsewhere.
10 #define UNISTR_FROM_STRING_EXPLICIT
11 
12 #include "numparse_types.h"
13 #include "numparse_scientific.h"
14 #include "static_unicode_sets.h"
15 #include "string_segment.h"
16 
17 using namespace icu;
18 using namespace icu::numparse;
19 using namespace icu::numparse::impl;
20 
21 
22 namespace {
23 
minusSignSet()24 inline const UnicodeSet& minusSignSet() {
25     return *unisets::get(unisets::MINUS_SIGN);
26 }
27 
plusSignSet()28 inline const UnicodeSet& plusSignSet() {
29     return *unisets::get(unisets::PLUS_SIGN);
30 }
31 
32 } // namespace
33 
34 
ScientificMatcher(const DecimalFormatSymbols & dfs,const Grouper & grouper)35 ScientificMatcher::ScientificMatcher(const DecimalFormatSymbols& dfs, const Grouper& grouper)
36         : fExponentSeparatorString(dfs.getConstSymbol(DecimalFormatSymbols::kExponentialSymbol)),
37           fExponentMatcher(dfs, grouper, PARSE_FLAG_INTEGER_ONLY | PARSE_FLAG_GROUPING_DISABLED),
38           fIgnorablesMatcher(PARSE_FLAG_STRICT_IGNORABLES) {
39 
40     const UnicodeString& minusSign = dfs.getConstSymbol(DecimalFormatSymbols::kMinusSignSymbol);
41     if (minusSignSet().contains(minusSign)) {
42         fCustomMinusSign.setToBogus();
43     } else {
44         fCustomMinusSign = minusSign;
45     }
46 
47     const UnicodeString& plusSign = dfs.getConstSymbol(DecimalFormatSymbols::kPlusSignSymbol);
48     if (plusSignSet().contains(plusSign)) {
49         fCustomPlusSign.setToBogus();
50     } else {
51         fCustomPlusSign = plusSign;
52     }
53 }
54 
match(StringSegment & segment,ParsedNumber & result,UErrorCode & status) const55 bool ScientificMatcher::match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const {
56     // Only accept scientific notation after the mantissa.
57     if (!result.seenNumber()) {
58         return false;
59     }
60 
61     // Only accept one exponent per string.
62     if (0 != (result.flags & FLAG_HAS_EXPONENT)) {
63         return false;
64     }
65 
66     // First match the scientific separator, and then match another number after it.
67     // NOTE: This is guarded by the smoke test; no need to check fExponentSeparatorString length again.
68     int32_t initialOffset = segment.getOffset();
69     int32_t overlap = segment.getCommonPrefixLength(fExponentSeparatorString);
70     if (overlap == fExponentSeparatorString.length()) {
71         // Full exponent separator match.
72 
73         // First attempt to get a code point, returning true if we can't get one.
74         if (segment.length() == overlap) {
75             return true;
76         }
77         segment.adjustOffset(overlap);
78 
79         // Allow ignorables before the sign.
80         // Note: call site is guarded by the segment.length() check above.
81         // Note: the ignorables matcher should not touch the result.
82         fIgnorablesMatcher.match(segment, result, status);
83         if (segment.length() == 0) {
84             segment.setOffset(initialOffset);
85             return true;
86         }
87 
88         // Allow a sign, and then try to match digits.
89         int8_t exponentSign = 1;
90         if (segment.startsWith(minusSignSet())) {
91             exponentSign = -1;
92             segment.adjustOffsetByCodePoint();
93         } else if (segment.startsWith(plusSignSet())) {
94             segment.adjustOffsetByCodePoint();
95         } else if (segment.startsWith(fCustomMinusSign)) {
96             overlap = segment.getCommonPrefixLength(fCustomMinusSign);
97             if (overlap != fCustomMinusSign.length()) {
98                 // Partial custom sign match
99                 segment.setOffset(initialOffset);
100                 return true;
101             }
102             exponentSign = -1;
103             segment.adjustOffset(overlap);
104         } else if (segment.startsWith(fCustomPlusSign)) {
105             overlap = segment.getCommonPrefixLength(fCustomPlusSign);
106             if (overlap != fCustomPlusSign.length()) {
107                 // Partial custom sign match
108                 segment.setOffset(initialOffset);
109                 return true;
110             }
111             segment.adjustOffset(overlap);
112         }
113 
114         // Return true if the segment is empty.
115         if (segment.length() == 0) {
116             segment.setOffset(initialOffset);
117             return true;
118         }
119 
120         // Allow ignorables after the sign.
121         // Note: call site is guarded by the segment.length() check above.
122         // Note: the ignorables matcher should not touch the result.
123         fIgnorablesMatcher.match(segment, result, status);
124         if (segment.length() == 0) {
125             segment.setOffset(initialOffset);
126             return true;
127         }
128 
129         // We are supposed to accept E0 after NaN, so we need to make sure result.quantity is available.
130         bool wasBogus = result.quantity.bogus;
131         result.quantity.bogus = false;
132         int digitsOffset = segment.getOffset();
133         bool digitsReturnValue = fExponentMatcher.match(segment, result, exponentSign, status);
134         result.quantity.bogus = wasBogus;
135 
136         if (segment.getOffset() != digitsOffset) {
137             // At least one exponent digit was matched.
138             result.flags |= FLAG_HAS_EXPONENT;
139         } else {
140             // No exponent digits were matched
141             segment.setOffset(initialOffset);
142         }
143         return digitsReturnValue;
144 
145     } else if (overlap == segment.length()) {
146         // Partial exponent separator match
147         return true;
148     }
149 
150     // No match
151     return false;
152 }
153 
smokeTest(const StringSegment & segment) const154 bool ScientificMatcher::smokeTest(const StringSegment& segment) const {
155     return segment.startsWith(fExponentSeparatorString);
156 }
157 
toString() const158 UnicodeString ScientificMatcher::toString() const {
159     return u"<Scientific>";
160 }
161 
162 
163 #endif /* #if !UCONFIG_NO_FORMATTING */
164