1 // © 2018 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 
4 // This file contains utilities to deal with static-allocated UnicodeSets.
5 //
6 // Common use case: you write a "private static final" UnicodeSet in Java, and
7 // want something similarly easy in C++.  Originally written for number
8 // parsing, but this header can be used for other applications.
9 //
10 // Main entrypoint: `unisets::get(unisets::MY_SET_ID_HERE)`
11 //
12 // This file is in common instead of i18n because it is needed by ucurr.cpp.
13 //
14 // Author: sffc
15 
16 #include "unicode/utypes.h"
17 
18 #if !UCONFIG_NO_FORMATTING
19 #ifndef __STATIC_UNICODE_SETS_H__
20 #define __STATIC_UNICODE_SETS_H__
21 
22 #include "unicode/uniset.h"
23 #include "unicode/unistr.h"
24 
25 U_NAMESPACE_BEGIN
26 namespace unisets {
27 
/**
 * Keys identifying the statically allocated UnicodeSets served by this header.
 *
 * Apart from NONE, the enumerators take their values implicitly in
 * declaration order starting from EMPTY == 0, so UNISETS_KEY_COUNT ends up
 * equal to the number of real keys.
 */
enum Key {
    NONE = -1,  // Sentinel meaning "null"; used by chooseFrom().
    EMPTY = 0,  // Key used to get an empty UnicodeSet.

    // ---- Ignorables ----
    DEFAULT_IGNORABLES,
    STRICT_IGNORABLES,

    // ---- Separators ----
    COMMA,                      // Superset of STRICT_COMMA.
    PERIOD,                     // Superset of STRICT_PERIOD.
    STRICT_COMMA,
    STRICT_PERIOD,
    APOSTROPHE_SIGN,
    OTHER_GROUPING_SEPARATORS,
    ALL_SEPARATORS,             // Union of COMMA, PERIOD, and OTHER_GROUPING_SEPARATORS.
    STRICT_ALL_SEPARATORS,      // Union of STRICT_COMMA, STRICT_PERIOD, and OTHER_GROUPING_SEPARATORS.

    // ---- Symbols ----
    MINUS_SIGN,
    PLUS_SIGN,
    PERCENT_SIGN,
    PERMILLE_SIGN,
    INFINITY_SIGN,

    // ---- Currency symbols ----
    DOLLAR_SIGN,
    POUND_SIGN,
    RUPEE_SIGN,
    YEN_SIGN,
    WON_SIGN,

    // ---- Other ----
    DIGITS,

    // ---- Separators combined with digits (for lead code points) ----
    DIGITS_OR_ALL_SEPARATORS,
    DIGITS_OR_STRICT_ALL_SEPARATORS,

    // Must stay last: the number of elements in the enum.
    UNISETS_KEY_COUNT
};
77 
78 /**
79  * Gets the static-allocated UnicodeSet according to the provided key. The
80  * pointer will be deleted during u_cleanup(); the caller should NOT delete it.
81  *
82  * Exported as U_COMMON_API for ucurr.cpp
83  *
84  * This method is always safe and OK to chain: in the case of a memory or other
85  * error, it returns an empty set from static memory.
86  *
87  * Example:
88  *
89  *     UBool hasIgnorables = unisets::get(unisets::DEFAULT_IGNORABLES)->contains(...);
90  *
91  * @param key The desired UnicodeSet according to the enum in this file.
92  * @return The requested UnicodeSet. Guaranteed to be frozen and non-null, but
93  *         may be empty if an error occurred during data loading.
94  */
95 U_COMMON_API const UnicodeSet* get(Key key);
96 
97 /**
98  * Checks if the UnicodeSet given by key1 contains the given string.
99  *
100  * Exported as U_COMMON_API for numparse_decimal.cpp
101  *
102  * @param str The string to check.
103  * @param key1 The set to check.
104  * @return key1 if the set contains str, or NONE if not.
105  */
106 U_COMMON_API Key chooseFrom(UnicodeString str, Key key1);
107 
108 /**
109  * Checks if the UnicodeSet given by either key1 or key2 contains the string.
110  *
111  * Exported as U_COMMON_API for numparse_decimal.cpp
112  *
113  * @param str The string to check.
114  * @param key1 The first set to check.
115  * @param key2 The second set to check.
116  * @return key1 if that set contains str; key2 if that set contains str; or
117  *         NONE if neither set contains str.
118  */
119 U_COMMON_API Key chooseFrom(UnicodeString str, Key key1, Key key2);
120 
121 // TODO: Load these from data: ICU-20108
122 // Unused in C++:
123 // Key chooseCurrency(UnicodeString str);
124 // Used instead:
125 static const struct {
126     Key key;
127     UChar32 exemplar;
128 } kCurrencyEntries[] = {
129     {DOLLAR_SIGN, u'$'},
130     {POUND_SIGN, u'£'},
131     {RUPEE_SIGN, u'₹'},
132     {YEN_SIGN, u'¥'},
133     {WON_SIGN, u'₩'},
134 };
135 
136 } // namespace unisets
137 U_NAMESPACE_END
138 
139 #endif //__STATIC_UNICODE_SETS_H__
140 #endif /* #if !UCONFIG_NO_FORMATTING */
141