1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 **********************************************************************
5 *   Copyright (C) 2014, International Business Machines
6 *   Corporation and others.  All Rights Reserved.
7 **********************************************************************
8 *
9 * scriptset.cpp
10 *
11 * created on: 2013 Jan 7
12 * created by: Andy Heninger
13 */
14 
15 #include "unicode/utypes.h"
16 
17 #include "unicode/uchar.h"
18 #include "unicode/unistr.h"
19 
20 #include "scriptset.h"
21 #include "uassert.h"
22 #include "cmemory.h"
23 
24 U_NAMESPACE_BEGIN
25 
26 //----------------------------------------------------------------------------
27 //
28 //  ScriptSet implementation
29 //
30 //----------------------------------------------------------------------------
ScriptSet()31 ScriptSet::ScriptSet() {
32     uprv_memset(bits, 0, sizeof(bits));
33 }
34 
~ScriptSet()35 ScriptSet::~ScriptSet() {
36 }
37 
// Copy constructor: the new set receives a byte-for-byte copy of
// the other set's bit array.
ScriptSet::ScriptSet(const ScriptSet &other) {
    uprv_memcpy(bits, other.bits, sizeof(bits));
}
41 
// Copy assignment: replaces this set's bit array with a copy of other's.
//
// @param other  the set to copy from.
// @return       a reference to this set.
ScriptSet & ScriptSet::operator =(const ScriptSet &other) {
    // Guard against self-assignment: copying an object onto itself would hand
    // memcpy fully overlapping source and destination, which is undefined.
    if (this != &other) {
        uprv_memcpy(bits, other.bits, sizeof(bits));
    }
    return *this;
}
46 
operator ==(const ScriptSet & other) const47 UBool ScriptSet::operator == (const ScriptSet &other) const {
48     for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
49         if (bits[i] != other.bits[i]) {
50             return FALSE;
51         }
52     }
53     return TRUE;
54 }
55 
test(UScriptCode script,UErrorCode & status) const56 UBool ScriptSet::test(UScriptCode script, UErrorCode &status) const {
57     if (U_FAILURE(status)) {
58         return FALSE;
59     }
60     if (script < 0 || (int32_t)script >= SCRIPT_LIMIT) {
61         status = U_ILLEGAL_ARGUMENT_ERROR;
62         return FALSE;
63     }
64     uint32_t index = script / 32;
65     uint32_t bit   = 1 << (script & 31);
66     return ((bits[index] & bit) != 0);
67 }
68 
69 
set(UScriptCode script,UErrorCode & status)70 ScriptSet &ScriptSet::set(UScriptCode script, UErrorCode &status) {
71     if (U_FAILURE(status)) {
72         return *this;
73     }
74     if (script < 0 || (int32_t)script >= SCRIPT_LIMIT) {
75         status = U_ILLEGAL_ARGUMENT_ERROR;
76         return *this;
77     }
78     uint32_t index = script / 32;
79     uint32_t bit   = 1 << (script & 31);
80     bits[index] |= bit;
81     return *this;
82 }
83 
reset(UScriptCode script,UErrorCode & status)84 ScriptSet &ScriptSet::reset(UScriptCode script, UErrorCode &status) {
85     if (U_FAILURE(status)) {
86         return *this;
87     }
88     if (script < 0 || (int32_t)script >= SCRIPT_LIMIT) {
89         status = U_ILLEGAL_ARGUMENT_ERROR;
90         return *this;
91     }
92     uint32_t index = script / 32;
93     uint32_t bit   = 1 << (script & 31);
94     bits[index] &= ~bit;
95     return *this;
96 }
97 
98 
99 
// In-place union: every bit set in other becomes set in this set as well.
//
// @param other  the set whose members are added.
// @return       a reference to this set.
ScriptSet &ScriptSet::Union(const ScriptSet &other) {
    int32_t word = UPRV_LENGTHOF(bits);
    while (word-- > 0) {
        bits[word] = bits[word] | other.bits[word];
    }
    return *this;
}
106 
// In-place intersection: only bits set in both this set and other stay set.
//
// @param other  the set to intersect with.
// @return       a reference to this set.
ScriptSet &ScriptSet::intersect(const ScriptSet &other) {
    int32_t word = UPRV_LENGTHOF(bits);
    while (word-- > 0) {
        bits[word] = bits[word] & other.bits[word];
    }
    return *this;
}
113 
intersect(UScriptCode script,UErrorCode & status)114 ScriptSet &ScriptSet::intersect(UScriptCode script, UErrorCode &status) {
115     ScriptSet t;
116     t.set(script, status);
117     if (U_SUCCESS(status)) {
118         this->intersect(t);
119     }
120     return *this;
121 }
122 
intersects(const ScriptSet & other) const123 UBool ScriptSet::intersects(const ScriptSet &other) const {
124     for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
125         if ((bits[i] & other.bits[i]) != 0) {
126             return true;
127         }
128     }
129     return false;
130 }
131 
contains(const ScriptSet & other) const132 UBool ScriptSet::contains(const ScriptSet &other) const {
133     ScriptSet t(*this);
134     t.intersect(other);
135     return (t == other);
136 }
137 
138 
setAll()139 ScriptSet &ScriptSet::setAll() {
140     for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
141         bits[i] = 0xffffffffu;
142     }
143     return *this;
144 }
145 
146 
resetAll()147 ScriptSet &ScriptSet::resetAll() {
148     uprv_memset(bits, 0, sizeof(bits));
149     return *this;
150 }
151 
countMembers() const152 int32_t ScriptSet::countMembers() const {
153     // This bit counter is good for sparse numbers of '1's, which is
154     //  very much the case that we will usually have.
155     int32_t count = 0;
156     for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
157         uint32_t x = bits[i];
158         while (x > 0) {
159             count++;
160             x &= (x - 1);    // and off the least significant one bit.
161         }
162     }
163     return count;
164 }
165 
hashCode() const166 int32_t ScriptSet::hashCode() const {
167     int32_t hash = 0;
168     for (int32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
169         hash ^= bits[i];
170     }
171     return hash;
172 }
173 
nextSetBit(int32_t fromIndex) const174 int32_t ScriptSet::nextSetBit(int32_t fromIndex) const {
175     // TODO: Wants a better implementation.
176     if (fromIndex < 0) {
177         return -1;
178     }
179     UErrorCode status = U_ZERO_ERROR;
180     for (int32_t scriptIndex = fromIndex; scriptIndex < SCRIPT_LIMIT; scriptIndex++) {
181         if (test((UScriptCode)scriptIndex, status)) {
182             return scriptIndex;
183         }
184     }
185     return -1;
186 }
187 
isEmpty() const188 UBool ScriptSet::isEmpty() const {
189     for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
190         if (bits[i] != 0) {
191             return FALSE;
192         }
193     }
194     return TRUE;
195 }
196 
displayScripts(UnicodeString & dest) const197 UnicodeString &ScriptSet::displayScripts(UnicodeString &dest) const {
198     UBool firstTime = TRUE;
199     for (int32_t i = nextSetBit(0); i >= 0; i = nextSetBit(i + 1)) {
200         if (!firstTime) {
201             dest.append((UChar)0x20);
202         }
203         firstTime = FALSE;
204         const char *scriptName = uscript_getShortName((UScriptCode(i)));
205         dest.append(UnicodeString(scriptName, -1, US_INV));
206     }
207     return dest;
208 }
209 
parseScripts(const UnicodeString & scriptString,UErrorCode & status)210 ScriptSet &ScriptSet::parseScripts(const UnicodeString &scriptString, UErrorCode &status) {
211     resetAll();
212     if (U_FAILURE(status)) {
213         return *this;
214     }
215     UnicodeString oneScriptName;
216     for (int32_t i=0; i<scriptString.length();) {
217         UChar32 c = scriptString.char32At(i);
218         i = scriptString.moveIndex32(i, 1);
219         if (!u_isUWhiteSpace(c)) {
220             oneScriptName.append(c);
221             if (i < scriptString.length()) {
222                 continue;
223             }
224         }
225         if (oneScriptName.length() > 0) {
226             char buf[40];
227             oneScriptName.extract(0, oneScriptName.length(), buf, sizeof(buf)-1, US_INV);
228             buf[sizeof(buf)-1] = 0;
229             int32_t sc = u_getPropertyValueEnum(UCHAR_SCRIPT, buf);
230             if (sc == UCHAR_INVALID_CODE) {
231                 status = U_ILLEGAL_ARGUMENT_ERROR;
232             } else {
233                 this->set((UScriptCode)sc, status);
234             }
235             if (U_FAILURE(status)) {
236                 return *this;
237             }
238             oneScriptName.remove();
239         }
240     }
241     return *this;
242 }
243 
setScriptExtensions(UChar32 codePoint,UErrorCode & status)244 void ScriptSet::setScriptExtensions(UChar32 codePoint, UErrorCode& status) {
245     if (U_FAILURE(status)) { return; }
246     static const int32_t FIRST_GUESS_SCRIPT_CAPACITY = 20;
247     MaybeStackArray<UScriptCode,FIRST_GUESS_SCRIPT_CAPACITY> scripts;
248     UErrorCode internalStatus = U_ZERO_ERROR;
249     int32_t script_count = -1;
250 
251     while (TRUE) {
252         script_count = uscript_getScriptExtensions(
253             codePoint, scripts.getAlias(), scripts.getCapacity(), &internalStatus);
254         if (internalStatus == U_BUFFER_OVERFLOW_ERROR) {
255             // Need to allocate more space
256             if (scripts.resize(script_count) == NULL) {
257                 status = U_MEMORY_ALLOCATION_ERROR;
258                 return;
259             }
260             internalStatus = U_ZERO_ERROR;
261         } else {
262             break;
263         }
264     }
265 
266     // Check if we failed for some reason other than buffer overflow
267     if (U_FAILURE(internalStatus)) {
268         status = internalStatus;
269         return;
270     }
271 
272     // Load the scripts into the ScriptSet and return
273     for (int32_t i = 0; i < script_count; i++) {
274         this->set(scripts[i], status);
275         if (U_FAILURE(status)) { return; }
276     }
277 }
278 
279 U_NAMESPACE_END
280 
281 U_CAPI UBool U_EXPORT2
uhash_equalsScriptSet(const UElement key1,const UElement key2)282 uhash_equalsScriptSet(const UElement key1, const UElement key2) {
283     icu::ScriptSet *s1 = static_cast<icu::ScriptSet *>(key1.pointer);
284     icu::ScriptSet *s2 = static_cast<icu::ScriptSet *>(key2.pointer);
285     return (*s1 == *s2);
286 }
287 
288 U_CAPI int8_t U_EXPORT2
uhash_compareScriptSet(UElement key0,UElement key1)289 uhash_compareScriptSet(UElement key0, UElement key1) {
290     icu::ScriptSet *s0 = static_cast<icu::ScriptSet *>(key0.pointer);
291     icu::ScriptSet *s1 = static_cast<icu::ScriptSet *>(key1.pointer);
292     int32_t diff = s0->countMembers() - s1->countMembers();
293     if (diff != 0) return static_cast<UBool>(diff);
294     int32_t i0 = s0->nextSetBit(0);
295     int32_t i1 = s1->nextSetBit(0);
296     while ((diff = i0-i1) == 0 && i0 > 0) {
297         i0 = s0->nextSetBit(i0+1);
298         i1 = s1->nextSetBit(i1+1);
299     }
300     return (int8_t)diff;
301 }
302 
303 U_CAPI int32_t U_EXPORT2
uhash_hashScriptSet(const UElement key)304 uhash_hashScriptSet(const UElement key) {
305     icu::ScriptSet *s = static_cast<icu::ScriptSet *>(key.pointer);
306     return s->hashCode();
307 }
308 
309 U_CAPI void U_EXPORT2
uhash_deleteScriptSet(void * obj)310 uhash_deleteScriptSet(void *obj) {
311     icu::ScriptSet *s = static_cast<icu::ScriptSet *>(obj);
312     delete s;
313 }
314