1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 * Copyright (C) 1996-2012, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 *******************************************************************************
8 */
9 //===============================================================================
10 //
11 // File sortkey.cpp
12 //
13 //
14 //
15 // Created by: Helena Shih
16 //
17 // Modification History:
18 //
19 //  Date         Name          Description
20 //
21 //  6/20/97      helena        Java class name change.
22 //  6/23/97      helena        Added comments to make code more readable.
23 //  6/26/98      erm           Changed to use byte arrays instead of UnicodeString
24 //  7/31/98      erm           hashCode: minimum inc should be 2 not 1,
25 //                             Cleaned up operator=
26 // 07/12/99      helena        HPUX 11 CC port.
27 // 03/06/01      synwee        Modified compareTo, to handle the result of
28 //                             2 string similar in contents, but one is longer
29 //                             than the other
30 //===============================================================================
31 
32 #include "unicode/utypes.h"
33 
34 #if !UCONFIG_NO_COLLATION
35 
36 #include "unicode/sortkey.h"
37 #include "cmemory.h"
38 #include "uelement.h"
39 #include "ustr_imp.h"
40 
41 U_NAMESPACE_BEGIN
42 
43 // A hash code of kInvalidHashCode indicates that the hash code needs
44 // to be computed. A hash code of kEmptyHashCode is used for empty keys
45 // and for any key whose computed hash code is kInvalidHashCode.
46 static const int32_t kInvalidHashCode = 0;
47 static const int32_t kEmptyHashCode = 1;
48 // The "bogus hash code" replaces a separate fBogus flag.
49 static const int32_t kBogusHashCode = 2;
50 
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationKey)51 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationKey)
52 
53 CollationKey::CollationKey()
54     : UObject(), fFlagAndLength(0),
55       fHashCode(kEmptyHashCode)
56 {
57 }
58 
59 // Create a collation key from a bit array.
CollationKey(const uint8_t * newValues,int32_t count)60 CollationKey::CollationKey(const uint8_t* newValues, int32_t count)
61     : UObject(), fFlagAndLength(count),
62       fHashCode(kInvalidHashCode)
63 {
64     if (count < 0 || (newValues == NULL && count != 0) ||
65             (count > getCapacity() && reallocate(count, 0) == NULL)) {
66         setToBogus();
67         return;
68     }
69 
70     if (count > 0) {
71         uprv_memcpy(getBytes(), newValues, count);
72     }
73 }
74 
CollationKey(const CollationKey & other)75 CollationKey::CollationKey(const CollationKey& other)
76     : UObject(other), fFlagAndLength(other.getLength()),
77       fHashCode(other.fHashCode)
78 {
79     if (other.isBogus())
80     {
81         setToBogus();
82         return;
83     }
84 
85     int32_t length = fFlagAndLength;
86     if (length > getCapacity() && reallocate(length, 0) == NULL) {
87         setToBogus();
88         return;
89     }
90 
91     if (length > 0) {
92         uprv_memcpy(getBytes(), other.getBytes(), length);
93     }
94 }
95 
~CollationKey()96 CollationKey::~CollationKey()
97 {
98     if(fFlagAndLength < 0) { uprv_free(fUnion.fFields.fBytes); }
99 }
100 
reallocate(int32_t newCapacity,int32_t length)101 uint8_t *CollationKey::reallocate(int32_t newCapacity, int32_t length) {
102     uint8_t *newBytes = static_cast<uint8_t *>(uprv_malloc(newCapacity));
103     if(newBytes == NULL) { return NULL; }
104     if(length > 0) {
105         uprv_memcpy(newBytes, getBytes(), length);
106     }
107     if(fFlagAndLength < 0) { uprv_free(fUnion.fFields.fBytes); }
108     fUnion.fFields.fBytes = newBytes;
109     fUnion.fFields.fCapacity = newCapacity;
110     fFlagAndLength |= 0x80000000;
111     return newBytes;
112 }
113 
setLength(int32_t newLength)114 void CollationKey::setLength(int32_t newLength) {
115     // U_ASSERT(newLength >= 0 && newLength <= getCapacity());
116     fFlagAndLength = (fFlagAndLength & 0x80000000) | newLength;
117     fHashCode = kInvalidHashCode;
118 }
119 
120 // set the key to an empty state
121 CollationKey&
reset()122 CollationKey::reset()
123 {
124     fFlagAndLength &= 0x80000000;
125     fHashCode = kEmptyHashCode;
126 
127     return *this;
128 }
129 
130 // set the key to a "bogus" or invalid state
131 CollationKey&
setToBogus()132 CollationKey::setToBogus()
133 {
134     fFlagAndLength &= 0x80000000;
135     fHashCode = kBogusHashCode;
136 
137     return *this;
138 }
139 
140 UBool
operator ==(const CollationKey & source) const141 CollationKey::operator==(const CollationKey& source) const
142 {
143     return getLength() == source.getLength() &&
144             (this == &source ||
145              uprv_memcmp(getBytes(), source.getBytes(), getLength()) == 0);
146 }
147 
148 const CollationKey&
operator =(const CollationKey & other)149 CollationKey::operator=(const CollationKey& other)
150 {
151     if (this != &other)
152     {
153         if (other.isBogus())
154         {
155             return setToBogus();
156         }
157 
158         int32_t length = other.getLength();
159         if (length > getCapacity() && reallocate(length, 0) == NULL) {
160             return setToBogus();
161         }
162         if (length > 0) {
163             uprv_memcpy(getBytes(), other.getBytes(), length);
164         }
165         fFlagAndLength = (fFlagAndLength & 0x80000000) | length;
166         fHashCode = other.fHashCode;
167     }
168 
169     return *this;
170 }
171 
172 // Bitwise comparison for the collation keys.
173 Collator::EComparisonResult
compareTo(const CollationKey & target) const174 CollationKey::compareTo(const CollationKey& target) const
175 {
176     UErrorCode errorCode = U_ZERO_ERROR;
177     return static_cast<Collator::EComparisonResult>(compareTo(target, errorCode));
178 }
179 
180 // Bitwise comparison for the collation keys.
181 UCollationResult
compareTo(const CollationKey & target,UErrorCode & status) const182 CollationKey::compareTo(const CollationKey& target, UErrorCode &status) const
183 {
184   if(U_SUCCESS(status)) {
185     const uint8_t *src = getBytes();
186     const uint8_t *tgt = target.getBytes();
187 
188     // are we comparing the same string
189     if (src == tgt)
190         return  UCOL_EQUAL;
191 
192     UCollationResult result;
193 
194     // are we comparing different lengths?
195     int32_t minLength = getLength();
196     int32_t targetLength = target.getLength();
197     if (minLength < targetLength) {
198         result = UCOL_LESS;
199     } else if (minLength == targetLength) {
200         result = UCOL_EQUAL;
201     } else {
202         minLength = targetLength;
203         result = UCOL_GREATER;
204     }
205 
206     if (minLength > 0) {
207         int diff = uprv_memcmp(src, tgt, minLength);
208         if (diff > 0) {
209             return UCOL_GREATER;
210         }
211         else
212             if (diff < 0) {
213                 return UCOL_LESS;
214             }
215     }
216 
217     return result;
218   } else {
219     return UCOL_EQUAL;
220   }
221 }
222 
223 #ifdef U_USE_COLLATION_KEY_DEPRECATES
224 // Create a copy of the byte array.
225 uint8_t*
toByteArray(int32_t & count) const226 CollationKey::toByteArray(int32_t& count) const
227 {
228     uint8_t *result = (uint8_t*) uprv_malloc( sizeof(uint8_t) * fCount );
229 
230     if (result == NULL)
231     {
232         count = 0;
233     }
234     else
235     {
236         count = fCount;
237         if (count > 0) {
238             uprv_memcpy(result, fBytes, fCount);
239         }
240     }
241 
242     return result;
243 }
244 #endif
245 
246 static int32_t
computeHashCode(const uint8_t * key,int32_t length)247 computeHashCode(const uint8_t *key, int32_t  length) {
248     const char *s = reinterpret_cast<const char *>(key);
249     int32_t hash;
250     if (s == NULL || length == 0) {
251         hash = kEmptyHashCode;
252     } else {
253         hash = ustr_hashCharsN(s, length);
254         if (hash == kInvalidHashCode || hash == kBogusHashCode) {
255             hash = kEmptyHashCode;
256         }
257     }
258     return hash;
259 }
260 
261 int32_t
hashCode() const262 CollationKey::hashCode() const
263 {
264     // (Cribbed from UnicodeString)
265     // We cache the hashCode; when it becomes invalid, due to any change to the
266     // string, we note this by setting it to kInvalidHashCode. [LIU]
267 
268     // Note: This method is semantically const, but physically non-const.
269 
270     if (fHashCode == kInvalidHashCode)
271     {
272         fHashCode = computeHashCode(getBytes(), getLength());
273     }
274 
275     return fHashCode;
276 }
277 
278 U_NAMESPACE_END
279 
280 U_CAPI int32_t U_EXPORT2
ucol_keyHashCode(const uint8_t * key,int32_t length)281 ucol_keyHashCode(const uint8_t *key,
282                        int32_t  length)
283 {
284     return icu::computeHashCode(key, length);
285 }
286 
287 #endif /* #if !UCONFIG_NO_COLLATION */
288