1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 ******************************************************************************
5 *   Copyright (C) 1997-2016, International Business Machines
6 *   Corporation and others.  All Rights Reserved.
7 ******************************************************************************
8 *   Date        Name        Description
9 *   03/22/00    aliu        Adapted from original C++ ICU Hashtable.
10 *   07/06/01    aliu        Modified to support int32_t keys on
11 *                           platforms with sizeof(void*) < 32.
12 ******************************************************************************
13 */
14 
15 #include "uhash.h"
16 #include "unicode/ustring.h"
17 #include "cstring.h"
18 #include "cmemory.h"
19 #include "uassert.h"
20 #include "ustr_imp.h"
21 
22 /* This hashtable is implemented as a double hash.  All elements are
23  * stored in a single array with no secondary storage for collision
24  * resolution (no linked list, etc.).  When there is a hash collision
25  * (when two unequal keys have the same hashcode) we resolve this by
26  * using a secondary hash.  The secondary hash is an increment
27  * computed as a hash function (a different one) of the primary
28  * hashcode.  This increment is added to the initial hash value to
29  * obtain further slots assigned to the same hash code.  For this to
30  * work, the length of the array and the increment must be relatively
31  * prime.  The easiest way to achieve this is to have the length of
32  * the array be prime, and the increment be any value from
33  * 1..length-1.
34  *
35  * Hashcodes are 32-bit integers.  We make sure all hashcodes are
36  * non-negative by masking off the top bit.  This has two effects: (1)
37  * modulo arithmetic is simplified.  If we allowed negative hashcodes,
38  * then when we computed hashcode % length, we could get a negative
39  * result, which we would then have to adjust back into range.  It's
40  * simpler to just make hashcodes non-negative. (2) It makes it easy
41  * to check for empty vs. occupied slots in the table.  We just mark
42  * empty or deleted slots with a negative hashcode.
43  *
44  * The central function is _uhash_find().  This function looks for a
45  * slot matching the given key and hashcode.  If one is found, it
46  * returns a pointer to that slot.  If the table is full, and no match
47  * is found, it returns NULL -- in theory.  This would make the code
48  * more complicated, since all callers of _uhash_find() would then
49  * have to check for a NULL result.  To keep this from happening, we
50  * don't allow the table to fill.  When there is only one
51  * empty/deleted slot left, uhash_put() will refuse to increase the
52  * count, and fail.  This simplifies the code.  In practice, one will
53  * seldom encounter this using default UHashtables.  However, if a
54  * hashtable is set to a U_FIXED resize policy, or if memory is
55  * exhausted, then the table may fill.
56  *
57  * High and low water ratios control rehashing.  They establish levels
58  * of fullness (from 0 to 1) outside of which the data array is
59  * reallocated and repopulated.  Setting the low water ratio to zero
60  * means the table will never shrink.  Setting the high water ratio to
61  * one means the table will never grow.  The ratios should be
62  * coordinated with the ratio between successive elements of the
63  * PRIMES table, so that when the primeIndex is incremented or
64  * decremented during rehashing, it brings the ratio of count / length
65  * back into the desired range (between low and high water ratios).
66  */
67 
68 /********************************************************************
69  * PRIVATE Constants, Macros
70  ********************************************************************/
71 
72 /* This is a list of non-consecutive primes chosen such that
73  * PRIMES[i+1] ~ 2*PRIMES[i].  (Currently, the ratio ranges from 1.81
74  * to 2.18; the inverse ratio ranges from 0.459 to 0.552.)  If this
75  * ratio is changed, the low and high water ratios should also be
76  * adjusted to suit.
77  *
78  * These prime numbers were also chosen so that they are the largest
79  * prime number while being less than a power of two.
80  */
81 static const int32_t PRIMES[] = {
82     7, 13, 31, 61, 127, 251, 509, 1021, 2039, 4093, 8191, 16381, 32749,
83     65521, 131071, 262139, 524287, 1048573, 2097143, 4194301, 8388593,
84     16777213, 33554393, 67108859, 134217689, 268435399, 536870909,
85     1073741789, 2147483647 /*, 4294967291 */
86 };
87 
88 #define PRIMES_LENGTH UPRV_LENGTHOF(PRIMES)
89 #define DEFAULT_PRIME_INDEX 4
90 
91 /* These ratios are tuned to the PRIMES array such that a resize
92  * places the table back into the zone of non-resizing.  That is,
93  * after a call to _uhash_rehash(), a subsequent call to
94  * _uhash_rehash() should do nothing (should not churn).  This is only
95  * a potential problem with U_GROW_AND_SHRINK.
96  */
97 static const float RESIZE_POLICY_RATIO_TABLE[6] = {
98     /* low, high water ratio */
99     0.0F, 0.5F, /* U_GROW: Grow on demand, do not shrink */
100     0.1F, 0.5F, /* U_GROW_AND_SHRINK: Grow and shrink on demand */
101     0.0F, 1.0F  /* U_FIXED: Never change size */
102 };
103 
104 /*
105   Invariants for hashcode values:
106 
107   * DELETED < 0
108   * EMPTY < 0
109   * Real hashes >= 0
110 
111   Hashcodes may not start out this way, but internally they are
112   adjusted so that they are always positive.  We assume 32-bit
113   hashcodes; adjust these constants for other hashcode sizes.
114 */
115 #define HASH_DELETED    ((int32_t) 0x80000000)
116 #define HASH_EMPTY      ((int32_t) HASH_DELETED + 1)
117 
118 #define IS_EMPTY_OR_DELETED(x) ((x) < 0)
119 
120 /* This macro expects a UHashTok.pointer as its keypointer and
121    valuepointer parameters */
122 #define HASH_DELETE_KEY_VALUE(hash, keypointer, valuepointer) UPRV_BLOCK_MACRO_BEGIN { \
123     if (hash->keyDeleter != NULL && keypointer != NULL) { \
124         (*hash->keyDeleter)(keypointer); \
125     } \
126     if (hash->valueDeleter != NULL && valuepointer != NULL) { \
127         (*hash->valueDeleter)(valuepointer); \
128     } \
129 } UPRV_BLOCK_MACRO_END
130 
131 /*
132  * Constants for hinting whether a key or value is an integer
133  * or a pointer.  If a hint bit is zero, then the associated
134  * token is assumed to be an integer.
135  */
136 #define HINT_BOTH_INTEGERS (0)
137 #define HINT_KEY_POINTER   (1)
138 #define HINT_VALUE_POINTER (2)
139 #define HINT_ALLOW_ZERO    (4)
140 
141 /********************************************************************
142  * PRIVATE Implementation
143  ********************************************************************/
144 
145 static UHashTok
_uhash_setElement(UHashtable * hash,UHashElement * e,int32_t hashcode,UHashTok key,UHashTok value,int8_t hint)146 _uhash_setElement(UHashtable *hash, UHashElement* e,
147                   int32_t hashcode,
148                   UHashTok key, UHashTok value, int8_t hint) {
149 
150     UHashTok oldValue = e->value;
151     if (hash->keyDeleter != NULL && e->key.pointer != NULL &&
152         e->key.pointer != key.pointer) { /* Avoid double deletion */
153         (*hash->keyDeleter)(e->key.pointer);
154     }
155     if (hash->valueDeleter != NULL) {
156         if (oldValue.pointer != NULL &&
157             oldValue.pointer != value.pointer) { /* Avoid double deletion */
158             (*hash->valueDeleter)(oldValue.pointer);
159         }
160         oldValue.pointer = NULL;
161     }
162     /* Compilers should copy the UHashTok union correctly, but even if
163      * they do, memory heap tools (e.g. BoundsChecker) can get
164      * confused when a pointer is cloaked in a union and then copied.
165      * TO ALLEVIATE THIS, we use hints (based on what API the user is
166      * calling) to copy pointers when we know the user thinks
167      * something is a pointer. */
168     if (hint & HINT_KEY_POINTER) {
169         e->key.pointer = key.pointer;
170     } else {
171         e->key = key;
172     }
173     if (hint & HINT_VALUE_POINTER) {
174         e->value.pointer = value.pointer;
175     } else {
176         e->value = value;
177     }
178     e->hashcode = hashcode;
179     return oldValue;
180 }
181 
182 /**
183  * Assumes that the given element is not empty or deleted.
184  */
185 static UHashTok
_uhash_internalRemoveElement(UHashtable * hash,UHashElement * e)186 _uhash_internalRemoveElement(UHashtable *hash, UHashElement* e) {
187     UHashTok empty;
188     U_ASSERT(!IS_EMPTY_OR_DELETED(e->hashcode));
189     --hash->count;
190     empty.pointer = NULL; empty.integer = 0;
191     return _uhash_setElement(hash, e, HASH_DELETED, empty, empty, 0);
192 }
193 
194 static void
_uhash_internalSetResizePolicy(UHashtable * hash,enum UHashResizePolicy policy)195 _uhash_internalSetResizePolicy(UHashtable *hash, enum UHashResizePolicy policy) {
196     U_ASSERT(hash != NULL);
197     U_ASSERT(((int32_t)policy) >= 0);
198     U_ASSERT(((int32_t)policy) < 3);
199     hash->lowWaterRatio  = RESIZE_POLICY_RATIO_TABLE[policy * 2];
200     hash->highWaterRatio = RESIZE_POLICY_RATIO_TABLE[policy * 2 + 1];
201 }
202 
203 /**
204  * Allocate internal data array of a size determined by the given
205  * prime index.  If the index is out of range it is pinned into range.
206  * If the allocation fails the status is set to
207  * U_MEMORY_ALLOCATION_ERROR and all array storage is freed.  In
208  * either case the previous array pointer is overwritten.
209  *
210  * Caller must ensure primeIndex is in range 0..PRIME_LENGTH-1.
211  */
212 static void
_uhash_allocate(UHashtable * hash,int32_t primeIndex,UErrorCode * status)213 _uhash_allocate(UHashtable *hash,
214                 int32_t primeIndex,
215                 UErrorCode *status) {
216 
217     UHashElement *p, *limit;
218     UHashTok emptytok;
219 
220     if (U_FAILURE(*status)) return;
221 
222     U_ASSERT(primeIndex >= 0 && primeIndex < PRIMES_LENGTH);
223 
224     hash->primeIndex = static_cast<int8_t>(primeIndex);
225     hash->length = PRIMES[primeIndex];
226 
227     p = hash->elements = (UHashElement*)
228         uprv_malloc(sizeof(UHashElement) * hash->length);
229 
230     if (hash->elements == NULL) {
231         *status = U_MEMORY_ALLOCATION_ERROR;
232         return;
233     }
234 
235     emptytok.pointer = NULL; /* Only one of these two is needed */
236     emptytok.integer = 0;    /* but we don't know which one. */
237 
238     limit = p + hash->length;
239     while (p < limit) {
240         p->key = emptytok;
241         p->value = emptytok;
242         p->hashcode = HASH_EMPTY;
243         ++p;
244     }
245 
246     hash->count = 0;
247     hash->lowWaterMark = (int32_t)(hash->length * hash->lowWaterRatio);
248     hash->highWaterMark = (int32_t)(hash->length * hash->highWaterRatio);
249 }
250 
251 static UHashtable*
_uhash_init(UHashtable * result,UHashFunction * keyHash,UKeyComparator * keyComp,UValueComparator * valueComp,int32_t primeIndex,UErrorCode * status)252 _uhash_init(UHashtable *result,
253               UHashFunction *keyHash,
254               UKeyComparator *keyComp,
255               UValueComparator *valueComp,
256               int32_t primeIndex,
257               UErrorCode *status)
258 {
259     if (U_FAILURE(*status)) return NULL;
260     U_ASSERT(keyHash != NULL);
261     U_ASSERT(keyComp != NULL);
262 
263     result->keyHasher       = keyHash;
264     result->keyComparator   = keyComp;
265     result->valueComparator = valueComp;
266     result->keyDeleter      = NULL;
267     result->valueDeleter    = NULL;
268     result->allocated       = FALSE;
269     _uhash_internalSetResizePolicy(result, U_GROW);
270 
271     _uhash_allocate(result, primeIndex, status);
272 
273     if (U_FAILURE(*status)) {
274         return NULL;
275     }
276 
277     return result;
278 }
279 
280 static UHashtable*
_uhash_create(UHashFunction * keyHash,UKeyComparator * keyComp,UValueComparator * valueComp,int32_t primeIndex,UErrorCode * status)281 _uhash_create(UHashFunction *keyHash,
282               UKeyComparator *keyComp,
283               UValueComparator *valueComp,
284               int32_t primeIndex,
285               UErrorCode *status) {
286     UHashtable *result;
287 
288     if (U_FAILURE(*status)) return NULL;
289 
290     result = (UHashtable*) uprv_malloc(sizeof(UHashtable));
291     if (result == NULL) {
292         *status = U_MEMORY_ALLOCATION_ERROR;
293         return NULL;
294     }
295 
296     _uhash_init(result, keyHash, keyComp, valueComp, primeIndex, status);
297     result->allocated       = TRUE;
298 
299     if (U_FAILURE(*status)) {
300         uprv_free(result);
301         return NULL;
302     }
303 
304     return result;
305 }
306 
307 /**
308  * Look for a key in the table, or if no such key exists, the first
309  * empty slot matching the given hashcode.  Keys are compared using
310  * the keyComparator function.
311  *
312  * First find the start position, which is the hashcode modulo
313  * the length.  Test it to see if it is:
314  *
315  * a. identical:  First check the hash values for a quick check,
316  *    then compare keys for equality using keyComparator.
317  * b. deleted
318  * c. empty
319  *
320  * Stop if it is identical or empty, otherwise continue by adding a
321  * "jump" value (moduloing by the length again to keep it within
322  * range) and retesting.  For efficiency, there need enough empty
323  * values so that the searchs stop within a reasonable amount of time.
324  * This can be changed by changing the high/low water marks.
325  *
326  * In theory, this function can return NULL, if it is full (no empty
327  * or deleted slots) and if no matching key is found.  In practice, we
328  * prevent this elsewhere (in uhash_put) by making sure the last slot
329  * in the table is never filled.
330  *
331  * The size of the table should be prime for this algorithm to work;
332  * otherwise we are not guaranteed that the jump value (the secondary
333  * hash) is relatively prime to the table length.
334  */
335 static UHashElement*
_uhash_find(const UHashtable * hash,UHashTok key,int32_t hashcode)336 _uhash_find(const UHashtable *hash, UHashTok key,
337             int32_t hashcode) {
338 
339     int32_t firstDeleted = -1;  /* assume invalid index */
340     int32_t theIndex, startIndex;
341     int32_t jump = 0; /* lazy evaluate */
342     int32_t tableHash;
343     UHashElement *elements = hash->elements;
344 
345     hashcode &= 0x7FFFFFFF; /* must be positive */
346     startIndex = theIndex = (hashcode ^ 0x4000000) % hash->length;
347 
348     do {
349         tableHash = elements[theIndex].hashcode;
350         if (tableHash == hashcode) {          /* quick check */
351             if ((*hash->keyComparator)(key, elements[theIndex].key)) {
352                 return &(elements[theIndex]);
353             }
354         } else if (!IS_EMPTY_OR_DELETED(tableHash)) {
355             /* We have hit a slot which contains a key-value pair,
356              * but for which the hash code does not match.  Keep
357              * looking.
358              */
359         } else if (tableHash == HASH_EMPTY) { /* empty, end o' the line */
360             break;
361         } else if (firstDeleted < 0) { /* remember first deleted */
362             firstDeleted = theIndex;
363         }
364         if (jump == 0) { /* lazy compute jump */
365             /* The jump value must be relatively prime to the table
366              * length.  As long as the length is prime, then any value
367              * 1..length-1 will be relatively prime to it.
368              */
369             jump = (hashcode % (hash->length - 1)) + 1;
370         }
371         theIndex = (theIndex + jump) % hash->length;
372     } while (theIndex != startIndex);
373 
374     if (firstDeleted >= 0) {
375         theIndex = firstDeleted; /* reset if had deleted slot */
376     } else if (tableHash != HASH_EMPTY) {
377         /* We get to this point if the hashtable is full (no empty or
378          * deleted slots), and we've failed to find a match.  THIS
379          * WILL NEVER HAPPEN as long as uhash_put() makes sure that
380          * count is always < length.
381          */
382         UPRV_UNREACHABLE;
383     }
384     return &(elements[theIndex]);
385 }
386 
387 /**
388  * Attempt to grow or shrink the data arrays in order to make the
389  * count fit between the high and low water marks.  hash_put() and
390  * hash_remove() call this method when the count exceeds the high or
391  * low water marks.  This method may do nothing, if memory allocation
392  * fails, or if the count is already in range, or if the length is
393  * already at the low or high limit.  In any case, upon return the
394  * arrays will be valid.
395  */
396 static void
_uhash_rehash(UHashtable * hash,UErrorCode * status)397 _uhash_rehash(UHashtable *hash, UErrorCode *status) {
398 
399     UHashElement *old = hash->elements;
400     int32_t oldLength = hash->length;
401     int32_t newPrimeIndex = hash->primeIndex;
402     int32_t i;
403 
404     if (hash->count > hash->highWaterMark) {
405         if (++newPrimeIndex >= PRIMES_LENGTH) {
406             return;
407         }
408     } else if (hash->count < hash->lowWaterMark) {
409         if (--newPrimeIndex < 0) {
410             return;
411         }
412     } else {
413         return;
414     }
415 
416     _uhash_allocate(hash, newPrimeIndex, status);
417 
418     if (U_FAILURE(*status)) {
419         hash->elements = old;
420         hash->length = oldLength;
421         return;
422     }
423 
424     for (i = oldLength - 1; i >= 0; --i) {
425         if (!IS_EMPTY_OR_DELETED(old[i].hashcode)) {
426             UHashElement *e = _uhash_find(hash, old[i].key, old[i].hashcode);
427             U_ASSERT(e != NULL);
428             U_ASSERT(e->hashcode == HASH_EMPTY);
429             e->key = old[i].key;
430             e->value = old[i].value;
431             e->hashcode = old[i].hashcode;
432             ++hash->count;
433         }
434     }
435 
436     uprv_free(old);
437 }
438 
439 static UHashTok
_uhash_remove(UHashtable * hash,UHashTok key)440 _uhash_remove(UHashtable *hash,
441               UHashTok key) {
442     /* First find the position of the key in the table.  If the object
443      * has not been removed already, remove it.  If the user wanted
444      * keys deleted, then delete it also.  We have to put a special
445      * hashcode in that position that means that something has been
446      * deleted, since when we do a find, we have to continue PAST any
447      * deleted values.
448      */
449     UHashTok result;
450     UHashElement* e = _uhash_find(hash, key, hash->keyHasher(key));
451     U_ASSERT(e != NULL);
452     result.pointer = NULL;
453     result.integer = 0;
454     if (!IS_EMPTY_OR_DELETED(e->hashcode)) {
455         result = _uhash_internalRemoveElement(hash, e);
456         if (hash->count < hash->lowWaterMark) {
457             UErrorCode status = U_ZERO_ERROR;
458             _uhash_rehash(hash, &status);
459         }
460     }
461     return result;
462 }
463 
464 static UHashTok
_uhash_put(UHashtable * hash,UHashTok key,UHashTok value,int8_t hint,UErrorCode * status)465 _uhash_put(UHashtable *hash,
466            UHashTok key,
467            UHashTok value,
468            int8_t hint,
469            UErrorCode *status) {
470 
471     /* Put finds the position in the table for the new value.  If the
472      * key is already in the table, it is deleted, if there is a
473      * non-NULL keyDeleter.  Then the key, the hash and the value are
474      * all put at the position in their respective arrays.
475      */
476     int32_t hashcode;
477     UHashElement* e;
478     UHashTok emptytok;
479 
480     if (U_FAILURE(*status)) {
481         goto err;
482     }
483     U_ASSERT(hash != NULL);
484     if ((hint & HINT_VALUE_POINTER) ?
485             value.pointer == NULL :
486             value.integer == 0 && (hint & HINT_ALLOW_ZERO) == 0) {
487         /* Disallow storage of NULL values, since NULL is returned by
488          * get() to indicate an absent key.  Storing NULL == removing.
489          */
490         return _uhash_remove(hash, key);
491     }
492     if (hash->count > hash->highWaterMark) {
493         _uhash_rehash(hash, status);
494         if (U_FAILURE(*status)) {
495             goto err;
496         }
497     }
498 
499     hashcode = (*hash->keyHasher)(key);
500     e = _uhash_find(hash, key, hashcode);
501     U_ASSERT(e != NULL);
502 
503     if (IS_EMPTY_OR_DELETED(e->hashcode)) {
504         /* Important: We must never actually fill the table up.  If we
505          * do so, then _uhash_find() will return NULL, and we'll have
506          * to check for NULL after every call to _uhash_find().  To
507          * avoid this we make sure there is always at least one empty
508          * or deleted slot in the table.  This only is a problem if we
509          * are out of memory and rehash isn't working.
510          */
511         ++hash->count;
512         if (hash->count == hash->length) {
513             /* Don't allow count to reach length */
514             --hash->count;
515             *status = U_MEMORY_ALLOCATION_ERROR;
516             goto err;
517         }
518     }
519 
520     /* We must in all cases handle storage properly.  If there was an
521      * old key, then it must be deleted (if the deleter != NULL).
522      * Make hashcodes stored in table positive.
523      */
524     return _uhash_setElement(hash, e, hashcode & 0x7FFFFFFF, key, value, hint);
525 
526  err:
527     /* If the deleters are non-NULL, this method adopts its key and/or
528      * value arguments, and we must be sure to delete the key and/or
529      * value in all cases, even upon failure.
530      */
531     HASH_DELETE_KEY_VALUE(hash, key.pointer, value.pointer);
532     emptytok.pointer = NULL; emptytok.integer = 0;
533     return emptytok;
534 }
535 
536 
537 /********************************************************************
538  * PUBLIC API
539  ********************************************************************/
540 
541 U_CAPI UHashtable* U_EXPORT2
uhash_open(UHashFunction * keyHash,UKeyComparator * keyComp,UValueComparator * valueComp,UErrorCode * status)542 uhash_open(UHashFunction *keyHash,
543            UKeyComparator *keyComp,
544            UValueComparator *valueComp,
545            UErrorCode *status) {
546 
547     return _uhash_create(keyHash, keyComp, valueComp, DEFAULT_PRIME_INDEX, status);
548 }
549 
550 U_CAPI UHashtable* U_EXPORT2
uhash_openSize(UHashFunction * keyHash,UKeyComparator * keyComp,UValueComparator * valueComp,int32_t size,UErrorCode * status)551 uhash_openSize(UHashFunction *keyHash,
552                UKeyComparator *keyComp,
553                UValueComparator *valueComp,
554                int32_t size,
555                UErrorCode *status) {
556 
557     /* Find the smallest index i for which PRIMES[i] >= size. */
558     int32_t i = 0;
559     while (i<(PRIMES_LENGTH-1) && PRIMES[i]<size) {
560         ++i;
561     }
562 
563     return _uhash_create(keyHash, keyComp, valueComp, i, status);
564 }
565 
566 U_CAPI UHashtable* U_EXPORT2
uhash_init(UHashtable * fillinResult,UHashFunction * keyHash,UKeyComparator * keyComp,UValueComparator * valueComp,UErrorCode * status)567 uhash_init(UHashtable *fillinResult,
568            UHashFunction *keyHash,
569            UKeyComparator *keyComp,
570            UValueComparator *valueComp,
571            UErrorCode *status) {
572 
573     return _uhash_init(fillinResult, keyHash, keyComp, valueComp, DEFAULT_PRIME_INDEX, status);
574 }
575 
576 U_CAPI UHashtable* U_EXPORT2
uhash_initSize(UHashtable * fillinResult,UHashFunction * keyHash,UKeyComparator * keyComp,UValueComparator * valueComp,int32_t size,UErrorCode * status)577 uhash_initSize(UHashtable *fillinResult,
578                UHashFunction *keyHash,
579                UKeyComparator *keyComp,
580                UValueComparator *valueComp,
581                int32_t size,
582                UErrorCode *status) {
583 
584     // Find the smallest index i for which PRIMES[i] >= size.
585     int32_t i = 0;
586     while (i<(PRIMES_LENGTH-1) && PRIMES[i]<size) {
587         ++i;
588     }
589     return _uhash_init(fillinResult, keyHash, keyComp, valueComp, i, status);
590 }
591 
592 U_CAPI void U_EXPORT2
uhash_close(UHashtable * hash)593 uhash_close(UHashtable *hash) {
594     if (hash == NULL) {
595         return;
596     }
597     if (hash->elements != NULL) {
598         if (hash->keyDeleter != NULL || hash->valueDeleter != NULL) {
599             int32_t pos=UHASH_FIRST;
600             UHashElement *e;
601             while ((e = (UHashElement*) uhash_nextElement(hash, &pos)) != NULL) {
602                 HASH_DELETE_KEY_VALUE(hash, e->key.pointer, e->value.pointer);
603             }
604         }
605         uprv_free(hash->elements);
606         hash->elements = NULL;
607     }
608     if (hash->allocated) {
609         uprv_free(hash);
610     }
611 }
612 
613 U_CAPI UHashFunction *U_EXPORT2
uhash_setKeyHasher(UHashtable * hash,UHashFunction * fn)614 uhash_setKeyHasher(UHashtable *hash, UHashFunction *fn) {
615     UHashFunction *result = hash->keyHasher;
616     hash->keyHasher = fn;
617     return result;
618 }
619 
620 U_CAPI UKeyComparator *U_EXPORT2
uhash_setKeyComparator(UHashtable * hash,UKeyComparator * fn)621 uhash_setKeyComparator(UHashtable *hash, UKeyComparator *fn) {
622     UKeyComparator *result = hash->keyComparator;
623     hash->keyComparator = fn;
624     return result;
625 }
626 U_CAPI UValueComparator *U_EXPORT2
uhash_setValueComparator(UHashtable * hash,UValueComparator * fn)627 uhash_setValueComparator(UHashtable *hash, UValueComparator *fn){
628     UValueComparator *result = hash->valueComparator;
629     hash->valueComparator = fn;
630     return result;
631 }
632 
633 U_CAPI UObjectDeleter *U_EXPORT2
uhash_setKeyDeleter(UHashtable * hash,UObjectDeleter * fn)634 uhash_setKeyDeleter(UHashtable *hash, UObjectDeleter *fn) {
635     UObjectDeleter *result = hash->keyDeleter;
636     hash->keyDeleter = fn;
637     return result;
638 }
639 
640 U_CAPI UObjectDeleter *U_EXPORT2
uhash_setValueDeleter(UHashtable * hash,UObjectDeleter * fn)641 uhash_setValueDeleter(UHashtable *hash, UObjectDeleter *fn) {
642     UObjectDeleter *result = hash->valueDeleter;
643     hash->valueDeleter = fn;
644     return result;
645 }
646 
647 U_CAPI void U_EXPORT2
uhash_setResizePolicy(UHashtable * hash,enum UHashResizePolicy policy)648 uhash_setResizePolicy(UHashtable *hash, enum UHashResizePolicy policy) {
649     UErrorCode status = U_ZERO_ERROR;
650     _uhash_internalSetResizePolicy(hash, policy);
651     hash->lowWaterMark  = (int32_t)(hash->length * hash->lowWaterRatio);
652     hash->highWaterMark = (int32_t)(hash->length * hash->highWaterRatio);
653     _uhash_rehash(hash, &status);
654 }
655 
656 U_CAPI int32_t U_EXPORT2
uhash_count(const UHashtable * hash)657 uhash_count(const UHashtable *hash) {
658     return hash->count;
659 }
660 
661 U_CAPI void* U_EXPORT2
uhash_get(const UHashtable * hash,const void * key)662 uhash_get(const UHashtable *hash,
663           const void* key) {
664     UHashTok keyholder;
665     keyholder.pointer = (void*) key;
666     return _uhash_find(hash, keyholder, hash->keyHasher(keyholder))->value.pointer;
667 }
668 
669 U_CAPI void* U_EXPORT2
uhash_iget(const UHashtable * hash,int32_t key)670 uhash_iget(const UHashtable *hash,
671            int32_t key) {
672     UHashTok keyholder;
673     keyholder.integer = key;
674     return _uhash_find(hash, keyholder, hash->keyHasher(keyholder))->value.pointer;
675 }
676 
677 U_CAPI int32_t U_EXPORT2
uhash_geti(const UHashtable * hash,const void * key)678 uhash_geti(const UHashtable *hash,
679            const void* key) {
680     UHashTok keyholder;
681     keyholder.pointer = (void*) key;
682     return _uhash_find(hash, keyholder, hash->keyHasher(keyholder))->value.integer;
683 }
684 
685 U_CAPI int32_t U_EXPORT2
uhash_igeti(const UHashtable * hash,int32_t key)686 uhash_igeti(const UHashtable *hash,
687            int32_t key) {
688     UHashTok keyholder;
689     keyholder.integer = key;
690     return _uhash_find(hash, keyholder, hash->keyHasher(keyholder))->value.integer;
691 }
692 
693 U_CAPI int32_t U_EXPORT2
uhash_getiAndFound(const UHashtable * hash,const void * key,UBool * found)694 uhash_getiAndFound(const UHashtable *hash,
695                    const void *key,
696                    UBool *found) {
697     UHashTok keyholder;
698     keyholder.pointer = (void *)key;
699     const UHashElement *e = _uhash_find(hash, keyholder, hash->keyHasher(keyholder));
700     *found = !IS_EMPTY_OR_DELETED(e->hashcode);
701     return e->value.integer;
702 }
703 
704 U_CAPI int32_t U_EXPORT2
uhash_igetiAndFound(const UHashtable * hash,int32_t key,UBool * found)705 uhash_igetiAndFound(const UHashtable *hash,
706                     int32_t key,
707                     UBool *found) {
708     UHashTok keyholder;
709     keyholder.integer = key;
710     const UHashElement *e = _uhash_find(hash, keyholder, hash->keyHasher(keyholder));
711     *found = !IS_EMPTY_OR_DELETED(e->hashcode);
712     return e->value.integer;
713 }
714 
715 U_CAPI void* U_EXPORT2
uhash_put(UHashtable * hash,void * key,void * value,UErrorCode * status)716 uhash_put(UHashtable *hash,
717           void* key,
718           void* value,
719           UErrorCode *status) {
720     UHashTok keyholder, valueholder;
721     keyholder.pointer = key;
722     valueholder.pointer = value;
723     return _uhash_put(hash, keyholder, valueholder,
724                       HINT_KEY_POINTER | HINT_VALUE_POINTER,
725                       status).pointer;
726 }
727 
728 U_CAPI void* U_EXPORT2
uhash_iput(UHashtable * hash,int32_t key,void * value,UErrorCode * status)729 uhash_iput(UHashtable *hash,
730            int32_t key,
731            void* value,
732            UErrorCode *status) {
733     UHashTok keyholder, valueholder;
734     keyholder.integer = key;
735     valueholder.pointer = value;
736     return _uhash_put(hash, keyholder, valueholder,
737                       HINT_VALUE_POINTER,
738                       status).pointer;
739 }
740 
741 U_CAPI int32_t U_EXPORT2
uhash_puti(UHashtable * hash,void * key,int32_t value,UErrorCode * status)742 uhash_puti(UHashtable *hash,
743            void* key,
744            int32_t value,
745            UErrorCode *status) {
746     UHashTok keyholder, valueholder;
747     keyholder.pointer = key;
748     valueholder.integer = value;
749     return _uhash_put(hash, keyholder, valueholder,
750                       HINT_KEY_POINTER,
751                       status).integer;
752 }
753 
754 
755 U_CAPI int32_t U_EXPORT2
uhash_iputi(UHashtable * hash,int32_t key,int32_t value,UErrorCode * status)756 uhash_iputi(UHashtable *hash,
757            int32_t key,
758            int32_t value,
759            UErrorCode *status) {
760     UHashTok keyholder, valueholder;
761     keyholder.integer = key;
762     valueholder.integer = value;
763     return _uhash_put(hash, keyholder, valueholder,
764                       HINT_BOTH_INTEGERS,
765                       status).integer;
766 }
767 
768 U_CAPI int32_t U_EXPORT2
uhash_putiAllowZero(UHashtable * hash,void * key,int32_t value,UErrorCode * status)769 uhash_putiAllowZero(UHashtable *hash,
770                     void *key,
771                     int32_t value,
772                     UErrorCode *status) {
773     UHashTok keyholder, valueholder;
774     keyholder.pointer = key;
775     valueholder.integer = value;
776     return _uhash_put(hash, keyholder, valueholder,
777                       HINT_KEY_POINTER | HINT_ALLOW_ZERO,
778                       status).integer;
779 }
780 
781 
782 U_CAPI int32_t U_EXPORT2
uhash_iputiAllowZero(UHashtable * hash,int32_t key,int32_t value,UErrorCode * status)783 uhash_iputiAllowZero(UHashtable *hash,
784                      int32_t key,
785                      int32_t value,
786                      UErrorCode *status) {
787     UHashTok keyholder, valueholder;
788     keyholder.integer = key;
789     valueholder.integer = value;
790     return _uhash_put(hash, keyholder, valueholder,
791                       HINT_BOTH_INTEGERS | HINT_ALLOW_ZERO,
792                       status).integer;
793 }
794 
795 U_CAPI void* U_EXPORT2
uhash_remove(UHashtable * hash,const void * key)796 uhash_remove(UHashtable *hash,
797              const void* key) {
798     UHashTok keyholder;
799     keyholder.pointer = (void*) key;
800     return _uhash_remove(hash, keyholder).pointer;
801 }
802 
803 U_CAPI void* U_EXPORT2
uhash_iremove(UHashtable * hash,int32_t key)804 uhash_iremove(UHashtable *hash,
805               int32_t key) {
806     UHashTok keyholder;
807     keyholder.integer = key;
808     return _uhash_remove(hash, keyholder).pointer;
809 }
810 
811 U_CAPI int32_t U_EXPORT2
uhash_removei(UHashtable * hash,const void * key)812 uhash_removei(UHashtable *hash,
813               const void* key) {
814     UHashTok keyholder;
815     keyholder.pointer = (void*) key;
816     return _uhash_remove(hash, keyholder).integer;
817 }
818 
819 U_CAPI int32_t U_EXPORT2
uhash_iremovei(UHashtable * hash,int32_t key)820 uhash_iremovei(UHashtable *hash,
821                int32_t key) {
822     UHashTok keyholder;
823     keyholder.integer = key;
824     return _uhash_remove(hash, keyholder).integer;
825 }
826 
827 U_CAPI void U_EXPORT2
uhash_removeAll(UHashtable * hash)828 uhash_removeAll(UHashtable *hash) {
829     int32_t pos = UHASH_FIRST;
830     const UHashElement *e;
831     U_ASSERT(hash != NULL);
832     if (hash->count != 0) {
833         while ((e = uhash_nextElement(hash, &pos)) != NULL) {
834             uhash_removeElement(hash, e);
835         }
836     }
837     U_ASSERT(hash->count == 0);
838 }
839 
840 U_CAPI UBool U_EXPORT2
uhash_containsKey(const UHashtable * hash,const void * key)841 uhash_containsKey(const UHashtable *hash, const void *key) {
842     UHashTok keyholder;
843     keyholder.pointer = (void *)key;
844     const UHashElement *e = _uhash_find(hash, keyholder, hash->keyHasher(keyholder));
845     return !IS_EMPTY_OR_DELETED(e->hashcode);
846 }
847 
848 /**
849  * Returns true if the UHashtable contains an item with this integer key.
850  *
851  * @param hash The target UHashtable.
852  * @param key An integer key stored in a hashtable
853  * @return true if the key is found.
854  */
855 U_CAPI UBool U_EXPORT2
uhash_icontainsKey(const UHashtable * hash,int32_t key)856 uhash_icontainsKey(const UHashtable *hash, int32_t key) {
857     UHashTok keyholder;
858     keyholder.integer = key;
859     const UHashElement *e = _uhash_find(hash, keyholder, hash->keyHasher(keyholder));
860     return !IS_EMPTY_OR_DELETED(e->hashcode);
861 }
862 
863 U_CAPI const UHashElement* U_EXPORT2
uhash_find(const UHashtable * hash,const void * key)864 uhash_find(const UHashtable *hash, const void* key) {
865     UHashTok keyholder;
866     const UHashElement *e;
867     keyholder.pointer = (void*) key;
868     e = _uhash_find(hash, keyholder, hash->keyHasher(keyholder));
869     return IS_EMPTY_OR_DELETED(e->hashcode) ? NULL : e;
870 }
871 
872 U_CAPI const UHashElement* U_EXPORT2
uhash_nextElement(const UHashtable * hash,int32_t * pos)873 uhash_nextElement(const UHashtable *hash, int32_t *pos) {
874     /* Walk through the array until we find an element that is not
875      * EMPTY and not DELETED.
876      */
877     int32_t i;
878     U_ASSERT(hash != NULL);
879     for (i = *pos + 1; i < hash->length; ++i) {
880         if (!IS_EMPTY_OR_DELETED(hash->elements[i].hashcode)) {
881             *pos = i;
882             return &(hash->elements[i]);
883         }
884     }
885 
886     /* No more elements */
887     return NULL;
888 }
889 
890 U_CAPI void* U_EXPORT2
uhash_removeElement(UHashtable * hash,const UHashElement * e)891 uhash_removeElement(UHashtable *hash, const UHashElement* e) {
892     U_ASSERT(hash != NULL);
893     U_ASSERT(e != NULL);
894     if (!IS_EMPTY_OR_DELETED(e->hashcode)) {
895         UHashElement *nce = (UHashElement *)e;
896         return _uhash_internalRemoveElement(hash, nce).pointer;
897     }
898     return NULL;
899 }
900 
901 /********************************************************************
902  * UHashTok convenience
903  ********************************************************************/
904 
905 /**
906  * Return a UHashTok for an integer.
907  */
908 /*U_CAPI UHashTok U_EXPORT2
909 uhash_toki(int32_t i) {
910     UHashTok tok;
911     tok.integer = i;
912     return tok;
913 }*/
914 
915 /**
916  * Return a UHashTok for a pointer.
917  */
918 /*U_CAPI UHashTok U_EXPORT2
919 uhash_tokp(void* p) {
920     UHashTok tok;
921     tok.pointer = p;
922     return tok;
923 }*/
924 
925 /********************************************************************
926  * PUBLIC Key Hash Functions
927  ********************************************************************/
928 
929 U_CAPI int32_t U_EXPORT2
uhash_hashUChars(const UHashTok key)930 uhash_hashUChars(const UHashTok key) {
931     const UChar *s = (const UChar *)key.pointer;
932     return s == NULL ? 0 : ustr_hashUCharsN(s, u_strlen(s));
933 }
934 
935 U_CAPI int32_t U_EXPORT2
uhash_hashChars(const UHashTok key)936 uhash_hashChars(const UHashTok key) {
937     const char *s = (const char *)key.pointer;
938     return s == NULL ? 0 : static_cast<int32_t>(ustr_hashCharsN(s, static_cast<int32_t>(uprv_strlen(s))));
939 }
940 
941 U_CAPI int32_t U_EXPORT2
uhash_hashIChars(const UHashTok key)942 uhash_hashIChars(const UHashTok key) {
943     const char *s = (const char *)key.pointer;
944     return s == NULL ? 0 : ustr_hashICharsN(s, static_cast<int32_t>(uprv_strlen(s)));
945 }
946 
947 U_CAPI UBool U_EXPORT2
uhash_equals(const UHashtable * hash1,const UHashtable * hash2)948 uhash_equals(const UHashtable* hash1, const UHashtable* hash2){
949     int32_t count1, count2, pos, i;
950 
951     if(hash1==hash2){
952         return TRUE;
953     }
954 
955     /*
956      * Make sure that we are comparing 2 valid hashes of the same type
957      * with valid comparison functions.
958      * Without valid comparison functions, a binary comparison
959      * of the hash values will yield random results on machines
960      * with 64-bit pointers and 32-bit integer hashes.
961      * A valueComparator is normally optional.
962      */
963     if (hash1==NULL || hash2==NULL ||
964         hash1->keyComparator != hash2->keyComparator ||
965         hash1->valueComparator != hash2->valueComparator ||
966         hash1->valueComparator == NULL)
967     {
968         /*
969         Normally we would return an error here about incompatible hash tables,
970         but we return FALSE instead.
971         */
972         return FALSE;
973     }
974 
975     count1 = uhash_count(hash1);
976     count2 = uhash_count(hash2);
977     if(count1!=count2){
978         return FALSE;
979     }
980 
981     pos=UHASH_FIRST;
982     for(i=0; i<count1; i++){
983         const UHashElement* elem1 = uhash_nextElement(hash1, &pos);
984         const UHashTok key1 = elem1->key;
985         const UHashTok val1 = elem1->value;
986         /* here the keys are not compared, instead the key form hash1 is used to fetch
987          * value from hash2. If the hashes are equal then then both hashes should
988          * contain equal values for the same key!
989          */
990         const UHashElement* elem2 = _uhash_find(hash2, key1, hash2->keyHasher(key1));
991         const UHashTok val2 = elem2->value;
992         if(hash1->valueComparator(val1, val2)==FALSE){
993             return FALSE;
994         }
995     }
996     return TRUE;
997 }
998 
999 /********************************************************************
1000  * PUBLIC Comparator Functions
1001  ********************************************************************/
1002 
1003 U_CAPI UBool U_EXPORT2
uhash_compareUChars(const UHashTok key1,const UHashTok key2)1004 uhash_compareUChars(const UHashTok key1, const UHashTok key2) {
1005     const UChar *p1 = (const UChar*) key1.pointer;
1006     const UChar *p2 = (const UChar*) key2.pointer;
1007     if (p1 == p2) {
1008         return TRUE;
1009     }
1010     if (p1 == NULL || p2 == NULL) {
1011         return FALSE;
1012     }
1013     while (*p1 != 0 && *p1 == *p2) {
1014         ++p1;
1015         ++p2;
1016     }
1017     return (UBool)(*p1 == *p2);
1018 }
1019 
1020 U_CAPI UBool U_EXPORT2
uhash_compareChars(const UHashTok key1,const UHashTok key2)1021 uhash_compareChars(const UHashTok key1, const UHashTok key2) {
1022     const char *p1 = (const char*) key1.pointer;
1023     const char *p2 = (const char*) key2.pointer;
1024     if (p1 == p2) {
1025         return TRUE;
1026     }
1027     if (p1 == NULL || p2 == NULL) {
1028         return FALSE;
1029     }
1030     while (*p1 != 0 && *p1 == *p2) {
1031         ++p1;
1032         ++p2;
1033     }
1034     return (UBool)(*p1 == *p2);
1035 }
1036 
1037 U_CAPI UBool U_EXPORT2
uhash_compareIChars(const UHashTok key1,const UHashTok key2)1038 uhash_compareIChars(const UHashTok key1, const UHashTok key2) {
1039     const char *p1 = (const char*) key1.pointer;
1040     const char *p2 = (const char*) key2.pointer;
1041     if (p1 == p2) {
1042         return TRUE;
1043     }
1044     if (p1 == NULL || p2 == NULL) {
1045         return FALSE;
1046     }
1047     while (*p1 != 0 && uprv_tolower(*p1) == uprv_tolower(*p2)) {
1048         ++p1;
1049         ++p2;
1050     }
1051     return (UBool)(*p1 == *p2);
1052 }
1053 
1054 /********************************************************************
1055  * PUBLIC int32_t Support Functions
1056  ********************************************************************/
1057 
1058 U_CAPI int32_t U_EXPORT2
uhash_hashLong(const UHashTok key)1059 uhash_hashLong(const UHashTok key) {
1060     return key.integer;
1061 }
1062 
1063 U_CAPI UBool U_EXPORT2
uhash_compareLong(const UHashTok key1,const UHashTok key2)1064 uhash_compareLong(const UHashTok key1, const UHashTok key2) {
1065     return (UBool)(key1.integer == key2.integer);
1066 }
1067