1 /*
2 * %CopyrightBegin%
3 *
4 * Copyright 2019 Kjell Winblad (kjellwinblad@gmail.com, http://winsh.me).
5 * All Rights Reserved.
6 *
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 *
19 * %CopyrightEnd%
20 */
21
22 /*
23 *
24 * Author: Kjell Winblad
25 *
26 */
27
28 #ifndef CHAINED_HASH_SET_H
29 #define CHAINED_HASH_SET_H
30
31
32 #if defined(_MSC_VER)
33 #define inline __inline
34 #endif
35
36 #include "sorted_list_set.h"
37 #include <stdint.h>
38
39 #define CHAIN_LENGHT_EXPAND_THRESHOLD 2
40 #define CHAIN_LENGHT_SHRINK_THRESHOLD 0.5
41 /*Must be power of two*/
42 #define INITIAL_NUMBER_OF_BUCKETS 4
43
44 typedef struct {
45 unsigned int keyPosition;
46 void *(*extract_key)(void *v, int keyPos);
47 unsigned int (*hash_key)(void *key);
48 bool (*are_equal)(void *v1, void *v2);
49 char *(*to_string)(void *v1);
50 void *(*malloc)(size_t size);
51 void (*free)(void *ptr);
52 unsigned int numberOfBuckets;
53 unsigned int expandTreshold;
54 unsigned int shrinkTreshold;
55 unsigned int size;
56 SortedListSetNode **buckets;
57 } ChainedHashSet;
58
59 /*
60 * The reverse_bits function is inspired by:
61 * https://stackoverflow.com/questions/746171/efficient-algorithm-for-bit-reversal-from-msb-lsb-to-lsb-msb-in-c
62 */
63 static const unsigned char BitReverseTable256[] = {
64 0x00, 0x80, 0x40, 0xC0, 0x20, 0xA0, 0x60, 0xE0, 0x10, 0x90, 0x50, 0xD0,
65 0x30, 0xB0, 0x70, 0xF0, 0x08, 0x88, 0x48, 0xC8, 0x28, 0xA8, 0x68, 0xE8,
66 0x18, 0x98, 0x58, 0xD8, 0x38, 0xB8, 0x78, 0xF8, 0x04, 0x84, 0x44, 0xC4,
67 0x24, 0xA4, 0x64, 0xE4, 0x14, 0x94, 0x54, 0xD4, 0x34, 0xB4, 0x74, 0xF4,
68 0x0C, 0x8C, 0x4C, 0xCC, 0x2C, 0xAC, 0x6C, 0xEC, 0x1C, 0x9C, 0x5C, 0xDC,
69 0x3C, 0xBC, 0x7C, 0xFC, 0x02, 0x82, 0x42, 0xC2, 0x22, 0xA2, 0x62, 0xE2,
70 0x12, 0x92, 0x52, 0xD2, 0x32, 0xB2, 0x72, 0xF2, 0x0A, 0x8A, 0x4A, 0xCA,
71 0x2A, 0xAA, 0x6A, 0xEA, 0x1A, 0x9A, 0x5A, 0xDA, 0x3A, 0xBA, 0x7A, 0xFA,
72 0x06, 0x86, 0x46, 0xC6, 0x26, 0xA6, 0x66, 0xE6, 0x16, 0x96, 0x56, 0xD6,
73 0x36, 0xB6, 0x76, 0xF6, 0x0E, 0x8E, 0x4E, 0xCE, 0x2E, 0xAE, 0x6E, 0xEE,
74 0x1E, 0x9E, 0x5E, 0xDE, 0x3E, 0xBE, 0x7E, 0xFE, 0x01, 0x81, 0x41, 0xC1,
75 0x21, 0xA1, 0x61, 0xE1, 0x11, 0x91, 0x51, 0xD1, 0x31, 0xB1, 0x71, 0xF1,
76 0x09, 0x89, 0x49, 0xC9, 0x29, 0xA9, 0x69, 0xE9, 0x19, 0x99, 0x59, 0xD9,
77 0x39, 0xB9, 0x79, 0xF9, 0x05, 0x85, 0x45, 0xC5, 0x25, 0xA5, 0x65, 0xE5,
78 0x15, 0x95, 0x55, 0xD5, 0x35, 0xB5, 0x75, 0xF5, 0x0D, 0x8D, 0x4D, 0xCD,
79 0x2D, 0xAD, 0x6D, 0xED, 0x1D, 0x9D, 0x5D, 0xDD, 0x3D, 0xBD, 0x7D, 0xFD,
80 0x03, 0x83, 0x43, 0xC3, 0x23, 0xA3, 0x63, 0xE3, 0x13, 0x93, 0x53, 0xD3,
81 0x33, 0xB3, 0x73, 0xF3, 0x0B, 0x8B, 0x4B, 0xCB, 0x2B, 0xAB, 0x6B, 0xEB,
82 0x1B, 0x9B, 0x5B, 0xDB, 0x3B, 0xBB, 0x7B, 0xFB, 0x07, 0x87, 0x47, 0xC7,
83 0x27, 0xA7, 0x67, 0xE7, 0x17, 0x97, 0x57, 0xD7, 0x37, 0xB7, 0x77, 0xF7,
84 0x0F, 0x8F, 0x4F, 0xCF, 0x2F, 0xAF, 0x6F, 0xEF, 0x1F, 0x9F, 0x5F, 0xDF,
85 0x3F, 0xBF, 0x7F, 0xFF};
86
reverse_bits(uint32_t v)87 static inline uint32_t reverse_bits(uint32_t v) {
88 return (((uint32_t)BitReverseTable256[v & 0xff]) << 24) |
89 (((uint32_t)BitReverseTable256[(v >> 8) & 0xff]) << 16) |
90 (((uint32_t)BitReverseTable256[(v >> 16) & 0xff]) << 8) |
91 (((uint32_t)BitReverseTable256[(v >> 24) & 0xff]));
92 }
93
ch_set_increase_size(ChainedHashSet * set)94 static inline void ch_set_increase_size(ChainedHashSet *set) {
95 set->size = set->size + 1;
96 if (set->size > set->expandTreshold) {
97 unsigned int oldNumberOfBuckets = set->numberOfBuckets;
98 unsigned int newNumberOfBuckets = oldNumberOfBuckets * 2;
99 unsigned int splitUpMask = reverse_bits(newNumberOfBuckets - 1) ^
100 reverse_bits(oldNumberOfBuckets - 1);
101 SortedListSetNode **newBuckets =
102 set->malloc(sizeof(SortedListSetNode *) * newNumberOfBuckets);
103 SortedListSetNode **oldBuckets = set->buckets;
104 SortedListSetNode *moveTemp;
105 for (unsigned int i = 0; i < oldNumberOfBuckets; i++) {
106 moveTemp = sl_set_split_opt(&oldBuckets[i], splitUpMask);
107 newBuckets[i] = oldBuckets[i];
108 newBuckets[i + oldNumberOfBuckets] = moveTemp;
109 }
110 set->free(oldBuckets);
111 set->buckets = newBuckets;
112 set->numberOfBuckets = newNumberOfBuckets;
113 set->expandTreshold = newNumberOfBuckets * CHAIN_LENGHT_EXPAND_THRESHOLD;
114 set->shrinkTreshold = newNumberOfBuckets * CHAIN_LENGHT_SHRINK_THRESHOLD;
115 }
116 }
117
ch_set_decrease_size(ChainedHashSet * set)118 static inline void ch_set_decrease_size(ChainedHashSet *set) {
119 set->size = set->size - 1;
120 if (set->size < set->shrinkTreshold) {
121 unsigned int oldNumberOfBuckets = set->numberOfBuckets;
122 unsigned int newNumberOfBuckets = oldNumberOfBuckets / 2;
123 SortedListSetNode **newBuckets =
124 set->malloc(sizeof(SortedListSetNode *) * newNumberOfBuckets);
125 SortedListSetNode **oldBuckets = set->buckets;
126 for (unsigned int i = 0; i < newNumberOfBuckets; i++) {
127 newBuckets[i] = oldBuckets[i];
128 sl_set_concat_opt(&newBuckets[i], oldBuckets[i + newNumberOfBuckets]);
129 }
130 set->free(oldBuckets);
131 set->buckets = newBuckets;
132 set->numberOfBuckets = newNumberOfBuckets;
133 set->expandTreshold = newNumberOfBuckets * CHAIN_LENGHT_EXPAND_THRESHOLD;
134 if (set->numberOfBuckets == INITIAL_NUMBER_OF_BUCKETS) {
135 set->shrinkTreshold = 0;
136 } else {
137 set->shrinkTreshold = newNumberOfBuckets * CHAIN_LENGHT_SHRINK_THRESHOLD;
138 }
139 }
140 }
141
ch_set_initialize(ChainedHashSet * set,unsigned int keyPosition,void * (* extract_key)(void * v,int keyPos),unsigned int (* hash_key)(void * k),bool (* are_equal)(void * v1,void * v2),char * (* to_string)(void * v1),void * (* my_malloc)(size_t size),void (* my_free)(void * ptr))142 static inline void ch_set_initialize(ChainedHashSet *set,
143 unsigned int keyPosition,
144 void *(*extract_key)(void *v, int keyPos),
145 unsigned int (*hash_key)(void *k),
146 bool (*are_equal)(void *v1, void *v2),
147 char *(*to_string)(void *v1),
148 void *(*my_malloc)(size_t size),
149 void (*my_free)(void *ptr)) {
150 set->keyPosition = keyPosition;
151 set->extract_key = extract_key;
152 set->hash_key = hash_key;
153 set->are_equal = are_equal;
154 set->to_string = to_string;
155 set->malloc = my_malloc;
156 set->free = my_free;
157 set->numberOfBuckets = INITIAL_NUMBER_OF_BUCKETS;
158 set->expandTreshold = set->numberOfBuckets * CHAIN_LENGHT_EXPAND_THRESHOLD;
159 set->shrinkTreshold = set->numberOfBuckets * CHAIN_LENGHT_EXPAND_THRESHOLD;
160 set->size = 0;
161 set->buckets =
162 set->malloc(sizeof(SortedListSetNode *) * INITIAL_NUMBER_OF_BUCKETS);
163 for (int i = 0; i < INITIAL_NUMBER_OF_BUCKETS; i++) {
164 set->buckets[i] = NULL;
165 }
166 }
167
ch_set_create(unsigned int keyPosition,void * (* extract_key)(void * v,int keyPos),unsigned int (* hash_key)(void * k),bool (* are_equal)(void * v1,void * v2),char * (* to_string)(void * v1),void * (* my_malloc)(size_t size),void (* my_free)(void * ptr))168 static inline ChainedHashSet *ch_set_create(
169 unsigned int keyPosition, void *(*extract_key)(void *v, int keyPos),
170 unsigned int (*hash_key)(void *k), bool (*are_equal)(void *v1, void *v2),
171 char *(*to_string)(void *v1), void *(*my_malloc)(size_t size),
172 void (*my_free)(void *ptr)) {
173 ChainedHashSet *set = my_malloc(sizeof(ChainedHashSet));
174 ch_set_initialize(set, keyPosition, extract_key, hash_key, are_equal,
175 to_string, my_malloc, my_free);
176 return set;
177 }
178
ch_set_insert_opt(ChainedHashSet * set,void * value,unsigned int valueSize,unsigned int hashValue,bool overwrite)179 static inline bool ch_set_insert_opt(ChainedHashSet *set, void *value,
180 unsigned int valueSize,
181 unsigned int hashValue, bool overwrite) {
182 unsigned int bucketIndex = hashValue & (set->numberOfBuckets - 1);
183 SortedListSetNode **bucket = &set->buckets[bucketIndex];
184 bool oneAdded = sl_set_insert_opt(
185 bucket, value, valueSize, reverse_bits(hashValue), set->keyPosition,
186 overwrite, set->extract_key, set->are_equal, set->malloc, set->free);
187 if (oneAdded) {
188 ch_set_increase_size(set);
189 }
190 return oneAdded;
191 }
192
ch_set_insert(void * setParam,void * value,unsigned int valueSize)193 static inline void ch_set_insert(void *setParam, void *value,
194 unsigned int valueSize) {
195 ChainedHashSet *set = (ChainedHashSet *)setParam;
196 void *key = set->extract_key(value, set->keyPosition);
197 unsigned int hashValue = set->hash_key(key);
198 ch_set_insert_opt(set, value, valueSize, hashValue, true);
199 }
200
ch_set_insert_new(void * setParam,void * value,unsigned int valueSize)201 static inline bool ch_set_insert_new(void *setParam, void *value,
202 unsigned int valueSize) {
203 ChainedHashSet *set = (ChainedHashSet *)setParam;
204 void *key = set->extract_key(value, set->keyPosition);
205 unsigned int hashValue = set->hash_key(key);
206 return ch_set_insert_opt(set, value, valueSize, hashValue, false);
207 }
208
ch_set_lookup(void * setParam,void * key)209 static inline void *ch_set_lookup(void *setParam, void *key) {
210 ChainedHashSet *set = (ChainedHashSet *)setParam;
211 unsigned int hashValue = set->hash_key(key);
212 unsigned int bucketIndex = hashValue & (set->numberOfBuckets - 1);
213 SortedListSetNode **bucket = &set->buckets[bucketIndex];
214 return sl_set_lookup_opt(bucket, key, reverse_bits(hashValue),
215 set->keyPosition, set->extract_key, set->are_equal,
216 false, set->malloc);
217 }
218
ch_set_delete(void * setParam,void * key,unsigned int keySize)219 static inline void ch_set_delete(void *setParam, void *key,
220 unsigned int keySize) {
221 (void)keySize;
222 ChainedHashSet *set = (ChainedHashSet *)setParam;
223 unsigned int hashValue = set->hash_key(key);
224 unsigned int bucketIndex = hashValue & (set->numberOfBuckets - 1);
225 SortedListSetNode **bucket = &set->buckets[bucketIndex];
226 bool oneRemoved =
227 sl_set_delete_opt(bucket, key, reverse_bits(hashValue), set->keyPosition,
228 set->extract_key, set->are_equal, set->free);
229 if (oneRemoved) {
230 ch_set_decrease_size(set);
231 }
232 }
233
ch_set_traverse(void * setParam,void (* traverser)(size_t index,void * v,void * context),void * context)234 static inline void ch_set_traverse(void *setParam,
235 void (*traverser)(size_t index, void *v,
236 void *context),
237 void *context) {
238 ChainedHashSet *set = (ChainedHashSet *)setParam;
239 unsigned int numberOfBuckets = set->numberOfBuckets;
240 SortedListSetNode *itemNode;
241 size_t index = 0;
242 for (unsigned int i = 0; i < numberOfBuckets; i++) {
243 itemNode = set->buckets[i];
244 while (itemNode != NULL) {
245 traverser(index, (void *)itemNode->value, context);
246 index++;
247 itemNode = itemNode->next;
248 }
249 }
250 }
251
ch_set_free(void * setParam)252 static inline void ch_set_free(void *setParam) {
253 ChainedHashSet *set = (ChainedHashSet *)setParam;
254 unsigned int numberOfBuckets = set->numberOfBuckets;
255 for (unsigned int i = 0; i < numberOfBuckets; i++) {
256 sl_set_free_opt(&set->buckets[i], set->free);
257 }
258 set->free(set->buckets);
259 set->free(set);
260 }
261
ch_set_to_string(void * setParam)262 static inline char *ch_set_to_string(void *setParam) {
263 ChainedHashSet *set = (ChainedHashSet *)setParam;
264 unsigned int numberOfBuckets = set->numberOfBuckets;
265 char **bucketStrings = malloc(sizeof(char*)*numberOfBuckets);
266 unsigned int *bucketStringSizes = malloc(sizeof(unsigned int)*numberOfBuckets);
267 unsigned int totalBucketsStringSize = 0;
268 SortedListSet *tempListSet = plain_sl_set_create(
269 set->keyPosition, set->extract_key, set->hash_key, set->are_equal,
270 set->to_string, set->malloc, set->free);
271 for (unsigned int i = 0; i < numberOfBuckets; i++) {
272 tempListSet->head = set->buckets[i];
273 bucketStrings[i] = sl_set_to_string(tempListSet);
274 bucketStringSizes[i] = strlen(bucketStrings[i]);
275 totalBucketsStringSize = totalBucketsStringSize + bucketStringSizes[i];
276 }
277 tempListSet->head = NULL;
278 sl_set_free(tempListSet);
279 char *resultString =
280 set->malloc(totalBucketsStringSize + numberOfBuckets * 3 - 3 + 3);
281 resultString[0] = '[';
282 unsigned int currentPosition = 1;
283 for (unsigned int i = 0; i < numberOfBuckets; i++) {
284 sprintf(&resultString[currentPosition], "%s", bucketStrings[i]);
285 set->free(bucketStrings[i]);
286 currentPosition = currentPosition + bucketStringSizes[i];
287 if (i != (numberOfBuckets - 1)) {
288 sprintf(&resultString[currentPosition], ",\n ");
289 currentPosition = currentPosition + 3;
290 }
291 }
292 sprintf(&resultString[currentPosition], "]");
293 free(bucketStrings);
294 free(bucketStringSizes);
295 return resultString;
296 }
297
ch_set_print(void * setParam)298 static inline void ch_set_print(void *setParam) {
299 ChainedHashSet *set = (ChainedHashSet *)setParam;
300 char *str = ch_set_to_string(set);
301 printf("%s\n", str);
302 set->free(str);
303 }
304
ch_set_is_concurrent()305 static inline bool ch_set_is_concurrent() { return false; }
306
307 #endif
308