1 /*
2  * %CopyrightBegin%
3  *
4  * Copyright 2019 Kjell Winblad (kjellwinblad@gmail.com, http://winsh.me).
5  * All Rights Reserved.
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  *     http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  *
19  * %CopyrightEnd%
20  */
21 
22 /*
23  *
24  * Author: Kjell Winblad
25  *
26  */
27 
28 #ifndef CHAINED_HASH_SET_H
29 #define CHAINED_HASH_SET_H
30 
31 
32 #if defined(_MSC_VER)
33 #define inline __inline
34 #endif
35 
36 #include "sorted_list_set.h"
37 #include <stdint.h>
38 
39 #define CHAIN_LENGHT_EXPAND_THRESHOLD 2
40 #define CHAIN_LENGHT_SHRINK_THRESHOLD 0.5
41 /*Must be power of two*/
42 #define INITIAL_NUMBER_OF_BUCKETS 4
43 
44 typedef struct {
45   unsigned int keyPosition;
46   void *(*extract_key)(void *v, int keyPos);
47   unsigned int (*hash_key)(void *key);
48   bool (*are_equal)(void *v1, void *v2);
49   char *(*to_string)(void *v1);
50   void *(*malloc)(size_t size);
51   void (*free)(void *ptr);
52   unsigned int numberOfBuckets;
53   unsigned int expandTreshold;
54   unsigned int shrinkTreshold;
55   unsigned int size;
56   SortedListSetNode **buckets;
57 } ChainedHashSet;
58 
59 /*
60  * The reverse_bits function is inspired by:
61  * https://stackoverflow.com/questions/746171/efficient-algorithm-for-bit-reversal-from-msb-lsb-to-lsb-msb-in-c
62  */
63 static const unsigned char BitReverseTable256[] = {
64     0x00, 0x80, 0x40, 0xC0, 0x20, 0xA0, 0x60, 0xE0, 0x10, 0x90, 0x50, 0xD0,
65     0x30, 0xB0, 0x70, 0xF0, 0x08, 0x88, 0x48, 0xC8, 0x28, 0xA8, 0x68, 0xE8,
66     0x18, 0x98, 0x58, 0xD8, 0x38, 0xB8, 0x78, 0xF8, 0x04, 0x84, 0x44, 0xC4,
67     0x24, 0xA4, 0x64, 0xE4, 0x14, 0x94, 0x54, 0xD4, 0x34, 0xB4, 0x74, 0xF4,
68     0x0C, 0x8C, 0x4C, 0xCC, 0x2C, 0xAC, 0x6C, 0xEC, 0x1C, 0x9C, 0x5C, 0xDC,
69     0x3C, 0xBC, 0x7C, 0xFC, 0x02, 0x82, 0x42, 0xC2, 0x22, 0xA2, 0x62, 0xE2,
70     0x12, 0x92, 0x52, 0xD2, 0x32, 0xB2, 0x72, 0xF2, 0x0A, 0x8A, 0x4A, 0xCA,
71     0x2A, 0xAA, 0x6A, 0xEA, 0x1A, 0x9A, 0x5A, 0xDA, 0x3A, 0xBA, 0x7A, 0xFA,
72     0x06, 0x86, 0x46, 0xC6, 0x26, 0xA6, 0x66, 0xE6, 0x16, 0x96, 0x56, 0xD6,
73     0x36, 0xB6, 0x76, 0xF6, 0x0E, 0x8E, 0x4E, 0xCE, 0x2E, 0xAE, 0x6E, 0xEE,
74     0x1E, 0x9E, 0x5E, 0xDE, 0x3E, 0xBE, 0x7E, 0xFE, 0x01, 0x81, 0x41, 0xC1,
75     0x21, 0xA1, 0x61, 0xE1, 0x11, 0x91, 0x51, 0xD1, 0x31, 0xB1, 0x71, 0xF1,
76     0x09, 0x89, 0x49, 0xC9, 0x29, 0xA9, 0x69, 0xE9, 0x19, 0x99, 0x59, 0xD9,
77     0x39, 0xB9, 0x79, 0xF9, 0x05, 0x85, 0x45, 0xC5, 0x25, 0xA5, 0x65, 0xE5,
78     0x15, 0x95, 0x55, 0xD5, 0x35, 0xB5, 0x75, 0xF5, 0x0D, 0x8D, 0x4D, 0xCD,
79     0x2D, 0xAD, 0x6D, 0xED, 0x1D, 0x9D, 0x5D, 0xDD, 0x3D, 0xBD, 0x7D, 0xFD,
80     0x03, 0x83, 0x43, 0xC3, 0x23, 0xA3, 0x63, 0xE3, 0x13, 0x93, 0x53, 0xD3,
81     0x33, 0xB3, 0x73, 0xF3, 0x0B, 0x8B, 0x4B, 0xCB, 0x2B, 0xAB, 0x6B, 0xEB,
82     0x1B, 0x9B, 0x5B, 0xDB, 0x3B, 0xBB, 0x7B, 0xFB, 0x07, 0x87, 0x47, 0xC7,
83     0x27, 0xA7, 0x67, 0xE7, 0x17, 0x97, 0x57, 0xD7, 0x37, 0xB7, 0x77, 0xF7,
84     0x0F, 0x8F, 0x4F, 0xCF, 0x2F, 0xAF, 0x6F, 0xEF, 0x1F, 0x9F, 0x5F, 0xDF,
85     0x3F, 0xBF, 0x7F, 0xFF};
86 
reverse_bits(uint32_t v)87 static inline uint32_t reverse_bits(uint32_t v) {
88   return (((uint32_t)BitReverseTable256[v & 0xff]) << 24) |
89          (((uint32_t)BitReverseTable256[(v >> 8) & 0xff]) << 16) |
90          (((uint32_t)BitReverseTable256[(v >> 16) & 0xff]) << 8) |
91          (((uint32_t)BitReverseTable256[(v >> 24) & 0xff]));
92 }
93 
ch_set_increase_size(ChainedHashSet * set)94 static inline void ch_set_increase_size(ChainedHashSet *set) {
95   set->size = set->size + 1;
96   if (set->size > set->expandTreshold) {
97     unsigned int oldNumberOfBuckets = set->numberOfBuckets;
98     unsigned int newNumberOfBuckets = oldNumberOfBuckets * 2;
99     unsigned int splitUpMask = reverse_bits(newNumberOfBuckets - 1) ^
100                                reverse_bits(oldNumberOfBuckets - 1);
101     SortedListSetNode **newBuckets =
102         set->malloc(sizeof(SortedListSetNode *) * newNumberOfBuckets);
103     SortedListSetNode **oldBuckets = set->buckets;
104     SortedListSetNode *moveTemp;
105     for (unsigned int i = 0; i < oldNumberOfBuckets; i++) {
106       moveTemp = sl_set_split_opt(&oldBuckets[i], splitUpMask);
107       newBuckets[i] = oldBuckets[i];
108       newBuckets[i + oldNumberOfBuckets] = moveTemp;
109     }
110     set->free(oldBuckets);
111     set->buckets = newBuckets;
112     set->numberOfBuckets = newNumberOfBuckets;
113     set->expandTreshold = newNumberOfBuckets * CHAIN_LENGHT_EXPAND_THRESHOLD;
114     set->shrinkTreshold = newNumberOfBuckets * CHAIN_LENGHT_SHRINK_THRESHOLD;
115   }
116 }
117 
ch_set_decrease_size(ChainedHashSet * set)118 static inline void ch_set_decrease_size(ChainedHashSet *set) {
119   set->size = set->size - 1;
120   if (set->size < set->shrinkTreshold) {
121     unsigned int oldNumberOfBuckets = set->numberOfBuckets;
122     unsigned int newNumberOfBuckets = oldNumberOfBuckets / 2;
123     SortedListSetNode **newBuckets =
124         set->malloc(sizeof(SortedListSetNode *) * newNumberOfBuckets);
125     SortedListSetNode **oldBuckets = set->buckets;
126     for (unsigned int i = 0; i < newNumberOfBuckets; i++) {
127       newBuckets[i] = oldBuckets[i];
128       sl_set_concat_opt(&newBuckets[i], oldBuckets[i + newNumberOfBuckets]);
129     }
130     set->free(oldBuckets);
131     set->buckets = newBuckets;
132     set->numberOfBuckets = newNumberOfBuckets;
133     set->expandTreshold = newNumberOfBuckets * CHAIN_LENGHT_EXPAND_THRESHOLD;
134     if (set->numberOfBuckets == INITIAL_NUMBER_OF_BUCKETS) {
135       set->shrinkTreshold = 0;
136     } else {
137       set->shrinkTreshold = newNumberOfBuckets * CHAIN_LENGHT_SHRINK_THRESHOLD;
138     }
139   }
140 }
141 
ch_set_initialize(ChainedHashSet * set,unsigned int keyPosition,void * (* extract_key)(void * v,int keyPos),unsigned int (* hash_key)(void * k),bool (* are_equal)(void * v1,void * v2),char * (* to_string)(void * v1),void * (* my_malloc)(size_t size),void (* my_free)(void * ptr))142 static inline void ch_set_initialize(ChainedHashSet *set,
143                                      unsigned int keyPosition,
144                                      void *(*extract_key)(void *v, int keyPos),
145                                      unsigned int (*hash_key)(void *k),
146                                      bool (*are_equal)(void *v1, void *v2),
147                                      char *(*to_string)(void *v1),
148                                      void *(*my_malloc)(size_t size),
149                                      void (*my_free)(void *ptr)) {
150   set->keyPosition = keyPosition;
151   set->extract_key = extract_key;
152   set->hash_key = hash_key;
153   set->are_equal = are_equal;
154   set->to_string = to_string;
155   set->malloc = my_malloc;
156   set->free = my_free;
157   set->numberOfBuckets = INITIAL_NUMBER_OF_BUCKETS;
158   set->expandTreshold = set->numberOfBuckets * CHAIN_LENGHT_EXPAND_THRESHOLD;
159   set->shrinkTreshold = set->numberOfBuckets * CHAIN_LENGHT_EXPAND_THRESHOLD;
160   set->size = 0;
161   set->buckets =
162       set->malloc(sizeof(SortedListSetNode *) * INITIAL_NUMBER_OF_BUCKETS);
163   for (int i = 0; i < INITIAL_NUMBER_OF_BUCKETS; i++) {
164     set->buckets[i] = NULL;
165   }
166 }
167 
ch_set_create(unsigned int keyPosition,void * (* extract_key)(void * v,int keyPos),unsigned int (* hash_key)(void * k),bool (* are_equal)(void * v1,void * v2),char * (* to_string)(void * v1),void * (* my_malloc)(size_t size),void (* my_free)(void * ptr))168 static inline ChainedHashSet *ch_set_create(
169     unsigned int keyPosition, void *(*extract_key)(void *v, int keyPos),
170     unsigned int (*hash_key)(void *k), bool (*are_equal)(void *v1, void *v2),
171     char *(*to_string)(void *v1), void *(*my_malloc)(size_t size),
172     void (*my_free)(void *ptr)) {
173   ChainedHashSet *set = my_malloc(sizeof(ChainedHashSet));
174   ch_set_initialize(set, keyPosition, extract_key, hash_key, are_equal,
175                     to_string, my_malloc, my_free);
176   return set;
177 }
178 
ch_set_insert_opt(ChainedHashSet * set,void * value,unsigned int valueSize,unsigned int hashValue,bool overwrite)179 static inline bool ch_set_insert_opt(ChainedHashSet *set, void *value,
180                                      unsigned int valueSize,
181                                      unsigned int hashValue, bool overwrite) {
182   unsigned int bucketIndex = hashValue & (set->numberOfBuckets - 1);
183   SortedListSetNode **bucket = &set->buckets[bucketIndex];
184   bool oneAdded = sl_set_insert_opt(
185       bucket, value, valueSize, reverse_bits(hashValue), set->keyPosition,
186       overwrite, set->extract_key, set->are_equal, set->malloc, set->free);
187   if (oneAdded) {
188     ch_set_increase_size(set);
189   }
190   return oneAdded;
191 }
192 
ch_set_insert(void * setParam,void * value,unsigned int valueSize)193 static inline void ch_set_insert(void *setParam, void *value,
194                                  unsigned int valueSize) {
195   ChainedHashSet *set = (ChainedHashSet *)setParam;
196   void *key = set->extract_key(value, set->keyPosition);
197   unsigned int hashValue = set->hash_key(key);
198   ch_set_insert_opt(set, value, valueSize, hashValue, true);
199 }
200 
ch_set_insert_new(void * setParam,void * value,unsigned int valueSize)201 static inline bool ch_set_insert_new(void *setParam, void *value,
202                                      unsigned int valueSize) {
203   ChainedHashSet *set = (ChainedHashSet *)setParam;
204   void *key = set->extract_key(value, set->keyPosition);
205   unsigned int hashValue = set->hash_key(key);
206   return ch_set_insert_opt(set, value, valueSize, hashValue, false);
207 }
208 
ch_set_lookup(void * setParam,void * key)209 static inline void *ch_set_lookup(void *setParam, void *key) {
210   ChainedHashSet *set = (ChainedHashSet *)setParam;
211   unsigned int hashValue = set->hash_key(key);
212   unsigned int bucketIndex = hashValue & (set->numberOfBuckets - 1);
213   SortedListSetNode **bucket = &set->buckets[bucketIndex];
214   return sl_set_lookup_opt(bucket, key, reverse_bits(hashValue),
215                            set->keyPosition, set->extract_key, set->are_equal,
216                            false, set->malloc);
217 }
218 
ch_set_delete(void * setParam,void * key,unsigned int keySize)219 static inline void ch_set_delete(void *setParam, void *key,
220                                  unsigned int keySize) {
221   (void)keySize;
222   ChainedHashSet *set = (ChainedHashSet *)setParam;
223   unsigned int hashValue = set->hash_key(key);
224   unsigned int bucketIndex = hashValue & (set->numberOfBuckets - 1);
225   SortedListSetNode **bucket = &set->buckets[bucketIndex];
226   bool oneRemoved =
227       sl_set_delete_opt(bucket, key, reverse_bits(hashValue), set->keyPosition,
228                         set->extract_key, set->are_equal, set->free);
229   if (oneRemoved) {
230     ch_set_decrease_size(set);
231   }
232 }
233 
ch_set_traverse(void * setParam,void (* traverser)(size_t index,void * v,void * context),void * context)234 static inline void ch_set_traverse(void *setParam,
235                                    void (*traverser)(size_t index, void *v,
236                                                      void *context),
237                                    void *context) {
238   ChainedHashSet *set = (ChainedHashSet *)setParam;
239   unsigned int numberOfBuckets = set->numberOfBuckets;
240   SortedListSetNode *itemNode;
241   size_t index = 0;
242   for (unsigned int i = 0; i < numberOfBuckets; i++) {
243     itemNode = set->buckets[i];
244     while (itemNode != NULL) {
245       traverser(index, (void *)itemNode->value, context);
246       index++;
247       itemNode = itemNode->next;
248     }
249   }
250 }
251 
ch_set_free(void * setParam)252 static inline void ch_set_free(void *setParam) {
253   ChainedHashSet *set = (ChainedHashSet *)setParam;
254   unsigned int numberOfBuckets = set->numberOfBuckets;
255   for (unsigned int i = 0; i < numberOfBuckets; i++) {
256     sl_set_free_opt(&set->buckets[i], set->free);
257   }
258   set->free(set->buckets);
259   set->free(set);
260 }
261 
ch_set_to_string(void * setParam)262 static inline char *ch_set_to_string(void *setParam) {
263   ChainedHashSet *set = (ChainedHashSet *)setParam;
264   unsigned int numberOfBuckets = set->numberOfBuckets;
265   char **bucketStrings = malloc(sizeof(char*)*numberOfBuckets);
266   unsigned int *bucketStringSizes = malloc(sizeof(unsigned int)*numberOfBuckets);
267   unsigned int totalBucketsStringSize = 0;
268   SortedListSet *tempListSet = plain_sl_set_create(
269       set->keyPosition, set->extract_key, set->hash_key, set->are_equal,
270       set->to_string, set->malloc, set->free);
271   for (unsigned int i = 0; i < numberOfBuckets; i++) {
272     tempListSet->head = set->buckets[i];
273     bucketStrings[i] = sl_set_to_string(tempListSet);
274     bucketStringSizes[i] = strlen(bucketStrings[i]);
275     totalBucketsStringSize = totalBucketsStringSize + bucketStringSizes[i];
276   }
277   tempListSet->head = NULL;
278   sl_set_free(tempListSet);
279   char *resultString =
280       set->malloc(totalBucketsStringSize + numberOfBuckets * 3 - 3 + 3);
281   resultString[0] = '[';
282   unsigned int currentPosition = 1;
283   for (unsigned int i = 0; i < numberOfBuckets; i++) {
284     sprintf(&resultString[currentPosition], "%s", bucketStrings[i]);
285     set->free(bucketStrings[i]);
286     currentPosition = currentPosition + bucketStringSizes[i];
287     if (i != (numberOfBuckets - 1)) {
288       sprintf(&resultString[currentPosition], ",\n ");
289       currentPosition = currentPosition + 3;
290     }
291   }
292   sprintf(&resultString[currentPosition], "]");
293   free(bucketStrings);
294   free(bucketStringSizes);
295   return resultString;
296 }
297 
ch_set_print(void * setParam)298 static inline void ch_set_print(void *setParam) {
299   ChainedHashSet *set = (ChainedHashSet *)setParam;
300   char *str = ch_set_to_string(set);
301   printf("%s\n", str);
302   set->free(str);
303 }
304 
ch_set_is_concurrent()305 static inline bool ch_set_is_concurrent() { return false; }
306 
307 #endif
308