1 /*
2  * Copyright (C) 2021 Jakub Kruszona-Zawadzki, Core Technology Sp. z o.o.
3  *
4  * This file is part of MooseFS.
5  *
6  * MooseFS is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation, version 2 (only).
9  *
10  * MooseFS is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with MooseFS; if not, write to the Free Software
17  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02111-1301, USA
18  * or visit http://www.gnu.org/licenses/gpl-2.0.html
19  */
20 
21 #include <stdio.h>
22 #include <stdlib.h>
23 #include <string.h>
24 #include <time.h>
25 #include <inttypes.h>
26 
27 #include "cuckoohash.h"
28 // #include "hashfn.h"
29 #include "random.h"
30 #include "massert.h"
31 
32 /* treap */
33 
34 typedef struct treap_node {
35 	hash_key_t key;
36 	void *val;
37 	uint32_t pri;
38 	struct treap_node *left,*right;
39 } treap_node;
40 
tree_free(treap_node * node)41 static inline void tree_free(treap_node *node) {
42 	if (node!=NULL) {
43 		tree_free(node->left);
44 		tree_free(node->right);
45 		free(node);
46 	}
47 }
48 
tree_rotate_with_left_child(treap_node ** node)49 static inline void tree_rotate_with_left_child(treap_node **node) {
50 	treap_node *tmp,*n = (*node);
51 	tmp = n->left;
52 	n->left = tmp->right;
53 	tmp->right = n;
54 	*node = tmp;
55 }
56 
tree_rotate_with_right_child(treap_node ** node)57 static inline void tree_rotate_with_right_child(treap_node **node) {
58 	treap_node *tmp,*n = (*node);
59 	tmp = n->right;
60 	n->right = tmp->left;
61 	tmp->left = n;
62 	*node = tmp;
63 }
64 
tree_insert(treap_node ** node,hash_key_t key,void * val)65 static inline uint8_t tree_insert(treap_node **node,hash_key_t key,void *val) {
66 	treap_node *n = (*node);
67 	if (n==NULL) {
68 		n = malloc(sizeof(treap_node));
69 		passert(n);
70 		n->left = NULL;
71 		n->right = NULL;
72 		n->key = key;
73 		n->val = val;
74 		n->pri = rndu32();
75 		*node = n;
76 		return 1;
77 	} else {
78 		if (key < n->key) {
79 			if (tree_insert(&(n->left),key,val)) {
80 				if (n->left->pri < n->pri) {
81 					tree_rotate_with_left_child(node);
82 				}
83 				return 1;
84 			}
85 		} else if (key > n->key) {
86 			if (tree_insert(&(n->right),key,val)) {
87 				if (n->right->pri < n->pri) {
88 					tree_rotate_with_right_child(node);
89 				}
90 				return 1;
91 			}
92 		}
93 	}
94 	return 0;
95 }
96 
tree_delete(treap_node ** node,hash_key_t key)97 static inline uint8_t tree_delete(treap_node **node,hash_key_t key) {
98 	treap_node *n,*c;
99 	while ((n=*node)) {
100 		if (key < n->key) {
101 			node = &(n->left);
102 		} else if (key > n->key) {
103 			node = &(n->right);
104 		} else {
105 			if (n->left==NULL && n->right==NULL) {
106 				*node = NULL;
107 				free(n);
108 			} else if (n->left==NULL) {
109 				*node = n->right;
110 				free(n);
111 			} else if (n->right==NULL) {
112 				*node = n->left;
113 				free(n);
114 			} else {
115 				if (rndu8()&1) {
116 					node = &(n->left);
117 					while ((c=*node) && c->right!=NULL) {
118 						node = &(c->right);
119 					}
120 					n->key = c->key;
121 					n->val = c->val;
122 					*node = c->left;
123 					free(c);
124 				} else {
125 					node = &(n->right);
126 					while ((c=*node) && c->left!=NULL) {
127 						node = &(c->left);
128 					}
129 					n->key = c->key;
130 					n->val = c->val;
131 					*node = c->right;
132 					free(c);
133 				}
134 			}
135 			return 1;
136 		}
137 	}
138 	return 0;
139 }
140 
tree_find(treap_node * n,hash_key_t key)141 void* tree_find(treap_node *n,hash_key_t key) {
142 	while (n) {
143 		if (key < n->key) {
144 			n = n->left;
145 		} else if (key > n->key) {
146 			n = n->right;
147 		} else {
148 			return n->val;
149 		}
150 	}
151 	return NULL;
152 }
153 
154 
155 
156 
157 /* cuckoo hash */
158 
159 #define HASHFN1(x,mask) ((x)&(mask))
160 #define HASHFN2(x,mask) ((((x)*167)>>8)&(mask))
161 #define HASHTAB_MOVEFACTOR 3
162 
163 #define HASHTAB_LOBITS 20
164 #define HASHTAB_HISIZE (0x80000000>>(HASHTAB_LOBITS))
165 #define HASHTAB_LOSIZE (1<<HASHTAB_LOBITS)
166 #define HASHTAB_MASK (HASHTAB_LOSIZE-1)
167 
168 #define HASH_BUCKET_SIZE 6
169 
170 typedef struct _hentry {
171 	uint8_t e;
172 	hash_key_t k[HASH_BUCKET_SIZE];
173 	void* v[HASH_BUCKET_SIZE];
174 } hentry;
175 
176 typedef struct _htab {
177 	hentry* *hashtab;
178 	treap_node *groot;
179 	uint8_t rehashgarbage;
180 	uint32_t helements,telements;
181 	uint32_t size;
182 	uint32_t mask;
183 	uint32_t rehashpos;
184 } htab;
185 
hash_cuckoo(htab * ht,hentry * he,hash_key_t x,void * v)186 static inline int hash_cuckoo(htab *ht,hentry *he,hash_key_t x,void *v) {
187 	hentry *cuckoohe1,*cuckoohe2,*cuckoohe;
188 	uint32_t i,cuckoohash;
189 	for (i=0 ; i<he->e ; i++) {
190 		cuckoohash = HASHFN1(he->k[i],ht->mask);
191 		if (cuckoohash >= ht->rehashpos) {
192 			cuckoohash = cuckoohash & (ht->mask>>1);
193 		}
194 		cuckoohe1 = ht->hashtab[cuckoohash>>HASHTAB_LOBITS] + (cuckoohash&HASHTAB_MASK);
195 		cuckoohash = HASHFN2(he->k[i],ht->mask);
196 		if (cuckoohash >= ht->rehashpos) {
197 			cuckoohash = cuckoohash & (ht->mask>>1);
198 		}
199 		cuckoohe2 = ht->hashtab[cuckoohash>>HASHTAB_LOBITS] + (cuckoohash&HASHTAB_MASK);
200 		if (cuckoohe1!=he) {
201 			cuckoohe = cuckoohe1;
202 		} else if (cuckoohe2!=he) {
203 			cuckoohe = cuckoohe2;
204 		} else {
205 			cuckoohe = NULL;
206 		}
207 		if (cuckoohe!=NULL) {
208 			if (cuckoohe->e<HASH_BUCKET_SIZE) {
209 				cuckoohe->k[cuckoohe->e] = he->k[i];
210 				cuckoohe->v[cuckoohe->e] = he->v[i];
211 				cuckoohe->e++;
212 				he->k[i] = x;
213 				he->v[i] = v;
214 				ht->helements++;
215 				return 1;
216 			}
217 		}
218 	}
219 	return 0;
220 }
221 
222 void chash_add(void *h,hash_key_t x,void *v);
223 
tree_rebuild(htab * ht,treap_node * node)224 static inline void tree_rebuild(htab *ht,treap_node *node) {
225 	if (node!=NULL) {
226 		tree_rebuild(ht,node->left);
227 		tree_rebuild(ht,node->right);
228 		chash_add(ht,node->key,node->val);
229 		free(node);
230 	}
231 }
232 
hash_garbage_to_hash(htab * ht)233 static inline void hash_garbage_to_hash(htab *ht) {
234 	treap_node *nroot;
235 	nroot = ht->groot;
236 	ht->telements = 0;
237 	ht->groot = NULL;
238 	tree_rebuild(ht,nroot);
239 }
240 
hash_rehash_job(htab * ht)241 static inline void hash_rehash_job(htab *ht) {
242 	uint32_t i,lhash,hash1,hash2;
243 	hentry *he,*hen;
244 	if (ht->rehashpos < ht->size) {
245 		for (i=0 ; i<HASHTAB_MOVEFACTOR && ht->rehashpos < ht->size ; i++) {
246 			lhash = ht->rehashpos&(ht->mask>>1);
247 			he = ht->hashtab[lhash>>HASHTAB_LOBITS] + (lhash&HASHTAB_MASK);
248 			hen = ht->hashtab[ht->rehashpos>>HASHTAB_LOBITS] + (ht->rehashpos&HASHTAB_MASK);
249 			hen->e = 0;
250 			i = 0;
251 			while (i<he->e) {
252 				hash1 = HASHFN1(he->k[i],ht->mask);
253 				hash2 = HASHFN2(he->k[i],ht->mask);
254 				if (hash1==ht->rehashpos || hash2==ht->rehashpos) {
255 					hen->k[hen->e] = he->k[i];
256 					hen->v[hen->e] = he->v[i];
257 					hen->e++;
258 					he->e--;
259 					if (i<he->e) {
260 						he->k[i] = he->k[he->e];
261 						he->v[i] = he->v[he->e];
262 					}
263 				} else {
264 					i++;
265 				}
266 			}
267 			ht->rehashpos++;
268 		}
269 	} else if (ht->rehashgarbage) {
270 		hash_garbage_to_hash(ht);
271 		ht->rehashgarbage = 0;
272 	} else if (ht->helements * 3 > ht->size * 2 * HASH_BUCKET_SIZE) {
273 //		printf("rehash\n");
274 		ht->size *= 2;
275 		ht->mask = ht->size - 1;
276 		for (i=ht->rehashpos>>HASHTAB_LOBITS ; i<ht->size>>HASHTAB_LOBITS ; i++) {
277 			ht->hashtab[i] = malloc(sizeof(hentry)*HASHTAB_LOSIZE);
278 			passert(ht->hashtab[i]);
279 		}
280 		ht->rehashgarbage = 1;
281 	}
282 }
283 
chash_new()284 void* chash_new() {
285 	htab *ht;
286 
287 	ht = malloc(sizeof(htab));
288 	passert(ht);
289 	ht->mask = HASHTAB_MASK;
290 	ht->size = HASHTAB_LOSIZE;
291 	ht->helements = 0;
292 	ht->telements = 0;
293 	ht->rehashgarbage = 0;
294 	ht->rehashpos = ht->size;
295 	ht->hashtab = malloc(sizeof(hentry*)*HASHTAB_HISIZE);
296 	passert(ht->hashtab);
297 	ht->hashtab[0] = malloc(sizeof(hentry)*HASHTAB_LOSIZE);
298 	passert(ht->hashtab[0]);
299 	memset(ht->hashtab[0],0,sizeof(hentry)*HASHTAB_LOSIZE);
300 	ht->groot = NULL;
301 	return ht;
302 }
303 
304 /* delete all elements, but leave structure initialized */
chash_erase(void * h)305 void chash_erase(void *h) {
306 	htab *ht = (htab*)h;
307 	uint32_t i;
308 
309 	tree_free(ht->groot);
310 	ht->groot = NULL;
311 	for (i=1 ; i<ht->size>>HASHTAB_LOBITS ; i++) {
312 		if (ht->hashtab[i]!=NULL) {
313 			free(ht->hashtab[i]);
314 			ht->hashtab[i]=NULL;
315 		}
316 	}
317 	memset(ht->hashtab[0],0,sizeof(hentry)*HASHTAB_LOSIZE);
318 	ht->mask = HASHTAB_MASK;
319 	ht->size = HASHTAB_LOSIZE;
320 	ht->helements = 0;
321 	ht->telements = 0;
322 	ht->rehashgarbage = 0;
323 	ht->rehashpos = ht->size;
324 }
325 
326 /* delete elemnts and structure */
chash_free(void * h)327 void chash_free(void *h) {
328 	htab *ht = (htab*)h;
329 	uint32_t i;
330 
331 	tree_free(ht->groot);
332 	for (i=0 ; i<ht->size>>HASHTAB_LOBITS ; i++) {
333 		if (ht->hashtab[i]!=NULL) {
334 			free(ht->hashtab[i]);
335 		}
336 	}
337 	free(ht);
338 }
339 
chash_find(void * h,hash_key_t x)340 void* chash_find(void *h,hash_key_t x) {
341 	htab *ht = (htab*)h;
342 	hentry *he;
343 	uint32_t i,hash;
344 
345 	hash = HASHFN1(x,ht->mask);
346 	if (hash >= ht->rehashpos) {
347 		hash = hash & (ht->mask>>1);
348 	}
349 	he = ht->hashtab[hash>>HASHTAB_LOBITS] + (hash&HASHTAB_MASK);
350 	for (i=0 ; i<he->e ; i++) {
351 		if (he->k[i]==x) {
352 			return he->v[i];
353 		}
354 	}
355 
356 	hash = HASHFN2(x,ht->mask);
357 	if (hash >= ht->rehashpos) {
358 		hash = hash & (ht->mask>>1);
359 	}
360 	he = ht->hashtab[hash>>HASHTAB_LOBITS] + (hash&HASHTAB_MASK);
361 	for (i=0 ; i<he->e ; i++) {
362 		if (he->k[i]==x) {
363 			return he->v[i];
364 		}
365 	}
366 
367 	// search "garbage"
368 	return tree_find(ht->groot,x);
369 }
370 
chash_delete(void * h,hash_key_t x)371 void chash_delete(void *h,hash_key_t x) {
372 	htab *ht = (htab*)h;
373 	hentry *he;
374 	uint32_t i,hash;
375 
376 	hash = HASHFN1(x,ht->mask);
377 	if (hash >= ht->rehashpos) {
378 		hash = hash & (ht->mask>>1);
379 	}
380 	he = ht->hashtab[hash>>HASHTAB_LOBITS] + (hash&HASHTAB_MASK);
381 	for (i=0 ; i<he->e ; i++) {
382 		if (he->k[i]==x) {
383 			he->e--;
384 			if (i<he->e) {
385 				he->k[i] = he->k[he->e];
386 				he->v[i] = he->v[he->e];
387 			}
388 			ht->helements--;
389 			return;
390 		}
391 	}
392 
393 	hash = HASHFN2(x,ht->mask);
394 	if (hash >= ht->rehashpos) {
395 		hash = hash & (ht->mask>>1);
396 	}
397 	he = ht->hashtab[hash>>HASHTAB_LOBITS] + (hash&HASHTAB_MASK);
398 	for (i=0 ; i<he->e ; i++) {
399 		if (he->k[i]==x) {
400 			he->e--;
401 			if (i<he->e) {
402 				he->k[i] = he->k[he->e];
403 				he->v[i] = he->v[he->e];
404 			}
405 			ht->helements--;
406 			return;
407 		}
408 	}
409 
410 	// delete from "garbage"
411 	ht->telements -= tree_delete(&(ht->groot),x);
412 }
413 
chash_add(void * h,hash_key_t x,void * v)414 void chash_add(void *h,hash_key_t x,void *v) {
415 	htab *ht = (htab*)h;
416 	hentry *he1,*he2;
417 	uint32_t i,hash1,hash2;
418 
419 	hash_rehash_job(ht);
420 
421 	hash1 = HASHFN1(x,ht->mask);
422 	if (hash1 >= ht->rehashpos) {
423 		hash1 = hash1 & (ht->mask>>1);
424 	}
425 	hash2 = HASHFN2(x,ht->mask);
426 	if (hash2 >= ht->rehashpos) {
427 		hash2 = hash2 & (ht->mask>>1);
428 	}
429 	he1 = ht->hashtab[hash1>>HASHTAB_LOBITS] + (hash1&HASHTAB_MASK);
430 	he2 = ht->hashtab[hash2>>HASHTAB_LOBITS] + (hash2&HASHTAB_MASK);
431 	for (i=0 ; i<he1->e ; i++) {
432 		if (he1->k[i]==x) {
433 			return;
434 		}
435 	}
436 	for (i=0 ; i<he2->e ; i++) {
437 		if (he2->k[i]==x) {
438 			return;
439 		}
440 	}
441 	if (tree_find(ht->groot,x)!=NULL) {
442 		return;
443 	}
444 	if (he1->e==HASH_BUCKET_SIZE && he2->e==HASH_BUCKET_SIZE) { // cuckoo
445 		if (hash_cuckoo(ht,he1,x,v)) {
446 			return;
447 		}
448 		if (hash_cuckoo(ht,he2,x,v)) {
449 			return;
450 		}
451 		ht->telements += tree_insert(&(ht->groot),x,v);
452 		return;
453 	}
454 	if (he1->e>he2->e) {
455 		he2->k[he2->e] = x;
456 		he2->v[he2->e] = v;
457 		he2->e++;
458 	} else {
459 		he1->k[he1->e] = x;
460 		he1->v[he1->e] = v;
461 		he1->e++;
462 	}
463 	ht->helements++;
464 }
465 
chash_get_elemcount(void * h)466 uint32_t chash_get_elemcount(void *h) {
467 	htab *ht = (htab*)h;
468 	return ht->helements + ht->telements;
469 }
470 
chash_get_size(void * h)471 uint32_t chash_get_size(void *h) {
472 	htab *ht = (htab*)h;
473 //	printf("ht->size: %"PRIu32" ; ht->helements: %"PRIu32" ; ht->telements: %"PRIu32"\n",ht->size,ht->helements,ht->telements);
474 	return (ht->size * sizeof(hentry)) + (ht->telements * sizeof(treap_node));
475 }
476