1 /*
2 *
3 * Copyright (c) 2011, Jue Ruan <ruanjue@gmail.com>
4 *
5 *
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program. If not, see <http://www.gnu.org/licenses/>.
18 */
19
20 #ifndef __HASH_SET_RJ
21 #define __HASH_SET_RJ
22
23 #include <stdio.h>
24 #include <stdlib.h>
25 #include <string.h>
26 #include <stdint.h>
27 #include <math.h>
28
29 static const uint64_t sys_prime_list[61] = {
30 0x7LLU, 0xfLLU, 0x1fLLU, 0x43LLU, 0x89LLU,
31 0x115LLU, 0x22dLLU, 0x45dLLU, 0x8bdLLU, 0x1181LLU,
32 0x2303LLU, 0x4609LLU, 0x8c17LLU, 0x1183dLLU, 0x2307bLLU,
33 0x460fdLLU, 0x8c201LLU, 0x118411LLU, 0x230833LLU, 0x461069LLU,
34 0x8c20e1LLU, 0x11841cbLLU, 0x2308397LLU, 0x461075bLLU, 0x8c20ecbLLU,
35 0x11841da5LLU, 0x23083b61LLU, 0x461076c7LLU, 0x8c20ed91LLU, 0x11841db31LLU,
36 0x23083b673LLU, 0x461076d1bLLU, 0x8c20eda41LLU, 0x11841db48dLLU, 0x23083b6937LLU,
37 0x461076d27fLLU, 0x8c20eda50dLLU, 0x11841db4a59LLU, 0x23083b694ebLLU, 0x461076d29f1LLU,
38 0x8c20eda5441LLU, 0x11841db4a887LLU, 0x23083b69511fLLU, 0x461076d2a2c1LLU, 0x8c20eda54591LLU,
39 0x11841db4a8b55LLU, 0x23083b69516c1LLU, 0x461076d2a2da5LLU, 0x8c20eda545b55LLU, 0x11841db4a8b6b5LLU,
40 0x23083b69516d91LLU, 0x461076d2a2db3bLLU, 0x8c20eda545b69dLLU, 0x11841db4a8b6d5dLLU, 0x23083b69516daf5LLU,
41 0x461076d2a2db5edLLU, 0x8c20eda545b6c5fLLU, 0x11841db4a8b6d8ebLLU, 0x23083b69516db1ffLLU, 0x461076d2a2db643fLLU,
42 0x8c20eda545b6c8f3LLU
43 };
44
_rj_hashset_find_prime(uint64_t n)45 static inline uint64_t _rj_hashset_find_prime(uint64_t n){
46 uint32_t i;
47 i = 0;
48 while(i < 60 && n > sys_prime_list[i]) i ++;
49 return sys_prime_list[i];
50 }
51
52 #ifndef HASH_FLAG_MACROS
53 #define HASH_FLAG_MACROS
54 #define is_entity_null(flags, idx) ((flags)[(idx)>>4]>>(((idx)&0x0f)<<1)&0x01)
55 #define is_entity_del(flags, idx) ((flags)[(idx)>>4]>>(((idx)&0x0f)<<1)&0x02)
56 #define exists_entity(flags, idx) (!((flags)[(idx)>>4]>>(((idx)&0x0f)<<1)&0x03))
57 #define set_entity_null(flags, idx) ((flags)[(idx)>>4] |= (0x01u<<(((idx)&0x0f)<<1)))
58 #define set_entity_del(flags, idx) ((flags)[(idx)>>4] |= (0x02u<<(((idx)&0x0f)<<1)))
59 #define clear_entity_null(flags, idx) ((flags)[(idx)>>4] &= ~(0x01u<<(((idx)&0x0f)<<1)))
60 #define clear_entity_del(flags, idx) ((flags)[(idx)>>4] &= ~(0x02u<<(((idx)&0x0f)<<1)))
61 #endif
62
63 #define init_hashset_macro(hash_type, hash_key_type) \
64 typedef struct { hash_key_type *array; uint32_t *flags; size_t e_size; size_t ocp; size_t size; size_t count; size_t max; float load_factor; size_t iter_ptr; } hash_type; \
65 static inline int hash_type##_is_prime(uint64_t num){ \
66 uint64_t i, max; \
67 if(num < 4) return 1; \
68 if(num % 2 == 0) return 0; \
69 max = (uint64_t)sqrt((double)num); \
70 for(i=3;i<max;i+=2){ if(num % i == 0) return 0; } \
71 return 1; \
72 } \
73 static inline uint64_t hash_type##_find_next_prime(uint64_t num){ \
74 if(num % 2 == 0) num ++; \
75 while(1){ if(hash_type##_is_prime(num)) return num; num += 2; } \
76 } \
77 static inline hash_type* init2_##hash_type(uint32_t size, float factor){ \
78 hash_type *set; \
79 set = (hash_type*)malloc(sizeof(hash_type)); \
80 set->e_size = sizeof(hash_key_type); \
81 set->size = _rj_hashset_find_prime(size); \
82 set->count = 0; \
83 set->ocp = 0; \
84 set->load_factor = factor; \
85 set->max = set->size * set->load_factor; \
86 set->iter_ptr = 0; \
87 set->array = calloc(set->size, set->e_size); \
88 set->flags = malloc((set->size + 15)/16 * 4); \
89 memset(set->flags, 0x55, (set->size + 15) / 16 * 4); \
90 return set; \
91 } \
92 static inline hash_type* init_##hash_type(uint32_t size){ return init2_##hash_type(size, 0.67f); }
93
94 #define get_hashset_macro(hash_type, hash_key_type, hash_code_macro, hash_equal_macro) \
95 static inline hash_key_type* get_##hash_type(hash_type *set, hash_key_type key){\
96 hash_key_type *e; \
97 uint32_t flag; \
98 size_t hc; \
99 hc = hash_code_macro(key) % set->size; \
100 while(1){ \
101 flag = (set->flags[hc >> 4] >> (((hc) & 0x0f) << 1)) & 0x03; \
102 if(flag & 0x01){ \
103 return NULL; \
104 } else if(flag & 0x02){ \
105 } else { \
106 e = ((hash_key_type*)set->array) + hc; \
107 if(hash_equal_macro(*e, key)) return e; \
108 } \
109 if(hc + 1 == set->size) hc = 0; else hc ++; \
110 } \
111 return NULL; \
112 } \
113 static inline size_t offset_##hash_type(hash_type *set, hash_key_type *ptr){ \
114 return ptr - set->array; \
115 }
116
117 #define prepare_hashset_macro(hash_type, hash_key_type, hash_code_macro, hash_equal_macro) \
118 static inline void encap_##hash_type(hash_type *set, size_t num); \
119 static inline hash_key_type* prepare_##hash_type(hash_type *set, hash_key_type key, int *exists){\
120 hash_key_type *e; \
121 size_t hc, d; \
122 encap_##hash_type(set, 1); \
123 hc = hash_code_macro((key)) % set->size; \
124 d = set->size; \
125 while(1){ \
126 if(is_entity_null(set->flags, hc)){ \
127 if(d == set->size){ \
128 clear_entity_null(set->flags, hc); \
129 set->ocp ++; \
130 } else { \
131 hc = d; \
132 clear_entity_del(set->flags, hc); \
133 } \
134 *exists = 0; \
135 set->count ++; \
136 e = ((hash_key_type*)set->array) + hc; \
137 return e; \
138 } else if(is_entity_del(set->flags, hc)){ \
139 if(d == set->size) d = hc; \
140 } else { \
141 e = ((hash_key_type*)set->array) + hc; \
142 if(hash_equal_macro((*e), (key))){ \
143 *exists = 1; \
144 return e; \
145 } \
146 } \
147 hc ++; \
148 hc %= set->size; \
149 } \
150 return NULL; \
151 }
152
153 #define exists_hashset_macro(hash_type, hash_key_type, hash_code_macro, hash_equal_macro) \
154 static inline int exists_##hash_type(hash_type *set, hash_key_type key){ \
155 hash_key_type *e; \
156 size_t hc; \
157 hc = hash_code_macro(key) % set->size; \
158 while(1){ \
159 if(is_entity_null(set->flags, hc)){ \
160 return 0; \
161 } else if(is_entity_del(set->flags, hc)){ \
162 } else { \
163 e = ((hash_key_type*)set->array) + hc; \
164 if(hash_equal_macro(*e, key)) return 1; \
165 } \
166 hc ++; \
167 hc %= set->size; \
168 } \
169 return 0; \
170 }
171
172 #define add_hashset_macro(hash_type, hash_key_type, hash_code_macro, hash_equal_macro) \
173 static inline hash_key_type* add_##hash_type(hash_type *set, hash_key_type key){ \
174 hash_key_type *e; \
175 size_t d, hc; \
176 hc = hash_code_macro(key) % set->size; \
177 d = set->size; \
178 do{ \
179 if(is_entity_null(set->flags, hc)){ \
180 if(d == set->size){ \
181 d = hc; \
182 clear_entity_null(set->flags, d); \
183 set->ocp ++; \
184 } else { \
185 clear_entity_del(set->flags, d); \
186 } \
187 e = ((hash_key_type*)set->array) + d; \
188 *e = key; \
189 set->count ++; \
190 return e; \
191 } else if(is_entity_del(set->flags, hc)){ \
192 if(d == set->size) d = hc; \
193 } else { \
194 e = ((hash_key_type*)set->array) + hc; \
195 if(hash_equal_macro(*e, key)){ \
196 return e; \
197 } \
198 } \
199 if(hc + 1 == set->size) hc = 0; \
200 else hc = hc + 1; \
201 } while(1); \
202 return NULL; \
203 }
204
205 #define put_hashset_macro(hash_type, hash_key_type) \
206 static inline hash_key_type* put_##hash_type(hash_type *set, hash_key_type key){ \
207 encap_##hash_type(set, 1); \
208 return add_##hash_type(set, key); \
209 }
210
211 #define remove_hashset_macro(hash_type, hash_key_type, hash_code_macro, hash_equal_macro) \
212 static inline void delete_##hash_type(hash_type *set, hash_key_type *key){ set_entity_del(set->flags, key - set->array); set->count --; } \
213 static inline int remove_##hash_type(hash_type *set, hash_key_type key){ \
214 hash_key_type *e; \
215 size_t hc; \
216 hc = hash_code_macro(key) % set->size; \
217 while(1){ \
218 if(is_entity_null(set->flags, hc)){ \
219 return 0; \
220 } else if(is_entity_del(set->flags, hc)){ \
221 } else { \
222 e = ((hash_key_type*)set->array) + hc; \
223 if(hash_equal_macro(*e, key)){ \
224 set->count --; \
225 set_entity_del(set->flags, hc); \
226 return 1; \
227 } \
228 } \
229 hc ++; \
230 hc %= set->size; \
231 } \
232 return 0; \
233 }
234
235 #define reset_iter_hashset_macro(hash_type) static inline void reset_iter_##hash_type(hash_type *set){ set->iter_ptr = 0; }
236
237 #define iter_hashset_macro(hash_type, hash_key_type) \
238 static inline int iter_##hash_type(hash_type *set, hash_key_type *ret){ \
239 if(set->iter_ptr >= set->size) return 0; \
240 while(set->iter_ptr < set->size){ \
241 if(exists_entity(set->flags, set->iter_ptr)){ \
242 *ret = *(((hash_key_type*)set->array) + set->iter_ptr); \
243 set->iter_ptr ++; \
244 return 1; \
245 } \
246 set->iter_ptr ++; \
247 } \
248 return 0; \
249 }
250
251 #define ref_iter_hashset_macro(hash_type, hash_key_type) \
252 static inline hash_key_type* ref_iter_##hash_type(hash_type *set){ \
253 if(set->iter_ptr >= set->size) return NULL; \
254 while(set->iter_ptr < set->size){ \
255 if(exists_entity(set->flags, set->iter_ptr)){ \
256 return (((hash_key_type*)set->array) + set->iter_ptr++); \
257 } \
258 set->iter_ptr ++; \
259 } \
260 return NULL; \
261 }
262
263 #define count_hashset_macro(hash_type) static inline int64_t count_##hash_type(hash_type *set){ return set->count; }
264
265 #define clear_hashset_macro(hash_type) \
266 static inline void clear_##hash_type(hash_type *set){ \
267 if(set->ocp == 0) return; \
268 memset(set->flags, 0x55, (set->size + 15) / 16 * 4); \
269 set->count = 0; \
270 set->ocp = 0; \
271 set->iter_ptr = 0; \
272 }
273
274 #define ffwrite(ptr, e_size, size, file) (e_size * fwrite(ptr, e_size, size, file))
275 #define ffread(ptr, e_size, size, file) (e_size * fread(ptr, e_size, size, file))
276
277 #define dump_hashset_macro(hash_type) \
278 static inline size_t sizeof_##hash_type(hash_type *set){ \
279 return sizeof(size_t) * 3 + sizeof(float) + set->e_size * set->size \
280 + sizeof(uint32_t) * ((set->size + 15) / 16); \
281 } \
282 static inline size_t dump_##hash_type(hash_type *set, FILE *out){ \
283 size_t n; \
284 n = ffwrite(&set->e_size, sizeof(size_t), 1, out); \
285 n += ffwrite(&set->size, sizeof(size_t), 1, out); \
286 n += ffwrite(&set->count, sizeof(size_t), 1, out); \
287 n += ffwrite(&set->load_factor, sizeof(float), 1, out); \
288 n += ffwrite(set->array, set->e_size, set->size, out); \
289 n += ffwrite(set->flags, sizeof(uint32_t), (set->size + 15) / 16, out); \
290 return n; \
291 }
292
293 #define load_hashset_macro(hash_type) \
294 static inline hash_type* load_##hash_type(FILE *in){ \
295 hash_type *set; \
296 size_t n; \
297 set = (hash_type*)malloc(sizeof(hash_type)); \
298 n = ffread(&set->e_size, sizeof(size_t), 1, in); \
299 n += ffread(&set->size, sizeof(size_t), 1, in); \
300 n += ffread(&set->count, sizeof(size_t), 1, in); \
301 n += ffread(&set->load_factor, sizeof(float), 1, in); \
302 set->max = set->size * set->load_factor; \
303 set->array = malloc(set->size * set->e_size); \
304 n += ffread(set->array, set->e_size, set->size, in); \
305 set->flags = (uint32_t*)malloc((set->size + 15) / 16 * 4); \
306 n += ffread(set->flags, sizeof(uint32_t), (set->size + 15) / 16, in); \
307 return set; \
308 }
309
310 #define free_hashset_macro(hash_type) \
311 static inline void free_##hash_type(hash_type *set){ \
312 free(set->array); \
313 free(set->flags); \
314 free(set); \
315 }
316
317 #define encap_hashset_macro(hash_type, hash_key_type, hash_code_macro) \
318 static inline void encap_##hash_type(hash_type *set, size_t num){ \
319 uint32_t *flags, *f; \
320 uint64_t i, n, size, hc; \
321 hash_key_type key; \
322 hash_key_type tmp; \
323 if(set->ocp + num <= set->max) return; \
324 n = set->size; \
325 do{ n = _rj_hashset_find_prime(n * 2); } while(n * set->load_factor < set->count + num); \
326 set->array = realloc(set->array, n * set->e_size); \
327 if(set->array == NULL){ \
328 fprintf(stderr, "-- Out of memory --\n"); \
329 abort(); \
330 } \
331 flags = malloc((n+15)/16 * 4); \
332 memset(flags, 0x55, (n+15)/16 * 4); \
333 size = set->size; \
334 set->size = n; \
335 set->ocp = set->count; \
336 set->max = n * set->load_factor; \
337 f = set->flags; \
338 set->flags = flags; \
339 flags = f; \
340 for(i=0;i<size;i++){ \
341 if(!exists_entity(flags, i)) continue; \
342 key = ((hash_key_type*)set->array)[i]; \
343 set_entity_del(flags, i); \
344 while(1){ \
345 hc = hash_code_macro(key) % set->size; \
346 while(!is_entity_null(set->flags, hc)){ hc = (hc + 1) % set->size; } \
347 clear_entity_null(set->flags, hc); \
348 if(hc < size && exists_entity(flags, hc)){ \
349 tmp = key; \
350 key = ((hash_key_type*)set->array)[hc]; \
351 ((hash_key_type*)set->array)[hc] = tmp; \
352 set_entity_del(flags, hc); \
353 } else { \
354 ((hash_key_type*)set->array)[hc] = key; \
355 break; \
356 } \
357 } \
358 } \
359 free(flags); \
360 }
361
362
363
364 // ---------------------- Define your own hashset ----------------------------------
365 // Example:
366 // typedef struct { int group; int user; } Info;
367 // #define my_hashcode(val) (val)->group
368 // #define my_hashequal(v1, v2) (((v1)->group == (v2)->group) && ((v1)->user == (v2)->user))
369 // define_hashset(myhash, Info, my_hashcode, my_hashequal);
370
371 #define define_hashset(hash_type, hash_key_type, hash_code_macro, hash_equal_macro) \
372 init_hashset_macro(hash_type, hash_key_type); \
373 get_hashset_macro(hash_type, hash_key_type, hash_code_macro, hash_equal_macro); \
374 prepare_hashset_macro(hash_type, hash_key_type, hash_code_macro, hash_equal_macro);\
375 exists_hashset_macro(hash_type, hash_key_type, hash_code_macro, hash_equal_macro); \
376 add_hashset_macro(hash_type, hash_key_type, hash_code_macro, hash_equal_macro); \
377 put_hashset_macro(hash_type, hash_key_type); \
378 remove_hashset_macro(hash_type, hash_key_type, hash_code_macro, hash_equal_macro); \
379 iter_hashset_macro(hash_type, hash_key_type); \
380 ref_iter_hashset_macro(hash_type, hash_key_type); \
381 reset_iter_hashset_macro(hash_type); \
382 count_hashset_macro(hash_type); \
383 clear_hashset_macro(hash_type); \
384 dump_hashset_macro(hash_type); \
385 load_hashset_macro(hash_type); \
386 free_hashset_macro(hash_type); \
387 encap_hashset_macro(hash_type, hash_key_type, hash_code_macro);
388
389 /* ------------------ Useful functions ------------------------------------- */
390
__lh3_Jenkins_hash_int(uint32_t key)391 static inline uint32_t __lh3_Jenkins_hash_int(uint32_t key){
392 key += (key << 12);
393 key ^= (key >> 22);
394 key += (key << 4);
395 key ^= (key >> 9);
396 key += (key << 10);
397 key ^= (key >> 2);
398 key += (key << 7);
399 key ^= (key >> 12);
400 return key;
401 }
402
__lh3_Jenkins_hash_64(uint64_t key)403 static inline uint64_t __lh3_Jenkins_hash_64(uint64_t key){
404 key += ~(key << 32);
405 key ^= (key >> 22);
406 key += ~(key << 13);
407 key ^= (key >> 8);
408 key += (key << 3);
409 key ^= (key >> 15);
410 key += ~(key << 27);
411 key ^= (key >> 31);
412 return key;
413 }
414
jenkins_one_at_a_time_hash(char * key,size_t len)415 static inline uint32_t jenkins_one_at_a_time_hash(char *key, size_t len){
416 uint32_t hash, i;
417 for(hash = i = 0; i < len; ++i){
418 hash += key[i];
419 hash += (hash << 10);
420 hash ^= (hash >> 6);
421 }
422 hash += (hash << 3);
423 hash ^= (hash >> 11);
424 hash += (hash << 15);
425 return hash;
426 }
427
hash64shift(uint64_t key)428 static inline uint64_t hash64shift(uint64_t key){
429 key = (~key) + (key << 21); // key = (key << 21) - key - 1;
430 key = key ^ (key >> 24);
431 key = (key + (key << 3)) + (key << 8); // key * 265
432 key = key ^ (key >> 14);
433 key = (key + (key << 2)) + (key << 4); // key * 21
434 key = key ^ (key >> 28);
435 key = key + (key << 31);
436 return key;
437 }
438
439
MurmurHash64A(const void * key,int len,uint32_t seed)440 static inline uint64_t MurmurHash64A(const void * key, int len, uint32_t seed){
441 const uint64_t m = 0xc6a4a7935bd1e995LLU;
442 const int r = 47;
443
444 uint64_t h = seed ^ (len * m);
445
446 const uint64_t * data = (const uint64_t *)key;
447 const uint64_t * end = data + (len/8);
448
449 while(data != end){
450 uint64_t k = *data++;
451
452 k *= m;
453 k ^= k >> r;
454 k *= m;
455
456 h ^= k;
457 h *= m;
458 }
459
460 const unsigned char * data2 = (const unsigned char*)data;
461
462 switch(len & 7){
463 case 7: h ^= ((uint64_t)data2[6]) << 48;
464 case 6: h ^= ((uint64_t)data2[5]) << 40;
465 case 5: h ^= ((uint64_t)data2[4]) << 32;
466 case 4: h ^= ((uint64_t)data2[3]) << 24;
467 case 3: h ^= ((uint64_t)data2[2]) << 16;
468 case 2: h ^= ((uint64_t)data2[1]) << 8;
469 case 1: h ^= ((uint64_t)data2[0]);
470 h *= m;
471 };
472
473 h ^= h >> r;
474 h *= m;
475 h ^= h >> r;
476
477 return h;
478 }
479
480 #define u32hashcode(key) __lh3_Jenkins_hash_int(key)
481 #define u64hashcode(key) __lh3_Jenkins_hash_64(key)
482
__string_hashcode(const char * s)483 static inline uint32_t __string_hashcode(const char *s){
484 uint32_t h = *s;
485 if (h) for (++s ; *s; ++s) h = (h << 5) - h + *s;
486 return h;
487 }
488
489 #define u32hash_code(e) u32hashcode(e)
490 #define u64hash_code(e) u64hashcode(e)
491 #define uxxhash_equals(e1, e2) ((e1) == (e2))
492 define_hashset(u32hash, uint32_t, u32hash_code, uxxhash_equals);
493 define_hashset(u64hash, uint64_t, u64hash_code, uxxhash_equals);
494
495 #define i32hash_code(e) u32hashcode((uint32_t)(e))
496 #define i32hash_equals(e1, e2) ((e1) == (e2))
497 define_hashset(i32hash, int, i32hash_code, i32hash_equals);
498
499 #define chash_code(e) __string_hashcode(e)
500 #define chash_equals(e1, e2) (strcmp(e1, e2) == 0)
501 define_hashset(chash, char*, chash_code, chash_equals);
502
503 typedef struct { uint32_t key, val; } uuhash_t;
504 #define uuhash_code(e) (e).key
505 #define uuhash_equals(e1, e2) ((e1).key == (e2).key)
506 define_hashset(uuhash, uuhash_t, uuhash_code, uuhash_equals);
507
508 typedef struct { char *key; uint32_t val; } cuhash_t;
509 #define cuhash_code(e) __string_hashcode((e).key)
510 #define cuhash_equals(e1, e2) (strcmp((e1).key, (e2).key) == 0)
511 define_hashset(cuhash, cuhash_t, cuhash_code, cuhash_equals);
512
513 #endif
514