1 /*
2 * This software is Copyright (c) 2015 Sayantan Datta <std2048 at gmail dot com>
3 * and it is hereby released to the general public under the following terms:
4 * Redistribution and use in source and binary forms, with or without modification, are permitted.
5 */
6
7 #ifdef HAVE_OPENCL
8
9 #include <stdlib.h>
10 #include <stdio.h>
11 #include "bt_hash_types.h"
12
13 uint128_t *loaded_hashes_128 = NULL;
14 unsigned int *hash_table_128 = NULL;
15
16 /* Assuming N < 0x7fffffff */
modulo128_31b(uint128_t a,unsigned int N,uint64_t shift64)17 inline unsigned int modulo128_31b(uint128_t a, unsigned int N, uint64_t shift64)
18 {
19 uint64_t p;
20 p = (a.HI64 % N) * shift64;
21 p += (a.LO64 % N);
22 p %= N;
23 return (unsigned int)p;
24 }
25
add128(uint128_t a,unsigned int b)26 inline uint128_t add128(uint128_t a, unsigned int b)
27 {
28 uint128_t result;
29 result.LO64 = a.LO64 + b;
30 result.HI64 = a.HI64 + (result.LO64 < a.LO64);
31 if (result.HI64 < a.HI64)
32 bt_warn("128 bit add overflow.");
33
34 return result;
35 }
36
allocate_ht_128(unsigned int num_loaded_hashes,unsigned int verbosity)37 void allocate_ht_128(unsigned int num_loaded_hashes, unsigned int verbosity)
38 {
39 unsigned int i;
40
41 if (bt_memalign_alloc((void **)&hash_table_128, 16, 4 * hash_table_size * sizeof(unsigned int)))
42 bt_error("Couldn't allocate hash_table_128.");
43
44 for (i = 0; i < hash_table_size; i++)
45 hash_table_128[i] = hash_table_128[i + hash_table_size]
46 = hash_table_128[i + 2 * hash_table_size]
47 = hash_table_128[i + 3 * hash_table_size] = 0;
48
49 total_memory_in_bytes += 4 * hash_table_size * sizeof(unsigned int);
50
51 if (verbosity > 2) {
52 fprintf(stdout, "Hash Table Size %Lf %% of Number of Loaded Hashes.\n", ((long double)hash_table_size / (long double)num_loaded_hashes) * 100.00);
53 fprintf(stdout, "Hash Table Size(in GBs):%Lf\n", ((long double)4.0 * hash_table_size * sizeof(unsigned int)) / ((long double)1024 * 1024 * 1024));
54 }
55 }
56
calc_ht_idx_128(unsigned int hash_location,unsigned int offset)57 inline unsigned int calc_ht_idx_128(unsigned int hash_location, unsigned int offset)
58 {
59 return modulo128_31b(add128(loaded_hashes_128[hash_location], offset), hash_table_size, shift64_ht_sz);
60 }
61
zero_check_ht_128(unsigned int hash_table_idx)62 inline unsigned int zero_check_ht_128(unsigned int hash_table_idx)
63 {
64 return ((hash_table_128[hash_table_idx] || hash_table_128[hash_table_idx + hash_table_size] ||
65 hash_table_128[hash_table_idx + 2 * hash_table_size] ||
66 hash_table_128[hash_table_idx + 3 * hash_table_size]));
67 }
68
assign_ht_128(unsigned int hash_table_idx,unsigned int hash_location)69 inline void assign_ht_128(unsigned int hash_table_idx, unsigned int hash_location)
70 {
71 uint128_t hash = loaded_hashes_128[hash_location];
72 hash_table_128[hash_table_idx] = (unsigned int)(hash.LO64 & 0xffffffff);
73 hash_table_128[hash_table_idx + hash_table_size] = (unsigned int)(hash.LO64 >> 32);
74 hash_table_128[hash_table_idx + 2 * hash_table_size] = (unsigned int)(hash.HI64 & 0xffffffff);
75 hash_table_128[hash_table_idx + 3 * hash_table_size] = (unsigned int)(hash.HI64 >> 32);
76 }
77
assign0_ht_128(unsigned int hash_table_idx)78 inline void assign0_ht_128(unsigned int hash_table_idx)
79 {
80 hash_table_128[hash_table_idx] = hash_table_128[hash_table_idx + hash_table_size]
81 = hash_table_128[hash_table_idx + 2 * hash_table_size]
82 = hash_table_128[hash_table_idx + 3 * hash_table_size] = 0;
83 }
84
get_offset_128(unsigned int hash_table_idx,unsigned int hash_location)85 unsigned int get_offset_128(unsigned int hash_table_idx, unsigned int hash_location)
86 {
87 unsigned int z = modulo128_31b(loaded_hashes_128[hash_location], hash_table_size, shift64_ht_sz);
88 return (hash_table_size - z + hash_table_idx);
89 }
90
test_tables_128(unsigned int num_loaded_hashes,OFFSET_TABLE_WORD * offset_table,unsigned int offset_table_size,unsigned int shift64_ot_sz,unsigned int shift128_ot_sz,unsigned int verbosity)91 int test_tables_128(unsigned int num_loaded_hashes, OFFSET_TABLE_WORD *offset_table, unsigned int offset_table_size, unsigned int shift64_ot_sz, unsigned int shift128_ot_sz, unsigned int verbosity)
92 {
93 unsigned char *hash_table_collisions;
94 unsigned int i, hash_table_idx, error = 1, count = 0;
95 uint128_t hash;
96
97 if (bt_calloc((void **)&hash_table_collisions, hash_table_size, sizeof(unsigned char)))
98 bt_error("Failed to allocate memory: hash_table_collisions.");
99
100 if (verbosity > 1)
101 fprintf(stdout, "\nTesting Tables...");
102
103 #if _OPENMP
104 #pragma omp parallel private(i, hash_table_idx, hash)
105 #endif
106 {
107 #if _OPENMP
108 #pragma omp for
109 #endif
110 for (i = 0; i < num_loaded_hashes; i++) {
111 hash = loaded_hashes_128[i];
112 hash_table_idx =
113 calc_ht_idx_128(i,
114 (unsigned int)offset_table[
115 modulo128_31b(hash,
116 offset_table_size, shift64_ot_sz)]);
117 #if _OPENMP
118 #pragma omp atomic
119 #endif
120 hash_table_collisions[hash_table_idx]++;
121
122 if (error && (hash_table_128[hash_table_idx] != (unsigned int)(hash.LO64 & 0xffffffff) ||
123 hash_table_128[hash_table_idx + hash_table_size] != (unsigned int)(hash.LO64 >> 32) ||
124 hash_table_128[hash_table_idx + 2 * hash_table_size] != (unsigned int)(hash.HI64 & 0xffffffff) ||
125 hash_table_128[hash_table_idx + 3 * hash_table_size] != (unsigned int)(hash.HI64 >> 32) ||
126 hash_table_collisions[hash_table_idx] > 1)) {
127 fprintf(stderr, "Error building tables: Loaded hash idx:%u, No. of collisions:%u\n", i, hash_table_collisions[hash_table_idx]);
128 error = 0;
129 }
130
131 }
132 #if _OPENMP
133 #pragma omp single
134 #endif
135 for (hash_table_idx = 0; hash_table_idx < hash_table_size; hash_table_idx++)
136 if (zero_check_ht_128(hash_table_idx))
137 count++;
138 #if _OPENMP
139 #pragma omp barrier
140 #endif
141 }
142
143 /* Suppress unused variable warning. */
144 #define UNUSED(x) (void)(x)
145 UNUSED(shift128_ot_sz);
146
147 if (count != num_loaded_hashes) {
148 error = 0;
149 fprintf(stderr, "Error!! Tables contains extra or less entries.\n");
150 return 0;
151 }
152
153 bt_free((void **)&hash_table_collisions);
154
155 if (error && verbosity > 1)
156 fprintf(stdout, "OK\n");
157
158 return 1;
159 }
160
161 #define check_equal(p, q) \
162 (loaded_hashes_128[p].LO64 == loaded_hashes_128[q].LO64 && \
163 loaded_hashes_128[p].HI64 == loaded_hashes_128[q].HI64)
164
165 #define check_non_zero(p) \
166 (loaded_hashes_128[p].LO64 || loaded_hashes_128[p].HI64)
167
168 #define check_zero(p) \
169 (loaded_hashes_128[p].LO64 == 0 && loaded_hashes_128[p].HI64 == 0)
170
171 #define set_zero(p) \
172 loaded_hashes_128[p].LO64 = loaded_hashes_128[p].HI64 = 0
173
remove_duplicates_final(unsigned int num_loaded_hashes,unsigned int hash_table_size,unsigned int * rehash_list)174 static void remove_duplicates_final(unsigned int num_loaded_hashes, unsigned int hash_table_size, unsigned int *rehash_list)
175 {
176 unsigned int i, **hash_location_list, counter;
177 #define COLLISION_DTYPE unsigned int
178 COLLISION_DTYPE *collisions;
179 typedef struct {
180 unsigned int store_loc1;
181 unsigned int store_loc2;
182 unsigned int idx_hash_loc_list;
183 COLLISION_DTYPE collisions;
184 COLLISION_DTYPE iter;
185 } hash_table_data;
186
187 hash_table_data *hash_table = NULL;
188
189 if (bt_malloc((void **)&hash_table, hash_table_size * sizeof(hash_table_data)))
190 bt_error("Failed to allocate memory: hash_table.");
191 if (bt_calloc((void **)&collisions, hash_table_size, sizeof(COLLISION_DTYPE)))
192 bt_error("Failed to allocate memory: collisions.");
193
194 for (i = 0; i < num_loaded_hashes; i++) {
195 unsigned int idx = loaded_hashes_128[rehash_list[i]].LO64 % hash_table_size;
196 collisions[idx]++;
197 }
198
199 counter = 0;
200 for (i = 0; i < hash_table_size; i++) {
201 hash_table[i].collisions = collisions[i];
202 hash_table[i].iter = 0;
203 hash_table[i].store_loc1 = hash_table[i].store_loc2 =
204 hash_table[i].idx_hash_loc_list = 0xffffffff;
205 if (hash_table[i].collisions > 3)
206 hash_table[i].idx_hash_loc_list = counter++;
207 }
208
209 if (bt_malloc((void **)&hash_location_list, (counter + 1) * sizeof(unsigned int *)))
210 bt_error("Failed to allocate memory: hash_location_list.");
211
212 counter = 0;
213 for (i = 0; i < hash_table_size; i++)
214 if (collisions[i] > 3) {
215 if (bt_malloc((void **)&hash_location_list[counter], (collisions[i] - 1) * sizeof(unsigned int)))
216 bt_error("Failed to allocate memory: hash_location_list[counter].");
217 counter++;
218 }
219
220 for (i = 0; i < num_loaded_hashes; i++) {
221 unsigned int k = rehash_list[i];
222 unsigned int idx = loaded_hashes_128[k].LO64 % hash_table_size ;
223
224 if (collisions[idx] == 2) {
225 if (!hash_table[idx].iter) {
226 hash_table[idx].iter++;
227 hash_table[idx].store_loc1 = k;
228 }
229 else if (check_equal(hash_table[idx].store_loc1, k))
230 set_zero(k);
231 }
232
233 if (collisions[idx] == 3) {
234 if (!hash_table[idx].iter) {
235 hash_table[idx].iter++;
236 hash_table[idx].store_loc1 = k;
237 }
238 else if (hash_table[idx].iter == 1) {
239 if (check_equal(hash_table[idx].store_loc1, k))
240 set_zero(k);
241 else
242 hash_table[idx].store_loc2 = k;
243 }
244 else if (check_equal(hash_table[idx].store_loc1, k) ||
245 check_equal(hash_table[idx].store_loc2, k))
246 set_zero(k);
247 }
248
249 else if (collisions[idx] > 3) {
250 unsigned int iter = hash_table[idx].iter;
251 if (!iter)
252 hash_location_list[hash_table[idx].idx_hash_loc_list][iter++] = k;
253 else {
254 unsigned int j;
255 for (j = 0; j < iter; j++)
256 if (check_equal(hash_location_list[hash_table[idx].idx_hash_loc_list][j], k)) {
257 set_zero(k);
258 break;
259 }
260 if (j == iter && iter < (unsigned int)hash_table[idx].collisions - 1)
261 hash_location_list[hash_table[idx].idx_hash_loc_list][iter++] = k;
262 }
263 hash_table[idx].iter = iter;
264 }
265 }
266
267 #undef COLLISION_DTYPE
268 for (i = 0; i < counter; i++)
269 bt_free((void **)&hash_location_list[i]);
270 bt_free((void **)&hash_location_list);
271 bt_free((void **)&hash_table);
272 bt_free((void **)&collisions);
273 }
274
remove_duplicates_128(unsigned int num_loaded_hashes,unsigned int hash_table_size,unsigned int verbosity)275 unsigned int remove_duplicates_128(unsigned int num_loaded_hashes, unsigned int hash_table_size, unsigned int verbosity)
276 {
277 unsigned int i, num_unique_hashes, *rehash_list, counter;
278 #define COLLISION_DTYPE unsigned int
279 COLLISION_DTYPE *collisions;
280 typedef struct {
281 unsigned int store_loc1;
282 unsigned int store_loc2;
283 unsigned int store_loc3;
284 COLLISION_DTYPE iter;
285 } hash_table_data;
286
287 hash_table_data *hash_table = NULL;
288
289 if (verbosity > 1)
290 fprintf(stdout, "Removing duplicate hashes...");
291
292 if (hash_table_size & (hash_table_size - 1)) {
293 fprintf(stderr, "Duplicate removal hash table size must power of 2.\n");
294 return 0;
295 }
296
297 if (bt_malloc((void **)&hash_table, hash_table_size * sizeof(hash_table_data)))
298 bt_error("Failed to allocate memory: hash_table.");
299 if (bt_calloc((void **)&collisions, hash_table_size, sizeof(COLLISION_DTYPE)))
300 bt_error("Failed to allocate memory: collisions.");
301
302 #if _OPENMP
303 #pragma omp parallel private(i)
304 #endif
305 {
306 #if _OPENMP
307 #pragma omp for
308 #endif
309 for (i = 0; i < num_loaded_hashes; i++) {
310 unsigned int idx = loaded_hashes_128[i].LO64 & (hash_table_size - 1);
311 #if _OPENMP
312 #pragma omp atomic
313 #endif
314 collisions[idx]++;
315 }
316
317 counter = 0;
318 #if _OPENMP
319 #pragma omp barrier
320
321 #pragma omp for
322 #endif
323 for (i = 0; i < hash_table_size; i++) {
324 hash_table[i].iter = 0;
325 if (collisions[i] > 4)
326 #if _OPENMP
327 #pragma omp atomic
328 #endif
329 counter += (collisions[i] - 3);
330 }
331 #if _OPENMP
332 #pragma omp barrier
333
334 #pragma omp sections
335 #endif
336 {
337 #if _OPENMP
338 #pragma omp section
339 #endif
340 {
341 for (i = 0; i < num_loaded_hashes; i++) {
342 unsigned int idx = loaded_hashes_128[i].LO64 & (hash_table_size - 1);
343
344 if (collisions[idx] == 2) {
345 if (!hash_table[idx].iter) {
346 hash_table[idx].iter++;
347 hash_table[idx].store_loc1 = i;
348 }
349 else if (check_equal(hash_table[idx].store_loc1, i))
350 set_zero(i);
351 }
352 }
353 }
354 #if _OPENMP
355 #pragma omp section
356 #endif
357 {
358 if (bt_malloc((void **)&rehash_list, counter * sizeof(unsigned int)))
359 bt_error("Failed to allocate memory: rehash_list.");
360 counter = 0;
361 for (i = 0; i < num_loaded_hashes; i++) {
362 unsigned int idx = loaded_hashes_128[i].LO64 & (hash_table_size - 1);
363
364 if (collisions[idx] == 3) {
365 if (!hash_table[idx].iter) {
366 hash_table[idx].iter++;
367 hash_table[idx].store_loc1 = i;
368 }
369 else if (hash_table[idx].iter == 1) {
370 if (check_equal(hash_table[idx].store_loc1, i))
371 set_zero(i);
372 else {
373 hash_table[idx].iter++;
374 hash_table[idx].store_loc2 = i;
375 }
376 }
377 else if (check_equal(hash_table[idx].store_loc1, i) ||
378 check_equal(hash_table[idx].store_loc2, i))
379 set_zero(i);
380 }
381
382 else if (collisions[idx] >= 4) {
383 if (!hash_table[idx].iter) {
384 hash_table[idx].iter++;
385 hash_table[idx].store_loc1 = i;
386 }
387 else if (hash_table[idx].iter == 1) {
388 if (check_equal(hash_table[idx].store_loc1, i))
389 set_zero(i);
390 else {
391 hash_table[idx].iter++;
392 hash_table[idx].store_loc2 = i;
393 }
394
395 }
396 else if (hash_table[idx].iter == 2) {
397 if (check_equal(hash_table[idx].store_loc1, i) ||
398 check_equal(hash_table[idx].store_loc2, i))
399 set_zero(i);
400 else {
401 hash_table[idx].iter++;
402 hash_table[idx].store_loc3 = i;
403 }
404 }
405 else if (hash_table[idx].iter >= 3) {
406 if (check_equal(hash_table[idx].store_loc1, i) ||
407 check_equal(hash_table[idx].store_loc2, i) ||
408 check_equal(hash_table[idx].store_loc3, i))
409 set_zero(i);
410 else {
411 if (collisions[idx] > 4)
412 rehash_list[counter++] = i;
413 }
414 }
415 }
416 }
417
418 if (counter)
419 remove_duplicates_final(counter, counter + (counter >> 1), rehash_list);
420 bt_free((void **)&rehash_list);
421 }
422 }
423 }
424
425 #if 0
426 { unsigned int col1 = 0, col2 = 0, col3 = 0, col4 = 0, col5a = 0;
427 for (i = 0; i < hash_table_size; i++) {
428 if (collisions[i] == 1)
429 col1++;
430 else if (collisions[i] == 2)
431 col2++;
432 else if (collisions[i] == 3)
433 col3++;
434 else if (collisions[i] == 4)
435 col4++;
436 else if (collisions[i] > 4)
437 col5a += collisions[i];
438 }
439 col2 *= 2;
440 col3 *= 3;
441 col4 *= 4;
442 fprintf(stderr, "Statistics:%Lf %Lf %Lf %Lf %Lf\n", (long double)col1 / (long double)num_loaded_hashes,
443 (long double)col2 / (long double)num_loaded_hashes, (long double)col3 / (long double)num_loaded_hashes,
444 (long double)col4 / (long double)num_loaded_hashes, (long double)col5a / (long double)num_loaded_hashes);
445
446 }
447 #endif
448 num_unique_hashes = 0;
449 for (i = num_loaded_hashes - 1; (int)i >= 0; i--)
450 if (check_non_zero(i)) {
451 num_unique_hashes = i;
452 break;
453 }
454
455 for (i = 0; i <= num_unique_hashes; i++)
456 if (check_zero(i)) {
457 unsigned int j;
458 loaded_hashes_128[i] = loaded_hashes_128[num_unique_hashes];
459 set_zero(num_unique_hashes);
460 num_unique_hashes--;
461 for (j = num_unique_hashes; (int)j >= 0; j--)
462 if (check_non_zero(j)) {
463 num_unique_hashes = j;
464 break;
465 }
466 }
467 #undef COLLISION_DTYPE
468 bt_free((void **)&collisions);
469 bt_free((void **)&hash_table);
470
471 if (verbosity > 1)
472 fprintf(stdout, "Done\n");
473
474 return (num_unique_hashes + 1);
475 }
476
477 #endif
478