1 /*
2  * This software is Copyright (c) 2015 Sayantan Datta <std2048 at gmail dot com>
3  * and it is hereby released to the general public under the following terms:
4  * Redistribution and use in source and binary forms, with or without modification, are permitted.
5  */
6 
7 #ifdef HAVE_OPENCL
8 
9 #include <stdlib.h>
10 #include <stdio.h>
11 #include "bt_hash_types.h"
12 
13 uint128_t *loaded_hashes_128 = NULL;
14 unsigned int *hash_table_128 = NULL;
15 
16 /* Assuming N < 0x7fffffff */
modulo128_31b(uint128_t a,unsigned int N,uint64_t shift64)17 inline unsigned int modulo128_31b(uint128_t a, unsigned int N, uint64_t shift64)
18 {
19 	uint64_t p;
20 	p = (a.HI64 % N) * shift64;
21 	p += (a.LO64 % N);
22 	p %= N;
23 	return (unsigned int)p;
24 }
25 
add128(uint128_t a,unsigned int b)26 inline uint128_t add128(uint128_t a, unsigned int b)
27 {
28 	uint128_t result;
29 	result.LO64 = a.LO64 + b;
30 	result.HI64 = a.HI64 + (result.LO64 < a.LO64);
31 	if (result.HI64 < a.HI64)
32 		bt_warn("128 bit add overflow.");
33 
34 	return result;
35 }
36 
allocate_ht_128(unsigned int num_loaded_hashes,unsigned int verbosity)37 void allocate_ht_128(unsigned int num_loaded_hashes, unsigned int verbosity)
38 {
39 	unsigned int i;
40 
41 	if (bt_memalign_alloc((void **)&hash_table_128, 16, 4 * hash_table_size * sizeof(unsigned int)))
42 		bt_error("Couldn't allocate hash_table_128.");
43 
44 	for (i = 0; i < hash_table_size; i++)
45 		hash_table_128[i] = hash_table_128[i + hash_table_size]
46 			= hash_table_128[i + 2 * hash_table_size]
47 			= hash_table_128[i + 3 * hash_table_size] = 0;
48 
49 	total_memory_in_bytes += 4 * hash_table_size * sizeof(unsigned int);
50 
51 	if (verbosity > 2) {
52 		fprintf(stdout, "Hash Table Size %Lf %% of Number of Loaded Hashes.\n", ((long double)hash_table_size / (long double)num_loaded_hashes) * 100.00);
53 		fprintf(stdout, "Hash Table Size(in GBs):%Lf\n", ((long double)4.0 * hash_table_size * sizeof(unsigned int)) / ((long double)1024 * 1024 * 1024));
54 	}
55 }
56 
calc_ht_idx_128(unsigned int hash_location,unsigned int offset)57 inline unsigned int calc_ht_idx_128(unsigned int hash_location, unsigned int offset)
58 {
59 	return  modulo128_31b(add128(loaded_hashes_128[hash_location], offset), hash_table_size, shift64_ht_sz);
60 }
61 
zero_check_ht_128(unsigned int hash_table_idx)62 inline unsigned int zero_check_ht_128(unsigned int hash_table_idx)
63 {
64 	return ((hash_table_128[hash_table_idx] || hash_table_128[hash_table_idx + hash_table_size] ||
65 		hash_table_128[hash_table_idx + 2 * hash_table_size] ||
66 		hash_table_128[hash_table_idx + 3 * hash_table_size]));
67 }
68 
assign_ht_128(unsigned int hash_table_idx,unsigned int hash_location)69 inline void assign_ht_128(unsigned int hash_table_idx, unsigned int hash_location)
70 {
71 	uint128_t hash = loaded_hashes_128[hash_location];
72 	hash_table_128[hash_table_idx] = (unsigned int)(hash.LO64 & 0xffffffff);
73 	hash_table_128[hash_table_idx + hash_table_size] = (unsigned int)(hash.LO64 >> 32);
74 	hash_table_128[hash_table_idx + 2 * hash_table_size] = (unsigned int)(hash.HI64 & 0xffffffff);
75 	hash_table_128[hash_table_idx + 3 * hash_table_size] = (unsigned int)(hash.HI64 >> 32);
76 }
77 
assign0_ht_128(unsigned int hash_table_idx)78 inline void assign0_ht_128(unsigned int hash_table_idx)
79 {
80 	hash_table_128[hash_table_idx] = hash_table_128[hash_table_idx + hash_table_size]
81 			= hash_table_128[hash_table_idx + 2 * hash_table_size]
82 			= hash_table_128[hash_table_idx + 3 * hash_table_size] = 0;
83 }
84 
get_offset_128(unsigned int hash_table_idx,unsigned int hash_location)85 unsigned int get_offset_128(unsigned int hash_table_idx, unsigned int hash_location)
86 {
87 	unsigned int z = modulo128_31b(loaded_hashes_128[hash_location], hash_table_size, shift64_ht_sz);
88 	return (hash_table_size - z + hash_table_idx);
89 }
90 
test_tables_128(unsigned int num_loaded_hashes,OFFSET_TABLE_WORD * offset_table,unsigned int offset_table_size,unsigned int shift64_ot_sz,unsigned int shift128_ot_sz,unsigned int verbosity)91 int test_tables_128(unsigned int num_loaded_hashes, OFFSET_TABLE_WORD *offset_table, unsigned int offset_table_size, unsigned int shift64_ot_sz, unsigned int shift128_ot_sz, unsigned int verbosity)
92 {
93 	unsigned char *hash_table_collisions;
94 	unsigned int i, hash_table_idx, error = 1, count = 0;
95 	uint128_t hash;
96 
97 	if (bt_calloc((void **)&hash_table_collisions, hash_table_size, sizeof(unsigned char)))
98 		bt_error("Failed to allocate memory: hash_table_collisions.");
99 
100 	if (verbosity > 1)
101 		fprintf(stdout, "\nTesting Tables...");
102 
103 #if _OPENMP
104 #pragma omp parallel private(i, hash_table_idx, hash)
105 #endif
106 	{
107 #if _OPENMP
108 #pragma omp for
109 #endif
110 		for (i = 0; i < num_loaded_hashes; i++) {
111 			hash = loaded_hashes_128[i];
112 			hash_table_idx =
113 				calc_ht_idx_128(i,
114 					(unsigned int)offset_table[
115 					modulo128_31b(hash,
116 					offset_table_size, shift64_ot_sz)]);
117 #if _OPENMP
118 #pragma omp atomic
119 #endif
120 			hash_table_collisions[hash_table_idx]++;
121 
122 			if (error && (hash_table_128[hash_table_idx] != (unsigned int)(hash.LO64 & 0xffffffff)  ||
123 			    hash_table_128[hash_table_idx + hash_table_size] != (unsigned int)(hash.LO64 >> 32) ||
124 			    hash_table_128[hash_table_idx + 2 * hash_table_size] != (unsigned int)(hash.HI64 & 0xffffffff) ||
125 			    hash_table_128[hash_table_idx + 3 * hash_table_size] != (unsigned int)(hash.HI64 >> 32) ||
126 			    hash_table_collisions[hash_table_idx] > 1)) {
127 				fprintf(stderr, "Error building tables: Loaded hash idx:%u, No. of collisions:%u\n", i, hash_table_collisions[hash_table_idx]);
128 				error = 0;
129 			}
130 
131 		}
132 #if _OPENMP
133 #pragma omp single
134 #endif
135 		for (hash_table_idx = 0; hash_table_idx < hash_table_size; hash_table_idx++)
136 			if (zero_check_ht_128(hash_table_idx))
137 				count++;
138 #if _OPENMP
139 #pragma omp barrier
140 #endif
141 	}
142 
143 /* Suppress unused variable warning. */
144 #define UNUSED(x) (void)(x)
145 	UNUSED(shift128_ot_sz);
146 
147 	if (count != num_loaded_hashes) {
148 		error = 0;
149 		fprintf(stderr, "Error!! Tables contains extra or less entries.\n");
150 		return 0;
151 	}
152 
153 	bt_free((void **)&hash_table_collisions);
154 
155 	if (error && verbosity > 1)
156 		fprintf(stdout, "OK\n");
157 
158 	return 1;
159 }
160 
161 #define check_equal(p, q) \
162 	(loaded_hashes_128[p].LO64 == loaded_hashes_128[q].LO64 &&	\
163 	 loaded_hashes_128[p].HI64 == loaded_hashes_128[q].HI64)
164 
165 #define check_non_zero(p) \
166 	(loaded_hashes_128[p].LO64 || loaded_hashes_128[p].HI64)
167 
168 #define check_zero(p) \
169 	(loaded_hashes_128[p].LO64 == 0 && loaded_hashes_128[p].HI64 == 0)
170 
171 #define set_zero(p) \
172 	loaded_hashes_128[p].LO64 = loaded_hashes_128[p].HI64 = 0
173 
remove_duplicates_final(unsigned int num_loaded_hashes,unsigned int hash_table_size,unsigned int * rehash_list)174 static void remove_duplicates_final(unsigned int num_loaded_hashes, unsigned int hash_table_size, unsigned int *rehash_list)
175 {
176 	unsigned int i, **hash_location_list, counter;
177 #define COLLISION_DTYPE unsigned int
178 	COLLISION_DTYPE *collisions;
179 	typedef struct {
180 		unsigned int store_loc1;
181 		unsigned int store_loc2;
182 		unsigned int idx_hash_loc_list;
183 		COLLISION_DTYPE  collisions;
184 		COLLISION_DTYPE iter;
185 	} hash_table_data;
186 
187 	hash_table_data *hash_table = NULL;
188 
189 	if (bt_malloc((void **)&hash_table, hash_table_size * sizeof(hash_table_data)))
190 		bt_error("Failed to allocate memory: hash_table.");
191 	if (bt_calloc((void **)&collisions, hash_table_size, sizeof(COLLISION_DTYPE)))
192 		bt_error("Failed to allocate memory: collisions.");
193 
194 	for (i = 0; i < num_loaded_hashes; i++) {
195 		unsigned int idx = loaded_hashes_128[rehash_list[i]].LO64 % hash_table_size;
196 		collisions[idx]++;
197 	}
198 
199 	counter = 0;
200 	for (i = 0; i < hash_table_size; i++) {
201 		 hash_table[i].collisions = collisions[i];
202 		 hash_table[i].iter = 0;
203 		 hash_table[i].store_loc1 = hash_table[i].store_loc2 =
204 			hash_table[i].idx_hash_loc_list = 0xffffffff;
205 		if (hash_table[i].collisions > 3)
206 			hash_table[i].idx_hash_loc_list = counter++;
207 	}
208 
209 	if (bt_malloc((void **)&hash_location_list, (counter + 1) * sizeof(unsigned int *)))
210 		bt_error("Failed to allocate memory: hash_location_list.");
211 
212 	counter = 0;
213 	for (i = 0; i < hash_table_size; i++)
214 	      if (collisions[i] > 3) {
215 			if (bt_malloc((void **)&hash_location_list[counter], (collisions[i] - 1) * sizeof(unsigned int)))
216 				bt_error("Failed to allocate memory: hash_location_list[counter].");
217 			counter++;
218 	      }
219 
220 	for (i = 0; i < num_loaded_hashes; i++) {
221 		unsigned int k = rehash_list[i];
222 		unsigned int idx = loaded_hashes_128[k].LO64 % hash_table_size ;
223 
224 		if (collisions[idx] == 2) {
225 			if (!hash_table[idx].iter) {
226 				hash_table[idx].iter++;
227 				hash_table[idx].store_loc1 = k;
228 			}
229 			else if (check_equal(hash_table[idx].store_loc1, k))
230 				set_zero(k);
231 		}
232 
233 		if (collisions[idx] == 3) {
234 			if (!hash_table[idx].iter) {
235 				hash_table[idx].iter++;
236 				hash_table[idx].store_loc1 = k;
237 			}
238 			else if (hash_table[idx].iter == 1) {
239 				if (check_equal(hash_table[idx].store_loc1, k))
240 					set_zero(k);
241 				else
242 					hash_table[idx].store_loc2 = k;
243 			}
244 			else if (check_equal(hash_table[idx].store_loc1, k) ||
245 				 check_equal(hash_table[idx].store_loc2, k))
246 				set_zero(k);
247 		}
248 
249 		else if (collisions[idx] > 3) {
250 			unsigned int iter = hash_table[idx].iter;
251 			if (!iter)
252 				hash_location_list[hash_table[idx].idx_hash_loc_list][iter++] = k;
253 			else {
254 				unsigned int j;
255 				for (j = 0; j < iter; j++)
256 					if (check_equal(hash_location_list[hash_table[idx].idx_hash_loc_list][j], k)) {
257 						set_zero(k);
258 						break;
259 					}
260 				if (j == iter && iter < (unsigned int)hash_table[idx].collisions - 1)
261 					hash_location_list[hash_table[idx].idx_hash_loc_list][iter++] = k;
262 			}
263 			hash_table[idx].iter = iter;
264 		}
265 	}
266 
267 #undef COLLISION_DTYPE
268 	for (i = 0; i < counter; i++)
269 		bt_free((void **)&hash_location_list[i]);
270 	bt_free((void **)&hash_location_list);
271 	bt_free((void **)&hash_table);
272 	bt_free((void **)&collisions);
273 }
274 
remove_duplicates_128(unsigned int num_loaded_hashes,unsigned int hash_table_size,unsigned int verbosity)275 unsigned int remove_duplicates_128(unsigned int num_loaded_hashes, unsigned int hash_table_size, unsigned int verbosity)
276 {
277 	unsigned int i, num_unique_hashes, *rehash_list, counter;
278 #define COLLISION_DTYPE unsigned int
279 	COLLISION_DTYPE *collisions;
280 	typedef struct {
281 		unsigned int store_loc1;
282 		unsigned int store_loc2;
283 		unsigned int store_loc3;
284 		COLLISION_DTYPE iter;
285 	} hash_table_data;
286 
287 	hash_table_data *hash_table = NULL;
288 
289 	if (verbosity > 1)
290 		fprintf(stdout, "Removing duplicate hashes...");
291 
292 	if (hash_table_size & (hash_table_size - 1)) {
293 		fprintf(stderr, "Duplicate removal hash table size must power of 2.\n");
294 		return 0;
295 	}
296 
297 	if (bt_malloc((void **)&hash_table, hash_table_size * sizeof(hash_table_data)))
298 		bt_error("Failed to allocate memory: hash_table.");
299 	if (bt_calloc((void **)&collisions, hash_table_size, sizeof(COLLISION_DTYPE)))
300 		bt_error("Failed to allocate memory: collisions.");
301 
302 #if _OPENMP
303 #pragma omp parallel private(i)
304 #endif
305 {
306 #if _OPENMP
307 #pragma omp for
308 #endif
309 	for (i = 0; i < num_loaded_hashes; i++) {
310 		unsigned int idx = loaded_hashes_128[i].LO64 & (hash_table_size - 1);
311 #if _OPENMP
312 #pragma omp atomic
313 #endif
314 		collisions[idx]++;
315 	}
316 
317 	counter = 0;
318 #if _OPENMP
319 #pragma omp barrier
320 
321 #pragma omp for
322 #endif
323 	for (i = 0; i < hash_table_size; i++) {
324 		hash_table[i].iter = 0;
325 		if (collisions[i] > 4)
326 #if _OPENMP
327 #pragma omp atomic
328 #endif
329 			 counter += (collisions[i] - 3);
330 	}
331 #if _OPENMP
332 #pragma omp barrier
333 
334 #pragma omp sections
335 #endif
336 {
337 #if _OPENMP
338 #pragma omp section
339 #endif
340 {
341 	for (i = 0; i < num_loaded_hashes; i++) {
342 		unsigned int idx = loaded_hashes_128[i].LO64 & (hash_table_size - 1);
343 
344 		if (collisions[idx] == 2) {
345 			if (!hash_table[idx].iter) {
346 				hash_table[idx].iter++;
347 				hash_table[idx].store_loc1 = i;
348 			}
349 			else if (check_equal(hash_table[idx].store_loc1, i))
350 				set_zero(i);
351 		}
352 	}
353 }
354 #if _OPENMP
355 #pragma omp section
356 #endif
357 {
358 	if (bt_malloc((void **)&rehash_list, counter * sizeof(unsigned int)))
359 		bt_error("Failed to allocate memory: rehash_list.");
360 	counter = 0;
361 	for (i = 0; i < num_loaded_hashes; i++) {
362 		unsigned int idx = loaded_hashes_128[i].LO64 & (hash_table_size - 1);
363 
364 		if (collisions[idx] == 3) {
365 			if (!hash_table[idx].iter) {
366 				hash_table[idx].iter++;
367 				hash_table[idx].store_loc1 = i;
368 			}
369 			else if (hash_table[idx].iter == 1) {
370 				if (check_equal(hash_table[idx].store_loc1, i))
371 					set_zero(i);
372 				else {
373 					hash_table[idx].iter++;
374 					hash_table[idx].store_loc2 = i;
375 				}
376 			}
377 			else if (check_equal(hash_table[idx].store_loc1, i) ||
378 				 check_equal(hash_table[idx].store_loc2, i))
379 				set_zero(i);
380 		}
381 
382 		else if (collisions[idx] >= 4) {
383 			if (!hash_table[idx].iter) {
384 				hash_table[idx].iter++;
385 				hash_table[idx].store_loc1 = i;
386 			}
387 			else if (hash_table[idx].iter == 1) {
388 				if (check_equal(hash_table[idx].store_loc1, i))
389 					set_zero(i);
390 				else {
391 					hash_table[idx].iter++;
392 					hash_table[idx].store_loc2 = i;
393 				}
394 
395 			}
396 			else if (hash_table[idx].iter == 2) {
397 				if (check_equal(hash_table[idx].store_loc1, i) ||
398 				    check_equal(hash_table[idx].store_loc2, i))
399 					set_zero(i);
400 				else {
401 					hash_table[idx].iter++;
402 					hash_table[idx].store_loc3 = i;
403 				}
404 			}
405 			else if (hash_table[idx].iter >= 3) {
406 				if (check_equal(hash_table[idx].store_loc1, i) ||
407 				    check_equal(hash_table[idx].store_loc2, i) ||
408 				    check_equal(hash_table[idx].store_loc3, i))
409 					set_zero(i);
410 				else {
411 					if (collisions[idx] > 4)
412 						rehash_list[counter++] = i;
413 				}
414 			}
415 		}
416 	}
417 
418 	if (counter)
419 		remove_duplicates_final(counter, counter + (counter >> 1), rehash_list);
420 	bt_free((void **)&rehash_list);
421 }
422 }
423 }
424 
425 #if 0
426 	{	unsigned int col1 = 0, col2 = 0, col3 = 0, col4 = 0, col5a = 0;
427 		for (i = 0; i < hash_table_size; i++) {
428 			if (collisions[i] == 1)
429 				col1++;
430 			else if (collisions[i] == 2)
431 				col2++;
432 			else if (collisions[i] == 3)
433 				col3++;
434 			else if (collisions[i] == 4)
435 				col4++;
436 			else if (collisions[i] > 4)
437 				col5a += collisions[i];
438 		}
439 		col2 *= 2;
440 		col3 *= 3;
441 		col4 *= 4;
442 		fprintf(stderr, "Statistics:%Lf %Lf %Lf %Lf %Lf\n", (long double)col1 / (long double)num_loaded_hashes,
443 		  (long double)col2 / (long double)num_loaded_hashes, (long double)col3 / (long double)num_loaded_hashes,
444 			(long double)col4 / (long double)num_loaded_hashes, (long double)col5a / (long double)num_loaded_hashes);
445 
446 	}
447 #endif
448 	num_unique_hashes = 0;
449 	for (i = num_loaded_hashes - 1; (int)i >= 0; i--)
450 		if (check_non_zero(i)) {
451 			num_unique_hashes = i;
452 			break;
453 		}
454 
455 	for (i = 0; i <= num_unique_hashes; i++)
456 		if (check_zero(i)) {
457 			unsigned int j;
458 			loaded_hashes_128[i] = loaded_hashes_128[num_unique_hashes];
459 			set_zero(num_unique_hashes);
460 			num_unique_hashes--;
461 			for (j = num_unique_hashes; (int)j >= 0; j--)
462 				if (check_non_zero(j)) {
463 					num_unique_hashes = j;
464 					break;
465 				}
466 		}
467 #undef COLLISION_DTYPE
468 	bt_free((void **)&collisions);
469 	bt_free((void **)&hash_table);
470 
471 	if (verbosity > 1)
472 		fprintf(stdout, "Done\n");
473 
474 	return (num_unique_hashes + 1);
475 }
476 
477 #endif
478