1 /*
2  * This software is Copyright (c) 2015 Sayantan Datta <std2048 at gmail dot com>
3  * and it is hereby released to the general public under the following terms:
4  * Redistribution and use in source and binary forms, with or without modification, are permitted.
5  */
6 
7 #ifdef HAVE_OPENCL
8 
9 #include <stdlib.h>
10 #include <stdio.h>
11 #include "bt_hash_types.h"
12 
13 uint64_t *loaded_hashes_64 = NULL;
14 unsigned int *hash_table_64 = NULL;
15 
16 /* Assuming N < 0x7fffffff */
modulo64_31b(uint64_t a,unsigned int N)17 inline unsigned int modulo64_31b(uint64_t a, unsigned int N)
18 {
19 	return (unsigned int)(a % N);
20 }
21 
add64(uint64_t a,unsigned int b)22 inline uint64_t add64(uint64_t a, unsigned int b)
23 {
24 	return (a + b);
25 }
26 
allocate_ht_64(unsigned int num_loaded_hashes,unsigned int verbosity)27 void allocate_ht_64(unsigned int num_loaded_hashes, unsigned int verbosity)
28 {
29 	unsigned int i;
30 
31 	if (bt_memalign_alloc((void **)&hash_table_64, 16, 2 * hash_table_size * sizeof(unsigned int)))
32 		bt_error("Couldn't allocate hash_table_64.");
33 
34 	for (i = 0; i < hash_table_size; i++)
35 		hash_table_64[i] = hash_table_64[i + hash_table_size] = 0;
36 
37 	total_memory_in_bytes += 2 * hash_table_size * sizeof(unsigned int);
38 
39 	if (verbosity > 2) {
40 		fprintf(stdout, "Hash Table Size %Lf %% of Number of Loaded Hashes.\n", ((long double)hash_table_size / (long double)num_loaded_hashes) * 100.00);
41 		fprintf(stdout, "Hash Table Size(in GBs):%Lf\n", ((long double)2.0 * hash_table_size * sizeof(unsigned int)) / ((long double)1024 * 1024 * 1024));
42 	}
43 }
44 
calc_ht_idx_64(unsigned int hash_location,unsigned int offset)45 inline unsigned int calc_ht_idx_64(unsigned int hash_location, unsigned int offset)
46 {
47 	return  modulo64_31b(add64(loaded_hashes_64[hash_location], offset), hash_table_size);
48 }
49 
zero_check_ht_64(unsigned int hash_table_idx)50 inline unsigned int zero_check_ht_64(unsigned int hash_table_idx)
51 {
52 	return (hash_table_64[hash_table_idx] || hash_table_64[hash_table_idx + hash_table_size]);
53 }
54 
assign_ht_64(unsigned int hash_table_idx,unsigned int hash_location)55 inline void assign_ht_64(unsigned int hash_table_idx, unsigned int hash_location)
56 {
57 	uint64_t hash = loaded_hashes_64[hash_location];
58 	hash_table_64[hash_table_idx] = (unsigned int)(hash & 0xffffffff);
59 	hash_table_64[hash_table_idx + hash_table_size] = (unsigned int)(hash >> 32);
60 }
61 
assign0_ht_64(unsigned int hash_table_idx)62 inline void assign0_ht_64(unsigned int hash_table_idx)
63 {
64 	hash_table_64[hash_table_idx] = hash_table_64[hash_table_idx + hash_table_size] = 0;
65 }
66 
get_offset_64(unsigned int hash_table_idx,unsigned int hash_location)67 unsigned int get_offset_64(unsigned int hash_table_idx, unsigned int hash_location)
68 {
69 	unsigned int z = modulo64_31b(loaded_hashes_64[hash_location], hash_table_size);
70 	return (hash_table_size - z + hash_table_idx);
71 }
72 
test_tables_64(unsigned int num_loaded_hashes,OFFSET_TABLE_WORD * offset_table,unsigned int offset_table_size,unsigned int shift64_ot_sz,unsigned int shift128_ot_sz,unsigned int verbosity)73 int test_tables_64(unsigned int num_loaded_hashes, OFFSET_TABLE_WORD *offset_table, unsigned int offset_table_size, unsigned int shift64_ot_sz, unsigned int shift128_ot_sz, unsigned int verbosity)
74 {
75 	unsigned char *hash_table_collisions;
76 	unsigned int i, hash_table_idx, error = 1, count = 0;
77 	uint64_t hash;
78 
79 	if (bt_calloc((void **)&hash_table_collisions, hash_table_size, sizeof(unsigned char)))
80 		bt_error("Failed to allocate memory: hash_table_collisions.");
81 
82 	if (verbosity > 1)
83 		fprintf(stdout, "\nTesting Tables...");
84 #if _OPENMP
85 #pragma omp parallel private(i, hash_table_idx, hash)
86 #endif
87 	{
88 #if _OPENMP
89 #pragma omp for
90 #endif
91 	for (i = 0; i < num_loaded_hashes; i++) {
92 			hash = loaded_hashes_64[i];
93 			hash_table_idx =
94 				calc_ht_idx_64(i,
95 					(unsigned int)offset_table[
96 					modulo64_31b(hash,
97 					offset_table_size)]);
98 #if _OPENMP
99 #pragma omp atomic
100 #endif
101 			hash_table_collisions[hash_table_idx]++;
102 
103 			if (error && (hash_table_64[hash_table_idx] != (unsigned int)(hash & 0xffffffff)  ||
104 			    hash_table_64[hash_table_idx + hash_table_size] != (unsigned int)(hash >> 32) ||
105 			    hash_table_collisions[hash_table_idx] > 1)) {
106 				fprintf(stderr, "Error building tables: Loaded hash idx:%u, No. of collisions:%u\n", i, hash_table_collisions[hash_table_idx]);
107 				error = 0;
108 			}
109 
110 		}
111 #if _OPENMP
112 #pragma omp single
113 #endif
114 		for (hash_table_idx = 0; hash_table_idx < hash_table_size; hash_table_idx++)
115 			if (zero_check_ht_64(hash_table_idx))
116 				count++;
117 #if _OPENMP
118 #pragma omp barrier
119 #endif
120 	}
121 
122 /* Suppress unused variable warning. */
123 #define UNUSED(x) (void)(x)
124 	UNUSED(shift128_ot_sz);
125 	UNUSED(shift64_ot_sz);
126 
127 	if (count != num_loaded_hashes) {
128 		error = 0;
129 		fprintf(stderr, "Error!! Tables contains extra or less entries.\n");
130 		return 0;
131 	}
132 
133 	bt_free((void **)&hash_table_collisions);
134 
135 	if (error && verbosity > 1)
136 		fprintf(stdout, "OK\n");
137 
138 	return 1;
139 }
140 
141 #define check_equal(p, q) \
142 	(loaded_hashes_64[p] == loaded_hashes_64[q])
143 
144 #define check_non_zero(p) \
145 	(loaded_hashes_64[p])
146 
147 #define check_zero(p) \
148 	(loaded_hashes_64[p] == 0)
149 
150 #define set_zero(p) \
151 	loaded_hashes_64[p] = 0
152 
remove_duplicates_final(unsigned int num_loaded_hashes,unsigned int hash_table_size,unsigned int * rehash_list)153 static void remove_duplicates_final(unsigned int num_loaded_hashes, unsigned int hash_table_size, unsigned int *rehash_list)
154 {
155 	unsigned int i, **hash_location_list, counter;
156 #define COLLISION_DTYPE unsigned int
157 	COLLISION_DTYPE *collisions;
158 	typedef struct {
159 		unsigned int store_loc1;
160 		unsigned int store_loc2;
161 		unsigned int idx_hash_loc_list;
162 		COLLISION_DTYPE  collisions;
163 		COLLISION_DTYPE iter;
164 	} hash_table_data;
165 
166 	hash_table_data *hash_table = NULL;
167 
168 	if (bt_malloc((void **)&hash_table, hash_table_size * sizeof(hash_table_data)))
169 		bt_error("Failed to allocate memory: hash_table.");
170 	if (bt_calloc((void **)&collisions, hash_table_size, sizeof(COLLISION_DTYPE)))
171 		bt_error("Failed to allocate memory: collisions.");
172 
173 	for (i = 0; i < num_loaded_hashes; i++) {
174 		unsigned int idx = loaded_hashes_64[rehash_list[i]] % hash_table_size;
175 		collisions[idx]++;
176 	}
177 
178 	counter = 0;
179 	for (i = 0; i < hash_table_size; i++) {
180 		 hash_table[i].collisions = collisions[i];
181 		 hash_table[i].iter = 0;
182 		 hash_table[i].store_loc1 = hash_table[i].store_loc2 =
183 			hash_table[i].idx_hash_loc_list = 0xffffffff;
184 		if (hash_table[i].collisions > 3)
185 			hash_table[i].idx_hash_loc_list = counter++;
186 	}
187 
188 	if (bt_malloc((void **)&hash_location_list, (counter + 1) * sizeof(unsigned int *)))
189 		bt_error("Failed to allocate memory: hash_location_list.");
190 
191 	counter = 0;
192 	for (i = 0; i < hash_table_size; i++)
193 		if (collisions[i] > 3) {
194 			if (bt_malloc((void **)&hash_location_list[counter], (collisions[i] - 1) * sizeof(unsigned int)))
195 				bt_error("Failed to allocate memory: hash_location_list[counter].");
196 			counter++;
197 		}
198 
199 	for (i = 0; i < num_loaded_hashes; i++) {
200 		unsigned int k = rehash_list[i];
201 		unsigned int idx = loaded_hashes_64[k] % hash_table_size ;
202 
203 		if (collisions[idx] == 2) {
204 			if (!hash_table[idx].iter) {
205 				hash_table[idx].iter++;
206 				hash_table[idx].store_loc1 = k;
207 			}
208 			else if (check_equal(hash_table[idx].store_loc1, k))
209 				set_zero(k);
210 		}
211 
212 		if (collisions[idx] == 3) {
213 			if (!hash_table[idx].iter) {
214 				hash_table[idx].iter++;
215 				hash_table[idx].store_loc1 = k;
216 			}
217 			else if (hash_table[idx].iter == 1) {
218 				if (check_equal(hash_table[idx].store_loc1, k))
219 					set_zero(k);
220 				else
221 					hash_table[idx].store_loc2 = k;
222 			}
223 			else if (check_equal(hash_table[idx].store_loc1, k) ||
224 				 check_equal(hash_table[idx].store_loc2, k))
225 				set_zero(k);
226 		}
227 
228 		else if (collisions[idx] > 3) {
229 			unsigned int iter = hash_table[idx].iter;
230 			if (!iter)
231 				hash_location_list[hash_table[idx].idx_hash_loc_list][iter++] = k;
232 			else {
233 				unsigned int j;
234 				for (j = 0; j < iter; j++)
235 					if (check_equal(hash_location_list[hash_table[idx].idx_hash_loc_list][j], k)) {
236 						set_zero(k);
237 						break;
238 					}
239 				if (j == iter && iter < (unsigned int)hash_table[idx].collisions - 1)
240 					hash_location_list[hash_table[idx].idx_hash_loc_list][iter++] = k;
241 			}
242 			hash_table[idx].iter = iter;
243 		}
244 	}
245 
246 #undef COLLISION_DTYPE
247 	for (i = 0; i < counter; i++)
248 		bt_free((void **)&hash_location_list[i]);
249 	bt_free((void **)&hash_location_list);
250 	bt_free((void **)&hash_table);
251 	bt_free((void **)&collisions);
252 }
253 
remove_duplicates_64(unsigned int num_loaded_hashes,unsigned int hash_table_size,unsigned int verbosity)254 unsigned int remove_duplicates_64(unsigned int num_loaded_hashes, unsigned int hash_table_size, unsigned int verbosity)
255 {
256 	unsigned int i, num_unique_hashes, *rehash_list, counter;
257 #define COLLISION_DTYPE unsigned int
258 	COLLISION_DTYPE *collisions;
259 	typedef struct {
260 		unsigned int store_loc1;
261 		unsigned int store_loc2;
262 		unsigned int store_loc3;
263 		COLLISION_DTYPE iter;
264 	} hash_table_data;
265 
266 	hash_table_data *hash_table = NULL;
267 
268 	if (verbosity > 1)
269 		fprintf(stdout, "Removing duplicate hashes...");
270 
271 	if (hash_table_size & (hash_table_size - 1)) {
272 		fprintf(stderr, "Duplicate removal hash table size must power of 2.\n");
273 		return 0;
274 	}
275 
276 	if (bt_malloc((void **)&hash_table, hash_table_size * sizeof(hash_table_data)))
277 		bt_error("Failed to allocate memory: hash_table.");
278 	if (bt_calloc((void **)&collisions, hash_table_size, sizeof(COLLISION_DTYPE)))
279 		bt_error("Failed to allocate memory: collisions.");
280 
281 #if _OPENMP
282 #pragma omp parallel private(i)
283 #endif
284 {
285 #if _OPENMP
286 #pragma omp for
287 #endif
288 	for (i = 0; i < num_loaded_hashes; i++) {
289 		unsigned int idx = loaded_hashes_64[i] & (hash_table_size - 1);
290 #if _OPENMP
291 #pragma omp atomic
292 #endif
293 		collisions[idx]++;
294 	}
295 
296 	counter = 0;
297 #if _OPENMP
298 #pragma omp barrier
299 #endif
300 
301 #if _OPENMP
302 #pragma omp for
303 #endif
304 	for (i = 0; i < hash_table_size; i++) {
305 		hash_table[i].iter = 0;
306 		if (collisions[i] > 4)
307 #if _OPENMP
308 #pragma omp atomic
309 #endif
310 			 counter += (collisions[i] - 3);
311 	}
312 #if _OPENMP
313 #pragma omp barrier
314 
315 #pragma omp sections
316 #endif
317 {
318 #if _OPENMP
319 #pragma omp section
320 #endif
321 {
322 	for (i = 0; i < num_loaded_hashes; i++) {
323 		unsigned int idx = loaded_hashes_64[i] & (hash_table_size - 1);
324 
325 		if (collisions[idx] == 2) {
326 			if (!hash_table[idx].iter) {
327 				hash_table[idx].iter++;
328 				hash_table[idx].store_loc1 = i;
329 			}
330 			else if (check_equal(hash_table[idx].store_loc1, i))
331 				set_zero(i);
332 		}
333 	}
334 }
335 
336 #if _OPENMP
337 #pragma omp section
338 #endif
339 {
340 	if (bt_malloc((void **)&rehash_list, counter * sizeof(unsigned int)))
341 		bt_error("Failed to allocate memory: rehash_list.");
342 	counter = 0;
343 	for (i = 0; i < num_loaded_hashes; i++) {
344 		unsigned int idx = loaded_hashes_64[i] & (hash_table_size - 1);
345 
346 		if (collisions[idx] == 3) {
347 			if (!hash_table[idx].iter) {
348 				hash_table[idx].iter++;
349 				hash_table[idx].store_loc1 = i;
350 			}
351 			else if (hash_table[idx].iter == 1) {
352 				if (check_equal(hash_table[idx].store_loc1, i))
353 					set_zero(i);
354 				else {
355 					hash_table[idx].iter++;
356 					hash_table[idx].store_loc2 = i;
357 				}
358 			}
359 			else if (check_equal(hash_table[idx].store_loc1, i) ||
360 				 check_equal(hash_table[idx].store_loc2, i))
361 				set_zero(i);
362 		}
363 
364 		else if (collisions[idx] >= 4) {
365 			if (!hash_table[idx].iter) {
366 				hash_table[idx].iter++;
367 				hash_table[idx].store_loc1 = i;
368 			}
369 			else if (hash_table[idx].iter == 1) {
370 				if (check_equal(hash_table[idx].store_loc1, i))
371 					set_zero(i);
372 				else {
373 					hash_table[idx].iter++;
374 					hash_table[idx].store_loc2 = i;
375 				}
376 
377 			}
378 			else if (hash_table[idx].iter == 2) {
379 				if (check_equal(hash_table[idx].store_loc1, i) ||
380 				    check_equal(hash_table[idx].store_loc2, i))
381 					set_zero(i);
382 				else {
383 					hash_table[idx].iter++;
384 					hash_table[idx].store_loc3 = i;
385 				}
386 			}
387 			else if (hash_table[idx].iter >= 3) {
388 				if (check_equal(hash_table[idx].store_loc1, i) ||
389 				    check_equal(hash_table[idx].store_loc2, i) ||
390 				    check_equal(hash_table[idx].store_loc3, i))
391 					set_zero(i);
392 				else {
393 					if (collisions[idx] > 4)
394 						rehash_list[counter++] = i;
395 				}
396 			}
397 		}
398 	}
399 
400 	if (counter)
401 		remove_duplicates_final(counter, counter + (counter >> 1), rehash_list);
402 	bt_free((void **)&rehash_list);
403 }
404 }
405 }
406 
407 #if 0
408 	{	unsigned int col1 = 0, col2 = 0, col3 = 0, col4 = 0, col5a = 0;
409 		for (i = 0; i < hash_table_size; i++) {
410 			if (collisions[i] == 1)
411 				col1++;
412 			else if (collisions[i] == 2)
413 				col2++;
414 			else if (collisions[i] == 3)
415 				col3++;
416 			else if (collisions[i] == 4)
417 				col4++;
418 			else if (collisions[i] > 4)
419 				col5a += collisions[i];
420 		}
421 		col2 *= 2;
422 		col3 *= 3;
423 		col4 *= 4;
424 		fprintf(stderr, "Statistics:%Lf %Lf %Lf %Lf %Lf\n", (long double)col1 / (long double)num_loaded_hashes,
425 		  (long double)col2 / (long double)num_loaded_hashes, (long double)col3 / (long double)num_loaded_hashes,
426 			(long double)col4 / (long double)num_loaded_hashes, (long double)col5a / (long double)num_loaded_hashes);
427 
428 	}
429 #endif
430 	num_unique_hashes = 0;
431 	for (i = num_loaded_hashes - 1; (int)i >= 0; i--)
432 		if (check_non_zero(i)) {
433 			num_unique_hashes = i;
434 			break;
435 		}
436 
437 	for (i = 0; i <= num_unique_hashes; i++)
438 		if (check_zero(i)) {
439 			unsigned int j;
440 			loaded_hashes_64[i] = loaded_hashes_64[num_unique_hashes];
441 			set_zero(num_unique_hashes);
442 			num_unique_hashes--;
443 			for (j = num_unique_hashes; (int)j >= 0; j--)
444 				if (check_non_zero(j)) {
445 					num_unique_hashes = j;
446 					break;
447 				}
448 		}
449 #undef COLLISION_DTYPE
450 	bt_free((void **)&collisions);
451 	bt_free((void **)&hash_table);
452 
453 	if (verbosity > 1)
454 		fprintf(stdout, "Done\n");
455 
456 	return (num_unique_hashes + 1);
457 }
458 
459 #endif
460