1 /*
2 * This software is Copyright (c) 2015 Sayantan Datta <std2048 at gmail dot com>
3 * and it is hereby released to the general public under the following terms:
4 * Redistribution and use in source and binary forms, with or without modification, are permitted.
5 */
6
7 #ifdef HAVE_OPENCL
8
9 #include <stdlib.h>
10 #include <stdio.h>
11 #include "bt_hash_types.h"
12
13 uint64_t *loaded_hashes_64 = NULL;
14 unsigned int *hash_table_64 = NULL;
15
16 /* Assuming N < 0x7fffffff */
modulo64_31b(uint64_t a,unsigned int N)17 inline unsigned int modulo64_31b(uint64_t a, unsigned int N)
18 {
19 return (unsigned int)(a % N);
20 }
21
add64(uint64_t a,unsigned int b)22 inline uint64_t add64(uint64_t a, unsigned int b)
23 {
24 return (a + b);
25 }
26
allocate_ht_64(unsigned int num_loaded_hashes,unsigned int verbosity)27 void allocate_ht_64(unsigned int num_loaded_hashes, unsigned int verbosity)
28 {
29 unsigned int i;
30
31 if (bt_memalign_alloc((void **)&hash_table_64, 16, 2 * hash_table_size * sizeof(unsigned int)))
32 bt_error("Couldn't allocate hash_table_64.");
33
34 for (i = 0; i < hash_table_size; i++)
35 hash_table_64[i] = hash_table_64[i + hash_table_size] = 0;
36
37 total_memory_in_bytes += 2 * hash_table_size * sizeof(unsigned int);
38
39 if (verbosity > 2) {
40 fprintf(stdout, "Hash Table Size %Lf %% of Number of Loaded Hashes.\n", ((long double)hash_table_size / (long double)num_loaded_hashes) * 100.00);
41 fprintf(stdout, "Hash Table Size(in GBs):%Lf\n", ((long double)2.0 * hash_table_size * sizeof(unsigned int)) / ((long double)1024 * 1024 * 1024));
42 }
43 }
44
calc_ht_idx_64(unsigned int hash_location,unsigned int offset)45 inline unsigned int calc_ht_idx_64(unsigned int hash_location, unsigned int offset)
46 {
47 return modulo64_31b(add64(loaded_hashes_64[hash_location], offset), hash_table_size);
48 }
49
zero_check_ht_64(unsigned int hash_table_idx)50 inline unsigned int zero_check_ht_64(unsigned int hash_table_idx)
51 {
52 return (hash_table_64[hash_table_idx] || hash_table_64[hash_table_idx + hash_table_size]);
53 }
54
assign_ht_64(unsigned int hash_table_idx,unsigned int hash_location)55 inline void assign_ht_64(unsigned int hash_table_idx, unsigned int hash_location)
56 {
57 uint64_t hash = loaded_hashes_64[hash_location];
58 hash_table_64[hash_table_idx] = (unsigned int)(hash & 0xffffffff);
59 hash_table_64[hash_table_idx + hash_table_size] = (unsigned int)(hash >> 32);
60 }
61
assign0_ht_64(unsigned int hash_table_idx)62 inline void assign0_ht_64(unsigned int hash_table_idx)
63 {
64 hash_table_64[hash_table_idx] = hash_table_64[hash_table_idx + hash_table_size] = 0;
65 }
66
get_offset_64(unsigned int hash_table_idx,unsigned int hash_location)67 unsigned int get_offset_64(unsigned int hash_table_idx, unsigned int hash_location)
68 {
69 unsigned int z = modulo64_31b(loaded_hashes_64[hash_location], hash_table_size);
70 return (hash_table_size - z + hash_table_idx);
71 }
72
test_tables_64(unsigned int num_loaded_hashes,OFFSET_TABLE_WORD * offset_table,unsigned int offset_table_size,unsigned int shift64_ot_sz,unsigned int shift128_ot_sz,unsigned int verbosity)73 int test_tables_64(unsigned int num_loaded_hashes, OFFSET_TABLE_WORD *offset_table, unsigned int offset_table_size, unsigned int shift64_ot_sz, unsigned int shift128_ot_sz, unsigned int verbosity)
74 {
75 unsigned char *hash_table_collisions;
76 unsigned int i, hash_table_idx, error = 1, count = 0;
77 uint64_t hash;
78
79 if (bt_calloc((void **)&hash_table_collisions, hash_table_size, sizeof(unsigned char)))
80 bt_error("Failed to allocate memory: hash_table_collisions.");
81
82 if (verbosity > 1)
83 fprintf(stdout, "\nTesting Tables...");
84 #if _OPENMP
85 #pragma omp parallel private(i, hash_table_idx, hash)
86 #endif
87 {
88 #if _OPENMP
89 #pragma omp for
90 #endif
91 for (i = 0; i < num_loaded_hashes; i++) {
92 hash = loaded_hashes_64[i];
93 hash_table_idx =
94 calc_ht_idx_64(i,
95 (unsigned int)offset_table[
96 modulo64_31b(hash,
97 offset_table_size)]);
98 #if _OPENMP
99 #pragma omp atomic
100 #endif
101 hash_table_collisions[hash_table_idx]++;
102
103 if (error && (hash_table_64[hash_table_idx] != (unsigned int)(hash & 0xffffffff) ||
104 hash_table_64[hash_table_idx + hash_table_size] != (unsigned int)(hash >> 32) ||
105 hash_table_collisions[hash_table_idx] > 1)) {
106 fprintf(stderr, "Error building tables: Loaded hash idx:%u, No. of collisions:%u\n", i, hash_table_collisions[hash_table_idx]);
107 error = 0;
108 }
109
110 }
111 #if _OPENMP
112 #pragma omp single
113 #endif
114 for (hash_table_idx = 0; hash_table_idx < hash_table_size; hash_table_idx++)
115 if (zero_check_ht_64(hash_table_idx))
116 count++;
117 #if _OPENMP
118 #pragma omp barrier
119 #endif
120 }
121
122 /* Suppress unused variable warning. */
123 #define UNUSED(x) (void)(x)
124 UNUSED(shift128_ot_sz);
125 UNUSED(shift64_ot_sz);
126
127 if (count != num_loaded_hashes) {
128 error = 0;
129 fprintf(stderr, "Error!! Tables contains extra or less entries.\n");
130 return 0;
131 }
132
133 bt_free((void **)&hash_table_collisions);
134
135 if (error && verbosity > 1)
136 fprintf(stdout, "OK\n");
137
138 return 1;
139 }
140
141 #define check_equal(p, q) \
142 (loaded_hashes_64[p] == loaded_hashes_64[q])
143
144 #define check_non_zero(p) \
145 (loaded_hashes_64[p])
146
147 #define check_zero(p) \
148 (loaded_hashes_64[p] == 0)
149
150 #define set_zero(p) \
151 loaded_hashes_64[p] = 0
152
remove_duplicates_final(unsigned int num_loaded_hashes,unsigned int hash_table_size,unsigned int * rehash_list)153 static void remove_duplicates_final(unsigned int num_loaded_hashes, unsigned int hash_table_size, unsigned int *rehash_list)
154 {
155 unsigned int i, **hash_location_list, counter;
156 #define COLLISION_DTYPE unsigned int
157 COLLISION_DTYPE *collisions;
158 typedef struct {
159 unsigned int store_loc1;
160 unsigned int store_loc2;
161 unsigned int idx_hash_loc_list;
162 COLLISION_DTYPE collisions;
163 COLLISION_DTYPE iter;
164 } hash_table_data;
165
166 hash_table_data *hash_table = NULL;
167
168 if (bt_malloc((void **)&hash_table, hash_table_size * sizeof(hash_table_data)))
169 bt_error("Failed to allocate memory: hash_table.");
170 if (bt_calloc((void **)&collisions, hash_table_size, sizeof(COLLISION_DTYPE)))
171 bt_error("Failed to allocate memory: collisions.");
172
173 for (i = 0; i < num_loaded_hashes; i++) {
174 unsigned int idx = loaded_hashes_64[rehash_list[i]] % hash_table_size;
175 collisions[idx]++;
176 }
177
178 counter = 0;
179 for (i = 0; i < hash_table_size; i++) {
180 hash_table[i].collisions = collisions[i];
181 hash_table[i].iter = 0;
182 hash_table[i].store_loc1 = hash_table[i].store_loc2 =
183 hash_table[i].idx_hash_loc_list = 0xffffffff;
184 if (hash_table[i].collisions > 3)
185 hash_table[i].idx_hash_loc_list = counter++;
186 }
187
188 if (bt_malloc((void **)&hash_location_list, (counter + 1) * sizeof(unsigned int *)))
189 bt_error("Failed to allocate memory: hash_location_list.");
190
191 counter = 0;
192 for (i = 0; i < hash_table_size; i++)
193 if (collisions[i] > 3) {
194 if (bt_malloc((void **)&hash_location_list[counter], (collisions[i] - 1) * sizeof(unsigned int)))
195 bt_error("Failed to allocate memory: hash_location_list[counter].");
196 counter++;
197 }
198
199 for (i = 0; i < num_loaded_hashes; i++) {
200 unsigned int k = rehash_list[i];
201 unsigned int idx = loaded_hashes_64[k] % hash_table_size ;
202
203 if (collisions[idx] == 2) {
204 if (!hash_table[idx].iter) {
205 hash_table[idx].iter++;
206 hash_table[idx].store_loc1 = k;
207 }
208 else if (check_equal(hash_table[idx].store_loc1, k))
209 set_zero(k);
210 }
211
212 if (collisions[idx] == 3) {
213 if (!hash_table[idx].iter) {
214 hash_table[idx].iter++;
215 hash_table[idx].store_loc1 = k;
216 }
217 else if (hash_table[idx].iter == 1) {
218 if (check_equal(hash_table[idx].store_loc1, k))
219 set_zero(k);
220 else
221 hash_table[idx].store_loc2 = k;
222 }
223 else if (check_equal(hash_table[idx].store_loc1, k) ||
224 check_equal(hash_table[idx].store_loc2, k))
225 set_zero(k);
226 }
227
228 else if (collisions[idx] > 3) {
229 unsigned int iter = hash_table[idx].iter;
230 if (!iter)
231 hash_location_list[hash_table[idx].idx_hash_loc_list][iter++] = k;
232 else {
233 unsigned int j;
234 for (j = 0; j < iter; j++)
235 if (check_equal(hash_location_list[hash_table[idx].idx_hash_loc_list][j], k)) {
236 set_zero(k);
237 break;
238 }
239 if (j == iter && iter < (unsigned int)hash_table[idx].collisions - 1)
240 hash_location_list[hash_table[idx].idx_hash_loc_list][iter++] = k;
241 }
242 hash_table[idx].iter = iter;
243 }
244 }
245
246 #undef COLLISION_DTYPE
247 for (i = 0; i < counter; i++)
248 bt_free((void **)&hash_location_list[i]);
249 bt_free((void **)&hash_location_list);
250 bt_free((void **)&hash_table);
251 bt_free((void **)&collisions);
252 }
253
remove_duplicates_64(unsigned int num_loaded_hashes,unsigned int hash_table_size,unsigned int verbosity)254 unsigned int remove_duplicates_64(unsigned int num_loaded_hashes, unsigned int hash_table_size, unsigned int verbosity)
255 {
256 unsigned int i, num_unique_hashes, *rehash_list, counter;
257 #define COLLISION_DTYPE unsigned int
258 COLLISION_DTYPE *collisions;
259 typedef struct {
260 unsigned int store_loc1;
261 unsigned int store_loc2;
262 unsigned int store_loc3;
263 COLLISION_DTYPE iter;
264 } hash_table_data;
265
266 hash_table_data *hash_table = NULL;
267
268 if (verbosity > 1)
269 fprintf(stdout, "Removing duplicate hashes...");
270
271 if (hash_table_size & (hash_table_size - 1)) {
272 fprintf(stderr, "Duplicate removal hash table size must power of 2.\n");
273 return 0;
274 }
275
276 if (bt_malloc((void **)&hash_table, hash_table_size * sizeof(hash_table_data)))
277 bt_error("Failed to allocate memory: hash_table.");
278 if (bt_calloc((void **)&collisions, hash_table_size, sizeof(COLLISION_DTYPE)))
279 bt_error("Failed to allocate memory: collisions.");
280
281 #if _OPENMP
282 #pragma omp parallel private(i)
283 #endif
284 {
285 #if _OPENMP
286 #pragma omp for
287 #endif
288 for (i = 0; i < num_loaded_hashes; i++) {
289 unsigned int idx = loaded_hashes_64[i] & (hash_table_size - 1);
290 #if _OPENMP
291 #pragma omp atomic
292 #endif
293 collisions[idx]++;
294 }
295
296 counter = 0;
297 #if _OPENMP
298 #pragma omp barrier
299 #endif
300
301 #if _OPENMP
302 #pragma omp for
303 #endif
304 for (i = 0; i < hash_table_size; i++) {
305 hash_table[i].iter = 0;
306 if (collisions[i] > 4)
307 #if _OPENMP
308 #pragma omp atomic
309 #endif
310 counter += (collisions[i] - 3);
311 }
312 #if _OPENMP
313 #pragma omp barrier
314
315 #pragma omp sections
316 #endif
317 {
318 #if _OPENMP
319 #pragma omp section
320 #endif
321 {
322 for (i = 0; i < num_loaded_hashes; i++) {
323 unsigned int idx = loaded_hashes_64[i] & (hash_table_size - 1);
324
325 if (collisions[idx] == 2) {
326 if (!hash_table[idx].iter) {
327 hash_table[idx].iter++;
328 hash_table[idx].store_loc1 = i;
329 }
330 else if (check_equal(hash_table[idx].store_loc1, i))
331 set_zero(i);
332 }
333 }
334 }
335
336 #if _OPENMP
337 #pragma omp section
338 #endif
339 {
340 if (bt_malloc((void **)&rehash_list, counter * sizeof(unsigned int)))
341 bt_error("Failed to allocate memory: rehash_list.");
342 counter = 0;
343 for (i = 0; i < num_loaded_hashes; i++) {
344 unsigned int idx = loaded_hashes_64[i] & (hash_table_size - 1);
345
346 if (collisions[idx] == 3) {
347 if (!hash_table[idx].iter) {
348 hash_table[idx].iter++;
349 hash_table[idx].store_loc1 = i;
350 }
351 else if (hash_table[idx].iter == 1) {
352 if (check_equal(hash_table[idx].store_loc1, i))
353 set_zero(i);
354 else {
355 hash_table[idx].iter++;
356 hash_table[idx].store_loc2 = i;
357 }
358 }
359 else if (check_equal(hash_table[idx].store_loc1, i) ||
360 check_equal(hash_table[idx].store_loc2, i))
361 set_zero(i);
362 }
363
364 else if (collisions[idx] >= 4) {
365 if (!hash_table[idx].iter) {
366 hash_table[idx].iter++;
367 hash_table[idx].store_loc1 = i;
368 }
369 else if (hash_table[idx].iter == 1) {
370 if (check_equal(hash_table[idx].store_loc1, i))
371 set_zero(i);
372 else {
373 hash_table[idx].iter++;
374 hash_table[idx].store_loc2 = i;
375 }
376
377 }
378 else if (hash_table[idx].iter == 2) {
379 if (check_equal(hash_table[idx].store_loc1, i) ||
380 check_equal(hash_table[idx].store_loc2, i))
381 set_zero(i);
382 else {
383 hash_table[idx].iter++;
384 hash_table[idx].store_loc3 = i;
385 }
386 }
387 else if (hash_table[idx].iter >= 3) {
388 if (check_equal(hash_table[idx].store_loc1, i) ||
389 check_equal(hash_table[idx].store_loc2, i) ||
390 check_equal(hash_table[idx].store_loc3, i))
391 set_zero(i);
392 else {
393 if (collisions[idx] > 4)
394 rehash_list[counter++] = i;
395 }
396 }
397 }
398 }
399
400 if (counter)
401 remove_duplicates_final(counter, counter + (counter >> 1), rehash_list);
402 bt_free((void **)&rehash_list);
403 }
404 }
405 }
406
407 #if 0
408 { unsigned int col1 = 0, col2 = 0, col3 = 0, col4 = 0, col5a = 0;
409 for (i = 0; i < hash_table_size; i++) {
410 if (collisions[i] == 1)
411 col1++;
412 else if (collisions[i] == 2)
413 col2++;
414 else if (collisions[i] == 3)
415 col3++;
416 else if (collisions[i] == 4)
417 col4++;
418 else if (collisions[i] > 4)
419 col5a += collisions[i];
420 }
421 col2 *= 2;
422 col3 *= 3;
423 col4 *= 4;
424 fprintf(stderr, "Statistics:%Lf %Lf %Lf %Lf %Lf\n", (long double)col1 / (long double)num_loaded_hashes,
425 (long double)col2 / (long double)num_loaded_hashes, (long double)col3 / (long double)num_loaded_hashes,
426 (long double)col4 / (long double)num_loaded_hashes, (long double)col5a / (long double)num_loaded_hashes);
427
428 }
429 #endif
430 num_unique_hashes = 0;
431 for (i = num_loaded_hashes - 1; (int)i >= 0; i--)
432 if (check_non_zero(i)) {
433 num_unique_hashes = i;
434 break;
435 }
436
437 for (i = 0; i <= num_unique_hashes; i++)
438 if (check_zero(i)) {
439 unsigned int j;
440 loaded_hashes_64[i] = loaded_hashes_64[num_unique_hashes];
441 set_zero(num_unique_hashes);
442 num_unique_hashes--;
443 for (j = num_unique_hashes; (int)j >= 0; j--)
444 if (check_non_zero(j)) {
445 num_unique_hashes = j;
446 break;
447 }
448 }
449 #undef COLLISION_DTYPE
450 bt_free((void **)&collisions);
451 bt_free((void **)&hash_table);
452
453 if (verbosity > 1)
454 fprintf(stdout, "Done\n");
455
456 return (num_unique_hashes + 1);
457 }
458
459 #endif
460