1 #if HAVE_OPENCL
2 
3 #include "options.h"
4 #include "opencl_hash_check_128.h"
5 #include "mask_ext.h"
6 
7 cl_uint num_loaded_hashes;
8 cl_uint *hash_ids = NULL;
9 unsigned int hash_table_size_128 = 0, offset_table_size = 0;
10 
11 static cl_uint *loaded_hashes = NULL;
12 static OFFSET_TABLE_WORD *offset_table = NULL;
13 static cl_ulong bitmap_size_bits = 0;
14 static cl_uint *bitmaps = NULL;
15 static cl_uint *zero_buffer = NULL;
16 static cl_mem buffer_offset_table, buffer_hash_table, buffer_return_hashes, buffer_hash_ids, buffer_bitmap_dupe, buffer_bitmaps;
17 static struct fmt_main *self;
18 
19 
ocl_hc_128_init(struct fmt_main * _self)20 void ocl_hc_128_init(struct fmt_main *_self)
21 {
22 	self = _self;
23 	hash_table_128 = NULL;
24 }
25 
ocl_hc_128_prepare_table(struct db_salt * salt)26 void ocl_hc_128_prepare_table(struct db_salt *salt) {
27 	unsigned int *bin, i;
28 	struct db_password *pw, *last;
29 
30 	num_loaded_hashes = (salt->count);
31 
32 	if (loaded_hashes)
33 		MEM_FREE(loaded_hashes);
34 	if (hash_ids)
35 		MEM_FREE(hash_ids);
36 	if (offset_table)
37 		MEM_FREE(offset_table);
38 	if (hash_table_128)
39 		MEM_FREE(hash_table_128);
40 
41 	loaded_hashes = (cl_uint*) mem_alloc(4 * num_loaded_hashes * sizeof(cl_uint));
42 	hash_ids = (cl_uint*) mem_calloc((3 * num_loaded_hashes + 1), sizeof(cl_uint));
43 
44 	last = pw = salt->list;
45 	i = 0;
46 	do {
47 		bin = (unsigned int *)pw->binary;
48 		if (bin == NULL) {
49 			if (last == pw)
50 				salt->list = pw->next;
51 			else
52 				last->next = pw->next;
53 		} else {
54 			last = pw;
55 			loaded_hashes[4 * i] = bin[0];
56 			loaded_hashes[4 * i + 1] = bin[1];
57 			loaded_hashes[4 * i + 2] = bin[2];
58 			loaded_hashes[4 * i + 3] = bin[3];
59 			i++;
60 		}
61 	} while ((pw = pw->next)) ;
62 
63 	if (i != (salt->count)) {
64 		fprintf(stderr,
65 			"Something went wrong while preparing hashes..Exiting..\n");
66 		error();
67 	}
68 
69 	num_loaded_hashes = create_perfect_hash_table(128, (void *)loaded_hashes,
70 				num_loaded_hashes,
71 			        &offset_table,
72 			        &offset_table_size,
73 			        &hash_table_size_128, 0);
74 
75 	if (!num_loaded_hashes) {
76 		MEM_FREE(hash_table_128);
77 		MEM_FREE(offset_table);
78 		fprintf(stderr, "Failed to create Hash Table for cracking.\n");
79 		error();
80 	}
81 }
82 
ocl_hc_128_prepare_table_test()83 void ocl_hc_128_prepare_table_test() {
84 	unsigned int *binary, i;
85 	char *ciphertext;
86 
87 	num_loaded_hashes = 0;
88 	while (self->params.tests[num_loaded_hashes].ciphertext != NULL)
89 			num_loaded_hashes++;
90 
91 	if (loaded_hashes)
92 		MEM_FREE(loaded_hashes);
93 	if (hash_ids)
94 		MEM_FREE(hash_ids);
95 	if (offset_table)
96 		MEM_FREE(offset_table);
97 	if (hash_table_128)
98 		MEM_FREE(hash_table_128);
99 
100 	loaded_hashes = (cl_uint*) mem_alloc(4 * num_loaded_hashes * sizeof(cl_uint));
101 	hash_ids = (cl_uint*) mem_calloc((3 * num_loaded_hashes + 1), sizeof(cl_uint));
102 
103 	i = 0;
104 	while (self->params.tests[i].ciphertext != NULL) {
105 			ciphertext = self->methods.split(self->params.tests[i].ciphertext, 0, self);
106 			binary = (unsigned int*)self->methods.binary(ciphertext);
107 			loaded_hashes[4 * i] = binary[0];
108 			loaded_hashes[4 * i + 1] = binary[1];
109 			loaded_hashes[4 * i + 2] = binary[2];
110 			loaded_hashes[4 * i + 3] = binary[3];
111 			i++;
112 	}
113 
114 	num_loaded_hashes = create_perfect_hash_table(128, (void *)loaded_hashes,
115 				num_loaded_hashes,
116 			        &offset_table,
117 			        &offset_table_size,
118 			        &hash_table_size_128, 0);
119 
120 	if (!num_loaded_hashes) {
121 		MEM_FREE(hash_table_128);
122 		MEM_FREE(offset_table);
123 		fprintf(stderr, "Failed to create Hash Table for cracking.\n");
124 		error();
125 	}
126 }
127 
128 /* Use only for smaller bitmaps < 16MB */
prepare_bitmap_8(cl_ulong bmp_sz,cl_uint ** bitmap_ptr)129 static void prepare_bitmap_8(cl_ulong bmp_sz, cl_uint **bitmap_ptr)
130 {
131 	unsigned int i;
132 	MEM_FREE(*bitmap_ptr);
133 	*bitmap_ptr = (cl_uint*) mem_calloc((bmp_sz >> 2), sizeof(cl_uint));
134 
135 	for (i = 0; i < num_loaded_hashes; i++) {
136 		unsigned int bmp_idx =
137 			(loaded_hashes[4 * i] & 0x0000ffff) & (bmp_sz - 1);
138 		(*bitmap_ptr)[bmp_idx >> 5] |= (1U << (bmp_idx & 31));
139 
140 		bmp_idx = (loaded_hashes[4 * i] >> 16) & (bmp_sz - 1);
141 		(*bitmap_ptr)[(bmp_sz >> 5) + (bmp_idx >> 5)] |=
142 			(1U << (bmp_idx & 31));
143 
144 		bmp_idx = (loaded_hashes[4 * i + 1] & 0x0000ffff) & (bmp_sz - 1);
145 		(*bitmap_ptr)[(bmp_sz >> 4) + (bmp_idx >> 5)] |=
146 			(1U << (bmp_idx & 31));
147 
148 		bmp_idx = (loaded_hashes[4 * i + 1] >> 16) & (bmp_sz - 1);
149 		(*bitmap_ptr)[(bmp_sz >> 5) * 3 + (bmp_idx >> 5)] |=
150 			(1U << (bmp_idx & 31));
151 
152 		bmp_idx = (loaded_hashes[4 * i + 2] & 0x0000ffff) & (bmp_sz - 1);
153 		(*bitmap_ptr)[(bmp_sz >> 3) + (bmp_idx >> 5)] |=
154 			(1U << (bmp_idx & 31));
155 
156 		bmp_idx = (loaded_hashes[4 * i + 2] >> 16) & (bmp_sz - 1);
157 		(*bitmap_ptr)[(bmp_sz >> 5) * 5 + (bmp_idx >> 5)] |=
158 			(1U << (bmp_idx & 31));
159 
160 		bmp_idx = (loaded_hashes[4 * i + 3] & 0x0000ffff) & (bmp_sz - 1);
161 		(*bitmap_ptr)[(bmp_sz >> 5) * 6 + (bmp_idx >> 5)] |=
162 			(1U << (bmp_idx & 31));
163 
164 		bmp_idx = (loaded_hashes[4 * i + 3] >> 16) & (bmp_sz - 1);
165 		(*bitmap_ptr)[(bmp_sz >> 5) * 7 + (bmp_idx >> 5)] |=
166 			(1U << (bmp_idx & 31));
167 	}
168 }
169 
170 /* Use only for smaller bitmaps < 16MB */
prepare_bitmap_4(cl_ulong bmp_sz,cl_uint ** bitmap_ptr)171 static void prepare_bitmap_4(cl_ulong bmp_sz, cl_uint **bitmap_ptr)
172 {
173 	unsigned int i;
174 	MEM_FREE(*bitmap_ptr);
175 	*bitmap_ptr = (cl_uint*) mem_calloc((bmp_sz >> 3), sizeof(cl_uint));
176 
177 	for (i = 0; i < num_loaded_hashes; i++) {
178 		unsigned int bmp_idx = loaded_hashes[4 * i + 3] & (bmp_sz - 1);
179 		(*bitmap_ptr)[bmp_idx >> 5] |= (1U << (bmp_idx & 31));
180 
181 		bmp_idx = loaded_hashes[4 * i + 2] & (bmp_sz - 1);
182 		(*bitmap_ptr)[(bmp_sz >> 5) + (bmp_idx >> 5)] |=
183 			(1U << (bmp_idx & 31));
184 
185 		bmp_idx = loaded_hashes[4 * i + 1] & (bmp_sz - 1);
186 		(*bitmap_ptr)[(bmp_sz >> 4) + (bmp_idx >> 5)] |=
187 			(1U << (bmp_idx & 31));
188 
189 		bmp_idx = loaded_hashes[4 * i] & (bmp_sz - 1);
190 		(*bitmap_ptr)[(bmp_sz >> 5) * 3 + (bmp_idx >> 5)] |=
191 			(1U << (bmp_idx & 31));
192 	}
193 }
194 
prepare_bitmap_1(cl_ulong bmp_sz,cl_uint ** bitmap_ptr)195 static void prepare_bitmap_1(cl_ulong bmp_sz, cl_uint **bitmap_ptr)
196 {
197 	unsigned int i;
198 	MEM_FREE(*bitmap_ptr);
199 	*bitmap_ptr = (cl_uint*) mem_calloc((bmp_sz >> 5), sizeof(cl_uint));
200 
201 	for (i = 0; i < num_loaded_hashes; i++) {
202 		unsigned int bmp_idx = loaded_hashes[4 * i + 3] & (bmp_sz - 1);
203 		(*bitmap_ptr)[bmp_idx >> 5] |= (1U << (bmp_idx & 31));
204 	}
205 }
206 
ocl_hc_128_select_bitmap(unsigned int num_ld_hashes)207 char* ocl_hc_128_select_bitmap(unsigned int num_ld_hashes)
208 {
209 	static char kernel_params[200];
210 	cl_ulong max_local_mem_sz_bytes = 0;
211 	unsigned int cmp_steps = 2, use_local = 0;
212 
213 	HANDLE_CLERROR(clGetDeviceInfo(devices[gpu_id], CL_DEVICE_LOCAL_MEM_SIZE,
214 		sizeof(cl_ulong), &max_local_mem_sz_bytes, 0),
215 		"failed to get CL_DEVICE_LOCAL_MEM_SIZE.");
216 
217 	if (num_loaded_hashes <= 5100) {
218 		if (amd_gcn_10(device_info[gpu_id]) ||
219 			amd_vliw4(device_info[gpu_id]))
220 			bitmap_size_bits = 512 * 1024;
221 
222 		else if (amd_gcn_11(device_info[gpu_id]) ||
223 			max_local_mem_sz_bytes < 16384 ||
224 			cpu(device_info[gpu_id]))
225 			bitmap_size_bits = 256 * 1024;
226 
227 		else {
228 			bitmap_size_bits = 32 * 1024;
229 			cmp_steps = 4;
230 			use_local = 1;
231 		}
232 	}
233 
234 	else if (num_loaded_hashes <= 10100) {
235 		if (amd_gcn_10(device_info[gpu_id]) ||
236 			amd_vliw4(device_info[gpu_id]))
237 			bitmap_size_bits = 512 * 1024;
238 
239 		else if (amd_gcn_11(device_info[gpu_id]) ||
240 			max_local_mem_sz_bytes < 32768 ||
241 			cpu(device_info[gpu_id]))
242 			bitmap_size_bits = 256 * 1024;
243 
244 		else {
245 			bitmap_size_bits = 64 * 1024;
246 			cmp_steps = 4;
247 			use_local = 1;
248 		}
249 	}
250 
251 	else if (num_loaded_hashes <= 20100) {
252 		if (amd_gcn_10(device_info[gpu_id]))
253 			bitmap_size_bits = 1024 * 1024;
254 
255 		else if (amd_gcn_11(device_info[gpu_id]) ||
256 			max_local_mem_sz_bytes < 32768)
257 			bitmap_size_bits = 512 * 1024;
258 
259 		else if (amd_vliw4(device_info[gpu_id]) ||
260 			cpu(device_info[gpu_id])) {
261 			bitmap_size_bits = 256 * 1024;
262 			cmp_steps = 4;
263 		}
264 
265 		else {
266 			bitmap_size_bits = 32 * 1024;
267 			cmp_steps = 8;
268 			use_local = 1;
269 		}
270 	}
271 
272 	else if (num_loaded_hashes <= 250100)
273 		bitmap_size_bits = 2048 * 1024;
274 
275 	else if (num_loaded_hashes <= 1100100) {
276 		if (!amd_gcn_11(device_info[gpu_id]))
277 			bitmap_size_bits = 4096 * 1024;
278 
279 		else
280 			bitmap_size_bits = 2048 * 1024;
281 	}
282 
283 	else if (num_loaded_hashes <= 1500100) {
284 		bitmap_size_bits = 4096 * 1024 * 2;
285 		cmp_steps = 1;
286 	}
287 
288 	else if (num_loaded_hashes <= 2700100) {
289 		bitmap_size_bits = 4096 * 1024 * 2 * 2;
290 		cmp_steps = 1;
291 	}
292 
293 	else {
294 		cl_ulong mult = num_loaded_hashes / 2700100;
295 		cl_ulong buf_sz;
296 		bitmap_size_bits = 4096 * 4096;
297 		get_power_of_two(mult);
298 		bitmap_size_bits *= mult;
299 		buf_sz = get_max_mem_alloc_size(gpu_id);
300 		if (buf_sz & (buf_sz - 1)) {
301 			get_power_of_two(buf_sz);
302 			buf_sz >>= 1;
303 		}
304 		if (buf_sz >= 536870912)
305 			buf_sz = 536870912;
306 		if ((bitmap_size_bits >> 3) > buf_sz)
307 			bitmap_size_bits = buf_sz << 3;
308 		cmp_steps = 1;
309 	}
310 
311 	if (cmp_steps == 1)
312 		prepare_bitmap_1(bitmap_size_bits, &bitmaps);
313 
314 	else if (cmp_steps <= 4)
315 		prepare_bitmap_4(bitmap_size_bits, &bitmaps);
316 
317 	else
318 		prepare_bitmap_8(bitmap_size_bits, &bitmaps);
319 
320 	/*
321 	 * Much better speed seen on Macbook Pro with GT 650M. Not sure why -
322 	 * or what we should actually test for.
323 	 */
324 	if (platform_apple(platform_id) && gpu_nvidia(device_info[gpu_id]))
325 		use_local = 0;
326 
327 	sprintf(kernel_params,
328 		"-D SELECT_CMP_STEPS=%u"
329 		" -D BITMAP_SIZE_BITS_LESS_ONE="LLu" -D USE_LOCAL_BITMAPS=%u",
330 		cmp_steps, (unsigned long long)bitmap_size_bits - 1, use_local);
331 
332 	bitmap_size_bits *= cmp_steps;
333 
334 	return kernel_params;
335 }
336 
ocl_hc_128_crobj(cl_kernel kernel)337 void ocl_hc_128_crobj(cl_kernel kernel)
338 {
339 	cl_ulong max_alloc_size_bytes = 0;
340 	cl_ulong cache_size_bytes = 0;
341 
342 	HANDLE_CLERROR(clGetDeviceInfo(devices[gpu_id], CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(cl_ulong), &max_alloc_size_bytes, 0), "failed to get CL_DEVICE_MAX_MEM_ALLOC_SIZE.");
343 	HANDLE_CLERROR(clGetDeviceInfo(devices[gpu_id], CL_DEVICE_GLOBAL_MEM_CACHE_SIZE, sizeof(cl_ulong), &cache_size_bytes, 0), "failed to get CL_DEVICE_GLOBAL_MEM_CACHE_SIZE.");
344 
345 	if (max_alloc_size_bytes & (max_alloc_size_bytes - 1)) {
346 		get_power_of_two(max_alloc_size_bytes);
347 		max_alloc_size_bytes >>= 1;
348 	}
349 	if (max_alloc_size_bytes >= 536870912) max_alloc_size_bytes = 536870912;
350 
351 	if (!cache_size_bytes) cache_size_bytes = 1024;
352 
353 	zero_buffer = (cl_uint *) mem_calloc(hash_table_size_128/32 + 1, sizeof(cl_uint));
354 
355 	buffer_return_hashes = clCreateBuffer(context[gpu_id], CL_MEM_WRITE_ONLY, 2 * sizeof(cl_uint) * num_loaded_hashes, NULL, &ret_code);
356 	HANDLE_CLERROR(ret_code, "Error creating buffer argument buffer_return_hashes.");
357 
358 	buffer_hash_ids = clCreateBuffer(context[gpu_id], CL_MEM_READ_WRITE, (3 * num_loaded_hashes + 1) * sizeof(cl_uint), NULL, &ret_code);
359 	HANDLE_CLERROR(ret_code, "Error creating buffer argument buffer_buffer_hash_ids.");
360 
361 	buffer_bitmap_dupe = clCreateBuffer(context[gpu_id], CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, (hash_table_size_128/32 + 1) * sizeof(cl_uint), zero_buffer, &ret_code);
362 	HANDLE_CLERROR(ret_code, "Error creating buffer argument buffer_bitmap_dupe.");
363 
364 	buffer_bitmaps = clCreateBuffer(context[gpu_id], CL_MEM_READ_WRITE, max_alloc_size_bytes, NULL, &ret_code);
365 	HANDLE_CLERROR(ret_code, "Error creating buffer argument buffer_bitmaps.");
366 
367 	buffer_offset_table = clCreateBuffer(context[gpu_id], CL_MEM_READ_ONLY, offset_table_size * sizeof(OFFSET_TABLE_WORD), NULL, &ret_code);
368 	HANDLE_CLERROR(ret_code, "Error creating buffer argument buffer_offset_table.");
369 
370 	buffer_hash_table = clCreateBuffer(context[gpu_id], CL_MEM_READ_ONLY, hash_table_size_128 * sizeof(unsigned int) * 2, NULL, &ret_code);
371 	HANDLE_CLERROR(ret_code, "Error creating buffer argument buffer_hash_table.");
372 
373 	HANDLE_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], buffer_hash_ids, CL_TRUE, 0, sizeof(cl_uint), zero_buffer, 0, NULL, NULL), "failed in clEnqueueWriteBuffer buffer_hash_ids.");
374 	HANDLE_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], buffer_bitmaps, CL_TRUE, 0, (size_t)(bitmap_size_bits >> 3), bitmaps, 0, NULL, NULL), "failed in clEnqueueWriteBuffer buffer_bitmaps.");
375 	HANDLE_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], buffer_offset_table, CL_TRUE, 0, sizeof(OFFSET_TABLE_WORD) * offset_table_size, offset_table, 0, NULL, NULL), "failed in clEnqueueWriteBuffer buffer_offset_table.");
376 	HANDLE_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], buffer_hash_table, CL_TRUE, 0, sizeof(cl_uint) * hash_table_size_128 * 2, hash_table_128, 0, NULL, NULL), "failed in clEnqueueWriteBuffer buffer_hash_table.");
377 
378 	HANDLE_CLERROR(clSetKernelArg(kernel, 4, sizeof(buffer_bitmaps), (void *) &buffer_bitmaps), "Error setting argument 5.");
379 	HANDLE_CLERROR(clSetKernelArg(kernel, 5, sizeof(buffer_offset_table), (void *) &buffer_offset_table), "Error setting argument 6.");
380 	HANDLE_CLERROR(clSetKernelArg(kernel, 6, sizeof(buffer_hash_table), (void *) &buffer_hash_table), "Error setting argument 7.");
381 	HANDLE_CLERROR(clSetKernelArg(kernel, 7, sizeof(buffer_return_hashes), (void *) &buffer_return_hashes), "Error setting argument 8.");
382 	HANDLE_CLERROR(clSetKernelArg(kernel, 8, sizeof(buffer_hash_ids), (void *) &buffer_hash_ids), "Error setting argument 9.");
383 	HANDLE_CLERROR(clSetKernelArg(kernel, 9, sizeof(buffer_bitmap_dupe), (void *) &buffer_bitmap_dupe), "Error setting argument 10.");
384 }
385 
ocl_hc_128_extract_info(struct db_salt * salt,void (* set_kernel_args)(void),void (* set_kernel_args_kpc)(void),void (* init_kernel)(unsigned int,char *),size_t gws,size_t * lws,int * pcount)386 int ocl_hc_128_extract_info(struct db_salt *salt, void (*set_kernel_args)(void), void (*set_kernel_args_kpc)(void), void (*init_kernel)(unsigned int, char *), size_t gws, size_t *lws, int *pcount)
387 {
388 	if (salt != NULL && salt->count > 4500 &&
389 		(num_loaded_hashes - num_loaded_hashes / 10) > salt->count) {
390 		size_t old_ot_sz_bytes, old_ht_sz_bytes;
391 		ocl_hc_128_prepare_table(salt);
392 		init_kernel(salt->count, ocl_hc_128_select_bitmap(salt->count));
393 
394 		BENCH_CLERROR(clGetMemObjectInfo(buffer_offset_table, CL_MEM_SIZE, sizeof(size_t), &old_ot_sz_bytes, NULL), "failed to query buffer_offset_table.");
395 
396 		if (old_ot_sz_bytes < offset_table_size *
397 			sizeof(OFFSET_TABLE_WORD)) {
398 			BENCH_CLERROR(clReleaseMemObject(buffer_offset_table), "Error Releasing buffer_offset_table.");
399 
400 			buffer_offset_table = clCreateBuffer(context[gpu_id], CL_MEM_READ_ONLY, offset_table_size * sizeof(OFFSET_TABLE_WORD), NULL, &ret_code);
401 			BENCH_CLERROR(ret_code, "Error creating buffer argument buffer_offset_table.");
402 		}
403 
404 		BENCH_CLERROR(clGetMemObjectInfo(buffer_hash_table, CL_MEM_SIZE, sizeof(size_t), &old_ht_sz_bytes, NULL), "failed to query buffer_hash_table.");
405 
406 		if (old_ht_sz_bytes < hash_table_size_128 * sizeof(cl_uint) * 2) {
407 			BENCH_CLERROR(clReleaseMemObject(buffer_hash_table), "Error Releasing buffer_hash_table.");
408 			BENCH_CLERROR(clReleaseMemObject(buffer_bitmap_dupe), "Error Releasing buffer_bitmap_dupe.");
409 			MEM_FREE(zero_buffer);
410 
411 			zero_buffer = (cl_uint *) mem_calloc(hash_table_size_128/32 + 1, sizeof(cl_uint));
412 			buffer_bitmap_dupe = clCreateBuffer(context[gpu_id], CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, (hash_table_size_128/32 + 1) * sizeof(cl_uint), zero_buffer, &ret_code);
413 			BENCH_CLERROR(ret_code, "Error creating buffer argument buffer_bitmap_dupe.");
414 			buffer_hash_table = clCreateBuffer(context[gpu_id], CL_MEM_READ_ONLY, hash_table_size_128 * sizeof(cl_uint) * 2, NULL, &ret_code);
415 			BENCH_CLERROR(ret_code, "Error creating buffer argument buffer_hash_table.");
416 		}
417 
418 		BENCH_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], buffer_bitmaps, CL_TRUE, 0, (bitmap_size_bits >> 3), bitmaps, 0, NULL, NULL), "failed in clEnqueueWriteBuffer buffer_bitmaps.");
419 		BENCH_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], buffer_offset_table, CL_TRUE, 0, sizeof(OFFSET_TABLE_WORD) * offset_table_size, offset_table, 0, NULL, NULL), "failed in clEnqueueWriteBuffer buffer_offset_table.");
420 		BENCH_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], buffer_hash_table, CL_TRUE, 0, sizeof(cl_uint) * hash_table_size_128 * 2, hash_table_128, 0, NULL, NULL), "failed in clEnqueueWriteBuffer buffer_hash_table.");
421 
422 		BENCH_CLERROR(clSetKernelArg(crypt_kernel, 4, sizeof(buffer_bitmaps), (void *) &buffer_bitmaps), "Error setting argument 5.");
423 		BENCH_CLERROR(clSetKernelArg(crypt_kernel, 5, sizeof(buffer_offset_table), (void *) &buffer_offset_table), "Error setting argument 6.");
424 		BENCH_CLERROR(clSetKernelArg(crypt_kernel, 6, sizeof(buffer_hash_table), (void *) &buffer_hash_table), "Error setting argument 7.");
425 		BENCH_CLERROR(clSetKernelArg(crypt_kernel, 7, sizeof(buffer_return_hashes), (void *) &buffer_return_hashes), "Error setting argument 8.");
426 		BENCH_CLERROR(clSetKernelArg(crypt_kernel, 8, sizeof(buffer_hash_ids), (void *) &buffer_hash_ids), "Error setting argument 9.");
427 		BENCH_CLERROR(clSetKernelArg(crypt_kernel, 9, sizeof(buffer_bitmap_dupe), (void *) &buffer_bitmap_dupe), "Error setting argument 10.");
428 		set_kernel_args();
429 		set_kernel_args_kpc();
430 	}
431 
432 	BENCH_CLERROR(clEnqueueNDRangeKernel(queue[gpu_id], crypt_kernel, 1, NULL, &gws, lws, 0, NULL, multi_profilingEvent[2]), "failed in clEnqueueNDRangeKernel");
433 
434 	BENCH_CLERROR(clEnqueueReadBuffer(queue[gpu_id], buffer_hash_ids, CL_TRUE, 0, sizeof(cl_uint), hash_ids, 0, NULL, multi_profilingEvent[3]), "failed in reading back num cracked hashes.");
435 
436 	if (hash_ids[0] > num_loaded_hashes) {
437 		fprintf(stderr, "Error, crypt_all kernel.\n");
438 		error();
439 	}
440 
441 	if (hash_ids[0]) {
442 		BENCH_CLERROR(clEnqueueReadBuffer(queue[gpu_id], buffer_return_hashes, CL_FALSE, 0, 2 * sizeof(cl_uint) * hash_ids[0], loaded_hashes, 0, NULL, NULL), "failed in reading back return_hashes.");
443 		BENCH_CLERROR(clEnqueueReadBuffer(queue[gpu_id], buffer_hash_ids, CL_TRUE, 0, (3 * hash_ids[0] + 1) * sizeof(cl_uint), hash_ids, 0, NULL, NULL), "failed in reading data back hash_ids.");
444 		BENCH_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], buffer_bitmap_dupe, CL_FALSE, 0, (hash_table_size_128/32 + 1) * sizeof(cl_uint), zero_buffer, 0, NULL, NULL), "failed in clEnqueueWriteBuffer buffer_bitmap_dupe.");
445 		BENCH_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], buffer_hash_ids, CL_TRUE, 0, sizeof(cl_uint), zero_buffer, 0, NULL, NULL), "failed in clEnqueueWriteBuffer buffer_hash_ids.");
446 	}
447 
448 	*pcount *= mask_int_cand.num_int_cand;
449 	return hash_ids[0];
450 }
451 
ocl_hc_128_rlobj(void)452 void ocl_hc_128_rlobj(void)
453 {
454 	if (buffer_bitmaps) {
455 		HANDLE_CLERROR(clReleaseMemObject(buffer_return_hashes), "Error Releasing buffer_return_hashes.");
456 		HANDLE_CLERROR(clReleaseMemObject(buffer_offset_table), "Error Releasing buffer_offset_table.");
457 		HANDLE_CLERROR(clReleaseMemObject(buffer_hash_table), "Error Releasing buffer_hash_table.");
458 		HANDLE_CLERROR(clReleaseMemObject(buffer_bitmap_dupe), "Error Releasing buffer_bitmap_dupe.");
459 		HANDLE_CLERROR(clReleaseMemObject(buffer_hash_ids), "Error Releasing buffer_hash_ids.");
460 		HANDLE_CLERROR(clReleaseMemObject(buffer_bitmaps), "Error Releasing buffer_bitmap.");
461 		MEM_FREE(zero_buffer);
462 		buffer_bitmaps = 0;
463 	}
464 
465 	if (loaded_hashes)
466 		MEM_FREE(loaded_hashes);
467 	if (hash_ids)
468 		MEM_FREE(hash_ids);
469 	if (bitmaps)
470 		MEM_FREE(bitmaps);
471 	if (offset_table)
472 		MEM_FREE(offset_table);
473 	if (hash_table_128)
474 		MEM_FREE(hash_table_128);
475 }
476 
ocl_hc_128_cmp_all(void * binary,int count)477 int ocl_hc_128_cmp_all(void *binary, int count)
478 {
479 	if (count) return 1;
480 	return 0;
481 }
482 
ocl_hc_128_cmp_one(void * binary,int index)483 int ocl_hc_128_cmp_one(void *binary, int index)
484 {
485 	return (((unsigned int*)binary)[0] ==
486 		hash_table_128[hash_ids[3 + 3 * index]] &&
487 		((unsigned int*)binary)[1] ==
488 		hash_table_128[hash_table_size_128+ hash_ids[3 + 3 * index]]);
489 }
490 
ocl_hc_128_cmp_exact(char * source,int index)491 int ocl_hc_128_cmp_exact(char *source, int index)
492 {
493 	unsigned int *t = (unsigned int *) (self->methods.binary(source));
494 
495 	if (t[2] != loaded_hashes[2 * index])
496 		return 0;
497 	if (t[3] != loaded_hashes[2 * index + 1])
498 		return 0;
499 	return 1;
500 }
501 
502 #endif /* HAVE_OPENCL */
503