1 /*
2  * Developed by Claudio André <claudioandre.br at gmail.com> in 2012
3  * Based on source code provided by Samuele Giovanni Tonon
4  *
5  * More information at http://openwall.info/wiki/john/OpenCL-RAWSHA-256
6  * More information at http://openwall.info/wiki/john/OpenCL-CISCO4
7  *
8  * Copyright (c) 2011 Samuele Giovanni Tonon <samu at linuxasylum dot net>
9  * Copyright (c) 2012-2016 Claudio André <claudioandre.br at gmail.com>
10  * This program comes with ABSOLUTELY NO WARRANTY; express or implied .
11  * This is free software, and you are welcome to redistribute it
12  * under certain conditions; as expressed here
13  * http://www.gnu.org/licenses/gpl-2.0.html
14  */
15 
16 #ifdef HAVE_OPENCL
17 
18 #if FMT_EXTERNS_H
19 extern struct fmt_main fmt_opencl_rawsha256;
20 #elif FMT_REGISTERS_H
21 john_register_one(&fmt_opencl_rawsha256);
22 #else
23 
24 #include <string.h>
25 
26 #include "sha.h"
27 #include "sha2.h"
28 #include "johnswap.h"
29 #include "opencl_common.h"
30 #include "config.h"
31 #include "options.h"
32 #include "opencl_rawsha256.h"
33 #include "rawSHA256_common.h"
34 
35 #include "mask_ext.h"
36 #include "opencl_mask_extras.h"
37 
38 #define FORMAT_LABEL            "raw-SHA256-opencl"
39 #define FORMAT_NAME             ""
40 
41 #define ALGORITHM_NAME          "SHA256 OpenCL"
42 
43 #define BINARY_SIZE             DIGEST_SIZE
44 
45 //plaintext: keys to compute the hash function
46 //saved_idx: offset and length of each plaintext (data is sent using chunks)
47 static uint32_t *plaintext, *saved_idx;
48 
49 static cl_mem pass_buffer;      //Plaintext buffer.
50 static cl_mem idx_buffer;       //Sizes and offsets buffer.
51 static cl_kernel prepare_kernel;
52 
53 //Pinned buffers
54 static cl_mem pinned_plaintext, pinned_saved_idx, pinned_int_key_loc;
55 
56 //Reference to self
57 static struct fmt_main *self;
58 
59 //Reference to the first element in salt list
60 static struct db_main *main_db;
61 
62 //Device (GPU) buffers
63 //int_keys: mask to apply
//hash_ids: information about how to recover the cracked password
65 //bitmap: a bitmap memory space.
66 //int_key_loc: the position of the mask to apply.
67 static cl_mem buffer_int_keys, buffer_hash_ids, buffer_bitmap, buffer_int_key_loc;
68 
69 //Host buffers
70 //saved_int_key_loc: the position of the mask to apply
71 //num_loaded_hashes: number of binary hashes transferred/loaded to GPU
//hash_ids: information about how to recover the cracked password
73 static uint32_t *saved_int_key_loc, num_loaded_hashes, *hash_ids, *saved_bitmap;
74 
//ocl_initialized: a reference counter of the OpenCL objects (expected to be 0 or 1)
76 static unsigned ocl_initialized = 0;
77 
78 // Keeps track of whether we should tune for this reset() call.
79 static int should_tune;
80 
81 //Used to control partial key transfers.
82 static uint32_t key_idx = 0;
83 static size_t offset = 0, offset_idx = 0;
84 
85 static uint32_t bitmap_size;
86 
87 static void load_hash();
88 static char *get_key(int index);
89 static void build_kernel();
90 static void release_kernel();
91 static void release_mask_buffers(void);
92 
93 //This file contains auto-tuning routine(s). It has to be included after formats definitions.
94 #include "opencl_autotune.h"
95 
96 /* ------- Helper functions ------- */
get_task_max_work_group_size()97 static size_t get_task_max_work_group_size()
98 {
99 	size_t s;
100 
101 	s = autotune_get_task_max_work_group_size(FALSE, 0, crypt_kernel);
102 	s = MIN(s, autotune_get_task_max_work_group_size(FALSE, 0,
103 	        prepare_kernel));
104 	return MIN(s, 512);
105 }
106 
get_num_loaded_hashes()107 static uint32_t get_num_loaded_hashes()
108 {
109 	uint32_t num_hashes;
110 	struct db_salt *current_salt;
111 
112 	num_hashes = 0;
113 	current_salt = main_db->salts;
114 
115 	do
116 		num_hashes += current_salt->count;
117 	while ((current_salt = current_salt->next));
118 
119 	return num_hashes;
120 }
121 
crypt_one(int index)122 static uint32_t *crypt_one(int index) {
123 	SHA256_CTX ctx;
124 	static uint32_t hash[DIGEST_SIZE / sizeof(uint32_t)];
125 
126 	char * key = get_key(index);
127 	int len = strlen(key);
128 
129 	SHA256_Init(&ctx);
130 	SHA256_Update(&ctx, key, len);
131 	SHA256_Final((unsigned char *) (hash), &ctx);
132 
133 	alter_endianity_to_BE(hash, DIGEST_SIZE / sizeof(uint32_t));
134 
135 	return hash;
136 }
137 
138 /* ------- Create and destroy necessary objects ------- */
/*
 * (Re)allocate the host bitmap and its device-side counterpart, then
 * bind the device buffer as crypt-kernel argument 6. Safe to call
 * repeatedly: any previously created buffers are released first.
 */
static void create_mask_buffers()
{
	release_mask_buffers();

	//bitmap_size is in bits; allocate enough 32-bit words to hold them.
	saved_bitmap = (uint32_t *)
		mem_alloc((bitmap_size / 32 + 1) * sizeof(uint32_t));
	buffer_bitmap = clCreateBuffer(context[gpu_id], CL_MEM_WRITE_ONLY,
		(bitmap_size / 32 + 1) * sizeof(uint32_t), NULL, &ret_code);
	HANDLE_CLERROR(ret_code, "Error creating buffer argument buffer_bitmap");

	//Set crypt kernel arguments
	HANDLE_CLERROR(clSetKernelArg(crypt_kernel, 6, sizeof(buffer_bitmap),
	                              (void *)&buffer_bitmap), "Error setting argument 6");
}
153 
release_mask_buffers()154 static void release_mask_buffers()
155 {
156 	MEM_FREE(saved_bitmap);
157 
158 	if (buffer_bitmap)
159 		clReleaseMemObject(buffer_bitmap);
160 	buffer_bitmap = NULL;
161 }
162 
/*
 * Create all OpenCL objects that depend on the global work size (gws):
 * pinned (page-locked) host buffers, their device counterparts, the
 * mask-mode buffers, and bind everything as kernel arguments.
 *
 * When GPU-side mask mode is active (num_int_cand > 1) the int_key_loc
 * and int_keys buffers are sized for real use; otherwise they are
 * 1-element placeholders so the kernel arguments remain valid.
 */
static void create_clobj(size_t gws, struct fmt_main *self)
{
	uint32_t hash_id_size;
	size_t mask_cand = 1, mask_gws = 1;

	if (mask_int_cand.num_int_cand > 1) {
		mask_cand = mask_int_cand.num_int_cand;
		mask_gws = gws;
	}

	//Pinned buffer backing the candidate keys (mapped as `plaintext`).
	pinned_plaintext = clCreateBuffer(context[gpu_id],
	                                  CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR,
	                                  BUFFER_SIZE * gws, NULL, &ret_code);
	HANDLE_CLERROR(ret_code,
	               "Error creating page-locked memory pinned_plaintext");

	plaintext = (uint32_t *) clEnqueueMapBuffer(queue[gpu_id],
	            pinned_plaintext, CL_TRUE, CL_MAP_WRITE, 0,
	            BUFFER_SIZE * gws, 0, NULL, NULL, &ret_code);
	HANDLE_CLERROR(ret_code, "Error mapping page-locked memory plaintext");

	//Pinned buffer for the packed (offset << 6 | length) index words.
	pinned_saved_idx = clCreateBuffer(context[gpu_id],
	                                  CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR,
	                                  sizeof(uint32_t) * gws, NULL, &ret_code);
	HANDLE_CLERROR(ret_code,
	               "Error creating page-locked memory pinned_saved_idx");

	saved_idx = (uint32_t *) clEnqueueMapBuffer(queue[gpu_id],
	            pinned_saved_idx, CL_TRUE, CL_MAP_WRITE, 0,
	            sizeof(uint32_t) * gws, 0, NULL, NULL, &ret_code);
	HANDLE_CLERROR(ret_code, "Error mapping page-locked memory saved_idx");

	// create arguments (buffers)
	pass_buffer = clCreateBuffer(context[gpu_id], CL_MEM_READ_ONLY,
	                             BUFFER_SIZE * gws, NULL, &ret_code);
	HANDLE_CLERROR(ret_code, "Error creating buffer argument pass_buffer");

	idx_buffer = clCreateBuffer(context[gpu_id], CL_MEM_READ_ONLY,
	                            sizeof(uint32_t) * gws, NULL, &ret_code);
	HANDLE_CLERROR(ret_code, "Error creating buffer argument idx_buffer");

	//hash_ids layout: [0] = hit count, then 3 words per possible hit.
	hash_id_size = mask_int_cand.num_int_cand * gws;
	hash_ids = (uint32_t *) mem_alloc(
		hash_id_size * 3 * sizeof(uint32_t) + sizeof(uint32_t));
	buffer_hash_ids = clCreateBuffer(context[gpu_id], CL_MEM_READ_WRITE,
		hash_id_size * 3 * sizeof(uint32_t) + sizeof(uint32_t),
		NULL, &ret_code);

	HANDLE_CLERROR(ret_code, "Error creating buffer argument buffer_buffer_hash_ids");

	//Mask mode
	pinned_int_key_loc = clCreateBuffer(context[gpu_id],
					    CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR,
					    sizeof(uint32_t) * mask_gws, NULL, &ret_code);
	HANDLE_CLERROR(ret_code,
		       "Error creating page-locked memory pinned_int_key_loc");

	saved_int_key_loc = (uint32_t *) clEnqueueMapBuffer(queue[gpu_id],
			    pinned_int_key_loc, CL_TRUE, CL_MAP_WRITE, 0,
			    sizeof(uint32_t) * mask_gws, 0, NULL, NULL, &ret_code);
	HANDLE_CLERROR(ret_code,
		       "Error mapping page-locked memory saved_int_key_loc");

	buffer_int_key_loc = clCreateBuffer(context[gpu_id], CL_MEM_READ_ONLY,
					    sizeof(uint32_t) * mask_gws, NULL, &ret_code);
	HANDLE_CLERROR(ret_code,
		       "Error creating buffer argument buffer_int_key_loc");

	buffer_int_keys = clCreateBuffer(context[gpu_id], CL_MEM_READ_ONLY,
					 4 * mask_cand, NULL, &ret_code);
	HANDLE_CLERROR(ret_code, "Error creating buffer argument buffer_int_keys");

	//Set prepare kernel arguments
	HANDLE_CLERROR(clSetKernelArg(prepare_kernel, 0, sizeof(cl_uint),
	                              (void *)&mask_int_cand.num_int_cand), "Error setting argument 0");
	HANDLE_CLERROR(clSetKernelArg(prepare_kernel, 1, sizeof(buffer_hash_ids),
	                              (void *)&buffer_hash_ids), "Error setting argument 1");

	//Set kernel arguments
	HANDLE_CLERROR(clSetKernelArg(crypt_kernel, 0, sizeof(cl_mem),
	                              (void *)&pass_buffer), "Error setting argument 0");
	HANDLE_CLERROR(clSetKernelArg(crypt_kernel, 1, sizeof(cl_mem),
	                              (void *)&idx_buffer), "Error setting argument 1");
	HANDLE_CLERROR(clSetKernelArg(crypt_kernel, 2, sizeof(buffer_int_key_loc),
				      (void *)&buffer_int_key_loc), "Error setting argument 2");
	HANDLE_CLERROR(clSetKernelArg(crypt_kernel, 3, sizeof(buffer_int_keys),
				      (void *)&buffer_int_keys), "Error setting argument 3");
	HANDLE_CLERROR(clSetKernelArg(crypt_kernel, 4, sizeof(cl_uint),
				      (void *)&(mask_int_cand.num_int_cand)),
			              "Error setting argument 4");
	HANDLE_CLERROR(clSetKernelArg(crypt_kernel, 5, sizeof(buffer_hash_ids),
			      (void *)&buffer_hash_ids), "Error setting argument 5");

	//Indicates that the OpenCL objects are initialized.
	ocl_initialized++;

	//Assure buffers have no "trash data".
	memset(plaintext, '\0', BUFFER_SIZE * gws);
	memset(saved_idx, '\0', sizeof(uint32_t) * gws);
	//0x80 matches the "unused placeholder" sentinel written by set_key().
	memset(saved_int_key_loc, 0x80, sizeof(uint32_t) * mask_gws);
}
264 
release_clobj()265 static void release_clobj()
266 {
267 	cl_int ret_code;
268 
269 	if (ocl_initialized) {
270 		ret_code = clEnqueueUnmapMemObject(queue[gpu_id], pinned_plaintext,
271 		                                   plaintext, 0, NULL, NULL);
272 		HANDLE_CLERROR(ret_code, "Error Unmapping keys");
273 		ret_code = clEnqueueUnmapMemObject(queue[gpu_id], pinned_saved_idx,
274 		                                   saved_idx, 0, NULL, NULL);
275 		HANDLE_CLERROR(ret_code, "Error Unmapping indexes");
276 		ret_code = clEnqueueUnmapMemObject(queue[gpu_id], pinned_int_key_loc,
277 							   saved_int_key_loc, 0, NULL, NULL);
278 		HANDLE_CLERROR(ret_code, "Error Unmapping key locations");
279 		HANDLE_CLERROR(clFinish(queue[gpu_id]),
280 		               "Error releasing memory mappings");
281 
282 		ret_code = clReleaseMemObject(pass_buffer);
283 		HANDLE_CLERROR(ret_code, "Error Releasing pass_buffer");
284 		ret_code = clReleaseMemObject(idx_buffer);
285 		HANDLE_CLERROR(ret_code, "Error Releasing idx_buffer");
286 
287 		MEM_FREE(hash_ids);
288 		clReleaseMemObject(buffer_hash_ids);
289 		HANDLE_CLERROR(ret_code, "Error Releasing buffer_hash_ids");
290 
291 		ret_code = clReleaseMemObject(buffer_int_key_loc);
292 		HANDLE_CLERROR(ret_code, "Error Releasing buffer_int_key_loc");
293 		ret_code = clReleaseMemObject(buffer_int_keys);
294 		HANDLE_CLERROR(ret_code, "Error Releasing buffer_int_keys");
295 		ret_code = clReleaseMemObject(pinned_plaintext);
296 		HANDLE_CLERROR(ret_code, "Error Releasing pinned_plaintext");
297 		ret_code = clReleaseMemObject(pinned_saved_idx);
298 		HANDLE_CLERROR(ret_code, "Error Releasing pinned_saved_idx");
299 		ret_code = clReleaseMemObject(pinned_int_key_loc);
300 		HANDLE_CLERROR(ret_code, "Error Releasing pinned_int_key_loc");
301 
302 		ocl_initialized--;
303 	}
304 }
305 
306 /* ------- Reset functions ------- */
/*
 * Auto-tune LWS/GWS for this format on the current device.
 * gws_limit is capped by the device's maximum memory allocation size;
 * in mask mode it is further capped by the space the hash_ids buffer
 * would need. "_GPU_AUTOTUNE_LIMIT" overrides the autotune budget
 * (units as interpreted by autotune_run() — TODO confirm, likely ms).
 */
static void tune(struct db_main *db)
{
	char *tmp_value;
	size_t gws_limit;
	int autotune_limit = 500;

	if ((tmp_value = getenv("_GPU_AUTOTUNE_LIMIT")))
		autotune_limit = atoi(tmp_value);

	// Auto-tune / Benchmark / Self-test.
	gws_limit = MIN((0xf << 22) * 4 / BUFFER_SIZE,
			get_max_mem_alloc_size(gpu_id) / BUFFER_SIZE);

	if (options.flags & FLG_MASK_CHK)
		gws_limit = MIN(gws_limit,
			get_max_mem_alloc_size(gpu_id) /
			(mask_int_cand.num_int_cand  * 3 * sizeof(uint32_t)));

	//Initialize openCL tuning (library) for this format.
	opencl_init_auto_setup(SEED, 0, NULL,
			       warn, 3, self, create_clobj, release_clobj,
			       2 * BUFFER_SIZE, gws_limit, db);

	//Auto tune execution from shared/included code.
	autotune_run(self, 1, gws_limit, autotune_limit);
}
333 
/*
 * Per-database (re)initialization: rebuild the kernel if its parameters
 * changed, pick or re-use LWS/GWS, and upload the loaded hashes.
 *
 * Tuning strategy by call:
 *  - first call during self-test / --skip-self-test (not benchmarking):
 *    use sane defaults, tune, then force a later re-tune (autotuned = 0);
 *  - first "real" call: restore saved user prefs and auto-tune;
 *  - mask mode: re-tune on every mask change;
 *  - otherwise: just recreate the buffers for the current GWS.
 */
static void reset(struct db_main *db)
{
	static size_t saved_lws, saved_gws;

	//Restart the partial key-transfer bookkeeping.
	offset = 0;
	offset_idx = 0;
	key_idx = 0;

	if (!db)
		return;

	main_db = db;
	num_loaded_hashes = get_num_loaded_hashes();

	//Adjust kernel parameters and rebuild (if necessary).
	build_kernel();

	if (!should_tune) {
		/* Read LWS/GWS prefs from config or environment */
		opencl_get_user_preferences(FORMAT_LABEL);

		//Save the local and global work sizes.
		saved_lws = local_work_size;
		saved_gws = global_work_size;
	}

	/*
	 * First reset() call. Don't run autotune.
	 *    -> If self test is running.
	 *    -> If --skip-self-test is running.
	 *    -> And if benchmark is NOT running.
	 *   Instead, use sane defaults. Tune is going to run later/below.
	 */
	if (!should_tune && (self_test_running || options.flags & FLG_NOTESTS) &&
	    !benchmark_running) {
		opencl_get_sane_lws_gws_values();
		tune(db);

		//Tune later.
		autotuned = 0;
	} else if (!autotuned) {
		//Retrieve LWS/GWS prefs saved on first round
		local_work_size = saved_lws;
		global_work_size = saved_gws;

		tune(db);
	} else if ((options.flags & FLG_MASK_CHK)) {
		//Tune for mask mode (for each mask change).
		// auto-tune for eg. ?a and then a reset for re-tuning for ?a?a
		local_work_size = saved_lws;
		global_work_size = saved_gws;

		tune(db);
	} else {
		//Since it might re-compiled the kernel after tuning.
		if (ocl_initialized > 0)
			release_clobj();

		create_clobj(global_work_size, self);
	}
	should_tune++;
	//No candidate hits yet; then push the current hash set to the GPU.
	hash_ids[0] = 0;
	load_hash();
}
398 
399 /* ------- Key functions ------- */
clear_keys(void)400 static void clear_keys(void)
401 {
402 	offset = 0;
403 	offset_idx = 0;
404 	key_idx = 0;
405 }
406 
/*
 * Store a candidate key in the host-side buffers.
 * Keys are copied as 32-bit words starting at plaintext[key_idx];
 * saved_idx[index] packs the word offset and length as
 * (key_idx << 6) | len, so lengths are limited to 63 by the encoding.
 * In GPU mask mode, saved_int_key_loc[index] records one byte per
 * placeholder: the in-key position where that mask range applies, or
 * 0x80 for an unused placeholder.
 * Every TRANSFER_SIZE keys the accumulated data is flushed to the GPU
 * with non-blocking writes; crypt_all() sends any remainder.
 */
static void set_key(char *_key, int index)
{

	const uint32_t *key = (uint32_t *) _key;
	int len = strlen(_key);

	saved_idx[index] = (key_idx << 6) | len;

	//Copy whole 32-bit words; the do-while always stores at least one.
	do {
		plaintext[key_idx++] = *key++;
		len -= 4;
	} while (len > 4);

	//Copy the trailing partial (or final full) word, if any remains.
	if (len > 0)
		plaintext[key_idx++] = *key;

	//Mask Mode ranges setup
	if (mask_int_cand.num_int_cand > 1) {
		int i;

		saved_int_key_loc[index] = 0;

		for (i = 0; i < MASK_FMT_INT_PLHDR; i++) {

			if (mask_skip_ranges[i] != -1) {
				//Byte i holds the key position of placeholder i.
				saved_int_key_loc[index] |=
				    ((mask_int_cand.int_cpu_mask_ctx->
				      ranges[mask_skip_ranges[i]].offset +
				      mask_int_cand.int_cpu_mask_ctx->
				      ranges[mask_skip_ranges[i]].pos) & 0xff)
				    << (i << 3);
			} else
				saved_int_key_loc[index] |= 0x80 << (i << 3);
		}
	}
	//Batch transfers to GPU.
	if ((index % TRANSFER_SIZE) == 0 && (index > 0)) {
		HANDLE_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], pass_buffer,
		                                    CL_FALSE, sizeof(uint32_t) * offset,
		                                    sizeof(uint32_t) * TRANSFER_SIZE,
		                                    plaintext + offset, 0, NULL, NULL),
		               "failed in clEnqueueWriteBuffer pass_buffer");
		HANDLE_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], idx_buffer,
		                                    CL_FALSE, sizeof(uint32_t) * offset,
		                                    sizeof(uint32_t) * TRANSFER_SIZE,
		                                    saved_idx + offset, 0, NULL, NULL),
		               "failed in clEnqueueWriteBuffer idx_buffer");

		HANDLE_CLERROR(clFlush(queue[gpu_id]), "failed in clFlush");
		offset += TRANSFER_SIZE;
		offset_idx = key_idx;
	}
}
460 
/*
 * Rebuild the plaintext for "index" into a static buffer.
 * After a crypt_all() that reported hits, index selects a hash_ids
 * entry: hash_ids[1 + 3*index] is the GWS position of the base key and
 * hash_ids[2 + 3*index] the mask candidate number. Otherwise index is
 * used directly as the GWS position.
 * In mask mode the placeholder positions recorded by set_key() are then
 * patched with the characters of the selected mask candidate.
 */
static char *get_key(int index)
{
	static char *ret;
	int int_index, t, i;

	if (!ret)
		ret = mem_alloc_tiny(PLAINTEXT_LENGTH + 1, MEM_ALIGN_WORD);

	//Mask Mode plaintext recovery
	if (hash_ids == NULL || hash_ids[0] == 0 || index > hash_ids[0]) {
		t = index;
		int_index = 0;

	} else {
		t = hash_ids[1 + 3 * index];
		int_index = hash_ids[2 + 3 * index];
	}

	//Guard against an out-of-range GWS position.
	if (t >= global_work_size)
		t = 0;

	//saved_idx packs (word offset << 6) | length; see set_key().
	memcpy(ret, ((char *)&plaintext[saved_idx[t] >> 6]), PLAINTEXT_LENGTH);
	ret[saved_idx[t] & 63] = '\0';

	//Substitute the mask candidate's characters at the recorded positions.
	if (saved_idx[t] & 63 &&
	    mask_skip_ranges && mask_int_cand.num_int_cand > 1) {
		for (i = 0; i < MASK_FMT_INT_PLHDR && mask_skip_ranges[i] != -1; i++)
			ret[(saved_int_key_loc[t] & (0xff << (i * 8))) >> (i * 8)] =
			    mask_int_cand.int_cand[int_index].x[i];
	}

	return ret;

}
496 
497 /* ------- Initialization  ------- */
/*
 * (Re)build the OpenCL program and kernels, but only when the bitmap
 * size or the number of mask candidates changed since the last build
 * (both are baked into the program via build options). Always
 * (re)creates the bitmap buffers afterwards.
 */
static void build_kernel()
{
	//Last build parameters, kept to avoid needless recompiles.
	static int previous_size, num_int_cand;

	char *task = "$JOHN/kernels/sha256_kernel.cl";
	char opt[MAX_OCLINFO_STRING_LEN];

	bitmap_size = get_bitmap_size_bits(num_loaded_hashes, gpu_id);

	if (previous_size != bitmap_size || num_int_cand != mask_int_cand.num_int_cand) {
		previous_size = bitmap_size;
		num_int_cand = mask_int_cand.num_int_cand;

		//Drop the previous program/kernels before rebuilding.
		if (prepare_kernel)
			release_kernel();

		snprintf(opt, sizeof(opt), "-DBITMAP_SIZE_MINUS1=%u", bitmap_size - 1U);

		if (mask_int_cand.num_int_cand > 1)
			strncat(opt, " -DGPU_MASK_MODE", 64U);

		opencl_build_kernel(task, gpu_id, opt, 0);

		// create kernel(s) to execute
		prepare_kernel = clCreateKernel(program[gpu_id], "kernel_prepare",
						&ret_code);
		HANDLE_CLERROR(ret_code,
			       "Error creating kernel_prepare. Double-check kernel name?");

		crypt_kernel = clCreateKernel(program[gpu_id],
						      "kernel_crypt_raw", &ret_code);
		HANDLE_CLERROR(ret_code,
			       "Error creating kernel. Double-check kernel name?");
	}
	//Allocate bit array and pass its size to OpenCL.
	create_mask_buffers();
}
535 
release_kernel()536 static void release_kernel()
537 {
538 	HANDLE_CLERROR(clReleaseKernel(crypt_kernel), "Release kernel");
539 	HANDLE_CLERROR(clReleaseKernel(prepare_kernel), "Release kernel");
540 	HANDLE_CLERROR(clReleaseProgram(program[gpu_id]), "Release Program");
541 
542 	prepare_kernel = NULL;
543 }
544 
init(struct fmt_main * _self)545 static void init(struct fmt_main *_self)
546 {
547 	char *tmp_value;
548 
549 	self = _self;
550 	opencl_prepare_dev(gpu_id);
551 	mask_int_cand_target = opencl_speed_index(gpu_id) / 300;
552 
553 	if ((tmp_value = getenv("_GPU_MASK_CAND")))
554 		mask_int_cand_target = atoi(tmp_value);
555 }
556 
done(void)557 static void done(void)
558 {
559 	if (autotuned) {
560 		release_clobj();
561 		release_kernel();
562 		release_mask_buffers();
563 	}
564 	autotuned = 0;
565 	should_tune = 0;
566 	ocl_initialized = 0;
567 }
568 
/*
 * Populate the host bitmap from all still-loaded binaries.
 * SPREAD_32 derives two bit positions from each binary's four leading
 * words; both bits are set (presumably tested by the GPU kernel as a
 * reject filter — see the kernel source). Entries whose binary is NULL
 * (already cracked) are skipped.
 */
static void prepare_bit_array()
{
	uint32_t *binary;
	struct db_password *pw;
	struct db_salt *current_salt;

	current_salt = main_db->salts;
#ifdef DEBUG
	fprintf(stderr, "Clear bitmap array\n");
#endif
	memset(saved_bitmap, '\0', (bitmap_size / 8 + 1));

	do {
		pw = current_salt->list;

		do {
			unsigned int bit_mask_x, bit_mask_y;
			binary = (uint32_t *) pw->binary;

			// Skip cracked.
			if (binary) {
				SPREAD_32(binary[0], binary[1], binary[2], binary[3],
					(bitmap_size - 1U), bit_mask_x, bit_mask_y)
#ifdef DEBUG
				if (saved_bitmap[bit_mask_x >> 5] & (1U << (bit_mask_x & 31)) &&
				    saved_bitmap[bit_mask_y >> 5] & (1U << (bit_mask_y & 31)))
					fprintf(stderr, "Collision: %u %08x %08x %08x %08x\n",
						num_loaded_hashes, (unsigned int) binary[0],
						bit_mask_x, bit_mask_y,
						saved_bitmap[bit_mask_x >> 5]);
#endif
				saved_bitmap[bit_mask_x >> 5] |= (1U << (bit_mask_x & 31));
				saved_bitmap[bit_mask_y >> 5] |= (1U << (bit_mask_y & 31));
			}
		} while ((pw = pw->next));

	} while ((current_salt = current_salt->next));
}
607 
608 /* ------- Send hashes to crack (binary) to GPU ------- */
/*
 * Refresh the device-side bitmap: recount the still-loaded hashes,
 * rebuild the host bitmap, and copy it to the GPU with a blocking
 * write followed by clFinish.
 */
static void load_hash()
{
	num_loaded_hashes = get_num_loaded_hashes();

	prepare_bit_array();

	HANDLE_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], buffer_bitmap, CL_TRUE, 0,
		(bitmap_size / 32 + 1) * sizeof(uint32_t),
	        saved_bitmap, 0, NULL, NULL),
	        "failed in clEnqueueWriteBuffer buffer_bitmap");

	HANDLE_CLERROR(clFinish(queue[gpu_id]), "failed in clFinish");
}
622 
623 /* ------- Crypt function ------- */
/*
 * Run one crypt batch on the GPU.
 * Rounds the work size up to a multiple of LWS, reloads the bitmap if
 * hashes were cracked since the last call (salt count changed), sends
 * any key data not yet transferred, runs the prepare and crypt kernels,
 * and reads back the hit counter hash_ids[0]. The full hash_ids list is
 * only read back when there are hits. *pcount is multiplied by the
 * number of mask candidates, since each base key expands into that many
 * tested candidates. Returns the number of possible cracks.
 */
static int crypt_all(int *pcount, struct db_salt *_salt)
{
	const int count = *pcount;
	const struct db_salt *salt = _salt;
	size_t gws;
	size_t *lws = local_work_size ? &local_work_size : NULL;

	gws = GET_NEXT_MULTIPLE(count, local_work_size);

	//Check if any password was cracked and reload (if necessary)
	if (num_loaded_hashes != salt->count)
		load_hash();

	//kernel_prepare runs first (presumably resets per-run GPU state;
	//see the kernel source).
	BENCH_CLERROR(clEnqueueNDRangeKernel(queue[gpu_id], prepare_kernel, 1,
	                                     NULL, &gws, lws, 0, NULL, multi_profilingEvent[0]),
	              "failed in clEnqueueNDRangeKernel I");

	//Send data to device: only the portion not already flushed by set_key().
	if (key_idx > offset)
		BENCH_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], pass_buffer,
		                                   CL_FALSE, sizeof(uint32_t) * offset,
		                                   sizeof(uint32_t) * (key_idx - offset), plaintext + offset, 0,
		                                   NULL, multi_profilingEvent[1]),
		              "failed in clEnqueueWriteBuffer pass_buffer");

	BENCH_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], idx_buffer, CL_FALSE,
	                                   sizeof(uint32_t) * offset,
	                                   sizeof(uint32_t) * (gws - offset),
	                                   saved_idx + offset, 0, NULL, multi_profilingEvent[2]),
	              "failed in clEnqueueWriteBuffer idx_buffer");

	//Mask data only matters when GPU mask mode is active.
	if (mask_int_cand.num_int_cand > 1) {
		BENCH_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], buffer_int_key_loc,
		                                   CL_FALSE, 0, 4 * gws, saved_int_key_loc, 0, NULL,
		                                   multi_profilingEvent[5]),
		              "failed in clEnqueueWriteBuffer buffer_int_key_loc");

		BENCH_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], buffer_int_keys,
		                                   CL_FALSE, 0, 4 * mask_int_cand.num_int_cand,
		                                   mask_int_cand.int_cand, 0, NULL, multi_profilingEvent[6]),
		              "failed in clEnqueueWriteBuffer buffer_int_keys");
	}
	//Enqueue the kernel
	BENCH_CLERROR(clEnqueueNDRangeKernel(queue[gpu_id], crypt_kernel, 1, NULL,
	                                     &gws, lws, 0, NULL, multi_profilingEvent[3]),
	              "failed in clEnqueueNDRangeKernel");

	//Possible cracked hashes: read just the counter word first.
	BENCH_CLERROR(clEnqueueReadBuffer(queue[gpu_id], buffer_hash_ids, CL_FALSE,
	                                  0, sizeof(uint32_t), hash_ids,
	                                  0, NULL, multi_profilingEvent[4]),
	              "failed in reading data back buffer_hash_ids");

	//Do the work
	BENCH_CLERROR(clFinish(queue[gpu_id]), "failed in clFinish");

#ifdef DEBUG
	if (hash_ids[0])
		fprintf(stderr, "Some checks are going to be done on CPU: %u: %1.4f%%\n", hash_ids[0],
			((double) hash_ids[0]) / (global_work_size * mask_int_cand.num_int_cand) * 100);
#endif
	//Sanity check: the counter can never exceed the candidate space.
	if (hash_ids[0] > global_work_size * mask_int_cand.num_int_cand) {
		fprintf(stderr, "Error, crypt_all() kernel: %u.\n", hash_ids[0]);
		error();
	}

	//Fetch the full id list (3 words per hit) only when there are hits.
	if (hash_ids[0]) {
		BENCH_CLERROR(clEnqueueReadBuffer(queue[gpu_id], buffer_hash_ids, CL_FALSE,
			0, (hash_ids[0] * 3 * sizeof(uint32_t) + sizeof(uint32_t)), hash_ids,
						  0, NULL, NULL),
			      "failed in reading data back buffer_hash_ids");

		//Do the work
		BENCH_CLERROR(clFinish(queue[gpu_id]), "failed in clFinish");
	}
	*pcount *= mask_int_cand.num_int_cand;
	return hash_ids[0];
}
702 
703 /* ------- Compare functins ------- */
/*
 * The GPU already pre-filtered candidates via the bitmap, so any
 * non-empty result set is worth checking further in cmp_one().
 */
static int cmp_all(void *binary, int count)
{
	return count > 0;
}
708 
cmp_one(void * binary,int index)709 static int cmp_one(void *binary, int index)
710 {
711 	return (hash_ids[3 + 3 * index] == ((uint32_t *) binary)[0]);
712 }
713 
cmp_exact(char * source,int index)714 static int cmp_exact(char *source, int index)
715 {
716 	uint32_t *binary;
717 	uint32_t *full_hash;
718 
719 #ifdef DEBUG
720 	fprintf(stderr, "Stressing CPU\n");
721 #endif
722 	binary = (uint32_t *) sha256_common_binary_BE(source);
723 
724 	full_hash = crypt_one(index);
725 	return !memcmp(binary, (void *) full_hash, BINARY_SIZE);
726 }
727 
728 //Get Hash functions group.
get_hash_0(int index)729 static int get_hash_0(int index)
730 {
731 	return hash_ids[3 + 3 * index] & PH_MASK_0;
732 }
733 
get_hash_1(int index)734 static int get_hash_1(int index)
735 {
736 	return hash_ids[3 + 3 * index] & PH_MASK_1;
737 }
738 
get_hash_2(int index)739 static int get_hash_2(int index)
740 {
741 	return hash_ids[3 + 3 * index] & PH_MASK_2;
742 }
743 
get_hash_3(int index)744 static int get_hash_3(int index)
745 {
746 	return hash_ids[3 + 3 * index] & PH_MASK_3;
747 }
748 
get_hash_4(int index)749 static int get_hash_4(int index)
750 {
751 	return hash_ids[3 + 3 * index] & PH_MASK_4;
752 }
753 
get_hash_5(int index)754 static int get_hash_5(int index)
755 {
756 	return hash_ids[3 + 3 * index] & PH_MASK_5;
757 }
758 
get_hash_6(int index)759 static int get_hash_6(int index)
760 {
761 	return hash_ids[3 + 3 * index] & PH_MASK_6;
762 }
763 
764 /* ------- Format structure ------- */
/*
 * Format descriptor registered with John the Ripper.
 * The first sub-struct holds the format parameters (labels, lengths,
 * flags, signature tags, self-tests); the second is the method table,
 * using the functions defined above, the shared sha256_common_* helpers,
 * and fmt_default_* where raw SHA-256 needs no special handling.
 */
struct fmt_main fmt_opencl_rawsha256 = {
	{
		FORMAT_LABEL,
		FORMAT_NAME,
		ALGORITHM_NAME,
		BENCHMARK_COMMENT,
		BENCHMARK_LENGTH,
		0,
		PLAINTEXT_LENGTH,
		BINARY_SIZE,
		BINARY_ALIGN,
		SALT_SIZE,
		SALT_ALIGN,
		MIN_KEYS_PER_CRYPT,
		MAX_KEYS_PER_CRYPT,
		FMT_CASE | FMT_8_BIT | FMT_SPLIT_UNIFIES_CASE | FMT_MASK,
		{NULL},
		{
			HEX_TAG,
			CISCO_TAG
		},
		sha256_common_tests
	}, {
		init,
		done,
		reset,
		sha256_common_prepare,
		sha256_common_valid,
		sha256_common_split,
		sha256_common_binary_BE,
		fmt_default_salt,
		{NULL},
		fmt_default_source,
		{
			fmt_default_binary_hash_0,
			fmt_default_binary_hash_1,
			fmt_default_binary_hash_2,
			fmt_default_binary_hash_3,
			fmt_default_binary_hash_4,
			fmt_default_binary_hash_5,
			fmt_default_binary_hash_6
		},
		fmt_default_salt_hash,
		NULL,
		fmt_default_set_salt,
		set_key,
		get_key,
		clear_keys,
		crypt_all,
		{
			get_hash_0,
			get_hash_1,
			get_hash_2,
			get_hash_3,
			get_hash_4,
			get_hash_5,
			get_hash_6
		},
		cmp_all,
		cmp_one,
		cmp_exact
	}
};
828 
829 #endif                          /* plugin stanza */
830 
831 #endif                          /* HAVE_OPENCL */
832