1 /*
2  * This software is Copyright (c) 2017 Dhiru Kholia <kholia at kth.se> and
3  * Copyright (c) 2013 Lukas Odzioba <ukasz at openwall dot net> and it is
4  * hereby released to the general public under the following terms:
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted.
8  *
9  * salt length increase. HAS to match pbkdf2_hmac_sha256_kernel.cl code
10  *  Now uses a common header file.  Dec 2017, JimF.
11  */
12 #ifdef HAVE_OPENCL
13 
14 #if FMT_EXTERNS_H
15 extern struct fmt_main fmt_opencl_notes;
16 #elif FMT_REGISTERS_H
17 john_register_one(&fmt_opencl_notes);
18 #else
19 
20 #include <stdint.h>
21 #include <string.h>
22 
23 #include "misc.h"
24 #include "arch.h"
25 #include "notes_common.h"
26 #include "common.h"
27 #include "formats.h"
28 #include "options.h"
29 #include "opencl_common.h"
30 
31 #define FORMAT_LABEL            "notes-opencl"
32 #define ALGORITHM_NAME          "PBKDF2-SHA256 AES OpenCL"
33 #define BENCHMARK_COMMENT       ""
34 #define BENCHMARK_LENGTH        0x107
35 #define MIN_KEYS_PER_CRYPT      1
36 #define MAX_KEYS_PER_CRYPT      1
37 #define BINARY_SIZE             0
38 #define BINARY_ALIGN            MEM_ALIGN_WORD
39 #define SALT_SIZE               sizeof(*cur_salt)
40 #define SALT_ALIGN              sizeof(uint64_t)
41 
42 #define HASH_LOOPS              (7*113) // factors 7 89 113 (for 70400)
43 #define ITERATIONS              70400
44 
45 #include "opencl_pbkdf2_hmac_sha256.h"
46 
47 typedef struct {
48 	salt_t salt;
49 	int type;
50 	union {  // wrapped kek
51 		uint64_t qword[BLOBLEN/8];
52 		uint8_t chr[BLOBLEN];
53 	} blob;
54 } salt_t2;
55 
56 static pass_t *host_pass;			      /** plain ciphertexts **/
57 static salt_t2 *host_salt;			      /** salt **/
58 static cl_int cl_error;
59 static cl_mem mem_in, mem_out, mem_salt, mem_state, mem_cracked;
60 static cl_kernel split_kernel, final_kernel, decrypt_kernel;
61 static struct fmt_main *self;
62 
63 static unsigned int *cracked, cracked_size;
64 static struct custom_salt *cur_salt;
65 
66 #define STEP			0
67 #define SEED			1024
68 
69 static const char * warn[] = {
70 	"xfer: ",  ", init: " , ", crypt: ", ", final", ", decrypt: ", ", res xfer: "
71 };
72 
73 static int split_events[] = { 2, -1, -1 };
74 
75 // This file contains auto-tuning routine(s). Has to be included after formats definitions.
76 #include "opencl_autotune.h"
77 
create_clobj(size_t kpc,struct fmt_main * self)78 static void create_clobj(size_t kpc, struct fmt_main *self)
79 {
80 #define CL_RO CL_MEM_READ_ONLY
81 #define CL_WO CL_MEM_WRITE_ONLY
82 #define CL_RW CL_MEM_READ_WRITE
83 
84 #define CLCREATEBUFFER(_flags, _size, _string)\
85 	clCreateBuffer(context[gpu_id], _flags, _size, NULL, &cl_error);\
86 	HANDLE_CLERROR(cl_error, _string);
87 
88 #define CLKERNELARG(kernel, id, arg, msg)\
89 	HANDLE_CLERROR(clSetKernelArg(kernel, id, sizeof(arg), &arg), msg);
90 
91 	host_pass = mem_calloc(kpc, sizeof(pass_t));
92 	host_salt = mem_calloc(1, sizeof(salt_t2));
93 	cracked_size = kpc * sizeof(*cracked);
94 	cracked = mem_calloc(cracked_size, 1);
95 
96 	mem_in = CLCREATEBUFFER(CL_RO, kpc * sizeof(pass_t),
97 	                        "Cannot allocate mem in");
98 	mem_salt = CLCREATEBUFFER(CL_RO, sizeof(salt_t2),
99 	                          "Cannot allocate mem salt");
100 	mem_out = CLCREATEBUFFER(CL_WO, kpc * sizeof(crack_t),
101 	                         "Cannot allocate mem out");
102 	mem_state = CLCREATEBUFFER(CL_RW, kpc * sizeof(state_t),
103 	                           "Cannot allocate mem state");
104 	mem_cracked = CLCREATEBUFFER(CL_RW, cracked_size,
105 	                           "Cannot allocate mem cracked");
106 
107 	CLKERNELARG(crypt_kernel, 0, mem_in, "Error while setting mem_in");
108 	CLKERNELARG(crypt_kernel, 1, mem_salt, "Error while setting mem_salt");
109 	CLKERNELARG(crypt_kernel, 2, mem_state, "Error while setting mem_state");
110 
111 	CLKERNELARG(split_kernel, 0, mem_state, "Error while setting mem_state");
112 
113 	CLKERNELARG(final_kernel, 0, mem_out, "Error while setting mem_out");
114 	CLKERNELARG(final_kernel, 1, mem_salt, "Error while setting mem_salt");
115 	CLKERNELARG(final_kernel, 2, mem_state, "Error while setting mem_state");
116 
117 	CLKERNELARG(decrypt_kernel, 0, mem_salt, "Error while setting mem_salt");
118 	CLKERNELARG(decrypt_kernel, 1, mem_out, "Error while setting mem_out");
119 	CLKERNELARG(decrypt_kernel, 2, mem_cracked, "Error setting mem_cracked");
120 }
121 
122 /* ------- Helper functions ------- */
get_task_max_work_group_size()123 static size_t get_task_max_work_group_size()
124 {
125 	size_t s;
126 
127 	s = autotune_get_task_max_work_group_size(FALSE, 0, crypt_kernel);
128 	s = MIN(s, autotune_get_task_max_work_group_size(FALSE, 0, split_kernel));
129 	s = MIN(s, autotune_get_task_max_work_group_size(FALSE, 0, final_kernel));
130 	s = MIN(s, autotune_get_task_max_work_group_size(FALSE, 0, decrypt_kernel));
131 	return s;
132 }
133 
release_clobj(void)134 static void release_clobj(void)
135 {
136 	if (host_salt) {
137 		HANDLE_CLERROR(clReleaseMemObject(mem_cracked), "Release mem cracked");
138 		HANDLE_CLERROR(clReleaseMemObject(mem_in), "Release mem in");
139 		HANDLE_CLERROR(clReleaseMemObject(mem_salt), "Release mem salt");
140 		HANDLE_CLERROR(clReleaseMemObject(mem_out), "Release mem out");
141 		HANDLE_CLERROR(clReleaseMemObject(mem_state), "Release mem state");
142 
143 		MEM_FREE(cracked);
144 		MEM_FREE(host_pass);
145 		MEM_FREE(host_salt);
146 	}
147 }
148 
init(struct fmt_main * _self)149 static void init(struct fmt_main *_self)
150 {
151 	self = _self;
152 	opencl_prepare_dev(gpu_id);
153 }
154 
reset(struct db_main * db)155 static void reset(struct db_main *db)
156 {
157 	if (!autotuned) {
158 		char build_opts[64];
159 
160 		snprintf(build_opts, sizeof(build_opts),
161 		         "-DHASH_LOOPS=%u -DPLAINTEXT_LENGTH=%u -DBLOBLEN=%u",
162 		         HASH_LOOPS, PLAINTEXT_LENGTH, BLOBLEN);
163 		opencl_init("$JOHN/kernels/fvde_kernel.cl",
164 		            gpu_id, build_opts);
165 
166 		crypt_kernel =
167 			clCreateKernel(program[gpu_id], "pbkdf2_sha256_init", &cl_error);
168 		HANDLE_CLERROR(cl_error, "Error creating crypt kernel");
169 
170 		split_kernel =
171 			clCreateKernel(program[gpu_id], "pbkdf2_sha256_loop", &cl_error);
172 		HANDLE_CLERROR(cl_error, "Error creating split kernel");
173 
174 		final_kernel =
175 			clCreateKernel(program[gpu_id], "pbkdf2_sha256_final", &cl_error);
176 		HANDLE_CLERROR(cl_error, "Error creating final kernel");
177 
178 		decrypt_kernel =
179 			clCreateKernel(program[gpu_id], "fvde_decrypt", &cl_error);
180 		HANDLE_CLERROR(cl_error, "Error creating decrypt kernel");
181 
182 		// Initialize openCL tuning (library) for this format.
183 		opencl_init_auto_setup(SEED, HASH_LOOPS, split_events, warn,
184 		                       2, self, create_clobj, release_clobj,
185 		                       sizeof(state_t), 0, db);
186 
187 		// Auto tune execution from shared/included code.
188 		autotune_run(self, ITERATIONS, 0, 200);
189 	}
190 }
191 
done(void)192 static void done(void)
193 {
194 	if (autotuned) {
195 		release_clobj();
196 		HANDLE_CLERROR(clReleaseKernel(crypt_kernel), "Release kernel 1");
197 		HANDLE_CLERROR(clReleaseKernel(split_kernel), "Release kernel 2");
198 		HANDLE_CLERROR(clReleaseKernel(final_kernel), "Release kernel 3");
199 		HANDLE_CLERROR(clReleaseKernel(decrypt_kernel), "Release kernel 4");
200 		HANDLE_CLERROR(clReleaseProgram(program[gpu_id]),
201 		               "Release Program");
202 
203 		autotuned--;
204 	}
205 }
206 
set_salt(void * salt)207 static void set_salt(void *salt)
208 {
209 	cur_salt = (struct custom_salt*)salt;
210 
211 	memcpy(host_salt->salt.salt, cur_salt->salt, cur_salt->salt_length);
212 	memcpy(host_salt->blob.qword, cur_salt->blob.qword, BLOBLEN);
213 	host_salt->salt.length = cur_salt->salt_length;
214 	host_salt->salt.rounds = cur_salt->iterations;
215 	host_salt->type = cur_salt->type;
216 
217 	HANDLE_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], mem_salt,
218 		CL_FALSE, 0, sizeof(salt_t2), host_salt, 0, NULL, NULL),
219 	    "Copy salt to gpu");
220 }
221 
crypt_all(int * pcount,struct db_salt * salt)222 static int crypt_all(int *pcount, struct db_salt *salt)
223 {
224 	int i;
225 	const int count = *pcount;
226 	int loops = (host_salt->salt.rounds + HASH_LOOPS - 1) / HASH_LOOPS;
227 	size_t *lws = local_work_size ? &local_work_size : NULL;
228 
229 	global_work_size = GET_NEXT_MULTIPLE(count, local_work_size);
230 
231 	// Copy data to gpu
232 	BENCH_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], mem_in,
233 		CL_FALSE, 0, global_work_size * sizeof(pass_t), host_pass, 0,
234 		NULL, multi_profilingEvent[0]), "Copy data to gpu");
235 
236 	// Run kernel
237 	BENCH_CLERROR(clEnqueueNDRangeKernel(queue[gpu_id], crypt_kernel,
238 		1, NULL, &global_work_size, lws, 0, NULL, multi_profilingEvent[1]), "Run kernel");
239 
240 	for (i = 0; i < (ocl_autotune_running ? 1 : loops); i++) {
241 		BENCH_CLERROR(clEnqueueNDRangeKernel(queue[gpu_id], split_kernel,
242 			1, NULL, &global_work_size, lws, 0, NULL, multi_profilingEvent[2]), "Run split kernel");
243 		BENCH_CLERROR(clFinish(queue[gpu_id]), "clFinish");
244 		opencl_process_event();
245 	}
246 	BENCH_CLERROR(clEnqueueNDRangeKernel(queue[gpu_id], final_kernel,
247 		1, NULL, &global_work_size, lws, 0, NULL,
248 		multi_profilingEvent[3]), "Run final kernel");
249 
250 	// Run FVDE decrypt/compare kernel
251 	BENCH_CLERROR(clEnqueueNDRangeKernel(queue[gpu_id], decrypt_kernel,
252 		1, NULL, &global_work_size, lws, 0, NULL, multi_profilingEvent[4]), "Run kernel");
253 
254 	// Read the result back
255 	BENCH_CLERROR(clEnqueueReadBuffer(queue[gpu_id], mem_cracked,
256 		CL_TRUE, 0, cracked_size, cracked, 0,
257 		NULL, multi_profilingEvent[5]), "Copy result back");
258 
259 	return count;
260 }
261 
cmp_all(void * binary,int count)262 static int cmp_all(void *binary, int count)
263 {
264 	int i;
265 
266 	for (i = 0; i < count; i++)
267 		if (cracked[i])
268 			return 1;
269 	return 0;
270 }
271 
cmp_one(void * binary,int index)272 static int cmp_one(void *binary, int index)
273 {
274 	return cracked[index];
275 }
276 
cmp_exact(char * source,int index)277 static int cmp_exact(char *source, int index)
278 {
279 	return 1;
280 }
281 
set_key(char * key,int index)282 static void set_key(char *key, int index)
283 {
284 	int saved_len = MIN(strlen(key), PLAINTEXT_LENGTH);
285 
286 	memcpy(host_pass[index].v, key, saved_len);
287 	host_pass[index].length = saved_len;
288 }
289 
get_key(int index)290 static char *get_key(int index)
291 {
292 	static char ret[PLAINTEXT_LENGTH + 1];
293 
294 	memcpy(ret, host_pass[index].v, PLAINTEXT_LENGTH);
295 	ret[host_pass[index].length] = 0;
296 
297 	return ret;
298 }
299 
300 struct fmt_main fmt_opencl_notes = {
301 	{
302 		FORMAT_LABEL,
303 		FORMAT_NAME,
304 		ALGORITHM_NAME,
305 		BENCHMARK_COMMENT,
306 		BENCHMARK_LENGTH,
307 		0,
308 		PLAINTEXT_LENGTH,
309 		BINARY_SIZE,
310 		BINARY_ALIGN,
311 		SALT_SIZE,
312 		SALT_ALIGN,
313 		MIN_KEYS_PER_CRYPT,
314 		MAX_KEYS_PER_CRYPT,
315 		FMT_CASE | FMT_8_BIT,
316 		{
317 			"iteration count",
318 		},
319 		{ FORMAT_TAG },
320 		notes_tests
321 	}, {
322 		init,
323 		done,
324 		reset,
325 		fmt_default_prepare,
326 		notes_common_valid,
327 		fmt_default_split,
328 		fmt_default_binary,
329 		notes_common_get_salt,
330 		{
331 			notes_common_iteration_count,
332 		},
333 		fmt_default_source,
334 		{
335 			fmt_default_binary_hash
336 		},
337 		fmt_default_salt_hash,
338 		NULL,
339 		set_salt,
340 		set_key,
341 		get_key,
342 		fmt_default_clear_keys,
343 		crypt_all,
344 		{
345 			fmt_default_get_hash
346 		},
347 		cmp_all,
348 		cmp_one,
349 		cmp_exact
350 	}
351 };
352 
353 #endif /* plugin stanza */
354 
355 #endif /* HAVE_OPENCL */
356