1 /*
2  * This software is Copyright (c) 2018, Dhiru Kholia <kholia at kth dot se>,
3  * Copyright (c) 2012-2013 Lukas Odzioba, Copyright (c) 2014 JimF, Copyright
4  * (c) 2014 magnum, and it is hereby released to the general public under the
5  * following terms:
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted.
9  *
10  * Based on opencl_pbkdf2_hmac_sha512_fmt_plug.c file.
11  */
12 
13 #ifdef HAVE_OPENCL
14 
15 #if FMT_EXTERNS_H
16 extern struct fmt_main fmt_opencl_axcrypt2;
17 #elif FMT_REGISTERS_H
18 john_register_one(&fmt_opencl_axcrypt2);
19 #else
20 
21 #include <stdint.h>
22 #include <string.h>
23 
24 #include "misc.h"
25 #include "arch.h"
26 #include "common.h"
27 #include "formats.h"
28 #include "options.h"
29 #include "unicode.h"
30 #include "opencl_common.h"
31 #include "axcrypt_common.h"
32 #define VERSION_2_SUPPORT 1
33 #include "axcrypt_variable_code.h"
34 #include "pbkdf2_hmac_common.h"
35 
36 #define FORMAT_NAME             "AxCrypt 2.x"
37 #define FORMAT_LABEL            "axcrypt2-opencl"
38 #define ALGORITHM_NAME          "PBKDF2-SHA512 AES OpenCL"
39 #define BINARY_SIZE             0
40 #define BINARY_ALIGN            MEM_ALIGN_WORD
41 #define SALT_SIZE               sizeof(struct custom_salt *)
42 #define SALT_ALIGN              sizeof(struct custom_salt *)
43 #define PLAINTEXT_LENGTH        110
44 #define MIN_KEYS_PER_CRYPT      1
45 #define MAX_KEYS_PER_CRYPT      1
46 #define KERNEL_NAME             "pbkdf2_sha512_kernel"
47 #define SPLIT_KERNEL_NAME       "pbkdf2_sha512_loop"
48 #define FINAL_KERNEL_NAME       "axcrypt2_final"
49 
50 #define HASH_LOOPS              250
51 #define ITERATIONS              25000
52 
53 typedef struct {
54 	// for plaintext, we must make sure it is a full uint64_t width.
55 	uint64_t v[(PLAINTEXT_LENGTH + 7) / 8]; // v must be kept aligned(8)
56 	uint64_t length; // keep 64 bit aligned, length is overkill, but easiest way to stay aligned.
57 } pass_t;
58 
59 typedef struct {
60 	uint64_t hash[8];
61 } crack_t;
62 
63 typedef struct {
64 	// for salt, we append \x00\x00\x00\x01\x80 and must make sure it is a full uint64 width
65 	uint64_t salt[(PBKDF2_64_MAX_SALT_SIZE + 1 + 4 + 7) / 8]; // salt must be kept aligned(8)
66 	uint32_t length;
67 	uint32_t rounds;
68 } salt_t;
69 
70 typedef struct {
71 	uint64_t ipad[8];
72 	uint64_t opad[8];
73 	uint64_t hash[8];
74 	uint64_t W[8];
75 	cl_uint rounds;
76 } state_t;
77 
78 typedef struct {
79 	salt_t pbkdf2;
80 	uint32_t key_wrapping_rounds;
81 	unsigned char salt[64];
82 	unsigned char wrappedkey[144];
83 } axcrypt2_salt_t;
84 
85 typedef struct {
86 	uint32_t cracked;
87 } out_t;
88 
89 static struct custom_salt *cur_salt;
90 
91 static pass_t *host_pass;
92 static axcrypt2_salt_t *host_salt;
93 static out_t *host_crack;
94 static cl_mem mem_in, mem_salt, mem_state, mem_dk, mem_out;
95 static cl_kernel split_kernel, final_kernel;
96 static cl_int cl_error;
97 static struct fmt_main *self;
98 
99 #define STEP                    0
100 #define SEED                    256
101 
102 static const char *warn[] = {
103 	"xfer: ",  ", init: " , ", crypt: ", ", final: ", ", res xfer: "
104 };
105 
106 static int split_events[] = { 2, -1, -1 };
107 
108 // This file contains auto-tuning routine(s). Has to be included after formats definitions.
109 #include "opencl_autotune.h"
110 
111 /* ------- Helper functions ------- */
get_task_max_work_group_size()112 static size_t get_task_max_work_group_size()
113 {
114 	size_t s;
115 
116 	s = autotune_get_task_max_work_group_size(FALSE, 0, crypt_kernel);
117 	s = MIN(s, autotune_get_task_max_work_group_size(FALSE, 0, split_kernel));
118 	return MIN(s, autotune_get_task_max_work_group_size(FALSE, 0, final_kernel));
119 }
120 
create_clobj(size_t kpc,struct fmt_main * self)121 static void create_clobj(size_t kpc, struct fmt_main *self)
122 {
123 	host_pass = mem_calloc(kpc, sizeof(pass_t));
124 	host_crack = mem_calloc(kpc, sizeof(out_t));
125 	host_salt = mem_calloc(1, sizeof(axcrypt2_salt_t));
126 
127 #define CL_RO CL_MEM_READ_ONLY
128 #define CL_WO CL_MEM_WRITE_ONLY
129 #define CL_RW CL_MEM_READ_WRITE
130 
131 #define CLCREATEBUFFER(_flags, _size, _string)  \
132 	clCreateBuffer(context[gpu_id], _flags, _size, NULL, &cl_error);  \
133 	HANDLE_CLERROR(cl_error, _string);
134 
135 #define CLKERNELARG(kernel, id, arg, msg)  \
136 	HANDLE_CLERROR(clSetKernelArg(kernel, id, sizeof(arg), &arg), msg);
137 
138 	mem_salt = CLCREATEBUFFER(CL_RO, sizeof(axcrypt2_salt_t),
139 			"Cannot allocate mem salt");
140 	mem_in = CLCREATEBUFFER(CL_RO, kpc * sizeof(pass_t),
141 			"Cannot allocate mem in");
142 	mem_state = CLCREATEBUFFER(CL_RW, kpc * sizeof(state_t),
143 			"Cannot allocate mem state");
144 	mem_dk = CLCREATEBUFFER(CL_RW, kpc * sizeof(crack_t),
145 			"Cannot allocate mem dk");
146 	mem_out = CLCREATEBUFFER(CL_WO, kpc * sizeof(out_t),
147 			"Cannot allocate mem out");
148 
149 	CLKERNELARG(crypt_kernel, 0, mem_in, "Error while setting mem_in");
150 	CLKERNELARG(crypt_kernel, 1, mem_salt, "Error while setting mem_salt");
151 	CLKERNELARG(crypt_kernel, 2, mem_state, "Error while setting mem_state");
152 
153 	CLKERNELARG(split_kernel, 0, mem_state, "Error while setting mem_state");
154 	CLKERNELARG(split_kernel, 1, mem_dk, "Error while setting mem_dk");
155 
156 	CLKERNELARG(final_kernel, 0, mem_dk, "Error while setting mem_dk");
157 	CLKERNELARG(final_kernel, 1, mem_salt, "Error while setting mem_salt");
158 	CLKERNELARG(final_kernel, 2, mem_out, "Error while setting mem_out");
159 }
160 
init(struct fmt_main * _self)161 static void init(struct fmt_main *_self)
162 {
163 	self = _self;
164 	opencl_prepare_dev(gpu_id);
165 }
166 
reset(struct db_main * db)167 static void reset(struct db_main *db)
168 {
169 	if (!autotuned) {
170 		char build_opts[128];
171 
172 		snprintf(build_opts, sizeof(build_opts),
173 				"-DHASH_LOOPS=%u -DPLAINTEXT_LENGTH=%d -DPBKDF2_64_MAX_SALT_SIZE=%d",
174 				HASH_LOOPS, PLAINTEXT_LENGTH, PBKDF2_64_MAX_SALT_SIZE);
175 
176 		opencl_init("$JOHN/kernels/axcrypt2_kernel.cl", gpu_id, build_opts);
177 
178 		crypt_kernel = clCreateKernel(program[gpu_id], KERNEL_NAME, &cl_error);
179 		HANDLE_CLERROR(cl_error, "Error creating kernel");
180 
181 		split_kernel =
182 			clCreateKernel(program[gpu_id], SPLIT_KERNEL_NAME, &cl_error);
183 		HANDLE_CLERROR(cl_error, "Error creating split kernel");
184 
185 		final_kernel =
186 			clCreateKernel(program[gpu_id], FINAL_KERNEL_NAME, &cl_error);
187 		HANDLE_CLERROR(cl_error, "Error creating final kernel");
188 
189 		// Initialize openCL tuning (library) for this format.
190 		opencl_init_auto_setup(SEED, HASH_LOOPS, split_events, warn, 2,
191 				self, create_clobj, release_clobj,
192 				sizeof(state_t), 0, db);
193 
194 		// Auto tune execution from shared/included code.
195 		autotune_run(self, ITERATIONS, 0, 200);
196 	}
197 }
198 
release_clobj(void)199 static void release_clobj(void)
200 {
201 	if (host_pass) {
202 		HANDLE_CLERROR(clReleaseMemObject(mem_salt), "Release mem salt");
203 		HANDLE_CLERROR(clReleaseMemObject(mem_in), "Release mem in");
204 		HANDLE_CLERROR(clReleaseMemObject(mem_state), "Release mem state");
205 		HANDLE_CLERROR(clReleaseMemObject(mem_dk), "Release mem out");
206 		HANDLE_CLERROR(clReleaseMemObject(mem_out), "Release mem out");
207 
208 		MEM_FREE(host_pass);
209 		MEM_FREE(host_salt);
210 		MEM_FREE(host_crack);
211 	}
212 }
213 
done(void)214 static void done(void)
215 {
216 	if (autotuned) {
217 		release_clobj();
218 		HANDLE_CLERROR(clReleaseKernel(final_kernel), "Release kernel");
219 		HANDLE_CLERROR(clReleaseKernel(split_kernel), "Release kernel");
220 		HANDLE_CLERROR(clReleaseKernel(crypt_kernel), "Release kernel");
221 		HANDLE_CLERROR(clReleaseProgram(program[gpu_id]), "Release Program");
222 
223 		autotuned--;
224 	}
225 }
226 
axcrypt2_valid(char * ciphertext,struct fmt_main * self)227 static int axcrypt2_valid(char *ciphertext, struct fmt_main *self)
228 {
229 	return axcrypt_common_valid(ciphertext, self, 2);
230 }
231 
get_salt(char * ciphertext)232 static void *get_salt(char *ciphertext)
233 {
234 	char *ctcopy = strdup(ciphertext);
235 	char *keeptr = ctcopy;
236 	char *p;
237 	int i;
238 	static struct custom_salt cs;
239 	static void *ptr;
240 	int saltlen = 0;
241 	int wrappedkeylen;
242 
243 	memset(&cs, 0, sizeof(cs));
244 	cs.keyfile = NULL;
245 	ctcopy += FORMAT_TAG_LEN;
246 	p = strtokm(ctcopy, "*");
247 	cs.version = atoi(p);
248 
249 	saltlen = 64;  // WrapSalt
250 	wrappedkeylen = 144;
251 
252 	p = strtokm(NULL, "*");
253 	cs.key_wrapping_rounds = (uint32_t) atoi(p);
254 
255 	p = strtokm(NULL, "*");
256 	for (i = 0; i < saltlen; i++)
257 		cs.salt[i] = atoi16[ARCH_INDEX(p[i * 2])] * 16
258 			+ atoi16[ARCH_INDEX(p[i * 2 + 1])];
259 
260 	p = strtokm(NULL, "*");
261 	for (i = 0; i < wrappedkeylen; i++)
262 		cs.wrappedkey[i] = atoi16[ARCH_INDEX(p[i * 2])] * 16
263 			+ atoi16[ARCH_INDEX(p[i * 2 + 1])];
264 
265 	if (cs.version == 2) {
266 		p = strtokm(NULL, "*");
267 		cs.deriv_iterations = atoi(p);
268 		p = strtokm(NULL, "*");
269 
270 		for (i = 0; i < 32; i++)
271 			cs.deriv_salt[i] = atoi16[ARCH_INDEX(p[i * 2])] * 16 + atoi16[ARCH_INDEX(p[i * 2 + 1])];
272 	}
273 
274 	// we append the count and EOM here, one time.
275 	memcpy(cs.deriv_salt + 32, "\x0\x0\x0\x1\x80", 5);
276 	cs.deriv_salt_length = 32 + 5; // we include the x80 byte in our saltlen, but the .cl kernel knows to reduce saltlen by 1 */
277 
278 	MEM_FREE(keeptr);
279 
280 	cs.dsalt.salt_cmp_offset = SALT_CMP_OFF(struct custom_salt, salt);
281 	cs.dsalt.salt_cmp_size = SALT_CMP_SIZE(struct custom_salt, salt, wrappedkey, 0);
282 	cs.dsalt.salt_alloc_needs_free = 0;
283 
284 	ptr = mem_alloc_tiny(sizeof(struct custom_salt), MEM_ALIGN_WORD);
285 	memcpy(ptr, &cs, sizeof(struct custom_salt));
286 
287 	return (void *)&ptr;
288 }
289 
set_salt(void * salt)290 static void set_salt(void *salt)
291 {
292 	cur_salt = *(struct custom_salt **) salt;
293 
294 	memcpy(host_salt->pbkdf2.salt, cur_salt->deriv_salt, cur_salt->deriv_salt_length);
295 	host_salt->pbkdf2.length = cur_salt->deriv_salt_length;
296 	host_salt->pbkdf2.rounds = cur_salt->deriv_iterations;
297 
298         memcpy(host_salt->salt, cur_salt->salt, 64);
299         memcpy(host_salt->wrappedkey, cur_salt->wrappedkey, 144);
300 
301 	host_salt->key_wrapping_rounds = cur_salt->key_wrapping_rounds;
302 
303 	HANDLE_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], mem_salt,
304 			CL_FALSE, 0, sizeof(axcrypt2_salt_t), host_salt, 0, NULL, NULL),
305 			"Copy salt to gpu");
306 }
307 
crypt_all(int * pcount,struct db_salt * salt)308 static int crypt_all(int *pcount, struct db_salt *salt)
309 {
310 	const int count = *pcount;
311 	int i, loops = (host_salt->pbkdf2.rounds + HASH_LOOPS - 1) / HASH_LOOPS;
312 	size_t *lws = local_work_size ? &local_work_size : NULL;
313 	size_t gws = GET_NEXT_MULTIPLE(count, local_work_size);
314 
315 	// Copy data to gpu
316 	BENCH_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], mem_in, CL_FALSE, 0,
317 				gws * sizeof(pass_t), host_pass,
318 				0, NULL, multi_profilingEvent[0]),
319 				"Copy data to gpu");
320 
321 	// Run standard PBKDF2 kernel
322 	BENCH_CLERROR(clEnqueueNDRangeKernel(queue[gpu_id], crypt_kernel, 1,
323 				NULL, &gws, lws, 0, NULL,
324 				multi_profilingEvent[1]), "Run kernel");
325 
326 	for (i = 0; i < (ocl_autotune_running ? 1 : loops); i++) {
327 		BENCH_CLERROR(clEnqueueNDRangeKernel(queue[gpu_id],
328 					split_kernel, 1, NULL,
329 					&gws, lws, 0, NULL,
330 					multi_profilingEvent[2]), "Run split kernel");
331 		BENCH_CLERROR(clFinish(queue[gpu_id]), "clFinish");
332 		opencl_process_event();
333 	}
334 
335 	// Run GELI post-processing kernel
336 	BENCH_CLERROR(clEnqueueNDRangeKernel(queue[gpu_id], final_kernel, 1,
337 				NULL, &gws, lws, 0, NULL,
338 				multi_profilingEvent[3]), "Run kernel");
339 
340 	// Read the result back
341 	BENCH_CLERROR(clEnqueueReadBuffer(queue[gpu_id], mem_out, CL_TRUE, 0,
342 				gws * sizeof(out_t), host_crack,
343 				0, NULL, multi_profilingEvent[4]), "Copy result back");
344 
345 	return count;
346 }
347 
cmp_all(void * binary,int count)348 static int cmp_all(void *binary, int count)
349 {
350 	int index;
351 	for (index = 0; index < count; index++)
352 		if (host_crack[index].cracked)
353 			return 1;
354 	return 0;
355 }
356 
cmp_one(void * binary,int index)357 static int cmp_one(void *binary, int index)
358 {
359 	return host_crack[index].cracked;
360 }
361 
cmp_exact(char * source,int index)362 static int cmp_exact(char *source, int index)
363 {
364 	return 1;
365 }
366 
set_key(char * key,int index)367 static void set_key(char *key, int index)
368 {
369 	int saved_len = MIN(strlen(key), PLAINTEXT_LENGTH);
370 
371 	// make sure LAST uint64 that has any key in it gets null, since we simply
372 	// ^= the whole uint64 with the ipad/opad mask
373 	strncpy((char*)host_pass[index].v, key, PLAINTEXT_LENGTH);
374 	host_pass[index].length = saved_len;
375 }
376 
get_key(int index)377 static char *get_key(int index)
378 {
379 	static char ret[PLAINTEXT_LENGTH + 1];
380 
381 	memcpy(ret, host_pass[index].v, PLAINTEXT_LENGTH);
382 	ret[host_pass[index].length] = 0;
383 
384 	return ret;
385 }
386 
387 struct fmt_main fmt_opencl_axcrypt2 = {
388 	{
389 		FORMAT_LABEL,
390 		FORMAT_NAME,
391 		ALGORITHM_NAME,
392 		BENCHMARK_COMMENT,
393 		BENCHMARK_LENGTH,
394 		0,
395 		PLAINTEXT_LENGTH,
396 		BINARY_SIZE,
397 		BINARY_ALIGN,
398 		SALT_SIZE,
399 		SALT_ALIGN,
400 		MIN_KEYS_PER_CRYPT,
401 		MAX_KEYS_PER_CRYPT,
402 		FMT_CASE | FMT_8_BIT | FMT_DYNA_SALT | FMT_HUGE_INPUT,
403 		{
404 			"iteration count",
405 		},
406 		{ FORMAT_TAG },
407 		axcrypt_tests
408 	}, {
409 		init,
410 		done,
411 		reset,
412 		fmt_default_prepare,
413 		axcrypt2_valid,
414 		fmt_default_split,
415 		fmt_default_binary,
416 		get_salt,
417 		{
418 			axcrypt_iteration_count,
419 		},
420 		fmt_default_source,
421 		{
422 			fmt_default_binary_hash
423 		},
424 		fmt_default_salt_hash,
425 		NULL,
426 		set_salt,
427 		set_key,
428 		get_key,
429 		fmt_default_clear_keys,
430 		crypt_all,
431 		{
432 			fmt_default_get_hash
433 		},
434 		cmp_all,
435 		cmp_one,
436 		cmp_exact
437 	}
438 };
439 
440 #endif /* plugin stanza */
441 
442 #endif /* HAVE_OPENCL */
443