1 /*
2  * This software is Copyright (c) 2018 Dhiru Kholia, Copyright (c) 2017 Jim
3  * Fougeron, and Copyright (c) 2013 Lukas Odzioba <ukasz at openwall dot net>
4  * and it is hereby released to the general public under the following terms:
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted.
8  */
9 
10 #ifdef HAVE_OPENCL
11 
12 #if FMT_EXTERNS_H
13 extern struct fmt_main fmt_opencl_bitwarden;
14 #elif FMT_REGISTERS_H
15 john_register_one(&fmt_opencl_bitwarden);
16 #else
17 
18 #include <stdint.h>
19 #include <string.h>
20 
21 #include "misc.h"
22 #include "arch.h"
23 #include "common.h"
24 #include "formats.h"
25 #include "options.h"
26 #include "bitwarden_common.h"
27 #include "opencl_common.h"
28 
29 #define FORMAT_LABEL            "bitwarden-opencl"
30 #define ALGORITHM_NAME          "PBKDF2-SHA256 AES OpenCL"
31 #define BENCHMARK_COMMENT       ""
32 #define BENCHMARK_LENGTH        0x507
33 #define MIN_KEYS_PER_CRYPT      1
34 #define MAX_KEYS_PER_CRYPT      1
35 #define BINARY_SIZE             0
36 #define BINARY_ALIGN            MEM_ALIGN_WORD
37 #define SALT_SIZE               sizeof(struct custom_salt)
38 #define SALT_ALIGN              sizeof(uint64_t)
39 
40 #define HASH_LOOPS              (7*113) // factors 7 89 113 (for 70400)
41 #define ITERATIONS              70400
42 
43 #include "opencl_pbkdf2_hmac_sha256.h"
44 
45 typedef struct {
46 	salt_t salt; // this MUST match opencl_pbkdf2_hmac_sha256.cl structure!
47 
48 	// bitwarden extension
49 	union {
50 		uint64_t qword[32/8];
51 		uint8_t chr[32];
52 	} blob;
53 } salt_t2;
54 
55 static pass_t *host_pass;
56 static salt_t2 *host_salt;
57 static cl_int cl_error;
58 static cl_mem mem_in, mem_out, mem_salt, mem_state, mem_cracked;
59 static cl_kernel split_kernel, final_kernel, decrypt_kernel;
60 static struct fmt_main *self;
61 
62 static unsigned int *cracked, cracked_size;
63 static struct custom_salt *cur_salt;
64 
65 #define STEP			0
66 #define SEED			1024
67 
68 static const char * warn[] = {
69 	"xfer: ",  ", init: " , ", crypt: ", ", final", ", decrypt: ", ", res xfer: "
70 };
71 
72 static int split_events[] = { 2, -1, -1 };
73 
74 // This file contains auto-tuning routine(s). Has to be included after formats definitions.
75 #include "opencl_autotune.h"
76 
create_clobj(size_t kpc,struct fmt_main * self)77 static void create_clobj(size_t kpc, struct fmt_main *self)
78 {
79 #define CL_RO CL_MEM_READ_ONLY
80 #define CL_WO CL_MEM_WRITE_ONLY
81 #define CL_RW CL_MEM_READ_WRITE
82 
83 #define CLCREATEBUFFER(_flags, _size, _string)\
84 	clCreateBuffer(context[gpu_id], _flags, _size, NULL, &cl_error);\
85 	HANDLE_CLERROR(cl_error, _string);
86 
87 #define CLKERNELARG(kernel, id, arg, msg)\
88 	HANDLE_CLERROR(clSetKernelArg(kernel, id, sizeof(arg), &arg), msg);
89 
90 	host_pass = mem_calloc(kpc, sizeof(pass_t));
91 	host_salt = mem_calloc(1, sizeof(salt_t2));
92 	cracked_size = kpc * sizeof(*cracked);
93 	cracked = mem_calloc(cracked_size, 1);
94 
95 	mem_in = CLCREATEBUFFER(CL_RO, kpc * sizeof(pass_t),
96 	                        "Cannot allocate mem in");
97 	mem_salt = CLCREATEBUFFER(CL_RO, sizeof(salt_t2),
98 	                          "Cannot allocate mem salt");
99 	mem_out = CLCREATEBUFFER(CL_WO, kpc * sizeof(crack_t),
100 	                         "Cannot allocate mem out");
101 	mem_state = CLCREATEBUFFER(CL_RW, kpc * sizeof(state_t),
102 	                           "Cannot allocate mem state");
103 	mem_cracked = CLCREATEBUFFER(CL_RW, cracked_size,
104 	                           "Cannot allocate mem cracked");
105 
106 	CLKERNELARG(crypt_kernel, 0, mem_in, "Error while setting mem_in");
107 	CLKERNELARG(crypt_kernel, 1, mem_salt, "Error while setting mem_salt");
108 	CLKERNELARG(crypt_kernel, 2, mem_state, "Error while setting mem_state");
109 
110 	CLKERNELARG(split_kernel, 0, mem_state, "Error while setting mem_state");
111 
112 	CLKERNELARG(final_kernel, 0, mem_out, "Error while setting mem_out");
113 	CLKERNELARG(final_kernel, 1, mem_salt, "Error while setting mem_salt");
114 	CLKERNELARG(final_kernel, 2, mem_state, "Error while setting mem_state");
115 
116 	CLKERNELARG(decrypt_kernel, 0, mem_salt, "Error while setting mem_salt");
117 	CLKERNELARG(decrypt_kernel, 1, mem_out, "Error while setting mem_out");
118 	CLKERNELARG(decrypt_kernel, 2, mem_cracked, "Error setting mem_cracked");
119 }
120 
121 /* ------- Helper functions ------- */
get_task_max_work_group_size()122 static size_t get_task_max_work_group_size()
123 {
124 	size_t s;
125 
126 	s = autotune_get_task_max_work_group_size(FALSE, 0, crypt_kernel);
127 	s = MIN(s, autotune_get_task_max_work_group_size(FALSE, 0, split_kernel));
128 	s = MIN(s, autotune_get_task_max_work_group_size(FALSE, 0, final_kernel));
129 	s = MIN(s, autotune_get_task_max_work_group_size(FALSE, 0, decrypt_kernel));
130 	return s;
131 }
132 
release_clobj(void)133 static void release_clobj(void)
134 {
135 	if (host_salt) {
136 		HANDLE_CLERROR(clReleaseMemObject(mem_cracked), "Release mem cracked");
137 		HANDLE_CLERROR(clReleaseMemObject(mem_in), "Release mem in");
138 		HANDLE_CLERROR(clReleaseMemObject(mem_salt), "Release mem salt");
139 		HANDLE_CLERROR(clReleaseMemObject(mem_out), "Release mem out");
140 		HANDLE_CLERROR(clReleaseMemObject(mem_state), "Release mem state");
141 
142 		MEM_FREE(cracked);
143 		MEM_FREE(host_pass);
144 		MEM_FREE(host_salt);
145 	}
146 }
147 
init(struct fmt_main * _self)148 static void init(struct fmt_main *_self)
149 {
150 	self = _self;
151 	opencl_prepare_dev(gpu_id);
152 }
153 
reset(struct db_main * db)154 static void reset(struct db_main *db)
155 {
156 	if (!autotuned) {
157 		char build_opts[64];
158 
159 		snprintf(build_opts, sizeof(build_opts),
160 		         "-DHASH_LOOPS=%u -DPLAINTEXT_LENGTH=%u",
161 		         HASH_LOOPS, PLAINTEXT_LENGTH);
162 		opencl_init("$JOHN/kernels/bitwarden_kernel.cl",
163 		            gpu_id, build_opts);
164 
165 		crypt_kernel =
166 			clCreateKernel(program[gpu_id], "pbkdf2_sha256_init", &cl_error);
167 		HANDLE_CLERROR(cl_error, "Error creating crypt kernel");
168 
169 		split_kernel =
170 			clCreateKernel(program[gpu_id], "pbkdf2_sha256_loop", &cl_error);
171 		HANDLE_CLERROR(cl_error, "Error creating split kernel");
172 
173 		final_kernel =
174 			clCreateKernel(program[gpu_id], "pbkdf2_sha256_final", &cl_error);
175 		HANDLE_CLERROR(cl_error, "Error creating final kernel");
176 
177 		decrypt_kernel =
178 			clCreateKernel(program[gpu_id], "bitwarden_decrypt", &cl_error);
179 		HANDLE_CLERROR(cl_error, "Error creating decrypt kernel");
180 
181 		// Initialize openCL tuning (library) for this format.
182 		opencl_init_auto_setup(SEED, HASH_LOOPS, split_events, warn,
183 		                       2, self, create_clobj, release_clobj,
184 		                       sizeof(state_t), 0, db);
185 
186 		// Auto tune execution from shared/included code.
187 		autotune_run(self, ITERATIONS, 0, 200);
188 	}
189 }
190 
done(void)191 static void done(void)
192 {
193 	if (autotuned) {
194 		release_clobj();
195 		HANDLE_CLERROR(clReleaseKernel(crypt_kernel), "Release kernel 1");
196 		HANDLE_CLERROR(clReleaseKernel(split_kernel), "Release kernel 2");
197 		HANDLE_CLERROR(clReleaseKernel(final_kernel), "Release kernel 3");
198 		HANDLE_CLERROR(clReleaseKernel(decrypt_kernel), "Release kernel 4");
199 		HANDLE_CLERROR(clReleaseProgram(program[gpu_id]),
200 		               "Release Program");
201 
202 		autotuned--;
203 	}
204 }
205 
set_salt(void * salt)206 static void set_salt(void *salt)
207 {
208 	cur_salt = (struct custom_salt*)salt;
209 
210 	memcpy(host_salt->salt.salt, cur_salt->salt, cur_salt->salt_length);
211 	memcpy(host_salt->blob.chr, cur_salt->blob + BLOBLEN - 32, 32);
212 	host_salt->salt.length = cur_salt->salt_length;
213 	host_salt->salt.rounds = cur_salt->iterations;
214 
215 	HANDLE_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], mem_salt,
216 		CL_FALSE, 0, sizeof(salt_t2), host_salt, 0, NULL, NULL),
217 	    "Copy salt to gpu");
218 }
219 
crypt_all(int * pcount,struct db_salt * salt)220 static int crypt_all(int *pcount, struct db_salt *salt)
221 {
222 	int i;
223 	const int count = *pcount;
224 	int loops = (host_salt->salt.rounds + HASH_LOOPS - 1) / HASH_LOOPS;
225 	size_t *lws = local_work_size ? &local_work_size : NULL;
226 
227 	global_work_size = GET_NEXT_MULTIPLE(count, local_work_size);
228 
229 	// Copy data to gpu
230 	BENCH_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], mem_in,
231 		CL_FALSE, 0, global_work_size * sizeof(pass_t), host_pass, 0,
232 		NULL, multi_profilingEvent[0]), "Copy data to gpu");
233 
234 	// Run kernel
235 	BENCH_CLERROR(clEnqueueNDRangeKernel(queue[gpu_id], crypt_kernel,
236 		1, NULL, &global_work_size, lws, 0, NULL, multi_profilingEvent[1]), "Run kernel");
237 
238 	for (i = 0; i < (ocl_autotune_running ? 1 : loops); i++) {
239 		BENCH_CLERROR(clEnqueueNDRangeKernel(queue[gpu_id], split_kernel,
240 			1, NULL, &global_work_size, lws, 0, NULL, multi_profilingEvent[2]), "Run split kernel");
241 		BENCH_CLERROR(clFinish(queue[gpu_id]), "clFinish");
242 		opencl_process_event();
243 	}
244 	BENCH_CLERROR(clEnqueueNDRangeKernel(queue[gpu_id], final_kernel,
245 		1, NULL, &global_work_size, lws, 0, NULL,
246 		multi_profilingEvent[3]), "Run final kernel");
247 
248 	// Run Bitwarden decrypt/compare kernel
249 	BENCH_CLERROR(clEnqueueNDRangeKernel(queue[gpu_id], decrypt_kernel,
250 		1, NULL, &global_work_size, lws, 0, NULL, multi_profilingEvent[4]), "Run kernel");
251 
252 	// Read the result back
253 	BENCH_CLERROR(clEnqueueReadBuffer(queue[gpu_id], mem_cracked,
254 		CL_TRUE, 0, cracked_size, cracked, 0,
255 		NULL, multi_profilingEvent[5]), "Copy result back");
256 
257 	return count;
258 }
259 
cmp_all(void * binary,int count)260 static int cmp_all(void *binary, int count)
261 {
262 	int i;
263 
264 	for (i = 0; i < count; i++)
265 		if (cracked[i])
266 			return 1;
267 	return 0;
268 }
269 
cmp_one(void * binary,int index)270 static int cmp_one(void *binary, int index)
271 {
272 	return cracked[index];
273 }
274 
cmp_exact(char * source,int index)275 static int cmp_exact(char *source, int index)
276 {
277 	return 1;
278 }
279 
set_key(char * key,int index)280 static void set_key(char *key, int index)
281 {
282 	int saved_len = MIN(strlen(key), PLAINTEXT_LENGTH);
283 
284 	memcpy(host_pass[index].v, key, saved_len);
285 	host_pass[index].length = saved_len;
286 }
287 
get_key(int index)288 static char *get_key(int index)
289 {
290 	static char ret[PLAINTEXT_LENGTH + 1];
291 	memcpy(ret, host_pass[index].v, PLAINTEXT_LENGTH);
292 	ret[host_pass[index].length] = 0;
293 	return ret;
294 }
295 
296 struct fmt_main fmt_opencl_bitwarden = {
297 	{
298 		FORMAT_LABEL,
299 		FORMAT_NAME,
300 		ALGORITHM_NAME,
301 		BENCHMARK_COMMENT,
302 		BENCHMARK_LENGTH,
303 		0,
304 		PLAINTEXT_LENGTH,
305 		BINARY_SIZE,
306 		BINARY_ALIGN,
307 		SALT_SIZE,
308 		SALT_ALIGN,
309 		MIN_KEYS_PER_CRYPT,
310 		MAX_KEYS_PER_CRYPT,
311 		FMT_CASE | FMT_8_BIT,
312 		{
313 			"iteration count",
314 		},
315 		{ FORMAT_TAG },
316 		bitwarden_tests
317 	}, {
318 		init,
319 		done,
320 		reset,
321 		fmt_default_prepare,
322 		bitwarden_common_valid,
323 		fmt_default_split,
324 		fmt_default_binary,
325 		bitwarden_common_get_salt,
326 		{
327 			bitwarden_common_iteration_count,
328 		},
329 		fmt_default_source,
330 		{
331 			fmt_default_binary_hash
332 		},
333 		fmt_default_salt_hash,
334 		NULL,
335 		set_salt,
336 		set_key,
337 		get_key,
338 		fmt_default_clear_keys,
339 		crypt_all,
340 		{
341 			fmt_default_get_hash
342 		},
343 		cmp_all,
344 		cmp_one,
345 		cmp_exact
346 	}
347 };
348 
349 #endif /* plugin stanza */
350 
351 #endif /* HAVE_OPENCL */
352