1 /*
2 * This software is Copyright (c) 2018 Dhiru Kholia, Copyright (c) 2017 Jim
3 * Fougeron, and Copyright (c) 2013 Lukas Odzioba <ukasz at openwall dot net>
4 * and it is hereby released to the general public under the following terms:
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted.
8 */
9
10 #ifdef HAVE_OPENCL
11
12 #if FMT_EXTERNS_H
13 extern struct fmt_main fmt_opencl_bitwarden;
14 #elif FMT_REGISTERS_H
15 john_register_one(&fmt_opencl_bitwarden);
16 #else
17
18 #include <stdint.h>
19 #include <string.h>
20
21 #include "misc.h"
22 #include "arch.h"
23 #include "common.h"
24 #include "formats.h"
25 #include "options.h"
26 #include "bitwarden_common.h"
27 #include "opencl_common.h"
28
29 #define FORMAT_LABEL "bitwarden-opencl"
30 #define ALGORITHM_NAME "PBKDF2-SHA256 AES OpenCL"
31 #define BENCHMARK_COMMENT ""
32 #define BENCHMARK_LENGTH 0x507
33 #define MIN_KEYS_PER_CRYPT 1
34 #define MAX_KEYS_PER_CRYPT 1
35 #define BINARY_SIZE 0
36 #define BINARY_ALIGN MEM_ALIGN_WORD
37 #define SALT_SIZE sizeof(struct custom_salt)
38 #define SALT_ALIGN sizeof(uint64_t)
39
40 #define HASH_LOOPS (7*113) // factors 7 89 113 (for 70400)
41 #define ITERATIONS 70400
42
43 #include "opencl_pbkdf2_hmac_sha256.h"
44
45 typedef struct {
46 salt_t salt; // this MUST match opencl_pbkdf2_hmac_sha256.cl structure!
47
48 // bitwarden extension
49 union {
50 uint64_t qword[32/8];
51 uint8_t chr[32];
52 } blob;
53 } salt_t2;
54
55 static pass_t *host_pass;
56 static salt_t2 *host_salt;
57 static cl_int cl_error;
58 static cl_mem mem_in, mem_out, mem_salt, mem_state, mem_cracked;
59 static cl_kernel split_kernel, final_kernel, decrypt_kernel;
60 static struct fmt_main *self;
61
62 static unsigned int *cracked, cracked_size;
63 static struct custom_salt *cur_salt;
64
65 #define STEP 0
66 #define SEED 1024
67
68 static const char * warn[] = {
69 "xfer: ", ", init: " , ", crypt: ", ", final", ", decrypt: ", ", res xfer: "
70 };
71
72 static int split_events[] = { 2, -1, -1 };
73
74 // This file contains auto-tuning routine(s). Has to be included after formats definitions.
75 #include "opencl_autotune.h"
76
create_clobj(size_t kpc,struct fmt_main * self)77 static void create_clobj(size_t kpc, struct fmt_main *self)
78 {
79 #define CL_RO CL_MEM_READ_ONLY
80 #define CL_WO CL_MEM_WRITE_ONLY
81 #define CL_RW CL_MEM_READ_WRITE
82
83 #define CLCREATEBUFFER(_flags, _size, _string)\
84 clCreateBuffer(context[gpu_id], _flags, _size, NULL, &cl_error);\
85 HANDLE_CLERROR(cl_error, _string);
86
87 #define CLKERNELARG(kernel, id, arg, msg)\
88 HANDLE_CLERROR(clSetKernelArg(kernel, id, sizeof(arg), &arg), msg);
89
90 host_pass = mem_calloc(kpc, sizeof(pass_t));
91 host_salt = mem_calloc(1, sizeof(salt_t2));
92 cracked_size = kpc * sizeof(*cracked);
93 cracked = mem_calloc(cracked_size, 1);
94
95 mem_in = CLCREATEBUFFER(CL_RO, kpc * sizeof(pass_t),
96 "Cannot allocate mem in");
97 mem_salt = CLCREATEBUFFER(CL_RO, sizeof(salt_t2),
98 "Cannot allocate mem salt");
99 mem_out = CLCREATEBUFFER(CL_WO, kpc * sizeof(crack_t),
100 "Cannot allocate mem out");
101 mem_state = CLCREATEBUFFER(CL_RW, kpc * sizeof(state_t),
102 "Cannot allocate mem state");
103 mem_cracked = CLCREATEBUFFER(CL_RW, cracked_size,
104 "Cannot allocate mem cracked");
105
106 CLKERNELARG(crypt_kernel, 0, mem_in, "Error while setting mem_in");
107 CLKERNELARG(crypt_kernel, 1, mem_salt, "Error while setting mem_salt");
108 CLKERNELARG(crypt_kernel, 2, mem_state, "Error while setting mem_state");
109
110 CLKERNELARG(split_kernel, 0, mem_state, "Error while setting mem_state");
111
112 CLKERNELARG(final_kernel, 0, mem_out, "Error while setting mem_out");
113 CLKERNELARG(final_kernel, 1, mem_salt, "Error while setting mem_salt");
114 CLKERNELARG(final_kernel, 2, mem_state, "Error while setting mem_state");
115
116 CLKERNELARG(decrypt_kernel, 0, mem_salt, "Error while setting mem_salt");
117 CLKERNELARG(decrypt_kernel, 1, mem_out, "Error while setting mem_out");
118 CLKERNELARG(decrypt_kernel, 2, mem_cracked, "Error setting mem_cracked");
119 }
120
121 /* ------- Helper functions ------- */
get_task_max_work_group_size()122 static size_t get_task_max_work_group_size()
123 {
124 size_t s;
125
126 s = autotune_get_task_max_work_group_size(FALSE, 0, crypt_kernel);
127 s = MIN(s, autotune_get_task_max_work_group_size(FALSE, 0, split_kernel));
128 s = MIN(s, autotune_get_task_max_work_group_size(FALSE, 0, final_kernel));
129 s = MIN(s, autotune_get_task_max_work_group_size(FALSE, 0, decrypt_kernel));
130 return s;
131 }
132
release_clobj(void)133 static void release_clobj(void)
134 {
135 if (host_salt) {
136 HANDLE_CLERROR(clReleaseMemObject(mem_cracked), "Release mem cracked");
137 HANDLE_CLERROR(clReleaseMemObject(mem_in), "Release mem in");
138 HANDLE_CLERROR(clReleaseMemObject(mem_salt), "Release mem salt");
139 HANDLE_CLERROR(clReleaseMemObject(mem_out), "Release mem out");
140 HANDLE_CLERROR(clReleaseMemObject(mem_state), "Release mem state");
141
142 MEM_FREE(cracked);
143 MEM_FREE(host_pass);
144 MEM_FREE(host_salt);
145 }
146 }
147
init(struct fmt_main * _self)148 static void init(struct fmt_main *_self)
149 {
150 self = _self;
151 opencl_prepare_dev(gpu_id);
152 }
153
reset(struct db_main * db)154 static void reset(struct db_main *db)
155 {
156 if (!autotuned) {
157 char build_opts[64];
158
159 snprintf(build_opts, sizeof(build_opts),
160 "-DHASH_LOOPS=%u -DPLAINTEXT_LENGTH=%u",
161 HASH_LOOPS, PLAINTEXT_LENGTH);
162 opencl_init("$JOHN/kernels/bitwarden_kernel.cl",
163 gpu_id, build_opts);
164
165 crypt_kernel =
166 clCreateKernel(program[gpu_id], "pbkdf2_sha256_init", &cl_error);
167 HANDLE_CLERROR(cl_error, "Error creating crypt kernel");
168
169 split_kernel =
170 clCreateKernel(program[gpu_id], "pbkdf2_sha256_loop", &cl_error);
171 HANDLE_CLERROR(cl_error, "Error creating split kernel");
172
173 final_kernel =
174 clCreateKernel(program[gpu_id], "pbkdf2_sha256_final", &cl_error);
175 HANDLE_CLERROR(cl_error, "Error creating final kernel");
176
177 decrypt_kernel =
178 clCreateKernel(program[gpu_id], "bitwarden_decrypt", &cl_error);
179 HANDLE_CLERROR(cl_error, "Error creating decrypt kernel");
180
181 // Initialize openCL tuning (library) for this format.
182 opencl_init_auto_setup(SEED, HASH_LOOPS, split_events, warn,
183 2, self, create_clobj, release_clobj,
184 sizeof(state_t), 0, db);
185
186 // Auto tune execution from shared/included code.
187 autotune_run(self, ITERATIONS, 0, 200);
188 }
189 }
190
done(void)191 static void done(void)
192 {
193 if (autotuned) {
194 release_clobj();
195 HANDLE_CLERROR(clReleaseKernel(crypt_kernel), "Release kernel 1");
196 HANDLE_CLERROR(clReleaseKernel(split_kernel), "Release kernel 2");
197 HANDLE_CLERROR(clReleaseKernel(final_kernel), "Release kernel 3");
198 HANDLE_CLERROR(clReleaseKernel(decrypt_kernel), "Release kernel 4");
199 HANDLE_CLERROR(clReleaseProgram(program[gpu_id]),
200 "Release Program");
201
202 autotuned--;
203 }
204 }
205
set_salt(void * salt)206 static void set_salt(void *salt)
207 {
208 cur_salt = (struct custom_salt*)salt;
209
210 memcpy(host_salt->salt.salt, cur_salt->salt, cur_salt->salt_length);
211 memcpy(host_salt->blob.chr, cur_salt->blob + BLOBLEN - 32, 32);
212 host_salt->salt.length = cur_salt->salt_length;
213 host_salt->salt.rounds = cur_salt->iterations;
214
215 HANDLE_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], mem_salt,
216 CL_FALSE, 0, sizeof(salt_t2), host_salt, 0, NULL, NULL),
217 "Copy salt to gpu");
218 }
219
crypt_all(int * pcount,struct db_salt * salt)220 static int crypt_all(int *pcount, struct db_salt *salt)
221 {
222 int i;
223 const int count = *pcount;
224 int loops = (host_salt->salt.rounds + HASH_LOOPS - 1) / HASH_LOOPS;
225 size_t *lws = local_work_size ? &local_work_size : NULL;
226
227 global_work_size = GET_NEXT_MULTIPLE(count, local_work_size);
228
229 // Copy data to gpu
230 BENCH_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], mem_in,
231 CL_FALSE, 0, global_work_size * sizeof(pass_t), host_pass, 0,
232 NULL, multi_profilingEvent[0]), "Copy data to gpu");
233
234 // Run kernel
235 BENCH_CLERROR(clEnqueueNDRangeKernel(queue[gpu_id], crypt_kernel,
236 1, NULL, &global_work_size, lws, 0, NULL, multi_profilingEvent[1]), "Run kernel");
237
238 for (i = 0; i < (ocl_autotune_running ? 1 : loops); i++) {
239 BENCH_CLERROR(clEnqueueNDRangeKernel(queue[gpu_id], split_kernel,
240 1, NULL, &global_work_size, lws, 0, NULL, multi_profilingEvent[2]), "Run split kernel");
241 BENCH_CLERROR(clFinish(queue[gpu_id]), "clFinish");
242 opencl_process_event();
243 }
244 BENCH_CLERROR(clEnqueueNDRangeKernel(queue[gpu_id], final_kernel,
245 1, NULL, &global_work_size, lws, 0, NULL,
246 multi_profilingEvent[3]), "Run final kernel");
247
248 // Run Bitwarden decrypt/compare kernel
249 BENCH_CLERROR(clEnqueueNDRangeKernel(queue[gpu_id], decrypt_kernel,
250 1, NULL, &global_work_size, lws, 0, NULL, multi_profilingEvent[4]), "Run kernel");
251
252 // Read the result back
253 BENCH_CLERROR(clEnqueueReadBuffer(queue[gpu_id], mem_cracked,
254 CL_TRUE, 0, cracked_size, cracked, 0,
255 NULL, multi_profilingEvent[5]), "Copy result back");
256
257 return count;
258 }
259
cmp_all(void * binary,int count)260 static int cmp_all(void *binary, int count)
261 {
262 int i;
263
264 for (i = 0; i < count; i++)
265 if (cracked[i])
266 return 1;
267 return 0;
268 }
269
cmp_one(void * binary,int index)270 static int cmp_one(void *binary, int index)
271 {
272 return cracked[index];
273 }
274
cmp_exact(char * source,int index)275 static int cmp_exact(char *source, int index)
276 {
277 return 1;
278 }
279
set_key(char * key,int index)280 static void set_key(char *key, int index)
281 {
282 int saved_len = MIN(strlen(key), PLAINTEXT_LENGTH);
283
284 memcpy(host_pass[index].v, key, saved_len);
285 host_pass[index].length = saved_len;
286 }
287
get_key(int index)288 static char *get_key(int index)
289 {
290 static char ret[PLAINTEXT_LENGTH + 1];
291 memcpy(ret, host_pass[index].v, PLAINTEXT_LENGTH);
292 ret[host_pass[index].length] = 0;
293 return ret;
294 }
295
296 struct fmt_main fmt_opencl_bitwarden = {
297 {
298 FORMAT_LABEL,
299 FORMAT_NAME,
300 ALGORITHM_NAME,
301 BENCHMARK_COMMENT,
302 BENCHMARK_LENGTH,
303 0,
304 PLAINTEXT_LENGTH,
305 BINARY_SIZE,
306 BINARY_ALIGN,
307 SALT_SIZE,
308 SALT_ALIGN,
309 MIN_KEYS_PER_CRYPT,
310 MAX_KEYS_PER_CRYPT,
311 FMT_CASE | FMT_8_BIT,
312 {
313 "iteration count",
314 },
315 { FORMAT_TAG },
316 bitwarden_tests
317 }, {
318 init,
319 done,
320 reset,
321 fmt_default_prepare,
322 bitwarden_common_valid,
323 fmt_default_split,
324 fmt_default_binary,
325 bitwarden_common_get_salt,
326 {
327 bitwarden_common_iteration_count,
328 },
329 fmt_default_source,
330 {
331 fmt_default_binary_hash
332 },
333 fmt_default_salt_hash,
334 NULL,
335 set_salt,
336 set_key,
337 get_key,
338 fmt_default_clear_keys,
339 crypt_all,
340 {
341 fmt_default_get_hash
342 },
343 cmp_all,
344 cmp_one,
345 cmp_exact
346 }
347 };
348
349 #endif /* plugin stanza */
350
351 #endif /* HAVE_OPENCL */
352