1 /*
2 * This software is Copyright (c) 2017 Dhiru Kholia <kholia at kth.se> and
3 * Copyright (c) 2013 Lukas Odzioba <ukasz at openwall dot net> and it is
4 * hereby released to the general public under the following terms:
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted.
8 *
9 * salt length increase. HAS to match pbkdf2_hmac_sha256_kernel.cl code
10 * Now uses a common header file. Dec 2017, JimF.
11 */
12 #ifdef HAVE_OPENCL
13
14 #if FMT_EXTERNS_H
15 extern struct fmt_main fmt_opencl_notes;
16 #elif FMT_REGISTERS_H
17 john_register_one(&fmt_opencl_notes);
18 #else
19
20 #include <stdint.h>
21 #include <string.h>
22
23 #include "misc.h"
24 #include "arch.h"
25 #include "notes_common.h"
26 #include "common.h"
27 #include "formats.h"
28 #include "options.h"
29 #include "opencl_common.h"
30
31 #define FORMAT_LABEL "notes-opencl"
32 #define ALGORITHM_NAME "PBKDF2-SHA256 AES OpenCL"
33 #define BENCHMARK_COMMENT ""
34 #define BENCHMARK_LENGTH 0x107
35 #define MIN_KEYS_PER_CRYPT 1
36 #define MAX_KEYS_PER_CRYPT 1
37 #define BINARY_SIZE 0
38 #define BINARY_ALIGN MEM_ALIGN_WORD
39 #define SALT_SIZE sizeof(*cur_salt)
40 #define SALT_ALIGN sizeof(uint64_t)
41
42 #define HASH_LOOPS (7*113) // factors 7 89 113 (for 70400)
43 #define ITERATIONS 70400
44
45 #include "opencl_pbkdf2_hmac_sha256.h"
46
47 typedef struct {
48 salt_t salt;
49 int type;
50 union { // wrapped kek
51 uint64_t qword[BLOBLEN/8];
52 uint8_t chr[BLOBLEN];
53 } blob;
54 } salt_t2;
55
56 static pass_t *host_pass; /** plain ciphertexts **/
57 static salt_t2 *host_salt; /** salt **/
58 static cl_int cl_error;
59 static cl_mem mem_in, mem_out, mem_salt, mem_state, mem_cracked;
60 static cl_kernel split_kernel, final_kernel, decrypt_kernel;
61 static struct fmt_main *self;
62
63 static unsigned int *cracked, cracked_size;
64 static struct custom_salt *cur_salt;
65
66 #define STEP 0
67 #define SEED 1024
68
69 static const char * warn[] = {
70 "xfer: ", ", init: " , ", crypt: ", ", final", ", decrypt: ", ", res xfer: "
71 };
72
73 static int split_events[] = { 2, -1, -1 };
74
75 // This file contains auto-tuning routine(s). Has to be included after formats definitions.
76 #include "opencl_autotune.h"
77
create_clobj(size_t kpc,struct fmt_main * self)78 static void create_clobj(size_t kpc, struct fmt_main *self)
79 {
80 #define CL_RO CL_MEM_READ_ONLY
81 #define CL_WO CL_MEM_WRITE_ONLY
82 #define CL_RW CL_MEM_READ_WRITE
83
84 #define CLCREATEBUFFER(_flags, _size, _string)\
85 clCreateBuffer(context[gpu_id], _flags, _size, NULL, &cl_error);\
86 HANDLE_CLERROR(cl_error, _string);
87
88 #define CLKERNELARG(kernel, id, arg, msg)\
89 HANDLE_CLERROR(clSetKernelArg(kernel, id, sizeof(arg), &arg), msg);
90
91 host_pass = mem_calloc(kpc, sizeof(pass_t));
92 host_salt = mem_calloc(1, sizeof(salt_t2));
93 cracked_size = kpc * sizeof(*cracked);
94 cracked = mem_calloc(cracked_size, 1);
95
96 mem_in = CLCREATEBUFFER(CL_RO, kpc * sizeof(pass_t),
97 "Cannot allocate mem in");
98 mem_salt = CLCREATEBUFFER(CL_RO, sizeof(salt_t2),
99 "Cannot allocate mem salt");
100 mem_out = CLCREATEBUFFER(CL_WO, kpc * sizeof(crack_t),
101 "Cannot allocate mem out");
102 mem_state = CLCREATEBUFFER(CL_RW, kpc * sizeof(state_t),
103 "Cannot allocate mem state");
104 mem_cracked = CLCREATEBUFFER(CL_RW, cracked_size,
105 "Cannot allocate mem cracked");
106
107 CLKERNELARG(crypt_kernel, 0, mem_in, "Error while setting mem_in");
108 CLKERNELARG(crypt_kernel, 1, mem_salt, "Error while setting mem_salt");
109 CLKERNELARG(crypt_kernel, 2, mem_state, "Error while setting mem_state");
110
111 CLKERNELARG(split_kernel, 0, mem_state, "Error while setting mem_state");
112
113 CLKERNELARG(final_kernel, 0, mem_out, "Error while setting mem_out");
114 CLKERNELARG(final_kernel, 1, mem_salt, "Error while setting mem_salt");
115 CLKERNELARG(final_kernel, 2, mem_state, "Error while setting mem_state");
116
117 CLKERNELARG(decrypt_kernel, 0, mem_salt, "Error while setting mem_salt");
118 CLKERNELARG(decrypt_kernel, 1, mem_out, "Error while setting mem_out");
119 CLKERNELARG(decrypt_kernel, 2, mem_cracked, "Error setting mem_cracked");
120 }
121
122 /* ------- Helper functions ------- */
get_task_max_work_group_size()123 static size_t get_task_max_work_group_size()
124 {
125 size_t s;
126
127 s = autotune_get_task_max_work_group_size(FALSE, 0, crypt_kernel);
128 s = MIN(s, autotune_get_task_max_work_group_size(FALSE, 0, split_kernel));
129 s = MIN(s, autotune_get_task_max_work_group_size(FALSE, 0, final_kernel));
130 s = MIN(s, autotune_get_task_max_work_group_size(FALSE, 0, decrypt_kernel));
131 return s;
132 }
133
release_clobj(void)134 static void release_clobj(void)
135 {
136 if (host_salt) {
137 HANDLE_CLERROR(clReleaseMemObject(mem_cracked), "Release mem cracked");
138 HANDLE_CLERROR(clReleaseMemObject(mem_in), "Release mem in");
139 HANDLE_CLERROR(clReleaseMemObject(mem_salt), "Release mem salt");
140 HANDLE_CLERROR(clReleaseMemObject(mem_out), "Release mem out");
141 HANDLE_CLERROR(clReleaseMemObject(mem_state), "Release mem state");
142
143 MEM_FREE(cracked);
144 MEM_FREE(host_pass);
145 MEM_FREE(host_salt);
146 }
147 }
148
init(struct fmt_main * _self)149 static void init(struct fmt_main *_self)
150 {
151 self = _self;
152 opencl_prepare_dev(gpu_id);
153 }
154
reset(struct db_main * db)155 static void reset(struct db_main *db)
156 {
157 if (!autotuned) {
158 char build_opts[64];
159
160 snprintf(build_opts, sizeof(build_opts),
161 "-DHASH_LOOPS=%u -DPLAINTEXT_LENGTH=%u -DBLOBLEN=%u",
162 HASH_LOOPS, PLAINTEXT_LENGTH, BLOBLEN);
163 opencl_init("$JOHN/kernels/fvde_kernel.cl",
164 gpu_id, build_opts);
165
166 crypt_kernel =
167 clCreateKernel(program[gpu_id], "pbkdf2_sha256_init", &cl_error);
168 HANDLE_CLERROR(cl_error, "Error creating crypt kernel");
169
170 split_kernel =
171 clCreateKernel(program[gpu_id], "pbkdf2_sha256_loop", &cl_error);
172 HANDLE_CLERROR(cl_error, "Error creating split kernel");
173
174 final_kernel =
175 clCreateKernel(program[gpu_id], "pbkdf2_sha256_final", &cl_error);
176 HANDLE_CLERROR(cl_error, "Error creating final kernel");
177
178 decrypt_kernel =
179 clCreateKernel(program[gpu_id], "fvde_decrypt", &cl_error);
180 HANDLE_CLERROR(cl_error, "Error creating decrypt kernel");
181
182 // Initialize openCL tuning (library) for this format.
183 opencl_init_auto_setup(SEED, HASH_LOOPS, split_events, warn,
184 2, self, create_clobj, release_clobj,
185 sizeof(state_t), 0, db);
186
187 // Auto tune execution from shared/included code.
188 autotune_run(self, ITERATIONS, 0, 200);
189 }
190 }
191
done(void)192 static void done(void)
193 {
194 if (autotuned) {
195 release_clobj();
196 HANDLE_CLERROR(clReleaseKernel(crypt_kernel), "Release kernel 1");
197 HANDLE_CLERROR(clReleaseKernel(split_kernel), "Release kernel 2");
198 HANDLE_CLERROR(clReleaseKernel(final_kernel), "Release kernel 3");
199 HANDLE_CLERROR(clReleaseKernel(decrypt_kernel), "Release kernel 4");
200 HANDLE_CLERROR(clReleaseProgram(program[gpu_id]),
201 "Release Program");
202
203 autotuned--;
204 }
205 }
206
set_salt(void * salt)207 static void set_salt(void *salt)
208 {
209 cur_salt = (struct custom_salt*)salt;
210
211 memcpy(host_salt->salt.salt, cur_salt->salt, cur_salt->salt_length);
212 memcpy(host_salt->blob.qword, cur_salt->blob.qword, BLOBLEN);
213 host_salt->salt.length = cur_salt->salt_length;
214 host_salt->salt.rounds = cur_salt->iterations;
215 host_salt->type = cur_salt->type;
216
217 HANDLE_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], mem_salt,
218 CL_FALSE, 0, sizeof(salt_t2), host_salt, 0, NULL, NULL),
219 "Copy salt to gpu");
220 }
221
crypt_all(int * pcount,struct db_salt * salt)222 static int crypt_all(int *pcount, struct db_salt *salt)
223 {
224 int i;
225 const int count = *pcount;
226 int loops = (host_salt->salt.rounds + HASH_LOOPS - 1) / HASH_LOOPS;
227 size_t *lws = local_work_size ? &local_work_size : NULL;
228
229 global_work_size = GET_NEXT_MULTIPLE(count, local_work_size);
230
231 // Copy data to gpu
232 BENCH_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], mem_in,
233 CL_FALSE, 0, global_work_size * sizeof(pass_t), host_pass, 0,
234 NULL, multi_profilingEvent[0]), "Copy data to gpu");
235
236 // Run kernel
237 BENCH_CLERROR(clEnqueueNDRangeKernel(queue[gpu_id], crypt_kernel,
238 1, NULL, &global_work_size, lws, 0, NULL, multi_profilingEvent[1]), "Run kernel");
239
240 for (i = 0; i < (ocl_autotune_running ? 1 : loops); i++) {
241 BENCH_CLERROR(clEnqueueNDRangeKernel(queue[gpu_id], split_kernel,
242 1, NULL, &global_work_size, lws, 0, NULL, multi_profilingEvent[2]), "Run split kernel");
243 BENCH_CLERROR(clFinish(queue[gpu_id]), "clFinish");
244 opencl_process_event();
245 }
246 BENCH_CLERROR(clEnqueueNDRangeKernel(queue[gpu_id], final_kernel,
247 1, NULL, &global_work_size, lws, 0, NULL,
248 multi_profilingEvent[3]), "Run final kernel");
249
250 // Run FVDE decrypt/compare kernel
251 BENCH_CLERROR(clEnqueueNDRangeKernel(queue[gpu_id], decrypt_kernel,
252 1, NULL, &global_work_size, lws, 0, NULL, multi_profilingEvent[4]), "Run kernel");
253
254 // Read the result back
255 BENCH_CLERROR(clEnqueueReadBuffer(queue[gpu_id], mem_cracked,
256 CL_TRUE, 0, cracked_size, cracked, 0,
257 NULL, multi_profilingEvent[5]), "Copy result back");
258
259 return count;
260 }
261
cmp_all(void * binary,int count)262 static int cmp_all(void *binary, int count)
263 {
264 int i;
265
266 for (i = 0; i < count; i++)
267 if (cracked[i])
268 return 1;
269 return 0;
270 }
271
cmp_one(void * binary,int index)272 static int cmp_one(void *binary, int index)
273 {
274 return cracked[index];
275 }
276
cmp_exact(char * source,int index)277 static int cmp_exact(char *source, int index)
278 {
279 return 1;
280 }
281
set_key(char * key,int index)282 static void set_key(char *key, int index)
283 {
284 int saved_len = MIN(strlen(key), PLAINTEXT_LENGTH);
285
286 memcpy(host_pass[index].v, key, saved_len);
287 host_pass[index].length = saved_len;
288 }
289
get_key(int index)290 static char *get_key(int index)
291 {
292 static char ret[PLAINTEXT_LENGTH + 1];
293
294 memcpy(ret, host_pass[index].v, PLAINTEXT_LENGTH);
295 ret[host_pass[index].length] = 0;
296
297 return ret;
298 }
299
300 struct fmt_main fmt_opencl_notes = {
301 {
302 FORMAT_LABEL,
303 FORMAT_NAME,
304 ALGORITHM_NAME,
305 BENCHMARK_COMMENT,
306 BENCHMARK_LENGTH,
307 0,
308 PLAINTEXT_LENGTH,
309 BINARY_SIZE,
310 BINARY_ALIGN,
311 SALT_SIZE,
312 SALT_ALIGN,
313 MIN_KEYS_PER_CRYPT,
314 MAX_KEYS_PER_CRYPT,
315 FMT_CASE | FMT_8_BIT,
316 {
317 "iteration count",
318 },
319 { FORMAT_TAG },
320 notes_tests
321 }, {
322 init,
323 done,
324 reset,
325 fmt_default_prepare,
326 notes_common_valid,
327 fmt_default_split,
328 fmt_default_binary,
329 notes_common_get_salt,
330 {
331 notes_common_iteration_count,
332 },
333 fmt_default_source,
334 {
335 fmt_default_binary_hash
336 },
337 fmt_default_salt_hash,
338 NULL,
339 set_salt,
340 set_key,
341 get_key,
342 fmt_default_clear_keys,
343 crypt_all,
344 {
345 fmt_default_get_hash
346 },
347 cmp_all,
348 cmp_one,
349 cmp_exact
350 }
351 };
352
353 #endif /* plugin stanza */
354
355 #endif /* HAVE_OPENCL */
356