/*
 * This software is Copyright (c) 2018, Dhiru Kholia <kholia at kth dot se>,
 * Copyright (c) 2012-2013 Lukas Odzioba, Copyright (c) 2014 JimF, Copyright
 * (c) 2014 magnum, and it is hereby released to the general public under the
 * following terms:
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted.
 *
 * Based on opencl_pbkdf2_hmac_sha512_fmt_plug.c file.
 */

#ifdef HAVE_OPENCL

#if FMT_EXTERNS_H
extern struct fmt_main fmt_opencl_axcrypt2;
#elif FMT_REGISTERS_H
john_register_one(&fmt_opencl_axcrypt2);
#else

#include <stdint.h>
#include <string.h>

#include "misc.h"
#include "arch.h"
#include "common.h"
#include "formats.h"
#include "options.h"
#include "unicode.h"
#include "opencl_common.h"
#include "axcrypt_common.h"
#define VERSION_2_SUPPORT 1
#include "axcrypt_variable_code.h"
#include "pbkdf2_hmac_common.h"

#define FORMAT_NAME             "AxCrypt 2.x"
#define FORMAT_LABEL            "axcrypt2-opencl"
#define ALGORITHM_NAME          "PBKDF2-SHA512 AES OpenCL"
#define BINARY_SIZE             0
#define BINARY_ALIGN            MEM_ALIGN_WORD
#define SALT_SIZE               sizeof(struct custom_salt *)
#define SALT_ALIGN              sizeof(struct custom_salt *)
#define PLAINTEXT_LENGTH        110
#define MIN_KEYS_PER_CRYPT      1
#define MAX_KEYS_PER_CRYPT      1
#define KERNEL_NAME             "pbkdf2_sha512_kernel"
#define SPLIT_KERNEL_NAME       "pbkdf2_sha512_loop"
#define FINAL_KERNEL_NAME       "axcrypt2_final"

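// HASH_LOOPS is the number of PBKDF2-SHA512 iterations processed per call of
// the split kernel; ITERATIONS is the iteration count used as the cost
// estimate when auto-tuning (passed to autotune_run() in reset()).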
#define HASH_LOOPS              250
#define ITERATIONS              25000

typedef struct {
	// for plaintext, we must make sure it is a full uint64_t width.
	uint64_t v[(PLAINTEXT_LENGTH + 7) / 8]; // v must be kept aligned(8)
	uint64_t length; // keep 64 bit aligned, length is overkill, but easiest way to stay aligned.
} pass_t;

typedef struct {
	uint64_t hash[8];
} crack_t;

typedef struct {
	// for salt, we append \x00\x00\x00\x01\x80 and must make sure it is a full uint64 width
	uint64_t salt[(PBKDF2_64_MAX_SALT_SIZE + 1 + 4 + 7) / 8]; // salt must be kept aligned(8)
	uint32_t length;
	uint32_t rounds;
} salt_t;

typedef struct {
	uint64_t ipad[8];
	uint64_t opad[8];
	uint64_t hash[8];
	uint64_t W[8];
	cl_uint rounds;
} state_t;

typedef struct {
	salt_t pbkdf2;
	uint32_t key_wrapping_rounds;
	unsigned char salt[64];
	unsigned char wrappedkey[144];
} axcrypt2_salt_t;

typedef struct {
	uint32_t cracked;
} out_t;

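/*
 * Host-side copies of the data exchanged with the GPU: pass_t candidates go
 * to mem_in, state_t holds the intermediate PBKDF2 state kept on the device
 * in mem_state, crack_t receives the derived key in mem_dk, and out_t is the
 * per-candidate "cracked" flag read back from mem_out. These layouts are
 * expected to match the structs used by kernels/axcrypt2_kernel.cl.
 */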
static struct custom_salt *cur_salt;

static pass_t *host_pass;
static axcrypt2_salt_t *host_salt;
static out_t *host_crack;
static cl_mem mem_in, mem_salt, mem_state, mem_dk, mem_out;
static cl_kernel split_kernel, final_kernel;
static cl_int cl_error;
static struct fmt_main *self;

#define STEP 0
#define SEED 256

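/*
 * Labels used by the auto-tune code for the profiling events set up in
 * crypt_all(): 0 = password transfer, 1 = initial PBKDF2 kernel, 2 = split
 * (loop) kernel, 3 = final kernel, 4 = result transfer. split_events marks
 * event 2 as the one repeated HASH_LOOPS iterations at a time.
 */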
static const char *warn[] = {
	"xfer: ", ", init: ", ", crypt: ", ", final: ", ", res xfer: "
};

static int split_events[] = { 2, -1, -1 };

// This file contains auto-tuning routine(s). It has to be included after the format definitions.
#include "opencl_autotune.h"

/* ------- Helper functions ------- */
static size_t get_task_max_work_group_size()
{
	size_t s;

	s = autotune_get_task_max_work_group_size(FALSE, 0, crypt_kernel);
	s = MIN(s, autotune_get_task_max_work_group_size(FALSE, 0, split_kernel));
	return MIN(s, autotune_get_task_max_work_group_size(FALSE, 0, final_kernel));
}

// Forward declaration: release_clobj() is defined below but is passed to
// opencl_init_auto_setup() in reset().
static void release_clobj(void);

static void create_clobj(size_t kpc, struct fmt_main *self)
{
	host_pass = mem_calloc(kpc, sizeof(pass_t));
	host_crack = mem_calloc(kpc, sizeof(out_t));
	host_salt = mem_calloc(1, sizeof(axcrypt2_salt_t));

#define CL_RO CL_MEM_READ_ONLY
#define CL_WO CL_MEM_WRITE_ONLY
#define CL_RW CL_MEM_READ_WRITE

#define CLCREATEBUFFER(_flags, _size, _string) \
	clCreateBuffer(context[gpu_id], _flags, _size, NULL, &cl_error); \
	HANDLE_CLERROR(cl_error, _string);

#define CLKERNELARG(kernel, id, arg, msg) \
	HANDLE_CLERROR(clSetKernelArg(kernel, id, sizeof(arg), &arg), msg);

	mem_salt = CLCREATEBUFFER(CL_RO, sizeof(axcrypt2_salt_t),
	                          "Cannot allocate mem salt");
	mem_in = CLCREATEBUFFER(CL_RO, kpc * sizeof(pass_t),
	                        "Cannot allocate mem in");
	mem_state = CLCREATEBUFFER(CL_RW, kpc * sizeof(state_t),
	                           "Cannot allocate mem state");
	mem_dk = CLCREATEBUFFER(CL_RW, kpc * sizeof(crack_t),
	                        "Cannot allocate mem dk");
	mem_out = CLCREATEBUFFER(CL_WO, kpc * sizeof(out_t),
	                         "Cannot allocate mem out");

	CLKERNELARG(crypt_kernel, 0, mem_in, "Error while setting mem_in");
	CLKERNELARG(crypt_kernel, 1, mem_salt, "Error while setting mem_salt");
	CLKERNELARG(crypt_kernel, 2, mem_state, "Error while setting mem_state");

	CLKERNELARG(split_kernel, 0, mem_state, "Error while setting mem_state");
	CLKERNELARG(split_kernel, 1, mem_dk, "Error while setting mem_dk");

	CLKERNELARG(final_kernel, 0, mem_dk, "Error while setting mem_dk");
	CLKERNELARG(final_kernel, 1, mem_salt, "Error while setting mem_salt");
	CLKERNELARG(final_kernel, 2, mem_out, "Error while setting mem_out");
}

static void init(struct fmt_main *_self)
{
	self = _self;
	opencl_prepare_dev(gpu_id);
}

static void reset(struct db_main *db)
{
	if (!autotuned) {
		char build_opts[128];

		snprintf(build_opts, sizeof(build_opts),
		         "-DHASH_LOOPS=%u -DPLAINTEXT_LENGTH=%d -DPBKDF2_64_MAX_SALT_SIZE=%d",
		         HASH_LOOPS, PLAINTEXT_LENGTH, PBKDF2_64_MAX_SALT_SIZE);

		opencl_init("$JOHN/kernels/axcrypt2_kernel.cl", gpu_id, build_opts);

		crypt_kernel = clCreateKernel(program[gpu_id], KERNEL_NAME, &cl_error);
		HANDLE_CLERROR(cl_error, "Error creating kernel");

		split_kernel =
			clCreateKernel(program[gpu_id], SPLIT_KERNEL_NAME, &cl_error);
		HANDLE_CLERROR(cl_error, "Error creating split kernel");

		final_kernel =
			clCreateKernel(program[gpu_id], FINAL_KERNEL_NAME, &cl_error);
		HANDLE_CLERROR(cl_error, "Error creating final kernel");

		// Initialize openCL tuning (library) for this format.
		opencl_init_auto_setup(SEED, HASH_LOOPS, split_events, warn, 2,
		                       self, create_clobj, release_clobj,
		                       sizeof(state_t), 0, db);

		// Auto tune execution from shared/included code.
		autotune_run(self, ITERATIONS, 0, 200);
	}
}

static void release_clobj(void)
{
	if (host_pass) {
		HANDLE_CLERROR(clReleaseMemObject(mem_salt), "Release mem salt");
		HANDLE_CLERROR(clReleaseMemObject(mem_in), "Release mem in");
		HANDLE_CLERROR(clReleaseMemObject(mem_state), "Release mem state");
		HANDLE_CLERROR(clReleaseMemObject(mem_dk), "Release mem dk");
		HANDLE_CLERROR(clReleaseMemObject(mem_out), "Release mem out");

		MEM_FREE(host_pass);
		MEM_FREE(host_salt);
		MEM_FREE(host_crack);
	}
}

static void done(void)
{
	if (autotuned) {
		release_clobj();
		HANDLE_CLERROR(clReleaseKernel(final_kernel), "Release kernel");
		HANDLE_CLERROR(clReleaseKernel(split_kernel), "Release kernel");
		HANDLE_CLERROR(clReleaseKernel(crypt_kernel), "Release kernel");
		HANDLE_CLERROR(clReleaseProgram(program[gpu_id]), "Release Program");

		autotuned--;
	}
}

static int axcrypt2_valid(char *ciphertext, struct fmt_main *self)
{
	return axcrypt_common_valid(ciphertext, self, 2);
}

static void *get_salt(char *ciphertext)
{
	char *ctcopy = strdup(ciphertext);
	char *keeptr = ctcopy;
	char *p;
	int i;
	static struct custom_salt cs;
	static void *ptr;
	int saltlen = 0;
	int wrappedkeylen;

	memset(&cs, 0, sizeof(cs));
	cs.keyfile = NULL;
	ctcopy += FORMAT_TAG_LEN;
	p = strtokm(ctcopy, "*");
	cs.version = atoi(p);

	saltlen = 64; // WrapSalt
	wrappedkeylen = 144;

	p = strtokm(NULL, "*");
	cs.key_wrapping_rounds = (uint32_t) atoi(p);

	p = strtokm(NULL, "*");
	for (i = 0; i < saltlen; i++)
		cs.salt[i] = atoi16[ARCH_INDEX(p[i * 2])] * 16
			+ atoi16[ARCH_INDEX(p[i * 2 + 1])];

	p = strtokm(NULL, "*");
	for (i = 0; i < wrappedkeylen; i++)
		cs.wrappedkey[i] = atoi16[ARCH_INDEX(p[i * 2])] * 16
			+ atoi16[ARCH_INDEX(p[i * 2 + 1])];

	if (cs.version == 2) {
		p = strtokm(NULL, "*");
		cs.deriv_iterations = atoi(p);
		p = strtokm(NULL, "*");

		for (i = 0; i < 32; i++)
			cs.deriv_salt[i] = atoi16[ARCH_INDEX(p[i * 2])] * 16 + atoi16[ARCH_INDEX(p[i * 2 + 1])];
	}

	// we append the count and EOM here, one time.
	memcpy(cs.deriv_salt + 32, "\x0\x0\x0\x1\x80", 5);
	cs.deriv_salt_length = 32 + 5; // the 0x80 byte is included in our salt length; the .cl kernel knows to reduce it by 1

	MEM_FREE(keeptr);

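	// FMT_DYNA_SALT bookkeeping: salts are compared over the region of
	// struct custom_salt spanning 'salt' through 'wrappedkey'.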
	cs.dsalt.salt_cmp_offset = SALT_CMP_OFF(struct custom_salt, salt);
	cs.dsalt.salt_cmp_size = SALT_CMP_SIZE(struct custom_salt, salt, wrappedkey, 0);
	cs.dsalt.salt_alloc_needs_free = 0;

	ptr = mem_alloc_tiny(sizeof(struct custom_salt), MEM_ALIGN_WORD);
	memcpy(ptr, &cs, sizeof(struct custom_salt));

	return (void *)&ptr;
}

static void set_salt(void *salt)
{
	cur_salt = *(struct custom_salt **) salt;

	memcpy(host_salt->pbkdf2.salt, cur_salt->deriv_salt, cur_salt->deriv_salt_length);
	host_salt->pbkdf2.length = cur_salt->deriv_salt_length;
	host_salt->pbkdf2.rounds = cur_salt->deriv_iterations;

	memcpy(host_salt->salt, cur_salt->salt, 64);
	memcpy(host_salt->wrappedkey, cur_salt->wrappedkey, 144);

	host_salt->key_wrapping_rounds = cur_salt->key_wrapping_rounds;

	HANDLE_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], mem_salt,
		CL_FALSE, 0, sizeof(axcrypt2_salt_t), host_salt, 0, NULL, NULL),
		"Copy salt to gpu");
}

static int crypt_all(int *pcount, struct db_salt *salt)
{
	const int count = *pcount;
	int i, loops = (host_salt->pbkdf2.rounds + HASH_LOOPS - 1) / HASH_LOOPS;
	size_t *lws = local_work_size ? &local_work_size : NULL;
	size_t gws = GET_NEXT_MULTIPLE(count, local_work_size);

	// Copy data to gpu
	BENCH_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], mem_in, CL_FALSE, 0,
		gws * sizeof(pass_t), host_pass,
		0, NULL, multi_profilingEvent[0]),
		"Copy data to gpu");

	// Run standard PBKDF2 kernel
	BENCH_CLERROR(clEnqueueNDRangeKernel(queue[gpu_id], crypt_kernel, 1,
		NULL, &gws, lws, 0, NULL,
		multi_profilingEvent[1]), "Run kernel");

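	// The PBKDF2 iterations are processed in batches of HASH_LOOPS per call
	// of the split kernel ('loops' above is the number of such batches), so
	// individual launches stay short and opencl_process_event() can run
	// between them.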
	for (i = 0; i < (ocl_autotune_running ? 1 : loops); i++) {
		BENCH_CLERROR(clEnqueueNDRangeKernel(queue[gpu_id],
			split_kernel, 1, NULL,
			&gws, lws, 0, NULL,
			multi_profilingEvent[2]), "Run split kernel");
		BENCH_CLERROR(clFinish(queue[gpu_id]), "clFinish");
		opencl_process_event();
	}

	// Run the AxCrypt 2.x post-processing kernel
	BENCH_CLERROR(clEnqueueNDRangeKernel(queue[gpu_id], final_kernel, 1,
		NULL, &gws, lws, 0, NULL,
		multi_profilingEvent[3]), "Run kernel");

	// Read the result back
	BENCH_CLERROR(clEnqueueReadBuffer(queue[gpu_id], mem_out, CL_TRUE, 0,
		gws * sizeof(out_t), host_crack,
		0, NULL, multi_profilingEvent[4]), "Copy result back");

	return count;
}

static int cmp_all(void *binary, int count)
{
	int index;

	for (index = 0; index < count; index++)
		if (host_crack[index].cracked)
			return 1;
	return 0;
}

static int cmp_one(void *binary, int index)
{
	return host_crack[index].cracked;
}

static int cmp_exact(char *source, int index)
{
	return 1;
}

static void set_key(char *key, int index)
{
	int saved_len = MIN(strlen(key), PLAINTEXT_LENGTH);

	// make sure the LAST uint64 that has any key in it gets nulled, since we
	// simply ^= the whole uint64 with the ipad/opad mask
	strncpy((char*)host_pass[index].v, key, PLAINTEXT_LENGTH);
	host_pass[index].length = saved_len;
}

static char *get_key(int index)
{
	static char ret[PLAINTEXT_LENGTH + 1];

	memcpy(ret, host_pass[index].v, PLAINTEXT_LENGTH);
	ret[host_pass[index].length] = 0;

	return ret;
}

struct fmt_main fmt_opencl_axcrypt2 = {
	{
		FORMAT_LABEL,
		FORMAT_NAME,
		ALGORITHM_NAME,
		BENCHMARK_COMMENT,
		BENCHMARK_LENGTH,
		0,
		PLAINTEXT_LENGTH,
		BINARY_SIZE,
		BINARY_ALIGN,
		SALT_SIZE,
		SALT_ALIGN,
		MIN_KEYS_PER_CRYPT,
		MAX_KEYS_PER_CRYPT,
		FMT_CASE | FMT_8_BIT | FMT_DYNA_SALT | FMT_HUGE_INPUT,
		{
			"iteration count",
		},
		{ FORMAT_TAG },
		axcrypt_tests
	}, {
		init,
		done,
		reset,
		fmt_default_prepare,
		axcrypt2_valid,
		fmt_default_split,
		fmt_default_binary,
		get_salt,
		{
			axcrypt_iteration_count,
		},
		fmt_default_source,
		{
			fmt_default_binary_hash
		},
		fmt_default_salt_hash,
		NULL,
		set_salt,
		set_key,
		get_key,
		fmt_default_clear_keys,
		crypt_all,
		{
			fmt_default_get_hash
		},
		cmp_all,
		cmp_one,
		cmp_exact
	}
};

#endif /* plugin stanza */

#endif /* HAVE_OPENCL */