1 /*
2 * JtR OpenCL format to crack OpenBSD-SoftRAID hashes.
3 *
4 * This software is Copyright (c) 2017, Dhiru Kholia <dhiru at openwall.com>,
5 * and it is hereby released to the general public under the following terms:
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted.
8 *
9 * The OpenCL boilerplate code is borrowed from other OpenCL formats.
10 */
11
12 #ifdef HAVE_OPENCL
13
14 #if FMT_EXTERNS_H
15 extern struct fmt_main fmt_opencl_openbsd_softraid;
16 #elif FMT_REGISTERS_H
17 john_register_one(&fmt_opencl_openbsd_softraid);
18 #else
19
20 #include <string.h>
21 #include <stdint.h>
22
23 #include "arch.h"
24 #include "formats.h"
25 #include "common.h"
26 #include "options.h"
27 #include "jumbo.h"
28 #include "opencl_common.h"
29 #include "misc.h"
30 #define OUTLEN (32)
31 #include "opencl_pbkdf2_hmac_sha1.h"
32 #include "openbsdsoftraid_common.h"
33 #include "openbsdsoftraid_variable_code.h"
34
35 #define FORMAT_LABEL "OpenBSD-SoftRAID-opencl"
36 #define ALGORITHM_NAME "PBKDF2-SHA1 AES OpenCL"
37 #define BENCHMARK_COMMENT ""
38 #define BENCHMARK_LENGTH 0x507
39 #define MIN_KEYS_PER_CRYPT 1
40 #define MAX_KEYS_PER_CRYPT 1
41 #define PLAINTEXT_LENGTH 64
42 #define SALT_SIZE sizeof(*cur_salt)
43 #define SALT_ALIGN MEM_ALIGN_WORD
44
45 /* This handles all widths */
46 #define GETPOS(i, index) (((index) % ocl_v_width) * 4 + ((i) & ~3U) * ocl_v_width + (((i) & 3) ^ 3) + ((index) / ocl_v_width) * 64 * ocl_v_width)
47
48 typedef struct {
49 pbkdf2_salt pbkdf2;
50 int kdf_type;
51 unsigned char masked_keys[OPENBSD_SOFTRAID_KEYLENGTH * OPENBSD_SOFTRAID_KEYS];
52 } softraid_salt;
53
54 static struct custom_salt *cur_salt;
55 static size_t key_buf_size;
56 static unsigned int *inbuffer;
57 static pbkdf2_out *output;
58 static softraid_salt currentsalt;
59 static cl_mem mem_in, mem_out, mem_salt, mem_state;
60 static size_t key_buf_size;
61 static int new_keys;
62 static struct fmt_main *self;
63
64 static cl_kernel pbkdf2_init, pbkdf2_loop, pbkdf2_final, softraid_final;
65
66 /*
67 * HASH_LOOPS is ideally made by factors of (iteration count - 1) and should
68 * be chosen for a kernel duration of not more than 200 ms
69 */
70 #define HASH_LOOPS (3 * 271)
71 #define ITERATIONS 8192 /* Just for auto tune */
72 #define LOOP_COUNT (((currentsalt.pbkdf2.iterations - 1 + HASH_LOOPS - 1)) / HASH_LOOPS)
73 #define STEP 0
74 #define SEED 128
75
76 static const char * warn[] = {
77 "P xfer: ", ", init: ", ", loop: ", ", final: ", ", softraid: ", ", res xfer: "
78 };
79
80 static int split_events[] = { 2, -1, -1 };
81
82 //This file contains auto-tuning routine(s). Has to be included after formats definitions.
83 #include "opencl_autotune.h"
84
85 /* ------- Helper functions ------- */
get_task_max_work_group_size()86 static size_t get_task_max_work_group_size()
87 {
88 size_t s;
89
90 s = autotune_get_task_max_work_group_size(FALSE, 0, pbkdf2_init);
91 s = MIN(s, autotune_get_task_max_work_group_size(FALSE, 0, pbkdf2_loop));
92 s = MIN(s, autotune_get_task_max_work_group_size(FALSE, 0, pbkdf2_final));
93 s = MIN(s, autotune_get_task_max_work_group_size(FALSE, 0, softraid_final));
94 return s;
95 }
96
create_clobj(size_t gws,struct fmt_main * self)97 static void create_clobj(size_t gws, struct fmt_main *self)
98 {
99 gws *= ocl_v_width;
100 key_buf_size = PLAINTEXT_LENGTH * gws;
101
102 // Allocate memory
103 inbuffer = mem_calloc(1, key_buf_size);
104 output = mem_alloc(sizeof(pbkdf2_out) * gws);
105
106 mem_in = clCreateBuffer(context[gpu_id], CL_MEM_READ_ONLY, key_buf_size, NULL, &ret_code);
107 HANDLE_CLERROR(ret_code, "Error allocating mem in");
108 mem_salt = clCreateBuffer(context[gpu_id], CL_MEM_READ_ONLY, sizeof(softraid_salt), NULL, &ret_code);
109 HANDLE_CLERROR(ret_code, "Error allocating mem setting");
110 mem_out = clCreateBuffer(context[gpu_id], CL_MEM_READ_WRITE, sizeof(pbkdf2_out) * gws, NULL, &ret_code);
111 HANDLE_CLERROR(ret_code, "Error allocating mem out");
112
113 mem_state = clCreateBuffer(context[gpu_id], CL_MEM_READ_WRITE, sizeof(pbkdf2_state) * gws, NULL, &ret_code);
114 HANDLE_CLERROR(ret_code, "Error allocating mem_state");
115
116 HANDLE_CLERROR(clSetKernelArg(pbkdf2_init, 0, sizeof(mem_in), &mem_in), "Error while setting mem_in kernel argument");
117 HANDLE_CLERROR(clSetKernelArg(pbkdf2_init, 1, sizeof(mem_salt), &mem_salt), "Error while setting mem_salt kernel argument");
118 HANDLE_CLERROR(clSetKernelArg(pbkdf2_init, 2, sizeof(mem_state), &mem_state), "Error while setting mem_state kernel argument");
119
120 HANDLE_CLERROR(clSetKernelArg(pbkdf2_loop, 0, sizeof(mem_state), &mem_state), "Error while setting mem_state kernel argument");
121
122 HANDLE_CLERROR(clSetKernelArg(pbkdf2_final, 0, sizeof(mem_salt), &mem_salt), "Error while setting mem_salt kernel argument");
123 HANDLE_CLERROR(clSetKernelArg(pbkdf2_final, 1, sizeof(mem_out), &mem_out), "Error while setting mem_out kernel argument");
124 HANDLE_CLERROR(clSetKernelArg(pbkdf2_final, 2, sizeof(mem_state), &mem_state), "Error while setting mem_state kernel argument");
125
126 HANDLE_CLERROR(clSetKernelArg(softraid_final, 0, sizeof(mem_salt), &mem_salt), "Error while setting mem_salt kernel argument");
127 HANDLE_CLERROR(clSetKernelArg(softraid_final, 1, sizeof(mem_out), &mem_out), "Error while setting mem_out kernel argument");
128 }
129
release_clobj(void)130 static void release_clobj(void)
131 {
132 if (output) {
133 HANDLE_CLERROR(clReleaseMemObject(mem_in), "Release mem in");
134 HANDLE_CLERROR(clReleaseMemObject(mem_salt), "Release mem setting");
135 HANDLE_CLERROR(clReleaseMemObject(mem_state), "Release mem state");
136 HANDLE_CLERROR(clReleaseMemObject(mem_out), "Release mem out");
137
138 MEM_FREE(inbuffer);
139 MEM_FREE(output);
140 }
141 }
142
done(void)143 static void done(void)
144 {
145 if (autotuned) {
146 release_clobj();
147
148 HANDLE_CLERROR(clReleaseKernel(pbkdf2_init), "Release kernel");
149 HANDLE_CLERROR(clReleaseKernel(pbkdf2_loop), "Release kernel");
150 HANDLE_CLERROR(clReleaseKernel(pbkdf2_final), "Release kernel");
151 HANDLE_CLERROR(clReleaseKernel(softraid_final), "Release kernel");
152 HANDLE_CLERROR(clReleaseProgram(program[gpu_id]), "Release Program");
153
154 autotuned--;
155 }
156 }
157
init(struct fmt_main * _self)158 static void init(struct fmt_main *_self)
159 {
160 static char valgo[sizeof(ALGORITHM_NAME) + 12] = "";
161
162 self = _self;
163
164 opencl_prepare_dev(gpu_id);
165 /* VLIW5 does better with just 2x vectors due to GPR pressure */
166 if (!options.v_width && amd_vliw5(device_info[gpu_id]))
167 ocl_v_width = 2;
168 else
169 ocl_v_width = opencl_get_vector_width(gpu_id, sizeof(cl_int));
170
171 if (ocl_v_width > 1) {
172 /* Run vectorized kernel */
173 snprintf(valgo, sizeof(valgo), ALGORITHM_NAME " %ux", ocl_v_width);
174 self->params.algorithm_name = valgo;
175 }
176 }
177
reset(struct db_main * db)178 static void reset(struct db_main *db)
179 {
180 if (!autotuned) {
181 char build_opts[64];
182
183 snprintf(build_opts, sizeof(build_opts),
184 "-DHASH_LOOPS=%u -DOUTLEN=%u "
185 "-DPLAINTEXT_LENGTH=%u -DV_WIDTH=%u",
186 HASH_LOOPS, OUTLEN, PLAINTEXT_LENGTH, ocl_v_width);
187 opencl_init("$JOHN/kernels/bsd_softraid_kernel.cl", gpu_id, build_opts);
188
189 pbkdf2_init = clCreateKernel(program[gpu_id], "pbkdf2_init", &ret_code);
190 HANDLE_CLERROR(ret_code, "Error creating kernel");
191 crypt_kernel = pbkdf2_loop = clCreateKernel(program[gpu_id], "pbkdf2_loop", &ret_code);
192 HANDLE_CLERROR(ret_code, "Error creating kernel");
193 pbkdf2_final = clCreateKernel(program[gpu_id], "pbkdf2_final", &ret_code);
194 HANDLE_CLERROR(ret_code, "Error creating kernel");
195 softraid_final = clCreateKernel(program[gpu_id], "softraid_final", &ret_code);
196 HANDLE_CLERROR(ret_code, "Error creating kernel");
197
198 // Initialize openCL tuning (library) for this format.
199 opencl_init_auto_setup(SEED, 2 * HASH_LOOPS, split_events,
200 warn, 2, self, create_clobj,
201 release_clobj,
202 ocl_v_width * sizeof(pbkdf2_state), 0, db);
203
204 // Auto tune execution from shared/included code.
205 autotune_run(self, 2 * (ITERATIONS - 1) + 4, 0, 200);
206 }
207 }
208
set_salt(void * salt)209 static void set_salt(void *salt)
210 {
211 cur_salt = (struct custom_salt*)salt;
212 memcpy(currentsalt.pbkdf2.salt, cur_salt->salt,
213 sizeof(currentsalt.pbkdf2.salt));
214 currentsalt.pbkdf2.length = OPENBSD_SOFTRAID_SALTLENGTH;
215 currentsalt.pbkdf2.iterations = cur_salt->num_iterations;
216 currentsalt.pbkdf2.outlen = 32;
217 memcpy(currentsalt.masked_keys, cur_salt->masked_keys,
218 sizeof(currentsalt.masked_keys));
219 HANDLE_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], mem_salt, CL_FALSE, 0, sizeof(softraid_salt), ¤tsalt, 0, NULL, NULL), "Copy salt to gpu");
220 }
221
clear_keys(void)222 static void clear_keys(void)
223 {
224 memset(inbuffer, 0, key_buf_size);
225 }
226
valid(char * ciphertext,struct fmt_main * self)227 static int valid(char *ciphertext, struct fmt_main *self)
228 {
229 return openbsdsoftraid_valid(ciphertext, self, 0);
230 }
231
set_key(char * key,int index)232 static void set_key(char *key, int index)
233 {
234 int i;
235 int length = strlen(key);
236
237 for (i = 0; i < length; i++)
238 ((char*)inbuffer)[GETPOS(i, index)] = key[i];
239
240 new_keys = 1;
241 }
242
get_key(int index)243 static char *get_key(int index)
244 {
245 static char ret[PLAINTEXT_LENGTH + 1];
246 int i = 0;
247
248 while (i < PLAINTEXT_LENGTH &&
249 (ret[i] = ((char*)inbuffer)[GETPOS(i, index)]))
250 i++;
251 ret[i] = 0;
252
253 return ret;
254 }
255
crypt_all(int * pcount,struct db_salt * salt)256 static int crypt_all(int *pcount, struct db_salt *salt)
257 {
258 const int count = *pcount;
259 int i, j;
260 size_t scalar_gws;
261 size_t *lws = local_work_size ? &local_work_size : NULL;
262
263 global_work_size = GET_NEXT_MULTIPLE(count, local_work_size);
264 scalar_gws = global_work_size * ocl_v_width;
265
266 // Copy data to gpu
267 if (ocl_autotune_running || new_keys) {
268 BENCH_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], mem_in, CL_FALSE, 0, PLAINTEXT_LENGTH * scalar_gws, inbuffer, 0, NULL, multi_profilingEvent[0]), "Copy data to gpu");
269 new_keys = 0;
270 }
271
272 // Run kernels
273 BENCH_CLERROR(clEnqueueNDRangeKernel(queue[gpu_id], pbkdf2_init, 1, NULL, &global_work_size, lws, 0, NULL, multi_profilingEvent[1]), "Run initial kernel");
274
275 for (j = 0; j < (ocl_autotune_running ? 1 : (currentsalt.pbkdf2.outlen + 19) / 20); j++) {
276 for (i = 0; i < (ocl_autotune_running ? 1 : LOOP_COUNT); i++) {
277 BENCH_CLERROR(clEnqueueNDRangeKernel(queue[gpu_id], pbkdf2_loop, 1, NULL, &global_work_size, lws, 0, NULL, multi_profilingEvent[2]), "Run loop kernel");
278 BENCH_CLERROR(clFinish(queue[gpu_id]), "Error running loop kernel");
279 opencl_process_event();
280 }
281
282 BENCH_CLERROR(clEnqueueNDRangeKernel(queue[gpu_id], pbkdf2_final, 1, NULL, &global_work_size, lws, 0, NULL, multi_profilingEvent[3]), "Run intermediate kernel");
283 }
284
285 BENCH_CLERROR(clEnqueueNDRangeKernel(queue[gpu_id], softraid_final, 1, NULL, &scalar_gws, lws, 0, NULL, multi_profilingEvent[4]), "Run softraid kernel");
286
287 // Read the result back
288 BENCH_CLERROR(clEnqueueReadBuffer(queue[gpu_id], mem_out, CL_TRUE, 0, sizeof(pbkdf2_out) * scalar_gws, output, 0, NULL, multi_profilingEvent[5]), "Copy result back");
289
290 return count;
291 }
292
cmp_all(void * binary,int count)293 static int cmp_all(void *binary, int count)
294 {
295 int index;
296
297 for (index = 0; index < count; index++)
298 if (!memcmp(binary, output[index].dk, ARCH_SIZE))
299 return 1;
300 return 0;
301 }
302
cmp_one(void * binary,int index)303 static int cmp_one(void *binary, int index)
304 {
305 return !memcmp(binary, output[index].dk, BINARY_SIZE);
306 }
307
cmp_exact(char * source,int index)308 static int cmp_exact(char *source, int index)
309 {
310 return 1;
311 }
312
313 struct fmt_main fmt_opencl_openbsd_softraid = {
314 {
315 FORMAT_LABEL,
316 FORMAT_NAME,
317 ALGORITHM_NAME,
318 BENCHMARK_COMMENT,
319 BENCHMARK_LENGTH,
320 0,
321 PLAINTEXT_LENGTH,
322 BINARY_SIZE,
323 BINARY_ALIGN,
324 SALT_SIZE,
325 SALT_ALIGN,
326 MIN_KEYS_PER_CRYPT,
327 MAX_KEYS_PER_CRYPT,
328 FMT_CASE | FMT_8_BIT | FMT_HUGE_INPUT,
329 { NULL },
330 { FORMAT_TAG },
331 tests_openbsdsoftraid
332 }, {
333 init,
334 done,
335 reset,
336 fmt_default_prepare,
337 valid,
338 fmt_default_split,
339 openbsdsoftraid_get_binary,
340 openbsdsoftraid_get_salt,
341 { NULL },
342 fmt_default_source,
343 {
344 fmt_default_binary_hash
345 },
346 fmt_default_salt_hash,
347 NULL,
348 set_salt,
349 set_key,
350 get_key,
351 clear_keys,
352 crypt_all,
353 {
354 fmt_default_get_hash
355 },
356 cmp_all,
357 cmp_one,
358 cmp_exact
359 }
360 };
361
362 #endif /* plugin stanza */
363
364 #endif /* HAVE_OPENCL */
365