1 /*
2  * JtR OpenCL format to crack OpenBSD-SoftRAID hashes.
3  *
4  * This software is Copyright (c) 2017, Dhiru Kholia <dhiru at openwall.com>,
5  * and it is hereby released to the general public under the following terms:
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted.
8  *
9  * The OpenCL boilerplate code is borrowed from other OpenCL formats.
10  */
11 
12 #ifdef HAVE_OPENCL
13 
14 #if FMT_EXTERNS_H
15 extern struct fmt_main fmt_opencl_openbsd_softraid;
16 #elif FMT_REGISTERS_H
17 john_register_one(&fmt_opencl_openbsd_softraid);
18 #else
19 
20 #include <string.h>
21 #include <stdint.h>
22 
23 #include "arch.h"
24 #include "formats.h"
25 #include "common.h"
26 #include "options.h"
27 #include "jumbo.h"
28 #include "opencl_common.h"
29 #include "misc.h"
30 #define OUTLEN (32)
31 #include "opencl_pbkdf2_hmac_sha1.h"
32 #include "openbsdsoftraid_common.h"
33 #include "openbsdsoftraid_variable_code.h"
34 
35 #define FORMAT_LABEL            "OpenBSD-SoftRAID-opencl"
36 #define ALGORITHM_NAME          "PBKDF2-SHA1 AES OpenCL"
37 #define BENCHMARK_COMMENT       ""
38 #define BENCHMARK_LENGTH        0x507
39 #define MIN_KEYS_PER_CRYPT      1
40 #define MAX_KEYS_PER_CRYPT      1
41 #define PLAINTEXT_LENGTH        64
42 #define SALT_SIZE               sizeof(*cur_salt)
43 #define SALT_ALIGN              MEM_ALIGN_WORD
44 
45 /* This handles all widths */
46 #define GETPOS(i, index)        (((index) % ocl_v_width) * 4 + ((i) & ~3U) * ocl_v_width + (((i) & 3) ^ 3) + ((index) / ocl_v_width) * 64 * ocl_v_width)
47 
48 typedef struct {
49 	pbkdf2_salt pbkdf2;
50 	int kdf_type;
51 	unsigned char masked_keys[OPENBSD_SOFTRAID_KEYLENGTH * OPENBSD_SOFTRAID_KEYS];
52 } softraid_salt;
53 
54 static struct custom_salt *cur_salt;
55 static size_t key_buf_size;
56 static unsigned int *inbuffer;
57 static pbkdf2_out *output;
58 static softraid_salt currentsalt;
59 static cl_mem mem_in, mem_out, mem_salt, mem_state;
60 static size_t key_buf_size;
61 static int new_keys;
62 static struct fmt_main *self;
63 
64 static cl_kernel pbkdf2_init, pbkdf2_loop, pbkdf2_final, softraid_final;
65 
66 /*
67  * HASH_LOOPS is ideally made by factors of (iteration count - 1) and should
68  * be chosen for a kernel duration of not more than 200 ms
69  */
70 #define HASH_LOOPS              (3 * 271)
71 #define ITERATIONS              8192 /* Just for auto tune */
72 #define LOOP_COUNT              (((currentsalt.pbkdf2.iterations - 1 + HASH_LOOPS - 1)) / HASH_LOOPS)
73 #define STEP                    0
74 #define SEED                    128
75 
76 static const char * warn[] = {
77 	"P xfer: ",  ", init: ", ", loop: ", ", final: ", ", softraid: ", ", res xfer: "
78 };
79 
80 static int split_events[] = { 2, -1, -1 };
81 
82 //This file contains auto-tuning routine(s). Has to be included after formats definitions.
83 #include "opencl_autotune.h"
84 
85 /* ------- Helper functions ------- */
get_task_max_work_group_size()86 static size_t get_task_max_work_group_size()
87 {
88 	size_t s;
89 
90 	s = autotune_get_task_max_work_group_size(FALSE, 0, pbkdf2_init);
91 	s = MIN(s, autotune_get_task_max_work_group_size(FALSE, 0, pbkdf2_loop));
92 	s = MIN(s, autotune_get_task_max_work_group_size(FALSE, 0, pbkdf2_final));
93 	s = MIN(s, autotune_get_task_max_work_group_size(FALSE, 0, softraid_final));
94 	return s;
95 }
96 
create_clobj(size_t gws,struct fmt_main * self)97 static void create_clobj(size_t gws, struct fmt_main *self)
98 {
99 	gws *= ocl_v_width;
100 	key_buf_size = PLAINTEXT_LENGTH * gws;
101 
102 	// Allocate memory
103 	inbuffer = mem_calloc(1, key_buf_size);
104 	output = mem_alloc(sizeof(pbkdf2_out) * gws);
105 
106 	mem_in = clCreateBuffer(context[gpu_id], CL_MEM_READ_ONLY, key_buf_size, NULL, &ret_code);
107 	HANDLE_CLERROR(ret_code, "Error allocating mem in");
108 	mem_salt = clCreateBuffer(context[gpu_id], CL_MEM_READ_ONLY, sizeof(softraid_salt), NULL, &ret_code);
109 	HANDLE_CLERROR(ret_code, "Error allocating mem setting");
110 	mem_out = clCreateBuffer(context[gpu_id], CL_MEM_READ_WRITE, sizeof(pbkdf2_out) * gws, NULL, &ret_code);
111 	HANDLE_CLERROR(ret_code, "Error allocating mem out");
112 
113 	mem_state = clCreateBuffer(context[gpu_id], CL_MEM_READ_WRITE, sizeof(pbkdf2_state) * gws, NULL, &ret_code);
114 	HANDLE_CLERROR(ret_code, "Error allocating mem_state");
115 
116 	HANDLE_CLERROR(clSetKernelArg(pbkdf2_init, 0, sizeof(mem_in), &mem_in), "Error while setting mem_in kernel argument");
117 	HANDLE_CLERROR(clSetKernelArg(pbkdf2_init, 1, sizeof(mem_salt), &mem_salt), "Error while setting mem_salt kernel argument");
118 	HANDLE_CLERROR(clSetKernelArg(pbkdf2_init, 2, sizeof(mem_state), &mem_state), "Error while setting mem_state kernel argument");
119 
120 	HANDLE_CLERROR(clSetKernelArg(pbkdf2_loop, 0, sizeof(mem_state), &mem_state), "Error while setting mem_state kernel argument");
121 
122 	HANDLE_CLERROR(clSetKernelArg(pbkdf2_final, 0, sizeof(mem_salt), &mem_salt), "Error while setting mem_salt kernel argument");
123 	HANDLE_CLERROR(clSetKernelArg(pbkdf2_final, 1, sizeof(mem_out), &mem_out), "Error while setting mem_out kernel argument");
124 	HANDLE_CLERROR(clSetKernelArg(pbkdf2_final, 2, sizeof(mem_state), &mem_state), "Error while setting mem_state kernel argument");
125 
126 	HANDLE_CLERROR(clSetKernelArg(softraid_final, 0, sizeof(mem_salt), &mem_salt), "Error while setting mem_salt kernel argument");
127 	HANDLE_CLERROR(clSetKernelArg(softraid_final, 1, sizeof(mem_out), &mem_out), "Error while setting mem_out kernel argument");
128 }
129 
release_clobj(void)130 static void release_clobj(void)
131 {
132 	if (output) {
133 		HANDLE_CLERROR(clReleaseMemObject(mem_in), "Release mem in");
134 		HANDLE_CLERROR(clReleaseMemObject(mem_salt), "Release mem setting");
135 		HANDLE_CLERROR(clReleaseMemObject(mem_state), "Release mem state");
136 		HANDLE_CLERROR(clReleaseMemObject(mem_out), "Release mem out");
137 
138 		MEM_FREE(inbuffer);
139 		MEM_FREE(output);
140 	}
141 }
142 
done(void)143 static void done(void)
144 {
145 	if (autotuned) {
146 		release_clobj();
147 
148 		HANDLE_CLERROR(clReleaseKernel(pbkdf2_init), "Release kernel");
149 		HANDLE_CLERROR(clReleaseKernel(pbkdf2_loop), "Release kernel");
150 		HANDLE_CLERROR(clReleaseKernel(pbkdf2_final), "Release kernel");
151 		HANDLE_CLERROR(clReleaseKernel(softraid_final), "Release kernel");
152 		HANDLE_CLERROR(clReleaseProgram(program[gpu_id]), "Release Program");
153 
154 		autotuned--;
155 	}
156 }
157 
init(struct fmt_main * _self)158 static void init(struct fmt_main *_self)
159 {
160 	static char valgo[sizeof(ALGORITHM_NAME) + 12] = "";
161 
162 	self = _self;
163 
164 	opencl_prepare_dev(gpu_id);
165 	/* VLIW5 does better with just 2x vectors due to GPR pressure */
166 	if (!options.v_width && amd_vliw5(device_info[gpu_id]))
167 		ocl_v_width = 2;
168 	else
169 		ocl_v_width = opencl_get_vector_width(gpu_id, sizeof(cl_int));
170 
171 	if (ocl_v_width > 1) {
172 		/* Run vectorized kernel */
173 		snprintf(valgo, sizeof(valgo), ALGORITHM_NAME " %ux", ocl_v_width);
174 		self->params.algorithm_name = valgo;
175 	}
176 }
177 
reset(struct db_main * db)178 static void reset(struct db_main *db)
179 {
180 	if (!autotuned) {
181 		char build_opts[64];
182 
183 		snprintf(build_opts, sizeof(build_opts),
184 		         "-DHASH_LOOPS=%u -DOUTLEN=%u "
185 		         "-DPLAINTEXT_LENGTH=%u -DV_WIDTH=%u",
186 		         HASH_LOOPS, OUTLEN, PLAINTEXT_LENGTH, ocl_v_width);
187 		opencl_init("$JOHN/kernels/bsd_softraid_kernel.cl", gpu_id, build_opts);
188 
189 		pbkdf2_init = clCreateKernel(program[gpu_id], "pbkdf2_init", &ret_code);
190 		HANDLE_CLERROR(ret_code, "Error creating kernel");
191 		crypt_kernel = pbkdf2_loop = clCreateKernel(program[gpu_id], "pbkdf2_loop", &ret_code);
192 		HANDLE_CLERROR(ret_code, "Error creating kernel");
193 		pbkdf2_final = clCreateKernel(program[gpu_id], "pbkdf2_final", &ret_code);
194 		HANDLE_CLERROR(ret_code, "Error creating kernel");
195 		softraid_final = clCreateKernel(program[gpu_id], "softraid_final", &ret_code);
196 		HANDLE_CLERROR(ret_code, "Error creating kernel");
197 
198 		// Initialize openCL tuning (library) for this format.
199 		opencl_init_auto_setup(SEED, 2 * HASH_LOOPS, split_events,
200 		                       warn, 2, self, create_clobj,
201 		                       release_clobj,
202 		                       ocl_v_width * sizeof(pbkdf2_state), 0, db);
203 
204 		// Auto tune execution from shared/included code.
205 		autotune_run(self, 2 * (ITERATIONS - 1) + 4, 0, 200);
206 	}
207 }
208 
set_salt(void * salt)209 static void set_salt(void *salt)
210 {
211 	cur_salt = (struct custom_salt*)salt;
212 	memcpy(currentsalt.pbkdf2.salt, cur_salt->salt,
213 	       sizeof(currentsalt.pbkdf2.salt));
214 	currentsalt.pbkdf2.length = OPENBSD_SOFTRAID_SALTLENGTH;
215 	currentsalt.pbkdf2.iterations = cur_salt->num_iterations;
216 	currentsalt.pbkdf2.outlen = 32;
217 	memcpy(currentsalt.masked_keys, cur_salt->masked_keys,
218 	       sizeof(currentsalt.masked_keys));
219 	HANDLE_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], mem_salt, CL_FALSE, 0, sizeof(softraid_salt), &currentsalt, 0, NULL, NULL), "Copy salt to gpu");
220 }
221 
clear_keys(void)222 static void clear_keys(void)
223 {
224 	memset(inbuffer, 0, key_buf_size);
225 }
226 
valid(char * ciphertext,struct fmt_main * self)227 static int valid(char *ciphertext, struct fmt_main *self)
228 {
229         return openbsdsoftraid_valid(ciphertext, self, 0);
230 }
231 
set_key(char * key,int index)232 static void set_key(char *key, int index)
233 {
234 	int i;
235 	int length = strlen(key);
236 
237 	for (i = 0; i < length; i++)
238 		((char*)inbuffer)[GETPOS(i, index)] = key[i];
239 
240 	new_keys = 1;
241 }
242 
get_key(int index)243 static char *get_key(int index)
244 {
245 	static char ret[PLAINTEXT_LENGTH + 1];
246 	int i = 0;
247 
248 	while (i < PLAINTEXT_LENGTH &&
249 	       (ret[i] = ((char*)inbuffer)[GETPOS(i, index)]))
250 		i++;
251 	ret[i] = 0;
252 
253 	return ret;
254 }
255 
crypt_all(int * pcount,struct db_salt * salt)256 static int crypt_all(int *pcount, struct db_salt *salt)
257 {
258 	const int count = *pcount;
259 	int i, j;
260 	size_t scalar_gws;
261 	size_t *lws = local_work_size ? &local_work_size : NULL;
262 
263 	global_work_size = GET_NEXT_MULTIPLE(count, local_work_size);
264 	scalar_gws = global_work_size * ocl_v_width;
265 
266 	// Copy data to gpu
267 	if (ocl_autotune_running || new_keys) {
268 		BENCH_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], mem_in, CL_FALSE, 0, PLAINTEXT_LENGTH * scalar_gws, inbuffer, 0, NULL, multi_profilingEvent[0]), "Copy data to gpu");
269 		new_keys = 0;
270 	}
271 
272 	// Run kernels
273 	BENCH_CLERROR(clEnqueueNDRangeKernel(queue[gpu_id], pbkdf2_init, 1, NULL, &global_work_size, lws, 0, NULL, multi_profilingEvent[1]), "Run initial kernel");
274 
275 	for (j = 0; j < (ocl_autotune_running ? 1 : (currentsalt.pbkdf2.outlen + 19) / 20); j++) {
276 		for (i = 0; i < (ocl_autotune_running ? 1 : LOOP_COUNT); i++) {
277 			BENCH_CLERROR(clEnqueueNDRangeKernel(queue[gpu_id], pbkdf2_loop, 1, NULL, &global_work_size, lws, 0, NULL, multi_profilingEvent[2]), "Run loop kernel");
278 			BENCH_CLERROR(clFinish(queue[gpu_id]), "Error running loop kernel");
279 			opencl_process_event();
280 		}
281 
282 		BENCH_CLERROR(clEnqueueNDRangeKernel(queue[gpu_id], pbkdf2_final, 1, NULL, &global_work_size, lws, 0, NULL, multi_profilingEvent[3]), "Run intermediate kernel");
283 	}
284 
285 	BENCH_CLERROR(clEnqueueNDRangeKernel(queue[gpu_id], softraid_final, 1, NULL, &scalar_gws, lws, 0, NULL, multi_profilingEvent[4]), "Run softraid kernel");
286 
287 	// Read the result back
288 	BENCH_CLERROR(clEnqueueReadBuffer(queue[gpu_id], mem_out, CL_TRUE, 0, sizeof(pbkdf2_out) * scalar_gws, output, 0, NULL, multi_profilingEvent[5]), "Copy result back");
289 
290 	return count;
291 }
292 
cmp_all(void * binary,int count)293 static int cmp_all(void *binary, int count)
294 {
295 	int index;
296 
297 	for (index = 0; index < count; index++)
298 		if (!memcmp(binary, output[index].dk, ARCH_SIZE))
299 			return 1;
300 	return 0;
301 }
302 
cmp_one(void * binary,int index)303 static int cmp_one(void *binary, int index)
304 {
305 	return !memcmp(binary, output[index].dk, BINARY_SIZE);
306 }
307 
cmp_exact(char * source,int index)308 static int cmp_exact(char *source, int index)
309 {
310 	return 1;
311 }
312 
313 struct fmt_main fmt_opencl_openbsd_softraid = {
314 	{
315 		FORMAT_LABEL,
316 		FORMAT_NAME,
317 		ALGORITHM_NAME,
318 		BENCHMARK_COMMENT,
319 		BENCHMARK_LENGTH,
320 		0,
321 		PLAINTEXT_LENGTH,
322 		BINARY_SIZE,
323 		BINARY_ALIGN,
324 		SALT_SIZE,
325 		SALT_ALIGN,
326 		MIN_KEYS_PER_CRYPT,
327 		MAX_KEYS_PER_CRYPT,
328 		FMT_CASE | FMT_8_BIT | FMT_HUGE_INPUT,
329 		{ NULL },
330 		{ FORMAT_TAG },
331 		tests_openbsdsoftraid
332 	}, {
333 		init,
334 		done,
335 		reset,
336 		fmt_default_prepare,
337 		valid,
338 		fmt_default_split,
339 		openbsdsoftraid_get_binary,
340 		openbsdsoftraid_get_salt,
341 		{ NULL },
342 		fmt_default_source,
343 		{
344 			fmt_default_binary_hash
345 		},
346 		fmt_default_salt_hash,
347 		NULL,
348 		set_salt,
349 		set_key,
350 		get_key,
351 		clear_keys,
352 		crypt_all,
353 		{
354 			fmt_default_get_hash
355 		},
356 		cmp_all,
357 		cmp_one,
358 		cmp_exact
359 	}
360 };
361 
362 #endif /* plugin stanza */
363 
364 #endif /* HAVE_OPENCL */
365