1 /*
2  * This software is Copyright (c) 2012 Lukas Odzioba <ukasz at openwall.net>
3  * and Copyright (c) 2012-2018 magnum, and it is hereby released to the general
4  * public under the following terms: Redistribution and use in source and
5  * binary forms, with or without modification, are permitted.
6  *
7  * Code was at some point based on Aircrack-ng source
8  */
9 #ifdef HAVE_OPENCL
10 
11 #if FMT_EXTERNS_H
12 extern struct fmt_main fmt_opencl_wpapsk;
13 #elif FMT_REGISTERS_H
14 john_register_one(&fmt_opencl_wpapsk);
15 #else
16 
17 #include <string.h>
18 
19 #include "arch.h"
20 #include "formats.h"
21 #include "common.h"
22 #include "misc.h"
23 #include "config.h"
24 #include "options.h"
25 #include "unicode.h"
26 #include "opencl_common.h"
27 
28 static cl_mem mem_in, mem_out, mem_salt, mem_state, pinned_in, pinned_out;
29 static cl_kernel wpapsk_init, wpapsk_loop, wpapsk_pass2, wpapsk_final_md5, wpapsk_final_sha1, wpapsk_final_sha256, wpapsk_final_pmkid;
30 static size_t key_buf_size;
31 static unsigned int *inbuffer;
32 static struct fmt_main *self;
33 
34 #define JOHN_OCL_WPAPSK
35 #include "wpapsk.h"
36 
37 #define FORMAT_LABEL		"wpapsk-opencl"
38 #define FORMAT_NAME		"WPA/WPA2/PMF/PMKID PSK"
39 #define ALGORITHM_NAME		"PBKDF2-SHA1 OpenCL"
40 
41 #define ITERATIONS		4095
42 #define HASH_LOOPS		105 // factors 3, 3, 5, 7, 13
43 #define SEED			256
44 
45 #define MIN_KEYS_PER_CRYPT	1
46 #define MAX_KEYS_PER_CRYPT	1
47 
48 #define OCL_CONFIG		"wpapsk"
49 
50 /* This handles all sizes */
51 #define GETPOS(i, index)	(((index) % ocl_v_width) * 4 + ((i) & ~3U) * ocl_v_width + (((i) & 3) ^ 3) + ((index) / ocl_v_width) * 64 * ocl_v_width)
52 /* This is faster but can't handle size 3 */
53 //#define GETPOS(i, index)	(((index) & (ocl_v_width - 1)) * 4 + ((i) & ~3U) * ocl_v_width + (((i) & 3) ^ 3) + ((index) / ocl_v_width) * 64 * ocl_v_width)
54 
55 extern wpapsk_salt currentsalt;
56 extern mic_t *mic;
57 extern hccap_t hccap;
58 
59 typedef struct {
60 	cl_uint W[5];
61 	cl_uint ipad[5];
62 	cl_uint opad[5];
63 	cl_uint out[5];
64 	cl_uint partial[5];
65 } wpapsk_state;
66 
67 static const char * warn[] = {
68 	"xfer: ", ", init: ", ", loop: ", ", pass2: ", ", final: ", ", xfer: "
69 };
70 
71 static int split_events[] = { 2, -1, -1 };
72 
73 // This file contains auto-tuning routine(s). Has to be included after formats definitions.
74 #include "opencl_autotune.h"
75 
76 /* ------- Helper functions ------- */
get_task_max_work_group_size()77 static size_t get_task_max_work_group_size()
78 {
79 	size_t s;
80 
81 	s = autotune_get_task_max_work_group_size(FALSE, 0, wpapsk_init);
82 	s = MIN(s, autotune_get_task_max_work_group_size(FALSE, 0, wpapsk_loop));
83 	s = MIN(s, autotune_get_task_max_work_group_size(FALSE, 0, wpapsk_pass2));
84 	s = MIN(s, autotune_get_task_max_work_group_size(FALSE, 0, wpapsk_final_md5));
85 	s = MIN(s, autotune_get_task_max_work_group_size(FALSE, 0, wpapsk_final_sha1));
86 	s = MIN(s, autotune_get_task_max_work_group_size(FALSE, 0, wpapsk_final_sha256));
87 	s = MIN(s, autotune_get_task_max_work_group_size(FALSE, 0, wpapsk_final_pmkid));
88 
89 	return s;
90 }
91 
create_clobj(size_t gws,struct fmt_main * self)92 static void create_clobj(size_t gws, struct fmt_main *self)
93 {
94 	gws *= ocl_v_width;
95 
96 	key_buf_size = 64 * gws;
97 
98 	// Allocate memory
99 	pinned_in = clCreateBuffer(context[gpu_id], CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR, key_buf_size, NULL, &ret_code);
100 	HANDLE_CLERROR(ret_code, "Error allocating pinned in");
101 	mem_in = clCreateBuffer(context[gpu_id], CL_MEM_READ_ONLY, key_buf_size, NULL, &ret_code);
102 	HANDLE_CLERROR(ret_code, "Error allocating mem in");
103 	inbuffer = clEnqueueMapBuffer(queue[gpu_id], pinned_in, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, key_buf_size, 0, NULL, NULL, &ret_code);
104 	HANDLE_CLERROR(ret_code, "Error mapping page-locked memory");
105 
106 	mem_state = clCreateBuffer(context[gpu_id], CL_MEM_READ_WRITE, sizeof(wpapsk_state) * gws, NULL, &ret_code);
107 	HANDLE_CLERROR(ret_code, "Error allocating mem_state");
108 
109 	mem_salt = clCreateBuffer(context[gpu_id], CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(wpapsk_salt), &currentsalt, &ret_code);
110 	HANDLE_CLERROR(ret_code, "Error allocating mem setting");
111 
112 	pinned_out = clCreateBuffer(context[gpu_id], CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR, sizeof(mic_t) * gws, NULL, &ret_code);
113 	HANDLE_CLERROR(ret_code, "Error allocating pinned out");
114 	mem_out = clCreateBuffer(context[gpu_id], CL_MEM_WRITE_ONLY, sizeof(mic_t) * gws, NULL, &ret_code);
115 	HANDLE_CLERROR(ret_code, "Error allocating mem out");
116 	mic = clEnqueueMapBuffer(queue[gpu_id], pinned_out, CL_TRUE, CL_MAP_READ, 0, sizeof(mic_t) * gws, 0, NULL, NULL, &ret_code);
117 	HANDLE_CLERROR(ret_code, "Error mapping page-locked memory");
118 
119 	HANDLE_CLERROR(clSetKernelArg(wpapsk_init, 0, sizeof(mem_in), &mem_in), "Error while setting mem_in kernel argument");
120 	HANDLE_CLERROR(clSetKernelArg(wpapsk_init, 1, sizeof(mem_salt), &mem_salt), "Error while setting mem_salt kernel argument");
121 	HANDLE_CLERROR(clSetKernelArg(wpapsk_init, 2, sizeof(mem_state), &mem_state), "Error while setting mem_state kernel argument");
122 
123 	HANDLE_CLERROR(clSetKernelArg(wpapsk_loop, 0, sizeof(mem_state), &mem_state), "Error while setting mem_state kernel argument");
124 
125 	HANDLE_CLERROR(clSetKernelArg(wpapsk_pass2, 0, sizeof(mem_salt), &mem_salt), "Error while setting mem_salt kernel argument");
126 	HANDLE_CLERROR(clSetKernelArg(wpapsk_pass2, 1, sizeof(mem_state), &mem_state), "Error while setting mem_state kernel argument");
127 
128 	HANDLE_CLERROR(clSetKernelArg(wpapsk_final_md5, 0, sizeof(mem_state), &mem_state), "Error while setting mem_state kernel argument");
129 	HANDLE_CLERROR(clSetKernelArg(wpapsk_final_md5, 1, sizeof(mem_salt), &mem_salt), "Error while setting mem_salt kernel argument");
130 	HANDLE_CLERROR(clSetKernelArg(wpapsk_final_md5, 2, sizeof(mem_out), &mem_out), "Error while setting mem_out kernel argument");
131 
132 	HANDLE_CLERROR(clSetKernelArg(wpapsk_final_sha1, 0, sizeof(mem_state), &mem_state), "Error while setting mem_state kernel argument");
133 	HANDLE_CLERROR(clSetKernelArg(wpapsk_final_sha1, 1, sizeof(mem_salt), &mem_salt), "Error while setting mem_salt kernel argument");
134 	HANDLE_CLERROR(clSetKernelArg(wpapsk_final_sha1, 2, sizeof(mem_out), &mem_out), "Error while setting mem_out kernel argument");
135 
136 	HANDLE_CLERROR(clSetKernelArg(wpapsk_final_sha256, 0, sizeof(mem_state), &mem_state), "Error while setting mem_state kernel argument");
137 	HANDLE_CLERROR(clSetKernelArg(wpapsk_final_sha256, 1, sizeof(mem_salt), &mem_salt), "Error while setting mem_salt kernel argument");
138 	HANDLE_CLERROR(clSetKernelArg(wpapsk_final_sha256, 2, sizeof(mem_out), &mem_out), "Error while setting mem_out kernel argument");
139 
140 	HANDLE_CLERROR(clSetKernelArg(wpapsk_final_pmkid, 0, sizeof(mem_state), &mem_state), "Error while setting mem_state kernel argument");
141 	HANDLE_CLERROR(clSetKernelArg(wpapsk_final_pmkid, 1, sizeof(mem_salt), &mem_salt), "Error while setting mem_salt kernel argument");
142 	HANDLE_CLERROR(clSetKernelArg(wpapsk_final_pmkid, 2, sizeof(mem_out), &mem_out), "Error while setting mem_out kernel argument");
143 }
144 
release_clobj(void)145 static void release_clobj(void)
146 {
147 	if (mem_state) {
148 		HANDLE_CLERROR(clEnqueueUnmapMemObject(queue[gpu_id], pinned_in, inbuffer, 0, NULL, NULL), "Error Unmapping mem in");
149 		HANDLE_CLERROR(clEnqueueUnmapMemObject(queue[gpu_id], pinned_out, mic, 0, NULL, NULL), "Error Unmapping mem in");
150 		HANDLE_CLERROR(clFinish(queue[gpu_id]), "Error releasing memory mappings");
151 
152 		HANDLE_CLERROR(clReleaseMemObject(pinned_in), "Release pinned_in");
153 		HANDLE_CLERROR(clReleaseMemObject(pinned_out), "Release pinned_out");
154 		HANDLE_CLERROR(clReleaseMemObject(mem_in), "Release pinned_in");
155 		HANDLE_CLERROR(clReleaseMemObject(mem_out), "Release mem_out");
156 		HANDLE_CLERROR(clReleaseMemObject(mem_salt), "Release mem_salt");
157 		HANDLE_CLERROR(clReleaseMemObject(mem_state), "Release mem state");
158 		mem_state = NULL;
159 	}
160 }
161 
done(void)162 static void done(void)
163 {
164 	if (autotuned) {
165 		release_clobj();
166 
167 		HANDLE_CLERROR(clReleaseKernel(wpapsk_init), "Release Kernel");
168 		HANDLE_CLERROR(clReleaseKernel(wpapsk_loop), "Release Kernel");
169 		HANDLE_CLERROR(clReleaseKernel(wpapsk_pass2), "Release Kernel");
170 		HANDLE_CLERROR(clReleaseKernel(wpapsk_final_md5), "Release Kernel");
171 		HANDLE_CLERROR(clReleaseKernel(wpapsk_final_sha1), "Release Kernel");
172 		HANDLE_CLERROR(clReleaseKernel(wpapsk_final_sha256), "Release Kernel");
173 		HANDLE_CLERROR(clReleaseKernel(wpapsk_final_pmkid), "Release Kernel");
174 
175 		HANDLE_CLERROR(clReleaseProgram(program[gpu_id]), "Release Program");
176 
177 		autotuned--;
178 	}
179 }
180 
clear_keys(void)181 static void clear_keys(void) {
182 	memset(inbuffer, 0, key_buf_size);
183 	new_keys = 1;
184 }
185 
set_key(char * key,int index)186 static void set_key(char *key, int index)
187 {
188 	int i;
189 	int length = strlen(key);
190 
191 	for (i = 0; i < length; i++)
192 		((char*)inbuffer)[GETPOS(i, index)] = key[i];
193 	new_keys = 1;
194 }
195 
get_key(int index)196 static char* get_key(int index)
197 {
198 	static char ret[PLAINTEXT_LENGTH + 1];
199 	int i = 0;
200 
201 	while ((ret[i] = ((char*)inbuffer)[GETPOS(i, index)]))
202 		i++;
203 
204 	return ret;
205 }
206 
init(struct fmt_main * _self)207 static void init(struct fmt_main *_self)
208 {
209 	static char valgo[32] = "";
210 
211 	self = _self;
212 
213 	/*
214 	 * Implementations seen IRL that have 8 *bytes* (of eg. UTF-8) passwords
215 	 * as opposed to 8 *characters*
216 	 */
217 	if (options.target_enc == UTF_8)
218 		self->params.plaintext_min_length = 2;
219 
220 	opencl_prepare_dev(gpu_id);
221 	/* VLIW5 does better with just 2x vectors due to GPR pressure */
222 	if (!options.v_width && amd_vliw5(device_info[gpu_id]))
223 		ocl_v_width = 2;
224 	else
225 		ocl_v_width = opencl_get_vector_width(gpu_id, sizeof(cl_int));
226 
227 	/* Vectorizing disabled until fixed for keyver 3 */
228 	ocl_v_width = 1;
229 
230 	if (ocl_v_width > 1) {
231 		/* Run vectorized kernel */
232 		snprintf(valgo, sizeof(valgo),
233 		         ALGORITHM_NAME " %ux", ocl_v_width);
234 		self->params.algorithm_name = valgo;
235 	}
236 }
237 
reset(struct db_main * db)238 static void reset(struct db_main *db)
239 {
240 	if (!autotuned) {
241 		const char *custom_opts;
242 		char build_opts[256];
243 
244 		if (!(custom_opts = getenv(OCL_CONFIG "_BuildOpts")))
245 			custom_opts = cfg_get_param(SECTION_OPTIONS,
246 			                            SUBSECTION_OPENCL,
247 			                            OCL_CONFIG "_BuildOpts");
248 
249 		snprintf(build_opts, sizeof(build_opts),
250 		         "%s%s-DHASH_LOOPS=%u -DITERATIONS=%u "
251 		         "-DPLAINTEXT_LENGTH=%u -DV_WIDTH=%u",
252 		         custom_opts ? custom_opts : "",
253 		         custom_opts ? " " : "",
254 		         HASH_LOOPS, ITERATIONS,
255 		         PLAINTEXT_LENGTH, ocl_v_width);
256 		opencl_init("$JOHN/kernels/wpapsk_kernel.cl", gpu_id, build_opts);
257 
258 		// create kernels to execute
259 		crypt_kernel = wpapsk_init = clCreateKernel(program[gpu_id], "wpapsk_init", &ret_code);
260 		HANDLE_CLERROR(ret_code, "Error creating kernel");
261 		wpapsk_loop = clCreateKernel(program[gpu_id], "wpapsk_loop", &ret_code);
262 		HANDLE_CLERROR(ret_code, "Error creating kernel");
263 		wpapsk_pass2 = clCreateKernel(program[gpu_id], "wpapsk_pass2", &ret_code);
264 		HANDLE_CLERROR(ret_code, "Error creating kernel");
265 		wpapsk_final_md5 = clCreateKernel(program[gpu_id], "wpapsk_final_md5", &ret_code);
266 		HANDLE_CLERROR(ret_code, "Error creating kernel");
267 		wpapsk_final_sha1 = clCreateKernel(program[gpu_id], "wpapsk_final_sha1", &ret_code);
268 		HANDLE_CLERROR(ret_code, "Error creating kernel");
269 		wpapsk_final_sha256 = clCreateKernel(program[gpu_id], "wpapsk_final_sha256", &ret_code);
270 		HANDLE_CLERROR(ret_code, "Error creating kernel");
271 		wpapsk_final_pmkid = clCreateKernel(program[gpu_id], "wpapsk_final_pmkid", &ret_code);
272 		HANDLE_CLERROR(ret_code, "Error creating kernel");
273 
274 		// Initialize openCL tuning (library) for this format.
275 		opencl_init_auto_setup(SEED, 2 * HASH_LOOPS, split_events,
276 		                       warn, 2, self,
277 		                       create_clobj, release_clobj,
278 		                       2 * ocl_v_width * sizeof(wpapsk_state), 0, db);
279 
280 		// Auto tune execution from shared/included code.
281 		autotune_run(self, 2 * ITERATIONS * 2 + 2, 0, 200);
282 	}
283 }
284 
crypt_all(int * pcount,struct db_salt * salt)285 static int crypt_all(int *pcount, struct db_salt *salt)
286 {
287 	const int count = *pcount;
288 	int i;
289 	size_t scalar_gws;
290 	size_t *lws = local_work_size ? &local_work_size : NULL;
291 
292 	global_work_size = GET_NEXT_MULTIPLE(count, local_work_size);
293 	scalar_gws = global_work_size * ocl_v_width;
294 
295 	// Copy data to gpu
296 	BENCH_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], mem_in, CL_FALSE, 0, scalar_gws * 64, inbuffer, 0, NULL, multi_profilingEvent[0]), "Copy data to gpu");
297 
298 	// Run kernel
299 	if (new_keys || strcmp(last_ssid, hccap.essid) ||
300 	    ocl_autotune_running || bench_or_test_running) {
301 		BENCH_CLERROR(clEnqueueNDRangeKernel(queue[gpu_id], wpapsk_init, 1, NULL, &global_work_size, lws, 0, NULL, multi_profilingEvent[1]), "Run initial kernel");
302 
303 		for (i = 0; i < (ocl_autotune_running ? 1 : ITERATIONS / HASH_LOOPS); i++) {
304 			BENCH_CLERROR(clEnqueueNDRangeKernel(queue[gpu_id], wpapsk_loop, 1, NULL, &global_work_size, lws, 0, NULL, multi_profilingEvent[2]), "Run loop kernel");
305 			BENCH_CLERROR(clFinish(queue[gpu_id]), "Error running loop kernel");
306 			opencl_process_event();
307 		}
308 
309 		BENCH_CLERROR(clEnqueueNDRangeKernel(queue[gpu_id], wpapsk_pass2, 1, NULL, &global_work_size, lws, 0, NULL, multi_profilingEvent[3]), "Run intermediate kernel");
310 
311 		for (i = 0; i < (ocl_autotune_running ? 1 : ITERATIONS / HASH_LOOPS); i++) {
312 			BENCH_CLERROR(clEnqueueNDRangeKernel(queue[gpu_id], wpapsk_loop, 1, NULL, &global_work_size, lws, 0, NULL, NULL), "Run loop kernel (2nd pass)");
313 			BENCH_CLERROR(clFinish(queue[gpu_id]), "Error running loop kernel");
314 			opencl_process_event();
315 		}
316 
317 		new_keys = 0;
318 		strcpy(last_ssid, hccap.essid);
319 	}
320 
321 	if (hccap.keyver == 0)
322 		BENCH_CLERROR(clEnqueueNDRangeKernel(queue[gpu_id], wpapsk_final_pmkid, 1, NULL, &global_work_size, lws, 0, NULL, multi_profilingEvent[4]), "Run final kernel (PMKID)");
323 	else if (hccap.keyver == 1)
324 		BENCH_CLERROR(clEnqueueNDRangeKernel(queue[gpu_id], wpapsk_final_md5, 1, NULL, &global_work_size, lws, 0, NULL, multi_profilingEvent[4]), "Run final kernel (MD5)");
325 	else if (hccap.keyver == 2)
326 		BENCH_CLERROR(clEnqueueNDRangeKernel(queue[gpu_id], wpapsk_final_sha1, 1, NULL, &global_work_size, lws, 0, NULL, multi_profilingEvent[4]), "Run final kernel (SHA1)");
327 	else
328 		BENCH_CLERROR(clEnqueueNDRangeKernel(queue[gpu_id], wpapsk_final_sha256, 1, NULL, &global_work_size, lws, 0, NULL, multi_profilingEvent[4]), "Run final kernel (SHA256)");
329 	BENCH_CLERROR(clFinish(queue[gpu_id]), "Failed running final kernel");
330 
331 	// Read the result back
332 	BENCH_CLERROR(clEnqueueReadBuffer(queue[gpu_id], mem_out, CL_TRUE, 0, sizeof(mic_t) * scalar_gws, mic, 0, NULL, multi_profilingEvent[5]), "Copy result back");
333 
334 	return count;
335 }
336 
337 struct fmt_main fmt_opencl_wpapsk = {
338 	{
339 		FORMAT_LABEL,
340 		FORMAT_NAME,
341 		ALGORITHM_NAME,
342 		BENCHMARK_COMMENT,
343 		BENCHMARK_LENGTH,
344 		8,
345 		PLAINTEXT_LENGTH,
346 		BINARY_SIZE,
347 		BINARY_ALIGN,
348 		SALT_SIZE,
349 		SALT_ALIGN,
350 		MIN_KEYS_PER_CRYPT,
351 		MAX_KEYS_PER_CRYPT,
352 		FMT_8_BIT | FMT_CASE,
353 		{
354 			"key version [0:PMKID 1:WPA 2:WPA2 3:802.11w]"
355 		},
356 		{
357 			FORMAT_TAG, ""
358 		},
359 		tests
360 	}, {
361 		init,
362 		done,
363 		reset,
364 		fmt_default_prepare,
365 		valid,
366 		fmt_default_split,
367 		get_binary,
368 		get_salt,
369 		{
370 			get_keyver,
371 		},
372 		fmt_default_source,
373 		{
374 			fmt_default_binary_hash_0,
375 			fmt_default_binary_hash_1,
376 			fmt_default_binary_hash_2,
377 			fmt_default_binary_hash_3,
378 			fmt_default_binary_hash_4,
379 			fmt_default_binary_hash_5,
380 			fmt_default_binary_hash_6
381 		},
382 		salt_hash,
383 		salt_compare,
384 		set_salt,
385 		set_key,
386 		get_key,
387 		clear_keys,
388 		crypt_all,
389 		{
390 			get_hash_0,
391 			get_hash_1,
392 			get_hash_2,
393 			get_hash_3,
394 			get_hash_4,
395 			get_hash_5,
396 			get_hash_6
397 		},
398 		cmp_all,
399 		cmp_one,
400 		cmp_exact
401 	}
402 };
403 
404 #endif /* plugin stanza */
405 
406 #endif /* HAVE_OPENCL */
407