1 /*
2 * This software is Copyright (c) 2012 Lukas Odzioba <ukasz at openwall.net>
3 * and Copyright (c) 2012-2018 magnum, and it is hereby released to the general
4 * public under the following terms: Redistribution and use in source and
5 * binary forms, with or without modification, are permitted.
6 *
7 * Code was at some point based on Aircrack-ng source
8 */
9 #ifdef HAVE_OPENCL
10
11 #if FMT_EXTERNS_H
12 extern struct fmt_main fmt_opencl_wpapsk;
13 #elif FMT_REGISTERS_H
14 john_register_one(&fmt_opencl_wpapsk);
15 #else
16
17 #include <string.h>
18
19 #include "arch.h"
20 #include "formats.h"
21 #include "common.h"
22 #include "misc.h"
23 #include "config.h"
24 #include "options.h"
25 #include "unicode.h"
26 #include "opencl_common.h"
27
28 static cl_mem mem_in, mem_out, mem_salt, mem_state, pinned_in, pinned_out;
29 static cl_kernel wpapsk_init, wpapsk_loop, wpapsk_pass2, wpapsk_final_md5, wpapsk_final_sha1, wpapsk_final_sha256, wpapsk_final_pmkid;
30 static size_t key_buf_size;
31 static unsigned int *inbuffer;
32 static struct fmt_main *self;
33
34 #define JOHN_OCL_WPAPSK
35 #include "wpapsk.h"
36
37 #define FORMAT_LABEL "wpapsk-opencl"
38 #define FORMAT_NAME "WPA/WPA2/PMF/PMKID PSK"
39 #define ALGORITHM_NAME "PBKDF2-SHA1 OpenCL"
40
41 #define ITERATIONS 4095
42 #define HASH_LOOPS 105 // factors 3, 3, 5, 7, 13
43 #define SEED 256
44
45 #define MIN_KEYS_PER_CRYPT 1
46 #define MAX_KEYS_PER_CRYPT 1
47
48 #define OCL_CONFIG "wpapsk"
49
50 /* This handles all sizes */
51 #define GETPOS(i, index) (((index) % ocl_v_width) * 4 + ((i) & ~3U) * ocl_v_width + (((i) & 3) ^ 3) + ((index) / ocl_v_width) * 64 * ocl_v_width)
52 /* This is faster but can't handle size 3 */
53 //#define GETPOS(i, index) (((index) & (ocl_v_width - 1)) * 4 + ((i) & ~3U) * ocl_v_width + (((i) & 3) ^ 3) + ((index) / ocl_v_width) * 64 * ocl_v_width)
54
55 extern wpapsk_salt currentsalt;
56 extern mic_t *mic;
57 extern hccap_t hccap;
58
59 typedef struct {
60 cl_uint W[5];
61 cl_uint ipad[5];
62 cl_uint opad[5];
63 cl_uint out[5];
64 cl_uint partial[5];
65 } wpapsk_state;
66
67 static const char * warn[] = {
68 "xfer: ", ", init: ", ", loop: ", ", pass2: ", ", final: ", ", xfer: "
69 };
70
71 static int split_events[] = { 2, -1, -1 };
72
73 // This file contains auto-tuning routine(s). Has to be included after formats definitions.
74 #include "opencl_autotune.h"
75
76 /* ------- Helper functions ------- */
get_task_max_work_group_size()77 static size_t get_task_max_work_group_size()
78 {
79 size_t s;
80
81 s = autotune_get_task_max_work_group_size(FALSE, 0, wpapsk_init);
82 s = MIN(s, autotune_get_task_max_work_group_size(FALSE, 0, wpapsk_loop));
83 s = MIN(s, autotune_get_task_max_work_group_size(FALSE, 0, wpapsk_pass2));
84 s = MIN(s, autotune_get_task_max_work_group_size(FALSE, 0, wpapsk_final_md5));
85 s = MIN(s, autotune_get_task_max_work_group_size(FALSE, 0, wpapsk_final_sha1));
86 s = MIN(s, autotune_get_task_max_work_group_size(FALSE, 0, wpapsk_final_sha256));
87 s = MIN(s, autotune_get_task_max_work_group_size(FALSE, 0, wpapsk_final_pmkid));
88
89 return s;
90 }
91
create_clobj(size_t gws,struct fmt_main * self)92 static void create_clobj(size_t gws, struct fmt_main *self)
93 {
94 gws *= ocl_v_width;
95
96 key_buf_size = 64 * gws;
97
98 // Allocate memory
99 pinned_in = clCreateBuffer(context[gpu_id], CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR, key_buf_size, NULL, &ret_code);
100 HANDLE_CLERROR(ret_code, "Error allocating pinned in");
101 mem_in = clCreateBuffer(context[gpu_id], CL_MEM_READ_ONLY, key_buf_size, NULL, &ret_code);
102 HANDLE_CLERROR(ret_code, "Error allocating mem in");
103 inbuffer = clEnqueueMapBuffer(queue[gpu_id], pinned_in, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, key_buf_size, 0, NULL, NULL, &ret_code);
104 HANDLE_CLERROR(ret_code, "Error mapping page-locked memory");
105
106 mem_state = clCreateBuffer(context[gpu_id], CL_MEM_READ_WRITE, sizeof(wpapsk_state) * gws, NULL, &ret_code);
107 HANDLE_CLERROR(ret_code, "Error allocating mem_state");
108
109 mem_salt = clCreateBuffer(context[gpu_id], CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(wpapsk_salt), ¤tsalt, &ret_code);
110 HANDLE_CLERROR(ret_code, "Error allocating mem setting");
111
112 pinned_out = clCreateBuffer(context[gpu_id], CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR, sizeof(mic_t) * gws, NULL, &ret_code);
113 HANDLE_CLERROR(ret_code, "Error allocating pinned out");
114 mem_out = clCreateBuffer(context[gpu_id], CL_MEM_WRITE_ONLY, sizeof(mic_t) * gws, NULL, &ret_code);
115 HANDLE_CLERROR(ret_code, "Error allocating mem out");
116 mic = clEnqueueMapBuffer(queue[gpu_id], pinned_out, CL_TRUE, CL_MAP_READ, 0, sizeof(mic_t) * gws, 0, NULL, NULL, &ret_code);
117 HANDLE_CLERROR(ret_code, "Error mapping page-locked memory");
118
119 HANDLE_CLERROR(clSetKernelArg(wpapsk_init, 0, sizeof(mem_in), &mem_in), "Error while setting mem_in kernel argument");
120 HANDLE_CLERROR(clSetKernelArg(wpapsk_init, 1, sizeof(mem_salt), &mem_salt), "Error while setting mem_salt kernel argument");
121 HANDLE_CLERROR(clSetKernelArg(wpapsk_init, 2, sizeof(mem_state), &mem_state), "Error while setting mem_state kernel argument");
122
123 HANDLE_CLERROR(clSetKernelArg(wpapsk_loop, 0, sizeof(mem_state), &mem_state), "Error while setting mem_state kernel argument");
124
125 HANDLE_CLERROR(clSetKernelArg(wpapsk_pass2, 0, sizeof(mem_salt), &mem_salt), "Error while setting mem_salt kernel argument");
126 HANDLE_CLERROR(clSetKernelArg(wpapsk_pass2, 1, sizeof(mem_state), &mem_state), "Error while setting mem_state kernel argument");
127
128 HANDLE_CLERROR(clSetKernelArg(wpapsk_final_md5, 0, sizeof(mem_state), &mem_state), "Error while setting mem_state kernel argument");
129 HANDLE_CLERROR(clSetKernelArg(wpapsk_final_md5, 1, sizeof(mem_salt), &mem_salt), "Error while setting mem_salt kernel argument");
130 HANDLE_CLERROR(clSetKernelArg(wpapsk_final_md5, 2, sizeof(mem_out), &mem_out), "Error while setting mem_out kernel argument");
131
132 HANDLE_CLERROR(clSetKernelArg(wpapsk_final_sha1, 0, sizeof(mem_state), &mem_state), "Error while setting mem_state kernel argument");
133 HANDLE_CLERROR(clSetKernelArg(wpapsk_final_sha1, 1, sizeof(mem_salt), &mem_salt), "Error while setting mem_salt kernel argument");
134 HANDLE_CLERROR(clSetKernelArg(wpapsk_final_sha1, 2, sizeof(mem_out), &mem_out), "Error while setting mem_out kernel argument");
135
136 HANDLE_CLERROR(clSetKernelArg(wpapsk_final_sha256, 0, sizeof(mem_state), &mem_state), "Error while setting mem_state kernel argument");
137 HANDLE_CLERROR(clSetKernelArg(wpapsk_final_sha256, 1, sizeof(mem_salt), &mem_salt), "Error while setting mem_salt kernel argument");
138 HANDLE_CLERROR(clSetKernelArg(wpapsk_final_sha256, 2, sizeof(mem_out), &mem_out), "Error while setting mem_out kernel argument");
139
140 HANDLE_CLERROR(clSetKernelArg(wpapsk_final_pmkid, 0, sizeof(mem_state), &mem_state), "Error while setting mem_state kernel argument");
141 HANDLE_CLERROR(clSetKernelArg(wpapsk_final_pmkid, 1, sizeof(mem_salt), &mem_salt), "Error while setting mem_salt kernel argument");
142 HANDLE_CLERROR(clSetKernelArg(wpapsk_final_pmkid, 2, sizeof(mem_out), &mem_out), "Error while setting mem_out kernel argument");
143 }
144
release_clobj(void)145 static void release_clobj(void)
146 {
147 if (mem_state) {
148 HANDLE_CLERROR(clEnqueueUnmapMemObject(queue[gpu_id], pinned_in, inbuffer, 0, NULL, NULL), "Error Unmapping mem in");
149 HANDLE_CLERROR(clEnqueueUnmapMemObject(queue[gpu_id], pinned_out, mic, 0, NULL, NULL), "Error Unmapping mem in");
150 HANDLE_CLERROR(clFinish(queue[gpu_id]), "Error releasing memory mappings");
151
152 HANDLE_CLERROR(clReleaseMemObject(pinned_in), "Release pinned_in");
153 HANDLE_CLERROR(clReleaseMemObject(pinned_out), "Release pinned_out");
154 HANDLE_CLERROR(clReleaseMemObject(mem_in), "Release pinned_in");
155 HANDLE_CLERROR(clReleaseMemObject(mem_out), "Release mem_out");
156 HANDLE_CLERROR(clReleaseMemObject(mem_salt), "Release mem_salt");
157 HANDLE_CLERROR(clReleaseMemObject(mem_state), "Release mem state");
158 mem_state = NULL;
159 }
160 }
161
done(void)162 static void done(void)
163 {
164 if (autotuned) {
165 release_clobj();
166
167 HANDLE_CLERROR(clReleaseKernel(wpapsk_init), "Release Kernel");
168 HANDLE_CLERROR(clReleaseKernel(wpapsk_loop), "Release Kernel");
169 HANDLE_CLERROR(clReleaseKernel(wpapsk_pass2), "Release Kernel");
170 HANDLE_CLERROR(clReleaseKernel(wpapsk_final_md5), "Release Kernel");
171 HANDLE_CLERROR(clReleaseKernel(wpapsk_final_sha1), "Release Kernel");
172 HANDLE_CLERROR(clReleaseKernel(wpapsk_final_sha256), "Release Kernel");
173 HANDLE_CLERROR(clReleaseKernel(wpapsk_final_pmkid), "Release Kernel");
174
175 HANDLE_CLERROR(clReleaseProgram(program[gpu_id]), "Release Program");
176
177 autotuned--;
178 }
179 }
180
clear_keys(void)181 static void clear_keys(void) {
182 memset(inbuffer, 0, key_buf_size);
183 new_keys = 1;
184 }
185
set_key(char * key,int index)186 static void set_key(char *key, int index)
187 {
188 int i;
189 int length = strlen(key);
190
191 for (i = 0; i < length; i++)
192 ((char*)inbuffer)[GETPOS(i, index)] = key[i];
193 new_keys = 1;
194 }
195
get_key(int index)196 static char* get_key(int index)
197 {
198 static char ret[PLAINTEXT_LENGTH + 1];
199 int i = 0;
200
201 while ((ret[i] = ((char*)inbuffer)[GETPOS(i, index)]))
202 i++;
203
204 return ret;
205 }
206
init(struct fmt_main * _self)207 static void init(struct fmt_main *_self)
208 {
209 static char valgo[32] = "";
210
211 self = _self;
212
213 /*
214 * Implementations seen IRL that have 8 *bytes* (of eg. UTF-8) passwords
215 * as opposed to 8 *characters*
216 */
217 if (options.target_enc == UTF_8)
218 self->params.plaintext_min_length = 2;
219
220 opencl_prepare_dev(gpu_id);
221 /* VLIW5 does better with just 2x vectors due to GPR pressure */
222 if (!options.v_width && amd_vliw5(device_info[gpu_id]))
223 ocl_v_width = 2;
224 else
225 ocl_v_width = opencl_get_vector_width(gpu_id, sizeof(cl_int));
226
227 /* Vectorizing disabled until fixed for keyver 3 */
228 ocl_v_width = 1;
229
230 if (ocl_v_width > 1) {
231 /* Run vectorized kernel */
232 snprintf(valgo, sizeof(valgo),
233 ALGORITHM_NAME " %ux", ocl_v_width);
234 self->params.algorithm_name = valgo;
235 }
236 }
237
reset(struct db_main * db)238 static void reset(struct db_main *db)
239 {
240 if (!autotuned) {
241 const char *custom_opts;
242 char build_opts[256];
243
244 if (!(custom_opts = getenv(OCL_CONFIG "_BuildOpts")))
245 custom_opts = cfg_get_param(SECTION_OPTIONS,
246 SUBSECTION_OPENCL,
247 OCL_CONFIG "_BuildOpts");
248
249 snprintf(build_opts, sizeof(build_opts),
250 "%s%s-DHASH_LOOPS=%u -DITERATIONS=%u "
251 "-DPLAINTEXT_LENGTH=%u -DV_WIDTH=%u",
252 custom_opts ? custom_opts : "",
253 custom_opts ? " " : "",
254 HASH_LOOPS, ITERATIONS,
255 PLAINTEXT_LENGTH, ocl_v_width);
256 opencl_init("$JOHN/kernels/wpapsk_kernel.cl", gpu_id, build_opts);
257
258 // create kernels to execute
259 crypt_kernel = wpapsk_init = clCreateKernel(program[gpu_id], "wpapsk_init", &ret_code);
260 HANDLE_CLERROR(ret_code, "Error creating kernel");
261 wpapsk_loop = clCreateKernel(program[gpu_id], "wpapsk_loop", &ret_code);
262 HANDLE_CLERROR(ret_code, "Error creating kernel");
263 wpapsk_pass2 = clCreateKernel(program[gpu_id], "wpapsk_pass2", &ret_code);
264 HANDLE_CLERROR(ret_code, "Error creating kernel");
265 wpapsk_final_md5 = clCreateKernel(program[gpu_id], "wpapsk_final_md5", &ret_code);
266 HANDLE_CLERROR(ret_code, "Error creating kernel");
267 wpapsk_final_sha1 = clCreateKernel(program[gpu_id], "wpapsk_final_sha1", &ret_code);
268 HANDLE_CLERROR(ret_code, "Error creating kernel");
269 wpapsk_final_sha256 = clCreateKernel(program[gpu_id], "wpapsk_final_sha256", &ret_code);
270 HANDLE_CLERROR(ret_code, "Error creating kernel");
271 wpapsk_final_pmkid = clCreateKernel(program[gpu_id], "wpapsk_final_pmkid", &ret_code);
272 HANDLE_CLERROR(ret_code, "Error creating kernel");
273
274 // Initialize openCL tuning (library) for this format.
275 opencl_init_auto_setup(SEED, 2 * HASH_LOOPS, split_events,
276 warn, 2, self,
277 create_clobj, release_clobj,
278 2 * ocl_v_width * sizeof(wpapsk_state), 0, db);
279
280 // Auto tune execution from shared/included code.
281 autotune_run(self, 2 * ITERATIONS * 2 + 2, 0, 200);
282 }
283 }
284
crypt_all(int * pcount,struct db_salt * salt)285 static int crypt_all(int *pcount, struct db_salt *salt)
286 {
287 const int count = *pcount;
288 int i;
289 size_t scalar_gws;
290 size_t *lws = local_work_size ? &local_work_size : NULL;
291
292 global_work_size = GET_NEXT_MULTIPLE(count, local_work_size);
293 scalar_gws = global_work_size * ocl_v_width;
294
295 // Copy data to gpu
296 BENCH_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], mem_in, CL_FALSE, 0, scalar_gws * 64, inbuffer, 0, NULL, multi_profilingEvent[0]), "Copy data to gpu");
297
298 // Run kernel
299 if (new_keys || strcmp(last_ssid, hccap.essid) ||
300 ocl_autotune_running || bench_or_test_running) {
301 BENCH_CLERROR(clEnqueueNDRangeKernel(queue[gpu_id], wpapsk_init, 1, NULL, &global_work_size, lws, 0, NULL, multi_profilingEvent[1]), "Run initial kernel");
302
303 for (i = 0; i < (ocl_autotune_running ? 1 : ITERATIONS / HASH_LOOPS); i++) {
304 BENCH_CLERROR(clEnqueueNDRangeKernel(queue[gpu_id], wpapsk_loop, 1, NULL, &global_work_size, lws, 0, NULL, multi_profilingEvent[2]), "Run loop kernel");
305 BENCH_CLERROR(clFinish(queue[gpu_id]), "Error running loop kernel");
306 opencl_process_event();
307 }
308
309 BENCH_CLERROR(clEnqueueNDRangeKernel(queue[gpu_id], wpapsk_pass2, 1, NULL, &global_work_size, lws, 0, NULL, multi_profilingEvent[3]), "Run intermediate kernel");
310
311 for (i = 0; i < (ocl_autotune_running ? 1 : ITERATIONS / HASH_LOOPS); i++) {
312 BENCH_CLERROR(clEnqueueNDRangeKernel(queue[gpu_id], wpapsk_loop, 1, NULL, &global_work_size, lws, 0, NULL, NULL), "Run loop kernel (2nd pass)");
313 BENCH_CLERROR(clFinish(queue[gpu_id]), "Error running loop kernel");
314 opencl_process_event();
315 }
316
317 new_keys = 0;
318 strcpy(last_ssid, hccap.essid);
319 }
320
321 if (hccap.keyver == 0)
322 BENCH_CLERROR(clEnqueueNDRangeKernel(queue[gpu_id], wpapsk_final_pmkid, 1, NULL, &global_work_size, lws, 0, NULL, multi_profilingEvent[4]), "Run final kernel (PMKID)");
323 else if (hccap.keyver == 1)
324 BENCH_CLERROR(clEnqueueNDRangeKernel(queue[gpu_id], wpapsk_final_md5, 1, NULL, &global_work_size, lws, 0, NULL, multi_profilingEvent[4]), "Run final kernel (MD5)");
325 else if (hccap.keyver == 2)
326 BENCH_CLERROR(clEnqueueNDRangeKernel(queue[gpu_id], wpapsk_final_sha1, 1, NULL, &global_work_size, lws, 0, NULL, multi_profilingEvent[4]), "Run final kernel (SHA1)");
327 else
328 BENCH_CLERROR(clEnqueueNDRangeKernel(queue[gpu_id], wpapsk_final_sha256, 1, NULL, &global_work_size, lws, 0, NULL, multi_profilingEvent[4]), "Run final kernel (SHA256)");
329 BENCH_CLERROR(clFinish(queue[gpu_id]), "Failed running final kernel");
330
331 // Read the result back
332 BENCH_CLERROR(clEnqueueReadBuffer(queue[gpu_id], mem_out, CL_TRUE, 0, sizeof(mic_t) * scalar_gws, mic, 0, NULL, multi_profilingEvent[5]), "Copy result back");
333
334 return count;
335 }
336
337 struct fmt_main fmt_opencl_wpapsk = {
338 {
339 FORMAT_LABEL,
340 FORMAT_NAME,
341 ALGORITHM_NAME,
342 BENCHMARK_COMMENT,
343 BENCHMARK_LENGTH,
344 8,
345 PLAINTEXT_LENGTH,
346 BINARY_SIZE,
347 BINARY_ALIGN,
348 SALT_SIZE,
349 SALT_ALIGN,
350 MIN_KEYS_PER_CRYPT,
351 MAX_KEYS_PER_CRYPT,
352 FMT_8_BIT | FMT_CASE,
353 {
354 "key version [0:PMKID 1:WPA 2:WPA2 3:802.11w]"
355 },
356 {
357 FORMAT_TAG, ""
358 },
359 tests
360 }, {
361 init,
362 done,
363 reset,
364 fmt_default_prepare,
365 valid,
366 fmt_default_split,
367 get_binary,
368 get_salt,
369 {
370 get_keyver,
371 },
372 fmt_default_source,
373 {
374 fmt_default_binary_hash_0,
375 fmt_default_binary_hash_1,
376 fmt_default_binary_hash_2,
377 fmt_default_binary_hash_3,
378 fmt_default_binary_hash_4,
379 fmt_default_binary_hash_5,
380 fmt_default_binary_hash_6
381 },
382 salt_hash,
383 salt_compare,
384 set_salt,
385 set_key,
386 get_key,
387 clear_keys,
388 crypt_all,
389 {
390 get_hash_0,
391 get_hash_1,
392 get_hash_2,
393 get_hash_3,
394 get_hash_4,
395 get_hash_5,
396 get_hash_6
397 },
398 cmp_all,
399 cmp_one,
400 cmp_exact
401 }
402 };
403
404 #endif /* plugin stanza */
405
406 #endif /* HAVE_OPENCL */
407