/*
 * This software is Copyright (c) 2015, Sayantan Datta <std2048@gmail.com>
 * and Copyright (c) 2015, magnum
 * and it is hereby released to the general public under the following terms:
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted.
 */

#ifdef HAVE_OPENCL
#define FMT_STRUCT fmt_opencl_mscash

#if FMT_EXTERNS_H
extern struct fmt_main FMT_STRUCT;
#elif FMT_REGISTERS_H
john_register_one(&FMT_STRUCT);
#else

#include <string.h>
#include <sys/time.h>

#include "arch.h"
#include "params.h"
#include "path.h"
#include "common.h"
#include "formats.h"
#include "opencl_common.h"
#include "config.h"
#include "options.h"
#include "unicode.h"
#include "mask_ext.h"
#include "bt_interface.h"
#include "mscash_common.h"

#define PLAINTEXT_LENGTH    27 /* Max. is 55 with current kernel */
#define UTF8_MAX_LENGTH     (3 * PLAINTEXT_LENGTH)
#define BUFSIZE             ((UTF8_MAX_LENGTH + 3) / 4 * 4)
#define AUTOTUNE_LENGTH     8
#define FORMAT_LABEL        "mscash-opencl"
#define FORMAT_NAME         "M$ Cache Hash"
#define ALGORITHM_NAME      "MD4 OpenCL"
#define SALT_SIZE           (12 * sizeof(unsigned int))

static cl_mem pinned_saved_keys, pinned_saved_idx, pinned_int_key_loc;
static cl_mem buffer_keys, buffer_idx, buffer_int_keys, buffer_int_key_loc;
static cl_uint *saved_plain, *saved_idx, *saved_int_key_loc;
static int static_gpu_locations[MASK_FMT_INT_PLHDR];

static cl_mem buffer_return_hashes, buffer_hash_ids, buffer_bitmap_dupe;
static cl_mem *buffer_offset_tables, *buffer_hash_tables, *buffer_bitmaps, *buffer_salts;
static OFFSET_TABLE_WORD *offset_table;
static unsigned int **hash_tables;
static unsigned int current_salt;
static cl_uint *loaded_hashes, max_num_loaded_hashes, *hash_ids, *bitmaps, max_hash_table_size;
static cl_ulong bitmap_size_bits;

static unsigned int key_idx;
static unsigned int set_new_keys;
static struct fmt_main *self;
static cl_uint *zero_buffer;

#define MIN_KEYS_PER_CRYPT  1
#define MAX_KEYS_PER_CRYPT  1

#define STEP                0
#define SEED                1024

static const char *warn[] = {
	"key xfer: ", ", idx xfer: ", ", crypt: ", ", res xfer: "
};

//This file contains auto-tuning routine(s). It has to be included after the format definitions.
#include "opencl_autotune.h"

/* ------- Helper functions ------- */
static size_t get_task_max_work_group_size()
{
	return autotune_get_task_max_work_group_size(FALSE, 0, crypt_kernel);
}

struct fmt_main FMT_STRUCT;

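/*
 * Kernel argument layout for the "mscash" kernel (derived from the
 * clSetKernelArg() calls below and in crypt_all()):
 *   0 buffer_keys, 1 buffer_idx, 3 buffer_int_key_loc  - sized per GWS,
 *     (re)set in set_kernel_args_kpc() whenever the key buffers are rebuilt;
 *   2 salt, 5 bitmap, 6 offset table, 7 hash table     - per salt,
 *     set in crypt_all();
 *   4 buffer_int_keys, 8 buffer_return_hashes,
 *   9 buffer_hash_ids, 10 buffer_bitmap_dupe           - set once in
 *     set_kernel_args().
 */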
static void set_kernel_args_kpc()
{
	HANDLE_CLERROR(clSetKernelArg(crypt_kernel, 0, sizeof(buffer_keys), (void *) &buffer_keys), "Error setting argument 1.");
	HANDLE_CLERROR(clSetKernelArg(crypt_kernel, 1, sizeof(buffer_idx), (void *) &buffer_idx), "Error setting argument 2.");
	HANDLE_CLERROR(clSetKernelArg(crypt_kernel, 3, sizeof(buffer_int_key_loc), (void *) &buffer_int_key_loc), "Error setting argument 4.");
}

static void set_kernel_args()
{
	HANDLE_CLERROR(clSetKernelArg(crypt_kernel, 4, sizeof(buffer_int_keys), (void *) &buffer_int_keys), "Error setting argument 5.");
	HANDLE_CLERROR(clSetKernelArg(crypt_kernel, 8, sizeof(buffer_return_hashes), (void *) &buffer_return_hashes), "Error setting argument 9.");
	HANDLE_CLERROR(clSetKernelArg(crypt_kernel, 9, sizeof(buffer_hash_ids), (void *) &buffer_hash_ids), "Error setting argument 10.");
	HANDLE_CLERROR(clSetKernelArg(crypt_kernel, 10, sizeof(buffer_bitmap_dupe), (void *) &buffer_bitmap_dupe), "Error setting argument 11.");
}

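/*
 * Try to allocate the key transfer buffers as pinned (page-locked) host
 * memory for faster PCIe transfers; if the pinned allocation fails, fall
 * back to a plain malloc'ed host buffer.
 */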
static void create_clobj(size_t kpc, struct fmt_main *self)
{
	pinned_saved_keys = clCreateBuffer(context[gpu_id], CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR, BUFSIZE * kpc, NULL, &ret_code);
	if (ret_code != CL_SUCCESS) {
		saved_plain = (cl_uint *) mem_alloc(BUFSIZE * kpc);
		if (saved_plain == NULL)
			HANDLE_CLERROR(ret_code, "Error creating page-locked memory pinned_saved_keys.");
	}
	else {
		saved_plain = (cl_uint *) clEnqueueMapBuffer(queue[gpu_id], pinned_saved_keys, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, BUFSIZE * kpc, 0, NULL, NULL, &ret_code);
		HANDLE_CLERROR(ret_code, "Error mapping page-locked memory saved_plain.");
	}

	pinned_saved_idx = clCreateBuffer(context[gpu_id], CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR, sizeof(cl_uint) * kpc, NULL, &ret_code);
	HANDLE_CLERROR(ret_code, "Error creating page-locked memory pinned_saved_idx.");
	saved_idx = (cl_uint *) clEnqueueMapBuffer(queue[gpu_id], pinned_saved_idx, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, sizeof(cl_uint) * kpc, 0, NULL, NULL, &ret_code);
	HANDLE_CLERROR(ret_code, "Error mapping page-locked memory saved_idx.");

	pinned_int_key_loc = clCreateBuffer(context[gpu_id], CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR, sizeof(cl_uint) * kpc, NULL, &ret_code);
	HANDLE_CLERROR(ret_code, "Error creating page-locked memory pinned_int_key_loc.");
	saved_int_key_loc = (cl_uint *) clEnqueueMapBuffer(queue[gpu_id], pinned_int_key_loc, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, sizeof(cl_uint) * kpc, 0, NULL, NULL, &ret_code);
	HANDLE_CLERROR(ret_code, "Error mapping page-locked memory saved_int_key_loc.");

	// create and set arguments
	buffer_keys = clCreateBuffer(context[gpu_id], CL_MEM_READ_ONLY, BUFSIZE * kpc, NULL, &ret_code);
	HANDLE_CLERROR(ret_code, "Error creating buffer argument buffer_keys.");

	buffer_idx = clCreateBuffer(context[gpu_id], CL_MEM_READ_ONLY, 4 * kpc, NULL, &ret_code);
	HANDLE_CLERROR(ret_code, "Error creating buffer argument buffer_idx.");

	buffer_int_key_loc = clCreateBuffer(context[gpu_id], CL_MEM_READ_ONLY, sizeof(cl_uint) * kpc, NULL, &ret_code);
	HANDLE_CLERROR(ret_code, "Error creating buffer argument buffer_int_key_loc.");

	set_kernel_args_kpc();
}

static void create_base_clobj()
{
	unsigned int dummy = 0;

	zero_buffer = (cl_uint *) mem_calloc(max_hash_table_size/32 + 1, sizeof(cl_uint));

	buffer_return_hashes = clCreateBuffer(context[gpu_id], CL_MEM_WRITE_ONLY, 2 * sizeof(cl_uint) * max_num_loaded_hashes, NULL, &ret_code);
	HANDLE_CLERROR(ret_code, "Error creating buffer argument buffer_return_hashes.");

	buffer_hash_ids = clCreateBuffer(context[gpu_id], CL_MEM_READ_WRITE, (3 * max_num_loaded_hashes + 1) * sizeof(cl_uint), NULL, &ret_code);
	HANDLE_CLERROR(ret_code, "Error creating buffer argument buffer_hash_ids.");

	buffer_bitmap_dupe = clCreateBuffer(context[gpu_id], CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, (max_hash_table_size/32 + 1) * sizeof(cl_uint), zero_buffer, &ret_code);
	HANDLE_CLERROR(ret_code, "Error creating buffer argument buffer_bitmap_dupe.");

	// "dummy" is only used as a placeholder host pointer when there are no internal mask candidates
	buffer_int_keys = clCreateBuffer(context[gpu_id], CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, 4 * mask_int_cand.num_int_cand, mask_int_cand.int_cand ? mask_int_cand.int_cand : (void *)&dummy, &ret_code);
	HANDLE_CLERROR(ret_code, "Error creating buffer argument buffer_int_keys.");

	HANDLE_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], buffer_hash_ids, CL_TRUE, 0, sizeof(cl_uint), zero_buffer, 0, NULL, NULL), "failed in clEnqueueWriteBuffer buffer_hash_ids.");

	set_kernel_args();
}

static void release_clobj(void)
{
	if (buffer_keys) {
		if (pinned_saved_keys) {
			HANDLE_CLERROR(clEnqueueUnmapMemObject(queue[gpu_id], pinned_saved_keys, saved_plain, 0, NULL, NULL), "Error Unmapping saved_plain.");
			HANDLE_CLERROR(clReleaseMemObject(pinned_saved_keys), "Error Releasing pinned_saved_keys.");
		}
		else
			MEM_FREE(saved_plain);

		HANDLE_CLERROR(clEnqueueUnmapMemObject(queue[gpu_id], pinned_saved_idx, saved_idx, 0, NULL, NULL), "Error Unmapping saved_idx.");
		HANDLE_CLERROR(clEnqueueUnmapMemObject(queue[gpu_id], pinned_int_key_loc, saved_int_key_loc, 0, NULL, NULL), "Error Unmapping saved_int_key_loc.");
		HANDLE_CLERROR(clFinish(queue[gpu_id]), "Error releasing mappings.");
		HANDLE_CLERROR(clReleaseMemObject(pinned_saved_idx), "Error Releasing pinned_saved_idx.");
		HANDLE_CLERROR(clReleaseMemObject(pinned_int_key_loc), "Error Releasing pinned_int_key_loc.");
		HANDLE_CLERROR(clReleaseMemObject(buffer_keys), "Error Releasing buffer_keys.");
		HANDLE_CLERROR(clReleaseMemObject(buffer_idx), "Error Releasing buffer_idx.");
		HANDLE_CLERROR(clReleaseMemObject(buffer_int_key_loc), "Error Releasing buffer_int_key_loc.");
		buffer_keys = 0;
	}
}

static void release_base_clobj(void)
{
	if (buffer_int_keys) {
		HANDLE_CLERROR(clReleaseMemObject(buffer_int_keys), "Error Releasing buffer_int_keys.");
		HANDLE_CLERROR(clReleaseMemObject(buffer_return_hashes), "Error Releasing buffer_return_hashes.");
		HANDLE_CLERROR(clReleaseMemObject(buffer_bitmap_dupe), "Error Releasing buffer_bitmap_dupe.");
		HANDLE_CLERROR(clReleaseMemObject(buffer_hash_ids), "Error Releasing buffer_hash_ids.");
		MEM_FREE(zero_buffer);
		buffer_int_keys = 0;
	}
}

static void release_salt_buffers()
{
	unsigned int k;

	if (hash_tables) {
		k = 0;
		while (hash_tables[k]) {
			MEM_FREE(hash_tables[k]);
			k++;
		}
		MEM_FREE(hash_tables);
	}
	if (buffer_offset_tables) {
		k = 0;
		while (buffer_offset_tables[k]) {
			clReleaseMemObject(buffer_offset_tables[k]);
			buffer_offset_tables[k] = 0;
			k++;
		}
		MEM_FREE(buffer_offset_tables);
	}
	if (buffer_hash_tables) {
		k = 0;
		while (buffer_hash_tables[k]) {
			clReleaseMemObject(buffer_hash_tables[k]);
			buffer_hash_tables[k] = 0;
			k++;
		}
		MEM_FREE(buffer_hash_tables);
	}
	if (buffer_bitmaps) {
		k = 0;
		while (buffer_bitmaps[k]) {
			clReleaseMemObject(buffer_bitmaps[k]);
			buffer_bitmaps[k] = 0;
			k++;
		}
		MEM_FREE(buffer_bitmaps);
	}
	if (buffer_salts) {
		k = 0;
		while (buffer_salts[k]) {
			clReleaseMemObject(buffer_salts[k]);
			buffer_salts[k] = 0;
			k++;
		}
		MEM_FREE(buffer_salts);
	}
}

static void done(void)
{
	release_clobj();
	release_base_clobj();

	if (crypt_kernel) {
		HANDLE_CLERROR(clReleaseKernel(crypt_kernel), "Release kernel.");
		HANDLE_CLERROR(clReleaseProgram(program[gpu_id]), "Release Program.");
		crypt_kernel = NULL;
	}

	if (loaded_hashes)
		MEM_FREE(loaded_hashes);
	if (hash_ids)
		MEM_FREE(hash_ids);
	release_salt_buffers();
}

static void init_kernel(void)
{
	char build_opts[5000];
	int i;
	cl_ulong const_cache_size;

	clReleaseKernel(crypt_kernel);

	for (i = 0; i < MASK_FMT_INT_PLHDR; i++)
		if (mask_skip_ranges && mask_skip_ranges[i] != -1)
			static_gpu_locations[i] = mask_int_cand.int_cpu_mask_ctx->
				ranges[mask_skip_ranges[i]].pos;
		else
			static_gpu_locations[i] = -1;

	HANDLE_CLERROR(clGetDeviceInfo(devices[gpu_id], CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof(cl_ulong), &const_cache_size, 0), "failed to get CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE.");

	sprintf(build_opts, "-D NUM_INT_KEYS=%u -D IS_STATIC_GPU_MASK=%d"
#if !NT_FULL_UNICODE
		" -DUCS_2"
#endif
		" -D CONST_CACHE_SIZE=%llu -D%s -D%s -DPLAINTEXT_LENGTH=%d -D LOC_0=%d"
#if MASK_FMT_INT_PLHDR > 1
		" -D LOC_1=%d "
#endif
#if MASK_FMT_INT_PLHDR > 2
		"-D LOC_2=%d "
#endif
#if MASK_FMT_INT_PLHDR > 3
		"-D LOC_3=%d"
#endif
		, mask_int_cand.num_int_cand, mask_gpu_is_static,
		(unsigned long long)const_cache_size, cp_id2macro(options.target_enc),
		options.internal_cp == UTF_8 ? cp_id2macro(ASCII) :
		cp_id2macro(options.internal_cp), PLAINTEXT_LENGTH,
		static_gpu_locations[0]
#if MASK_FMT_INT_PLHDR > 1
		, static_gpu_locations[1]
#endif
#if MASK_FMT_INT_PLHDR > 2
		, static_gpu_locations[2]
#endif
#if MASK_FMT_INT_PLHDR > 3
		, static_gpu_locations[3]
#endif
	);

	opencl_build_kernel("$JOHN/kernels/mscash_kernel.cl", gpu_id, build_opts, 0);
	crypt_kernel = clCreateKernel(program[gpu_id], "mscash", &ret_code);
	HANDLE_CLERROR(ret_code, "Error creating kernel. Double-check kernel name?");
}

static void set_key(char *_key, int index);
static void *salt(char *_ciphertext);

static void init(struct fmt_main *_self)
{
	self = _self;
	max_num_loaded_hashes = 0;

	opencl_prepare_dev(gpu_id);
	mask_int_cand_target = opencl_speed_index(gpu_id) / 300;

	mscash1_adjust_tests(self, options.target_enc, PLAINTEXT_LENGTH,
	                     set_key, set_key);
}

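/*
 * Build the GPU-side salt: the username part of the ciphertext is lowercased,
 * converted to UTF-16 and laid out in a 12-word buffer, already terminated
 * with the 0x80 padding marker. w[10] receives the MD4 message bit length of
 * the inner 16-byte digest plus the UTF-16 salt (128 + 16 * length), for use
 * in the kernel's length padding.
 */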
static void *salt(char *ciphertext)
{
	static union {
		unsigned int w[12];
		UTF16 s[24];
	} nt_buffer;
	UTF16 *out = nt_buffer.s;
	UTF16 usalt[MSCASH1_MAX_SALT_LENGTH + 1 + 2];
	UTF16 *login = usalt;
	UTF8 csalt[3 * MSCASH1_MAX_SALT_LENGTH + 1];
	int i, length = 0;
	char *pos = ciphertext + FORMAT_TAG_LEN;
	char *lasth = strrchr(ciphertext, '#');

	memset(nt_buffer.w, 0, sizeof(nt_buffer.w));
	memset(usalt, 0, sizeof(usalt));

	while (pos < lasth)
		csalt[length++] = *pos++;
	csalt[length] = 0;

	enc_strlwr((char*)csalt);
	enc_to_utf16(usalt, MSCASH1_MAX_SALT_LENGTH, csalt, length);
	length = strlen16(usalt);

	for (i = 0; i < length; i++)
		*out++ = *login++;
	*out++ = 0x80;

	nt_buffer.w[10] = (length << 4) + 128;

	return &nt_buffer.w;
}

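/*
 * hash_ids[] is filled by the kernel: hash_ids[0] holds the number of cracked
 * candidates, followed by one triplet per crack:
 *   hash_ids[1 + 3*i]  index of the base key (position in saved_idx/saved_plain)
 *   hash_ids[2 + 3*i]  index of the internal (GPU-side mask) candidate
 *   hash_ids[3 + 3*i]  location of the matching entry in the per-salt hash table
 */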
static int get_hash_0(int index) { return hash_tables[current_salt][hash_ids[3 + 3 * index]] & PH_MASK_0; }
static int get_hash_1(int index) { return hash_tables[current_salt][hash_ids[3 + 3 * index]] & PH_MASK_1; }
static int get_hash_2(int index) { return hash_tables[current_salt][hash_ids[3 + 3 * index]] & PH_MASK_2; }
static int get_hash_3(int index) { return hash_tables[current_salt][hash_ids[3 + 3 * index]] & PH_MASK_3; }
static int get_hash_4(int index) { return hash_tables[current_salt][hash_ids[3 + 3 * index]] & PH_MASK_4; }
static int get_hash_5(int index) { return hash_tables[current_salt][hash_ids[3 + 3 * index]] & PH_MASK_5; }
static int get_hash_6(int index) { return hash_tables[current_salt][hash_ids[3 + 3 * index]] & PH_MASK_6; }

static void clear_keys(void)
{
	key_idx = 0;
	set_new_keys = 0;
}

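/*
 * Keys are stored packed: the raw bytes go into saved_plain as 32-bit words,
 * and saved_idx[index] encodes (word_offset << 7) | length, which limits keys
 * to 127 bytes. With a non-static GPU-side mask, saved_int_key_loc[index]
 * additionally packs the insertion position of each mask placeholder into one
 * byte per MASK_FMT_INT_PLHDR slot (0x80 marks an unused slot).
 */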
static void set_key(char *_key, int index)
{
	const uint32_t *key = (uint32_t*)_key;
	int len = strlen(_key);

	if (mask_int_cand.num_int_cand > 1 && !mask_gpu_is_static) {
		int i;
		saved_int_key_loc[index] = 0;
		for (i = 0; i < MASK_FMT_INT_PLHDR; i++) {
			if (mask_skip_ranges[i] != -1) {
				saved_int_key_loc[index] |= ((mask_int_cand.
					int_cpu_mask_ctx->ranges[mask_skip_ranges[i]].offset +
					mask_int_cand.int_cpu_mask_ctx->
					ranges[mask_skip_ranges[i]].pos) & 0xff) << (i << 3);
			}
			else
				saved_int_key_loc[index] |= 0x80 << (i << 3);
		}
	}

	saved_idx[index] = (key_idx << 7) | len;

	while (len > 4) {
		saved_plain[key_idx++] = *key++;
		len -= 4;
	}
	if (len)
		saved_plain[key_idx++] = *key & (0xffffffffU >> (32 - (len << 3)));
	set_new_keys = 1;
}

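/*
 * Reconstruct the plaintext for a reported index. If hash_ids describes a
 * cracked candidate, the base key and the internal mask-candidate index are
 * looked up there; the characters generated on the GPU by the mask are then
 * patched back into the copy at their recorded positions.
 */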
static char *get_key(int index)
{
	static char out[UTF8_MAX_LENGTH + 1];
	int i, len, int_index, t;
	char *key;

	if (hash_ids == NULL || hash_ids[0] == 0 ||
	    index >= hash_ids[0] || hash_ids[0] > max_num_loaded_hashes) {
		t = index;
		int_index = 0;
	}
	else {
		t = hash_ids[1 + 3 * index];
		int_index = hash_ids[2 + 3 * index];
	}

	if (t >= global_work_size) {
		//fprintf(stderr, "Get key error! %d %d\n", t, index);
		t = 0;
	}

	len = saved_idx[t] & 127;
	key = (char*)&saved_plain[saved_idx[t] >> 7];

	for (i = 0; i < len; i++)
		out[i] = *key++;
	out[i] = 0;

	if (len && mask_skip_ranges && mask_int_cand.num_int_cand > 1) {
		for (i = 0; i < MASK_FMT_INT_PLHDR && mask_skip_ranges[i] != -1; i++)
			if (mask_gpu_is_static)
				out[static_gpu_locations[i]] =
					mask_int_cand.int_cand[int_index].x[i];
			else
				out[(saved_int_key_loc[t] & (0xff << (i * 8))) >> (i * 8)] =
					mask_int_cand.int_cand[int_index].x[i];
	}

	return out;
}

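/*
 * The bitmap consists of four equal regions of bmp_sz bits, one region per
 * 32-bit word of each loaded hash; the kernel uses it for early rejection of
 * candidates before the offset/hash table lookup.
 */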
/* Use only for smaller bitmaps < 16MB */
static void prepare_bitmap_4(cl_ulong bmp_sz, cl_uint **bitmap_ptr, uint32_t num_loaded_hashes)
{
	unsigned int i;
	MEM_FREE(*bitmap_ptr);
	*bitmap_ptr = (cl_uint*) mem_calloc((bmp_sz >> 3), sizeof(cl_uint));

	for (i = 0; i < num_loaded_hashes; i++) {
		unsigned int bmp_idx = loaded_hashes[4 * i + 3] & (bmp_sz - 1);
		(*bitmap_ptr)[bmp_idx >> 5] |= (1U << (bmp_idx & 31));

		bmp_idx = loaded_hashes[4 * i + 2] & (bmp_sz - 1);
		(*bitmap_ptr)[(bmp_sz >> 5) + (bmp_idx >> 5)] |=
			(1U << (bmp_idx & 31));

		bmp_idx = loaded_hashes[4 * i + 1] & (bmp_sz - 1);
		(*bitmap_ptr)[(bmp_sz >> 4) + (bmp_idx >> 5)] |=
			(1U << (bmp_idx & 31));

		bmp_idx = loaded_hashes[4 * i] & (bmp_sz - 1);
		(*bitmap_ptr)[(bmp_sz >> 5) * 3 + (bmp_idx >> 5)] |=
			(1U << (bmp_idx & 31));
	}
}

/*
static void prepare_bitmap_1(cl_ulong bmp_sz, cl_uint **bitmap_ptr, uint32_t num_loaded_hashes)
{
	unsigned int i;
	MEM_FREE(*bitmap_ptr);
	*bitmap_ptr = (cl_uint*) mem_calloc((bmp_sz >> 5), sizeof(cl_uint));

	for (i = 0; i < num_loaded_hashes; i++) {
		unsigned int bmp_idx = loaded_hashes[4 * i + 3] & (bmp_sz - 1);
		(*bitmap_ptr)[bmp_idx >> 5] |= (1U << (bmp_idx & 31));
	}
}*/

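/*
 * Pick a bitmap size from the number of hashes loaded for the salt and the
 * GPU family; larger hash counts get proportionally larger bitmaps to keep
 * the false-positive rate of the early-reject test low. More than 1100100
 * hashes per salt is rejected.
 */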
static void select_bitmap(unsigned int num_loaded_hashes)
{
	cl_ulong max_local_mem_sz_bytes = 0;

	HANDLE_CLERROR(clGetDeviceInfo(devices[gpu_id], CL_DEVICE_LOCAL_MEM_SIZE,
	               sizeof(cl_ulong), &max_local_mem_sz_bytes, 0),
	               "failed to get CL_DEVICE_LOCAL_MEM_SIZE.");

	if (num_loaded_hashes <= 5100) {
		if (amd_gcn_10(device_info[gpu_id]) ||
		    amd_vliw4(device_info[gpu_id]))
			bitmap_size_bits = 512 * 1024;
		else
			bitmap_size_bits = 256 * 1024;
	}
	else if (num_loaded_hashes <= 10100) {
		if (amd_gcn_10(device_info[gpu_id]) ||
		    amd_vliw4(device_info[gpu_id]))
			bitmap_size_bits = 512 * 1024;
		else
			bitmap_size_bits = 256 * 1024;
	}
	else if (num_loaded_hashes <= 20100) {
		if (amd_gcn_10(device_info[gpu_id]) ||
		    amd_vliw4(device_info[gpu_id]))
			bitmap_size_bits = 1024 * 1024;
		else
			bitmap_size_bits = 512 * 1024;
	}
	else if (num_loaded_hashes <= 250100)
		bitmap_size_bits = 2048 * 1024;
	else if (num_loaded_hashes <= 1100100) {
		if (!amd_gcn_11(device_info[gpu_id]))
			bitmap_size_bits = 4096 * 1024;
		else
			bitmap_size_bits = 2048 * 1024;
	}
	else {
		fprintf(stderr, "Too many hashes (%d), max is 1100100\n",
		        num_loaded_hashes);
		error();
	}

	prepare_bitmap_4(bitmap_size_bits, &bitmaps, num_loaded_hashes);
}

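/*
 * Build the per-salt lookup structures once per database: for every salt, a
 * perfect hash table (via create_perfect_hash_table() from the bt_interface
 * code) plus its offset table and a bitmap are created and uploaded to the
 * device, together with the salt itself and the table geometry appended to
 * it. loaded_hashes and hash_ids are sized for the most populated salt.
 */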
static void prepare_table(struct db_main *db)
{
	struct db_salt *salt;
	int seq_ids = 0;

	max_num_loaded_hashes = 0;
	max_hash_table_size = 1;

	salt = db->salts;
	do {
		if (salt->count > max_num_loaded_hashes)
			max_num_loaded_hashes = salt->count;
	} while ((salt = salt->next));

	MEM_FREE(loaded_hashes);
	MEM_FREE(hash_ids);
	release_salt_buffers();

	loaded_hashes = (cl_uint*) mem_alloc(4 * max_num_loaded_hashes * sizeof(cl_uint));
	hash_ids = (cl_uint*) mem_calloc((3 * max_num_loaded_hashes + 1), sizeof(cl_uint));

	hash_tables = (unsigned int **)mem_calloc(sizeof(unsigned int*), db->salt_count + 1);
	buffer_offset_tables = (cl_mem *)mem_calloc(sizeof(cl_mem), db->salt_count + 1);
	buffer_hash_tables = (cl_mem *)mem_calloc(sizeof(cl_mem), db->salt_count + 1);
	buffer_bitmaps = (cl_mem *)mem_calloc(sizeof(cl_mem), db->salt_count + 1);
	buffer_salts = (cl_mem *)mem_calloc(sizeof(cl_mem), db->salt_count + 1);

	hash_tables[db->salt_count] = NULL;
	buffer_offset_tables[db->salt_count] = NULL;
	buffer_hash_tables[db->salt_count] = NULL;
	buffer_bitmaps[db->salt_count] = NULL;
	buffer_salts[db->salt_count] = NULL;

	salt = db->salts;
	do {
		unsigned int i = 0;
		unsigned int num_loaded_hashes, salt_params[SALT_SIZE / sizeof(unsigned int) + 5];
		unsigned int hash_table_size, offset_table_size, shift64_ht_sz, shift64_ot_sz;
		struct db_password *pw, *last;

		last = pw = salt->list;
		do {
			unsigned int *bin = (unsigned int *)pw->binary;
			if (bin == NULL) {
				if (last == pw)
					salt->list = pw->next;
				else
					last->next = pw->next;
			} else {
				last = pw;
				loaded_hashes[4 * i] = bin[0];
				loaded_hashes[4 * i + 1] = bin[1];
				loaded_hashes[4 * i + 2] = bin[2];
				loaded_hashes[4 * i + 3] = bin[3];
				i++;
			}
		} while ((pw = pw->next));

		if (i != salt->count) {
			fprintf(stderr,
			        "Something went wrong while preparing hashes. Exiting.\n");
			error();
		}
		num_loaded_hashes = salt->count;
		salt->sequential_id = seq_ids++;

		num_loaded_hashes = create_perfect_hash_table(128, (void*)loaded_hashes,
				num_loaded_hashes,
				&offset_table,
				&offset_table_size,
				&hash_table_size, 0);

		if (!num_loaded_hashes) {
			MEM_FREE(hash_table_128);
			fprintf(stderr, "Failed to create Hash Table for cracking.\n");
			error();
		}

		hash_tables[salt->sequential_id] = hash_table_128;

		buffer_offset_tables[salt->sequential_id] = clCreateBuffer(context[gpu_id], CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, offset_table_size * sizeof(OFFSET_TABLE_WORD), offset_table, &ret_code);
		HANDLE_CLERROR(ret_code, "Error creating buffer argument buffer_offset_tables[].");

		buffer_hash_tables[salt->sequential_id] = clCreateBuffer(context[gpu_id], CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, hash_table_size * sizeof(unsigned int) * 2, hash_table_128, &ret_code);
		HANDLE_CLERROR(ret_code, "Error creating buffer argument buffer_hash_tables[].");

		if (max_hash_table_size < hash_table_size)
			max_hash_table_size = hash_table_size;

		shift64_ht_sz = (((1ULL << 63) % hash_table_size) * 2) % hash_table_size;
		shift64_ot_sz = (((1ULL << 63) % offset_table_size) * 2) % offset_table_size;

		select_bitmap(num_loaded_hashes);

		memcpy(salt_params, salt->salt, SALT_SIZE);
		salt_params[12] = bitmap_size_bits - 1;
		salt_params[13] = offset_table_size;
		salt_params[14] = hash_table_size;
		salt_params[15] = shift64_ot_sz;
		salt_params[16] = shift64_ht_sz;

		buffer_bitmaps[salt->sequential_id] = clCreateBuffer(context[gpu_id], CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, (size_t)(bitmap_size_bits >> 3) * 2, bitmaps, &ret_code);
		HANDLE_CLERROR(ret_code, "Error creating buffer argument buffer_bitmaps[].");

		buffer_salts[salt->sequential_id] = clCreateBuffer(context[gpu_id], CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, (SALT_SIZE / sizeof(unsigned int) + 5) * sizeof(unsigned int), salt_params, &ret_code);
		HANDLE_CLERROR(ret_code, "Error creating buffer argument buffer_salts[].");

		MEM_FREE(bitmaps);
		MEM_FREE(offset_table);
	} while ((salt = salt->next));
}

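/*
 * One crypt_all() call: upload new keys if any were set since the last call,
 * point the kernel at the buffers of the current salt, run it over gws work
 * items and read back hash_ids[0], the number of cracked candidates. When
 * something was cracked, the full hash_ids triplets and the computed digests
 * are read back and the device-side counter and duplicate bitmap are reset.
 * *pcount is scaled by the number of GPU-side mask candidates so the caller
 * sees the true number of candidates tested.
 */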
static int crypt_all(int *pcount, struct db_salt *salt)
{
	const int count = *pcount;

	size_t *lws = local_work_size ? &local_work_size : NULL;
	size_t gws = GET_NEXT_MULTIPLE(count, local_work_size);

	//fprintf(stderr, "%s(%d) lws "Zu" gws "Zu" idx %u int_cand %d\n", __FUNCTION__, count, local_work_size, gws, key_idx, mask_int_cand.num_int_cand);

	// copy keys to the device
	if (set_new_keys || ocl_autotune_running) {
		if (key_idx)
			BENCH_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], buffer_keys, CL_FALSE, 0, 4 * key_idx, saved_plain, 0, NULL, multi_profilingEvent[0]), "failed in clEnqueueWriteBuffer buffer_keys.");
		BENCH_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], buffer_idx, CL_FALSE, 0, 4 * gws, saved_idx, 0, NULL, multi_profilingEvent[1]), "failed in clEnqueueWriteBuffer buffer_idx.");
		if (!mask_gpu_is_static)
			BENCH_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], buffer_int_key_loc, CL_FALSE, 0, 4 * gws, saved_int_key_loc, 0, NULL, NULL), "failed in clEnqueueWriteBuffer buffer_int_key_loc.");
		set_new_keys = 0;
	}

	current_salt = salt->sequential_id;
	BENCH_CLERROR(clSetKernelArg(crypt_kernel, 2, sizeof(buffer_salts[current_salt]), (void *) &buffer_salts[current_salt]), "Error setting argument 3.");
	BENCH_CLERROR(clSetKernelArg(crypt_kernel, 5, sizeof(buffer_bitmaps[current_salt]), (void *) &buffer_bitmaps[current_salt]), "Error setting argument 6.");
	BENCH_CLERROR(clSetKernelArg(crypt_kernel, 6, sizeof(buffer_offset_tables[current_salt]), (void *) &buffer_offset_tables[current_salt]), "Error setting argument 7.");
	BENCH_CLERROR(clSetKernelArg(crypt_kernel, 7, sizeof(buffer_hash_tables[current_salt]), (void *) &buffer_hash_tables[current_salt]), "Error setting argument 8.");

	BENCH_CLERROR(clEnqueueNDRangeKernel(queue[gpu_id], crypt_kernel, 1, NULL, &gws, lws, 0, NULL, multi_profilingEvent[2]), "failed in clEnqueueNDRangeKernel");

	BENCH_CLERROR(clEnqueueReadBuffer(queue[gpu_id], buffer_hash_ids, CL_TRUE, 0, sizeof(cl_uint), hash_ids, 0, NULL, multi_profilingEvent[3]), "failed in reading back num cracked hashes.");

	if (hash_ids[0] > max_num_loaded_hashes) {
		fprintf(stderr, "Error, crypt_all kernel.\n");
		error();
	}

	if (hash_ids[0]) {
		BENCH_CLERROR(clEnqueueReadBuffer(queue[gpu_id], buffer_return_hashes, CL_FALSE, 0, 2 * sizeof(cl_uint) * hash_ids[0], loaded_hashes, 0, NULL, NULL), "failed in reading back return_hashes.");
		BENCH_CLERROR(clEnqueueReadBuffer(queue[gpu_id], buffer_hash_ids, CL_TRUE, 0, (3 * hash_ids[0] + 1) * sizeof(cl_uint), hash_ids, 0, NULL, NULL), "failed in reading back hash_ids.");
		BENCH_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], buffer_bitmap_dupe, CL_FALSE, 0, (max_hash_table_size/32 + 1) * sizeof(cl_uint), zero_buffer, 0, NULL, NULL), "failed in clEnqueueWriteBuffer buffer_bitmap_dupe.");
		BENCH_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], buffer_hash_ids, CL_TRUE, 0, sizeof(cl_uint), zero_buffer, 0, NULL, NULL), "failed in clEnqueueWriteBuffer buffer_hash_ids.");
	}

	*pcount *= mask_int_cand.num_int_cand;
	return hash_ids[0];
}

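/*
 * The real comparison already happened on the GPU against the per-salt hash
 * table, so cmp_all() only reports whether anything was cracked, while
 * cmp_one()/cmp_exact() verify the candidates the kernel flagged.
 */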
static int cmp_all(void *binary, int count)
{
	if (count) return 1;
	return 0;
}

static int cmp_one(void *binary, int index)
{
	return (((unsigned int*)binary)[0] ==
	        hash_tables[current_salt][hash_ids[3 + 3 * index]]);
}

static int cmp_exact(char *source, int index)
{
	unsigned int *t = (unsigned int *) mscash_common_binary(source);

	if (t[2] != loaded_hashes[2 * index])
		return 0;
	if (t[3] != loaded_hashes[2 * index + 1])
		return 0;
	return 1;
}

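/*
 * reset() runs whenever a new password database is attached: it rebuilds the
 * per-salt tables, recompiles the kernel with the current mask and encoding
 * options, recreates the base buffers and re-runs the auto-tune (restoring
 * any previously forced LWS/GWS first).
 */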
static void reset(struct db_main *db)
{
	static size_t o_lws, o_gws;
	static int initialized;
	size_t gws_limit;

	//fprintf(stderr, "%s(%p), i=%d\n", __FUNCTION__, db, initialized);
	gws_limit = MIN((0xf << 21) * 4 / BUFSIZE,
	                get_max_mem_alloc_size(gpu_id) / BUFSIZE);
	get_power_of_two(gws_limit);
	if (gws_limit > MIN((0xf << 21) * 4 / BUFSIZE,
	                    get_max_mem_alloc_size(gpu_id) / BUFSIZE))
		gws_limit >>= 1;

	if (initialized) {
		// Forget the previous auto-tune
		local_work_size = o_lws;
		global_work_size = o_gws;

		release_base_clobj();
		release_clobj();
	} else {
		o_lws = local_work_size;
		o_gws = global_work_size;
		initialized = 1;
	}

	prepare_table(db);
	init_kernel();

	create_base_clobj();

	current_salt = 0;
	hash_ids[0] = 0;

	// If real crack run, don't auto-tune for self-tests
	if (db->real && db != db->real)
		opencl_get_sane_lws_gws_values();

	// Initialize openCL tuning (library) for this format.
	opencl_init_auto_setup(SEED, 1, NULL, warn, 2, self,
	                       create_clobj, release_clobj,
	                       2 * BUFSIZE, gws_limit, db);

	// Auto tune execution from shared/included code.
	autotune_run_extra(self, 1, gws_limit, 200, CL_TRUE);
}

struct fmt_main FMT_STRUCT = {
	{
		FORMAT_LABEL,
		FORMAT_NAME,
		ALGORITHM_NAME,
		BENCHMARK_COMMENT,
		BENCHMARK_LENGTH,
		0,
		PLAINTEXT_LENGTH,
		BINARY_SIZE,
		BINARY_ALIGN,
		SALT_SIZE,
		SALT_ALIGN,
		MIN_KEYS_PER_CRYPT,
		MAX_KEYS_PER_CRYPT,
		FMT_CASE | FMT_8_BIT | FMT_SPLIT_UNIFIES_CASE | FMT_UNICODE | FMT_ENC | FMT_REMOVE | FMT_MASK,
		{ NULL },
		{ FORMAT_TAG },
		mscash1_common_tests
	}, {
		init,
		done,
		reset,
		mscash1_common_prepare,
		mscash1_common_valid,
		mscash1_common_split,
		mscash_common_binary,
		salt,
		{ NULL },
		fmt_default_source,
		{
			fmt_default_binary_hash_0,
			fmt_default_binary_hash_1,
			fmt_default_binary_hash_2,
			fmt_default_binary_hash_3,
			fmt_default_binary_hash_4,
			fmt_default_binary_hash_5,
			fmt_default_binary_hash_6
		},
		fmt_default_salt_hash,
		NULL,
		fmt_default_set_salt,
		set_key,
		get_key,
		clear_keys,
		crypt_all,
		{
			get_hash_0,
			get_hash_1,
			get_hash_2,
			get_hash_3,
			get_hash_4,
			get_hash_5,
			get_hash_6
		},
		cmp_all,
		cmp_one,
		cmp_exact
	}
};

#endif /* plugin stanza */

#endif /* HAVE_OPENCL */