1 /*
2  * This software is Copyright (c) 2015, Sayantan Datta <std2048@gmail.com>
3  * and Copyright (c) 2015, magnum
4  * and it is hereby released to the general public under the following terms:
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted.
7  */
8 
9 #ifdef HAVE_OPENCL
10 #define FMT_STRUCT fmt_opencl_mscash
11 
12 #if FMT_EXTERNS_H
13 extern struct fmt_main FMT_STRUCT;
14 #elif FMT_REGISTERS_H
15 john_register_one(&FMT_STRUCT);
16 #else
17 
18 #include <string.h>
19 #include <sys/time.h>
20 
21 #include "arch.h"
22 #include "params.h"
23 #include "path.h"
24 #include "common.h"
25 #include "formats.h"
26 #include "opencl_common.h"
27 #include "config.h"
28 #include "options.h"
29 #include "unicode.h"
30 #include "mask_ext.h"
31 #include "bt_interface.h"
32 #include "mscash_common.h"
33 
/* Key length limits: a UTF-8 key may need up to 3 bytes per character. */
#define PLAINTEXT_LENGTH    27 /* Max. is 55 with current kernel */
#define UTF8_MAX_LENGTH     (3 * PLAINTEXT_LENGTH)
/* Per-key buffer size, rounded up to a whole number of 32-bit words. */
#define BUFSIZE             ((UTF8_MAX_LENGTH + 3) / 4 * 4)
#define AUTOTUNE_LENGTH     8
#define FORMAT_LABEL        "mscash-opencl"
#define FORMAT_NAME         "M$ Cache Hash"
#define ALGORITHM_NAME      "MD4 OpenCL"
/* Device-side salt blob: 12 x 32-bit words (see salt() below). */
#define SALT_SIZE           (12 * sizeof(unsigned int))

/* Pinned (page-locked) host buffers and their device-side counterparts for
 * keys, per-key index/length words and per-key mask placement words. */
static cl_mem pinned_saved_keys, pinned_saved_idx, pinned_int_key_loc;
static cl_mem buffer_keys, buffer_idx, buffer_int_keys, buffer_int_key_loc;
static cl_uint *saved_plain, *saved_idx, *saved_int_key_loc;
/* Fixed key positions of mask placeholders when the GPU-side mask is static. */
static int static_gpu_locations[MASK_FMT_INT_PLHDR];

/* Result buffers, plus one set of per-salt lookup structures (arrays are
 * indexed by salt->sequential_id and NULL-terminated; see prepare_table()). */
static cl_mem buffer_return_hashes, buffer_hash_ids, buffer_bitmap_dupe;
static cl_mem *buffer_offset_tables, *buffer_hash_tables, *buffer_bitmaps, *buffer_salts;
static OFFSET_TABLE_WORD *offset_table;
static unsigned int **hash_tables;
static unsigned int current_salt;
static cl_uint *loaded_hashes, max_num_loaded_hashes, *hash_ids, *bitmaps, max_hash_table_size;
static cl_ulong bitmap_size_bits;

static unsigned int key_idx;      /* next free 32-bit word in saved_plain */
static unsigned int set_new_keys; /* host-side keys changed since last transfer */
static struct fmt_main *self;
static cl_uint *zero_buffer;      /* zeros used to reset device-side buffers */

#define MIN_KEYS_PER_CRYPT      1
#define MAX_KEYS_PER_CRYPT      1

#define STEP                    0
#define SEED                    1024

/* Labels for the autotune timing report, one per profiling event below. */
static const char *warn[] = {
	"key xfer: ",  ", idx xfer: ",  ", crypt: ",  ", res xfer: "
};

//This file contains auto-tuning routine(s). Has to be included after formats definitions.
#include "opencl_autotune.h"
73 
74 /* ------- Helper functions ------- */
get_task_max_work_group_size()75 static size_t get_task_max_work_group_size()
76 {
77 	return autotune_get_task_max_work_group_size(FALSE, 0, crypt_kernel);
78 }
79 
/* Tentative definition so helpers can reference the format descriptor
 * before the full initializer at the bottom of this file. */
struct fmt_main FMT_STRUCT;
81 
/*
 * Bind the kernel arguments that depend on keys-per-crypt (re-bound every
 * time create_clobj() reallocates these buffers). Argument 2 (the per-salt
 * buffer) is set in crypt_all() instead.
 */
static void set_kernel_args_kpc()
{
	HANDLE_CLERROR(clSetKernelArg(crypt_kernel, 0, sizeof(buffer_keys), (void *) &buffer_keys), "Error setting argument 1.");
	HANDLE_CLERROR(clSetKernelArg(crypt_kernel, 1, sizeof(buffer_idx), (void *) &buffer_idx), "Error setting argument 2.");
	HANDLE_CLERROR(clSetKernelArg(crypt_kernel, 3, sizeof(buffer_int_key_loc), (void *) &buffer_int_key_loc), "Error setting argument 4.");
}
88 
/*
 * Bind the kernel arguments that live for the whole session (created once in
 * create_base_clobj()). Arguments 5-7 are per-salt and set in crypt_all().
 */
static void set_kernel_args()
{
	HANDLE_CLERROR(clSetKernelArg(crypt_kernel, 4, sizeof(buffer_int_keys), (void *) &buffer_int_keys), "Error setting argument 5.");
	HANDLE_CLERROR(clSetKernelArg(crypt_kernel, 8, sizeof(buffer_return_hashes), (void *) &buffer_return_hashes), "Error setting argument 9.");
	HANDLE_CLERROR(clSetKernelArg(crypt_kernel, 9, sizeof(buffer_hash_ids), (void *) &buffer_hash_ids), "Error setting argument 10.");
	HANDLE_CLERROR(clSetKernelArg(crypt_kernel, 10, sizeof(buffer_bitmap_dupe), (void *) &buffer_bitmap_dupe), "Error setting argument 11.");
}
96 
/*
 * Allocate the kpc-sized buffers: pinned host mirrors for the key stream,
 * per-key index words and per-key mask placement words, plus their
 * device-side buffers, then re-bind the kpc-dependent kernel arguments.
 */
static void create_clobj(size_t kpc, struct fmt_main *self)
{
	pinned_saved_keys = clCreateBuffer(context[gpu_id], CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR, BUFSIZE * kpc, NULL, &ret_code);
	/* If pinned allocation fails, fall back to plain host memory. */
	if (ret_code != CL_SUCCESS) {
		saved_plain = (cl_uint *) mem_alloc(BUFSIZE * kpc);
		if (saved_plain == NULL)
			HANDLE_CLERROR(ret_code, "Error creating page-locked memory pinned_saved_keys.");
	}
	else {
		saved_plain = (cl_uint *) clEnqueueMapBuffer(queue[gpu_id], pinned_saved_keys, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, BUFSIZE * kpc, 0, NULL, NULL, &ret_code);
		HANDLE_CLERROR(ret_code, "Error mapping page-locked memory saved_plain.");
	}

	pinned_saved_idx = clCreateBuffer(context[gpu_id], CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR, sizeof(cl_uint) * kpc, NULL, &ret_code);
	HANDLE_CLERROR(ret_code, "Error creating page-locked memory pinned_saved_idx.");
	saved_idx = (cl_uint *) clEnqueueMapBuffer(queue[gpu_id], pinned_saved_idx, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, sizeof(cl_uint) * kpc, 0, NULL, NULL, &ret_code);
	HANDLE_CLERROR(ret_code, "Error mapping page-locked memory saved_idx.");

	pinned_int_key_loc = clCreateBuffer(context[gpu_id], CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR, sizeof(cl_uint) * kpc, NULL, &ret_code);
	HANDLE_CLERROR(ret_code, "Error creating page-locked memory pinned_int_key_loc.");
	saved_int_key_loc = (cl_uint *) clEnqueueMapBuffer(queue[gpu_id], pinned_int_key_loc, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, sizeof(cl_uint) * kpc, 0, NULL, NULL, &ret_code);
	HANDLE_CLERROR(ret_code, "Error mapping page-locked memory saved_int_key_loc.");

	// create and set arguments
	buffer_keys = clCreateBuffer(context[gpu_id], CL_MEM_READ_ONLY, BUFSIZE * kpc, NULL, &ret_code);
	HANDLE_CLERROR(ret_code, "Error creating buffer argument buffer_keys.");

	/* 4 == sizeof(cl_uint): one index word per key. */
	buffer_idx = clCreateBuffer(context[gpu_id], CL_MEM_READ_ONLY, 4 * kpc, NULL, &ret_code);
	HANDLE_CLERROR(ret_code, "Error creating buffer argument buffer_idx.");

	buffer_int_key_loc = clCreateBuffer(context[gpu_id], CL_MEM_READ_ONLY, sizeof(cl_uint) * kpc, NULL, &ret_code);
	HANDLE_CLERROR(ret_code, "Error creating buffer argument buffer_int_key_loc.");

	set_kernel_args_kpc();
}
132 
/*
 * Allocate the session-lifetime buffers: the returned-hash scratch area,
 * the cracked-candidate id list, the duplicate-suppression bitmap and the
 * GPU-side mask candidates, then bind the corresponding kernel arguments.
 */
static void create_base_clobj()
{
	unsigned int dummy = 0;

	zero_buffer = (cl_uint *) mem_calloc(max_hash_table_size/32 + 1, sizeof(cl_uint));

	/* Two 32-bit words (second MD4 half) per potentially cracked hash. */
	buffer_return_hashes = clCreateBuffer(context[gpu_id], CL_MEM_WRITE_ONLY, 2 * sizeof(cl_uint) * max_num_loaded_hashes, NULL, &ret_code);
	HANDLE_CLERROR(ret_code, "Error creating buffer argument buffer_return_hashes.");

	/* hash_ids[0] = count, then 3 words (gid, int-index, table slot) each. */
	buffer_hash_ids = clCreateBuffer(context[gpu_id], CL_MEM_READ_WRITE, (3 * max_num_loaded_hashes + 1) * sizeof(cl_uint), NULL, &ret_code);
	HANDLE_CLERROR(ret_code, "Error creating buffer argument buffer_buffer_hash_ids.");

	buffer_bitmap_dupe = clCreateBuffer(context[gpu_id], CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, (max_hash_table_size/32 + 1) * sizeof(cl_uint), zero_buffer, &ret_code);
	HANDLE_CLERROR(ret_code, "Error creating buffer argument buffer_bitmap_dupe.");

	//ref_ctr is used as dummy parameter
	buffer_int_keys = clCreateBuffer(context[gpu_id], CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, 4 * mask_int_cand.num_int_cand, mask_int_cand.int_cand ? mask_int_cand.int_cand : (void *)&dummy, &ret_code);
	HANDLE_CLERROR(ret_code, "Error creating buffer argument buffer_int_keys.");

	/* Zero the cracked-hash counter (first word of buffer_hash_ids). */
	HANDLE_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], buffer_hash_ids, CL_TRUE, 0, sizeof(cl_uint), zero_buffer, 0, NULL, NULL), "failed in clEnqueueWriteBuffer buffer_hash_ids.");

	set_kernel_args();
}
156 
/*
 * Release the kpc-sized buffers created by create_clobj(). Idempotent:
 * buffer_keys doubles as the "allocated" flag and is cleared on exit.
 */
static void release_clobj(void)
{
	if (buffer_keys) {
		/* saved_plain is either a mapped pinned buffer or, if pinned
		 * allocation failed, a plain mem_alloc() fallback. */
		if (pinned_saved_keys) {
			HANDLE_CLERROR(clEnqueueUnmapMemObject(queue[gpu_id], pinned_saved_keys, saved_plain, 0, NULL, NULL), "Error Unmapping saved_plain.");
			HANDLE_CLERROR(clReleaseMemObject(pinned_saved_keys), "Error Releasing pinned_saved_keys.");
		}
		else
			MEM_FREE(saved_plain);

		HANDLE_CLERROR(clEnqueueUnmapMemObject(queue[gpu_id], pinned_saved_idx, saved_idx, 0, NULL, NULL), "Error Unmapping saved_idx.");
		HANDLE_CLERROR(clEnqueueUnmapMemObject(queue[gpu_id], pinned_int_key_loc, saved_int_key_loc, 0, NULL, NULL), "Error Unmapping saved_int_key_loc.");
		/* Unmaps are queued; ensure they completed before releasing. */
		HANDLE_CLERROR(clFinish(queue[gpu_id]), "Error releasing mappings.");
		HANDLE_CLERROR(clReleaseMemObject(pinned_saved_idx), "Error Releasing pinned_saved_idx.");
		HANDLE_CLERROR(clReleaseMemObject(pinned_int_key_loc), "Error Releasing pinned_int_key_loc.");
		HANDLE_CLERROR(clReleaseMemObject(buffer_keys), "Error Releasing buffer_keys.");
		HANDLE_CLERROR(clReleaseMemObject(buffer_idx), "Error Releasing buffer_idx.");
		HANDLE_CLERROR(clReleaseMemObject(buffer_int_key_loc), "Error Releasing buffer_int_key_loc.");
		buffer_keys = 0;
	}
}
178 
/*
 * Release the session-lifetime buffers created by create_base_clobj().
 * Idempotent: buffer_int_keys doubles as the "allocated" flag.
 */
static void release_base_clobj(void)
{
	if (buffer_int_keys) {
		HANDLE_CLERROR(clReleaseMemObject(buffer_int_keys), "Error Releasing buffer_int_keys.");
		HANDLE_CLERROR(clReleaseMemObject(buffer_return_hashes), "Error Releasing buffer_return_hashes.");
		HANDLE_CLERROR(clReleaseMemObject(buffer_bitmap_dupe), "Error Releasing buffer_bitmap_dupe.");
		HANDLE_CLERROR(clReleaseMemObject(buffer_hash_ids), "Error Releasing buffer_hash_ids.");
		MEM_FREE(zero_buffer);
		buffer_int_keys = 0;
	}
}
190 
release_salt_buffers()191 static void release_salt_buffers()
192 {
193 	unsigned int k;
194 	if (hash_tables) {
195 		k = 0;
196 		while (hash_tables[k]) {
197 			MEM_FREE(hash_tables[k]);
198 			k++;
199 		}
200 		MEM_FREE(hash_tables);
201 	}
202 	if (buffer_offset_tables) {
203 		k = 0;
204 		while (buffer_offset_tables[k]) {
205 			clReleaseMemObject(buffer_offset_tables[k]);
206 			buffer_offset_tables[k] = 0;
207 			k++;
208 		}
209 		MEM_FREE(buffer_offset_tables);
210 	}
211 	if (buffer_hash_tables) {
212 		k = 0;
213 		while (buffer_hash_tables[k]) {
214 			clReleaseMemObject(buffer_hash_tables[k]);
215 			buffer_hash_tables[k] = 0;
216 			k++;
217 		}
218 		MEM_FREE(buffer_hash_tables);
219 	}
220 	if (buffer_bitmaps) {
221 		k = 0;
222 		while (buffer_bitmaps[k]) {
223 			clReleaseMemObject(buffer_bitmaps[k]);
224 			buffer_bitmaps[k] = 0;
225 			k++;
226 		}
227 		MEM_FREE(buffer_bitmaps);
228 	}
229 	if (buffer_salts) {
230 		k = 0;
231 		while (buffer_salts[k]) {
232 			clReleaseMemObject(buffer_salts[k]);
233 			buffer_salts[k] = 0;
234 			k++;
235 		}
236 		MEM_FREE(buffer_salts);
237 	}
238 }
239 
done(void)240 static void done(void)
241 {
242 	release_clobj();
243 	release_base_clobj();
244 
245 	if (crypt_kernel) {
246 		HANDLE_CLERROR(clReleaseKernel(crypt_kernel), "Release kernel.");
247 		HANDLE_CLERROR(clReleaseProgram(program[gpu_id]), "Release Program.");
248 		crypt_kernel = NULL;
249 	}
250 
251 	if (loaded_hashes)
252 		MEM_FREE(loaded_hashes);
253 	if (hash_ids)
254 		MEM_FREE(hash_ids);
255 	release_salt_buffers();
256 }
257 
/*
 * (Re)build the OpenCL kernel with mask- and encoding-specific compile-time
 * options. Called from reset() whenever mask or salt data may have changed.
 */
static void init_kernel(void)
{
	char build_opts[5000];
	int i;
	cl_ulong const_cache_size;

	/* Safe even when crypt_kernel is still NULL on the first call. */
	clReleaseKernel(crypt_kernel);

	/* For a static GPU-side mask, resolve each placeholder to its fixed
	 * position in the key; -1 marks an unused placeholder slot. */
	for (i = 0; i < MASK_FMT_INT_PLHDR; i++)
		if (mask_skip_ranges && mask_skip_ranges[i] != -1)
			static_gpu_locations[i] = mask_int_cand.int_cpu_mask_ctx->
				ranges[mask_skip_ranges[i]].pos;
		else
			static_gpu_locations[i] = -1;

	HANDLE_CLERROR(clGetDeviceInfo(devices[gpu_id], CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof(cl_ulong), &const_cache_size, 0), "failed to get CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE.");

	/* Format string and argument list are stitched together with matching
	 * #if blocks; keep the two sections in sync when editing. */
	sprintf(build_opts, "-D NUM_INT_KEYS=%u -D IS_STATIC_GPU_MASK=%d"
#if !NT_FULL_UNICODE
		" -DUCS_2"
#endif
		" -D CONST_CACHE_SIZE=%llu -D%s -D%s -DPLAINTEXT_LENGTH=%d -D LOC_0=%d"
#if MASK_FMT_INT_PLHDR > 1
	" -D LOC_1=%d "
#endif
#if MASK_FMT_INT_PLHDR > 2
	"-D LOC_2=%d "
#endif
#if MASK_FMT_INT_PLHDR > 3
	"-D LOC_3=%d"
#endif
	, mask_int_cand.num_int_cand, mask_gpu_is_static,
	(unsigned long long)const_cache_size, cp_id2macro(options.target_enc),
	options.internal_cp == UTF_8 ? cp_id2macro(ASCII) :
	cp_id2macro(options.internal_cp), PLAINTEXT_LENGTH,
	static_gpu_locations[0]
#if MASK_FMT_INT_PLHDR > 1
	, static_gpu_locations[1]
#endif
#if MASK_FMT_INT_PLHDR > 2
	, static_gpu_locations[2]
#endif
#if MASK_FMT_INT_PLHDR > 3
	, static_gpu_locations[3]
#endif
	);

	opencl_build_kernel("$JOHN/kernels/mscash_kernel.cl", gpu_id, build_opts, 0);
	crypt_kernel = clCreateKernel(program[gpu_id], "mscash", &ret_code);
	HANDLE_CLERROR(ret_code, "Error creating kernel. Double-check kernel name?");
}
309 
/* Forward declarations: defined below but referenced by init(). */
static void set_key(char *_key, int index);
static void *salt(char *_ciphertext);
312 
/*
 * fmt_main init() hook: remember the format descriptor, prepare the device
 * and size the mask-candidate target from the device speed index. Test
 * vectors are adjusted for the active target encoding and length limit.
 */
static void init(struct fmt_main *_self)
{
	self = _self;
	max_num_loaded_hashes = 0;

	opencl_prepare_dev(gpu_id);
	mask_int_cand_target = opencl_speed_index(gpu_id) / 300;

	mscash1_adjust_tests(self, options.target_enc, PLAINTEXT_LENGTH,
	                     set_key, set_key);
}
324 
salt(char * ciphertext)325 static void *salt(char *ciphertext)
326 {
327 	static union {
328 		unsigned int w[12];
329 		UTF16 s[24];
330 	} nt_buffer;
331 	UTF16 *out = nt_buffer.s;
332 	UTF16 usalt[MSCASH1_MAX_SALT_LENGTH + 1 + 2];
333 	UTF16 *login = usalt;
334 	UTF8 csalt[3 * MSCASH1_MAX_SALT_LENGTH + 1];
335 	int i, length = 0;
336 	char *pos = ciphertext + FORMAT_TAG_LEN;
337 	char *lasth = strrchr(ciphertext, '#');
338 
339 	memset(nt_buffer.w, 0, sizeof(nt_buffer.w));
340 	memset(usalt, 0, sizeof(usalt));
341 
342 	while (pos < lasth)
343 		csalt[length++] = *pos++;
344 	csalt[length] = 0;
345 
346 	enc_strlwr((char*)csalt);
347 	enc_to_utf16(usalt, MSCASH1_MAX_SALT_LENGTH, csalt, length);
348 	length = strlen16(usalt);
349 
350 	for (i = 0; i < length; i++)
351 		*out++ = *login++;
352 	*out++ = 0x80;
353 
354 	nt_buffer.w[10] = (length << 4) + 128;
355 
356 	return &nt_buffer.w;
357 }
358 
/*
 * fmt_main get_hash[] callbacks: hash_ids[3 + 3*index] (filled by the kernel
 * via crypt_all()) is the slot of the index-th candidate match in the current
 * salt's perfect hash table; mask it down to the requested bucket width.
 */
static int get_hash_0(int index) { return hash_tables[current_salt][hash_ids[3 + 3 * index]] & PH_MASK_0; }
static int get_hash_1(int index) { return hash_tables[current_salt][hash_ids[3 + 3 * index]] & PH_MASK_1; }
static int get_hash_2(int index) { return hash_tables[current_salt][hash_ids[3 + 3 * index]] & PH_MASK_2; }
static int get_hash_3(int index) { return hash_tables[current_salt][hash_ids[3 + 3 * index]] & PH_MASK_3; }
static int get_hash_4(int index) { return hash_tables[current_salt][hash_ids[3 + 3 * index]] & PH_MASK_4; }
static int get_hash_5(int index) { return hash_tables[current_salt][hash_ids[3 + 3 * index]] & PH_MASK_5; }
static int get_hash_6(int index) { return hash_tables[current_salt][hash_ids[3 + 3 * index]] & PH_MASK_6; }
366 
/*
 * fmt_main clear_keys() hook: reset the packed key stream so the next
 * set_key() round starts at offset 0 with nothing pending for transfer.
 */
static void clear_keys(void)
{
	key_idx = 0;
	set_new_keys = 0;
}
372 
/*
 * fmt_main set_key() hook: append the key to the packed saved_plain stream
 * and record (offset << 7 | length) in saved_idx[index]. For a non-static
 * GPU mask, also pack one byte of placeholder position per mask slot into
 * saved_int_key_loc[index] (0x80 marks an unused slot).
 */
static void set_key(char *_key, int index)
{
	const uint32_t *key = (uint32_t*)_key;
	int len = strlen(_key);

	if (mask_int_cand.num_int_cand > 1 && !mask_gpu_is_static) {
		int i;
		saved_int_key_loc[index] = 0;
		for (i = 0; i < MASK_FMT_INT_PLHDR; i++) {
			if (mask_skip_ranges[i] != -1)  {
				/* Byte i = absolute key position of placeholder i. */
				saved_int_key_loc[index] |= ((mask_int_cand.
				int_cpu_mask_ctx->ranges[mask_skip_ranges[i]].offset +
				mask_int_cand.int_cpu_mask_ctx->
				ranges[mask_skip_ranges[i]].pos) & 0xff) << (i << 3);
			}
			else
				saved_int_key_loc[index] |= 0x80 << (i << 3);
		}
	}

	saved_idx[index] = (key_idx << 7) | len;

	/* Copy whole 32-bit words, then mask the tail word down to the
	 * remaining bytes. NOTE(review): the final *key read may touch up to
	 * 3 bytes past the NUL — assumed safe for the caller's key buffers,
	 * as is common in this codebase; verify against the key sources. */
	while (len > 4) {
		saved_plain[key_idx++] = *key++;
		len -= 4;
	}
	if (len)
		saved_plain[key_idx++] = *key & (0xffffffffU >> (32 - (len << 3)));
	set_new_keys = 1;
}
403 
/*
 * fmt_main get_key() hook: reconstruct the plaintext for a reported index.
 * After a crack, index refers into hash_ids (base key id + mask-candidate
 * id); otherwise it addresses the packed key stream directly. Mask
 * placeholder positions are patched in from the int candidate. Returns a
 * pointer to static storage.
 */
static char *get_key(int index)
{
	static char out[UTF8_MAX_LENGTH + 1];
	int i, len, int_index, t;
	char *key;

	/* No cracks reported (or index out of range): plain key lookup. */
	if (hash_ids == NULL || hash_ids[0] == 0 ||
	    index >= hash_ids[0] || hash_ids[0] > max_num_loaded_hashes) {
		t = index;
		int_index = 0;
	}
	else  {
		t = hash_ids[1 + 3 * index];
		int_index = hash_ids[2 + 3 * index];

	}

	if (t >= global_work_size) {
		//fprintf(stderr, "Get key error! %d %d\n", t, index);
		t = 0;
	}

	/* saved_idx packs (offset << 7 | length); length fits in 7 bits. */
	len = saved_idx[t] & 127;
	key = (char*)&saved_plain[saved_idx[t] >> 7];

	for (i = 0; i < len; i++)
		out[i] = *key++;
	out[i] = 0;

	/* Substitute the mask placeholder characters for this candidate. */
	if (len && mask_skip_ranges && mask_int_cand.num_int_cand > 1) {
		for (i = 0; i < MASK_FMT_INT_PLHDR && mask_skip_ranges[i] != -1; i++)
			if (mask_gpu_is_static)
				out[static_gpu_locations[i]] =
				mask_int_cand.int_cand[int_index].x[i];
			else
				out[(saved_int_key_loc[t]& (0xff << (i * 8))) >> (i * 8)] =
				mask_int_cand.int_cand[int_index].x[i];
	}

	return out;
}
445 
/* Use only for smaller bitmaps < 16MB */
/*
 * Build a 4-region bitmap filter: each of the four 32-bit words of a loaded
 * hash sets one bit in its own bmp_sz-bit region. bmp_sz must be a power of
 * two (it is used as a mask). The old bitmap, if any, is freed first.
 */
static void prepare_bitmap_4(cl_ulong bmp_sz, cl_uint **bitmap_ptr, uint32_t num_loaded_hashes)
{
	unsigned int i;
	MEM_FREE(*bitmap_ptr);
	/* 4 regions of bmp_sz bits total (bmp_sz >> 3 cl_uints). */
	*bitmap_ptr = (cl_uint*) mem_calloc((bmp_sz >> 3), sizeof(cl_uint));

	for (i = 0; i < num_loaded_hashes; i++) {
		unsigned int bmp_idx = loaded_hashes[4 * i + 3] & (bmp_sz - 1);
		(*bitmap_ptr)[bmp_idx >> 5] |= (1U << (bmp_idx & 31));

		bmp_idx = loaded_hashes[4 * i + 2] & (bmp_sz - 1);
		(*bitmap_ptr)[(bmp_sz >> 5) + (bmp_idx >> 5)] |=
			(1U << (bmp_idx & 31));

		/* (bmp_sz >> 4) == 2 * (bmp_sz >> 5): third region base. */
		bmp_idx = loaded_hashes[4 * i + 1] & (bmp_sz - 1);
		(*bitmap_ptr)[(bmp_sz >> 4) + (bmp_idx >> 5)] |=
			(1U << (bmp_idx & 31));

		bmp_idx = loaded_hashes[4 * i] & (bmp_sz - 1);
		(*bitmap_ptr)[(bmp_sz >> 5) * 3 + (bmp_idx >> 5)] |=
			(1U << (bmp_idx & 31));
	}
}
470 /*
471 static void prepare_bitmap_1(cl_ulong bmp_sz, cl_uint **bitmap_ptr, uint32_t num_loaded_hashes)
472 {
473 	unsigned int i;
474 	MEM_FREE(*bitmap_ptr);
475 	*bitmap_ptr = (cl_uint*) mem_calloc((bmp_sz >> 5), sizeof(cl_uint));
476 
477 	for (i = 0; i < num_loaded_hashes; i++) {
478 		unsigned int bmp_idx = loaded_hashes[4 * i + 3] & (bmp_sz - 1);
479 		(*bitmap_ptr)[bmp_idx >> 5] |= (1U << (bmp_idx & 31));
480 	}
481 }*/
482 
select_bitmap(unsigned int num_loaded_hashes)483 static void select_bitmap(unsigned int num_loaded_hashes)
484 {
485 	cl_ulong max_local_mem_sz_bytes = 0;
486 
487 	HANDLE_CLERROR(clGetDeviceInfo(devices[gpu_id], CL_DEVICE_LOCAL_MEM_SIZE,
488 		sizeof(cl_ulong), &max_local_mem_sz_bytes, 0),
489 		"failed to get CL_DEVICE_LOCAL_MEM_SIZE.");
490 
491 	if (num_loaded_hashes <= 5100) {
492 		if (amd_gcn_10(device_info[gpu_id]) ||
493 			amd_vliw4(device_info[gpu_id]))
494 			bitmap_size_bits = 512 * 1024;
495 
496 		else
497 			bitmap_size_bits = 256 * 1024;
498 	}
499 
500 	else if (num_loaded_hashes <= 10100) {
501 		if (amd_gcn_10(device_info[gpu_id]) ||
502 			amd_vliw4(device_info[gpu_id]))
503 			bitmap_size_bits = 512 * 1024;
504 
505 		else
506 			bitmap_size_bits = 256 * 1024;
507 
508 	}
509 
510 	else if (num_loaded_hashes <= 20100) {
511 		if (amd_gcn_10(device_info[gpu_id]) ||
512 			amd_vliw4(device_info[gpu_id]))
513 			bitmap_size_bits = 1024 * 1024;
514 
515 		else
516 			bitmap_size_bits = 512 * 1024;
517 
518 	}
519 
520 	else if (num_loaded_hashes <= 250100)
521 		bitmap_size_bits = 2048 * 1024;
522 
523 	else if (num_loaded_hashes <= 1100100) {
524 		if (!amd_gcn_11(device_info[gpu_id]))
525 			bitmap_size_bits = 4096 * 1024;
526 
527 		else
528 			bitmap_size_bits = 2048 * 1024;
529 	}
530 	else {
531 		fprintf(stderr, "Too many hashes (%d), max is 1100100\n",
532 		        num_loaded_hashes);
533 		error();
534 	}
535 
536 	prepare_bitmap_4(bitmap_size_bits, &bitmaps, num_loaded_hashes);
537 }
538 
/*
 * Build per-salt lookup structures for every salt in the database: a perfect
 * hash table and offset table, a bitmap filter, and the device-side salt
 * blob carrying the table/bitmap geometry. Arrays are indexed by the
 * sequential salt id assigned here and NULL-terminated for teardown.
 */
static void prepare_table(struct db_main *db)
{
	struct db_salt *salt;
	int seq_ids = 0;

	max_num_loaded_hashes = 0;
	max_hash_table_size = 1;

	/* Size shared scratch buffers for the largest salt. */
	salt = db->salts;
	do {
		if (salt->count > max_num_loaded_hashes)
			max_num_loaded_hashes = salt->count;
	} while ((salt = salt->next));

	/* Drop any structures from a previous reset(). */
	MEM_FREE(loaded_hashes);
	MEM_FREE(hash_ids);
	release_salt_buffers();

	loaded_hashes = (cl_uint*) mem_alloc(4 * max_num_loaded_hashes * sizeof(cl_uint));
	hash_ids = (cl_uint*) mem_calloc((3 * max_num_loaded_hashes + 1), sizeof(cl_uint));

	hash_tables = (unsigned int **)mem_calloc(sizeof(unsigned int*), db->salt_count + 1);
	buffer_offset_tables = (cl_mem *)mem_calloc(sizeof(cl_mem), db->salt_count + 1);
	buffer_hash_tables = (cl_mem *)mem_calloc(sizeof(cl_mem), db->salt_count + 1);
	buffer_bitmaps = (cl_mem *)mem_calloc(sizeof(cl_mem), db->salt_count + 1);
	buffer_salts = (cl_mem *)mem_calloc(sizeof(cl_mem), db->salt_count + 1);

	/* NULL sentinels so release_salt_buffers() knows where to stop. */
	hash_tables[db->salt_count] = NULL;
	buffer_offset_tables[db->salt_count] = NULL;
	buffer_hash_tables[db->salt_count] = NULL;
	buffer_bitmaps[db->salt_count] = NULL;
	buffer_salts[db->salt_count] = NULL;

	salt = db->salts;
	do {
		unsigned int i = 0;
		unsigned int num_loaded_hashes, salt_params[SALT_SIZE / sizeof(unsigned int) + 5];
		unsigned int hash_table_size, offset_table_size, shift64_ht_sz, shift64_ot_sz;
		struct db_password *pw, *last;

		/* Gather binaries for this salt, unlinking entries whose
		 * binary was already removed (cracked/removed hashes). */
		last = pw = salt->list;
		do {
			unsigned int *bin = (unsigned int *)pw->binary;
			if (bin == NULL) {
				if (last == pw)
					salt->list = pw->next;
				else
					last->next = pw->next;
			} else {
				last = pw;
				loaded_hashes[4 * i] = bin[0];
				loaded_hashes[4 * i + 1] = bin[1];
				loaded_hashes[4 * i + 2] = bin[2];
				loaded_hashes[4 * i + 3] = bin[3];
				i++;
			}
		} while ((pw = pw->next));

		if (i != salt->count) {
			fprintf(stderr,
				"Something went wrong while preparing hashes..Exiting..\n");
			error();
		}
		num_loaded_hashes = salt->count;
		salt->sequential_id = seq_ids++;

		/* Builds hash_table_128 (global from bt_interface.h) and the
		 * offset table; may deduplicate, hence the returned count. */
		num_loaded_hashes = create_perfect_hash_table(128, (void*)loaded_hashes,
		                                              num_loaded_hashes,
		                                              &offset_table,
		                                              &offset_table_size,
		                                              &hash_table_size, 0);

		if (!num_loaded_hashes) {
			MEM_FREE(hash_table_128);
			fprintf(stderr, "Failed to create Hash Table for cracking.\n");
			error();
		}

		hash_tables[salt->sequential_id] = hash_table_128;

		buffer_offset_tables[salt->sequential_id] = clCreateBuffer(context[gpu_id], CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, offset_table_size * sizeof(OFFSET_TABLE_WORD), offset_table, &ret_code);
		HANDLE_CLERROR(ret_code, "Error creating buffer argument buffer_offset_tables[].");

		buffer_hash_tables[salt->sequential_id] = clCreateBuffer(context[gpu_id], CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, hash_table_size * sizeof(unsigned int) * 2, hash_table_128, &ret_code);
		HANDLE_CLERROR(ret_code, "Error creating buffer argument buffer_hash_tables[].");

		if (max_hash_table_size < hash_table_size)
			max_hash_table_size = hash_table_size;

		/* 2^64 mod table size, computed without 64-bit overflow. */
		shift64_ht_sz = (((1ULL << 63) % hash_table_size) * 2) % hash_table_size;
		shift64_ot_sz = (((1ULL << 63) % offset_table_size) * 2) % offset_table_size;

		select_bitmap(num_loaded_hashes);

		/* Salt blob + 5 geometry words consumed by the kernel. */
		memcpy(salt_params, salt->salt, SALT_SIZE);
		salt_params[12] = bitmap_size_bits - 1;
		salt_params[13] = offset_table_size;
		salt_params[14] = hash_table_size;
		salt_params[15] = shift64_ot_sz;
		salt_params[16] = shift64_ht_sz;

		buffer_bitmaps[salt->sequential_id] = clCreateBuffer(context[gpu_id], CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, (size_t)(bitmap_size_bits >> 3) * 2, bitmaps, &ret_code);
		HANDLE_CLERROR(ret_code, "Error creating buffer argument buffer_bitmaps[].");

		buffer_salts[salt->sequential_id] = clCreateBuffer(context[gpu_id], CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, (SALT_SIZE / sizeof(unsigned int) + 5) * sizeof(unsigned int), salt_params, &ret_code);
		HANDLE_CLERROR(ret_code, "Error creating buffer argument buffer_salts[].");

		/* Host copies were uploaded with COPY_HOST_PTR; free them. */
		MEM_FREE(bitmaps);
		MEM_FREE(offset_table);

	} while ((salt = salt->next));
}
651 
/*
 * fmt_main crypt_all() hook: upload new keys if any, bind this salt's
 * buffers, run the kernel and read back the candidate-match count. On
 * matches, also read the returned hash halves and match ids, then reset the
 * device-side counter and duplicate bitmap for the next call. Returns the
 * number of candidate cracks; *pcount is scaled by the GPU mask multiplier.
 */
static int crypt_all(int *pcount, struct db_salt *salt)
{
	const int count = *pcount;

	size_t *lws = local_work_size ? &local_work_size : NULL;
	size_t gws = GET_NEXT_MULTIPLE(count, local_work_size);

	//fprintf(stderr, "%s(%d) lws "Zu" gws "Zu" idx %u int_cand %d\n", __FUNCTION__, count, local_work_size, gws, key_idx, mask_int_cand.num_int_cand);

	// copy keys to the device
	if (set_new_keys || ocl_autotune_running) {
		if (key_idx)
			BENCH_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], buffer_keys, CL_FALSE, 0, 4 * key_idx, saved_plain, 0, NULL, multi_profilingEvent[0]), "failed in clEnqueueWriteBuffer buffer_keys.");
		BENCH_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], buffer_idx, CL_FALSE, 0, 4 * gws, saved_idx, 0, NULL, multi_profilingEvent[1]), "failed in clEnqueueWriteBuffer buffer_idx.");
		if (!mask_gpu_is_static)
			BENCH_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], buffer_int_key_loc, CL_FALSE, 0, 4 * gws, saved_int_key_loc, 0, NULL, NULL), "failed in clEnqueueWriteBuffer buffer_int_key_loc.");
		set_new_keys = 0;
	}

	/* Bind the per-salt lookup buffers (args 2, 5-7). */
	current_salt = salt->sequential_id;
	BENCH_CLERROR(clSetKernelArg(crypt_kernel, 2, sizeof(buffer_salts[current_salt]), (void *) &buffer_salts[current_salt]), "Error setting argument 3.");
	BENCH_CLERROR(clSetKernelArg(crypt_kernel, 5, sizeof(buffer_bitmaps[current_salt]), (void *) &buffer_bitmaps[current_salt]), "Error setting argument 6.");
	BENCH_CLERROR(clSetKernelArg(crypt_kernel, 6, sizeof(buffer_offset_tables[current_salt]), (void *) &buffer_offset_tables[current_salt]), "Error setting argument 7.");
	BENCH_CLERROR(clSetKernelArg(crypt_kernel, 7, sizeof(buffer_hash_tables[current_salt]), (void *) &buffer_hash_tables[current_salt]), "Error setting argument 8.");

	BENCH_CLERROR(clEnqueueNDRangeKernel(queue[gpu_id], crypt_kernel, 1, NULL, &gws, lws, 0, NULL, multi_profilingEvent[2]), "failed in clEnqueueNDRangeKernel");

	/* Blocking read of the match counter (hash_ids[0]). */
	BENCH_CLERROR(clEnqueueReadBuffer(queue[gpu_id], buffer_hash_ids, CL_TRUE, 0, sizeof(cl_uint), hash_ids, 0, NULL, multi_profilingEvent[3]), "failed in reading back num cracked hashes.");

	if (hash_ids[0] > max_num_loaded_hashes) {
		fprintf(stderr, "Error, crypt_all kernel.\n");
		error();
	}

	if (hash_ids[0]) {
		/* loaded_hashes is reused here for the returned second MD4
		 * halves consumed by cmp_exact(). */
		BENCH_CLERROR(clEnqueueReadBuffer(queue[gpu_id], buffer_return_hashes, CL_FALSE, 0, 2 * sizeof(cl_uint) * hash_ids[0], loaded_hashes, 0, NULL, NULL), "failed in reading back return_hashes.");
		BENCH_CLERROR(clEnqueueReadBuffer(queue[gpu_id], buffer_hash_ids, CL_TRUE, 0, (3 * hash_ids[0] + 1) * sizeof(cl_uint), hash_ids, 0, NULL, NULL), "failed in reading data back hash_ids.");
		BENCH_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], buffer_bitmap_dupe, CL_FALSE, 0, (max_hash_table_size/32 + 1) * sizeof(cl_uint), zero_buffer, 0, NULL, NULL), "failed in clEnqueueWriteBuffer buffer_bitmap_dupe.");
		BENCH_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], buffer_hash_ids, CL_TRUE, 0, sizeof(cl_uint), zero_buffer, 0, NULL, NULL), "failed in clEnqueueWriteBuffer buffer_hash_ids.");
	}

	*pcount *=  mask_int_cand.num_int_cand;
	return hash_ids[0];
}
696 
/*
 * fmt_main cmp_all() hook: matching is already done on-device via the
 * perfect hash table, so any nonzero candidate count is a potential match;
 * the binary argument is unused here.
 */
static int cmp_all(void *binary, int count)
{
	return count != 0;
}
702 
/*
 * fmt_main cmp_one() hook: compare the first 32-bit word of the stored
 * binary against the hash-table entry the kernel matched for this index.
 */
static int cmp_one(void *binary, int index)
{
	return (((unsigned int*)binary)[0] ==
		hash_tables[current_salt][hash_ids[3 + 3 * index]]);
}
708 
/*
 * fmt_main cmp_exact() hook: full check of the second MD4 half. After
 * crypt_all(), loaded_hashes holds the kernel-computed hash words (two
 * cl_uints per candidate, read back from buffer_return_hashes); compare
 * them against words 2 and 3 of the binary parsed from the source string.
 */
static int cmp_exact(char *source, int index)
{
	unsigned int *t = (unsigned int *) mscash_common_binary(source);

	if (t[2] != loaded_hashes[2 * index])
		return 0;
	if (t[3] != loaded_hashes[2 * index + 1])
		return 0;
	return 1;
}
719 
/*
 * fmt_main reset() hook: (re)build all per-database state — salt tables,
 * kernel, base buffers — then run the shared auto-tune to pick LWS/GWS.
 * On re-entry, the previous auto-tune result is discarded first.
 */
static void reset(struct db_main *db)
{
	static size_t o_lws, o_gws;
	static int initialized;
	size_t gws_limit;

	//fprintf(stderr, "%s(%p), i=%d\n", __FUNCTION__, db, initialized);
	/* Cap GWS by allocatable key-buffer size, rounded to a power of two
	 * without exceeding the cap. */
	gws_limit = MIN((0xf << 21) * 4 / BUFSIZE,
	                get_max_mem_alloc_size(gpu_id) / BUFSIZE);
	get_power_of_two(gws_limit);
	if (gws_limit > MIN((0xf << 21) * 4 / BUFSIZE,
	                    get_max_mem_alloc_size(gpu_id) / BUFSIZE))
		gws_limit >>= 1;


	if (initialized) {
		// Forget the previous auto-tune
		local_work_size = o_lws;
		global_work_size = o_gws;

		release_base_clobj();
		release_clobj();
	} else {
		o_lws = local_work_size;
		o_gws = global_work_size;
		initialized = 1;
	}

	prepare_table(db);
	init_kernel();

	create_base_clobj();

	current_salt = 0;
	hash_ids[0] = 0;

	// If real crack run, don't auto-tune for self-tests
	if (db->real && db != db->real)
		opencl_get_sane_lws_gws_values();

	// Initialize openCL tuning (library) for this format.
	opencl_init_auto_setup(SEED, 1, NULL, warn, 2, self,
	                       create_clobj, release_clobj,
	                       2 * BUFSIZE, gws_limit, db);

	// Auto tune execution from shared/included code.
	autotune_run_extra(self, 1, gws_limit, 200, CL_TRUE);
}
768 
/* Format descriptor registered with the core (see FMT_STRUCT macro). */
struct fmt_main FMT_STRUCT = {
	{
		FORMAT_LABEL,
		FORMAT_NAME,
		ALGORITHM_NAME,
		BENCHMARK_COMMENT,
		BENCHMARK_LENGTH,
		0,
		PLAINTEXT_LENGTH,
		BINARY_SIZE,
		BINARY_ALIGN,
		SALT_SIZE,
		SALT_ALIGN,
		MIN_KEYS_PER_CRYPT,
		MAX_KEYS_PER_CRYPT,
		FMT_CASE | FMT_8_BIT | FMT_SPLIT_UNIFIES_CASE | FMT_UNICODE | FMT_ENC | FMT_REMOVE | FMT_MASK,
		{ NULL },
		{ FORMAT_TAG },
		mscash1_common_tests
	}, {
		init,
		done,
		reset,
		/* Shared mscash1 helpers handle prepare/valid/split/binary. */
		mscash1_common_prepare,
		mscash1_common_valid,
		mscash1_common_split,
		mscash_common_binary,
		salt,
		{ NULL },
		fmt_default_source,
		{
			fmt_default_binary_hash_0,
			fmt_default_binary_hash_1,
			fmt_default_binary_hash_2,
			fmt_default_binary_hash_3,
			fmt_default_binary_hash_4,
			fmt_default_binary_hash_5,
			fmt_default_binary_hash_6
		},
		fmt_default_salt_hash,
		NULL,
		fmt_default_set_salt,
		set_key,
		get_key,
		clear_keys,
		crypt_all,
		{
			get_hash_0,
			get_hash_1,
			get_hash_2,
			get_hash_3,
			get_hash_4,
			get_hash_5,
			get_hash_6
		},
		cmp_all,
		cmp_one,
		cmp_exact
	}
};
829 
830 #endif /* plugin stanza */
831 
832 #endif /* HAVE_OPENCL */
833