1 /*
2  * This software is Copyright (c) 2012 Sayantan Datta <std2048 at gmail dot com>
3  * and it is hereby released to the general public under the following terms:
4  * Redistribution and use in source and binary forms, with or without modification, are permitted.
5  * Based on Solar Designer implementation of DES_bs_b.c in jtr-v1.7.9
6  */
7 
8 #ifdef HAVE_OPENCL
9 
10 #include <string.h>
11 
12 #include "arch.h"
13 #include "common.h"
14 #include "opencl_DES_bs.h"
15 #include "opencl_DES_hst_dev_shared.h"
16 #include "unicode.h"
17 #include "bt_interface.h"
18 #include "mask_ext.h"
19 
20 typedef struct {
21 	unsigned char *pxkeys[DES_BS_DEPTH]; /* Pointers into xkeys.c */
22 } des_combined;
23 
24 static cl_kernel **cmp_kernel;
25 static cl_kernel kernel_high, kernel_low;
26 static cl_mem buffer_hash_ids, buffer_bitmap_dupe, *buffer_uncracked_hashes, *buffer_hash_tables, *buffer_offset_tables, *buffer_bitmaps;
27 static unsigned int *zero_buffer, **hash_tables;
28 static unsigned int *hash_ids;
29 static unsigned int max_uncracked_hashes, max_hash_table_size;
30 DES_hash_check_params *hash_chk_params;
31 static WORD current_salt;
32 
33 static cl_kernel keys_kernel;
34 static cl_mem buffer_raw_keys, buffer_int_des_keys, buffer_int_key_loc;
35 static int keys_changed = 1;
36 static des_combined *des_all;
37 static opencl_DES_bs_transfer *des_raw_keys;
38 static unsigned int *des_int_key_loc;
39 static unsigned int static_gpu_locations[MASK_FMT_INT_PLHDR];
40 static size_t process_key_gws;
41 unsigned int CC_CACHE_ALIGN opencl_DES_bs_index768[0x300];
42 
43 unsigned char opencl_DES_E[48] = {
44 	31, 0, 1, 2, 3, 4,
45 	3, 4, 5, 6, 7, 8,
46 	7, 8, 9, 10, 11, 12,
47 	11, 12, 13, 14, 15, 16,
48 	15, 16, 17, 18, 19, 20,
49 	19, 20, 21, 22, 23, 24,
50 	23, 24, 25, 26, 27, 28,
51 	27, 28, 29, 30, 31, 0
52 };
53 
54 static unsigned char opencl_DES_PC1[56] = {
55 	56, 48, 40, 32, 24, 16, 8,
56 	0, 57, 49, 41, 33, 25, 17,
57 	9, 1, 58, 50, 42, 34, 26,
58 	18, 10, 2, 59, 51, 43, 35,
59 	62, 54, 46, 38, 30, 22, 14,
60 	6, 61, 53, 45, 37, 29, 21,
61 	13, 5, 60, 52, 44, 36, 28,
62 	20, 12, 4, 27, 19, 11, 3
63 };
64 
65 static unsigned char opencl_DES_ROT[16] = {
66 	1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1
67 };
68 
69 static unsigned char opencl_DES_PC2[48] = {
70 	13, 16, 10, 23, 0, 4,
71 	2, 27, 14, 5, 20, 9,
72 	22, 18, 11, 3, 25, 7,
73 	15, 6, 26, 19, 12, 1,
74 	40, 51, 30, 36, 46, 54,
75 	29, 39, 50, 44, 32, 47,
76 	43, 48, 38, 55, 33, 52,
77 	45, 41, 49, 35, 28, 31
78 };
79 
80 #define num_uncracked_hashes(k) hash_chk_params[k].num_uncracked_hashes
81 #define hash_table_size(k) hash_chk_params[k].hash_table_size
82 #define offset_table_size(k) hash_chk_params[k].offset_table_size
83 
84 #define LOW_THRESHOLD 		10
85 
86 #define get_num_bits(r, v)			\
87 {						\
88 	r = (v & 0xAAAAAAAA) != 0;		\
89 	r |= ((v & 0xFFFF0000) != 0) << 4;	\
90 	r |= ((v & 0xFF00FF00) != 0) << 3;	\
91 	r |= ((v & 0xF0F0F0F0) != 0) << 2;	\
92 	r |= ((v & 0xCCCCCCCC) != 0) << 1;	\
93 }
94 
prepare_bitmap_1(cl_ulong bmp_sz_bits,cl_uint ** bitmaps_ptr,unsigned WORD * loaded_hashes,unsigned int num_uncracked_hashes)95 static void prepare_bitmap_1(cl_ulong bmp_sz_bits, cl_uint **bitmaps_ptr, unsigned WORD *loaded_hashes, unsigned int num_uncracked_hashes)
96 {
97 	unsigned int i;
98 	MEM_FREE(*bitmaps_ptr);
99 	*bitmaps_ptr = (cl_uint*) mem_calloc((bmp_sz_bits >> 5), sizeof(cl_uint));
100 
101 	for (i = 0; i < num_uncracked_hashes; i++) {
102 		unsigned int bmp_idx = loaded_hashes[2 * i + 1] & (bmp_sz_bits - 1);
103 		(*bitmaps_ptr)[bmp_idx >> 5] |= (1U << (bmp_idx & 31));
104 	}
105 }
106 
select_bitmap(unsigned int num_ld_hashes,WORD * uncracked_hashes_t,unsigned long * bitmap_size_bits,unsigned int ** bitmaps_ptr,DES_hash_check_params * hash_chk_params)107 static void select_bitmap(unsigned int num_ld_hashes, WORD *uncracked_hashes_t, unsigned long *bitmap_size_bits, unsigned int **bitmaps_ptr, DES_hash_check_params *hash_chk_params)
108 {
109 	unsigned int cmp_steps = 1, bits_req = 32;
110 
111 	if (num_ld_hashes <= 5100) {
112 		if (amd_gcn_10(device_info[gpu_id]) ||
113 			amd_vliw4(device_info[gpu_id]))
114 			*bitmap_size_bits = 512 * 1024;
115 
116 		else
117 			*bitmap_size_bits = 256 * 1024;
118 
119 	}
120 
121 	else if (num_ld_hashes <= 10100) {
122 		if (amd_gcn_10(device_info[gpu_id]) ||
123 			amd_vliw4(device_info[gpu_id]))
124 			*bitmap_size_bits = 512 * 1024;
125 
126 		else
127 			*bitmap_size_bits = 256 * 1024;
128 	}
129 
130 	else if (num_ld_hashes <= 20100) {
131 		if (amd_gcn_10(device_info[gpu_id]))
132 			*bitmap_size_bits = 1024 * 1024;
133 
134 		else
135 			*bitmap_size_bits = 512 * 1024;
136 	}
137 
138 	else if (num_ld_hashes <= 250100)
139 		*bitmap_size_bits = 2048 * 1024;
140 
141 	else if (num_ld_hashes <= 1100100) {
142 		if (!amd_gcn_11(device_info[gpu_id]))
143 			*bitmap_size_bits = 4096 * 1024;
144 
145 		else
146 			*bitmap_size_bits = 2048 * 1024;
147 	}
148 
149 	else if (num_ld_hashes <= 1500100)
150 		*bitmap_size_bits = 4096 * 1024 * 2;
151 
152 	else if (num_ld_hashes <= 2700100)
153 		*bitmap_size_bits = 4096 * 1024 * 2 * 2;
154 
155 	else {
156 		cl_ulong mult = num_ld_hashes / 2700100;
157 		cl_ulong buf_sz;
158 		*bitmap_size_bits = 4096 * 4096;
159 		get_power_of_two(mult);
160 		*bitmap_size_bits *= mult;
161 		buf_sz = get_max_mem_alloc_size(gpu_id);
162 		if (buf_sz & (buf_sz - 1)) {
163 			get_power_of_two(buf_sz);
164 			buf_sz >>= 1;
165 		}
166 		if (buf_sz >= 536870912)
167 			buf_sz = 536870912;
168 		if (((*bitmap_size_bits) >> 3) > buf_sz)
169 			*bitmap_size_bits = buf_sz << 3;
170 	}
171 
172 	prepare_bitmap_1(*bitmap_size_bits, bitmaps_ptr, (unsigned WORD *)uncracked_hashes_t, num_ld_hashes);
173 
174 	get_num_bits(bits_req, (*bitmap_size_bits));
175 
176 	hash_chk_params->bitmap_size_bits = (unsigned int)(*bitmap_size_bits);
177 	hash_chk_params->cmp_steps = cmp_steps;
178 	hash_chk_params->cmp_bits = bits_req;
179 
180 	*bitmap_size_bits *= cmp_steps;
181 }
182 
fill_buffer(struct db_salt * salt,unsigned int * max_uncracked_hashes,unsigned int * max_hash_table_size)183 static void fill_buffer(struct db_salt *salt, unsigned int *max_uncracked_hashes, unsigned int *max_hash_table_size)
184 {
185 	int i;
186 	WORD salt_val;
187 	WORD *binary;
188 	WORD *uncracked_hashes = NULL, *uncracked_hashes_t = NULL;
189 	struct db_password *pw, *last;
190 	OFFSET_TABLE_WORD *offset_table;
191 	unsigned int hash_table_size, offset_table_size;
192 
193 	salt_val = *(WORD *)salt->salt;
194 	num_uncracked_hashes(salt_val) = salt->count;
195 
196 	uncracked_hashes = (WORD *) mem_calloc(2 * num_uncracked_hashes(salt_val), sizeof(WORD));
197 	uncracked_hashes_t = (WORD *) mem_calloc(2 * num_uncracked_hashes(salt_val), sizeof(WORD));
198 
199 	i = 0;
200 	last = pw = salt->list;
201 	do {
202 		binary = (WORD *)pw->binary;
203 		if (binary == NULL) {
204 			if (last == pw)
205 				salt->list = pw->next;
206 			else
207 				last->next = pw->next;
208 		} else {
209 			last = pw;
210 			uncracked_hashes_t[2 * i] = binary[0];
211 			uncracked_hashes_t[2 * i + 1] = binary[1];
212 			i++;
213 		}
214 	} while ((pw = pw->next));
215 
216 	if (salt->count > *max_uncracked_hashes)
217 		*max_uncracked_hashes = salt->count;
218 
219 	num_uncracked_hashes(salt_val) = create_perfect_hash_table(64, (void *)uncracked_hashes_t,
220 				num_uncracked_hashes(salt_val),
221 			        &offset_table,
222 			        &offset_table_size,
223 			        &hash_table_size, 0);
224 
225 	hash_table_size(salt_val) = hash_table_size;
226 	offset_table_size(salt_val) = offset_table_size;
227 
228 	if (hash_table_size(salt_val) > *max_hash_table_size)
229 		*max_hash_table_size = hash_table_size(salt_val);
230 
231 	if (!num_uncracked_hashes(salt_val)) {
232 		MEM_FREE(hash_table_64);
233 		MEM_FREE(offset_table);
234 		fprintf(stderr, "Failed to create Hash Table for cracking.\n");
235 		error();
236 	}
237 
238 	hash_tables[salt_val] = hash_table_64;
239 
240 	/* uncracked_hashes_t is modified by create_perfect_hash_table. */
241 	for (i = 0; i < num_uncracked_hashes(salt_val); i++) {
242 		uncracked_hashes[i] = uncracked_hashes_t[2 * i];
243 		uncracked_hashes[i + num_uncracked_hashes(salt_val)] = uncracked_hashes_t[2 * i + 1];
244 	}
245 
246 	buffer_offset_tables[salt_val] = clCreateBuffer(context[gpu_id], CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(OFFSET_TABLE_WORD) * offset_table_size , offset_table, &ret_code);
247 	HANDLE_CLERROR(ret_code, "Create buffer_offset_tables failed.\n");
248 
249 	buffer_hash_tables[salt_val] = clCreateBuffer(context[gpu_id], CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, 2 * sizeof(unsigned int) * hash_table_size, hash_table_64, &ret_code);
250 	HANDLE_CLERROR(ret_code, "Create buffer_hash_tables failed.\n");
251 
252 	if (num_uncracked_hashes(salt_val) <= LOW_THRESHOLD) {
253 		buffer_uncracked_hashes[salt_val] = clCreateBuffer(context[gpu_id], CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, 2 * sizeof(WORD) * num_uncracked_hashes(salt_val), uncracked_hashes, &ret_code);
254 		HANDLE_CLERROR(ret_code, "Create buffer_uncracked_hashes failed.\n");
255 	}
256 	else {
257 		unsigned long bitmap_size_bits = 0;
258 		unsigned int *bitmaps = NULL;
259 		select_bitmap(num_uncracked_hashes(salt_val), uncracked_hashes_t, &bitmap_size_bits, &bitmaps, &hash_chk_params[salt_val]);
260 		buffer_bitmaps[salt_val] = clCreateBuffer(context[gpu_id], CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, bitmap_size_bits >> 3, bitmaps, &ret_code);
261 		HANDLE_CLERROR(ret_code, "Create buffer_bitmaps failed.\n");
262 		MEM_FREE(bitmaps);
263 	}
264 
265 	MEM_FREE(uncracked_hashes);
266 	MEM_FREE(uncracked_hashes_t);
267 	MEM_FREE(offset_table);
268 }
269 
fill_buffer_self_test(unsigned int * max_uncracked_hashes,unsigned int * max_hash_table_size)270 static void fill_buffer_self_test(unsigned int *max_uncracked_hashes, unsigned int *max_hash_table_size)
271 {
272 	char *ciphertext;
273 	WORD *binary;
274 	WORD salt_val;
275 	unsigned int offset_table_size, hash_table_size;
276 	unsigned long bitmap_size_bits = 0;
277 	unsigned int *bitmaps = NULL;
278 	WORD *uncracked_hashes = NULL, *uncracked_hashes_t = NULL;
279 	int i;
280 	OFFSET_TABLE_WORD *offset_table = NULL;
281 	DES_hash_check_params temp_param;
282 
283 	while (fmt_opencl_DES.params.tests[*max_uncracked_hashes].ciphertext) {
284 		ciphertext = fmt_opencl_DES.methods.split(fmt_opencl_DES.params.tests[*max_uncracked_hashes].ciphertext, 0, &fmt_opencl_DES);
285 		(*max_uncracked_hashes)++;
286 	}
287 
288 	uncracked_hashes = (WORD *) mem_calloc(2 * *max_uncracked_hashes, sizeof(WORD));
289 	uncracked_hashes_t = (WORD *) mem_calloc(2 * *max_uncracked_hashes, sizeof(WORD));
290 
291 	i = 0;
292 	while (fmt_opencl_DES.params.tests[i].ciphertext) {
293 		ciphertext = fmt_opencl_DES.methods.split(fmt_opencl_DES.params.tests[i].ciphertext, 0, &fmt_opencl_DES);
294 		binary = (WORD *)fmt_opencl_DES.methods.binary(ciphertext);
295 		salt_val = *(WORD *)fmt_opencl_DES.methods.salt(ciphertext);
296 		uncracked_hashes_t[2 * i] = binary[0];
297 		uncracked_hashes_t[2 * i + 1] = binary[1];
298 		num_uncracked_hashes(salt_val) = 1;
299 		//fprintf(stderr, "C:%s B:%d \n", ciphertext, binary[1]);
300 		i++;
301 	}
302 
303 	*max_uncracked_hashes = create_perfect_hash_table(64, (void *)uncracked_hashes_t,
304 				*max_uncracked_hashes,
305 			        &offset_table,
306 			        &offset_table_size,
307 			        &hash_table_size, 0);
308 	*max_hash_table_size = hash_table_size;
309 
310 	if (!*max_uncracked_hashes) {
311 		MEM_FREE(hash_table_64);
312 		MEM_FREE(offset_table);
313 		fprintf(stderr, "Failed to create Hash Table for cracking.\n");
314 		error();
315 	}
316 
317 	/* uncracked_hashes_t is modified by create_perfect_hash_table. */
318 	for (i = 0; i < *max_uncracked_hashes; i++) {
319 		uncracked_hashes[i] = uncracked_hashes_t[2 * i];
320 		uncracked_hashes[i + *max_uncracked_hashes] = uncracked_hashes_t[2 * i + 1];
321 	}
322 
323 	select_bitmap(*max_uncracked_hashes, uncracked_hashes_t, &bitmap_size_bits, &bitmaps, &temp_param);
324 
325 	for (i = 0; i < 4096; i++) {
326 		if (!num_uncracked_hashes(i)) continue;
327 		hash_chk_params[i] = temp_param; /* Error if this statement is excuted later in the body of loop. */
328 		num_uncracked_hashes(i) = *max_uncracked_hashes;
329 		hash_table_size(i) = hash_table_size;
330 		offset_table_size(i) = offset_table_size;
331 		hash_tables[i] = (unsigned int *) mem_alloc(2 * sizeof(unsigned int) * hash_table_size);
332 		memcpy(hash_tables[i], hash_table_64, 2 * sizeof(unsigned int) * hash_table_size);
333 		buffer_offset_tables[i] = clCreateBuffer(context[gpu_id], CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(OFFSET_TABLE_WORD) * offset_table_size , offset_table, &ret_code);
334 		HANDLE_CLERROR(ret_code, "Create buffer_offset_tables failed.\n");
335 		buffer_hash_tables[i] = clCreateBuffer(context[gpu_id], CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, 2 * sizeof(unsigned int) * hash_table_size, hash_table_64, &ret_code);
336 		HANDLE_CLERROR(ret_code, "Create buffer_hash_tables failed.\n");
337 		buffer_bitmaps[i] = clCreateBuffer(context[gpu_id], CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, bitmap_size_bits >> 3, bitmaps, &ret_code);
338 		HANDLE_CLERROR(ret_code, "Create buffer_bitmaps failed.\n");
339 		buffer_uncracked_hashes[i] = clCreateBuffer(context[gpu_id], CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, 2 * sizeof(WORD) * *max_uncracked_hashes, uncracked_hashes, &ret_code);
340 		HANDLE_CLERROR(ret_code, "Create buffer_uncracked_hashes failed.\n");
341 	}
342 
343 	MEM_FREE(uncracked_hashes);
344 	MEM_FREE(uncracked_hashes_t);
345 	MEM_FREE(offset_table);
346 	MEM_FREE(hash_table_64);
347 	MEM_FREE(bitmaps);
348 }
349 
release_fill_buffer(WORD i)350 static void release_fill_buffer(WORD i)
351 {
352 	if (buffer_uncracked_hashes[i] != (cl_mem)0) {
353 		HANDLE_CLERROR(clReleaseMemObject(buffer_uncracked_hashes[i]), "Release buffer_uncracked_hashes failed.\n");
354 		buffer_uncracked_hashes[i] = (cl_mem)0;
355 	}
356 	if (buffer_offset_tables[i] != (cl_mem)0) {
357 		HANDLE_CLERROR(clReleaseMemObject(buffer_offset_tables[i]), "Release buffer_offset_tables failed.\n");
358 		HANDLE_CLERROR(clReleaseMemObject(buffer_hash_tables[i]), "Release buffer_hash_tables failed.\n");
359 		buffer_hash_tables[i] = (cl_mem)0;
360 		buffer_offset_tables[i] = (cl_mem)0;
361 	}
362 	if (buffer_bitmaps[i] != (cl_mem)0) {
363 		HANDLE_CLERROR(clReleaseMemObject(buffer_bitmaps[i]), "Release buffer_bitmaps failed.\n");
364 		buffer_bitmaps[i] = (cl_mem)0;
365 	}
366 	if (hash_tables[i])
367 		MEM_FREE(hash_tables[i]);
368 	hash_tables[i] = 0;
369 }
370 
release_fill_buffers()371 static void release_fill_buffers()
372 {
373 	int i;
374 
375 	for (i = 0; i < 4096; i++)
376 		release_fill_buffer(i);
377 }
378 
create_aux_buffers(unsigned int max_uncracked_hashes,unsigned int max_hash_table_size)379 static void create_aux_buffers(unsigned int max_uncracked_hashes, unsigned int max_hash_table_size)
380 {
381 	zero_buffer = (unsigned int *) mem_calloc((max_hash_table_size - 1) / 32 + 1, sizeof(unsigned int));
382 
383 	buffer_bitmap_dupe = clCreateBuffer(context[gpu_id], CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, ((max_hash_table_size - 1) / 32 + 1) * sizeof(unsigned int), zero_buffer, &ret_code);
384 	HANDLE_CLERROR(ret_code, "Create buffer_bitmap_dupe failed.\n");
385 
386 	buffer_hash_ids = clCreateBuffer(context[gpu_id], CL_MEM_READ_WRITE, (2 * max_uncracked_hashes + 1) * sizeof(unsigned int), NULL, &ret_code);
387 	HANDLE_CLERROR(ret_code, "Create buffer_hash_ids failed.\n");
388 
389 	hash_ids = (unsigned int *) mem_calloc((2 * max_uncracked_hashes + 1), sizeof(unsigned int));
390 
391 	HANDLE_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], buffer_hash_ids, CL_TRUE, 0, sizeof(cl_uint), zero_buffer, 0, NULL, NULL), "Failed to write buffer buffer_hash_ids.\n");
392 }
393 
release_aux_buffers()394 static void release_aux_buffers()
395 {
396 	if (zero_buffer) {
397 		HANDLE_CLERROR(clReleaseMemObject(buffer_bitmap_dupe), "Release buffer_bitmap_dupe failed.\n");
398 		HANDLE_CLERROR(clReleaseMemObject(buffer_hash_ids), "Release buffer_hash_ids failed.\n");
399 
400 		MEM_FREE(hash_ids);
401 		MEM_FREE(zero_buffer);
402 		zero_buffer = 0;
403 	}
404 }
405 
build_tables(struct db_main * db)406 void build_tables(struct db_main *db)
407 {
408 	buffer_uncracked_hashes = (cl_mem *) mem_calloc(4096, sizeof(cl_mem));
409 	hash_tables = (unsigned int **) mem_calloc(4096, sizeof(unsigned int *));
410 	buffer_offset_tables = (cl_mem *) mem_calloc(4096, sizeof(cl_mem));
411 	buffer_hash_tables = (cl_mem *) mem_calloc(4096, sizeof(cl_mem));
412 	buffer_bitmaps = (cl_mem *) mem_calloc(4096, sizeof(cl_mem));
413 	memset(hash_chk_params, 0, 4096 * sizeof(DES_hash_check_params));
414 
415 	if (db) {
416 	struct db_salt *salt = db->salts;
417 	do {
418 		fill_buffer(salt, &max_uncracked_hashes, &max_hash_table_size);
419 	} while((salt = salt->next));
420 	}
421 	else {
422 		fill_buffer_self_test(&max_uncracked_hashes, &max_hash_table_size);
423 	}
424 
425 	create_aux_buffers(max_uncracked_hashes, max_hash_table_size);
426 }
427 
release_tables()428 void release_tables()
429 {
430 	release_aux_buffers();
431 
432 	if (buffer_uncracked_hashes) {
433 		release_fill_buffers();
434 		MEM_FREE(buffer_uncracked_hashes);
435 		MEM_FREE(buffer_offset_tables);
436 		MEM_FREE(buffer_hash_tables);
437 		MEM_FREE(buffer_bitmaps);
438 		MEM_FREE(hash_tables);
439 		hash_tables = 0;
440 		buffer_uncracked_hashes = 0;
441 	}
442 }
443 
set_kernel_args_aux_buf()444 static void set_kernel_args_aux_buf()
445 {
446 	HANDLE_CLERROR(clSetKernelArg(kernel_low, 4, sizeof(cl_mem), &buffer_hash_ids), "Failed setting kernel argument buffer_hash_ids, kernel DES_bs_cmp_low.\n");
447 	HANDLE_CLERROR(clSetKernelArg(kernel_low, 5, sizeof(cl_mem), &buffer_bitmap_dupe), "Failed setting kernel argument buffer_bitmap_dupe, kernel DES_bs_cmp_low.\n");
448 
449 	HANDLE_CLERROR(clSetKernelArg(kernel_high, 4, sizeof(cl_mem), &buffer_hash_ids), "Failed setting kernel argument buffer_hash_ids, kernel DES_bs_cmp.\n");
450 	HANDLE_CLERROR(clSetKernelArg(kernel_high, 5, sizeof(cl_mem), &buffer_bitmap_dupe), "Failed setting kernel argument buffer_bitmap_dupe, kernel DES_bs_cmp.\n");
451 }
452 
create_checking_kernel_set_args()453 size_t create_checking_kernel_set_args()
454 {
455 	int i;
456 	size_t min_lws;
457 
458 	opencl_build_kernel("$JOHN/kernels/DES_bs_hash_checking_kernel.cl",
459 	                    gpu_id, NULL, 0);
460 
461 	if (kernel_high == 0) {
462 		kernel_high = clCreateKernel(program[gpu_id], "DES_bs_cmp_high", &ret_code);
463 		HANDLE_CLERROR(ret_code, "Failed creating kernel DES_bs_cmp_high.\n");
464 	}
465 	if (kernel_low == 0) {
466 		kernel_low = clCreateKernel(program[gpu_id], "DES_bs_cmp", &ret_code);
467 		HANDLE_CLERROR(ret_code, "Failed creating kernel DES_bs_cmp.\n");
468 	}
469 
470 	memset(cmp_kernel[gpu_id], 0, 4096 * sizeof(cl_kernel));
471 
472 	for (i = 0; i < 4096; i++) {
473 		if (num_uncracked_hashes(i) <= LOW_THRESHOLD)
474 			cmp_kernel[gpu_id][i] = kernel_low;
475 		else
476 			cmp_kernel[gpu_id][i] = kernel_high;
477 	}
478 
479 	set_kernel_args_aux_buf();
480 
481 	min_lws = get_kernel_max_lws(gpu_id, kernel_high);
482 
483 	if (min_lws > get_kernel_max_lws(gpu_id, kernel_low))
484 		return get_kernel_max_lws(gpu_id, kernel_low);
485 
486 	return min_lws;
487 }
488 
set_common_kernel_args_kpc(cl_mem buffer_unchecked_hashes,cl_mem buffer_bs_keys)489 void set_common_kernel_args_kpc(cl_mem buffer_unchecked_hashes, cl_mem buffer_bs_keys)
490 {
491 	HANDLE_CLERROR(clSetKernelArg(kernel_low, 0, sizeof(cl_mem), &buffer_unchecked_hashes), "Failed setting kernel argument buffer_unchecked_hashes, kernel DES_bs_cmp.\n");
492 	HANDLE_CLERROR(clSetKernelArg(kernel_high, 0, sizeof(cl_mem), &buffer_unchecked_hashes), "Failed setting kernel argument buffer_unchecked_hashes, kernel DES_bs_cmp.\n");
493 
494 	HANDLE_CLERROR(clSetKernelArg(keys_kernel, 0, sizeof(cl_mem), &buffer_raw_keys), "Failed setting kernel argument buffer_raw_keys, kernel DES_bs_finalize_keys.\n");
495 	HANDLE_CLERROR(clSetKernelArg(keys_kernel, 2, sizeof(cl_mem), &buffer_int_key_loc), "Failed setting kernel argument buffer_int_key_loc, kernel DES_bs_finalize_keys.\n");
496 	HANDLE_CLERROR(clSetKernelArg(keys_kernel, 3, sizeof(cl_mem), &buffer_bs_keys), "Failed setting kernel argument buffer_bs_keys, kernel DES_bs_finalize_keys.\n");
497 }
498 
update_buffer(struct db_salt * salt)499 void update_buffer(struct db_salt *salt)
500 {
501 	unsigned int _max_uncracked_hashes = 0, _max_hash_table_size = 0;
502 	WORD salt_val = *(WORD *)salt->salt;
503 	release_fill_buffer(salt_val);
504 
505 	if (salt->count > LOW_THRESHOLD &&
506 		(num_uncracked_hashes(salt_val) - num_uncracked_hashes(salt_val) / 10) < salt->count)
507 		return;
508 
509 	fill_buffer(salt, &_max_uncracked_hashes, &_max_hash_table_size);
510 
511 	if (_max_uncracked_hashes > max_uncracked_hashes || _max_hash_table_size > max_hash_table_size) {
512 		release_aux_buffers();
513 		create_aux_buffers(max_uncracked_hashes, max_hash_table_size);
514 		set_kernel_args_aux_buf();
515 		max_hash_table_size = _max_hash_table_size;
516 		max_uncracked_hashes = _max_uncracked_hashes;
517 	}
518 
519 	if (num_uncracked_hashes(salt_val) <= LOW_THRESHOLD)
520 		cmp_kernel[gpu_id][salt_val] = kernel_low;
521 	else
522 		cmp_kernel[gpu_id][salt_val] = kernel_high;
523 
524 	if (options.verbosity > VERB_LEGACY)
525 		fprintf(stderr,
526 		        "Updated internal tables and buffers for salt %d.\n", salt_val);
527 }
528 
extract_info(size_t current_gws,size_t * lws,WORD salt_val)529 int extract_info(size_t current_gws, size_t *lws, WORD salt_val)
530 {
531 	current_salt = salt_val;
532 
533 	HANDLE_CLERROR(clSetKernelArg(cmp_kernel[gpu_id][current_salt], 1, sizeof(cl_mem), &buffer_offset_tables[current_salt]), "Failed setting kernel argument buffer_offset_tables, kernel DES_bs_cmp.\n");
534 	HANDLE_CLERROR(clSetKernelArg(cmp_kernel[gpu_id][current_salt], 2, sizeof(cl_mem), &buffer_hash_tables[current_salt]), "Failed setting kernel argument buffer_hash_tables, kernel DES_bs_cmp.\n");
535 	HANDLE_CLERROR(clSetKernelArg(cmp_kernel[gpu_id][current_salt], 3, sizeof(DES_hash_check_params), &hash_chk_params[current_salt]), "Failed setting kernel argument num_uncracked_hashes, kernel DES_bs_cmp.\n");
536 	if (num_uncracked_hashes(current_salt) <= LOW_THRESHOLD)
537 		HANDLE_CLERROR(clSetKernelArg(cmp_kernel[gpu_id][current_salt], 6, sizeof(cl_mem), &buffer_uncracked_hashes[current_salt]), "Failed setting kernel argument buffer_uncracked_hashes, kernel DES_bs_cmp.\n");
538 	else
539 		HANDLE_CLERROR(clSetKernelArg(cmp_kernel[gpu_id][current_salt], 6, sizeof(cl_mem), &buffer_bitmaps[current_salt]), "Failed setting kernel argument buffer_bitmaps, kernel DES_bs_cmp_high.\n");
540 
541 	ret_code = clEnqueueNDRangeKernel(queue[gpu_id], cmp_kernel[gpu_id][current_salt], 1, NULL, &current_gws, lws, 0, NULL, NULL);
542 	HANDLE_CLERROR(ret_code, "Enque kernel DES_bs_cmp failed.\n");
543 
544 	HANDLE_CLERROR(clEnqueueReadBuffer(queue[gpu_id], buffer_hash_ids, CL_TRUE, 0, sizeof(unsigned int), hash_ids, 0, NULL, NULL), "Failed to read buffer buffer_hash_ids.\n");
545 
546 	if (hash_ids[0] > num_uncracked_hashes(current_salt)) {
547 		fprintf(stderr, "Error, crypt_all kernel.\n");
548 		error();
549 	}
550 
551 	if (hash_ids[0]) {
552 		HANDLE_CLERROR(clEnqueueReadBuffer(queue[gpu_id], buffer_hash_ids, CL_TRUE, 0, (2 * hash_ids[0] + 1) * sizeof(unsigned int), hash_ids, 0, NULL, NULL), "Failed to read buffer buffer_hash_ids.\n");
553 		HANDLE_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], buffer_bitmap_dupe, CL_TRUE, 0, ((hash_table_size(current_salt) - 1)/32 + 1) * sizeof(cl_uint), zero_buffer, 0, NULL, NULL), "Failed to write buffer buffer_bitmap_dupe.\n");
554 		HANDLE_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], buffer_hash_ids, CL_TRUE, 0, sizeof(cl_uint), zero_buffer, 0, NULL, NULL), "Failed to write buffer buffer_hash_ids.\n");
555 	}
556 
557 	return hash_ids[0];
558 }
559 
init_checking()560 void init_checking()
561 {
562 	int i = 0;
563 	cmp_kernel = (cl_kernel **) mem_calloc(MAX_GPU_DEVICES, sizeof(cl_kernel *));
564 	for (i = 0; i < MAX_GPU_DEVICES; i++)
565 		cmp_kernel[i] = (cl_kernel *) mem_calloc(4096, sizeof(cl_kernel));
566 	hash_chk_params = (DES_hash_check_params *) mem_calloc(4096, sizeof(DES_hash_check_params));
567 }
568 
finish_checking()569 void finish_checking()
570 {
571 	int i;
572 
573 	if (kernel_high) {
574 		HANDLE_CLERROR(clReleaseKernel(kernel_high), "Error releasing kernel_high.");
575 		kernel_high = 0;
576 	}
577 	if (kernel_low) {
578 		HANDLE_CLERROR(clReleaseKernel(kernel_low), "Error releasing kernel_low.");
579 		kernel_low = 0;
580 	}
581 	for (i = 0; i < MAX_GPU_DEVICES; i++)
582 		MEM_FREE(cmp_kernel[i]);
583 	MEM_FREE(cmp_kernel);
584 	MEM_FREE(hash_chk_params);
585 }
586 
opencl_DES_bs_get_hash_0(int index)587 int opencl_DES_bs_get_hash_0(int index)
588 {
589 	return hash_tables[current_salt][hash_ids[2 + 2 * index]] & PH_MASK_0;
590 }
591 
opencl_DES_bs_get_hash_1(int index)592 int opencl_DES_bs_get_hash_1(int index)
593 {
594 	return hash_tables[current_salt][hash_ids[2 + 2 * index]] & PH_MASK_1;
595 }
596 
opencl_DES_bs_get_hash_2(int index)597 int opencl_DES_bs_get_hash_2(int index)
598 {
599 	return hash_tables[current_salt][hash_ids[2 + 2 * index]] & PH_MASK_2;
600 }
601 
opencl_DES_bs_get_hash_3(int index)602 int opencl_DES_bs_get_hash_3(int index)
603 {
604 	return hash_tables[current_salt][hash_ids[2 + 2 * index]] & PH_MASK_3;
605 }
606 
opencl_DES_bs_get_hash_4(int index)607 int opencl_DES_bs_get_hash_4(int index)
608 {
609 	return hash_tables[current_salt][hash_ids[2 + 2 * index]] & PH_MASK_4;
610 }
611 
opencl_DES_bs_get_hash_5(int index)612 int opencl_DES_bs_get_hash_5(int index)
613 {
614 	return hash_tables[current_salt][hash_ids[2 + 2 * index]] & PH_MASK_5;
615 }
616 
opencl_DES_bs_get_hash_6(int index)617 int opencl_DES_bs_get_hash_6(int index)
618 {
619 	return hash_tables[current_salt][hash_ids[2 + 2 * index]] & PH_MASK_6;
620 }
621 
opencl_DES_bs_cmp_one(void * binary,int index)622 int opencl_DES_bs_cmp_one(void *binary, int index)
623 {
624 	if (((int *)binary)[0] == hash_tables[current_salt][hash_ids[2 + 2 * index]])
625 		return 1;
626 	return 0;
627 }
628 
opencl_DES_bs_cmp_exact(char * source,int index)629 int opencl_DES_bs_cmp_exact(char *source, int index)
630 {
631 	int *binary = fmt_opencl_DES.methods.binary(source);
632 
633 	if (binary[1] == hash_tables[current_salt][hash_ids[2 + 2 * index] + hash_table_size(current_salt)])
634 		return 1;
635 	return 0;
636 }
637 
638 /* End of hash checking. */
639 
640 typedef union {
641 	unsigned char c[8][sizeof(DES_bs_vector)];
642 	DES_bs_vector v[8];
643 } key_page;
644 
645 #define vxorf(a, b) 					\
646 	((a) ^ (b))
647 #define vnot(dst, a) 					\
648 	(dst) = ~(a)
649 #define vand(dst, a, b) 				\
650 	(dst) = (a) & (b)
651 #define vor(dst, a, b) 					\
652 	(dst) = (a) | (b)
653 #define vandn(dst, a, b) 				\
654 	(dst) = (a) & ~(b)
655 #define vxor(dst, a, b) 				\
656 	(dst) = vxorf((a), (b))
657 #define vshl(dst, src, shift) 				\
658 	(dst) = (src) << (shift)
659 #define vshr(dst, src, shift) 				\
660 	(dst) = (src) >> (shift)
661 #define vshl1(dst, src) 				\
662 	vshl((dst), (src), 1)
663 
664 #define kvtype vtype
665 #define kvand vand
666 #define kvor vor
667 #define kvshl1 vshl1
668 #define kvshl vshl
669 #define kvshr vshr
670 
671 #define mask01 0x01010101
672 #define mask02 0x02020202
673 #define mask04 0x04040404
674 #define mask08 0x08080808
675 #define mask10 0x10101010
676 #define mask20 0x20202020
677 #define mask40 0x40404040
678 #define mask80 0x80808080
679 
680 #define kvand_shl1_or(dst, src, mask) 			\
681 	kvand(tmp, src, mask); 				\
682 	kvshl1(tmp, tmp); 				\
683 	kvor(dst, dst, tmp)
684 
685 #define kvand_shl_or(dst, src, mask, shift) 		\
686 	kvand(tmp, src, mask); 				\
687 	kvshl(tmp, tmp, shift); 			\
688 	kvor(dst, dst, tmp)
689 
690 #define kvand_shl1(dst, src, mask) 			\
691 	kvand(tmp, src, mask) ;				\
692 	kvshl1(dst, tmp)
693 
694 #define kvand_or(dst, src, mask) 			\
695 	kvand(tmp, src, mask); 				\
696 	kvor(dst, dst, tmp)
697 
698 #define kvand_shr_or(dst, src, mask, shift)		\
699 	kvand(tmp, src, mask); 				\
700 	kvshr(tmp, tmp, shift); 			\
701 	kvor(dst, dst, tmp)
702 
703 #define kvand_shr(dst, src, mask, shift) 		\
704 	kvand(tmp, src, mask); 				\
705 	kvshr(dst, tmp, shift)
706 
707 #define LOAD_V 						\
708 	kvtype v0 = *(kvtype *)&vp[0]; 	\
709 	kvtype v1 = *(kvtype *)&vp[1]; 	\
710 	kvtype v2 = *(kvtype *)&vp[2]; 	\
711 	kvtype v3 = *(kvtype *)&vp[3]; 	\
712 	kvtype v4 = *(kvtype *)&vp[4]; 	\
713 	kvtype v5 = *(kvtype *)&vp[5]; 	\
714 	kvtype v6 = *(kvtype *)&vp[6]; 	\
715 	kvtype v7 = *(kvtype *)&vp[7];
716 
717 #define FINALIZE_NEXT_KEY_BIT_0g { 			\
718 	kvtype m = mask01, va, vb, tmp; 		\
719 	kvand(va, v0, m); 				\
720 	kvand_shl1(vb, v1, m); 				\
721 	kvand_shl_or(va, v2, m, 2); 			\
722 	kvand_shl_or(vb, v3, m, 3); 			\
723 	kvand_shl_or(va, v4, m, 4); 			\
724 	kvand_shl_or(vb, v5, m, 5); 			\
725 	kvand_shl_or(va, v6, m, 6); 			\
726 	kvand_shl_or(vb, v7, m, 7); 			\
727 	kvor(kp[0], va, vb); 				\
728 	kp += 1;					\
729 }
730 
731 #define FINALIZE_NEXT_KEY_BIT_1g { 			\
732 	kvtype m = mask02, va, vb, tmp; 		\
733 	kvand_shr(va, v0, m, 1); 			\
734 	kvand(vb, v1, m); 				\
735 	kvand_shl1_or(va, v2, m); 			\
736 	kvand_shl_or(vb, v3, m, 2); 			\
737 	kvand_shl_or(va, v4, m, 3); 			\
738 	kvand_shl_or(vb, v5, m, 4); 			\
739 	kvand_shl_or(va, v6, m, 5); 			\
740 	kvand_shl_or(vb, v7, m, 6); 			\
741 	kvor(kp[0], va, vb); 				\
742 	kp += 1;					\
743 }
744 
745 #define FINALIZE_NEXT_KEY_BIT_2g { 			\
746 	kvtype m = mask04, va, vb, tmp; 		\
747 	kvand_shr(va, v0, m, 2); 			\
748 	kvand_shr(vb, v1, m, 1); 			\
749 	kvand_or(va, v2, m); 				\
750 	kvand_shl1_or(vb, v3, m); 			\
751 	kvand_shl_or(va, v4, m, 2); 			\
752 	kvand_shl_or(vb, v5, m, 3); 			\
753 	kvand_shl_or(va, v6, m, 4); 			\
754 	kvand_shl_or(vb, v7, m, 5); 			\
755 	kvor(kp[0], va, vb); 				\
756 	kp += 1;					\
757 }
758 
759 #define FINALIZE_NEXT_KEY_BIT_3g { 			\
760 	kvtype m = mask08, va, vb, tmp; 		\
761 	kvand_shr(va, v0, m, 3); 			\
762 	kvand_shr(vb, v1, m, 2); 			\
763 	kvand_shr_or(va, v2, m, 1); 			\
764 	kvand_or(vb, v3, m); 				\
765 	kvand_shl1_or(va, v4, m); 			\
766 	kvand_shl_or(vb, v5, m, 2); 			\
767 	kvand_shl_or(va, v6, m, 3); 			\
768 	kvand_shl_or(vb, v7, m, 4); 			\
769 	kvor(kp[0], va, vb); 				\
770 	kp += 1;					\
771 }
772 
773 #define FINALIZE_NEXT_KEY_BIT_4g { 			\
774 	kvtype m = mask10, va, vb, tmp; 		\
775 	kvand_shr(va, v0, m, 4); 			\
776 	kvand_shr(vb, v1, m, 3); 			\
777 	kvand_shr_or(va, v2, m, 2); 			\
778 	kvand_shr_or(vb, v3, m, 1); 			\
779 	kvand_or(va, v4, m); 				\
780 	kvand_shl1_or(vb, v5, m); 			\
781 	kvand_shl_or(va, v6, m, 2); 			\
782 	kvand_shl_or(vb, v7, m, 3); 			\
783 	kvor(kp[0], va, vb); 				\
784 	kp += 1;					\
785 }
786 
787 #define FINALIZE_NEXT_KEY_BIT_5g { 			\
788 	kvtype m = mask20, va, vb, tmp; 		\
789 	kvand_shr(va, v0, m, 5); 			\
790 	kvand_shr(vb, v1, m, 4); 			\
791 	kvand_shr_or(va, v2, m, 3); 			\
792 	kvand_shr_or(vb, v3, m, 2); 			\
793 	kvand_shr_or(va, v4, m, 1); 			\
794 	kvand_or(vb, v5, m); 				\
795 	kvand_shl1_or(va, v6, m); 			\
796 	kvand_shl_or(vb, v7, m, 2); 			\
797 	kvor(kp[0], va, vb); 				\
798 	kp += 1;					\
799 }
800 
801 #define FINALIZE_NEXT_KEY_BIT_6g { 			\
802 	kvtype m = mask40, va, vb, tmp; 		\
803 	kvand_shr(va, v0, m, 6); 			\
804 	kvand_shr(vb, v1, m, 5); 			\
805 	kvand_shr_or(va, v2, m, 4); 			\
806 	kvand_shr_or(vb, v3, m, 3); 			\
807 	kvand_shr_or(va, v4, m, 2); 			\
808 	kvand_shr_or(vb, v5, m, 1); 			\
809 	kvand_or(va, v6, m); 				\
810 	kvand_shl1_or(vb, v7, m); 			\
811 	kvor(kp[0], va, vb); 				\
812 	kp += 1;					\
813 }
814 
des_finalize_int_keys()815 static void des_finalize_int_keys()
816 {
817 	key_page *int_key_page[MASK_FMT_INT_PLHDR];
818 	unsigned int *final_key_pages[MASK_FMT_INT_PLHDR], i, j;
819 
820 	for (i = 0; i < MASK_FMT_INT_PLHDR; i++) {
821 		int_key_page[i] = (key_page *) mem_alloc(((mask_int_cand.num_int_cand + DES_BS_DEPTH - 1) >> DES_LOG_DEPTH) * sizeof(key_page));
822 		final_key_pages[i] = (unsigned int *) mem_alloc(7 * ((mask_int_cand.num_int_cand + DES_BS_DEPTH - 1) >> DES_LOG_DEPTH) * sizeof(unsigned int));
823 		memset(int_key_page[i], 0x7f, ((mask_int_cand.num_int_cand + DES_BS_DEPTH - 1) >> DES_LOG_DEPTH) * sizeof(key_page));
824 		memset(final_key_pages[i], 0xff, 7 * ((mask_int_cand.num_int_cand + DES_BS_DEPTH - 1) >> DES_LOG_DEPTH) * sizeof(unsigned int));
825 	}
826 
827 	for (i = 0; i < mask_int_cand.num_int_cand && mask_int_cand.int_cand; i++) {
828 		j = i >> DES_LOG_DEPTH;
829 		int_key_page[0][j].c[(i & (DES_BS_DEPTH - 1)) & 7][(i & (DES_BS_DEPTH - 1)) >> 3] = mask_int_cand.int_cand[i].x[0] & 0xFF;
830 #if MASK_FMT_INT_PLHDR > 1
831 		if (mask_skip_ranges[1] != -1)
832 			int_key_page[1][j].c[(i & (DES_BS_DEPTH - 1)) & 7][(i & (DES_BS_DEPTH - 1)) >> 3] = mask_int_cand.int_cand[i].x[1] & 0xFF;
833 #endif
834 #if MASK_FMT_INT_PLHDR > 2
835 		if (mask_skip_ranges[2] != -1)
836 			int_key_page[2][j].c[(i & (DES_BS_DEPTH - 1)) & 7][(i & (DES_BS_DEPTH - 1)) >> 3] = mask_int_cand.int_cand[i].x[2] & 0xFF;
837 #endif
838 #if MASK_FMT_INT_PLHDR > 3
839 		if (mask_skip_ranges[3] != -1)
840 			int_key_page[3][j].c[(i & (DES_BS_DEPTH - 1)) & 7][(i & (DES_BS_DEPTH - 1)) >> 3] = mask_int_cand.int_cand[i].x[3] & 0xFF;
841 #endif
842 	}
843 
844 	for (j = 0; j < MASK_FMT_INT_PLHDR; j++) {
845 		if (mask_skip_ranges == NULL || mask_skip_ranges[j] == -1)
846 			continue;
847 		for (i = 0; i < ((mask_int_cand.num_int_cand + DES_BS_DEPTH - 1) >> DES_LOG_DEPTH); i++) {
848 			DES_bs_vector *kp = (DES_bs_vector *)&final_key_pages[j][7 * i];
849 			DES_bs_vector *vp = (DES_bs_vector *)&int_key_page[j][i].v[0];
850 			LOAD_V
851 			FINALIZE_NEXT_KEY_BIT_0g
852 			FINALIZE_NEXT_KEY_BIT_1g
853 			FINALIZE_NEXT_KEY_BIT_2g
854 			FINALIZE_NEXT_KEY_BIT_3g
855 			FINALIZE_NEXT_KEY_BIT_4g
856 			FINALIZE_NEXT_KEY_BIT_5g
857 			FINALIZE_NEXT_KEY_BIT_6g
858 		}
859 		HANDLE_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], buffer_int_des_keys, CL_TRUE, j * 7 * ((mask_int_cand.num_int_cand + DES_BS_DEPTH - 1) >> DES_LOG_DEPTH) * sizeof(unsigned int),
860 			7 * ((mask_int_cand.num_int_cand + DES_BS_DEPTH - 1) >> DES_LOG_DEPTH) * sizeof(unsigned int), final_key_pages[j], 0, NULL, NULL ), "Failed Copy data to gpu");
861 	}
862 
863 	for (i = 0; i < MASK_FMT_INT_PLHDR; i++) {
864 		MEM_FREE(int_key_page[i]);
865 		MEM_FREE(final_key_pages[i]);
866 	}
867 }
868 
opencl_DES_bs_init_index()869 void opencl_DES_bs_init_index()
870 {
871 	int p,q,s,t ;
872 	int round, index, bit;
873 
874 	s = 0;
875 	t = 0;
876 	for (round = 0; round < 16; round++) {
877 		s += opencl_DES_ROT[round];
878 		for (index = 0; index < 48; index++) {
879 			p = opencl_DES_PC2[index];
880 			q = p < 28 ? 0 : 28;
881 			p += s;
882 			while (p >= 28) p -= 28;
883 			bit = opencl_DES_PC1[p + q];
884 			bit ^= 070;
885 			bit -= bit >> 3;
886 			bit = 55 - bit;
887 			opencl_DES_bs_index768[t++] = bit;
888 		}
889 	}
890 }
891 
DES_bs_init(int block)892 static void DES_bs_init(int block)
893 {
894 	int index;
895 
896 	for (index = 0; index < DES_BS_DEPTH; index++)
897 		des_all[block].pxkeys[index] =
898 			&des_raw_keys[block].xkeys.c[0][index & 7][index >> 3];
899 }
900 
create_keys_buffer(size_t gws,size_t padding)901 void create_keys_buffer(size_t gws, size_t padding)
902 {
903 	int i;
904 
905 	des_all = (des_combined *) mem_alloc((gws + padding) * sizeof(des_combined));
906 	des_raw_keys = (opencl_DES_bs_transfer *) mem_alloc((gws + padding) * sizeof(opencl_DES_bs_transfer));
907 	des_int_key_loc = (unsigned int *) mem_calloc((gws + padding), sizeof(unsigned int));
908 
909 	for (i = 0; i < gws; i++)
910 		DES_bs_init(i);
911 
912 	buffer_raw_keys = clCreateBuffer(context[gpu_id], CL_MEM_READ_ONLY, (gws + padding) * sizeof(opencl_DES_bs_transfer), NULL, &ret_code);
913 	HANDLE_CLERROR(ret_code, "Create buffer_raw_keys failed.\n");
914 
915 	buffer_int_key_loc = clCreateBuffer(context[gpu_id], CL_MEM_READ_WRITE, (gws + padding) * sizeof(unsigned int), NULL, &ret_code);
916 	HANDLE_CLERROR(ret_code, "Create buffer_int_key_loc failed.\n");
917 }
918 
create_int_keys_buffer()919 void create_int_keys_buffer()
920 {
921 	unsigned int active_placeholders, i;
922 
923 	active_placeholders = 1;
924 	if (mask_skip_ranges)
925 	for (i = 0; i < MASK_FMT_INT_PLHDR; i++) {
926 		if (mask_skip_ranges[i] != -1)
927 			active_placeholders++;
928 	}
929 
930 	buffer_int_des_keys = clCreateBuffer(context[gpu_id], CL_MEM_READ_ONLY, active_placeholders * 7 * ((mask_int_cand.num_int_cand + DES_BS_DEPTH - 1) >> DES_LOG_DEPTH) * sizeof(unsigned int), NULL, &ret_code);
931 	HANDLE_CLERROR(ret_code, "Create buffer_int_des_keys failed.\n");
932 }
933 
release_int_keys_buffer()934 void release_int_keys_buffer()
935 {
936 	if (buffer_int_des_keys) {
937 		HANDLE_CLERROR(clReleaseMemObject(buffer_int_des_keys), "Release buffer_int_des_keys failed.\n");
938 		HANDLE_CLERROR(clReleaseKernel(keys_kernel), "Release keys_kernel failed.\n");
939 		buffer_int_des_keys = 0;
940 	}
941 }
942 
release_keys_buffer()943 void release_keys_buffer()
944 {
945 	if (buffer_raw_keys) {
946 		MEM_FREE(des_all);
947 		MEM_FREE(des_raw_keys);
948 		MEM_FREE(des_int_key_loc);
949 		HANDLE_CLERROR(clReleaseMemObject(buffer_raw_keys), "Release buffer_raw_keys failed.\n");
950 		HANDLE_CLERROR(clReleaseMemObject(buffer_int_key_loc), "Release buffer_int_key_loc failed.\n");
951 		buffer_raw_keys = 0;
952 	}
953 }
954 
opencl_DES_bs_set_key(char * key,int index)955 void opencl_DES_bs_set_key(char *key, int index)
956 {
957 	unsigned char *dst;
958 	unsigned int sector,key_index;
959 	unsigned int flag = key[0];
960 
961 	sector = index >> DES_LOG_DEPTH;
962 	key_index = index & (DES_BS_DEPTH - 1);
963 	dst = des_all[sector].pxkeys[key_index];
964 
965 	keys_changed = 1;
966 
967 	dst[0] = 				(!flag) ? 0 : key[0];
968 	dst[sizeof(DES_bs_vector) * 8]      =	(!flag) ? 0 : key[1];
969 	flag = flag&&key[1] ;
970 	dst[sizeof(DES_bs_vector) * 8 * 2]  =	(!flag) ? 0 : key[2];
971 	flag = flag&&key[2];
972 	dst[sizeof(DES_bs_vector) * 8 * 3]  =	(!flag) ? 0 : key[3];
973 	flag = flag&&key[3];
974 	dst[sizeof(DES_bs_vector) * 8 * 4]  =	(!flag) ? 0 : key[4];
975 	flag = flag&&key[4]&&key[5];
976 	dst[sizeof(DES_bs_vector) * 8 * 5]  =	(!flag) ? 0 : key[5];
977 	flag = flag&&key[6];
978 	dst[sizeof(DES_bs_vector) * 8 * 6]  =	(!flag) ? 0 : key[6];
979 	dst[sizeof(DES_bs_vector) * 8 * 7]  =	(!flag) ? 0 : key[7];
980 
981 /*
982 	if (!key[0]) goto fill8;
983 	*dst = key[0];
984 	*(dst + sizeof(DES_bs_vector) * 8) = key[1];
985 	*(dst + sizeof(DES_bs_vector) * 8 * 2) = key[2];
986 	if (!key[1]) goto fill6;
987 	if (!key[2]) goto fill5;
988 	*(dst + sizeof(DES_bs_vector) * 8 * 3) = key[3];
989 	*(dst + sizeof(DES_bs_vector) * 8 * 4) = key[4];
990 	if (!key[3]) goto fill4;
991 	if (!key[4] || !key[5]) goto fill3;
992 	*(dst + sizeof(DES_bs_vector) * 8 * 5) = key[5];
993 	if (!key[6]) goto fill2;
994 	*(dst + sizeof(DES_bs_vector) * 8 * 6) = key[6];
995 	*(dst + sizeof(DES_bs_vector) * 8 * 7) = key[7];
996 	return;
997 fill8:
998 	dst[0] = 0;
999 	dst[sizeof(DES_bs_vector) * 8] = 0;
1000 fill6:
1001 	dst[sizeof(DES_bs_vector) * 8 * 2] = 0;
1002 fill5:
1003 	dst[sizeof(DES_bs_vector) * 8 * 3] = 0;
1004 fill4:
1005 	dst[sizeof(DES_bs_vector) * 8 * 4] = 0;
1006 fill3:
1007 	dst[sizeof(DES_bs_vector) * 8 * 5] = 0;
1008 fill2:
1009 	dst[sizeof(DES_bs_vector) * 8 * 6] = 0;
1010 	dst[sizeof(DES_bs_vector) * 8 * 7] = 0;
1011 	*/
1012 }
1013 
opencl_DES_bs_get_key(int index)1014 char *opencl_DES_bs_get_key(int index)
1015 {
1016 	static char out[PLAINTEXT_LENGTH + 1];
1017 	unsigned int section, block;
1018 	unsigned char *src;
1019 	char *dst;
1020 
1021 	if (hash_ids == NULL || hash_ids[0] == 0 ||
1022 	    index >= hash_ids[0] || hash_ids[0] > num_uncracked_hashes(current_salt)) {
1023 		section = 0;
1024 		block = 0;
1025 	}
1026 	else {
1027 		section = hash_ids[2 * index + 1] / 32;
1028 		block  = hash_ids[2 * index + 1] & 31;
1029 
1030 	}
1031 
1032 	if (section > global_work_size) {
1033 		//fprintf(stderr, "Get key error! %d "Zu"\n", section, global_work_size);
1034 		section = 0;
1035 	}
1036 
1037 	src = des_all[section].pxkeys[block];
1038 	dst = out;
1039 	while (dst < &out[PLAINTEXT_LENGTH] && (*dst = *src)) {
1040 		src += sizeof(DES_bs_vector) * 8;
1041 		dst++;
1042 	}
1043 	*dst = 0;
1044 
1045 	return out;
1046 }
1047 
set_key_mm(char * key,int index)1048 static void set_key_mm(char *key, int index)
1049 {
1050 	unsigned int len = strlen(key);
1051 	unsigned int i;
1052 	unsigned long c;
1053 
1054 	for (i = 0; i < len; i++) {
1055 		c = (unsigned char) key[i];
1056 		memset(des_raw_keys[index].xkeys.v[i], c, 8 * sizeof(DES_bs_vector));
1057 	}
1058 
1059 	for (i = len; i < PLAINTEXT_LENGTH; i++)
1060 		memset(des_raw_keys[index].xkeys.v[i], 0, 8 * sizeof(DES_bs_vector));
1061 
1062 	if (!mask_gpu_is_static) {
1063 		des_int_key_loc[index] = 0;
1064 		for (i = 0; i < MASK_FMT_INT_PLHDR; i++) {
1065 			if (mask_skip_ranges[i] != -1)  {
1066 				des_int_key_loc[index] |= ((mask_int_cand.
1067 				int_cpu_mask_ctx->ranges[mask_skip_ranges[i]].offset +
1068 				mask_int_cand.int_cpu_mask_ctx->
1069 				ranges[mask_skip_ranges[i]].pos) & 0xff) << (i << 3);
1070 			}
1071 			else
1072 				des_int_key_loc[index] |= 0x80 << (i << 3);
1073 		}
1074 	}
1075 
1076 	keys_changed = 1;
1077 }
1078 
1079 /* des_bs_key arrangement.
1080 	iter 0			iter 1			iter n-1
1081 0	w0 w1 w2.. w(gws-1)	w0 w1 w2.. w(gws-1) 	w0 w1 w2.. w(gws-1)
1082 1	w0 w1 w2.. w(gws-1)	w0 w1 w2.. w(gws-1) 	w0 w1 w2.. w(gws-1)
1083 2	w0 w1 w2.. w(gws-1)	w0 w1 w2.. w(gws-1) 	w0 w1 w2.. w(gws-1)
1084 .
1085 .
1086 .
1087 55 	w0 w1 w2.. w(gws-1)	w0 w1 w2.. w(gws-1) 	w0 w1 w2.. w(gws-1) */
get_key_mm(int index)1088 static char *get_key_mm(int index)
1089 {
1090 	static char out[PLAINTEXT_LENGTH + 1];
1091 	unsigned int section, depth, iter;
1092 	unsigned char *src, i;
1093 	char *dst;
1094 
1095 	if (hash_ids == NULL || hash_ids[0] == 0 ||
1096 	    index >= hash_ids[0] || hash_ids[0] > num_uncracked_hashes(current_salt)) {
1097 		section = 0;
1098 		depth = 0;
1099 		iter = 0;
1100 	}
1101 	else {
1102 		section = (hash_ids[2 * index + 1] >> DES_LOG_DEPTH) % process_key_gws;
1103 		depth  = hash_ids[2 * index + 1] & (DES_BS_DEPTH - 1);
1104 		iter = (hash_ids[2 * index + 1] >> DES_LOG_DEPTH) / process_key_gws;
1105 	}
1106 
1107 	if (section > process_key_gws) {
1108 		fprintf(stderr, "Get key error! %u "Zu"\n", section,
1109 			process_key_gws);
1110 		section = 0;
1111 		depth = 0;
1112 		iter = 0;
1113 	}
1114 
1115 	if (mask_skip_ranges && mask_int_cand.num_int_cand > 1) {
1116 		for (i = 0; i < MASK_FMT_INT_PLHDR && mask_skip_ranges[i] != -1; i++)
1117 			if (mask_gpu_is_static)
1118 				des_raw_keys[section].xkeys.c[static_gpu_locations[i]][depth & 7][depth >> 3] = mask_int_cand.int_cand[iter * 32 + depth].x[i];
1119 			else
1120 				des_raw_keys[section].xkeys.c[(des_int_key_loc[section] & (0xff << (i * 8))) >> (i * 8)][depth & 7][depth >> 3] = mask_int_cand.int_cand[iter * 32 + depth].x[i];
1121 	}
1122 
1123 	src = des_all[section].pxkeys[depth];
1124 	dst = out;
1125 	while (dst < &out[PLAINTEXT_LENGTH] && (*dst = *src)) {
1126 		src += sizeof(DES_bs_vector) * 8;
1127 		dst++;
1128 	}
1129 	*dst = 0;
1130 
1131 	return out;
1132 }
1133 
opencl_DES_bs_clear_keys()1134 void opencl_DES_bs_clear_keys()
1135 {	/* Auto-tune might set hash_ids[0] to some value, which interferes with
1136 	set-key/get-key checking during self test. */
1137 	hash_ids[0] = 0;
1138 }
1139 
create_keys_kernel_set_args(int mask_mode)1140 size_t create_keys_kernel_set_args(int mask_mode)
1141 {
1142 	char build_opts[400];
1143 	cl_ulong const_cache_size;
1144 	int i;
1145 
1146 	if (mask_mode) {
1147 		fmt_opencl_DES.methods.set_key = set_key_mm;
1148 		fmt_opencl_DES.methods.get_key = get_key_mm;
1149 	}
1150 
1151 	des_finalize_int_keys();
1152 
1153 	for (i = 0; i < MASK_FMT_INT_PLHDR; i++)
1154 		if (mask_skip_ranges && mask_skip_ranges[i] != -1)
1155 			static_gpu_locations[i] = mask_int_cand.int_cpu_mask_ctx->
1156 				ranges[mask_skip_ranges[i]].pos;
1157 		else
1158 			static_gpu_locations[i] = -1;
1159 
1160 	HANDLE_CLERROR(clGetDeviceInfo(devices[gpu_id], CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof(cl_ulong), &const_cache_size, 0), "failed to get CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE.");
1161 
1162 	sprintf(build_opts, "-D ITER_COUNT=%u -D MASK_ENABLED=%d -D LOC_0=%d"
1163 #if MASK_FMT_INT_PLHDR > 1
1164 		" -D LOC_1=%d "
1165 #endif
1166 #if MASK_FMT_INT_PLHDR > 2
1167 		"-D LOC_2=%d "
1168 #endif
1169 #if MASK_FMT_INT_PLHDR > 3
1170 		"-D LOC_3=%d"
1171 #endif
1172 		" -D IS_STATIC_GPU_MASK=%d -D CONST_CACHE_SIZE=%llu"
1173 		, ((mask_int_cand.num_int_cand + DES_BS_DEPTH - 1) >> DES_LOG_DEPTH), mask_mode, static_gpu_locations[0]
1174 #if MASK_FMT_INT_PLHDR > 1
1175 		, static_gpu_locations[1]
1176 #endif
1177 #if MASK_FMT_INT_PLHDR > 2
1178 		, static_gpu_locations[2]
1179 #endif
1180 #if MASK_FMT_INT_PLHDR > 3
1181 		, static_gpu_locations[3]
1182 #endif
1183 		, mask_gpu_is_static, (unsigned long long)const_cache_size);
1184 
1185 	opencl_build_kernel("$JOHN/kernels/DES_bs_finalize_keys_kernel.cl",
1186 	                    gpu_id, build_opts, 0);
1187 	keys_kernel = clCreateKernel(program[gpu_id], "DES_bs_finalize_keys", &ret_code);
1188 	HANDLE_CLERROR(ret_code, "Failed creating kernel DES_bs_finalize_keys.\n");
1189 
1190 	HANDLE_CLERROR(clSetKernelArg(keys_kernel, 1, sizeof(cl_mem), &buffer_int_des_keys), "Failed setting kernel argument buffer_int_des_keys, kernel DES_bs_finalize_keys.\n");
1191 
1192 	return get_kernel_max_lws(gpu_id, keys_kernel);
1193 }
1194 
process_keys(size_t current_gws,size_t * lws)1195 void process_keys(size_t current_gws, size_t *lws)
1196 {
1197 	process_key_gws = current_gws;
1198 
1199 	if (keys_changed) {
1200 		HANDLE_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], buffer_raw_keys, CL_TRUE, 0, current_gws * sizeof(opencl_DES_bs_transfer), des_raw_keys, 0, NULL, NULL ), "Failed to write buffer buffer_raw_keys.\n");
1201 
1202 		if (!mask_gpu_is_static)
1203 		HANDLE_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], buffer_int_key_loc, CL_TRUE, 0, current_gws * sizeof(unsigned int), des_int_key_loc, 0, NULL, NULL ), "Failed Copy data to gpu");
1204 
1205 		ret_code = clEnqueueNDRangeKernel(queue[gpu_id], keys_kernel, 1, NULL, &current_gws, lws, 0, NULL, NULL);
1206 		HANDLE_CLERROR(ret_code, "Enque kernel DES_bs_finalize_keys failed.\n");
1207 
1208 		keys_changed = 0;
1209 	}
1210 }
1211 
get_device_name(int id)1212 char *get_device_name(int id)
1213 {
1214 	char *d_name;
1215 
1216 	d_name = (char *) mem_calloc(600, sizeof(char));
1217 	HANDLE_CLERROR(clGetDeviceInfo(devices[id], CL_DEVICE_NAME, 600, d_name, NULL), "Failed to get device name.\n");
1218 	return d_name;
1219 }
1220 
save_lws_config(const char * config_file,int id_gpu,size_t lws,unsigned int forced_global_key)1221 void save_lws_config(const char* config_file, int id_gpu, size_t lws, unsigned int forced_global_key)
1222 {
1223 	FILE *file;
1224 	char config_file_name[500];
1225 	char *d_name;
1226 
1227 	sprintf(config_file_name, config_file, d_name = get_device_name(id_gpu));
1228 	MEM_FREE(d_name);
1229 
1230 	file = fopen(path_expand(config_file_name), "r");
1231 	if (file != NULL) {
1232 		fclose(file);
1233 		return;
1234 	}
1235 	file = fopen(path_expand(config_file_name), "w");
1236 
1237 #if OS_FLOCK || FCNTL_LOCKS
1238 	{
1239 #if FCNTL_LOCKS
1240 		struct flock lock;
1241 
1242 		memset(&lock, 0, sizeof(lock));
1243 		lock.l_type = F_WRLCK;
1244 		while (fcntl(fileno(file), F_SETLKW, &lock)) {
1245 			if (errno != EINTR)
1246 				pexit("fcntl(F_WRLCK)");
1247 		}
1248 #else
1249 		while (flock(fileno(file), LOCK_EX)) {
1250 			if (errno != EINTR)
1251 				pexit("flock(LOCK_EX)");
1252 		}
1253 #endif
1254 	}
1255 #endif
1256 	fprintf(file, ""Zu" %u", lws, forced_global_key);
1257 	fclose(file);
1258 }
1259 
restore_lws_config(const char * config_file,int id_gpu,size_t * lws,size_t extern_lws_limit,unsigned int * forced_global_key)1260 int restore_lws_config(const char *config_file, int id_gpu, size_t *lws, size_t extern_lws_limit, unsigned int *forced_global_key)
1261 {
1262 	FILE *file;
1263 	char config_file_name[500];
1264 	char *d_name;
1265 	unsigned int param;
1266 
1267 	sprintf(config_file_name, config_file, d_name = get_device_name(id_gpu));
1268 	MEM_FREE(d_name);
1269 
1270 	file = fopen(path_expand(config_file_name), "r");
1271 	if (file == NULL)
1272 		return 0;
1273 
1274 
1275 #if OS_FLOCK || FCNTL_LOCKS
1276 	{
1277 #if FCNTL_LOCKS
1278 		struct flock lock;
1279 
1280 		memset(&lock, 0, sizeof(lock));
1281 		lock.l_type = F_RDLCK;
1282 		while (fcntl(fileno(fp), F_SETLKW, &lock)) {
1283 			if (errno != EINTR)
1284 				pexit("fcntl(F_RDLCK)");
1285 		}
1286 #else
1287 		while (flock(fileno(fp), LOCK_SH)) {
1288 			if (errno != EINTR)
1289 				pexit("flock(LOCK_SH)");
1290 		}
1291 #endif
1292 	}
1293 #endif
1294 	if (fscanf(file, ""Zu" %u", lws, &param) != 2 || *lws > extern_lws_limit) {
1295 		if (forced_global_key)
1296 			*forced_global_key = param;
1297 		fclose(file);
1298 		return 0;
1299 	}
1300 
1301 	fclose(file);
1302 	return 1;
1303 }
1304 #endif /* HAVE_OPENCL */
1305