1 /*
2 * This software is Copyright (c) 2012 Sayantan Datta <std2048 at gmail dot com>
3 * and it is hereby released to the general public under the following terms:
4 * Redistribution and use in source and binary forms, with or without modification, are permitted.
5 * Based on Solar Designer implementation of DES_bs_b.c in jtr-v1.7.9
6 */
7
8 #ifdef HAVE_OPENCL
9
10 #include <string.h>
11
12 #include "arch.h"
13 #include "common.h"
14 #include "opencl_DES_bs.h"
15 #include "opencl_DES_hst_dev_shared.h"
16 #include "unicode.h"
17 #include "bt_interface.h"
18 #include "mask_ext.h"
19
20 typedef struct {
21 unsigned char *pxkeys[DES_BS_DEPTH]; /* Pointers into xkeys.c */
22 } des_combined;
23
24 static cl_kernel **cmp_kernel;
25 static cl_kernel kernel_high, kernel_low;
26 static cl_mem buffer_hash_ids, buffer_bitmap_dupe, *buffer_uncracked_hashes, *buffer_hash_tables, *buffer_offset_tables, *buffer_bitmaps;
27 static unsigned int *zero_buffer, **hash_tables;
28 static unsigned int *hash_ids;
29 static unsigned int max_uncracked_hashes, max_hash_table_size;
30 DES_hash_check_params *hash_chk_params;
31 static WORD current_salt;
32
33 static cl_kernel keys_kernel;
34 static cl_mem buffer_raw_keys, buffer_int_des_keys, buffer_int_key_loc;
35 static int keys_changed = 1;
36 static des_combined *des_all;
37 static opencl_DES_bs_transfer *des_raw_keys;
38 static unsigned int *des_int_key_loc;
39 static unsigned int static_gpu_locations[MASK_FMT_INT_PLHDR];
40 static size_t process_key_gws;
41 unsigned int CC_CACHE_ALIGN opencl_DES_bs_index768[0x300];
42
43 unsigned char opencl_DES_E[48] = {
44 31, 0, 1, 2, 3, 4,
45 3, 4, 5, 6, 7, 8,
46 7, 8, 9, 10, 11, 12,
47 11, 12, 13, 14, 15, 16,
48 15, 16, 17, 18, 19, 20,
49 19, 20, 21, 22, 23, 24,
50 23, 24, 25, 26, 27, 28,
51 27, 28, 29, 30, 31, 0
52 };
53
54 static unsigned char opencl_DES_PC1[56] = {
55 56, 48, 40, 32, 24, 16, 8,
56 0, 57, 49, 41, 33, 25, 17,
57 9, 1, 58, 50, 42, 34, 26,
58 18, 10, 2, 59, 51, 43, 35,
59 62, 54, 46, 38, 30, 22, 14,
60 6, 61, 53, 45, 37, 29, 21,
61 13, 5, 60, 52, 44, 36, 28,
62 20, 12, 4, 27, 19, 11, 3
63 };
64
65 static unsigned char opencl_DES_ROT[16] = {
66 1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1
67 };
68
69 static unsigned char opencl_DES_PC2[48] = {
70 13, 16, 10, 23, 0, 4,
71 2, 27, 14, 5, 20, 9,
72 22, 18, 11, 3, 25, 7,
73 15, 6, 26, 19, 12, 1,
74 40, 51, 30, 36, 46, 54,
75 29, 39, 50, 44, 32, 47,
76 43, 48, 38, 55, 33, 52,
77 45, 41, 49, 35, 28, 31
78 };
79
80 #define num_uncracked_hashes(k) hash_chk_params[k].num_uncracked_hashes
81 #define hash_table_size(k) hash_chk_params[k].hash_table_size
82 #define offset_table_size(k) hash_chk_params[k].offset_table_size
83
84 #define LOW_THRESHOLD 10
85
86 #define get_num_bits(r, v) \
87 { \
88 r = (v & 0xAAAAAAAA) != 0; \
89 r |= ((v & 0xFFFF0000) != 0) << 4; \
90 r |= ((v & 0xFF00FF00) != 0) << 3; \
91 r |= ((v & 0xF0F0F0F0) != 0) << 2; \
92 r |= ((v & 0xCCCCCCCC) != 0) << 1; \
93 }
94
prepare_bitmap_1(cl_ulong bmp_sz_bits,cl_uint ** bitmaps_ptr,unsigned WORD * loaded_hashes,unsigned int num_uncracked_hashes)95 static void prepare_bitmap_1(cl_ulong bmp_sz_bits, cl_uint **bitmaps_ptr, unsigned WORD *loaded_hashes, unsigned int num_uncracked_hashes)
96 {
97 unsigned int i;
98 MEM_FREE(*bitmaps_ptr);
99 *bitmaps_ptr = (cl_uint*) mem_calloc((bmp_sz_bits >> 5), sizeof(cl_uint));
100
101 for (i = 0; i < num_uncracked_hashes; i++) {
102 unsigned int bmp_idx = loaded_hashes[2 * i + 1] & (bmp_sz_bits - 1);
103 (*bitmaps_ptr)[bmp_idx >> 5] |= (1U << (bmp_idx & 31));
104 }
105 }
106
select_bitmap(unsigned int num_ld_hashes,WORD * uncracked_hashes_t,unsigned long * bitmap_size_bits,unsigned int ** bitmaps_ptr,DES_hash_check_params * hash_chk_params)107 static void select_bitmap(unsigned int num_ld_hashes, WORD *uncracked_hashes_t, unsigned long *bitmap_size_bits, unsigned int **bitmaps_ptr, DES_hash_check_params *hash_chk_params)
108 {
109 unsigned int cmp_steps = 1, bits_req = 32;
110
111 if (num_ld_hashes <= 5100) {
112 if (amd_gcn_10(device_info[gpu_id]) ||
113 amd_vliw4(device_info[gpu_id]))
114 *bitmap_size_bits = 512 * 1024;
115
116 else
117 *bitmap_size_bits = 256 * 1024;
118
119 }
120
121 else if (num_ld_hashes <= 10100) {
122 if (amd_gcn_10(device_info[gpu_id]) ||
123 amd_vliw4(device_info[gpu_id]))
124 *bitmap_size_bits = 512 * 1024;
125
126 else
127 *bitmap_size_bits = 256 * 1024;
128 }
129
130 else if (num_ld_hashes <= 20100) {
131 if (amd_gcn_10(device_info[gpu_id]))
132 *bitmap_size_bits = 1024 * 1024;
133
134 else
135 *bitmap_size_bits = 512 * 1024;
136 }
137
138 else if (num_ld_hashes <= 250100)
139 *bitmap_size_bits = 2048 * 1024;
140
141 else if (num_ld_hashes <= 1100100) {
142 if (!amd_gcn_11(device_info[gpu_id]))
143 *bitmap_size_bits = 4096 * 1024;
144
145 else
146 *bitmap_size_bits = 2048 * 1024;
147 }
148
149 else if (num_ld_hashes <= 1500100)
150 *bitmap_size_bits = 4096 * 1024 * 2;
151
152 else if (num_ld_hashes <= 2700100)
153 *bitmap_size_bits = 4096 * 1024 * 2 * 2;
154
155 else {
156 cl_ulong mult = num_ld_hashes / 2700100;
157 cl_ulong buf_sz;
158 *bitmap_size_bits = 4096 * 4096;
159 get_power_of_two(mult);
160 *bitmap_size_bits *= mult;
161 buf_sz = get_max_mem_alloc_size(gpu_id);
162 if (buf_sz & (buf_sz - 1)) {
163 get_power_of_two(buf_sz);
164 buf_sz >>= 1;
165 }
166 if (buf_sz >= 536870912)
167 buf_sz = 536870912;
168 if (((*bitmap_size_bits) >> 3) > buf_sz)
169 *bitmap_size_bits = buf_sz << 3;
170 }
171
172 prepare_bitmap_1(*bitmap_size_bits, bitmaps_ptr, (unsigned WORD *)uncracked_hashes_t, num_ld_hashes);
173
174 get_num_bits(bits_req, (*bitmap_size_bits));
175
176 hash_chk_params->bitmap_size_bits = (unsigned int)(*bitmap_size_bits);
177 hash_chk_params->cmp_steps = cmp_steps;
178 hash_chk_params->cmp_bits = bits_req;
179
180 *bitmap_size_bits *= cmp_steps;
181 }
182
fill_buffer(struct db_salt * salt,unsigned int * max_uncracked_hashes,unsigned int * max_hash_table_size)183 static void fill_buffer(struct db_salt *salt, unsigned int *max_uncracked_hashes, unsigned int *max_hash_table_size)
184 {
185 int i;
186 WORD salt_val;
187 WORD *binary;
188 WORD *uncracked_hashes = NULL, *uncracked_hashes_t = NULL;
189 struct db_password *pw, *last;
190 OFFSET_TABLE_WORD *offset_table;
191 unsigned int hash_table_size, offset_table_size;
192
193 salt_val = *(WORD *)salt->salt;
194 num_uncracked_hashes(salt_val) = salt->count;
195
196 uncracked_hashes = (WORD *) mem_calloc(2 * num_uncracked_hashes(salt_val), sizeof(WORD));
197 uncracked_hashes_t = (WORD *) mem_calloc(2 * num_uncracked_hashes(salt_val), sizeof(WORD));
198
199 i = 0;
200 last = pw = salt->list;
201 do {
202 binary = (WORD *)pw->binary;
203 if (binary == NULL) {
204 if (last == pw)
205 salt->list = pw->next;
206 else
207 last->next = pw->next;
208 } else {
209 last = pw;
210 uncracked_hashes_t[2 * i] = binary[0];
211 uncracked_hashes_t[2 * i + 1] = binary[1];
212 i++;
213 }
214 } while ((pw = pw->next));
215
216 if (salt->count > *max_uncracked_hashes)
217 *max_uncracked_hashes = salt->count;
218
219 num_uncracked_hashes(salt_val) = create_perfect_hash_table(64, (void *)uncracked_hashes_t,
220 num_uncracked_hashes(salt_val),
221 &offset_table,
222 &offset_table_size,
223 &hash_table_size, 0);
224
225 hash_table_size(salt_val) = hash_table_size;
226 offset_table_size(salt_val) = offset_table_size;
227
228 if (hash_table_size(salt_val) > *max_hash_table_size)
229 *max_hash_table_size = hash_table_size(salt_val);
230
231 if (!num_uncracked_hashes(salt_val)) {
232 MEM_FREE(hash_table_64);
233 MEM_FREE(offset_table);
234 fprintf(stderr, "Failed to create Hash Table for cracking.\n");
235 error();
236 }
237
238 hash_tables[salt_val] = hash_table_64;
239
240 /* uncracked_hashes_t is modified by create_perfect_hash_table. */
241 for (i = 0; i < num_uncracked_hashes(salt_val); i++) {
242 uncracked_hashes[i] = uncracked_hashes_t[2 * i];
243 uncracked_hashes[i + num_uncracked_hashes(salt_val)] = uncracked_hashes_t[2 * i + 1];
244 }
245
246 buffer_offset_tables[salt_val] = clCreateBuffer(context[gpu_id], CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(OFFSET_TABLE_WORD) * offset_table_size , offset_table, &ret_code);
247 HANDLE_CLERROR(ret_code, "Create buffer_offset_tables failed.\n");
248
249 buffer_hash_tables[salt_val] = clCreateBuffer(context[gpu_id], CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, 2 * sizeof(unsigned int) * hash_table_size, hash_table_64, &ret_code);
250 HANDLE_CLERROR(ret_code, "Create buffer_hash_tables failed.\n");
251
252 if (num_uncracked_hashes(salt_val) <= LOW_THRESHOLD) {
253 buffer_uncracked_hashes[salt_val] = clCreateBuffer(context[gpu_id], CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, 2 * sizeof(WORD) * num_uncracked_hashes(salt_val), uncracked_hashes, &ret_code);
254 HANDLE_CLERROR(ret_code, "Create buffer_uncracked_hashes failed.\n");
255 }
256 else {
257 unsigned long bitmap_size_bits = 0;
258 unsigned int *bitmaps = NULL;
259 select_bitmap(num_uncracked_hashes(salt_val), uncracked_hashes_t, &bitmap_size_bits, &bitmaps, &hash_chk_params[salt_val]);
260 buffer_bitmaps[salt_val] = clCreateBuffer(context[gpu_id], CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, bitmap_size_bits >> 3, bitmaps, &ret_code);
261 HANDLE_CLERROR(ret_code, "Create buffer_bitmaps failed.\n");
262 MEM_FREE(bitmaps);
263 }
264
265 MEM_FREE(uncracked_hashes);
266 MEM_FREE(uncracked_hashes_t);
267 MEM_FREE(offset_table);
268 }
269
fill_buffer_self_test(unsigned int * max_uncracked_hashes,unsigned int * max_hash_table_size)270 static void fill_buffer_self_test(unsigned int *max_uncracked_hashes, unsigned int *max_hash_table_size)
271 {
272 char *ciphertext;
273 WORD *binary;
274 WORD salt_val;
275 unsigned int offset_table_size, hash_table_size;
276 unsigned long bitmap_size_bits = 0;
277 unsigned int *bitmaps = NULL;
278 WORD *uncracked_hashes = NULL, *uncracked_hashes_t = NULL;
279 int i;
280 OFFSET_TABLE_WORD *offset_table = NULL;
281 DES_hash_check_params temp_param;
282
283 while (fmt_opencl_DES.params.tests[*max_uncracked_hashes].ciphertext) {
284 ciphertext = fmt_opencl_DES.methods.split(fmt_opencl_DES.params.tests[*max_uncracked_hashes].ciphertext, 0, &fmt_opencl_DES);
285 (*max_uncracked_hashes)++;
286 }
287
288 uncracked_hashes = (WORD *) mem_calloc(2 * *max_uncracked_hashes, sizeof(WORD));
289 uncracked_hashes_t = (WORD *) mem_calloc(2 * *max_uncracked_hashes, sizeof(WORD));
290
291 i = 0;
292 while (fmt_opencl_DES.params.tests[i].ciphertext) {
293 ciphertext = fmt_opencl_DES.methods.split(fmt_opencl_DES.params.tests[i].ciphertext, 0, &fmt_opencl_DES);
294 binary = (WORD *)fmt_opencl_DES.methods.binary(ciphertext);
295 salt_val = *(WORD *)fmt_opencl_DES.methods.salt(ciphertext);
296 uncracked_hashes_t[2 * i] = binary[0];
297 uncracked_hashes_t[2 * i + 1] = binary[1];
298 num_uncracked_hashes(salt_val) = 1;
299 //fprintf(stderr, "C:%s B:%d \n", ciphertext, binary[1]);
300 i++;
301 }
302
303 *max_uncracked_hashes = create_perfect_hash_table(64, (void *)uncracked_hashes_t,
304 *max_uncracked_hashes,
305 &offset_table,
306 &offset_table_size,
307 &hash_table_size, 0);
308 *max_hash_table_size = hash_table_size;
309
310 if (!*max_uncracked_hashes) {
311 MEM_FREE(hash_table_64);
312 MEM_FREE(offset_table);
313 fprintf(stderr, "Failed to create Hash Table for cracking.\n");
314 error();
315 }
316
317 /* uncracked_hashes_t is modified by create_perfect_hash_table. */
318 for (i = 0; i < *max_uncracked_hashes; i++) {
319 uncracked_hashes[i] = uncracked_hashes_t[2 * i];
320 uncracked_hashes[i + *max_uncracked_hashes] = uncracked_hashes_t[2 * i + 1];
321 }
322
323 select_bitmap(*max_uncracked_hashes, uncracked_hashes_t, &bitmap_size_bits, &bitmaps, &temp_param);
324
325 for (i = 0; i < 4096; i++) {
326 if (!num_uncracked_hashes(i)) continue;
327 hash_chk_params[i] = temp_param; /* Error if this statement is excuted later in the body of loop. */
328 num_uncracked_hashes(i) = *max_uncracked_hashes;
329 hash_table_size(i) = hash_table_size;
330 offset_table_size(i) = offset_table_size;
331 hash_tables[i] = (unsigned int *) mem_alloc(2 * sizeof(unsigned int) * hash_table_size);
332 memcpy(hash_tables[i], hash_table_64, 2 * sizeof(unsigned int) * hash_table_size);
333 buffer_offset_tables[i] = clCreateBuffer(context[gpu_id], CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(OFFSET_TABLE_WORD) * offset_table_size , offset_table, &ret_code);
334 HANDLE_CLERROR(ret_code, "Create buffer_offset_tables failed.\n");
335 buffer_hash_tables[i] = clCreateBuffer(context[gpu_id], CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, 2 * sizeof(unsigned int) * hash_table_size, hash_table_64, &ret_code);
336 HANDLE_CLERROR(ret_code, "Create buffer_hash_tables failed.\n");
337 buffer_bitmaps[i] = clCreateBuffer(context[gpu_id], CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, bitmap_size_bits >> 3, bitmaps, &ret_code);
338 HANDLE_CLERROR(ret_code, "Create buffer_bitmaps failed.\n");
339 buffer_uncracked_hashes[i] = clCreateBuffer(context[gpu_id], CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, 2 * sizeof(WORD) * *max_uncracked_hashes, uncracked_hashes, &ret_code);
340 HANDLE_CLERROR(ret_code, "Create buffer_uncracked_hashes failed.\n");
341 }
342
343 MEM_FREE(uncracked_hashes);
344 MEM_FREE(uncracked_hashes_t);
345 MEM_FREE(offset_table);
346 MEM_FREE(hash_table_64);
347 MEM_FREE(bitmaps);
348 }
349
release_fill_buffer(WORD i)350 static void release_fill_buffer(WORD i)
351 {
352 if (buffer_uncracked_hashes[i] != (cl_mem)0) {
353 HANDLE_CLERROR(clReleaseMemObject(buffer_uncracked_hashes[i]), "Release buffer_uncracked_hashes failed.\n");
354 buffer_uncracked_hashes[i] = (cl_mem)0;
355 }
356 if (buffer_offset_tables[i] != (cl_mem)0) {
357 HANDLE_CLERROR(clReleaseMemObject(buffer_offset_tables[i]), "Release buffer_offset_tables failed.\n");
358 HANDLE_CLERROR(clReleaseMemObject(buffer_hash_tables[i]), "Release buffer_hash_tables failed.\n");
359 buffer_hash_tables[i] = (cl_mem)0;
360 buffer_offset_tables[i] = (cl_mem)0;
361 }
362 if (buffer_bitmaps[i] != (cl_mem)0) {
363 HANDLE_CLERROR(clReleaseMemObject(buffer_bitmaps[i]), "Release buffer_bitmaps failed.\n");
364 buffer_bitmaps[i] = (cl_mem)0;
365 }
366 if (hash_tables[i])
367 MEM_FREE(hash_tables[i]);
368 hash_tables[i] = 0;
369 }
370
release_fill_buffers()371 static void release_fill_buffers()
372 {
373 int i;
374
375 for (i = 0; i < 4096; i++)
376 release_fill_buffer(i);
377 }
378
create_aux_buffers(unsigned int max_uncracked_hashes,unsigned int max_hash_table_size)379 static void create_aux_buffers(unsigned int max_uncracked_hashes, unsigned int max_hash_table_size)
380 {
381 zero_buffer = (unsigned int *) mem_calloc((max_hash_table_size - 1) / 32 + 1, sizeof(unsigned int));
382
383 buffer_bitmap_dupe = clCreateBuffer(context[gpu_id], CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, ((max_hash_table_size - 1) / 32 + 1) * sizeof(unsigned int), zero_buffer, &ret_code);
384 HANDLE_CLERROR(ret_code, "Create buffer_bitmap_dupe failed.\n");
385
386 buffer_hash_ids = clCreateBuffer(context[gpu_id], CL_MEM_READ_WRITE, (2 * max_uncracked_hashes + 1) * sizeof(unsigned int), NULL, &ret_code);
387 HANDLE_CLERROR(ret_code, "Create buffer_hash_ids failed.\n");
388
389 hash_ids = (unsigned int *) mem_calloc((2 * max_uncracked_hashes + 1), sizeof(unsigned int));
390
391 HANDLE_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], buffer_hash_ids, CL_TRUE, 0, sizeof(cl_uint), zero_buffer, 0, NULL, NULL), "Failed to write buffer buffer_hash_ids.\n");
392 }
393
release_aux_buffers()394 static void release_aux_buffers()
395 {
396 if (zero_buffer) {
397 HANDLE_CLERROR(clReleaseMemObject(buffer_bitmap_dupe), "Release buffer_bitmap_dupe failed.\n");
398 HANDLE_CLERROR(clReleaseMemObject(buffer_hash_ids), "Release buffer_hash_ids failed.\n");
399
400 MEM_FREE(hash_ids);
401 MEM_FREE(zero_buffer);
402 zero_buffer = 0;
403 }
404 }
405
build_tables(struct db_main * db)406 void build_tables(struct db_main *db)
407 {
408 buffer_uncracked_hashes = (cl_mem *) mem_calloc(4096, sizeof(cl_mem));
409 hash_tables = (unsigned int **) mem_calloc(4096, sizeof(unsigned int *));
410 buffer_offset_tables = (cl_mem *) mem_calloc(4096, sizeof(cl_mem));
411 buffer_hash_tables = (cl_mem *) mem_calloc(4096, sizeof(cl_mem));
412 buffer_bitmaps = (cl_mem *) mem_calloc(4096, sizeof(cl_mem));
413 memset(hash_chk_params, 0, 4096 * sizeof(DES_hash_check_params));
414
415 if (db) {
416 struct db_salt *salt = db->salts;
417 do {
418 fill_buffer(salt, &max_uncracked_hashes, &max_hash_table_size);
419 } while((salt = salt->next));
420 }
421 else {
422 fill_buffer_self_test(&max_uncracked_hashes, &max_hash_table_size);
423 }
424
425 create_aux_buffers(max_uncracked_hashes, max_hash_table_size);
426 }
427
release_tables()428 void release_tables()
429 {
430 release_aux_buffers();
431
432 if (buffer_uncracked_hashes) {
433 release_fill_buffers();
434 MEM_FREE(buffer_uncracked_hashes);
435 MEM_FREE(buffer_offset_tables);
436 MEM_FREE(buffer_hash_tables);
437 MEM_FREE(buffer_bitmaps);
438 MEM_FREE(hash_tables);
439 hash_tables = 0;
440 buffer_uncracked_hashes = 0;
441 }
442 }
443
set_kernel_args_aux_buf()444 static void set_kernel_args_aux_buf()
445 {
446 HANDLE_CLERROR(clSetKernelArg(kernel_low, 4, sizeof(cl_mem), &buffer_hash_ids), "Failed setting kernel argument buffer_hash_ids, kernel DES_bs_cmp_low.\n");
447 HANDLE_CLERROR(clSetKernelArg(kernel_low, 5, sizeof(cl_mem), &buffer_bitmap_dupe), "Failed setting kernel argument buffer_bitmap_dupe, kernel DES_bs_cmp_low.\n");
448
449 HANDLE_CLERROR(clSetKernelArg(kernel_high, 4, sizeof(cl_mem), &buffer_hash_ids), "Failed setting kernel argument buffer_hash_ids, kernel DES_bs_cmp.\n");
450 HANDLE_CLERROR(clSetKernelArg(kernel_high, 5, sizeof(cl_mem), &buffer_bitmap_dupe), "Failed setting kernel argument buffer_bitmap_dupe, kernel DES_bs_cmp.\n");
451 }
452
create_checking_kernel_set_args()453 size_t create_checking_kernel_set_args()
454 {
455 int i;
456 size_t min_lws;
457
458 opencl_build_kernel("$JOHN/kernels/DES_bs_hash_checking_kernel.cl",
459 gpu_id, NULL, 0);
460
461 if (kernel_high == 0) {
462 kernel_high = clCreateKernel(program[gpu_id], "DES_bs_cmp_high", &ret_code);
463 HANDLE_CLERROR(ret_code, "Failed creating kernel DES_bs_cmp_high.\n");
464 }
465 if (kernel_low == 0) {
466 kernel_low = clCreateKernel(program[gpu_id], "DES_bs_cmp", &ret_code);
467 HANDLE_CLERROR(ret_code, "Failed creating kernel DES_bs_cmp.\n");
468 }
469
470 memset(cmp_kernel[gpu_id], 0, 4096 * sizeof(cl_kernel));
471
472 for (i = 0; i < 4096; i++) {
473 if (num_uncracked_hashes(i) <= LOW_THRESHOLD)
474 cmp_kernel[gpu_id][i] = kernel_low;
475 else
476 cmp_kernel[gpu_id][i] = kernel_high;
477 }
478
479 set_kernel_args_aux_buf();
480
481 min_lws = get_kernel_max_lws(gpu_id, kernel_high);
482
483 if (min_lws > get_kernel_max_lws(gpu_id, kernel_low))
484 return get_kernel_max_lws(gpu_id, kernel_low);
485
486 return min_lws;
487 }
488
set_common_kernel_args_kpc(cl_mem buffer_unchecked_hashes,cl_mem buffer_bs_keys)489 void set_common_kernel_args_kpc(cl_mem buffer_unchecked_hashes, cl_mem buffer_bs_keys)
490 {
491 HANDLE_CLERROR(clSetKernelArg(kernel_low, 0, sizeof(cl_mem), &buffer_unchecked_hashes), "Failed setting kernel argument buffer_unchecked_hashes, kernel DES_bs_cmp.\n");
492 HANDLE_CLERROR(clSetKernelArg(kernel_high, 0, sizeof(cl_mem), &buffer_unchecked_hashes), "Failed setting kernel argument buffer_unchecked_hashes, kernel DES_bs_cmp.\n");
493
494 HANDLE_CLERROR(clSetKernelArg(keys_kernel, 0, sizeof(cl_mem), &buffer_raw_keys), "Failed setting kernel argument buffer_raw_keys, kernel DES_bs_finalize_keys.\n");
495 HANDLE_CLERROR(clSetKernelArg(keys_kernel, 2, sizeof(cl_mem), &buffer_int_key_loc), "Failed setting kernel argument buffer_int_key_loc, kernel DES_bs_finalize_keys.\n");
496 HANDLE_CLERROR(clSetKernelArg(keys_kernel, 3, sizeof(cl_mem), &buffer_bs_keys), "Failed setting kernel argument buffer_bs_keys, kernel DES_bs_finalize_keys.\n");
497 }
498
update_buffer(struct db_salt * salt)499 void update_buffer(struct db_salt *salt)
500 {
501 unsigned int _max_uncracked_hashes = 0, _max_hash_table_size = 0;
502 WORD salt_val = *(WORD *)salt->salt;
503 release_fill_buffer(salt_val);
504
505 if (salt->count > LOW_THRESHOLD &&
506 (num_uncracked_hashes(salt_val) - num_uncracked_hashes(salt_val) / 10) < salt->count)
507 return;
508
509 fill_buffer(salt, &_max_uncracked_hashes, &_max_hash_table_size);
510
511 if (_max_uncracked_hashes > max_uncracked_hashes || _max_hash_table_size > max_hash_table_size) {
512 release_aux_buffers();
513 create_aux_buffers(max_uncracked_hashes, max_hash_table_size);
514 set_kernel_args_aux_buf();
515 max_hash_table_size = _max_hash_table_size;
516 max_uncracked_hashes = _max_uncracked_hashes;
517 }
518
519 if (num_uncracked_hashes(salt_val) <= LOW_THRESHOLD)
520 cmp_kernel[gpu_id][salt_val] = kernel_low;
521 else
522 cmp_kernel[gpu_id][salt_val] = kernel_high;
523
524 if (options.verbosity > VERB_LEGACY)
525 fprintf(stderr,
526 "Updated internal tables and buffers for salt %d.\n", salt_val);
527 }
528
extract_info(size_t current_gws,size_t * lws,WORD salt_val)529 int extract_info(size_t current_gws, size_t *lws, WORD salt_val)
530 {
531 current_salt = salt_val;
532
533 HANDLE_CLERROR(clSetKernelArg(cmp_kernel[gpu_id][current_salt], 1, sizeof(cl_mem), &buffer_offset_tables[current_salt]), "Failed setting kernel argument buffer_offset_tables, kernel DES_bs_cmp.\n");
534 HANDLE_CLERROR(clSetKernelArg(cmp_kernel[gpu_id][current_salt], 2, sizeof(cl_mem), &buffer_hash_tables[current_salt]), "Failed setting kernel argument buffer_hash_tables, kernel DES_bs_cmp.\n");
535 HANDLE_CLERROR(clSetKernelArg(cmp_kernel[gpu_id][current_salt], 3, sizeof(DES_hash_check_params), &hash_chk_params[current_salt]), "Failed setting kernel argument num_uncracked_hashes, kernel DES_bs_cmp.\n");
536 if (num_uncracked_hashes(current_salt) <= LOW_THRESHOLD)
537 HANDLE_CLERROR(clSetKernelArg(cmp_kernel[gpu_id][current_salt], 6, sizeof(cl_mem), &buffer_uncracked_hashes[current_salt]), "Failed setting kernel argument buffer_uncracked_hashes, kernel DES_bs_cmp.\n");
538 else
539 HANDLE_CLERROR(clSetKernelArg(cmp_kernel[gpu_id][current_salt], 6, sizeof(cl_mem), &buffer_bitmaps[current_salt]), "Failed setting kernel argument buffer_bitmaps, kernel DES_bs_cmp_high.\n");
540
541 ret_code = clEnqueueNDRangeKernel(queue[gpu_id], cmp_kernel[gpu_id][current_salt], 1, NULL, ¤t_gws, lws, 0, NULL, NULL);
542 HANDLE_CLERROR(ret_code, "Enque kernel DES_bs_cmp failed.\n");
543
544 HANDLE_CLERROR(clEnqueueReadBuffer(queue[gpu_id], buffer_hash_ids, CL_TRUE, 0, sizeof(unsigned int), hash_ids, 0, NULL, NULL), "Failed to read buffer buffer_hash_ids.\n");
545
546 if (hash_ids[0] > num_uncracked_hashes(current_salt)) {
547 fprintf(stderr, "Error, crypt_all kernel.\n");
548 error();
549 }
550
551 if (hash_ids[0]) {
552 HANDLE_CLERROR(clEnqueueReadBuffer(queue[gpu_id], buffer_hash_ids, CL_TRUE, 0, (2 * hash_ids[0] + 1) * sizeof(unsigned int), hash_ids, 0, NULL, NULL), "Failed to read buffer buffer_hash_ids.\n");
553 HANDLE_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], buffer_bitmap_dupe, CL_TRUE, 0, ((hash_table_size(current_salt) - 1)/32 + 1) * sizeof(cl_uint), zero_buffer, 0, NULL, NULL), "Failed to write buffer buffer_bitmap_dupe.\n");
554 HANDLE_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], buffer_hash_ids, CL_TRUE, 0, sizeof(cl_uint), zero_buffer, 0, NULL, NULL), "Failed to write buffer buffer_hash_ids.\n");
555 }
556
557 return hash_ids[0];
558 }
559
init_checking()560 void init_checking()
561 {
562 int i = 0;
563 cmp_kernel = (cl_kernel **) mem_calloc(MAX_GPU_DEVICES, sizeof(cl_kernel *));
564 for (i = 0; i < MAX_GPU_DEVICES; i++)
565 cmp_kernel[i] = (cl_kernel *) mem_calloc(4096, sizeof(cl_kernel));
566 hash_chk_params = (DES_hash_check_params *) mem_calloc(4096, sizeof(DES_hash_check_params));
567 }
568
finish_checking()569 void finish_checking()
570 {
571 int i;
572
573 if (kernel_high) {
574 HANDLE_CLERROR(clReleaseKernel(kernel_high), "Error releasing kernel_high.");
575 kernel_high = 0;
576 }
577 if (kernel_low) {
578 HANDLE_CLERROR(clReleaseKernel(kernel_low), "Error releasing kernel_low.");
579 kernel_low = 0;
580 }
581 for (i = 0; i < MAX_GPU_DEVICES; i++)
582 MEM_FREE(cmp_kernel[i]);
583 MEM_FREE(cmp_kernel);
584 MEM_FREE(hash_chk_params);
585 }
586
opencl_DES_bs_get_hash_0(int index)587 int opencl_DES_bs_get_hash_0(int index)
588 {
589 return hash_tables[current_salt][hash_ids[2 + 2 * index]] & PH_MASK_0;
590 }
591
opencl_DES_bs_get_hash_1(int index)592 int opencl_DES_bs_get_hash_1(int index)
593 {
594 return hash_tables[current_salt][hash_ids[2 + 2 * index]] & PH_MASK_1;
595 }
596
opencl_DES_bs_get_hash_2(int index)597 int opencl_DES_bs_get_hash_2(int index)
598 {
599 return hash_tables[current_salt][hash_ids[2 + 2 * index]] & PH_MASK_2;
600 }
601
opencl_DES_bs_get_hash_3(int index)602 int opencl_DES_bs_get_hash_3(int index)
603 {
604 return hash_tables[current_salt][hash_ids[2 + 2 * index]] & PH_MASK_3;
605 }
606
opencl_DES_bs_get_hash_4(int index)607 int opencl_DES_bs_get_hash_4(int index)
608 {
609 return hash_tables[current_salt][hash_ids[2 + 2 * index]] & PH_MASK_4;
610 }
611
opencl_DES_bs_get_hash_5(int index)612 int opencl_DES_bs_get_hash_5(int index)
613 {
614 return hash_tables[current_salt][hash_ids[2 + 2 * index]] & PH_MASK_5;
615 }
616
opencl_DES_bs_get_hash_6(int index)617 int opencl_DES_bs_get_hash_6(int index)
618 {
619 return hash_tables[current_salt][hash_ids[2 + 2 * index]] & PH_MASK_6;
620 }
621
opencl_DES_bs_cmp_one(void * binary,int index)622 int opencl_DES_bs_cmp_one(void *binary, int index)
623 {
624 if (((int *)binary)[0] == hash_tables[current_salt][hash_ids[2 + 2 * index]])
625 return 1;
626 return 0;
627 }
628
opencl_DES_bs_cmp_exact(char * source,int index)629 int opencl_DES_bs_cmp_exact(char *source, int index)
630 {
631 int *binary = fmt_opencl_DES.methods.binary(source);
632
633 if (binary[1] == hash_tables[current_salt][hash_ids[2 + 2 * index] + hash_table_size(current_salt)])
634 return 1;
635 return 0;
636 }
637
638 /* End of hash checking. */
639
640 typedef union {
641 unsigned char c[8][sizeof(DES_bs_vector)];
642 DES_bs_vector v[8];
643 } key_page;
644
645 #define vxorf(a, b) \
646 ((a) ^ (b))
647 #define vnot(dst, a) \
648 (dst) = ~(a)
649 #define vand(dst, a, b) \
650 (dst) = (a) & (b)
651 #define vor(dst, a, b) \
652 (dst) = (a) | (b)
653 #define vandn(dst, a, b) \
654 (dst) = (a) & ~(b)
655 #define vxor(dst, a, b) \
656 (dst) = vxorf((a), (b))
657 #define vshl(dst, src, shift) \
658 (dst) = (src) << (shift)
659 #define vshr(dst, src, shift) \
660 (dst) = (src) >> (shift)
661 #define vshl1(dst, src) \
662 vshl((dst), (src), 1)
663
664 #define kvtype vtype
665 #define kvand vand
666 #define kvor vor
667 #define kvshl1 vshl1
668 #define kvshl vshl
669 #define kvshr vshr
670
671 #define mask01 0x01010101
672 #define mask02 0x02020202
673 #define mask04 0x04040404
674 #define mask08 0x08080808
675 #define mask10 0x10101010
676 #define mask20 0x20202020
677 #define mask40 0x40404040
678 #define mask80 0x80808080
679
680 #define kvand_shl1_or(dst, src, mask) \
681 kvand(tmp, src, mask); \
682 kvshl1(tmp, tmp); \
683 kvor(dst, dst, tmp)
684
685 #define kvand_shl_or(dst, src, mask, shift) \
686 kvand(tmp, src, mask); \
687 kvshl(tmp, tmp, shift); \
688 kvor(dst, dst, tmp)
689
690 #define kvand_shl1(dst, src, mask) \
691 kvand(tmp, src, mask) ; \
692 kvshl1(dst, tmp)
693
694 #define kvand_or(dst, src, mask) \
695 kvand(tmp, src, mask); \
696 kvor(dst, dst, tmp)
697
698 #define kvand_shr_or(dst, src, mask, shift) \
699 kvand(tmp, src, mask); \
700 kvshr(tmp, tmp, shift); \
701 kvor(dst, dst, tmp)
702
703 #define kvand_shr(dst, src, mask, shift) \
704 kvand(tmp, src, mask); \
705 kvshr(dst, tmp, shift)
706
707 #define LOAD_V \
708 kvtype v0 = *(kvtype *)&vp[0]; \
709 kvtype v1 = *(kvtype *)&vp[1]; \
710 kvtype v2 = *(kvtype *)&vp[2]; \
711 kvtype v3 = *(kvtype *)&vp[3]; \
712 kvtype v4 = *(kvtype *)&vp[4]; \
713 kvtype v5 = *(kvtype *)&vp[5]; \
714 kvtype v6 = *(kvtype *)&vp[6]; \
715 kvtype v7 = *(kvtype *)&vp[7];
716
717 #define FINALIZE_NEXT_KEY_BIT_0g { \
718 kvtype m = mask01, va, vb, tmp; \
719 kvand(va, v0, m); \
720 kvand_shl1(vb, v1, m); \
721 kvand_shl_or(va, v2, m, 2); \
722 kvand_shl_or(vb, v3, m, 3); \
723 kvand_shl_or(va, v4, m, 4); \
724 kvand_shl_or(vb, v5, m, 5); \
725 kvand_shl_or(va, v6, m, 6); \
726 kvand_shl_or(vb, v7, m, 7); \
727 kvor(kp[0], va, vb); \
728 kp += 1; \
729 }
730
731 #define FINALIZE_NEXT_KEY_BIT_1g { \
732 kvtype m = mask02, va, vb, tmp; \
733 kvand_shr(va, v0, m, 1); \
734 kvand(vb, v1, m); \
735 kvand_shl1_or(va, v2, m); \
736 kvand_shl_or(vb, v3, m, 2); \
737 kvand_shl_or(va, v4, m, 3); \
738 kvand_shl_or(vb, v5, m, 4); \
739 kvand_shl_or(va, v6, m, 5); \
740 kvand_shl_or(vb, v7, m, 6); \
741 kvor(kp[0], va, vb); \
742 kp += 1; \
743 }
744
745 #define FINALIZE_NEXT_KEY_BIT_2g { \
746 kvtype m = mask04, va, vb, tmp; \
747 kvand_shr(va, v0, m, 2); \
748 kvand_shr(vb, v1, m, 1); \
749 kvand_or(va, v2, m); \
750 kvand_shl1_or(vb, v3, m); \
751 kvand_shl_or(va, v4, m, 2); \
752 kvand_shl_or(vb, v5, m, 3); \
753 kvand_shl_or(va, v6, m, 4); \
754 kvand_shl_or(vb, v7, m, 5); \
755 kvor(kp[0], va, vb); \
756 kp += 1; \
757 }
758
759 #define FINALIZE_NEXT_KEY_BIT_3g { \
760 kvtype m = mask08, va, vb, tmp; \
761 kvand_shr(va, v0, m, 3); \
762 kvand_shr(vb, v1, m, 2); \
763 kvand_shr_or(va, v2, m, 1); \
764 kvand_or(vb, v3, m); \
765 kvand_shl1_or(va, v4, m); \
766 kvand_shl_or(vb, v5, m, 2); \
767 kvand_shl_or(va, v6, m, 3); \
768 kvand_shl_or(vb, v7, m, 4); \
769 kvor(kp[0], va, vb); \
770 kp += 1; \
771 }
772
773 #define FINALIZE_NEXT_KEY_BIT_4g { \
774 kvtype m = mask10, va, vb, tmp; \
775 kvand_shr(va, v0, m, 4); \
776 kvand_shr(vb, v1, m, 3); \
777 kvand_shr_or(va, v2, m, 2); \
778 kvand_shr_or(vb, v3, m, 1); \
779 kvand_or(va, v4, m); \
780 kvand_shl1_or(vb, v5, m); \
781 kvand_shl_or(va, v6, m, 2); \
782 kvand_shl_or(vb, v7, m, 3); \
783 kvor(kp[0], va, vb); \
784 kp += 1; \
785 }
786
787 #define FINALIZE_NEXT_KEY_BIT_5g { \
788 kvtype m = mask20, va, vb, tmp; \
789 kvand_shr(va, v0, m, 5); \
790 kvand_shr(vb, v1, m, 4); \
791 kvand_shr_or(va, v2, m, 3); \
792 kvand_shr_or(vb, v3, m, 2); \
793 kvand_shr_or(va, v4, m, 1); \
794 kvand_or(vb, v5, m); \
795 kvand_shl1_or(va, v6, m); \
796 kvand_shl_or(vb, v7, m, 2); \
797 kvor(kp[0], va, vb); \
798 kp += 1; \
799 }
800
801 #define FINALIZE_NEXT_KEY_BIT_6g { \
802 kvtype m = mask40, va, vb, tmp; \
803 kvand_shr(va, v0, m, 6); \
804 kvand_shr(vb, v1, m, 5); \
805 kvand_shr_or(va, v2, m, 4); \
806 kvand_shr_or(vb, v3, m, 3); \
807 kvand_shr_or(va, v4, m, 2); \
808 kvand_shr_or(vb, v5, m, 1); \
809 kvand_or(va, v6, m); \
810 kvand_shl1_or(vb, v7, m); \
811 kvor(kp[0], va, vb); \
812 kp += 1; \
813 }
814
des_finalize_int_keys()815 static void des_finalize_int_keys()
816 {
817 key_page *int_key_page[MASK_FMT_INT_PLHDR];
818 unsigned int *final_key_pages[MASK_FMT_INT_PLHDR], i, j;
819
820 for (i = 0; i < MASK_FMT_INT_PLHDR; i++) {
821 int_key_page[i] = (key_page *) mem_alloc(((mask_int_cand.num_int_cand + DES_BS_DEPTH - 1) >> DES_LOG_DEPTH) * sizeof(key_page));
822 final_key_pages[i] = (unsigned int *) mem_alloc(7 * ((mask_int_cand.num_int_cand + DES_BS_DEPTH - 1) >> DES_LOG_DEPTH) * sizeof(unsigned int));
823 memset(int_key_page[i], 0x7f, ((mask_int_cand.num_int_cand + DES_BS_DEPTH - 1) >> DES_LOG_DEPTH) * sizeof(key_page));
824 memset(final_key_pages[i], 0xff, 7 * ((mask_int_cand.num_int_cand + DES_BS_DEPTH - 1) >> DES_LOG_DEPTH) * sizeof(unsigned int));
825 }
826
827 for (i = 0; i < mask_int_cand.num_int_cand && mask_int_cand.int_cand; i++) {
828 j = i >> DES_LOG_DEPTH;
829 int_key_page[0][j].c[(i & (DES_BS_DEPTH - 1)) & 7][(i & (DES_BS_DEPTH - 1)) >> 3] = mask_int_cand.int_cand[i].x[0] & 0xFF;
830 #if MASK_FMT_INT_PLHDR > 1
831 if (mask_skip_ranges[1] != -1)
832 int_key_page[1][j].c[(i & (DES_BS_DEPTH - 1)) & 7][(i & (DES_BS_DEPTH - 1)) >> 3] = mask_int_cand.int_cand[i].x[1] & 0xFF;
833 #endif
834 #if MASK_FMT_INT_PLHDR > 2
835 if (mask_skip_ranges[2] != -1)
836 int_key_page[2][j].c[(i & (DES_BS_DEPTH - 1)) & 7][(i & (DES_BS_DEPTH - 1)) >> 3] = mask_int_cand.int_cand[i].x[2] & 0xFF;
837 #endif
838 #if MASK_FMT_INT_PLHDR > 3
839 if (mask_skip_ranges[3] != -1)
840 int_key_page[3][j].c[(i & (DES_BS_DEPTH - 1)) & 7][(i & (DES_BS_DEPTH - 1)) >> 3] = mask_int_cand.int_cand[i].x[3] & 0xFF;
841 #endif
842 }
843
844 for (j = 0; j < MASK_FMT_INT_PLHDR; j++) {
845 if (mask_skip_ranges == NULL || mask_skip_ranges[j] == -1)
846 continue;
847 for (i = 0; i < ((mask_int_cand.num_int_cand + DES_BS_DEPTH - 1) >> DES_LOG_DEPTH); i++) {
848 DES_bs_vector *kp = (DES_bs_vector *)&final_key_pages[j][7 * i];
849 DES_bs_vector *vp = (DES_bs_vector *)&int_key_page[j][i].v[0];
850 LOAD_V
851 FINALIZE_NEXT_KEY_BIT_0g
852 FINALIZE_NEXT_KEY_BIT_1g
853 FINALIZE_NEXT_KEY_BIT_2g
854 FINALIZE_NEXT_KEY_BIT_3g
855 FINALIZE_NEXT_KEY_BIT_4g
856 FINALIZE_NEXT_KEY_BIT_5g
857 FINALIZE_NEXT_KEY_BIT_6g
858 }
859 HANDLE_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], buffer_int_des_keys, CL_TRUE, j * 7 * ((mask_int_cand.num_int_cand + DES_BS_DEPTH - 1) >> DES_LOG_DEPTH) * sizeof(unsigned int),
860 7 * ((mask_int_cand.num_int_cand + DES_BS_DEPTH - 1) >> DES_LOG_DEPTH) * sizeof(unsigned int), final_key_pages[j], 0, NULL, NULL ), "Failed Copy data to gpu");
861 }
862
863 for (i = 0; i < MASK_FMT_INT_PLHDR; i++) {
864 MEM_FREE(int_key_page[i]);
865 MEM_FREE(final_key_pages[i]);
866 }
867 }
868
opencl_DES_bs_init_index()869 void opencl_DES_bs_init_index()
870 {
871 int p,q,s,t ;
872 int round, index, bit;
873
874 s = 0;
875 t = 0;
876 for (round = 0; round < 16; round++) {
877 s += opencl_DES_ROT[round];
878 for (index = 0; index < 48; index++) {
879 p = opencl_DES_PC2[index];
880 q = p < 28 ? 0 : 28;
881 p += s;
882 while (p >= 28) p -= 28;
883 bit = opencl_DES_PC1[p + q];
884 bit ^= 070;
885 bit -= bit >> 3;
886 bit = 55 - bit;
887 opencl_DES_bs_index768[t++] = bit;
888 }
889 }
890 }
891
DES_bs_init(int block)892 static void DES_bs_init(int block)
893 {
894 int index;
895
896 for (index = 0; index < DES_BS_DEPTH; index++)
897 des_all[block].pxkeys[index] =
898 &des_raw_keys[block].xkeys.c[0][index & 7][index >> 3];
899 }
900
create_keys_buffer(size_t gws,size_t padding)901 void create_keys_buffer(size_t gws, size_t padding)
902 {
903 int i;
904
905 des_all = (des_combined *) mem_alloc((gws + padding) * sizeof(des_combined));
906 des_raw_keys = (opencl_DES_bs_transfer *) mem_alloc((gws + padding) * sizeof(opencl_DES_bs_transfer));
907 des_int_key_loc = (unsigned int *) mem_calloc((gws + padding), sizeof(unsigned int));
908
909 for (i = 0; i < gws; i++)
910 DES_bs_init(i);
911
912 buffer_raw_keys = clCreateBuffer(context[gpu_id], CL_MEM_READ_ONLY, (gws + padding) * sizeof(opencl_DES_bs_transfer), NULL, &ret_code);
913 HANDLE_CLERROR(ret_code, "Create buffer_raw_keys failed.\n");
914
915 buffer_int_key_loc = clCreateBuffer(context[gpu_id], CL_MEM_READ_WRITE, (gws + padding) * sizeof(unsigned int), NULL, &ret_code);
916 HANDLE_CLERROR(ret_code, "Create buffer_int_key_loc failed.\n");
917 }
918
create_int_keys_buffer()919 void create_int_keys_buffer()
920 {
921 unsigned int active_placeholders, i;
922
923 active_placeholders = 1;
924 if (mask_skip_ranges)
925 for (i = 0; i < MASK_FMT_INT_PLHDR; i++) {
926 if (mask_skip_ranges[i] != -1)
927 active_placeholders++;
928 }
929
930 buffer_int_des_keys = clCreateBuffer(context[gpu_id], CL_MEM_READ_ONLY, active_placeholders * 7 * ((mask_int_cand.num_int_cand + DES_BS_DEPTH - 1) >> DES_LOG_DEPTH) * sizeof(unsigned int), NULL, &ret_code);
931 HANDLE_CLERROR(ret_code, "Create buffer_int_des_keys failed.\n");
932 }
933
release_int_keys_buffer()934 void release_int_keys_buffer()
935 {
936 if (buffer_int_des_keys) {
937 HANDLE_CLERROR(clReleaseMemObject(buffer_int_des_keys), "Release buffer_int_des_keys failed.\n");
938 HANDLE_CLERROR(clReleaseKernel(keys_kernel), "Release keys_kernel failed.\n");
939 buffer_int_des_keys = 0;
940 }
941 }
942
release_keys_buffer()943 void release_keys_buffer()
944 {
945 if (buffer_raw_keys) {
946 MEM_FREE(des_all);
947 MEM_FREE(des_raw_keys);
948 MEM_FREE(des_int_key_loc);
949 HANDLE_CLERROR(clReleaseMemObject(buffer_raw_keys), "Release buffer_raw_keys failed.\n");
950 HANDLE_CLERROR(clReleaseMemObject(buffer_int_key_loc), "Release buffer_int_key_loc failed.\n");
951 buffer_raw_keys = 0;
952 }
953 }
954
opencl_DES_bs_set_key(char * key,int index)955 void opencl_DES_bs_set_key(char *key, int index)
956 {
957 unsigned char *dst;
958 unsigned int sector,key_index;
959 unsigned int flag = key[0];
960
961 sector = index >> DES_LOG_DEPTH;
962 key_index = index & (DES_BS_DEPTH - 1);
963 dst = des_all[sector].pxkeys[key_index];
964
965 keys_changed = 1;
966
967 dst[0] = (!flag) ? 0 : key[0];
968 dst[sizeof(DES_bs_vector) * 8] = (!flag) ? 0 : key[1];
969 flag = flag&&key[1] ;
970 dst[sizeof(DES_bs_vector) * 8 * 2] = (!flag) ? 0 : key[2];
971 flag = flag&&key[2];
972 dst[sizeof(DES_bs_vector) * 8 * 3] = (!flag) ? 0 : key[3];
973 flag = flag&&key[3];
974 dst[sizeof(DES_bs_vector) * 8 * 4] = (!flag) ? 0 : key[4];
975 flag = flag&&key[4]&&key[5];
976 dst[sizeof(DES_bs_vector) * 8 * 5] = (!flag) ? 0 : key[5];
977 flag = flag&&key[6];
978 dst[sizeof(DES_bs_vector) * 8 * 6] = (!flag) ? 0 : key[6];
979 dst[sizeof(DES_bs_vector) * 8 * 7] = (!flag) ? 0 : key[7];
980
981 /*
982 if (!key[0]) goto fill8;
983 *dst = key[0];
984 *(dst + sizeof(DES_bs_vector) * 8) = key[1];
985 *(dst + sizeof(DES_bs_vector) * 8 * 2) = key[2];
986 if (!key[1]) goto fill6;
987 if (!key[2]) goto fill5;
988 *(dst + sizeof(DES_bs_vector) * 8 * 3) = key[3];
989 *(dst + sizeof(DES_bs_vector) * 8 * 4) = key[4];
990 if (!key[3]) goto fill4;
991 if (!key[4] || !key[5]) goto fill3;
992 *(dst + sizeof(DES_bs_vector) * 8 * 5) = key[5];
993 if (!key[6]) goto fill2;
994 *(dst + sizeof(DES_bs_vector) * 8 * 6) = key[6];
995 *(dst + sizeof(DES_bs_vector) * 8 * 7) = key[7];
996 return;
997 fill8:
998 dst[0] = 0;
999 dst[sizeof(DES_bs_vector) * 8] = 0;
1000 fill6:
1001 dst[sizeof(DES_bs_vector) * 8 * 2] = 0;
1002 fill5:
1003 dst[sizeof(DES_bs_vector) * 8 * 3] = 0;
1004 fill4:
1005 dst[sizeof(DES_bs_vector) * 8 * 4] = 0;
1006 fill3:
1007 dst[sizeof(DES_bs_vector) * 8 * 5] = 0;
1008 fill2:
1009 dst[sizeof(DES_bs_vector) * 8 * 6] = 0;
1010 dst[sizeof(DES_bs_vector) * 8 * 7] = 0;
1011 */
1012 }
1013
opencl_DES_bs_get_key(int index)1014 char *opencl_DES_bs_get_key(int index)
1015 {
1016 static char out[PLAINTEXT_LENGTH + 1];
1017 unsigned int section, block;
1018 unsigned char *src;
1019 char *dst;
1020
1021 if (hash_ids == NULL || hash_ids[0] == 0 ||
1022 index >= hash_ids[0] || hash_ids[0] > num_uncracked_hashes(current_salt)) {
1023 section = 0;
1024 block = 0;
1025 }
1026 else {
1027 section = hash_ids[2 * index + 1] / 32;
1028 block = hash_ids[2 * index + 1] & 31;
1029
1030 }
1031
1032 if (section > global_work_size) {
1033 //fprintf(stderr, "Get key error! %d "Zu"\n", section, global_work_size);
1034 section = 0;
1035 }
1036
1037 src = des_all[section].pxkeys[block];
1038 dst = out;
1039 while (dst < &out[PLAINTEXT_LENGTH] && (*dst = *src)) {
1040 src += sizeof(DES_bs_vector) * 8;
1041 dst++;
1042 }
1043 *dst = 0;
1044
1045 return out;
1046 }
1047
set_key_mm(char * key,int index)1048 static void set_key_mm(char *key, int index)
1049 {
1050 unsigned int len = strlen(key);
1051 unsigned int i;
1052 unsigned long c;
1053
1054 for (i = 0; i < len; i++) {
1055 c = (unsigned char) key[i];
1056 memset(des_raw_keys[index].xkeys.v[i], c, 8 * sizeof(DES_bs_vector));
1057 }
1058
1059 for (i = len; i < PLAINTEXT_LENGTH; i++)
1060 memset(des_raw_keys[index].xkeys.v[i], 0, 8 * sizeof(DES_bs_vector));
1061
1062 if (!mask_gpu_is_static) {
1063 des_int_key_loc[index] = 0;
1064 for (i = 0; i < MASK_FMT_INT_PLHDR; i++) {
1065 if (mask_skip_ranges[i] != -1) {
1066 des_int_key_loc[index] |= ((mask_int_cand.
1067 int_cpu_mask_ctx->ranges[mask_skip_ranges[i]].offset +
1068 mask_int_cand.int_cpu_mask_ctx->
1069 ranges[mask_skip_ranges[i]].pos) & 0xff) << (i << 3);
1070 }
1071 else
1072 des_int_key_loc[index] |= 0x80 << (i << 3);
1073 }
1074 }
1075
1076 keys_changed = 1;
1077 }
1078
1079 /* des_bs_key arrangement.
1080 iter 0 iter 1 iter n-1
1081 0 w0 w1 w2.. w(gws-1) w0 w1 w2.. w(gws-1) w0 w1 w2.. w(gws-1)
1082 1 w0 w1 w2.. w(gws-1) w0 w1 w2.. w(gws-1) w0 w1 w2.. w(gws-1)
1083 2 w0 w1 w2.. w(gws-1) w0 w1 w2.. w(gws-1) w0 w1 w2.. w(gws-1)
1084 .
1085 .
1086 .
1087 55 w0 w1 w2.. w(gws-1) w0 w1 w2.. w(gws-1) w0 w1 w2.. w(gws-1) */
get_key_mm(int index)1088 static char *get_key_mm(int index)
1089 {
1090 static char out[PLAINTEXT_LENGTH + 1];
1091 unsigned int section, depth, iter;
1092 unsigned char *src, i;
1093 char *dst;
1094
1095 if (hash_ids == NULL || hash_ids[0] == 0 ||
1096 index >= hash_ids[0] || hash_ids[0] > num_uncracked_hashes(current_salt)) {
1097 section = 0;
1098 depth = 0;
1099 iter = 0;
1100 }
1101 else {
1102 section = (hash_ids[2 * index + 1] >> DES_LOG_DEPTH) % process_key_gws;
1103 depth = hash_ids[2 * index + 1] & (DES_BS_DEPTH - 1);
1104 iter = (hash_ids[2 * index + 1] >> DES_LOG_DEPTH) / process_key_gws;
1105 }
1106
1107 if (section > process_key_gws) {
1108 fprintf(stderr, "Get key error! %u "Zu"\n", section,
1109 process_key_gws);
1110 section = 0;
1111 depth = 0;
1112 iter = 0;
1113 }
1114
1115 if (mask_skip_ranges && mask_int_cand.num_int_cand > 1) {
1116 for (i = 0; i < MASK_FMT_INT_PLHDR && mask_skip_ranges[i] != -1; i++)
1117 if (mask_gpu_is_static)
1118 des_raw_keys[section].xkeys.c[static_gpu_locations[i]][depth & 7][depth >> 3] = mask_int_cand.int_cand[iter * 32 + depth].x[i];
1119 else
1120 des_raw_keys[section].xkeys.c[(des_int_key_loc[section] & (0xff << (i * 8))) >> (i * 8)][depth & 7][depth >> 3] = mask_int_cand.int_cand[iter * 32 + depth].x[i];
1121 }
1122
1123 src = des_all[section].pxkeys[depth];
1124 dst = out;
1125 while (dst < &out[PLAINTEXT_LENGTH] && (*dst = *src)) {
1126 src += sizeof(DES_bs_vector) * 8;
1127 dst++;
1128 }
1129 *dst = 0;
1130
1131 return out;
1132 }
1133
opencl_DES_bs_clear_keys()1134 void opencl_DES_bs_clear_keys()
1135 { /* Auto-tune might set hash_ids[0] to some value, which interferes with
1136 set-key/get-key checking during self test. */
1137 hash_ids[0] = 0;
1138 }
1139
create_keys_kernel_set_args(int mask_mode)1140 size_t create_keys_kernel_set_args(int mask_mode)
1141 {
1142 char build_opts[400];
1143 cl_ulong const_cache_size;
1144 int i;
1145
1146 if (mask_mode) {
1147 fmt_opencl_DES.methods.set_key = set_key_mm;
1148 fmt_opencl_DES.methods.get_key = get_key_mm;
1149 }
1150
1151 des_finalize_int_keys();
1152
1153 for (i = 0; i < MASK_FMT_INT_PLHDR; i++)
1154 if (mask_skip_ranges && mask_skip_ranges[i] != -1)
1155 static_gpu_locations[i] = mask_int_cand.int_cpu_mask_ctx->
1156 ranges[mask_skip_ranges[i]].pos;
1157 else
1158 static_gpu_locations[i] = -1;
1159
1160 HANDLE_CLERROR(clGetDeviceInfo(devices[gpu_id], CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof(cl_ulong), &const_cache_size, 0), "failed to get CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE.");
1161
1162 sprintf(build_opts, "-D ITER_COUNT=%u -D MASK_ENABLED=%d -D LOC_0=%d"
1163 #if MASK_FMT_INT_PLHDR > 1
1164 " -D LOC_1=%d "
1165 #endif
1166 #if MASK_FMT_INT_PLHDR > 2
1167 "-D LOC_2=%d "
1168 #endif
1169 #if MASK_FMT_INT_PLHDR > 3
1170 "-D LOC_3=%d"
1171 #endif
1172 " -D IS_STATIC_GPU_MASK=%d -D CONST_CACHE_SIZE=%llu"
1173 , ((mask_int_cand.num_int_cand + DES_BS_DEPTH - 1) >> DES_LOG_DEPTH), mask_mode, static_gpu_locations[0]
1174 #if MASK_FMT_INT_PLHDR > 1
1175 , static_gpu_locations[1]
1176 #endif
1177 #if MASK_FMT_INT_PLHDR > 2
1178 , static_gpu_locations[2]
1179 #endif
1180 #if MASK_FMT_INT_PLHDR > 3
1181 , static_gpu_locations[3]
1182 #endif
1183 , mask_gpu_is_static, (unsigned long long)const_cache_size);
1184
1185 opencl_build_kernel("$JOHN/kernels/DES_bs_finalize_keys_kernel.cl",
1186 gpu_id, build_opts, 0);
1187 keys_kernel = clCreateKernel(program[gpu_id], "DES_bs_finalize_keys", &ret_code);
1188 HANDLE_CLERROR(ret_code, "Failed creating kernel DES_bs_finalize_keys.\n");
1189
1190 HANDLE_CLERROR(clSetKernelArg(keys_kernel, 1, sizeof(cl_mem), &buffer_int_des_keys), "Failed setting kernel argument buffer_int_des_keys, kernel DES_bs_finalize_keys.\n");
1191
1192 return get_kernel_max_lws(gpu_id, keys_kernel);
1193 }
1194
process_keys(size_t current_gws,size_t * lws)1195 void process_keys(size_t current_gws, size_t *lws)
1196 {
1197 process_key_gws = current_gws;
1198
1199 if (keys_changed) {
1200 HANDLE_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], buffer_raw_keys, CL_TRUE, 0, current_gws * sizeof(opencl_DES_bs_transfer), des_raw_keys, 0, NULL, NULL ), "Failed to write buffer buffer_raw_keys.\n");
1201
1202 if (!mask_gpu_is_static)
1203 HANDLE_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], buffer_int_key_loc, CL_TRUE, 0, current_gws * sizeof(unsigned int), des_int_key_loc, 0, NULL, NULL ), "Failed Copy data to gpu");
1204
1205 ret_code = clEnqueueNDRangeKernel(queue[gpu_id], keys_kernel, 1, NULL, ¤t_gws, lws, 0, NULL, NULL);
1206 HANDLE_CLERROR(ret_code, "Enque kernel DES_bs_finalize_keys failed.\n");
1207
1208 keys_changed = 0;
1209 }
1210 }
1211
get_device_name(int id)1212 char *get_device_name(int id)
1213 {
1214 char *d_name;
1215
1216 d_name = (char *) mem_calloc(600, sizeof(char));
1217 HANDLE_CLERROR(clGetDeviceInfo(devices[id], CL_DEVICE_NAME, 600, d_name, NULL), "Failed to get device name.\n");
1218 return d_name;
1219 }
1220
save_lws_config(const char * config_file,int id_gpu,size_t lws,unsigned int forced_global_key)1221 void save_lws_config(const char* config_file, int id_gpu, size_t lws, unsigned int forced_global_key)
1222 {
1223 FILE *file;
1224 char config_file_name[500];
1225 char *d_name;
1226
1227 sprintf(config_file_name, config_file, d_name = get_device_name(id_gpu));
1228 MEM_FREE(d_name);
1229
1230 file = fopen(path_expand(config_file_name), "r");
1231 if (file != NULL) {
1232 fclose(file);
1233 return;
1234 }
1235 file = fopen(path_expand(config_file_name), "w");
1236
1237 #if OS_FLOCK || FCNTL_LOCKS
1238 {
1239 #if FCNTL_LOCKS
1240 struct flock lock;
1241
1242 memset(&lock, 0, sizeof(lock));
1243 lock.l_type = F_WRLCK;
1244 while (fcntl(fileno(file), F_SETLKW, &lock)) {
1245 if (errno != EINTR)
1246 pexit("fcntl(F_WRLCK)");
1247 }
1248 #else
1249 while (flock(fileno(file), LOCK_EX)) {
1250 if (errno != EINTR)
1251 pexit("flock(LOCK_EX)");
1252 }
1253 #endif
1254 }
1255 #endif
1256 fprintf(file, ""Zu" %u", lws, forced_global_key);
1257 fclose(file);
1258 }
1259
restore_lws_config(const char * config_file,int id_gpu,size_t * lws,size_t extern_lws_limit,unsigned int * forced_global_key)1260 int restore_lws_config(const char *config_file, int id_gpu, size_t *lws, size_t extern_lws_limit, unsigned int *forced_global_key)
1261 {
1262 FILE *file;
1263 char config_file_name[500];
1264 char *d_name;
1265 unsigned int param;
1266
1267 sprintf(config_file_name, config_file, d_name = get_device_name(id_gpu));
1268 MEM_FREE(d_name);
1269
1270 file = fopen(path_expand(config_file_name), "r");
1271 if (file == NULL)
1272 return 0;
1273
1274
1275 #if OS_FLOCK || FCNTL_LOCKS
1276 {
1277 #if FCNTL_LOCKS
1278 struct flock lock;
1279
1280 memset(&lock, 0, sizeof(lock));
1281 lock.l_type = F_RDLCK;
1282 while (fcntl(fileno(fp), F_SETLKW, &lock)) {
1283 if (errno != EINTR)
1284 pexit("fcntl(F_RDLCK)");
1285 }
1286 #else
1287 while (flock(fileno(fp), LOCK_SH)) {
1288 if (errno != EINTR)
1289 pexit("flock(LOCK_SH)");
1290 }
1291 #endif
1292 }
1293 #endif
1294 if (fscanf(file, ""Zu" %u", lws, ¶m) != 2 || *lws > extern_lws_limit) {
1295 if (forced_global_key)
1296 *forced_global_key = param;
1297 fclose(file);
1298 return 0;
1299 }
1300
1301 fclose(file);
1302 return 1;
1303 }
1304 #endif /* HAVE_OPENCL */
1305