/*
 * Developed by Claudio André <claudioandre.br at gmail.com> in 2012
 * Based on source code provided by Samuele Giovanni Tonon
 *
 * More information at http://openwall.info/wiki/john/OpenCL-RAWSHA-256
 * More information at http://openwall.info/wiki/john/OpenCL-CISCO4
 *
 * Copyright (c) 2011 Samuele Giovanni Tonon <samu at linuxasylum dot net>
 * Copyright (c) 2012-2016 Claudio André <claudioandre.br at gmail.com>
 * This program comes with ABSOLUTELY NO WARRANTY; express or implied.
 * This is free software, and you are welcome to redistribute it
 * under certain conditions; as expressed here
 * http://www.gnu.org/licenses/gpl-2.0.html
 */
15
#ifdef HAVE_OPENCL

/* John the Ripper plugin stanza: declare, register, or define the format. */
#if FMT_EXTERNS_H
extern struct fmt_main fmt_opencl_rawsha256;
#elif FMT_REGISTERS_H
john_register_one(&fmt_opencl_rawsha256);
#else

#include <string.h>

#include "sha.h"
#include "sha2.h"
#include "johnswap.h"
#include "opencl_common.h"
#include "config.h"
#include "options.h"
#include "opencl_rawsha256.h"
#include "rawSHA256_common.h"

#include "mask_ext.h"
#include "opencl_mask_extras.h"

#define FORMAT_LABEL "raw-SHA256-opencl"
#define FORMAT_NAME ""

#define ALGORITHM_NAME "SHA256 OpenCL"

/* Raw binary hash size equals the SHA-256 digest size. */
#define BINARY_SIZE DIGEST_SIZE
//plaintext: keys to compute the hash function
//saved_idx: offset and length of each plaintext (data is sent using chunks)
static uint32_t *plaintext, *saved_idx;

static cl_mem pass_buffer; //Plaintext buffer.
static cl_mem idx_buffer; //Sizes and offsets buffer.
static cl_kernel prepare_kernel;

//Pinned (page-locked) host-side buffers for fast transfers.
static cl_mem pinned_plaintext, pinned_saved_idx, pinned_int_key_loc;

//Reference to self
static struct fmt_main *self;

//Reference to the first element in salt list
static struct db_main *main_db;

//Device (GPU) buffers
//int_keys: mask to apply
//hash_ids: information about how to recover the cracked password
//bitmap: a bitmap memory space.
//int_key_loc: the position of the mask to apply.
static cl_mem buffer_int_keys, buffer_hash_ids, buffer_bitmap, buffer_int_key_loc;

//Host buffers
//saved_int_key_loc: the position of the mask to apply
//num_loaded_hashes: number of binary hashes transferred/loaded to GPU
//hash_ids: information about how to recover the cracked password
static uint32_t *saved_int_key_loc, num_loaded_hashes, *hash_ids, *saved_bitmap;

//ocl_initialized: a reference counter of the OpenCL objects (expected to be 0 or 1)
static unsigned ocl_initialized = 0;

// Keeps track of whether we should tune for this reset() call.
static int should_tune;

//Used to control partial key transfers.
//key_idx: next free 32-bit word in plaintext[]; offset/offset_idx: how much
//has already been sent to the device by set_key() batching.
static uint32_t key_idx = 0;
static size_t offset = 0, offset_idx = 0;

//Size of the GPU bitmap, in bits.
static uint32_t bitmap_size;

static void load_hash();
static char *get_key(int index);
static void build_kernel();
static void release_kernel();
static void release_mask_buffers(void);

//This file contains auto-tuning routine(s). It has to be included after formats definitions.
#include "opencl_autotune.h"
95
96 /* ------- Helper functions ------- */
get_task_max_work_group_size()97 static size_t get_task_max_work_group_size()
98 {
99 size_t s;
100
101 s = autotune_get_task_max_work_group_size(FALSE, 0, crypt_kernel);
102 s = MIN(s, autotune_get_task_max_work_group_size(FALSE, 0,
103 prepare_kernel));
104 return MIN(s, 512);
105 }
106
get_num_loaded_hashes()107 static uint32_t get_num_loaded_hashes()
108 {
109 uint32_t num_hashes;
110 struct db_salt *current_salt;
111
112 num_hashes = 0;
113 current_salt = main_db->salts;
114
115 do
116 num_hashes += current_salt->count;
117 while ((current_salt = current_salt->next));
118
119 return num_hashes;
120 }
121
crypt_one(int index)122 static uint32_t *crypt_one(int index) {
123 SHA256_CTX ctx;
124 static uint32_t hash[DIGEST_SIZE / sizeof(uint32_t)];
125
126 char * key = get_key(index);
127 int len = strlen(key);
128
129 SHA256_Init(&ctx);
130 SHA256_Update(&ctx, key, len);
131 SHA256_Final((unsigned char *) (hash), &ctx);
132
133 alter_endianity_to_BE(hash, DIGEST_SIZE / sizeof(uint32_t));
134
135 return hash;
136 }
137
138 /* ------- Create and destroy necessary objects ------- */
/*
 * (Re)allocate the host bitmap and its device counterpart, sized from the
 * current bitmap_size (in bits), then bind the device buffer to the crypt
 * kernel. Any previous buffers are released first.
 */
static void create_mask_buffers()
{
	release_mask_buffers();

	// bitmap_size is in bits; allocate enough 32-bit words (+1 for rounding).
	saved_bitmap = (uint32_t *)
		mem_alloc((bitmap_size / 32 + 1) * sizeof(uint32_t));
	buffer_bitmap = clCreateBuffer(context[gpu_id], CL_MEM_WRITE_ONLY,
		(bitmap_size / 32 + 1) * sizeof(uint32_t), NULL, &ret_code);
	HANDLE_CLERROR(ret_code, "Error creating buffer argument buffer_bitmap");

	//Set crypt kernel arguments
	HANDLE_CLERROR(clSetKernelArg(crypt_kernel, 6, sizeof(buffer_bitmap),
		(void *)&buffer_bitmap), "Error setting argument 6");
}
153
release_mask_buffers()154 static void release_mask_buffers()
155 {
156 MEM_FREE(saved_bitmap);
157
158 if (buffer_bitmap)
159 clReleaseMemObject(buffer_bitmap);
160 buffer_bitmap = NULL;
161 }
162
/*
 * Create all OpenCL buffers (pinned host mappings plus device buffers) for a
 * given global work size, set kernel arguments, and zero the host mappings.
 * Paired with release_clobj(); ocl_initialized tracks the pairing.
 */
static void create_clobj(size_t gws, struct fmt_main *self)
{
	uint32_t hash_id_size;
	size_t mask_cand = 1, mask_gws = 1;

	// With GPU-side mask mode, each work-item tests num_int_cand candidates.
	if (mask_int_cand.num_int_cand > 1) {
		mask_cand = mask_int_cand.num_int_cand;
		mask_gws = gws;
	}

	pinned_plaintext = clCreateBuffer(context[gpu_id],
		CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR,
		BUFFER_SIZE * gws, NULL, &ret_code);
	HANDLE_CLERROR(ret_code,
		"Error creating page-locked memory pinned_plaintext");

	plaintext = (uint32_t *) clEnqueueMapBuffer(queue[gpu_id],
		pinned_plaintext, CL_TRUE, CL_MAP_WRITE, 0,
		BUFFER_SIZE * gws, 0, NULL, NULL, &ret_code);
	HANDLE_CLERROR(ret_code, "Error mapping page-locked memory plaintext");

	pinned_saved_idx = clCreateBuffer(context[gpu_id],
		CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR,
		sizeof(uint32_t) * gws, NULL, &ret_code);
	HANDLE_CLERROR(ret_code,
		"Error creating page-locked memory pinned_saved_idx");

	saved_idx = (uint32_t *) clEnqueueMapBuffer(queue[gpu_id],
		pinned_saved_idx, CL_TRUE, CL_MAP_WRITE, 0,
		sizeof(uint32_t) * gws, 0, NULL, NULL, &ret_code);
	HANDLE_CLERROR(ret_code, "Error mapping page-locked memory saved_idx");

	// create arguments (buffers)
	pass_buffer = clCreateBuffer(context[gpu_id], CL_MEM_READ_ONLY,
		BUFFER_SIZE * gws, NULL, &ret_code);
	HANDLE_CLERROR(ret_code, "Error creating buffer argument pass_buffer");

	idx_buffer = clCreateBuffer(context[gpu_id], CL_MEM_READ_ONLY,
		sizeof(uint32_t) * gws, NULL, &ret_code);
	HANDLE_CLERROR(ret_code, "Error creating buffer argument idx_buffer");

	// hash_ids layout: [0] = count, then 3 words per possible crack.
	// NOTE(review): hash_id_size = num_int_cand * gws in 32-bit arithmetic;
	// presumably gws is bounded by gws_limit so this cannot overflow — verify.
	hash_id_size = mask_int_cand.num_int_cand * gws;
	hash_ids = (uint32_t *) mem_alloc(
		hash_id_size * 3 * sizeof(uint32_t) + sizeof(uint32_t));
	buffer_hash_ids = clCreateBuffer(context[gpu_id], CL_MEM_READ_WRITE,
		hash_id_size * 3 * sizeof(uint32_t) + sizeof(uint32_t),
		NULL, &ret_code);

	HANDLE_CLERROR(ret_code, "Error creating buffer argument buffer_buffer_hash_ids");

	//Mask mode
	pinned_int_key_loc = clCreateBuffer(context[gpu_id],
		CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR,
		sizeof(uint32_t) * mask_gws, NULL, &ret_code);
	HANDLE_CLERROR(ret_code,
		"Error creating page-locked memory pinned_int_key_loc");

	saved_int_key_loc = (uint32_t *) clEnqueueMapBuffer(queue[gpu_id],
		pinned_int_key_loc, CL_TRUE, CL_MAP_WRITE, 0,
		sizeof(uint32_t) * mask_gws, 0, NULL, NULL, &ret_code);
	HANDLE_CLERROR(ret_code,
		"Error mapping page-locked memory saved_int_key_loc");

	buffer_int_key_loc = clCreateBuffer(context[gpu_id], CL_MEM_READ_ONLY,
		sizeof(uint32_t) * mask_gws, NULL, &ret_code);
	HANDLE_CLERROR(ret_code,
		"Error creating buffer argument buffer_int_key_loc");

	buffer_int_keys = clCreateBuffer(context[gpu_id], CL_MEM_READ_ONLY,
		4 * mask_cand, NULL, &ret_code);
	HANDLE_CLERROR(ret_code, "Error creating buffer argument buffer_int_keys");

	//Set prepare kernel arguments
	HANDLE_CLERROR(clSetKernelArg(prepare_kernel, 0, sizeof(cl_uint),
		(void *)&mask_int_cand.num_int_cand), "Error setting argument 0");
	HANDLE_CLERROR(clSetKernelArg(prepare_kernel, 1, sizeof(buffer_hash_ids),
		(void *)&buffer_hash_ids), "Error setting argument 1");

	//Set kernel arguments
	HANDLE_CLERROR(clSetKernelArg(crypt_kernel, 0, sizeof(cl_mem),
		(void *)&pass_buffer), "Error setting argument 0");
	HANDLE_CLERROR(clSetKernelArg(crypt_kernel, 1, sizeof(cl_mem),
		(void *)&idx_buffer), "Error setting argument 1");
	HANDLE_CLERROR(clSetKernelArg(crypt_kernel, 2, sizeof(buffer_int_key_loc),
		(void *)&buffer_int_key_loc), "Error setting argument 2");
	HANDLE_CLERROR(clSetKernelArg(crypt_kernel, 3, sizeof(buffer_int_keys),
		(void *)&buffer_int_keys), "Error setting argument 3");
	HANDLE_CLERROR(clSetKernelArg(crypt_kernel, 4, sizeof(cl_uint),
		(void *)&(mask_int_cand.num_int_cand)),
		"Error setting argument 4");
	HANDLE_CLERROR(clSetKernelArg(crypt_kernel, 5, sizeof(buffer_hash_ids),
		(void *)&buffer_hash_ids), "Error setting argument 5");

	//Indicates that the OpenCL objects are initialized.
	ocl_initialized++;

	//Assure buffers have no "trash data".
	memset(plaintext, '\0', BUFFER_SIZE * gws);
	memset(saved_idx, '\0', sizeof(uint32_t) * gws);
	// 0x80 per byte marks "no mask placeholder" in each position.
	memset(saved_int_key_loc, 0x80, sizeof(uint32_t) * mask_gws);
}
264
release_clobj()265 static void release_clobj()
266 {
267 cl_int ret_code;
268
269 if (ocl_initialized) {
270 ret_code = clEnqueueUnmapMemObject(queue[gpu_id], pinned_plaintext,
271 plaintext, 0, NULL, NULL);
272 HANDLE_CLERROR(ret_code, "Error Unmapping keys");
273 ret_code = clEnqueueUnmapMemObject(queue[gpu_id], pinned_saved_idx,
274 saved_idx, 0, NULL, NULL);
275 HANDLE_CLERROR(ret_code, "Error Unmapping indexes");
276 ret_code = clEnqueueUnmapMemObject(queue[gpu_id], pinned_int_key_loc,
277 saved_int_key_loc, 0, NULL, NULL);
278 HANDLE_CLERROR(ret_code, "Error Unmapping key locations");
279 HANDLE_CLERROR(clFinish(queue[gpu_id]),
280 "Error releasing memory mappings");
281
282 ret_code = clReleaseMemObject(pass_buffer);
283 HANDLE_CLERROR(ret_code, "Error Releasing pass_buffer");
284 ret_code = clReleaseMemObject(idx_buffer);
285 HANDLE_CLERROR(ret_code, "Error Releasing idx_buffer");
286
287 MEM_FREE(hash_ids);
288 clReleaseMemObject(buffer_hash_ids);
289 HANDLE_CLERROR(ret_code, "Error Releasing buffer_hash_ids");
290
291 ret_code = clReleaseMemObject(buffer_int_key_loc);
292 HANDLE_CLERROR(ret_code, "Error Releasing buffer_int_key_loc");
293 ret_code = clReleaseMemObject(buffer_int_keys);
294 HANDLE_CLERROR(ret_code, "Error Releasing buffer_int_keys");
295 ret_code = clReleaseMemObject(pinned_plaintext);
296 HANDLE_CLERROR(ret_code, "Error Releasing pinned_plaintext");
297 ret_code = clReleaseMemObject(pinned_saved_idx);
298 HANDLE_CLERROR(ret_code, "Error Releasing pinned_saved_idx");
299 ret_code = clReleaseMemObject(pinned_int_key_loc);
300 HANDLE_CLERROR(ret_code, "Error Releasing pinned_int_key_loc");
301
302 ocl_initialized--;
303 }
304 }
305
306 /* ------- Reset functions ------- */
/*
 * Run the shared auto-tuning machinery to pick LWS/GWS for this format.
 * The GWS upper bound is derived from device memory limits (and further
 * reduced in mask mode, where hash_ids scales with num_int_cand).
 */
static void tune(struct db_main *db)
{
	char *tmp_value;
	size_t gws_limit;
	int autotune_limit = 500;

	// Allow overriding the tuning duration limit (ms) via environment.
	if ((tmp_value = getenv("_GPU_AUTOTUNE_LIMIT")))
		autotune_limit = atoi(tmp_value);

	// Auto-tune / Benchmark / Self-test.
	gws_limit = MIN((0xf << 22) * 4 / BUFFER_SIZE,
		get_max_mem_alloc_size(gpu_id) / BUFFER_SIZE);

	if (options.flags & FLG_MASK_CHK)
		gws_limit = MIN(gws_limit,
			get_max_mem_alloc_size(gpu_id) /
			(mask_int_cand.num_int_cand * 3 * sizeof(uint32_t)));

	//Initialize openCL tuning (library) for this format.
	opencl_init_auto_setup(SEED, 0, NULL,
		warn, 3, self, create_clobj, release_clobj,
		2 * BUFFER_SIZE, gws_limit, db);

	//Auto tune execution from shared/included code.
	autotune_run(self, 1, gws_limit, autotune_limit);
}
333
/*
 * Called by the core whenever the candidate database (or the mask) changes.
 * Rebuilds the kernel if needed and decides whether to (re)run auto-tuning,
 * depending on whether this is the first call, a benchmark, or a mask change.
 */
static void reset(struct db_main *db)
{
	// LWS/GWS captured on the first reset(), reused on later tuning rounds.
	static size_t saved_lws, saved_gws;

	offset = 0;
	offset_idx = 0;
	key_idx = 0;

	if (!db)
		return;

	main_db = db;
	num_loaded_hashes = get_num_loaded_hashes();

	//Adjust kernel parameters and rebuild (if necessary).
	build_kernel();

	if (!should_tune) {
		/* Read LWS/GWS prefs from config or environment */
		opencl_get_user_preferences(FORMAT_LABEL);

		//Save the local and global work sizes.
		saved_lws = local_work_size;
		saved_gws = global_work_size;
	}

	/*
	 * First reset() call. Don't run autotune.
	 * -> If self test is running.
	 * -> If --skip-self-test is running.
	 * -> And if benchmark is NOT running.
	 * Instead, use sane defaults. Tune is going to run later/below.
	 */
	if (!should_tune && (self_test_running || options.flags & FLG_NOTESTS) &&
	    !benchmark_running) {
		opencl_get_sane_lws_gws_values();
		tune(db);

		//Tune later (force a full autotune on a subsequent reset).
		autotuned = 0;
	} else if (!autotuned) {
		//Retrieve LWS/GWS prefs saved on first round
		local_work_size = saved_lws;
		global_work_size = saved_gws;

		tune(db);
	} else if ((options.flags & FLG_MASK_CHK)) {
		//Tune for mask mode (for each mask change).
		// auto-tune for eg. ?a and then a reset for re-tuning for ?a?a
		local_work_size = saved_lws;
		global_work_size = saved_gws;

		tune(db);
	} else {
		//Since it might re-compiled the kernel after tuning.
		if (ocl_initialized > 0)
			release_clobj();

		create_clobj(global_work_size, self);
	}
	should_tune++;
	hash_ids[0] = 0;
	load_hash();
}
398
399 /* ------- Key functions ------- */
clear_keys(void)400 static void clear_keys(void)
401 {
402 offset = 0;
403 offset_idx = 0;
404 key_idx = 0;
405 }
406
/*
 * Append one candidate key to the packed plaintext buffer and record its
 * (word offset << 6 | length) in saved_idx. In mask mode, also encode the
 * byte positions where GPU-side mask placeholders must be substituted.
 * Every TRANSFER_SIZE keys, a partial asynchronous transfer is started.
 */
static void set_key(char *_key, int index)
{

	const uint32_t *key = (uint32_t *) _key;
	int len = strlen(_key);

	// Pack offset (in 32-bit words) and length into a single word.
	saved_idx[index] = (key_idx << 6) | len;

	// Copy 4 bytes at a time; may read a few bytes past the NUL terminator,
	// which is within the caller's key buffer by JtR convention.
	do {
		plaintext[key_idx++] = *key++;
		len -= 4;
	} while (len > 4);

	if (len > 0)
		plaintext[key_idx++] = *key;

	//Mask Mode ranges setup
	if (mask_int_cand.num_int_cand > 1) {
		int i;

		saved_int_key_loc[index] = 0;

		for (i = 0; i < MASK_FMT_INT_PLHDR; i++) {

			// One byte per placeholder: position within the key, or 0x80
			// when the placeholder slot is unused.
			if (mask_skip_ranges[i] != -1) {
				saved_int_key_loc[index] |=
					((mask_int_cand.int_cpu_mask_ctx->
					ranges[mask_skip_ranges[i]].offset +
					mask_int_cand.int_cpu_mask_ctx->
					ranges[mask_skip_ranges[i]].pos) & 0xff)
					<< (i << 3);
			} else
				saved_int_key_loc[index] |= 0x80 << (i << 3);
		}
	}
	//Batch transfers to GPU.
	// NOTE(review): 'offset' (an index count) is used as the word offset for
	// both the plaintext and the idx transfers; this matches upstream usage
	// but presumes fixed-width packing — confirm against the kernel side.
	if ((index % TRANSFER_SIZE) == 0 && (index > 0)) {
		HANDLE_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], pass_buffer,
			CL_FALSE, sizeof(uint32_t) * offset,
			sizeof(uint32_t) * TRANSFER_SIZE,
			plaintext + offset, 0, NULL, NULL),
			"failed in clEnqueueWriteBuffer pass_buffer");
		HANDLE_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], idx_buffer,
			CL_FALSE, sizeof(uint32_t) * offset,
			sizeof(uint32_t) * TRANSFER_SIZE,
			saved_idx + offset, 0, NULL, NULL),
			"failed in clEnqueueWriteBuffer idx_buffer");

		HANDLE_CLERROR(clFlush(queue[gpu_id]), "failed in clFlush");
		offset += TRANSFER_SIZE;
		offset_idx = key_idx;
	}
}
460
/*
 * Reconstruct the plaintext for a given index. After a crypt_all() that
 * flagged candidates, 'index' refers to an entry in hash_ids (base key index
 * plus mask-candidate index); otherwise it is a plain key index. In mask
 * mode the placeholder characters are patched into the recovered string.
 * Returns a static buffer (overwritten on every call).
 */
static char *get_key(int index)
{
	static char *ret;
	int int_index, t, i;

	if (!ret)
		ret = mem_alloc_tiny(PLAINTEXT_LENGTH + 1, MEM_ALIGN_WORD);

	//Mask Mode plaintext recovery
	if (hash_ids == NULL || hash_ids[0] == 0 || index > hash_ids[0]) {
		// No crack info available: treat index as a raw key index.
		t = index;
		int_index = 0;

	} else {
		// hash_ids entry: [1+3i] = key index, [2+3i] = mask candidate index.
		t = hash_ids[1 + 3 * index];
		int_index = hash_ids[2 + 3 * index];
	}

	//Mask Mode plaintext recovery.
	if (t >= global_work_size)
		t = 0;

	// saved_idx packs (word offset << 6 | length); see set_key().
	memcpy(ret, ((char *)&plaintext[saved_idx[t] >> 6]), PLAINTEXT_LENGTH);
	ret[saved_idx[t] & 63] = '\0';

	// Patch mask placeholder characters into their recorded positions.
	if (saved_idx[t] & 63 &&
	    mask_skip_ranges && mask_int_cand.num_int_cand > 1) {
		for (i = 0; i < MASK_FMT_INT_PLHDR && mask_skip_ranges[i] != -1; i++)
			ret[(saved_int_key_loc[t] & (0xff << (i * 8))) >> (i * 8)] =
				mask_int_cand.int_cand[int_index].x[i];
	}

	return ret;

}
496
497 /* ------- Initialization ------- */
build_kernel()498 static void build_kernel()
499 {
500 static int previous_size, num_int_cand;
501
502 char *task = "$JOHN/kernels/sha256_kernel.cl";
503 char opt[MAX_OCLINFO_STRING_LEN];
504
505 bitmap_size = get_bitmap_size_bits(num_loaded_hashes, gpu_id);
506
507 if (previous_size != bitmap_size || num_int_cand != mask_int_cand.num_int_cand) {
508 previous_size = bitmap_size;
509 num_int_cand = mask_int_cand.num_int_cand;
510
511 if (prepare_kernel)
512 release_kernel();
513
514 snprintf(opt, sizeof(opt), "-DBITMAP_SIZE_MINUS1=%u", bitmap_size - 1U);
515
516 if (mask_int_cand.num_int_cand > 1)
517 strncat(opt, " -DGPU_MASK_MODE", 64U);
518
519 opencl_build_kernel(task, gpu_id, opt, 0);
520
521 // create kernel(s) to execute
522 prepare_kernel = clCreateKernel(program[gpu_id], "kernel_prepare",
523 &ret_code);
524 HANDLE_CLERROR(ret_code,
525 "Error creating kernel_prepare. Double-check kernel name?");
526
527 crypt_kernel = clCreateKernel(program[gpu_id],
528 "kernel_crypt_raw", &ret_code);
529 HANDLE_CLERROR(ret_code,
530 "Error creating kernel. Double-check kernel name?");
531 }
532 //Allocate bit array and pass its size to OpenCL.
533 create_mask_buffers();
534 }
535
/*
 * Release both kernels and the program. prepare_kernel is reset to NULL so
 * build_kernel() knows a rebuild is required; crypt_kernel is left to the
 * shared OpenCL layer.
 */
static void release_kernel()
{
	HANDLE_CLERROR(clReleaseKernel(crypt_kernel), "Release kernel");
	HANDLE_CLERROR(clReleaseKernel(prepare_kernel), "Release kernel");
	HANDLE_CLERROR(clReleaseProgram(program[gpu_id]), "Release Program");

	prepare_kernel = NULL;
}
544
init(struct fmt_main * _self)545 static void init(struct fmt_main *_self)
546 {
547 char *tmp_value;
548
549 self = _self;
550 opencl_prepare_dev(gpu_id);
551 mask_int_cand_target = opencl_speed_index(gpu_id) / 300;
552
553 if ((tmp_value = getenv("_GPU_MASK_CAND")))
554 mask_int_cand_target = atoi(tmp_value);
555 }
556
done(void)557 static void done(void)
558 {
559 if (autotuned) {
560 release_clobj();
561 release_kernel();
562 release_mask_buffers();
563 }
564 autotuned = 0;
565 should_tune = 0;
566 ocl_initialized = 0;
567 }
568
/*
 * Populate the host bitmap from every uncracked binary hash in the database.
 * SPREAD_32 derives two bit positions per hash from its first four words;
 * both are set, and the GPU kernel later tests the same two positions.
 */
static void prepare_bit_array()
{
	uint32_t *binary;
	struct db_password *pw;
	struct db_salt *current_salt;

	current_salt = main_db->salts;
#ifdef DEBUG
	fprintf(stderr, "Clear bitmap array\n");
#endif
	// NOTE(review): clears bitmap_size/8+1 bytes of a buffer allocated as
	// (bitmap_size/32+1)*4 bytes — within bounds, but the two size formulas
	// presumably should match; verify against create_mask_buffers().
	memset(saved_bitmap, '\0', (bitmap_size / 8 + 1));

	do {
		pw = current_salt->list;

		do {
			unsigned int bit_mask_x, bit_mask_y;
			binary = (uint32_t *) pw->binary;

			// Skip cracked (binary is NULL once a hash is removed).
			if (binary) {
				SPREAD_32(binary[0], binary[1], binary[2], binary[3],
					(bitmap_size - 1U), bit_mask_x, bit_mask_y)
#ifdef DEBUG
				if (saved_bitmap[bit_mask_x >> 5] & (1U << (bit_mask_x & 31)) &&
				    saved_bitmap[bit_mask_y >> 5] & (1U << (bit_mask_y & 31)))
					fprintf(stderr, "Collision: %u %08x %08x %08x %08x\n",
						num_loaded_hashes, (unsigned int) binary[0],
						bit_mask_x, bit_mask_y,
						saved_bitmap[bit_mask_x >> 5]);
#endif
				saved_bitmap[bit_mask_x >> 5] |= (1U << (bit_mask_x & 31));
				saved_bitmap[bit_mask_y >> 5] |= (1U << (bit_mask_y & 31));
			}
		} while ((pw = pw->next));

	} while ((current_salt = current_salt->next));
}
607
608 /* ------- Send hashes to crack (binary) to GPU ------- */
/* ------- Send hashes to crack (binary) to GPU ------- */
/*
 * Refresh the hash count, rebuild the host bitmap, and synchronously copy
 * it to the device. Called from reset() and whenever crypt_all() detects
 * that hashes were removed (cracked) since the last upload.
 */
static void load_hash()
{
	num_loaded_hashes = get_num_loaded_hashes();

	prepare_bit_array();

	// Blocking write: the bitmap must be on-device before the next kernel run.
	HANDLE_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], buffer_bitmap, CL_TRUE, 0,
		(bitmap_size / 32 + 1) * sizeof(uint32_t),
		saved_bitmap, 0, NULL, NULL),
		"failed in clEnqueueWriteBuffer buffer_bitmap");

	HANDLE_CLERROR(clFinish(queue[gpu_id]), "failed in clFinish");
}
622
623 /* ------- Crypt function ------- */
/*
 * Main crack step: upload remaining key data, run the prepare and crypt
 * kernels, and read back hash_ids (count + triplets identifying possible
 * cracks, to be confirmed on the CPU by cmp_one()/cmp_exact()).
 * Returns the number of flagged candidates; *pcount is scaled by the number
 * of GPU-side mask candidates per key.
 */
static int crypt_all(int *pcount, struct db_salt *_salt)
{
	const int count = *pcount;
	const struct db_salt *salt = _salt;
	size_t gws;
	size_t *lws = local_work_size ? &local_work_size : NULL;

	// Round the key count up to a multiple of the local work size.
	gws = GET_NEXT_MULTIPLE(count, local_work_size);

	//Check if any password was cracked and reload (if necessary)
	if (num_loaded_hashes != salt->count)
		load_hash();

	// kernel_prepare zeroes the device-side hash_ids counter.
	BENCH_CLERROR(clEnqueueNDRangeKernel(queue[gpu_id], prepare_kernel, 1,
		NULL, &gws, lws, 0, NULL, multi_profilingEvent[0]),
		"failed in clEnqueueNDRangeKernel I");

	//Send data to device (only the part not already sent by set_key batching).
	if (key_idx > offset)
		BENCH_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], pass_buffer,
			CL_FALSE, sizeof(uint32_t) * offset,
			sizeof(uint32_t) * (key_idx - offset), plaintext + offset, 0,
			NULL, multi_profilingEvent[1]),
			"failed in clEnqueueWriteBuffer pass_buffer");

	BENCH_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], idx_buffer, CL_FALSE,
		sizeof(uint32_t) * offset,
		sizeof(uint32_t) * (gws - offset),
		saved_idx + offset, 0, NULL, multi_profilingEvent[2]),
		"failed in clEnqueueWriteBuffer idx_buffer");

	if (mask_int_cand.num_int_cand > 1) {
		BENCH_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], buffer_int_key_loc,
			CL_FALSE, 0, 4 * gws, saved_int_key_loc, 0, NULL,
			multi_profilingEvent[5]),
			"failed in clEnqueueWriteBuffer buffer_int_key_loc");

		BENCH_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], buffer_int_keys,
			CL_FALSE, 0, 4 * mask_int_cand.num_int_cand,
			mask_int_cand.int_cand, 0, NULL, multi_profilingEvent[6]),
			"failed in clEnqueueWriteBuffer buffer_int_keys");
	}
	//Enqueue the kernel
	BENCH_CLERROR(clEnqueueNDRangeKernel(queue[gpu_id], crypt_kernel, 1, NULL,
		&gws, lws, 0, NULL, multi_profilingEvent[3]),
		"failed in clEnqueueNDRangeKernel");

	//Possible cracked hashes: first read only the counter word.
	BENCH_CLERROR(clEnqueueReadBuffer(queue[gpu_id], buffer_hash_ids, CL_FALSE,
		0, sizeof(uint32_t), hash_ids,
		0, NULL, multi_profilingEvent[4]),
		"failed in reading data back buffer_hash_ids");

	//Do the work
	BENCH_CLERROR(clFinish(queue[gpu_id]), "failed in clFinish");

#ifdef DEBUG
	if (hash_ids[0])
		fprintf(stderr, "Some checks are going to be done on CPU: %u: %1.4f%%\n", hash_ids[0],
			((double) hash_ids[0]) / (global_work_size * mask_int_cand.num_int_cand) * 100);
#endif
	// Sanity check: the counter can never exceed the candidate space.
	if (hash_ids[0] > global_work_size * mask_int_cand.num_int_cand) {
		fprintf(stderr, "Error, crypt_all() kernel: %u.\n", hash_ids[0]);
		error();
	}

	if (hash_ids[0]) {
		// Fetch the full triplets now that the count is known.
		BENCH_CLERROR(clEnqueueReadBuffer(queue[gpu_id], buffer_hash_ids, CL_FALSE,
			0, (hash_ids[0] * 3 * sizeof(uint32_t) + sizeof(uint32_t)), hash_ids,
			0, NULL, NULL),
			"failed in reading data back buffer_hash_ids");

		//Do the work
		BENCH_CLERROR(clFinish(queue[gpu_id]), "failed in clFinish");
	}
	*pcount *= mask_int_cand.num_int_cand;
	return hash_ids[0];
}
702
703 /* ------- Compare functins ------- */
/*
 * The GPU already filtered candidates against the bitmap; any positive
 * count returned by crypt_all() means there is something to compare.
 */
static int cmp_all(void *binary, int count)
{
	if (count > 0)
		return 1;
	return 0;
}
708
cmp_one(void * binary,int index)709 static int cmp_one(void *binary, int index)
710 {
711 return (hash_ids[3 + 3 * index] == ((uint32_t *) binary)[0]);
712 }
713
/*
 * Final confirmation on the CPU: recompute the full SHA-256 of the candidate
 * and compare all DIGEST_SIZE bytes against the binary parsed from 'source'.
 */
static int cmp_exact(char *source, int index)
{
	uint32_t *binary;
	uint32_t *full_hash;

#ifdef DEBUG
	fprintf(stderr, "Stressing CPU\n");
#endif
	binary = (uint32_t *) sha256_common_binary_BE(source);

	full_hash = crypt_one(index);
	return !memcmp(binary, (void *) full_hash, BINARY_SIZE);
}
727
//Get Hash functions group.
//hash_ids stores 3 words per flagged candidate; word [3 + 3*index] holds the
//partial hash value the core buckets on, masked to increasing widths below.
static int get_hash_0(int index)
{
	return hash_ids[3 + 3 * index] & PH_MASK_0;
}

static int get_hash_1(int index)
{
	return hash_ids[3 + 3 * index] & PH_MASK_1;
}

static int get_hash_2(int index)
{
	return hash_ids[3 + 3 * index] & PH_MASK_2;
}

static int get_hash_3(int index)
{
	return hash_ids[3 + 3 * index] & PH_MASK_3;
}

static int get_hash_4(int index)
{
	return hash_ids[3 + 3 * index] & PH_MASK_4;
}

static int get_hash_5(int index)
{
	return hash_ids[3 + 3 * index] & PH_MASK_5;
}

static int get_hash_6(int index)
{
	return hash_ids[3 + 3 * index] & PH_MASK_6;
}
763
764 /* ------- Format structure ------- */
/* Format descriptor registered with the John core. */
struct fmt_main fmt_opencl_rawsha256 = {
	{
		/* Format parameters. */
		FORMAT_LABEL,
		FORMAT_NAME,
		ALGORITHM_NAME,
		BENCHMARK_COMMENT,
		BENCHMARK_LENGTH,
		0,
		PLAINTEXT_LENGTH,
		BINARY_SIZE,
		BINARY_ALIGN,
		SALT_SIZE,
		SALT_ALIGN,
		MIN_KEYS_PER_CRYPT,
		MAX_KEYS_PER_CRYPT,
		FMT_CASE | FMT_8_BIT | FMT_SPLIT_UNIFIES_CASE | FMT_MASK,
		{NULL},
		/* Accepted ciphertext tag prefixes. */
		{
			HEX_TAG,
			CISCO_TAG
		},
		sha256_common_tests
	}, {
		/* Format methods. */
		init,
		done,
		reset,
		sha256_common_prepare,
		sha256_common_valid,
		sha256_common_split,
		sha256_common_binary_BE,
		fmt_default_salt,
		{NULL},
		fmt_default_source,
		{
			fmt_default_binary_hash_0,
			fmt_default_binary_hash_1,
			fmt_default_binary_hash_2,
			fmt_default_binary_hash_3,
			fmt_default_binary_hash_4,
			fmt_default_binary_hash_5,
			fmt_default_binary_hash_6
		},
		fmt_default_salt_hash,
		NULL,
		fmt_default_set_salt,
		set_key,
		get_key,
		clear_keys,
		crypt_all,
		{
			get_hash_0,
			get_hash_1,
			get_hash_2,
			get_hash_3,
			get_hash_4,
			get_hash_5,
			get_hash_6
		},
		cmp_all,
		cmp_one,
		cmp_exact
	}
};
828
829 #endif /* plugin stanza */
830
831 #endif /* HAVE_OPENCL */
832