/*
 * Copyright (c) 2012-2016, magnum
 * and Copyright (c) 2015, Sayantan Datta <sdatta@openwall.com>
 * This software is hereby released to the general public under
 * the following terms: Redistribution and use in source and binary
 * forms, with or without modification, are permitted.
 */

#include "opencl_device_info.h"
#define AMD_PUTCHAR_NOCAST
#include "opencl_misc.h"
#include "opencl_sha1.h"
#include "opencl_mask.h"

#if BITMAP_SIZE_BITS_LESS_ONE < 0xffffffff
#define BITMAP_SIZE_BITS (BITMAP_SIZE_BITS_LESS_ONE + 1)
#else
/* Deliberately left undefined to force a compile-time error. */
#endif

inline void cmp_final(uint gid,
                      uint iter,
                      __private uint *hash,
                      __global uint *offset_table,
                      __global uint *hash_table,
                      __global uint *return_hashes,
                      volatile __global uint *output,
                      volatile __global uint *bitmap_dupe)
{
	uint t, offset_table_index, hash_table_index;
	unsigned long LO, MI, HI;
	unsigned long p;

	HI = (unsigned long)hash[4];
	MI = ((unsigned long)hash[3] << 32) | (unsigned long)hash[2];
	LO = ((unsigned long)hash[1] << 32) | (unsigned long)hash[0];

	/*
	 * Reduce the 160-bit digest modulo OFFSET_TABLE_SIZE without
	 * 160-bit arithmetic: SHIFT128_OT_SZ and SHIFT64_OT_SZ hold
	 * 2^128 and 2^64 already reduced modulo OFFSET_TABLE_SIZE.
	 */
	p = (HI % OFFSET_TABLE_SIZE) * SHIFT128_OT_SZ;
	p += (MI % OFFSET_TABLE_SIZE) * SHIFT64_OT_SZ;
	p += LO % OFFSET_TABLE_SIZE;
	p %= OFFSET_TABLE_SIZE;
	offset_table_index = (unsigned int)p;

	/* Overflow is possible here in theory, but the chance is extremely low. */
	LO += (unsigned long)offset_table[offset_table_index];

	p = (HI % HASH_TABLE_SIZE) * SHIFT128_HT_SZ;
	p += (MI % HASH_TABLE_SIZE) * SHIFT64_HT_SZ;
	p += LO % HASH_TABLE_SIZE;
	p %= HASH_TABLE_SIZE;
	hash_table_index = (unsigned int)p;

	if (hash_table[hash_table_index] == hash[0] &&
	    hash_table[HASH_TABLE_SIZE + hash_table_index] == hash[1]) {
		/*
		 * Prevent duplicate keys from cracking the same hash.
		 */
		if (!(atomic_or(&bitmap_dupe[hash_table_index / 32],
		                (1U << (hash_table_index % 32))) &
		      (1U << (hash_table_index % 32)))) {
			t = atomic_inc(&output[0]);
			output[1 + 3 * t] = gid;
			output[2 + 3 * t] = iter;
			output[3 + 3 * t] = hash_table_index;
			return_hashes[2 * t] = hash[2];
			return_hashes[2 * t + 1] = hash[3];
		}
	}
}
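
/*
 * Early-reject filter: before the perfect-hash lookup in cmp_final(),
 * cmp() probes SELECT_CMP_STEPS sub-bitmaps, each occupying
 * (BITMAP_SIZE_BITS >> 5) uints of the bitmaps buffer. Each probe picks
 * one bit, indexed by a 16-bit (or full 32-bit) slice of the digest.
 * A candidate that misses any probe cannot be a loaded hash; one that
 * hits all of them may still be a false positive, which cmp_final()
 * resolves. A sketch of one probe, assuming BITMAP_SIZE_BITS is a
 * power of two:
 *
 *   bitmap_index = hash[0] & (BITMAP_SIZE_BITS - 1);  // bit position
 *   uint word = bitmaps[bitmap_index >> 5];           // 32-bit word
 *   uint hit = (word >> (bitmap_index & 31)) & 1U;    // test the bit
 */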

inline void cmp(uint gid,
                uint iter,
                __private uint *hash,
#if USE_LOCAL_BITMAPS
                __local
#else
                __global
#endif
                uint *bitmaps,
                __global uint *offset_table,
                __global uint *hash_table,
                __global uint *return_hashes,
                volatile __global uint *output,
                volatile __global uint *bitmap_dupe)
{
	uint bitmap_index, tmp = 1;

#if SELECT_CMP_STEPS > 4
	bitmap_index = hash[0] & (BITMAP_SIZE_BITS - 1);
	tmp &= (bitmaps[bitmap_index >> 5] >> (bitmap_index & 31)) & 1U;
	bitmap_index = (hash[0] >> 16) & (BITMAP_SIZE_BITS - 1);
	tmp &= (bitmaps[(BITMAP_SIZE_BITS >> 5) + (bitmap_index >> 5)] >> (bitmap_index & 31)) & 1U;
	bitmap_index = hash[1] & (BITMAP_SIZE_BITS - 1);
	tmp &= (bitmaps[(BITMAP_SIZE_BITS >> 4) + (bitmap_index >> 5)] >> (bitmap_index & 31)) & 1U;
	bitmap_index = (hash[1] >> 16) & (BITMAP_SIZE_BITS - 1);
	tmp &= (bitmaps[(BITMAP_SIZE_BITS >> 5) * 3 + (bitmap_index >> 5)] >> (bitmap_index & 31)) & 1U;
	bitmap_index = hash[2] & (BITMAP_SIZE_BITS - 1);
	tmp &= (bitmaps[(BITMAP_SIZE_BITS >> 3) + (bitmap_index >> 5)] >> (bitmap_index & 31)) & 1U;
	bitmap_index = (hash[2] >> 16) & (BITMAP_SIZE_BITS - 1);
	tmp &= (bitmaps[(BITMAP_SIZE_BITS >> 5) * 5 + (bitmap_index >> 5)] >> (bitmap_index & 31)) & 1U;
	bitmap_index = hash[3] & (BITMAP_SIZE_BITS - 1);
	tmp &= (bitmaps[(BITMAP_SIZE_BITS >> 5) * 6 + (bitmap_index >> 5)] >> (bitmap_index & 31)) & 1U;
	bitmap_index = (hash[3] >> 16) & (BITMAP_SIZE_BITS - 1);
	tmp &= (bitmaps[(BITMAP_SIZE_BITS >> 5) * 7 + (bitmap_index >> 5)] >> (bitmap_index & 31)) & 1U;
#elif SELECT_CMP_STEPS > 2
	bitmap_index = hash[3] & (BITMAP_SIZE_BITS - 1);
	tmp &= (bitmaps[bitmap_index >> 5] >> (bitmap_index & 31)) & 1U;
	bitmap_index = hash[2] & (BITMAP_SIZE_BITS - 1);
	tmp &= (bitmaps[(BITMAP_SIZE_BITS >> 5) + (bitmap_index >> 5)] >> (bitmap_index & 31)) & 1U;
	bitmap_index = hash[1] & (BITMAP_SIZE_BITS - 1);
	tmp &= (bitmaps[(BITMAP_SIZE_BITS >> 4) + (bitmap_index >> 5)] >> (bitmap_index & 31)) & 1U;
	bitmap_index = hash[0] & (BITMAP_SIZE_BITS - 1);
	tmp &= (bitmaps[(BITMAP_SIZE_BITS >> 5) * 3 + (bitmap_index >> 5)] >> (bitmap_index & 31)) & 1U;
#elif SELECT_CMP_STEPS > 1
	bitmap_index = hash[3] & (BITMAP_SIZE_BITS - 1);
	tmp &= (bitmaps[bitmap_index >> 5] >> (bitmap_index & 31)) & 1U;
	bitmap_index = hash[2] & (BITMAP_SIZE_BITS - 1);
	tmp &= (bitmaps[(BITMAP_SIZE_BITS >> 5) + (bitmap_index >> 5)] >> (bitmap_index & 31)) & 1U;
#else
	bitmap_index = hash[3] & BITMAP_SIZE_BITS_LESS_ONE;
	tmp &= (bitmaps[bitmap_index >> 5] >> (bitmap_index & 31)) & 1U;
#endif

	if (tmp)
		cmp_final(gid, iter, hash, offset_table, hash_table, return_hashes, output, bitmap_dupe);
}

#define USE_CONST_CACHE \
	(CONST_CACHE_SIZE >= (NUM_INT_KEYS * 4))
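
/*
 * The sha1 kernel hashes one base key per work-item. With an on-GPU
 * mask, each work-item expands its base key into NUM_INT_KEYS
 * candidates by overwriting up to MASK_FMT_INT_PLHDR byte positions
 * (GPU_LOC_0..GPU_LOC_3) with bytes taken from int_keys[i]. The key
 * buffer layout packs, for each work-item, the key's uint offset in
 * the upper bits of index[gid] and its byte length in the low 6 bits:
 *
 *   uint base = index[gid];
 *   keys += base >> 6;     // start of this work-item's key, in uints
 *   uint len = base & 63;  // key length in bytes
 *
 * With TWICE defined, the digest itself is hashed again (SHA-1 over
 * the raw 20-byte binary digest), so W[] has to be rebuilt from the
 * key on every mask iteration rather than once up front.
 */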

__kernel void sha1(__global uint *keys,
                   __global uint *index,
                   __global uint *int_key_loc,
#if USE_CONST_CACHE
                   constant
#else
                   __global
#endif
                   uint *int_keys
#if !defined(__OS_X__) && USE_CONST_CACHE && gpu_amd(DEVICE_INFO)
                   __attribute__((max_constant_size (NUM_INT_KEYS * 4)))
#endif
                   , __global uint *bitmaps,
                   __global uint *offset_table,
                   __global uint *hash_table,
                   __global uint *return_hashes,
                   volatile __global uint *out_hash_ids,
                   volatile __global uint *bitmap_dupe)
{
	uint i;
	uint gid = get_global_id(0);
	uint base = index[gid];
	uint W[16] = { 0 };
	uint len = base & 63;
	uint hash[5];
#if NUM_INT_KEYS > 1 && !IS_STATIC_GPU_MASK
	uint ikl = int_key_loc[gid];
	uint loc0 = ikl & 0xff;
#if MASK_FMT_INT_PLHDR > 1
#if LOC_1 >= 0
	uint loc1 = (ikl & 0xff00) >> 8;
#endif
#endif
#if MASK_FMT_INT_PLHDR > 2
#if LOC_2 >= 0
	uint loc2 = (ikl & 0xff0000) >> 16;
#endif
#endif
#if MASK_FMT_INT_PLHDR > 3
#if LOC_3 >= 0
	uint loc3 = (ikl & 0xff000000) >> 24;
#endif
#endif
#endif

#if !IS_STATIC_GPU_MASK
#define GPU_LOC_0 loc0
#define GPU_LOC_1 loc1
#define GPU_LOC_2 loc2
#define GPU_LOC_3 loc3
#else
#define GPU_LOC_0 LOC_0
#define GPU_LOC_1 LOC_1
#define GPU_LOC_2 LOC_2
#define GPU_LOC_3 LOC_3
#endif

#if USE_LOCAL_BITMAPS
	uint lid = get_local_id(0);
	uint lws = get_local_size(0);
	__local uint s_bitmaps[(BITMAP_SIZE_BITS >> 5) * SELECT_CMP_STEPS];

	/* Stage the bitmaps in local memory, spread across the work-group. */
	for (i = 0; i < (((BITMAP_SIZE_BITS >> 5) * SELECT_CMP_STEPS) / lws); i++)
		s_bitmaps[i * lws + lid] = bitmaps[i * lws + lid];

	barrier(CLK_LOCAL_MEM_FENCE);
#endif

	keys += base >> 6;

#ifndef TWICE
	for (i = 0; i < (len + 3) / 4; i++)
		W[i] = SWAP32(keys[i]);

	PUTCHAR_BE(W, len, 0x80);
	W[15] = len << 3;
#endif

	for (i = 0; i < NUM_INT_KEYS; i++) {
#ifdef TWICE
		uint j;

		for (j = 0; j < (len + 3) / 4; j++)
			W[j] = SWAP32(keys[j]);
		for (; j < 15; j++)
			W[j] = 0;

		PUTCHAR_BE(W, len, 0x80);
		W[15] = len << 3;
#endif
#if NUM_INT_KEYS > 1
		PUTCHAR_BE(W, GPU_LOC_0, (int_keys[i] & 0xff));

#if MASK_FMT_INT_PLHDR > 1
#if LOC_1 >= 0
		PUTCHAR_BE(W, GPU_LOC_1, ((int_keys[i] & 0xff00) >> 8));
#endif
#endif
#if MASK_FMT_INT_PLHDR > 2
#if LOC_2 >= 0
		PUTCHAR_BE(W, GPU_LOC_2, ((int_keys[i] & 0xff0000) >> 16));
#endif
#endif
#if MASK_FMT_INT_PLHDR > 3
#if LOC_3 >= 0
		PUTCHAR_BE(W, GPU_LOC_3, ((int_keys[i] & 0xff000000) >> 24));
#endif
#endif
#endif
		sha1_single(uint, W, hash);
#ifdef TWICE
		/* Second pass: SHA-1 over the raw 20-byte binary digest. */
		W[0] = hash[0];
		W[1] = hash[1];
		W[2] = hash[2];
		W[3] = hash[3];
		W[4] = hash[4];
		W[5] = 0x80000000;
		W[15] = 20 << 3;
		sha1_single_160Z(uint, W, hash);
#endif

		cmp(gid, i, hash,
#if USE_LOCAL_BITMAPS
		    s_bitmaps
#else
		    bitmaps
#endif
		    , offset_table, hash_table, return_hashes, out_hash_ids, bitmap_dupe);
	}
}
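
/*
 * Result layout written by cmp_final(), as a note on the output
 * contract: out_hash_ids[0] holds the running crack count t; for each
 * crack, out_hash_ids[1 + 3*t .. 3 + 3*t] hold the work-item id, the
 * mask iteration and the hash-table index, while return_hashes[2*t]
 * and return_hashes[2*t + 1] carry hash[2] and hash[3], presumably for
 * final verification on the host side. bitmap_dupe guarantees at most
 * one reported crack per hash-table slot.
 */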