/**
 * Author......: See docs/credits.txt
 * License.....: MIT
 */

// hashcat OpenCL kernel for hash-mode 23700 (RAR3 -- see the rar3_t /
// m23700_* identifiers below): shared types, the CRC-32 lookup table and
// the SHA-1 based RAR3 key-derivation helpers.

#ifdef KERNEL_STATIC
#include "inc_vendor.h"
#include "inc_types.h"
#include "inc_platform.cl"
#include "inc_common.cl"
#include "inc_hash_sha1.cl"
#include "inc_cipher_aes.cl"
#endif

// kernel snippets included by the host for single-/multi-hash comparison
#define COMPARE_S "inc_comp_single.cl"
#define COMPARE_M "inc_comp_multi.cl"

// RAR3 KDF iteration count: 0x40000 = 262144 sha1 (pass.salt.counter) rounds
#define ROUNDS 0x40000

#define MIN(a,b) (((a) < (b)) ? (a) : (b))

// per-hash data (esalt): encrypted archive data plus its sizes
typedef struct rar3
{
  u32 data[81920];

  u32 pack_size;
  u32 unpack_size;

} rar3_t;

// per-candidate state carried between the _init/_loop/_comp kernels
typedef struct rar3_tmp
{
  u32 dgst[5]; // running SHA-1 state of the KDF

  u32 w[66]; // 256 byte pass + 8 byte salt

  u32 iv[4]; // AES IV bytes, one collected every ROUNDS/16 iterations (see m23700_loop)

} rar3_tmp_t;

// standard CRC-32 table (reflected polynomial 0xedb88320)
// NOTE(review): only round_crc32*() below reference it here -- presumably
// consumed by the _comp kernel; confirm against the rest of the file
CONSTANT_VK u32a crc32tab[0x100] =
{
  0x00000000, 0x77073096, 0xee0e612c, 0x990951ba,
  0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3,
  0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988,
  0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91,
  0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de,
  0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7,
  0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec,
  0x14015c4f, 0x63066cd9, 0xfa0f3d63, 0x8d080df5,
  0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172,
  0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b,
  0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940,
  0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59,
  0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116,
  0x21b4f4b5, 0x56b3c423, 0xcfba9599, 0xb8bda50f,
  0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
  0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d,
  0x76dc4190, 0x01db7106, 0x98d220bc, 0xefd5102a,
  0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433,
  0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818,
  0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01,
  0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e,
  0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457,
  0x65b0d9c6, 0x12b7e950, 0x8bbeb8ea, 0xfcb9887c,
  0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65,
  0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2,
  0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb,
  0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0,
  0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9,
  0x5005713c, 0x270241aa, 0xbe0b1010, 0xc90c2086,
  0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
  0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4,
  0x59b33d17, 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad,
  0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a,
  0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683,
  0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8,
  0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1,
  0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe,
  0xf762575d, 0x806567cb, 0x196c3671, 0x6e6b06e7,
  0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc,
  0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5,
  0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252,
  0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b,
  0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60,
  0xdf60efc3, 0xa867df55, 0x316e8eef, 0x4669be79,
  0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
  0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f,
  0xc5ba3bbe, 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04,
  0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d,
  0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a,
  0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713,
  0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38,
  0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21,
  0x86d3d2d4, 0xf1d4e242, 0x68ddb3f8, 0x1fda836e,
  0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777,
  0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c,
  0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45,
  0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2,
  0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db,
  0xaed16a4a, 0xd9d65adc, 0x40df0b66, 0x37d83bf0,
  0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
  0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6,
  0xbad03605, 0xcdd70693, 0x54de5729, 0x23d967bf,
  0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94,
  0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d
};
// One table-driven CRC-32 step: fold the low byte of v into the running
// CRC a (right-shifting CRC-32; table lives in LOCAL_AS memory).
DECLSPEC u32 round_crc32 (const u32 a, const u32 v, LOCAL_AS u32 *l_crc32tab)
{
  const u32 k = (a ^ v) & 0xff;

  const u32 s = a >> 8;

  return l_crc32tab[k] ^ s;
}

// Feed up to 16 bytes into the CRC; bytes are taken MSB-first from the
// big-endian packed words buf[0..3], len is clamped to 16 via MIN.
DECLSPEC u32 round_crc32_16 (const u32 crc32, const u32 *buf, const u32 len, LOCAL_AS u32 *l_crc32tab)
{
  const int crc_len = MIN (len, 16);

  u32 c = crc32;

  for (int i = 0; i < crc_len; i++)
  {
    const u32 idx = i / 4;
    const u32 mod = i % 4;
    const u32 sht = (3 - mod) * 8;

    const u32 b = buf[idx] >> sht; // b & 0xff (but already done in round_crc32 ())

    c = round_crc32 (c, b, l_crc32tab);
  }

  return c;
}

// Append the 4 bytes of `append` into the 64-byte big-endian SHA-1 block
// w0..w3 at byte offset len % 64, running sha1_transform () and restarting
// the block (with the spilled carry word) when the block fills.  Callers
// pass the byte-swapped 3-byte round counter and advance ctx.len by 3 only;
// since the counter is < 2^24 its 4th byte is zero, so the fill check below
// uses func_len + 3 and the next append may safely OR into the same word.
DECLSPEC void memcat8c_be (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 len, const u32 append, u32 *digest)
{
  const u32 func_len = len & 63;

  //const u32 mod = func_len & 3;
  const u32 div = func_len / 4;

  // tmp0 = part of `append` landing in word `div`, tmp1 = spill into the
  // following word; computed with the fastest byte-shuffle per vendor
  u32 tmp0;
  u32 tmp1;

  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
  tmp0 = hc_bytealign_be (0, append, func_len);
  tmp1 = hc_bytealign_be (append, 0, func_len);
  #endif

  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV

  #if defined IS_NV
  const int selector = (0x76543210 >> ((func_len & 3) * 4)) & 0xffff;
  #endif

  #if (defined IS_AMD || defined IS_HIP)
  const int selector = l32_from_64_S (0x0706050403020100UL >> ((func_len & 3) * 8));
  #endif

  tmp0 = hc_byte_perm (append, 0, selector);
  tmp1 = hc_byte_perm (0, append, selector);
  #endif

  u32 carry = 0;

  switch (div)
  {
    case  0: w0[0] |= tmp0; w0[1] = tmp1; break;
    case  1: w0[1] |= tmp0; w0[2] = tmp1; break;
    case  2: w0[2] |= tmp0; w0[3] = tmp1; break;
    case  3: w0[3] |= tmp0; w1[0] = tmp1; break;
    case  4: w1[0] |= tmp0; w1[1] = tmp1; break;
    case  5: w1[1] |= tmp0; w1[2] = tmp1; break;
    case  6: w1[2] |= tmp0; w1[3] = tmp1; break;
    case  7: w1[3] |= tmp0; w2[0] = tmp1; break;
    case  8: w2[0] |= tmp0; w2[1] = tmp1; break;
    case  9: w2[1] |= tmp0; w2[2] = tmp1; break;
    case 10: w2[2] |= tmp0; w2[3] = tmp1; break;
    case 11: w2[3] |= tmp0; w3[0] = tmp1; break;
    case 12: w3[0] |= tmp0; w3[1] = tmp1; break;
    case 13: w3[1] |= tmp0; w3[2] = tmp1; break;
    case 14: w3[2] |= tmp0; w3[3] = tmp1; break;
    default: w3[3] |= tmp0; carry = tmp1; break; // this is a bit weird but helps to workaround AMD JiT compiler segfault if set to case 15:
  }

  const u32 new_len = func_len + 3; // counter counts as 3 bytes, not 4

  if (new_len >= 64)
  {
    sha1_transform (w0, w1, w2, w3, digest);

    // restart the block with the word that spilled past the boundary
    w0[0] = carry;
    w0[1] = 0;
    w0[2] = 0;
    w0[3] = 0;
    w1[0] = 0;
    w1[1] = 0;
    w1[2] = 0;
    w1[3] = 0;
    w2[0] = 0;
    w2[1] = 0;
    w2[2] = 0;
    w2[3] = 0;
    w3[0] = 0;
    w3[1] = 0;
    w3[2] = 0;
    w3[3] = 0;
  }
}

// only change in this function compared to OpenCL/inc_hash_sha1.cl is that it returns
// the expanded 64 byte buffer w0_t..wf_t in t[]:
//
// CPU path: classic rolling 16-word message schedule; t[] gets the final
// 16 schedule words (rounds 64..79).  GPU path: the schedule is fully
// expanded into 80 named words, using the doubled-distance recurrence
// (rotl 2u for words 0x20..0x3f, rotl 4u for 0x40..0x4f) so that the last
// 16 words exist as independent values to copy into t[].
DECLSPEC void sha1_transform_rar29 (const u32 *w0, const u32 *w1, const u32 *w2, const u32 *w3, u32 *digest, u32 *t)
{
  u32 a = digest[0];
  u32 b = digest[1];
  u32 c = digest[2];
  u32 d = digest[3];
  u32 e = digest[4];

  #ifdef IS_CPU

  u32 w0_t = w0[0];
  u32 w1_t = w0[1];
  u32 w2_t = w0[2];
  u32 w3_t = w0[3];
  u32 w4_t = w1[0];
  u32 w5_t = w1[1];
  u32 w6_t = w1[2];
  u32 w7_t = w1[3];
  u32 w8_t = w2[0];
  u32 w9_t = w2[1];
  u32 wa_t = w2[2];
  u32 wb_t = w2[3];
  u32 wc_t = w3[0];
  u32 wd_t = w3[1];
  u32 we_t = w3[2];
  u32 wf_t = w3[3];

  #define K SHA1C00

  SHA1_STEP_S (SHA1_F0o, a, b, c, d, e, w0_t);
  SHA1_STEP_S (SHA1_F0o, e, a, b, c, d, w1_t);
  SHA1_STEP_S (SHA1_F0o, d, e, a, b, c, w2_t);
  SHA1_STEP_S (SHA1_F0o, c, d, e, a, b, w3_t);
  SHA1_STEP_S (SHA1_F0o, b, c, d, e, a, w4_t);
  SHA1_STEP_S (SHA1_F0o, a, b, c, d, e, w5_t);
  SHA1_STEP_S (SHA1_F0o, e, a, b, c, d, w6_t);
  SHA1_STEP_S (SHA1_F0o, d, e, a, b, c, w7_t);
  SHA1_STEP_S (SHA1_F0o, c, d, e, a, b, w8_t);
  SHA1_STEP_S (SHA1_F0o, b, c, d, e, a, w9_t);
  SHA1_STEP_S (SHA1_F0o, a, b, c, d, e, wa_t);
  SHA1_STEP_S (SHA1_F0o, e, a, b, c, d, wb_t);
  SHA1_STEP_S (SHA1_F0o, d, e, a, b, c, wc_t);
  SHA1_STEP_S (SHA1_F0o, c, d, e, a, b, wd_t);
  SHA1_STEP_S (SHA1_F0o, b, c, d, e, a, we_t);
  SHA1_STEP_S (SHA1_F0o, a, b, c, d, e, wf_t);
  w0_t = hc_rotl32_S ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP_S (SHA1_F0o, e, a, b, c, d, w0_t);
  w1_t = hc_rotl32_S ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP_S (SHA1_F0o, d, e, a, b, c, w1_t);
  w2_t = hc_rotl32_S ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP_S (SHA1_F0o, c, d, e, a, b, w2_t);
  w3_t = hc_rotl32_S ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP_S (SHA1_F0o, b, c, d, e, a, w3_t);

  #undef K
  #define K SHA1C01

  w4_t = hc_rotl32_S ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP_S (SHA1_F1, a, b, c, d, e, w4_t);
  w5_t = hc_rotl32_S ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP_S (SHA1_F1, e, a, b, c, d, w5_t);
  w6_t = hc_rotl32_S ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP_S (SHA1_F1, d, e, a, b, c, w6_t);
  w7_t = hc_rotl32_S ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP_S (SHA1_F1, c, d, e, a, b, w7_t);
  w8_t = hc_rotl32_S ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP_S (SHA1_F1, b, c, d, e, a, w8_t);
  w9_t = hc_rotl32_S ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP_S (SHA1_F1, a, b, c, d, e, w9_t);
  wa_t = hc_rotl32_S ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP_S (SHA1_F1, e, a, b, c, d, wa_t);
  wb_t = hc_rotl32_S ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP_S (SHA1_F1, d, e, a, b, c, wb_t);
  wc_t = hc_rotl32_S ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP_S (SHA1_F1, c, d, e, a, b, wc_t);
  wd_t = hc_rotl32_S ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP_S (SHA1_F1, b, c, d, e, a, wd_t);
  we_t = hc_rotl32_S ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP_S (SHA1_F1, a, b, c, d, e, we_t);
  wf_t = hc_rotl32_S ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP_S (SHA1_F1, e, a, b, c, d, wf_t);
  w0_t = hc_rotl32_S ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP_S (SHA1_F1, d, e, a, b, c, w0_t);
  w1_t = hc_rotl32_S ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP_S (SHA1_F1, c, d, e, a, b, w1_t);
  w2_t = hc_rotl32_S ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP_S (SHA1_F1, b, c, d, e, a, w2_t);
  w3_t = hc_rotl32_S ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP_S (SHA1_F1, a, b, c, d, e, w3_t);
  w4_t = hc_rotl32_S ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP_S (SHA1_F1, e, a, b, c, d, w4_t);
  w5_t = hc_rotl32_S ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP_S (SHA1_F1, d, e, a, b, c, w5_t);
  w6_t = hc_rotl32_S ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP_S (SHA1_F1, c, d, e, a, b, w6_t);
  w7_t = hc_rotl32_S ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP_S (SHA1_F1, b, c, d, e, a, w7_t);

  #undef K
  #define K SHA1C02

  w8_t = hc_rotl32_S ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP_S (SHA1_F2o, a, b, c, d, e, w8_t);
  w9_t = hc_rotl32_S ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP_S (SHA1_F2o, e, a, b, c, d, w9_t);
  wa_t = hc_rotl32_S ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP_S (SHA1_F2o, d, e, a, b, c, wa_t);
  wb_t = hc_rotl32_S ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP_S (SHA1_F2o, c, d, e, a, b, wb_t);
  wc_t = hc_rotl32_S ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP_S (SHA1_F2o, b, c, d, e, a, wc_t);
  wd_t = hc_rotl32_S ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP_S (SHA1_F2o, a, b, c, d, e, wd_t);
  we_t = hc_rotl32_S ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP_S (SHA1_F2o, e, a, b, c, d, we_t);
  wf_t = hc_rotl32_S ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP_S (SHA1_F2o, d, e, a, b, c, wf_t);
  w0_t = hc_rotl32_S ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP_S (SHA1_F2o, c, d, e, a, b, w0_t);
  w1_t = hc_rotl32_S ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP_S (SHA1_F2o, b, c, d, e, a, w1_t);
  w2_t = hc_rotl32_S ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP_S (SHA1_F2o, a, b, c, d, e, w2_t);
  w3_t = hc_rotl32_S ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP_S (SHA1_F2o, e, a, b, c, d, w3_t);
  w4_t = hc_rotl32_S ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP_S (SHA1_F2o, d, e, a, b, c, w4_t);
  w5_t = hc_rotl32_S ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP_S (SHA1_F2o, c, d, e, a, b, w5_t);
  w6_t = hc_rotl32_S ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP_S (SHA1_F2o, b, c, d, e, a, w6_t);
  w7_t = hc_rotl32_S ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP_S (SHA1_F2o, a, b, c, d, e, w7_t);
  w8_t = hc_rotl32_S ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP_S (SHA1_F2o, e, a, b, c, d, w8_t);
  w9_t = hc_rotl32_S ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP_S (SHA1_F2o, d, e, a, b, c, w9_t);
  wa_t = hc_rotl32_S ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP_S (SHA1_F2o, c, d, e, a, b, wa_t);
  wb_t = hc_rotl32_S ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP_S (SHA1_F2o, b, c, d, e, a, wb_t);

  #undef K
  #define K SHA1C03

  wc_t = hc_rotl32_S ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP_S (SHA1_F1, a, b, c, d, e, wc_t);
  wd_t = hc_rotl32_S ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP_S (SHA1_F1, e, a, b, c, d, wd_t);
  we_t = hc_rotl32_S ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP_S (SHA1_F1, d, e, a, b, c, we_t);
  wf_t = hc_rotl32_S ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP_S (SHA1_F1, c, d, e, a, b, wf_t);
  w0_t = hc_rotl32_S ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP_S (SHA1_F1, b, c, d, e, a, w0_t);
  w1_t = hc_rotl32_S ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP_S (SHA1_F1, a, b, c, d, e, w1_t);
  w2_t = hc_rotl32_S ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP_S (SHA1_F1, e, a, b, c, d, w2_t);
  w3_t = hc_rotl32_S ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP_S (SHA1_F1, d, e, a, b, c, w3_t);
  w4_t = hc_rotl32_S ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP_S (SHA1_F1, c, d, e, a, b, w4_t);
  w5_t = hc_rotl32_S ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP_S (SHA1_F1, b, c, d, e, a, w5_t);
  w6_t = hc_rotl32_S ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP_S (SHA1_F1, a, b, c, d, e, w6_t);
  w7_t = hc_rotl32_S ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP_S (SHA1_F1, e, a, b, c, d, w7_t);
  w8_t = hc_rotl32_S ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP_S (SHA1_F1, d, e, a, b, c, w8_t);
  w9_t = hc_rotl32_S ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP_S (SHA1_F1, c, d, e, a, b, w9_t);
  wa_t = hc_rotl32_S ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP_S (SHA1_F1, b, c, d, e, a, wa_t);
  wb_t = hc_rotl32_S ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP_S (SHA1_F1, a, b, c, d, e, wb_t);
  wc_t = hc_rotl32_S ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP_S (SHA1_F1, e, a, b, c, d, wc_t);
  wd_t = hc_rotl32_S ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP_S (SHA1_F1, d, e, a, b, c, wd_t);
  we_t = hc_rotl32_S ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP_S (SHA1_F1, c, d, e, a, b, we_t);
  wf_t = hc_rotl32_S ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP_S (SHA1_F1, b, c, d, e, a, wf_t);

  // export the final schedule state (rounds 64..79) to the caller
  t[ 0] = w0_t;
  t[ 1] = w1_t;
  t[ 2] = w2_t;
  t[ 3] = w3_t;
  t[ 4] = w4_t;
  t[ 5] = w5_t;
  t[ 6] = w6_t;
  t[ 7] = w7_t;
  t[ 8] = w8_t;
  t[ 9] = w9_t;
  t[10] = wa_t;
  t[11] = wb_t;
  t[12] = wc_t;
  t[13] = wd_t;
  t[14] = we_t;
  t[15] = wf_t;

  #undef K

  #else

  u32 w00_t = w0[0];
  u32 w01_t = w0[1];
  u32 w02_t = w0[2];
  u32 w03_t = w0[3];
  u32 w04_t = w1[0];
  u32 w05_t = w1[1];
  u32 w06_t = w1[2];
  u32 w07_t = w1[3];
  u32 w08_t = w2[0];
  u32 w09_t = w2[1];
  u32 w0a_t = w2[2];
  u32 w0b_t = w2[3];
  u32 w0c_t = w3[0];
  u32 w0d_t = w3[1];
  u32 w0e_t = w3[2];
  u32 w0f_t = w3[3];
  u32 w10_t;
  u32 w11_t;
  u32 w12_t;
  u32 w13_t;
  u32 w14_t;
  u32 w15_t;
  u32 w16_t;
  u32 w17_t;
  u32 w18_t;
  u32 w19_t;
  u32 w1a_t;
  u32 w1b_t;
  u32 w1c_t;
  u32 w1d_t;
  u32 w1e_t;
  u32 w1f_t;
  u32 w20_t;
  u32 w21_t;
  u32 w22_t;
  u32 w23_t;
  u32 w24_t;
  u32 w25_t;
  u32 w26_t;
  u32 w27_t;
  u32 w28_t;
  u32 w29_t;
  u32 w2a_t;
  u32 w2b_t;
  u32 w2c_t;
  u32 w2d_t;
  u32 w2e_t;
  u32 w2f_t;
  u32 w30_t;
  u32 w31_t;
  u32 w32_t;
  u32 w33_t;
  u32 w34_t;
  u32 w35_t;
  u32 w36_t;
  u32 w37_t;
  u32 w38_t;
  u32 w39_t;
  u32 w3a_t;
  u32 w3b_t;
  u32 w3c_t;
  u32 w3d_t;
  u32 w3e_t;
  u32 w3f_t;
  u32 w40_t;
  u32 w41_t;
  u32 w42_t;
  u32 w43_t;
  u32 w44_t;
  u32 w45_t;
  u32 w46_t;
  u32 w47_t;
  u32 w48_t;
  u32 w49_t;
  u32 w4a_t;
  u32 w4b_t;
  u32 w4c_t;
  u32 w4d_t;
  u32 w4e_t;
  u32 w4f_t;

  #define K SHA1C00

  SHA1_STEP_S (SHA1_F0o, a, b, c, d, e, w00_t);
  SHA1_STEP_S (SHA1_F0o, e, a, b, c, d, w01_t);
  SHA1_STEP_S (SHA1_F0o, d, e, a, b, c, w02_t);
  SHA1_STEP_S (SHA1_F0o, c, d, e, a, b, w03_t);
  SHA1_STEP_S (SHA1_F0o, b, c, d, e, a, w04_t);
  SHA1_STEP_S (SHA1_F0o, a, b, c, d, e, w05_t);
  SHA1_STEP_S (SHA1_F0o, e, a, b, c, d, w06_t);
  SHA1_STEP_S (SHA1_F0o, d, e, a, b, c, w07_t);
  SHA1_STEP_S (SHA1_F0o, c, d, e, a, b, w08_t);
  SHA1_STEP_S (SHA1_F0o, b, c, d, e, a, w09_t);
  SHA1_STEP_S (SHA1_F0o, a, b, c, d, e, w0a_t);
  SHA1_STEP_S (SHA1_F0o, e, a, b, c, d, w0b_t);
  SHA1_STEP_S (SHA1_F0o, d, e, a, b, c, w0c_t);
  SHA1_STEP_S (SHA1_F0o, c, d, e, a, b, w0d_t);
  SHA1_STEP_S (SHA1_F0o, b, c, d, e, a, w0e_t);
  SHA1_STEP_S (SHA1_F0o, a, b, c, d, e, w0f_t);
  w10_t = hc_rotl32_S ((w0d_t ^ w08_t ^ w02_t ^ w00_t), 1u); SHA1_STEP_S (SHA1_F0o, e, a, b, c, d, w10_t);
  w11_t = hc_rotl32_S ((w0e_t ^ w09_t ^ w03_t ^ w01_t), 1u); SHA1_STEP_S (SHA1_F0o, d, e, a, b, c, w11_t);
  w12_t = hc_rotl32_S ((w0f_t ^ w0a_t ^ w04_t ^ w02_t), 1u); SHA1_STEP_S (SHA1_F0o, c, d, e, a, b, w12_t);
  w13_t = hc_rotl32_S ((w10_t ^ w0b_t ^ w05_t ^ w03_t), 1u); SHA1_STEP_S (SHA1_F0o, b, c, d, e, a, w13_t);

  #undef K
  #define K SHA1C01

  w14_t = hc_rotl32_S ((w11_t ^ w0c_t ^ w06_t ^ w04_t), 1u); SHA1_STEP_S (SHA1_F1, a, b, c, d, e, w14_t);
  w15_t = hc_rotl32_S ((w12_t ^ w0d_t ^ w07_t ^ w05_t), 1u); SHA1_STEP_S (SHA1_F1, e, a, b, c, d, w15_t);
  w16_t = hc_rotl32_S ((w13_t ^ w0e_t ^ w08_t ^ w06_t), 1u); SHA1_STEP_S (SHA1_F1, d, e, a, b, c, w16_t);
  w17_t = hc_rotl32_S ((w14_t ^ w0f_t ^ w09_t ^ w07_t), 1u); SHA1_STEP_S (SHA1_F1, c, d, e, a, b, w17_t);
  w18_t = hc_rotl32_S ((w15_t ^ w10_t ^ w0a_t ^ w08_t), 1u); SHA1_STEP_S (SHA1_F1, b, c, d, e, a, w18_t);
  w19_t = hc_rotl32_S ((w16_t ^ w11_t ^ w0b_t ^ w09_t), 1u); SHA1_STEP_S (SHA1_F1, a, b, c, d, e, w19_t);
  w1a_t = hc_rotl32_S ((w17_t ^ w12_t ^ w0c_t ^ w0a_t), 1u); SHA1_STEP_S (SHA1_F1, e, a, b, c, d, w1a_t);
  w1b_t = hc_rotl32_S ((w18_t ^ w13_t ^ w0d_t ^ w0b_t), 1u); SHA1_STEP_S (SHA1_F1, d, e, a, b, c, w1b_t);
  w1c_t = hc_rotl32_S ((w19_t ^ w14_t ^ w0e_t ^ w0c_t), 1u); SHA1_STEP_S (SHA1_F1, c, d, e, a, b, w1c_t);
  w1d_t = hc_rotl32_S ((w1a_t ^ w15_t ^ w0f_t ^ w0d_t), 1u); SHA1_STEP_S (SHA1_F1, b, c, d, e, a, w1d_t);
  w1e_t = hc_rotl32_S ((w1b_t ^ w16_t ^ w10_t ^ w0e_t), 1u); SHA1_STEP_S (SHA1_F1, a, b, c, d, e, w1e_t);
  w1f_t = hc_rotl32_S ((w1c_t ^ w17_t ^ w11_t ^ w0f_t), 1u); SHA1_STEP_S (SHA1_F1, e, a, b, c, d, w1f_t);
  // from here on the schedule uses the doubled-distance recurrence (rotl 2u)
  w20_t = hc_rotl32_S ((w1a_t ^ w10_t ^ w04_t ^ w00_t), 2u); SHA1_STEP_S (SHA1_F1, d, e, a, b, c, w20_t);
  w21_t = hc_rotl32_S ((w1b_t ^ w11_t ^ w05_t ^ w01_t), 2u); SHA1_STEP_S (SHA1_F1, c, d, e, a, b, w21_t);
  w22_t = hc_rotl32_S ((w1c_t ^ w12_t ^ w06_t ^ w02_t), 2u); SHA1_STEP_S (SHA1_F1, b, c, d, e, a, w22_t);
  w23_t = hc_rotl32_S ((w1d_t ^ w13_t ^ w07_t ^ w03_t), 2u); SHA1_STEP_S (SHA1_F1, a, b, c, d, e, w23_t);
  w24_t = hc_rotl32_S ((w1e_t ^ w14_t ^ w08_t ^ w04_t), 2u); SHA1_STEP_S (SHA1_F1, e, a, b, c, d, w24_t);
  w25_t = hc_rotl32_S ((w1f_t ^ w15_t ^ w09_t ^ w05_t), 2u); SHA1_STEP_S (SHA1_F1, d, e, a, b, c, w25_t);
  w26_t = hc_rotl32_S ((w20_t ^ w16_t ^ w0a_t ^ w06_t), 2u); SHA1_STEP_S (SHA1_F1, c, d, e, a, b, w26_t);
  w27_t = hc_rotl32_S ((w21_t ^ w17_t ^ w0b_t ^ w07_t), 2u); SHA1_STEP_S (SHA1_F1, b, c, d, e, a, w27_t);

  #undef K
  #define K SHA1C02

  w28_t = hc_rotl32_S ((w22_t ^ w18_t ^ w0c_t ^ w08_t), 2u); SHA1_STEP_S (SHA1_F2o, a, b, c, d, e, w28_t);
  w29_t = hc_rotl32_S ((w23_t ^ w19_t ^ w0d_t ^ w09_t), 2u); SHA1_STEP_S (SHA1_F2o, e, a, b, c, d, w29_t);
  w2a_t = hc_rotl32_S ((w24_t ^ w1a_t ^ w0e_t ^ w0a_t), 2u); SHA1_STEP_S (SHA1_F2o, d, e, a, b, c, w2a_t);
  w2b_t = hc_rotl32_S ((w25_t ^ w1b_t ^ w0f_t ^ w0b_t), 2u); SHA1_STEP_S (SHA1_F2o, c, d, e, a, b, w2b_t);
  w2c_t = hc_rotl32_S ((w26_t ^ w1c_t ^ w10_t ^ w0c_t), 2u); SHA1_STEP_S (SHA1_F2o, b, c, d, e, a, w2c_t);
  w2d_t = hc_rotl32_S ((w27_t ^ w1d_t ^ w11_t ^ w0d_t), 2u); SHA1_STEP_S (SHA1_F2o, a, b, c, d, e, w2d_t);
  w2e_t = hc_rotl32_S ((w28_t ^ w1e_t ^ w12_t ^ w0e_t), 2u); SHA1_STEP_S (SHA1_F2o, e, a, b, c, d, w2e_t);
  w2f_t = hc_rotl32_S ((w29_t ^ w1f_t ^ w13_t ^ w0f_t), 2u); SHA1_STEP_S (SHA1_F2o, d, e, a, b, c, w2f_t);
  w30_t = hc_rotl32_S ((w2a_t ^ w20_t ^ w14_t ^ w10_t), 2u); SHA1_STEP_S (SHA1_F2o, c, d, e, a, b, w30_t);
  w31_t = hc_rotl32_S ((w2b_t ^ w21_t ^ w15_t ^ w11_t), 2u); SHA1_STEP_S (SHA1_F2o, b, c, d, e, a, w31_t);
  w32_t = hc_rotl32_S ((w2c_t ^ w22_t ^ w16_t ^ w12_t), 2u); SHA1_STEP_S (SHA1_F2o, a, b, c, d, e, w32_t);
  w33_t = hc_rotl32_S ((w2d_t ^ w23_t ^ w17_t ^ w13_t), 2u); SHA1_STEP_S (SHA1_F2o, e, a, b, c, d, w33_t);
  w34_t = hc_rotl32_S ((w2e_t ^ w24_t ^ w18_t ^ w14_t), 2u); SHA1_STEP_S (SHA1_F2o, d, e, a, b, c, w34_t);
  w35_t = hc_rotl32_S ((w2f_t ^ w25_t ^ w19_t ^ w15_t), 2u); SHA1_STEP_S (SHA1_F2o, c, d, e, a, b, w35_t);
  w36_t = hc_rotl32_S ((w30_t ^ w26_t ^ w1a_t ^ w16_t), 2u); SHA1_STEP_S (SHA1_F2o, b, c, d, e, a, w36_t);
  w37_t = hc_rotl32_S ((w31_t ^ w27_t ^ w1b_t ^ w17_t), 2u); SHA1_STEP_S (SHA1_F2o, a, b, c, d, e, w37_t);
  w38_t = hc_rotl32_S ((w32_t ^ w28_t ^ w1c_t ^ w18_t), 2u); SHA1_STEP_S (SHA1_F2o, e, a, b, c, d, w38_t);
  w39_t = hc_rotl32_S ((w33_t ^ w29_t ^ w1d_t ^ w19_t), 2u); SHA1_STEP_S (SHA1_F2o, d, e, a, b, c, w39_t);
  w3a_t = hc_rotl32_S ((w34_t ^ w2a_t ^ w1e_t ^ w1a_t), 2u); SHA1_STEP_S (SHA1_F2o, c, d, e, a, b, w3a_t);
  w3b_t = hc_rotl32_S ((w35_t ^ w2b_t ^ w1f_t ^ w1b_t), 2u); SHA1_STEP_S (SHA1_F2o, b, c, d, e, a, w3b_t);

  #undef K
  #define K SHA1C03

  w3c_t = hc_rotl32_S ((w36_t ^ w2c_t ^ w20_t ^ w1c_t), 2u); SHA1_STEP_S (SHA1_F1, a, b, c, d, e, w3c_t);
  w3d_t = hc_rotl32_S ((w37_t ^ w2d_t ^ w21_t ^ w1d_t), 2u); SHA1_STEP_S (SHA1_F1, e, a, b, c, d, w3d_t);
  w3e_t = hc_rotl32_S ((w38_t ^ w2e_t ^ w22_t ^ w1e_t), 2u); SHA1_STEP_S (SHA1_F1, d, e, a, b, c, w3e_t);
  w3f_t = hc_rotl32_S ((w39_t ^ w2f_t ^ w23_t ^ w1f_t), 2u); SHA1_STEP_S (SHA1_F1, c, d, e, a, b, w3f_t);
  // quadrupled-distance recurrence (rotl 4u) for the final 16 words
  w40_t = hc_rotl32_S ((w34_t ^ w20_t ^ w08_t ^ w00_t), 4u); SHA1_STEP_S (SHA1_F1, b, c, d, e, a, w40_t);
  w41_t = hc_rotl32_S ((w35_t ^ w21_t ^ w09_t ^ w01_t), 4u); SHA1_STEP_S (SHA1_F1, a, b, c, d, e, w41_t);
  w42_t = hc_rotl32_S ((w36_t ^ w22_t ^ w0a_t ^ w02_t), 4u); SHA1_STEP_S (SHA1_F1, e, a, b, c, d, w42_t);
  w43_t = hc_rotl32_S ((w37_t ^ w23_t ^ w0b_t ^ w03_t), 4u); SHA1_STEP_S (SHA1_F1, d, e, a, b, c, w43_t);
  w44_t = hc_rotl32_S ((w38_t ^ w24_t ^ w0c_t ^ w04_t), 4u); SHA1_STEP_S (SHA1_F1, c, d, e, a, b, w44_t);
  w45_t = hc_rotl32_S ((w39_t ^ w25_t ^ w0d_t ^ w05_t), 4u); SHA1_STEP_S (SHA1_F1, b, c, d, e, a, w45_t);
  w46_t = hc_rotl32_S ((w3a_t ^ w26_t ^ w0e_t ^ w06_t), 4u); SHA1_STEP_S (SHA1_F1, a, b, c, d, e, w46_t);
  w47_t = hc_rotl32_S ((w3b_t ^ w27_t ^ w0f_t ^ w07_t), 4u); SHA1_STEP_S (SHA1_F1, e, a, b, c, d, w47_t);
  w48_t = hc_rotl32_S ((w3c_t ^ w28_t ^ w10_t ^ w08_t), 4u); SHA1_STEP_S (SHA1_F1, d, e, a, b, c, w48_t);
  w49_t = hc_rotl32_S ((w3d_t ^ w29_t ^ w11_t ^ w09_t), 4u); SHA1_STEP_S (SHA1_F1, c, d, e, a, b, w49_t);
  w4a_t = hc_rotl32_S ((w3e_t ^ w2a_t ^ w12_t ^ w0a_t), 4u); SHA1_STEP_S (SHA1_F1, b, c, d, e, a, w4a_t);
  w4b_t = hc_rotl32_S ((w3f_t ^ w2b_t ^ w13_t ^ w0b_t), 4u); SHA1_STEP_S (SHA1_F1, a, b, c, d, e, w4b_t);
  w4c_t = hc_rotl32_S ((w40_t ^ w2c_t ^ w14_t ^ w0c_t), 4u); SHA1_STEP_S (SHA1_F1, e, a, b, c, d, w4c_t);
  w4d_t = hc_rotl32_S ((w41_t ^ w2d_t ^ w15_t ^ w0d_t), 4u); SHA1_STEP_S (SHA1_F1, d, e, a, b, c, w4d_t);
  w4e_t = hc_rotl32_S ((w42_t ^ w2e_t ^ w16_t ^ w0e_t), 4u); SHA1_STEP_S (SHA1_F1, c, d, e, a, b, w4e_t);
  w4f_t = hc_rotl32_S ((w43_t ^ w2f_t ^ w17_t ^ w0f_t), 4u); SHA1_STEP_S (SHA1_F1, b, c, d, e, a, w4f_t);

  // export the final schedule words (rounds 64..79) to the caller
  t[ 0] = w40_t;
  t[ 1] = w41_t;
  t[ 2] = w42_t;
  t[ 3] = w43_t;
  t[ 4] = w44_t;
  t[ 5] = w45_t;
  t[ 6] = w46_t;
  t[ 7] = w47_t;
  t[ 8] = w48_t;
  t[ 9] = w49_t;
  t[10] = w4a_t;
  t[11] = w4b_t;
  t[12] = w4c_t;
  t[13] = w4d_t;
  t[14] = w4e_t;
  t[15] = w4f_t;

  #undef K
  #endif

  digest[0] += a;
  digest[1] += b;
  digest[2] += c;
  digest[3] += d;
  digest[4] += e;
}

// only change in this function compared to OpenCL/inc_hash_sha1.cl is that
// it calls our modified sha1_transform_rar29 () function

DECLSPEC void sha1_update_64_rar29 (sha1_ctx_t *ctx, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const int bytes, u32 *t)
{
  if (bytes == 0) return;

  const int pos = ctx->len & 63; // fill level of the buffered block

  int len = 64;

  if (bytes < 64)
  {
    len = bytes;
  }

  ctx->len += len;

  if (pos == 0)
  {
    // buffer was block-aligned: copy straight into the context buffer
    ctx->w0[0] = w0[0];
    ctx->w0[1] = w0[1];
    ctx->w0[2] = w0[2];
    ctx->w0[3] = w0[3];
    ctx->w1[0] = w1[0];
    ctx->w1[1] = w1[1];
    ctx->w1[2] = w1[2];
    ctx->w1[3] = w1[3];
    ctx->w2[0] = w2[0];
    ctx->w2[1] = w2[1];
    ctx->w2[2] = w2[2];
    ctx->w2[3] = w2[3];
    ctx->w3[0] = w3[0];
    ctx->w3[1] = w3[1];
    ctx->w3[2] = w3[2];
    ctx->w3[3] = w3[3];

    if (len == 64)
    {
      sha1_transform_rar29 (ctx->w0, ctx->w1, ctx->w2, ctx->w3, ctx->h, t);

      ctx->w0[0] = 0;
      ctx->w0[1] = 0;
      ctx->w0[2] = 0;
      ctx->w0[3] = 0;
      ctx->w1[0] = 0;
      ctx->w1[1] = 0;
      ctx->w1[2] = 0;
      ctx->w1[3] = 0;
      ctx->w2[0] = 0;
      ctx->w2[1] = 0;
      ctx->w2[2] = 0;
      ctx->w2[3] = 0;
      ctx->w3[0] = 0;
      ctx->w3[1] = 0;
      ctx->w3[2] = 0;
      ctx->w3[3] = 0;
    }
  }
  else
  {
    if ((pos + len) < 64)
    {
      // fits in the partially filled block: shift into place and merge
      switch_buffer_by_offset_be_S (w0, w1, w2, w3, pos);

      ctx->w0[0] |= w0[0];
      ctx->w0[1] |= w0[1];
      ctx->w0[2] |= w0[2];
      ctx->w0[3] |= w0[3];
      ctx->w1[0] |= w1[0];
      ctx->w1[1] |= w1[1];
      ctx->w1[2] |= w1[2];
      ctx->w1[3] |= w1[3];
      ctx->w2[0] |= w2[0];
      ctx->w2[1] |= w2[1];
      ctx->w2[2] |= w2[2];
      ctx->w2[3] |= w2[3];
      ctx->w3[0] |= w3[0];
      ctx->w3[1] |= w3[1];
      ctx->w3[2] |= w3[2];
      ctx->w3[3] |= w3[3];
    }
    else
    {
      // overflows the block: split into "fill current block" + carry
      u32 c0[4] = { 0 };
      u32 c1[4] = { 0 };
      u32 c2[4] = { 0 };
      u32 c3[4] = { 0 };

      switch_buffer_by_offset_carry_be_S (w0, w1, w2, w3, c0, c1, c2, c3, pos);

      ctx->w0[0] |= w0[0];
      ctx->w0[1] |= w0[1];
      ctx->w0[2] |= w0[2];
      ctx->w0[3] |= w0[3];
      ctx->w1[0] |= w1[0];
      ctx->w1[1] |= w1[1];
      ctx->w1[2] |= w1[2];
      ctx->w1[3] |= w1[3];
      ctx->w2[0] |= w2[0];
      ctx->w2[1] |= w2[1];
      ctx->w2[2] |= w2[2];
      ctx->w2[3] |= w2[3];
      ctx->w3[0] |= w3[0];
      ctx->w3[1] |= w3[1];
      ctx->w3[2] |= w3[2];
      ctx->w3[3] |= w3[3];

      sha1_transform_rar29 (ctx->w0, ctx->w1, ctx->w2, ctx->w3, ctx->h, t);

      // start the next block with the carried-over bytes
      ctx->w0[0] = c0[0];
      ctx->w0[1] = c0[1];
      ctx->w0[2] = c0[2];
      ctx->w0[3] = c0[3];
      ctx->w1[0] = c1[0];
      ctx->w1[1] = c1[1];
      ctx->w1[2] = c1[2];
      ctx->w1[3] = c1[3];
      ctx->w2[0] = c2[0];
      ctx->w2[1] = c2[1];
      ctx->w2[2] = c2[2];
      ctx->w2[3] = c2[3];
      ctx->w3[0] = c3[0];
      ctx->w3[1] = c3[1];
      ctx->w3[2] = c3[2];
      ctx->w3[3] = c3[3];
    }
  }
}

// main change in this function compared to OpenCL/inc_hash_sha1.cl is that
// we call sha1_update_64_rar29 () and sometimes replace w[]
//
// After each transformed block (except the last one of this update) the
// 16 schedule words returned in t[] are byte-swapped and written back into
// w[] at byte offset `diff`, shifted by `n_off` bytes when the offset is
// not word-aligned.  NOTE(review): w[] therefore evolves across KDF
// iterations; callers must persist it (m23700_loop stores it in tmps).
DECLSPEC void sha1_update_rar29 (sha1_ctx_t *ctx, u32 *w, const int len)
{
  u32 w0[4];
  u32 w1[4];
  u32 w2[4];
  u32 w3[4];

  if (len == 0) return;

  const int pos = ctx->len & 63;

  int pos1 = 0; // byte position within w[]
  int pos4 = 0; // word position within w[]

  if (len > 64) // or: if (pos1 < (len - 64))
  {
    w0[0] = w[pos4 +  0];
    w0[1] = w[pos4 +  1];
    w0[2] = w[pos4 +  2];
    w0[3] = w[pos4 +  3];
    w1[0] = w[pos4 +  4];
    w1[1] = w[pos4 +  5];
    w1[2] = w[pos4 +  6];
    w1[3] = w[pos4 +  7];
    w2[0] = w[pos4 +  8];
    w2[1] = w[pos4 +  9];
    w2[2] = w[pos4 + 10];
    w2[3] = w[pos4 + 11];
    w3[0] = w[pos4 + 12];
    w3[1] = w[pos4 + 13];
    w3[2] = w[pos4 + 14];
    w3[3] = w[pos4 + 15];

    sha1_update_64 (ctx, w0, w1, w2, w3, 64);

    pos1 += 64;
    pos4 += 16;
  }

  for (int diff = 64 - pos; pos1 < len; pos1 += 64, pos4 += 16, diff += 64)
  {
    w0[0] = w[pos4 +  0];
    w0[1] = w[pos4 +  1];
    w0[2] = w[pos4 +  2];
    w0[3] = w[pos4 +  3];
    w1[0] = w[pos4 +  4];
    w1[1] = w[pos4 +  5];
    w1[2] = w[pos4 +  6];
    w1[3] = w[pos4 +  7];
    w2[0] = w[pos4 +  8];
    w2[1] = w[pos4 +  9];
    w2[2] = w[pos4 + 10];
    w2[3] = w[pos4 + 11];
    w3[0] = w[pos4 + 12];
    w3[1] = w[pos4 + 13];
    w3[2] = w[pos4 + 14];
    w3[3] = w[pos4 + 15];

    // only major change in this function compared to OpenCL/inc_hash_sha1.cl:

    u32 t[17] = { 0 }; // 16 schedule words + 1 spill word for unaligned offsets

    sha1_update_64_rar29 (ctx, w0, w1, w2, w3, len - pos1, t);

    if ((diff + 63) >= len) break;

    // replaces 64 bytes (with offset diff) of the underlying data w[] with t[]:

    // for (int i = 0; i < 16; i++) t[i] = hc_swap32_S (t[i]);

    t[ 0] = hc_swap32_S (t[ 0]); // unroll seems to be faster
    t[ 1] = hc_swap32_S (t[ 1]);
    t[ 2] = hc_swap32_S (t[ 2]);
    t[ 3] = hc_swap32_S (t[ 3]);
    t[ 4] = hc_swap32_S (t[ 4]);
    t[ 5] = hc_swap32_S (t[ 5]);
    t[ 6] = hc_swap32_S (t[ 6]);
    t[ 7] = hc_swap32_S (t[ 7]);
    t[ 8] = hc_swap32_S (t[ 8]);
    t[ 9] = hc_swap32_S (t[ 9]);
    t[10] = hc_swap32_S (t[10]);
    t[11] = hc_swap32_S (t[11]);
    t[12] = hc_swap32_S (t[12]);
    t[13] = hc_swap32_S (t[13]);
    t[14] = hc_swap32_S (t[14]);
    t[15] = hc_swap32_S (t[15]);

    const u32 n_idx = diff / 4;
    const u32 n_off = diff % 4;

    if (n_off)
    {
      // shift the whole 64-byte window right by n_off bytes; the bytes
      // pushed out of t[15] land in the spill word t[16]
      const u32 off_mul = n_off * 8;
      const u32 off_sub = 32 - off_mul;

      t[16] =                      (t[15] << off_sub);
      t[15] = (t[15] >> off_mul) | (t[14] << off_sub);
      t[14] = (t[14] >> off_mul) | (t[13] << off_sub);
      t[13] = (t[13] >> off_mul) | (t[12] << off_sub);
      t[12] = (t[12] >> off_mul) | (t[11] << off_sub);
      t[11] = (t[11] >> off_mul) | (t[10] << off_sub);
      t[10] = (t[10] >> off_mul) | (t[ 9] << off_sub);
      t[ 9] = (t[ 9] >> off_mul) | (t[ 8] << off_sub);
      t[ 8] = (t[ 8] >> off_mul) | (t[ 7] << off_sub);
      t[ 7] = (t[ 7] >> off_mul) | (t[ 6] << off_sub);
      t[ 6] = (t[ 6] >> off_mul) | (t[ 5] << off_sub);
      t[ 5] = (t[ 5] >> off_mul) | (t[ 4] << off_sub);
      t[ 4] = (t[ 4] >> off_mul) | (t[ 3] << off_sub);
      t[ 3] = (t[ 3] >> off_mul) | (t[ 2] << off_sub);
      t[ 2] = (t[ 2] >> off_mul) | (t[ 1] << off_sub);
      t[ 1] = (t[ 1] >> off_mul) | (t[ 0] << off_sub);
      t[ 0] = (t[ 0] >> off_mul);
    }

    // keep the n_off leading bytes of w[n_idx], replace the rest
    // (mask degenerates to 0 when n_off == 0, i.e. full overwrite)
    w[n_idx] &= 0xffffff00 << ((3 - n_off) * 8);

    w[n_idx] |= t[0];

    w[n_idx +  1] = t[ 1];
    w[n_idx +  2] = t[ 2];
    w[n_idx +  3] = t[ 3];
    w[n_idx +  4] = t[ 4];
    w[n_idx +  5] = t[ 5];
    w[n_idx +  6] = t[ 6];
    w[n_idx +  7] = t[ 7];
    w[n_idx +  8] = t[ 8];
    w[n_idx +  9] = t[ 9];
    w[n_idx + 10] = t[10];
    w[n_idx + 11] = t[11];
    w[n_idx + 12] = t[12];
    w[n_idx + 13] = t[13];
    w[n_idx + 14] = t[14];
    w[n_idx + 15] = t[15];

    // the final set is only meaningful: if (n_off)

    w[n_idx + 16] &= 0xffffffff >> (n_off * 8);

    w[n_idx + 16] |= t[16];
  }
}

// _init kernel: seed the SHA-1 state, pack password (already UTF-16 per the
// salt-offset comment below) plus the 8-byte salt into tmps[].w, clear iv.
KERNEL_FQ void m23700_init (KERN_ATTR_TMPS_ESALT (rar3_tmp_t, rar3_t))
{
  /**
   * base
   */

  const u64 gid = get_global_id (0);

  if (gid >= gid_max) return;

  tmps[gid].dgst[0] = SHA1M_A;
  tmps[gid].dgst[1] = SHA1M_B;
  tmps[gid].dgst[2] = SHA1M_C;
  tmps[gid].dgst[3] = SHA1M_D;
  tmps[gid].dgst[4] = SHA1M_E;

  // store pass and salt in tmps:

  const u32 pw_len = pws[gid].pw_len;

  u32 w[80] = { 0 };

  for (int i = 0, j = 0; i < pw_len; i += 4, j += 1)
  {
    w[j] = hc_swap32_S (pws[gid].i[j]);
  }

  // append salt:

  const u32 salt_idx = pw_len / 4;
  const u32 salt_off = pw_len & 3;

  u32 salt_buf[3];

  salt_buf[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[0]); // swap needed due to -O kernel
  salt_buf[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[1]);
  salt_buf[2] = 0;

  // switch buffer by offset (can only be 0 or 2 because of utf16):

  if (salt_off == 2) // or just: if (salt_off)
  {
    salt_buf[2] =                      (salt_buf[1] << 16);
    salt_buf[1] = (salt_buf[1] >> 16) | (salt_buf[0] << 16);
    salt_buf[0] = (salt_buf[0] >> 16);
  }

  w[salt_idx + 0] |= salt_buf[0];
  w[salt_idx + 1]  = salt_buf[1];
  w[salt_idx + 2]  = salt_buf[2];

  // store initial w[] (pass and salt) in tmps:

  for (u32 i = 0; i < 66; i++) // unroll ?
  {
    tmps[gid].w[i] = w[i];
  }

  // iv:

  tmps[gid].iv[0] = 0;
  tmps[gid].iv[1] = 0;
  tmps[gid].iv[2] = 0;
  tmps[gid].iv[3] = 0;
}

// _loop kernel: one slice of the 0x40000-round RAR3 KDF.  Each invocation
// hashes pass.salt.counter 16384 times (the first of them via ctx_iv, whose
// finalization also yields one IV byte for this slice), then persists the
// SHA-1 state and the evolved w[] back into tmps[].
KERNEL_FQ void m23700_loop (KERN_ATTR_TMPS_ESALT (rar3_tmp_t, rar3_t))
{
  const u64 gid = get_global_id (0);

  if (gid >= gid_max) return;

  /**
   * base
   */

  const u32 pw_len = pws[gid].pw_len;

  const u32 salt_len = 8;

  const u32 pw_salt_len = pw_len + salt_len;

  const u32 p3 = pw_salt_len + 3; // bytes hashed per round incl. 3-byte counter

  u32 w[80] = { 0 };

  for (u32 i = 0; i < 66; i++)
  {
    w[i] = tmps[gid].w[i];
  }

  // update IV:

  const u32 init_pos = loop_pos / (ROUNDS / 16); // which of the 16 IV bytes

  sha1_ctx_t ctx_iv;

  sha1_init (&ctx_iv);

  ctx_iv.h[0] = tmps[gid].dgst[0];
  ctx_iv.h[1] = tmps[gid].dgst[1];
  ctx_iv.h[2] = tmps[gid].dgst[2];
  ctx_iv.h[3] = tmps[gid].dgst[3];
  ctx_iv.h[4] = tmps[gid].dgst[4];

  ctx_iv.len = loop_pos * p3; // total bytes hashed so far across slices

  sha1_update_rar29 (&ctx_iv, w, pw_salt_len);

  memcat8c_be (ctx_iv.w0, ctx_iv.w1, ctx_iv.w2, ctx_iv.w3, ctx_iv.len, hc_swap32_S (loop_pos), ctx_iv.h);

  ctx_iv.len += 3;

  // copy the context from ctx_iv to ctx:

  sha1_ctx_t ctx;

  ctx.h[0] = ctx_iv.h[0];
  ctx.h[1] = ctx_iv.h[1];
  ctx.h[2] = ctx_iv.h[2];
  ctx.h[3] = ctx_iv.h[3];
  ctx.h[4] = ctx_iv.h[4];

  ctx.w0[0] = ctx_iv.w0[0];
  ctx.w0[1] = ctx_iv.w0[1];
  ctx.w0[2] = ctx_iv.w0[2];
  ctx.w0[3] = ctx_iv.w0[3];

  ctx.w1[0] = ctx_iv.w1[0];
  ctx.w1[1] = ctx_iv.w1[1];
  ctx.w1[2] = ctx_iv.w1[2];
  ctx.w1[3] = ctx_iv.w1[3];

  ctx.w2[0] = ctx_iv.w2[0];
  ctx.w2[1] = ctx_iv.w2[1];
  ctx.w2[2] = ctx_iv.w2[2];
  ctx.w2[3] = ctx_iv.w2[3];

  ctx.w3[0] = ctx_iv.w3[0];
  ctx.w3[1] = ctx_iv.w3[1];
  ctx.w3[2] = ctx_iv.w3[2];
  ctx.w3[3] = ctx_iv.w3[3];

  ctx.len = p3; // or ctx_iv.len ?
984 985 // final () for the IV byte: 986 987 sha1_final (&ctx_iv); 988 989 const u32 iv_idx = init_pos / 4; 990 const u32 iv_off = init_pos % 4; 991 992 tmps[gid].iv[iv_idx] |= (ctx_iv.h[4] & 0xff) << (iv_off * 8); 993 994 // main loop: 995 996 for (u32 i = 0, j = (loop_pos + 1); i < 16383; i++, j++) 997 { 998 sha1_update_rar29 (&ctx, w, pw_salt_len); 999 1000 memcat8c_be (ctx.w0, ctx.w1, ctx.w2, ctx.w3, ctx.len, hc_swap32_S (j), ctx.h); 1001 1002 ctx.len += 3; 1003 } 1004 1005 tmps[gid].dgst[0] = ctx.h[0]; 1006 tmps[gid].dgst[1] = ctx.h[1]; 1007 tmps[gid].dgst[2] = ctx.h[2]; 1008 tmps[gid].dgst[3] = ctx.h[3]; 1009 tmps[gid].dgst[4] = ctx.h[4]; 1010 1011 // only needed if pw_len > 28: 1012 1013 for (u32 i = 0; i < 66; i++) // unroll ? 1014 { 1015 tmps[gid].w[i] = w[i]; 1016 } 1017} 1018 1019KERNEL_FQ void m23700_comp (KERN_ATTR_TMPS_ESALT (rar3_tmp_t, rar3_t)) 1020{ 1021 const u64 gid = get_global_id (0); 1022 const u64 lid = get_local_id (0); 1023 const u64 lsz = get_local_size (0); 1024 1025 /** 1026 * aes shared 1027 */ 1028 1029 #ifdef REAL_SHM 1030 1031 LOCAL_VK u32 s_td0[256]; 1032 LOCAL_VK u32 s_td1[256]; 1033 LOCAL_VK u32 s_td2[256]; 1034 LOCAL_VK u32 s_td3[256]; 1035 LOCAL_VK u32 s_td4[256]; 1036 1037 LOCAL_VK u32 s_te0[256]; 1038 LOCAL_VK u32 s_te1[256]; 1039 LOCAL_VK u32 s_te2[256]; 1040 LOCAL_VK u32 s_te3[256]; 1041 LOCAL_VK u32 s_te4[256]; 1042 1043 for (u32 i = lid; i < 256; i += lsz) 1044 { 1045 s_td0[i] = td0[i]; 1046 s_td1[i] = td1[i]; 1047 s_td2[i] = td2[i]; 1048 s_td3[i] = td3[i]; 1049 s_td4[i] = td4[i]; 1050 1051 s_te0[i] = te0[i]; 1052 s_te1[i] = te1[i]; 1053 s_te2[i] = te2[i]; 1054 s_te3[i] = te3[i]; 1055 s_te4[i] = te4[i]; 1056 } 1057 1058 #else 1059 1060 CONSTANT_AS u32a *s_td0 = td0; 1061 CONSTANT_AS u32a *s_td1 = td1; 1062 CONSTANT_AS u32a *s_td2 = td2; 1063 CONSTANT_AS u32a *s_td3 = td3; 1064 CONSTANT_AS u32a *s_td4 = td4; 1065 1066 CONSTANT_AS u32a *s_te0 = te0; 1067 CONSTANT_AS u32a *s_te1 = te1; 1068 CONSTANT_AS u32a *s_te2 = te2; 1069 
CONSTANT_AS u32a *s_te3 = te3; 1070 CONSTANT_AS u32a *s_te4 = te4; 1071 1072 #endif 1073 1074 LOCAL_VK u32 l_crc32tab[256]; 1075 1076 for (int i = lid; i < 256; i += lsz) 1077 { 1078 l_crc32tab[i] = crc32tab[i]; 1079 } 1080 1081 SYNC_THREADS (); 1082 1083 if (gid >= gid_max) return; 1084 1085 /** 1086 * base 1087 */ 1088 1089 const u32 pw_len = pws[gid].pw_len; 1090 1091 const u32 salt_len = 8; 1092 1093 const u32 pw_salt_len = pw_len + salt_len; 1094 1095 const u32 p3 = pw_salt_len + 3; 1096 1097 u32 h[5]; 1098 1099 h[0] = tmps[gid].dgst[0]; 1100 h[1] = tmps[gid].dgst[1]; 1101 h[2] = tmps[gid].dgst[2]; 1102 h[3] = tmps[gid].dgst[3]; 1103 h[4] = tmps[gid].dgst[4]; 1104 1105 u32 w0[4]; 1106 u32 w1[4]; 1107 u32 w2[4]; 1108 u32 w3[4]; 1109 1110 w0[0] = 0x80000000; 1111 w0[1] = 0; 1112 w0[2] = 0; 1113 w0[3] = 0; 1114 w1[0] = 0; 1115 w1[1] = 0; 1116 w1[2] = 0; 1117 w1[3] = 0; 1118 w2[0] = 0; 1119 w2[1] = 0; 1120 w2[2] = 0; 1121 w2[3] = 0; 1122 w3[0] = 0; 1123 w3[1] = 0; 1124 w3[2] = 0; 1125 w3[3] = (ROUNDS * p3) * 8; 1126 1127 sha1_transform (w0, w1, w2, w3, h); 1128 1129 u32 ukey[4]; 1130 1131 ukey[0] = hc_swap32_S (h[0]); 1132 ukey[1] = hc_swap32_S (h[1]); 1133 ukey[2] = hc_swap32_S (h[2]); 1134 ukey[3] = hc_swap32_S (h[3]); 1135 1136 u32 ks[44]; 1137 1138 AES128_set_decrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3, s_td0, s_td1, s_td2, s_td3); 1139 1140 const u32 pack_size = esalt_bufs[DIGESTS_OFFSET].pack_size; 1141 const u32 unpack_size = esalt_bufs[DIGESTS_OFFSET].unpack_size; 1142 1143 if (pack_size > unpack_size) // could be aligned 1144 { 1145 if (pack_size >= 32) // otherwise IV... 
1146 { 1147 const u32 pack_size_elements = pack_size / 4; 1148 1149 u32 last_block_encrypted[4]; 1150 1151 last_block_encrypted[0] = esalt_bufs[DIGESTS_OFFSET].data[pack_size_elements - 4 + 0]; 1152 last_block_encrypted[1] = esalt_bufs[DIGESTS_OFFSET].data[pack_size_elements - 4 + 1]; 1153 last_block_encrypted[2] = esalt_bufs[DIGESTS_OFFSET].data[pack_size_elements - 4 + 2]; 1154 last_block_encrypted[3] = esalt_bufs[DIGESTS_OFFSET].data[pack_size_elements - 4 + 3]; 1155 1156 u32 last_block_decrypted[4]; 1157 1158 AES128_decrypt (ks, last_block_encrypted, last_block_decrypted, s_td0, s_td1, s_td2, s_td3, s_td4); 1159 1160 u32 last_block_iv[4]; 1161 1162 last_block_iv[0] = esalt_bufs[DIGESTS_OFFSET].data[pack_size_elements - 8 + 0]; 1163 last_block_iv[1] = esalt_bufs[DIGESTS_OFFSET].data[pack_size_elements - 8 + 1]; 1164 last_block_iv[2] = esalt_bufs[DIGESTS_OFFSET].data[pack_size_elements - 8 + 2]; 1165 last_block_iv[3] = esalt_bufs[DIGESTS_OFFSET].data[pack_size_elements - 8 + 3]; 1166 1167 last_block_decrypted[0] ^= last_block_iv[0]; 1168 last_block_decrypted[1] ^= last_block_iv[1]; 1169 last_block_decrypted[2] ^= last_block_iv[2]; 1170 last_block_decrypted[3] ^= last_block_iv[3]; 1171 1172 if ((last_block_decrypted[3] & 0xff) != 0) return; 1173 } 1174 } 1175 1176 u32 iv[4]; 1177 1178 iv[0] = tmps[gid].iv[0]; 1179 iv[1] = tmps[gid].iv[1]; 1180 iv[2] = tmps[gid].iv[2]; 1181 iv[3] = tmps[gid].iv[3]; 1182 1183 iv[0] = hc_swap32_S (iv[0]); 1184 iv[1] = hc_swap32_S (iv[1]); 1185 iv[2] = hc_swap32_S (iv[2]); 1186 iv[3] = hc_swap32_S (iv[3]); 1187 1188 u32 data_left = unpack_size; 1189 1190 u32 crc32 = ~0; 1191 1192 for (u32 i = 0, j = 0; i < pack_size / 16; i += 1, j += 4) 1193 { 1194 u32 data[4]; 1195 1196 data[0] = esalt_bufs[DIGESTS_OFFSET].data[j + 0]; 1197 data[1] = esalt_bufs[DIGESTS_OFFSET].data[j + 1]; 1198 data[2] = esalt_bufs[DIGESTS_OFFSET].data[j + 2]; 1199 data[3] = esalt_bufs[DIGESTS_OFFSET].data[j + 3]; 1200 1201 u32 out[4]; 1202 1203 AES128_decrypt (ks, 
data, out, s_td0, s_td1, s_td2, s_td3, s_td4); 1204 1205 out[0] ^= iv[0]; 1206 out[1] ^= iv[1]; 1207 out[2] ^= iv[2]; 1208 out[3] ^= iv[3]; 1209 1210 crc32 = round_crc32_16 (crc32, out, data_left, l_crc32tab); 1211 1212 iv[0] = data[0]; 1213 iv[1] = data[1]; 1214 iv[2] = data[2]; 1215 iv[3] = data[3]; 1216 1217 data_left -= 16; 1218 } 1219 1220 const u32 r0 = crc32; 1221 const u32 r1 = 0; 1222 const u32 r2 = 0; 1223 const u32 r3 = 0; 1224 1225 #define il_pos 0 1226 1227 #ifdef KERNEL_STATIC 1228 #include COMPARE_M 1229 #endif 1230} 1231