1/** 2 * Author......: See docs/credits.txt 3 * License.....: MIT 4 */ 5 6//#define NEW_SIMD_CODE 7 8#ifdef KERNEL_STATIC 9#include "inc_vendor.h" 10#include "inc_types.h" 11#include "inc_platform.cl" 12#include "inc_common.cl" 13#include "inc_scalar.cl" 14#include "inc_simd.cl" 15#include "inc_cipher_aes.cl" 16#endif 17 18KERNEL_FQ void m26401_m04 (KERN_ATTR_BASIC ()) 19{ 20 const u64 gid = get_global_id (0); 21 const u64 lid = get_local_id (0); 22 const u64 lsz = get_local_size (0); 23 24 /** 25 * aes shared 26 */ 27 28 #ifdef REAL_SHM 29 30 LOCAL_VK u32 s_td0[256]; 31 LOCAL_VK u32 s_td1[256]; 32 LOCAL_VK u32 s_td2[256]; 33 LOCAL_VK u32 s_td3[256]; 34 LOCAL_VK u32 s_td4[256]; 35 36 LOCAL_VK u32 s_te0[256]; 37 LOCAL_VK u32 s_te1[256]; 38 LOCAL_VK u32 s_te2[256]; 39 LOCAL_VK u32 s_te3[256]; 40 LOCAL_VK u32 s_te4[256]; 41 42 for (u32 i = lid; i < 256; i += lsz) 43 { 44 s_td0[i] = td0[i]; 45 s_td1[i] = td1[i]; 46 s_td2[i] = td2[i]; 47 s_td3[i] = td3[i]; 48 s_td4[i] = td4[i]; 49 50 s_te0[i] = te0[i]; 51 s_te1[i] = te1[i]; 52 s_te2[i] = te2[i]; 53 s_te3[i] = te3[i]; 54 s_te4[i] = te4[i]; 55 } 56 57 SYNC_THREADS (); 58 59 #else 60 61 CONSTANT_AS u32a *s_td0 = td0; 62 CONSTANT_AS u32a *s_td1 = td1; 63 CONSTANT_AS u32a *s_td2 = td2; 64 CONSTANT_AS u32a *s_td3 = td3; 65 CONSTANT_AS u32a *s_td4 = td4; 66 67 CONSTANT_AS u32a *s_te0 = te0; 68 CONSTANT_AS u32a *s_te1 = te1; 69 CONSTANT_AS u32a *s_te2 = te2; 70 CONSTANT_AS u32a *s_te3 = te3; 71 CONSTANT_AS u32a *s_te4 = te4; 72 73 #endif 74 75 if (gid >= gid_max) return; 76 77 /** 78 * base 79 */ 80 81 u32 pw_buf0[4]; 82 u32 pw_buf1[4]; 83 84 pw_buf0[0] = pws[gid].i[0]; 85 pw_buf0[1] = pws[gid].i[1]; 86 pw_buf0[2] = pws[gid].i[2]; 87 pw_buf0[3] = pws[gid].i[3]; 88 pw_buf1[0] = pws[gid].i[4]; 89 pw_buf1[1] = pws[gid].i[5]; 90 pw_buf1[2] = pws[gid].i[6]; 91 pw_buf1[3] = pws[gid].i[7]; 92 93 const u32 pw_l_len = pws[gid].pw_len & 63; 94 95 /** 96 * Salt prep 97 */ 98 99 u32 pt[4]; 100 101 pt[0] = salt_bufs[SALT_POS].salt_buf[0]; 102 pt[1] = salt_bufs[SALT_POS].salt_buf[1]; 103 pt[2] = salt_bufs[SALT_POS].salt_buf[2]; 104 pt[3] = salt_bufs[SALT_POS].salt_buf[3]; 105 106 /** 107 * loop 108 */ 109 110 for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) 111 { 112 const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos) & 15; 113 114 const u32x pw_len = (pw_l_len + pw_r_len) & 15; 115 116 /** 117 * concat password candidate 118 */ 119 120 u32x wordl0[4] = { 0 }; 121 u32x wordl1[4] = { 0 }; 122 u32x wordl2[4] = { 0 }; 123 u32x wordl3[4] = { 0 }; 124 125 wordl0[0] = pw_buf0[0]; 126 wordl0[1] = pw_buf0[1]; 127 wordl0[2] = pw_buf0[2]; 128 wordl0[3] = pw_buf0[3]; 129 130 u32x wordr0[4] = { 0 }; 131 u32x wordr1[4] = { 0 }; 132 u32x wordr2[4] = { 0 }; 133 u32x wordr3[4] = { 0 }; 134 135 wordr0[0] = ix_create_combt (combs_buf, il_pos, 0); 136 wordr0[1] = ix_create_combt (combs_buf, il_pos, 1); 137 wordr0[2] = ix_create_combt (combs_buf, il_pos, 2); 138 wordr0[3] = ix_create_combt (combs_buf, il_pos, 3); 139 140 if (combs_mode == COMBINATOR_MODE_BASE_LEFT) 141 { 142 switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len); 143 } 144 else 145 { 146 switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len); 147 } 148 149 u32x w0[4]; 150 u32x w1[4]; 151 u32x w2[4]; 152 u32x w3[4]; 153 154 w0[0] = wordl0[0] | wordr0[0]; 155 w0[1] = wordl0[1] | wordr0[1]; 156 w0[2] = wordl0[2] | wordr0[2]; 157 w0[3] = wordl0[3] | wordr0[3]; 158 w1[0] = 0; 159 w1[1] = 0; 160 w1[2] = 0; 161 w1[3] = 0; 162 w2[0] = 0; 163 w2[1] = 0; 164 w2[2] = 0; 165 w2[3] = 0; 166 w3[0] = 0; 167 w3[1] = 0; 168 w3[2] = 0; 169 w3[3] = 0; 170 171 u32 ukey[4]; 172 173 ukey[0] = w0[0]; 174 ukey[1] = w0[1]; 175 ukey[2] = w0[2]; 176 ukey[3] = w0[3]; 177 178 #define KEYLEN 44 179 180 u32 ks[KEYLEN]; 181 182 aes128_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); 183 184 u32 ct[4]; 185 186 aes128_encrypt (ks, pt, ct, s_te0, s_te1, s_te2, s_te3, s_te4); 187 188 const u32 r0 = ct[0]; 189 const u32 r1 = ct[1]; 190 const u32 r2 = ct[2]; 191 const u32 r3 = ct[3]; 192 193 COMPARE_M_SIMD (r0, r1, r2, r3); 194 } 195} 196 197KERNEL_FQ void m26401_m08 (KERN_ATTR_BASIC ()) 198{ 199} 200 201KERNEL_FQ void m26401_m16 (KERN_ATTR_BASIC ()) 202{ 203} 204 205KERNEL_FQ void m26401_s04 (KERN_ATTR_BASIC ()) 206{ 207 const u64 gid = get_global_id (0); 208 const u64 lid = get_local_id (0); 209 const u64 lsz = get_local_size (0); 210 211 /** 212 * aes shared 213 */ 214 215 #ifdef REAL_SHM 216 217 LOCAL_VK u32 s_td0[256]; 218 LOCAL_VK u32 s_td1[256]; 219 LOCAL_VK u32 s_td2[256]; 220 LOCAL_VK u32 s_td3[256]; 221 LOCAL_VK u32 s_td4[256]; 222 223 LOCAL_VK u32 s_te0[256]; 224 LOCAL_VK u32 s_te1[256]; 225 LOCAL_VK u32 s_te2[256]; 226 LOCAL_VK u32 s_te3[256]; 227 LOCAL_VK u32 s_te4[256]; 228 229 for (u32 i = lid; i < 256; i += lsz) 230 { 231 s_td0[i] = td0[i]; 232 s_td1[i] = td1[i]; 233 s_td2[i] = td2[i]; 234 s_td3[i] = td3[i]; 235 s_td4[i] = td4[i]; 236 237 s_te0[i] = te0[i]; 238 s_te1[i] = te1[i]; 239 s_te2[i] = te2[i]; 240 s_te3[i] = te3[i]; 241 s_te4[i] = te4[i]; 242 } 243 244 SYNC_THREADS (); 245 246 #else 247 248 CONSTANT_AS u32a *s_td0 = td0; 249 CONSTANT_AS u32a *s_td1 = td1; 250 CONSTANT_AS u32a *s_td2 = td2; 251 CONSTANT_AS u32a *s_td3 = td3; 252 CONSTANT_AS u32a *s_td4 = td4; 253 254 CONSTANT_AS u32a *s_te0 = te0; 255 CONSTANT_AS u32a *s_te1 = te1; 256 CONSTANT_AS u32a *s_te2 = te2; 257 CONSTANT_AS u32a *s_te3 = te3; 258 CONSTANT_AS u32a *s_te4 = te4; 259 260 #endif 261 262 if (gid >= gid_max) return; 263 264 /** 265 * base 266 */ 267 268 u32 pw_buf0[4]; 269 u32 pw_buf1[4]; 270 271 pw_buf0[0] = pws[gid].i[0]; 272 pw_buf0[1] = pws[gid].i[1]; 273 pw_buf0[2] = pws[gid].i[2]; 274 pw_buf0[3] = pws[gid].i[3]; 275 pw_buf1[0] = pws[gid].i[4]; 276 pw_buf1[1] = pws[gid].i[5]; 277 pw_buf1[2] = pws[gid].i[6]; 278 pw_buf1[3] = pws[gid].i[7]; 279 280 const u32 pw_l_len = pws[gid].pw_len & 63; 281 282 /** 283 * Salt prep 284 */ 285 286 u32 pt[4]; 287 288 pt[0] = salt_bufs[SALT_POS].salt_buf[0]; 289 pt[1] = salt_bufs[SALT_POS].salt_buf[1]; 290 pt[2] = salt_bufs[SALT_POS].salt_buf[2]; 291 pt[3] = salt_bufs[SALT_POS].salt_buf[3]; 292 293 /** 294 * digest 295 */ 296 297 const u32 search[4] = 298 { 299 digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0], 300 digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1], 301 digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2], 302 digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3] 303 }; 304 305 /** 306 * loop 307 */ 308 309 for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE) 310 { 311 const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos) & 15; 312 313 const u32x pw_len = (pw_l_len + pw_r_len) & 15; 314 315 /** 316 * concat password candidate 317 */ 318 319 u32x wordl0[4] = { 0 }; 320 u32x wordl1[4] = { 0 }; 321 u32x wordl2[4] = { 0 }; 322 u32x wordl3[4] = { 0 }; 323 324 wordl0[0] = pw_buf0[0]; 325 wordl0[1] = pw_buf0[1]; 326 wordl0[2] = pw_buf0[2]; 327 wordl0[3] = pw_buf0[3]; 328 329 u32x wordr0[4] = { 0 }; 330 u32x wordr1[4] = { 0 }; 331 u32x wordr2[4] = { 0 }; 332 u32x wordr3[4] = { 0 }; 333 334 wordr0[0] = ix_create_combt (combs_buf, il_pos, 0); 335 wordr0[1] = ix_create_combt (combs_buf, il_pos, 1); 336 wordr0[2] = ix_create_combt (combs_buf, il_pos, 2); 337 wordr0[3] = ix_create_combt (combs_buf, il_pos, 3); 338 339 if (combs_mode == COMBINATOR_MODE_BASE_LEFT) 340 { 341 switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len); 342 } 343 else 344 { 345 switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len); 346 } 347 348 u32x w0[4]; 349 u32x w1[4]; 350 u32x w2[4]; 351 u32x w3[4]; 352 353 w0[0] = wordl0[0] | wordr0[0]; 354 w0[1] = wordl0[1] | wordr0[1]; 355 w0[2] = wordl0[2] | wordr0[2]; 356 w0[3] = wordl0[3] | wordr0[3]; 357 w1[0] = 0; 358 w1[1] = 0; 359 w1[2] = 0; 360 w1[3] = 0; 361 w2[0] = 0; 362 w2[1] = 0; 363 w2[2] = 0; 364 w2[3] = 0; 365 w3[0] = 0; 366 w3[1] = 0; 367 w3[2] = 0; 368 w3[3] = 0; 369 370 u32 ukey[4]; 371 372 ukey[0] = w0[0]; 373 ukey[1] = w0[1]; 374 ukey[2] = w0[2]; 375 ukey[3] = w0[3]; 376 377 #define KEYLEN 44 378 379 u32 ks[KEYLEN]; 380 381 aes128_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); 382 383 u32 ct[4]; 384 385 aes128_encrypt (ks, pt, ct, s_te0, s_te1, s_te2, s_te3, s_te4); 386 387 const u32 r0 = ct[0]; 388 const u32 r1 = ct[1]; 389 const u32 r2 = ct[2]; 390 const u32 r3 = ct[3]; 391 392 COMPARE_S_SIMD (r0, r1, r2, r3); 393 } 394} 395 396KERNEL_FQ void m26401_s08 (KERN_ATTR_BASIC ()) 397{ 398} 399 400KERNEL_FQ void m26401_s16 (KERN_ATTR_BASIC ()) 401{ 402} 403