1/** 2 * Author......: See docs/credits.txt 3 * License.....: MIT 4 */ 5 6#ifdef KERNEL_STATIC 7#include "inc_vendor.h" 8#include "inc_types.h" 9#include "inc_platform.cl" 10#include "inc_common.cl" 11#include "inc_hash_sha256.cl" 12#endif 13 14#define COMPARE_S "inc_comp_single.cl" 15#define COMPARE_M "inc_comp_multi.cl" 16 17#define MIN(a,b) (((a) < (b)) ? (a) : (b)) 18 19typedef struct sha256crypt_tmp 20{ 21 // pure version 22 23 u32 alt_result[8]; 24 u32 p_bytes[64]; 25 u32 s_bytes[64]; 26 27} sha256crypt_tmp_t; 28 29DECLSPEC void init_ctx (u32 *digest) 30{ 31 digest[0] = SHA256M_A; 32 digest[1] = SHA256M_B; 33 digest[2] = SHA256M_C; 34 digest[3] = SHA256M_D; 35 digest[4] = SHA256M_E; 36 digest[5] = SHA256M_F; 37 digest[6] = SHA256M_G; 38 digest[7] = SHA256M_H; 39} 40 41DECLSPEC u32 memcat16 (u32 *block, const u32 offset, const u32 *append, const u32 append_len) 42{ 43 u32 in0 = append[0]; 44 u32 in1 = append[1]; 45 u32 in2 = append[2]; 46 u32 in3 = append[3]; 47 48 #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC 49 const u32 tmp0 = hc_bytealign_be ( 0, in0, offset); 50 const u32 tmp1 = hc_bytealign_be (in0, in1, offset); 51 const u32 tmp2 = hc_bytealign_be (in1, in2, offset); 52 const u32 tmp3 = hc_bytealign_be (in2, in3, offset); 53 const u32 tmp4 = hc_bytealign_be (in3, 0, offset); 54 #endif 55 56 #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV 57 58 #if defined IS_NV 59 const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; 60 #endif 61 62 #if (defined IS_AMD || defined IS_HIP) 63 const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8)); 64 #endif 65 66 const u32 tmp0 = hc_byte_perm_S (in0, 0, selector); 67 const u32 tmp1 = hc_byte_perm_S (in1, in0, selector); 68 const u32 tmp2 = hc_byte_perm_S (in2, in1, selector); 69 const u32 tmp3 = hc_byte_perm_S (in3, in2, selector); 70 const u32 tmp4 = hc_byte_perm_S (0, in3, selector); 71 #endif 72 73 switch (offset / 4) 74 { 75 case 0: block[ 0] |= tmp0; 76 block[ 1] = tmp1; 77 block[ 2] = tmp2; 78 block[ 3] = tmp3; 79 block[ 4] = tmp4; 80 break; 81 case 1: block[ 1] |= tmp0; 82 block[ 2] = tmp1; 83 block[ 3] = tmp2; 84 block[ 4] = tmp3; 85 block[ 5] = tmp4; 86 break; 87 case 2: block[ 2] |= tmp0; 88 block[ 3] = tmp1; 89 block[ 4] = tmp2; 90 block[ 5] = tmp3; 91 block[ 6] = tmp4; 92 break; 93 case 3: block[ 3] |= tmp0; 94 block[ 4] = tmp1; 95 block[ 5] = tmp2; 96 block[ 6] = tmp3; 97 block[ 7] = tmp4; 98 break; 99 case 4: block[ 4] |= tmp0; 100 block[ 5] = tmp1; 101 block[ 6] = tmp2; 102 block[ 7] = tmp3; 103 block[ 8] = tmp4; 104 break; 105 case 5: block[ 5] |= tmp0; 106 block[ 6] = tmp1; 107 block[ 7] = tmp2; 108 block[ 8] = tmp3; 109 block[ 9] = tmp4; 110 break; 111 case 6: block[ 6] |= tmp0; 112 block[ 7] = tmp1; 113 block[ 8] = tmp2; 114 block[ 9] = tmp3; 115 block[10] = tmp4; 116 break; 117 case 7: block[ 7] |= tmp0; 118 block[ 8] = tmp1; 119 block[ 9] = tmp2; 120 block[10] = tmp3; 121 block[11] = tmp4; 122 break; 123 case 8: block[ 8] |= tmp0; 124 block[ 9] = tmp1; 125 block[10] = tmp2; 126 block[11] = tmp3; 127 block[12] = tmp4; 128 break; 129 case 9: block[ 9] |= tmp0; 130 block[10] = tmp1; 131 block[11] = tmp2; 132 block[12] = tmp3; 133 block[13] = tmp4; 134 break; 135 case 10: block[10] |= tmp0; 136 block[11] = tmp1; 137 block[12] = tmp2; 138 block[13] = tmp3; 139 block[14] = tmp4; 140 break; 141 case 11: block[11] |= tmp0; 142 block[12] = tmp1; 143 block[13] = tmp2; 144 block[14] = tmp3; 145 block[15] = tmp4; 146 break; 147 case 12: block[12] |= tmp0; 148 block[13] = tmp1; 149 block[14] = tmp2; 150 block[15] = tmp3; 151 break; 152 case 13: block[13] |= tmp0; 153 block[14] = tmp1; 154 block[15] = tmp2; 155 break; 156 case 14: block[14] |= tmp0; 157 block[15] = tmp1; 158 break; 159 case 15: block[15] |= tmp0; 160 break; 161 } 162 163 u32 new_len = offset + append_len; 164 165 return new_len; 166} 167 168DECLSPEC u32 memcat16c (u32 *block, const u32 offset, const u32 *append, const u32 append_len, u32 *digest) 169{ 170 u32 in0 = append[0]; 171 u32 in1 = append[1]; 172 u32 in2 = append[2]; 173 u32 in3 = append[3]; 174 175 #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC 176 const u32 tmp0 = hc_bytealign_be ( 0, in0, offset); 177 const u32 tmp1 = hc_bytealign_be (in0, in1, offset); 178 const u32 tmp2 = hc_bytealign_be (in1, in2, offset); 179 const u32 tmp3 = hc_bytealign_be (in2, in3, offset); 180 const u32 tmp4 = hc_bytealign_be (in3, 0, offset); 181 #endif 182 183 #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV 184 185 #if defined IS_NV 186 const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; 187 #endif 188 189 #if (defined IS_AMD || defined IS_HIP) 190 const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8)); 191 #endif 192 193 const u32 tmp0 = hc_byte_perm_S (in0, 0, selector); 194 const u32 tmp1 = hc_byte_perm_S (in1, in0, selector); 195 const u32 tmp2 = hc_byte_perm_S (in2, in1, selector); 196 const u32 tmp3 = hc_byte_perm_S (in3, in2, selector); 197 const u32 tmp4 = hc_byte_perm_S (0, in3, selector); 198 #endif 199 200 u32 carry[4] = { 0 }; 201 202 switch (offset / 4) 203 { 204 case 0: block[ 0] |= tmp0; 205 block[ 1] = tmp1; 206 block[ 2] = tmp2; 207 block[ 3] = tmp3; 208 block[ 4] = tmp4; 209 break; 210 case 1: block[ 1] |= tmp0; 211 block[ 2] = tmp1; 212 block[ 3] = tmp2; 213 block[ 4] = tmp3; 214 block[ 5] = tmp4; 215 break; 216 case 2: block[ 2] |= tmp0; 217 block[ 3] = tmp1; 218 block[ 4] = tmp2; 219 block[ 5] = tmp3; 220 block[ 6] = tmp4; 221 break; 222 case 3: block[ 3] |= tmp0; 223 block[ 4] = tmp1; 224 block[ 5] = tmp2; 225 block[ 6] = tmp3; 226 block[ 7] = tmp4; 227 break; 228 case 4: block[ 4] |= tmp0; 229 block[ 5] = tmp1; 230 block[ 6] = tmp2; 231 block[ 7] = tmp3; 232 block[ 8] = tmp4; 233 break; 234 case 5: block[ 5] |= tmp0; 235 block[ 6] = tmp1; 236 block[ 7] = tmp2; 237 block[ 8] = tmp3; 238 block[ 9] = tmp4; 239 break; 240 case 6: block[ 6] |= tmp0; 241 block[ 7] = tmp1; 242 block[ 8] = tmp2; 243 block[ 9] = tmp3; 244 block[10] = tmp4; 245 break; 246 case 7: block[ 7] |= tmp0; 247 block[ 8] = tmp1; 248 block[ 9] = tmp2; 249 block[10] = tmp3; 250 block[11] = tmp4; 251 break; 252 case 8: block[ 8] |= tmp0; 253 block[ 9] = tmp1; 254 block[10] = tmp2; 255 block[11] = tmp3; 256 block[12] = tmp4; 257 break; 258 case 9: block[ 9] |= tmp0; 259 block[10] = tmp1; 260 block[11] = tmp2; 261 block[12] = tmp3; 262 block[13] = tmp4; 263 break; 264 case 10: block[10] |= tmp0; 265 block[11] = tmp1; 266 block[12] = tmp2; 267 block[13] = tmp3; 268 block[14] = tmp4; 269 break; 270 case 11: block[11] |= tmp0; 271 block[12] = tmp1; 272 block[13] = tmp2; 273 block[14] = tmp3; 274 block[15] = tmp4; 275 break; 276 case 12: block[12] |= tmp0; 277 block[13] = tmp1; 278 block[14] = tmp2; 279 block[15] = tmp3; 280 carry[ 0] = tmp4; 281 break; 282 case 13: block[13] |= tmp0; 283 block[14] = tmp1; 284 block[15] = tmp2; 285 carry[ 0] = tmp3; 286 carry[ 1] = tmp4; 287 break; 288 case 14: block[14] |= tmp0; 289 block[15] = tmp1; 290 carry[ 0] = tmp2; 291 carry[ 1] = tmp3; 292 carry[ 2] = tmp4; 293 break; 294 case 15: block[15] |= tmp0; 295 carry[ 0] = tmp1; 296 carry[ 1] = tmp2; 297 carry[ 2] = tmp3; 298 carry[ 3] = tmp4; 299 break; 300 } 301 302 u32 new_len = offset + append_len; 303 304 if (new_len >= 64) 305 { 306 new_len -= 64; 307 308 sha256_transform (block + 0, block + 4, block + 8, block + 12, digest); 309 310 block[ 0] = carry[0]; 311 block[ 1] = carry[1]; 312 block[ 2] = carry[2]; 313 block[ 3] = carry[3]; 314 block[ 4] = 0; 315 block[ 5] = 0; 316 block[ 6] = 0; 317 block[ 7] = 0; 318 block[ 8] = 0; 319 block[ 9] = 0; 320 block[10] = 0; 321 block[11] = 0; 322 block[12] = 0; 323 block[13] = 0; 324 block[14] = 0; 325 block[15] = 0; 326 } 327 328 return new_len; 329} 330 331DECLSPEC u32 memcat16s (u32 *block, const u32 offset, const u32 *append, const u32 append_len) 332{ 333 u32 in0 = append[0]; 334 u32 in1 = append[1]; 335 u32 in2 = append[2]; 336 u32 in3 = append[3]; 337 u32 in4 = append[4]; 338 339 #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC 340 const u32 tmp0 = hc_bytealign_be ( 0, in0, offset); 341 const u32 tmp1 = hc_bytealign_be (in0, in1, offset); 342 const u32 tmp2 = hc_bytealign_be (in1, in2, offset); 343 const u32 tmp3 = hc_bytealign_be (in2, in3, offset); 344 const u32 tmp4 = hc_bytealign_be (in3, in4, offset); 345 const u32 tmp5 = hc_bytealign_be (in4, 0, offset); 346 #endif 347 348 #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV 349 350 #if defined IS_NV 351 const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; 352 #endif 353 354 #if (defined IS_AMD || defined IS_HIP) 355 const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8)); 356 #endif 357 358 const u32 tmp0 = hc_byte_perm_S (in0, 0, selector); 359 const u32 tmp1 = hc_byte_perm_S (in1, in0, selector); 360 const u32 tmp2 = hc_byte_perm_S (in2, in1, selector); 361 const u32 tmp3 = hc_byte_perm_S (in3, in2, selector); 362 const u32 tmp4 = hc_byte_perm_S (in4, in3, selector); 363 const u32 tmp5 = hc_byte_perm_S (0, in4, selector); 364 #endif 365 366 switch (offset / 4) 367 { 368 case 0: block[ 0] |= tmp0; 369 block[ 1] = tmp1; 370 block[ 2] = tmp2; 371 block[ 3] = tmp3; 372 block[ 4] = tmp4; 373 block[ 5] = tmp5; 374 break; 375 case 1: block[ 1] |= tmp0; 376 block[ 2] = tmp1; 377 block[ 3] = tmp2; 378 block[ 4] = tmp3; 379 block[ 5] = tmp4; 380 block[ 6] = tmp5; 381 break; 382 case 2: block[ 2] |= tmp0; 383 block[ 3] = tmp1; 384 block[ 4] = tmp2; 385 block[ 5] = tmp3; 386 block[ 6] = tmp4; 387 block[ 7] = tmp5; 388 break; 389 case 3: block[ 3] |= tmp0; 390 block[ 4] = tmp1; 391 block[ 5] = tmp2; 392 block[ 6] = tmp3; 393 block[ 7] = tmp4; 394 block[ 8] = tmp5; 395 break; 396 case 4: block[ 4] |= tmp0; 397 block[ 5] = tmp1; 398 block[ 6] = tmp2; 399 block[ 7] = tmp3; 400 block[ 8] = tmp4; 401 block[ 9] = tmp5; 402 break; 403 case 5: block[ 5] |= tmp0; 404 block[ 6] = tmp1; 405 block[ 7] = tmp2; 406 block[ 8] = tmp3; 407 block[ 9] = tmp4; 408 block[10] = tmp5; 409 break; 410 case 6: block[ 6] |= tmp0; 411 block[ 7] = tmp1; 412 block[ 8] = tmp2; 413 block[ 9] = tmp3; 414 block[10] = tmp4; 415 block[11] = tmp5; 416 break; 417 case 7: block[ 7] |= tmp0; 418 block[ 8] = tmp1; 419 block[ 9] = tmp2; 420 block[10] = tmp3; 421 block[11] = tmp4; 422 block[12] = tmp5; 423 break; 424 case 8: block[ 8] |= tmp0; 425 block[ 9] = tmp1; 426 block[10] = tmp2; 427 block[11] = tmp3; 428 block[12] = tmp4; 429 block[13] = tmp5; 430 break; 431 case 9: block[ 9] |= tmp0; 432 block[10] = tmp1; 433 block[11] = tmp2; 434 block[12] = tmp3; 435 block[13] = tmp4; 436 block[14] = tmp5; 437 break; 438 case 10: block[10] |= tmp0; 439 block[11] = tmp1; 440 block[12] = tmp2; 441 block[13] = tmp3; 442 block[14] = tmp4; 443 block[15] = tmp5; 444 break; 445 case 11: block[11] |= tmp0; 446 block[12] = tmp1; 447 block[13] = tmp2; 448 block[14] = tmp3; 449 block[15] = tmp4; 450 break; 451 case 12: block[12] |= tmp0; 452 block[13] = tmp1; 453 block[14] = tmp2; 454 block[15] = tmp3; 455 break; 456 case 13: block[13] |= tmp0; 457 block[14] = tmp1; 458 block[15] = tmp2; 459 break; 460 case 14: block[14] |= tmp0; 461 block[15] = tmp1; 462 break; 463 case 15: block[15] |= tmp0; 464 break; 465 } 466 467 u32 new_len = offset + append_len; 468 469 return new_len; 470} 471 472DECLSPEC u32 memcat16sc (u32 *block, const u32 offset, const u32 *append, const u32 append_len, u32 *digest) 473{ 474 u32 in0 = append[0]; 475 u32 in1 = append[1]; 476 u32 in2 = append[2]; 477 u32 in3 = append[3]; 478 u32 in4 = append[4]; 479 480 #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC 481 const u32 tmp0 = hc_bytealign_be ( 0, in0, offset); 482 const u32 tmp1 = hc_bytealign_be (in0, in1, offset); 483 const u32 tmp2 = hc_bytealign_be (in1, in2, offset); 484 const u32 tmp3 = hc_bytealign_be (in2, in3, offset); 485 const u32 tmp4 = hc_bytealign_be (in3, in4, offset); 486 const u32 tmp5 = hc_bytealign_be (in4, 0, offset); 487 #endif 488 489 #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV 490 491 #if defined IS_NV 492 const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; 493 #endif 494 495 #if (defined IS_AMD || defined IS_HIP) 496 const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8)); 497 #endif 498 499 const u32 tmp0 = hc_byte_perm_S (in0, 0, selector); 500 const u32 tmp1 = hc_byte_perm_S (in1, in0, selector); 501 const u32 tmp2 = hc_byte_perm_S (in2, in1, selector); 502 const u32 tmp3 = hc_byte_perm_S (in3, in2, selector); 503 const u32 tmp4 = hc_byte_perm_S (in4, in3, selector); 504 const u32 tmp5 = hc_byte_perm_S (0, in4, selector); 505 #endif 506 507 u32 carry[5] = { 0 }; 508 509 switch (offset / 4) 510 { 511 case 0: block[ 0] |= tmp0; 512 block[ 1] = tmp1; 513 block[ 2] = tmp2; 514 block[ 3] = tmp3; 515 block[ 4] = tmp4; 516 block[ 5] = tmp5; 517 break; 518 case 1: block[ 1] |= tmp0; 519 block[ 2] = tmp1; 520 block[ 3] = tmp2; 521 block[ 4] = tmp3; 522 block[ 5] = tmp4; 523 block[ 6] = tmp5; 524 break; 525 case 2: block[ 2] |= tmp0; 526 block[ 3] = tmp1; 527 block[ 4] = tmp2; 528 block[ 5] = tmp3; 529 block[ 6] = tmp4; 530 block[ 7] = tmp5; 531 break; 532 case 3: block[ 3] |= tmp0; 533 block[ 4] = tmp1; 534 block[ 5] = tmp2; 535 block[ 6] = tmp3; 536 block[ 7] = tmp4; 537 block[ 8] = tmp5; 538 break; 539 case 4: block[ 4] |= tmp0; 540 block[ 5] = tmp1; 541 block[ 6] = tmp2; 542 block[ 7] = tmp3; 543 block[ 8] = tmp4; 544 block[ 9] = tmp5; 545 break; 546 case 5: block[ 5] |= tmp0; 547 block[ 6] = tmp1; 548 block[ 7] = tmp2; 549 block[ 8] = tmp3; 550 block[ 9] = tmp4; 551 block[10] = tmp5; 552 break; 553 case 6: block[ 6] |= tmp0; 554 block[ 7] = tmp1; 555 block[ 8] = tmp2; 556 block[ 9] = tmp3; 557 block[10] = tmp4; 558 block[11] = tmp5; 559 break; 560 case 7: block[ 7] |= tmp0; 561 block[ 8] = tmp1; 562 block[ 9] = tmp2; 563 block[10] = tmp3; 564 block[11] = tmp4; 565 block[12] = tmp5; 566 break; 567 case 8: block[ 8] |= tmp0; 568 block[ 9] = tmp1; 569 block[10] = tmp2; 570 block[11] = tmp3; 571 block[12] = tmp4; 572 block[13] = tmp5; 573 break; 574 case 9: block[ 9] |= tmp0; 575 block[10] = tmp1; 576 block[11] = tmp2; 577 block[12] = tmp3; 578 block[13] = tmp4; 579 block[14] = tmp5; 580 break; 581 case 10: block[10] |= tmp0; 582 block[11] = tmp1; 583 block[12] = tmp2; 584 block[13] = tmp3; 585 block[14] = tmp4; 586 block[15] = tmp5; 587 break; 588 case 11: block[11] |= tmp0; 589 block[12] = tmp1; 590 block[13] = tmp2; 591 block[14] = tmp3; 592 block[15] = tmp4; 593 carry[ 0] = tmp5; 594 break; 595 case 12: block[12] |= tmp0; 596 block[13] = tmp1; 597 block[14] = tmp2; 598 block[15] = tmp3; 599 carry[ 0] = tmp4; 600 carry[ 1] = tmp5; 601 break; 602 case 13: block[13] |= tmp0; 603 block[14] = tmp1; 604 block[15] = tmp2; 605 carry[ 0] = tmp3; 606 carry[ 1] = tmp4; 607 carry[ 2] = tmp5; 608 break; 609 case 14: block[14] |= tmp0; 610 block[15] = tmp1; 611 carry[ 0] = tmp2; 612 carry[ 1] = tmp3; 613 carry[ 2] = tmp4; 614 carry[ 3] = tmp5; 615 break; 616 case 15: block[15] |= tmp0; 617 carry[ 0] = tmp1; 618 carry[ 1] = tmp2; 619 carry[ 2] = tmp3; 620 carry[ 3] = tmp4; 621 carry[ 4] = tmp5; 622 break; 623 } 624 625 u32 new_len = offset + append_len; 626 627 if (new_len >= 64) 628 { 629 new_len -= 64; 630 631 sha256_transform (block + 0, block + 4, block + 8, block + 12, digest); 632 633 block[ 0] = carry[0]; 634 block[ 1] = carry[1]; 635 block[ 2] = carry[2]; 636 block[ 3] = carry[3]; 637 block[ 4] = carry[4]; 638 block[ 5] = 0; 639 block[ 6] = 0; 640 block[ 7] = 0; 641 block[ 8] = 0; 642 block[ 9] = 0; 643 block[10] = 0; 644 block[11] = 0; 645 block[12] = 0; 646 block[13] = 0; 647 block[14] = 0; 648 block[15] = 0; 649 } 650 651 return new_len; 652} 653 654DECLSPEC void truncate_block_5x4_be_S (u32 *w0, const u32 len) 655{ 656 switch (len) 657 { 658 case 0: 659 w0[0] = 0; 660 w0[1] = 0; 661 w0[2] = 0; 662 w0[3] = 0; 663 w0[4] = 0; 664 break; 665 666 case 1: 667 w0[0] &= 0xff000000; 668 w0[1] = 0; 669 w0[2] = 0; 670 w0[3] = 0; 671 w0[4] = 0; 672 break; 673 674 case 2: 675 w0[0] &= 0xffff0000; 676 w0[1] = 0; 677 w0[2] = 0; 678 w0[3] = 0; 679 w0[4] = 0; 680 break; 681 682 case 3: 683 w0[0] &= 0xffffff00; 684 w0[1] = 0; 685 w0[2] = 0; 686 w0[3] = 0; 687 w0[4] = 0; 688 break; 689 690 case 4: 691 w0[1] = 0; 692 w0[2] = 0; 693 w0[3] = 0; 694 w0[4] = 0; 695 break; 696 697 case 5: 698 w0[1] &= 0xff000000; 699 w0[2] = 0; 700 w0[3] = 0; 701 w0[4] = 0; 702 break; 703 704 case 6: 705 w0[1] &= 0xffff0000; 706 w0[2] = 0; 707 w0[3] = 0; 708 w0[4] = 0; 709 break; 710 711 case 7: 712 w0[1] &= 0xffffff00; 713 w0[2] = 0; 714 w0[3] = 0; 715 w0[4] = 0; 716 break; 717 718 case 8: 719 w0[2] = 0; 720 w0[3] = 0; 721 w0[4] = 0; 722 break; 723 724 case 9: 725 w0[2] &= 0xff000000; 726 w0[3] = 0; 727 w0[4] = 0; 728 break; 729 730 case 10: 731 w0[2] &= 0xffff0000; 732 w0[3] = 0; 733 w0[4] = 0; 734 break; 735 736 case 11: 737 w0[2] &= 0xffffff00; 738 w0[3] = 0; 739 w0[4] = 0; 740 break; 741 742 case 12: 743 w0[3] = 0; 744 w0[4] = 0; 745 break; 746 747 case 13: 748 w0[3] &= 0xff000000; 749 w0[4] = 0; 750 break; 751 752 case 14: 753 w0[3] &= 0xffff0000; 754 w0[4] = 0; 755 break; 756 757 case 15: 758 w0[3] &= 0xffffff00; 759 w0[4] = 0; 760 break; 761 762 case 16: 763 w0[4] = 0; 764 break; 765 766 case 17: 767 w0[4] &= 0xff000000; 768 break; 769 770 case 18: 771 w0[4] &= 0xffff0000; 772 break; 773 774 case 19: 775 w0[4] &= 0xffffff00; 776 break; 777 } 778} 779 780DECLSPEC u32 memcat20 (u32 *block, const u32 offset, const u32 *append, const u32 append_len) 781{ 782 u32 in0 = append[0]; 783 u32 in1 = append[1]; 784 u32 in2 = append[2]; 785 u32 in3 = append[3]; 786 787 #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC 788 const u32 tmp0 = hc_bytealign_be_S ( 0, in0, offset); 789 const u32 tmp1 = hc_bytealign_be_S (in0, in1, offset); 790 const u32 tmp2 = hc_bytealign_be_S (in1, in2, offset); 791 const u32 tmp3 = hc_bytealign_be_S (in2, in3, offset); 792 const u32 tmp4 = hc_bytealign_be_S (in3, 0, offset); 793 #endif 794 795 #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV 796 797 #if defined IS_NV 798 const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; 799 #endif 800 801 #if (defined IS_AMD || defined IS_HIP) 802 const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8)); 803 #endif 804 805 const u32 tmp0 = hc_byte_perm_S (in0, 0, selector); 806 const u32 tmp1 = hc_byte_perm_S (in1, in0, selector); 807 const u32 tmp2 = hc_byte_perm_S (in2, in1, selector); 808 const u32 tmp3 = hc_byte_perm_S (in3, in2, selector); 809 const u32 tmp4 = hc_byte_perm_S (0, in3, selector); 810 #endif 811 812 switch (offset / 4) 813 { 814 case 0: block[ 0] |= tmp0; 815 block[ 1] = tmp1; 816 block[ 2] = tmp2; 817 block[ 3] = tmp3; 818 block[ 4] = tmp4; 819 break; 820 case 1: block[ 1] |= tmp0; 821 block[ 2] = tmp1; 822 block[ 3] = tmp2; 823 block[ 4] = tmp3; 824 block[ 5] = tmp4; 825 break; 826 case 2: block[ 2] |= tmp0; 827 block[ 3] = tmp1; 828 block[ 4] = tmp2; 829 block[ 5] = tmp3; 830 block[ 6] = tmp4; 831 break; 832 case 3: block[ 3] |= tmp0; 833 block[ 4] = tmp1; 834 block[ 5] = tmp2; 835 block[ 6] = tmp3; 836 block[ 7] = tmp4; 837 break; 838 case 4: block[ 4] |= tmp0; 839 block[ 5] = tmp1; 840 block[ 6] = tmp2; 841 block[ 7] = tmp3; 842 block[ 8] = tmp4; 843 break; 844 case 5: block[ 5] |= tmp0; 845 block[ 6] = tmp1; 846 block[ 7] = tmp2; 847 block[ 8] = tmp3; 848 block[ 9] = tmp4; 849 break; 850 case 6: block[ 6] |= tmp0; 851 block[ 7] = tmp1; 852 block[ 8] = tmp2; 853 block[ 9] = tmp3; 854 block[10] = tmp4; 855 break; 856 case 7: block[ 7] |= tmp0; 857 block[ 8] = tmp1; 858 block[ 9] = tmp2; 859 block[10] = tmp3; 860 block[11] = tmp4; 861 break; 862 case 8: block[ 8] |= tmp0; 863 block[ 9] = tmp1; 864 block[10] = tmp2; 865 block[11] = tmp3; 866 block[12] = tmp4; 867 break; 868 case 9: block[ 9] |= tmp0; 869 block[10] = tmp1; 870 block[11] = tmp2; 871 block[12] = tmp3; 872 block[13] = tmp4; 873 break; 874 case 10: block[10] |= tmp0; 875 block[11] = tmp1; 876 block[12] = tmp2; 877 block[13] = tmp3; 878 block[14] = tmp4; 879 break; 880 case 11: block[11] |= tmp0; 881 block[12] = tmp1; 882 block[13] = tmp2; 883 block[14] = tmp3; 884 block[15] = tmp4; 885 break; 886 case 12: block[12] |= tmp0; 887 block[13] = tmp1; 888 block[14] = tmp2; 889 block[15] = tmp3; 890 block[16] = tmp4; 891 break; 892 case 13: block[13] |= tmp0; 893 block[14] = tmp1; 894 block[15] = tmp2; 895 block[16] = tmp3; 896 block[17] = tmp4; 897 break; 898 case 14: block[14] |= tmp0; 899 block[15] = tmp1; 900 block[16] = tmp2; 901 block[17] = tmp3; 902 block[18] = tmp4; 903 break; 904 case 15: block[15] |= tmp0; 905 block[16] = tmp1; 906 block[17] = tmp2; 907 block[18] = tmp3; 908 block[19] = tmp4; 909 break; 910 case 16: block[16] |= tmp0; 911 block[17] = tmp1; 912 block[18] = tmp2; 913 block[19] = tmp3; 914 block[20] = tmp4; 915 break; 916 case 17: block[17] |= tmp0; 917 block[18] = tmp1; 918 block[19] = tmp2; 919 block[20] = tmp3; 920 block[21] = tmp4; 921 break; 922 case 18: block[18] |= tmp0; 923 block[19] = tmp1; 924 block[20] = tmp2; 925 block[21] = tmp3; 926 block[22] = tmp4; 927 break; 928 case 19: block[19] |= tmp0; 929 block[20] = tmp1; 930 block[21] = tmp2; 931 block[22] = tmp3; 932 block[23] = tmp4; 933 break; 934 case 20: block[20] |= tmp0; 935 block[21] = tmp1; 936 block[22] = tmp2; 937 block[23] = tmp3; 938 block[24] = tmp4; 939 break; 940 } 941 942 return offset + append_len; 943} 944 945DECLSPEC u32 memcat20_x80 (u32 *block, const u32 offset, const u32 *append, const u32 append_len) 946{ 947 u32 in0 = append[0]; 948 u32 in1 = append[1]; 949 u32 in2 = append[2]; 950 u32 in3 = append[3]; 951 u32 in4 = 0x80000000; 952 953 #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC 954 const u32 tmp0 = hc_bytealign_be_S ( 0, in0, offset); 955 const u32 tmp1 = hc_bytealign_be_S (in0, in1, offset); 956 const u32 tmp2 = hc_bytealign_be_S (in1, in2, offset); 957 const u32 tmp3 = hc_bytealign_be_S (in2, in3, offset); 958 const u32 tmp4 = hc_bytealign_be_S (in3, in4, offset); 959 #endif 960 961 #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV 962 963 #if defined IS_NV 964 const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; 965 #endif 966 967 #if (defined IS_AMD || defined IS_HIP) 968 const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8)); 969 #endif 970 971 const u32 tmp0 = hc_byte_perm_S (in0, 0, selector); 972 const u32 tmp1 = hc_byte_perm_S (in1, in0, selector); 973 const u32 tmp2 = hc_byte_perm_S (in2, in1, selector); 974 const u32 tmp3 = hc_byte_perm_S (in3, in2, selector); 975 const u32 tmp4 = hc_byte_perm_S (in4, in3, selector); 976 #endif 977 978 switch (offset / 4) 979 { 980 case 0: block[ 0] |= tmp0; 981 block[ 1] = tmp1; 982 block[ 2] = tmp2; 983 block[ 3] = tmp3; 984 block[ 4] = tmp4; 985 break; 986 case 1: block[ 1] |= tmp0; 987 block[ 2] = tmp1; 988 block[ 3] = tmp2; 989 block[ 4] = tmp3; 990 block[ 5] = tmp4; 991 break; 992 case 2: block[ 2] |= tmp0; 993 block[ 3] = tmp1; 994 block[ 4] = tmp2; 995 block[ 5] = tmp3; 996 block[ 6] = tmp4; 997 break; 998 case 3: block[ 3] |= tmp0; 999 block[ 4] = tmp1; 1000 block[ 5] = tmp2; 1001 block[ 6] = tmp3; 1002 block[ 7] = tmp4; 1003 break; 1004 case 4: block[ 4] |= tmp0; 1005 block[ 5] = tmp1; 1006 block[ 6] = tmp2; 1007 block[ 7] = tmp3; 1008 block[ 8] = tmp4; 1009 break; 1010 case 5: block[ 5] |= tmp0; 1011 block[ 6] = tmp1; 1012 block[ 7] = tmp2; 1013 block[ 8] = tmp3; 1014 block[ 9] = tmp4; 1015 break; 1016 case 6: block[ 6] |= tmp0; 1017 block[ 7] = tmp1; 1018 block[ 8] = tmp2; 1019 block[ 9] = tmp3; 1020 block[10] = tmp4; 1021 break; 1022 case 7: block[ 7] |= tmp0; 1023 block[ 8] = tmp1; 1024 block[ 9] = tmp2; 1025 block[10] = tmp3; 1026 block[11] = tmp4; 1027 break; 1028 case 8: block[ 8] |= tmp0; 1029 block[ 9] = tmp1; 1030 block[10] = tmp2; 1031 block[11] = tmp3; 1032 block[12] = tmp4; 1033 break; 1034 case 9: block[ 9] |= tmp0; 1035 block[10] = tmp1; 1036 block[11] = tmp2; 1037 block[12] = tmp3; 1038 block[13] = tmp4; 1039 break; 1040 case 10: block[10] |= tmp0; 1041 block[11] = tmp1; 1042 block[12] = tmp2; 1043 block[13] = tmp3; 1044 block[14] = tmp4; 1045 break; 1046 case 11: block[11] |= tmp0; 1047 block[12] = tmp1; 1048 block[13] = tmp2; 1049 block[14] = tmp3; 1050 block[15] = tmp4; 1051 break; 1052 case 12: block[12] |= tmp0; 1053 block[13] = tmp1; 1054 block[14] = tmp2; 1055 block[15] = tmp3; 1056 block[16] = tmp4; 1057 break; 1058 case 13: block[13] |= tmp0; 1059 block[14] = tmp1; 1060 block[15] = tmp2; 1061 block[16] = tmp3; 1062 block[17] = tmp4; 1063 break; 1064 case 14: block[14] |= tmp0; 1065 block[15] = tmp1; 1066 block[16] = tmp2; 1067 block[17] = tmp3; 1068 block[18] = tmp4; 1069 break; 1070 case 15: block[15] |= tmp0; 1071 block[16] = tmp1; 1072 block[17] = tmp2; 1073 block[18] = tmp3; 1074 block[19] = tmp4; 1075 break; 1076 case 16: block[16] |= tmp0; 1077 block[17] = tmp1; 1078 block[18] = tmp2; 1079 block[19] = tmp3; 1080 block[20] = tmp4; 1081 break; 1082 case 17: block[17] |= tmp0; 1083 block[18] = tmp1; 1084 block[19] = tmp2; 1085 block[20] = tmp3; 1086 block[21] = tmp4; 1087 break; 1088 case 18: block[18] |= tmp0; 1089 block[19] = tmp1; 1090 block[20] = tmp2; 1091 block[21] = tmp3; 1092 block[22] = tmp4; 1093 break; 1094 case 19: block[19] |= tmp0; 1095 block[20] = tmp1; 1096 block[21] = tmp2; 1097 block[22] = tmp3; 1098 block[23] = tmp4; 1099 break; 1100 case 20: block[20] |= tmp0; 1101 block[21] = tmp1; 1102 block[22] = tmp2; 1103 block[23] = tmp3; 1104 block[24] = tmp4; 1105 break; 1106 } 1107 1108 return offset + append_len; 1109} 1110 1111DECLSPEC u32 memcat24 (u32 *block, const u32 offset, const u32 *append, const u32 append_len) 1112{ 1113 u32 in0 = append[0]; 1114 u32 in1 = append[1]; 1115 u32 in2 = append[2]; 1116 u32 in3 = append[3]; 1117 u32 in4 = append[4]; 1118 1119 #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC 1120 const u32 tmp0 = hc_bytealign_be_S ( 0, in0, offset); 1121 const u32 tmp1 = hc_bytealign_be_S (in0, in1, offset); 1122 const u32 tmp2 = hc_bytealign_be_S (in1, in2, offset); 1123 const u32 tmp3 = hc_bytealign_be_S (in2, in3, offset); 1124 const u32 tmp4 = hc_bytealign_be_S (in3, in4, offset); 1125 const u32 tmp5 = hc_bytealign_be_S (in4, 0, offset); 1126 #endif 1127 1128 #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV 1129 1130 #if defined IS_NV 1131 const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; 1132 #endif 1133 1134 #if (defined IS_AMD || defined IS_HIP) 1135 const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8)); 1136 #endif 1137 1138 const u32 tmp0 = hc_byte_perm_S (in0, 0, selector); 1139 const u32 tmp1 = hc_byte_perm_S (in1, in0, selector); 1140 const u32 tmp2 = hc_byte_perm_S (in2, in1, selector); 1141 const u32 tmp3 = hc_byte_perm_S (in3, in2, selector); 1142 const u32 tmp4 = hc_byte_perm_S (in4, in3, selector); 1143 const u32 tmp5 = hc_byte_perm_S (0, in4, selector); 1144 #endif 1145 1146 switch (offset / 4) 1147 { 1148 case 0: block[ 0] |= tmp0; 1149 block[ 1] = tmp1; 1150 block[ 2] = tmp2; 1151 block[ 3] = tmp3; 1152 block[ 4] = tmp4; 1153 block[ 5] = tmp5; 1154 break; 1155 case 1: block[ 1] |= tmp0; 1156 block[ 2] = tmp1; 1157 block[ 3] = tmp2; 1158 block[ 4] = tmp3; 1159 block[ 5] = tmp4; 1160 block[ 6] = tmp5; 1161 break; 1162 case 2: block[ 2] |= tmp0; 1163 block[ 3] = tmp1; 1164 block[ 4] = tmp2; 1165 block[ 5] = tmp3; 1166 block[ 6] = tmp4; 1167 block[ 7] = tmp5; 1168 break; 1169 case 3: block[ 3] |= tmp0; 1170 block[ 4] = tmp1; 1171 block[ 5] = tmp2; 1172 block[ 6] = tmp3; 1173 block[ 7] = tmp4; 1174 block[ 8] = tmp5; 1175 break; 1176 } 1177 1178 return offset + append_len; 1179} 1180 1181KERNEL_FQ void m07400_init (KERN_ATTR_TMPS (sha256crypt_tmp_t)) 1182{ 1183 /** 1184 * base 1185 */ 1186 1187 const u64 gid = get_global_id (0); 1188 1189 if (gid >= gid_max) return; 1190 1191 u32 w0[4]; 1192 1193 w0[0] = hc_swap32_S (pws[gid].i[0]); 1194 w0[1] = hc_swap32_S (pws[gid].i[1]); 1195 w0[2] = hc_swap32_S (pws[gid].i[2]); 1196 w0[3] = hc_swap32_S (pws[gid].i[3]); 1197 1198 const u32 pw_len = MIN (pws[gid].pw_len, 15); 1199 1200 /** 1201 * salt 1202 */ 1203 1204 u32 salt_buf[5]; 1205 1206 salt_buf[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[0]); 1207 salt_buf[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[1]); 1208 salt_buf[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[2]); 1209 salt_buf[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[3]); 1210 salt_buf[4] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[4]); 1211 1212 const u32 salt_len = MIN (salt_bufs[SALT_POS].salt_len, 20); 1213 1214 /** 1215 * buffers 1216 */ 1217 1218 u32 block_len; // never reaches > 64 1219 u32 transform_len; // required for w[15] = len * 8 1220 1221 u32 block[16]; 1222 1223 block[ 0] = 0; 1224 block[ 1] = 0; 1225 block[ 2] = 0; 1226 block[ 3] = 0; 1227 block[ 4] = 0; 1228 block[ 5] = 0; 1229 block[ 6] = 0; 1230 block[ 7] = 0; 1231 block[ 8] = 0; 1232 block[ 9] = 0; 1233 block[10] = 0; 1234 block[11] = 0; 1235 block[12] = 0; 1236 block[13] = 0; 1237 block[14] = 0; 1238 block[15] = 0; 1239 1240 u32 alt_result[8]; 1241 u32 p_bytes[8]; 1242 u32 s_bytes[8]; 1243 1244 /* Prepare for the real work. */ 1245 1246 block_len = 0; 1247 1248 /* Add key. */ 1249 1250 block_len = memcat16 (block, block_len, w0, pw_len); 1251 1252 /* Add salt. */ 1253 1254 block_len = memcat16s (block, block_len, salt_buf, salt_len); 1255 1256 /* Add key again. */ 1257 1258 block_len = memcat16 (block, block_len, w0, pw_len); 1259 1260 append_0x80_1x16 (block, block_len ^ 3); 1261 1262 block[15] = block_len * 8; 1263 1264 init_ctx (alt_result); 1265 1266 sha256_transform (block + 0, block + 4, block + 8, block + 12, alt_result); 1267 1268 u32 alt_result_tmp[8]; 1269 1270 alt_result_tmp[0] = alt_result[0]; 1271 alt_result_tmp[1] = alt_result[1]; 1272 alt_result_tmp[2] = alt_result[2]; 1273 alt_result_tmp[3] = alt_result[3]; 1274 alt_result_tmp[4] = 0; 1275 alt_result_tmp[5] = 0; 1276 alt_result_tmp[6] = 0; 1277 alt_result_tmp[7] = 0; 1278 1279 truncate_block_4x4_be_S (alt_result_tmp, pw_len); 1280 1281 block[ 0] = 0; 1282 block[ 1] = 0; 1283 block[ 2] = 0; 1284 block[ 3] = 0; 1285 block[ 4] = 0; 1286 block[ 5] = 0; 1287 block[ 6] = 0; 1288 block[ 7] = 0; 1289 block[ 8] = 0; 1290 block[ 9] = 0; 1291 block[10] = 0; 1292 block[11] = 0; 1293 block[12] = 0; 1294 block[13] = 0; 1295 block[14] = 0; 1296 block[15] = 0; 1297 1298 block_len = 0; 1299 1300 /* Add the key string. */ 1301 1302 block_len = memcat16 (block, block_len, w0, pw_len); 1303 1304 /* The last part is the salt string. This must be at most 8 1305 characters and it ends at the first `$' character (for 1306 compatibility with existing implementations). */ 1307 1308 block_len = memcat16s (block, block_len, salt_buf, salt_len); 1309 1310 /* Now get result of this (32 bytes) and add it to the other 1311 context. */ 1312 1313 block_len = memcat16 (block, block_len, alt_result_tmp, pw_len); 1314 1315 transform_len = block_len; 1316 1317 /* Take the binary representation of the length of the key and for every 1318 1 add the alternate sum, for every 0 the key. */ 1319 1320 alt_result_tmp[0] = alt_result[0]; 1321 alt_result_tmp[1] = alt_result[1]; 1322 alt_result_tmp[2] = alt_result[2]; 1323 alt_result_tmp[3] = alt_result[3]; 1324 alt_result_tmp[4] = alt_result[4]; 1325 alt_result_tmp[5] = alt_result[5]; 1326 alt_result_tmp[6] = alt_result[6]; 1327 alt_result_tmp[7] = alt_result[7]; 1328 1329 init_ctx (alt_result); 1330 1331 for (u32 j = pw_len; j; j >>= 1) 1332 { 1333 if (j & 1) 1334 { 1335 block_len = memcat16c (block, block_len, &alt_result_tmp[0], 16, alt_result); 1336 block_len = memcat16c (block, block_len, &alt_result_tmp[4], 16, alt_result); 1337 1338 transform_len += 32; 1339 } 1340 else 1341 { 1342 block_len = memcat16c (block, block_len, w0, pw_len, alt_result); 1343 1344 transform_len += pw_len; 1345 } 1346 } 1347 1348 append_0x80_1x16 (block, block_len ^ 3); 1349 1350 if (block_len >= 56) 1351 { 1352 sha256_transform (block + 0, block + 4, block + 8, block + 12, alt_result); 1353 1354 block[ 0] = 0; 1355 block[ 1] = 0; 1356 block[ 2] = 0; 1357 block[ 3] = 0; 1358 block[ 4] = 0; 1359 block[ 5] = 0; 1360 block[ 6] = 0; 1361 block[ 7] = 0; 1362 block[ 8] = 0; 1363 block[ 9] = 0; 1364 block[10] = 0; 1365 block[11] = 0; 1366 block[12] = 0; 1367 block[13] = 0; 1368 block[14] = 0; 1369 block[15] = 0; 1370 } 1371 1372 block[15] = transform_len * 8; 1373 1374 sha256_transform (block + 0, block + 4, block + 8, block + 12, alt_result); 1375 1376 tmps[gid].alt_result[0] = alt_result[0]; 1377 tmps[gid].alt_result[1] = alt_result[1]; 1378 tmps[gid].alt_result[2] = alt_result[2]; 1379 tmps[gid].alt_result[3] = alt_result[3]; 1380 tmps[gid].alt_result[4] = alt_result[4]; 1381 tmps[gid].alt_result[5] = alt_result[5]; 1382 tmps[gid].alt_result[6] = alt_result[6]; 1383 tmps[gid].alt_result[7] = alt_result[7]; 1384 1385 /* Start computation of P byte sequence. */ 1386 1387 transform_len = 0; 1388 1389 block[ 0] = 0; 1390 block[ 1] = 0; 1391 block[ 2] = 0; 1392 block[ 3] = 0; 1393 block[ 4] = 0; 1394 block[ 5] = 0; 1395 block[ 6] = 0; 1396 block[ 7] = 0; 1397 block[ 8] = 0; 1398 block[ 9] = 0; 1399 block[10] = 0; 1400 block[11] = 0; 1401 block[12] = 0; 1402 block[13] = 0; 1403 block[14] = 0; 1404 block[15] = 0; 1405 1406 block_len = 0; 1407 1408 /* For every character in the password add the entire password. */ 1409 1410 init_ctx (p_bytes); 1411 1412 for (u32 j = 0; j < pw_len; j++) 1413 { 1414 block_len = memcat16c (block, block_len, w0, pw_len, p_bytes); 1415 1416 transform_len += pw_len; 1417 } 1418 1419 /* Finish the digest. */ 1420 1421 append_0x80_1x16 (block, block_len ^ 3); 1422 1423 if (block_len >= 56) 1424 { 1425 sha256_transform (block + 0, block + 4, block + 8, block + 12, p_bytes); 1426 1427 block[ 0] = 0; 1428 block[ 1] = 0; 1429 block[ 2] = 0; 1430 block[ 3] = 0; 1431 block[ 4] = 0; 1432 block[ 5] = 0; 1433 block[ 6] = 0; 1434 block[ 7] = 0; 1435 block[ 8] = 0; 1436 block[ 9] = 0; 1437 block[10] = 0; 1438 block[11] = 0; 1439 block[12] = 0; 1440 block[13] = 0; 1441 block[14] = 0; 1442 block[15] = 0; 1443 } 1444 1445 block[15] = transform_len * 8; 1446 1447 sha256_transform (block + 0, block + 4, block + 8, block + 12, p_bytes); 1448 1449 truncate_block_4x4_be_S (p_bytes, pw_len); 1450 1451 tmps[gid].p_bytes[0] = p_bytes[0]; 1452 tmps[gid].p_bytes[1] = p_bytes[1]; 1453 tmps[gid].p_bytes[2] = p_bytes[2]; 1454 tmps[gid].p_bytes[3] = p_bytes[3]; 1455 1456 /* Start computation of S byte sequence. */ 1457 1458 transform_len = 0; 1459 1460 block[ 0] = 0; 1461 block[ 1] = 0; 1462 block[ 2] = 0; 1463 block[ 3] = 0; 1464 block[ 4] = 0; 1465 block[ 5] = 0; 1466 block[ 6] = 0; 1467 block[ 7] = 0; 1468 block[ 8] = 0; 1469 block[ 9] = 0; 1470 block[10] = 0; 1471 block[11] = 0; 1472 block[12] = 0; 1473 block[13] = 0; 1474 block[14] = 0; 1475 block[15] = 0; 1476 1477 block_len = 0; 1478 1479 /* For every character in the password add the entire password. */ 1480 1481 init_ctx (s_bytes); 1482 1483 for (u32 j = 0; j < 16 + (alt_result[0] >> 24); j++) 1484 { 1485 block_len = memcat16sc (block, block_len, salt_buf, salt_len, s_bytes); 1486 1487 transform_len += salt_len; 1488 } 1489 1490 /* Finish the digest. */ 1491 1492 append_0x80_1x16 (block, block_len ^ 3); 1493 1494 if (block_len >= 56) 1495 { 1496 sha256_transform (block + 0, block + 4, block + 8, block + 12, s_bytes); 1497 1498 block[ 0] = 0; 1499 block[ 1] = 0; 1500 block[ 2] = 0; 1501 block[ 3] = 0; 1502 block[ 4] = 0; 1503 block[ 5] = 0; 1504 block[ 6] = 0; 1505 block[ 7] = 0; 1506 block[ 8] = 0; 1507 block[ 9] = 0; 1508 block[10] = 0; 1509 block[11] = 0; 1510 block[12] = 0; 1511 block[13] = 0; 1512 block[14] = 0; 1513 block[15] = 0; 1514 } 1515 1516 block[15] = transform_len * 8; 1517 1518 sha256_transform (block + 0, block + 4, block + 8, block + 12, s_bytes); 1519 1520 truncate_block_5x4_be_S (s_bytes, salt_len); 1521 1522 tmps[gid].s_bytes[0] = s_bytes[0]; 1523 tmps[gid].s_bytes[1] = s_bytes[1]; 1524 tmps[gid].s_bytes[2] = s_bytes[2]; 1525 tmps[gid].s_bytes[3] = s_bytes[3]; 1526 tmps[gid].s_bytes[4] = s_bytes[4]; 1527} 1528 1529KERNEL_FQ void m07400_loop (KERN_ATTR_TMPS (sha256crypt_tmp_t)) 1530{ 1531 /** 1532 * base 1533 */ 1534 1535 const u64 gid = get_global_id (0); 1536 1537 if (gid >= gid_max) return; 1538 1539 const u32 pw_len = MIN (pws[gid].pw_len, 15); 1540 1541 /** 1542 * base 1543 */ 1544 1545 u32 p_bytes[4]; 1546 1547 p_bytes[0] = tmps[gid].p_bytes[0]; 1548 p_bytes[1] = tmps[gid].p_bytes[1]; 1549 p_bytes[2] = tmps[gid].p_bytes[2]; 1550 p_bytes[3] = tmps[gid].p_bytes[3]; 1551 1552 u32 s_bytes[5]; 1553 1554 s_bytes[0] = tmps[gid].s_bytes[0]; 1555 s_bytes[1] = tmps[gid].s_bytes[1]; 1556 s_bytes[2] = tmps[gid].s_bytes[2]; 1557 s_bytes[3] = tmps[gid].s_bytes[3]; 1558 s_bytes[4] = tmps[gid].s_bytes[4]; // 4 extra bytes for MySQL 7.5+ hashes 1559 1560 u32 alt_result[8]; 1561 1562 alt_result[0] = tmps[gid].alt_result[0]; 1563 alt_result[1] = tmps[gid].alt_result[1]; 1564 alt_result[2] = tmps[gid].alt_result[2]; 1565 alt_result[3] = tmps[gid].alt_result[3]; 1566 alt_result[4] = tmps[gid].alt_result[4]; 1567 alt_result[5] = tmps[gid].alt_result[5]; 1568 alt_result[6] = tmps[gid].alt_result[6]; 1569 alt_result[7] = tmps[gid].alt_result[7]; 1570 1571 const u32 salt_len = MIN (salt_bufs[SALT_POS].salt_len, 20); 1572 1573 // just an optimization 1574 1575 u32 p_bytes_x80[4]; 1576 1577 p_bytes_x80[0] = p_bytes[0]; 1578 p_bytes_x80[1] = p_bytes[1]; 1579 p_bytes_x80[2] = p_bytes[2]; 1580 p_bytes_x80[3] = p_bytes[3]; 1581 1582 append_0x80_1x4_S (p_bytes_x80, pw_len ^ 3); 1583 1584 /* Repeatedly run the collected hash value through SHA256 to burn 1585 CPU cycles. */ 1586 1587 for (u32 i = 0, j = loop_pos; i < loop_cnt; i++, j++) 1588 { 1589 u32 tmp[8]; 1590 1591 init_ctx (tmp); 1592 1593 u32 block[25]; 1594 1595 u32 block_len = 0; 1596 1597 const u32 j1 = (j & 1) ? 1 : 0; 1598 const u32 j3 = (j % 3) ? 1 : 0; 1599 const u32 j7 = (j % 7) ? 1 : 0; 1600 1601 if (j1) 1602 { 1603 block[ 0] = p_bytes[0]; 1604 block[ 1] = p_bytes[1]; 1605 block[ 2] = p_bytes[2]; 1606 block[ 3] = p_bytes[3]; 1607 block[ 4] = 0; 1608 block[ 5] = 0; 1609 block[ 6] = 0; 1610 block[ 7] = 0; 1611 block[ 8] = 0; 1612 block[ 9] = 0; 1613 block[10] = 0; 1614 block[11] = 0; 1615 block[12] = 0; 1616 block[13] = 0; 1617 block[14] = 0; 1618 block[15] = 0; 1619 block[16] = 0; 1620 block[17] = 0; 1621 block[18] = 0; 1622 block[19] = 0; 1623 block[20] = 0; 1624 block[21] = 0; 1625 block[22] = 0; 1626 block[23] = 0; 1627 block[24] = 0; 1628 1629 block_len = pw_len; 1630 1631 if (j3) 1632 { 1633 block_len = memcat24 (block, block_len, s_bytes, salt_len); 1634 } 1635 } 1636 else 1637 { 1638 block[ 0] = alt_result[0]; 1639 block[ 1] = alt_result[1]; 1640 block[ 2] = alt_result[2]; 1641 block[ 3] = alt_result[3]; 1642 block[ 4] = alt_result[4]; 1643 block[ 5] = alt_result[5]; 1644 block[ 6] = alt_result[6]; 1645 block[ 7] = alt_result[7]; 1646 block[ 8] = 0; 1647 block[ 9] = 0; 1648 block[10] = 0; 1649 block[11] = 0; 1650 block[12] = 0; 1651 block[13] = 0; 1652 block[14] = 0; 1653 block[15] = 0; 1654 block[16] = 0; 1655 block[17] = 0; 1656 block[18] = 0; 1657 block[19] = 0; 1658 block[20] = 0; 1659 block[21] = 0; 1660 block[22] = 0; 1661 block[23] = 0; 1662 block[24] = 0; 1663 1664 block_len = 32; 1665 1666 if (j3) 1667 { 1668 block[ 8] = s_bytes[0]; 1669 block[ 9] = s_bytes[1]; 1670 block[10] = s_bytes[2]; 1671 block[11] = s_bytes[3]; 1672 block[12] = s_bytes[4]; 1673 1674 block_len += salt_len; 1675 } 1676 } 1677 1678 if (j7) 1679 { 1680 block_len = memcat20 (block, block_len, p_bytes, pw_len); 1681 } 1682 1683 if (j1) 1684 { 1685 block_len = memcat20 (block, block_len, &alt_result[0], 16); 1686 block_len = memcat20_x80 (block, block_len, &alt_result[4], 16); 1687 } 1688 else 1689 { 1690 block_len = memcat20 (block, block_len, p_bytes_x80, pw_len); 1691 } 1692 1693 if (block_len >= 56) 1694 { 1695 sha256_transform (block + 0, block + 4, block + 8, block + 12, tmp); 1696 1697 block[ 0] = block[16]; 1698 block[ 1] = block[17]; 1699 block[ 2] = block[18]; 1700 block[ 3] = block[19]; 1701 block[ 4] = block[20]; 1702 block[ 5] = block[21]; 1703 block[ 6] = block[22]; 1704 block[ 7] = block[23]; 1705 block[ 8] = block[24]; 1706 block[ 9] = 0; 1707 block[10] = 0; 1708 block[11] = 0; 1709 block[12] = 0; 1710 block[13] = 0; 1711 block[14] = 0; 1712 block[15] = 0; 1713 } 1714 1715 block[14] = 0; 1716 block[15] = block_len * 8; 1717 1718 sha256_transform (block + 0, block + 4, block + 8, block + 12, tmp); 1719 1720 alt_result[0] = tmp[0]; 1721 alt_result[1] = tmp[1]; 1722 alt_result[2] = tmp[2]; 1723 alt_result[3] = tmp[3]; 1724 alt_result[4] = tmp[4]; 1725 alt_result[5] = tmp[5]; 1726 alt_result[6] = tmp[6]; 1727 alt_result[7] = tmp[7]; 1728 } 1729 1730 tmps[gid].alt_result[0] = alt_result[0]; 1731 tmps[gid].alt_result[1] = alt_result[1]; 1732 tmps[gid].alt_result[2] = alt_result[2]; 1733 tmps[gid].alt_result[3] = alt_result[3]; 1734 tmps[gid].alt_result[4] = alt_result[4]; 1735 tmps[gid].alt_result[5] = alt_result[5]; 1736 tmps[gid].alt_result[6] = alt_result[6]; 1737 tmps[gid].alt_result[7] = alt_result[7]; 1738} 1739 1740KERNEL_FQ void m07400_comp (KERN_ATTR_TMPS (sha256crypt_tmp_t)) 1741{ 1742 /** 1743 * base 1744 */ 1745 1746 const u64 gid = get_global_id (0); 1747 1748 if (gid >= gid_max) return; 1749 1750 const u64 lid = get_local_id (0); 1751 1752 const u32 r0 = hc_swap32_S (tmps[gid].alt_result[0]); 1753 const u32 r1 = hc_swap32_S (tmps[gid].alt_result[1]); 1754 const u32 r2 = hc_swap32_S (tmps[gid].alt_result[2]); 1755 const u32 r3 = hc_swap32_S (tmps[gid].alt_result[3]); 1756 1757 #define il_pos 0 1758 1759 #ifdef KERNEL_STATIC 1760 #include COMPARE_M 1761 #endif 1762} 1763