1#if defined(__x86_64__) 2 3#if defined(__ELF__) && (defined(__linux__) || defined(__FreeBSD__)) 4.section .note.GNU-stack,"",%progbits 5#endif 6 7#if defined(__ELF__) && defined(__CET__) && defined(__has_include) 8#if __has_include(<cet.h>) 9#include <cet.h> 10#endif 11#endif 12 13#if !defined(_CET_ENDBR) 14#define _CET_ENDBR 15#endif 16 17#ifdef __APPLE__ 18#define HIDDEN .private_extern 19#else 20#define HIDDEN .hidden 21#endif 22 23.intel_syntax noprefix 24HIDDEN blake3_hash_many_sse41 25HIDDEN _blake3_hash_many_sse41 26HIDDEN blake3_compress_in_place_sse41 27HIDDEN _blake3_compress_in_place_sse41 28HIDDEN blake3_compress_xof_sse41 29HIDDEN _blake3_compress_xof_sse41 30.global blake3_hash_many_sse41 31.global _blake3_hash_many_sse41 32.global blake3_compress_in_place_sse41 33.global _blake3_compress_in_place_sse41 34.global blake3_compress_xof_sse41 35.global _blake3_compress_xof_sse41 36#ifdef __APPLE__ 37.text 38#else 39.section .text 40#endif 41 .p2align 6 42_blake3_hash_many_sse41: 43blake3_hash_many_sse41: 44 _CET_ENDBR 45 push r15 46 push r14 47 push r13 48 push r12 49 push rbx 50 push rbp 51 mov rbp, rsp 52 sub rsp, 360 53 and rsp, 0xFFFFFFFFFFFFFFC0 54 neg r9d 55 movd xmm0, r9d 56 pshufd xmm0, xmm0, 0x00 57 movdqa xmmword ptr [rsp+0x130], xmm0 58 movdqa xmm1, xmm0 59 pand xmm1, xmmword ptr [ADD0+rip] 60 pand xmm0, xmmword ptr [ADD1+rip] 61 movdqa xmmword ptr [rsp+0x150], xmm0 62 movd xmm0, r8d 63 pshufd xmm0, xmm0, 0x00 64 paddd xmm0, xmm1 65 movdqa xmmword ptr [rsp+0x110], xmm0 66 pxor xmm0, xmmword ptr [CMP_MSB_MASK+rip] 67 pxor xmm1, xmmword ptr [CMP_MSB_MASK+rip] 68 pcmpgtd xmm1, xmm0 69 shr r8, 32 70 movd xmm2, r8d 71 pshufd xmm2, xmm2, 0x00 72 psubd xmm2, xmm1 73 movdqa xmmword ptr [rsp+0x120], xmm2 74 mov rbx, qword ptr [rbp+0x50] 75 mov r15, rdx 76 shl r15, 6 77 movzx r13d, byte ptr [rbp+0x38] 78 movzx r12d, byte ptr [rbp+0x48] 79 cmp rsi, 4 80 jc 3f 812: 82 movdqu xmm3, xmmword ptr [rcx] 83 pshufd xmm0, xmm3, 0x00 84 pshufd xmm1, xmm3, 0x55 85 
pshufd xmm2, xmm3, 0xAA 86 pshufd xmm3, xmm3, 0xFF 87 movdqu xmm7, xmmword ptr [rcx+0x10] 88 pshufd xmm4, xmm7, 0x00 89 pshufd xmm5, xmm7, 0x55 90 pshufd xmm6, xmm7, 0xAA 91 pshufd xmm7, xmm7, 0xFF 92 mov r8, qword ptr [rdi] 93 mov r9, qword ptr [rdi+0x8] 94 mov r10, qword ptr [rdi+0x10] 95 mov r11, qword ptr [rdi+0x18] 96 movzx eax, byte ptr [rbp+0x40] 97 or eax, r13d 98 xor edx, edx 999: 100 mov r14d, eax 101 or eax, r12d 102 add rdx, 64 103 cmp rdx, r15 104 cmovne eax, r14d 105 movdqu xmm8, xmmword ptr [r8+rdx-0x40] 106 movdqu xmm9, xmmword ptr [r9+rdx-0x40] 107 movdqu xmm10, xmmword ptr [r10+rdx-0x40] 108 movdqu xmm11, xmmword ptr [r11+rdx-0x40] 109 movdqa xmm12, xmm8 110 punpckldq xmm8, xmm9 111 punpckhdq xmm12, xmm9 112 movdqa xmm14, xmm10 113 punpckldq xmm10, xmm11 114 punpckhdq xmm14, xmm11 115 movdqa xmm9, xmm8 116 punpcklqdq xmm8, xmm10 117 punpckhqdq xmm9, xmm10 118 movdqa xmm13, xmm12 119 punpcklqdq xmm12, xmm14 120 punpckhqdq xmm13, xmm14 121 movdqa xmmword ptr [rsp], xmm8 122 movdqa xmmword ptr [rsp+0x10], xmm9 123 movdqa xmmword ptr [rsp+0x20], xmm12 124 movdqa xmmword ptr [rsp+0x30], xmm13 125 movdqu xmm8, xmmword ptr [r8+rdx-0x30] 126 movdqu xmm9, xmmword ptr [r9+rdx-0x30] 127 movdqu xmm10, xmmword ptr [r10+rdx-0x30] 128 movdqu xmm11, xmmword ptr [r11+rdx-0x30] 129 movdqa xmm12, xmm8 130 punpckldq xmm8, xmm9 131 punpckhdq xmm12, xmm9 132 movdqa xmm14, xmm10 133 punpckldq xmm10, xmm11 134 punpckhdq xmm14, xmm11 135 movdqa xmm9, xmm8 136 punpcklqdq xmm8, xmm10 137 punpckhqdq xmm9, xmm10 138 movdqa xmm13, xmm12 139 punpcklqdq xmm12, xmm14 140 punpckhqdq xmm13, xmm14 141 movdqa xmmword ptr [rsp+0x40], xmm8 142 movdqa xmmword ptr [rsp+0x50], xmm9 143 movdqa xmmword ptr [rsp+0x60], xmm12 144 movdqa xmmword ptr [rsp+0x70], xmm13 145 movdqu xmm8, xmmword ptr [r8+rdx-0x20] 146 movdqu xmm9, xmmword ptr [r9+rdx-0x20] 147 movdqu xmm10, xmmword ptr [r10+rdx-0x20] 148 movdqu xmm11, xmmword ptr [r11+rdx-0x20] 149 movdqa xmm12, xmm8 150 punpckldq xmm8, xmm9 151 
punpckhdq xmm12, xmm9 152 movdqa xmm14, xmm10 153 punpckldq xmm10, xmm11 154 punpckhdq xmm14, xmm11 155 movdqa xmm9, xmm8 156 punpcklqdq xmm8, xmm10 157 punpckhqdq xmm9, xmm10 158 movdqa xmm13, xmm12 159 punpcklqdq xmm12, xmm14 160 punpckhqdq xmm13, xmm14 161 movdqa xmmword ptr [rsp+0x80], xmm8 162 movdqa xmmword ptr [rsp+0x90], xmm9 163 movdqa xmmword ptr [rsp+0xA0], xmm12 164 movdqa xmmword ptr [rsp+0xB0], xmm13 165 movdqu xmm8, xmmword ptr [r8+rdx-0x10] 166 movdqu xmm9, xmmword ptr [r9+rdx-0x10] 167 movdqu xmm10, xmmword ptr [r10+rdx-0x10] 168 movdqu xmm11, xmmword ptr [r11+rdx-0x10] 169 movdqa xmm12, xmm8 170 punpckldq xmm8, xmm9 171 punpckhdq xmm12, xmm9 172 movdqa xmm14, xmm10 173 punpckldq xmm10, xmm11 174 punpckhdq xmm14, xmm11 175 movdqa xmm9, xmm8 176 punpcklqdq xmm8, xmm10 177 punpckhqdq xmm9, xmm10 178 movdqa xmm13, xmm12 179 punpcklqdq xmm12, xmm14 180 punpckhqdq xmm13, xmm14 181 movdqa xmmword ptr [rsp+0xC0], xmm8 182 movdqa xmmword ptr [rsp+0xD0], xmm9 183 movdqa xmmword ptr [rsp+0xE0], xmm12 184 movdqa xmmword ptr [rsp+0xF0], xmm13 185 movdqa xmm9, xmmword ptr [BLAKE3_IV_1+rip] 186 movdqa xmm10, xmmword ptr [BLAKE3_IV_2+rip] 187 movdqa xmm11, xmmword ptr [BLAKE3_IV_3+rip] 188 movdqa xmm12, xmmword ptr [rsp+0x110] 189 movdqa xmm13, xmmword ptr [rsp+0x120] 190 movdqa xmm14, xmmword ptr [BLAKE3_BLOCK_LEN+rip] 191 movd xmm15, eax 192 pshufd xmm15, xmm15, 0x00 193 prefetcht0 [r8+rdx+0x80] 194 prefetcht0 [r9+rdx+0x80] 195 prefetcht0 [r10+rdx+0x80] 196 prefetcht0 [r11+rdx+0x80] 197 paddd xmm0, xmmword ptr [rsp] 198 paddd xmm1, xmmword ptr [rsp+0x20] 199 paddd xmm2, xmmword ptr [rsp+0x40] 200 paddd xmm3, xmmword ptr [rsp+0x60] 201 paddd xmm0, xmm4 202 paddd xmm1, xmm5 203 paddd xmm2, xmm6 204 paddd xmm3, xmm7 205 pxor xmm12, xmm0 206 pxor xmm13, xmm1 207 pxor xmm14, xmm2 208 pxor xmm15, xmm3 209 movdqa xmm8, xmmword ptr [ROT16+rip] 210 pshufb xmm12, xmm8 211 pshufb xmm13, xmm8 212 pshufb xmm14, xmm8 213 pshufb xmm15, xmm8 214 movdqa xmm8, xmmword ptr 
[BLAKE3_IV_0+rip] 215 paddd xmm8, xmm12 216 paddd xmm9, xmm13 217 paddd xmm10, xmm14 218 paddd xmm11, xmm15 219 pxor xmm4, xmm8 220 pxor xmm5, xmm9 221 pxor xmm6, xmm10 222 pxor xmm7, xmm11 223 movdqa xmmword ptr [rsp+0x100], xmm8 224 movdqa xmm8, xmm4 225 psrld xmm8, 12 226 pslld xmm4, 20 227 por xmm4, xmm8 228 movdqa xmm8, xmm5 229 psrld xmm8, 12 230 pslld xmm5, 20 231 por xmm5, xmm8 232 movdqa xmm8, xmm6 233 psrld xmm8, 12 234 pslld xmm6, 20 235 por xmm6, xmm8 236 movdqa xmm8, xmm7 237 psrld xmm8, 12 238 pslld xmm7, 20 239 por xmm7, xmm8 240 paddd xmm0, xmmword ptr [rsp+0x10] 241 paddd xmm1, xmmword ptr [rsp+0x30] 242 paddd xmm2, xmmword ptr [rsp+0x50] 243 paddd xmm3, xmmword ptr [rsp+0x70] 244 paddd xmm0, xmm4 245 paddd xmm1, xmm5 246 paddd xmm2, xmm6 247 paddd xmm3, xmm7 248 pxor xmm12, xmm0 249 pxor xmm13, xmm1 250 pxor xmm14, xmm2 251 pxor xmm15, xmm3 252 movdqa xmm8, xmmword ptr [ROT8+rip] 253 pshufb xmm12, xmm8 254 pshufb xmm13, xmm8 255 pshufb xmm14, xmm8 256 pshufb xmm15, xmm8 257 movdqa xmm8, xmmword ptr [rsp+0x100] 258 paddd xmm8, xmm12 259 paddd xmm9, xmm13 260 paddd xmm10, xmm14 261 paddd xmm11, xmm15 262 pxor xmm4, xmm8 263 pxor xmm5, xmm9 264 pxor xmm6, xmm10 265 pxor xmm7, xmm11 266 movdqa xmmword ptr [rsp+0x100], xmm8 267 movdqa xmm8, xmm4 268 psrld xmm8, 7 269 pslld xmm4, 25 270 por xmm4, xmm8 271 movdqa xmm8, xmm5 272 psrld xmm8, 7 273 pslld xmm5, 25 274 por xmm5, xmm8 275 movdqa xmm8, xmm6 276 psrld xmm8, 7 277 pslld xmm6, 25 278 por xmm6, xmm8 279 movdqa xmm8, xmm7 280 psrld xmm8, 7 281 pslld xmm7, 25 282 por xmm7, xmm8 283 paddd xmm0, xmmword ptr [rsp+0x80] 284 paddd xmm1, xmmword ptr [rsp+0xA0] 285 paddd xmm2, xmmword ptr [rsp+0xC0] 286 paddd xmm3, xmmword ptr [rsp+0xE0] 287 paddd xmm0, xmm5 288 paddd xmm1, xmm6 289 paddd xmm2, xmm7 290 paddd xmm3, xmm4 291 pxor xmm15, xmm0 292 pxor xmm12, xmm1 293 pxor xmm13, xmm2 294 pxor xmm14, xmm3 295 movdqa xmm8, xmmword ptr [ROT16+rip] 296 pshufb xmm15, xmm8 297 pshufb xmm12, xmm8 298 pshufb xmm13, 
xmm8 299 pshufb xmm14, xmm8 300 paddd xmm10, xmm15 301 paddd xmm11, xmm12 302 movdqa xmm8, xmmword ptr [rsp+0x100] 303 paddd xmm8, xmm13 304 paddd xmm9, xmm14 305 pxor xmm5, xmm10 306 pxor xmm6, xmm11 307 pxor xmm7, xmm8 308 pxor xmm4, xmm9 309 movdqa xmmword ptr [rsp+0x100], xmm8 310 movdqa xmm8, xmm5 311 psrld xmm8, 12 312 pslld xmm5, 20 313 por xmm5, xmm8 314 movdqa xmm8, xmm6 315 psrld xmm8, 12 316 pslld xmm6, 20 317 por xmm6, xmm8 318 movdqa xmm8, xmm7 319 psrld xmm8, 12 320 pslld xmm7, 20 321 por xmm7, xmm8 322 movdqa xmm8, xmm4 323 psrld xmm8, 12 324 pslld xmm4, 20 325 por xmm4, xmm8 326 paddd xmm0, xmmword ptr [rsp+0x90] 327 paddd xmm1, xmmword ptr [rsp+0xB0] 328 paddd xmm2, xmmword ptr [rsp+0xD0] 329 paddd xmm3, xmmword ptr [rsp+0xF0] 330 paddd xmm0, xmm5 331 paddd xmm1, xmm6 332 paddd xmm2, xmm7 333 paddd xmm3, xmm4 334 pxor xmm15, xmm0 335 pxor xmm12, xmm1 336 pxor xmm13, xmm2 337 pxor xmm14, xmm3 338 movdqa xmm8, xmmword ptr [ROT8+rip] 339 pshufb xmm15, xmm8 340 pshufb xmm12, xmm8 341 pshufb xmm13, xmm8 342 pshufb xmm14, xmm8 343 paddd xmm10, xmm15 344 paddd xmm11, xmm12 345 movdqa xmm8, xmmword ptr [rsp+0x100] 346 paddd xmm8, xmm13 347 paddd xmm9, xmm14 348 pxor xmm5, xmm10 349 pxor xmm6, xmm11 350 pxor xmm7, xmm8 351 pxor xmm4, xmm9 352 movdqa xmmword ptr [rsp+0x100], xmm8 353 movdqa xmm8, xmm5 354 psrld xmm8, 7 355 pslld xmm5, 25 356 por xmm5, xmm8 357 movdqa xmm8, xmm6 358 psrld xmm8, 7 359 pslld xmm6, 25 360 por xmm6, xmm8 361 movdqa xmm8, xmm7 362 psrld xmm8, 7 363 pslld xmm7, 25 364 por xmm7, xmm8 365 movdqa xmm8, xmm4 366 psrld xmm8, 7 367 pslld xmm4, 25 368 por xmm4, xmm8 369 paddd xmm0, xmmword ptr [rsp+0x20] 370 paddd xmm1, xmmword ptr [rsp+0x30] 371 paddd xmm2, xmmword ptr [rsp+0x70] 372 paddd xmm3, xmmword ptr [rsp+0x40] 373 paddd xmm0, xmm4 374 paddd xmm1, xmm5 375 paddd xmm2, xmm6 376 paddd xmm3, xmm7 377 pxor xmm12, xmm0 378 pxor xmm13, xmm1 379 pxor xmm14, xmm2 380 pxor xmm15, xmm3 381 movdqa xmm8, xmmword ptr [ROT16+rip] 382 pshufb 
xmm12, xmm8 383 pshufb xmm13, xmm8 384 pshufb xmm14, xmm8 385 pshufb xmm15, xmm8 386 movdqa xmm8, xmmword ptr [rsp+0x100] 387 paddd xmm8, xmm12 388 paddd xmm9, xmm13 389 paddd xmm10, xmm14 390 paddd xmm11, xmm15 391 pxor xmm4, xmm8 392 pxor xmm5, xmm9 393 pxor xmm6, xmm10 394 pxor xmm7, xmm11 395 movdqa xmmword ptr [rsp+0x100], xmm8 396 movdqa xmm8, xmm4 397 psrld xmm8, 12 398 pslld xmm4, 20 399 por xmm4, xmm8 400 movdqa xmm8, xmm5 401 psrld xmm8, 12 402 pslld xmm5, 20 403 por xmm5, xmm8 404 movdqa xmm8, xmm6 405 psrld xmm8, 12 406 pslld xmm6, 20 407 por xmm6, xmm8 408 movdqa xmm8, xmm7 409 psrld xmm8, 12 410 pslld xmm7, 20 411 por xmm7, xmm8 412 paddd xmm0, xmmword ptr [rsp+0x60] 413 paddd xmm1, xmmword ptr [rsp+0xA0] 414 paddd xmm2, xmmword ptr [rsp] 415 paddd xmm3, xmmword ptr [rsp+0xD0] 416 paddd xmm0, xmm4 417 paddd xmm1, xmm5 418 paddd xmm2, xmm6 419 paddd xmm3, xmm7 420 pxor xmm12, xmm0 421 pxor xmm13, xmm1 422 pxor xmm14, xmm2 423 pxor xmm15, xmm3 424 movdqa xmm8, xmmword ptr [ROT8+rip] 425 pshufb xmm12, xmm8 426 pshufb xmm13, xmm8 427 pshufb xmm14, xmm8 428 pshufb xmm15, xmm8 429 movdqa xmm8, xmmword ptr [rsp+0x100] 430 paddd xmm8, xmm12 431 paddd xmm9, xmm13 432 paddd xmm10, xmm14 433 paddd xmm11, xmm15 434 pxor xmm4, xmm8 435 pxor xmm5, xmm9 436 pxor xmm6, xmm10 437 pxor xmm7, xmm11 438 movdqa xmmword ptr [rsp+0x100], xmm8 439 movdqa xmm8, xmm4 440 psrld xmm8, 7 441 pslld xmm4, 25 442 por xmm4, xmm8 443 movdqa xmm8, xmm5 444 psrld xmm8, 7 445 pslld xmm5, 25 446 por xmm5, xmm8 447 movdqa xmm8, xmm6 448 psrld xmm8, 7 449 pslld xmm6, 25 450 por xmm6, xmm8 451 movdqa xmm8, xmm7 452 psrld xmm8, 7 453 pslld xmm7, 25 454 por xmm7, xmm8 455 paddd xmm0, xmmword ptr [rsp+0x10] 456 paddd xmm1, xmmword ptr [rsp+0xC0] 457 paddd xmm2, xmmword ptr [rsp+0x90] 458 paddd xmm3, xmmword ptr [rsp+0xF0] 459 paddd xmm0, xmm5 460 paddd xmm1, xmm6 461 paddd xmm2, xmm7 462 paddd xmm3, xmm4 463 pxor xmm15, xmm0 464 pxor xmm12, xmm1 465 pxor xmm13, xmm2 466 pxor xmm14, xmm3 467 
movdqa xmm8, xmmword ptr [ROT16+rip] 468 pshufb xmm15, xmm8 469 pshufb xmm12, xmm8 470 pshufb xmm13, xmm8 471 pshufb xmm14, xmm8 472 paddd xmm10, xmm15 473 paddd xmm11, xmm12 474 movdqa xmm8, xmmword ptr [rsp+0x100] 475 paddd xmm8, xmm13 476 paddd xmm9, xmm14 477 pxor xmm5, xmm10 478 pxor xmm6, xmm11 479 pxor xmm7, xmm8 480 pxor xmm4, xmm9 481 movdqa xmmword ptr [rsp+0x100], xmm8 482 movdqa xmm8, xmm5 483 psrld xmm8, 12 484 pslld xmm5, 20 485 por xmm5, xmm8 486 movdqa xmm8, xmm6 487 psrld xmm8, 12 488 pslld xmm6, 20 489 por xmm6, xmm8 490 movdqa xmm8, xmm7 491 psrld xmm8, 12 492 pslld xmm7, 20 493 por xmm7, xmm8 494 movdqa xmm8, xmm4 495 psrld xmm8, 12 496 pslld xmm4, 20 497 por xmm4, xmm8 498 paddd xmm0, xmmword ptr [rsp+0xB0] 499 paddd xmm1, xmmword ptr [rsp+0x50] 500 paddd xmm2, xmmword ptr [rsp+0xE0] 501 paddd xmm3, xmmword ptr [rsp+0x80] 502 paddd xmm0, xmm5 503 paddd xmm1, xmm6 504 paddd xmm2, xmm7 505 paddd xmm3, xmm4 506 pxor xmm15, xmm0 507 pxor xmm12, xmm1 508 pxor xmm13, xmm2 509 pxor xmm14, xmm3 510 movdqa xmm8, xmmword ptr [ROT8+rip] 511 pshufb xmm15, xmm8 512 pshufb xmm12, xmm8 513 pshufb xmm13, xmm8 514 pshufb xmm14, xmm8 515 paddd xmm10, xmm15 516 paddd xmm11, xmm12 517 movdqa xmm8, xmmword ptr [rsp+0x100] 518 paddd xmm8, xmm13 519 paddd xmm9, xmm14 520 pxor xmm5, xmm10 521 pxor xmm6, xmm11 522 pxor xmm7, xmm8 523 pxor xmm4, xmm9 524 movdqa xmmword ptr [rsp+0x100], xmm8 525 movdqa xmm8, xmm5 526 psrld xmm8, 7 527 pslld xmm5, 25 528 por xmm5, xmm8 529 movdqa xmm8, xmm6 530 psrld xmm8, 7 531 pslld xmm6, 25 532 por xmm6, xmm8 533 movdqa xmm8, xmm7 534 psrld xmm8, 7 535 pslld xmm7, 25 536 por xmm7, xmm8 537 movdqa xmm8, xmm4 538 psrld xmm8, 7 539 pslld xmm4, 25 540 por xmm4, xmm8 541 paddd xmm0, xmmword ptr [rsp+0x30] 542 paddd xmm1, xmmword ptr [rsp+0xA0] 543 paddd xmm2, xmmword ptr [rsp+0xD0] 544 paddd xmm3, xmmword ptr [rsp+0x70] 545 paddd xmm0, xmm4 546 paddd xmm1, xmm5 547 paddd xmm2, xmm6 548 paddd xmm3, xmm7 549 pxor xmm12, xmm0 550 pxor xmm13, 
xmm1 551 pxor xmm14, xmm2 552 pxor xmm15, xmm3 553 movdqa xmm8, xmmword ptr [ROT16+rip] 554 pshufb xmm12, xmm8 555 pshufb xmm13, xmm8 556 pshufb xmm14, xmm8 557 pshufb xmm15, xmm8 558 movdqa xmm8, xmmword ptr [rsp+0x100] 559 paddd xmm8, xmm12 560 paddd xmm9, xmm13 561 paddd xmm10, xmm14 562 paddd xmm11, xmm15 563 pxor xmm4, xmm8 564 pxor xmm5, xmm9 565 pxor xmm6, xmm10 566 pxor xmm7, xmm11 567 movdqa xmmword ptr [rsp+0x100], xmm8 568 movdqa xmm8, xmm4 569 psrld xmm8, 12 570 pslld xmm4, 20 571 por xmm4, xmm8 572 movdqa xmm8, xmm5 573 psrld xmm8, 12 574 pslld xmm5, 20 575 por xmm5, xmm8 576 movdqa xmm8, xmm6 577 psrld xmm8, 12 578 pslld xmm6, 20 579 por xmm6, xmm8 580 movdqa xmm8, xmm7 581 psrld xmm8, 12 582 pslld xmm7, 20 583 por xmm7, xmm8 584 paddd xmm0, xmmword ptr [rsp+0x40] 585 paddd xmm1, xmmword ptr [rsp+0xC0] 586 paddd xmm2, xmmword ptr [rsp+0x20] 587 paddd xmm3, xmmword ptr [rsp+0xE0] 588 paddd xmm0, xmm4 589 paddd xmm1, xmm5 590 paddd xmm2, xmm6 591 paddd xmm3, xmm7 592 pxor xmm12, xmm0 593 pxor xmm13, xmm1 594 pxor xmm14, xmm2 595 pxor xmm15, xmm3 596 movdqa xmm8, xmmword ptr [ROT8+rip] 597 pshufb xmm12, xmm8 598 pshufb xmm13, xmm8 599 pshufb xmm14, xmm8 600 pshufb xmm15, xmm8 601 movdqa xmm8, xmmword ptr [rsp+0x100] 602 paddd xmm8, xmm12 603 paddd xmm9, xmm13 604 paddd xmm10, xmm14 605 paddd xmm11, xmm15 606 pxor xmm4, xmm8 607 pxor xmm5, xmm9 608 pxor xmm6, xmm10 609 pxor xmm7, xmm11 610 movdqa xmmword ptr [rsp+0x100], xmm8 611 movdqa xmm8, xmm4 612 psrld xmm8, 7 613 pslld xmm4, 25 614 por xmm4, xmm8 615 movdqa xmm8, xmm5 616 psrld xmm8, 7 617 pslld xmm5, 25 618 por xmm5, xmm8 619 movdqa xmm8, xmm6 620 psrld xmm8, 7 621 pslld xmm6, 25 622 por xmm6, xmm8 623 movdqa xmm8, xmm7 624 psrld xmm8, 7 625 pslld xmm7, 25 626 por xmm7, xmm8 627 paddd xmm0, xmmword ptr [rsp+0x60] 628 paddd xmm1, xmmword ptr [rsp+0x90] 629 paddd xmm2, xmmword ptr [rsp+0xB0] 630 paddd xmm3, xmmword ptr [rsp+0x80] 631 paddd xmm0, xmm5 632 paddd xmm1, xmm6 633 paddd xmm2, xmm7 634 
paddd xmm3, xmm4 635 pxor xmm15, xmm0 636 pxor xmm12, xmm1 637 pxor xmm13, xmm2 638 pxor xmm14, xmm3 639 movdqa xmm8, xmmword ptr [ROT16+rip] 640 pshufb xmm15, xmm8 641 pshufb xmm12, xmm8 642 pshufb xmm13, xmm8 643 pshufb xmm14, xmm8 644 paddd xmm10, xmm15 645 paddd xmm11, xmm12 646 movdqa xmm8, xmmword ptr [rsp+0x100] 647 paddd xmm8, xmm13 648 paddd xmm9, xmm14 649 pxor xmm5, xmm10 650 pxor xmm6, xmm11 651 pxor xmm7, xmm8 652 pxor xmm4, xmm9 653 movdqa xmmword ptr [rsp+0x100], xmm8 654 movdqa xmm8, xmm5 655 psrld xmm8, 12 656 pslld xmm5, 20 657 por xmm5, xmm8 658 movdqa xmm8, xmm6 659 psrld xmm8, 12 660 pslld xmm6, 20 661 por xmm6, xmm8 662 movdqa xmm8, xmm7 663 psrld xmm8, 12 664 pslld xmm7, 20 665 por xmm7, xmm8 666 movdqa xmm8, xmm4 667 psrld xmm8, 12 668 pslld xmm4, 20 669 por xmm4, xmm8 670 paddd xmm0, xmmword ptr [rsp+0x50] 671 paddd xmm1, xmmword ptr [rsp] 672 paddd xmm2, xmmword ptr [rsp+0xF0] 673 paddd xmm3, xmmword ptr [rsp+0x10] 674 paddd xmm0, xmm5 675 paddd xmm1, xmm6 676 paddd xmm2, xmm7 677 paddd xmm3, xmm4 678 pxor xmm15, xmm0 679 pxor xmm12, xmm1 680 pxor xmm13, xmm2 681 pxor xmm14, xmm3 682 movdqa xmm8, xmmword ptr [ROT8+rip] 683 pshufb xmm15, xmm8 684 pshufb xmm12, xmm8 685 pshufb xmm13, xmm8 686 pshufb xmm14, xmm8 687 paddd xmm10, xmm15 688 paddd xmm11, xmm12 689 movdqa xmm8, xmmword ptr [rsp+0x100] 690 paddd xmm8, xmm13 691 paddd xmm9, xmm14 692 pxor xmm5, xmm10 693 pxor xmm6, xmm11 694 pxor xmm7, xmm8 695 pxor xmm4, xmm9 696 movdqa xmmword ptr [rsp+0x100], xmm8 697 movdqa xmm8, xmm5 698 psrld xmm8, 7 699 pslld xmm5, 25 700 por xmm5, xmm8 701 movdqa xmm8, xmm6 702 psrld xmm8, 7 703 pslld xmm6, 25 704 por xmm6, xmm8 705 movdqa xmm8, xmm7 706 psrld xmm8, 7 707 pslld xmm7, 25 708 por xmm7, xmm8 709 movdqa xmm8, xmm4 710 psrld xmm8, 7 711 pslld xmm4, 25 712 por xmm4, xmm8 713 paddd xmm0, xmmword ptr [rsp+0xA0] 714 paddd xmm1, xmmword ptr [rsp+0xC0] 715 paddd xmm2, xmmword ptr [rsp+0xE0] 716 paddd xmm3, xmmword ptr [rsp+0xD0] 717 paddd xmm0, xmm4 
718 paddd xmm1, xmm5 719 paddd xmm2, xmm6 720 paddd xmm3, xmm7 721 pxor xmm12, xmm0 722 pxor xmm13, xmm1 723 pxor xmm14, xmm2 724 pxor xmm15, xmm3 725 movdqa xmm8, xmmword ptr [ROT16+rip] 726 pshufb xmm12, xmm8 727 pshufb xmm13, xmm8 728 pshufb xmm14, xmm8 729 pshufb xmm15, xmm8 730 movdqa xmm8, xmmword ptr [rsp+0x100] 731 paddd xmm8, xmm12 732 paddd xmm9, xmm13 733 paddd xmm10, xmm14 734 paddd xmm11, xmm15 735 pxor xmm4, xmm8 736 pxor xmm5, xmm9 737 pxor xmm6, xmm10 738 pxor xmm7, xmm11 739 movdqa xmmword ptr [rsp+0x100], xmm8 740 movdqa xmm8, xmm4 741 psrld xmm8, 12 742 pslld xmm4, 20 743 por xmm4, xmm8 744 movdqa xmm8, xmm5 745 psrld xmm8, 12 746 pslld xmm5, 20 747 por xmm5, xmm8 748 movdqa xmm8, xmm6 749 psrld xmm8, 12 750 pslld xmm6, 20 751 por xmm6, xmm8 752 movdqa xmm8, xmm7 753 psrld xmm8, 12 754 pslld xmm7, 20 755 por xmm7, xmm8 756 paddd xmm0, xmmword ptr [rsp+0x70] 757 paddd xmm1, xmmword ptr [rsp+0x90] 758 paddd xmm2, xmmword ptr [rsp+0x30] 759 paddd xmm3, xmmword ptr [rsp+0xF0] 760 paddd xmm0, xmm4 761 paddd xmm1, xmm5 762 paddd xmm2, xmm6 763 paddd xmm3, xmm7 764 pxor xmm12, xmm0 765 pxor xmm13, xmm1 766 pxor xmm14, xmm2 767 pxor xmm15, xmm3 768 movdqa xmm8, xmmword ptr [ROT8+rip] 769 pshufb xmm12, xmm8 770 pshufb xmm13, xmm8 771 pshufb xmm14, xmm8 772 pshufb xmm15, xmm8 773 movdqa xmm8, xmmword ptr [rsp+0x100] 774 paddd xmm8, xmm12 775 paddd xmm9, xmm13 776 paddd xmm10, xmm14 777 paddd xmm11, xmm15 778 pxor xmm4, xmm8 779 pxor xmm5, xmm9 780 pxor xmm6, xmm10 781 pxor xmm7, xmm11 782 movdqa xmmword ptr [rsp+0x100], xmm8 783 movdqa xmm8, xmm4 784 psrld xmm8, 7 785 pslld xmm4, 25 786 por xmm4, xmm8 787 movdqa xmm8, xmm5 788 psrld xmm8, 7 789 pslld xmm5, 25 790 por xmm5, xmm8 791 movdqa xmm8, xmm6 792 psrld xmm8, 7 793 pslld xmm6, 25 794 por xmm6, xmm8 795 movdqa xmm8, xmm7 796 psrld xmm8, 7 797 pslld xmm7, 25 798 por xmm7, xmm8 799 paddd xmm0, xmmword ptr [rsp+0x40] 800 paddd xmm1, xmmword ptr [rsp+0xB0] 801 paddd xmm2, xmmword ptr [rsp+0x50] 802 paddd 
xmm3, xmmword ptr [rsp+0x10] 803 paddd xmm0, xmm5 804 paddd xmm1, xmm6 805 paddd xmm2, xmm7 806 paddd xmm3, xmm4 807 pxor xmm15, xmm0 808 pxor xmm12, xmm1 809 pxor xmm13, xmm2 810 pxor xmm14, xmm3 811 movdqa xmm8, xmmword ptr [ROT16+rip] 812 pshufb xmm15, xmm8 813 pshufb xmm12, xmm8 814 pshufb xmm13, xmm8 815 pshufb xmm14, xmm8 816 paddd xmm10, xmm15 817 paddd xmm11, xmm12 818 movdqa xmm8, xmmword ptr [rsp+0x100] 819 paddd xmm8, xmm13 820 paddd xmm9, xmm14 821 pxor xmm5, xmm10 822 pxor xmm6, xmm11 823 pxor xmm7, xmm8 824 pxor xmm4, xmm9 825 movdqa xmmword ptr [rsp+0x100], xmm8 826 movdqa xmm8, xmm5 827 psrld xmm8, 12 828 pslld xmm5, 20 829 por xmm5, xmm8 830 movdqa xmm8, xmm6 831 psrld xmm8, 12 832 pslld xmm6, 20 833 por xmm6, xmm8 834 movdqa xmm8, xmm7 835 psrld xmm8, 12 836 pslld xmm7, 20 837 por xmm7, xmm8 838 movdqa xmm8, xmm4 839 psrld xmm8, 12 840 pslld xmm4, 20 841 por xmm4, xmm8 842 paddd xmm0, xmmword ptr [rsp] 843 paddd xmm1, xmmword ptr [rsp+0x20] 844 paddd xmm2, xmmword ptr [rsp+0x80] 845 paddd xmm3, xmmword ptr [rsp+0x60] 846 paddd xmm0, xmm5 847 paddd xmm1, xmm6 848 paddd xmm2, xmm7 849 paddd xmm3, xmm4 850 pxor xmm15, xmm0 851 pxor xmm12, xmm1 852 pxor xmm13, xmm2 853 pxor xmm14, xmm3 854 movdqa xmm8, xmmword ptr [ROT8+rip] 855 pshufb xmm15, xmm8 856 pshufb xmm12, xmm8 857 pshufb xmm13, xmm8 858 pshufb xmm14, xmm8 859 paddd xmm10, xmm15 860 paddd xmm11, xmm12 861 movdqa xmm8, xmmword ptr [rsp+0x100] 862 paddd xmm8, xmm13 863 paddd xmm9, xmm14 864 pxor xmm5, xmm10 865 pxor xmm6, xmm11 866 pxor xmm7, xmm8 867 pxor xmm4, xmm9 868 movdqa xmmword ptr [rsp+0x100], xmm8 869 movdqa xmm8, xmm5 870 psrld xmm8, 7 871 pslld xmm5, 25 872 por xmm5, xmm8 873 movdqa xmm8, xmm6 874 psrld xmm8, 7 875 pslld xmm6, 25 876 por xmm6, xmm8 877 movdqa xmm8, xmm7 878 psrld xmm8, 7 879 pslld xmm7, 25 880 por xmm7, xmm8 881 movdqa xmm8, xmm4 882 psrld xmm8, 7 883 pslld xmm4, 25 884 por xmm4, xmm8 885 paddd xmm0, xmmword ptr [rsp+0xC0] 886 paddd xmm1, xmmword ptr [rsp+0x90] 887 
paddd xmm2, xmmword ptr [rsp+0xF0] 888 paddd xmm3, xmmword ptr [rsp+0xE0] 889 paddd xmm0, xmm4 890 paddd xmm1, xmm5 891 paddd xmm2, xmm6 892 paddd xmm3, xmm7 893 pxor xmm12, xmm0 894 pxor xmm13, xmm1 895 pxor xmm14, xmm2 896 pxor xmm15, xmm3 897 movdqa xmm8, xmmword ptr [ROT16+rip] 898 pshufb xmm12, xmm8 899 pshufb xmm13, xmm8 900 pshufb xmm14, xmm8 901 pshufb xmm15, xmm8 902 movdqa xmm8, xmmword ptr [rsp+0x100] 903 paddd xmm8, xmm12 904 paddd xmm9, xmm13 905 paddd xmm10, xmm14 906 paddd xmm11, xmm15 907 pxor xmm4, xmm8 908 pxor xmm5, xmm9 909 pxor xmm6, xmm10 910 pxor xmm7, xmm11 911 movdqa xmmword ptr [rsp+0x100], xmm8 912 movdqa xmm8, xmm4 913 psrld xmm8, 12 914 pslld xmm4, 20 915 por xmm4, xmm8 916 movdqa xmm8, xmm5 917 psrld xmm8, 12 918 pslld xmm5, 20 919 por xmm5, xmm8 920 movdqa xmm8, xmm6 921 psrld xmm8, 12 922 pslld xmm6, 20 923 por xmm6, xmm8 924 movdqa xmm8, xmm7 925 psrld xmm8, 12 926 pslld xmm7, 20 927 por xmm7, xmm8 928 paddd xmm0, xmmword ptr [rsp+0xD0] 929 paddd xmm1, xmmword ptr [rsp+0xB0] 930 paddd xmm2, xmmword ptr [rsp+0xA0] 931 paddd xmm3, xmmword ptr [rsp+0x80] 932 paddd xmm0, xmm4 933 paddd xmm1, xmm5 934 paddd xmm2, xmm6 935 paddd xmm3, xmm7 936 pxor xmm12, xmm0 937 pxor xmm13, xmm1 938 pxor xmm14, xmm2 939 pxor xmm15, xmm3 940 movdqa xmm8, xmmword ptr [ROT8+rip] 941 pshufb xmm12, xmm8 942 pshufb xmm13, xmm8 943 pshufb xmm14, xmm8 944 pshufb xmm15, xmm8 945 movdqa xmm8, xmmword ptr [rsp+0x100] 946 paddd xmm8, xmm12 947 paddd xmm9, xmm13 948 paddd xmm10, xmm14 949 paddd xmm11, xmm15 950 pxor xmm4, xmm8 951 pxor xmm5, xmm9 952 pxor xmm6, xmm10 953 pxor xmm7, xmm11 954 movdqa xmmword ptr [rsp+0x100], xmm8 955 movdqa xmm8, xmm4 956 psrld xmm8, 7 957 pslld xmm4, 25 958 por xmm4, xmm8 959 movdqa xmm8, xmm5 960 psrld xmm8, 7 961 pslld xmm5, 25 962 por xmm5, xmm8 963 movdqa xmm8, xmm6 964 psrld xmm8, 7 965 pslld xmm6, 25 966 por xmm6, xmm8 967 movdqa xmm8, xmm7 968 psrld xmm8, 7 969 pslld xmm7, 25 970 por xmm7, xmm8 971 paddd xmm0, xmmword ptr 
[rsp+0x70] 972 paddd xmm1, xmmword ptr [rsp+0x50] 973 paddd xmm2, xmmword ptr [rsp] 974 paddd xmm3, xmmword ptr [rsp+0x60] 975 paddd xmm0, xmm5 976 paddd xmm1, xmm6 977 paddd xmm2, xmm7 978 paddd xmm3, xmm4 979 pxor xmm15, xmm0 980 pxor xmm12, xmm1 981 pxor xmm13, xmm2 982 pxor xmm14, xmm3 983 movdqa xmm8, xmmword ptr [ROT16+rip] 984 pshufb xmm15, xmm8 985 pshufb xmm12, xmm8 986 pshufb xmm13, xmm8 987 pshufb xmm14, xmm8 988 paddd xmm10, xmm15 989 paddd xmm11, xmm12 990 movdqa xmm8, xmmword ptr [rsp+0x100] 991 paddd xmm8, xmm13 992 paddd xmm9, xmm14 993 pxor xmm5, xmm10 994 pxor xmm6, xmm11 995 pxor xmm7, xmm8 996 pxor xmm4, xmm9 997 movdqa xmmword ptr [rsp+0x100], xmm8 998 movdqa xmm8, xmm5 999 psrld xmm8, 12 1000 pslld xmm5, 20 1001 por xmm5, xmm8 1002 movdqa xmm8, xmm6 1003 psrld xmm8, 12 1004 pslld xmm6, 20 1005 por xmm6, xmm8 1006 movdqa xmm8, xmm7 1007 psrld xmm8, 12 1008 pslld xmm7, 20 1009 por xmm7, xmm8 1010 movdqa xmm8, xmm4 1011 psrld xmm8, 12 1012 pslld xmm4, 20 1013 por xmm4, xmm8 1014 paddd xmm0, xmmword ptr [rsp+0x20] 1015 paddd xmm1, xmmword ptr [rsp+0x30] 1016 paddd xmm2, xmmword ptr [rsp+0x10] 1017 paddd xmm3, xmmword ptr [rsp+0x40] 1018 paddd xmm0, xmm5 1019 paddd xmm1, xmm6 1020 paddd xmm2, xmm7 1021 paddd xmm3, xmm4 1022 pxor xmm15, xmm0 1023 pxor xmm12, xmm1 1024 pxor xmm13, xmm2 1025 pxor xmm14, xmm3 1026 movdqa xmm8, xmmword ptr [ROT8+rip] 1027 pshufb xmm15, xmm8 1028 pshufb xmm12, xmm8 1029 pshufb xmm13, xmm8 1030 pshufb xmm14, xmm8 1031 paddd xmm10, xmm15 1032 paddd xmm11, xmm12 1033 movdqa xmm8, xmmword ptr [rsp+0x100] 1034 paddd xmm8, xmm13 1035 paddd xmm9, xmm14 1036 pxor xmm5, xmm10 1037 pxor xmm6, xmm11 1038 pxor xmm7, xmm8 1039 pxor xmm4, xmm9 1040 movdqa xmmword ptr [rsp+0x100], xmm8 1041 movdqa xmm8, xmm5 1042 psrld xmm8, 7 1043 pslld xmm5, 25 1044 por xmm5, xmm8 1045 movdqa xmm8, xmm6 1046 psrld xmm8, 7 1047 pslld xmm6, 25 1048 por xmm6, xmm8 1049 movdqa xmm8, xmm7 1050 psrld xmm8, 7 1051 pslld xmm7, 25 1052 por xmm7, xmm8 1053 
movdqa xmm8, xmm4 1054 psrld xmm8, 7 1055 pslld xmm4, 25 1056 por xmm4, xmm8 1057 paddd xmm0, xmmword ptr [rsp+0x90] 1058 paddd xmm1, xmmword ptr [rsp+0xB0] 1059 paddd xmm2, xmmword ptr [rsp+0x80] 1060 paddd xmm3, xmmword ptr [rsp+0xF0] 1061 paddd xmm0, xmm4 1062 paddd xmm1, xmm5 1063 paddd xmm2, xmm6 1064 paddd xmm3, xmm7 1065 pxor xmm12, xmm0 1066 pxor xmm13, xmm1 1067 pxor xmm14, xmm2 1068 pxor xmm15, xmm3 1069 movdqa xmm8, xmmword ptr [ROT16+rip] 1070 pshufb xmm12, xmm8 1071 pshufb xmm13, xmm8 1072 pshufb xmm14, xmm8 1073 pshufb xmm15, xmm8 1074 movdqa xmm8, xmmword ptr [rsp+0x100] 1075 paddd xmm8, xmm12 1076 paddd xmm9, xmm13 1077 paddd xmm10, xmm14 1078 paddd xmm11, xmm15 1079 pxor xmm4, xmm8 1080 pxor xmm5, xmm9 1081 pxor xmm6, xmm10 1082 pxor xmm7, xmm11 1083 movdqa xmmword ptr [rsp+0x100], xmm8 1084 movdqa xmm8, xmm4 1085 psrld xmm8, 12 1086 pslld xmm4, 20 1087 por xmm4, xmm8 1088 movdqa xmm8, xmm5 1089 psrld xmm8, 12 1090 pslld xmm5, 20 1091 por xmm5, xmm8 1092 movdqa xmm8, xmm6 1093 psrld xmm8, 12 1094 pslld xmm6, 20 1095 por xmm6, xmm8 1096 movdqa xmm8, xmm7 1097 psrld xmm8, 12 1098 pslld xmm7, 20 1099 por xmm7, xmm8 1100 paddd xmm0, xmmword ptr [rsp+0xE0] 1101 paddd xmm1, xmmword ptr [rsp+0x50] 1102 paddd xmm2, xmmword ptr [rsp+0xC0] 1103 paddd xmm3, xmmword ptr [rsp+0x10] 1104 paddd xmm0, xmm4 1105 paddd xmm1, xmm5 1106 paddd xmm2, xmm6 1107 paddd xmm3, xmm7 1108 pxor xmm12, xmm0 1109 pxor xmm13, xmm1 1110 pxor xmm14, xmm2 1111 pxor xmm15, xmm3 1112 movdqa xmm8, xmmword ptr [ROT8+rip] 1113 pshufb xmm12, xmm8 1114 pshufb xmm13, xmm8 1115 pshufb xmm14, xmm8 1116 pshufb xmm15, xmm8 1117 movdqa xmm8, xmmword ptr [rsp+0x100] 1118 paddd xmm8, xmm12 1119 paddd xmm9, xmm13 1120 paddd xmm10, xmm14 1121 paddd xmm11, xmm15 1122 pxor xmm4, xmm8 1123 pxor xmm5, xmm9 1124 pxor xmm6, xmm10 1125 pxor xmm7, xmm11 1126 movdqa xmmword ptr [rsp+0x100], xmm8 1127 movdqa xmm8, xmm4 1128 psrld xmm8, 7 1129 pslld xmm4, 25 1130 por xmm4, xmm8 1131 movdqa xmm8, xmm5 1132 psrld 
xmm8, 7 1133 pslld xmm5, 25 1134 por xmm5, xmm8 1135 movdqa xmm8, xmm6 1136 psrld xmm8, 7 1137 pslld xmm6, 25 1138 por xmm6, xmm8 1139 movdqa xmm8, xmm7 1140 psrld xmm8, 7 1141 pslld xmm7, 25 1142 por xmm7, xmm8 1143 paddd xmm0, xmmword ptr [rsp+0xD0] 1144 paddd xmm1, xmmword ptr [rsp] 1145 paddd xmm2, xmmword ptr [rsp+0x20] 1146 paddd xmm3, xmmword ptr [rsp+0x40] 1147 paddd xmm0, xmm5 1148 paddd xmm1, xmm6 1149 paddd xmm2, xmm7 1150 paddd xmm3, xmm4 1151 pxor xmm15, xmm0 1152 pxor xmm12, xmm1 1153 pxor xmm13, xmm2 1154 pxor xmm14, xmm3 1155 movdqa xmm8, xmmword ptr [ROT16+rip] 1156 pshufb xmm15, xmm8 1157 pshufb xmm12, xmm8 1158 pshufb xmm13, xmm8 1159 pshufb xmm14, xmm8 1160 paddd xmm10, xmm15 1161 paddd xmm11, xmm12 1162 movdqa xmm8, xmmword ptr [rsp+0x100] 1163 paddd xmm8, xmm13 1164 paddd xmm9, xmm14 1165 pxor xmm5, xmm10 1166 pxor xmm6, xmm11 1167 pxor xmm7, xmm8 1168 pxor xmm4, xmm9 1169 movdqa xmmword ptr [rsp+0x100], xmm8 1170 movdqa xmm8, xmm5 1171 psrld xmm8, 12 1172 pslld xmm5, 20 1173 por xmm5, xmm8 1174 movdqa xmm8, xmm6 1175 psrld xmm8, 12 1176 pslld xmm6, 20 1177 por xmm6, xmm8 1178 movdqa xmm8, xmm7 1179 psrld xmm8, 12 1180 pslld xmm7, 20 1181 por xmm7, xmm8 1182 movdqa xmm8, xmm4 1183 psrld xmm8, 12 1184 pslld xmm4, 20 1185 por xmm4, xmm8 1186 paddd xmm0, xmmword ptr [rsp+0x30] 1187 paddd xmm1, xmmword ptr [rsp+0xA0] 1188 paddd xmm2, xmmword ptr [rsp+0x60] 1189 paddd xmm3, xmmword ptr [rsp+0x70] 1190 paddd xmm0, xmm5 1191 paddd xmm1, xmm6 1192 paddd xmm2, xmm7 1193 paddd xmm3, xmm4 1194 pxor xmm15, xmm0 1195 pxor xmm12, xmm1 1196 pxor xmm13, xmm2 1197 pxor xmm14, xmm3 1198 movdqa xmm8, xmmword ptr [ROT8+rip] 1199 pshufb xmm15, xmm8 1200 pshufb xmm12, xmm8 1201 pshufb xmm13, xmm8 1202 pshufb xmm14, xmm8 1203 paddd xmm10, xmm15 1204 paddd xmm11, xmm12 1205 movdqa xmm8, xmmword ptr [rsp+0x100] 1206 paddd xmm8, xmm13 1207 paddd xmm9, xmm14 1208 pxor xmm5, xmm10 1209 pxor xmm6, xmm11 1210 pxor xmm7, xmm8 1211 pxor xmm4, xmm9 1212 movdqa xmmword ptr 
[rsp+0x100], xmm8 1213 movdqa xmm8, xmm5 1214 psrld xmm8, 7 1215 pslld xmm5, 25 1216 por xmm5, xmm8 1217 movdqa xmm8, xmm6 1218 psrld xmm8, 7 1219 pslld xmm6, 25 1220 por xmm6, xmm8 1221 movdqa xmm8, xmm7 1222 psrld xmm8, 7 1223 pslld xmm7, 25 1224 por xmm7, xmm8 1225 movdqa xmm8, xmm4 1226 psrld xmm8, 7 1227 pslld xmm4, 25 1228 por xmm4, xmm8 1229 paddd xmm0, xmmword ptr [rsp+0xB0] 1230 paddd xmm1, xmmword ptr [rsp+0x50] 1231 paddd xmm2, xmmword ptr [rsp+0x10] 1232 paddd xmm3, xmmword ptr [rsp+0x80] 1233 paddd xmm0, xmm4 1234 paddd xmm1, xmm5 1235 paddd xmm2, xmm6 1236 paddd xmm3, xmm7 1237 pxor xmm12, xmm0 1238 pxor xmm13, xmm1 1239 pxor xmm14, xmm2 1240 pxor xmm15, xmm3 1241 movdqa xmm8, xmmword ptr [ROT16+rip] 1242 pshufb xmm12, xmm8 1243 pshufb xmm13, xmm8 1244 pshufb xmm14, xmm8 1245 pshufb xmm15, xmm8 1246 movdqa xmm8, xmmword ptr [rsp+0x100] 1247 paddd xmm8, xmm12 1248 paddd xmm9, xmm13 1249 paddd xmm10, xmm14 1250 paddd xmm11, xmm15 1251 pxor xmm4, xmm8 1252 pxor xmm5, xmm9 1253 pxor xmm6, xmm10 1254 pxor xmm7, xmm11 1255 movdqa xmmword ptr [rsp+0x100], xmm8 1256 movdqa xmm8, xmm4 1257 psrld xmm8, 12 1258 pslld xmm4, 20 1259 por xmm4, xmm8 1260 movdqa xmm8, xmm5 1261 psrld xmm8, 12 1262 pslld xmm5, 20 1263 por xmm5, xmm8 1264 movdqa xmm8, xmm6 1265 psrld xmm8, 12 1266 pslld xmm6, 20 1267 por xmm6, xmm8 1268 movdqa xmm8, xmm7 1269 psrld xmm8, 12 1270 pslld xmm7, 20 1271 por xmm7, xmm8 1272 paddd xmm0, xmmword ptr [rsp+0xF0] 1273 paddd xmm1, xmmword ptr [rsp] 1274 paddd xmm2, xmmword ptr [rsp+0x90] 1275 paddd xmm3, xmmword ptr [rsp+0x60] 1276 paddd xmm0, xmm4 1277 paddd xmm1, xmm5 1278 paddd xmm2, xmm6 1279 paddd xmm3, xmm7 1280 pxor xmm12, xmm0 1281 pxor xmm13, xmm1 1282 pxor xmm14, xmm2 1283 pxor xmm15, xmm3 1284 movdqa xmm8, xmmword ptr [ROT8+rip] 1285 pshufb xmm12, xmm8 1286 pshufb xmm13, xmm8 1287 pshufb xmm14, xmm8 1288 pshufb xmm15, xmm8 1289 movdqa xmm8, xmmword ptr [rsp+0x100] 1290 paddd xmm8, xmm12 1291 paddd xmm9, xmm13 1292 paddd xmm10, xmm14 
1293 paddd xmm11, xmm15 1294 pxor xmm4, xmm8 1295 pxor xmm5, xmm9 1296 pxor xmm6, xmm10 1297 pxor xmm7, xmm11 1298 movdqa xmmword ptr [rsp+0x100], xmm8 1299 movdqa xmm8, xmm4 1300 psrld xmm8, 7 1301 pslld xmm4, 25 1302 por xmm4, xmm8 1303 movdqa xmm8, xmm5 1304 psrld xmm8, 7 1305 pslld xmm5, 25 1306 por xmm5, xmm8 1307 movdqa xmm8, xmm6 1308 psrld xmm8, 7 1309 pslld xmm6, 25 1310 por xmm6, xmm8 1311 movdqa xmm8, xmm7 1312 psrld xmm8, 7 1313 pslld xmm7, 25 1314 por xmm7, xmm8 1315 paddd xmm0, xmmword ptr [rsp+0xE0] 1316 paddd xmm1, xmmword ptr [rsp+0x20] 1317 paddd xmm2, xmmword ptr [rsp+0x30] 1318 paddd xmm3, xmmword ptr [rsp+0x70] 1319 paddd xmm0, xmm5 1320 paddd xmm1, xmm6 1321 paddd xmm2, xmm7 1322 paddd xmm3, xmm4 1323 pxor xmm15, xmm0 1324 pxor xmm12, xmm1 1325 pxor xmm13, xmm2 1326 pxor xmm14, xmm3 1327 movdqa xmm8, xmmword ptr [ROT16+rip] 1328 pshufb xmm15, xmm8 1329 pshufb xmm12, xmm8 1330 pshufb xmm13, xmm8 1331 pshufb xmm14, xmm8 1332 paddd xmm10, xmm15 1333 paddd xmm11, xmm12 1334 movdqa xmm8, xmmword ptr [rsp+0x100] 1335 paddd xmm8, xmm13 1336 paddd xmm9, xmm14 1337 pxor xmm5, xmm10 1338 pxor xmm6, xmm11 1339 pxor xmm7, xmm8 1340 pxor xmm4, xmm9 1341 movdqa xmmword ptr [rsp+0x100], xmm8 1342 movdqa xmm8, xmm5 1343 psrld xmm8, 12 1344 pslld xmm5, 20 1345 por xmm5, xmm8 1346 movdqa xmm8, xmm6 1347 psrld xmm8, 12 1348 pslld xmm6, 20 1349 por xmm6, xmm8 1350 movdqa xmm8, xmm7 1351 psrld xmm8, 12 1352 pslld xmm7, 20 1353 por xmm7, xmm8 1354 movdqa xmm8, xmm4 1355 psrld xmm8, 12 1356 pslld xmm4, 20 1357 por xmm4, xmm8 1358 paddd xmm0, xmmword ptr [rsp+0xA0] 1359 paddd xmm1, xmmword ptr [rsp+0xC0] 1360 paddd xmm2, xmmword ptr [rsp+0x40] 1361 paddd xmm3, xmmword ptr [rsp+0xD0] 1362 paddd xmm0, xmm5 1363 paddd xmm1, xmm6 1364 paddd xmm2, xmm7 1365 paddd xmm3, xmm4 1366 pxor xmm15, xmm0 1367 pxor xmm12, xmm1 1368 pxor xmm13, xmm2 1369 pxor xmm14, xmm3 1370 movdqa xmm8, xmmword ptr [ROT8+rip] 1371 pshufb xmm15, xmm8 1372 pshufb xmm12, xmm8 1373 pshufb xmm13, xmm8 
1374 pshufb xmm14, xmm8 1375 paddd xmm10, xmm15 1376 paddd xmm11, xmm12 1377 movdqa xmm8, xmmword ptr [rsp+0x100] 1378 paddd xmm8, xmm13 1379 paddd xmm9, xmm14 1380 pxor xmm5, xmm10 1381 pxor xmm6, xmm11 1382 pxor xmm7, xmm8 1383 pxor xmm4, xmm9 1384 pxor xmm0, xmm8 1385 pxor xmm1, xmm9 1386 pxor xmm2, xmm10 1387 pxor xmm3, xmm11 1388 movdqa xmm8, xmm5 1389 psrld xmm8, 7 1390 pslld xmm5, 25 1391 por xmm5, xmm8 1392 movdqa xmm8, xmm6 1393 psrld xmm8, 7 1394 pslld xmm6, 25 1395 por xmm6, xmm8 1396 movdqa xmm8, xmm7 1397 psrld xmm8, 7 1398 pslld xmm7, 25 1399 por xmm7, xmm8 1400 movdqa xmm8, xmm4 1401 psrld xmm8, 7 1402 pslld xmm4, 25 1403 por xmm4, xmm8 1404 pxor xmm4, xmm12 1405 pxor xmm5, xmm13 1406 pxor xmm6, xmm14 1407 pxor xmm7, xmm15 1408 mov eax, r13d 1409 jne 9b 1410 movdqa xmm9, xmm0 1411 punpckldq xmm0, xmm1 1412 punpckhdq xmm9, xmm1 1413 movdqa xmm11, xmm2 1414 punpckldq xmm2, xmm3 1415 punpckhdq xmm11, xmm3 1416 movdqa xmm1, xmm0 1417 punpcklqdq xmm0, xmm2 1418 punpckhqdq xmm1, xmm2 1419 movdqa xmm3, xmm9 1420 punpcklqdq xmm9, xmm11 1421 punpckhqdq xmm3, xmm11 1422 movdqu xmmword ptr [rbx], xmm0 1423 movdqu xmmword ptr [rbx+0x20], xmm1 1424 movdqu xmmword ptr [rbx+0x40], xmm9 1425 movdqu xmmword ptr [rbx+0x60], xmm3 1426 movdqa xmm9, xmm4 1427 punpckldq xmm4, xmm5 1428 punpckhdq xmm9, xmm5 1429 movdqa xmm11, xmm6 1430 punpckldq xmm6, xmm7 1431 punpckhdq xmm11, xmm7 1432 movdqa xmm5, xmm4 1433 punpcklqdq xmm4, xmm6 1434 punpckhqdq xmm5, xmm6 1435 movdqa xmm7, xmm9 1436 punpcklqdq xmm9, xmm11 1437 punpckhqdq xmm7, xmm11 1438 movdqu xmmword ptr [rbx+0x10], xmm4 1439 movdqu xmmword ptr [rbx+0x30], xmm5 1440 movdqu xmmword ptr [rbx+0x50], xmm9 1441 movdqu xmmword ptr [rbx+0x70], xmm7 1442 movdqa xmm1, xmmword ptr [rsp+0x110] 1443 movdqa xmm0, xmm1 1444 paddd xmm1, xmmword ptr [rsp+0x150] 1445 movdqa xmmword ptr [rsp+0x110], xmm1 1446 pxor xmm0, xmmword ptr [CMP_MSB_MASK+rip] 1447 pxor xmm1, xmmword ptr [CMP_MSB_MASK+rip] 1448 pcmpgtd xmm0, xmm1 1449 movdqa 
xmm1, xmmword ptr [rsp+0x120] 1450 psubd xmm1, xmm0 1451 movdqa xmmword ptr [rsp+0x120], xmm1 1452 add rbx, 128 1453 add rdi, 32 1454 sub rsi, 4 1455 cmp rsi, 4 1456 jnc 2b 1457 test rsi, rsi 1458 jnz 3f 14594: 1460 mov rsp, rbp 1461 pop rbp 1462 pop rbx 1463 pop r12 1464 pop r13 1465 pop r14 1466 pop r15 1467 ret 1468.p2align 5 14693: 1470 test esi, 0x2 1471 je 3f 1472 movups xmm0, xmmword ptr [rcx] 1473 movups xmm1, xmmword ptr [rcx+0x10] 1474 movaps xmm8, xmm0 1475 movaps xmm9, xmm1 1476 movd xmm13, dword ptr [rsp+0x110] 1477 pinsrd xmm13, dword ptr [rsp+0x120], 1 1478 pinsrd xmm13, dword ptr [BLAKE3_BLOCK_LEN+rip], 2 1479 movaps xmmword ptr [rsp], xmm13 1480 movd xmm14, dword ptr [rsp+0x114] 1481 pinsrd xmm14, dword ptr [rsp+0x124], 1 1482 pinsrd xmm14, dword ptr [BLAKE3_BLOCK_LEN+rip], 2 1483 movaps xmmword ptr [rsp+0x10], xmm14 1484 mov r8, qword ptr [rdi] 1485 mov r9, qword ptr [rdi+0x8] 1486 movzx eax, byte ptr [rbp+0x40] 1487 or eax, r13d 1488 xor edx, edx 14892: 1490 mov r14d, eax 1491 or eax, r12d 1492 add rdx, 64 1493 cmp rdx, r15 1494 cmovne eax, r14d 1495 movaps xmm2, xmmword ptr [BLAKE3_IV+rip] 1496 movaps xmm10, xmm2 1497 movups xmm4, xmmword ptr [r8+rdx-0x40] 1498 movups xmm5, xmmword ptr [r8+rdx-0x30] 1499 movaps xmm3, xmm4 1500 shufps xmm4, xmm5, 136 1501 shufps xmm3, xmm5, 221 1502 movaps xmm5, xmm3 1503 movups xmm6, xmmword ptr [r8+rdx-0x20] 1504 movups xmm7, xmmword ptr [r8+rdx-0x10] 1505 movaps xmm3, xmm6 1506 shufps xmm6, xmm7, 136 1507 pshufd xmm6, xmm6, 0x93 1508 shufps xmm3, xmm7, 221 1509 pshufd xmm7, xmm3, 0x93 1510 movups xmm12, xmmword ptr [r9+rdx-0x40] 1511 movups xmm13, xmmword ptr [r9+rdx-0x30] 1512 movaps xmm11, xmm12 1513 shufps xmm12, xmm13, 136 1514 shufps xmm11, xmm13, 221 1515 movaps xmm13, xmm11 1516 movups xmm14, xmmword ptr [r9+rdx-0x20] 1517 movups xmm15, xmmword ptr [r9+rdx-0x10] 1518 movaps xmm11, xmm14 1519 shufps xmm14, xmm15, 136 1520 pshufd xmm14, xmm14, 0x93 1521 shufps xmm11, xmm15, 221 1522 pshufd xmm15, xmm11, 
0x93 1523 movaps xmm3, xmmword ptr [rsp] 1524 movaps xmm11, xmmword ptr [rsp+0x10] 1525 pinsrd xmm3, eax, 3 1526 pinsrd xmm11, eax, 3 1527 mov al, 7 15289: 1529 paddd xmm0, xmm4 1530 paddd xmm8, xmm12 1531 movaps xmmword ptr [rsp+0x20], xmm4 1532 movaps xmmword ptr [rsp+0x30], xmm12 1533 paddd xmm0, xmm1 1534 paddd xmm8, xmm9 1535 pxor xmm3, xmm0 1536 pxor xmm11, xmm8 1537 movaps xmm12, xmmword ptr [ROT16+rip] 1538 pshufb xmm3, xmm12 1539 pshufb xmm11, xmm12 1540 paddd xmm2, xmm3 1541 paddd xmm10, xmm11 1542 pxor xmm1, xmm2 1543 pxor xmm9, xmm10 1544 movdqa xmm4, xmm1 1545 pslld xmm1, 20 1546 psrld xmm4, 12 1547 por xmm1, xmm4 1548 movdqa xmm4, xmm9 1549 pslld xmm9, 20 1550 psrld xmm4, 12 1551 por xmm9, xmm4 1552 paddd xmm0, xmm5 1553 paddd xmm8, xmm13 1554 movaps xmmword ptr [rsp+0x40], xmm5 1555 movaps xmmword ptr [rsp+0x50], xmm13 1556 paddd xmm0, xmm1 1557 paddd xmm8, xmm9 1558 pxor xmm3, xmm0 1559 pxor xmm11, xmm8 1560 movaps xmm13, xmmword ptr [ROT8+rip] 1561 pshufb xmm3, xmm13 1562 pshufb xmm11, xmm13 1563 paddd xmm2, xmm3 1564 paddd xmm10, xmm11 1565 pxor xmm1, xmm2 1566 pxor xmm9, xmm10 1567 movdqa xmm4, xmm1 1568 pslld xmm1, 25 1569 psrld xmm4, 7 1570 por xmm1, xmm4 1571 movdqa xmm4, xmm9 1572 pslld xmm9, 25 1573 psrld xmm4, 7 1574 por xmm9, xmm4 1575 pshufd xmm0, xmm0, 0x93 1576 pshufd xmm8, xmm8, 0x93 1577 pshufd xmm3, xmm3, 0x4E 1578 pshufd xmm11, xmm11, 0x4E 1579 pshufd xmm2, xmm2, 0x39 1580 pshufd xmm10, xmm10, 0x39 1581 paddd xmm0, xmm6 1582 paddd xmm8, xmm14 1583 paddd xmm0, xmm1 1584 paddd xmm8, xmm9 1585 pxor xmm3, xmm0 1586 pxor xmm11, xmm8 1587 pshufb xmm3, xmm12 1588 pshufb xmm11, xmm12 1589 paddd xmm2, xmm3 1590 paddd xmm10, xmm11 1591 pxor xmm1, xmm2 1592 pxor xmm9, xmm10 1593 movdqa xmm4, xmm1 1594 pslld xmm1, 20 1595 psrld xmm4, 12 1596 por xmm1, xmm4 1597 movdqa xmm4, xmm9 1598 pslld xmm9, 20 1599 psrld xmm4, 12 1600 por xmm9, xmm4 1601 paddd xmm0, xmm7 1602 paddd xmm8, xmm15 1603 paddd xmm0, xmm1 1604 paddd xmm8, xmm9 1605 pxor xmm3, 
xmm0 1606 pxor xmm11, xmm8 1607 pshufb xmm3, xmm13 1608 pshufb xmm11, xmm13 1609 paddd xmm2, xmm3 1610 paddd xmm10, xmm11 1611 pxor xmm1, xmm2 1612 pxor xmm9, xmm10 1613 movdqa xmm4, xmm1 1614 pslld xmm1, 25 1615 psrld xmm4, 7 1616 por xmm1, xmm4 1617 movdqa xmm4, xmm9 1618 pslld xmm9, 25 1619 psrld xmm4, 7 1620 por xmm9, xmm4 1621 pshufd xmm0, xmm0, 0x39 1622 pshufd xmm8, xmm8, 0x39 1623 pshufd xmm3, xmm3, 0x4E 1624 pshufd xmm11, xmm11, 0x4E 1625 pshufd xmm2, xmm2, 0x93 1626 pshufd xmm10, xmm10, 0x93 1627 dec al 1628 je 9f 1629 movdqa xmm12, xmmword ptr [rsp+0x20] 1630 movdqa xmm5, xmmword ptr [rsp+0x40] 1631 pshufd xmm13, xmm12, 0x0F 1632 shufps xmm12, xmm5, 214 1633 pshufd xmm4, xmm12, 0x39 1634 movdqa xmm12, xmm6 1635 shufps xmm12, xmm7, 250 1636 pblendw xmm13, xmm12, 0xCC 1637 movdqa xmm12, xmm7 1638 punpcklqdq xmm12, xmm5 1639 pblendw xmm12, xmm6, 0xC0 1640 pshufd xmm12, xmm12, 0x78 1641 punpckhdq xmm5, xmm7 1642 punpckldq xmm6, xmm5 1643 pshufd xmm7, xmm6, 0x1E 1644 movdqa xmmword ptr [rsp+0x20], xmm13 1645 movdqa xmmword ptr [rsp+0x40], xmm12 1646 movdqa xmm5, xmmword ptr [rsp+0x30] 1647 movdqa xmm13, xmmword ptr [rsp+0x50] 1648 pshufd xmm6, xmm5, 0x0F 1649 shufps xmm5, xmm13, 214 1650 pshufd xmm12, xmm5, 0x39 1651 movdqa xmm5, xmm14 1652 shufps xmm5, xmm15, 250 1653 pblendw xmm6, xmm5, 0xCC 1654 movdqa xmm5, xmm15 1655 punpcklqdq xmm5, xmm13 1656 pblendw xmm5, xmm14, 0xC0 1657 pshufd xmm5, xmm5, 0x78 1658 punpckhdq xmm13, xmm15 1659 punpckldq xmm14, xmm13 1660 pshufd xmm15, xmm14, 0x1E 1661 movdqa xmm13, xmm6 1662 movdqa xmm14, xmm5 1663 movdqa xmm5, xmmword ptr [rsp+0x20] 1664 movdqa xmm6, xmmword ptr [rsp+0x40] 1665 jmp 9b 16669: 1667 pxor xmm0, xmm2 1668 pxor xmm1, xmm3 1669 pxor xmm8, xmm10 1670 pxor xmm9, xmm11 1671 mov eax, r13d 1672 cmp rdx, r15 1673 jne 2b 1674 movups xmmword ptr [rbx], xmm0 1675 movups xmmword ptr [rbx+0x10], xmm1 1676 movups xmmword ptr [rbx+0x20], xmm8 1677 movups xmmword ptr [rbx+0x30], xmm9 1678 movdqa xmm0, xmmword ptr 
[rsp+0x130] 1679 movdqa xmm1, xmmword ptr [rsp+0x110] 1680 movdqa xmm2, xmmword ptr [rsp+0x120] 1681 movdqu xmm3, xmmword ptr [rsp+0x118] 1682 movdqu xmm4, xmmword ptr [rsp+0x128] 1683 blendvps xmm1, xmm3, xmm0 1684 blendvps xmm2, xmm4, xmm0 1685 movdqa xmmword ptr [rsp+0x110], xmm1 1686 movdqa xmmword ptr [rsp+0x120], xmm2 1687 add rdi, 16 1688 add rbx, 64 1689 sub rsi, 2 16903: 1691 test esi, 0x1 1692 je 4b 1693 movups xmm0, xmmword ptr [rcx] 1694 movups xmm1, xmmword ptr [rcx+0x10] 1695 movd xmm13, dword ptr [rsp+0x110] 1696 pinsrd xmm13, dword ptr [rsp+0x120], 1 1697 pinsrd xmm13, dword ptr [BLAKE3_BLOCK_LEN+rip], 2 1698 movaps xmm14, xmmword ptr [ROT8+rip] 1699 movaps xmm15, xmmword ptr [ROT16+rip] 1700 mov r8, qword ptr [rdi] 1701 movzx eax, byte ptr [rbp+0x40] 1702 or eax, r13d 1703 xor edx, edx 17042: 1705 mov r14d, eax 1706 or eax, r12d 1707 add rdx, 64 1708 cmp rdx, r15 1709 cmovne eax, r14d 1710 movaps xmm2, xmmword ptr [BLAKE3_IV+rip] 1711 movaps xmm3, xmm13 1712 pinsrd xmm3, eax, 3 1713 movups xmm4, xmmword ptr [r8+rdx-0x40] 1714 movups xmm5, xmmword ptr [r8+rdx-0x30] 1715 movaps xmm8, xmm4 1716 shufps xmm4, xmm5, 136 1717 shufps xmm8, xmm5, 221 1718 movaps xmm5, xmm8 1719 movups xmm6, xmmword ptr [r8+rdx-0x20] 1720 movups xmm7, xmmword ptr [r8+rdx-0x10] 1721 movaps xmm8, xmm6 1722 shufps xmm6, xmm7, 136 1723 pshufd xmm6, xmm6, 0x93 1724 shufps xmm8, xmm7, 221 1725 pshufd xmm7, xmm8, 0x93 1726 mov al, 7 17279: 1728 paddd xmm0, xmm4 1729 paddd xmm0, xmm1 1730 pxor xmm3, xmm0 1731 pshufb xmm3, xmm15 1732 paddd xmm2, xmm3 1733 pxor xmm1, xmm2 1734 movdqa xmm11, xmm1 1735 pslld xmm1, 20 1736 psrld xmm11, 12 1737 por xmm1, xmm11 1738 paddd xmm0, xmm5 1739 paddd xmm0, xmm1 1740 pxor xmm3, xmm0 1741 pshufb xmm3, xmm14 1742 paddd xmm2, xmm3 1743 pxor xmm1, xmm2 1744 movdqa xmm11, xmm1 1745 pslld xmm1, 25 1746 psrld xmm11, 7 1747 por xmm1, xmm11 1748 pshufd xmm0, xmm0, 0x93 1749 pshufd xmm3, xmm3, 0x4E 1750 pshufd xmm2, xmm2, 0x39 1751 paddd xmm0, xmm6 1752 
/* Remainder of the one-input tail loop of blake3_hash_many_sse41: the round is resumed after the xmm6 message add. */
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        pshufb  xmm3, xmm15                     /* xmm15 = ROT16 mask: rotate lanes right 16 */
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1
        pslld   xmm1, 20
        psrld   xmm11, 12
        por     xmm1, xmm11                     /* rotate lanes right 12 */
        paddd   xmm0, xmm7
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        pshufb  xmm3, xmm14                     /* xmm14 = ROT8 mask: rotate lanes right 8 */
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1
        pslld   xmm1, 25
        psrld   xmm11, 7
        por     xmm1, xmm11                     /* rotate lanes right 7 */
        pshufd  xmm0, xmm0, 0x39                /* rotate rows back (inverse of 0x93/0x4E/0x39) */
        pshufd  xmm3, xmm3, 0x4E
        pshufd  xmm2, xmm2, 0x93
        dec     al                              /* al = rounds left */
        jz      9f
        /* Re-order the message words held in xmm4..xmm7 for the next round. */
        movdqa  xmm8, xmm4
        shufps  xmm8, xmm5, 214
        pshufd  xmm9, xmm4, 0x0F
        pshufd  xmm4, xmm8, 0x39
        movdqa  xmm8, xmm6
        shufps  xmm8, xmm7, 250
        pblendw xmm9, xmm8, 0xCC
        movdqa  xmm8, xmm7
        punpcklqdq xmm8, xmm5
        pblendw xmm8, xmm6, 0xC0
        pshufd  xmm8, xmm8, 0x78
        punpckhdq xmm5, xmm7
        punpckldq xmm6, xmm5
        pshufd  xmm7, xmm6, 0x1E
        movdqa  xmm5, xmm9
        movdqa  xmm6, xmm8
        jmp     9b
9:
        pxor    xmm0, xmm2                      /* fold rows 2,3 into rows 0,1 */
        pxor    xmm1, xmm3
        mov     eax, r13d
        cmp     rdx, r15                        /* rdx = bytes consumed, r15 = total bytes */
        jne     2b
        movups  xmmword ptr [rbx], xmm0         /* store the 32-byte result */
        movups  xmmword ptr [rbx+0x10], xmm1
        jmp     4b                              /* shared epilogue */

/*
 * blake3_compress_in_place_sse41 / _blake3_compress_in_place_sse41
 *
 * Runs the 7-round compression on one 64-byte block and writes the 32-byte
 * result back over the input state.
 *
 * Arguments arrive in the System V AMD64 integer registers:
 *   rdi  pointer to 32 bytes of state; read, then overwritten with the result
 *   rsi  pointer to the 64-byte block; read only, may be unaligned
 *   rdx  only the low byte is used; becomes lane 2 of state row 3
 *        (presumably the block length - confirm against the C prototype)
 *   rcx  64-bit value; becomes lanes 0-1 of state row 3
 *        (presumably the counter - confirm against the C prototype)
 *   r8   only the low byte is used; becomes lane 3 of state row 3
 *        (presumably the flags - confirm against the C prototype)
 *
 * Modifies: rax, rdx, flags, xmm0-xmm9, xmm11, xmm14, xmm15.
 * Leaf function: no stack usage.
 */
.p2align 6
blake3_compress_in_place_sse41:
_blake3_compress_in_place_sse41:
        _CET_ENDBR
        movups  xmm0, xmmword ptr [rdi]         /* row 0 = state words 0..3 */
        movups  xmm1, xmmword ptr [rdi+0x10]    /* row 1 = state words 4..7 */
        movaps  xmm2, xmmword ptr [BLAKE3_IV+rip] /* row 2 = IV */
        /*
         * Build row 3 = [rcx low, rcx high, dl, r8b]. Both byte arguments are
         * zero-extended first, exactly as blake3_compress_xof_sse41 does, so
         * stale upper register bits can never be mixed into the state.
         */
        movzx   eax, r8b
        movzx   edx, dl
        shl     rax, 32
        add     rdx, rax
        movq    xmm3, rcx
        movq    xmm4, rdx
        punpcklqdq xmm3, xmm4
        /* De-interleave the block: xmm4/xmm6 take even words, xmm5/xmm7 odd words. */
        movups  xmm4, xmmword ptr [rsi]
        movups  xmm5, xmmword ptr [rsi+0x10]
        movaps  xmm8, xmm4
        shufps  xmm4, xmm5, 136
        shufps  xmm8, xmm5, 221
        movaps  xmm5, xmm8
        movups  xmm6, xmmword ptr [rsi+0x20]
        movups  xmm7, xmmword ptr [rsi+0x30]
        movaps  xmm8, xmm6
        shufps  xmm6, xmm7, 136
        pshufd  xmm6, xmm6, 0x93
        shufps  xmm8, xmm7, 221
        pshufd  xmm7, xmm8, 0x93
        movaps  xmm14, xmmword ptr [ROT8+rip]   /* pshufb mask: rotate right 8 */
        movaps  xmm15, xmmword ptr [ROT16+rip]  /* pshufb mask: rotate right 16 */
        mov     al, 7                           /* round counter */
9:
        /* First half of the round, using message vectors xmm4 and xmm5. */
        paddd   xmm0, xmm4
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        pshufb  xmm3, xmm15
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1
        pslld   xmm1, 20
        psrld   xmm11, 12
        por     xmm1, xmm11                     /* rotate lanes right 12 */
        paddd   xmm0, xmm5
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        pshufb  xmm3, xmm14
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1
        pslld   xmm1, 25
        psrld   xmm11, 7
        por     xmm1, xmm11                     /* rotate lanes right 7 */
        /* Rotate rows 0, 3, 2 by one, two, three lanes before the second half. */
        pshufd  xmm0, xmm0, 0x93
        pshufd  xmm3, xmm3, 0x4E
        pshufd  xmm2, xmm2, 0x39
        /* Second half of the round, using message vectors xmm6 and xmm7. */
        paddd   xmm0, xmm6
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        pshufb  xmm3, xmm15
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1
        pslld   xmm1, 20
        psrld   xmm11, 12
        por     xmm1, xmm11
        paddd   xmm0, xmm7
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        pshufb  xmm3, xmm14
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1
        pslld   xmm1, 25
        psrld   xmm11, 7
        por     xmm1, xmm11
        /* Undo the lane rotation. */
        pshufd  xmm0, xmm0, 0x39
        pshufd  xmm3, xmm3, 0x4E
        pshufd  xmm2, xmm2, 0x93
        dec     al
        jz      9f
        /* Re-order the message words held in xmm4..xmm7 for the next round. */
        movdqa  xmm8, xmm4
        shufps  xmm8, xmm5, 214
        pshufd  xmm9, xmm4, 0x0F
        pshufd  xmm4, xmm8, 0x39
        movdqa  xmm8, xmm6
        shufps  xmm8, xmm7, 250
        pblendw xmm9, xmm8, 0xCC
        movdqa  xmm8, xmm7
        punpcklqdq xmm8, xmm5
        pblendw xmm8, xmm6, 0xC0
        pshufd  xmm8, xmm8, 0x78
        punpckhdq xmm5, xmm7
        punpckldq xmm6, xmm5
        pshufd  xmm7, xmm6, 0x1E
        movdqa  xmm5, xmm9
        movdqa  xmm6, xmm8
        jmp     9b
9:
        pxor    xmm0, xmm2                      /* result = rows 0,1 xor rows 2,3 */
        pxor    xmm1, xmm3
        movups  xmmword ptr [rdi], xmm0
        movups  xmmword ptr [rdi+0x10], xmm1
        ret

/*
 * blake3_compress_xof_sse41 / _blake3_compress_xof_sse41
 *
 * Same 7-round compression as blake3_compress_in_place_sse41, but the input
 * state is left untouched and a 64-byte result is written to a separate
 * buffer.
 *
 * Arguments arrive in the System V AMD64 integer registers:
 *   rdi  pointer to 32 bytes of state; read only
 *   rsi  pointer to the 64-byte block; read only, may be unaligned
 *   rdx  only the low byte is used; becomes lane 2 of state row 3
 *        (presumably the block length - confirm against the C prototype)
 *   rcx  64-bit value; becomes lanes 0-1 of state row 3
 *        (presumably the counter - confirm against the C prototype)
 *   r8   only the low byte is used; becomes lane 3 of state row 3
 *        (presumably the flags - confirm against the C prototype)
 *   r9   pointer to the 64-byte output buffer; written, may be unaligned
 *
 * Modifies: rax, rdx, flags, xmm0-xmm9, xmm11, xmm14, xmm15.
 * Leaf function: no stack usage.
 */
.p2align 6
blake3_compress_xof_sse41:
_blake3_compress_xof_sse41:
        _CET_ENDBR
        movups  xmm0, xmmword ptr [rdi]         /* row 0 = state words 0..3 */
        movups  xmm1, xmmword ptr [rdi+0x10]    /* row 1 = state words 4..7 */
        movaps  xmm2, xmmword ptr [BLAKE3_IV+rip] /* row 2 = IV */
        /* Build row 3 = [rcx low, rcx high, dl, r8b] from zero-extended bytes. */
        movzx   eax, r8b
        movzx   edx, dl
        shl     rax, 32
        add     rdx, rax
        movq    xmm3, rcx
        movq    xmm4, rdx
        punpcklqdq xmm3, xmm4
        /* De-interleave the block: xmm4/xmm6 take even words, xmm5/xmm7 odd words. */
        movups  xmm4, xmmword ptr [rsi]
        movups  xmm5, xmmword ptr [rsi+0x10]
        movaps  xmm8, xmm4
        shufps  xmm4, xmm5, 136
        shufps  xmm8, xmm5, 221
        movaps  xmm5, xmm8
        movups  xmm6, xmmword ptr [rsi+0x20]
        movups  xmm7, xmmword ptr [rsi+0x30]
        movaps  xmm8, xmm6
        shufps  xmm6, xmm7, 136
        pshufd  xmm6, xmm6, 0x93
        shufps  xmm8, xmm7, 221
        pshufd  xmm7, xmm8, 0x93
        movaps  xmm14, xmmword ptr [ROT8+rip]   /* pshufb mask: rotate right 8 */
        movaps  xmm15, xmmword ptr [ROT16+rip]  /* pshufb mask: rotate right 16 */
        mov     al, 7                           /* round counter */
9:
        /* First half of the round, using message vectors xmm4 and xmm5. */
        paddd   xmm0, xmm4
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        pshufb  xmm3, xmm15
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1
        pslld   xmm1, 20
        psrld   xmm11, 12
        por     xmm1, xmm11                     /* rotate lanes right 12 */
        paddd   xmm0, xmm5
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        pshufb  xmm3, xmm14
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1
        pslld   xmm1, 25
        psrld   xmm11, 7
        por     xmm1, xmm11                     /* rotate lanes right 7 */
        /* Rotate rows 0, 3, 2 by one, two, three lanes before the second half. */
        pshufd  xmm0, xmm0, 0x93
        pshufd  xmm3, xmm3, 0x4E
        pshufd  xmm2, xmm2, 0x39
        /* Second half of the round, using message vectors xmm6 and xmm7. */
        paddd   xmm0, xmm6
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        pshufb  xmm3, xmm15
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1
        pslld   xmm1, 20
        psrld   xmm11, 12
        por     xmm1, xmm11
        paddd   xmm0, xmm7
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        pshufb  xmm3, xmm14
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1
        pslld   xmm1, 25
        psrld   xmm11, 7
        por     xmm1, xmm11
        /* Undo the lane rotation. */
        pshufd  xmm0, xmm0, 0x39
        pshufd  xmm3, xmm3, 0x4E
        pshufd  xmm2, xmm2, 0x93
        dec     al
        jz      9f
        /* Re-order the message words held in xmm4..xmm7 for the next round. */
        movdqa  xmm8, xmm4
        shufps  xmm8, xmm5, 214
        pshufd  xmm9, xmm4, 0x0F
        pshufd  xmm4, xmm8, 0x39
        movdqa  xmm8, xmm6
        shufps  xmm8, xmm7, 250
        pblendw xmm9, xmm8, 0xCC
        movdqa  xmm8, xmm7
        punpcklqdq xmm8, xmm5
        pblendw xmm8, xmm6, 0xC0
        pshufd  xmm8, xmm8, 0x78
        punpckhdq xmm5, xmm7
        punpckldq xmm6, xmm5
        pshufd  xmm7, xmm6, 0x1E
        movdqa  xmm5, xmm9
        movdqa  xmm6, xmm8
        jmp     9b
9:
        /* Reload the untouched input state for the second output half. */
        movdqu  xmm4, xmmword ptr [rdi]
        movdqu  xmm5, xmmword ptr [rdi+0x10]
        pxor    xmm0, xmm2                      /* out[0..31]  = rows 0,1 xor rows 2,3 */
        pxor    xmm1, xmm3
        pxor    xmm2, xmm4                      /* out[32..63] = rows 2,3 xor input state */
        pxor    xmm3, xmm5
        movups  xmmword ptr [r9], xmm0
        movups  xmmword ptr [r9+0x10], xmm1
        movups  xmmword ptr [r9+0x20], xmm2
        movups  xmmword ptr [r9+0x30], xmm3
        ret


/* Read-only constants, 64-byte aligned so every 16-byte table can be used with aligned loads. */
#ifdef __APPLE__
.static_data
#else
.section .rodata
#endif
.p2align 6
/* Four 32-bit initialization words, loaded as state row 2. */
BLAKE3_IV:
        .long  0x6A09E667, 0xBB67AE85
        .long  0x3C6EF372, 0xA54FF53A
/* pshufb mask: rotate every 32-bit lane right by 16 bits. */
ROT16:
        .byte  2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9, 14, 15, 12, 13
/* pshufb mask: rotate every 32-bit lane right by 8 bits. */
ROT8:
        .byte  1, 2, 3, 0, 5, 6, 7, 4, 9, 10, 11, 8, 13, 14, 15, 12
/* Per-lane offsets 0..3, masked in blake3_hash_many_sse41 before being added to the broadcast r8d value. */
ADD0:
        .long  0, 1, 2, 3
/* Per-iteration step for the four-lane path (four inputs at a time). */
ADD1:
        .long  4, 4, 4, 4
/* Each initialization word broadcast to all four lanes. */
BLAKE3_IV_0:
        .long  0x6A09E667, 0x6A09E667, 0x6A09E667, 0x6A09E667
BLAKE3_IV_1:
        .long  0xBB67AE85, 0xBB67AE85, 0xBB67AE85, 0xBB67AE85
BLAKE3_IV_2:
        .long  0x3C6EF372, 0x3C6EF372, 0x3C6EF372, 0x3C6EF372
BLAKE3_IV_3:
        .long  0xA54FF53A, 0xA54FF53A, 0xA54FF53A, 0xA54FF53A
/* The value 64 broadcast to all four lanes. */
BLAKE3_BLOCK_LEN:
        .long  64, 64, 64, 64
/* Sign-bit flip applied to both operands before pcmpgtd, giving an unsigned 32-bit compare. */
CMP_MSB_MASK:
        .long  0x80000000, 0x80000000, 0x80000000, 0x80000000

#endif