1#if defined(__x86_64__) 2 3#if defined(__ELF__) && (defined(__linux__) || defined(__FreeBSD__)) 4.section .note.GNU-stack,"",%progbits 5#endif 6 7#if defined(__ELF__) && defined(__CET__) && defined(__has_include) 8#if __has_include(<cet.h>) 9#include <cet.h> 10#endif 11#endif 12 13#if !defined(_CET_ENDBR) 14#define _CET_ENDBR 15#endif 16 17#ifdef __APPLE__ 18#define HIDDEN .private_extern 19#else 20#define HIDDEN .hidden 21#endif 22 23.intel_syntax noprefix 24HIDDEN blake3_hash_many_sse2 25HIDDEN _blake3_hash_many_sse2 26HIDDEN blake3_compress_in_place_sse2 27HIDDEN _blake3_compress_in_place_sse2 28HIDDEN blake3_compress_xof_sse2 29HIDDEN _blake3_compress_xof_sse2 30.global blake3_hash_many_sse2 31.global _blake3_hash_many_sse2 32.global blake3_compress_in_place_sse2 33.global _blake3_compress_in_place_sse2 34.global blake3_compress_xof_sse2 35.global _blake3_compress_xof_sse2 36#ifdef __APPLE__ 37.text 38#else 39.section .text 40#endif 41 .p2align 6 42_blake3_hash_many_sse2: 43blake3_hash_many_sse2: 44 _CET_ENDBR 45 push r15 46 push r14 47 push r13 48 push r12 49 push rbx 50 push rbp 51 mov rbp, rsp 52 sub rsp, 360 53 and rsp, 0xFFFFFFFFFFFFFFC0 54 neg r9d 55 movd xmm0, r9d 56 pshufd xmm0, xmm0, 0x00 57 movdqa xmmword ptr [rsp+0x130], xmm0 58 movdqa xmm1, xmm0 59 pand xmm1, xmmword ptr [ADD0+rip] 60 pand xmm0, xmmword ptr [ADD1+rip] 61 movdqa xmmword ptr [rsp+0x150], xmm0 62 movd xmm0, r8d 63 pshufd xmm0, xmm0, 0x00 64 paddd xmm0, xmm1 65 movdqa xmmword ptr [rsp+0x110], xmm0 66 pxor xmm0, xmmword ptr [CMP_MSB_MASK+rip] 67 pxor xmm1, xmmword ptr [CMP_MSB_MASK+rip] 68 pcmpgtd xmm1, xmm0 69 shr r8, 32 70 movd xmm2, r8d 71 pshufd xmm2, xmm2, 0x00 72 psubd xmm2, xmm1 73 movdqa xmmword ptr [rsp+0x120], xmm2 74 mov rbx, qword ptr [rbp+0x50] 75 mov r15, rdx 76 shl r15, 6 77 movzx r13d, byte ptr [rbp+0x38] 78 movzx r12d, byte ptr [rbp+0x48] 79 cmp rsi, 4 80 jc 3f 812: 82 movdqu xmm3, xmmword ptr [rcx] 83 pshufd xmm0, xmm3, 0x00 84 pshufd xmm1, xmm3, 0x55 85 pshufd xmm2, 
xmm3, 0xAA 86 pshufd xmm3, xmm3, 0xFF 87 movdqu xmm7, xmmword ptr [rcx+0x10] 88 pshufd xmm4, xmm7, 0x00 89 pshufd xmm5, xmm7, 0x55 90 pshufd xmm6, xmm7, 0xAA 91 pshufd xmm7, xmm7, 0xFF 92 mov r8, qword ptr [rdi] 93 mov r9, qword ptr [rdi+0x8] 94 mov r10, qword ptr [rdi+0x10] 95 mov r11, qword ptr [rdi+0x18] 96 movzx eax, byte ptr [rbp+0x40] 97 or eax, r13d 98 xor edx, edx 999: 100 mov r14d, eax 101 or eax, r12d 102 add rdx, 64 103 cmp rdx, r15 104 cmovne eax, r14d 105 movdqu xmm8, xmmword ptr [r8+rdx-0x40] 106 movdqu xmm9, xmmword ptr [r9+rdx-0x40] 107 movdqu xmm10, xmmword ptr [r10+rdx-0x40] 108 movdqu xmm11, xmmword ptr [r11+rdx-0x40] 109 movdqa xmm12, xmm8 110 punpckldq xmm8, xmm9 111 punpckhdq xmm12, xmm9 112 movdqa xmm14, xmm10 113 punpckldq xmm10, xmm11 114 punpckhdq xmm14, xmm11 115 movdqa xmm9, xmm8 116 punpcklqdq xmm8, xmm10 117 punpckhqdq xmm9, xmm10 118 movdqa xmm13, xmm12 119 punpcklqdq xmm12, xmm14 120 punpckhqdq xmm13, xmm14 121 movdqa xmmword ptr [rsp], xmm8 122 movdqa xmmword ptr [rsp+0x10], xmm9 123 movdqa xmmword ptr [rsp+0x20], xmm12 124 movdqa xmmword ptr [rsp+0x30], xmm13 125 movdqu xmm8, xmmword ptr [r8+rdx-0x30] 126 movdqu xmm9, xmmword ptr [r9+rdx-0x30] 127 movdqu xmm10, xmmword ptr [r10+rdx-0x30] 128 movdqu xmm11, xmmword ptr [r11+rdx-0x30] 129 movdqa xmm12, xmm8 130 punpckldq xmm8, xmm9 131 punpckhdq xmm12, xmm9 132 movdqa xmm14, xmm10 133 punpckldq xmm10, xmm11 134 punpckhdq xmm14, xmm11 135 movdqa xmm9, xmm8 136 punpcklqdq xmm8, xmm10 137 punpckhqdq xmm9, xmm10 138 movdqa xmm13, xmm12 139 punpcklqdq xmm12, xmm14 140 punpckhqdq xmm13, xmm14 141 movdqa xmmword ptr [rsp+0x40], xmm8 142 movdqa xmmword ptr [rsp+0x50], xmm9 143 movdqa xmmword ptr [rsp+0x60], xmm12 144 movdqa xmmword ptr [rsp+0x70], xmm13 145 movdqu xmm8, xmmword ptr [r8+rdx-0x20] 146 movdqu xmm9, xmmword ptr [r9+rdx-0x20] 147 movdqu xmm10, xmmword ptr [r10+rdx-0x20] 148 movdqu xmm11, xmmword ptr [r11+rdx-0x20] 149 movdqa xmm12, xmm8 150 punpckldq xmm8, xmm9 151 punpckhdq 
xmm12, xmm9 152 movdqa xmm14, xmm10 153 punpckldq xmm10, xmm11 154 punpckhdq xmm14, xmm11 155 movdqa xmm9, xmm8 156 punpcklqdq xmm8, xmm10 157 punpckhqdq xmm9, xmm10 158 movdqa xmm13, xmm12 159 punpcklqdq xmm12, xmm14 160 punpckhqdq xmm13, xmm14 161 movdqa xmmword ptr [rsp+0x80], xmm8 162 movdqa xmmword ptr [rsp+0x90], xmm9 163 movdqa xmmword ptr [rsp+0xA0], xmm12 164 movdqa xmmword ptr [rsp+0xB0], xmm13 165 movdqu xmm8, xmmword ptr [r8+rdx-0x10] 166 movdqu xmm9, xmmword ptr [r9+rdx-0x10] 167 movdqu xmm10, xmmword ptr [r10+rdx-0x10] 168 movdqu xmm11, xmmword ptr [r11+rdx-0x10] 169 movdqa xmm12, xmm8 170 punpckldq xmm8, xmm9 171 punpckhdq xmm12, xmm9 172 movdqa xmm14, xmm10 173 punpckldq xmm10, xmm11 174 punpckhdq xmm14, xmm11 175 movdqa xmm9, xmm8 176 punpcklqdq xmm8, xmm10 177 punpckhqdq xmm9, xmm10 178 movdqa xmm13, xmm12 179 punpcklqdq xmm12, xmm14 180 punpckhqdq xmm13, xmm14 181 movdqa xmmword ptr [rsp+0xC0], xmm8 182 movdqa xmmword ptr [rsp+0xD0], xmm9 183 movdqa xmmword ptr [rsp+0xE0], xmm12 184 movdqa xmmword ptr [rsp+0xF0], xmm13 185 movdqa xmm9, xmmword ptr [BLAKE3_IV_1+rip] 186 movdqa xmm10, xmmword ptr [BLAKE3_IV_2+rip] 187 movdqa xmm11, xmmword ptr [BLAKE3_IV_3+rip] 188 movdqa xmm12, xmmword ptr [rsp+0x110] 189 movdqa xmm13, xmmword ptr [rsp+0x120] 190 movdqa xmm14, xmmword ptr [BLAKE3_BLOCK_LEN+rip] 191 movd xmm15, eax 192 pshufd xmm15, xmm15, 0x00 193 prefetcht0 [r8+rdx+0x80] 194 prefetcht0 [r9+rdx+0x80] 195 prefetcht0 [r10+rdx+0x80] 196 prefetcht0 [r11+rdx+0x80] 197 paddd xmm0, xmmword ptr [rsp] 198 paddd xmm1, xmmword ptr [rsp+0x20] 199 paddd xmm2, xmmword ptr [rsp+0x40] 200 paddd xmm3, xmmword ptr [rsp+0x60] 201 paddd xmm0, xmm4 202 paddd xmm1, xmm5 203 paddd xmm2, xmm6 204 paddd xmm3, xmm7 205 pxor xmm12, xmm0 206 pxor xmm13, xmm1 207 pxor xmm14, xmm2 208 pxor xmm15, xmm3 209 pshuflw xmm12, xmm12, 0xB1 210 pshufhw xmm12, xmm12, 0xB1 211 pshuflw xmm13, xmm13, 0xB1 212 pshufhw xmm13, xmm13, 0xB1 213 pshuflw xmm14, xmm14, 0xB1 214 pshufhw xmm14, 
xmm14, 0xB1 215 pshuflw xmm15, xmm15, 0xB1 216 pshufhw xmm15, xmm15, 0xB1 217 movdqa xmm8, xmmword ptr [BLAKE3_IV_0+rip] 218 paddd xmm8, xmm12 219 paddd xmm9, xmm13 220 paddd xmm10, xmm14 221 paddd xmm11, xmm15 222 pxor xmm4, xmm8 223 pxor xmm5, xmm9 224 pxor xmm6, xmm10 225 pxor xmm7, xmm11 226 movdqa xmmword ptr [rsp+0x100], xmm8 227 movdqa xmm8, xmm4 228 psrld xmm8, 12 229 pslld xmm4, 20 230 por xmm4, xmm8 231 movdqa xmm8, xmm5 232 psrld xmm8, 12 233 pslld xmm5, 20 234 por xmm5, xmm8 235 movdqa xmm8, xmm6 236 psrld xmm8, 12 237 pslld xmm6, 20 238 por xmm6, xmm8 239 movdqa xmm8, xmm7 240 psrld xmm8, 12 241 pslld xmm7, 20 242 por xmm7, xmm8 243 paddd xmm0, xmmword ptr [rsp+0x10] 244 paddd xmm1, xmmword ptr [rsp+0x30] 245 paddd xmm2, xmmword ptr [rsp+0x50] 246 paddd xmm3, xmmword ptr [rsp+0x70] 247 paddd xmm0, xmm4 248 paddd xmm1, xmm5 249 paddd xmm2, xmm6 250 paddd xmm3, xmm7 251 pxor xmm12, xmm0 252 pxor xmm13, xmm1 253 pxor xmm14, xmm2 254 pxor xmm15, xmm3 255 movdqa xmm8, xmm12 256 psrld xmm12, 8 257 pslld xmm8, 24 258 pxor xmm12, xmm8 259 movdqa xmm8, xmm13 260 psrld xmm13, 8 261 pslld xmm8, 24 262 pxor xmm13, xmm8 263 movdqa xmm8, xmm14 264 psrld xmm14, 8 265 pslld xmm8, 24 266 pxor xmm14, xmm8 267 movdqa xmm8, xmm15 268 psrld xmm15, 8 269 pslld xmm8, 24 270 pxor xmm15, xmm8 271 movdqa xmm8, xmmword ptr [rsp+0x100] 272 paddd xmm8, xmm12 273 paddd xmm9, xmm13 274 paddd xmm10, xmm14 275 paddd xmm11, xmm15 276 pxor xmm4, xmm8 277 pxor xmm5, xmm9 278 pxor xmm6, xmm10 279 pxor xmm7, xmm11 280 movdqa xmmword ptr [rsp+0x100], xmm8 281 movdqa xmm8, xmm4 282 psrld xmm8, 7 283 pslld xmm4, 25 284 por xmm4, xmm8 285 movdqa xmm8, xmm5 286 psrld xmm8, 7 287 pslld xmm5, 25 288 por xmm5, xmm8 289 movdqa xmm8, xmm6 290 psrld xmm8, 7 291 pslld xmm6, 25 292 por xmm6, xmm8 293 movdqa xmm8, xmm7 294 psrld xmm8, 7 295 pslld xmm7, 25 296 por xmm7, xmm8 297 paddd xmm0, xmmword ptr [rsp+0x80] 298 paddd xmm1, xmmword ptr [rsp+0xA0] 299 paddd xmm2, xmmword ptr [rsp+0xC0] 300 paddd 
xmm3, xmmword ptr [rsp+0xE0] 301 paddd xmm0, xmm5 302 paddd xmm1, xmm6 303 paddd xmm2, xmm7 304 paddd xmm3, xmm4 305 pxor xmm15, xmm0 306 pxor xmm12, xmm1 307 pxor xmm13, xmm2 308 pxor xmm14, xmm3 309 pshuflw xmm15, xmm15, 0xB1 310 pshufhw xmm15, xmm15, 0xB1 311 pshuflw xmm12, xmm12, 0xB1 312 pshufhw xmm12, xmm12, 0xB1 313 pshuflw xmm13, xmm13, 0xB1 314 pshufhw xmm13, xmm13, 0xB1 315 pshuflw xmm14, xmm14, 0xB1 316 pshufhw xmm14, xmm14, 0xB1 317 paddd xmm10, xmm15 318 paddd xmm11, xmm12 319 movdqa xmm8, xmmword ptr [rsp+0x100] 320 paddd xmm8, xmm13 321 paddd xmm9, xmm14 322 pxor xmm5, xmm10 323 pxor xmm6, xmm11 324 pxor xmm7, xmm8 325 pxor xmm4, xmm9 326 movdqa xmmword ptr [rsp+0x100], xmm8 327 movdqa xmm8, xmm5 328 psrld xmm8, 12 329 pslld xmm5, 20 330 por xmm5, xmm8 331 movdqa xmm8, xmm6 332 psrld xmm8, 12 333 pslld xmm6, 20 334 por xmm6, xmm8 335 movdqa xmm8, xmm7 336 psrld xmm8, 12 337 pslld xmm7, 20 338 por xmm7, xmm8 339 movdqa xmm8, xmm4 340 psrld xmm8, 12 341 pslld xmm4, 20 342 por xmm4, xmm8 343 paddd xmm0, xmmword ptr [rsp+0x90] 344 paddd xmm1, xmmword ptr [rsp+0xB0] 345 paddd xmm2, xmmword ptr [rsp+0xD0] 346 paddd xmm3, xmmword ptr [rsp+0xF0] 347 paddd xmm0, xmm5 348 paddd xmm1, xmm6 349 paddd xmm2, xmm7 350 paddd xmm3, xmm4 351 pxor xmm15, xmm0 352 pxor xmm12, xmm1 353 pxor xmm13, xmm2 354 pxor xmm14, xmm3 355 movdqa xmm8, xmm15 356 psrld xmm15, 8 357 pslld xmm8, 24 358 pxor xmm15, xmm8 359 movdqa xmm8, xmm12 360 psrld xmm12, 8 361 pslld xmm8, 24 362 pxor xmm12, xmm8 363 movdqa xmm8, xmm13 364 psrld xmm13, 8 365 pslld xmm8, 24 366 pxor xmm13, xmm8 367 movdqa xmm8, xmm14 368 psrld xmm14, 8 369 pslld xmm8, 24 370 pxor xmm14, xmm8 371 paddd xmm10, xmm15 372 paddd xmm11, xmm12 373 movdqa xmm8, xmmword ptr [rsp+0x100] 374 paddd xmm8, xmm13 375 paddd xmm9, xmm14 376 pxor xmm5, xmm10 377 pxor xmm6, xmm11 378 pxor xmm7, xmm8 379 pxor xmm4, xmm9 380 movdqa xmmword ptr [rsp+0x100], xmm8 381 movdqa xmm8, xmm5 382 psrld xmm8, 7 383 pslld xmm5, 25 384 por xmm5, xmm8 
385 movdqa xmm8, xmm6 386 psrld xmm8, 7 387 pslld xmm6, 25 388 por xmm6, xmm8 389 movdqa xmm8, xmm7 390 psrld xmm8, 7 391 pslld xmm7, 25 392 por xmm7, xmm8 393 movdqa xmm8, xmm4 394 psrld xmm8, 7 395 pslld xmm4, 25 396 por xmm4, xmm8 397 paddd xmm0, xmmword ptr [rsp+0x20] 398 paddd xmm1, xmmword ptr [rsp+0x30] 399 paddd xmm2, xmmword ptr [rsp+0x70] 400 paddd xmm3, xmmword ptr [rsp+0x40] 401 paddd xmm0, xmm4 402 paddd xmm1, xmm5 403 paddd xmm2, xmm6 404 paddd xmm3, xmm7 405 pxor xmm12, xmm0 406 pxor xmm13, xmm1 407 pxor xmm14, xmm2 408 pxor xmm15, xmm3 409 pshuflw xmm12, xmm12, 0xB1 410 pshufhw xmm12, xmm12, 0xB1 411 pshuflw xmm13, xmm13, 0xB1 412 pshufhw xmm13, xmm13, 0xB1 413 pshuflw xmm14, xmm14, 0xB1 414 pshufhw xmm14, xmm14, 0xB1 415 pshuflw xmm15, xmm15, 0xB1 416 pshufhw xmm15, xmm15, 0xB1 417 movdqa xmm8, xmmword ptr [rsp+0x100] 418 paddd xmm8, xmm12 419 paddd xmm9, xmm13 420 paddd xmm10, xmm14 421 paddd xmm11, xmm15 422 pxor xmm4, xmm8 423 pxor xmm5, xmm9 424 pxor xmm6, xmm10 425 pxor xmm7, xmm11 426 movdqa xmmword ptr [rsp+0x100], xmm8 427 movdqa xmm8, xmm4 428 psrld xmm8, 12 429 pslld xmm4, 20 430 por xmm4, xmm8 431 movdqa xmm8, xmm5 432 psrld xmm8, 12 433 pslld xmm5, 20 434 por xmm5, xmm8 435 movdqa xmm8, xmm6 436 psrld xmm8, 12 437 pslld xmm6, 20 438 por xmm6, xmm8 439 movdqa xmm8, xmm7 440 psrld xmm8, 12 441 pslld xmm7, 20 442 por xmm7, xmm8 443 paddd xmm0, xmmword ptr [rsp+0x60] 444 paddd xmm1, xmmword ptr [rsp+0xA0] 445 paddd xmm2, xmmword ptr [rsp] 446 paddd xmm3, xmmword ptr [rsp+0xD0] 447 paddd xmm0, xmm4 448 paddd xmm1, xmm5 449 paddd xmm2, xmm6 450 paddd xmm3, xmm7 451 pxor xmm12, xmm0 452 pxor xmm13, xmm1 453 pxor xmm14, xmm2 454 pxor xmm15, xmm3 455 movdqa xmm8, xmm12 456 psrld xmm12, 8 457 pslld xmm8, 24 458 pxor xmm12, xmm8 459 movdqa xmm8, xmm13 460 psrld xmm13, 8 461 pslld xmm8, 24 462 pxor xmm13, xmm8 463 movdqa xmm8, xmm14 464 psrld xmm14, 8 465 pslld xmm8, 24 466 pxor xmm14, xmm8 467 movdqa xmm8, xmm15 468 psrld xmm15, 8 469 pslld xmm8, 
24 470 pxor xmm15, xmm8 471 movdqa xmm8, xmmword ptr [rsp+0x100] 472 paddd xmm8, xmm12 473 paddd xmm9, xmm13 474 paddd xmm10, xmm14 475 paddd xmm11, xmm15 476 pxor xmm4, xmm8 477 pxor xmm5, xmm9 478 pxor xmm6, xmm10 479 pxor xmm7, xmm11 480 movdqa xmmword ptr [rsp+0x100], xmm8 481 movdqa xmm8, xmm4 482 psrld xmm8, 7 483 pslld xmm4, 25 484 por xmm4, xmm8 485 movdqa xmm8, xmm5 486 psrld xmm8, 7 487 pslld xmm5, 25 488 por xmm5, xmm8 489 movdqa xmm8, xmm6 490 psrld xmm8, 7 491 pslld xmm6, 25 492 por xmm6, xmm8 493 movdqa xmm8, xmm7 494 psrld xmm8, 7 495 pslld xmm7, 25 496 por xmm7, xmm8 497 paddd xmm0, xmmword ptr [rsp+0x10] 498 paddd xmm1, xmmword ptr [rsp+0xC0] 499 paddd xmm2, xmmword ptr [rsp+0x90] 500 paddd xmm3, xmmword ptr [rsp+0xF0] 501 paddd xmm0, xmm5 502 paddd xmm1, xmm6 503 paddd xmm2, xmm7 504 paddd xmm3, xmm4 505 pxor xmm15, xmm0 506 pxor xmm12, xmm1 507 pxor xmm13, xmm2 508 pxor xmm14, xmm3 509 pshuflw xmm15, xmm15, 0xB1 510 pshufhw xmm15, xmm15, 0xB1 511 pshuflw xmm12, xmm12, 0xB1 512 pshufhw xmm12, xmm12, 0xB1 513 pshuflw xmm13, xmm13, 0xB1 514 pshufhw xmm13, xmm13, 0xB1 515 pshuflw xmm14, xmm14, 0xB1 516 pshufhw xmm14, xmm14, 0xB1 517 paddd xmm10, xmm15 518 paddd xmm11, xmm12 519 movdqa xmm8, xmmword ptr [rsp+0x100] 520 paddd xmm8, xmm13 521 paddd xmm9, xmm14 522 pxor xmm5, xmm10 523 pxor xmm6, xmm11 524 pxor xmm7, xmm8 525 pxor xmm4, xmm9 526 movdqa xmmword ptr [rsp+0x100], xmm8 527 movdqa xmm8, xmm5 528 psrld xmm8, 12 529 pslld xmm5, 20 530 por xmm5, xmm8 531 movdqa xmm8, xmm6 532 psrld xmm8, 12 533 pslld xmm6, 20 534 por xmm6, xmm8 535 movdqa xmm8, xmm7 536 psrld xmm8, 12 537 pslld xmm7, 20 538 por xmm7, xmm8 539 movdqa xmm8, xmm4 540 psrld xmm8, 12 541 pslld xmm4, 20 542 por xmm4, xmm8 543 paddd xmm0, xmmword ptr [rsp+0xB0] 544 paddd xmm1, xmmword ptr [rsp+0x50] 545 paddd xmm2, xmmword ptr [rsp+0xE0] 546 paddd xmm3, xmmword ptr [rsp+0x80] 547 paddd xmm0, xmm5 548 paddd xmm1, xmm6 549 paddd xmm2, xmm7 550 paddd xmm3, xmm4 551 pxor xmm15, xmm0 552 
pxor xmm12, xmm1 553 pxor xmm13, xmm2 554 pxor xmm14, xmm3 555 movdqa xmm8, xmm15 556 psrld xmm15, 8 557 pslld xmm8, 24 558 pxor xmm15, xmm8 559 movdqa xmm8, xmm12 560 psrld xmm12, 8 561 pslld xmm8, 24 562 pxor xmm12, xmm8 563 movdqa xmm8, xmm13 564 psrld xmm13, 8 565 pslld xmm8, 24 566 pxor xmm13, xmm8 567 movdqa xmm8, xmm14 568 psrld xmm14, 8 569 pslld xmm8, 24 570 pxor xmm14, xmm8 571 paddd xmm10, xmm15 572 paddd xmm11, xmm12 573 movdqa xmm8, xmmword ptr [rsp+0x100] 574 paddd xmm8, xmm13 575 paddd xmm9, xmm14 576 pxor xmm5, xmm10 577 pxor xmm6, xmm11 578 pxor xmm7, xmm8 579 pxor xmm4, xmm9 580 movdqa xmmword ptr [rsp+0x100], xmm8 581 movdqa xmm8, xmm5 582 psrld xmm8, 7 583 pslld xmm5, 25 584 por xmm5, xmm8 585 movdqa xmm8, xmm6 586 psrld xmm8, 7 587 pslld xmm6, 25 588 por xmm6, xmm8 589 movdqa xmm8, xmm7 590 psrld xmm8, 7 591 pslld xmm7, 25 592 por xmm7, xmm8 593 movdqa xmm8, xmm4 594 psrld xmm8, 7 595 pslld xmm4, 25 596 por xmm4, xmm8 597 paddd xmm0, xmmword ptr [rsp+0x30] 598 paddd xmm1, xmmword ptr [rsp+0xA0] 599 paddd xmm2, xmmword ptr [rsp+0xD0] 600 paddd xmm3, xmmword ptr [rsp+0x70] 601 paddd xmm0, xmm4 602 paddd xmm1, xmm5 603 paddd xmm2, xmm6 604 paddd xmm3, xmm7 605 pxor xmm12, xmm0 606 pxor xmm13, xmm1 607 pxor xmm14, xmm2 608 pxor xmm15, xmm3 609 pshuflw xmm12, xmm12, 0xB1 610 pshufhw xmm12, xmm12, 0xB1 611 pshuflw xmm13, xmm13, 0xB1 612 pshufhw xmm13, xmm13, 0xB1 613 pshuflw xmm14, xmm14, 0xB1 614 pshufhw xmm14, xmm14, 0xB1 615 pshuflw xmm15, xmm15, 0xB1 616 pshufhw xmm15, xmm15, 0xB1 617 movdqa xmm8, xmmword ptr [rsp+0x100] 618 paddd xmm8, xmm12 619 paddd xmm9, xmm13 620 paddd xmm10, xmm14 621 paddd xmm11, xmm15 622 pxor xmm4, xmm8 623 pxor xmm5, xmm9 624 pxor xmm6, xmm10 625 pxor xmm7, xmm11 626 movdqa xmmword ptr [rsp+0x100], xmm8 627 movdqa xmm8, xmm4 628 psrld xmm8, 12 629 pslld xmm4, 20 630 por xmm4, xmm8 631 movdqa xmm8, xmm5 632 psrld xmm8, 12 633 pslld xmm5, 20 634 por xmm5, xmm8 635 movdqa xmm8, xmm6 636 psrld xmm8, 12 637 pslld xmm6, 20 
638 por xmm6, xmm8 639 movdqa xmm8, xmm7 640 psrld xmm8, 12 641 pslld xmm7, 20 642 por xmm7, xmm8 643 paddd xmm0, xmmword ptr [rsp+0x40] 644 paddd xmm1, xmmword ptr [rsp+0xC0] 645 paddd xmm2, xmmword ptr [rsp+0x20] 646 paddd xmm3, xmmword ptr [rsp+0xE0] 647 paddd xmm0, xmm4 648 paddd xmm1, xmm5 649 paddd xmm2, xmm6 650 paddd xmm3, xmm7 651 pxor xmm12, xmm0 652 pxor xmm13, xmm1 653 pxor xmm14, xmm2 654 pxor xmm15, xmm3 655 movdqa xmm8, xmm12 656 psrld xmm12, 8 657 pslld xmm8, 24 658 pxor xmm12, xmm8 659 movdqa xmm8, xmm13 660 psrld xmm13, 8 661 pslld xmm8, 24 662 pxor xmm13, xmm8 663 movdqa xmm8, xmm14 664 psrld xmm14, 8 665 pslld xmm8, 24 666 pxor xmm14, xmm8 667 movdqa xmm8, xmm15 668 psrld xmm15, 8 669 pslld xmm8, 24 670 pxor xmm15, xmm8 671 movdqa xmm8, xmmword ptr [rsp+0x100] 672 paddd xmm8, xmm12 673 paddd xmm9, xmm13 674 paddd xmm10, xmm14 675 paddd xmm11, xmm15 676 pxor xmm4, xmm8 677 pxor xmm5, xmm9 678 pxor xmm6, xmm10 679 pxor xmm7, xmm11 680 movdqa xmmword ptr [rsp+0x100], xmm8 681 movdqa xmm8, xmm4 682 psrld xmm8, 7 683 pslld xmm4, 25 684 por xmm4, xmm8 685 movdqa xmm8, xmm5 686 psrld xmm8, 7 687 pslld xmm5, 25 688 por xmm5, xmm8 689 movdqa xmm8, xmm6 690 psrld xmm8, 7 691 pslld xmm6, 25 692 por xmm6, xmm8 693 movdqa xmm8, xmm7 694 psrld xmm8, 7 695 pslld xmm7, 25 696 por xmm7, xmm8 697 paddd xmm0, xmmword ptr [rsp+0x60] 698 paddd xmm1, xmmword ptr [rsp+0x90] 699 paddd xmm2, xmmword ptr [rsp+0xB0] 700 paddd xmm3, xmmword ptr [rsp+0x80] 701 paddd xmm0, xmm5 702 paddd xmm1, xmm6 703 paddd xmm2, xmm7 704 paddd xmm3, xmm4 705 pxor xmm15, xmm0 706 pxor xmm12, xmm1 707 pxor xmm13, xmm2 708 pxor xmm14, xmm3 709 pshuflw xmm15, xmm15, 0xB1 710 pshufhw xmm15, xmm15, 0xB1 711 pshuflw xmm12, xmm12, 0xB1 712 pshufhw xmm12, xmm12, 0xB1 713 pshuflw xmm13, xmm13, 0xB1 714 pshufhw xmm13, xmm13, 0xB1 715 pshuflw xmm14, xmm14, 0xB1 716 pshufhw xmm14, xmm14, 0xB1 717 paddd xmm10, xmm15 718 paddd xmm11, xmm12 719 movdqa xmm8, xmmword ptr [rsp+0x100] 720 paddd xmm8, xmm13 
721 paddd xmm9, xmm14 722 pxor xmm5, xmm10 723 pxor xmm6, xmm11 724 pxor xmm7, xmm8 725 pxor xmm4, xmm9 726 movdqa xmmword ptr [rsp+0x100], xmm8 727 movdqa xmm8, xmm5 728 psrld xmm8, 12 729 pslld xmm5, 20 730 por xmm5, xmm8 731 movdqa xmm8, xmm6 732 psrld xmm8, 12 733 pslld xmm6, 20 734 por xmm6, xmm8 735 movdqa xmm8, xmm7 736 psrld xmm8, 12 737 pslld xmm7, 20 738 por xmm7, xmm8 739 movdqa xmm8, xmm4 740 psrld xmm8, 12 741 pslld xmm4, 20 742 por xmm4, xmm8 743 paddd xmm0, xmmword ptr [rsp+0x50] 744 paddd xmm1, xmmword ptr [rsp] 745 paddd xmm2, xmmword ptr [rsp+0xF0] 746 paddd xmm3, xmmword ptr [rsp+0x10] 747 paddd xmm0, xmm5 748 paddd xmm1, xmm6 749 paddd xmm2, xmm7 750 paddd xmm3, xmm4 751 pxor xmm15, xmm0 752 pxor xmm12, xmm1 753 pxor xmm13, xmm2 754 pxor xmm14, xmm3 755 movdqa xmm8, xmm15 756 psrld xmm15, 8 757 pslld xmm8, 24 758 pxor xmm15, xmm8 759 movdqa xmm8, xmm12 760 psrld xmm12, 8 761 pslld xmm8, 24 762 pxor xmm12, xmm8 763 movdqa xmm8, xmm13 764 psrld xmm13, 8 765 pslld xmm8, 24 766 pxor xmm13, xmm8 767 movdqa xmm8, xmm14 768 psrld xmm14, 8 769 pslld xmm8, 24 770 pxor xmm14, xmm8 771 paddd xmm10, xmm15 772 paddd xmm11, xmm12 773 movdqa xmm8, xmmword ptr [rsp+0x100] 774 paddd xmm8, xmm13 775 paddd xmm9, xmm14 776 pxor xmm5, xmm10 777 pxor xmm6, xmm11 778 pxor xmm7, xmm8 779 pxor xmm4, xmm9 780 movdqa xmmword ptr [rsp+0x100], xmm8 781 movdqa xmm8, xmm5 782 psrld xmm8, 7 783 pslld xmm5, 25 784 por xmm5, xmm8 785 movdqa xmm8, xmm6 786 psrld xmm8, 7 787 pslld xmm6, 25 788 por xmm6, xmm8 789 movdqa xmm8, xmm7 790 psrld xmm8, 7 791 pslld xmm7, 25 792 por xmm7, xmm8 793 movdqa xmm8, xmm4 794 psrld xmm8, 7 795 pslld xmm4, 25 796 por xmm4, xmm8 797 paddd xmm0, xmmword ptr [rsp+0xA0] 798 paddd xmm1, xmmword ptr [rsp+0xC0] 799 paddd xmm2, xmmword ptr [rsp+0xE0] 800 paddd xmm3, xmmword ptr [rsp+0xD0] 801 paddd xmm0, xmm4 802 paddd xmm1, xmm5 803 paddd xmm2, xmm6 804 paddd xmm3, xmm7 805 pxor xmm12, xmm0 806 pxor xmm13, xmm1 807 pxor xmm14, xmm2 808 pxor xmm15, xmm3 
809 pshuflw xmm12, xmm12, 0xB1 810 pshufhw xmm12, xmm12, 0xB1 811 pshuflw xmm13, xmm13, 0xB1 812 pshufhw xmm13, xmm13, 0xB1 813 pshuflw xmm14, xmm14, 0xB1 814 pshufhw xmm14, xmm14, 0xB1 815 pshuflw xmm15, xmm15, 0xB1 816 pshufhw xmm15, xmm15, 0xB1 817 movdqa xmm8, xmmword ptr [rsp+0x100] 818 paddd xmm8, xmm12 819 paddd xmm9, xmm13 820 paddd xmm10, xmm14 821 paddd xmm11, xmm15 822 pxor xmm4, xmm8 823 pxor xmm5, xmm9 824 pxor xmm6, xmm10 825 pxor xmm7, xmm11 826 movdqa xmmword ptr [rsp+0x100], xmm8 827 movdqa xmm8, xmm4 828 psrld xmm8, 12 829 pslld xmm4, 20 830 por xmm4, xmm8 831 movdqa xmm8, xmm5 832 psrld xmm8, 12 833 pslld xmm5, 20 834 por xmm5, xmm8 835 movdqa xmm8, xmm6 836 psrld xmm8, 12 837 pslld xmm6, 20 838 por xmm6, xmm8 839 movdqa xmm8, xmm7 840 psrld xmm8, 12 841 pslld xmm7, 20 842 por xmm7, xmm8 843 paddd xmm0, xmmword ptr [rsp+0x70] 844 paddd xmm1, xmmword ptr [rsp+0x90] 845 paddd xmm2, xmmword ptr [rsp+0x30] 846 paddd xmm3, xmmword ptr [rsp+0xF0] 847 paddd xmm0, xmm4 848 paddd xmm1, xmm5 849 paddd xmm2, xmm6 850 paddd xmm3, xmm7 851 pxor xmm12, xmm0 852 pxor xmm13, xmm1 853 pxor xmm14, xmm2 854 pxor xmm15, xmm3 855 movdqa xmm8, xmm12 856 psrld xmm12, 8 857 pslld xmm8, 24 858 pxor xmm12, xmm8 859 movdqa xmm8, xmm13 860 psrld xmm13, 8 861 pslld xmm8, 24 862 pxor xmm13, xmm8 863 movdqa xmm8, xmm14 864 psrld xmm14, 8 865 pslld xmm8, 24 866 pxor xmm14, xmm8 867 movdqa xmm8, xmm15 868 psrld xmm15, 8 869 pslld xmm8, 24 870 pxor xmm15, xmm8 871 movdqa xmm8, xmmword ptr [rsp+0x100] 872 paddd xmm8, xmm12 873 paddd xmm9, xmm13 874 paddd xmm10, xmm14 875 paddd xmm11, xmm15 876 pxor xmm4, xmm8 877 pxor xmm5, xmm9 878 pxor xmm6, xmm10 879 pxor xmm7, xmm11 880 movdqa xmmword ptr [rsp+0x100], xmm8 881 movdqa xmm8, xmm4 882 psrld xmm8, 7 883 pslld xmm4, 25 884 por xmm4, xmm8 885 movdqa xmm8, xmm5 886 psrld xmm8, 7 887 pslld xmm5, 25 888 por xmm5, xmm8 889 movdqa xmm8, xmm6 890 psrld xmm8, 7 891 pslld xmm6, 25 892 por xmm6, xmm8 893 movdqa xmm8, xmm7 894 psrld xmm8, 7 
895 pslld xmm7, 25 896 por xmm7, xmm8 897 paddd xmm0, xmmword ptr [rsp+0x40] 898 paddd xmm1, xmmword ptr [rsp+0xB0] 899 paddd xmm2, xmmword ptr [rsp+0x50] 900 paddd xmm3, xmmword ptr [rsp+0x10] 901 paddd xmm0, xmm5 902 paddd xmm1, xmm6 903 paddd xmm2, xmm7 904 paddd xmm3, xmm4 905 pxor xmm15, xmm0 906 pxor xmm12, xmm1 907 pxor xmm13, xmm2 908 pxor xmm14, xmm3 909 pshuflw xmm15, xmm15, 0xB1 910 pshufhw xmm15, xmm15, 0xB1 911 pshuflw xmm12, xmm12, 0xB1 912 pshufhw xmm12, xmm12, 0xB1 913 pshuflw xmm13, xmm13, 0xB1 914 pshufhw xmm13, xmm13, 0xB1 915 pshuflw xmm14, xmm14, 0xB1 916 pshufhw xmm14, xmm14, 0xB1 917 paddd xmm10, xmm15 918 paddd xmm11, xmm12 919 movdqa xmm8, xmmword ptr [rsp+0x100] 920 paddd xmm8, xmm13 921 paddd xmm9, xmm14 922 pxor xmm5, xmm10 923 pxor xmm6, xmm11 924 pxor xmm7, xmm8 925 pxor xmm4, xmm9 926 movdqa xmmword ptr [rsp+0x100], xmm8 927 movdqa xmm8, xmm5 928 psrld xmm8, 12 929 pslld xmm5, 20 930 por xmm5, xmm8 931 movdqa xmm8, xmm6 932 psrld xmm8, 12 933 pslld xmm6, 20 934 por xmm6, xmm8 935 movdqa xmm8, xmm7 936 psrld xmm8, 12 937 pslld xmm7, 20 938 por xmm7, xmm8 939 movdqa xmm8, xmm4 940 psrld xmm8, 12 941 pslld xmm4, 20 942 por xmm4, xmm8 943 paddd xmm0, xmmword ptr [rsp] 944 paddd xmm1, xmmword ptr [rsp+0x20] 945 paddd xmm2, xmmword ptr [rsp+0x80] 946 paddd xmm3, xmmword ptr [rsp+0x60] 947 paddd xmm0, xmm5 948 paddd xmm1, xmm6 949 paddd xmm2, xmm7 950 paddd xmm3, xmm4 951 pxor xmm15, xmm0 952 pxor xmm12, xmm1 953 pxor xmm13, xmm2 954 pxor xmm14, xmm3 955 movdqa xmm8, xmm15 956 psrld xmm15, 8 957 pslld xmm8, 24 958 pxor xmm15, xmm8 959 movdqa xmm8, xmm12 960 psrld xmm12, 8 961 pslld xmm8, 24 962 pxor xmm12, xmm8 963 movdqa xmm8, xmm13 964 psrld xmm13, 8 965 pslld xmm8, 24 966 pxor xmm13, xmm8 967 movdqa xmm8, xmm14 968 psrld xmm14, 8 969 pslld xmm8, 24 970 pxor xmm14, xmm8 971 paddd xmm10, xmm15 972 paddd xmm11, xmm12 973 movdqa xmm8, xmmword ptr [rsp+0x100] 974 paddd xmm8, xmm13 975 paddd xmm9, xmm14 976 pxor xmm5, xmm10 977 pxor xmm6, xmm11 
978 pxor xmm7, xmm8 979 pxor xmm4, xmm9 980 movdqa xmmword ptr [rsp+0x100], xmm8 981 movdqa xmm8, xmm5 982 psrld xmm8, 7 983 pslld xmm5, 25 984 por xmm5, xmm8 985 movdqa xmm8, xmm6 986 psrld xmm8, 7 987 pslld xmm6, 25 988 por xmm6, xmm8 989 movdqa xmm8, xmm7 990 psrld xmm8, 7 991 pslld xmm7, 25 992 por xmm7, xmm8 993 movdqa xmm8, xmm4 994 psrld xmm8, 7 995 pslld xmm4, 25 996 por xmm4, xmm8 997 paddd xmm0, xmmword ptr [rsp+0xC0] 998 paddd xmm1, xmmword ptr [rsp+0x90] 999 paddd xmm2, xmmword ptr [rsp+0xF0] 1000 paddd xmm3, xmmword ptr [rsp+0xE0] 1001 paddd xmm0, xmm4 1002 paddd xmm1, xmm5 1003 paddd xmm2, xmm6 1004 paddd xmm3, xmm7 1005 pxor xmm12, xmm0 1006 pxor xmm13, xmm1 1007 pxor xmm14, xmm2 1008 pxor xmm15, xmm3 1009 pshuflw xmm12, xmm12, 0xB1 1010 pshufhw xmm12, xmm12, 0xB1 1011 pshuflw xmm13, xmm13, 0xB1 1012 pshufhw xmm13, xmm13, 0xB1 1013 pshuflw xmm14, xmm14, 0xB1 1014 pshufhw xmm14, xmm14, 0xB1 1015 pshuflw xmm15, xmm15, 0xB1 1016 pshufhw xmm15, xmm15, 0xB1 1017 movdqa xmm8, xmmword ptr [rsp+0x100] 1018 paddd xmm8, xmm12 1019 paddd xmm9, xmm13 1020 paddd xmm10, xmm14 1021 paddd xmm11, xmm15 1022 pxor xmm4, xmm8 1023 pxor xmm5, xmm9 1024 pxor xmm6, xmm10 1025 pxor xmm7, xmm11 1026 movdqa xmmword ptr [rsp+0x100], xmm8 1027 movdqa xmm8, xmm4 1028 psrld xmm8, 12 1029 pslld xmm4, 20 1030 por xmm4, xmm8 1031 movdqa xmm8, xmm5 1032 psrld xmm8, 12 1033 pslld xmm5, 20 1034 por xmm5, xmm8 1035 movdqa xmm8, xmm6 1036 psrld xmm8, 12 1037 pslld xmm6, 20 1038 por xmm6, xmm8 1039 movdqa xmm8, xmm7 1040 psrld xmm8, 12 1041 pslld xmm7, 20 1042 por xmm7, xmm8 1043 paddd xmm0, xmmword ptr [rsp+0xD0] 1044 paddd xmm1, xmmword ptr [rsp+0xB0] 1045 paddd xmm2, xmmword ptr [rsp+0xA0] 1046 paddd xmm3, xmmword ptr [rsp+0x80] 1047 paddd xmm0, xmm4 1048 paddd xmm1, xmm5 1049 paddd xmm2, xmm6 1050 paddd xmm3, xmm7 1051 pxor xmm12, xmm0 1052 pxor xmm13, xmm1 1053 pxor xmm14, xmm2 1054 pxor xmm15, xmm3 1055 movdqa xmm8, xmm12 1056 psrld xmm12, 8 1057 pslld xmm8, 24 1058 pxor xmm12, xmm8 
1059 movdqa xmm8, xmm13 1060 psrld xmm13, 8 1061 pslld xmm8, 24 1062 pxor xmm13, xmm8 1063 movdqa xmm8, xmm14 1064 psrld xmm14, 8 1065 pslld xmm8, 24 1066 pxor xmm14, xmm8 1067 movdqa xmm8, xmm15 1068 psrld xmm15, 8 1069 pslld xmm8, 24 1070 pxor xmm15, xmm8 1071 movdqa xmm8, xmmword ptr [rsp+0x100] 1072 paddd xmm8, xmm12 1073 paddd xmm9, xmm13 1074 paddd xmm10, xmm14 1075 paddd xmm11, xmm15 1076 pxor xmm4, xmm8 1077 pxor xmm5, xmm9 1078 pxor xmm6, xmm10 1079 pxor xmm7, xmm11 1080 movdqa xmmword ptr [rsp+0x100], xmm8 1081 movdqa xmm8, xmm4 1082 psrld xmm8, 7 1083 pslld xmm4, 25 1084 por xmm4, xmm8 1085 movdqa xmm8, xmm5 1086 psrld xmm8, 7 1087 pslld xmm5, 25 1088 por xmm5, xmm8 1089 movdqa xmm8, xmm6 1090 psrld xmm8, 7 1091 pslld xmm6, 25 1092 por xmm6, xmm8 1093 movdqa xmm8, xmm7 1094 psrld xmm8, 7 1095 pslld xmm7, 25 1096 por xmm7, xmm8 1097 paddd xmm0, xmmword ptr [rsp+0x70] 1098 paddd xmm1, xmmword ptr [rsp+0x50] 1099 paddd xmm2, xmmword ptr [rsp] 1100 paddd xmm3, xmmword ptr [rsp+0x60] 1101 paddd xmm0, xmm5 1102 paddd xmm1, xmm6 1103 paddd xmm2, xmm7 1104 paddd xmm3, xmm4 1105 pxor xmm15, xmm0 1106 pxor xmm12, xmm1 1107 pxor xmm13, xmm2 1108 pxor xmm14, xmm3 1109 pshuflw xmm15, xmm15, 0xB1 1110 pshufhw xmm15, xmm15, 0xB1 1111 pshuflw xmm12, xmm12, 0xB1 1112 pshufhw xmm12, xmm12, 0xB1 1113 pshuflw xmm13, xmm13, 0xB1 1114 pshufhw xmm13, xmm13, 0xB1 1115 pshuflw xmm14, xmm14, 0xB1 1116 pshufhw xmm14, xmm14, 0xB1 1117 paddd xmm10, xmm15 1118 paddd xmm11, xmm12 1119 movdqa xmm8, xmmword ptr [rsp+0x100] 1120 paddd xmm8, xmm13 1121 paddd xmm9, xmm14 1122 pxor xmm5, xmm10 1123 pxor xmm6, xmm11 1124 pxor xmm7, xmm8 1125 pxor xmm4, xmm9 1126 movdqa xmmword ptr [rsp+0x100], xmm8 1127 movdqa xmm8, xmm5 1128 psrld xmm8, 12 1129 pslld xmm5, 20 1130 por xmm5, xmm8 1131 movdqa xmm8, xmm6 1132 psrld xmm8, 12 1133 pslld xmm6, 20 1134 por xmm6, xmm8 1135 movdqa xmm8, xmm7 1136 psrld xmm8, 12 1137 pslld xmm7, 20 1138 por xmm7, xmm8 1139 movdqa xmm8, xmm4 1140 psrld xmm8, 12 1141 
pslld xmm4, 20 1142 por xmm4, xmm8 1143 paddd xmm0, xmmword ptr [rsp+0x20] 1144 paddd xmm1, xmmword ptr [rsp+0x30] 1145 paddd xmm2, xmmword ptr [rsp+0x10] 1146 paddd xmm3, xmmword ptr [rsp+0x40] 1147 paddd xmm0, xmm5 1148 paddd xmm1, xmm6 1149 paddd xmm2, xmm7 1150 paddd xmm3, xmm4 1151 pxor xmm15, xmm0 1152 pxor xmm12, xmm1 1153 pxor xmm13, xmm2 1154 pxor xmm14, xmm3 1155 movdqa xmm8, xmm15 1156 psrld xmm15, 8 1157 pslld xmm8, 24 1158 pxor xmm15, xmm8 1159 movdqa xmm8, xmm12 1160 psrld xmm12, 8 1161 pslld xmm8, 24 1162 pxor xmm12, xmm8 1163 movdqa xmm8, xmm13 1164 psrld xmm13, 8 1165 pslld xmm8, 24 1166 pxor xmm13, xmm8 1167 movdqa xmm8, xmm14 1168 psrld xmm14, 8 1169 pslld xmm8, 24 1170 pxor xmm14, xmm8 1171 paddd xmm10, xmm15 1172 paddd xmm11, xmm12 1173 movdqa xmm8, xmmword ptr [rsp+0x100] 1174 paddd xmm8, xmm13 1175 paddd xmm9, xmm14 1176 pxor xmm5, xmm10 1177 pxor xmm6, xmm11 1178 pxor xmm7, xmm8 1179 pxor xmm4, xmm9 1180 movdqa xmmword ptr [rsp+0x100], xmm8 1181 movdqa xmm8, xmm5 1182 psrld xmm8, 7 1183 pslld xmm5, 25 1184 por xmm5, xmm8 1185 movdqa xmm8, xmm6 1186 psrld xmm8, 7 1187 pslld xmm6, 25 1188 por xmm6, xmm8 1189 movdqa xmm8, xmm7 1190 psrld xmm8, 7 1191 pslld xmm7, 25 1192 por xmm7, xmm8 1193 movdqa xmm8, xmm4 1194 psrld xmm8, 7 1195 pslld xmm4, 25 1196 por xmm4, xmm8 1197 paddd xmm0, xmmword ptr [rsp+0x90] 1198 paddd xmm1, xmmword ptr [rsp+0xB0] 1199 paddd xmm2, xmmword ptr [rsp+0x80] 1200 paddd xmm3, xmmword ptr [rsp+0xF0] 1201 paddd xmm0, xmm4 1202 paddd xmm1, xmm5 1203 paddd xmm2, xmm6 1204 paddd xmm3, xmm7 1205 pxor xmm12, xmm0 1206 pxor xmm13, xmm1 1207 pxor xmm14, xmm2 1208 pxor xmm15, xmm3 1209 pshuflw xmm12, xmm12, 0xB1 1210 pshufhw xmm12, xmm12, 0xB1 1211 pshuflw xmm13, xmm13, 0xB1 1212 pshufhw xmm13, xmm13, 0xB1 1213 pshuflw xmm14, xmm14, 0xB1 1214 pshufhw xmm14, xmm14, 0xB1 1215 pshuflw xmm15, xmm15, 0xB1 1216 pshufhw xmm15, xmm15, 0xB1 1217 movdqa xmm8, xmmword ptr [rsp+0x100] 1218 paddd xmm8, xmm12 1219 paddd xmm9, xmm13 1220 paddd 
xmm10, xmm14 1221 paddd xmm11, xmm15 1222 pxor xmm4, xmm8 1223 pxor xmm5, xmm9 1224 pxor xmm6, xmm10 1225 pxor xmm7, xmm11 1226 movdqa xmmword ptr [rsp+0x100], xmm8 1227 movdqa xmm8, xmm4 1228 psrld xmm8, 12 1229 pslld xmm4, 20 1230 por xmm4, xmm8 1231 movdqa xmm8, xmm5 1232 psrld xmm8, 12 1233 pslld xmm5, 20 1234 por xmm5, xmm8 1235 movdqa xmm8, xmm6 1236 psrld xmm8, 12 1237 pslld xmm6, 20 1238 por xmm6, xmm8 1239 movdqa xmm8, xmm7 1240 psrld xmm8, 12 1241 pslld xmm7, 20 1242 por xmm7, xmm8 1243 paddd xmm0, xmmword ptr [rsp+0xE0] 1244 paddd xmm1, xmmword ptr [rsp+0x50] 1245 paddd xmm2, xmmword ptr [rsp+0xC0] 1246 paddd xmm3, xmmword ptr [rsp+0x10] 1247 paddd xmm0, xmm4 1248 paddd xmm1, xmm5 1249 paddd xmm2, xmm6 1250 paddd xmm3, xmm7 1251 pxor xmm12, xmm0 1252 pxor xmm13, xmm1 1253 pxor xmm14, xmm2 1254 pxor xmm15, xmm3 1255 movdqa xmm8, xmm12 1256 psrld xmm12, 8 1257 pslld xmm8, 24 1258 pxor xmm12, xmm8 1259 movdqa xmm8, xmm13 1260 psrld xmm13, 8 1261 pslld xmm8, 24 1262 pxor xmm13, xmm8 1263 movdqa xmm8, xmm14 1264 psrld xmm14, 8 1265 pslld xmm8, 24 1266 pxor xmm14, xmm8 1267 movdqa xmm8, xmm15 1268 psrld xmm15, 8 1269 pslld xmm8, 24 1270 pxor xmm15, xmm8 1271 movdqa xmm8, xmmword ptr [rsp+0x100] 1272 paddd xmm8, xmm12 1273 paddd xmm9, xmm13 1274 paddd xmm10, xmm14 1275 paddd xmm11, xmm15 1276 pxor xmm4, xmm8 1277 pxor xmm5, xmm9 1278 pxor xmm6, xmm10 1279 pxor xmm7, xmm11 1280 movdqa xmmword ptr [rsp+0x100], xmm8 1281 movdqa xmm8, xmm4 1282 psrld xmm8, 7 1283 pslld xmm4, 25 1284 por xmm4, xmm8 1285 movdqa xmm8, xmm5 1286 psrld xmm8, 7 1287 pslld xmm5, 25 1288 por xmm5, xmm8 1289 movdqa xmm8, xmm6 1290 psrld xmm8, 7 1291 pslld xmm6, 25 1292 por xmm6, xmm8 1293 movdqa xmm8, xmm7 1294 psrld xmm8, 7 1295 pslld xmm7, 25 1296 por xmm7, xmm8 1297 paddd xmm0, xmmword ptr [rsp+0xD0] 1298 paddd xmm1, xmmword ptr [rsp] 1299 paddd xmm2, xmmword ptr [rsp+0x20] 1300 paddd xmm3, xmmword ptr [rsp+0x40] 1301 paddd xmm0, xmm5 1302 paddd xmm1, xmm6 1303 paddd xmm2, xmm7 1304 
paddd xmm3, xmm4 1305 pxor xmm15, xmm0 1306 pxor xmm12, xmm1 1307 pxor xmm13, xmm2 1308 pxor xmm14, xmm3 1309 pshuflw xmm15, xmm15, 0xB1 1310 pshufhw xmm15, xmm15, 0xB1 1311 pshuflw xmm12, xmm12, 0xB1 1312 pshufhw xmm12, xmm12, 0xB1 1313 pshuflw xmm13, xmm13, 0xB1 1314 pshufhw xmm13, xmm13, 0xB1 1315 pshuflw xmm14, xmm14, 0xB1 1316 pshufhw xmm14, xmm14, 0xB1 1317 paddd xmm10, xmm15 1318 paddd xmm11, xmm12 1319 movdqa xmm8, xmmword ptr [rsp+0x100] 1320 paddd xmm8, xmm13 1321 paddd xmm9, xmm14 1322 pxor xmm5, xmm10 1323 pxor xmm6, xmm11 1324 pxor xmm7, xmm8 1325 pxor xmm4, xmm9 1326 movdqa xmmword ptr [rsp+0x100], xmm8 1327 movdqa xmm8, xmm5 1328 psrld xmm8, 12 1329 pslld xmm5, 20 1330 por xmm5, xmm8 1331 movdqa xmm8, xmm6 1332 psrld xmm8, 12 1333 pslld xmm6, 20 1334 por xmm6, xmm8 1335 movdqa xmm8, xmm7 1336 psrld xmm8, 12 1337 pslld xmm7, 20 1338 por xmm7, xmm8 1339 movdqa xmm8, xmm4 1340 psrld xmm8, 12 1341 pslld xmm4, 20 1342 por xmm4, xmm8 1343 paddd xmm0, xmmword ptr [rsp+0x30] 1344 paddd xmm1, xmmword ptr [rsp+0xA0] 1345 paddd xmm2, xmmword ptr [rsp+0x60] 1346 paddd xmm3, xmmword ptr [rsp+0x70] 1347 paddd xmm0, xmm5 1348 paddd xmm1, xmm6 1349 paddd xmm2, xmm7 1350 paddd xmm3, xmm4 1351 pxor xmm15, xmm0 1352 pxor xmm12, xmm1 1353 pxor xmm13, xmm2 1354 pxor xmm14, xmm3 1355 movdqa xmm8, xmm15 1356 psrld xmm15, 8 1357 pslld xmm8, 24 1358 pxor xmm15, xmm8 1359 movdqa xmm8, xmm12 1360 psrld xmm12, 8 1361 pslld xmm8, 24 1362 pxor xmm12, xmm8 1363 movdqa xmm8, xmm13 1364 psrld xmm13, 8 1365 pslld xmm8, 24 1366 pxor xmm13, xmm8 1367 movdqa xmm8, xmm14 1368 psrld xmm14, 8 1369 pslld xmm8, 24 1370 pxor xmm14, xmm8 1371 paddd xmm10, xmm15 1372 paddd xmm11, xmm12 1373 movdqa xmm8, xmmword ptr [rsp+0x100] 1374 paddd xmm8, xmm13 1375 paddd xmm9, xmm14 1376 pxor xmm5, xmm10 1377 pxor xmm6, xmm11 1378 pxor xmm7, xmm8 1379 pxor xmm4, xmm9 1380 movdqa xmmword ptr [rsp+0x100], xmm8 1381 movdqa xmm8, xmm5 1382 psrld xmm8, 7 1383 pslld xmm5, 25 1384 por xmm5, xmm8 1385 movdqa 
xmm8, xmm6 1386 psrld xmm8, 7 1387 pslld xmm6, 25 1388 por xmm6, xmm8 1389 movdqa xmm8, xmm7 1390 psrld xmm8, 7 1391 pslld xmm7, 25 1392 por xmm7, xmm8 1393 movdqa xmm8, xmm4 1394 psrld xmm8, 7 1395 pslld xmm4, 25 1396 por xmm4, xmm8 1397 paddd xmm0, xmmword ptr [rsp+0xB0] 1398 paddd xmm1, xmmword ptr [rsp+0x50] 1399 paddd xmm2, xmmword ptr [rsp+0x10] 1400 paddd xmm3, xmmword ptr [rsp+0x80] 1401 paddd xmm0, xmm4 1402 paddd xmm1, xmm5 1403 paddd xmm2, xmm6 1404 paddd xmm3, xmm7 1405 pxor xmm12, xmm0 1406 pxor xmm13, xmm1 1407 pxor xmm14, xmm2 1408 pxor xmm15, xmm3 1409 pshuflw xmm12, xmm12, 0xB1 1410 pshufhw xmm12, xmm12, 0xB1 1411 pshuflw xmm13, xmm13, 0xB1 1412 pshufhw xmm13, xmm13, 0xB1 1413 pshuflw xmm14, xmm14, 0xB1 1414 pshufhw xmm14, xmm14, 0xB1 1415 pshuflw xmm15, xmm15, 0xB1 1416 pshufhw xmm15, xmm15, 0xB1 1417 movdqa xmm8, xmmword ptr [rsp+0x100] 1418 paddd xmm8, xmm12 1419 paddd xmm9, xmm13 1420 paddd xmm10, xmm14 1421 paddd xmm11, xmm15 1422 pxor xmm4, xmm8 1423 pxor xmm5, xmm9 1424 pxor xmm6, xmm10 1425 pxor xmm7, xmm11 1426 movdqa xmmword ptr [rsp+0x100], xmm8 1427 movdqa xmm8, xmm4 1428 psrld xmm8, 12 1429 pslld xmm4, 20 1430 por xmm4, xmm8 1431 movdqa xmm8, xmm5 1432 psrld xmm8, 12 1433 pslld xmm5, 20 1434 por xmm5, xmm8 1435 movdqa xmm8, xmm6 1436 psrld xmm8, 12 1437 pslld xmm6, 20 1438 por xmm6, xmm8 1439 movdqa xmm8, xmm7 1440 psrld xmm8, 12 1441 pslld xmm7, 20 1442 por xmm7, xmm8 1443 paddd xmm0, xmmword ptr [rsp+0xF0] 1444 paddd xmm1, xmmword ptr [rsp] 1445 paddd xmm2, xmmword ptr [rsp+0x90] 1446 paddd xmm3, xmmword ptr [rsp+0x60] 1447 paddd xmm0, xmm4 1448 paddd xmm1, xmm5 1449 paddd xmm2, xmm6 1450 paddd xmm3, xmm7 1451 pxor xmm12, xmm0 1452 pxor xmm13, xmm1 1453 pxor xmm14, xmm2 1454 pxor xmm15, xmm3 1455 movdqa xmm8, xmm12 1456 psrld xmm12, 8 1457 pslld xmm8, 24 1458 pxor xmm12, xmm8 1459 movdqa xmm8, xmm13 1460 psrld xmm13, 8 1461 pslld xmm8, 24 1462 pxor xmm13, xmm8 1463 movdqa xmm8, xmm14 1464 psrld xmm14, 8 1465 pslld xmm8, 24 1466 pxor 
xmm14, xmm8 1467 movdqa xmm8, xmm15 1468 psrld xmm15, 8 1469 pslld xmm8, 24 1470 pxor xmm15, xmm8 1471 movdqa xmm8, xmmword ptr [rsp+0x100] 1472 paddd xmm8, xmm12 1473 paddd xmm9, xmm13 1474 paddd xmm10, xmm14 1475 paddd xmm11, xmm15 1476 pxor xmm4, xmm8 1477 pxor xmm5, xmm9 1478 pxor xmm6, xmm10 1479 pxor xmm7, xmm11 1480 movdqa xmmword ptr [rsp+0x100], xmm8 1481 movdqa xmm8, xmm4 1482 psrld xmm8, 7 1483 pslld xmm4, 25 1484 por xmm4, xmm8 1485 movdqa xmm8, xmm5 1486 psrld xmm8, 7 1487 pslld xmm5, 25 1488 por xmm5, xmm8 1489 movdqa xmm8, xmm6 1490 psrld xmm8, 7 1491 pslld xmm6, 25 1492 por xmm6, xmm8 1493 movdqa xmm8, xmm7 1494 psrld xmm8, 7 1495 pslld xmm7, 25 1496 por xmm7, xmm8 1497 paddd xmm0, xmmword ptr [rsp+0xE0] 1498 paddd xmm1, xmmword ptr [rsp+0x20] 1499 paddd xmm2, xmmword ptr [rsp+0x30] 1500 paddd xmm3, xmmword ptr [rsp+0x70] 1501 paddd xmm0, xmm5 1502 paddd xmm1, xmm6 1503 paddd xmm2, xmm7 1504 paddd xmm3, xmm4 1505 pxor xmm15, xmm0 1506 pxor xmm12, xmm1 1507 pxor xmm13, xmm2 1508 pxor xmm14, xmm3 1509 pshuflw xmm15, xmm15, 0xB1 1510 pshufhw xmm15, xmm15, 0xB1 1511 pshuflw xmm12, xmm12, 0xB1 1512 pshufhw xmm12, xmm12, 0xB1 1513 pshuflw xmm13, xmm13, 0xB1 1514 pshufhw xmm13, xmm13, 0xB1 1515 pshuflw xmm14, xmm14, 0xB1 1516 pshufhw xmm14, xmm14, 0xB1 1517 paddd xmm10, xmm15 1518 paddd xmm11, xmm12 1519 movdqa xmm8, xmmword ptr [rsp+0x100] 1520 paddd xmm8, xmm13 1521 paddd xmm9, xmm14 1522 pxor xmm5, xmm10 1523 pxor xmm6, xmm11 1524 pxor xmm7, xmm8 1525 pxor xmm4, xmm9 1526 movdqa xmmword ptr [rsp+0x100], xmm8 1527 movdqa xmm8, xmm5 1528 psrld xmm8, 12 1529 pslld xmm5, 20 1530 por xmm5, xmm8 1531 movdqa xmm8, xmm6 1532 psrld xmm8, 12 1533 pslld xmm6, 20 1534 por xmm6, xmm8 1535 movdqa xmm8, xmm7 1536 psrld xmm8, 12 1537 pslld xmm7, 20 1538 por xmm7, xmm8 1539 movdqa xmm8, xmm4 1540 psrld xmm8, 12 1541 pslld xmm4, 20 1542 por xmm4, xmm8 1543 paddd xmm0, xmmword ptr [rsp+0xA0] 1544 paddd xmm1, xmmword ptr [rsp+0xC0] 1545 paddd xmm2, xmmword ptr [rsp+0x40] 
1546 paddd xmm3, xmmword ptr [rsp+0xD0] 1547 paddd xmm0, xmm5 1548 paddd xmm1, xmm6 1549 paddd xmm2, xmm7 1550 paddd xmm3, xmm4 1551 pxor xmm15, xmm0 1552 pxor xmm12, xmm1 1553 pxor xmm13, xmm2 1554 pxor xmm14, xmm3 1555 movdqa xmm8, xmm15 1556 psrld xmm15, 8 1557 pslld xmm8, 24 1558 pxor xmm15, xmm8 1559 movdqa xmm8, xmm12 1560 psrld xmm12, 8 1561 pslld xmm8, 24 1562 pxor xmm12, xmm8 1563 movdqa xmm8, xmm13 1564 psrld xmm13, 8 1565 pslld xmm8, 24 1566 pxor xmm13, xmm8 1567 movdqa xmm8, xmm14 1568 psrld xmm14, 8 1569 pslld xmm8, 24 1570 pxor xmm14, xmm8 1571 paddd xmm10, xmm15 1572 paddd xmm11, xmm12 1573 movdqa xmm8, xmmword ptr [rsp+0x100] 1574 paddd xmm8, xmm13 1575 paddd xmm9, xmm14 1576 pxor xmm5, xmm10 1577 pxor xmm6, xmm11 1578 pxor xmm7, xmm8 1579 pxor xmm4, xmm9 1580 pxor xmm0, xmm8 1581 pxor xmm1, xmm9 1582 pxor xmm2, xmm10 1583 pxor xmm3, xmm11 1584 movdqa xmm8, xmm5 1585 psrld xmm8, 7 1586 pslld xmm5, 25 1587 por xmm5, xmm8 1588 movdqa xmm8, xmm6 1589 psrld xmm8, 7 1590 pslld xmm6, 25 1591 por xmm6, xmm8 1592 movdqa xmm8, xmm7 1593 psrld xmm8, 7 1594 pslld xmm7, 25 1595 por xmm7, xmm8 1596 movdqa xmm8, xmm4 1597 psrld xmm8, 7 1598 pslld xmm4, 25 1599 por xmm4, xmm8 1600 pxor xmm4, xmm12 1601 pxor xmm5, xmm13 1602 pxor xmm6, xmm14 1603 pxor xmm7, xmm15 1604 mov eax, r13d 1605 jne 9b 1606 movdqa xmm9, xmm0 1607 punpckldq xmm0, xmm1 1608 punpckhdq xmm9, xmm1 1609 movdqa xmm11, xmm2 1610 punpckldq xmm2, xmm3 1611 punpckhdq xmm11, xmm3 1612 movdqa xmm1, xmm0 1613 punpcklqdq xmm0, xmm2 1614 punpckhqdq xmm1, xmm2 1615 movdqa xmm3, xmm9 1616 punpcklqdq xmm9, xmm11 1617 punpckhqdq xmm3, xmm11 1618 movdqu xmmword ptr [rbx], xmm0 1619 movdqu xmmword ptr [rbx+0x20], xmm1 1620 movdqu xmmword ptr [rbx+0x40], xmm9 1621 movdqu xmmword ptr [rbx+0x60], xmm3 1622 movdqa xmm9, xmm4 1623 punpckldq xmm4, xmm5 1624 punpckhdq xmm9, xmm5 1625 movdqa xmm11, xmm6 1626 punpckldq xmm6, xmm7 1627 punpckhdq xmm11, xmm7 1628 movdqa xmm5, xmm4 1629 punpcklqdq xmm4, xmm6 1630 punpckhqdq 
xmm5, xmm6 1631 movdqa xmm7, xmm9 1632 punpcklqdq xmm9, xmm11 1633 punpckhqdq xmm7, xmm11 1634 movdqu xmmword ptr [rbx+0x10], xmm4 1635 movdqu xmmword ptr [rbx+0x30], xmm5 1636 movdqu xmmword ptr [rbx+0x50], xmm9 1637 movdqu xmmword ptr [rbx+0x70], xmm7 1638 movdqa xmm1, xmmword ptr [rsp+0x110] 1639 movdqa xmm0, xmm1 1640 paddd xmm1, xmmword ptr [rsp+0x150] 1641 movdqa xmmword ptr [rsp+0x110], xmm1 1642 pxor xmm0, xmmword ptr [CMP_MSB_MASK+rip] 1643 pxor xmm1, xmmword ptr [CMP_MSB_MASK+rip] 1644 pcmpgtd xmm0, xmm1 1645 movdqa xmm1, xmmword ptr [rsp+0x120] 1646 psubd xmm1, xmm0 1647 movdqa xmmword ptr [rsp+0x120], xmm1 1648 add rbx, 128 1649 add rdi, 32 1650 sub rsi, 4 1651 cmp rsi, 4 1652 jnc 2b 1653 test rsi, rsi 1654 jnz 3f 16554: 1656 mov rsp, rbp 1657 pop rbp 1658 pop rbx 1659 pop r12 1660 pop r13 1661 pop r14 1662 pop r15 1663 ret 1664.p2align 5 16653: 1666 test esi, 0x2 1667 je 3f 1668 movups xmm0, xmmword ptr [rcx] 1669 movups xmm1, xmmword ptr [rcx+0x10] 1670 movaps xmm8, xmm0 1671 movaps xmm9, xmm1 1672 movd xmm13, dword ptr [rsp+0x110] 1673 movd xmm14, dword ptr [rsp+0x120] 1674 punpckldq xmm13, xmm14 1675 movaps xmmword ptr [rsp], xmm13 1676 movd xmm14, dword ptr [rsp+0x114] 1677 movd xmm13, dword ptr [rsp+0x124] 1678 punpckldq xmm14, xmm13 1679 movaps xmmword ptr [rsp+0x10], xmm14 1680 mov r8, qword ptr [rdi] 1681 mov r9, qword ptr [rdi+0x8] 1682 movzx eax, byte ptr [rbp+0x40] 1683 or eax, r13d 1684 xor edx, edx 16852: 1686 mov r14d, eax 1687 or eax, r12d 1688 add rdx, 64 1689 cmp rdx, r15 1690 cmovne eax, r14d 1691 movaps xmm2, xmmword ptr [BLAKE3_IV+rip] 1692 movaps xmm10, xmm2 1693 movups xmm4, xmmword ptr [r8+rdx-0x40] 1694 movups xmm5, xmmword ptr [r8+rdx-0x30] 1695 movaps xmm3, xmm4 1696 shufps xmm4, xmm5, 136 1697 shufps xmm3, xmm5, 221 1698 movaps xmm5, xmm3 1699 movups xmm6, xmmword ptr [r8+rdx-0x20] 1700 movups xmm7, xmmword ptr [r8+rdx-0x10] 1701 movaps xmm3, xmm6 1702 shufps xmm6, xmm7, 136 1703 pshufd xmm6, xmm6, 0x93 1704 shufps xmm3, 
xmm7, 221 1705 pshufd xmm7, xmm3, 0x93 1706 movups xmm12, xmmword ptr [r9+rdx-0x40] 1707 movups xmm13, xmmword ptr [r9+rdx-0x30] 1708 movaps xmm11, xmm12 1709 shufps xmm12, xmm13, 136 1710 shufps xmm11, xmm13, 221 1711 movaps xmm13, xmm11 1712 movups xmm14, xmmword ptr [r9+rdx-0x20] 1713 movups xmm15, xmmword ptr [r9+rdx-0x10] 1714 movaps xmm11, xmm14 1715 shufps xmm14, xmm15, 136 1716 pshufd xmm14, xmm14, 0x93 1717 shufps xmm11, xmm15, 221 1718 pshufd xmm15, xmm11, 0x93 1719 shl rax, 0x20 1720 or rax, 0x40 1721 movq xmm3, rax 1722 movdqa xmmword ptr [rsp+0x20], xmm3 1723 movaps xmm3, xmmword ptr [rsp] 1724 movaps xmm11, xmmword ptr [rsp+0x10] 1725 punpcklqdq xmm3, xmmword ptr [rsp+0x20] 1726 punpcklqdq xmm11, xmmword ptr [rsp+0x20] 1727 mov al, 7 17289: 1729 paddd xmm0, xmm4 1730 paddd xmm8, xmm12 1731 movaps xmmword ptr [rsp+0x20], xmm4 1732 movaps xmmword ptr [rsp+0x30], xmm12 1733 paddd xmm0, xmm1 1734 paddd xmm8, xmm9 1735 pxor xmm3, xmm0 1736 pxor xmm11, xmm8 1737 pshuflw xmm3, xmm3, 0xB1 1738 pshufhw xmm3, xmm3, 0xB1 1739 pshuflw xmm11, xmm11, 0xB1 1740 pshufhw xmm11, xmm11, 0xB1 1741 paddd xmm2, xmm3 1742 paddd xmm10, xmm11 1743 pxor xmm1, xmm2 1744 pxor xmm9, xmm10 1745 movdqa xmm4, xmm1 1746 pslld xmm1, 20 1747 psrld xmm4, 12 1748 por xmm1, xmm4 1749 movdqa xmm4, xmm9 1750 pslld xmm9, 20 1751 psrld xmm4, 12 1752 por xmm9, xmm4 1753 paddd xmm0, xmm5 1754 paddd xmm8, xmm13 1755 movaps xmmword ptr [rsp+0x40], xmm5 1756 movaps xmmword ptr [rsp+0x50], xmm13 1757 paddd xmm0, xmm1 1758 paddd xmm8, xmm9 1759 pxor xmm3, xmm0 1760 pxor xmm11, xmm8 1761 movdqa xmm13, xmm3 1762 psrld xmm3, 8 1763 pslld xmm13, 24 1764 pxor xmm3, xmm13 1765 movdqa xmm13, xmm11 1766 psrld xmm11, 8 1767 pslld xmm13, 24 1768 pxor xmm11, xmm13 1769 paddd xmm2, xmm3 1770 paddd xmm10, xmm11 1771 pxor xmm1, xmm2 1772 pxor xmm9, xmm10 1773 movdqa xmm4, xmm1 1774 pslld xmm1, 25 1775 psrld xmm4, 7 1776 por xmm1, xmm4 1777 movdqa xmm4, xmm9 1778 pslld xmm9, 25 1779 psrld xmm4, 7 1780 por xmm9, 
xmm4 1781 pshufd xmm0, xmm0, 0x93 1782 pshufd xmm8, xmm8, 0x93 1783 pshufd xmm3, xmm3, 0x4E 1784 pshufd xmm11, xmm11, 0x4E 1785 pshufd xmm2, xmm2, 0x39 1786 pshufd xmm10, xmm10, 0x39 1787 paddd xmm0, xmm6 1788 paddd xmm8, xmm14 1789 paddd xmm0, xmm1 1790 paddd xmm8, xmm9 1791 pxor xmm3, xmm0 1792 pxor xmm11, xmm8 1793 pshuflw xmm3, xmm3, 0xB1 1794 pshufhw xmm3, xmm3, 0xB1 1795 pshuflw xmm11, xmm11, 0xB1 1796 pshufhw xmm11, xmm11, 0xB1 1797 paddd xmm2, xmm3 1798 paddd xmm10, xmm11 1799 pxor xmm1, xmm2 1800 pxor xmm9, xmm10 1801 movdqa xmm4, xmm1 1802 pslld xmm1, 20 1803 psrld xmm4, 12 1804 por xmm1, xmm4 1805 movdqa xmm4, xmm9 1806 pslld xmm9, 20 1807 psrld xmm4, 12 1808 por xmm9, xmm4 1809 paddd xmm0, xmm7 1810 paddd xmm8, xmm15 1811 paddd xmm0, xmm1 1812 paddd xmm8, xmm9 1813 pxor xmm3, xmm0 1814 pxor xmm11, xmm8 1815 movdqa xmm13, xmm3 1816 psrld xmm3, 8 1817 pslld xmm13, 24 1818 pxor xmm3, xmm13 1819 movdqa xmm13, xmm11 1820 psrld xmm11, 8 1821 pslld xmm13, 24 1822 pxor xmm11, xmm13 1823 paddd xmm2, xmm3 1824 paddd xmm10, xmm11 1825 pxor xmm1, xmm2 1826 pxor xmm9, xmm10 1827 movdqa xmm4, xmm1 1828 pslld xmm1, 25 1829 psrld xmm4, 7 1830 por xmm1, xmm4 1831 movdqa xmm4, xmm9 1832 pslld xmm9, 25 1833 psrld xmm4, 7 1834 por xmm9, xmm4 1835 pshufd xmm0, xmm0, 0x39 1836 pshufd xmm8, xmm8, 0x39 1837 pshufd xmm3, xmm3, 0x4E 1838 pshufd xmm11, xmm11, 0x4E 1839 pshufd xmm2, xmm2, 0x93 1840 pshufd xmm10, xmm10, 0x93 1841 dec al 1842 je 9f 1843 movdqa xmm12, xmmword ptr [rsp+0x20] 1844 movdqa xmm5, xmmword ptr [rsp+0x40] 1845 pshufd xmm13, xmm12, 0x0F 1846 shufps xmm12, xmm5, 214 1847 pshufd xmm4, xmm12, 0x39 1848 movdqa xmm12, xmm6 1849 shufps xmm12, xmm7, 250 1850 pand xmm13, xmmword ptr [PBLENDW_0x33_MASK+rip] 1851 pand xmm12, xmmword ptr [PBLENDW_0xCC_MASK+rip] 1852 por xmm13, xmm12 1853 movdqa xmmword ptr [rsp+0x20], xmm13 1854 movdqa xmm12, xmm7 1855 punpcklqdq xmm12, xmm5 1856 movdqa xmm13, xmm6 1857 pand xmm12, xmmword ptr [PBLENDW_0x3F_MASK+rip] 1858 pand xmm13, 
xmmword ptr [PBLENDW_0xC0_MASK+rip] 1859 por xmm12, xmm13 1860 pshufd xmm12, xmm12, 0x78 1861 punpckhdq xmm5, xmm7 1862 punpckldq xmm6, xmm5 1863 pshufd xmm7, xmm6, 0x1E 1864 movdqa xmmword ptr [rsp+0x40], xmm12 1865 movdqa xmm5, xmmword ptr [rsp+0x30] 1866 movdqa xmm13, xmmword ptr [rsp+0x50] 1867 pshufd xmm6, xmm5, 0x0F 1868 shufps xmm5, xmm13, 214 1869 pshufd xmm12, xmm5, 0x39 1870 movdqa xmm5, xmm14 1871 shufps xmm5, xmm15, 250 1872 pand xmm6, xmmword ptr [PBLENDW_0x33_MASK+rip] 1873 pand xmm5, xmmword ptr [PBLENDW_0xCC_MASK+rip] 1874 por xmm6, xmm5 1875 movdqa xmm5, xmm15 1876 punpcklqdq xmm5, xmm13 1877 movdqa xmmword ptr [rsp+0x30], xmm2 1878 movdqa xmm2, xmm14 1879 pand xmm5, xmmword ptr [PBLENDW_0x3F_MASK+rip] 1880 pand xmm2, xmmword ptr [PBLENDW_0xC0_MASK+rip] 1881 por xmm5, xmm2 1882 movdqa xmm2, xmmword ptr [rsp+0x30] 1883 pshufd xmm5, xmm5, 0x78 1884 punpckhdq xmm13, xmm15 1885 punpckldq xmm14, xmm13 1886 pshufd xmm15, xmm14, 0x1E 1887 movdqa xmm13, xmm6 1888 movdqa xmm14, xmm5 1889 movdqa xmm5, xmmword ptr [rsp+0x20] 1890 movdqa xmm6, xmmword ptr [rsp+0x40] 1891 jmp 9b 18929: 1893 pxor xmm0, xmm2 1894 pxor xmm1, xmm3 1895 pxor xmm8, xmm10 1896 pxor xmm9, xmm11 1897 mov eax, r13d 1898 cmp rdx, r15 1899 jne 2b 1900 movups xmmword ptr [rbx], xmm0 1901 movups xmmword ptr [rbx+0x10], xmm1 1902 movups xmmword ptr [rbx+0x20], xmm8 1903 movups xmmword ptr [rbx+0x30], xmm9 1904 mov eax, dword ptr [rsp+0x130] 1905 neg eax 1906 mov r10d, dword ptr [rsp+0x110+8*rax] 1907 mov r11d, dword ptr [rsp+0x120+8*rax] 1908 mov dword ptr [rsp+0x110], r10d 1909 mov dword ptr [rsp+0x120], r11d 1910 add rdi, 16 1911 add rbx, 64 1912 sub rsi, 2 19133: 1914 test esi, 0x1 1915 je 4b 1916 movups xmm0, xmmword ptr [rcx] 1917 movups xmm1, xmmword ptr [rcx+0x10] 1918 movd xmm13, dword ptr [rsp+0x110] 1919 movd xmm14, dword ptr [rsp+0x120] 1920 punpckldq xmm13, xmm14 1921 mov r8, qword ptr [rdi] 1922 movzx eax, byte ptr [rbp+0x40] 1923 or eax, r13d 1924 xor edx, edx 19252: 1926 mov 
r14d, eax 1927 or eax, r12d 1928 add rdx, 64 1929 cmp rdx, r15 1930 cmovne eax, r14d 1931 movaps xmm2, xmmword ptr [BLAKE3_IV+rip] 1932 shl rax, 32 1933 or rax, 64 1934 movq xmm12, rax 1935 movdqa xmm3, xmm13 1936 punpcklqdq xmm3, xmm12 1937 movups xmm4, xmmword ptr [r8+rdx-0x40] 1938 movups xmm5, xmmword ptr [r8+rdx-0x30] 1939 movaps xmm8, xmm4 1940 shufps xmm4, xmm5, 136 1941 shufps xmm8, xmm5, 221 1942 movaps xmm5, xmm8 1943 movups xmm6, xmmword ptr [r8+rdx-0x20] 1944 movups xmm7, xmmword ptr [r8+rdx-0x10] 1945 movaps xmm8, xmm6 1946 shufps xmm6, xmm7, 136 1947 pshufd xmm6, xmm6, 0x93 1948 shufps xmm8, xmm7, 221 1949 pshufd xmm7, xmm8, 0x93 1950 mov al, 7 19519: 1952 paddd xmm0, xmm4 1953 paddd xmm0, xmm1 1954 pxor xmm3, xmm0 1955 pshuflw xmm3, xmm3, 0xB1 1956 pshufhw xmm3, xmm3, 0xB1 1957 paddd xmm2, xmm3 1958 pxor xmm1, xmm2 1959 movdqa xmm11, xmm1 1960 pslld xmm1, 20 1961 psrld xmm11, 12 1962 por xmm1, xmm11 1963 paddd xmm0, xmm5 1964 paddd xmm0, xmm1 1965 pxor xmm3, xmm0 1966 movdqa xmm14, xmm3 1967 psrld xmm3, 8 1968 pslld xmm14, 24 1969 pxor xmm3, xmm14 1970 paddd xmm2, xmm3 1971 pxor xmm1, xmm2 1972 movdqa xmm11, xmm1 1973 pslld xmm1, 25 1974 psrld xmm11, 7 1975 por xmm1, xmm11 1976 pshufd xmm0, xmm0, 0x93 1977 pshufd xmm3, xmm3, 0x4E 1978 pshufd xmm2, xmm2, 0x39 1979 paddd xmm0, xmm6 1980 paddd xmm0, xmm1 1981 pxor xmm3, xmm0 1982 pshuflw xmm3, xmm3, 0xB1 1983 pshufhw xmm3, xmm3, 0xB1 1984 paddd xmm2, xmm3 1985 pxor xmm1, xmm2 1986 movdqa xmm11, xmm1 1987 pslld xmm1, 20 1988 psrld xmm11, 12 1989 por xmm1, xmm11 1990 paddd xmm0, xmm7 1991 paddd xmm0, xmm1 1992 pxor xmm3, xmm0 1993 movdqa xmm14, xmm3 1994 psrld xmm3, 8 1995 pslld xmm14, 24 1996 pxor xmm3, xmm14 1997 paddd xmm2, xmm3 1998 pxor xmm1, xmm2 1999 movdqa xmm11, xmm1 2000 pslld xmm1, 25 2001 psrld xmm11, 7 2002 por xmm1, xmm11 2003 pshufd xmm0, xmm0, 0x39 2004 pshufd xmm3, xmm3, 0x4E 2005 pshufd xmm2, xmm2, 0x93 2006 dec al 2007 jz 9f 2008 movdqa xmm8, xmm4 2009 shufps xmm8, xmm5, 214 2010 pshufd 
xmm9, xmm4, 0x0F
        /*
         * The operand list above completes the pshufd mnemonic that ends the
         * preceding line.  What follows, up to "jmp 4b", is the tail of
         * blake3_hash_many_sse2 (single-input path); its instructions are
         * unchanged.  It rearranges the message words held in xmm4-xmm7 and
         * jumps back to the round loop at the preceding local label 9.
         */
        pshufd  xmm4, xmm8, 0x39
        movdqa  xmm8, xmm6
        shufps  xmm8, xmm7, 250
        pand    xmm9, xmmword ptr [PBLENDW_0x33_MASK+rip]
        pand    xmm8, xmmword ptr [PBLENDW_0xCC_MASK+rip]
        por     xmm9, xmm8
        movdqa  xmm8, xmm7
        punpcklqdq xmm8, xmm5
        movdqa  xmm10, xmm6
        pand    xmm8, xmmword ptr [PBLENDW_0x3F_MASK+rip]
        pand    xmm10, xmmword ptr [PBLENDW_0xC0_MASK+rip]
        por     xmm8, xmm10
        pshufd  xmm8, xmm8, 0x78
        punpckhdq xmm5, xmm7
        punpckldq xmm6, xmm5
        pshufd  xmm7, xmm6, 0x1E
        movdqa  xmm5, xmm9
        movdqa  xmm6, xmm8
        jmp     9b
9:
        /* Rounds finished for this 64-byte block: fold rows 2/3 into rows 0/1. */
        pxor    xmm0, xmm2
        pxor    xmm1, xmm3
        mov     eax, r13d
        cmp     rdx, r15
        jne     2b                      /* more blocks of this input remain */
        movups  xmmword ptr [rbx], xmm0
        movups  xmmword ptr [rbx+0x10], xmm1
        jmp     4b                      /* shared epilogue */

/*
 * blake3_compress_in_place_sse2 / _blake3_compress_in_place_sse2
 *
 * Runs seven rounds over a 4x4 dword state and writes the folded result
 * back over the first 32 bytes of the state buffer.
 *
 * Inputs, as read by the code (System V AMD64 argument registers):
 *   rdi  pointer to 32 bytes of state; read at entry, overwritten at exit
 *   rsi  pointer to a 64-byte message block (read with unaligned loads)
 *   dl   byte placed in dword 2 of row 3
 *   rcx  64-bit value placed in dwords 0-1 of row 3
 *   r8b  byte placed in dword 3 of row 3
 * Presumably this matches a C prototype of the form
 *   (uint32_t cv[8], const uint8_t block[64], uint8_t block_len,
 *    uint64_t counter, uint8_t flags)
 * TODO confirm against the C header.
 *
 * Clobbers: rax, rdx, xmm0-xmm11, xmm14, flags.  No stack usage.
 */
.p2align 6
blake3_compress_in_place_sse2:
_blake3_compress_in_place_sse2:
        _CET_ENDBR
        movups  xmm0, xmmword ptr [rdi]                 /* row 0 */
        movups  xmm1, xmmword ptr [rdi+0x10]            /* row 1 */
        movaps  xmm2, xmmword ptr [BLAKE3_IV+rip]       /* row 2 */
        /*
         * Zero-extend the two byte-wide arguments before combining them,
         * exactly as blake3_compress_xof_sse2 does.  Any stale bits above
         * the low byte of rdx or r8 would otherwise end up in row 3.
         */
        movzx   eax, r8b
        movzx   edx, dl
        shl     rax, 32
        add     rdx, rax                /* rdx = dl | (r8b << 32) */
        movq    xmm3, rcx
        movq    xmm4, rdx
        punpcklqdq xmm3, xmm4           /* row 3 = { rcx lo, rcx hi, dl, r8b } */
        /* Split the message into even-/odd-indexed dwords (shufps 136 / 221). */
        movups  xmm4, xmmword ptr [rsi]
        movups  xmm5, xmmword ptr [rsi+0x10]
        movaps  xmm8, xmm4
        shufps  xmm4, xmm5, 136         /* even dwords of bytes 0-31 */
        shufps  xmm8, xmm5, 221         /* odd dwords of bytes 0-31 */
        movaps  xmm5, xmm8
        movups  xmm6, xmmword ptr [rsi+0x20]
        movups  xmm7, xmmword ptr [rsi+0x30]
        movaps  xmm8, xmm6
        shufps  xmm6, xmm7, 136         /* even dwords of bytes 32-63 */
        pshufd  xmm6, xmm6, 0x93        /* rotate lanes up by one */
        shufps  xmm8, xmm7, 221         /* odd dwords of bytes 32-63 */
        pshufd  xmm7, xmm8, 0x93        /* rotate lanes up by one */
        mov     al, 7                   /* round counter */
9:
        /* First half-round: message words from xmm4, then xmm5. */
        paddd   xmm0, xmm4
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        pshuflw xmm3, xmm3, 0xB1        /* swap 16-bit halves: rotate by 16 */
        pshufhw xmm3, xmm3, 0xB1
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1
        pslld   xmm1, 20
        psrld   xmm11, 12
        por     xmm1, xmm11             /* rotate right by 12 */
        paddd   xmm0, xmm5
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        movdqa  xmm14, xmm3
        psrld   xmm3, 8
        pslld   xmm14, 24
        pxor    xmm3, xmm14             /* rotate right by 8 */
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1
        pslld   xmm1, 25
        psrld   xmm11, 7
        por     xmm1, xmm11             /* rotate right by 7 */
        /* Rotate the lanes of rows 0, 3 and 2 for the second half-round. */
        pshufd  xmm0, xmm0, 0x93
        pshufd  xmm3, xmm3, 0x4E
        pshufd  xmm2, xmm2, 0x39
        /* Second half-round: message words from xmm6, then xmm7. */
        paddd   xmm0, xmm6
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        pshuflw xmm3, xmm3, 0xB1
        pshufhw xmm3, xmm3, 0xB1
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1
        pslld   xmm1, 20
        psrld   xmm11, 12
        por     xmm1, xmm11
        paddd   xmm0, xmm7
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        movdqa  xmm14, xmm3
        psrld   xmm3, 8
        pslld   xmm14, 24
        pxor    xmm3, xmm14
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1
        pslld   xmm1, 25
        psrld   xmm11, 7
        por     xmm1, xmm11
        /* Undo the lane rotation of rows 0, 3 and 2. */
        pshufd  xmm0, xmm0, 0x39
        pshufd  xmm3, xmm3, 0x4E
        pshufd  xmm2, xmm2, 0x93
        dec     al
        jz      9f                      /* all seven rounds done */
        /*
         * Rearrange the message words in xmm4-xmm7 for the next round.
         * Each pand/pand/por triple with complementary masks picks dwords
         * from two source registers.
         */
        movdqa  xmm8, xmm4
        shufps  xmm8, xmm5, 214
        pshufd  xmm9, xmm4, 0x0F
        pshufd  xmm4, xmm8, 0x39
        movdqa  xmm8, xmm6
        shufps  xmm8, xmm7, 250
        pand    xmm9, xmmword ptr [PBLENDW_0x33_MASK+rip]
        pand    xmm8, xmmword ptr [PBLENDW_0xCC_MASK+rip]
        por     xmm9, xmm8
        movdqa  xmm8, xmm7
        punpcklqdq xmm8, xmm5
        movdqa  xmm10, xmm6
        pand    xmm8, xmmword ptr [PBLENDW_0x3F_MASK+rip]
        pand    xmm10, xmmword ptr [PBLENDW_0xC0_MASK+rip]
        por     xmm8, xmm10
        pshufd  xmm8, xmm8, 0x78
        punpckhdq xmm5, xmm7
        punpckldq xmm6, xmm5
        pshufd  xmm7, xmm6, 0x1E
        movdqa  xmm5, xmm9
        movdqa  xmm6, xmm8
        jmp     9b
9:
        /* Fold rows 2/3 into rows 0/1 and overwrite the caller's state. */
        pxor    xmm0, xmm2
        pxor    xmm1, xmm3
        movups  xmmword ptr [rdi], xmm0
        movups  xmmword ptr [rdi+0x10], xmm1
        ret

/*
 * blake3_compress_xof_sse2 / _blake3_compress_xof_sse2
 *
 * Same state setup and seven-round loop as blake3_compress_in_place_sse2,
 * but the state buffer at rdi is left untouched and 64 bytes are written
 * to the buffer at r9:
 *   out[ 0..31] = rows 0-1 XOR rows 2-3
 *   out[32..63] = rows 2-3 XOR the 32 bytes originally at [rdi]
 *
 * Inputs, as read by the code: rdi, rsi, dl, rcx, r8b as in the function
 * above, plus r9 = pointer to the 64-byte output buffer.
 *
 * Clobbers: rax, rdx, xmm0-xmm11, xmm14, flags.  No stack usage.
 */
.p2align 6
blake3_compress_xof_sse2:
_blake3_compress_xof_sse2:
        _CET_ENDBR
        movups  xmm0, xmmword ptr [rdi]                 /* row 0 */
        movups  xmm1, xmmword ptr [rdi+0x10]            /* row 1 */
        movaps  xmm2, xmmword ptr [BLAKE3_IV+rip]       /* row 2 */
        movzx   eax, r8b
        movzx   edx, dl
        shl     rax, 32
        add     rdx, rax                /* rdx = dl | (r8b << 32) */
        movq    xmm3, rcx
        movq    xmm4, rdx
        punpcklqdq xmm3, xmm4           /* row 3 = { rcx lo, rcx hi, dl, r8b } */
        /* Split the message into even-/odd-indexed dwords (shufps 136 / 221). */
        movups  xmm4, xmmword ptr [rsi]
        movups  xmm5, xmmword ptr [rsi+0x10]
        movaps  xmm8, xmm4
        shufps  xmm4, xmm5, 136
        shufps  xmm8, xmm5, 221
        movaps  xmm5, xmm8
        movups  xmm6, xmmword ptr [rsi+0x20]
        movups  xmm7, xmmword ptr [rsi+0x30]
        movaps  xmm8, xmm6
        shufps  xmm6, xmm7, 136
        pshufd  xmm6, xmm6, 0x93
        shufps  xmm8, xmm7, 221
        pshufd  xmm7, xmm8, 0x93
        mov     al, 7                   /* round counter */
9:
        /* First half-round: message words from xmm4, then xmm5. */
        paddd   xmm0, xmm4
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        pshuflw xmm3, xmm3, 0xB1        /* rotate by 16 */
        pshufhw xmm3, xmm3, 0xB1
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1
        pslld   xmm1, 20
        psrld   xmm11, 12
        por     xmm1, xmm11             /* rotate right by 12 */
        paddd   xmm0, xmm5
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        movdqa  xmm14, xmm3
        psrld   xmm3, 8
        pslld   xmm14, 24
        pxor    xmm3, xmm14             /* rotate right by 8 */
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1
        pslld   xmm1, 25
        psrld   xmm11, 7
        por     xmm1, xmm11             /* rotate right by 7 */
        /* Rotate the lanes of rows 0, 3 and 2 for the second half-round. */
        pshufd  xmm0, xmm0, 0x93
        pshufd  xmm3, xmm3, 0x4E
        pshufd  xmm2, xmm2, 0x39
        /* Second half-round: message words from xmm6, then xmm7. */
        paddd   xmm0, xmm6
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        pshuflw xmm3, xmm3, 0xB1
        pshufhw xmm3, xmm3, 0xB1
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1
        pslld   xmm1, 20
        psrld   xmm11, 12
        por     xmm1, xmm11
        paddd   xmm0, xmm7
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        movdqa  xmm14, xmm3
        psrld   xmm3, 8
        pslld   xmm14, 24
        pxor    xmm3, xmm14
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1
        pslld   xmm1, 25
        psrld   xmm11, 7
        por     xmm1, xmm11
        /* Undo the lane rotation of rows 0, 3 and 2. */
        pshufd  xmm0, xmm0, 0x39
        pshufd  xmm3, xmm3, 0x4E
        pshufd  xmm2, xmm2, 0x93
        dec     al
        jz      9f                      /* all seven rounds done */
        /* Rearrange the message words in xmm4-xmm7 for the next round. */
        movdqa  xmm8, xmm4
        shufps  xmm8, xmm5, 214
        pshufd  xmm9, xmm4, 0x0F
        pshufd  xmm4, xmm8, 0x39
        movdqa  xmm8, xmm6
        shufps  xmm8, xmm7, 250
        pand    xmm9, xmmword ptr [PBLENDW_0x33_MASK+rip]
        pand    xmm8, xmmword ptr [PBLENDW_0xCC_MASK+rip]
        por     xmm9, xmm8
        movdqa  xmm8, xmm7
        punpcklqdq xmm8, xmm5
        movdqa  xmm10, xmm6
        pand    xmm8, xmmword ptr [PBLENDW_0x3F_MASK+rip]
        pand    xmm10, xmmword ptr [PBLENDW_0xC0_MASK+rip]
        por     xmm8, xmm10
        pshufd  xmm8, xmm8, 0x78
        punpckhdq xmm5, xmm7
        punpckldq xmm6, xmm5
        pshufd  xmm7, xmm6, 0x1E
        movdqa  xmm5, xmm9
        movdqa  xmm6, xmm8
        jmp     9b
9:
        /* Reload the untouched input state for the second half of the output. */
        movdqu  xmm4, xmmword ptr [rdi]
        movdqu  xmm5, xmmword ptr [rdi+0x10]
        pxor    xmm0, xmm2
        pxor    xmm1, xmm3
        pxor    xmm2, xmm4
        pxor    xmm3, xmm5
        movups  xmmword ptr [r9], xmm0
        movups  xmmword ptr [r9+0x10], xmm1
        movups  xmmword ptr [r9+0x20], xmm2
        movups  xmmword ptr [r9+0x30], xmm3
        ret


/*
 * Read-only constants.  The section starts 64-byte aligned and every table
 * is exactly 16 bytes, so each label is 16-byte aligned, as the aligned
 * memory operands (movaps / pand / pxor) that reference them require.
 */
#ifdef __APPLE__
.static_data
#else
.section .rodata
#endif
.p2align 6
BLAKE3_IV:
        .long  0x6A09E667, 0xBB67AE85
        .long  0x3C6EF372, 0xA54FF53A
ADD0:
        .long  0, 1, 2, 3
ADD1:
        .long  4, 4, 4, 4
BLAKE3_IV_0:
        .long  0x6A09E667, 0x6A09E667, 0x6A09E667, 0x6A09E667
BLAKE3_IV_1:
        .long  0xBB67AE85, 0xBB67AE85, 0xBB67AE85, 0xBB67AE85
BLAKE3_IV_2:
        .long  0x3C6EF372, 0x3C6EF372, 0x3C6EF372, 0x3C6EF372
BLAKE3_IV_3:
        .long  0xA54FF53A, 0xA54FF53A, 0xA54FF53A, 0xA54FF53A
BLAKE3_BLOCK_LEN:
        .long  64, 64, 64, 64
/* XORed into both operands before pcmpgtd (a signed compare). */
CMP_MSB_MASK:
        .long  0x80000000, 0x80000000, 0x80000000, 0x80000000
/* Complementary dword-select masks used in pand/pand/por triples. */
PBLENDW_0x33_MASK:
        .long  0xFFFFFFFF, 0x00000000, 0xFFFFFFFF, 0x00000000
PBLENDW_0xCC_MASK:
        .long  0x00000000, 0xFFFFFFFF, 0x00000000, 0xFFFFFFFF
PBLENDW_0x3F_MASK:
        .long  0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000
PBLENDW_0xC0_MASK:
        .long  0x00000000, 0x00000000, 0x00000000, 0xFFFFFFFF

#endif