1#include "llvm_blake3_prefix.h" 2 3.intel_syntax noprefix 4.global blake3_hash_many_sse2 5.global _blake3_hash_many_sse2 6.global blake3_compress_in_place_sse2 7.global _blake3_compress_in_place_sse2 8.global blake3_compress_xof_sse2 9.global _blake3_compress_xof_sse2 10.section .text 11 .p2align 6 12_blake3_hash_many_sse2: 13blake3_hash_many_sse2: 14 push r15 15 push r14 16 push r13 17 push r12 18 push rsi 19 push rdi 20 push rbx 21 push rbp 22 mov rbp, rsp 23 sub rsp, 528 24 and rsp, 0xFFFFFFFFFFFFFFC0 25 movdqa xmmword ptr [rsp+0x170], xmm6 26 movdqa xmmword ptr [rsp+0x180], xmm7 27 movdqa xmmword ptr [rsp+0x190], xmm8 28 movdqa xmmword ptr [rsp+0x1A0], xmm9 29 movdqa xmmword ptr [rsp+0x1B0], xmm10 30 movdqa xmmword ptr [rsp+0x1C0], xmm11 31 movdqa xmmword ptr [rsp+0x1D0], xmm12 32 movdqa xmmword ptr [rsp+0x1E0], xmm13 33 movdqa xmmword ptr [rsp+0x1F0], xmm14 34 movdqa xmmword ptr [rsp+0x200], xmm15 35 mov rdi, rcx 36 mov rsi, rdx 37 mov rdx, r8 38 mov rcx, r9 39 mov r8, qword ptr [rbp+0x68] 40 movzx r9, byte ptr [rbp+0x70] 41 neg r9d 42 movd xmm0, r9d 43 pshufd xmm0, xmm0, 0x00 44 movdqa xmmword ptr [rsp+0x130], xmm0 45 movdqa xmm1, xmm0 46 pand xmm1, xmmword ptr [ADD0+rip] 47 pand xmm0, xmmword ptr [ADD1+rip] 48 movdqa xmmword ptr [rsp+0x150], xmm0 49 movd xmm0, r8d 50 pshufd xmm0, xmm0, 0x00 51 paddd xmm0, xmm1 52 movdqa xmmword ptr [rsp+0x110], xmm0 53 pxor xmm0, xmmword ptr [CMP_MSB_MASK+rip] 54 pxor xmm1, xmmword ptr [CMP_MSB_MASK+rip] 55 pcmpgtd xmm1, xmm0 56 shr r8, 32 57 movd xmm2, r8d 58 pshufd xmm2, xmm2, 0x00 59 psubd xmm2, xmm1 60 movdqa xmmword ptr [rsp+0x120], xmm2 61 mov rbx, qword ptr [rbp+0x90] 62 mov r15, rdx 63 shl r15, 6 64 movzx r13d, byte ptr [rbp+0x78] 65 movzx r12d, byte ptr [rbp+0x88] 66 cmp rsi, 4 67 jc 3f 682: 69 movdqu xmm3, xmmword ptr [rcx] 70 pshufd xmm0, xmm3, 0x00 71 pshufd xmm1, xmm3, 0x55 72 pshufd xmm2, xmm3, 0xAA 73 pshufd xmm3, xmm3, 0xFF 74 movdqu xmm7, xmmword ptr [rcx+0x10] 75 pshufd xmm4, xmm7, 0x00 76 pshufd xmm5, 
xmm7, 0x55 77 pshufd xmm6, xmm7, 0xAA 78 pshufd xmm7, xmm7, 0xFF 79 mov r8, qword ptr [rdi] 80 mov r9, qword ptr [rdi+0x8] 81 mov r10, qword ptr [rdi+0x10] 82 mov r11, qword ptr [rdi+0x18] 83 movzx eax, byte ptr [rbp+0x80] 84 or eax, r13d 85 xor edx, edx 869: 87 mov r14d, eax 88 or eax, r12d 89 add rdx, 64 90 cmp rdx, r15 91 cmovne eax, r14d 92 movdqu xmm8, xmmword ptr [r8+rdx-0x40] 93 movdqu xmm9, xmmword ptr [r9+rdx-0x40] 94 movdqu xmm10, xmmword ptr [r10+rdx-0x40] 95 movdqu xmm11, xmmword ptr [r11+rdx-0x40] 96 movdqa xmm12, xmm8 97 punpckldq xmm8, xmm9 98 punpckhdq xmm12, xmm9 99 movdqa xmm14, xmm10 100 punpckldq xmm10, xmm11 101 punpckhdq xmm14, xmm11 102 movdqa xmm9, xmm8 103 punpcklqdq xmm8, xmm10 104 punpckhqdq xmm9, xmm10 105 movdqa xmm13, xmm12 106 punpcklqdq xmm12, xmm14 107 punpckhqdq xmm13, xmm14 108 movdqa xmmword ptr [rsp], xmm8 109 movdqa xmmword ptr [rsp+0x10], xmm9 110 movdqa xmmword ptr [rsp+0x20], xmm12 111 movdqa xmmword ptr [rsp+0x30], xmm13 112 movdqu xmm8, xmmword ptr [r8+rdx-0x30] 113 movdqu xmm9, xmmword ptr [r9+rdx-0x30] 114 movdqu xmm10, xmmword ptr [r10+rdx-0x30] 115 movdqu xmm11, xmmword ptr [r11+rdx-0x30] 116 movdqa xmm12, xmm8 117 punpckldq xmm8, xmm9 118 punpckhdq xmm12, xmm9 119 movdqa xmm14, xmm10 120 punpckldq xmm10, xmm11 121 punpckhdq xmm14, xmm11 122 movdqa xmm9, xmm8 123 punpcklqdq xmm8, xmm10 124 punpckhqdq xmm9, xmm10 125 movdqa xmm13, xmm12 126 punpcklqdq xmm12, xmm14 127 punpckhqdq xmm13, xmm14 128 movdqa xmmword ptr [rsp+0x40], xmm8 129 movdqa xmmword ptr [rsp+0x50], xmm9 130 movdqa xmmword ptr [rsp+0x60], xmm12 131 movdqa xmmword ptr [rsp+0x70], xmm13 132 movdqu xmm8, xmmword ptr [r8+rdx-0x20] 133 movdqu xmm9, xmmword ptr [r9+rdx-0x20] 134 movdqu xmm10, xmmword ptr [r10+rdx-0x20] 135 movdqu xmm11, xmmword ptr [r11+rdx-0x20] 136 movdqa xmm12, xmm8 137 punpckldq xmm8, xmm9 138 punpckhdq xmm12, xmm9 139 movdqa xmm14, xmm10 140 punpckldq xmm10, xmm11 141 punpckhdq xmm14, xmm11 142 movdqa xmm9, xmm8 143 punpcklqdq xmm8, xmm10 
144 punpckhqdq xmm9, xmm10 145 movdqa xmm13, xmm12 146 punpcklqdq xmm12, xmm14 147 punpckhqdq xmm13, xmm14 148 movdqa xmmword ptr [rsp+0x80], xmm8 149 movdqa xmmword ptr [rsp+0x90], xmm9 150 movdqa xmmword ptr [rsp+0xA0], xmm12 151 movdqa xmmword ptr [rsp+0xB0], xmm13 152 movdqu xmm8, xmmword ptr [r8+rdx-0x10] 153 movdqu xmm9, xmmword ptr [r9+rdx-0x10] 154 movdqu xmm10, xmmword ptr [r10+rdx-0x10] 155 movdqu xmm11, xmmword ptr [r11+rdx-0x10] 156 movdqa xmm12, xmm8 157 punpckldq xmm8, xmm9 158 punpckhdq xmm12, xmm9 159 movdqa xmm14, xmm10 160 punpckldq xmm10, xmm11 161 punpckhdq xmm14, xmm11 162 movdqa xmm9, xmm8 163 punpcklqdq xmm8, xmm10 164 punpckhqdq xmm9, xmm10 165 movdqa xmm13, xmm12 166 punpcklqdq xmm12, xmm14 167 punpckhqdq xmm13, xmm14 168 movdqa xmmword ptr [rsp+0xC0], xmm8 169 movdqa xmmword ptr [rsp+0xD0], xmm9 170 movdqa xmmword ptr [rsp+0xE0], xmm12 171 movdqa xmmword ptr [rsp+0xF0], xmm13 172 movdqa xmm9, xmmword ptr [BLAKE3_IV_1+rip] 173 movdqa xmm10, xmmword ptr [BLAKE3_IV_2+rip] 174 movdqa xmm11, xmmword ptr [BLAKE3_IV_3+rip] 175 movdqa xmm12, xmmword ptr [rsp+0x110] 176 movdqa xmm13, xmmword ptr [rsp+0x120] 177 movdqa xmm14, xmmword ptr [BLAKE3_BLOCK_LEN+rip] 178 movd xmm15, eax 179 pshufd xmm15, xmm15, 0x00 180 prefetcht0 [r8+rdx+0x80] 181 prefetcht0 [r9+rdx+0x80] 182 prefetcht0 [r10+rdx+0x80] 183 prefetcht0 [r11+rdx+0x80] 184 paddd xmm0, xmmword ptr [rsp] 185 paddd xmm1, xmmword ptr [rsp+0x20] 186 paddd xmm2, xmmword ptr [rsp+0x40] 187 paddd xmm3, xmmword ptr [rsp+0x60] 188 paddd xmm0, xmm4 189 paddd xmm1, xmm5 190 paddd xmm2, xmm6 191 paddd xmm3, xmm7 192 pxor xmm12, xmm0 193 pxor xmm13, xmm1 194 pxor xmm14, xmm2 195 pxor xmm15, xmm3 196 pshuflw xmm12, xmm12, 0xB1 197 pshufhw xmm12, xmm12, 0xB1 198 pshuflw xmm13, xmm13, 0xB1 199 pshufhw xmm13, xmm13, 0xB1 200 pshuflw xmm14, xmm14, 0xB1 201 pshufhw xmm14, xmm14, 0xB1 202 pshuflw xmm15, xmm15, 0xB1 203 pshufhw xmm15, xmm15, 0xB1 204 movdqa xmm8, xmmword ptr [BLAKE3_IV_0+rip] 205 paddd xmm8, xmm12 
206 paddd xmm9, xmm13 207 paddd xmm10, xmm14 208 paddd xmm11, xmm15 209 pxor xmm4, xmm8 210 pxor xmm5, xmm9 211 pxor xmm6, xmm10 212 pxor xmm7, xmm11 213 movdqa xmmword ptr [rsp+0x100], xmm8 214 movdqa xmm8, xmm4 215 psrld xmm8, 12 216 pslld xmm4, 20 217 por xmm4, xmm8 218 movdqa xmm8, xmm5 219 psrld xmm8, 12 220 pslld xmm5, 20 221 por xmm5, xmm8 222 movdqa xmm8, xmm6 223 psrld xmm8, 12 224 pslld xmm6, 20 225 por xmm6, xmm8 226 movdqa xmm8, xmm7 227 psrld xmm8, 12 228 pslld xmm7, 20 229 por xmm7, xmm8 230 paddd xmm0, xmmword ptr [rsp+0x10] 231 paddd xmm1, xmmword ptr [rsp+0x30] 232 paddd xmm2, xmmword ptr [rsp+0x50] 233 paddd xmm3, xmmword ptr [rsp+0x70] 234 paddd xmm0, xmm4 235 paddd xmm1, xmm5 236 paddd xmm2, xmm6 237 paddd xmm3, xmm7 238 pxor xmm12, xmm0 239 pxor xmm13, xmm1 240 pxor xmm14, xmm2 241 pxor xmm15, xmm3 242 movdqa xmm8, xmm12 243 psrld xmm12, 8 244 pslld xmm8, 24 245 pxor xmm12, xmm8 246 movdqa xmm8, xmm13 247 psrld xmm13, 8 248 pslld xmm8, 24 249 pxor xmm13, xmm8 250 movdqa xmm8, xmm14 251 psrld xmm14, 8 252 pslld xmm8, 24 253 pxor xmm14, xmm8 254 movdqa xmm8, xmm15 255 psrld xmm15, 8 256 pslld xmm8, 24 257 pxor xmm15, xmm8 258 movdqa xmm8, xmmword ptr [rsp+0x100] 259 paddd xmm8, xmm12 260 paddd xmm9, xmm13 261 paddd xmm10, xmm14 262 paddd xmm11, xmm15 263 pxor xmm4, xmm8 264 pxor xmm5, xmm9 265 pxor xmm6, xmm10 266 pxor xmm7, xmm11 267 movdqa xmmword ptr [rsp+0x100], xmm8 268 movdqa xmm8, xmm4 269 psrld xmm8, 7 270 pslld xmm4, 25 271 por xmm4, xmm8 272 movdqa xmm8, xmm5 273 psrld xmm8, 7 274 pslld xmm5, 25 275 por xmm5, xmm8 276 movdqa xmm8, xmm6 277 psrld xmm8, 7 278 pslld xmm6, 25 279 por xmm6, xmm8 280 movdqa xmm8, xmm7 281 psrld xmm8, 7 282 pslld xmm7, 25 283 por xmm7, xmm8 284 paddd xmm0, xmmword ptr [rsp+0x80] 285 paddd xmm1, xmmword ptr [rsp+0xA0] 286 paddd xmm2, xmmword ptr [rsp+0xC0] 287 paddd xmm3, xmmword ptr [rsp+0xE0] 288 paddd xmm0, xmm5 289 paddd xmm1, xmm6 290 paddd xmm2, xmm7 291 paddd xmm3, xmm4 292 pxor xmm15, xmm0 293 pxor 
xmm12, xmm1 294 pxor xmm13, xmm2 295 pxor xmm14, xmm3 296 pshuflw xmm15, xmm15, 0xB1 297 pshufhw xmm15, xmm15, 0xB1 298 pshuflw xmm12, xmm12, 0xB1 299 pshufhw xmm12, xmm12, 0xB1 300 pshuflw xmm13, xmm13, 0xB1 301 pshufhw xmm13, xmm13, 0xB1 302 pshuflw xmm14, xmm14, 0xB1 303 pshufhw xmm14, xmm14, 0xB1 304 paddd xmm10, xmm15 305 paddd xmm11, xmm12 306 movdqa xmm8, xmmword ptr [rsp+0x100] 307 paddd xmm8, xmm13 308 paddd xmm9, xmm14 309 pxor xmm5, xmm10 310 pxor xmm6, xmm11 311 pxor xmm7, xmm8 312 pxor xmm4, xmm9 313 movdqa xmmword ptr [rsp+0x100], xmm8 314 movdqa xmm8, xmm5 315 psrld xmm8, 12 316 pslld xmm5, 20 317 por xmm5, xmm8 318 movdqa xmm8, xmm6 319 psrld xmm8, 12 320 pslld xmm6, 20 321 por xmm6, xmm8 322 movdqa xmm8, xmm7 323 psrld xmm8, 12 324 pslld xmm7, 20 325 por xmm7, xmm8 326 movdqa xmm8, xmm4 327 psrld xmm8, 12 328 pslld xmm4, 20 329 por xmm4, xmm8 330 paddd xmm0, xmmword ptr [rsp+0x90] 331 paddd xmm1, xmmword ptr [rsp+0xB0] 332 paddd xmm2, xmmword ptr [rsp+0xD0] 333 paddd xmm3, xmmword ptr [rsp+0xF0] 334 paddd xmm0, xmm5 335 paddd xmm1, xmm6 336 paddd xmm2, xmm7 337 paddd xmm3, xmm4 338 pxor xmm15, xmm0 339 pxor xmm12, xmm1 340 pxor xmm13, xmm2 341 pxor xmm14, xmm3 342 movdqa xmm8, xmm15 343 psrld xmm15, 8 344 pslld xmm8, 24 345 pxor xmm15, xmm8 346 movdqa xmm8, xmm12 347 psrld xmm12, 8 348 pslld xmm8, 24 349 pxor xmm12, xmm8 350 movdqa xmm8, xmm13 351 psrld xmm13, 8 352 pslld xmm8, 24 353 pxor xmm13, xmm8 354 movdqa xmm8, xmm14 355 psrld xmm14, 8 356 pslld xmm8, 24 357 pxor xmm14, xmm8 358 paddd xmm10, xmm15 359 paddd xmm11, xmm12 360 movdqa xmm8, xmmword ptr [rsp+0x100] 361 paddd xmm8, xmm13 362 paddd xmm9, xmm14 363 pxor xmm5, xmm10 364 pxor xmm6, xmm11 365 pxor xmm7, xmm8 366 pxor xmm4, xmm9 367 movdqa xmmword ptr [rsp+0x100], xmm8 368 movdqa xmm8, xmm5 369 psrld xmm8, 7 370 pslld xmm5, 25 371 por xmm5, xmm8 372 movdqa xmm8, xmm6 373 psrld xmm8, 7 374 pslld xmm6, 25 375 por xmm6, xmm8 376 movdqa xmm8, xmm7 377 psrld xmm8, 7 378 pslld xmm7, 25 379 
por xmm7, xmm8 380 movdqa xmm8, xmm4 381 psrld xmm8, 7 382 pslld xmm4, 25 383 por xmm4, xmm8 384 paddd xmm0, xmmword ptr [rsp+0x20] 385 paddd xmm1, xmmword ptr [rsp+0x30] 386 paddd xmm2, xmmword ptr [rsp+0x70] 387 paddd xmm3, xmmword ptr [rsp+0x40] 388 paddd xmm0, xmm4 389 paddd xmm1, xmm5 390 paddd xmm2, xmm6 391 paddd xmm3, xmm7 392 pxor xmm12, xmm0 393 pxor xmm13, xmm1 394 pxor xmm14, xmm2 395 pxor xmm15, xmm3 396 pshuflw xmm12, xmm12, 0xB1 397 pshufhw xmm12, xmm12, 0xB1 398 pshuflw xmm13, xmm13, 0xB1 399 pshufhw xmm13, xmm13, 0xB1 400 pshuflw xmm14, xmm14, 0xB1 401 pshufhw xmm14, xmm14, 0xB1 402 pshuflw xmm15, xmm15, 0xB1 403 pshufhw xmm15, xmm15, 0xB1 404 movdqa xmm8, xmmword ptr [rsp+0x100] 405 paddd xmm8, xmm12 406 paddd xmm9, xmm13 407 paddd xmm10, xmm14 408 paddd xmm11, xmm15 409 pxor xmm4, xmm8 410 pxor xmm5, xmm9 411 pxor xmm6, xmm10 412 pxor xmm7, xmm11 413 movdqa xmmword ptr [rsp+0x100], xmm8 414 movdqa xmm8, xmm4 415 psrld xmm8, 12 416 pslld xmm4, 20 417 por xmm4, xmm8 418 movdqa xmm8, xmm5 419 psrld xmm8, 12 420 pslld xmm5, 20 421 por xmm5, xmm8 422 movdqa xmm8, xmm6 423 psrld xmm8, 12 424 pslld xmm6, 20 425 por xmm6, xmm8 426 movdqa xmm8, xmm7 427 psrld xmm8, 12 428 pslld xmm7, 20 429 por xmm7, xmm8 430 paddd xmm0, xmmword ptr [rsp+0x60] 431 paddd xmm1, xmmword ptr [rsp+0xA0] 432 paddd xmm2, xmmword ptr [rsp] 433 paddd xmm3, xmmword ptr [rsp+0xD0] 434 paddd xmm0, xmm4 435 paddd xmm1, xmm5 436 paddd xmm2, xmm6 437 paddd xmm3, xmm7 438 pxor xmm12, xmm0 439 pxor xmm13, xmm1 440 pxor xmm14, xmm2 441 pxor xmm15, xmm3 442 movdqa xmm8, xmm12 443 psrld xmm12, 8 444 pslld xmm8, 24 445 pxor xmm12, xmm8 446 movdqa xmm8, xmm13 447 psrld xmm13, 8 448 pslld xmm8, 24 449 pxor xmm13, xmm8 450 movdqa xmm8, xmm14 451 psrld xmm14, 8 452 pslld xmm8, 24 453 pxor xmm14, xmm8 454 movdqa xmm8, xmm15 455 psrld xmm15, 8 456 pslld xmm8, 24 457 pxor xmm15, xmm8 458 movdqa xmm8, xmmword ptr [rsp+0x100] 459 paddd xmm8, xmm12 460 paddd xmm9, xmm13 461 paddd xmm10, xmm14 462 paddd 
xmm11, xmm15 463 pxor xmm4, xmm8 464 pxor xmm5, xmm9 465 pxor xmm6, xmm10 466 pxor xmm7, xmm11 467 movdqa xmmword ptr [rsp+0x100], xmm8 468 movdqa xmm8, xmm4 469 psrld xmm8, 7 470 pslld xmm4, 25 471 por xmm4, xmm8 472 movdqa xmm8, xmm5 473 psrld xmm8, 7 474 pslld xmm5, 25 475 por xmm5, xmm8 476 movdqa xmm8, xmm6 477 psrld xmm8, 7 478 pslld xmm6, 25 479 por xmm6, xmm8 480 movdqa xmm8, xmm7 481 psrld xmm8, 7 482 pslld xmm7, 25 483 por xmm7, xmm8 484 paddd xmm0, xmmword ptr [rsp+0x10] 485 paddd xmm1, xmmword ptr [rsp+0xC0] 486 paddd xmm2, xmmword ptr [rsp+0x90] 487 paddd xmm3, xmmword ptr [rsp+0xF0] 488 paddd xmm0, xmm5 489 paddd xmm1, xmm6 490 paddd xmm2, xmm7 491 paddd xmm3, xmm4 492 pxor xmm15, xmm0 493 pxor xmm12, xmm1 494 pxor xmm13, xmm2 495 pxor xmm14, xmm3 496 pshuflw xmm15, xmm15, 0xB1 497 pshufhw xmm15, xmm15, 0xB1 498 pshuflw xmm12, xmm12, 0xB1 499 pshufhw xmm12, xmm12, 0xB1 500 pshuflw xmm13, xmm13, 0xB1 501 pshufhw xmm13, xmm13, 0xB1 502 pshuflw xmm14, xmm14, 0xB1 503 pshufhw xmm14, xmm14, 0xB1 504 paddd xmm10, xmm15 505 paddd xmm11, xmm12 506 movdqa xmm8, xmmword ptr [rsp+0x100] 507 paddd xmm8, xmm13 508 paddd xmm9, xmm14 509 pxor xmm5, xmm10 510 pxor xmm6, xmm11 511 pxor xmm7, xmm8 512 pxor xmm4, xmm9 513 movdqa xmmword ptr [rsp+0x100], xmm8 514 movdqa xmm8, xmm5 515 psrld xmm8, 12 516 pslld xmm5, 20 517 por xmm5, xmm8 518 movdqa xmm8, xmm6 519 psrld xmm8, 12 520 pslld xmm6, 20 521 por xmm6, xmm8 522 movdqa xmm8, xmm7 523 psrld xmm8, 12 524 pslld xmm7, 20 525 por xmm7, xmm8 526 movdqa xmm8, xmm4 527 psrld xmm8, 12 528 pslld xmm4, 20 529 por xmm4, xmm8 530 paddd xmm0, xmmword ptr [rsp+0xB0] 531 paddd xmm1, xmmword ptr [rsp+0x50] 532 paddd xmm2, xmmword ptr [rsp+0xE0] 533 paddd xmm3, xmmword ptr [rsp+0x80] 534 paddd xmm0, xmm5 535 paddd xmm1, xmm6 536 paddd xmm2, xmm7 537 paddd xmm3, xmm4 538 pxor xmm15, xmm0 539 pxor xmm12, xmm1 540 pxor xmm13, xmm2 541 pxor xmm14, xmm3 542 movdqa xmm8, xmm15 543 psrld xmm15, 8 544 pslld xmm8, 24 545 pxor xmm15, xmm8 546 
movdqa xmm8, xmm12 547 psrld xmm12, 8 548 pslld xmm8, 24 549 pxor xmm12, xmm8 550 movdqa xmm8, xmm13 551 psrld xmm13, 8 552 pslld xmm8, 24 553 pxor xmm13, xmm8 554 movdqa xmm8, xmm14 555 psrld xmm14, 8 556 pslld xmm8, 24 557 pxor xmm14, xmm8 558 paddd xmm10, xmm15 559 paddd xmm11, xmm12 560 movdqa xmm8, xmmword ptr [rsp+0x100] 561 paddd xmm8, xmm13 562 paddd xmm9, xmm14 563 pxor xmm5, xmm10 564 pxor xmm6, xmm11 565 pxor xmm7, xmm8 566 pxor xmm4, xmm9 567 movdqa xmmword ptr [rsp+0x100], xmm8 568 movdqa xmm8, xmm5 569 psrld xmm8, 7 570 pslld xmm5, 25 571 por xmm5, xmm8 572 movdqa xmm8, xmm6 573 psrld xmm8, 7 574 pslld xmm6, 25 575 por xmm6, xmm8 576 movdqa xmm8, xmm7 577 psrld xmm8, 7 578 pslld xmm7, 25 579 por xmm7, xmm8 580 movdqa xmm8, xmm4 581 psrld xmm8, 7 582 pslld xmm4, 25 583 por xmm4, xmm8 584 paddd xmm0, xmmword ptr [rsp+0x30] 585 paddd xmm1, xmmword ptr [rsp+0xA0] 586 paddd xmm2, xmmword ptr [rsp+0xD0] 587 paddd xmm3, xmmword ptr [rsp+0x70] 588 paddd xmm0, xmm4 589 paddd xmm1, xmm5 590 paddd xmm2, xmm6 591 paddd xmm3, xmm7 592 pxor xmm12, xmm0 593 pxor xmm13, xmm1 594 pxor xmm14, xmm2 595 pxor xmm15, xmm3 596 pshuflw xmm12, xmm12, 0xB1 597 pshufhw xmm12, xmm12, 0xB1 598 pshuflw xmm13, xmm13, 0xB1 599 pshufhw xmm13, xmm13, 0xB1 600 pshuflw xmm14, xmm14, 0xB1 601 pshufhw xmm14, xmm14, 0xB1 602 pshuflw xmm15, xmm15, 0xB1 603 pshufhw xmm15, xmm15, 0xB1 604 movdqa xmm8, xmmword ptr [rsp+0x100] 605 paddd xmm8, xmm12 606 paddd xmm9, xmm13 607 paddd xmm10, xmm14 608 paddd xmm11, xmm15 609 pxor xmm4, xmm8 610 pxor xmm5, xmm9 611 pxor xmm6, xmm10 612 pxor xmm7, xmm11 613 movdqa xmmword ptr [rsp+0x100], xmm8 614 movdqa xmm8, xmm4 615 psrld xmm8, 12 616 pslld xmm4, 20 617 por xmm4, xmm8 618 movdqa xmm8, xmm5 619 psrld xmm8, 12 620 pslld xmm5, 20 621 por xmm5, xmm8 622 movdqa xmm8, xmm6 623 psrld xmm8, 12 624 pslld xmm6, 20 625 por xmm6, xmm8 626 movdqa xmm8, xmm7 627 psrld xmm8, 12 628 pslld xmm7, 20 629 por xmm7, xmm8 630 paddd xmm0, xmmword ptr [rsp+0x40] 631 paddd 
xmm1, xmmword ptr [rsp+0xC0] 632 paddd xmm2, xmmword ptr [rsp+0x20] 633 paddd xmm3, xmmword ptr [rsp+0xE0] 634 paddd xmm0, xmm4 635 paddd xmm1, xmm5 636 paddd xmm2, xmm6 637 paddd xmm3, xmm7 638 pxor xmm12, xmm0 639 pxor xmm13, xmm1 640 pxor xmm14, xmm2 641 pxor xmm15, xmm3 642 movdqa xmm8, xmm12 643 psrld xmm12, 8 644 pslld xmm8, 24 645 pxor xmm12, xmm8 646 movdqa xmm8, xmm13 647 psrld xmm13, 8 648 pslld xmm8, 24 649 pxor xmm13, xmm8 650 movdqa xmm8, xmm14 651 psrld xmm14, 8 652 pslld xmm8, 24 653 pxor xmm14, xmm8 654 movdqa xmm8, xmm15 655 psrld xmm15, 8 656 pslld xmm8, 24 657 pxor xmm15, xmm8 658 movdqa xmm8, xmmword ptr [rsp+0x100] 659 paddd xmm8, xmm12 660 paddd xmm9, xmm13 661 paddd xmm10, xmm14 662 paddd xmm11, xmm15 663 pxor xmm4, xmm8 664 pxor xmm5, xmm9 665 pxor xmm6, xmm10 666 pxor xmm7, xmm11 667 movdqa xmmword ptr [rsp+0x100], xmm8 668 movdqa xmm8, xmm4 669 psrld xmm8, 7 670 pslld xmm4, 25 671 por xmm4, xmm8 672 movdqa xmm8, xmm5 673 psrld xmm8, 7 674 pslld xmm5, 25 675 por xmm5, xmm8 676 movdqa xmm8, xmm6 677 psrld xmm8, 7 678 pslld xmm6, 25 679 por xmm6, xmm8 680 movdqa xmm8, xmm7 681 psrld xmm8, 7 682 pslld xmm7, 25 683 por xmm7, xmm8 684 paddd xmm0, xmmword ptr [rsp+0x60] 685 paddd xmm1, xmmword ptr [rsp+0x90] 686 paddd xmm2, xmmword ptr [rsp+0xB0] 687 paddd xmm3, xmmword ptr [rsp+0x80] 688 paddd xmm0, xmm5 689 paddd xmm1, xmm6 690 paddd xmm2, xmm7 691 paddd xmm3, xmm4 692 pxor xmm15, xmm0 693 pxor xmm12, xmm1 694 pxor xmm13, xmm2 695 pxor xmm14, xmm3 696 pshuflw xmm15, xmm15, 0xB1 697 pshufhw xmm15, xmm15, 0xB1 698 pshuflw xmm12, xmm12, 0xB1 699 pshufhw xmm12, xmm12, 0xB1 700 pshuflw xmm13, xmm13, 0xB1 701 pshufhw xmm13, xmm13, 0xB1 702 pshuflw xmm14, xmm14, 0xB1 703 pshufhw xmm14, xmm14, 0xB1 704 paddd xmm10, xmm15 705 paddd xmm11, xmm12 706 movdqa xmm8, xmmword ptr [rsp+0x100] 707 paddd xmm8, xmm13 708 paddd xmm9, xmm14 709 pxor xmm5, xmm10 710 pxor xmm6, xmm11 711 pxor xmm7, xmm8 712 pxor xmm4, xmm9 713 movdqa xmmword ptr [rsp+0x100], xmm8 714 
movdqa xmm8, xmm5 715 psrld xmm8, 12 716 pslld xmm5, 20 717 por xmm5, xmm8 718 movdqa xmm8, xmm6 719 psrld xmm8, 12 720 pslld xmm6, 20 721 por xmm6, xmm8 722 movdqa xmm8, xmm7 723 psrld xmm8, 12 724 pslld xmm7, 20 725 por xmm7, xmm8 726 movdqa xmm8, xmm4 727 psrld xmm8, 12 728 pslld xmm4, 20 729 por xmm4, xmm8 730 paddd xmm0, xmmword ptr [rsp+0x50] 731 paddd xmm1, xmmword ptr [rsp] 732 paddd xmm2, xmmword ptr [rsp+0xF0] 733 paddd xmm3, xmmword ptr [rsp+0x10] 734 paddd xmm0, xmm5 735 paddd xmm1, xmm6 736 paddd xmm2, xmm7 737 paddd xmm3, xmm4 738 pxor xmm15, xmm0 739 pxor xmm12, xmm1 740 pxor xmm13, xmm2 741 pxor xmm14, xmm3 742 movdqa xmm8, xmm15 743 psrld xmm15, 8 744 pslld xmm8, 24 745 pxor xmm15, xmm8 746 movdqa xmm8, xmm12 747 psrld xmm12, 8 748 pslld xmm8, 24 749 pxor xmm12, xmm8 750 movdqa xmm8, xmm13 751 psrld xmm13, 8 752 pslld xmm8, 24 753 pxor xmm13, xmm8 754 movdqa xmm8, xmm14 755 psrld xmm14, 8 756 pslld xmm8, 24 757 pxor xmm14, xmm8 758 paddd xmm10, xmm15 759 paddd xmm11, xmm12 760 movdqa xmm8, xmmword ptr [rsp+0x100] 761 paddd xmm8, xmm13 762 paddd xmm9, xmm14 763 pxor xmm5, xmm10 764 pxor xmm6, xmm11 765 pxor xmm7, xmm8 766 pxor xmm4, xmm9 767 movdqa xmmword ptr [rsp+0x100], xmm8 768 movdqa xmm8, xmm5 769 psrld xmm8, 7 770 pslld xmm5, 25 771 por xmm5, xmm8 772 movdqa xmm8, xmm6 773 psrld xmm8, 7 774 pslld xmm6, 25 775 por xmm6, xmm8 776 movdqa xmm8, xmm7 777 psrld xmm8, 7 778 pslld xmm7, 25 779 por xmm7, xmm8 780 movdqa xmm8, xmm4 781 psrld xmm8, 7 782 pslld xmm4, 25 783 por xmm4, xmm8 784 paddd xmm0, xmmword ptr [rsp+0xA0] 785 paddd xmm1, xmmword ptr [rsp+0xC0] 786 paddd xmm2, xmmword ptr [rsp+0xE0] 787 paddd xmm3, xmmword ptr [rsp+0xD0] 788 paddd xmm0, xmm4 789 paddd xmm1, xmm5 790 paddd xmm2, xmm6 791 paddd xmm3, xmm7 792 pxor xmm12, xmm0 793 pxor xmm13, xmm1 794 pxor xmm14, xmm2 795 pxor xmm15, xmm3 796 pshuflw xmm12, xmm12, 0xB1 797 pshufhw xmm12, xmm12, 0xB1 798 pshuflw xmm13, xmm13, 0xB1 799 pshufhw xmm13, xmm13, 0xB1 800 pshuflw xmm14, xmm14, 
0xB1 801 pshufhw xmm14, xmm14, 0xB1 802 pshuflw xmm15, xmm15, 0xB1 803 pshufhw xmm15, xmm15, 0xB1 804 movdqa xmm8, xmmword ptr [rsp+0x100] 805 paddd xmm8, xmm12 806 paddd xmm9, xmm13 807 paddd xmm10, xmm14 808 paddd xmm11, xmm15 809 pxor xmm4, xmm8 810 pxor xmm5, xmm9 811 pxor xmm6, xmm10 812 pxor xmm7, xmm11 813 movdqa xmmword ptr [rsp+0x100], xmm8 814 movdqa xmm8, xmm4 815 psrld xmm8, 12 816 pslld xmm4, 20 817 por xmm4, xmm8 818 movdqa xmm8, xmm5 819 psrld xmm8, 12 820 pslld xmm5, 20 821 por xmm5, xmm8 822 movdqa xmm8, xmm6 823 psrld xmm8, 12 824 pslld xmm6, 20 825 por xmm6, xmm8 826 movdqa xmm8, xmm7 827 psrld xmm8, 12 828 pslld xmm7, 20 829 por xmm7, xmm8 830 paddd xmm0, xmmword ptr [rsp+0x70] 831 paddd xmm1, xmmword ptr [rsp+0x90] 832 paddd xmm2, xmmword ptr [rsp+0x30] 833 paddd xmm3, xmmword ptr [rsp+0xF0] 834 paddd xmm0, xmm4 835 paddd xmm1, xmm5 836 paddd xmm2, xmm6 837 paddd xmm3, xmm7 838 pxor xmm12, xmm0 839 pxor xmm13, xmm1 840 pxor xmm14, xmm2 841 pxor xmm15, xmm3 842 movdqa xmm8, xmm12 843 psrld xmm12, 8 844 pslld xmm8, 24 845 pxor xmm12, xmm8 846 movdqa xmm8, xmm13 847 psrld xmm13, 8 848 pslld xmm8, 24 849 pxor xmm13, xmm8 850 movdqa xmm8, xmm14 851 psrld xmm14, 8 852 pslld xmm8, 24 853 pxor xmm14, xmm8 854 movdqa xmm8, xmm15 855 psrld xmm15, 8 856 pslld xmm8, 24 857 pxor xmm15, xmm8 858 movdqa xmm8, xmmword ptr [rsp+0x100] 859 paddd xmm8, xmm12 860 paddd xmm9, xmm13 861 paddd xmm10, xmm14 862 paddd xmm11, xmm15 863 pxor xmm4, xmm8 864 pxor xmm5, xmm9 865 pxor xmm6, xmm10 866 pxor xmm7, xmm11 867 movdqa xmmword ptr [rsp+0x100], xmm8 868 movdqa xmm8, xmm4 869 psrld xmm8, 7 870 pslld xmm4, 25 871 por xmm4, xmm8 872 movdqa xmm8, xmm5 873 psrld xmm8, 7 874 pslld xmm5, 25 875 por xmm5, xmm8 876 movdqa xmm8, xmm6 877 psrld xmm8, 7 878 pslld xmm6, 25 879 por xmm6, xmm8 880 movdqa xmm8, xmm7 881 psrld xmm8, 7 882 pslld xmm7, 25 883 por xmm7, xmm8 884 paddd xmm0, xmmword ptr [rsp+0x40] 885 paddd xmm1, xmmword ptr [rsp+0xB0] 886 paddd xmm2, xmmword ptr 
[rsp+0x50] 887 paddd xmm3, xmmword ptr [rsp+0x10] 888 paddd xmm0, xmm5 889 paddd xmm1, xmm6 890 paddd xmm2, xmm7 891 paddd xmm3, xmm4 892 pxor xmm15, xmm0 893 pxor xmm12, xmm1 894 pxor xmm13, xmm2 895 pxor xmm14, xmm3 896 pshuflw xmm15, xmm15, 0xB1 897 pshufhw xmm15, xmm15, 0xB1 898 pshuflw xmm12, xmm12, 0xB1 899 pshufhw xmm12, xmm12, 0xB1 900 pshuflw xmm13, xmm13, 0xB1 901 pshufhw xmm13, xmm13, 0xB1 902 pshuflw xmm14, xmm14, 0xB1 903 pshufhw xmm14, xmm14, 0xB1 904 paddd xmm10, xmm15 905 paddd xmm11, xmm12 906 movdqa xmm8, xmmword ptr [rsp+0x100] 907 paddd xmm8, xmm13 908 paddd xmm9, xmm14 909 pxor xmm5, xmm10 910 pxor xmm6, xmm11 911 pxor xmm7, xmm8 912 pxor xmm4, xmm9 913 movdqa xmmword ptr [rsp+0x100], xmm8 914 movdqa xmm8, xmm5 915 psrld xmm8, 12 916 pslld xmm5, 20 917 por xmm5, xmm8 918 movdqa xmm8, xmm6 919 psrld xmm8, 12 920 pslld xmm6, 20 921 por xmm6, xmm8 922 movdqa xmm8, xmm7 923 psrld xmm8, 12 924 pslld xmm7, 20 925 por xmm7, xmm8 926 movdqa xmm8, xmm4 927 psrld xmm8, 12 928 pslld xmm4, 20 929 por xmm4, xmm8 930 paddd xmm0, xmmword ptr [rsp] 931 paddd xmm1, xmmword ptr [rsp+0x20] 932 paddd xmm2, xmmword ptr [rsp+0x80] 933 paddd xmm3, xmmword ptr [rsp+0x60] 934 paddd xmm0, xmm5 935 paddd xmm1, xmm6 936 paddd xmm2, xmm7 937 paddd xmm3, xmm4 938 pxor xmm15, xmm0 939 pxor xmm12, xmm1 940 pxor xmm13, xmm2 941 pxor xmm14, xmm3 942 movdqa xmm8, xmm15 943 psrld xmm15, 8 944 pslld xmm8, 24 945 pxor xmm15, xmm8 946 movdqa xmm8, xmm12 947 psrld xmm12, 8 948 pslld xmm8, 24 949 pxor xmm12, xmm8 950 movdqa xmm8, xmm13 951 psrld xmm13, 8 952 pslld xmm8, 24 953 pxor xmm13, xmm8 954 movdqa xmm8, xmm14 955 psrld xmm14, 8 956 pslld xmm8, 24 957 pxor xmm14, xmm8 958 paddd xmm10, xmm15 959 paddd xmm11, xmm12 960 movdqa xmm8, xmmword ptr [rsp+0x100] 961 paddd xmm8, xmm13 962 paddd xmm9, xmm14 963 pxor xmm5, xmm10 964 pxor xmm6, xmm11 965 pxor xmm7, xmm8 966 pxor xmm4, xmm9 967 movdqa xmmword ptr [rsp+0x100], xmm8 968 movdqa xmm8, xmm5 969 psrld xmm8, 7 970 pslld xmm5, 25 971 
por xmm5, xmm8 972 movdqa xmm8, xmm6 973 psrld xmm8, 7 974 pslld xmm6, 25 975 por xmm6, xmm8 976 movdqa xmm8, xmm7 977 psrld xmm8, 7 978 pslld xmm7, 25 979 por xmm7, xmm8 980 movdqa xmm8, xmm4 981 psrld xmm8, 7 982 pslld xmm4, 25 983 por xmm4, xmm8 984 paddd xmm0, xmmword ptr [rsp+0xC0] 985 paddd xmm1, xmmword ptr [rsp+0x90] 986 paddd xmm2, xmmword ptr [rsp+0xF0] 987 paddd xmm3, xmmword ptr [rsp+0xE0] 988 paddd xmm0, xmm4 989 paddd xmm1, xmm5 990 paddd xmm2, xmm6 991 paddd xmm3, xmm7 992 pxor xmm12, xmm0 993 pxor xmm13, xmm1 994 pxor xmm14, xmm2 995 pxor xmm15, xmm3 996 pshuflw xmm12, xmm12, 0xB1 997 pshufhw xmm12, xmm12, 0xB1 998 pshuflw xmm13, xmm13, 0xB1 999 pshufhw xmm13, xmm13, 0xB1 1000 pshuflw xmm14, xmm14, 0xB1 1001 pshufhw xmm14, xmm14, 0xB1 1002 pshuflw xmm15, xmm15, 0xB1 1003 pshufhw xmm15, xmm15, 0xB1 1004 movdqa xmm8, xmmword ptr [rsp+0x100] 1005 paddd xmm8, xmm12 1006 paddd xmm9, xmm13 1007 paddd xmm10, xmm14 1008 paddd xmm11, xmm15 1009 pxor xmm4, xmm8 1010 pxor xmm5, xmm9 1011 pxor xmm6, xmm10 1012 pxor xmm7, xmm11 1013 movdqa xmmword ptr [rsp+0x100], xmm8 1014 movdqa xmm8, xmm4 1015 psrld xmm8, 12 1016 pslld xmm4, 20 1017 por xmm4, xmm8 1018 movdqa xmm8, xmm5 1019 psrld xmm8, 12 1020 pslld xmm5, 20 1021 por xmm5, xmm8 1022 movdqa xmm8, xmm6 1023 psrld xmm8, 12 1024 pslld xmm6, 20 1025 por xmm6, xmm8 1026 movdqa xmm8, xmm7 1027 psrld xmm8, 12 1028 pslld xmm7, 20 1029 por xmm7, xmm8 1030 paddd xmm0, xmmword ptr [rsp+0xD0] 1031 paddd xmm1, xmmword ptr [rsp+0xB0] 1032 paddd xmm2, xmmword ptr [rsp+0xA0] 1033 paddd xmm3, xmmword ptr [rsp+0x80] 1034 paddd xmm0, xmm4 1035 paddd xmm1, xmm5 1036 paddd xmm2, xmm6 1037 paddd xmm3, xmm7 1038 pxor xmm12, xmm0 1039 pxor xmm13, xmm1 1040 pxor xmm14, xmm2 1041 pxor xmm15, xmm3 1042 movdqa xmm8, xmm12 1043 psrld xmm12, 8 1044 pslld xmm8, 24 1045 pxor xmm12, xmm8 1046 movdqa xmm8, xmm13 1047 psrld xmm13, 8 1048 pslld xmm8, 24 1049 pxor xmm13, xmm8 1050 movdqa xmm8, xmm14 1051 psrld xmm14, 8 1052 pslld xmm8, 24 1053 
pxor xmm14, xmm8 1054 movdqa xmm8, xmm15 1055 psrld xmm15, 8 1056 pslld xmm8, 24 1057 pxor xmm15, xmm8 1058 movdqa xmm8, xmmword ptr [rsp+0x100] 1059 paddd xmm8, xmm12 1060 paddd xmm9, xmm13 1061 paddd xmm10, xmm14 1062 paddd xmm11, xmm15 1063 pxor xmm4, xmm8 1064 pxor xmm5, xmm9 1065 pxor xmm6, xmm10 1066 pxor xmm7, xmm11 1067 movdqa xmmword ptr [rsp+0x100], xmm8 1068 movdqa xmm8, xmm4 1069 psrld xmm8, 7 1070 pslld xmm4, 25 1071 por xmm4, xmm8 1072 movdqa xmm8, xmm5 1073 psrld xmm8, 7 1074 pslld xmm5, 25 1075 por xmm5, xmm8 1076 movdqa xmm8, xmm6 1077 psrld xmm8, 7 1078 pslld xmm6, 25 1079 por xmm6, xmm8 1080 movdqa xmm8, xmm7 1081 psrld xmm8, 7 1082 pslld xmm7, 25 1083 por xmm7, xmm8 1084 paddd xmm0, xmmword ptr [rsp+0x70] 1085 paddd xmm1, xmmword ptr [rsp+0x50] 1086 paddd xmm2, xmmword ptr [rsp] 1087 paddd xmm3, xmmword ptr [rsp+0x60] 1088 paddd xmm0, xmm5 1089 paddd xmm1, xmm6 1090 paddd xmm2, xmm7 1091 paddd xmm3, xmm4 1092 pxor xmm15, xmm0 1093 pxor xmm12, xmm1 1094 pxor xmm13, xmm2 1095 pxor xmm14, xmm3 1096 pshuflw xmm15, xmm15, 0xB1 1097 pshufhw xmm15, xmm15, 0xB1 1098 pshuflw xmm12, xmm12, 0xB1 1099 pshufhw xmm12, xmm12, 0xB1 1100 pshuflw xmm13, xmm13, 0xB1 1101 pshufhw xmm13, xmm13, 0xB1 1102 pshuflw xmm14, xmm14, 0xB1 1103 pshufhw xmm14, xmm14, 0xB1 1104 paddd xmm10, xmm15 1105 paddd xmm11, xmm12 1106 movdqa xmm8, xmmword ptr [rsp+0x100] 1107 paddd xmm8, xmm13 1108 paddd xmm9, xmm14 1109 pxor xmm5, xmm10 1110 pxor xmm6, xmm11 1111 pxor xmm7, xmm8 1112 pxor xmm4, xmm9 1113 movdqa xmmword ptr [rsp+0x100], xmm8 1114 movdqa xmm8, xmm5 1115 psrld xmm8, 12 1116 pslld xmm5, 20 1117 por xmm5, xmm8 1118 movdqa xmm8, xmm6 1119 psrld xmm8, 12 1120 pslld xmm6, 20 1121 por xmm6, xmm8 1122 movdqa xmm8, xmm7 1123 psrld xmm8, 12 1124 pslld xmm7, 20 1125 por xmm7, xmm8 1126 movdqa xmm8, xmm4 1127 psrld xmm8, 12 1128 pslld xmm4, 20 1129 por xmm4, xmm8 1130 paddd xmm0, xmmword ptr [rsp+0x20] 1131 paddd xmm1, xmmword ptr [rsp+0x30] 1132 paddd xmm2, xmmword ptr [rsp+0x10] 
1133 paddd xmm3, xmmword ptr [rsp+0x40] 1134 paddd xmm0, xmm5 1135 paddd xmm1, xmm6 1136 paddd xmm2, xmm7 1137 paddd xmm3, xmm4 1138 pxor xmm15, xmm0 1139 pxor xmm12, xmm1 1140 pxor xmm13, xmm2 1141 pxor xmm14, xmm3 1142 movdqa xmm8, xmm15 1143 psrld xmm15, 8 1144 pslld xmm8, 24 1145 pxor xmm15, xmm8 1146 movdqa xmm8, xmm12 1147 psrld xmm12, 8 1148 pslld xmm8, 24 1149 pxor xmm12, xmm8 1150 movdqa xmm8, xmm13 1151 psrld xmm13, 8 1152 pslld xmm8, 24 1153 pxor xmm13, xmm8 1154 movdqa xmm8, xmm14 1155 psrld xmm14, 8 1156 pslld xmm8, 24 1157 pxor xmm14, xmm8 1158 paddd xmm10, xmm15 1159 paddd xmm11, xmm12 1160 movdqa xmm8, xmmword ptr [rsp+0x100] 1161 paddd xmm8, xmm13 1162 paddd xmm9, xmm14 1163 pxor xmm5, xmm10 1164 pxor xmm6, xmm11 1165 pxor xmm7, xmm8 1166 pxor xmm4, xmm9 1167 movdqa xmmword ptr [rsp+0x100], xmm8 1168 movdqa xmm8, xmm5 1169 psrld xmm8, 7 1170 pslld xmm5, 25 1171 por xmm5, xmm8 1172 movdqa xmm8, xmm6 1173 psrld xmm8, 7 1174 pslld xmm6, 25 1175 por xmm6, xmm8 1176 movdqa xmm8, xmm7 1177 psrld xmm8, 7 1178 pslld xmm7, 25 1179 por xmm7, xmm8 1180 movdqa xmm8, xmm4 1181 psrld xmm8, 7 1182 pslld xmm4, 25 1183 por xmm4, xmm8 1184 paddd xmm0, xmmword ptr [rsp+0x90] 1185 paddd xmm1, xmmword ptr [rsp+0xB0] 1186 paddd xmm2, xmmword ptr [rsp+0x80] 1187 paddd xmm3, xmmword ptr [rsp+0xF0] 1188 paddd xmm0, xmm4 1189 paddd xmm1, xmm5 1190 paddd xmm2, xmm6 1191 paddd xmm3, xmm7 1192 pxor xmm12, xmm0 1193 pxor xmm13, xmm1 1194 pxor xmm14, xmm2 1195 pxor xmm15, xmm3 1196 pshuflw xmm12, xmm12, 0xB1 1197 pshufhw xmm12, xmm12, 0xB1 1198 pshuflw xmm13, xmm13, 0xB1 1199 pshufhw xmm13, xmm13, 0xB1 1200 pshuflw xmm14, xmm14, 0xB1 1201 pshufhw xmm14, xmm14, 0xB1 1202 pshuflw xmm15, xmm15, 0xB1 1203 pshufhw xmm15, xmm15, 0xB1 1204 movdqa xmm8, xmmword ptr [rsp+0x100] 1205 paddd xmm8, xmm12 1206 paddd xmm9, xmm13 1207 paddd xmm10, xmm14 1208 paddd xmm11, xmm15 1209 pxor xmm4, xmm8 1210 pxor xmm5, xmm9 1211 pxor xmm6, xmm10 1212 pxor xmm7, xmm11 1213 movdqa xmmword ptr 
[rsp+0x100], xmm8 1214 movdqa xmm8, xmm4 1215 psrld xmm8, 12 1216 pslld xmm4, 20 1217 por xmm4, xmm8 1218 movdqa xmm8, xmm5 1219 psrld xmm8, 12 1220 pslld xmm5, 20 1221 por xmm5, xmm8 1222 movdqa xmm8, xmm6 1223 psrld xmm8, 12 1224 pslld xmm6, 20 1225 por xmm6, xmm8 1226 movdqa xmm8, xmm7 1227 psrld xmm8, 12 1228 pslld xmm7, 20 1229 por xmm7, xmm8 1230 paddd xmm0, xmmword ptr [rsp+0xE0] 1231 paddd xmm1, xmmword ptr [rsp+0x50] 1232 paddd xmm2, xmmword ptr [rsp+0xC0] 1233 paddd xmm3, xmmword ptr [rsp+0x10] 1234 paddd xmm0, xmm4 1235 paddd xmm1, xmm5 1236 paddd xmm2, xmm6 1237 paddd xmm3, xmm7 1238 pxor xmm12, xmm0 1239 pxor xmm13, xmm1 1240 pxor xmm14, xmm2 1241 pxor xmm15, xmm3 1242 movdqa xmm8, xmm12 1243 psrld xmm12, 8 1244 pslld xmm8, 24 1245 pxor xmm12, xmm8 1246 movdqa xmm8, xmm13 1247 psrld xmm13, 8 1248 pslld xmm8, 24 1249 pxor xmm13, xmm8 1250 movdqa xmm8, xmm14 1251 psrld xmm14, 8 1252 pslld xmm8, 24 1253 pxor xmm14, xmm8 1254 movdqa xmm8, xmm15 1255 psrld xmm15, 8 1256 pslld xmm8, 24 1257 pxor xmm15, xmm8 1258 movdqa xmm8, xmmword ptr [rsp+0x100] 1259 paddd xmm8, xmm12 1260 paddd xmm9, xmm13 1261 paddd xmm10, xmm14 1262 paddd xmm11, xmm15 1263 pxor xmm4, xmm8 1264 pxor xmm5, xmm9 1265 pxor xmm6, xmm10 1266 pxor xmm7, xmm11 1267 movdqa xmmword ptr [rsp+0x100], xmm8 1268 movdqa xmm8, xmm4 1269 psrld xmm8, 7 1270 pslld xmm4, 25 1271 por xmm4, xmm8 1272 movdqa xmm8, xmm5 1273 psrld xmm8, 7 1274 pslld xmm5, 25 1275 por xmm5, xmm8 1276 movdqa xmm8, xmm6 1277 psrld xmm8, 7 1278 pslld xmm6, 25 1279 por xmm6, xmm8 1280 movdqa xmm8, xmm7 1281 psrld xmm8, 7 1282 pslld xmm7, 25 1283 por xmm7, xmm8 1284 paddd xmm0, xmmword ptr [rsp+0xD0] 1285 paddd xmm1, xmmword ptr [rsp] 1286 paddd xmm2, xmmword ptr [rsp+0x20] 1287 paddd xmm3, xmmword ptr [rsp+0x40] 1288 paddd xmm0, xmm5 1289 paddd xmm1, xmm6 1290 paddd xmm2, xmm7 1291 paddd xmm3, xmm4 1292 pxor xmm15, xmm0 1293 pxor xmm12, xmm1 1294 pxor xmm13, xmm2 1295 pxor xmm14, xmm3 1296 pshuflw xmm15, xmm15, 0xB1 1297 pshufhw 
xmm15, xmm15, 0xB1 1298 pshuflw xmm12, xmm12, 0xB1 1299 pshufhw xmm12, xmm12, 0xB1 1300 pshuflw xmm13, xmm13, 0xB1 1301 pshufhw xmm13, xmm13, 0xB1 1302 pshuflw xmm14, xmm14, 0xB1 1303 pshufhw xmm14, xmm14, 0xB1 1304 paddd xmm10, xmm15 1305 paddd xmm11, xmm12 1306 movdqa xmm8, xmmword ptr [rsp+0x100] 1307 paddd xmm8, xmm13 1308 paddd xmm9, xmm14 1309 pxor xmm5, xmm10 1310 pxor xmm6, xmm11 1311 pxor xmm7, xmm8 1312 pxor xmm4, xmm9 1313 movdqa xmmword ptr [rsp+0x100], xmm8 1314 movdqa xmm8, xmm5 1315 psrld xmm8, 12 1316 pslld xmm5, 20 1317 por xmm5, xmm8 1318 movdqa xmm8, xmm6 1319 psrld xmm8, 12 1320 pslld xmm6, 20 1321 por xmm6, xmm8 1322 movdqa xmm8, xmm7 1323 psrld xmm8, 12 1324 pslld xmm7, 20 1325 por xmm7, xmm8 1326 movdqa xmm8, xmm4 1327 psrld xmm8, 12 1328 pslld xmm4, 20 1329 por xmm4, xmm8 1330 paddd xmm0, xmmword ptr [rsp+0x30] 1331 paddd xmm1, xmmword ptr [rsp+0xA0] 1332 paddd xmm2, xmmword ptr [rsp+0x60] 1333 paddd xmm3, xmmword ptr [rsp+0x70] 1334 paddd xmm0, xmm5 1335 paddd xmm1, xmm6 1336 paddd xmm2, xmm7 1337 paddd xmm3, xmm4 1338 pxor xmm15, xmm0 1339 pxor xmm12, xmm1 1340 pxor xmm13, xmm2 1341 pxor xmm14, xmm3 1342 movdqa xmm8, xmm15 1343 psrld xmm15, 8 1344 pslld xmm8, 24 1345 pxor xmm15, xmm8 1346 movdqa xmm8, xmm12 1347 psrld xmm12, 8 1348 pslld xmm8, 24 1349 pxor xmm12, xmm8 1350 movdqa xmm8, xmm13 1351 psrld xmm13, 8 1352 pslld xmm8, 24 1353 pxor xmm13, xmm8 1354 movdqa xmm8, xmm14 1355 psrld xmm14, 8 1356 pslld xmm8, 24 1357 pxor xmm14, xmm8 1358 paddd xmm10, xmm15 1359 paddd xmm11, xmm12 1360 movdqa xmm8, xmmword ptr [rsp+0x100] 1361 paddd xmm8, xmm13 1362 paddd xmm9, xmm14 1363 pxor xmm5, xmm10 1364 pxor xmm6, xmm11 1365 pxor xmm7, xmm8 1366 pxor xmm4, xmm9 1367 movdqa xmmword ptr [rsp+0x100], xmm8 1368 movdqa xmm8, xmm5 1369 psrld xmm8, 7 1370 pslld xmm5, 25 1371 por xmm5, xmm8 1372 movdqa xmm8, xmm6 1373 psrld xmm8, 7 1374 pslld xmm6, 25 1375 por xmm6, xmm8 1376 movdqa xmm8, xmm7 1377 psrld xmm8, 7 1378 pslld xmm7, 25 1379 por xmm7, xmm8 
1380 movdqa xmm8, xmm4 1381 psrld xmm8, 7 1382 pslld xmm4, 25 1383 por xmm4, xmm8 1384 paddd xmm0, xmmword ptr [rsp+0xB0] 1385 paddd xmm1, xmmword ptr [rsp+0x50] 1386 paddd xmm2, xmmword ptr [rsp+0x10] 1387 paddd xmm3, xmmword ptr [rsp+0x80] 1388 paddd xmm0, xmm4 1389 paddd xmm1, xmm5 1390 paddd xmm2, xmm6 1391 paddd xmm3, xmm7 1392 pxor xmm12, xmm0 1393 pxor xmm13, xmm1 1394 pxor xmm14, xmm2 1395 pxor xmm15, xmm3 1396 pshuflw xmm12, xmm12, 0xB1 1397 pshufhw xmm12, xmm12, 0xB1 1398 pshuflw xmm13, xmm13, 0xB1 1399 pshufhw xmm13, xmm13, 0xB1 1400 pshuflw xmm14, xmm14, 0xB1 1401 pshufhw xmm14, xmm14, 0xB1 1402 pshuflw xmm15, xmm15, 0xB1 1403 pshufhw xmm15, xmm15, 0xB1 1404 movdqa xmm8, xmmword ptr [rsp+0x100] 1405 paddd xmm8, xmm12 1406 paddd xmm9, xmm13 1407 paddd xmm10, xmm14 1408 paddd xmm11, xmm15 1409 pxor xmm4, xmm8 1410 pxor xmm5, xmm9 1411 pxor xmm6, xmm10 1412 pxor xmm7, xmm11 1413 movdqa xmmword ptr [rsp+0x100], xmm8 1414 movdqa xmm8, xmm4 1415 psrld xmm8, 12 1416 pslld xmm4, 20 1417 por xmm4, xmm8 1418 movdqa xmm8, xmm5 1419 psrld xmm8, 12 1420 pslld xmm5, 20 1421 por xmm5, xmm8 1422 movdqa xmm8, xmm6 1423 psrld xmm8, 12 1424 pslld xmm6, 20 1425 por xmm6, xmm8 1426 movdqa xmm8, xmm7 1427 psrld xmm8, 12 1428 pslld xmm7, 20 1429 por xmm7, xmm8 1430 paddd xmm0, xmmword ptr [rsp+0xF0] 1431 paddd xmm1, xmmword ptr [rsp] 1432 paddd xmm2, xmmword ptr [rsp+0x90] 1433 paddd xmm3, xmmword ptr [rsp+0x60] 1434 paddd xmm0, xmm4 1435 paddd xmm1, xmm5 1436 paddd xmm2, xmm6 1437 paddd xmm3, xmm7 1438 pxor xmm12, xmm0 1439 pxor xmm13, xmm1 1440 pxor xmm14, xmm2 1441 pxor xmm15, xmm3 1442 movdqa xmm8, xmm12 1443 psrld xmm12, 8 1444 pslld xmm8, 24 1445 pxor xmm12, xmm8 1446 movdqa xmm8, xmm13 1447 psrld xmm13, 8 1448 pslld xmm8, 24 1449 pxor xmm13, xmm8 1450 movdqa xmm8, xmm14 1451 psrld xmm14, 8 1452 pslld xmm8, 24 1453 pxor xmm14, xmm8 1454 movdqa xmm8, xmm15 1455 psrld xmm15, 8 1456 pslld xmm8, 24 1457 pxor xmm15, xmm8 1458 movdqa xmm8, xmmword ptr [rsp+0x100] 1459 paddd 
xmm8, xmm12 1460 paddd xmm9, xmm13 1461 paddd xmm10, xmm14 1462 paddd xmm11, xmm15 1463 pxor xmm4, xmm8 1464 pxor xmm5, xmm9 1465 pxor xmm6, xmm10 1466 pxor xmm7, xmm11 1467 movdqa xmmword ptr [rsp+0x100], xmm8 1468 movdqa xmm8, xmm4 1469 psrld xmm8, 7 1470 pslld xmm4, 25 1471 por xmm4, xmm8 1472 movdqa xmm8, xmm5 1473 psrld xmm8, 7 1474 pslld xmm5, 25 1475 por xmm5, xmm8 1476 movdqa xmm8, xmm6 1477 psrld xmm8, 7 1478 pslld xmm6, 25 1479 por xmm6, xmm8 1480 movdqa xmm8, xmm7 1481 psrld xmm8, 7 1482 pslld xmm7, 25 1483 por xmm7, xmm8 1484 paddd xmm0, xmmword ptr [rsp+0xE0] 1485 paddd xmm1, xmmword ptr [rsp+0x20] 1486 paddd xmm2, xmmword ptr [rsp+0x30] 1487 paddd xmm3, xmmword ptr [rsp+0x70] 1488 paddd xmm0, xmm5 1489 paddd xmm1, xmm6 1490 paddd xmm2, xmm7 1491 paddd xmm3, xmm4 1492 pxor xmm15, xmm0 1493 pxor xmm12, xmm1 1494 pxor xmm13, xmm2 1495 pxor xmm14, xmm3 1496 pshuflw xmm15, xmm15, 0xB1 1497 pshufhw xmm15, xmm15, 0xB1 1498 pshuflw xmm12, xmm12, 0xB1 1499 pshufhw xmm12, xmm12, 0xB1 1500 pshuflw xmm13, xmm13, 0xB1 1501 pshufhw xmm13, xmm13, 0xB1 1502 pshuflw xmm14, xmm14, 0xB1 1503 pshufhw xmm14, xmm14, 0xB1 1504 paddd xmm10, xmm15 1505 paddd xmm11, xmm12 1506 movdqa xmm8, xmmword ptr [rsp+0x100] 1507 paddd xmm8, xmm13 1508 paddd xmm9, xmm14 1509 pxor xmm5, xmm10 1510 pxor xmm6, xmm11 1511 pxor xmm7, xmm8 1512 pxor xmm4, xmm9 1513 movdqa xmmword ptr [rsp+0x100], xmm8 1514 movdqa xmm8, xmm5 1515 psrld xmm8, 12 1516 pslld xmm5, 20 1517 por xmm5, xmm8 1518 movdqa xmm8, xmm6 1519 psrld xmm8, 12 1520 pslld xmm6, 20 1521 por xmm6, xmm8 1522 movdqa xmm8, xmm7 1523 psrld xmm8, 12 1524 pslld xmm7, 20 1525 por xmm7, xmm8 1526 movdqa xmm8, xmm4 1527 psrld xmm8, 12 1528 pslld xmm4, 20 1529 por xmm4, xmm8 1530 paddd xmm0, xmmword ptr [rsp+0xA0] 1531 paddd xmm1, xmmword ptr [rsp+0xC0] 1532 paddd xmm2, xmmword ptr [rsp+0x40] 1533 paddd xmm3, xmmword ptr [rsp+0xD0] 1534 paddd xmm0, xmm5 1535 paddd xmm1, xmm6 1536 paddd xmm2, xmm7 1537 paddd xmm3, xmm4 1538 pxor xmm15, xmm0 
1539 pxor xmm12, xmm1 1540 pxor xmm13, xmm2 1541 pxor xmm14, xmm3 1542 movdqa xmm8, xmm15 1543 psrld xmm15, 8 1544 pslld xmm8, 24 1545 pxor xmm15, xmm8 1546 movdqa xmm8, xmm12 1547 psrld xmm12, 8 1548 pslld xmm8, 24 1549 pxor xmm12, xmm8 1550 movdqa xmm8, xmm13 1551 psrld xmm13, 8 1552 pslld xmm8, 24 1553 pxor xmm13, xmm8 1554 movdqa xmm8, xmm14 1555 psrld xmm14, 8 1556 pslld xmm8, 24 1557 pxor xmm14, xmm8 1558 paddd xmm10, xmm15 1559 paddd xmm11, xmm12 1560 movdqa xmm8, xmmword ptr [rsp+0x100] 1561 paddd xmm8, xmm13 1562 paddd xmm9, xmm14 1563 pxor xmm5, xmm10 1564 pxor xmm6, xmm11 1565 pxor xmm7, xmm8 1566 pxor xmm4, xmm9 1567 pxor xmm0, xmm8 1568 pxor xmm1, xmm9 1569 pxor xmm2, xmm10 1570 pxor xmm3, xmm11 1571 movdqa xmm8, xmm5 1572 psrld xmm8, 7 1573 pslld xmm5, 25 1574 por xmm5, xmm8 1575 movdqa xmm8, xmm6 1576 psrld xmm8, 7 1577 pslld xmm6, 25 1578 por xmm6, xmm8 1579 movdqa xmm8, xmm7 1580 psrld xmm8, 7 1581 pslld xmm7, 25 1582 por xmm7, xmm8 1583 movdqa xmm8, xmm4 1584 psrld xmm8, 7 1585 pslld xmm4, 25 1586 por xmm4, xmm8 1587 pxor xmm4, xmm12 1588 pxor xmm5, xmm13 1589 pxor xmm6, xmm14 1590 pxor xmm7, xmm15 1591 mov eax, r13d 1592 jne 9b 1593 movdqa xmm9, xmm0 1594 punpckldq xmm0, xmm1 1595 punpckhdq xmm9, xmm1 1596 movdqa xmm11, xmm2 1597 punpckldq xmm2, xmm3 1598 punpckhdq xmm11, xmm3 1599 movdqa xmm1, xmm0 1600 punpcklqdq xmm0, xmm2 1601 punpckhqdq xmm1, xmm2 1602 movdqa xmm3, xmm9 1603 punpcklqdq xmm9, xmm11 1604 punpckhqdq xmm3, xmm11 1605 movdqu xmmword ptr [rbx], xmm0 1606 movdqu xmmword ptr [rbx+0x20], xmm1 1607 movdqu xmmword ptr [rbx+0x40], xmm9 1608 movdqu xmmword ptr [rbx+0x60], xmm3 1609 movdqa xmm9, xmm4 1610 punpckldq xmm4, xmm5 1611 punpckhdq xmm9, xmm5 1612 movdqa xmm11, xmm6 1613 punpckldq xmm6, xmm7 1614 punpckhdq xmm11, xmm7 1615 movdqa xmm5, xmm4 1616 punpcklqdq xmm4, xmm6 1617 punpckhqdq xmm5, xmm6 1618 movdqa xmm7, xmm9 1619 punpcklqdq xmm9, xmm11 1620 punpckhqdq xmm7, xmm11 1621 movdqu xmmword ptr [rbx+0x10], xmm4 1622 movdqu 
xmmword ptr [rbx+0x30], xmm5 1623 movdqu xmmword ptr [rbx+0x50], xmm9 1624 movdqu xmmword ptr [rbx+0x70], xmm7 1625 movdqa xmm1, xmmword ptr [rsp+0x110] 1626 movdqa xmm0, xmm1 1627 paddd xmm1, xmmword ptr [rsp+0x150] 1628 movdqa xmmword ptr [rsp+0x110], xmm1 1629 pxor xmm0, xmmword ptr [CMP_MSB_MASK+rip] 1630 pxor xmm1, xmmword ptr [CMP_MSB_MASK+rip] 1631 pcmpgtd xmm0, xmm1 1632 movdqa xmm1, xmmword ptr [rsp+0x120] 1633 psubd xmm1, xmm0 1634 movdqa xmmword ptr [rsp+0x120], xmm1 1635 add rbx, 128 1636 add rdi, 32 1637 sub rsi, 4 1638 cmp rsi, 4 1639 jnc 2b 1640 test rsi, rsi 1641 jne 3f 16424: 1643 movdqa xmm6, xmmword ptr [rsp+0x170] 1644 movdqa xmm7, xmmword ptr [rsp+0x180] 1645 movdqa xmm8, xmmword ptr [rsp+0x190] 1646 movdqa xmm9, xmmword ptr [rsp+0x1A0] 1647 movdqa xmm10, xmmword ptr [rsp+0x1B0] 1648 movdqa xmm11, xmmword ptr [rsp+0x1C0] 1649 movdqa xmm12, xmmword ptr [rsp+0x1D0] 1650 movdqa xmm13, xmmword ptr [rsp+0x1E0] 1651 movdqa xmm14, xmmword ptr [rsp+0x1F0] 1652 movdqa xmm15, xmmword ptr [rsp+0x200] 1653 mov rsp, rbp 1654 pop rbp 1655 pop rbx 1656 pop rdi 1657 pop rsi 1658 pop r12 1659 pop r13 1660 pop r14 1661 pop r15 1662 ret 1663.p2align 5 16643: 1665 test esi, 0x2 1666 je 3f 1667 movups xmm0, xmmword ptr [rcx] 1668 movups xmm1, xmmword ptr [rcx+0x10] 1669 movaps xmm8, xmm0 1670 movaps xmm9, xmm1 1671 movd xmm13, dword ptr [rsp+0x110] 1672 movd xmm14, dword ptr [rsp+0x120] 1673 punpckldq xmm13, xmm14 1674 movaps xmmword ptr [rsp], xmm13 1675 movd xmm14, dword ptr [rsp+0x114] 1676 movd xmm13, dword ptr [rsp+0x124] 1677 punpckldq xmm14, xmm13 1678 movaps xmmword ptr [rsp+0x10], xmm14 1679 mov r8, qword ptr [rdi] 1680 mov r9, qword ptr [rdi+0x8] 1681 movzx eax, byte ptr [rbp+0x80] 1682 or eax, r13d 1683 xor edx, edx 16842: 1685 mov r14d, eax 1686 or eax, r12d 1687 add rdx, 64 1688 cmp rdx, r15 1689 cmovne eax, r14d 1690 movaps xmm2, xmmword ptr [BLAKE3_IV+rip] 1691 movaps xmm10, xmm2 1692 movups xmm4, xmmword ptr [r8+rdx-0x40] 1693 movups xmm5, xmmword 
ptr [r8+rdx-0x30] 1694 movaps xmm3, xmm4 1695 shufps xmm4, xmm5, 136 1696 shufps xmm3, xmm5, 221 1697 movaps xmm5, xmm3 1698 movups xmm6, xmmword ptr [r8+rdx-0x20] 1699 movups xmm7, xmmword ptr [r8+rdx-0x10] 1700 movaps xmm3, xmm6 1701 shufps xmm6, xmm7, 136 1702 pshufd xmm6, xmm6, 0x93 1703 shufps xmm3, xmm7, 221 1704 pshufd xmm7, xmm3, 0x93 1705 movups xmm12, xmmword ptr [r9+rdx-0x40] 1706 movups xmm13, xmmword ptr [r9+rdx-0x30] 1707 movaps xmm11, xmm12 1708 shufps xmm12, xmm13, 136 1709 shufps xmm11, xmm13, 221 1710 movaps xmm13, xmm11 1711 movups xmm14, xmmword ptr [r9+rdx-0x20] 1712 movups xmm15, xmmword ptr [r9+rdx-0x10] 1713 movaps xmm11, xmm14 1714 shufps xmm14, xmm15, 136 1715 pshufd xmm14, xmm14, 0x93 1716 shufps xmm11, xmm15, 221 1717 pshufd xmm15, xmm11, 0x93 1718 shl rax, 0x20 1719 or rax, 0x40 1720 movq xmm3, rax 1721 movdqa xmmword ptr [rsp+0x20], xmm3 1722 movaps xmm3, xmmword ptr [rsp] 1723 movaps xmm11, xmmword ptr [rsp+0x10] 1724 punpcklqdq xmm3, xmmword ptr [rsp+0x20] 1725 punpcklqdq xmm11, xmmword ptr [rsp+0x20] 1726 mov al, 7 17279: 1728 paddd xmm0, xmm4 1729 paddd xmm8, xmm12 1730 movaps xmmword ptr [rsp+0x20], xmm4 1731 movaps xmmword ptr [rsp+0x30], xmm12 1732 paddd xmm0, xmm1 1733 paddd xmm8, xmm9 1734 pxor xmm3, xmm0 1735 pxor xmm11, xmm8 1736 pshuflw xmm3, xmm3, 0xB1 1737 pshufhw xmm3, xmm3, 0xB1 1738 pshuflw xmm11, xmm11, 0xB1 1739 pshufhw xmm11, xmm11, 0xB1 1740 paddd xmm2, xmm3 1741 paddd xmm10, xmm11 1742 pxor xmm1, xmm2 1743 pxor xmm9, xmm10 1744 movdqa xmm4, xmm1 1745 pslld xmm1, 20 1746 psrld xmm4, 12 1747 por xmm1, xmm4 1748 movdqa xmm4, xmm9 1749 pslld xmm9, 20 1750 psrld xmm4, 12 1751 por xmm9, xmm4 1752 paddd xmm0, xmm5 1753 paddd xmm8, xmm13 1754 movaps xmmword ptr [rsp+0x40], xmm5 1755 movaps xmmword ptr [rsp+0x50], xmm13 1756 paddd xmm0, xmm1 1757 paddd xmm8, xmm9 1758 pxor xmm3, xmm0 1759 pxor xmm11, xmm8 1760 movdqa xmm13, xmm3 1761 psrld xmm3, 8 1762 pslld xmm13, 24 1763 pxor xmm3, xmm13 1764 movdqa xmm13, xmm11 1765 
psrld xmm11, 8 1766 pslld xmm13, 24 1767 pxor xmm11, xmm13 1768 paddd xmm2, xmm3 1769 paddd xmm10, xmm11 1770 pxor xmm1, xmm2 1771 pxor xmm9, xmm10 1772 movdqa xmm4, xmm1 1773 pslld xmm1, 25 1774 psrld xmm4, 7 1775 por xmm1, xmm4 1776 movdqa xmm4, xmm9 1777 pslld xmm9, 25 1778 psrld xmm4, 7 1779 por xmm9, xmm4 1780 pshufd xmm0, xmm0, 0x93 1781 pshufd xmm8, xmm8, 0x93 1782 pshufd xmm3, xmm3, 0x4E 1783 pshufd xmm11, xmm11, 0x4E 1784 pshufd xmm2, xmm2, 0x39 1785 pshufd xmm10, xmm10, 0x39 1786 paddd xmm0, xmm6 1787 paddd xmm8, xmm14 1788 paddd xmm0, xmm1 1789 paddd xmm8, xmm9 1790 pxor xmm3, xmm0 1791 pxor xmm11, xmm8 1792 pshuflw xmm3, xmm3, 0xB1 1793 pshufhw xmm3, xmm3, 0xB1 1794 pshuflw xmm11, xmm11, 0xB1 1795 pshufhw xmm11, xmm11, 0xB1 1796 paddd xmm2, xmm3 1797 paddd xmm10, xmm11 1798 pxor xmm1, xmm2 1799 pxor xmm9, xmm10 1800 movdqa xmm4, xmm1 1801 pslld xmm1, 20 1802 psrld xmm4, 12 1803 por xmm1, xmm4 1804 movdqa xmm4, xmm9 1805 pslld xmm9, 20 1806 psrld xmm4, 12 1807 por xmm9, xmm4 1808 paddd xmm0, xmm7 1809 paddd xmm8, xmm15 1810 paddd xmm0, xmm1 1811 paddd xmm8, xmm9 1812 pxor xmm3, xmm0 1813 pxor xmm11, xmm8 1814 movdqa xmm13, xmm3 1815 psrld xmm3, 8 1816 pslld xmm13, 24 1817 pxor xmm3, xmm13 1818 movdqa xmm13, xmm11 1819 psrld xmm11, 8 1820 pslld xmm13, 24 1821 pxor xmm11, xmm13 1822 paddd xmm2, xmm3 1823 paddd xmm10, xmm11 1824 pxor xmm1, xmm2 1825 pxor xmm9, xmm10 1826 movdqa xmm4, xmm1 1827 pslld xmm1, 25 1828 psrld xmm4, 7 1829 por xmm1, xmm4 1830 movdqa xmm4, xmm9 1831 pslld xmm9, 25 1832 psrld xmm4, 7 1833 por xmm9, xmm4 1834 pshufd xmm0, xmm0, 0x39 1835 pshufd xmm8, xmm8, 0x39 1836 pshufd xmm3, xmm3, 0x4E 1837 pshufd xmm11, xmm11, 0x4E 1838 pshufd xmm2, xmm2, 0x93 1839 pshufd xmm10, xmm10, 0x93 1840 dec al 1841 je 9f 1842 movdqa xmm12, xmmword ptr [rsp+0x20] 1843 movdqa xmm5, xmmword ptr [rsp+0x40] 1844 pshufd xmm13, xmm12, 0x0F 1845 shufps xmm12, xmm5, 214 1846 pshufd xmm4, xmm12, 0x39 1847 movdqa xmm12, xmm6 1848 shufps xmm12, xmm7, 250 1849 pand 
xmm13, xmmword ptr [PBLENDW_0x33_MASK+rip] 1850 pand xmm12, xmmword ptr [PBLENDW_0xCC_MASK+rip] 1851 por xmm13, xmm12 1852 movdqa xmmword ptr [rsp+0x20], xmm13 1853 movdqa xmm12, xmm7 1854 punpcklqdq xmm12, xmm5 1855 movdqa xmm13, xmm6 1856 pand xmm12, xmmword ptr [PBLENDW_0x3F_MASK+rip] 1857 pand xmm13, xmmword ptr [PBLENDW_0xC0_MASK+rip] 1858 por xmm12, xmm13 1859 pshufd xmm12, xmm12, 0x78 1860 punpckhdq xmm5, xmm7 1861 punpckldq xmm6, xmm5 1862 pshufd xmm7, xmm6, 0x1E 1863 movdqa xmmword ptr [rsp+0x40], xmm12 1864 movdqa xmm5, xmmword ptr [rsp+0x30] 1865 movdqa xmm13, xmmword ptr [rsp+0x50] 1866 pshufd xmm6, xmm5, 0x0F 1867 shufps xmm5, xmm13, 214 1868 pshufd xmm12, xmm5, 0x39 1869 movdqa xmm5, xmm14 1870 shufps xmm5, xmm15, 250 1871 pand xmm6, xmmword ptr [PBLENDW_0x33_MASK+rip] 1872 pand xmm5, xmmword ptr [PBLENDW_0xCC_MASK+rip] 1873 por xmm6, xmm5 1874 movdqa xmm5, xmm15 1875 punpcklqdq xmm5, xmm13 1876 movdqa xmmword ptr [rsp+0x30], xmm2 1877 movdqa xmm2, xmm14 1878 pand xmm5, xmmword ptr [PBLENDW_0x3F_MASK+rip] 1879 pand xmm2, xmmword ptr [PBLENDW_0xC0_MASK+rip] 1880 por xmm5, xmm2 1881 movdqa xmm2, xmmword ptr [rsp+0x30] 1882 pshufd xmm5, xmm5, 0x78 1883 punpckhdq xmm13, xmm15 1884 punpckldq xmm14, xmm13 1885 pshufd xmm15, xmm14, 0x1E 1886 movdqa xmm13, xmm6 1887 movdqa xmm14, xmm5 1888 movdqa xmm5, xmmword ptr [rsp+0x20] 1889 movdqa xmm6, xmmword ptr [rsp+0x40] 1890 jmp 9b 18919: 1892 pxor xmm0, xmm2 1893 pxor xmm1, xmm3 1894 pxor xmm8, xmm10 1895 pxor xmm9, xmm11 1896 mov eax, r13d 1897 cmp rdx, r15 1898 jne 2b 1899 movups xmmword ptr [rbx], xmm0 1900 movups xmmword ptr [rbx+0x10], xmm1 1901 movups xmmword ptr [rbx+0x20], xmm8 1902 movups xmmword ptr [rbx+0x30], xmm9 1903 mov eax, dword ptr [rsp+0x130] 1904 neg eax 1905 mov r10d, dword ptr [rsp+0x110+8*rax] 1906 mov r11d, dword ptr [rsp+0x120+8*rax] 1907 mov dword ptr [rsp+0x110], r10d 1908 mov dword ptr [rsp+0x120], r11d 1909 add rdi, 16 1910 add rbx, 64 1911 sub rsi, 2 19123: 1913 test esi, 0x1 1914 je 
4b 1915 movups xmm0, xmmword ptr [rcx] 1916 movups xmm1, xmmword ptr [rcx+0x10] 1917 movd xmm13, dword ptr [rsp+0x110] 1918 movd xmm14, dword ptr [rsp+0x120] 1919 punpckldq xmm13, xmm14 1920 mov r8, qword ptr [rdi] 1921 movzx eax, byte ptr [rbp+0x80] 1922 or eax, r13d 1923 xor edx, edx 19242: 1925 mov r14d, eax 1926 or eax, r12d 1927 add rdx, 64 1928 cmp rdx, r15 1929 cmovne eax, r14d 1930 movaps xmm2, xmmword ptr [BLAKE3_IV+rip] 1931 shl rax, 32 1932 or rax, 64 1933 movq xmm12, rax 1934 movdqa xmm3, xmm13 1935 punpcklqdq xmm3, xmm12 1936 movups xmm4, xmmword ptr [r8+rdx-0x40] 1937 movups xmm5, xmmword ptr [r8+rdx-0x30] 1938 movaps xmm8, xmm4 1939 shufps xmm4, xmm5, 136 1940 shufps xmm8, xmm5, 221 1941 movaps xmm5, xmm8 1942 movups xmm6, xmmword ptr [r8+rdx-0x20] 1943 movups xmm7, xmmword ptr [r8+rdx-0x10] 1944 movaps xmm8, xmm6 1945 shufps xmm6, xmm7, 136 1946 pshufd xmm6, xmm6, 0x93 1947 shufps xmm8, xmm7, 221 1948 pshufd xmm7, xmm8, 0x93 1949 mov al, 7 19509: 1951 paddd xmm0, xmm4 1952 paddd xmm0, xmm1 1953 pxor xmm3, xmm0 1954 pshuflw xmm3, xmm3, 0xB1 1955 pshufhw xmm3, xmm3, 0xB1 1956 paddd xmm2, xmm3 1957 pxor xmm1, xmm2 1958 movdqa xmm11, xmm1 1959 pslld xmm1, 20 1960 psrld xmm11, 12 1961 por xmm1, xmm11 1962 paddd xmm0, xmm5 1963 paddd xmm0, xmm1 1964 pxor xmm3, xmm0 1965 movdqa xmm14, xmm3 1966 psrld xmm3, 8 1967 pslld xmm14, 24 1968 pxor xmm3, xmm14 1969 paddd xmm2, xmm3 1970 pxor xmm1, xmm2 1971 movdqa xmm11, xmm1 1972 pslld xmm1, 25 1973 psrld xmm11, 7 1974 por xmm1, xmm11 1975 pshufd xmm0, xmm0, 0x93 1976 pshufd xmm3, xmm3, 0x4E 1977 pshufd xmm2, xmm2, 0x39 1978 paddd xmm0, xmm6 1979 paddd xmm0, xmm1 1980 pxor xmm3, xmm0 1981 pshuflw xmm3, xmm3, 0xB1 1982 pshufhw xmm3, xmm3, 0xB1 1983 paddd xmm2, xmm3 1984 pxor xmm1, xmm2 1985 movdqa xmm11, xmm1 1986 pslld xmm1, 20 1987 psrld xmm11, 12 1988 por xmm1, xmm11 1989 paddd xmm0, xmm7 1990 paddd xmm0, xmm1 1991 pxor xmm3, xmm0 1992 movdqa xmm14, xmm3 1993 psrld xmm3, 8 1994 pslld xmm14, 24 1995 pxor xmm3, 
xmm14 1996 paddd xmm2, xmm3 1997 pxor xmm1, xmm2 1998 movdqa xmm11, xmm1 1999 pslld xmm1, 25 2000 psrld xmm11, 7 2001 por xmm1, xmm11 2002 pshufd xmm0, xmm0, 0x39 2003 pshufd xmm3, xmm3, 0x4E 2004 pshufd xmm2, xmm2, 0x93 2005 dec al 2006 jz 9f 2007 movdqa xmm8, xmm4 2008 shufps xmm8, xmm5, 214 2009 pshufd xmm9, xmm4, 0x0F 2010 pshufd xmm4, xmm8, 0x39 2011 movdqa xmm8, xmm6 2012 shufps xmm8, xmm7, 250 2013 pand xmm9, xmmword ptr [PBLENDW_0x33_MASK+rip] 2014 pand xmm8, xmmword ptr [PBLENDW_0xCC_MASK+rip] 2015 por xmm9, xmm8 2016 movdqa xmm8, xmm7 2017 punpcklqdq xmm8, xmm5 2018 movdqa xmm10, xmm6 2019 pand xmm8, xmmword ptr [PBLENDW_0x3F_MASK+rip] 2020 pand xmm10, xmmword ptr [PBLENDW_0xC0_MASK+rip] 2021 por xmm8, xmm10 2022 pshufd xmm8, xmm8, 0x78 2023 punpckhdq xmm5, xmm7 2024 punpckldq xmm6, xmm5 2025 pshufd xmm7, xmm6, 0x1E 2026 movdqa xmm5, xmm9 2027 movdqa xmm6, xmm8 2028 jmp 9b 20299: 2030 pxor xmm0, xmm2 2031 pxor xmm1, xmm3 2032 mov eax, r13d 2033 cmp rdx, r15 2034 jne 2b 2035 movups xmmword ptr [rbx], xmm0 2036 movups xmmword ptr [rbx+0x10], xmm1 2037 jmp 4b 2038
/*
 * blake3_compress_in_place_sse2 / _blake3_compress_in_place_sse2
 * (two labels for the same entry point; both are exported with .global).
 *
 * Runs a 7-round compression on one 64-byte block and writes the 32-byte
 * result back over the 32 bytes it read from [rcx].
 *
 * Inputs, as read by the code below:
 *   rcx            -> 32 bytes, loaded into xmm0/xmm1 and overwritten on exit
 *   rdx            -> 64 bytes of message, read with unaligned loads
 *   r8b            -> zero-extended; becomes dword 2 of xmm3
 *   r9             -> 64-bit value; becomes dwords 0-1 of xmm3
 *   [rsp+0xA0]     -> one byte; becomes dword 3 of xmm3.  0xA0 - 120 = 0x28,
 *                     i.e. the first stack slot above the return address and
 *                     the 32-byte shadow space at function entry.
 * NOTE(review): register/stack usage matches the Microsoft x64 convention;
 * the arguments are presumably (cv, block, block_len, counter, flags) --
 * confirm against the C prototype.
 *
 * Frame: "sub rsp, 120" turns the entry alignment (rsp % 16 == 8) into a
 * 16-byte aligned rsp, which the movdqa spills below rely on.  xmm6, xmm7,
 * xmm8, xmm9, xmm11, xmm14 and xmm15 are saved here and restored before ret.
 * xmm15 is saved/restored but not otherwise referenced in this function.
 * Also writes rax, r8, xmm0-xmm5 and flags.
 */
2039.p2align 6 2040blake3_compress_in_place_sse2: 2041_blake3_compress_in_place_sse2: 2042 sub rsp, 120 2043 movdqa xmmword ptr [rsp], xmm6 2044 movdqa xmmword ptr [rsp+0x10], xmm7 2045 movdqa xmmword ptr [rsp+0x20], xmm8 2046 movdqa xmmword ptr [rsp+0x30], xmm9 2047 movdqa xmmword ptr [rsp+0x40], xmm11 2048 movdqa xmmword ptr [rsp+0x50], xmm14 2049 movdqa xmmword ptr [rsp+0x60], xmm15
/*
 * Build the four state rows:
 *   xmm0, xmm1 = the 32 bytes at [rcx]
 *   xmm2       = the four dwords at BLAKE3_IV
 *   xmm3       = { r9 (low dword), r9 (high dword), r8b, byte at [rsp+0xA0] }
 * r8 is assembled as (stack byte << 32) + r8b before being moved into the
 * upper half of xmm3.
 */
2050 movups xmm0, xmmword ptr [rcx] 2051 movups xmm1, xmmword ptr [rcx+0x10] 2052 movaps xmm2, xmmword ptr [BLAKE3_IV+rip] 2053 movzx eax, byte ptr [rsp+0xA0] 2054 movzx r8d, r8b 2055 shl rax, 32 2056 add r8, rax 2057 movq xmm3, r9 2058 movq xmm4, r8 2059 punpcklqdq xmm3, xmm4
/*
 * Load the 64 message bytes and split them by dword parity:
 * shufps imm 136 (0x88) keeps dwords 0 and 2 of each source, imm 221 (0xDD)
 * keeps dwords 1 and 3.  Result: xmm4 = even dwords of bytes 0..31,
 * xmm5 = odd dwords of bytes 0..31; xmm6/xmm7 = even/odd dwords of bytes
 * 32..63, each additionally lane-rotated by pshufd 0x93.
 */
2060 movups xmm4, xmmword ptr [rdx] 2061 movups xmm5, xmmword ptr [rdx+0x10] 2062 movaps xmm8, xmm4 2063 shufps xmm4, xmm5, 136 2064 shufps xmm8, xmm5, 221 2065 movaps xmm5, xmm8 2066 movups xmm6, xmmword ptr [rdx+0x20] 2067 movups xmm7, 
xmmword ptr [rdx+0x30] 2068 movaps xmm8, xmm6 2069 shufps xmm6, xmm7, 136 2070 pshufd xmm6, xmm6, 0x93 2071 shufps xmm8, xmm7, 221 2072 pshufd xmm7, xmm8, 0x93
/*
 * Round loop: al counts down from 7; label 9 is the top of one round.
 * Each round is two half-rounds.  The first adds xmm4 then xmm5 into xmm0,
 * the second adds xmm6 then xmm7.  Between and after the half-rounds,
 * pshufd 0x93 / 0x4E / 0x39 rotate the dword lanes of xmm0 / xmm3 / xmm2
 * and then undo that rotation (presumably the column and diagonal steps of
 * the BLAKE3 round -- confirm against the reference implementation).
 * Rotations of each 32-bit lane are built from SSE2 primitives:
 *   pshuflw+pshufhw 0xB1      swap the 16-bit halves  (rotate by 16)
 *   pslld 20 / psrld 12 / por  rotate right by 12
 *   psrld 8 / pslld 24 / pxor  rotate right by 8
 *   pslld 25 / psrld 7 / por   rotate right by 7
 * xmm11 and xmm14 are the scratch registers for those rotations.
 */
2073 mov al, 7 20749: 2075 paddd xmm0, xmm4 2076 paddd xmm0, xmm1 2077 pxor xmm3, xmm0 2078 pshuflw xmm3, xmm3, 0xB1 2079 pshufhw xmm3, xmm3, 0xB1 2080 paddd xmm2, xmm3 2081 pxor xmm1, xmm2 2082 movdqa xmm11, xmm1 2083 pslld xmm1, 20 2084 psrld xmm11, 12 2085 por xmm1, xmm11 2086 paddd xmm0, xmm5 2087 paddd xmm0, xmm1 2088 pxor xmm3, xmm0 2089 movdqa xmm14, xmm3 2090 psrld xmm3, 8 2091 pslld xmm14, 24 2092 pxor xmm3, xmm14 2093 paddd xmm2, xmm3 2094 pxor xmm1, xmm2 2095 movdqa xmm11, xmm1 2096 pslld xmm1, 25 2097 psrld xmm11, 7 2098 por xmm1, xmm11 2099 pshufd xmm0, xmm0, 0x93 2100 pshufd xmm3, xmm3, 0x4E 2101 pshufd xmm2, xmm2, 0x39 2102 paddd xmm0, xmm6 2103 paddd xmm0, xmm1 2104 pxor xmm3, xmm0 2105 pshuflw xmm3, xmm3, 0xB1 2106 pshufhw xmm3, xmm3, 0xB1 2107 paddd xmm2, xmm3 2108 pxor xmm1, xmm2 2109 movdqa xmm11, xmm1 2110 pslld xmm1, 20 2111 psrld xmm11, 12 2112 por xmm1, xmm11 2113 paddd xmm0, xmm7 2114 paddd xmm0, xmm1 2115 pxor xmm3, xmm0 2116 movdqa xmm14, xmm3 2117 psrld xmm3, 8 2118 pslld xmm14, 24 2119 pxor xmm3, xmm14 2120 paddd xmm2, xmm3 2121 pxor xmm1, xmm2 2122 movdqa xmm11, xmm1 2123 pslld xmm1, 25 2124 psrld xmm11, 7 2125 por xmm1, xmm11 2126 pshufd xmm0, xmm0, 0x39 2127 pshufd xmm3, xmm3, 0x4E 2128 pshufd xmm2, xmm2, 0x93 2129 dec al 2130 jz 9f
/*
 * Not the last round: reshuffle the four message registers xmm4..xmm7 for
 * the next round, then loop.  This runs after rounds 1..6 only, because the
 * "jz 9f" above leaves once al reaches zero.  The pand/pand/por triples
 * merge lanes from two registers using the PBLENDW_* masks (the names
 * suggest they stand in for SSE4.1 pblendw, which SSE2 lacks).
 * xmm8, xmm9 and xmm14 are scratch here.
 */
2131 movdqa xmm8, xmm4 2132 shufps xmm8, xmm5, 214 2133 pshufd xmm9, xmm4, 0x0F 2134 pshufd xmm4, xmm8, 0x39 2135 movdqa xmm8, xmm6 2136 shufps xmm8, xmm7, 250 2137 pand xmm9, xmmword ptr [PBLENDW_0x33_MASK+rip] 2138 pand xmm8, xmmword ptr [PBLENDW_0xCC_MASK+rip] 2139 por xmm9, xmm8 2140 movdqa xmm8, xmm7 2141 punpcklqdq xmm8, xmm5 2142 movdqa xmm14, xmm6 2143 pand xmm8, xmmword ptr [PBLENDW_0x3F_MASK+rip] 2144 pand xmm14, xmmword ptr [PBLENDW_0xC0_MASK+rip] 2145 por xmm8, xmm14 2146 pshufd xmm8, xmm8, 0x78 2147 punpckhdq xmm5, xmm7 2148 punpckldq 
xmm6, xmm5 2149 pshufd xmm7, xmm6, 0x1E 2150 movdqa xmm5, xmm9 2151 movdqa xmm6, xmm8 2152 jmp 9b
/*
 * Finalize: fold the lower two rows into the upper two (xmm0 ^= xmm2,
 * xmm1 ^= xmm3) and store the 32-byte result back to [rcx].
 */
21539: 2154 pxor xmm0, xmm2 2155 pxor xmm1, xmm3 2156 movups xmmword ptr [rcx], xmm0 2157 movups xmmword ptr [rcx+0x10], xmm1
/* Restore the saved xmm registers, release the 120-byte frame, return. */
2158 movdqa xmm6, xmmword ptr [rsp] 2159 movdqa xmm7, xmmword ptr [rsp+0x10] 2160 movdqa xmm8, xmmword ptr [rsp+0x20] 2161 movdqa xmm9, xmmword ptr [rsp+0x30] 2162 movdqa xmm11, xmmword ptr [rsp+0x40] 2163 movdqa xmm14, xmmword ptr [rsp+0x50] 2164 movdqa xmm15, xmmword ptr [rsp+0x60] 2165 add rsp, 120 2166 ret 2167 2168
/*
 * blake3_compress_xof_sse2 / _blake3_compress_xof_sse2
 * (two labels for the same entry point; both are exported with .global).
 *
 * Same state setup and 7-round loop as blake3_compress_in_place_sse2, but
 * the input at [rcx] is left unmodified and a 64-byte result is written
 * through a separate output pointer.
 *
 * Inputs, as read by the code below:
 *   rcx            -> 32 bytes, loaded into xmm0/xmm1 (read again at the end)
 *   rdx            -> 64 bytes of message, read with unaligned loads
 *   r8b            -> zero-extended; becomes dword 2 of xmm3
 *   r9             -> 64-bit value; becomes dwords 0-1 of xmm3
 *   [rsp+0xA0]     -> one byte; becomes dword 3 of xmm3 (entry rsp + 0x28)
 *   [rsp+0xA8]     -> 64-bit pointer loaded into r10 (entry rsp + 0x30);
 *                     64 bytes are stored through it
 * NOTE(review): presumably (cv, block, block_len, counter, flags, out) under
 * the Microsoft x64 convention -- confirm against the C prototype.
 *
 * Frame and saved registers are identical to the in-place variant:
 * 120 bytes, holding xmm6, xmm7, xmm8, xmm9, xmm11, xmm14, xmm15.
 * Also writes rax, r8, r10, xmm0-xmm5 and flags.
 */
2169.p2align 6 2170_blake3_compress_xof_sse2: 2171blake3_compress_xof_sse2: 2172 sub rsp, 120 2173 movdqa xmmword ptr [rsp], xmm6 2174 movdqa xmmword ptr [rsp+0x10], xmm7 2175 movdqa xmmword ptr [rsp+0x20], xmm8 2176 movdqa xmmword ptr [rsp+0x30], xmm9 2177 movdqa xmmword ptr [rsp+0x40], xmm11 2178 movdqa xmmword ptr [rsp+0x50], xmm14 2179 movdqa xmmword ptr [rsp+0x60], xmm15
/*
 * State rows: xmm0/xmm1 = 32 bytes at [rcx], xmm2 = BLAKE3_IV,
 * xmm3 = { r9 low, r9 high, r8b, byte at [rsp+0xA0] }.
 * r10 picks up the output pointer from the stack before r8/rax are reused.
 */
2180 movups xmm0, xmmword ptr [rcx] 2181 movups xmm1, xmmword ptr [rcx+0x10] 2182 movaps xmm2, xmmword ptr [BLAKE3_IV+rip] 2183 movzx eax, byte ptr [rsp+0xA0] 2184 movzx r8d, r8b 2185 mov r10, qword ptr [rsp+0xA8] 2186 shl rax, 32 2187 add r8, rax 2188 movq xmm3, r9 2189 movq xmm4, r8 2190 punpcklqdq xmm3, xmm4
/*
 * Message load: xmm4/xmm5 = even/odd dwords of bytes 0..31 (shufps 136/221),
 * xmm6/xmm7 = even/odd dwords of bytes 32..63, lane-rotated by pshufd 0x93.
 */
2191 movups xmm4, xmmword ptr [rdx] 2192 movups xmm5, xmmword ptr [rdx+0x10] 2193 movaps xmm8, xmm4 2194 shufps xmm4, xmm5, 136 2195 shufps xmm8, xmm5, 221 2196 movaps xmm5, xmm8 2197 movups xmm6, xmmword ptr [rdx+0x20] 2198 movups xmm7, xmmword ptr [rdx+0x30] 2199 movaps xmm8, xmm6 2200 shufps xmm6, xmm7, 136 2201 pshufd xmm6, xmm6, 0x93 2202 shufps xmm8, xmm7, 221 2203 pshufd xmm7, xmm8, 0x93
/*
 * Round loop, 7 iterations counted down in al; label 9 is the loop top.
 * Instruction-for-instruction the same round body as the in-place variant:
 * two half-rounds (xmm4,xmm5 then xmm6,xmm7) with lane rotations of
 * xmm0/xmm3/xmm2 in between, and 32-bit rotates by 16, 12, 8 and 7 built
 * from pshuflw/pshufhw and shift pairs, using xmm11/xmm14 as scratch.
 */
2204 mov al, 7 22059: 2206 paddd xmm0, xmm4 2207 paddd xmm0, xmm1 2208 pxor xmm3, xmm0 2209 pshuflw xmm3, xmm3, 0xB1 2210 pshufhw xmm3, xmm3, 0xB1 2211 paddd xmm2, xmm3 2212 pxor xmm1, xmm2 2213 movdqa xmm11, xmm1 2214 pslld xmm1, 20 2215 psrld xmm11, 12 2216 por xmm1, xmm11 2217 paddd xmm0, xmm5 2218 paddd xmm0, xmm1 2219 pxor xmm3, xmm0 2220 movdqa xmm14, 
xmm3 2221 psrld xmm3, 8 2222 pslld xmm14, 24 2223 pxor xmm3, xmm14 2224 paddd xmm2, xmm3 2225 pxor xmm1, xmm2 2226 movdqa xmm11, xmm1 2227 pslld xmm1, 25 2228 psrld xmm11, 7 2229 por xmm1, xmm11 2230 pshufd xmm0, xmm0, 0x93 2231 pshufd xmm3, xmm3, 0x4E 2232 pshufd xmm2, xmm2, 0x39 2233 paddd xmm0, xmm6 2234 paddd xmm0, xmm1 2235 pxor xmm3, xmm0 2236 pshuflw xmm3, xmm3, 0xB1 2237 pshufhw xmm3, xmm3, 0xB1 2238 paddd xmm2, xmm3 2239 pxor xmm1, xmm2 2240 movdqa xmm11, xmm1 2241 pslld xmm1, 20 2242 psrld xmm11, 12 2243 por xmm1, xmm11 2244 paddd xmm0, xmm7 2245 paddd xmm0, xmm1 2246 pxor xmm3, xmm0 2247 movdqa xmm14, xmm3 2248 psrld xmm3, 8 2249 pslld xmm14, 24 2250 pxor xmm3, xmm14 2251 paddd xmm2, xmm3 2252 pxor xmm1, xmm2 2253 movdqa xmm11, xmm1 2254 pslld xmm1, 25 2255 psrld xmm11, 7 2256 por xmm1, xmm11 2257 pshufd xmm0, xmm0, 0x39 2258 pshufd xmm3, xmm3, 0x4E 2259 pshufd xmm2, xmm2, 0x93 2260 dec al 2261 jz 9f
/*
 * Not the last round: reshuffle message registers xmm4..xmm7 for the next
 * round (same sequence as the in-place variant), then loop.
 */
2262 movdqa xmm8, xmm4 2263 shufps xmm8, xmm5, 214 2264 pshufd xmm9, xmm4, 0x0F 2265 pshufd xmm4, xmm8, 0x39 2266 movdqa xmm8, xmm6 2267 shufps xmm8, xmm7, 250 2268 pand xmm9, xmmword ptr [PBLENDW_0x33_MASK+rip] 2269 pand xmm8, xmmword ptr [PBLENDW_0xCC_MASK+rip] 2270 por xmm9, xmm8 2271 movdqa xmm8, xmm7 2272 punpcklqdq xmm8, xmm5 2273 movdqa xmm14, xmm6 2274 pand xmm8, xmmword ptr [PBLENDW_0x3F_MASK+rip] 2275 pand xmm14, xmmword ptr [PBLENDW_0xC0_MASK+rip] 2276 por xmm8, xmm14 2277 pshufd xmm8, xmm8, 0x78 2278 punpckhdq xmm5, xmm7 2279 punpckldq xmm6, xmm5 2280 pshufd xmm7, xmm6, 0x1E 2281 movdqa xmm5, xmm9 2282 movdqa xmm6, xmm8 2283 jmp 9b
/*
 * Finalize into 64 bytes at [r10]:
 *   out[ 0..31] = (xmm0 ^ xmm2, xmm1 ^ xmm3)
 *   out[32..63] = (xmm2 ^ bytes 0..15 at [rcx], xmm3 ^ bytes 16..31 at [rcx])
 * The input at [rcx] is re-read (into xmm4/xmm5) because the message
 * registers are no longer needed; [rcx] itself is never written here.
 */
22849: 2285 movdqu xmm4, xmmword ptr [rcx] 2286 movdqu xmm5, xmmword ptr [rcx+0x10] 2287 pxor xmm0, xmm2 2288 pxor xmm1, xmm3 2289 pxor xmm2, xmm4 2290 pxor xmm3, xmm5 2291 movups xmmword ptr [r10], xmm0 2292 movups xmmword ptr [r10+0x10], xmm1 2293 movups xmmword ptr [r10+0x20], xmm2 2294 movups xmmword ptr [r10+0x30], xmm3
/* Restore the saved xmm registers, release the 120-byte frame, return. */
2295 movdqa xmm6, xmmword ptr [rsp] 2296 movdqa xmm7, xmmword ptr [rsp+0x10] 2297 movdqa xmm8, 
xmmword ptr [rsp+0x20] 2298 movdqa xmm9, xmmword ptr [rsp+0x30] 2299 movdqa xmm11, xmmword ptr [rsp+0x40] 2300 movdqa xmm14, xmmword ptr [rsp+0x50] 2301 movdqa xmm15, xmmword ptr [rsp+0x60] 2302 add rsp, 120 2303 ret 2304 2305
/*
 * Read-only constant tables.  The section is aligned to 64 bytes and every
 * table is exactly 16 bytes, so each label stays 16-byte aligned; the code
 * depends on that because it uses them as aligned memory operands
 * (movaps / pand / pxor / paddd with xmmword ptr [...+rip]).
 *
 *   BLAKE3_IV          four dwords loaded into state row xmm2 by the
 *                      single-block paths
 *   ADD0 / ADD1        per-lane offsets {0,1,2,3} and a stride of 4, masked
 *                      and added to the counter vector in blake3_hash_many_sse2
 *   BLAKE3_IV_0..3     each IV dword broadcast to four lanes
 *   BLAKE3_BLOCK_LEN   the value 64 broadcast to four lanes
 *                      (the IV_n and BLOCK_LEN tables are not referenced in
 *                      the part of the file visible in this chunk)
 *   CMP_MSB_MASK       sign-bit mask; xor-ed into both operands before
 *                      pcmpgtd so the signed compare orders the values as
 *                      unsigned
 *   PBLENDW_*_MASK     complementary dword-lane masks (0x33/0xCC and
 *                      0x3F/0xC0) used in pand/pand/por triples to merge
 *                      lanes from two registers
 */
2306.section .rodata 2307.p2align 6 2308BLAKE3_IV: 2309 .long 0x6A09E667, 0xBB67AE85 2310 .long 0x3C6EF372, 0xA54FF53A 2311ADD0: 2312 .long 0, 1, 2, 3 2313ADD1: 2314 .long 4, 4, 4, 4 2315BLAKE3_IV_0: 2316 .long 0x6A09E667, 0x6A09E667, 0x6A09E667, 0x6A09E667 2317BLAKE3_IV_1: 2318 .long 0xBB67AE85, 0xBB67AE85, 0xBB67AE85, 0xBB67AE85 2319BLAKE3_IV_2: 2320 .long 0x3C6EF372, 0x3C6EF372, 0x3C6EF372, 0x3C6EF372 2321BLAKE3_IV_3: 2322 .long 0xA54FF53A, 0xA54FF53A, 0xA54FF53A, 0xA54FF53A 2323BLAKE3_BLOCK_LEN: 2324 .long 64, 64, 64, 64 2325CMP_MSB_MASK: 2326 .long 0x80000000, 0x80000000, 0x80000000, 0x80000000 2327PBLENDW_0x33_MASK: 2328 .long 0xFFFFFFFF, 0x00000000, 0xFFFFFFFF, 0x00000000 2329PBLENDW_0xCC_MASK: 2330 .long 0x00000000, 0xFFFFFFFF, 0x00000000, 0xFFFFFFFF 2331PBLENDW_0x3F_MASK: 2332 .long 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000 2333PBLENDW_0xC0_MASK: 2334 .long 0x00000000, 0x00000000, 0x00000000, 0xFFFFFFFF 2335