1#include "llvm_blake3_prefix.h" 2 3.intel_syntax noprefix 4.global blake3_hash_many_sse41 5.global _blake3_hash_many_sse41 6.global blake3_compress_in_place_sse41 7.global _blake3_compress_in_place_sse41 8.global blake3_compress_xof_sse41 9.global _blake3_compress_xof_sse41 10.section .text 11 .p2align 6 12_blake3_hash_many_sse41: 13blake3_hash_many_sse41: 14 push r15 15 push r14 16 push r13 17 push r12 18 push rsi 19 push rdi 20 push rbx 21 push rbp 22 mov rbp, rsp 23 sub rsp, 528 24 and rsp, 0xFFFFFFFFFFFFFFC0 25 movdqa xmmword ptr [rsp+0x170], xmm6 26 movdqa xmmword ptr [rsp+0x180], xmm7 27 movdqa xmmword ptr [rsp+0x190], xmm8 28 movdqa xmmword ptr [rsp+0x1A0], xmm9 29 movdqa xmmword ptr [rsp+0x1B0], xmm10 30 movdqa xmmword ptr [rsp+0x1C0], xmm11 31 movdqa xmmword ptr [rsp+0x1D0], xmm12 32 movdqa xmmword ptr [rsp+0x1E0], xmm13 33 movdqa xmmword ptr [rsp+0x1F0], xmm14 34 movdqa xmmword ptr [rsp+0x200], xmm15 35 mov rdi, rcx 36 mov rsi, rdx 37 mov rdx, r8 38 mov rcx, r9 39 mov r8, qword ptr [rbp+0x68] 40 movzx r9, byte ptr [rbp+0x70] 41 neg r9d 42 movd xmm0, r9d 43 pshufd xmm0, xmm0, 0x00 44 movdqa xmmword ptr [rsp+0x130], xmm0 45 movdqa xmm1, xmm0 46 pand xmm1, xmmword ptr [ADD0+rip] 47 pand xmm0, xmmword ptr [ADD1+rip] 48 movdqa xmmword ptr [rsp+0x150], xmm0 49 movd xmm0, r8d 50 pshufd xmm0, xmm0, 0x00 51 paddd xmm0, xmm1 52 movdqa xmmword ptr [rsp+0x110], xmm0 53 pxor xmm0, xmmword ptr [CMP_MSB_MASK+rip] 54 pxor xmm1, xmmword ptr [CMP_MSB_MASK+rip] 55 pcmpgtd xmm1, xmm0 56 shr r8, 32 57 movd xmm2, r8d 58 pshufd xmm2, xmm2, 0x00 59 psubd xmm2, xmm1 60 movdqa xmmword ptr [rsp+0x120], xmm2 61 mov rbx, qword ptr [rbp+0x90] 62 mov r15, rdx 63 shl r15, 6 64 movzx r13d, byte ptr [rbp+0x78] 65 movzx r12d, byte ptr [rbp+0x88] 66 cmp rsi, 4 67 jc 3f 682: 69 movdqu xmm3, xmmword ptr [rcx] 70 pshufd xmm0, xmm3, 0x00 71 pshufd xmm1, xmm3, 0x55 72 pshufd xmm2, xmm3, 0xAA 73 pshufd xmm3, xmm3, 0xFF 74 movdqu xmm7, xmmword ptr [rcx+0x10] 75 pshufd xmm4, xmm7, 0x00 76 pshufd 
xmm5, xmm7, 0x55 77 pshufd xmm6, xmm7, 0xAA 78 pshufd xmm7, xmm7, 0xFF 79 mov r8, qword ptr [rdi] 80 mov r9, qword ptr [rdi+0x8] 81 mov r10, qword ptr [rdi+0x10] 82 mov r11, qword ptr [rdi+0x18] 83 movzx eax, byte ptr [rbp+0x80] 84 or eax, r13d 85 xor edx, edx 869: 87 mov r14d, eax 88 or eax, r12d 89 add rdx, 64 90 cmp rdx, r15 91 cmovne eax, r14d 92 movdqu xmm8, xmmword ptr [r8+rdx-0x40] 93 movdqu xmm9, xmmword ptr [r9+rdx-0x40] 94 movdqu xmm10, xmmword ptr [r10+rdx-0x40] 95 movdqu xmm11, xmmword ptr [r11+rdx-0x40] 96 movdqa xmm12, xmm8 97 punpckldq xmm8, xmm9 98 punpckhdq xmm12, xmm9 99 movdqa xmm14, xmm10 100 punpckldq xmm10, xmm11 101 punpckhdq xmm14, xmm11 102 movdqa xmm9, xmm8 103 punpcklqdq xmm8, xmm10 104 punpckhqdq xmm9, xmm10 105 movdqa xmm13, xmm12 106 punpcklqdq xmm12, xmm14 107 punpckhqdq xmm13, xmm14 108 movdqa xmmword ptr [rsp], xmm8 109 movdqa xmmword ptr [rsp+0x10], xmm9 110 movdqa xmmword ptr [rsp+0x20], xmm12 111 movdqa xmmword ptr [rsp+0x30], xmm13 112 movdqu xmm8, xmmword ptr [r8+rdx-0x30] 113 movdqu xmm9, xmmword ptr [r9+rdx-0x30] 114 movdqu xmm10, xmmword ptr [r10+rdx-0x30] 115 movdqu xmm11, xmmword ptr [r11+rdx-0x30] 116 movdqa xmm12, xmm8 117 punpckldq xmm8, xmm9 118 punpckhdq xmm12, xmm9 119 movdqa xmm14, xmm10 120 punpckldq xmm10, xmm11 121 punpckhdq xmm14, xmm11 122 movdqa xmm9, xmm8 123 punpcklqdq xmm8, xmm10 124 punpckhqdq xmm9, xmm10 125 movdqa xmm13, xmm12 126 punpcklqdq xmm12, xmm14 127 punpckhqdq xmm13, xmm14 128 movdqa xmmword ptr [rsp+0x40], xmm8 129 movdqa xmmword ptr [rsp+0x50], xmm9 130 movdqa xmmword ptr [rsp+0x60], xmm12 131 movdqa xmmword ptr [rsp+0x70], xmm13 132 movdqu xmm8, xmmword ptr [r8+rdx-0x20] 133 movdqu xmm9, xmmword ptr [r9+rdx-0x20] 134 movdqu xmm10, xmmword ptr [r10+rdx-0x20] 135 movdqu xmm11, xmmword ptr [r11+rdx-0x20] 136 movdqa xmm12, xmm8 137 punpckldq xmm8, xmm9 138 punpckhdq xmm12, xmm9 139 movdqa xmm14, xmm10 140 punpckldq xmm10, xmm11 141 punpckhdq xmm14, xmm11 142 movdqa xmm9, xmm8 143 punpcklqdq xmm8, 
xmm10 144 punpckhqdq xmm9, xmm10 145 movdqa xmm13, xmm12 146 punpcklqdq xmm12, xmm14 147 punpckhqdq xmm13, xmm14 148 movdqa xmmword ptr [rsp+0x80], xmm8 149 movdqa xmmword ptr [rsp+0x90], xmm9 150 movdqa xmmword ptr [rsp+0xA0], xmm12 151 movdqa xmmword ptr [rsp+0xB0], xmm13 152 movdqu xmm8, xmmword ptr [r8+rdx-0x10] 153 movdqu xmm9, xmmword ptr [r9+rdx-0x10] 154 movdqu xmm10, xmmword ptr [r10+rdx-0x10] 155 movdqu xmm11, xmmword ptr [r11+rdx-0x10] 156 movdqa xmm12, xmm8 157 punpckldq xmm8, xmm9 158 punpckhdq xmm12, xmm9 159 movdqa xmm14, xmm10 160 punpckldq xmm10, xmm11 161 punpckhdq xmm14, xmm11 162 movdqa xmm9, xmm8 163 punpcklqdq xmm8, xmm10 164 punpckhqdq xmm9, xmm10 165 movdqa xmm13, xmm12 166 punpcklqdq xmm12, xmm14 167 punpckhqdq xmm13, xmm14 168 movdqa xmmword ptr [rsp+0xC0], xmm8 169 movdqa xmmword ptr [rsp+0xD0], xmm9 170 movdqa xmmword ptr [rsp+0xE0], xmm12 171 movdqa xmmword ptr [rsp+0xF0], xmm13 172 movdqa xmm9, xmmword ptr [BLAKE3_IV_1+rip] 173 movdqa xmm10, xmmword ptr [BLAKE3_IV_2+rip] 174 movdqa xmm11, xmmword ptr [BLAKE3_IV_3+rip] 175 movdqa xmm12, xmmword ptr [rsp+0x110] 176 movdqa xmm13, xmmword ptr [rsp+0x120] 177 movdqa xmm14, xmmword ptr [BLAKE3_BLOCK_LEN+rip] 178 movd xmm15, eax 179 pshufd xmm15, xmm15, 0x00 180 prefetcht0 [r8+rdx+0x80] 181 prefetcht0 [r9+rdx+0x80] 182 prefetcht0 [r10+rdx+0x80] 183 prefetcht0 [r11+rdx+0x80] 184 paddd xmm0, xmmword ptr [rsp] 185 paddd xmm1, xmmword ptr [rsp+0x20] 186 paddd xmm2, xmmword ptr [rsp+0x40] 187 paddd xmm3, xmmword ptr [rsp+0x60] 188 paddd xmm0, xmm4 189 paddd xmm1, xmm5 190 paddd xmm2, xmm6 191 paddd xmm3, xmm7 192 pxor xmm12, xmm0 193 pxor xmm13, xmm1 194 pxor xmm14, xmm2 195 pxor xmm15, xmm3 196 movdqa xmm8, xmmword ptr [ROT16+rip] 197 pshufb xmm12, xmm8 198 pshufb xmm13, xmm8 199 pshufb xmm14, xmm8 200 pshufb xmm15, xmm8 201 movdqa xmm8, xmmword ptr [BLAKE3_IV_0+rip] 202 paddd xmm8, xmm12 203 paddd xmm9, xmm13 204 paddd xmm10, xmm14 205 paddd xmm11, xmm15 206 pxor xmm4, xmm8 207 pxor xmm5, xmm9 
208 pxor xmm6, xmm10 209 pxor xmm7, xmm11 210 movdqa xmmword ptr [rsp+0x100], xmm8 211 movdqa xmm8, xmm4 212 psrld xmm8, 12 213 pslld xmm4, 20 214 por xmm4, xmm8 215 movdqa xmm8, xmm5 216 psrld xmm8, 12 217 pslld xmm5, 20 218 por xmm5, xmm8 219 movdqa xmm8, xmm6 220 psrld xmm8, 12 221 pslld xmm6, 20 222 por xmm6, xmm8 223 movdqa xmm8, xmm7 224 psrld xmm8, 12 225 pslld xmm7, 20 226 por xmm7, xmm8 227 paddd xmm0, xmmword ptr [rsp+0x10] 228 paddd xmm1, xmmword ptr [rsp+0x30] 229 paddd xmm2, xmmword ptr [rsp+0x50] 230 paddd xmm3, xmmword ptr [rsp+0x70] 231 paddd xmm0, xmm4 232 paddd xmm1, xmm5 233 paddd xmm2, xmm6 234 paddd xmm3, xmm7 235 pxor xmm12, xmm0 236 pxor xmm13, xmm1 237 pxor xmm14, xmm2 238 pxor xmm15, xmm3 239 movdqa xmm8, xmmword ptr [ROT8+rip] 240 pshufb xmm12, xmm8 241 pshufb xmm13, xmm8 242 pshufb xmm14, xmm8 243 pshufb xmm15, xmm8 244 movdqa xmm8, xmmword ptr [rsp+0x100] 245 paddd xmm8, xmm12 246 paddd xmm9, xmm13 247 paddd xmm10, xmm14 248 paddd xmm11, xmm15 249 pxor xmm4, xmm8 250 pxor xmm5, xmm9 251 pxor xmm6, xmm10 252 pxor xmm7, xmm11 253 movdqa xmmword ptr [rsp+0x100], xmm8 254 movdqa xmm8, xmm4 255 psrld xmm8, 7 256 pslld xmm4, 25 257 por xmm4, xmm8 258 movdqa xmm8, xmm5 259 psrld xmm8, 7 260 pslld xmm5, 25 261 por xmm5, xmm8 262 movdqa xmm8, xmm6 263 psrld xmm8, 7 264 pslld xmm6, 25 265 por xmm6, xmm8 266 movdqa xmm8, xmm7 267 psrld xmm8, 7 268 pslld xmm7, 25 269 por xmm7, xmm8 270 paddd xmm0, xmmword ptr [rsp+0x80] 271 paddd xmm1, xmmword ptr [rsp+0xA0] 272 paddd xmm2, xmmword ptr [rsp+0xC0] 273 paddd xmm3, xmmword ptr [rsp+0xE0] 274 paddd xmm0, xmm5 275 paddd xmm1, xmm6 276 paddd xmm2, xmm7 277 paddd xmm3, xmm4 278 pxor xmm15, xmm0 279 pxor xmm12, xmm1 280 pxor xmm13, xmm2 281 pxor xmm14, xmm3 282 movdqa xmm8, xmmword ptr [ROT16+rip] 283 pshufb xmm15, xmm8 284 pshufb xmm12, xmm8 285 pshufb xmm13, xmm8 286 pshufb xmm14, xmm8 287 paddd xmm10, xmm15 288 paddd xmm11, xmm12 289 movdqa xmm8, xmmword ptr [rsp+0x100] 290 paddd xmm8, xmm13 291 paddd 
xmm9, xmm14 292 pxor xmm5, xmm10 293 pxor xmm6, xmm11 294 pxor xmm7, xmm8 295 pxor xmm4, xmm9 296 movdqa xmmword ptr [rsp+0x100], xmm8 297 movdqa xmm8, xmm5 298 psrld xmm8, 12 299 pslld xmm5, 20 300 por xmm5, xmm8 301 movdqa xmm8, xmm6 302 psrld xmm8, 12 303 pslld xmm6, 20 304 por xmm6, xmm8 305 movdqa xmm8, xmm7 306 psrld xmm8, 12 307 pslld xmm7, 20 308 por xmm7, xmm8 309 movdqa xmm8, xmm4 310 psrld xmm8, 12 311 pslld xmm4, 20 312 por xmm4, xmm8 313 paddd xmm0, xmmword ptr [rsp+0x90] 314 paddd xmm1, xmmword ptr [rsp+0xB0] 315 paddd xmm2, xmmword ptr [rsp+0xD0] 316 paddd xmm3, xmmword ptr [rsp+0xF0] 317 paddd xmm0, xmm5 318 paddd xmm1, xmm6 319 paddd xmm2, xmm7 320 paddd xmm3, xmm4 321 pxor xmm15, xmm0 322 pxor xmm12, xmm1 323 pxor xmm13, xmm2 324 pxor xmm14, xmm3 325 movdqa xmm8, xmmword ptr [ROT8+rip] 326 pshufb xmm15, xmm8 327 pshufb xmm12, xmm8 328 pshufb xmm13, xmm8 329 pshufb xmm14, xmm8 330 paddd xmm10, xmm15 331 paddd xmm11, xmm12 332 movdqa xmm8, xmmword ptr [rsp+0x100] 333 paddd xmm8, xmm13 334 paddd xmm9, xmm14 335 pxor xmm5, xmm10 336 pxor xmm6, xmm11 337 pxor xmm7, xmm8 338 pxor xmm4, xmm9 339 movdqa xmmword ptr [rsp+0x100], xmm8 340 movdqa xmm8, xmm5 341 psrld xmm8, 7 342 pslld xmm5, 25 343 por xmm5, xmm8 344 movdqa xmm8, xmm6 345 psrld xmm8, 7 346 pslld xmm6, 25 347 por xmm6, xmm8 348 movdqa xmm8, xmm7 349 psrld xmm8, 7 350 pslld xmm7, 25 351 por xmm7, xmm8 352 movdqa xmm8, xmm4 353 psrld xmm8, 7 354 pslld xmm4, 25 355 por xmm4, xmm8 356 paddd xmm0, xmmword ptr [rsp+0x20] 357 paddd xmm1, xmmword ptr [rsp+0x30] 358 paddd xmm2, xmmword ptr [rsp+0x70] 359 paddd xmm3, xmmword ptr [rsp+0x40] 360 paddd xmm0, xmm4 361 paddd xmm1, xmm5 362 paddd xmm2, xmm6 363 paddd xmm3, xmm7 364 pxor xmm12, xmm0 365 pxor xmm13, xmm1 366 pxor xmm14, xmm2 367 pxor xmm15, xmm3 368 movdqa xmm8, xmmword ptr [ROT16+rip] 369 pshufb xmm12, xmm8 370 pshufb xmm13, xmm8 371 pshufb xmm14, xmm8 372 pshufb xmm15, xmm8 373 movdqa xmm8, xmmword ptr [rsp+0x100] 374 paddd xmm8, xmm12 375 
paddd xmm9, xmm13 376 paddd xmm10, xmm14 377 paddd xmm11, xmm15 378 pxor xmm4, xmm8 379 pxor xmm5, xmm9 380 pxor xmm6, xmm10 381 pxor xmm7, xmm11 382 movdqa xmmword ptr [rsp+0x100], xmm8 383 movdqa xmm8, xmm4 384 psrld xmm8, 12 385 pslld xmm4, 20 386 por xmm4, xmm8 387 movdqa xmm8, xmm5 388 psrld xmm8, 12 389 pslld xmm5, 20 390 por xmm5, xmm8 391 movdqa xmm8, xmm6 392 psrld xmm8, 12 393 pslld xmm6, 20 394 por xmm6, xmm8 395 movdqa xmm8, xmm7 396 psrld xmm8, 12 397 pslld xmm7, 20 398 por xmm7, xmm8 399 paddd xmm0, xmmword ptr [rsp+0x60] 400 paddd xmm1, xmmword ptr [rsp+0xA0] 401 paddd xmm2, xmmword ptr [rsp] 402 paddd xmm3, xmmword ptr [rsp+0xD0] 403 paddd xmm0, xmm4 404 paddd xmm1, xmm5 405 paddd xmm2, xmm6 406 paddd xmm3, xmm7 407 pxor xmm12, xmm0 408 pxor xmm13, xmm1 409 pxor xmm14, xmm2 410 pxor xmm15, xmm3 411 movdqa xmm8, xmmword ptr [ROT8+rip] 412 pshufb xmm12, xmm8 413 pshufb xmm13, xmm8 414 pshufb xmm14, xmm8 415 pshufb xmm15, xmm8 416 movdqa xmm8, xmmword ptr [rsp+0x100] 417 paddd xmm8, xmm12 418 paddd xmm9, xmm13 419 paddd xmm10, xmm14 420 paddd xmm11, xmm15 421 pxor xmm4, xmm8 422 pxor xmm5, xmm9 423 pxor xmm6, xmm10 424 pxor xmm7, xmm11 425 movdqa xmmword ptr [rsp+0x100], xmm8 426 movdqa xmm8, xmm4 427 psrld xmm8, 7 428 pslld xmm4, 25 429 por xmm4, xmm8 430 movdqa xmm8, xmm5 431 psrld xmm8, 7 432 pslld xmm5, 25 433 por xmm5, xmm8 434 movdqa xmm8, xmm6 435 psrld xmm8, 7 436 pslld xmm6, 25 437 por xmm6, xmm8 438 movdqa xmm8, xmm7 439 psrld xmm8, 7 440 pslld xmm7, 25 441 por xmm7, xmm8 442 paddd xmm0, xmmword ptr [rsp+0x10] 443 paddd xmm1, xmmword ptr [rsp+0xC0] 444 paddd xmm2, xmmword ptr [rsp+0x90] 445 paddd xmm3, xmmword ptr [rsp+0xF0] 446 paddd xmm0, xmm5 447 paddd xmm1, xmm6 448 paddd xmm2, xmm7 449 paddd xmm3, xmm4 450 pxor xmm15, xmm0 451 pxor xmm12, xmm1 452 pxor xmm13, xmm2 453 pxor xmm14, xmm3 454 movdqa xmm8, xmmword ptr [ROT16+rip] 455 pshufb xmm15, xmm8 456 pshufb xmm12, xmm8 457 pshufb xmm13, xmm8 458 pshufb xmm14, xmm8 459 paddd xmm10, xmm15 
460 paddd xmm11, xmm12 461 movdqa xmm8, xmmword ptr [rsp+0x100] 462 paddd xmm8, xmm13 463 paddd xmm9, xmm14 464 pxor xmm5, xmm10 465 pxor xmm6, xmm11 466 pxor xmm7, xmm8 467 pxor xmm4, xmm9 468 movdqa xmmword ptr [rsp+0x100], xmm8 469 movdqa xmm8, xmm5 470 psrld xmm8, 12 471 pslld xmm5, 20 472 por xmm5, xmm8 473 movdqa xmm8, xmm6 474 psrld xmm8, 12 475 pslld xmm6, 20 476 por xmm6, xmm8 477 movdqa xmm8, xmm7 478 psrld xmm8, 12 479 pslld xmm7, 20 480 por xmm7, xmm8 481 movdqa xmm8, xmm4 482 psrld xmm8, 12 483 pslld xmm4, 20 484 por xmm4, xmm8 485 paddd xmm0, xmmword ptr [rsp+0xB0] 486 paddd xmm1, xmmword ptr [rsp+0x50] 487 paddd xmm2, xmmword ptr [rsp+0xE0] 488 paddd xmm3, xmmword ptr [rsp+0x80] 489 paddd xmm0, xmm5 490 paddd xmm1, xmm6 491 paddd xmm2, xmm7 492 paddd xmm3, xmm4 493 pxor xmm15, xmm0 494 pxor xmm12, xmm1 495 pxor xmm13, xmm2 496 pxor xmm14, xmm3 497 movdqa xmm8, xmmword ptr [ROT8+rip] 498 pshufb xmm15, xmm8 499 pshufb xmm12, xmm8 500 pshufb xmm13, xmm8 501 pshufb xmm14, xmm8 502 paddd xmm10, xmm15 503 paddd xmm11, xmm12 504 movdqa xmm8, xmmword ptr [rsp+0x100] 505 paddd xmm8, xmm13 506 paddd xmm9, xmm14 507 pxor xmm5, xmm10 508 pxor xmm6, xmm11 509 pxor xmm7, xmm8 510 pxor xmm4, xmm9 511 movdqa xmmword ptr [rsp+0x100], xmm8 512 movdqa xmm8, xmm5 513 psrld xmm8, 7 514 pslld xmm5, 25 515 por xmm5, xmm8 516 movdqa xmm8, xmm6 517 psrld xmm8, 7 518 pslld xmm6, 25 519 por xmm6, xmm8 520 movdqa xmm8, xmm7 521 psrld xmm8, 7 522 pslld xmm7, 25 523 por xmm7, xmm8 524 movdqa xmm8, xmm4 525 psrld xmm8, 7 526 pslld xmm4, 25 527 por xmm4, xmm8 528 paddd xmm0, xmmword ptr [rsp+0x30] 529 paddd xmm1, xmmword ptr [rsp+0xA0] 530 paddd xmm2, xmmword ptr [rsp+0xD0] 531 paddd xmm3, xmmword ptr [rsp+0x70] 532 paddd xmm0, xmm4 533 paddd xmm1, xmm5 534 paddd xmm2, xmm6 535 paddd xmm3, xmm7 536 pxor xmm12, xmm0 537 pxor xmm13, xmm1 538 pxor xmm14, xmm2 539 pxor xmm15, xmm3 540 movdqa xmm8, xmmword ptr [ROT16+rip] 541 pshufb xmm12, xmm8 542 pshufb xmm13, xmm8 543 pshufb xmm14, 
xmm8 544 pshufb xmm15, xmm8 545 movdqa xmm8, xmmword ptr [rsp+0x100] 546 paddd xmm8, xmm12 547 paddd xmm9, xmm13 548 paddd xmm10, xmm14 549 paddd xmm11, xmm15 550 pxor xmm4, xmm8 551 pxor xmm5, xmm9 552 pxor xmm6, xmm10 553 pxor xmm7, xmm11 554 movdqa xmmword ptr [rsp+0x100], xmm8 555 movdqa xmm8, xmm4 556 psrld xmm8, 12 557 pslld xmm4, 20 558 por xmm4, xmm8 559 movdqa xmm8, xmm5 560 psrld xmm8, 12 561 pslld xmm5, 20 562 por xmm5, xmm8 563 movdqa xmm8, xmm6 564 psrld xmm8, 12 565 pslld xmm6, 20 566 por xmm6, xmm8 567 movdqa xmm8, xmm7 568 psrld xmm8, 12 569 pslld xmm7, 20 570 por xmm7, xmm8 571 paddd xmm0, xmmword ptr [rsp+0x40] 572 paddd xmm1, xmmword ptr [rsp+0xC0] 573 paddd xmm2, xmmword ptr [rsp+0x20] 574 paddd xmm3, xmmword ptr [rsp+0xE0] 575 paddd xmm0, xmm4 576 paddd xmm1, xmm5 577 paddd xmm2, xmm6 578 paddd xmm3, xmm7 579 pxor xmm12, xmm0 580 pxor xmm13, xmm1 581 pxor xmm14, xmm2 582 pxor xmm15, xmm3 583 movdqa xmm8, xmmword ptr [ROT8+rip] 584 pshufb xmm12, xmm8 585 pshufb xmm13, xmm8 586 pshufb xmm14, xmm8 587 pshufb xmm15, xmm8 588 movdqa xmm8, xmmword ptr [rsp+0x100] 589 paddd xmm8, xmm12 590 paddd xmm9, xmm13 591 paddd xmm10, xmm14 592 paddd xmm11, xmm15 593 pxor xmm4, xmm8 594 pxor xmm5, xmm9 595 pxor xmm6, xmm10 596 pxor xmm7, xmm11 597 movdqa xmmword ptr [rsp+0x100], xmm8 598 movdqa xmm8, xmm4 599 psrld xmm8, 7 600 pslld xmm4, 25 601 por xmm4, xmm8 602 movdqa xmm8, xmm5 603 psrld xmm8, 7 604 pslld xmm5, 25 605 por xmm5, xmm8 606 movdqa xmm8, xmm6 607 psrld xmm8, 7 608 pslld xmm6, 25 609 por xmm6, xmm8 610 movdqa xmm8, xmm7 611 psrld xmm8, 7 612 pslld xmm7, 25 613 por xmm7, xmm8 614 paddd xmm0, xmmword ptr [rsp+0x60] 615 paddd xmm1, xmmword ptr [rsp+0x90] 616 paddd xmm2, xmmword ptr [rsp+0xB0] 617 paddd xmm3, xmmword ptr [rsp+0x80] 618 paddd xmm0, xmm5 619 paddd xmm1, xmm6 620 paddd xmm2, xmm7 621 paddd xmm3, xmm4 622 pxor xmm15, xmm0 623 pxor xmm12, xmm1 624 pxor xmm13, xmm2 625 pxor xmm14, xmm3 626 movdqa xmm8, xmmword ptr [ROT16+rip] 627 pshufb 
xmm15, xmm8 628 pshufb xmm12, xmm8 629 pshufb xmm13, xmm8 630 pshufb xmm14, xmm8 631 paddd xmm10, xmm15 632 paddd xmm11, xmm12 633 movdqa xmm8, xmmword ptr [rsp+0x100] 634 paddd xmm8, xmm13 635 paddd xmm9, xmm14 636 pxor xmm5, xmm10 637 pxor xmm6, xmm11 638 pxor xmm7, xmm8 639 pxor xmm4, xmm9 640 movdqa xmmword ptr [rsp+0x100], xmm8 641 movdqa xmm8, xmm5 642 psrld xmm8, 12 643 pslld xmm5, 20 644 por xmm5, xmm8 645 movdqa xmm8, xmm6 646 psrld xmm8, 12 647 pslld xmm6, 20 648 por xmm6, xmm8 649 movdqa xmm8, xmm7 650 psrld xmm8, 12 651 pslld xmm7, 20 652 por xmm7, xmm8 653 movdqa xmm8, xmm4 654 psrld xmm8, 12 655 pslld xmm4, 20 656 por xmm4, xmm8 657 paddd xmm0, xmmword ptr [rsp+0x50] 658 paddd xmm1, xmmword ptr [rsp] 659 paddd xmm2, xmmword ptr [rsp+0xF0] 660 paddd xmm3, xmmword ptr [rsp+0x10] 661 paddd xmm0, xmm5 662 paddd xmm1, xmm6 663 paddd xmm2, xmm7 664 paddd xmm3, xmm4 665 pxor xmm15, xmm0 666 pxor xmm12, xmm1 667 pxor xmm13, xmm2 668 pxor xmm14, xmm3 669 movdqa xmm8, xmmword ptr [ROT8+rip] 670 pshufb xmm15, xmm8 671 pshufb xmm12, xmm8 672 pshufb xmm13, xmm8 673 pshufb xmm14, xmm8 674 paddd xmm10, xmm15 675 paddd xmm11, xmm12 676 movdqa xmm8, xmmword ptr [rsp+0x100] 677 paddd xmm8, xmm13 678 paddd xmm9, xmm14 679 pxor xmm5, xmm10 680 pxor xmm6, xmm11 681 pxor xmm7, xmm8 682 pxor xmm4, xmm9 683 movdqa xmmword ptr [rsp+0x100], xmm8 684 movdqa xmm8, xmm5 685 psrld xmm8, 7 686 pslld xmm5, 25 687 por xmm5, xmm8 688 movdqa xmm8, xmm6 689 psrld xmm8, 7 690 pslld xmm6, 25 691 por xmm6, xmm8 692 movdqa xmm8, xmm7 693 psrld xmm8, 7 694 pslld xmm7, 25 695 por xmm7, xmm8 696 movdqa xmm8, xmm4 697 psrld xmm8, 7 698 pslld xmm4, 25 699 por xmm4, xmm8 700 paddd xmm0, xmmword ptr [rsp+0xA0] 701 paddd xmm1, xmmword ptr [rsp+0xC0] 702 paddd xmm2, xmmword ptr [rsp+0xE0] 703 paddd xmm3, xmmword ptr [rsp+0xD0] 704 paddd xmm0, xmm4 705 paddd xmm1, xmm5 706 paddd xmm2, xmm6 707 paddd xmm3, xmm7 708 pxor xmm12, xmm0 709 pxor xmm13, xmm1 710 pxor xmm14, xmm2 711 pxor xmm15, xmm3 712 
movdqa xmm8, xmmword ptr [ROT16+rip] 713 pshufb xmm12, xmm8 714 pshufb xmm13, xmm8 715 pshufb xmm14, xmm8 716 pshufb xmm15, xmm8 717 movdqa xmm8, xmmword ptr [rsp+0x100] 718 paddd xmm8, xmm12 719 paddd xmm9, xmm13 720 paddd xmm10, xmm14 721 paddd xmm11, xmm15 722 pxor xmm4, xmm8 723 pxor xmm5, xmm9 724 pxor xmm6, xmm10 725 pxor xmm7, xmm11 726 movdqa xmmword ptr [rsp+0x100], xmm8 727 movdqa xmm8, xmm4 728 psrld xmm8, 12 729 pslld xmm4, 20 730 por xmm4, xmm8 731 movdqa xmm8, xmm5 732 psrld xmm8, 12 733 pslld xmm5, 20 734 por xmm5, xmm8 735 movdqa xmm8, xmm6 736 psrld xmm8, 12 737 pslld xmm6, 20 738 por xmm6, xmm8 739 movdqa xmm8, xmm7 740 psrld xmm8, 12 741 pslld xmm7, 20 742 por xmm7, xmm8 743 paddd xmm0, xmmword ptr [rsp+0x70] 744 paddd xmm1, xmmword ptr [rsp+0x90] 745 paddd xmm2, xmmword ptr [rsp+0x30] 746 paddd xmm3, xmmword ptr [rsp+0xF0] 747 paddd xmm0, xmm4 748 paddd xmm1, xmm5 749 paddd xmm2, xmm6 750 paddd xmm3, xmm7 751 pxor xmm12, xmm0 752 pxor xmm13, xmm1 753 pxor xmm14, xmm2 754 pxor xmm15, xmm3 755 movdqa xmm8, xmmword ptr [ROT8+rip] 756 pshufb xmm12, xmm8 757 pshufb xmm13, xmm8 758 pshufb xmm14, xmm8 759 pshufb xmm15, xmm8 760 movdqa xmm8, xmmword ptr [rsp+0x100] 761 paddd xmm8, xmm12 762 paddd xmm9, xmm13 763 paddd xmm10, xmm14 764 paddd xmm11, xmm15 765 pxor xmm4, xmm8 766 pxor xmm5, xmm9 767 pxor xmm6, xmm10 768 pxor xmm7, xmm11 769 movdqa xmmword ptr [rsp+0x100], xmm8 770 movdqa xmm8, xmm4 771 psrld xmm8, 7 772 pslld xmm4, 25 773 por xmm4, xmm8 774 movdqa xmm8, xmm5 775 psrld xmm8, 7 776 pslld xmm5, 25 777 por xmm5, xmm8 778 movdqa xmm8, xmm6 779 psrld xmm8, 7 780 pslld xmm6, 25 781 por xmm6, xmm8 782 movdqa xmm8, xmm7 783 psrld xmm8, 7 784 pslld xmm7, 25 785 por xmm7, xmm8 786 paddd xmm0, xmmword ptr [rsp+0x40] 787 paddd xmm1, xmmword ptr [rsp+0xB0] 788 paddd xmm2, xmmword ptr [rsp+0x50] 789 paddd xmm3, xmmword ptr [rsp+0x10] 790 paddd xmm0, xmm5 791 paddd xmm1, xmm6 792 paddd xmm2, xmm7 793 paddd xmm3, xmm4 794 pxor xmm15, xmm0 795 pxor xmm12, 
xmm1 796 pxor xmm13, xmm2 797 pxor xmm14, xmm3 798 movdqa xmm8, xmmword ptr [ROT16+rip] 799 pshufb xmm15, xmm8 800 pshufb xmm12, xmm8 801 pshufb xmm13, xmm8 802 pshufb xmm14, xmm8 803 paddd xmm10, xmm15 804 paddd xmm11, xmm12 805 movdqa xmm8, xmmword ptr [rsp+0x100] 806 paddd xmm8, xmm13 807 paddd xmm9, xmm14 808 pxor xmm5, xmm10 809 pxor xmm6, xmm11 810 pxor xmm7, xmm8 811 pxor xmm4, xmm9 812 movdqa xmmword ptr [rsp+0x100], xmm8 813 movdqa xmm8, xmm5 814 psrld xmm8, 12 815 pslld xmm5, 20 816 por xmm5, xmm8 817 movdqa xmm8, xmm6 818 psrld xmm8, 12 819 pslld xmm6, 20 820 por xmm6, xmm8 821 movdqa xmm8, xmm7 822 psrld xmm8, 12 823 pslld xmm7, 20 824 por xmm7, xmm8 825 movdqa xmm8, xmm4 826 psrld xmm8, 12 827 pslld xmm4, 20 828 por xmm4, xmm8 829 paddd xmm0, xmmword ptr [rsp] 830 paddd xmm1, xmmword ptr [rsp+0x20] 831 paddd xmm2, xmmword ptr [rsp+0x80] 832 paddd xmm3, xmmword ptr [rsp+0x60] 833 paddd xmm0, xmm5 834 paddd xmm1, xmm6 835 paddd xmm2, xmm7 836 paddd xmm3, xmm4 837 pxor xmm15, xmm0 838 pxor xmm12, xmm1 839 pxor xmm13, xmm2 840 pxor xmm14, xmm3 841 movdqa xmm8, xmmword ptr [ROT8+rip] 842 pshufb xmm15, xmm8 843 pshufb xmm12, xmm8 844 pshufb xmm13, xmm8 845 pshufb xmm14, xmm8 846 paddd xmm10, xmm15 847 paddd xmm11, xmm12 848 movdqa xmm8, xmmword ptr [rsp+0x100] 849 paddd xmm8, xmm13 850 paddd xmm9, xmm14 851 pxor xmm5, xmm10 852 pxor xmm6, xmm11 853 pxor xmm7, xmm8 854 pxor xmm4, xmm9 855 movdqa xmmword ptr [rsp+0x100], xmm8 856 movdqa xmm8, xmm5 857 psrld xmm8, 7 858 pslld xmm5, 25 859 por xmm5, xmm8 860 movdqa xmm8, xmm6 861 psrld xmm8, 7 862 pslld xmm6, 25 863 por xmm6, xmm8 864 movdqa xmm8, xmm7 865 psrld xmm8, 7 866 pslld xmm7, 25 867 por xmm7, xmm8 868 movdqa xmm8, xmm4 869 psrld xmm8, 7 870 pslld xmm4, 25 871 por xmm4, xmm8 872 paddd xmm0, xmmword ptr [rsp+0xC0] 873 paddd xmm1, xmmword ptr [rsp+0x90] 874 paddd xmm2, xmmword ptr [rsp+0xF0] 875 paddd xmm3, xmmword ptr [rsp+0xE0] 876 paddd xmm0, xmm4 877 paddd xmm1, xmm5 878 paddd xmm2, xmm6 879 paddd 
xmm3, xmm7 880 pxor xmm12, xmm0 881 pxor xmm13, xmm1 882 pxor xmm14, xmm2 883 pxor xmm15, xmm3 884 movdqa xmm8, xmmword ptr [ROT16+rip] 885 pshufb xmm12, xmm8 886 pshufb xmm13, xmm8 887 pshufb xmm14, xmm8 888 pshufb xmm15, xmm8 889 movdqa xmm8, xmmword ptr [rsp+0x100] 890 paddd xmm8, xmm12 891 paddd xmm9, xmm13 892 paddd xmm10, xmm14 893 paddd xmm11, xmm15 894 pxor xmm4, xmm8 895 pxor xmm5, xmm9 896 pxor xmm6, xmm10 897 pxor xmm7, xmm11 898 movdqa xmmword ptr [rsp+0x100], xmm8 899 movdqa xmm8, xmm4 900 psrld xmm8, 12 901 pslld xmm4, 20 902 por xmm4, xmm8 903 movdqa xmm8, xmm5 904 psrld xmm8, 12 905 pslld xmm5, 20 906 por xmm5, xmm8 907 movdqa xmm8, xmm6 908 psrld xmm8, 12 909 pslld xmm6, 20 910 por xmm6, xmm8 911 movdqa xmm8, xmm7 912 psrld xmm8, 12 913 pslld xmm7, 20 914 por xmm7, xmm8 915 paddd xmm0, xmmword ptr [rsp+0xD0] 916 paddd xmm1, xmmword ptr [rsp+0xB0] 917 paddd xmm2, xmmword ptr [rsp+0xA0] 918 paddd xmm3, xmmword ptr [rsp+0x80] 919 paddd xmm0, xmm4 920 paddd xmm1, xmm5 921 paddd xmm2, xmm6 922 paddd xmm3, xmm7 923 pxor xmm12, xmm0 924 pxor xmm13, xmm1 925 pxor xmm14, xmm2 926 pxor xmm15, xmm3 927 movdqa xmm8, xmmword ptr [ROT8+rip] 928 pshufb xmm12, xmm8 929 pshufb xmm13, xmm8 930 pshufb xmm14, xmm8 931 pshufb xmm15, xmm8 932 movdqa xmm8, xmmword ptr [rsp+0x100] 933 paddd xmm8, xmm12 934 paddd xmm9, xmm13 935 paddd xmm10, xmm14 936 paddd xmm11, xmm15 937 pxor xmm4, xmm8 938 pxor xmm5, xmm9 939 pxor xmm6, xmm10 940 pxor xmm7, xmm11 941 movdqa xmmword ptr [rsp+0x100], xmm8 942 movdqa xmm8, xmm4 943 psrld xmm8, 7 944 pslld xmm4, 25 945 por xmm4, xmm8 946 movdqa xmm8, xmm5 947 psrld xmm8, 7 948 pslld xmm5, 25 949 por xmm5, xmm8 950 movdqa xmm8, xmm6 951 psrld xmm8, 7 952 pslld xmm6, 25 953 por xmm6, xmm8 954 movdqa xmm8, xmm7 955 psrld xmm8, 7 956 pslld xmm7, 25 957 por xmm7, xmm8 958 paddd xmm0, xmmword ptr [rsp+0x70] 959 paddd xmm1, xmmword ptr [rsp+0x50] 960 paddd xmm2, xmmword ptr [rsp] 961 paddd xmm3, xmmword ptr [rsp+0x60] 962 paddd xmm0, xmm5 963 
paddd xmm1, xmm6 964 paddd xmm2, xmm7 965 paddd xmm3, xmm4 966 pxor xmm15, xmm0 967 pxor xmm12, xmm1 968 pxor xmm13, xmm2 969 pxor xmm14, xmm3 970 movdqa xmm8, xmmword ptr [ROT16+rip] 971 pshufb xmm15, xmm8 972 pshufb xmm12, xmm8 973 pshufb xmm13, xmm8 974 pshufb xmm14, xmm8 975 paddd xmm10, xmm15 976 paddd xmm11, xmm12 977 movdqa xmm8, xmmword ptr [rsp+0x100] 978 paddd xmm8, xmm13 979 paddd xmm9, xmm14 980 pxor xmm5, xmm10 981 pxor xmm6, xmm11 982 pxor xmm7, xmm8 983 pxor xmm4, xmm9 984 movdqa xmmword ptr [rsp+0x100], xmm8 985 movdqa xmm8, xmm5 986 psrld xmm8, 12 987 pslld xmm5, 20 988 por xmm5, xmm8 989 movdqa xmm8, xmm6 990 psrld xmm8, 12 991 pslld xmm6, 20 992 por xmm6, xmm8 993 movdqa xmm8, xmm7 994 psrld xmm8, 12 995 pslld xmm7, 20 996 por xmm7, xmm8 997 movdqa xmm8, xmm4 998 psrld xmm8, 12 999 pslld xmm4, 20 1000 por xmm4, xmm8 1001 paddd xmm0, xmmword ptr [rsp+0x20] 1002 paddd xmm1, xmmword ptr [rsp+0x30] 1003 paddd xmm2, xmmword ptr [rsp+0x10] 1004 paddd xmm3, xmmword ptr [rsp+0x40] 1005 paddd xmm0, xmm5 1006 paddd xmm1, xmm6 1007 paddd xmm2, xmm7 1008 paddd xmm3, xmm4 1009 pxor xmm15, xmm0 1010 pxor xmm12, xmm1 1011 pxor xmm13, xmm2 1012 pxor xmm14, xmm3 1013 movdqa xmm8, xmmword ptr [ROT8+rip] 1014 pshufb xmm15, xmm8 1015 pshufb xmm12, xmm8 1016 pshufb xmm13, xmm8 1017 pshufb xmm14, xmm8 1018 paddd xmm10, xmm15 1019 paddd xmm11, xmm12 1020 movdqa xmm8, xmmword ptr [rsp+0x100] 1021 paddd xmm8, xmm13 1022 paddd xmm9, xmm14 1023 pxor xmm5, xmm10 1024 pxor xmm6, xmm11 1025 pxor xmm7, xmm8 1026 pxor xmm4, xmm9 1027 movdqa xmmword ptr [rsp+0x100], xmm8 1028 movdqa xmm8, xmm5 1029 psrld xmm8, 7 1030 pslld xmm5, 25 1031 por xmm5, xmm8 1032 movdqa xmm8, xmm6 1033 psrld xmm8, 7 1034 pslld xmm6, 25 1035 por xmm6, xmm8 1036 movdqa xmm8, xmm7 1037 psrld xmm8, 7 1038 pslld xmm7, 25 1039 por xmm7, xmm8 1040 movdqa xmm8, xmm4 1041 psrld xmm8, 7 1042 pslld xmm4, 25 1043 por xmm4, xmm8 1044 paddd xmm0, xmmword ptr [rsp+0x90] 1045 paddd xmm1, xmmword ptr [rsp+0xB0] 1046 
paddd xmm2, xmmword ptr [rsp+0x80] 1047 paddd xmm3, xmmword ptr [rsp+0xF0] 1048 paddd xmm0, xmm4 1049 paddd xmm1, xmm5 1050 paddd xmm2, xmm6 1051 paddd xmm3, xmm7 1052 pxor xmm12, xmm0 1053 pxor xmm13, xmm1 1054 pxor xmm14, xmm2 1055 pxor xmm15, xmm3 1056 movdqa xmm8, xmmword ptr [ROT16+rip] 1057 pshufb xmm12, xmm8 1058 pshufb xmm13, xmm8 1059 pshufb xmm14, xmm8 1060 pshufb xmm15, xmm8 1061 movdqa xmm8, xmmword ptr [rsp+0x100] 1062 paddd xmm8, xmm12 1063 paddd xmm9, xmm13 1064 paddd xmm10, xmm14 1065 paddd xmm11, xmm15 1066 pxor xmm4, xmm8 1067 pxor xmm5, xmm9 1068 pxor xmm6, xmm10 1069 pxor xmm7, xmm11 1070 movdqa xmmword ptr [rsp+0x100], xmm8 1071 movdqa xmm8, xmm4 1072 psrld xmm8, 12 1073 pslld xmm4, 20 1074 por xmm4, xmm8 1075 movdqa xmm8, xmm5 1076 psrld xmm8, 12 1077 pslld xmm5, 20 1078 por xmm5, xmm8 1079 movdqa xmm8, xmm6 1080 psrld xmm8, 12 1081 pslld xmm6, 20 1082 por xmm6, xmm8 1083 movdqa xmm8, xmm7 1084 psrld xmm8, 12 1085 pslld xmm7, 20 1086 por xmm7, xmm8 1087 paddd xmm0, xmmword ptr [rsp+0xE0] 1088 paddd xmm1, xmmword ptr [rsp+0x50] 1089 paddd xmm2, xmmword ptr [rsp+0xC0] 1090 paddd xmm3, xmmword ptr [rsp+0x10] 1091 paddd xmm0, xmm4 1092 paddd xmm1, xmm5 1093 paddd xmm2, xmm6 1094 paddd xmm3, xmm7 1095 pxor xmm12, xmm0 1096 pxor xmm13, xmm1 1097 pxor xmm14, xmm2 1098 pxor xmm15, xmm3 1099 movdqa xmm8, xmmword ptr [ROT8+rip] 1100 pshufb xmm12, xmm8 1101 pshufb xmm13, xmm8 1102 pshufb xmm14, xmm8 1103 pshufb xmm15, xmm8 1104 movdqa xmm8, xmmword ptr [rsp+0x100] 1105 paddd xmm8, xmm12 1106 paddd xmm9, xmm13 1107 paddd xmm10, xmm14 1108 paddd xmm11, xmm15 1109 pxor xmm4, xmm8 1110 pxor xmm5, xmm9 1111 pxor xmm6, xmm10 1112 pxor xmm7, xmm11 1113 movdqa xmmword ptr [rsp+0x100], xmm8 1114 movdqa xmm8, xmm4 1115 psrld xmm8, 7 1116 pslld xmm4, 25 1117 por xmm4, xmm8 1118 movdqa xmm8, xmm5 1119 psrld xmm8, 7 1120 pslld xmm5, 25 1121 por xmm5, xmm8 1122 movdqa xmm8, xmm6 1123 psrld xmm8, 7 1124 pslld xmm6, 25 1125 por xmm6, xmm8 1126 movdqa xmm8, xmm7 1127 
psrld xmm8, 7 1128 pslld xmm7, 25 1129 por xmm7, xmm8 1130 paddd xmm0, xmmword ptr [rsp+0xD0] 1131 paddd xmm1, xmmword ptr [rsp] 1132 paddd xmm2, xmmword ptr [rsp+0x20] 1133 paddd xmm3, xmmword ptr [rsp+0x40] 1134 paddd xmm0, xmm5 1135 paddd xmm1, xmm6 1136 paddd xmm2, xmm7 1137 paddd xmm3, xmm4 1138 pxor xmm15, xmm0 1139 pxor xmm12, xmm1 1140 pxor xmm13, xmm2 1141 pxor xmm14, xmm3 1142 movdqa xmm8, xmmword ptr [ROT16+rip] 1143 pshufb xmm15, xmm8 1144 pshufb xmm12, xmm8 1145 pshufb xmm13, xmm8 1146 pshufb xmm14, xmm8 1147 paddd xmm10, xmm15 1148 paddd xmm11, xmm12 1149 movdqa xmm8, xmmword ptr [rsp+0x100] 1150 paddd xmm8, xmm13 1151 paddd xmm9, xmm14 1152 pxor xmm5, xmm10 1153 pxor xmm6, xmm11 1154 pxor xmm7, xmm8 1155 pxor xmm4, xmm9 1156 movdqa xmmword ptr [rsp+0x100], xmm8 1157 movdqa xmm8, xmm5 1158 psrld xmm8, 12 1159 pslld xmm5, 20 1160 por xmm5, xmm8 1161 movdqa xmm8, xmm6 1162 psrld xmm8, 12 1163 pslld xmm6, 20 1164 por xmm6, xmm8 1165 movdqa xmm8, xmm7 1166 psrld xmm8, 12 1167 pslld xmm7, 20 1168 por xmm7, xmm8 1169 movdqa xmm8, xmm4 1170 psrld xmm8, 12 1171 pslld xmm4, 20 1172 por xmm4, xmm8 1173 paddd xmm0, xmmword ptr [rsp+0x30] 1174 paddd xmm1, xmmword ptr [rsp+0xA0] 1175 paddd xmm2, xmmword ptr [rsp+0x60] 1176 paddd xmm3, xmmword ptr [rsp+0x70] 1177 paddd xmm0, xmm5 1178 paddd xmm1, xmm6 1179 paddd xmm2, xmm7 1180 paddd xmm3, xmm4 1181 pxor xmm15, xmm0 1182 pxor xmm12, xmm1 1183 pxor xmm13, xmm2 1184 pxor xmm14, xmm3 1185 movdqa xmm8, xmmword ptr [ROT8+rip] 1186 pshufb xmm15, xmm8 1187 pshufb xmm12, xmm8 1188 pshufb xmm13, xmm8 1189 pshufb xmm14, xmm8 1190 paddd xmm10, xmm15 1191 paddd xmm11, xmm12 1192 movdqa xmm8, xmmword ptr [rsp+0x100] 1193 paddd xmm8, xmm13 1194 paddd xmm9, xmm14 1195 pxor xmm5, xmm10 1196 pxor xmm6, xmm11 1197 pxor xmm7, xmm8 1198 pxor xmm4, xmm9 1199 movdqa xmmword ptr [rsp+0x100], xmm8 1200 movdqa xmm8, xmm5 1201 psrld xmm8, 7 1202 pslld xmm5, 25 1203 por xmm5, xmm8 1204 movdqa xmm8, xmm6 1205 psrld xmm8, 7 1206 pslld xmm6, 25 
1207 por xmm6, xmm8 1208 movdqa xmm8, xmm7 1209 psrld xmm8, 7 1210 pslld xmm7, 25 1211 por xmm7, xmm8 1212 movdqa xmm8, xmm4 1213 psrld xmm8, 7 1214 pslld xmm4, 25 1215 por xmm4, xmm8 1216 paddd xmm0, xmmword ptr [rsp+0xB0] 1217 paddd xmm1, xmmword ptr [rsp+0x50] 1218 paddd xmm2, xmmword ptr [rsp+0x10] 1219 paddd xmm3, xmmword ptr [rsp+0x80] 1220 paddd xmm0, xmm4 1221 paddd xmm1, xmm5 1222 paddd xmm2, xmm6 1223 paddd xmm3, xmm7 1224 pxor xmm12, xmm0 1225 pxor xmm13, xmm1 1226 pxor xmm14, xmm2 1227 pxor xmm15, xmm3 1228 movdqa xmm8, xmmword ptr [ROT16+rip] 1229 pshufb xmm12, xmm8 1230 pshufb xmm13, xmm8 1231 pshufb xmm14, xmm8 1232 pshufb xmm15, xmm8 1233 movdqa xmm8, xmmword ptr [rsp+0x100] 1234 paddd xmm8, xmm12 1235 paddd xmm9, xmm13 1236 paddd xmm10, xmm14 1237 paddd xmm11, xmm15 1238 pxor xmm4, xmm8 1239 pxor xmm5, xmm9 1240 pxor xmm6, xmm10 1241 pxor xmm7, xmm11 1242 movdqa xmmword ptr [rsp+0x100], xmm8 1243 movdqa xmm8, xmm4 1244 psrld xmm8, 12 1245 pslld xmm4, 20 1246 por xmm4, xmm8 1247 movdqa xmm8, xmm5 1248 psrld xmm8, 12 1249 pslld xmm5, 20 1250 por xmm5, xmm8 1251 movdqa xmm8, xmm6 1252 psrld xmm8, 12 1253 pslld xmm6, 20 1254 por xmm6, xmm8 1255 movdqa xmm8, xmm7 1256 psrld xmm8, 12 1257 pslld xmm7, 20 1258 por xmm7, xmm8 1259 paddd xmm0, xmmword ptr [rsp+0xF0] 1260 paddd xmm1, xmmword ptr [rsp] 1261 paddd xmm2, xmmword ptr [rsp+0x90] 1262 paddd xmm3, xmmword ptr [rsp+0x60] 1263 paddd xmm0, xmm4 1264 paddd xmm1, xmm5 1265 paddd xmm2, xmm6 1266 paddd xmm3, xmm7 1267 pxor xmm12, xmm0 1268 pxor xmm13, xmm1 1269 pxor xmm14, xmm2 1270 pxor xmm15, xmm3 1271 movdqa xmm8, xmmword ptr [ROT8+rip] 1272 pshufb xmm12, xmm8 1273 pshufb xmm13, xmm8 1274 pshufb xmm14, xmm8 1275 pshufb xmm15, xmm8 1276 movdqa xmm8, xmmword ptr [rsp+0x100] 1277 paddd xmm8, xmm12 1278 paddd xmm9, xmm13 1279 paddd xmm10, xmm14 1280 paddd xmm11, xmm15 1281 pxor xmm4, xmm8 1282 pxor xmm5, xmm9 1283 pxor xmm6, xmm10 1284 pxor xmm7, xmm11 1285 movdqa xmmword ptr [rsp+0x100], xmm8 1286 movdqa 
xmm8, xmm4 1287 psrld xmm8, 7 1288 pslld xmm4, 25 1289 por xmm4, xmm8 1290 movdqa xmm8, xmm5 1291 psrld xmm8, 7 1292 pslld xmm5, 25 1293 por xmm5, xmm8 1294 movdqa xmm8, xmm6 1295 psrld xmm8, 7 1296 pslld xmm6, 25 1297 por xmm6, xmm8 1298 movdqa xmm8, xmm7 1299 psrld xmm8, 7 1300 pslld xmm7, 25 1301 por xmm7, xmm8 1302 paddd xmm0, xmmword ptr [rsp+0xE0] 1303 paddd xmm1, xmmword ptr [rsp+0x20] 1304 paddd xmm2, xmmword ptr [rsp+0x30] 1305 paddd xmm3, xmmword ptr [rsp+0x70] 1306 paddd xmm0, xmm5 1307 paddd xmm1, xmm6 1308 paddd xmm2, xmm7 1309 paddd xmm3, xmm4 1310 pxor xmm15, xmm0 1311 pxor xmm12, xmm1 1312 pxor xmm13, xmm2 1313 pxor xmm14, xmm3 1314 movdqa xmm8, xmmword ptr [ROT16+rip] 1315 pshufb xmm15, xmm8 1316 pshufb xmm12, xmm8 1317 pshufb xmm13, xmm8 1318 pshufb xmm14, xmm8 1319 paddd xmm10, xmm15 1320 paddd xmm11, xmm12 1321 movdqa xmm8, xmmword ptr [rsp+0x100] 1322 paddd xmm8, xmm13 1323 paddd xmm9, xmm14 1324 pxor xmm5, xmm10 1325 pxor xmm6, xmm11 1326 pxor xmm7, xmm8 1327 pxor xmm4, xmm9 1328 movdqa xmmword ptr [rsp+0x100], xmm8 1329 movdqa xmm8, xmm5 1330 psrld xmm8, 12 1331 pslld xmm5, 20 1332 por xmm5, xmm8 1333 movdqa xmm8, xmm6 1334 psrld xmm8, 12 1335 pslld xmm6, 20 1336 por xmm6, xmm8 1337 movdqa xmm8, xmm7 1338 psrld xmm8, 12 1339 pslld xmm7, 20 1340 por xmm7, xmm8 1341 movdqa xmm8, xmm4 1342 psrld xmm8, 12 1343 pslld xmm4, 20 1344 por xmm4, xmm8 1345 paddd xmm0, xmmword ptr [rsp+0xA0] 1346 paddd xmm1, xmmword ptr [rsp+0xC0] 1347 paddd xmm2, xmmword ptr [rsp+0x40] 1348 paddd xmm3, xmmword ptr [rsp+0xD0] 1349 paddd xmm0, xmm5 1350 paddd xmm1, xmm6 1351 paddd xmm2, xmm7 1352 paddd xmm3, xmm4 1353 pxor xmm15, xmm0 1354 pxor xmm12, xmm1 1355 pxor xmm13, xmm2 1356 pxor xmm14, xmm3 1357 movdqa xmm8, xmmword ptr [ROT8+rip] 1358 pshufb xmm15, xmm8 1359 pshufb xmm12, xmm8 1360 pshufb xmm13, xmm8 1361 pshufb xmm14, xmm8 1362 paddd xmm10, xmm15 1363 paddd xmm11, xmm12 1364 movdqa xmm8, xmmword ptr [rsp+0x100] 1365 paddd xmm8, xmm13 1366 paddd xmm9, xmm14 1367 
pxor xmm5, xmm10 1368 pxor xmm6, xmm11 1369 pxor xmm7, xmm8 1370 pxor xmm4, xmm9 1371 pxor xmm0, xmm8 1372 pxor xmm1, xmm9 1373 pxor xmm2, xmm10 1374 pxor xmm3, xmm11 1375 movdqa xmm8, xmm5 1376 psrld xmm8, 7 1377 pslld xmm5, 25 1378 por xmm5, xmm8 1379 movdqa xmm8, xmm6 1380 psrld xmm8, 7 1381 pslld xmm6, 25 1382 por xmm6, xmm8 1383 movdqa xmm8, xmm7 1384 psrld xmm8, 7 1385 pslld xmm7, 25 1386 por xmm7, xmm8 1387 movdqa xmm8, xmm4 1388 psrld xmm8, 7 1389 pslld xmm4, 25 1390 por xmm4, xmm8 1391 pxor xmm4, xmm12 1392 pxor xmm5, xmm13 1393 pxor xmm6, xmm14 1394 pxor xmm7, xmm15 1395 mov eax, r13d 1396 jne 9b 1397 movdqa xmm9, xmm0 1398 punpckldq xmm0, xmm1 1399 punpckhdq xmm9, xmm1 1400 movdqa xmm11, xmm2 1401 punpckldq xmm2, xmm3 1402 punpckhdq xmm11, xmm3 1403 movdqa xmm1, xmm0 1404 punpcklqdq xmm0, xmm2 1405 punpckhqdq xmm1, xmm2 1406 movdqa xmm3, xmm9 1407 punpcklqdq xmm9, xmm11 1408 punpckhqdq xmm3, xmm11 1409 movdqu xmmword ptr [rbx], xmm0 1410 movdqu xmmword ptr [rbx+0x20], xmm1 1411 movdqu xmmword ptr [rbx+0x40], xmm9 1412 movdqu xmmword ptr [rbx+0x60], xmm3 1413 movdqa xmm9, xmm4 1414 punpckldq xmm4, xmm5 1415 punpckhdq xmm9, xmm5 1416 movdqa xmm11, xmm6 1417 punpckldq xmm6, xmm7 1418 punpckhdq xmm11, xmm7 1419 movdqa xmm5, xmm4 1420 punpcklqdq xmm4, xmm6 1421 punpckhqdq xmm5, xmm6 1422 movdqa xmm7, xmm9 1423 punpcklqdq xmm9, xmm11 1424 punpckhqdq xmm7, xmm11 1425 movdqu xmmword ptr [rbx+0x10], xmm4 1426 movdqu xmmword ptr [rbx+0x30], xmm5 1427 movdqu xmmword ptr [rbx+0x50], xmm9 1428 movdqu xmmword ptr [rbx+0x70], xmm7 1429 movdqa xmm1, xmmword ptr [rsp+0x110] 1430 movdqa xmm0, xmm1 1431 paddd xmm1, xmmword ptr [rsp+0x150] 1432 movdqa xmmword ptr [rsp+0x110], xmm1 1433 pxor xmm0, xmmword ptr [CMP_MSB_MASK+rip] 1434 pxor xmm1, xmmword ptr [CMP_MSB_MASK+rip] 1435 pcmpgtd xmm0, xmm1 1436 movdqa xmm1, xmmword ptr [rsp+0x120] 1437 psubd xmm1, xmm0 1438 movdqa xmmword ptr [rsp+0x120], xmm1 1439 add rbx, 128 1440 add rdi, 32 1441 sub rsi, 4 1442 cmp rsi, 4 1443 
jnc 2b 1444 test rsi, rsi 1445 jne 3f 14464: 1447 movdqa xmm6, xmmword ptr [rsp+0x170] 1448 movdqa xmm7, xmmword ptr [rsp+0x180] 1449 movdqa xmm8, xmmword ptr [rsp+0x190] 1450 movdqa xmm9, xmmword ptr [rsp+0x1A0] 1451 movdqa xmm10, xmmword ptr [rsp+0x1B0] 1452 movdqa xmm11, xmmword ptr [rsp+0x1C0] 1453 movdqa xmm12, xmmword ptr [rsp+0x1D0] 1454 movdqa xmm13, xmmword ptr [rsp+0x1E0] 1455 movdqa xmm14, xmmword ptr [rsp+0x1F0] 1456 movdqa xmm15, xmmword ptr [rsp+0x200] 1457 mov rsp, rbp 1458 pop rbp 1459 pop rbx 1460 pop rdi 1461 pop rsi 1462 pop r12 1463 pop r13 1464 pop r14 1465 pop r15 1466 ret 1467.p2align 5 14683: 1469 test esi, 0x2 1470 je 3f 1471 movups xmm0, xmmword ptr [rcx] 1472 movups xmm1, xmmword ptr [rcx+0x10] 1473 movaps xmm8, xmm0 1474 movaps xmm9, xmm1 1475 movd xmm13, dword ptr [rsp+0x110] 1476 pinsrd xmm13, dword ptr [rsp+0x120], 1 1477 pinsrd xmm13, dword ptr [BLAKE3_BLOCK_LEN+rip], 2 1478 movaps xmmword ptr [rsp], xmm13 1479 movd xmm14, dword ptr [rsp+0x114] 1480 pinsrd xmm14, dword ptr [rsp+0x124], 1 1481 pinsrd xmm14, dword ptr [BLAKE3_BLOCK_LEN+rip], 2 1482 movaps xmmword ptr [rsp+0x10], xmm14 1483 mov r8, qword ptr [rdi] 1484 mov r9, qword ptr [rdi+0x8] 1485 movzx eax, byte ptr [rbp+0x80] 1486 or eax, r13d 1487 xor edx, edx 14882: 1489 mov r14d, eax 1490 or eax, r12d 1491 add rdx, 64 1492 cmp rdx, r15 1493 cmovne eax, r14d 1494 movaps xmm2, xmmword ptr [BLAKE3_IV+rip] 1495 movaps xmm10, xmm2 1496 movups xmm4, xmmword ptr [r8+rdx-0x40] 1497 movups xmm5, xmmword ptr [r8+rdx-0x30] 1498 movaps xmm3, xmm4 1499 shufps xmm4, xmm5, 136 1500 shufps xmm3, xmm5, 221 1501 movaps xmm5, xmm3 1502 movups xmm6, xmmword ptr [r8+rdx-0x20] 1503 movups xmm7, xmmword ptr [r8+rdx-0x10] 1504 movaps xmm3, xmm6 1505 shufps xmm6, xmm7, 136 1506 pshufd xmm6, xmm6, 0x93 1507 shufps xmm3, xmm7, 221 1508 pshufd xmm7, xmm3, 0x93 1509 movups xmm12, xmmword ptr [r9+rdx-0x40] 1510 movups xmm13, xmmword ptr [r9+rdx-0x30] 1511 movaps xmm11, xmm12 1512 shufps xmm12, xmm13, 136 
1513 shufps xmm11, xmm13, 221 1514 movaps xmm13, xmm11 1515 movups xmm14, xmmword ptr [r9+rdx-0x20] 1516 movups xmm15, xmmword ptr [r9+rdx-0x10] 1517 movaps xmm11, xmm14 1518 shufps xmm14, xmm15, 136 1519 pshufd xmm14, xmm14, 0x93 1520 shufps xmm11, xmm15, 221 1521 pshufd xmm15, xmm11, 0x93 1522 movaps xmm3, xmmword ptr [rsp] 1523 movaps xmm11, xmmword ptr [rsp+0x10] 1524 pinsrd xmm3, eax, 3 1525 pinsrd xmm11, eax, 3 1526 mov al, 7 15279: 1528 paddd xmm0, xmm4 1529 paddd xmm8, xmm12 1530 movaps xmmword ptr [rsp+0x20], xmm4 1531 movaps xmmword ptr [rsp+0x30], xmm12 1532 paddd xmm0, xmm1 1533 paddd xmm8, xmm9 1534 pxor xmm3, xmm0 1535 pxor xmm11, xmm8 1536 movaps xmm12, xmmword ptr [ROT16+rip] 1537 pshufb xmm3, xmm12 1538 pshufb xmm11, xmm12 1539 paddd xmm2, xmm3 1540 paddd xmm10, xmm11 1541 pxor xmm1, xmm2 1542 pxor xmm9, xmm10 1543 movdqa xmm4, xmm1 1544 pslld xmm1, 20 1545 psrld xmm4, 12 1546 por xmm1, xmm4 1547 movdqa xmm4, xmm9 1548 pslld xmm9, 20 1549 psrld xmm4, 12 1550 por xmm9, xmm4 1551 paddd xmm0, xmm5 1552 paddd xmm8, xmm13 1553 movaps xmmword ptr [rsp+0x40], xmm5 1554 movaps xmmword ptr [rsp+0x50], xmm13 1555 paddd xmm0, xmm1 1556 paddd xmm8, xmm9 1557 pxor xmm3, xmm0 1558 pxor xmm11, xmm8 1559 movaps xmm13, xmmword ptr [ROT8+rip] 1560 pshufb xmm3, xmm13 1561 pshufb xmm11, xmm13 1562 paddd xmm2, xmm3 1563 paddd xmm10, xmm11 1564 pxor xmm1, xmm2 1565 pxor xmm9, xmm10 1566 movdqa xmm4, xmm1 1567 pslld xmm1, 25 1568 psrld xmm4, 7 1569 por xmm1, xmm4 1570 movdqa xmm4, xmm9 1571 pslld xmm9, 25 1572 psrld xmm4, 7 1573 por xmm9, xmm4 1574 pshufd xmm0, xmm0, 0x93 1575 pshufd xmm8, xmm8, 0x93 1576 pshufd xmm3, xmm3, 0x4E 1577 pshufd xmm11, xmm11, 0x4E 1578 pshufd xmm2, xmm2, 0x39 1579 pshufd xmm10, xmm10, 0x39 1580 paddd xmm0, xmm6 1581 paddd xmm8, xmm14 1582 paddd xmm0, xmm1 1583 paddd xmm8, xmm9 1584 pxor xmm3, xmm0 1585 pxor xmm11, xmm8 1586 pshufb xmm3, xmm12 1587 pshufb xmm11, xmm12 1588 paddd xmm2, xmm3 1589 paddd xmm10, xmm11 1590 pxor xmm1, xmm2 1591 pxor 
xmm9, xmm10 1592 movdqa xmm4, xmm1 1593 pslld xmm1, 20 1594 psrld xmm4, 12 1595 por xmm1, xmm4 1596 movdqa xmm4, xmm9 1597 pslld xmm9, 20 1598 psrld xmm4, 12 1599 por xmm9, xmm4 1600 paddd xmm0, xmm7 1601 paddd xmm8, xmm15 1602 paddd xmm0, xmm1 1603 paddd xmm8, xmm9 1604 pxor xmm3, xmm0 1605 pxor xmm11, xmm8 1606 pshufb xmm3, xmm13 1607 pshufb xmm11, xmm13 1608 paddd xmm2, xmm3 1609 paddd xmm10, xmm11 1610 pxor xmm1, xmm2 1611 pxor xmm9, xmm10 1612 movdqa xmm4, xmm1 1613 pslld xmm1, 25 1614 psrld xmm4, 7 1615 por xmm1, xmm4 1616 movdqa xmm4, xmm9 1617 pslld xmm9, 25 1618 psrld xmm4, 7 1619 por xmm9, xmm4 1620 pshufd xmm0, xmm0, 0x39 1621 pshufd xmm8, xmm8, 0x39 1622 pshufd xmm3, xmm3, 0x4E 1623 pshufd xmm11, xmm11, 0x4E 1624 pshufd xmm2, xmm2, 0x93 1625 pshufd xmm10, xmm10, 0x93 1626 dec al 1627 je 9f 1628 movdqa xmm12, xmmword ptr [rsp+0x20] 1629 movdqa xmm5, xmmword ptr [rsp+0x40] 1630 pshufd xmm13, xmm12, 0x0F 1631 shufps xmm12, xmm5, 214 1632 pshufd xmm4, xmm12, 0x39 1633 movdqa xmm12, xmm6 1634 shufps xmm12, xmm7, 250 1635 pblendw xmm13, xmm12, 0xCC 1636 movdqa xmm12, xmm7 1637 punpcklqdq xmm12, xmm5 1638 pblendw xmm12, xmm6, 0xC0 1639 pshufd xmm12, xmm12, 0x78 1640 punpckhdq xmm5, xmm7 1641 punpckldq xmm6, xmm5 1642 pshufd xmm7, xmm6, 0x1E 1643 movdqa xmmword ptr [rsp+0x20], xmm13 1644 movdqa xmmword ptr [rsp+0x40], xmm12 1645 movdqa xmm5, xmmword ptr [rsp+0x30] 1646 movdqa xmm13, xmmword ptr [rsp+0x50] 1647 pshufd xmm6, xmm5, 0x0F 1648 shufps xmm5, xmm13, 214 1649 pshufd xmm12, xmm5, 0x39 1650 movdqa xmm5, xmm14 1651 shufps xmm5, xmm15, 250 1652 pblendw xmm6, xmm5, 0xCC 1653 movdqa xmm5, xmm15 1654 punpcklqdq xmm5, xmm13 1655 pblendw xmm5, xmm14, 0xC0 1656 pshufd xmm5, xmm5, 0x78 1657 punpckhdq xmm13, xmm15 1658 punpckldq xmm14, xmm13 1659 pshufd xmm15, xmm14, 0x1E 1660 movdqa xmm13, xmm6 1661 movdqa xmm14, xmm5 1662 movdqa xmm5, xmmword ptr [rsp+0x20] 1663 movdqa xmm6, xmmword ptr [rsp+0x40] 1664 jmp 9b 16659: 1666 pxor xmm0, xmm2 1667 pxor xmm1, xmm3 1668 
pxor xmm8, xmm10 1669 pxor xmm9, xmm11 1670 mov eax, r13d 1671 cmp rdx, r15 1672 jne 2b 1673 movups xmmword ptr [rbx], xmm0 1674 movups xmmword ptr [rbx+0x10], xmm1 1675 movups xmmword ptr [rbx+0x20], xmm8 1676 movups xmmword ptr [rbx+0x30], xmm9 1677 movdqa xmm0, xmmword ptr [rsp+0x130] 1678 movdqa xmm1, xmmword ptr [rsp+0x110] 1679 movdqa xmm2, xmmword ptr [rsp+0x120] 1680 movdqu xmm3, xmmword ptr [rsp+0x118] 1681 movdqu xmm4, xmmword ptr [rsp+0x128] 1682 blendvps xmm1, xmm3, xmm0 1683 blendvps xmm2, xmm4, xmm0 1684 movdqa xmmword ptr [rsp+0x110], xmm1 1685 movdqa xmmword ptr [rsp+0x120], xmm2 1686 add rdi, 16 1687 add rbx, 64 1688 sub rsi, 2 16893: 1690 test esi, 0x1 1691 je 4b 1692 movups xmm0, xmmword ptr [rcx] 1693 movups xmm1, xmmword ptr [rcx+0x10] 1694 movd xmm13, dword ptr [rsp+0x110] 1695 pinsrd xmm13, dword ptr [rsp+0x120], 1 1696 pinsrd xmm13, dword ptr [BLAKE3_BLOCK_LEN+rip], 2 1697 movaps xmm14, xmmword ptr [ROT8+rip] 1698 movaps xmm15, xmmword ptr [ROT16+rip] 1699 mov r8, qword ptr [rdi] 1700 movzx eax, byte ptr [rbp+0x80] 1701 or eax, r13d 1702 xor edx, edx 17032: 1704 mov r14d, eax 1705 or eax, r12d 1706 add rdx, 64 1707 cmp rdx, r15 1708 cmovne eax, r14d 1709 movaps xmm2, xmmword ptr [BLAKE3_IV+rip] 1710 movaps xmm3, xmm13 1711 pinsrd xmm3, eax, 3 1712 movups xmm4, xmmword ptr [r8+rdx-0x40] 1713 movups xmm5, xmmword ptr [r8+rdx-0x30] 1714 movaps xmm8, xmm4 1715 shufps xmm4, xmm5, 136 1716 shufps xmm8, xmm5, 221 1717 movaps xmm5, xmm8 1718 movups xmm6, xmmword ptr [r8+rdx-0x20] 1719 movups xmm7, xmmword ptr [r8+rdx-0x10] 1720 movaps xmm8, xmm6 1721 shufps xmm6, xmm7, 136 1722 pshufd xmm6, xmm6, 0x93 1723 shufps xmm8, xmm7, 221 1724 pshufd xmm7, xmm8, 0x93 1725 mov al, 7 17269: 1727 paddd xmm0, xmm4 1728 paddd xmm0, xmm1 1729 pxor xmm3, xmm0 1730 pshufb xmm3, xmm15 1731 paddd xmm2, xmm3 1732 pxor xmm1, xmm2 1733 movdqa xmm11, xmm1 1734 pslld xmm1, 20 1735 psrld xmm11, 12 1736 por xmm1, xmm11 1737 paddd xmm0, xmm5 1738 paddd xmm0, xmm1 1739 pxor 
xmm3, xmm0
        /* Remainder of the one-input loop of blake3_hash_many_sse41. The
         * start of that function lies above this span; the instructions
         * below are unchanged and only laid out one per line. */
        pshufb  xmm3, xmm14
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1
        pslld   xmm1, 25
        psrld   xmm11, 7
        por     xmm1, xmm11
        pshufd  xmm0, xmm0, 0x93
        pshufd  xmm3, xmm3, 0x4E
        pshufd  xmm2, xmm2, 0x39
        paddd   xmm0, xmm6
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        pshufb  xmm3, xmm15
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1
        pslld   xmm1, 20
        psrld   xmm11, 12
        por     xmm1, xmm11
        paddd   xmm0, xmm7
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        pshufb  xmm3, xmm14
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1
        pslld   xmm1, 25
        psrld   xmm11, 7
        por     xmm1, xmm11
        pshufd  xmm0, xmm0, 0x39
        pshufd  xmm3, xmm3, 0x4E
        pshufd  xmm2, xmm2, 0x93
        dec     al                      /* al counts the remaining rounds */
        jz      9f
        movdqa  xmm8, xmm4
        shufps  xmm8, xmm5, 214
        pshufd  xmm9, xmm4, 0x0F
        pshufd  xmm4, xmm8, 0x39
        movdqa  xmm8, xmm6
        shufps  xmm8, xmm7, 250
        pblendw xmm9, xmm8, 0xCC
        movdqa  xmm8, xmm7
        punpcklqdq xmm8, xmm5
        pblendw xmm8, xmm6, 0xC0
        pshufd  xmm8, xmm8, 0x78
        punpckhdq xmm5, xmm7
        punpckldq xmm6, xmm5
        pshufd  xmm7, xmm6, 0x1E
        movdqa  xmm5, xmm9
        movdqa  xmm6, xmm8
        jmp     9b
9:
        pxor    xmm0, xmm2
        pxor    xmm1, xmm3
        mov     eax, r13d
        cmp     rdx, r15                /* rdx = bytes consumed, r15 = total bytes per input */
        jne     2b
        movups  xmmword ptr [rbx], xmm0
        movups  xmmword ptr [rbx+0x10], xmm1
        jmp     4b                      /* back to the shared register-restoring epilogue */

/*
 * blake3_compress_in_place_sse41 / _blake3_compress_in_place_sse41
 *
 * Builds a 16-dword state in xmm0-xmm3, runs the mixing loop at label 9
 * seven times over it, and writes the 32-byte result back over the
 * input at [rcx].
 *
 * Inputs as read by the code. The register and stack-slot layout matches
 * the Microsoft x64 convention. The C-level parameter names are not
 * visible in this file, so the names in parentheses are assumptions to
 * confirm against the C prototype:
 *   rcx         pointer to 32 bytes, read at entry and overwritten at
 *               exit (presumably the chaining value)
 *   rdx         pointer to 64 bytes, read only (presumably the block)
 *   r8b         byte, zero-extended into lane 2 of xmm3
 *               (presumably the block length)
 *   r9          64-bit value placed in lanes 0-1 of xmm3
 *               (presumably the counter)
 *   [rsp+0x28]  at entry: byte placed in lane 3 of xmm3
 *               (presumably the flags); addressed as [rsp+0xA0] after
 *               the 120-byte frame is reserved
 *
 * Clobbers rax, r8, xmm0-xmm5 and the flags. xmm6-xmm9, xmm11, xmm14
 * and xmm15 are saved in the frame and restored before returning.
 * The saves use movdqa, so rsp must be 8 modulo 16 at entry (the state
 * right after a call from a 16-byte-aligned stack).
 */
.p2align 6
blake3_compress_in_place_sse41:
_blake3_compress_in_place_sse41:
        sub     rsp, 120                /* 7 x 16 bytes of saves, plus 8 to reach 16-byte alignment */
        movdqa  xmmword ptr [rsp], xmm6
        movdqa  xmmword ptr [rsp+0x10], xmm7
        movdqa  xmmword ptr [rsp+0x20], xmm8
        movdqa  xmmword ptr [rsp+0x30], xmm9
        movdqa  xmmword ptr [rsp+0x40], xmm11
        movdqa  xmmword ptr [rsp+0x50], xmm14
        movdqa  xmmword ptr [rsp+0x60], xmm15
        /* State rows: xmm0, xmm1 = the 32 bytes at [rcx]; xmm2 = BLAKE3_IV. */
        movups  xmm0, xmmword ptr [rcx]
        movups  xmm1, xmmword ptr [rcx+0x10]
        movaps  xmm2, xmmword ptr [BLAKE3_IV+rip]
        /* xmm3 = { r9 low dword, r9 high dword, r8b, stack byte }. */
        movzx   eax, byte ptr [rsp+0xA0]
        movzx   r8d, r8b
        shl     rax, 32
        add     r8, rax                 /* r8 = r8b | (stack byte << 32) */
        movq    xmm3, r9
        movq    xmm4, r8
        punpcklqdq xmm3, xmm4
        /* Load the 16 input dwords and split them by index parity:
         * xmm4 = words 0,2,4,6     xmm5 = words 1,3,5,7
         * xmm6 = words 14,8,10,12  xmm7 = words 15,9,11,13
         * (the last two are lane-rotated by pshufd 0x93). */
        movups  xmm4, xmmword ptr [rdx]
        movups  xmm5, xmmword ptr [rdx+0x10]
        movaps  xmm8, xmm4
        shufps  xmm4, xmm5, 136         /* 0x88: even-indexed dwords */
        shufps  xmm8, xmm5, 221         /* 0xDD: odd-indexed dwords */
        movaps  xmm5, xmm8
        movups  xmm6, xmmword ptr [rdx+0x20]
        movups  xmm7, xmmword ptr [rdx+0x30]
        movaps  xmm8, xmm6
        shufps  xmm6, xmm7, 136
        pshufd  xmm6, xmm6, 0x93
        shufps  xmm8, xmm7, 221
        pshufd  xmm7, xmm8, 0x93
        /* pshufb masks kept in registers for the whole loop. */
        movaps  xmm14, xmmword ptr [ROT8+rip]
        movaps  xmm15, xmmword ptr [ROT16+rip]
        mov     al, 7                   /* round counter; the loop body runs 7 times */
9:
        /* First half of the round: mix with xmm4, then xmm5. */
        paddd   xmm0, xmm4
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        pshufb  xmm3, xmm15             /* rotate each dword by 16 bits */
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1             /* xmm11 is scratch for the shift-based rotates */
        pslld   xmm1, 20
        psrld   xmm11, 12
        por     xmm1, xmm11             /* rotate each dword right by 12 bits */
        paddd   xmm0, xmm5
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        pshufb  xmm3, xmm14             /* rotate each dword right by 8 bits */
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1
        pslld   xmm1, 25
        psrld   xmm11, 7
        por     xmm1, xmm11             /* rotate each dword right by 7 bits */
        /* Rotate the lanes of rows xmm0, xmm3 and xmm2 by 1, 2 and 3
         * positions; xmm1 stays in place. */
        pshufd  xmm0, xmm0, 0x93
        pshufd  xmm3, xmm3, 0x4E
        pshufd  xmm2, xmm2, 0x39
        /* Second half of the round: same mixing with xmm6, then xmm7. */
        paddd   xmm0, xmm6
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        pshufb  xmm3, xmm15
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1
        pslld   xmm1, 20
        psrld   xmm11, 12
        por     xmm1, xmm11
        paddd   xmm0, xmm7
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        pshufb  xmm3, xmm14
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1
        pslld   xmm1, 25
        psrld   xmm11, 7
        por     xmm1, xmm11
        /* Undo the lane rotation applied before the second half. */
        pshufd  xmm0, xmm0, 0x39
        pshufd  xmm3, xmm3, 0x4E
        pshufd  xmm2, xmm2, 0x93
        dec     al
        jz      9f                      /* after the 7th round, skip the reshuffle */
        /* Reshuffle the four message vectors xmm4-xmm7 into the order the
         * next iteration consumes; xmm8 and xmm9 are scratch. */
        movdqa  xmm8, xmm4
        shufps  xmm8, xmm5, 214
        pshufd  xmm9, xmm4, 0x0F
        pshufd  xmm4, xmm8, 0x39
        movdqa  xmm8, xmm6
        shufps  xmm8, xmm7, 250
        pblendw xmm9, xmm8, 0xCC
        movdqa  xmm8, xmm7
        punpcklqdq xmm8, xmm5
        pblendw xmm8, xmm6, 0xC0
        pshufd  xmm8, xmm8, 0x78
        punpckhdq xmm5, xmm7
        punpckldq xmm6, xmm5
        pshufd  xmm7, xmm6, 0x1E
        movdqa  xmm5, xmm9
        movdqa  xmm6, xmm8
        jmp     9b
9:
        /* Fold the lower two rows into the upper two and store 32 bytes
         * back over the input at [rcx]. */
        pxor    xmm0, xmm2
        pxor    xmm1, xmm3
        movups  xmmword ptr [rcx], xmm0
        movups  xmmword ptr [rcx+0x10], xmm1
        movdqa  xmm6, xmmword ptr [rsp]
        movdqa  xmm7, xmmword ptr [rsp+0x10]
        movdqa  xmm8, xmmword ptr [rsp+0x20]
        movdqa  xmm9, xmmword ptr [rsp+0x30]
        movdqa  xmm11, xmmword ptr [rsp+0x40]
        movdqa  xmm14, xmmword ptr [rsp+0x50]
        movdqa  xmm15, xmmword ptr [rsp+0x60]
        add     rsp, 120
        ret


/*
 * _blake3_compress_xof_sse41 / blake3_compress_xof_sse41
 *
 * Same state setup and seven-round loop as blake3_compress_in_place_sse41,
 * but the 32 bytes at [rcx] are left untouched and a 64-byte result is
 * written to a separate output pointer.
 *
 * Inputs as read by the code (layout matches the Microsoft x64
 * convention; names in parentheses are assumptions to confirm against
 * the C prototype):
 *   rcx         pointer to 32 bytes, read only; read a second time
 *               after the rounds (presumably the chaining value)
 *   rdx         pointer to 64 bytes, read only (presumably the block)
 *   r8b         byte placed in lane 2 of xmm3 (presumably block length)
 *   r9          64-bit value placed in lanes 0-1 of xmm3
 *               (presumably the counter)
 *   [rsp+0x28]  at entry: byte placed in lane 3 of xmm3
 *               (presumably the flags)
 *   [rsp+0x30]  at entry: pointer that receives the 64 output bytes
 *
 * Clobbers rax, r8, r10, xmm0-xmm5 and the flags. xmm6-xmm9, xmm11,
 * xmm14 and xmm15 are saved in the frame and restored before returning.
 * As above, the movdqa saves need rsp to be 8 modulo 16 at entry.
 */
.p2align 6
_blake3_compress_xof_sse41:
blake3_compress_xof_sse41:
        sub     rsp, 120                /* 7 x 16 bytes of saves, plus 8 to reach 16-byte alignment */
        movdqa  xmmword ptr [rsp], xmm6
        movdqa  xmmword ptr [rsp+0x10], xmm7
        movdqa  xmmword ptr [rsp+0x20], xmm8
        movdqa  xmmword ptr [rsp+0x30], xmm9
        movdqa  xmmword ptr [rsp+0x40], xmm11
        movdqa  xmmword ptr [rsp+0x50], xmm14
        movdqa  xmmword ptr [rsp+0x60], xmm15
        /* State rows: xmm0, xmm1 = the 32 bytes at [rcx]; xmm2 = BLAKE3_IV. */
        movups  xmm0, xmmword ptr [rcx]
        movups  xmm1, xmmword ptr [rcx+0x10]
        movaps  xmm2, xmmword ptr [BLAKE3_IV+rip]
        movzx   eax, byte ptr [rsp+0xA0]
        movzx   r8d, r8b
        mov     r10, qword ptr [rsp+0xA8]       /* output pointer, held in r10 until the final stores */
        shl     rax, 32
        add     r8, rax                 /* r8 = r8b | (stack byte << 32) */
        movq    xmm3, r9
        movq    xmm4, r8
        punpcklqdq xmm3, xmm4           /* xmm3 = { r9 low, r9 high, r8b, stack byte } */
        /* Load the 16 input dwords and split them by index parity:
         * xmm4 = words 0,2,4,6     xmm5 = words 1,3,5,7
         * xmm6 = words 14,8,10,12  xmm7 = words 15,9,11,13 */
        movups  xmm4, xmmword ptr [rdx]
        movups  xmm5, xmmword ptr [rdx+0x10]
        movaps  xmm8, xmm4
        shufps  xmm4, xmm5, 136         /* 0x88: even-indexed dwords */
        shufps  xmm8, xmm5, 221         /* 0xDD: odd-indexed dwords */
        movaps  xmm5, xmm8
        movups  xmm6, xmmword ptr [rdx+0x20]
        movups  xmm7, xmmword ptr [rdx+0x30]
        movaps  xmm8, xmm6
        shufps  xmm6, xmm7, 136
        pshufd  xmm6, xmm6, 0x93
        shufps  xmm8, xmm7, 221
        pshufd  xmm7, xmm8, 0x93
        /* pshufb masks kept in registers for the whole loop. */
        movaps  xmm14, xmmword ptr [ROT8+rip]
        movaps  xmm15, xmmword ptr [ROT16+rip]
        mov     al, 7                   /* round counter; the loop body runs 7 times */
9:
        /* First half of the round: mix with xmm4, then xmm5. */
        paddd   xmm0, xmm4
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        pshufb  xmm3, xmm15             /* rotate each dword by 16 bits */
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1             /* xmm11 is scratch for the shift-based rotates */
        pslld   xmm1, 20
        psrld   xmm11, 12
        por     xmm1, xmm11             /* rotate each dword right by 12 bits */
        paddd   xmm0, xmm5
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        pshufb  xmm3, xmm14             /* rotate each dword right by 8 bits */
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1
        pslld   xmm1, 25
        psrld   xmm11, 7
        por     xmm1, xmm11             /* rotate each dword right by 7 bits */
        /* Rotate the lanes of rows xmm0, xmm3 and xmm2 by 1, 2 and 3
         * positions; xmm1 stays in place. */
        pshufd  xmm0, xmm0, 0x93
        pshufd  xmm3, xmm3, 0x4E
        pshufd  xmm2, xmm2, 0x39
        /* Second half of the round: same mixing with xmm6, then xmm7. */
        paddd   xmm0, xmm6
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        pshufb  xmm3, xmm15
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1
        pslld   xmm1, 20
        psrld   xmm11, 12
        por     xmm1, xmm11
        paddd   xmm0, xmm7
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        pshufb  xmm3, xmm14
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1
        pslld   xmm1, 25
        psrld   xmm11, 7
        por     xmm1, xmm11
        /* Undo the lane rotation applied before the second half. */
        pshufd  xmm0, xmm0, 0x39
        pshufd  xmm3, xmm3, 0x4E
        pshufd  xmm2, xmm2, 0x93
        dec     al
        jz      9f                      /* after the 7th round, skip the reshuffle */
        /* Reshuffle the four message vectors xmm4-xmm7 into the order the
         * next iteration consumes; xmm8 and xmm9 are scratch. */
        movdqa  xmm8, xmm4
        shufps  xmm8, xmm5, 214
        pshufd  xmm9, xmm4, 0x0F
        pshufd  xmm4, xmm8, 0x39
        movdqa  xmm8, xmm6
        shufps  xmm8, xmm7, 250
        pblendw xmm9, xmm8, 0xCC
        movdqa  xmm8, xmm7
        punpcklqdq xmm8, xmm5
        pblendw xmm8, xmm6, 0xC0
        pshufd  xmm8, xmm8, 0x78
        punpckhdq xmm5, xmm7
        punpckldq xmm6, xmm5
        pshufd  xmm7, xmm6, 0x1E
        movdqa  xmm5, xmm9
        movdqa  xmm6, xmm8
        jmp     9b
9:
        /* Output bytes 0-31  = rows 0,1 xor rows 2,3.
         * Output bytes 32-63 = rows 2,3 xor the original 32 bytes at [rcx]. */
        movdqu  xmm4, xmmword ptr [rcx]
        movdqu  xmm5, xmmword ptr [rcx+0x10]
        pxor    xmm0, xmm2
        pxor    xmm1, xmm3
        pxor    xmm2, xmm4
        pxor    xmm3, xmm5
        movups  xmmword ptr [r10], xmm0
        movups  xmmword ptr [r10+0x10], xmm1
        movups  xmmword ptr [r10+0x20], xmm2
        movups  xmmword ptr [r10+0x30], xmm3
        movdqa  xmm6, xmmword ptr [rsp]
        movdqa  xmm7, xmmword ptr [rsp+0x10]
        movdqa  xmm8, xmmword ptr [rsp+0x20]
        movdqa  xmm9, xmmword ptr [rsp+0x30]
        movdqa  xmm11, xmmword ptr [rsp+0x40]
        movdqa  xmm14, xmmword ptr [rsp+0x50]
        movdqa  xmm15, xmmword ptr [rsp+0x60]
        add     rsp, 120
        ret


/*
 * Constant tables. The section starts 64-byte aligned and every table is
 * exactly 16 bytes, so each label stays 16-byte aligned. The code relies
 * on this where it reads them with movaps/movdqa or as aligned memory
 * operands.
 */
.section .rodata
.p2align 6
/* Four-dword constant loaded as state row xmm2 by the compress functions. */
BLAKE3_IV:
        .long  0x6A09E667, 0xBB67AE85
        .long  0x3C6EF372, 0xA54FF53A
/* pshufb mask: swaps the 16-bit halves of each dword (rotate by 16). */
ROT16:
        .byte  2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9, 14, 15, 12, 13
/* pshufb mask: rotates each dword right by 8 bits. */
ROT8:
        .byte  1, 2, 3, 0, 5, 6, 7, 4, 9, 10, 11, 8, 13, 14, 15, 12
/* Per-lane offsets 0..3, masked and added to the broadcast low counter
 * word at the top of blake3_hash_many_sse41. */
ADD0:
        .long  0, 1, 2, 3
/* Per-lane step of 4, masked at entry and added to the counter lanes
 * after each group of four inputs. */
ADD1:
        .long  4, 4, 4, 4
/* The four BLAKE3_IV words, each replicated across all four lanes.
 * NOTE(review): not referenced in the visible part of this file;
 * presumably used by the four-input path of blake3_hash_many_sse41. */
BLAKE3_IV_0:
        .long  0x6A09E667, 0x6A09E667, 0x6A09E667, 0x6A09E667
BLAKE3_IV_1:
        .long  0xBB67AE85, 0xBB67AE85, 0xBB67AE85, 0xBB67AE85
BLAKE3_IV_2:
        .long  0x3C6EF372, 0x3C6EF372, 0x3C6EF372, 0x3C6EF372
BLAKE3_IV_3:
        .long  0xA54FF53A, 0xA54FF53A, 0xA54FF53A, 0xA54FF53A
/* The value 64 in every lane; the tail paths of blake3_hash_many_sse41
 * insert its first dword into lane 2 of the fourth state row. */
BLAKE3_BLOCK_LEN:
        .long  64, 64, 64, 64
/* Sign-bit mask: xor-ing both operands with it before pcmpgtd turns the
 * signed compare into an unsigned one (used to detect counter carry). */
CMP_MSB_MASK:
        .long  0x80000000, 0x80000000, 0x80000000, 0x80000000