1/* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or https://opensource.org/licenses/CDDL-1.0. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22/* 23 * Based on BLAKE3 v1.3.1, https://github.com/BLAKE3-team/BLAKE3 24 * Copyright (c) 2019-2020 Samuel Neves 25 * Copyright (c) 2022 Tino Reichardt <milky-zfs@mcmilk.de> 26 */ 27 28#if defined(HAVE_SSE4_1) 29 30#define _ASM 31#include <sys/asm_linkage.h> 32 33#if defined(__ELF__) && defined(__CET__) && defined(__has_include) 34#if __has_include(<cet.h>) 35#include <cet.h> 36#endif 37#endif 38 39#if !defined(_CET_ENDBR) 40#define _CET_ENDBR 41#endif 42 43.intel_syntax noprefix 44.global zfs_blake3_compress_in_place_sse41 45.global zfs_blake3_compress_xof_sse41 46.global zfs_blake3_hash_many_sse41 47 48.text 49.type zfs_blake3_hash_many_sse41,@function 50.type zfs_blake3_compress_in_place_sse41,@function 51.type zfs_blake3_compress_xof_sse41,@function 52 53.p2align 6 54zfs_blake3_hash_many_sse41: 55 _CET_ENDBR 56 push r15 57 push r14 58 push r13 59 push r12 60 push rbx 61 push rbp 62 mov rbp, rsp 63 sub rsp, 360 64 and rsp, 0xFFFFFFFFFFFFFFC0 65 neg r9d 66 movd xmm0, r9d 67 pshufd xmm0, xmm0, 0x00 68 movdqa xmmword ptr [rsp+0x130], xmm0 69 movdqa xmm1, xmm0 70 pand xmm1, xmmword ptr [ADD0+rip] 71 pand xmm0, xmmword ptr 
[ADD1+rip] 72 movdqa xmmword ptr [rsp+0x150], xmm0 73 movd xmm0, r8d 74 pshufd xmm0, xmm0, 0x00 75 paddd xmm0, xmm1 76 movdqa xmmword ptr [rsp+0x110], xmm0 77 pxor xmm0, xmmword ptr [CMP_MSB_MASK+rip] 78 pxor xmm1, xmmword ptr [CMP_MSB_MASK+rip] 79 pcmpgtd xmm1, xmm0 80 shr r8, 32 81 movd xmm2, r8d 82 pshufd xmm2, xmm2, 0x00 83 psubd xmm2, xmm1 84 movdqa xmmword ptr [rsp+0x120], xmm2 85 mov rbx, qword ptr [rbp+0x50] 86 mov r15, rdx 87 shl r15, 6 88 movzx r13d, byte ptr [rbp+0x38] 89 movzx r12d, byte ptr [rbp+0x48] 90 cmp rsi, 4 91 jc 3f 922: 93 movdqu xmm3, xmmword ptr [rcx] 94 pshufd xmm0, xmm3, 0x00 95 pshufd xmm1, xmm3, 0x55 96 pshufd xmm2, xmm3, 0xAA 97 pshufd xmm3, xmm3, 0xFF 98 movdqu xmm7, xmmword ptr [rcx+0x10] 99 pshufd xmm4, xmm7, 0x00 100 pshufd xmm5, xmm7, 0x55 101 pshufd xmm6, xmm7, 0xAA 102 pshufd xmm7, xmm7, 0xFF 103 mov r8, qword ptr [rdi] 104 mov r9, qword ptr [rdi+0x8] 105 mov r10, qword ptr [rdi+0x10] 106 mov r11, qword ptr [rdi+0x18] 107 movzx eax, byte ptr [rbp+0x40] 108 or eax, r13d 109 xor edx, edx 1109: 111 mov r14d, eax 112 or eax, r12d 113 add rdx, 64 114 cmp rdx, r15 115 cmovne eax, r14d 116 movdqu xmm8, xmmword ptr [r8+rdx-0x40] 117 movdqu xmm9, xmmword ptr [r9+rdx-0x40] 118 movdqu xmm10, xmmword ptr [r10+rdx-0x40] 119 movdqu xmm11, xmmword ptr [r11+rdx-0x40] 120 movdqa xmm12, xmm8 121 punpckldq xmm8, xmm9 122 punpckhdq xmm12, xmm9 123 movdqa xmm14, xmm10 124 punpckldq xmm10, xmm11 125 punpckhdq xmm14, xmm11 126 movdqa xmm9, xmm8 127 punpcklqdq xmm8, xmm10 128 punpckhqdq xmm9, xmm10 129 movdqa xmm13, xmm12 130 punpcklqdq xmm12, xmm14 131 punpckhqdq xmm13, xmm14 132 movdqa xmmword ptr [rsp], xmm8 133 movdqa xmmword ptr [rsp+0x10], xmm9 134 movdqa xmmword ptr [rsp+0x20], xmm12 135 movdqa xmmword ptr [rsp+0x30], xmm13 136 movdqu xmm8, xmmword ptr [r8+rdx-0x30] 137 movdqu xmm9, xmmword ptr [r9+rdx-0x30] 138 movdqu xmm10, xmmword ptr [r10+rdx-0x30] 139 movdqu xmm11, xmmword ptr [r11+rdx-0x30] 140 movdqa xmm12, xmm8 141 punpckldq xmm8, xmm9 
142 punpckhdq xmm12, xmm9 143 movdqa xmm14, xmm10 144 punpckldq xmm10, xmm11 145 punpckhdq xmm14, xmm11 146 movdqa xmm9, xmm8 147 punpcklqdq xmm8, xmm10 148 punpckhqdq xmm9, xmm10 149 movdqa xmm13, xmm12 150 punpcklqdq xmm12, xmm14 151 punpckhqdq xmm13, xmm14 152 movdqa xmmword ptr [rsp+0x40], xmm8 153 movdqa xmmword ptr [rsp+0x50], xmm9 154 movdqa xmmword ptr [rsp+0x60], xmm12 155 movdqa xmmword ptr [rsp+0x70], xmm13 156 movdqu xmm8, xmmword ptr [r8+rdx-0x20] 157 movdqu xmm9, xmmword ptr [r9+rdx-0x20] 158 movdqu xmm10, xmmword ptr [r10+rdx-0x20] 159 movdqu xmm11, xmmword ptr [r11+rdx-0x20] 160 movdqa xmm12, xmm8 161 punpckldq xmm8, xmm9 162 punpckhdq xmm12, xmm9 163 movdqa xmm14, xmm10 164 punpckldq xmm10, xmm11 165 punpckhdq xmm14, xmm11 166 movdqa xmm9, xmm8 167 punpcklqdq xmm8, xmm10 168 punpckhqdq xmm9, xmm10 169 movdqa xmm13, xmm12 170 punpcklqdq xmm12, xmm14 171 punpckhqdq xmm13, xmm14 172 movdqa xmmword ptr [rsp+0x80], xmm8 173 movdqa xmmword ptr [rsp+0x90], xmm9 174 movdqa xmmword ptr [rsp+0xA0], xmm12 175 movdqa xmmword ptr [rsp+0xB0], xmm13 176 movdqu xmm8, xmmword ptr [r8+rdx-0x10] 177 movdqu xmm9, xmmword ptr [r9+rdx-0x10] 178 movdqu xmm10, xmmword ptr [r10+rdx-0x10] 179 movdqu xmm11, xmmword ptr [r11+rdx-0x10] 180 movdqa xmm12, xmm8 181 punpckldq xmm8, xmm9 182 punpckhdq xmm12, xmm9 183 movdqa xmm14, xmm10 184 punpckldq xmm10, xmm11 185 punpckhdq xmm14, xmm11 186 movdqa xmm9, xmm8 187 punpcklqdq xmm8, xmm10 188 punpckhqdq xmm9, xmm10 189 movdqa xmm13, xmm12 190 punpcklqdq xmm12, xmm14 191 punpckhqdq xmm13, xmm14 192 movdqa xmmword ptr [rsp+0xC0], xmm8 193 movdqa xmmword ptr [rsp+0xD0], xmm9 194 movdqa xmmword ptr [rsp+0xE0], xmm12 195 movdqa xmmword ptr [rsp+0xF0], xmm13 196 movdqa xmm9, xmmword ptr [BLAKE3_IV_1+rip] 197 movdqa xmm10, xmmword ptr [BLAKE3_IV_2+rip] 198 movdqa xmm11, xmmword ptr [BLAKE3_IV_3+rip] 199 movdqa xmm12, xmmword ptr [rsp+0x110] 200 movdqa xmm13, xmmword ptr [rsp+0x120] 201 movdqa xmm14, xmmword ptr [BLAKE3_BLOCK_LEN+rip] 202 
movd xmm15, eax 203 pshufd xmm15, xmm15, 0x00 204 prefetcht0 [r8+rdx+0x80] 205 prefetcht0 [r9+rdx+0x80] 206 prefetcht0 [r10+rdx+0x80] 207 prefetcht0 [r11+rdx+0x80] 208 paddd xmm0, xmmword ptr [rsp] 209 paddd xmm1, xmmword ptr [rsp+0x20] 210 paddd xmm2, xmmword ptr [rsp+0x40] 211 paddd xmm3, xmmword ptr [rsp+0x60] 212 paddd xmm0, xmm4 213 paddd xmm1, xmm5 214 paddd xmm2, xmm6 215 paddd xmm3, xmm7 216 pxor xmm12, xmm0 217 pxor xmm13, xmm1 218 pxor xmm14, xmm2 219 pxor xmm15, xmm3 220 movdqa xmm8, xmmword ptr [ROT16+rip] 221 pshufb xmm12, xmm8 222 pshufb xmm13, xmm8 223 pshufb xmm14, xmm8 224 pshufb xmm15, xmm8 225 movdqa xmm8, xmmword ptr [BLAKE3_IV_0+rip] 226 paddd xmm8, xmm12 227 paddd xmm9, xmm13 228 paddd xmm10, xmm14 229 paddd xmm11, xmm15 230 pxor xmm4, xmm8 231 pxor xmm5, xmm9 232 pxor xmm6, xmm10 233 pxor xmm7, xmm11 234 movdqa xmmword ptr [rsp+0x100], xmm8 235 movdqa xmm8, xmm4 236 psrld xmm8, 12 237 pslld xmm4, 20 238 por xmm4, xmm8 239 movdqa xmm8, xmm5 240 psrld xmm8, 12 241 pslld xmm5, 20 242 por xmm5, xmm8 243 movdqa xmm8, xmm6 244 psrld xmm8, 12 245 pslld xmm6, 20 246 por xmm6, xmm8 247 movdqa xmm8, xmm7 248 psrld xmm8, 12 249 pslld xmm7, 20 250 por xmm7, xmm8 251 paddd xmm0, xmmword ptr [rsp+0x10] 252 paddd xmm1, xmmword ptr [rsp+0x30] 253 paddd xmm2, xmmword ptr [rsp+0x50] 254 paddd xmm3, xmmword ptr [rsp+0x70] 255 paddd xmm0, xmm4 256 paddd xmm1, xmm5 257 paddd xmm2, xmm6 258 paddd xmm3, xmm7 259 pxor xmm12, xmm0 260 pxor xmm13, xmm1 261 pxor xmm14, xmm2 262 pxor xmm15, xmm3 263 movdqa xmm8, xmmword ptr [ROT8+rip] 264 pshufb xmm12, xmm8 265 pshufb xmm13, xmm8 266 pshufb xmm14, xmm8 267 pshufb xmm15, xmm8 268 movdqa xmm8, xmmword ptr [rsp+0x100] 269 paddd xmm8, xmm12 270 paddd xmm9, xmm13 271 paddd xmm10, xmm14 272 paddd xmm11, xmm15 273 pxor xmm4, xmm8 274 pxor xmm5, xmm9 275 pxor xmm6, xmm10 276 pxor xmm7, xmm11 277 movdqa xmmword ptr [rsp+0x100], xmm8 278 movdqa xmm8, xmm4 279 psrld xmm8, 7 280 pslld xmm4, 25 281 por xmm4, xmm8 282 movdqa xmm8, 
xmm5 283 psrld xmm8, 7 284 pslld xmm5, 25 285 por xmm5, xmm8 286 movdqa xmm8, xmm6 287 psrld xmm8, 7 288 pslld xmm6, 25 289 por xmm6, xmm8 290 movdqa xmm8, xmm7 291 psrld xmm8, 7 292 pslld xmm7, 25 293 por xmm7, xmm8 294 paddd xmm0, xmmword ptr [rsp+0x80] 295 paddd xmm1, xmmword ptr [rsp+0xA0] 296 paddd xmm2, xmmword ptr [rsp+0xC0] 297 paddd xmm3, xmmword ptr [rsp+0xE0] 298 paddd xmm0, xmm5 299 paddd xmm1, xmm6 300 paddd xmm2, xmm7 301 paddd xmm3, xmm4 302 pxor xmm15, xmm0 303 pxor xmm12, xmm1 304 pxor xmm13, xmm2 305 pxor xmm14, xmm3 306 movdqa xmm8, xmmword ptr [ROT16+rip] 307 pshufb xmm15, xmm8 308 pshufb xmm12, xmm8 309 pshufb xmm13, xmm8 310 pshufb xmm14, xmm8 311 paddd xmm10, xmm15 312 paddd xmm11, xmm12 313 movdqa xmm8, xmmword ptr [rsp+0x100] 314 paddd xmm8, xmm13 315 paddd xmm9, xmm14 316 pxor xmm5, xmm10 317 pxor xmm6, xmm11 318 pxor xmm7, xmm8 319 pxor xmm4, xmm9 320 movdqa xmmword ptr [rsp+0x100], xmm8 321 movdqa xmm8, xmm5 322 psrld xmm8, 12 323 pslld xmm5, 20 324 por xmm5, xmm8 325 movdqa xmm8, xmm6 326 psrld xmm8, 12 327 pslld xmm6, 20 328 por xmm6, xmm8 329 movdqa xmm8, xmm7 330 psrld xmm8, 12 331 pslld xmm7, 20 332 por xmm7, xmm8 333 movdqa xmm8, xmm4 334 psrld xmm8, 12 335 pslld xmm4, 20 336 por xmm4, xmm8 337 paddd xmm0, xmmword ptr [rsp+0x90] 338 paddd xmm1, xmmword ptr [rsp+0xB0] 339 paddd xmm2, xmmword ptr [rsp+0xD0] 340 paddd xmm3, xmmword ptr [rsp+0xF0] 341 paddd xmm0, xmm5 342 paddd xmm1, xmm6 343 paddd xmm2, xmm7 344 paddd xmm3, xmm4 345 pxor xmm15, xmm0 346 pxor xmm12, xmm1 347 pxor xmm13, xmm2 348 pxor xmm14, xmm3 349 movdqa xmm8, xmmword ptr [ROT8+rip] 350 pshufb xmm15, xmm8 351 pshufb xmm12, xmm8 352 pshufb xmm13, xmm8 353 pshufb xmm14, xmm8 354 paddd xmm10, xmm15 355 paddd xmm11, xmm12 356 movdqa xmm8, xmmword ptr [rsp+0x100] 357 paddd xmm8, xmm13 358 paddd xmm9, xmm14 359 pxor xmm5, xmm10 360 pxor xmm6, xmm11 361 pxor xmm7, xmm8 362 pxor xmm4, xmm9 363 movdqa xmmword ptr [rsp+0x100], xmm8 364 movdqa xmm8, xmm5 365 psrld xmm8, 7 366 
pslld xmm5, 25 367 por xmm5, xmm8 368 movdqa xmm8, xmm6 369 psrld xmm8, 7 370 pslld xmm6, 25 371 por xmm6, xmm8 372 movdqa xmm8, xmm7 373 psrld xmm8, 7 374 pslld xmm7, 25 375 por xmm7, xmm8 376 movdqa xmm8, xmm4 377 psrld xmm8, 7 378 pslld xmm4, 25 379 por xmm4, xmm8 380 paddd xmm0, xmmword ptr [rsp+0x20] 381 paddd xmm1, xmmword ptr [rsp+0x30] 382 paddd xmm2, xmmword ptr [rsp+0x70] 383 paddd xmm3, xmmword ptr [rsp+0x40] 384 paddd xmm0, xmm4 385 paddd xmm1, xmm5 386 paddd xmm2, xmm6 387 paddd xmm3, xmm7 388 pxor xmm12, xmm0 389 pxor xmm13, xmm1 390 pxor xmm14, xmm2 391 pxor xmm15, xmm3 392 movdqa xmm8, xmmword ptr [ROT16+rip] 393 pshufb xmm12, xmm8 394 pshufb xmm13, xmm8 395 pshufb xmm14, xmm8 396 pshufb xmm15, xmm8 397 movdqa xmm8, xmmword ptr [rsp+0x100] 398 paddd xmm8, xmm12 399 paddd xmm9, xmm13 400 paddd xmm10, xmm14 401 paddd xmm11, xmm15 402 pxor xmm4, xmm8 403 pxor xmm5, xmm9 404 pxor xmm6, xmm10 405 pxor xmm7, xmm11 406 movdqa xmmword ptr [rsp+0x100], xmm8 407 movdqa xmm8, xmm4 408 psrld xmm8, 12 409 pslld xmm4, 20 410 por xmm4, xmm8 411 movdqa xmm8, xmm5 412 psrld xmm8, 12 413 pslld xmm5, 20 414 por xmm5, xmm8 415 movdqa xmm8, xmm6 416 psrld xmm8, 12 417 pslld xmm6, 20 418 por xmm6, xmm8 419 movdqa xmm8, xmm7 420 psrld xmm8, 12 421 pslld xmm7, 20 422 por xmm7, xmm8 423 paddd xmm0, xmmword ptr [rsp+0x60] 424 paddd xmm1, xmmword ptr [rsp+0xA0] 425 paddd xmm2, xmmword ptr [rsp] 426 paddd xmm3, xmmword ptr [rsp+0xD0] 427 paddd xmm0, xmm4 428 paddd xmm1, xmm5 429 paddd xmm2, xmm6 430 paddd xmm3, xmm7 431 pxor xmm12, xmm0 432 pxor xmm13, xmm1 433 pxor xmm14, xmm2 434 pxor xmm15, xmm3 435 movdqa xmm8, xmmword ptr [ROT8+rip] 436 pshufb xmm12, xmm8 437 pshufb xmm13, xmm8 438 pshufb xmm14, xmm8 439 pshufb xmm15, xmm8 440 movdqa xmm8, xmmword ptr [rsp+0x100] 441 paddd xmm8, xmm12 442 paddd xmm9, xmm13 443 paddd xmm10, xmm14 444 paddd xmm11, xmm15 445 pxor xmm4, xmm8 446 pxor xmm5, xmm9 447 pxor xmm6, xmm10 448 pxor xmm7, xmm11 449 movdqa xmmword ptr [rsp+0x100], xmm8 
450 movdqa xmm8, xmm4 451 psrld xmm8, 7 452 pslld xmm4, 25 453 por xmm4, xmm8 454 movdqa xmm8, xmm5 455 psrld xmm8, 7 456 pslld xmm5, 25 457 por xmm5, xmm8 458 movdqa xmm8, xmm6 459 psrld xmm8, 7 460 pslld xmm6, 25 461 por xmm6, xmm8 462 movdqa xmm8, xmm7 463 psrld xmm8, 7 464 pslld xmm7, 25 465 por xmm7, xmm8 466 paddd xmm0, xmmword ptr [rsp+0x10] 467 paddd xmm1, xmmword ptr [rsp+0xC0] 468 paddd xmm2, xmmword ptr [rsp+0x90] 469 paddd xmm3, xmmword ptr [rsp+0xF0] 470 paddd xmm0, xmm5 471 paddd xmm1, xmm6 472 paddd xmm2, xmm7 473 paddd xmm3, xmm4 474 pxor xmm15, xmm0 475 pxor xmm12, xmm1 476 pxor xmm13, xmm2 477 pxor xmm14, xmm3 478 movdqa xmm8, xmmword ptr [ROT16+rip] 479 pshufb xmm15, xmm8 480 pshufb xmm12, xmm8 481 pshufb xmm13, xmm8 482 pshufb xmm14, xmm8 483 paddd xmm10, xmm15 484 paddd xmm11, xmm12 485 movdqa xmm8, xmmword ptr [rsp+0x100] 486 paddd xmm8, xmm13 487 paddd xmm9, xmm14 488 pxor xmm5, xmm10 489 pxor xmm6, xmm11 490 pxor xmm7, xmm8 491 pxor xmm4, xmm9 492 movdqa xmmword ptr [rsp+0x100], xmm8 493 movdqa xmm8, xmm5 494 psrld xmm8, 12 495 pslld xmm5, 20 496 por xmm5, xmm8 497 movdqa xmm8, xmm6 498 psrld xmm8, 12 499 pslld xmm6, 20 500 por xmm6, xmm8 501 movdqa xmm8, xmm7 502 psrld xmm8, 12 503 pslld xmm7, 20 504 por xmm7, xmm8 505 movdqa xmm8, xmm4 506 psrld xmm8, 12 507 pslld xmm4, 20 508 por xmm4, xmm8 509 paddd xmm0, xmmword ptr [rsp+0xB0] 510 paddd xmm1, xmmword ptr [rsp+0x50] 511 paddd xmm2, xmmword ptr [rsp+0xE0] 512 paddd xmm3, xmmword ptr [rsp+0x80] 513 paddd xmm0, xmm5 514 paddd xmm1, xmm6 515 paddd xmm2, xmm7 516 paddd xmm3, xmm4 517 pxor xmm15, xmm0 518 pxor xmm12, xmm1 519 pxor xmm13, xmm2 520 pxor xmm14, xmm3 521 movdqa xmm8, xmmword ptr [ROT8+rip] 522 pshufb xmm15, xmm8 523 pshufb xmm12, xmm8 524 pshufb xmm13, xmm8 525 pshufb xmm14, xmm8 526 paddd xmm10, xmm15 527 paddd xmm11, xmm12 528 movdqa xmm8, xmmword ptr [rsp+0x100] 529 paddd xmm8, xmm13 530 paddd xmm9, xmm14 531 pxor xmm5, xmm10 532 pxor xmm6, xmm11 533 pxor xmm7, xmm8 534 pxor 
xmm4, xmm9 535 movdqa xmmword ptr [rsp+0x100], xmm8 536 movdqa xmm8, xmm5 537 psrld xmm8, 7 538 pslld xmm5, 25 539 por xmm5, xmm8 540 movdqa xmm8, xmm6 541 psrld xmm8, 7 542 pslld xmm6, 25 543 por xmm6, xmm8 544 movdqa xmm8, xmm7 545 psrld xmm8, 7 546 pslld xmm7, 25 547 por xmm7, xmm8 548 movdqa xmm8, xmm4 549 psrld xmm8, 7 550 pslld xmm4, 25 551 por xmm4, xmm8 552 paddd xmm0, xmmword ptr [rsp+0x30] 553 paddd xmm1, xmmword ptr [rsp+0xA0] 554 paddd xmm2, xmmword ptr [rsp+0xD0] 555 paddd xmm3, xmmword ptr [rsp+0x70] 556 paddd xmm0, xmm4 557 paddd xmm1, xmm5 558 paddd xmm2, xmm6 559 paddd xmm3, xmm7 560 pxor xmm12, xmm0 561 pxor xmm13, xmm1 562 pxor xmm14, xmm2 563 pxor xmm15, xmm3 564 movdqa xmm8, xmmword ptr [ROT16+rip] 565 pshufb xmm12, xmm8 566 pshufb xmm13, xmm8 567 pshufb xmm14, xmm8 568 pshufb xmm15, xmm8 569 movdqa xmm8, xmmword ptr [rsp+0x100] 570 paddd xmm8, xmm12 571 paddd xmm9, xmm13 572 paddd xmm10, xmm14 573 paddd xmm11, xmm15 574 pxor xmm4, xmm8 575 pxor xmm5, xmm9 576 pxor xmm6, xmm10 577 pxor xmm7, xmm11 578 movdqa xmmword ptr [rsp+0x100], xmm8 579 movdqa xmm8, xmm4 580 psrld xmm8, 12 581 pslld xmm4, 20 582 por xmm4, xmm8 583 movdqa xmm8, xmm5 584 psrld xmm8, 12 585 pslld xmm5, 20 586 por xmm5, xmm8 587 movdqa xmm8, xmm6 588 psrld xmm8, 12 589 pslld xmm6, 20 590 por xmm6, xmm8 591 movdqa xmm8, xmm7 592 psrld xmm8, 12 593 pslld xmm7, 20 594 por xmm7, xmm8 595 paddd xmm0, xmmword ptr [rsp+0x40] 596 paddd xmm1, xmmword ptr [rsp+0xC0] 597 paddd xmm2, xmmword ptr [rsp+0x20] 598 paddd xmm3, xmmword ptr [rsp+0xE0] 599 paddd xmm0, xmm4 600 paddd xmm1, xmm5 601 paddd xmm2, xmm6 602 paddd xmm3, xmm7 603 pxor xmm12, xmm0 604 pxor xmm13, xmm1 605 pxor xmm14, xmm2 606 pxor xmm15, xmm3 607 movdqa xmm8, xmmword ptr [ROT8+rip] 608 pshufb xmm12, xmm8 609 pshufb xmm13, xmm8 610 pshufb xmm14, xmm8 611 pshufb xmm15, xmm8 612 movdqa xmm8, xmmword ptr [rsp+0x100] 613 paddd xmm8, xmm12 614 paddd xmm9, xmm13 615 paddd xmm10, xmm14 616 paddd xmm11, xmm15 617 pxor xmm4, xmm8 
618 pxor xmm5, xmm9 619 pxor xmm6, xmm10 620 pxor xmm7, xmm11 621 movdqa xmmword ptr [rsp+0x100], xmm8 622 movdqa xmm8, xmm4 623 psrld xmm8, 7 624 pslld xmm4, 25 625 por xmm4, xmm8 626 movdqa xmm8, xmm5 627 psrld xmm8, 7 628 pslld xmm5, 25 629 por xmm5, xmm8 630 movdqa xmm8, xmm6 631 psrld xmm8, 7 632 pslld xmm6, 25 633 por xmm6, xmm8 634 movdqa xmm8, xmm7 635 psrld xmm8, 7 636 pslld xmm7, 25 637 por xmm7, xmm8 638 paddd xmm0, xmmword ptr [rsp+0x60] 639 paddd xmm1, xmmword ptr [rsp+0x90] 640 paddd xmm2, xmmword ptr [rsp+0xB0] 641 paddd xmm3, xmmword ptr [rsp+0x80] 642 paddd xmm0, xmm5 643 paddd xmm1, xmm6 644 paddd xmm2, xmm7 645 paddd xmm3, xmm4 646 pxor xmm15, xmm0 647 pxor xmm12, xmm1 648 pxor xmm13, xmm2 649 pxor xmm14, xmm3 650 movdqa xmm8, xmmword ptr [ROT16+rip] 651 pshufb xmm15, xmm8 652 pshufb xmm12, xmm8 653 pshufb xmm13, xmm8 654 pshufb xmm14, xmm8 655 paddd xmm10, xmm15 656 paddd xmm11, xmm12 657 movdqa xmm8, xmmword ptr [rsp+0x100] 658 paddd xmm8, xmm13 659 paddd xmm9, xmm14 660 pxor xmm5, xmm10 661 pxor xmm6, xmm11 662 pxor xmm7, xmm8 663 pxor xmm4, xmm9 664 movdqa xmmword ptr [rsp+0x100], xmm8 665 movdqa xmm8, xmm5 666 psrld xmm8, 12 667 pslld xmm5, 20 668 por xmm5, xmm8 669 movdqa xmm8, xmm6 670 psrld xmm8, 12 671 pslld xmm6, 20 672 por xmm6, xmm8 673 movdqa xmm8, xmm7 674 psrld xmm8, 12 675 pslld xmm7, 20 676 por xmm7, xmm8 677 movdqa xmm8, xmm4 678 psrld xmm8, 12 679 pslld xmm4, 20 680 por xmm4, xmm8 681 paddd xmm0, xmmword ptr [rsp+0x50] 682 paddd xmm1, xmmword ptr [rsp] 683 paddd xmm2, xmmword ptr [rsp+0xF0] 684 paddd xmm3, xmmword ptr [rsp+0x10] 685 paddd xmm0, xmm5 686 paddd xmm1, xmm6 687 paddd xmm2, xmm7 688 paddd xmm3, xmm4 689 pxor xmm15, xmm0 690 pxor xmm12, xmm1 691 pxor xmm13, xmm2 692 pxor xmm14, xmm3 693 movdqa xmm8, xmmword ptr [ROT8+rip] 694 pshufb xmm15, xmm8 695 pshufb xmm12, xmm8 696 pshufb xmm13, xmm8 697 pshufb xmm14, xmm8 698 paddd xmm10, xmm15 699 paddd xmm11, xmm12 700 movdqa xmm8, xmmword ptr [rsp+0x100] 701 paddd xmm8, 
xmm13 702 paddd xmm9, xmm14 703 pxor xmm5, xmm10 704 pxor xmm6, xmm11 705 pxor xmm7, xmm8 706 pxor xmm4, xmm9 707 movdqa xmmword ptr [rsp+0x100], xmm8 708 movdqa xmm8, xmm5 709 psrld xmm8, 7 710 pslld xmm5, 25 711 por xmm5, xmm8 712 movdqa xmm8, xmm6 713 psrld xmm8, 7 714 pslld xmm6, 25 715 por xmm6, xmm8 716 movdqa xmm8, xmm7 717 psrld xmm8, 7 718 pslld xmm7, 25 719 por xmm7, xmm8 720 movdqa xmm8, xmm4 721 psrld xmm8, 7 722 pslld xmm4, 25 723 por xmm4, xmm8 724 paddd xmm0, xmmword ptr [rsp+0xA0] 725 paddd xmm1, xmmword ptr [rsp+0xC0] 726 paddd xmm2, xmmword ptr [rsp+0xE0] 727 paddd xmm3, xmmword ptr [rsp+0xD0] 728 paddd xmm0, xmm4 729 paddd xmm1, xmm5 730 paddd xmm2, xmm6 731 paddd xmm3, xmm7 732 pxor xmm12, xmm0 733 pxor xmm13, xmm1 734 pxor xmm14, xmm2 735 pxor xmm15, xmm3 736 movdqa xmm8, xmmword ptr [ROT16+rip] 737 pshufb xmm12, xmm8 738 pshufb xmm13, xmm8 739 pshufb xmm14, xmm8 740 pshufb xmm15, xmm8 741 movdqa xmm8, xmmword ptr [rsp+0x100] 742 paddd xmm8, xmm12 743 paddd xmm9, xmm13 744 paddd xmm10, xmm14 745 paddd xmm11, xmm15 746 pxor xmm4, xmm8 747 pxor xmm5, xmm9 748 pxor xmm6, xmm10 749 pxor xmm7, xmm11 750 movdqa xmmword ptr [rsp+0x100], xmm8 751 movdqa xmm8, xmm4 752 psrld xmm8, 12 753 pslld xmm4, 20 754 por xmm4, xmm8 755 movdqa xmm8, xmm5 756 psrld xmm8, 12 757 pslld xmm5, 20 758 por xmm5, xmm8 759 movdqa xmm8, xmm6 760 psrld xmm8, 12 761 pslld xmm6, 20 762 por xmm6, xmm8 763 movdqa xmm8, xmm7 764 psrld xmm8, 12 765 pslld xmm7, 20 766 por xmm7, xmm8 767 paddd xmm0, xmmword ptr [rsp+0x70] 768 paddd xmm1, xmmword ptr [rsp+0x90] 769 paddd xmm2, xmmword ptr [rsp+0x30] 770 paddd xmm3, xmmword ptr [rsp+0xF0] 771 paddd xmm0, xmm4 772 paddd xmm1, xmm5 773 paddd xmm2, xmm6 774 paddd xmm3, xmm7 775 pxor xmm12, xmm0 776 pxor xmm13, xmm1 777 pxor xmm14, xmm2 778 pxor xmm15, xmm3 779 movdqa xmm8, xmmword ptr [ROT8+rip] 780 pshufb xmm12, xmm8 781 pshufb xmm13, xmm8 782 pshufb xmm14, xmm8 783 pshufb xmm15, xmm8 784 movdqa xmm8, xmmword ptr [rsp+0x100] 785 paddd 
xmm8, xmm12 786 paddd xmm9, xmm13 787 paddd xmm10, xmm14 788 paddd xmm11, xmm15 789 pxor xmm4, xmm8 790 pxor xmm5, xmm9 791 pxor xmm6, xmm10 792 pxor xmm7, xmm11 793 movdqa xmmword ptr [rsp+0x100], xmm8 794 movdqa xmm8, xmm4 795 psrld xmm8, 7 796 pslld xmm4, 25 797 por xmm4, xmm8 798 movdqa xmm8, xmm5 799 psrld xmm8, 7 800 pslld xmm5, 25 801 por xmm5, xmm8 802 movdqa xmm8, xmm6 803 psrld xmm8, 7 804 pslld xmm6, 25 805 por xmm6, xmm8 806 movdqa xmm8, xmm7 807 psrld xmm8, 7 808 pslld xmm7, 25 809 por xmm7, xmm8 810 paddd xmm0, xmmword ptr [rsp+0x40] 811 paddd xmm1, xmmword ptr [rsp+0xB0] 812 paddd xmm2, xmmword ptr [rsp+0x50] 813 paddd xmm3, xmmword ptr [rsp+0x10] 814 paddd xmm0, xmm5 815 paddd xmm1, xmm6 816 paddd xmm2, xmm7 817 paddd xmm3, xmm4 818 pxor xmm15, xmm0 819 pxor xmm12, xmm1 820 pxor xmm13, xmm2 821 pxor xmm14, xmm3 822 movdqa xmm8, xmmword ptr [ROT16+rip] 823 pshufb xmm15, xmm8 824 pshufb xmm12, xmm8 825 pshufb xmm13, xmm8 826 pshufb xmm14, xmm8 827 paddd xmm10, xmm15 828 paddd xmm11, xmm12 829 movdqa xmm8, xmmword ptr [rsp+0x100] 830 paddd xmm8, xmm13 831 paddd xmm9, xmm14 832 pxor xmm5, xmm10 833 pxor xmm6, xmm11 834 pxor xmm7, xmm8 835 pxor xmm4, xmm9 836 movdqa xmmword ptr [rsp+0x100], xmm8 837 movdqa xmm8, xmm5 838 psrld xmm8, 12 839 pslld xmm5, 20 840 por xmm5, xmm8 841 movdqa xmm8, xmm6 842 psrld xmm8, 12 843 pslld xmm6, 20 844 por xmm6, xmm8 845 movdqa xmm8, xmm7 846 psrld xmm8, 12 847 pslld xmm7, 20 848 por xmm7, xmm8 849 movdqa xmm8, xmm4 850 psrld xmm8, 12 851 pslld xmm4, 20 852 por xmm4, xmm8 853 paddd xmm0, xmmword ptr [rsp] 854 paddd xmm1, xmmword ptr [rsp+0x20] 855 paddd xmm2, xmmword ptr [rsp+0x80] 856 paddd xmm3, xmmword ptr [rsp+0x60] 857 paddd xmm0, xmm5 858 paddd xmm1, xmm6 859 paddd xmm2, xmm7 860 paddd xmm3, xmm4 861 pxor xmm15, xmm0 862 pxor xmm12, xmm1 863 pxor xmm13, xmm2 864 pxor xmm14, xmm3 865 movdqa xmm8, xmmword ptr [ROT8+rip] 866 pshufb xmm15, xmm8 867 pshufb xmm12, xmm8 868 pshufb xmm13, xmm8 869 pshufb xmm14, xmm8 870 
paddd xmm10, xmm15 871 paddd xmm11, xmm12 872 movdqa xmm8, xmmword ptr [rsp+0x100] 873 paddd xmm8, xmm13 874 paddd xmm9, xmm14 875 pxor xmm5, xmm10 876 pxor xmm6, xmm11 877 pxor xmm7, xmm8 878 pxor xmm4, xmm9 879 movdqa xmmword ptr [rsp+0x100], xmm8 880 movdqa xmm8, xmm5 881 psrld xmm8, 7 882 pslld xmm5, 25 883 por xmm5, xmm8 884 movdqa xmm8, xmm6 885 psrld xmm8, 7 886 pslld xmm6, 25 887 por xmm6, xmm8 888 movdqa xmm8, xmm7 889 psrld xmm8, 7 890 pslld xmm7, 25 891 por xmm7, xmm8 892 movdqa xmm8, xmm4 893 psrld xmm8, 7 894 pslld xmm4, 25 895 por xmm4, xmm8 896 paddd xmm0, xmmword ptr [rsp+0xC0] 897 paddd xmm1, xmmword ptr [rsp+0x90] 898 paddd xmm2, xmmword ptr [rsp+0xF0] 899 paddd xmm3, xmmword ptr [rsp+0xE0] 900 paddd xmm0, xmm4 901 paddd xmm1, xmm5 902 paddd xmm2, xmm6 903 paddd xmm3, xmm7 904 pxor xmm12, xmm0 905 pxor xmm13, xmm1 906 pxor xmm14, xmm2 907 pxor xmm15, xmm3 908 movdqa xmm8, xmmword ptr [ROT16+rip] 909 pshufb xmm12, xmm8 910 pshufb xmm13, xmm8 911 pshufb xmm14, xmm8 912 pshufb xmm15, xmm8 913 movdqa xmm8, xmmword ptr [rsp+0x100] 914 paddd xmm8, xmm12 915 paddd xmm9, xmm13 916 paddd xmm10, xmm14 917 paddd xmm11, xmm15 918 pxor xmm4, xmm8 919 pxor xmm5, xmm9 920 pxor xmm6, xmm10 921 pxor xmm7, xmm11 922 movdqa xmmword ptr [rsp+0x100], xmm8 923 movdqa xmm8, xmm4 924 psrld xmm8, 12 925 pslld xmm4, 20 926 por xmm4, xmm8 927 movdqa xmm8, xmm5 928 psrld xmm8, 12 929 pslld xmm5, 20 930 por xmm5, xmm8 931 movdqa xmm8, xmm6 932 psrld xmm8, 12 933 pslld xmm6, 20 934 por xmm6, xmm8 935 movdqa xmm8, xmm7 936 psrld xmm8, 12 937 pslld xmm7, 20 938 por xmm7, xmm8 939 paddd xmm0, xmmword ptr [rsp+0xD0] 940 paddd xmm1, xmmword ptr [rsp+0xB0] 941 paddd xmm2, xmmword ptr [rsp+0xA0] 942 paddd xmm3, xmmword ptr [rsp+0x80] 943 paddd xmm0, xmm4 944 paddd xmm1, xmm5 945 paddd xmm2, xmm6 946 paddd xmm3, xmm7 947 pxor xmm12, xmm0 948 pxor xmm13, xmm1 949 pxor xmm14, xmm2 950 pxor xmm15, xmm3 951 movdqa xmm8, xmmword ptr [ROT8+rip] 952 pshufb xmm12, xmm8 953 pshufb xmm13, xmm8 
954 pshufb xmm14, xmm8 955 pshufb xmm15, xmm8 956 movdqa xmm8, xmmword ptr [rsp+0x100] 957 paddd xmm8, xmm12 958 paddd xmm9, xmm13 959 paddd xmm10, xmm14 960 paddd xmm11, xmm15 961 pxor xmm4, xmm8 962 pxor xmm5, xmm9 963 pxor xmm6, xmm10 964 pxor xmm7, xmm11 965 movdqa xmmword ptr [rsp+0x100], xmm8 966 movdqa xmm8, xmm4 967 psrld xmm8, 7 968 pslld xmm4, 25 969 por xmm4, xmm8 970 movdqa xmm8, xmm5 971 psrld xmm8, 7 972 pslld xmm5, 25 973 por xmm5, xmm8 974 movdqa xmm8, xmm6 975 psrld xmm8, 7 976 pslld xmm6, 25 977 por xmm6, xmm8 978 movdqa xmm8, xmm7 979 psrld xmm8, 7 980 pslld xmm7, 25 981 por xmm7, xmm8 982 paddd xmm0, xmmword ptr [rsp+0x70] 983 paddd xmm1, xmmword ptr [rsp+0x50] 984 paddd xmm2, xmmword ptr [rsp] 985 paddd xmm3, xmmword ptr [rsp+0x60] 986 paddd xmm0, xmm5 987 paddd xmm1, xmm6 988 paddd xmm2, xmm7 989 paddd xmm3, xmm4 990 pxor xmm15, xmm0 991 pxor xmm12, xmm1 992 pxor xmm13, xmm2 993 pxor xmm14, xmm3 994 movdqa xmm8, xmmword ptr [ROT16+rip] 995 pshufb xmm15, xmm8 996 pshufb xmm12, xmm8 997 pshufb xmm13, xmm8 998 pshufb xmm14, xmm8 999 paddd xmm10, xmm15 1000 paddd xmm11, xmm12 1001 movdqa xmm8, xmmword ptr [rsp+0x100] 1002 paddd xmm8, xmm13 1003 paddd xmm9, xmm14 1004 pxor xmm5, xmm10 1005 pxor xmm6, xmm11 1006 pxor xmm7, xmm8 1007 pxor xmm4, xmm9 1008 movdqa xmmword ptr [rsp+0x100], xmm8 1009 movdqa xmm8, xmm5 1010 psrld xmm8, 12 1011 pslld xmm5, 20 1012 por xmm5, xmm8 1013 movdqa xmm8, xmm6 1014 psrld xmm8, 12 1015 pslld xmm6, 20 1016 por xmm6, xmm8 1017 movdqa xmm8, xmm7 1018 psrld xmm8, 12 1019 pslld xmm7, 20 1020 por xmm7, xmm8 1021 movdqa xmm8, xmm4 1022 psrld xmm8, 12 1023 pslld xmm4, 20 1024 por xmm4, xmm8 1025 paddd xmm0, xmmword ptr [rsp+0x20] 1026 paddd xmm1, xmmword ptr [rsp+0x30] 1027 paddd xmm2, xmmword ptr [rsp+0x10] 1028 paddd xmm3, xmmword ptr [rsp+0x40] 1029 paddd xmm0, xmm5 1030 paddd xmm1, xmm6 1031 paddd xmm2, xmm7 1032 paddd xmm3, xmm4 1033 pxor xmm15, xmm0 1034 pxor xmm12, xmm1 1035 pxor xmm13, xmm2 1036 pxor xmm14, xmm3 1037 
movdqa xmm8, xmmword ptr [ROT8+rip] 1038 pshufb xmm15, xmm8 1039 pshufb xmm12, xmm8 1040 pshufb xmm13, xmm8 1041 pshufb xmm14, xmm8 1042 paddd xmm10, xmm15 1043 paddd xmm11, xmm12 1044 movdqa xmm8, xmmword ptr [rsp+0x100] 1045 paddd xmm8, xmm13 1046 paddd xmm9, xmm14 1047 pxor xmm5, xmm10 1048 pxor xmm6, xmm11 1049 pxor xmm7, xmm8 1050 pxor xmm4, xmm9 1051 movdqa xmmword ptr [rsp+0x100], xmm8 1052 movdqa xmm8, xmm5 1053 psrld xmm8, 7 1054 pslld xmm5, 25 1055 por xmm5, xmm8 1056 movdqa xmm8, xmm6 1057 psrld xmm8, 7 1058 pslld xmm6, 25 1059 por xmm6, xmm8 1060 movdqa xmm8, xmm7 1061 psrld xmm8, 7 1062 pslld xmm7, 25 1063 por xmm7, xmm8 1064 movdqa xmm8, xmm4 1065 psrld xmm8, 7 1066 pslld xmm4, 25 1067 por xmm4, xmm8 1068 paddd xmm0, xmmword ptr [rsp+0x90] 1069 paddd xmm1, xmmword ptr [rsp+0xB0] 1070 paddd xmm2, xmmword ptr [rsp+0x80] 1071 paddd xmm3, xmmword ptr [rsp+0xF0] 1072 paddd xmm0, xmm4 1073 paddd xmm1, xmm5 1074 paddd xmm2, xmm6 1075 paddd xmm3, xmm7 1076 pxor xmm12, xmm0 1077 pxor xmm13, xmm1 1078 pxor xmm14, xmm2 1079 pxor xmm15, xmm3 1080 movdqa xmm8, xmmword ptr [ROT16+rip] 1081 pshufb xmm12, xmm8 1082 pshufb xmm13, xmm8 1083 pshufb xmm14, xmm8 1084 pshufb xmm15, xmm8 1085 movdqa xmm8, xmmword ptr [rsp+0x100] 1086 paddd xmm8, xmm12 1087 paddd xmm9, xmm13 1088 paddd xmm10, xmm14 1089 paddd xmm11, xmm15 1090 pxor xmm4, xmm8 1091 pxor xmm5, xmm9 1092 pxor xmm6, xmm10 1093 pxor xmm7, xmm11 1094 movdqa xmmword ptr [rsp+0x100], xmm8 1095 movdqa xmm8, xmm4 1096 psrld xmm8, 12 1097 pslld xmm4, 20 1098 por xmm4, xmm8 1099 movdqa xmm8, xmm5 1100 psrld xmm8, 12 1101 pslld xmm5, 20 1102 por xmm5, xmm8 1103 movdqa xmm8, xmm6 1104 psrld xmm8, 12 1105 pslld xmm6, 20 1106 por xmm6, xmm8 1107 movdqa xmm8, xmm7 1108 psrld xmm8, 12 1109 pslld xmm7, 20 1110 por xmm7, xmm8 1111 paddd xmm0, xmmword ptr [rsp+0xE0] 1112 paddd xmm1, xmmword ptr [rsp+0x50] 1113 paddd xmm2, xmmword ptr [rsp+0xC0] 1114 paddd xmm3, xmmword ptr [rsp+0x10] 1115 paddd xmm0, xmm4 1116 paddd xmm1, xmm5 
1117 paddd xmm2, xmm6 1118 paddd xmm3, xmm7 1119 pxor xmm12, xmm0 1120 pxor xmm13, xmm1 1121 pxor xmm14, xmm2 1122 pxor xmm15, xmm3 1123 movdqa xmm8, xmmword ptr [ROT8+rip] 1124 pshufb xmm12, xmm8 1125 pshufb xmm13, xmm8 1126 pshufb xmm14, xmm8 1127 pshufb xmm15, xmm8 1128 movdqa xmm8, xmmword ptr [rsp+0x100] 1129 paddd xmm8, xmm12 1130 paddd xmm9, xmm13 1131 paddd xmm10, xmm14 1132 paddd xmm11, xmm15 1133 pxor xmm4, xmm8 1134 pxor xmm5, xmm9 1135 pxor xmm6, xmm10 1136 pxor xmm7, xmm11 1137 movdqa xmmword ptr [rsp+0x100], xmm8 1138 movdqa xmm8, xmm4 1139 psrld xmm8, 7 1140 pslld xmm4, 25 1141 por xmm4, xmm8 1142 movdqa xmm8, xmm5 1143 psrld xmm8, 7 1144 pslld xmm5, 25 1145 por xmm5, xmm8 1146 movdqa xmm8, xmm6 1147 psrld xmm8, 7 1148 pslld xmm6, 25 1149 por xmm6, xmm8 1150 movdqa xmm8, xmm7 1151 psrld xmm8, 7 1152 pslld xmm7, 25 1153 por xmm7, xmm8 1154 paddd xmm0, xmmword ptr [rsp+0xD0] 1155 paddd xmm1, xmmword ptr [rsp] 1156 paddd xmm2, xmmword ptr [rsp+0x20] 1157 paddd xmm3, xmmword ptr [rsp+0x40] 1158 paddd xmm0, xmm5 1159 paddd xmm1, xmm6 1160 paddd xmm2, xmm7 1161 paddd xmm3, xmm4 1162 pxor xmm15, xmm0 1163 pxor xmm12, xmm1 1164 pxor xmm13, xmm2 1165 pxor xmm14, xmm3 1166 movdqa xmm8, xmmword ptr [ROT16+rip] 1167 pshufb xmm15, xmm8 1168 pshufb xmm12, xmm8 1169 pshufb xmm13, xmm8 1170 pshufb xmm14, xmm8 1171 paddd xmm10, xmm15 1172 paddd xmm11, xmm12 1173 movdqa xmm8, xmmword ptr [rsp+0x100] 1174 paddd xmm8, xmm13 1175 paddd xmm9, xmm14 1176 pxor xmm5, xmm10 1177 pxor xmm6, xmm11 1178 pxor xmm7, xmm8 1179 pxor xmm4, xmm9 1180 movdqa xmmword ptr [rsp+0x100], xmm8 1181 movdqa xmm8, xmm5 1182 psrld xmm8, 12 1183 pslld xmm5, 20 1184 por xmm5, xmm8 1185 movdqa xmm8, xmm6 1186 psrld xmm8, 12 1187 pslld xmm6, 20 1188 por xmm6, xmm8 1189 movdqa xmm8, xmm7 1190 psrld xmm8, 12 1191 pslld xmm7, 20 1192 por xmm7, xmm8 1193 movdqa xmm8, xmm4 1194 psrld xmm8, 12 1195 pslld xmm4, 20 1196 por xmm4, xmm8 1197 paddd xmm0, xmmword ptr [rsp+0x30] 1198 paddd xmm1, xmmword ptr 
[rsp+0xA0] 1199 paddd xmm2, xmmword ptr [rsp+0x60] 1200 paddd xmm3, xmmword ptr [rsp+0x70] 1201 paddd xmm0, xmm5 1202 paddd xmm1, xmm6 1203 paddd xmm2, xmm7 1204 paddd xmm3, xmm4 1205 pxor xmm15, xmm0 1206 pxor xmm12, xmm1 1207 pxor xmm13, xmm2 1208 pxor xmm14, xmm3 1209 movdqa xmm8, xmmword ptr [ROT8+rip] 1210 pshufb xmm15, xmm8 1211 pshufb xmm12, xmm8 1212 pshufb xmm13, xmm8 1213 pshufb xmm14, xmm8 1214 paddd xmm10, xmm15 1215 paddd xmm11, xmm12 1216 movdqa xmm8, xmmword ptr [rsp+0x100] 1217 paddd xmm8, xmm13 1218 paddd xmm9, xmm14 1219 pxor xmm5, xmm10 1220 pxor xmm6, xmm11 1221 pxor xmm7, xmm8 1222 pxor xmm4, xmm9 1223 movdqa xmmword ptr [rsp+0x100], xmm8 1224 movdqa xmm8, xmm5 1225 psrld xmm8, 7 1226 pslld xmm5, 25 1227 por xmm5, xmm8 1228 movdqa xmm8, xmm6 1229 psrld xmm8, 7 1230 pslld xmm6, 25 1231 por xmm6, xmm8 1232 movdqa xmm8, xmm7 1233 psrld xmm8, 7 1234 pslld xmm7, 25 1235 por xmm7, xmm8 1236 movdqa xmm8, xmm4 1237 psrld xmm8, 7 1238 pslld xmm4, 25 1239 por xmm4, xmm8 1240 paddd xmm0, xmmword ptr [rsp+0xB0] 1241 paddd xmm1, xmmword ptr [rsp+0x50] 1242 paddd xmm2, xmmword ptr [rsp+0x10] 1243 paddd xmm3, xmmword ptr [rsp+0x80] 1244 paddd xmm0, xmm4 1245 paddd xmm1, xmm5 1246 paddd xmm2, xmm6 1247 paddd xmm3, xmm7 1248 pxor xmm12, xmm0 1249 pxor xmm13, xmm1 1250 pxor xmm14, xmm2 1251 pxor xmm15, xmm3 1252 movdqa xmm8, xmmword ptr [ROT16+rip] 1253 pshufb xmm12, xmm8 1254 pshufb xmm13, xmm8 1255 pshufb xmm14, xmm8 1256 pshufb xmm15, xmm8 1257 movdqa xmm8, xmmword ptr [rsp+0x100] 1258 paddd xmm8, xmm12 1259 paddd xmm9, xmm13 1260 paddd xmm10, xmm14 1261 paddd xmm11, xmm15 1262 pxor xmm4, xmm8 1263 pxor xmm5, xmm9 1264 pxor xmm6, xmm10 1265 pxor xmm7, xmm11 1266 movdqa xmmword ptr [rsp+0x100], xmm8 1267 movdqa xmm8, xmm4 1268 psrld xmm8, 12 1269 pslld xmm4, 20 1270 por xmm4, xmm8 1271 movdqa xmm8, xmm5 1272 psrld xmm8, 12 1273 pslld xmm5, 20 1274 por xmm5, xmm8 1275 movdqa xmm8, xmm6 1276 psrld xmm8, 12 1277 pslld xmm6, 20 1278 por xmm6, xmm8 1279 movdqa 
xmm8, xmm7 1280 psrld xmm8, 12 1281 pslld xmm7, 20 1282 por xmm7, xmm8 1283 paddd xmm0, xmmword ptr [rsp+0xF0] 1284 paddd xmm1, xmmword ptr [rsp] 1285 paddd xmm2, xmmword ptr [rsp+0x90] 1286 paddd xmm3, xmmword ptr [rsp+0x60] 1287 paddd xmm0, xmm4 1288 paddd xmm1, xmm5 1289 paddd xmm2, xmm6 1290 paddd xmm3, xmm7 1291 pxor xmm12, xmm0 1292 pxor xmm13, xmm1 1293 pxor xmm14, xmm2 1294 pxor xmm15, xmm3 1295 movdqa xmm8, xmmword ptr [ROT8+rip] 1296 pshufb xmm12, xmm8 1297 pshufb xmm13, xmm8 1298 pshufb xmm14, xmm8 1299 pshufb xmm15, xmm8 1300 movdqa xmm8, xmmword ptr [rsp+0x100] 1301 paddd xmm8, xmm12 1302 paddd xmm9, xmm13 1303 paddd xmm10, xmm14 1304 paddd xmm11, xmm15 1305 pxor xmm4, xmm8 1306 pxor xmm5, xmm9 1307 pxor xmm6, xmm10 1308 pxor xmm7, xmm11 1309 movdqa xmmword ptr [rsp+0x100], xmm8 1310 movdqa xmm8, xmm4 1311 psrld xmm8, 7 1312 pslld xmm4, 25 1313 por xmm4, xmm8 1314 movdqa xmm8, xmm5 1315 psrld xmm8, 7 1316 pslld xmm5, 25 1317 por xmm5, xmm8 1318 movdqa xmm8, xmm6 1319 psrld xmm8, 7 1320 pslld xmm6, 25 1321 por xmm6, xmm8 1322 movdqa xmm8, xmm7 1323 psrld xmm8, 7 1324 pslld xmm7, 25 1325 por xmm7, xmm8 1326 paddd xmm0, xmmword ptr [rsp+0xE0] 1327 paddd xmm1, xmmword ptr [rsp+0x20] 1328 paddd xmm2, xmmword ptr [rsp+0x30] 1329 paddd xmm3, xmmword ptr [rsp+0x70] 1330 paddd xmm0, xmm5 1331 paddd xmm1, xmm6 1332 paddd xmm2, xmm7 1333 paddd xmm3, xmm4 1334 pxor xmm15, xmm0 1335 pxor xmm12, xmm1 1336 pxor xmm13, xmm2 1337 pxor xmm14, xmm3 1338 movdqa xmm8, xmmword ptr [ROT16+rip] 1339 pshufb xmm15, xmm8 1340 pshufb xmm12, xmm8 1341 pshufb xmm13, xmm8 1342 pshufb xmm14, xmm8 1343 paddd xmm10, xmm15 1344 paddd xmm11, xmm12 1345 movdqa xmm8, xmmword ptr [rsp+0x100] 1346 paddd xmm8, xmm13 1347 paddd xmm9, xmm14 1348 pxor xmm5, xmm10 1349 pxor xmm6, xmm11 1350 pxor xmm7, xmm8 1351 pxor xmm4, xmm9 1352 movdqa xmmword ptr [rsp+0x100], xmm8 1353 movdqa xmm8, xmm5 1354 psrld xmm8, 12 1355 pslld xmm5, 20 1356 por xmm5, xmm8 1357 movdqa xmm8, xmm6 1358 psrld xmm8, 12 1359 
pslld xmm6, 20 1360 por xmm6, xmm8 1361 movdqa xmm8, xmm7 1362 psrld xmm8, 12 1363 pslld xmm7, 20 1364 por xmm7, xmm8 1365 movdqa xmm8, xmm4 1366 psrld xmm8, 12 1367 pslld xmm4, 20 1368 por xmm4, xmm8 1369 paddd xmm0, xmmword ptr [rsp+0xA0] 1370 paddd xmm1, xmmword ptr [rsp+0xC0] 1371 paddd xmm2, xmmword ptr [rsp+0x40] 1372 paddd xmm3, xmmword ptr [rsp+0xD0] 1373 paddd xmm0, xmm5 1374 paddd xmm1, xmm6 1375 paddd xmm2, xmm7 1376 paddd xmm3, xmm4 1377 pxor xmm15, xmm0 1378 pxor xmm12, xmm1 1379 pxor xmm13, xmm2 1380 pxor xmm14, xmm3 1381 movdqa xmm8, xmmword ptr [ROT8+rip] 1382 pshufb xmm15, xmm8 1383 pshufb xmm12, xmm8 1384 pshufb xmm13, xmm8 1385 pshufb xmm14, xmm8 1386 paddd xmm10, xmm15 1387 paddd xmm11, xmm12 1388 movdqa xmm8, xmmword ptr [rsp+0x100] 1389 paddd xmm8, xmm13 1390 paddd xmm9, xmm14 1391 pxor xmm5, xmm10 1392 pxor xmm6, xmm11 1393 pxor xmm7, xmm8 1394 pxor xmm4, xmm9 1395 pxor xmm0, xmm8 1396 pxor xmm1, xmm9 1397 pxor xmm2, xmm10 1398 pxor xmm3, xmm11 1399 movdqa xmm8, xmm5 1400 psrld xmm8, 7 1401 pslld xmm5, 25 1402 por xmm5, xmm8 1403 movdqa xmm8, xmm6 1404 psrld xmm8, 7 1405 pslld xmm6, 25 1406 por xmm6, xmm8 1407 movdqa xmm8, xmm7 1408 psrld xmm8, 7 1409 pslld xmm7, 25 1410 por xmm7, xmm8 1411 movdqa xmm8, xmm4 1412 psrld xmm8, 7 1413 pslld xmm4, 25 1414 por xmm4, xmm8 1415 pxor xmm4, xmm12 1416 pxor xmm5, xmm13 1417 pxor xmm6, xmm14 1418 pxor xmm7, xmm15 1419 mov eax, r13d 1420 jne 9b 1421 movdqa xmm9, xmm0 1422 punpckldq xmm0, xmm1 1423 punpckhdq xmm9, xmm1 1424 movdqa xmm11, xmm2 1425 punpckldq xmm2, xmm3 1426 punpckhdq xmm11, xmm3 1427 movdqa xmm1, xmm0 1428 punpcklqdq xmm0, xmm2 1429 punpckhqdq xmm1, xmm2 1430 movdqa xmm3, xmm9 1431 punpcklqdq xmm9, xmm11 1432 punpckhqdq xmm3, xmm11 1433 movdqu xmmword ptr [rbx], xmm0 1434 movdqu xmmword ptr [rbx+0x20], xmm1 1435 movdqu xmmword ptr [rbx+0x40], xmm9 1436 movdqu xmmword ptr [rbx+0x60], xmm3 1437 movdqa xmm9, xmm4 1438 punpckldq xmm4, xmm5 1439 punpckhdq xmm9, xmm5 1440 movdqa xmm11, xmm6 1441 
punpckldq xmm6, xmm7 1442 punpckhdq xmm11, xmm7 1443 movdqa xmm5, xmm4 1444 punpcklqdq xmm4, xmm6 1445 punpckhqdq xmm5, xmm6 1446 movdqa xmm7, xmm9 1447 punpcklqdq xmm9, xmm11 1448 punpckhqdq xmm7, xmm11 1449 movdqu xmmword ptr [rbx+0x10], xmm4 1450 movdqu xmmword ptr [rbx+0x30], xmm5 1451 movdqu xmmword ptr [rbx+0x50], xmm9 1452 movdqu xmmword ptr [rbx+0x70], xmm7 1453 movdqa xmm1, xmmword ptr [rsp+0x110] 1454 movdqa xmm0, xmm1 1455 paddd xmm1, xmmword ptr [rsp+0x150] 1456 movdqa xmmword ptr [rsp+0x110], xmm1 1457 pxor xmm0, xmmword ptr [CMP_MSB_MASK+rip] 1458 pxor xmm1, xmmword ptr [CMP_MSB_MASK+rip] 1459 pcmpgtd xmm0, xmm1 1460 movdqa xmm1, xmmword ptr [rsp+0x120] 1461 psubd xmm1, xmm0 1462 movdqa xmmword ptr [rsp+0x120], xmm1 1463 add rbx, 128 1464 add rdi, 32 1465 sub rsi, 4 1466 cmp rsi, 4 1467 jnc 2b 1468 test rsi, rsi 1469 jnz 3f 14704: 1471 mov rsp, rbp 1472 pop rbp 1473 pop rbx 1474 pop r12 1475 pop r13 1476 pop r14 1477 pop r15 1478 RET 1479.p2align 5 14803: 1481 test esi, 0x2 1482 je 3f 1483 movups xmm0, xmmword ptr [rcx] 1484 movups xmm1, xmmword ptr [rcx+0x10] 1485 movaps xmm8, xmm0 1486 movaps xmm9, xmm1 1487 movd xmm13, dword ptr [rsp+0x110] 1488 pinsrd xmm13, dword ptr [rsp+0x120], 1 1489 pinsrd xmm13, dword ptr [BLAKE3_BLOCK_LEN+rip], 2 1490 movaps xmmword ptr [rsp], xmm13 1491 movd xmm14, dword ptr [rsp+0x114] 1492 pinsrd xmm14, dword ptr [rsp+0x124], 1 1493 pinsrd xmm14, dword ptr [BLAKE3_BLOCK_LEN+rip], 2 1494 movaps xmmword ptr [rsp+0x10], xmm14 1495 mov r8, qword ptr [rdi] 1496 mov r9, qword ptr [rdi+0x8] 1497 movzx eax, byte ptr [rbp+0x40] 1498 or eax, r13d 1499 xor edx, edx 15002: 1501 mov r14d, eax 1502 or eax, r12d 1503 add rdx, 64 1504 cmp rdx, r15 1505 cmovne eax, r14d 1506 movaps xmm2, xmmword ptr [BLAKE3_IV+rip] 1507 movaps xmm10, xmm2 1508 movups xmm4, xmmword ptr [r8+rdx-0x40] 1509 movups xmm5, xmmword ptr [r8+rdx-0x30] 1510 movaps xmm3, xmm4 1511 shufps xmm4, xmm5, 136 1512 shufps xmm3, xmm5, 221 1513 movaps xmm5, xmm3 1514 movups 
xmm6, xmmword ptr [r8+rdx-0x20] 1515 movups xmm7, xmmword ptr [r8+rdx-0x10] 1516 movaps xmm3, xmm6 1517 shufps xmm6, xmm7, 136 1518 pshufd xmm6, xmm6, 0x93 1519 shufps xmm3, xmm7, 221 1520 pshufd xmm7, xmm3, 0x93 1521 movups xmm12, xmmword ptr [r9+rdx-0x40] 1522 movups xmm13, xmmword ptr [r9+rdx-0x30] 1523 movaps xmm11, xmm12 1524 shufps xmm12, xmm13, 136 1525 shufps xmm11, xmm13, 221 1526 movaps xmm13, xmm11 1527 movups xmm14, xmmword ptr [r9+rdx-0x20] 1528 movups xmm15, xmmword ptr [r9+rdx-0x10] 1529 movaps xmm11, xmm14 1530 shufps xmm14, xmm15, 136 1531 pshufd xmm14, xmm14, 0x93 1532 shufps xmm11, xmm15, 221 1533 pshufd xmm15, xmm11, 0x93 1534 movaps xmm3, xmmword ptr [rsp] 1535 movaps xmm11, xmmword ptr [rsp+0x10] 1536 pinsrd xmm3, eax, 3 1537 pinsrd xmm11, eax, 3 1538 mov al, 7 15399: 1540 paddd xmm0, xmm4 1541 paddd xmm8, xmm12 1542 movaps xmmword ptr [rsp+0x20], xmm4 1543 movaps xmmword ptr [rsp+0x30], xmm12 1544 paddd xmm0, xmm1 1545 paddd xmm8, xmm9 1546 pxor xmm3, xmm0 1547 pxor xmm11, xmm8 1548 movaps xmm12, xmmword ptr [ROT16+rip] 1549 pshufb xmm3, xmm12 1550 pshufb xmm11, xmm12 1551 paddd xmm2, xmm3 1552 paddd xmm10, xmm11 1553 pxor xmm1, xmm2 1554 pxor xmm9, xmm10 1555 movdqa xmm4, xmm1 1556 pslld xmm1, 20 1557 psrld xmm4, 12 1558 por xmm1, xmm4 1559 movdqa xmm4, xmm9 1560 pslld xmm9, 20 1561 psrld xmm4, 12 1562 por xmm9, xmm4 1563 paddd xmm0, xmm5 1564 paddd xmm8, xmm13 1565 movaps xmmword ptr [rsp+0x40], xmm5 1566 movaps xmmword ptr [rsp+0x50], xmm13 1567 paddd xmm0, xmm1 1568 paddd xmm8, xmm9 1569 pxor xmm3, xmm0 1570 pxor xmm11, xmm8 1571 movaps xmm13, xmmword ptr [ROT8+rip] 1572 pshufb xmm3, xmm13 1573 pshufb xmm11, xmm13 1574 paddd xmm2, xmm3 1575 paddd xmm10, xmm11 1576 pxor xmm1, xmm2 1577 pxor xmm9, xmm10 1578 movdqa xmm4, xmm1 1579 pslld xmm1, 25 1580 psrld xmm4, 7 1581 por xmm1, xmm4 1582 movdqa xmm4, xmm9 1583 pslld xmm9, 25 1584 psrld xmm4, 7 1585 por xmm9, xmm4 1586 pshufd xmm0, xmm0, 0x93 1587 pshufd xmm8, xmm8, 0x93 1588 pshufd xmm3, 
xmm3, 0x4E 1589 pshufd xmm11, xmm11, 0x4E 1590 pshufd xmm2, xmm2, 0x39 1591 pshufd xmm10, xmm10, 0x39 1592 paddd xmm0, xmm6 1593 paddd xmm8, xmm14 1594 paddd xmm0, xmm1 1595 paddd xmm8, xmm9 1596 pxor xmm3, xmm0 1597 pxor xmm11, xmm8 1598 pshufb xmm3, xmm12 1599 pshufb xmm11, xmm12 1600 paddd xmm2, xmm3 1601 paddd xmm10, xmm11 1602 pxor xmm1, xmm2 1603 pxor xmm9, xmm10 1604 movdqa xmm4, xmm1 1605 pslld xmm1, 20 1606 psrld xmm4, 12 1607 por xmm1, xmm4 1608 movdqa xmm4, xmm9 1609 pslld xmm9, 20 1610 psrld xmm4, 12 1611 por xmm9, xmm4 1612 paddd xmm0, xmm7 1613 paddd xmm8, xmm15 1614 paddd xmm0, xmm1 1615 paddd xmm8, xmm9 1616 pxor xmm3, xmm0 1617 pxor xmm11, xmm8 1618 pshufb xmm3, xmm13 1619 pshufb xmm11, xmm13 1620 paddd xmm2, xmm3 1621 paddd xmm10, xmm11 1622 pxor xmm1, xmm2 1623 pxor xmm9, xmm10 1624 movdqa xmm4, xmm1 1625 pslld xmm1, 25 1626 psrld xmm4, 7 1627 por xmm1, xmm4 1628 movdqa xmm4, xmm9 1629 pslld xmm9, 25 1630 psrld xmm4, 7 1631 por xmm9, xmm4 1632 pshufd xmm0, xmm0, 0x39 1633 pshufd xmm8, xmm8, 0x39 1634 pshufd xmm3, xmm3, 0x4E 1635 pshufd xmm11, xmm11, 0x4E 1636 pshufd xmm2, xmm2, 0x93 1637 pshufd xmm10, xmm10, 0x93 1638 dec al 1639 je 9f 1640 movdqa xmm12, xmmword ptr [rsp+0x20] 1641 movdqa xmm5, xmmword ptr [rsp+0x40] 1642 pshufd xmm13, xmm12, 0x0F 1643 shufps xmm12, xmm5, 214 1644 pshufd xmm4, xmm12, 0x39 1645 movdqa xmm12, xmm6 1646 shufps xmm12, xmm7, 250 1647 pblendw xmm13, xmm12, 0xCC 1648 movdqa xmm12, xmm7 1649 punpcklqdq xmm12, xmm5 1650 pblendw xmm12, xmm6, 0xC0 1651 pshufd xmm12, xmm12, 0x78 1652 punpckhdq xmm5, xmm7 1653 punpckldq xmm6, xmm5 1654 pshufd xmm7, xmm6, 0x1E 1655 movdqa xmmword ptr [rsp+0x20], xmm13 1656 movdqa xmmword ptr [rsp+0x40], xmm12 1657 movdqa xmm5, xmmword ptr [rsp+0x30] 1658 movdqa xmm13, xmmword ptr [rsp+0x50] 1659 pshufd xmm6, xmm5, 0x0F 1660 shufps xmm5, xmm13, 214 1661 pshufd xmm12, xmm5, 0x39 1662 movdqa xmm5, xmm14 1663 shufps xmm5, xmm15, 250 1664 pblendw xmm6, xmm5, 0xCC 1665 movdqa xmm5, xmm15 1666 
punpcklqdq xmm5, xmm13 1667 pblendw xmm5, xmm14, 0xC0 1668 pshufd xmm5, xmm5, 0x78 1669 punpckhdq xmm13, xmm15 1670 punpckldq xmm14, xmm13 1671 pshufd xmm15, xmm14, 0x1E 1672 movdqa xmm13, xmm6 1673 movdqa xmm14, xmm5 1674 movdqa xmm5, xmmword ptr [rsp+0x20] 1675 movdqa xmm6, xmmword ptr [rsp+0x40] 1676 jmp 9b 16779: 1678 pxor xmm0, xmm2 1679 pxor xmm1, xmm3 1680 pxor xmm8, xmm10 1681 pxor xmm9, xmm11 1682 mov eax, r13d 1683 cmp rdx, r15 1684 jne 2b 1685 movups xmmword ptr [rbx], xmm0 1686 movups xmmword ptr [rbx+0x10], xmm1 1687 movups xmmword ptr [rbx+0x20], xmm8 1688 movups xmmword ptr [rbx+0x30], xmm9 1689 movdqa xmm0, xmmword ptr [rsp+0x130] 1690 movdqa xmm1, xmmword ptr [rsp+0x110] 1691 movdqa xmm2, xmmword ptr [rsp+0x120] 1692 movdqu xmm3, xmmword ptr [rsp+0x118] 1693 movdqu xmm4, xmmword ptr [rsp+0x128] 1694 blendvps xmm1, xmm3, xmm0 1695 blendvps xmm2, xmm4, xmm0 1696 movdqa xmmword ptr [rsp+0x110], xmm1 1697 movdqa xmmword ptr [rsp+0x120], xmm2 1698 add rdi, 16 1699 add rbx, 64 1700 sub rsi, 2 17013: 1702 test esi, 0x1 1703 je 4b 1704 movups xmm0, xmmword ptr [rcx] 1705 movups xmm1, xmmword ptr [rcx+0x10] 1706 movd xmm13, dword ptr [rsp+0x110] 1707 pinsrd xmm13, dword ptr [rsp+0x120], 1 1708 pinsrd xmm13, dword ptr [BLAKE3_BLOCK_LEN+rip], 2 1709 movaps xmm14, xmmword ptr [ROT8+rip] 1710 movaps xmm15, xmmword ptr [ROT16+rip] 1711 mov r8, qword ptr [rdi] 1712 movzx eax, byte ptr [rbp+0x40] 1713 or eax, r13d 1714 xor edx, edx 17152: 1716 mov r14d, eax 1717 or eax, r12d 1718 add rdx, 64 1719 cmp rdx, r15 1720 cmovne eax, r14d 1721 movaps xmm2, xmmword ptr [BLAKE3_IV+rip] 1722 movaps xmm3, xmm13 1723 pinsrd xmm3, eax, 3 1724 movups xmm4, xmmword ptr [r8+rdx-0x40] 1725 movups xmm5, xmmword ptr [r8+rdx-0x30] 1726 movaps xmm8, xmm4 1727 shufps xmm4, xmm5, 136 1728 shufps xmm8, xmm5, 221 1729 movaps xmm5, xmm8 1730 movups xmm6, xmmword ptr [r8+rdx-0x20] 1731 movups xmm7, xmmword ptr [r8+rdx-0x10] 1732 movaps xmm8, xmm6 1733 shufps xmm6, xmm7, 136 1734 pshufd xmm6, 
xmm6, 0x93
        shufps  xmm8, xmm7, 221
        pshufd  xmm7, xmm8, 0x93
        /*
         * Remainder of the path of zfs_blake3_hash_many_sse41 that handles
         * one input pointer (the function begins earlier in the file).
         * al counts the rounds left for the current block: the round body
         * at 9: executes 7 times, the message permutation after it 6 times.
         */
        mov     al, 7
9:
        paddd   xmm0, xmm4
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        pshufb  xmm3, xmm15
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1
        pslld   xmm1, 20
        psrld   xmm11, 12
        por     xmm1, xmm11
        paddd   xmm0, xmm5
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        pshufb  xmm3, xmm14
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1
        pslld   xmm1, 25
        psrld   xmm11, 7
        por     xmm1, xmm11
        pshufd  xmm0, xmm0, 0x93
        pshufd  xmm3, xmm3, 0x4E
        pshufd  xmm2, xmm2, 0x39
        paddd   xmm0, xmm6
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        pshufb  xmm3, xmm15
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1
        pslld   xmm1, 20
        psrld   xmm11, 12
        por     xmm1, xmm11
        paddd   xmm0, xmm7
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        pshufb  xmm3, xmm14
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1
        pslld   xmm1, 25
        psrld   xmm11, 7
        por     xmm1, xmm11
        pshufd  xmm0, xmm0, 0x39
        pshufd  xmm3, xmm3, 0x4E
        pshufd  xmm2, xmm2, 0x93
        dec     al
        jz      9f
        /* Permute the message words held in xmm4-xmm7 for the next round. */
        movdqa  xmm8, xmm4
        shufps  xmm8, xmm5, 214
        pshufd  xmm9, xmm4, 0x0F
        pshufd  xmm4, xmm8, 0x39
        movdqa  xmm8, xmm6
        shufps  xmm8, xmm7, 250
        pblendw xmm9, xmm8, 0xCC
        movdqa  xmm8, xmm7
        punpcklqdq xmm8, xmm5
        pblendw xmm8, xmm6, 0xC0
        pshufd  xmm8, xmm8, 0x78
        punpckhdq xmm5, xmm7
        punpckldq xmm6, xmm5
        pshufd  xmm7, xmm6, 0x1E
        movdqa  xmm5, xmm9
        movdqa  xmm6, xmm8
        jmp     9b
9:
        /* Fold the lower state rows into the upper ones. */
        pxor    xmm0, xmm2
        pxor    xmm1, xmm3
        mov     eax, r13d
        /* rdx is the byte offset just past the block consumed; loop to 2: until it equals r15. */
        cmp     rdx, r15
        jne     2b
        /* Store the 32-byte result at rbx, then leave through the epilogue at label 4. */
        movups  xmmword ptr [rbx], xmm0
        movups  xmmword ptr [rbx+0x10], xmm1
        jmp     4b

/*
 * zfs_blake3_compress_in_place_sse41
 *
 * Runs seven rounds over one 64-byte block and writes the 32-byte result
 * back over the chaining value.
 *
 * Register use on entry, as read by the code below:
 *   rdi  32-byte chaining value, loaded and stored with unaligned moves
 *   rsi  64-byte message block, loaded with unaligned moves
 *   dl   block length, low byte of the third argument
 *   rcx  64-bit counter
 *   r8b  flags, low byte of the fifth argument
 * Presumably this corresponds to a C prototype of the form
 *   (uint32_t cv[8], const uint8_t block[64], uint8_t block_len,
 *    uint64_t counter, uint8_t flags)
 * TODO confirm against the C declaration.
 *
 * Writes rax, rdx, flags and xmm0-xmm9, xmm11, xmm14, xmm15.  Uses no stack.
 */
.p2align 6
zfs_blake3_compress_in_place_sse41:
        _CET_ENDBR
        /* State rows 0 and 1 come from the chaining value, row 2 from the IV. */
        movups  xmm0, xmmword ptr [rdi]
        movups  xmm1, xmmword ptr [rdi+0x10]
        movaps  xmm2, xmmword ptr [BLAKE3_IV+rip]
        /*
         * Build rdx = block_len | (flags << 32) from the low bytes only.
         * Zero-extending first, exactly as zfs_blake3_compress_xof_sse41
         * does, keeps stray upper register bits of these two narrow
         * arguments out of the state row.  rax is free to use here: it is
         * caller-saved and al is reloaded with the round count below.
         */
        movzx   eax, r8b
        movzx   edx, dl
        shl     rax, 32
        add     rdx, rax
        /* Row 3 = { counter low, counter high, block_len, flags }. */
        movq    xmm3, rcx
        movq    xmm4, rdx
        punpcklqdq xmm3, xmm4
        /* Split the block: even-indexed dwords to xmm4/xmm6, odd-indexed to xmm5/xmm7. */
        movups  xmm4, xmmword ptr [rsi]
        movups  xmm5, xmmword ptr [rsi+0x10]
        movaps  xmm8, xmm4
        shufps  xmm4, xmm5, 136
        shufps  xmm8, xmm5, 221
        movaps  xmm5, xmm8
        movups  xmm6, xmmword ptr [rsi+0x20]
        movups  xmm7, xmmword ptr [rsi+0x30]
        movaps  xmm8, xmm6
        shufps  xmm6, xmm7, 136
        pshufd  xmm6, xmm6, 0x93
        shufps  xmm8, xmm7, 221
        pshufd  xmm7, xmm8, 0x93
        /* Byte-shuffle masks for the 8-bit and 16-bit dword rotations. */
        movaps  xmm14, xmmword ptr [ROT8+rip]
        movaps  xmm15, xmmword ptr [ROT16+rip]
        /* al = number of rounds still to run. */
        mov     al, 7
9:
        /* First half-round: add message words, rotate by 16 / 12 / 8 / 7 bits. */
        paddd   xmm0, xmm4
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        pshufb  xmm3, xmm15
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1
        pslld   xmm1, 20
        psrld   xmm11, 12
        por     xmm1, xmm11
        paddd   xmm0, xmm5
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        pshufb  xmm3, xmm14
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1
        pslld   xmm1, 25
        psrld   xmm11, 7
        por     xmm1, xmm11
        /* Rotate the lanes of rows 0, 3 and 2 before the second half-round. */
        pshufd  xmm0, xmm0, 0x93
        pshufd  xmm3, xmm3, 0x4E
        pshufd  xmm2, xmm2, 0x39
        /* Second half-round, same mixing with the next two message vectors. */
        paddd   xmm0, xmm6
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        pshufb  xmm3, xmm15
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1
        pslld   xmm1, 20
        psrld   xmm11, 12
        por     xmm1, xmm11
        paddd   xmm0, xmm7
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        pshufb  xmm3, xmm14
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1
        pslld   xmm1, 25
        psrld   xmm11, 7
        por     xmm1, xmm11
        /* Undo the lane rotation of rows 0, 3 and 2. */
        pshufd  xmm0, xmm0, 0x39
        pshufd  xmm3, xmm3, 0x4E
        pshufd  xmm2, xmm2, 0x93
        dec     al
        jz      9f
        /* Permute the message words held in xmm4-xmm7 for the next round. */
        movdqa  xmm8, xmm4
        shufps  xmm8, xmm5, 214
        pshufd  xmm9, xmm4, 0x0F
        pshufd  xmm4, xmm8, 0x39
        movdqa  xmm8, xmm6
        shufps  xmm8, xmm7, 250
        pblendw xmm9, xmm8, 0xCC
        movdqa  xmm8, xmm7
        punpcklqdq xmm8, xmm5
        pblendw xmm8, xmm6, 0xC0
        pshufd  xmm8, xmm8, 0x78
        punpckhdq xmm5, xmm7
        punpckldq xmm6, xmm5
        pshufd  xmm7, xmm6, 0x1E
        movdqa  xmm5, xmm9
        movdqa  xmm6, xmm8
        jmp     9b
9:
        /* New chaining value = rows 0,1 xor rows 2,3; overwrite the input. */
        pxor    xmm0, xmm2
        pxor    xmm1, xmm3
        movups  xmmword ptr [rdi], xmm0
        movups  xmmword ptr [rdi+0x10], xmm1
        RET

/*
 * zfs_blake3_compress_xof_sse41
 *
 * Same seven-round compression as zfs_blake3_compress_in_place_sse41, but
 * the chaining value is left untouched and all 64 bytes of output are
 * written to the buffer in r9.
 *
 * Register use on entry, as read by the code below:
 *   rdi  32-byte chaining value (read only)
 *   rsi  64-byte message block
 *   dl   block length, low byte of the third argument
 *   rcx  64-bit counter
 *   r8b  flags, low byte of the fifth argument
 *   r9   64-byte output buffer, stored with unaligned moves
 *
 * Writes rax, rdx, flags and xmm0-xmm9, xmm11, xmm14, xmm15.  Uses no stack.
 */
.p2align 6
zfs_blake3_compress_xof_sse41:
        _CET_ENDBR
        /* State rows 0 and 1 come from the chaining value, row 2 from the IV. */
        movups  xmm0, xmmword ptr [rdi]
        movups  xmm1, xmmword ptr [rdi+0x10]
        movaps  xmm2, xmmword ptr [BLAKE3_IV+rip]
        /* rdx = block_len | (flags << 32), built from the low bytes only. */
        movzx   eax, r8b
        movzx   edx, dl
        shl     rax, 32
        add     rdx, rax
        /* Row 3 = { counter low, counter high, block_len, flags }. */
        movq    xmm3, rcx
        movq    xmm4, rdx
        punpcklqdq xmm3, xmm4
        /* Split the block: even-indexed dwords to xmm4/xmm6, odd-indexed to xmm5/xmm7. */
        movups  xmm4, xmmword ptr [rsi]
        movups  xmm5, xmmword ptr [rsi+0x10]
        movaps  xmm8, xmm4
        shufps  xmm4, xmm5, 136
        shufps  xmm8, xmm5, 221
        movaps  xmm5, xmm8
        movups  xmm6, xmmword ptr [rsi+0x20]
        movups  xmm7, xmmword ptr [rsi+0x30]
        movaps  xmm8, xmm6
        shufps  xmm6, xmm7, 136
        pshufd  xmm6, xmm6, 0x93
        shufps  xmm8, xmm7, 221
        pshufd  xmm7, xmm8, 0x93
        /* Byte-shuffle masks for the 8-bit and 16-bit dword rotations. */
        movaps  xmm14, xmmword ptr [ROT8+rip]
        movaps  xmm15, xmmword ptr [ROT16+rip]
        /* al = number of rounds still to run. */
        mov     al, 7
9:
        /* First half-round: add message words, rotate by 16 / 12 / 8 / 7 bits. */
        paddd   xmm0, xmm4
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        pshufb  xmm3, xmm15
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1
        pslld   xmm1, 20
        psrld   xmm11, 12
        por     xmm1, xmm11
        paddd   xmm0, xmm5
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        pshufb  xmm3, xmm14
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1
        pslld   xmm1, 25
        psrld   xmm11, 7
        por     xmm1, xmm11
        /* Rotate the lanes of rows 0, 3 and 2 before the second half-round. */
        pshufd  xmm0, xmm0, 0x93
        pshufd  xmm3, xmm3, 0x4E
        pshufd  xmm2, xmm2, 0x39
        /* Second half-round, same mixing with the next two message vectors. */
        paddd   xmm0, xmm6
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        pshufb  xmm3, xmm15
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1
        pslld   xmm1, 20
        psrld   xmm11, 12
        por     xmm1, xmm11
        paddd   xmm0, xmm7
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        pshufb  xmm3, xmm14
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1
        pslld   xmm1, 25
        psrld   xmm11, 7
        por     xmm1, xmm11
        /* Undo the lane rotation of rows 0, 3 and 2. */
        pshufd  xmm0, xmm0, 0x39
        pshufd  xmm3, xmm3, 0x4E
        pshufd  xmm2, xmm2, 0x93
        dec     al
        jz      9f
        /* Permute the message words held in xmm4-xmm7 for the next round. */
        movdqa  xmm8, xmm4
        shufps  xmm8, xmm5, 214
        pshufd  xmm9, xmm4, 0x0F
        pshufd  xmm4, xmm8, 0x39
        movdqa  xmm8, xmm6
        shufps  xmm8, xmm7, 250
        pblendw xmm9, xmm8, 0xCC
        movdqa  xmm8, xmm7
        punpcklqdq xmm8, xmm5
        pblendw xmm8, xmm6, 0xC0
        pshufd  xmm8, xmm8, 0x78
        punpckhdq xmm5, xmm7
        punpckldq xmm6, xmm5
        pshufd  xmm7, xmm6, 0x1E
        movdqa  xmm5, xmm9
        movdqa  xmm6, xmm8
        jmp     9b
9:
        /*
         * Output bytes 0-31  = rows 0,1 xor rows 2,3.
         * Output bytes 32-63 = rows 2,3 xor the original chaining value,
         * which is reloaded from rdi here.
         */
        movdqu  xmm4, xmmword ptr [rdi]
        movdqu  xmm5, xmmword ptr [rdi+0x10]
        pxor    xmm0, xmm2
        pxor    xmm1, xmm3
        pxor    xmm2, xmm4
        pxor    xmm3, xmm5
        movups  xmmword ptr [r9], xmm0
        movups  xmmword ptr [r9+0x10], xmm1
        movups  xmmword ptr [r9+0x20], xmm2
        movups  xmmword ptr [r9+0x30], xmm3
        RET

.size zfs_blake3_hash_many_sse41, . - zfs_blake3_hash_many_sse41
.size zfs_blake3_compress_in_place_sse41, . - zfs_blake3_compress_in_place_sse41
.size zfs_blake3_compress_xof_sse41, . - zfs_blake3_compress_xof_sse41

/*
 * Read-only constants.  The section starts 64-byte aligned and every table
 * is exactly 16 bytes long, so each label stays 16-byte aligned, which the
 * aligned loads (movaps/movdqa) and the SSE memory operands that reference
 * these tables rely on.
 */
#ifdef __APPLE__
.static_data
#else
.section .rodata
#endif
.p2align 6
/* The four 32-bit IV words that the compress functions load as state row 2. */
BLAKE3_IV:
        .long  0x6A09E667, 0xBB67AE85
        .long  0x3C6EF372, 0xA54FF53A
/* pshufb mask: rotates every dword by 16 bits. */
ROT16:
        .byte  2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9, 14, 15, 12, 13
/* pshufb mask: rotates every dword right by 8 bits. */
ROT8:
        .byte  1, 2, 3, 0, 5, 6, 7, 4, 9, 10, 11, 8, 13, 14, 15, 12
/* Lane offsets 0..3; hash_many masks them and adds them to the low counter words. */
ADD0:
        .long  0, 1, 2, 3
/* Per-batch counter step; hash_many masks it and adds it after each group of four inputs. */
ADD1:
        .long  4, 4, 4, 4
/* Each IV word replicated across all four lanes. */
BLAKE3_IV_0:
        .long  0x6A09E667, 0x6A09E667, 0x6A09E667, 0x6A09E667
BLAKE3_IV_1:
        .long  0xBB67AE85, 0xBB67AE85, 0xBB67AE85, 0xBB67AE85
BLAKE3_IV_2:
        .long  0x3C6EF372, 0x3C6EF372, 0x3C6EF372, 0x3C6EF372
BLAKE3_IV_3:
        .long  0xA54FF53A, 0xA54FF53A, 0xA54FF53A, 0xA54FF53A
/* Block length in bytes, replicated across all four lanes. */
BLAKE3_BLOCK_LEN:
        .long  64, 64, 64, 64
/* Sign-bit mask: xor-ing both operands with it lets the signed pcmpgtd detect unsigned counter carry. */
CMP_MSB_MASK:
        .long  0x80000000, 0x80000000, 0x80000000, 0x80000000

#endif /* HAVE_SSE4_1 */

/* On ELF targets emit an empty .note.GNU-stack section, the conventional marker that no executable stack is needed. */
#ifdef __ELF__
.section .note.GNU-stack,"",%progbits
#endif