; This file is generated from a similarly-named Perl script in the BoringSSL
; source tree. Do not edit by hand.
;
; Syntax: NASM (Intel operand order).  ABI: Microsoft x64; every entry point
; starts with a prologue that moves the Win64 argument registers into the
; register roles the bodies use (rdi, rsi, rdx, rcx, r8).
;
; Layout note: one statement per line, with no listing numbers in front of
; the statements.  A ';' starts a comment that runs to the end of the
; physical line, so statements must never share a line with a comment that
; precedes them.

default	rel
%define XMMWORD
%define YMMWORD
%define ZMMWORD
section	.text code align=64


EXTERN	GFp_ia32cap_P

;-----------------------------------------------------------------------
; Constant tables shared by all code paths.
;-----------------------------------------------------------------------
ALIGN	64
$L$zero:
	DD	0,0,0,0
$L$one:
	DD	1,0,0,0			; +1 on the first dword (block counter)
$L$inc:
	DD	0,1,2,3			; per-lane counter offsets, 4x path
$L$four:
	DD	4,4,4,4			; counter step per 4x iteration
$L$incy:
	DD	0,2,4,6,1,3,5,7		; per-lane counter offsets, 8x path
$L$eight:
	DD	8,8,8,8,8,8,8,8		; counter step per 8x iteration
$L$rot16:
; pshufb mask: rotates every dword left by 16 bits
DB	0x2,0x3,0x0,0x1,0x6,0x7,0x4,0x5,0xa,0xb,0x8,0x9,0xe,0xf,0xc,0xd
$L$rot24:
; pshufb mask: rotates every dword left by 8 bits (right by 24)
DB	0x3,0x0,0x1,0x2,0x7,0x4,0x5,0x6,0xb,0x8,0x9,0xa,0xf,0xc,0xd,0xe
$L$sigma:
; ASCII "expand 32-byte k" followed by a NUL byte
DB	101,120,112,97,110,100,32,51,50,45,98,121,116,101,32,107
DB	0
ALIGN	64
$L$zeroz:
	DD	0,0,0,0,1,0,0,0,2,0,0,0,3,0,0,0
$L$fourz:
	DD	4,0,0,0,4,0,0,0,4,0,0,0,4,0,0,0
$L$incz:
	DD	0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
$L$sixteen:
	DD	16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16
; ASCII "ChaCha20 for x86_64, CRYPTOGAMS by <appro@openssl.org>", NUL-terminated
DB	67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54
DB	95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32
DB	98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115
DB	108,46,111,114,103,62,0
global	GFp_ChaCha20_ctr32

;-----------------------------------------------------------------------
; GFp_ChaCha20_ctr32 -- exported entry point; scalar (integer-register)
; ChaCha20 path plus dispatch to the SIMD paths.
;
; Win64 arguments and the roles they are moved into:
;   rcx      -> rdi  output pointer (64 bytes stored per full block)
;   rdx      -> rsi  input pointer  (XORed with the generated block)
;   r8       -> rdx  byte count; zero returns immediately
;   r9       -> rcx  pointer to 32 bytes loaded as state words 4..11 (key)
;   [rsp+40] -> r8   pointer to 16 bytes loaded as state words 12..15
;                    (counter/nonce); only a register copy is incremented,
;                    the caller's buffer is never written
;
; rdi/rsi are callee-saved on Win64; they are parked in the caller's home
; space at [rsp+8]/[rsp+16] and reloaded in every epilogue.
;
; Scalar-path frame (after six pushes and "sub rsp,64+24"):
;   [rsp+ 0..63]  state words; words 0..3 are only stored in the tail case
;   [rsp+64]      remaining byte count
;   [rsp+72]      input pointer
;   [rsp+80]      output pointer
; Register roles inside the round loop:
;   eax,ebx,ecx,edx = words 0..3      r8d..r11d  = words 4..7
;   esi,edi         = words 8,9 or 10,11 (the other pair lives at
;                     [rsp+32..47]; the pairs are swapped mid-round)
;   r12d..r15d      = words 12..15    ebp = double-round counter
;-----------------------------------------------------------------------
ALIGN	64
GFp_ChaCha20_ctr32:
	mov	QWORD[8+rsp],rdi	;WIN64 prologue
	mov	QWORD[16+rsp],rsi
	mov	rax,rsp
$L$SEH_begin_GFp_ChaCha20_ctr32:
	mov	rdi,rcx
	mov	rsi,rdx
	mov	rdx,r8
	mov	rcx,r9
	mov	r8,QWORD[40+rsp]



	cmp	rdx,0
	je	NEAR $L$no_data			; nothing to do for length 0
	mov	r10,QWORD[((GFp_ia32cap_P+4))]	; capability dwords 1 and 2
	test	r10d,512			; bit 9 of dword 1 selects the SSSE3 path
	jnz	NEAR $L$ChaCha20_ssse3

	push	rbx

	push	rbp

	push	r12

	push	r13

	push	r14

	push	r15

	sub	rsp,64+24			; leaves rsp 16-byte aligned for movdqa

$L$ctr32_body:


	movdqu	xmm1,XMMWORD[rcx]		; words 4..7
	movdqu	xmm2,XMMWORD[16+rcx]		; words 8..11
	movdqu	xmm3,XMMWORD[r8]		; words 12..15
	movdqa	xmm4,XMMWORD[$L$one]


	movdqa	XMMWORD[16+rsp],xmm1
	movdqa	XMMWORD[32+rsp],xmm2
	movdqa	XMMWORD[48+rsp],xmm3
	mov	rbp,rdx				; rbp = bytes remaining
	jmp	NEAR $L$oop_outer

; One iteration per 64-byte block.
ALIGN	32
$L$oop_outer:
	mov	eax,0x61707865			; "expa"
	mov	ebx,0x3320646e			; "nd 3"
	mov	ecx,0x79622d32			; "2-by"
	mov	edx,0x6b206574			; "te k"
	mov	r8d,DWORD[16+rsp]
	mov	r9d,DWORD[20+rsp]
	mov	r10d,DWORD[24+rsp]
	mov	r11d,DWORD[28+rsp]
	movd	r12d,xmm3			; current block counter (word 12)
	mov	r13d,DWORD[52+rsp]
	mov	r14d,DWORD[56+rsp]
	mov	r15d,DWORD[60+rsp]

	mov	QWORD[((64+0))+rsp],rbp		; spill length; ebp is reused below
	mov	ebp,10				; 10 double rounds
	mov	QWORD[((64+8))+rsp],rsi		; spill input pointer
DB	102,72,15,126,214			; movq rsi,xmm2  (words 8 and 9)
	mov	QWORD[((64+16))+rsp],rdi	; spill output pointer
	mov	rdi,rsi
	shr	rdi,32				; esi = word 8, edi = word 9
	jmp	NEAR $L$oop

; One double round: column round followed by diagonal round.
ALIGN	32
$L$oop:
	; column quarter-rounds on (0,4,8,12) and (1,5,9,13)
	add	eax,r8d
	xor	r12d,eax
	rol	r12d,16
	add	ebx,r9d
	xor	r13d,ebx
	rol	r13d,16
	add	esi,r12d
	xor	r8d,esi
	rol	r8d,12
	add	edi,r13d
	xor	r9d,edi
	rol	r9d,12
	add	eax,r8d
	xor	r12d,eax
	rol	r12d,8
	add	ebx,r9d
	xor	r13d,ebx
	rol	r13d,8
	add	esi,r12d
	xor	r8d,esi
	rol	r8d,7
	add	edi,r13d
	xor	r9d,edi
	rol	r9d,7
	; swap in words 10,11; park words 8,9
	mov	DWORD[32+rsp],esi
	mov	DWORD[36+rsp],edi
	mov	esi,DWORD[40+rsp]
	mov	edi,DWORD[44+rsp]
	; column quarter-rounds on (2,6,10,14) and (3,7,11,15)
	add	ecx,r10d
	xor	r14d,ecx
	rol	r14d,16
	add	edx,r11d
	xor	r15d,edx
	rol	r15d,16
	add	esi,r14d
	xor	r10d,esi
	rol	r10d,12
	add	edi,r15d
	xor	r11d,edi
	rol	r11d,12
	add	ecx,r10d
	xor	r14d,ecx
	rol	r14d,8
	add	edx,r11d
	xor	r15d,edx
	rol	r15d,8
	add	esi,r14d
	xor	r10d,esi
	rol	r10d,7
	add	edi,r15d
	xor	r11d,edi
	rol	r11d,7
	; diagonal quarter-rounds on (0,5,10,15) and (1,6,11,12)
	add	eax,r9d
	xor	r15d,eax
	rol	r15d,16
	add	ebx,r10d
	xor	r12d,ebx
	rol	r12d,16
	add	esi,r15d
	xor	r9d,esi
	rol	r9d,12
	add	edi,r12d
	xor	r10d,edi
	rol	r10d,12
	add	eax,r9d
	xor	r15d,eax
	rol	r15d,8
	add	ebx,r10d
	xor	r12d,ebx
	rol	r12d,8
	add	esi,r15d
	xor	r9d,esi
	rol	r9d,7
	add	edi,r12d
	xor	r10d,edi
	rol	r10d,7
	; swap back to words 8,9; park words 10,11
	mov	DWORD[40+rsp],esi
	mov	DWORD[44+rsp],edi
	mov	esi,DWORD[32+rsp]
	mov	edi,DWORD[36+rsp]
	; diagonal quarter-rounds on (2,7,8,13) and (3,4,9,14)
	add	ecx,r11d
	xor	r13d,ecx
	rol	r13d,16
	add	edx,r8d
	xor	r14d,edx
	rol	r14d,16
	add	esi,r13d
	xor	r11d,esi
	rol	r11d,12
	add	edi,r14d
	xor	r8d,edi
	rol	r8d,12
	add	ecx,r11d
	xor	r13d,ecx
	rol	r13d,8
	add	edx,r8d
	xor	r14d,edx
	rol	r14d,8
	add	esi,r13d
	xor	r11d,esi
	rol	r11d,7
	add	edi,r14d
	xor	r8d,edi
	rol	r8d,7
	dec	ebp
	jnz	NEAR $L$oop
	mov	DWORD[36+rsp],edi		; worked words 8..11 now all at [rsp+32]
	mov	DWORD[32+rsp],esi
	mov	rbp,QWORD[64+rsp]		; reload length
	movdqa	xmm1,xmm2			; original words 8..11
	mov	rsi,QWORD[((64+8))+rsp]		; reload input pointer
	paddd	xmm3,xmm4			; counter += 1 for the next block
	mov	rdi,QWORD[((64+16))+rsp]	; reload output pointer

	; feed-forward: add the input state to the worked state
	add	eax,0x61707865
	add	ebx,0x3320646e
	add	ecx,0x79622d32
	add	edx,0x6b206574
	add	r8d,DWORD[16+rsp]
	add	r9d,DWORD[20+rsp]
	add	r10d,DWORD[24+rsp]
	add	r11d,DWORD[28+rsp]
	add	r12d,DWORD[48+rsp]
	add	r13d,DWORD[52+rsp]
	add	r14d,DWORD[56+rsp]
	add	r15d,DWORD[60+rsp]
	paddd	xmm1,XMMWORD[32+rsp]		; words 8..11 of the output block

	cmp	rbp,64
	jb	NEAR $L$tail			; fewer than 64 bytes left

	xor	eax,DWORD[rsi]
	xor	ebx,DWORD[4+rsi]
	xor	ecx,DWORD[8+rsi]
	xor	edx,DWORD[12+rsi]
	xor	r8d,DWORD[16+rsi]
	xor	r9d,DWORD[20+rsi]
	xor	r10d,DWORD[24+rsi]
	xor	r11d,DWORD[28+rsi]
	movdqu	xmm0,XMMWORD[32+rsi]
	xor	r12d,DWORD[48+rsi]
	xor	r13d,DWORD[52+rsi]
	xor	r14d,DWORD[56+rsi]
	xor	r15d,DWORD[60+rsi]
	lea	rsi,[64+rsi]
	pxor	xmm0,xmm1

	movdqa	XMMWORD[32+rsp],xmm2		; restore original words 8..11
	movd	DWORD[48+rsp],xmm3		; store the incremented counter

	mov	DWORD[rdi],eax
	mov	DWORD[4+rdi],ebx
	mov	DWORD[8+rdi],ecx
	mov	DWORD[12+rdi],edx
	mov	DWORD[16+rdi],r8d
	mov	DWORD[20+rdi],r9d
	mov	DWORD[24+rdi],r10d
	mov	DWORD[28+rdi],r11d
	movdqu	XMMWORD[32+rdi],xmm0
	mov	DWORD[48+rdi],r12d
	mov	DWORD[52+rdi],r13d
	mov	DWORD[56+rdi],r14d
	mov	DWORD[60+rdi],r15d
	lea	rdi,[64+rdi]

	sub	rbp,64
	jnz	NEAR $L$oop_outer

	jmp	NEAR $L$done

; Partial final block: spill the whole generated block to [rsp+0..63] and
; XOR it into the output one byte at a time (rbp = bytes left, 1..63).
ALIGN	16
$L$tail:
	mov	DWORD[rsp],eax
	mov	DWORD[4+rsp],ebx
	xor	rbx,rbx				; rbx = byte index
	mov	DWORD[8+rsp],ecx
	mov	DWORD[12+rsp],edx
	mov	DWORD[16+rsp],r8d
	mov	DWORD[20+rsp],r9d
	mov	DWORD[24+rsp],r10d
	mov	DWORD[28+rsp],r11d
	movdqa	XMMWORD[32+rsp],xmm1
	mov	DWORD[48+rsp],r12d
	mov	DWORD[52+rsp],r13d
	mov	DWORD[56+rsp],r14d
	mov	DWORD[60+rsp],r15d

$L$oop_tail:
	movzx	eax,BYTE[rbx*1+rsi]
	movzx	edx,BYTE[rbx*1+rsp]
	lea	rbx,[1+rbx]
	xor	eax,edx
	mov	BYTE[((-1))+rbx*1+rdi],al
	dec	rbp
	jnz	NEAR $L$oop_tail

$L$done:
	lea	rsi,[((64+24+48))+rsp]		; rsi = rsp as it was before the pushes
	mov	r15,QWORD[((-48))+rsi]

	mov	r14,QWORD[((-40))+rsi]

	mov	r13,QWORD[((-32))+rsi]

	mov	r12,QWORD[((-24))+rsi]

	mov	rbp,QWORD[((-16))+rsi]

	mov	rbx,QWORD[((-8))+rsi]

	lea	rsp,[rsi]

$L$no_data:
	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
	mov	rsi,QWORD[16+rsp]
	DB	0F3h,0C3h		;repret

$L$SEH_end_GFp_ChaCha20_ctr32:

;-----------------------------------------------------------------------
; ChaCha20_ssse3 -- one block per iteration; the four state rows live in
; xmm0..xmm3.  Same argument roles as GFp_ChaCha20_ctr32.  The usual way
; in is the jump to $L$ChaCha20_ssse3 from GFp_ChaCha20_ctr32, which has
; already remapped the arguments and saved rdi/rsi.
;
; Inputs longer than 128 bytes are handed on to $L$ChaCha20_4x.
;
; Frame ("sub rsp,64+40", r9 = rsp at entry):
;   [rsp+0..63]          copy of the input state (doubles as the tail buffer)
;   [r9-40], [r9-24]     saved xmm6, xmm7 (callee-saved on Win64)
; xmm6 / xmm7 hold the $L$rot16 / $L$rot24 shuffle masks.
;-----------------------------------------------------------------------
ALIGN	32
ChaCha20_ssse3:
	mov	QWORD[8+rsp],rdi	;WIN64 prologue
	mov	QWORD[16+rsp],rsi
	mov	rax,rsp
$L$SEH_begin_ChaCha20_ssse3:
	mov	rdi,rcx
	mov	rsi,rdx
	mov	rdx,r8
	mov	rcx,r9
	mov	r8,QWORD[40+rsp]


$L$ChaCha20_ssse3:

	mov	r9,rsp				; r9 = frame anchor for the epilogue

	cmp	rdx,128
	ja	NEAR $L$ChaCha20_4x

$L$do_sse3_after_all:
	sub	rsp,64+40
	movaps	XMMWORD[(-40)+r9],xmm6
	movaps	XMMWORD[(-24)+r9],xmm7
$L$ssse3_body:
	movdqa	xmm0,XMMWORD[$L$sigma]
	movdqu	xmm1,XMMWORD[rcx]
	movdqu	xmm2,XMMWORD[16+rcx]
	movdqu	xmm3,XMMWORD[r8]
	movdqa	xmm6,XMMWORD[$L$rot16]
	movdqa	xmm7,XMMWORD[$L$rot24]

	movdqa	XMMWORD[rsp],xmm0
	movdqa	XMMWORD[16+rsp],xmm1
	movdqa	XMMWORD[32+rsp],xmm2
	movdqa	XMMWORD[48+rsp],xmm3
	mov	r8,10				; 10 double rounds
	jmp	NEAR $L$oop_ssse3

; Reload the state for the next block and bump the counter by one.
ALIGN	32
$L$oop_outer_ssse3:
	movdqa	xmm3,XMMWORD[$L$one]
	movdqa	xmm0,XMMWORD[rsp]
	movdqa	xmm1,XMMWORD[16+rsp]
	movdqa	xmm2,XMMWORD[32+rsp]
	paddd	xmm3,XMMWORD[48+rsp]
	mov	r8,10
	movdqa	XMMWORD[48+rsp],xmm3
	jmp	NEAR $L$oop_ssse3

ALIGN	32
$L$oop_ssse3:
	; column round
	paddd	xmm0,xmm1
	pxor	xmm3,xmm0
DB	102,15,56,0,222				; pshufb xmm3,xmm6  (rotl 16)
	paddd	xmm2,xmm3
	pxor	xmm1,xmm2
	movdqa	xmm4,xmm1
	psrld	xmm1,20
	pslld	xmm4,12
	por	xmm1,xmm4			; rotl 12
	paddd	xmm0,xmm1
	pxor	xmm3,xmm0
DB	102,15,56,0,223				; pshufb xmm3,xmm7  (rotl 8)
	paddd	xmm2,xmm3
	pxor	xmm1,xmm2
	movdqa	xmm4,xmm1
	psrld	xmm1,25
	pslld	xmm4,7
	por	xmm1,xmm4			; rotl 7
	; rotate rows 1..3 so the diagonals line up as columns
	pshufd	xmm2,xmm2,78
	pshufd	xmm1,xmm1,57
	pshufd	xmm3,xmm3,147
	nop
	; diagonal round
	paddd	xmm0,xmm1
	pxor	xmm3,xmm0
DB	102,15,56,0,222				; pshufb xmm3,xmm6  (rotl 16)
	paddd	xmm2,xmm3
	pxor	xmm1,xmm2
	movdqa	xmm4,xmm1
	psrld	xmm1,20
	pslld	xmm4,12
	por	xmm1,xmm4			; rotl 12
	paddd	xmm0,xmm1
	pxor	xmm3,xmm0
DB	102,15,56,0,223				; pshufb xmm3,xmm7  (rotl 8)
	paddd	xmm2,xmm3
	pxor	xmm1,xmm2
	movdqa	xmm4,xmm1
	psrld	xmm1,25
	pslld	xmm4,7
	por	xmm1,xmm4			; rotl 7
	; rotate the rows back to column order
	pshufd	xmm2,xmm2,78
	pshufd	xmm1,xmm1,147
	pshufd	xmm3,xmm3,57
	dec	r8
	jnz	NEAR $L$oop_ssse3
	; feed-forward
	paddd	xmm0,XMMWORD[rsp]
	paddd	xmm1,XMMWORD[16+rsp]
	paddd	xmm2,XMMWORD[32+rsp]
	paddd	xmm3,XMMWORD[48+rsp]

	cmp	rdx,64
	jb	NEAR $L$tail_ssse3

	movdqu	xmm4,XMMWORD[rsi]
	movdqu	xmm5,XMMWORD[16+rsi]
	pxor	xmm0,xmm4
	movdqu	xmm4,XMMWORD[32+rsi]
	pxor	xmm1,xmm5
	movdqu	xmm5,XMMWORD[48+rsi]
	lea	rsi,[64+rsi]
	pxor	xmm2,xmm4
	pxor	xmm3,xmm5

	movdqu	XMMWORD[rdi],xmm0
	movdqu	XMMWORD[16+rdi],xmm1
	movdqu	XMMWORD[32+rdi],xmm2
	movdqu	XMMWORD[48+rdi],xmm3
	lea	rdi,[64+rdi]

	sub	rdx,64
	jnz	NEAR $L$oop_outer_ssse3

	jmp	NEAR $L$done_ssse3

; Partial final block: the generated block overwrites the state copy at
; [rsp+0..63] and is XORed in byte by byte (rdx = bytes left, 1..63).
ALIGN	16
$L$tail_ssse3:
	movdqa	XMMWORD[rsp],xmm0
	movdqa	XMMWORD[16+rsp],xmm1
	movdqa	XMMWORD[32+rsp],xmm2
	movdqa	XMMWORD[48+rsp],xmm3
	xor	r8,r8				; r8 = byte index

$L$oop_tail_ssse3:
	movzx	eax,BYTE[r8*1+rsi]
	movzx	ecx,BYTE[r8*1+rsp]
	lea	r8,[1+r8]
	xor	eax,ecx
	mov	BYTE[((-1))+r8*1+rdi],al
	dec	rdx
	jnz	NEAR $L$oop_tail_ssse3

$L$done_ssse3:
	movaps	xmm6,XMMWORD[((-40))+r9]
	movaps	xmm7,XMMWORD[((-24))+r9]
	lea	rsp,[r9]

$L$ssse3_epilogue:
	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
	mov	rsi,QWORD[16+rsp]
	DB	0F3h,0C3h		;repret

$L$SEH_end_ChaCha20_ssse3:

ALIGN	32
;-----------------------------------------------------------------------
; ChaCha20_4x -- four blocks per iteration.  Each xmm register holds one
; state word for four consecutive blocks (one block per dword lane); the
; result is transposed back to block order before it is XORed with input.
;
; Register roles on arrival at $L$ChaCha20_4x (the usual way in, by jump
; from $L$ChaCha20_ssse3):
;   rdi = output, rsi = input, rdx = byte count,
;   rcx = 32 bytes for state words 4..11, r8 = 16 bytes for words 12..15,
;   r10 = qword read from GFp_ia32cap_P+4 by GFp_ChaCha20_ctr32.
;
; Frame ("sub rsp,0x140+168", r9 = rsp at entry, rcx = rsp+256 after setup):
;   [rsp+  0.. 63]  scratch: spilled rows during the rounds, then the
;                   transposed first rows / the tail buffer
;   [rsp+ 64..127]  words 0..3 (sigma), each broadcast to 4 lanes
;   [rsp+128..191]  words 4..7 broadcast
;   [rsp+192..255]  words 8..11 broadcast
;   [rsp+256..319]  words 12..15 broadcast (word 12 carries lane counters)
;   [r9-168..r9-9]  saved xmm6..xmm15 (callee-saved on Win64)
; r10 -> $L$rot16 and r11 -> $L$rot24 while the rounds run.
;-----------------------------------------------------------------------
ChaCha20_4x:
	mov	QWORD[8+rsp],rdi	;WIN64 prologue
	mov	QWORD[16+rsp],rsi
	mov	rax,rsp
$L$SEH_begin_ChaCha20_4x:
	mov	rdi,rcx
	mov	rsi,rdx
	mov	rdx,r8
	mov	rcx,r9
	mov	r8,QWORD[40+rsp]


$L$ChaCha20_4x:

	mov	r9,rsp				; r9 = frame anchor for the epilogue

	mov	r11,r10
	shr	r10,32				; r10 = dword at GFp_ia32cap_P+8
	test	r10,32				; bit 5 selects the 8x (ymm) path
	jnz	NEAR $L$ChaCha20_8x
	cmp	rdx,192
	ja	NEAR $L$proceed4x

	; Length <= 192: fall back to the 1x path when, of capability bits
	; 26 and 22 (mask 0x04400000), only bit 22 is set.
	; NOTE(review): upstream describes this as "MOVBE without XSAVE",
	; i.e. an Atom check -- confirm against the Perl source.
	and	r11,71303168
	cmp	r11,4194304
	je	NEAR $L$do_sse3_after_all

$L$proceed4x:
	sub	rsp,0x140+168
	movaps	XMMWORD[(-168)+r9],xmm6
	movaps	XMMWORD[(-152)+r9],xmm7
	movaps	XMMWORD[(-136)+r9],xmm8
	movaps	XMMWORD[(-120)+r9],xmm9
	movaps	XMMWORD[(-104)+r9],xmm10
	movaps	XMMWORD[(-88)+r9],xmm11
	movaps	XMMWORD[(-72)+r9],xmm12
	movaps	XMMWORD[(-56)+r9],xmm13
	movaps	XMMWORD[(-40)+r9],xmm14
	movaps	XMMWORD[(-24)+r9],xmm15
$L$4x_body:
	movdqa	xmm11,XMMWORD[$L$sigma]
	movdqu	xmm15,XMMWORD[rcx]
	movdqu	xmm7,XMMWORD[16+rcx]
	movdqu	xmm3,XMMWORD[r8]
	lea	rcx,[256+rsp]			; rcx is now a frame base, not the key
	lea	r10,[$L$rot16]
	lea	r11,[$L$rot24]

	; broadcast words 0..3 into xmm8..xmm11
	pshufd	xmm8,xmm11,0x00
	pshufd	xmm9,xmm11,0x55
	movdqa	XMMWORD[64+rsp],xmm8
	pshufd	xmm10,xmm11,0xaa
	movdqa	XMMWORD[80+rsp],xmm9
	pshufd	xmm11,xmm11,0xff
	movdqa	XMMWORD[96+rsp],xmm10
	movdqa	XMMWORD[112+rsp],xmm11

	; broadcast words 4..7 into xmm12..xmm15
	pshufd	xmm12,xmm15,0x00
	pshufd	xmm13,xmm15,0x55
	movdqa	XMMWORD[(128-256)+rcx],xmm12
	pshufd	xmm14,xmm15,0xaa
	movdqa	XMMWORD[(144-256)+rcx],xmm13
	pshufd	xmm15,xmm15,0xff
	movdqa	XMMWORD[(160-256)+rcx],xmm14
	movdqa	XMMWORD[(176-256)+rcx],xmm15

	; broadcast words 8..11 into xmm4..xmm7
	pshufd	xmm4,xmm7,0x00
	pshufd	xmm5,xmm7,0x55
	movdqa	XMMWORD[(192-256)+rcx],xmm4
	pshufd	xmm6,xmm7,0xaa
	movdqa	XMMWORD[(208-256)+rcx],xmm5
	pshufd	xmm7,xmm7,0xff
	movdqa	XMMWORD[(224-256)+rcx],xmm6
	movdqa	XMMWORD[(240-256)+rcx],xmm7

	; broadcast words 12..15 into xmm0..xmm3; lane counters = ctr + 0,1,2,3
	pshufd	xmm0,xmm3,0x00
	pshufd	xmm1,xmm3,0x55
	paddd	xmm0,XMMWORD[$L$inc]
	pshufd	xmm2,xmm3,0xaa
	movdqa	XMMWORD[(272-256)+rcx],xmm1
	pshufd	xmm3,xmm3,0xff
	movdqa	XMMWORD[(288-256)+rcx],xmm2
	movdqa	XMMWORD[(304-256)+rcx],xmm3

	jmp	NEAR $L$oop_enter4x

; Reload the broadcast state for the next four blocks; counters += 4.
ALIGN	32
$L$oop_outer4x:
	movdqa	xmm8,XMMWORD[64+rsp]
	movdqa	xmm9,XMMWORD[80+rsp]
	movdqa	xmm10,XMMWORD[96+rsp]
	movdqa	xmm11,XMMWORD[112+rsp]
	movdqa	xmm12,XMMWORD[((128-256))+rcx]
	movdqa	xmm13,XMMWORD[((144-256))+rcx]
	movdqa	xmm14,XMMWORD[((160-256))+rcx]
	movdqa	xmm15,XMMWORD[((176-256))+rcx]
	movdqa	xmm4,XMMWORD[((192-256))+rcx]
	movdqa	xmm5,XMMWORD[((208-256))+rcx]
	movdqa	xmm6,XMMWORD[((224-256))+rcx]
	movdqa	xmm7,XMMWORD[((240-256))+rcx]
	movdqa	xmm0,XMMWORD[((256-256))+rcx]
	movdqa	xmm1,XMMWORD[((272-256))+rcx]
	movdqa	xmm2,XMMWORD[((288-256))+rcx]
	movdqa	xmm3,XMMWORD[((304-256))+rcx]
	paddd	xmm0,XMMWORD[$L$four]

$L$oop_enter4x:
	movdqa	XMMWORD[32+rsp],xmm6		; spill words 10,11; xmm6/xmm7
	movdqa	XMMWORD[48+rsp],xmm7		; become mask/temporary registers
	movdqa	xmm7,XMMWORD[r10]		; xmm7 = rot16 mask
	mov	eax,10				; 10 double rounds
	movdqa	XMMWORD[(256-256)+rcx],xmm0	; save this iteration's counters
	jmp	NEAR $L$oop4x

; One double round.  Words 8..11 do not all fit in registers: xmm4/xmm5
; hold either words 8,9 or words 10,11, with the other pair spilled to
; [rsp+0..31] or [rsp+32..63].
ALIGN	32
$L$oop4x:
	; column quarter-rounds on (0,4,8,12) and (1,5,9,13)
	paddd	xmm8,xmm12
	paddd	xmm9,xmm13
	pxor	xmm0,xmm8
	pxor	xmm1,xmm9
DB	102,15,56,0,199				; pshufb xmm0,xmm7  (rotl 16)
DB	102,15,56,0,207				; pshufb xmm1,xmm7
	paddd	xmm4,xmm0
	paddd	xmm5,xmm1
	pxor	xmm12,xmm4
	pxor	xmm13,xmm5
	movdqa	xmm6,xmm12
	pslld	xmm12,12
	psrld	xmm6,20
	movdqa	xmm7,xmm13
	pslld	xmm13,12
	por	xmm12,xmm6
	psrld	xmm7,20
	movdqa	xmm6,XMMWORD[r11]		; xmm6 = rot24 mask
	por	xmm13,xmm7
	paddd	xmm8,xmm12
	paddd	xmm9,xmm13
	pxor	xmm0,xmm8
	pxor	xmm1,xmm9
DB	102,15,56,0,198				; pshufb xmm0,xmm6  (rotl 8)
DB	102,15,56,0,206				; pshufb xmm1,xmm6
	paddd	xmm4,xmm0
	paddd	xmm5,xmm1
	pxor	xmm12,xmm4
	pxor	xmm13,xmm5
	movdqa	xmm7,xmm12
	pslld	xmm12,7
	psrld	xmm7,25
	movdqa	xmm6,xmm13
	pslld	xmm13,7
	por	xmm12,xmm7
	psrld	xmm6,25
	movdqa	xmm7,XMMWORD[r10]		; xmm7 = rot16 mask
	por	xmm13,xmm6
	; park words 8,9; bring in words 10,11
	movdqa	XMMWORD[rsp],xmm4
	movdqa	XMMWORD[16+rsp],xmm5
	movdqa	xmm4,XMMWORD[32+rsp]
	movdqa	xmm5,XMMWORD[48+rsp]
	; column quarter-rounds on (2,6,10,14) and (3,7,11,15)
	paddd	xmm10,xmm14
	paddd	xmm11,xmm15
	pxor	xmm2,xmm10
	pxor	xmm3,xmm11
DB	102,15,56,0,215				; pshufb xmm2,xmm7  (rotl 16)
DB	102,15,56,0,223				; pshufb xmm3,xmm7
	paddd	xmm4,xmm2
	paddd	xmm5,xmm3
	pxor	xmm14,xmm4
	pxor	xmm15,xmm5
	movdqa	xmm6,xmm14
	pslld	xmm14,12
	psrld	xmm6,20
	movdqa	xmm7,xmm15
	pslld	xmm15,12
	por	xmm14,xmm6
	psrld	xmm7,20
	movdqa	xmm6,XMMWORD[r11]		; xmm6 = rot24 mask
	por	xmm15,xmm7
	paddd	xmm10,xmm14
	paddd	xmm11,xmm15
	pxor	xmm2,xmm10
	pxor	xmm3,xmm11
DB	102,15,56,0,214				; pshufb xmm2,xmm6  (rotl 8)
DB	102,15,56,0,222				; pshufb xmm3,xmm6
	paddd	xmm4,xmm2
	paddd	xmm5,xmm3
	pxor	xmm14,xmm4
	pxor	xmm15,xmm5
	movdqa	xmm7,xmm14
	pslld	xmm14,7
	psrld	xmm7,25
	movdqa	xmm6,xmm15
	pslld	xmm15,7
	por	xmm14,xmm7
	psrld	xmm6,25
	movdqa	xmm7,XMMWORD[r10]		; xmm7 = rot16 mask
	por	xmm15,xmm6
	; diagonal quarter-rounds on (0,5,10,15) and (1,6,11,12)
	paddd	xmm8,xmm13
	paddd	xmm9,xmm14
	pxor	xmm3,xmm8
	pxor	xmm0,xmm9
DB	102,15,56,0,223				; pshufb xmm3,xmm7  (rotl 16)
DB	102,15,56,0,199				; pshufb xmm0,xmm7
	paddd	xmm4,xmm3
	paddd	xmm5,xmm0
	pxor	xmm13,xmm4
	pxor	xmm14,xmm5
	movdqa	xmm6,xmm13
	pslld	xmm13,12
	psrld	xmm6,20
	movdqa	xmm7,xmm14
	pslld	xmm14,12
	por	xmm13,xmm6
	psrld	xmm7,20
	movdqa	xmm6,XMMWORD[r11]		; xmm6 = rot24 mask
	por	xmm14,xmm7
	paddd	xmm8,xmm13
	paddd	xmm9,xmm14
	pxor	xmm3,xmm8
	pxor	xmm0,xmm9
DB	102,15,56,0,222				; pshufb xmm3,xmm6  (rotl 8)
DB	102,15,56,0,198				; pshufb xmm0,xmm6
	paddd	xmm4,xmm3
	paddd	xmm5,xmm0
	pxor	xmm13,xmm4
	pxor	xmm14,xmm5
	movdqa	xmm7,xmm13
	pslld	xmm13,7
	psrld	xmm7,25
	movdqa	xmm6,xmm14
	pslld	xmm14,7
	por	xmm13,xmm7
	psrld	xmm6,25
	movdqa	xmm7,XMMWORD[r10]		; xmm7 = rot16 mask
	por	xmm14,xmm6
	; park words 10,11; bring back words 8,9
	movdqa	XMMWORD[32+rsp],xmm4
	movdqa	XMMWORD[48+rsp],xmm5
	movdqa	xmm4,XMMWORD[rsp]
	movdqa	xmm5,XMMWORD[16+rsp]
	; diagonal quarter-rounds on (2,7,8,13) and (3,4,9,14)
	paddd	xmm10,xmm15
	paddd	xmm11,xmm12
	pxor	xmm1,xmm10
	pxor	xmm2,xmm11
DB	102,15,56,0,207				; pshufb xmm1,xmm7  (rotl 16)
DB	102,15,56,0,215				; pshufb xmm2,xmm7
	paddd	xmm4,xmm1
	paddd	xmm5,xmm2
	pxor	xmm15,xmm4
	pxor	xmm12,xmm5
	movdqa	xmm6,xmm15
	pslld	xmm15,12
	psrld	xmm6,20
	movdqa	xmm7,xmm12
	pslld	xmm12,12
	por	xmm15,xmm6
	psrld	xmm7,20
	movdqa	xmm6,XMMWORD[r11]		; xmm6 = rot24 mask
	por	xmm12,xmm7
	paddd	xmm10,xmm15
	paddd	xmm11,xmm12
	pxor	xmm1,xmm10
	pxor	xmm2,xmm11
DB	102,15,56,0,206				; pshufb xmm1,xmm6  (rotl 8)
DB	102,15,56,0,214				; pshufb xmm2,xmm6
	paddd	xmm4,xmm1
	paddd	xmm5,xmm2
	pxor	xmm15,xmm4
	pxor	xmm12,xmm5
	movdqa	xmm7,xmm15
	pslld	xmm15,7
	psrld	xmm7,25
	movdqa	xmm6,xmm12
	pslld	xmm12,7
	por	xmm15,xmm7
	psrld	xmm6,25
	movdqa	xmm7,XMMWORD[r10]		; xmm7 = rot16 mask
	por	xmm12,xmm6
	dec	eax
	jnz	NEAR $L$oop4x

	; Feed-forward and 4x4 dword transpose, one group of four words at a
	; time, turning "word across blocks" back into "row of one block".
	paddd	xmm8,XMMWORD[64+rsp]
	paddd	xmm9,XMMWORD[80+rsp]
	paddd	xmm10,XMMWORD[96+rsp]
	paddd	xmm11,XMMWORD[112+rsp]

	movdqa	xmm6,xmm8
	punpckldq	xmm8,xmm9
	movdqa	xmm7,xmm10
	punpckldq	xmm10,xmm11
	punpckhdq	xmm6,xmm9
	punpckhdq	xmm7,xmm11
	movdqa	xmm9,xmm8
	punpcklqdq	xmm8,xmm10
	movdqa	xmm11,xmm6
	punpcklqdq	xmm6,xmm7
	punpckhqdq	xmm9,xmm10
	punpckhqdq	xmm11,xmm7
	paddd	xmm12,XMMWORD[((128-256))+rcx]
	paddd	xmm13,XMMWORD[((144-256))+rcx]
	paddd	xmm14,XMMWORD[((160-256))+rcx]
	paddd	xmm15,XMMWORD[((176-256))+rcx]

	movdqa	XMMWORD[rsp],xmm8		; row 0 of block 0
	movdqa	XMMWORD[16+rsp],xmm9		; row 0 of block 1
	movdqa	xmm8,XMMWORD[32+rsp]		; reload words 10,11
	movdqa	xmm9,XMMWORD[48+rsp]

	movdqa	xmm10,xmm12
	punpckldq	xmm12,xmm13
	movdqa	xmm7,xmm14
	punpckldq	xmm14,xmm15
	punpckhdq	xmm10,xmm13
	punpckhdq	xmm7,xmm15
	movdqa	xmm13,xmm12
	punpcklqdq	xmm12,xmm14
	movdqa	xmm15,xmm10
	punpcklqdq	xmm10,xmm7
	punpckhqdq	xmm13,xmm14
	punpckhqdq	xmm15,xmm7
	paddd	xmm4,XMMWORD[((192-256))+rcx]
	paddd	xmm5,XMMWORD[((208-256))+rcx]
	paddd	xmm8,XMMWORD[((224-256))+rcx]
	paddd	xmm9,XMMWORD[((240-256))+rcx]

	movdqa	XMMWORD[32+rsp],xmm6		; row 0 of block 2
	movdqa	XMMWORD[48+rsp],xmm11		; row 0 of block 3

	movdqa	xmm14,xmm4
	punpckldq	xmm4,xmm5
	movdqa	xmm7,xmm8
	punpckldq	xmm8,xmm9
	punpckhdq	xmm14,xmm5
	punpckhdq	xmm7,xmm9
	movdqa	xmm5,xmm4
	punpcklqdq	xmm4,xmm8
	movdqa	xmm9,xmm14
	punpcklqdq	xmm14,xmm7
	punpckhqdq	xmm5,xmm8
	punpckhqdq	xmm9,xmm7
	paddd	xmm0,XMMWORD[((256-256))+rcx]
	paddd	xmm1,XMMWORD[((272-256))+rcx]
	paddd	xmm2,XMMWORD[((288-256))+rcx]
	paddd	xmm3,XMMWORD[((304-256))+rcx]

	movdqa	xmm8,xmm0
	punpckldq	xmm0,xmm1
	movdqa	xmm7,xmm2
	punpckldq	xmm2,xmm3
	punpckhdq	xmm8,xmm1
	punpckhdq	xmm7,xmm3
	movdqa	xmm1,xmm0
	punpcklqdq	xmm0,xmm2
	movdqa	xmm3,xmm8
	punpcklqdq	xmm8,xmm7
	punpckhqdq	xmm1,xmm2
	punpckhqdq	xmm3,xmm7
	; Block layout now (rows 0..3 of each block):
	;   block 0: [rsp],    xmm12, xmm4,  xmm0
	;   block 1: [rsp+16], xmm13, xmm5,  xmm1
	;   block 2: [rsp+32], xmm10, xmm14, xmm8
	;   block 3: [rsp+48], xmm15, xmm9,  xmm3
	cmp	rdx,64*4
	jb	NEAR $L$tail4x

	; full 256 bytes: XOR and store blocks 0..3
	movdqu	xmm6,XMMWORD[rsi]
	movdqu	xmm11,XMMWORD[16+rsi]
	movdqu	xmm2,XMMWORD[32+rsi]
	movdqu	xmm7,XMMWORD[48+rsi]
	pxor	xmm6,XMMWORD[rsp]
	pxor	xmm11,xmm12
	pxor	xmm2,xmm4
	pxor	xmm7,xmm0

	movdqu	XMMWORD[rdi],xmm6
	movdqu	xmm6,XMMWORD[64+rsi]
	movdqu	XMMWORD[16+rdi],xmm11
	movdqu	xmm11,XMMWORD[80+rsi]
	movdqu	XMMWORD[32+rdi],xmm2
	movdqu	xmm2,XMMWORD[96+rsi]
	movdqu	XMMWORD[48+rdi],xmm7
	movdqu	xmm7,XMMWORD[112+rsi]
	lea	rsi,[128+rsi]
	pxor	xmm6,XMMWORD[16+rsp]
	pxor	xmm11,xmm13
	pxor	xmm2,xmm5
	pxor	xmm7,xmm1

	movdqu	XMMWORD[64+rdi],xmm6
	movdqu	xmm6,XMMWORD[rsi]
	movdqu	XMMWORD[80+rdi],xmm11
	movdqu	xmm11,XMMWORD[16+rsi]
	movdqu	XMMWORD[96+rdi],xmm2
	movdqu	xmm2,XMMWORD[32+rsi]
	movdqu	XMMWORD[112+rdi],xmm7
	lea	rdi,[128+rdi]
	movdqu	xmm7,XMMWORD[48+rsi]
	pxor	xmm6,XMMWORD[32+rsp]
	pxor	xmm11,xmm10
	pxor	xmm2,xmm14
	pxor	xmm7,xmm8

	movdqu	XMMWORD[rdi],xmm6
	movdqu	xmm6,XMMWORD[64+rsi]
	movdqu	XMMWORD[16+rdi],xmm11
	movdqu	xmm11,XMMWORD[80+rsi]
	movdqu	XMMWORD[32+rdi],xmm2
	movdqu	xmm2,XMMWORD[96+rsi]
	movdqu	XMMWORD[48+rdi],xmm7
	movdqu	xmm7,XMMWORD[112+rsi]
	lea	rsi,[128+rsi]
	pxor	xmm6,XMMWORD[48+rsp]
	pxor	xmm11,xmm15
	pxor	xmm2,xmm9
	pxor	xmm7,xmm3
	movdqu	XMMWORD[64+rdi],xmm6
	movdqu	XMMWORD[80+rdi],xmm11
	movdqu	XMMWORD[96+rdi],xmm2
	movdqu	XMMWORD[112+rdi],xmm7
	lea	rdi,[128+rdi]

	sub	rdx,64*4
	jnz	NEAR $L$oop_outer4x

	jmp	NEAR $L$done4x

; Fewer than 256 bytes left.  Whole 64-byte blocks are emitted directly;
; the block covering the final partial chunk is copied to [rsp+0..63] for
; the byte loop.  In each *_or_more4x section the "je" still tests the
; flags of the matching "cmp rdx,N" above: the movdqu/pxor/lea in between
; leave the flags untouched.
$L$tail4x:
	cmp	rdx,192
	jae	NEAR $L$192_or_more4x
	cmp	rdx,128
	jae	NEAR $L$128_or_more4x
	cmp	rdx,64
	jae	NEAR $L$64_or_more4x


	xor	r10,r10				; r10 = byte index for the tail loop

	; block 0: row 0 is already at [rsp]
	movdqa	XMMWORD[16+rsp],xmm12
	movdqa	XMMWORD[32+rsp],xmm4
	movdqa	XMMWORD[48+rsp],xmm0
	jmp	NEAR $L$oop_tail4x

ALIGN	32
$L$64_or_more4x:
	movdqu	xmm6,XMMWORD[rsi]
	movdqu	xmm11,XMMWORD[16+rsi]
	movdqu	xmm2,XMMWORD[32+rsi]
	movdqu	xmm7,XMMWORD[48+rsi]
	pxor	xmm6,XMMWORD[rsp]
	pxor	xmm11,xmm12
	pxor	xmm2,xmm4
	pxor	xmm7,xmm0
	movdqu	XMMWORD[rdi],xmm6
	movdqu	XMMWORD[16+rdi],xmm11
	movdqu	XMMWORD[32+rdi],xmm2
	movdqu	XMMWORD[48+rdi],xmm7
	je	NEAR $L$done4x			; exactly 64 bytes

	; stage block 1 for the byte loop
	movdqa	xmm6,XMMWORD[16+rsp]
	lea	rsi,[64+rsi]
	xor	r10,r10
	movdqa	XMMWORD[rsp],xmm6
	movdqa	XMMWORD[16+rsp],xmm13
	lea	rdi,[64+rdi]
	movdqa	XMMWORD[32+rsp],xmm5
	sub	rdx,64
	movdqa	XMMWORD[48+rsp],xmm1
	jmp	NEAR $L$oop_tail4x

ALIGN	32
$L$128_or_more4x:
	movdqu	xmm6,XMMWORD[rsi]
	movdqu	xmm11,XMMWORD[16+rsi]
	movdqu	xmm2,XMMWORD[32+rsi]
	movdqu	xmm7,XMMWORD[48+rsi]
	pxor	xmm6,XMMWORD[rsp]
	pxor	xmm11,xmm12
	pxor	xmm2,xmm4
	pxor	xmm7,xmm0

	movdqu	XMMWORD[rdi],xmm6
	movdqu	xmm6,XMMWORD[64+rsi]
	movdqu	XMMWORD[16+rdi],xmm11
	movdqu	xmm11,XMMWORD[80+rsi]
	movdqu	XMMWORD[32+rdi],xmm2
	movdqu	xmm2,XMMWORD[96+rsi]
	movdqu	XMMWORD[48+rdi],xmm7
	movdqu	xmm7,XMMWORD[112+rsi]
	pxor	xmm6,XMMWORD[16+rsp]
	pxor	xmm11,xmm13
	pxor	xmm2,xmm5
	pxor	xmm7,xmm1
	movdqu	XMMWORD[64+rdi],xmm6
	movdqu	XMMWORD[80+rdi],xmm11
	movdqu	XMMWORD[96+rdi],xmm2
	movdqu	XMMWORD[112+rdi],xmm7
	je	NEAR $L$done4x			; exactly 128 bytes

	; stage block 2 for the byte loop
	movdqa	xmm6,XMMWORD[32+rsp]
	lea	rsi,[128+rsi]
	xor	r10,r10
	movdqa	XMMWORD[rsp],xmm6
	movdqa	XMMWORD[16+rsp],xmm10
	lea	rdi,[128+rdi]
	movdqa	XMMWORD[32+rsp],xmm14
	sub	rdx,128
	movdqa	XMMWORD[48+rsp],xmm8
	jmp	NEAR $L$oop_tail4x

ALIGN	32
$L$192_or_more4x:
	movdqu	xmm6,XMMWORD[rsi]
	movdqu	xmm11,XMMWORD[16+rsi]
	movdqu	xmm2,XMMWORD[32+rsi]
	movdqu	xmm7,XMMWORD[48+rsi]
	pxor	xmm6,XMMWORD[rsp]
	pxor	xmm11,xmm12
	pxor	xmm2,xmm4
	pxor	xmm7,xmm0

	movdqu	XMMWORD[rdi],xmm6
	movdqu	xmm6,XMMWORD[64+rsi]
	movdqu	XMMWORD[16+rdi],xmm11
	movdqu	xmm11,XMMWORD[80+rsi]
	movdqu	XMMWORD[32+rdi],xmm2
	movdqu	xmm2,XMMWORD[96+rsi]
	movdqu	XMMWORD[48+rdi],xmm7
	movdqu	xmm7,XMMWORD[112+rsi]
	lea	rsi,[128+rsi]
	pxor	xmm6,XMMWORD[16+rsp]
	pxor	xmm11,xmm13
	pxor	xmm2,xmm5
	pxor	xmm7,xmm1

	movdqu	XMMWORD[64+rdi],xmm6
	movdqu	xmm6,XMMWORD[rsi]
	movdqu	XMMWORD[80+rdi],xmm11
	movdqu	xmm11,XMMWORD[16+rsi]
	movdqu	XMMWORD[96+rdi],xmm2
	movdqu	xmm2,XMMWORD[32+rsi]
	movdqu	XMMWORD[112+rdi],xmm7
	lea	rdi,[128+rdi]
	movdqu	xmm7,XMMWORD[48+rsi]
	pxor	xmm6,XMMWORD[32+rsp]
	pxor	xmm11,xmm10
	pxor	xmm2,xmm14
	pxor	xmm7,xmm8
	movdqu	XMMWORD[rdi],xmm6
	movdqu	XMMWORD[16+rdi],xmm11
	movdqu	XMMWORD[32+rdi],xmm2
	movdqu	XMMWORD[48+rdi],xmm7
	je	NEAR $L$done4x			; exactly 192 bytes

	; stage block 3 for the byte loop
	movdqa	xmm6,XMMWORD[48+rsp]
	lea	rsi,[64+rsi]
	xor	r10,r10
	movdqa	XMMWORD[rsp],xmm6
	movdqa	XMMWORD[16+rsp],xmm15
	lea	rdi,[64+rdi]
	movdqa	XMMWORD[32+rsp],xmm9
	sub	rdx,192
	movdqa	XMMWORD[48+rsp],xmm3

; Byte-wise XOR of the last rdx (1..63) bytes against [rsp+0..63].
$L$oop_tail4x:
	movzx	eax,BYTE[r10*1+rsi]
	movzx	ecx,BYTE[r10*1+rsp]
	lea	r10,[1+r10]
	xor	eax,ecx
	mov	BYTE[((-1))+r10*1+rdi],al
	dec	rdx
	jnz	NEAR $L$oop_tail4x

$L$done4x:
	movaps	xmm6,XMMWORD[((-168))+r9]
	movaps	xmm7,XMMWORD[((-152))+r9]
	movaps	xmm8,XMMWORD[((-136))+r9]
	movaps	xmm9,XMMWORD[((-120))+r9]
	movaps	xmm10,XMMWORD[((-104))+r9]
	movaps	xmm11,XMMWORD[((-88))+r9]
	movaps	xmm12,XMMWORD[((-72))+r9]
	movaps	xmm13,XMMWORD[((-56))+r9]
	movaps	xmm14,XMMWORD[((-40))+r9]
	movaps	xmm15,XMMWORD[((-24))+r9]
	lea	rsp,[r9]

$L$4x_epilogue:
	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
	mov	rsi,QWORD[16+rsp]
	DB	0F3h,0C3h		;repret

$L$SEH_end_ChaCha20_4x:

;-----------------------------------------------------------------------
; ChaCha20_8x -- ymm-register path, reached from $L$ChaCha20_4x.  Only
; the prologue and the start of the state broadcast fall inside this
; block; the rest of the routine follows it.
;
; Frame: "sub rsp,0x280+168" followed by "and rsp,-32" so that the
; vmovdqa ymm stores below are 32-byte aligned.  r9 keeps the entry rsp;
; xmm6..xmm15 are saved at [r9-168..r9-9].  rcx = rsp+256 and
; rax = rsp+512 serve as frame bases once the key pointer is consumed.
;-----------------------------------------------------------------------
ALIGN	32
ChaCha20_8x:
	mov	QWORD[8+rsp],rdi	;WIN64 prologue
	mov	QWORD[16+rsp],rsi
	mov	rax,rsp
$L$SEH_begin_ChaCha20_8x:
	mov	rdi,rcx
	mov	rsi,rdx
	mov	rdx,r8
	mov	rcx,r9
	mov	r8,QWORD[40+rsp]


$L$ChaCha20_8x:

	mov	r9,rsp

	sub	rsp,0x280+168
	and	rsp,-32
	movaps	XMMWORD[(-168)+r9],xmm6
	movaps	XMMWORD[(-152)+r9],xmm7
	movaps	XMMWORD[(-136)+r9],xmm8
	movaps	XMMWORD[(-120)+r9],xmm9
	movaps	XMMWORD[(-104)+r9],xmm10
	movaps	XMMWORD[(-88)+r9],xmm11
	movaps	XMMWORD[(-72)+r9],xmm12
	movaps	XMMWORD[(-56)+r9],xmm13
	movaps	XMMWORD[(-40)+r9],xmm14
	movaps	XMMWORD[(-24)+r9],xmm15
$L$8x_body:
	vzeroupper










	vbroadcasti128	ymm11,XMMWORD[$L$sigma]
	vbroadcasti128	ymm3,XMMWORD[rcx]
	vbroadcasti128	ymm15,XMMWORD[16+rcx]
	vbroadcasti128	ymm7,XMMWORD[r8]
	lea	rcx,[256+rsp]
	lea	rax,[512+rsp]
	lea	r10,[$L$rot16]
	lea	r11,[$L$rot24]

	; broadcast words 0..3 into ymm8..ymm11
	vpshufd	ymm8,ymm11,0x00
	vpshufd	ymm9,ymm11,0x55
	vmovdqa	YMMWORD[(128-256)+rcx],ymm8
	vpshufd	ymm10,ymm11,0xaa
	vmovdqa	YMMWORD[(160-256)+rcx],ymm9
	vpshufd	ymm11,ymm11,0xff
	vmovdqa	YMMWORD[(192-256)+rcx],ymm10
	vmovdqa	YMMWORD[(224-256)+rcx],ymm11

	; broadcast words 4..7 into ymm0..ymm3
	vpshufd	ymm0,ymm3,0x00
	vpshufd	ymm1,ymm3,0x55
	vmovdqa	YMMWORD[(256-256)+rcx],ymm0
	vpshufd	ymm2,ymm3,0xaa
	vmovdqa	YMMWORD[(288-256)+rcx],ymm1
	vpshufd	ymm3,ymm3,0xff
	vmovdqa	YMMWORD[(320-256)+rcx],ymm2
	vmovdqa	YMMWORD[(352-256)+rcx],ymm3

	; broadcast words 8..11 into ymm12..ymm15
	vpshufd	ymm12,ymm15,0x00
	vpshufd	ymm13,ymm15,0x55
	vmovdqa	YMMWORD[(384-512)+rax],ymm12
	vpshufd	ymm14,ymm15,0xaa
	; the operands of the next instruction are on the following source line
	vmovdqa
YMMWORD[(416-512)+rax],ymm13 1156 vpshufd ymm15,ymm15,0xff 1157 vmovdqa YMMWORD[(448-512)+rax],ymm14 1158 vmovdqa YMMWORD[(480-512)+rax],ymm15 1159 1160 vpshufd ymm4,ymm7,0x00 1161 vpshufd ymm5,ymm7,0x55 1162 vpaddd ymm4,ymm4,YMMWORD[$L$incy] 1163 vpshufd ymm6,ymm7,0xaa 1164 vmovdqa YMMWORD[(544-512)+rax],ymm5 1165 vpshufd ymm7,ymm7,0xff 1166 vmovdqa YMMWORD[(576-512)+rax],ymm6 1167 vmovdqa YMMWORD[(608-512)+rax],ymm7 1168 1169 jmp NEAR $L$oop_enter8x 1170 1171ALIGN 32 1172$L$oop_outer8x: 1173 vmovdqa ymm8,YMMWORD[((128-256))+rcx] 1174 vmovdqa ymm9,YMMWORD[((160-256))+rcx] 1175 vmovdqa ymm10,YMMWORD[((192-256))+rcx] 1176 vmovdqa ymm11,YMMWORD[((224-256))+rcx] 1177 vmovdqa ymm0,YMMWORD[((256-256))+rcx] 1178 vmovdqa ymm1,YMMWORD[((288-256))+rcx] 1179 vmovdqa ymm2,YMMWORD[((320-256))+rcx] 1180 vmovdqa ymm3,YMMWORD[((352-256))+rcx] 1181 vmovdqa ymm12,YMMWORD[((384-512))+rax] 1182 vmovdqa ymm13,YMMWORD[((416-512))+rax] 1183 vmovdqa ymm14,YMMWORD[((448-512))+rax] 1184 vmovdqa ymm15,YMMWORD[((480-512))+rax] 1185 vmovdqa ymm4,YMMWORD[((512-512))+rax] 1186 vmovdqa ymm5,YMMWORD[((544-512))+rax] 1187 vmovdqa ymm6,YMMWORD[((576-512))+rax] 1188 vmovdqa ymm7,YMMWORD[((608-512))+rax] 1189 vpaddd ymm4,ymm4,YMMWORD[$L$eight] 1190 1191$L$oop_enter8x: 1192 vmovdqa YMMWORD[64+rsp],ymm14 1193 vmovdqa YMMWORD[96+rsp],ymm15 1194 vbroadcasti128 ymm15,XMMWORD[r10] 1195 vmovdqa YMMWORD[(512-512)+rax],ymm4 1196 mov eax,10 1197 jmp NEAR $L$oop8x 1198 1199ALIGN 32 1200$L$oop8x: 1201 vpaddd ymm8,ymm8,ymm0 1202 vpxor ymm4,ymm8,ymm4 1203 vpshufb ymm4,ymm4,ymm15 1204 vpaddd ymm9,ymm9,ymm1 1205 vpxor ymm5,ymm9,ymm5 1206 vpshufb ymm5,ymm5,ymm15 1207 vpaddd ymm12,ymm12,ymm4 1208 vpxor ymm0,ymm12,ymm0 1209 vpslld ymm14,ymm0,12 1210 vpsrld ymm0,ymm0,20 1211 vpor ymm0,ymm14,ymm0 1212 vbroadcasti128 ymm14,XMMWORD[r11] 1213 vpaddd ymm13,ymm13,ymm5 1214 vpxor ymm1,ymm13,ymm1 1215 vpslld ymm15,ymm1,12 1216 vpsrld ymm1,ymm1,20 1217 vpor ymm1,ymm15,ymm1 1218 vpaddd ymm8,ymm8,ymm0 1219 vpxor ymm4,ymm8,ymm4 
1220 vpshufb ymm4,ymm4,ymm14 1221 vpaddd ymm9,ymm9,ymm1 1222 vpxor ymm5,ymm9,ymm5 1223 vpshufb ymm5,ymm5,ymm14 1224 vpaddd ymm12,ymm12,ymm4 1225 vpxor ymm0,ymm12,ymm0 1226 vpslld ymm15,ymm0,7 1227 vpsrld ymm0,ymm0,25 1228 vpor ymm0,ymm15,ymm0 1229 vbroadcasti128 ymm15,XMMWORD[r10] 1230 vpaddd ymm13,ymm13,ymm5 1231 vpxor ymm1,ymm13,ymm1 1232 vpslld ymm14,ymm1,7 1233 vpsrld ymm1,ymm1,25 1234 vpor ymm1,ymm14,ymm1 1235 vmovdqa YMMWORD[rsp],ymm12 1236 vmovdqa YMMWORD[32+rsp],ymm13 1237 vmovdqa ymm12,YMMWORD[64+rsp] 1238 vmovdqa ymm13,YMMWORD[96+rsp] 1239 vpaddd ymm10,ymm10,ymm2 1240 vpxor ymm6,ymm10,ymm6 1241 vpshufb ymm6,ymm6,ymm15 1242 vpaddd ymm11,ymm11,ymm3 1243 vpxor ymm7,ymm11,ymm7 1244 vpshufb ymm7,ymm7,ymm15 1245 vpaddd ymm12,ymm12,ymm6 1246 vpxor ymm2,ymm12,ymm2 1247 vpslld ymm14,ymm2,12 1248 vpsrld ymm2,ymm2,20 1249 vpor ymm2,ymm14,ymm2 1250 vbroadcasti128 ymm14,XMMWORD[r11] 1251 vpaddd ymm13,ymm13,ymm7 1252 vpxor ymm3,ymm13,ymm3 1253 vpslld ymm15,ymm3,12 1254 vpsrld ymm3,ymm3,20 1255 vpor ymm3,ymm15,ymm3 1256 vpaddd ymm10,ymm10,ymm2 1257 vpxor ymm6,ymm10,ymm6 1258 vpshufb ymm6,ymm6,ymm14 1259 vpaddd ymm11,ymm11,ymm3 1260 vpxor ymm7,ymm11,ymm7 1261 vpshufb ymm7,ymm7,ymm14 1262 vpaddd ymm12,ymm12,ymm6 1263 vpxor ymm2,ymm12,ymm2 1264 vpslld ymm15,ymm2,7 1265 vpsrld ymm2,ymm2,25 1266 vpor ymm2,ymm15,ymm2 1267 vbroadcasti128 ymm15,XMMWORD[r10] 1268 vpaddd ymm13,ymm13,ymm7 1269 vpxor ymm3,ymm13,ymm3 1270 vpslld ymm14,ymm3,7 1271 vpsrld ymm3,ymm3,25 1272 vpor ymm3,ymm14,ymm3 1273 vpaddd ymm8,ymm8,ymm1 1274 vpxor ymm7,ymm8,ymm7 1275 vpshufb ymm7,ymm7,ymm15 1276 vpaddd ymm9,ymm9,ymm2 1277 vpxor ymm4,ymm9,ymm4 1278 vpshufb ymm4,ymm4,ymm15 1279 vpaddd ymm12,ymm12,ymm7 1280 vpxor ymm1,ymm12,ymm1 1281 vpslld ymm14,ymm1,12 1282 vpsrld ymm1,ymm1,20 1283 vpor ymm1,ymm14,ymm1 1284 vbroadcasti128 ymm14,XMMWORD[r11] 1285 vpaddd ymm13,ymm13,ymm4 1286 vpxor ymm2,ymm13,ymm2 1287 vpslld ymm15,ymm2,12 1288 vpsrld ymm2,ymm2,20 1289 vpor ymm2,ymm15,ymm2 1290 vpaddd ymm8,ymm8,ymm1 1291 
vpxor ymm7,ymm8,ymm7 1292 vpshufb ymm7,ymm7,ymm14 1293 vpaddd ymm9,ymm9,ymm2 1294 vpxor ymm4,ymm9,ymm4 1295 vpshufb ymm4,ymm4,ymm14 1296 vpaddd ymm12,ymm12,ymm7 1297 vpxor ymm1,ymm12,ymm1 1298 vpslld ymm15,ymm1,7 1299 vpsrld ymm1,ymm1,25 1300 vpor ymm1,ymm15,ymm1 1301 vbroadcasti128 ymm15,XMMWORD[r10] 1302 vpaddd ymm13,ymm13,ymm4 1303 vpxor ymm2,ymm13,ymm2 1304 vpslld ymm14,ymm2,7 1305 vpsrld ymm2,ymm2,25 1306 vpor ymm2,ymm14,ymm2 1307 vmovdqa YMMWORD[64+rsp],ymm12 1308 vmovdqa YMMWORD[96+rsp],ymm13 1309 vmovdqa ymm12,YMMWORD[rsp] 1310 vmovdqa ymm13,YMMWORD[32+rsp] 1311 vpaddd ymm10,ymm10,ymm3 1312 vpxor ymm5,ymm10,ymm5 1313 vpshufb ymm5,ymm5,ymm15 1314 vpaddd ymm11,ymm11,ymm0 1315 vpxor ymm6,ymm11,ymm6 1316 vpshufb ymm6,ymm6,ymm15 1317 vpaddd ymm12,ymm12,ymm5 1318 vpxor ymm3,ymm12,ymm3 1319 vpslld ymm14,ymm3,12 1320 vpsrld ymm3,ymm3,20 1321 vpor ymm3,ymm14,ymm3 1322 vbroadcasti128 ymm14,XMMWORD[r11] 1323 vpaddd ymm13,ymm13,ymm6 1324 vpxor ymm0,ymm13,ymm0 1325 vpslld ymm15,ymm0,12 1326 vpsrld ymm0,ymm0,20 1327 vpor ymm0,ymm15,ymm0 1328 vpaddd ymm10,ymm10,ymm3 1329 vpxor ymm5,ymm10,ymm5 1330 vpshufb ymm5,ymm5,ymm14 1331 vpaddd ymm11,ymm11,ymm0 1332 vpxor ymm6,ymm11,ymm6 1333 vpshufb ymm6,ymm6,ymm14 1334 vpaddd ymm12,ymm12,ymm5 1335 vpxor ymm3,ymm12,ymm3 1336 vpslld ymm15,ymm3,7 1337 vpsrld ymm3,ymm3,25 1338 vpor ymm3,ymm15,ymm3 1339 vbroadcasti128 ymm15,XMMWORD[r10] 1340 vpaddd ymm13,ymm13,ymm6 1341 vpxor ymm0,ymm13,ymm0 1342 vpslld ymm14,ymm0,7 1343 vpsrld ymm0,ymm0,25 1344 vpor ymm0,ymm14,ymm0 1345 dec eax 1346 jnz NEAR $L$oop8x 1347 1348 lea rax,[512+rsp] 1349 vpaddd ymm8,ymm8,YMMWORD[((128-256))+rcx] 1350 vpaddd ymm9,ymm9,YMMWORD[((160-256))+rcx] 1351 vpaddd ymm10,ymm10,YMMWORD[((192-256))+rcx] 1352 vpaddd ymm11,ymm11,YMMWORD[((224-256))+rcx] 1353 1354 vpunpckldq ymm14,ymm8,ymm9 1355 vpunpckldq ymm15,ymm10,ymm11 1356 vpunpckhdq ymm8,ymm8,ymm9 1357 vpunpckhdq ymm10,ymm10,ymm11 1358 vpunpcklqdq ymm9,ymm14,ymm15 1359 vpunpckhqdq ymm14,ymm14,ymm15 1360 vpunpcklqdq 
ymm11,ymm8,ymm10 1361 vpunpckhqdq ymm8,ymm8,ymm10 1362 vpaddd ymm0,ymm0,YMMWORD[((256-256))+rcx] 1363 vpaddd ymm1,ymm1,YMMWORD[((288-256))+rcx] 1364 vpaddd ymm2,ymm2,YMMWORD[((320-256))+rcx] 1365 vpaddd ymm3,ymm3,YMMWORD[((352-256))+rcx] 1366 1367 vpunpckldq ymm10,ymm0,ymm1 1368 vpunpckldq ymm15,ymm2,ymm3 1369 vpunpckhdq ymm0,ymm0,ymm1 1370 vpunpckhdq ymm2,ymm2,ymm3 1371 vpunpcklqdq ymm1,ymm10,ymm15 1372 vpunpckhqdq ymm10,ymm10,ymm15 1373 vpunpcklqdq ymm3,ymm0,ymm2 1374 vpunpckhqdq ymm0,ymm0,ymm2 1375 vperm2i128 ymm15,ymm9,ymm1,0x20 1376 vperm2i128 ymm1,ymm9,ymm1,0x31 1377 vperm2i128 ymm9,ymm14,ymm10,0x20 1378 vperm2i128 ymm10,ymm14,ymm10,0x31 1379 vperm2i128 ymm14,ymm11,ymm3,0x20 1380 vperm2i128 ymm3,ymm11,ymm3,0x31 1381 vperm2i128 ymm11,ymm8,ymm0,0x20 1382 vperm2i128 ymm0,ymm8,ymm0,0x31 1383 vmovdqa YMMWORD[rsp],ymm15 1384 vmovdqa YMMWORD[32+rsp],ymm9 1385 vmovdqa ymm15,YMMWORD[64+rsp] 1386 vmovdqa ymm9,YMMWORD[96+rsp] 1387 1388 vpaddd ymm12,ymm12,YMMWORD[((384-512))+rax] 1389 vpaddd ymm13,ymm13,YMMWORD[((416-512))+rax] 1390 vpaddd ymm15,ymm15,YMMWORD[((448-512))+rax] 1391 vpaddd ymm9,ymm9,YMMWORD[((480-512))+rax] 1392 1393 vpunpckldq ymm2,ymm12,ymm13 1394 vpunpckldq ymm8,ymm15,ymm9 1395 vpunpckhdq ymm12,ymm12,ymm13 1396 vpunpckhdq ymm15,ymm15,ymm9 1397 vpunpcklqdq ymm13,ymm2,ymm8 1398 vpunpckhqdq ymm2,ymm2,ymm8 1399 vpunpcklqdq ymm9,ymm12,ymm15 1400 vpunpckhqdq ymm12,ymm12,ymm15 1401 vpaddd ymm4,ymm4,YMMWORD[((512-512))+rax] 1402 vpaddd ymm5,ymm5,YMMWORD[((544-512))+rax] 1403 vpaddd ymm6,ymm6,YMMWORD[((576-512))+rax] 1404 vpaddd ymm7,ymm7,YMMWORD[((608-512))+rax] 1405 1406 vpunpckldq ymm15,ymm4,ymm5 1407 vpunpckldq ymm8,ymm6,ymm7 1408 vpunpckhdq ymm4,ymm4,ymm5 1409 vpunpckhdq ymm6,ymm6,ymm7 1410 vpunpcklqdq ymm5,ymm15,ymm8 1411 vpunpckhqdq ymm15,ymm15,ymm8 1412 vpunpcklqdq ymm7,ymm4,ymm6 1413 vpunpckhqdq ymm4,ymm4,ymm6 1414 vperm2i128 ymm8,ymm13,ymm5,0x20 1415 vperm2i128 ymm5,ymm13,ymm5,0x31 1416 vperm2i128 ymm13,ymm2,ymm15,0x20 1417 vperm2i128 
ymm15,ymm2,ymm15,0x31 1418 vperm2i128 ymm2,ymm9,ymm7,0x20 1419 vperm2i128 ymm7,ymm9,ymm7,0x31 1420 vperm2i128 ymm9,ymm12,ymm4,0x20 1421 vperm2i128 ymm4,ymm12,ymm4,0x31 1422 vmovdqa ymm6,YMMWORD[rsp] 1423 vmovdqa ymm12,YMMWORD[32+rsp] 1424 1425 cmp rdx,64*8 1426 jb NEAR $L$tail8x 1427 1428 vpxor ymm6,ymm6,YMMWORD[rsi] 1429 vpxor ymm8,ymm8,YMMWORD[32+rsi] 1430 vpxor ymm1,ymm1,YMMWORD[64+rsi] 1431 vpxor ymm5,ymm5,YMMWORD[96+rsi] 1432 lea rsi,[128+rsi] 1433 vmovdqu YMMWORD[rdi],ymm6 1434 vmovdqu YMMWORD[32+rdi],ymm8 1435 vmovdqu YMMWORD[64+rdi],ymm1 1436 vmovdqu YMMWORD[96+rdi],ymm5 1437 lea rdi,[128+rdi] 1438 1439 vpxor ymm12,ymm12,YMMWORD[rsi] 1440 vpxor ymm13,ymm13,YMMWORD[32+rsi] 1441 vpxor ymm10,ymm10,YMMWORD[64+rsi] 1442 vpxor ymm15,ymm15,YMMWORD[96+rsi] 1443 lea rsi,[128+rsi] 1444 vmovdqu YMMWORD[rdi],ymm12 1445 vmovdqu YMMWORD[32+rdi],ymm13 1446 vmovdqu YMMWORD[64+rdi],ymm10 1447 vmovdqu YMMWORD[96+rdi],ymm15 1448 lea rdi,[128+rdi] 1449 1450 vpxor ymm14,ymm14,YMMWORD[rsi] 1451 vpxor ymm2,ymm2,YMMWORD[32+rsi] 1452 vpxor ymm3,ymm3,YMMWORD[64+rsi] 1453 vpxor ymm7,ymm7,YMMWORD[96+rsi] 1454 lea rsi,[128+rsi] 1455 vmovdqu YMMWORD[rdi],ymm14 1456 vmovdqu YMMWORD[32+rdi],ymm2 1457 vmovdqu YMMWORD[64+rdi],ymm3 1458 vmovdqu YMMWORD[96+rdi],ymm7 1459 lea rdi,[128+rdi] 1460 1461 vpxor ymm11,ymm11,YMMWORD[rsi] 1462 vpxor ymm9,ymm9,YMMWORD[32+rsi] 1463 vpxor ymm0,ymm0,YMMWORD[64+rsi] 1464 vpxor ymm4,ymm4,YMMWORD[96+rsi] 1465 lea rsi,[128+rsi] 1466 vmovdqu YMMWORD[rdi],ymm11 1467 vmovdqu YMMWORD[32+rdi],ymm9 1468 vmovdqu YMMWORD[64+rdi],ymm0 1469 vmovdqu YMMWORD[96+rdi],ymm4 1470 lea rdi,[128+rdi] 1471 1472 sub rdx,64*8 1473 jnz NEAR $L$oop_outer8x 1474 1475 jmp NEAR $L$done8x 1476 1477$L$tail8x: 1478 cmp rdx,448 1479 jae NEAR $L$448_or_more8x 1480 cmp rdx,384 1481 jae NEAR $L$384_or_more8x 1482 cmp rdx,320 1483 jae NEAR $L$320_or_more8x 1484 cmp rdx,256 1485 jae NEAR $L$256_or_more8x 1486 cmp rdx,192 1487 jae NEAR $L$192_or_more8x 1488 cmp rdx,128 1489 jae NEAR 
$L$128_or_more8x 1490 cmp rdx,64 1491 jae NEAR $L$64_or_more8x 1492 1493 xor r10,r10 1494 vmovdqa YMMWORD[rsp],ymm6 1495 vmovdqa YMMWORD[32+rsp],ymm8 1496 jmp NEAR $L$oop_tail8x 1497 1498ALIGN 32 1499$L$64_or_more8x: 1500 vpxor ymm6,ymm6,YMMWORD[rsi] 1501 vpxor ymm8,ymm8,YMMWORD[32+rsi] 1502 vmovdqu YMMWORD[rdi],ymm6 1503 vmovdqu YMMWORD[32+rdi],ymm8 1504 je NEAR $L$done8x 1505 1506 lea rsi,[64+rsi] 1507 xor r10,r10 1508 vmovdqa YMMWORD[rsp],ymm1 1509 lea rdi,[64+rdi] 1510 sub rdx,64 1511 vmovdqa YMMWORD[32+rsp],ymm5 1512 jmp NEAR $L$oop_tail8x 1513 1514ALIGN 32 1515$L$128_or_more8x: 1516 vpxor ymm6,ymm6,YMMWORD[rsi] 1517 vpxor ymm8,ymm8,YMMWORD[32+rsi] 1518 vpxor ymm1,ymm1,YMMWORD[64+rsi] 1519 vpxor ymm5,ymm5,YMMWORD[96+rsi] 1520 vmovdqu YMMWORD[rdi],ymm6 1521 vmovdqu YMMWORD[32+rdi],ymm8 1522 vmovdqu YMMWORD[64+rdi],ymm1 1523 vmovdqu YMMWORD[96+rdi],ymm5 1524 je NEAR $L$done8x 1525 1526 lea rsi,[128+rsi] 1527 xor r10,r10 1528 vmovdqa YMMWORD[rsp],ymm12 1529 lea rdi,[128+rdi] 1530 sub rdx,128 1531 vmovdqa YMMWORD[32+rsp],ymm13 1532 jmp NEAR $L$oop_tail8x 1533 1534ALIGN 32 1535$L$192_or_more8x: 1536 vpxor ymm6,ymm6,YMMWORD[rsi] 1537 vpxor ymm8,ymm8,YMMWORD[32+rsi] 1538 vpxor ymm1,ymm1,YMMWORD[64+rsi] 1539 vpxor ymm5,ymm5,YMMWORD[96+rsi] 1540 vpxor ymm12,ymm12,YMMWORD[128+rsi] 1541 vpxor ymm13,ymm13,YMMWORD[160+rsi] 1542 vmovdqu YMMWORD[rdi],ymm6 1543 vmovdqu YMMWORD[32+rdi],ymm8 1544 vmovdqu YMMWORD[64+rdi],ymm1 1545 vmovdqu YMMWORD[96+rdi],ymm5 1546 vmovdqu YMMWORD[128+rdi],ymm12 1547 vmovdqu YMMWORD[160+rdi],ymm13 1548 je NEAR $L$done8x 1549 1550 lea rsi,[192+rsi] 1551 xor r10,r10 1552 vmovdqa YMMWORD[rsp],ymm10 1553 lea rdi,[192+rdi] 1554 sub rdx,192 1555 vmovdqa YMMWORD[32+rsp],ymm15 1556 jmp NEAR $L$oop_tail8x 1557 1558ALIGN 32 1559$L$256_or_more8x: 1560 vpxor ymm6,ymm6,YMMWORD[rsi] 1561 vpxor ymm8,ymm8,YMMWORD[32+rsi] 1562 vpxor ymm1,ymm1,YMMWORD[64+rsi] 1563 vpxor ymm5,ymm5,YMMWORD[96+rsi] 1564 vpxor ymm12,ymm12,YMMWORD[128+rsi] 1565 vpxor 
ymm13,ymm13,YMMWORD[160+rsi] 1566 vpxor ymm10,ymm10,YMMWORD[192+rsi] 1567 vpxor ymm15,ymm15,YMMWORD[224+rsi] 1568 vmovdqu YMMWORD[rdi],ymm6 1569 vmovdqu YMMWORD[32+rdi],ymm8 1570 vmovdqu YMMWORD[64+rdi],ymm1 1571 vmovdqu YMMWORD[96+rdi],ymm5 1572 vmovdqu YMMWORD[128+rdi],ymm12 1573 vmovdqu YMMWORD[160+rdi],ymm13 1574 vmovdqu YMMWORD[192+rdi],ymm10 1575 vmovdqu YMMWORD[224+rdi],ymm15 1576 je NEAR $L$done8x 1577 1578 lea rsi,[256+rsi] 1579 xor r10,r10 1580 vmovdqa YMMWORD[rsp],ymm14 1581 lea rdi,[256+rdi] 1582 sub rdx,256 1583 vmovdqa YMMWORD[32+rsp],ymm2 1584 jmp NEAR $L$oop_tail8x 1585 1586ALIGN 32 1587$L$320_or_more8x: 1588 vpxor ymm6,ymm6,YMMWORD[rsi] 1589 vpxor ymm8,ymm8,YMMWORD[32+rsi] 1590 vpxor ymm1,ymm1,YMMWORD[64+rsi] 1591 vpxor ymm5,ymm5,YMMWORD[96+rsi] 1592 vpxor ymm12,ymm12,YMMWORD[128+rsi] 1593 vpxor ymm13,ymm13,YMMWORD[160+rsi] 1594 vpxor ymm10,ymm10,YMMWORD[192+rsi] 1595 vpxor ymm15,ymm15,YMMWORD[224+rsi] 1596 vpxor ymm14,ymm14,YMMWORD[256+rsi] 1597 vpxor ymm2,ymm2,YMMWORD[288+rsi] 1598 vmovdqu YMMWORD[rdi],ymm6 1599 vmovdqu YMMWORD[32+rdi],ymm8 1600 vmovdqu YMMWORD[64+rdi],ymm1 1601 vmovdqu YMMWORD[96+rdi],ymm5 1602 vmovdqu YMMWORD[128+rdi],ymm12 1603 vmovdqu YMMWORD[160+rdi],ymm13 1604 vmovdqu YMMWORD[192+rdi],ymm10 1605 vmovdqu YMMWORD[224+rdi],ymm15 1606 vmovdqu YMMWORD[256+rdi],ymm14 1607 vmovdqu YMMWORD[288+rdi],ymm2 1608 je NEAR $L$done8x 1609 1610 lea rsi,[320+rsi] 1611 xor r10,r10 1612 vmovdqa YMMWORD[rsp],ymm3 1613 lea rdi,[320+rdi] 1614 sub rdx,320 1615 vmovdqa YMMWORD[32+rsp],ymm7 1616 jmp NEAR $L$oop_tail8x 1617 1618ALIGN 32 1619$L$384_or_more8x: 1620 vpxor ymm6,ymm6,YMMWORD[rsi] 1621 vpxor ymm8,ymm8,YMMWORD[32+rsi] 1622 vpxor ymm1,ymm1,YMMWORD[64+rsi] 1623 vpxor ymm5,ymm5,YMMWORD[96+rsi] 1624 vpxor ymm12,ymm12,YMMWORD[128+rsi] 1625 vpxor ymm13,ymm13,YMMWORD[160+rsi] 1626 vpxor ymm10,ymm10,YMMWORD[192+rsi] 1627 vpxor ymm15,ymm15,YMMWORD[224+rsi] 1628 vpxor ymm14,ymm14,YMMWORD[256+rsi] 1629 vpxor ymm2,ymm2,YMMWORD[288+rsi] 1630 vpxor 
ymm3,ymm3,YMMWORD[320+rsi] 1631 vpxor ymm7,ymm7,YMMWORD[352+rsi] 1632 vmovdqu YMMWORD[rdi],ymm6 1633 vmovdqu YMMWORD[32+rdi],ymm8 1634 vmovdqu YMMWORD[64+rdi],ymm1 1635 vmovdqu YMMWORD[96+rdi],ymm5 1636 vmovdqu YMMWORD[128+rdi],ymm12 1637 vmovdqu YMMWORD[160+rdi],ymm13 1638 vmovdqu YMMWORD[192+rdi],ymm10 1639 vmovdqu YMMWORD[224+rdi],ymm15 1640 vmovdqu YMMWORD[256+rdi],ymm14 1641 vmovdqu YMMWORD[288+rdi],ymm2 1642 vmovdqu YMMWORD[320+rdi],ymm3 1643 vmovdqu YMMWORD[352+rdi],ymm7 1644 je NEAR $L$done8x 1645 1646 lea rsi,[384+rsi] 1647 xor r10,r10 1648 vmovdqa YMMWORD[rsp],ymm11 1649 lea rdi,[384+rdi] 1650 sub rdx,384 1651 vmovdqa YMMWORD[32+rsp],ymm9 1652 jmp NEAR $L$oop_tail8x 1653 1654ALIGN 32 1655$L$448_or_more8x: 1656 vpxor ymm6,ymm6,YMMWORD[rsi] 1657 vpxor ymm8,ymm8,YMMWORD[32+rsi] 1658 vpxor ymm1,ymm1,YMMWORD[64+rsi] 1659 vpxor ymm5,ymm5,YMMWORD[96+rsi] 1660 vpxor ymm12,ymm12,YMMWORD[128+rsi] 1661 vpxor ymm13,ymm13,YMMWORD[160+rsi] 1662 vpxor ymm10,ymm10,YMMWORD[192+rsi] 1663 vpxor ymm15,ymm15,YMMWORD[224+rsi] 1664 vpxor ymm14,ymm14,YMMWORD[256+rsi] 1665 vpxor ymm2,ymm2,YMMWORD[288+rsi] 1666 vpxor ymm3,ymm3,YMMWORD[320+rsi] 1667 vpxor ymm7,ymm7,YMMWORD[352+rsi] 1668 vpxor ymm11,ymm11,YMMWORD[384+rsi] 1669 vpxor ymm9,ymm9,YMMWORD[416+rsi] 1670 vmovdqu YMMWORD[rdi],ymm6 1671 vmovdqu YMMWORD[32+rdi],ymm8 1672 vmovdqu YMMWORD[64+rdi],ymm1 1673 vmovdqu YMMWORD[96+rdi],ymm5 1674 vmovdqu YMMWORD[128+rdi],ymm12 1675 vmovdqu YMMWORD[160+rdi],ymm13 1676 vmovdqu YMMWORD[192+rdi],ymm10 1677 vmovdqu YMMWORD[224+rdi],ymm15 1678 vmovdqu YMMWORD[256+rdi],ymm14 1679 vmovdqu YMMWORD[288+rdi],ymm2 1680 vmovdqu YMMWORD[320+rdi],ymm3 1681 vmovdqu YMMWORD[352+rdi],ymm7 1682 vmovdqu YMMWORD[384+rdi],ymm11 1683 vmovdqu YMMWORD[416+rdi],ymm9 1684 je NEAR $L$done8x 1685 1686 lea rsi,[448+rsi] 1687 xor r10,r10 1688 vmovdqa YMMWORD[rsp],ymm0 1689 lea rdi,[448+rdi] 1690 sub rdx,448 1691 vmovdqa YMMWORD[32+rsp],ymm4 1692 1693$L$oop_tail8x: 1694 movzx eax,BYTE[r10*1+rsi] 1695 movzx 
ecx,BYTE[r10*1+rsp]                     ; operands of the `movzx` that closes the previous physical line
        lea     r10,[1+r10]
        xor     eax,ecx
        mov     BYTE[-1+r10*1+rdi],al   ; r10 was already advanced, hence the -1
        dec     rdx                     ; rdx = bytes still to emit
        jnz     NEAR $L$oop_tail8x

; Exit path of the 8x routine.  Clears every vector register, reloads
; xmm6-xmm15 from the save area addressed through r9 and makes r9 the stack
; pointer again.  NOTE(review): r9 is presumably the entry rsp saved by the 8x
; prologue, which is outside this chunk -- confirm there.
$L$done8x:
        vzeroall
        movaps  xmm6,XMMWORD[-168+r9]
        movaps  xmm7,XMMWORD[-152+r9]
        movaps  xmm8,XMMWORD[-136+r9]
        movaps  xmm9,XMMWORD[-120+r9]
        movaps  xmm10,XMMWORD[-104+r9]
        movaps  xmm11,XMMWORD[-88+r9]
        movaps  xmm12,XMMWORD[-72+r9]
        movaps  xmm13,XMMWORD[-56+r9]
        movaps  xmm14,XMMWORD[-40+r9]
        movaps  xmm15,XMMWORD[-24+r9]
        lea     rsp,[r9]

$L$8x_epilogue:
        mov     rdi,QWORD[8+rsp]        ;WIN64 epilogue
        mov     rsi,QWORD[16+rsp]
        DB      0F3h,0C3h               ;repret (two-byte `rep ret`, emitted as raw bytes)

$L$SEH_end_ChaCha20_8x:
EXTERN  __imp_RtlVirtualUnwind

;-----------------------------------------------------------------------
; Win64 structured-exception handlers for the ChaCha20 entry points.
;
; Each handler is entered with the language-specific handler arguments:
;   rcx = EXCEPTION_RECORD*      rdx = establisher frame
;   r8  = CONTEXT*               r9  = DISPATCHER_CONTEXT*
; It rewrites the CONTEXT so that it looks as if the faulting routine had
; already run its epilogue, copies it into the dispatcher's context record,
; calls RtlVirtualUnwind and returns 1 in eax.
;
; The numeric offsets below are the ones the handlers use; the field names
; follow the documented Win64 CONTEXT / DISPATCHER_CONTEXT layouts.
;-----------------------------------------------------------------------
%define SEH_CTX_RAX             120
%define SEH_CTX_RBX             144
%define SEH_CTX_RSP             152
%define SEH_CTX_RBP             160
%define SEH_CTX_RSI             168
%define SEH_CTX_RDI             176
%define SEH_CTX_R9              192
%define SEH_CTX_R12             216
%define SEH_CTX_R13             224
%define SEH_CTX_R14             232
%define SEH_CTX_R15             240
%define SEH_CTX_RIP             248
%define SEH_CTX_XMM6            512
%define SEH_CTX_QWORDS          154     ; size of CONTEXT in 8-byte words

%define SEH_DISP_IMAGE_BASE     8
%define SEH_DISP_FUNCTION_ENTRY 16
%define SEH_DISP_ESTABLISHER    24
%define SEH_DISP_CONTEXT        40
%define SEH_DISP_HANDLER_DATA   56

; Bytes between rsp inside the scalar routine's body and its entry rsp:
; 64+24 bytes of locals plus six 8-byte register pushes.
%define SEH_SCALAR_FRAME        (64+24+48)

; Prologue shared by all three handlers.  ssse3_handler and full_handler have
; no epilogue of their own -- they leave through $L$common_seh_tail inside
; se_handler -- so the push sequence here must stay identical for all of them.
; On exit: rax = context->Rax, rbx = context->Rip,
;          rsi = disp->ImageBase, r11 = disp->HandlerData.
%macro SEH_HANDLER_ENTER 0
        push    rsi
        push    rdi
        push    rbx
        push    rbp
        push    r12
        push    r13
        push    r14
        push    r15
        pushfq
        sub     rsp,64                  ; room for the RtlVirtualUnwind arguments

        mov     rax,QWORD[SEH_CTX_RAX+r8]
        mov     rbx,QWORD[SEH_CTX_RIP+r8]

        mov     rsi,QWORD[SEH_DISP_IMAGE_BASE+r9]
        mov     r11,QWORD[SEH_DISP_HANDLER_DATA+r9]
%endmacro

; Handler for GFp_ChaCha20_ctr32 (scalar path).
ALIGN   16
se_handler:
        SEH_HANDLER_ENTER

        ; Fault before $L$ctr32_body: nothing has been pushed yet and rax
        ; still holds the entry rsp (the WIN64 prologue does `mov rax,rsp`).
        lea     r10,[$L$ctr32_body]
        cmp     rbx,r10
        jb      NEAR $L$common_seh_tail

        mov     rax,QWORD[SEH_CTX_RSP+r8]

        ; Fault at or after $L$no_data: context->Rsp is used as the frame as-is.
        ; NOTE(review): presumably the pops have already run there -- the label
        ; is outside this chunk.
        lea     r10,[$L$no_data]
        cmp     rbx,r10
        jae     NEAR $L$common_seh_tail

        ; Inside the body: step over the frame and recover the six pushed
        ; registers into the CONTEXT.
        lea     rax,[SEH_SCALAR_FRAME+rax]

        mov     rbx,QWORD[-8+rax]
        mov     QWORD[SEH_CTX_RBX+r8],rbx
        mov     rbp,QWORD[-16+rax]
        mov     QWORD[SEH_CTX_RBP+r8],rbp
        mov     r12,QWORD[-24+rax]
        mov     QWORD[SEH_CTX_R12+r8],r12
        mov     r13,QWORD[-32+rax]
        mov     QWORD[SEH_CTX_R13+r8],r13
        mov     r14,QWORD[-40+rax]
        mov     QWORD[SEH_CTX_R14+r8],r14
        mov     r15,QWORD[-48+rax]
        mov     QWORD[SEH_CTX_R15+r8],r15

; Shared tail.  In: rax = the faulting routine's entry rsp, r8 = CONTEXT*,
; r9 = DISPATCHER_CONTEXT*.
$L$common_seh_tail:
        mov     rdi,QWORD[8+rax]        ; rdi/rsi as stored at 8/16 above the entry rsp
        mov     rsi,QWORD[16+rax]
        mov     QWORD[SEH_CTX_RSP+r8],rax
        mov     QWORD[SEH_CTX_RSI+r8],rsi
        mov     QWORD[SEH_CTX_RDI+r8],rdi

        ; Copy the patched CONTEXT over disp->ContextRecord.
        mov     rdi,QWORD[SEH_DISP_CONTEXT+r9]
        mov     rsi,r8
        mov     ecx,SEH_CTX_QWORDS
        cld
        rep movsq

        ; RtlVirtualUnwind(0, ImageBase, ControlPc, FunctionEntry,
        ;                  ContextRecord, &HandlerData, &EstablisherFrame, NULL)
        mov     rsi,r9
        xor     rcx,rcx
        mov     rdx,QWORD[SEH_DISP_IMAGE_BASE+rsi]
        mov     r8,QWORD[rsi]           ; disp->ControlPc (offset 0)
        mov     r9,QWORD[SEH_DISP_FUNCTION_ENTRY+rsi]
        mov     r10,QWORD[SEH_DISP_CONTEXT+rsi]
        lea     r11,[SEH_DISP_HANDLER_DATA+rsi]
        lea     r12,[SEH_DISP_ESTABLISHER+rsi]
        mov     QWORD[32+rsp],r10
        mov     QWORD[40+rsp],r11
        mov     QWORD[48+rsp],r12
        mov     QWORD[56+rsp],rcx
        call    QWORD[__imp_RtlVirtualUnwind]

        mov     eax,1                   ; ExceptionContinueSearch
        add     rsp,64
        popfq
        pop     r15
        pop     r14
        pop     r13
        pop     r12
        pop     rbp
        pop     rbx
        pop     rdi
        pop     rsi
        DB      0F3h,0C3h               ;repret

; Handler for the SSSE3 routine.  HandlerData holds two image-relative
; addresses (body label, epilogue label; see .xdata below).  Between them the
; frame is taken from context->R9 and the 4 qwords at [frame-40] are copied
; to CONTEXT+512 (the Xmm6 slot, so xmm6-xmm7).
; NOTE(review): relies on the SSSE3 prologue keeping its entry rsp in r9 and
; in rax until the body label -- that prologue is outside this chunk.
ALIGN   16
ssse3_handler:
        SEH_HANDLER_ENTER

        mov     r10d,DWORD[r11]         ; HandlerData[0]: start of protected body
        lea     r10,[r10*1+rsi]
        cmp     rbx,r10
        jb      NEAR $L$common_seh_tail

        mov     rax,QWORD[SEH_CTX_R9+r8]

        mov     r10d,DWORD[4+r11]       ; HandlerData[1]: epilogue label
        lea     r10,[r10*1+rsi]
        cmp     rbx,r10
        jae     NEAR $L$common_seh_tail

        lea     rsi,[-40+rax]
        lea     rdi,[SEH_CTX_XMM6+r8]
        mov     ecx,4
        cld
        rep movsq

        jmp     NEAR $L$common_seh_tail

; Handler for the 4x and 8x routines.  Same shape as ssse3_handler, but copies
; the 20 qwords at [frame-168] to CONTEXT+512 (xmm6-xmm15), matching the
; offsets that $L$done8x reloads from.
ALIGN   16
full_handler:
        SEH_HANDLER_ENTER

        mov     r10d,DWORD[r11]         ; HandlerData[0]: start of protected body
        lea     r10,[r10*1+rsi]
        cmp     rbx,r10
        jb      NEAR $L$common_seh_tail

        mov     rax,QWORD[SEH_CTX_R9+r8]

        mov     r10d,DWORD[4+r11]       ; HandlerData[1]: epilogue label
        lea     r10,[r10*1+rsi]
        cmp     rbx,r10
        jae     NEAR $L$common_seh_tail

        lea     rsi,[-168+rax]
        lea     rdi,[SEH_CTX_XMM6+r8]
        mov     ecx,20
        cld
        rep movsq

        jmp     NEAR $L$common_seh_tail

; Function table: one (begin, end, unwind-info) triple of image-relative
; addresses per routine.
section .pdata rdata align=4
ALIGN   4
        DD      $L$SEH_begin_GFp_ChaCha20_ctr32 wrt ..imagebase
        DD      $L$SEH_end_GFp_ChaCha20_ctr32 wrt ..imagebase
        DD      $L$SEH_info_GFp_ChaCha20_ctr32 wrt ..imagebase

        DD      $L$SEH_begin_ChaCha20_ssse3 wrt ..imagebase
        DD      $L$SEH_end_ChaCha20_ssse3 wrt ..imagebase
        DD      $L$SEH_info_ChaCha20_ssse3 wrt ..imagebase

        DD      $L$SEH_begin_ChaCha20_4x wrt ..imagebase
        DD      $L$SEH_end_ChaCha20_4x wrt ..imagebase
        DD      $L$SEH_info_ChaCha20_4x wrt ..imagebase

        DD      $L$SEH_begin_ChaCha20_8x wrt ..imagebase
        DD      $L$SEH_end_ChaCha20_8x wrt ..imagebase
        DD      $L$SEH_info_ChaCha20_8x wrt ..imagebase

; Unwind info.  Every record starts with the bytes 9,0,0,0 (version 1 with the
; exception-handler flag, no prolog bytes, no unwind codes), followed by the
; handler address and, for the SIMD routines, the two addresses the handler
; reads from HandlerData.
section .xdata rdata align=8
ALIGN   8
$L$SEH_info_GFp_ChaCha20_ctr32:
        DB      9,0,0,0
        DD      se_handler wrt ..imagebase

$L$SEH_info_ChaCha20_ssse3:
        DB      9,0,0,0
        DD      ssse3_handler wrt ..imagebase
        DD      $L$ssse3_body wrt ..imagebase
        DD      $L$ssse3_epilogue wrt ..imagebase

$L$SEH_info_ChaCha20_4x:
        DB      9,0,0,0
        DD      full_handler wrt ..imagebase
        DD      $L$4x_body wrt ..imagebase
        DD      $L$4x_epilogue wrt ..imagebase

$L$SEH_info_ChaCha20_8x:
        DB      9,0,0,0
        DD      full_handler wrt ..imagebase
        DD      $L$8x_body wrt ..imagebase
        DD      $L$8x_epilogue wrt ..imagebase