; ---------------------------------------------------------------------------
; Reviewer notes for the constant tables and for poly_hash_ad_internal, both
; of which follow in the text below.
; NOTE(review): the banner below says this file is generated; lasting changes
; presumably belong in the generating script -- confirm before editing here.
;
; Constant tables (labels following chacha20_poly1305_constants):
;   $L$chacha20_consts  ASCII "expand 32-byte k", stored twice (2 x 16 bytes).
;   $L$rol8, $L$rol16   byte-index tables, each stored twice. Later code
;                       passes them to pshufb; within every 4-byte group the
;                       index pattern 3,0,1,2 / 2,3,0,1 rotates that dword
;                       left by 8 / 16 bits.
;   $L$avx2_init        four zero dwords.
;   $L$sse_inc          dwords 1,0,0,0; later code adds it with paddd, which
;                       bumps the low dword of a 16-byte row by one.
;   $L$avx2_inc         dwords 2,0,0,0 stored twice.
;   $L$clamp            a 16-byte AND mask followed by 16 bytes of all ones.
;                       Later code applies the first 16 bytes with pand to
;                       the value it then stores at [rbp+160] (presumably
;                       the Poly1305 "r" clamp -- confirm).
;   $L$and_masks        16 rows of 16 bytes; row k (k = 1..16) holds k
;                       leading 0xff bytes followed by zeros.
;
; poly_hash_ad_internal
;   In:    rcx = pointer to the bytes to absorb
;          r8  = number of bytes
;          [rbp+160], [rbp+168] = low / high 64-bit limbs of the multiplier
;   Out:   r12:r11:r10 = accumulator (r10 lowest limb); it is zeroed on
;          entry, so the result depends only on the input bytes and the
;          multiplier.
;   Clobbers: rax, rdx, r9, r13, r14, r15, flags; rcx and r8 are modified on
;          every path except the 13-byte fast path. No stack is used.
;
;   Flow:
;     * r8 == 13 ($L$poly_fast_tls_ad): bytes 0..7 are loaded into r10,
;       bytes 5..12 into r11 and shifted right by 24 so only bytes 8..12
;       remain, r12 is set to 1, then one multiply/reduce step and return.
;       Only the 13 input bytes are read.
;     * otherwise ($L$hash_ad_loop): while r8 >= 16, a 16-byte block is
;       added into r11:r10 with "adc r12,1" (carry plus the 2^128 bit),
;       followed by a multiply/reduce step; rcx += 16, r8 -= 16.
;     * tail ($L$hash_ad_tail): for 1..15 leftover bytes, the bytes are read
;       from last to first and shifted into r14:r13, giving a zero-extended
;       little-endian value; it is added with "adc r12,1" and one more
;       multiply/reduce step runs. With r8 == 0 nothing further is done.
;
;   Multiply/reduce step (repeated inline three times):
;     the accumulator is multiplied by the two limbs at [rbp+160]/[rbp+168];
;     the low two bits of the third product limb stay in r12 ("and r12,3"),
;     and everything above them is added back into the low limbs twice:
;     once masked with -4 and once shifted right by 2. Together that is the
;     part at or above bit 130 times 5, which is consistent with a partial
;     reduction modulo 2^130 - 5.
;
;   "DB 0F3h,0C3h" is the byte encoding of "rep ret" (see the ;repret tag).
;
; The text after the second ";repret" starts GFp_chacha20_poly1305_open.
; That routine continues beyond these lines and is not described here.
; ---------------------------------------------------------------------------
1; This file is generated from a similarly-named Perl script in the BoringSSL 2; source tree. Do not edit by hand. 3 4default rel 5%define XMMWORD 6%define YMMWORD 7%define ZMMWORD 8section .text code align=64 9 10EXTERN GFp_ia32cap_P 11 12chacha20_poly1305_constants: 13 14ALIGN 64 15$L$chacha20_consts: 16DB 'e','x','p','a','n','d',' ','3','2','-','b','y','t','e',' ','k' 17DB 'e','x','p','a','n','d',' ','3','2','-','b','y','t','e',' ','k' 18$L$rol8: 19DB 3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14 20DB 3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14 21$L$rol16: 22DB 2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13 23DB 2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13 24$L$avx2_init: 25 DD 0,0,0,0 26$L$sse_inc: 27 DD 1,0,0,0 28$L$avx2_inc: 29 DD 2,0,0,0,2,0,0,0 30$L$clamp: 31 DQ 0x0FFFFFFC0FFFFFFF,0x0FFFFFFC0FFFFFFC 32 DQ 0xFFFFFFFFFFFFFFFF,0xFFFFFFFFFFFFFFFF 33ALIGN 16 34$L$and_masks: 35DB 0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 36DB 0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 37DB 0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 38DB 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 39DB 0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 40DB 0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 41DB 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 42DB 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 43DB 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00 44DB 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00 45DB 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00 46DB 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00 47DB 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00 48DB 
0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00 49DB 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00 50DB 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff 51 52 53ALIGN 64 54poly_hash_ad_internal: 55 56 57 xor r10,r10 58 xor r11,r11 59 xor r12,r12 60 cmp r8,13 61 jne NEAR $L$hash_ad_loop 62$L$poly_fast_tls_ad: 63 64 mov r10,QWORD[rcx] 65 mov r11,QWORD[5+rcx] 66 shr r11,24 67 mov r12,1 68 mov rax,QWORD[((0+160+0))+rbp] 69 mov r15,rax 70 mul r10 71 mov r13,rax 72 mov r14,rdx 73 mov rax,QWORD[((0+160+0))+rbp] 74 mul r11 75 imul r15,r12 76 add r14,rax 77 adc r15,rdx 78 mov rax,QWORD[((8+160+0))+rbp] 79 mov r9,rax 80 mul r10 81 add r14,rax 82 adc rdx,0 83 mov r10,rdx 84 mov rax,QWORD[((8+160+0))+rbp] 85 mul r11 86 add r15,rax 87 adc rdx,0 88 imul r9,r12 89 add r15,r10 90 adc r9,rdx 91 mov r10,r13 92 mov r11,r14 93 mov r12,r15 94 and r12,3 95 mov r13,r15 96 and r13,-4 97 mov r14,r9 98 shrd r15,r9,2 99 shr r9,2 100 add r15,r13 101 adc r9,r14 102 add r10,r15 103 adc r11,r9 104 adc r12,0 105 106 DB 0F3h,0C3h ;repret 107$L$hash_ad_loop: 108 109 cmp r8,16 110 jb NEAR $L$hash_ad_tail 111 add r10,QWORD[((0+0))+rcx] 112 adc r11,QWORD[((8+0))+rcx] 113 adc r12,1 114 mov rax,QWORD[((0+160+0))+rbp] 115 mov r15,rax 116 mul r10 117 mov r13,rax 118 mov r14,rdx 119 mov rax,QWORD[((0+160+0))+rbp] 120 mul r11 121 imul r15,r12 122 add r14,rax 123 adc r15,rdx 124 mov rax,QWORD[((8+160+0))+rbp] 125 mov r9,rax 126 mul r10 127 add r14,rax 128 adc rdx,0 129 mov r10,rdx 130 mov rax,QWORD[((8+160+0))+rbp] 131 mul r11 132 add r15,rax 133 adc rdx,0 134 imul r9,r12 135 add r15,r10 136 adc r9,rdx 137 mov r10,r13 138 mov r11,r14 139 mov r12,r15 140 and r12,3 141 mov r13,r15 142 and r13,-4 143 mov r14,r9 144 shrd r15,r9,2 145 shr r9,2 146 add r15,r13 147 adc r9,r14 148 add r10,r15 149 adc r11,r9 150 adc r12,0 151 152 lea rcx,[16+rcx] 153 sub r8,16 154 jmp NEAR $L$hash_ad_loop 155$L$hash_ad_tail: 156 cmp r8,0 
157 je NEAR $L$hash_ad_done 158 159 xor r13,r13 160 xor r14,r14 161 xor r15,r15 162 add rcx,r8 163$L$hash_ad_tail_loop: 164 shld r14,r13,8 165 shl r13,8 166 movzx r15,BYTE[((-1))+rcx] 167 xor r13,r15 168 dec rcx 169 dec r8 170 jne NEAR $L$hash_ad_tail_loop 171 172 add r10,r13 173 adc r11,r14 174 adc r12,1 175 mov rax,QWORD[((0+160+0))+rbp] 176 mov r15,rax 177 mul r10 178 mov r13,rax 179 mov r14,rdx 180 mov rax,QWORD[((0+160+0))+rbp] 181 mul r11 182 imul r15,r12 183 add r14,rax 184 adc r15,rdx 185 mov rax,QWORD[((8+160+0))+rbp] 186 mov r9,rax 187 mul r10 188 add r14,rax 189 adc rdx,0 190 mov r10,rdx 191 mov rax,QWORD[((8+160+0))+rbp] 192 mul r11 193 add r15,rax 194 adc rdx,0 195 imul r9,r12 196 add r15,r10 197 adc r9,rdx 198 mov r10,r13 199 mov r11,r14 200 mov r12,r15 201 and r12,3 202 mov r13,r15 203 and r13,-4 204 mov r14,r9 205 shrd r15,r9,2 206 shr r9,2 207 add r15,r13 208 adc r9,r14 209 add r10,r15 210 adc r11,r9 211 adc r12,0 212 213 214$L$hash_ad_done: 215 DB 0F3h,0C3h ;repret 216 217 218 219global GFp_chacha20_poly1305_open 220 221ALIGN 64 222GFp_chacha20_poly1305_open: 223 mov QWORD[8+rsp],rdi ;WIN64 prologue 224 mov QWORD[16+rsp],rsi 225 mov rax,rsp 226$L$SEH_begin_GFp_chacha20_poly1305_open: 227 mov rdi,rcx 228 mov rsi,rdx 229 mov rdx,r8 230 mov rcx,r9 231 mov r8,QWORD[40+rsp] 232 mov r9,QWORD[48+rsp] 233 234 235 236 push rbp 237 238 push rbx 239 240 push r12 241 242 push r13 243 244 push r14 245 246 push r15 247 248 249 250 push r9 251 252 sub rsp,288 + 160 + 32 253 254 255 lea rbp,[32+rsp] 256 and rbp,-32 257 258 movaps XMMWORD[(0+0)+rbp],xmm6 259 movaps XMMWORD[(16+0)+rbp],xmm7 260 movaps XMMWORD[(32+0)+rbp],xmm8 261 movaps XMMWORD[(48+0)+rbp],xmm9 262 movaps XMMWORD[(64+0)+rbp],xmm10 263 movaps XMMWORD[(80+0)+rbp],xmm11 264 movaps XMMWORD[(96+0)+rbp],xmm12 265 movaps XMMWORD[(112+0)+rbp],xmm13 266 movaps XMMWORD[(128+0)+rbp],xmm14 267 movaps XMMWORD[(144+0)+rbp],xmm15 268 269 mov rbx,rdx 270 mov QWORD[((0+160+32))+rbp],r8 271 mov 
QWORD[((8+160+32))+rbp],rbx 272 273 mov eax,DWORD[((GFp_ia32cap_P+8))] 274 and eax,288 275 xor eax,288 276 jz NEAR chacha20_poly1305_open_avx2 277 278 cmp rbx,128 279 jbe NEAR $L$open_sse_128 280 281 movdqa xmm0,XMMWORD[$L$chacha20_consts] 282 movdqu xmm4,XMMWORD[r9] 283 movdqu xmm8,XMMWORD[16+r9] 284 movdqu xmm12,XMMWORD[32+r9] 285 286 movdqa xmm7,xmm12 287 288 movdqa XMMWORD[(160+48)+rbp],xmm4 289 movdqa XMMWORD[(160+64)+rbp],xmm8 290 movdqa XMMWORD[(160+96)+rbp],xmm12 291 mov r10,10 292$L$open_sse_init_rounds: 293 paddd xmm0,xmm4 294 pxor xmm12,xmm0 295 pshufb xmm12,XMMWORD[$L$rol16] 296 paddd xmm8,xmm12 297 pxor xmm4,xmm8 298 movdqa xmm3,xmm4 299 pslld xmm3,12 300 psrld xmm4,20 301 pxor xmm4,xmm3 302 paddd xmm0,xmm4 303 pxor xmm12,xmm0 304 pshufb xmm12,XMMWORD[$L$rol8] 305 paddd xmm8,xmm12 306 pxor xmm4,xmm8 307 movdqa xmm3,xmm4 308 pslld xmm3,7 309 psrld xmm4,25 310 pxor xmm4,xmm3 311DB 102,15,58,15,228,4 312DB 102,69,15,58,15,192,8 313DB 102,69,15,58,15,228,12 314 paddd xmm0,xmm4 315 pxor xmm12,xmm0 316 pshufb xmm12,XMMWORD[$L$rol16] 317 paddd xmm8,xmm12 318 pxor xmm4,xmm8 319 movdqa xmm3,xmm4 320 pslld xmm3,12 321 psrld xmm4,20 322 pxor xmm4,xmm3 323 paddd xmm0,xmm4 324 pxor xmm12,xmm0 325 pshufb xmm12,XMMWORD[$L$rol8] 326 paddd xmm8,xmm12 327 pxor xmm4,xmm8 328 movdqa xmm3,xmm4 329 pslld xmm3,7 330 psrld xmm4,25 331 pxor xmm4,xmm3 332DB 102,15,58,15,228,12 333DB 102,69,15,58,15,192,8 334DB 102,69,15,58,15,228,4 335 336 dec r10 337 jne NEAR $L$open_sse_init_rounds 338 339 paddd xmm0,XMMWORD[$L$chacha20_consts] 340 paddd xmm4,XMMWORD[((160+48))+rbp] 341 342 pand xmm0,XMMWORD[$L$clamp] 343 movdqa XMMWORD[(160+0)+rbp],xmm0 344 movdqa XMMWORD[(160+16)+rbp],xmm4 345 346 mov r8,r8 347 call poly_hash_ad_internal 348$L$open_sse_main_loop: 349 cmp rbx,16*16 350 jb NEAR $L$open_sse_tail 351 352 movdqa xmm0,XMMWORD[$L$chacha20_consts] 353 movdqa xmm4,XMMWORD[((160+48))+rbp] 354 movdqa xmm8,XMMWORD[((160+64))+rbp] 355 movdqa xmm1,xmm0 356 movdqa xmm5,xmm4 357 movdqa 
xmm9,xmm8 358 movdqa xmm2,xmm0 359 movdqa xmm6,xmm4 360 movdqa xmm10,xmm8 361 movdqa xmm3,xmm0 362 movdqa xmm7,xmm4 363 movdqa xmm11,xmm8 364 movdqa xmm15,XMMWORD[((160+96))+rbp] 365 paddd xmm15,XMMWORD[$L$sse_inc] 366 movdqa xmm14,xmm15 367 paddd xmm14,XMMWORD[$L$sse_inc] 368 movdqa xmm13,xmm14 369 paddd xmm13,XMMWORD[$L$sse_inc] 370 movdqa xmm12,xmm13 371 paddd xmm12,XMMWORD[$L$sse_inc] 372 movdqa XMMWORD[(160+96)+rbp],xmm12 373 movdqa XMMWORD[(160+112)+rbp],xmm13 374 movdqa XMMWORD[(160+128)+rbp],xmm14 375 movdqa XMMWORD[(160+144)+rbp],xmm15 376 377 378 379 mov rcx,4 380 mov r8,rsi 381$L$open_sse_main_loop_rounds: 382 movdqa XMMWORD[(160+80)+rbp],xmm8 383 movdqa xmm8,XMMWORD[$L$rol16] 384 paddd xmm3,xmm7 385 paddd xmm2,xmm6 386 paddd xmm1,xmm5 387 paddd xmm0,xmm4 388 pxor xmm15,xmm3 389 pxor xmm14,xmm2 390 pxor xmm13,xmm1 391 pxor xmm12,xmm0 392DB 102,69,15,56,0,248 393DB 102,69,15,56,0,240 394DB 102,69,15,56,0,232 395DB 102,69,15,56,0,224 396 movdqa xmm8,XMMWORD[((160+80))+rbp] 397 paddd xmm11,xmm15 398 paddd xmm10,xmm14 399 paddd xmm9,xmm13 400 paddd xmm8,xmm12 401 pxor xmm7,xmm11 402 add r10,QWORD[((0+0))+r8] 403 adc r11,QWORD[((8+0))+r8] 404 adc r12,1 405 406 lea r8,[16+r8] 407 pxor xmm6,xmm10 408 pxor xmm5,xmm9 409 pxor xmm4,xmm8 410 movdqa XMMWORD[(160+80)+rbp],xmm8 411 movdqa xmm8,xmm7 412 psrld xmm8,20 413 pslld xmm7,32-20 414 pxor xmm7,xmm8 415 movdqa xmm8,xmm6 416 psrld xmm8,20 417 pslld xmm6,32-20 418 pxor xmm6,xmm8 419 movdqa xmm8,xmm5 420 psrld xmm8,20 421 pslld xmm5,32-20 422 pxor xmm5,xmm8 423 movdqa xmm8,xmm4 424 psrld xmm8,20 425 pslld xmm4,32-20 426 pxor xmm4,xmm8 427 mov rax,QWORD[((0+160+0))+rbp] 428 mov r15,rax 429 mul r10 430 mov r13,rax 431 mov r14,rdx 432 mov rax,QWORD[((0+160+0))+rbp] 433 mul r11 434 imul r15,r12 435 add r14,rax 436 adc r15,rdx 437 movdqa xmm8,XMMWORD[$L$rol8] 438 paddd xmm3,xmm7 439 paddd xmm2,xmm6 440 paddd xmm1,xmm5 441 paddd xmm0,xmm4 442 pxor xmm15,xmm3 443 pxor xmm14,xmm2 444 pxor xmm13,xmm1 445 pxor xmm12,xmm0 
446DB 102,69,15,56,0,248 447DB 102,69,15,56,0,240 448DB 102,69,15,56,0,232 449DB 102,69,15,56,0,224 450 movdqa xmm8,XMMWORD[((160+80))+rbp] 451 paddd xmm11,xmm15 452 paddd xmm10,xmm14 453 paddd xmm9,xmm13 454 paddd xmm8,xmm12 455 pxor xmm7,xmm11 456 pxor xmm6,xmm10 457 mov rax,QWORD[((8+160+0))+rbp] 458 mov r9,rax 459 mul r10 460 add r14,rax 461 adc rdx,0 462 mov r10,rdx 463 mov rax,QWORD[((8+160+0))+rbp] 464 mul r11 465 add r15,rax 466 adc rdx,0 467 pxor xmm5,xmm9 468 pxor xmm4,xmm8 469 movdqa XMMWORD[(160+80)+rbp],xmm8 470 movdqa xmm8,xmm7 471 psrld xmm8,25 472 pslld xmm7,32-25 473 pxor xmm7,xmm8 474 movdqa xmm8,xmm6 475 psrld xmm8,25 476 pslld xmm6,32-25 477 pxor xmm6,xmm8 478 movdqa xmm8,xmm5 479 psrld xmm8,25 480 pslld xmm5,32-25 481 pxor xmm5,xmm8 482 movdqa xmm8,xmm4 483 psrld xmm8,25 484 pslld xmm4,32-25 485 pxor xmm4,xmm8 486 movdqa xmm8,XMMWORD[((160+80))+rbp] 487 imul r9,r12 488 add r15,r10 489 adc r9,rdx 490DB 102,15,58,15,255,4 491DB 102,69,15,58,15,219,8 492DB 102,69,15,58,15,255,12 493DB 102,15,58,15,246,4 494DB 102,69,15,58,15,210,8 495DB 102,69,15,58,15,246,12 496DB 102,15,58,15,237,4 497DB 102,69,15,58,15,201,8 498DB 102,69,15,58,15,237,12 499DB 102,15,58,15,228,4 500DB 102,69,15,58,15,192,8 501DB 102,69,15,58,15,228,12 502 movdqa XMMWORD[(160+80)+rbp],xmm8 503 movdqa xmm8,XMMWORD[$L$rol16] 504 paddd xmm3,xmm7 505 paddd xmm2,xmm6 506 paddd xmm1,xmm5 507 paddd xmm0,xmm4 508 pxor xmm15,xmm3 509 pxor xmm14,xmm2 510 mov r10,r13 511 mov r11,r14 512 mov r12,r15 513 and r12,3 514 mov r13,r15 515 and r13,-4 516 mov r14,r9 517 shrd r15,r9,2 518 shr r9,2 519 add r15,r13 520 adc r9,r14 521 add r10,r15 522 adc r11,r9 523 adc r12,0 524 pxor xmm13,xmm1 525 pxor xmm12,xmm0 526DB 102,69,15,56,0,248 527DB 102,69,15,56,0,240 528DB 102,69,15,56,0,232 529DB 102,69,15,56,0,224 530 movdqa xmm8,XMMWORD[((160+80))+rbp] 531 paddd xmm11,xmm15 532 paddd xmm10,xmm14 533 paddd xmm9,xmm13 534 paddd xmm8,xmm12 535 pxor xmm7,xmm11 536 pxor xmm6,xmm10 537 pxor xmm5,xmm9 538 pxor 
xmm4,xmm8 539 movdqa XMMWORD[(160+80)+rbp],xmm8 540 movdqa xmm8,xmm7 541 psrld xmm8,20 542 pslld xmm7,32-20 543 pxor xmm7,xmm8 544 movdqa xmm8,xmm6 545 psrld xmm8,20 546 pslld xmm6,32-20 547 pxor xmm6,xmm8 548 movdqa xmm8,xmm5 549 psrld xmm8,20 550 pslld xmm5,32-20 551 pxor xmm5,xmm8 552 movdqa xmm8,xmm4 553 psrld xmm8,20 554 pslld xmm4,32-20 555 pxor xmm4,xmm8 556 movdqa xmm8,XMMWORD[$L$rol8] 557 paddd xmm3,xmm7 558 paddd xmm2,xmm6 559 paddd xmm1,xmm5 560 paddd xmm0,xmm4 561 pxor xmm15,xmm3 562 pxor xmm14,xmm2 563 pxor xmm13,xmm1 564 pxor xmm12,xmm0 565DB 102,69,15,56,0,248 566DB 102,69,15,56,0,240 567DB 102,69,15,56,0,232 568DB 102,69,15,56,0,224 569 movdqa xmm8,XMMWORD[((160+80))+rbp] 570 paddd xmm11,xmm15 571 paddd xmm10,xmm14 572 paddd xmm9,xmm13 573 paddd xmm8,xmm12 574 pxor xmm7,xmm11 575 pxor xmm6,xmm10 576 pxor xmm5,xmm9 577 pxor xmm4,xmm8 578 movdqa XMMWORD[(160+80)+rbp],xmm8 579 movdqa xmm8,xmm7 580 psrld xmm8,25 581 pslld xmm7,32-25 582 pxor xmm7,xmm8 583 movdqa xmm8,xmm6 584 psrld xmm8,25 585 pslld xmm6,32-25 586 pxor xmm6,xmm8 587 movdqa xmm8,xmm5 588 psrld xmm8,25 589 pslld xmm5,32-25 590 pxor xmm5,xmm8 591 movdqa xmm8,xmm4 592 psrld xmm8,25 593 pslld xmm4,32-25 594 pxor xmm4,xmm8 595 movdqa xmm8,XMMWORD[((160+80))+rbp] 596DB 102,15,58,15,255,12 597DB 102,69,15,58,15,219,8 598DB 102,69,15,58,15,255,4 599DB 102,15,58,15,246,12 600DB 102,69,15,58,15,210,8 601DB 102,69,15,58,15,246,4 602DB 102,15,58,15,237,12 603DB 102,69,15,58,15,201,8 604DB 102,69,15,58,15,237,4 605DB 102,15,58,15,228,12 606DB 102,69,15,58,15,192,8 607DB 102,69,15,58,15,228,4 608 609 dec rcx 610 jge NEAR $L$open_sse_main_loop_rounds 611 add r10,QWORD[((0+0))+r8] 612 adc r11,QWORD[((8+0))+r8] 613 adc r12,1 614 mov rax,QWORD[((0+160+0))+rbp] 615 mov r15,rax 616 mul r10 617 mov r13,rax 618 mov r14,rdx 619 mov rax,QWORD[((0+160+0))+rbp] 620 mul r11 621 imul r15,r12 622 add r14,rax 623 adc r15,rdx 624 mov rax,QWORD[((8+160+0))+rbp] 625 mov r9,rax 626 mul r10 627 add r14,rax 628 adc rdx,0 
629 mov r10,rdx 630 mov rax,QWORD[((8+160+0))+rbp] 631 mul r11 632 add r15,rax 633 adc rdx,0 634 imul r9,r12 635 add r15,r10 636 adc r9,rdx 637 mov r10,r13 638 mov r11,r14 639 mov r12,r15 640 and r12,3 641 mov r13,r15 642 and r13,-4 643 mov r14,r9 644 shrd r15,r9,2 645 shr r9,2 646 add r15,r13 647 adc r9,r14 648 add r10,r15 649 adc r11,r9 650 adc r12,0 651 652 lea r8,[16+r8] 653 cmp rcx,-6 654 jg NEAR $L$open_sse_main_loop_rounds 655 paddd xmm3,XMMWORD[$L$chacha20_consts] 656 paddd xmm7,XMMWORD[((160+48))+rbp] 657 paddd xmm11,XMMWORD[((160+64))+rbp] 658 paddd xmm15,XMMWORD[((160+144))+rbp] 659 paddd xmm2,XMMWORD[$L$chacha20_consts] 660 paddd xmm6,XMMWORD[((160+48))+rbp] 661 paddd xmm10,XMMWORD[((160+64))+rbp] 662 paddd xmm14,XMMWORD[((160+128))+rbp] 663 paddd xmm1,XMMWORD[$L$chacha20_consts] 664 paddd xmm5,XMMWORD[((160+48))+rbp] 665 paddd xmm9,XMMWORD[((160+64))+rbp] 666 paddd xmm13,XMMWORD[((160+112))+rbp] 667 paddd xmm0,XMMWORD[$L$chacha20_consts] 668 paddd xmm4,XMMWORD[((160+48))+rbp] 669 paddd xmm8,XMMWORD[((160+64))+rbp] 670 paddd xmm12,XMMWORD[((160+96))+rbp] 671 movdqa XMMWORD[(160+80)+rbp],xmm12 672 movdqu xmm12,XMMWORD[((0 + 0))+rsi] 673 pxor xmm12,xmm3 674 movdqu XMMWORD[(0 + 0)+rdi],xmm12 675 movdqu xmm12,XMMWORD[((16 + 0))+rsi] 676 pxor xmm12,xmm7 677 movdqu XMMWORD[(16 + 0)+rdi],xmm12 678 movdqu xmm12,XMMWORD[((32 + 0))+rsi] 679 pxor xmm12,xmm11 680 movdqu XMMWORD[(32 + 0)+rdi],xmm12 681 movdqu xmm12,XMMWORD[((48 + 0))+rsi] 682 pxor xmm12,xmm15 683 movdqu XMMWORD[(48 + 0)+rdi],xmm12 684 movdqu xmm3,XMMWORD[((0 + 64))+rsi] 685 movdqu xmm7,XMMWORD[((16 + 64))+rsi] 686 movdqu xmm11,XMMWORD[((32 + 64))+rsi] 687 movdqu xmm15,XMMWORD[((48 + 64))+rsi] 688 pxor xmm2,xmm3 689 pxor xmm6,xmm7 690 pxor xmm10,xmm11 691 pxor xmm15,xmm14 692 movdqu XMMWORD[(0 + 64)+rdi],xmm2 693 movdqu XMMWORD[(16 + 64)+rdi],xmm6 694 movdqu XMMWORD[(32 + 64)+rdi],xmm10 695 movdqu XMMWORD[(48 + 64)+rdi],xmm15 696 movdqu xmm3,XMMWORD[((0 + 128))+rsi] 697 movdqu xmm7,XMMWORD[((16 + 
128))+rsi] 698 movdqu xmm11,XMMWORD[((32 + 128))+rsi] 699 movdqu xmm15,XMMWORD[((48 + 128))+rsi] 700 pxor xmm1,xmm3 701 pxor xmm5,xmm7 702 pxor xmm9,xmm11 703 pxor xmm15,xmm13 704 movdqu XMMWORD[(0 + 128)+rdi],xmm1 705 movdqu XMMWORD[(16 + 128)+rdi],xmm5 706 movdqu XMMWORD[(32 + 128)+rdi],xmm9 707 movdqu XMMWORD[(48 + 128)+rdi],xmm15 708 movdqu xmm3,XMMWORD[((0 + 192))+rsi] 709 movdqu xmm7,XMMWORD[((16 + 192))+rsi] 710 movdqu xmm11,XMMWORD[((32 + 192))+rsi] 711 movdqu xmm15,XMMWORD[((48 + 192))+rsi] 712 pxor xmm0,xmm3 713 pxor xmm4,xmm7 714 pxor xmm8,xmm11 715 pxor xmm15,XMMWORD[((160+80))+rbp] 716 movdqu XMMWORD[(0 + 192)+rdi],xmm0 717 movdqu XMMWORD[(16 + 192)+rdi],xmm4 718 movdqu XMMWORD[(32 + 192)+rdi],xmm8 719 movdqu XMMWORD[(48 + 192)+rdi],xmm15 720 721 lea rsi,[256+rsi] 722 lea rdi,[256+rdi] 723 sub rbx,16*16 724 jmp NEAR $L$open_sse_main_loop 725$L$open_sse_tail: 726 727 test rbx,rbx 728 jz NEAR $L$open_sse_finalize 729 cmp rbx,12*16 730 ja NEAR $L$open_sse_tail_256 731 cmp rbx,8*16 732 ja NEAR $L$open_sse_tail_192 733 cmp rbx,4*16 734 ja NEAR $L$open_sse_tail_128 735 movdqa xmm0,XMMWORD[$L$chacha20_consts] 736 movdqa xmm4,XMMWORD[((160+48))+rbp] 737 movdqa xmm8,XMMWORD[((160+64))+rbp] 738 movdqa xmm12,XMMWORD[((160+96))+rbp] 739 paddd xmm12,XMMWORD[$L$sse_inc] 740 movdqa XMMWORD[(160+96)+rbp],xmm12 741 742 xor r8,r8 743 mov rcx,rbx 744 cmp rcx,16 745 jb NEAR $L$open_sse_tail_64_rounds 746$L$open_sse_tail_64_rounds_and_x1hash: 747 add r10,QWORD[((0+0))+r8*1+rsi] 748 adc r11,QWORD[((8+0))+r8*1+rsi] 749 adc r12,1 750 mov rax,QWORD[((0+160+0))+rbp] 751 mov r15,rax 752 mul r10 753 mov r13,rax 754 mov r14,rdx 755 mov rax,QWORD[((0+160+0))+rbp] 756 mul r11 757 imul r15,r12 758 add r14,rax 759 adc r15,rdx 760 mov rax,QWORD[((8+160+0))+rbp] 761 mov r9,rax 762 mul r10 763 add r14,rax 764 adc rdx,0 765 mov r10,rdx 766 mov rax,QWORD[((8+160+0))+rbp] 767 mul r11 768 add r15,rax 769 adc rdx,0 770 imul r9,r12 771 add r15,r10 772 adc r9,rdx 773 mov r10,r13 774 mov r11,r14 
775 mov r12,r15 776 and r12,3 777 mov r13,r15 778 and r13,-4 779 mov r14,r9 780 shrd r15,r9,2 781 shr r9,2 782 add r15,r13 783 adc r9,r14 784 add r10,r15 785 adc r11,r9 786 adc r12,0 787 788 sub rcx,16 789$L$open_sse_tail_64_rounds: 790 add r8,16 791 paddd xmm0,xmm4 792 pxor xmm12,xmm0 793 pshufb xmm12,XMMWORD[$L$rol16] 794 paddd xmm8,xmm12 795 pxor xmm4,xmm8 796 movdqa xmm3,xmm4 797 pslld xmm3,12 798 psrld xmm4,20 799 pxor xmm4,xmm3 800 paddd xmm0,xmm4 801 pxor xmm12,xmm0 802 pshufb xmm12,XMMWORD[$L$rol8] 803 paddd xmm8,xmm12 804 pxor xmm4,xmm8 805 movdqa xmm3,xmm4 806 pslld xmm3,7 807 psrld xmm4,25 808 pxor xmm4,xmm3 809DB 102,15,58,15,228,4 810DB 102,69,15,58,15,192,8 811DB 102,69,15,58,15,228,12 812 paddd xmm0,xmm4 813 pxor xmm12,xmm0 814 pshufb xmm12,XMMWORD[$L$rol16] 815 paddd xmm8,xmm12 816 pxor xmm4,xmm8 817 movdqa xmm3,xmm4 818 pslld xmm3,12 819 psrld xmm4,20 820 pxor xmm4,xmm3 821 paddd xmm0,xmm4 822 pxor xmm12,xmm0 823 pshufb xmm12,XMMWORD[$L$rol8] 824 paddd xmm8,xmm12 825 pxor xmm4,xmm8 826 movdqa xmm3,xmm4 827 pslld xmm3,7 828 psrld xmm4,25 829 pxor xmm4,xmm3 830DB 102,15,58,15,228,12 831DB 102,69,15,58,15,192,8 832DB 102,69,15,58,15,228,4 833 834 cmp rcx,16 835 jae NEAR $L$open_sse_tail_64_rounds_and_x1hash 836 cmp r8,10*16 837 jne NEAR $L$open_sse_tail_64_rounds 838 paddd xmm0,XMMWORD[$L$chacha20_consts] 839 paddd xmm4,XMMWORD[((160+48))+rbp] 840 paddd xmm8,XMMWORD[((160+64))+rbp] 841 paddd xmm12,XMMWORD[((160+96))+rbp] 842 843 jmp NEAR $L$open_sse_tail_64_dec_loop 844 845$L$open_sse_tail_128: 846 movdqa xmm0,XMMWORD[$L$chacha20_consts] 847 movdqa xmm4,XMMWORD[((160+48))+rbp] 848 movdqa xmm8,XMMWORD[((160+64))+rbp] 849 movdqa xmm1,xmm0 850 movdqa xmm5,xmm4 851 movdqa xmm9,xmm8 852 movdqa xmm13,XMMWORD[((160+96))+rbp] 853 paddd xmm13,XMMWORD[$L$sse_inc] 854 movdqa xmm12,xmm13 855 paddd xmm12,XMMWORD[$L$sse_inc] 856 movdqa XMMWORD[(160+96)+rbp],xmm12 857 movdqa XMMWORD[(160+112)+rbp],xmm13 858 859 mov rcx,rbx 860 and rcx,-16 861 xor r8,r8 
862$L$open_sse_tail_128_rounds_and_x1hash: 863 add r10,QWORD[((0+0))+r8*1+rsi] 864 adc r11,QWORD[((8+0))+r8*1+rsi] 865 adc r12,1 866 mov rax,QWORD[((0+160+0))+rbp] 867 mov r15,rax 868 mul r10 869 mov r13,rax 870 mov r14,rdx 871 mov rax,QWORD[((0+160+0))+rbp] 872 mul r11 873 imul r15,r12 874 add r14,rax 875 adc r15,rdx 876 mov rax,QWORD[((8+160+0))+rbp] 877 mov r9,rax 878 mul r10 879 add r14,rax 880 adc rdx,0 881 mov r10,rdx 882 mov rax,QWORD[((8+160+0))+rbp] 883 mul r11 884 add r15,rax 885 adc rdx,0 886 imul r9,r12 887 add r15,r10 888 adc r9,rdx 889 mov r10,r13 890 mov r11,r14 891 mov r12,r15 892 and r12,3 893 mov r13,r15 894 and r13,-4 895 mov r14,r9 896 shrd r15,r9,2 897 shr r9,2 898 add r15,r13 899 adc r9,r14 900 add r10,r15 901 adc r11,r9 902 adc r12,0 903 904$L$open_sse_tail_128_rounds: 905 add r8,16 906 paddd xmm0,xmm4 907 pxor xmm12,xmm0 908 pshufb xmm12,XMMWORD[$L$rol16] 909 paddd xmm8,xmm12 910 pxor xmm4,xmm8 911 movdqa xmm3,xmm4 912 pslld xmm3,12 913 psrld xmm4,20 914 pxor xmm4,xmm3 915 paddd xmm0,xmm4 916 pxor xmm12,xmm0 917 pshufb xmm12,XMMWORD[$L$rol8] 918 paddd xmm8,xmm12 919 pxor xmm4,xmm8 920 movdqa xmm3,xmm4 921 pslld xmm3,7 922 psrld xmm4,25 923 pxor xmm4,xmm3 924DB 102,15,58,15,228,4 925DB 102,69,15,58,15,192,8 926DB 102,69,15,58,15,228,12 927 paddd xmm1,xmm5 928 pxor xmm13,xmm1 929 pshufb xmm13,XMMWORD[$L$rol16] 930 paddd xmm9,xmm13 931 pxor xmm5,xmm9 932 movdqa xmm3,xmm5 933 pslld xmm3,12 934 psrld xmm5,20 935 pxor xmm5,xmm3 936 paddd xmm1,xmm5 937 pxor xmm13,xmm1 938 pshufb xmm13,XMMWORD[$L$rol8] 939 paddd xmm9,xmm13 940 pxor xmm5,xmm9 941 movdqa xmm3,xmm5 942 pslld xmm3,7 943 psrld xmm5,25 944 pxor xmm5,xmm3 945DB 102,15,58,15,237,4 946DB 102,69,15,58,15,201,8 947DB 102,69,15,58,15,237,12 948 paddd xmm0,xmm4 949 pxor xmm12,xmm0 950 pshufb xmm12,XMMWORD[$L$rol16] 951 paddd xmm8,xmm12 952 pxor xmm4,xmm8 953 movdqa xmm3,xmm4 954 pslld xmm3,12 955 psrld xmm4,20 956 pxor xmm4,xmm3 957 paddd xmm0,xmm4 958 pxor xmm12,xmm0 959 pshufb 
xmm12,XMMWORD[$L$rol8] 960 paddd xmm8,xmm12 961 pxor xmm4,xmm8 962 movdqa xmm3,xmm4 963 pslld xmm3,7 964 psrld xmm4,25 965 pxor xmm4,xmm3 966DB 102,15,58,15,228,12 967DB 102,69,15,58,15,192,8 968DB 102,69,15,58,15,228,4 969 paddd xmm1,xmm5 970 pxor xmm13,xmm1 971 pshufb xmm13,XMMWORD[$L$rol16] 972 paddd xmm9,xmm13 973 pxor xmm5,xmm9 974 movdqa xmm3,xmm5 975 pslld xmm3,12 976 psrld xmm5,20 977 pxor xmm5,xmm3 978 paddd xmm1,xmm5 979 pxor xmm13,xmm1 980 pshufb xmm13,XMMWORD[$L$rol8] 981 paddd xmm9,xmm13 982 pxor xmm5,xmm9 983 movdqa xmm3,xmm5 984 pslld xmm3,7 985 psrld xmm5,25 986 pxor xmm5,xmm3 987DB 102,15,58,15,237,12 988DB 102,69,15,58,15,201,8 989DB 102,69,15,58,15,237,4 990 991 cmp r8,rcx 992 jb NEAR $L$open_sse_tail_128_rounds_and_x1hash 993 cmp r8,10*16 994 jne NEAR $L$open_sse_tail_128_rounds 995 paddd xmm1,XMMWORD[$L$chacha20_consts] 996 paddd xmm5,XMMWORD[((160+48))+rbp] 997 paddd xmm9,XMMWORD[((160+64))+rbp] 998 paddd xmm13,XMMWORD[((160+112))+rbp] 999 paddd xmm0,XMMWORD[$L$chacha20_consts] 1000 paddd xmm4,XMMWORD[((160+48))+rbp] 1001 paddd xmm8,XMMWORD[((160+64))+rbp] 1002 paddd xmm12,XMMWORD[((160+96))+rbp] 1003 movdqu xmm3,XMMWORD[((0 + 0))+rsi] 1004 movdqu xmm7,XMMWORD[((16 + 0))+rsi] 1005 movdqu xmm11,XMMWORD[((32 + 0))+rsi] 1006 movdqu xmm15,XMMWORD[((48 + 0))+rsi] 1007 pxor xmm1,xmm3 1008 pxor xmm5,xmm7 1009 pxor xmm9,xmm11 1010 pxor xmm15,xmm13 1011 movdqu XMMWORD[(0 + 0)+rdi],xmm1 1012 movdqu XMMWORD[(16 + 0)+rdi],xmm5 1013 movdqu XMMWORD[(32 + 0)+rdi],xmm9 1014 movdqu XMMWORD[(48 + 0)+rdi],xmm15 1015 1016 sub rbx,4*16 1017 lea rsi,[64+rsi] 1018 lea rdi,[64+rdi] 1019 jmp NEAR $L$open_sse_tail_64_dec_loop 1020 1021$L$open_sse_tail_192: 1022 movdqa xmm0,XMMWORD[$L$chacha20_consts] 1023 movdqa xmm4,XMMWORD[((160+48))+rbp] 1024 movdqa xmm8,XMMWORD[((160+64))+rbp] 1025 movdqa xmm1,xmm0 1026 movdqa xmm5,xmm4 1027 movdqa xmm9,xmm8 1028 movdqa xmm2,xmm0 1029 movdqa xmm6,xmm4 1030 movdqa xmm10,xmm8 1031 movdqa xmm14,XMMWORD[((160+96))+rbp] 1032 paddd 
xmm14,XMMWORD[$L$sse_inc] 1033 movdqa xmm13,xmm14 1034 paddd xmm13,XMMWORD[$L$sse_inc] 1035 movdqa xmm12,xmm13 1036 paddd xmm12,XMMWORD[$L$sse_inc] 1037 movdqa XMMWORD[(160+96)+rbp],xmm12 1038 movdqa XMMWORD[(160+112)+rbp],xmm13 1039 movdqa XMMWORD[(160+128)+rbp],xmm14 1040 1041 mov rcx,rbx 1042 mov r8,10*16 1043 cmp rcx,10*16 1044 cmovg rcx,r8 1045 and rcx,-16 1046 xor r8,r8 1047$L$open_sse_tail_192_rounds_and_x1hash: 1048 add r10,QWORD[((0+0))+r8*1+rsi] 1049 adc r11,QWORD[((8+0))+r8*1+rsi] 1050 adc r12,1 1051 mov rax,QWORD[((0+160+0))+rbp] 1052 mov r15,rax 1053 mul r10 1054 mov r13,rax 1055 mov r14,rdx 1056 mov rax,QWORD[((0+160+0))+rbp] 1057 mul r11 1058 imul r15,r12 1059 add r14,rax 1060 adc r15,rdx 1061 mov rax,QWORD[((8+160+0))+rbp] 1062 mov r9,rax 1063 mul r10 1064 add r14,rax 1065 adc rdx,0 1066 mov r10,rdx 1067 mov rax,QWORD[((8+160+0))+rbp] 1068 mul r11 1069 add r15,rax 1070 adc rdx,0 1071 imul r9,r12 1072 add r15,r10 1073 adc r9,rdx 1074 mov r10,r13 1075 mov r11,r14 1076 mov r12,r15 1077 and r12,3 1078 mov r13,r15 1079 and r13,-4 1080 mov r14,r9 1081 shrd r15,r9,2 1082 shr r9,2 1083 add r15,r13 1084 adc r9,r14 1085 add r10,r15 1086 adc r11,r9 1087 adc r12,0 1088 1089$L$open_sse_tail_192_rounds: 1090 add r8,16 1091 paddd xmm0,xmm4 1092 pxor xmm12,xmm0 1093 pshufb xmm12,XMMWORD[$L$rol16] 1094 paddd xmm8,xmm12 1095 pxor xmm4,xmm8 1096 movdqa xmm3,xmm4 1097 pslld xmm3,12 1098 psrld xmm4,20 1099 pxor xmm4,xmm3 1100 paddd xmm0,xmm4 1101 pxor xmm12,xmm0 1102 pshufb xmm12,XMMWORD[$L$rol8] 1103 paddd xmm8,xmm12 1104 pxor xmm4,xmm8 1105 movdqa xmm3,xmm4 1106 pslld xmm3,7 1107 psrld xmm4,25 1108 pxor xmm4,xmm3 1109DB 102,15,58,15,228,4 1110DB 102,69,15,58,15,192,8 1111DB 102,69,15,58,15,228,12 1112 paddd xmm1,xmm5 1113 pxor xmm13,xmm1 1114 pshufb xmm13,XMMWORD[$L$rol16] 1115 paddd xmm9,xmm13 1116 pxor xmm5,xmm9 1117 movdqa xmm3,xmm5 1118 pslld xmm3,12 1119 psrld xmm5,20 1120 pxor xmm5,xmm3 1121 paddd xmm1,xmm5 1122 pxor xmm13,xmm1 1123 pshufb xmm13,XMMWORD[$L$rol8] 
1124 paddd xmm9,xmm13 1125 pxor xmm5,xmm9 1126 movdqa xmm3,xmm5 1127 pslld xmm3,7 1128 psrld xmm5,25 1129 pxor xmm5,xmm3 1130DB 102,15,58,15,237,4 1131DB 102,69,15,58,15,201,8 1132DB 102,69,15,58,15,237,12 1133 paddd xmm2,xmm6 1134 pxor xmm14,xmm2 1135 pshufb xmm14,XMMWORD[$L$rol16] 1136 paddd xmm10,xmm14 1137 pxor xmm6,xmm10 1138 movdqa xmm3,xmm6 1139 pslld xmm3,12 1140 psrld xmm6,20 1141 pxor xmm6,xmm3 1142 paddd xmm2,xmm6 1143 pxor xmm14,xmm2 1144 pshufb xmm14,XMMWORD[$L$rol8] 1145 paddd xmm10,xmm14 1146 pxor xmm6,xmm10 1147 movdqa xmm3,xmm6 1148 pslld xmm3,7 1149 psrld xmm6,25 1150 pxor xmm6,xmm3 1151DB 102,15,58,15,246,4 1152DB 102,69,15,58,15,210,8 1153DB 102,69,15,58,15,246,12 1154 paddd xmm0,xmm4 1155 pxor xmm12,xmm0 1156 pshufb xmm12,XMMWORD[$L$rol16] 1157 paddd xmm8,xmm12 1158 pxor xmm4,xmm8 1159 movdqa xmm3,xmm4 1160 pslld xmm3,12 1161 psrld xmm4,20 1162 pxor xmm4,xmm3 1163 paddd xmm0,xmm4 1164 pxor xmm12,xmm0 1165 pshufb xmm12,XMMWORD[$L$rol8] 1166 paddd xmm8,xmm12 1167 pxor xmm4,xmm8 1168 movdqa xmm3,xmm4 1169 pslld xmm3,7 1170 psrld xmm4,25 1171 pxor xmm4,xmm3 1172DB 102,15,58,15,228,12 1173DB 102,69,15,58,15,192,8 1174DB 102,69,15,58,15,228,4 1175 paddd xmm1,xmm5 1176 pxor xmm13,xmm1 1177 pshufb xmm13,XMMWORD[$L$rol16] 1178 paddd xmm9,xmm13 1179 pxor xmm5,xmm9 1180 movdqa xmm3,xmm5 1181 pslld xmm3,12 1182 psrld xmm5,20 1183 pxor xmm5,xmm3 1184 paddd xmm1,xmm5 1185 pxor xmm13,xmm1 1186 pshufb xmm13,XMMWORD[$L$rol8] 1187 paddd xmm9,xmm13 1188 pxor xmm5,xmm9 1189 movdqa xmm3,xmm5 1190 pslld xmm3,7 1191 psrld xmm5,25 1192 pxor xmm5,xmm3 1193DB 102,15,58,15,237,12 1194DB 102,69,15,58,15,201,8 1195DB 102,69,15,58,15,237,4 1196 paddd xmm2,xmm6 1197 pxor xmm14,xmm2 1198 pshufb xmm14,XMMWORD[$L$rol16] 1199 paddd xmm10,xmm14 1200 pxor xmm6,xmm10 1201 movdqa xmm3,xmm6 1202 pslld xmm3,12 1203 psrld xmm6,20 1204 pxor xmm6,xmm3 1205 paddd xmm2,xmm6 1206 pxor xmm14,xmm2 1207 pshufb xmm14,XMMWORD[$L$rol8] 1208 paddd xmm10,xmm14 1209 pxor xmm6,xmm10 1210 movdqa 
xmm3,xmm6 1211 pslld xmm3,7 1212 psrld xmm6,25 1213 pxor xmm6,xmm3 1214DB 102,15,58,15,246,12 1215DB 102,69,15,58,15,210,8 1216DB 102,69,15,58,15,246,4 1217 1218 cmp r8,rcx 1219 jb NEAR $L$open_sse_tail_192_rounds_and_x1hash 1220 cmp r8,10*16 1221 jne NEAR $L$open_sse_tail_192_rounds 1222 cmp rbx,11*16 1223 jb NEAR $L$open_sse_tail_192_finish 1224 add r10,QWORD[((0+160))+rsi] 1225 adc r11,QWORD[((8+160))+rsi] 1226 adc r12,1 1227 mov rax,QWORD[((0+160+0))+rbp] 1228 mov r15,rax 1229 mul r10 1230 mov r13,rax 1231 mov r14,rdx 1232 mov rax,QWORD[((0+160+0))+rbp] 1233 mul r11 1234 imul r15,r12 1235 add r14,rax 1236 adc r15,rdx 1237 mov rax,QWORD[((8+160+0))+rbp] 1238 mov r9,rax 1239 mul r10 1240 add r14,rax 1241 adc rdx,0 1242 mov r10,rdx 1243 mov rax,QWORD[((8+160+0))+rbp] 1244 mul r11 1245 add r15,rax 1246 adc rdx,0 1247 imul r9,r12 1248 add r15,r10 1249 adc r9,rdx 1250 mov r10,r13 1251 mov r11,r14 1252 mov r12,r15 1253 and r12,3 1254 mov r13,r15 1255 and r13,-4 1256 mov r14,r9 1257 shrd r15,r9,2 1258 shr r9,2 1259 add r15,r13 1260 adc r9,r14 1261 add r10,r15 1262 adc r11,r9 1263 adc r12,0 1264 1265 cmp rbx,12*16 1266 jb NEAR $L$open_sse_tail_192_finish 1267 add r10,QWORD[((0+176))+rsi] 1268 adc r11,QWORD[((8+176))+rsi] 1269 adc r12,1 1270 mov rax,QWORD[((0+160+0))+rbp] 1271 mov r15,rax 1272 mul r10 1273 mov r13,rax 1274 mov r14,rdx 1275 mov rax,QWORD[((0+160+0))+rbp] 1276 mul r11 1277 imul r15,r12 1278 add r14,rax 1279 adc r15,rdx 1280 mov rax,QWORD[((8+160+0))+rbp] 1281 mov r9,rax 1282 mul r10 1283 add r14,rax 1284 adc rdx,0 1285 mov r10,rdx 1286 mov rax,QWORD[((8+160+0))+rbp] 1287 mul r11 1288 add r15,rax 1289 adc rdx,0 1290 imul r9,r12 1291 add r15,r10 1292 adc r9,rdx 1293 mov r10,r13 1294 mov r11,r14 1295 mov r12,r15 1296 and r12,3 1297 mov r13,r15 1298 and r13,-4 1299 mov r14,r9 1300 shrd r15,r9,2 1301 shr r9,2 1302 add r15,r13 1303 adc r9,r14 1304 add r10,r15 1305 adc r11,r9 1306 adc r12,0 1307 1308$L$open_sse_tail_192_finish: 1309 paddd 
xmm2,XMMWORD[$L$chacha20_consts] 1310 paddd xmm6,XMMWORD[((160+48))+rbp] 1311 paddd xmm10,XMMWORD[((160+64))+rbp] 1312 paddd xmm14,XMMWORD[((160+128))+rbp] 1313 paddd xmm1,XMMWORD[$L$chacha20_consts] 1314 paddd xmm5,XMMWORD[((160+48))+rbp] 1315 paddd xmm9,XMMWORD[((160+64))+rbp] 1316 paddd xmm13,XMMWORD[((160+112))+rbp] 1317 paddd xmm0,XMMWORD[$L$chacha20_consts] 1318 paddd xmm4,XMMWORD[((160+48))+rbp] 1319 paddd xmm8,XMMWORD[((160+64))+rbp] 1320 paddd xmm12,XMMWORD[((160+96))+rbp] 1321 movdqu xmm3,XMMWORD[((0 + 0))+rsi] 1322 movdqu xmm7,XMMWORD[((16 + 0))+rsi] 1323 movdqu xmm11,XMMWORD[((32 + 0))+rsi] 1324 movdqu xmm15,XMMWORD[((48 + 0))+rsi] 1325 pxor xmm2,xmm3 1326 pxor xmm6,xmm7 1327 pxor xmm10,xmm11 1328 pxor xmm15,xmm14 1329 movdqu XMMWORD[(0 + 0)+rdi],xmm2 1330 movdqu XMMWORD[(16 + 0)+rdi],xmm6 1331 movdqu XMMWORD[(32 + 0)+rdi],xmm10 1332 movdqu XMMWORD[(48 + 0)+rdi],xmm15 1333 movdqu xmm3,XMMWORD[((0 + 64))+rsi] 1334 movdqu xmm7,XMMWORD[((16 + 64))+rsi] 1335 movdqu xmm11,XMMWORD[((32 + 64))+rsi] 1336 movdqu xmm15,XMMWORD[((48 + 64))+rsi] 1337 pxor xmm1,xmm3 1338 pxor xmm5,xmm7 1339 pxor xmm9,xmm11 1340 pxor xmm15,xmm13 1341 movdqu XMMWORD[(0 + 64)+rdi],xmm1 1342 movdqu XMMWORD[(16 + 64)+rdi],xmm5 1343 movdqu XMMWORD[(32 + 64)+rdi],xmm9 1344 movdqu XMMWORD[(48 + 64)+rdi],xmm15 1345 1346 sub rbx,8*16 1347 lea rsi,[128+rsi] 1348 lea rdi,[128+rdi] 1349 jmp NEAR $L$open_sse_tail_64_dec_loop 1350 1351$L$open_sse_tail_256: 1352 movdqa xmm0,XMMWORD[$L$chacha20_consts] 1353 movdqa xmm4,XMMWORD[((160+48))+rbp] 1354 movdqa xmm8,XMMWORD[((160+64))+rbp] 1355 movdqa xmm1,xmm0 1356 movdqa xmm5,xmm4 1357 movdqa xmm9,xmm8 1358 movdqa xmm2,xmm0 1359 movdqa xmm6,xmm4 1360 movdqa xmm10,xmm8 1361 movdqa xmm3,xmm0 1362 movdqa xmm7,xmm4 1363 movdqa xmm11,xmm8 1364 movdqa xmm15,XMMWORD[((160+96))+rbp] 1365 paddd xmm15,XMMWORD[$L$sse_inc] 1366 movdqa xmm14,xmm15 1367 paddd xmm14,XMMWORD[$L$sse_inc] 1368 movdqa xmm13,xmm14 1369 paddd xmm13,XMMWORD[$L$sse_inc] 1370 movdqa xmm12,xmm13 
1371 paddd xmm12,XMMWORD[$L$sse_inc] 1372 movdqa XMMWORD[(160+96)+rbp],xmm12 1373 movdqa XMMWORD[(160+112)+rbp],xmm13 1374 movdqa XMMWORD[(160+128)+rbp],xmm14 1375 movdqa XMMWORD[(160+144)+rbp],xmm15 1376 1377 xor r8,r8 1378$L$open_sse_tail_256_rounds_and_x1hash: 1379 add r10,QWORD[((0+0))+r8*1+rsi] 1380 adc r11,QWORD[((8+0))+r8*1+rsi] 1381 adc r12,1 1382 movdqa XMMWORD[(160+80)+rbp],xmm11 1383 paddd xmm0,xmm4 1384 pxor xmm12,xmm0 1385 pshufb xmm12,XMMWORD[$L$rol16] 1386 paddd xmm8,xmm12 1387 pxor xmm4,xmm8 1388 movdqa xmm11,xmm4 1389 pslld xmm11,12 1390 psrld xmm4,20 1391 pxor xmm4,xmm11 1392 paddd xmm0,xmm4 1393 pxor xmm12,xmm0 1394 pshufb xmm12,XMMWORD[$L$rol8] 1395 paddd xmm8,xmm12 1396 pxor xmm4,xmm8 1397 movdqa xmm11,xmm4 1398 pslld xmm11,7 1399 psrld xmm4,25 1400 pxor xmm4,xmm11 1401DB 102,15,58,15,228,4 1402DB 102,69,15,58,15,192,8 1403DB 102,69,15,58,15,228,12 1404 paddd xmm1,xmm5 1405 pxor xmm13,xmm1 1406 pshufb xmm13,XMMWORD[$L$rol16] 1407 paddd xmm9,xmm13 1408 pxor xmm5,xmm9 1409 movdqa xmm11,xmm5 1410 pslld xmm11,12 1411 psrld xmm5,20 1412 pxor xmm5,xmm11 1413 paddd xmm1,xmm5 1414 pxor xmm13,xmm1 1415 pshufb xmm13,XMMWORD[$L$rol8] 1416 paddd xmm9,xmm13 1417 pxor xmm5,xmm9 1418 movdqa xmm11,xmm5 1419 pslld xmm11,7 1420 psrld xmm5,25 1421 pxor xmm5,xmm11 1422DB 102,15,58,15,237,4 1423DB 102,69,15,58,15,201,8 1424DB 102,69,15,58,15,237,12 1425 paddd xmm2,xmm6 1426 pxor xmm14,xmm2 1427 pshufb xmm14,XMMWORD[$L$rol16] 1428 paddd xmm10,xmm14 1429 pxor xmm6,xmm10 1430 movdqa xmm11,xmm6 1431 pslld xmm11,12 1432 psrld xmm6,20 1433 pxor xmm6,xmm11 1434 paddd xmm2,xmm6 1435 pxor xmm14,xmm2 1436 pshufb xmm14,XMMWORD[$L$rol8] 1437 paddd xmm10,xmm14 1438 pxor xmm6,xmm10 1439 movdqa xmm11,xmm6 1440 pslld xmm11,7 1441 psrld xmm6,25 1442 pxor xmm6,xmm11 1443DB 102,15,58,15,246,4 1444DB 102,69,15,58,15,210,8 1445DB 102,69,15,58,15,246,12 1446 movdqa xmm11,XMMWORD[((160+80))+rbp] 1447 mov rax,QWORD[((0+160+0))+rbp] 1448 mov r15,rax 1449 mul r10 1450 mov r13,rax 1451 mov 
r14,rdx 1452 mov rax,QWORD[((0+160+0))+rbp] 1453 mul r11 1454 imul r15,r12 1455 add r14,rax 1456 adc r15,rdx 1457 movdqa XMMWORD[(160+80)+rbp],xmm9 1458 paddd xmm3,xmm7 1459 pxor xmm15,xmm3 1460 pshufb xmm15,XMMWORD[$L$rol16] 1461 paddd xmm11,xmm15 1462 pxor xmm7,xmm11 1463 movdqa xmm9,xmm7 1464 pslld xmm9,12 1465 psrld xmm7,20 1466 pxor xmm7,xmm9 1467 paddd xmm3,xmm7 1468 pxor xmm15,xmm3 1469 pshufb xmm15,XMMWORD[$L$rol8] 1470 paddd xmm11,xmm15 1471 pxor xmm7,xmm11 1472 movdqa xmm9,xmm7 1473 pslld xmm9,7 1474 psrld xmm7,25 1475 pxor xmm7,xmm9 1476DB 102,15,58,15,255,4 1477DB 102,69,15,58,15,219,8 1478DB 102,69,15,58,15,255,12 1479 movdqa xmm9,XMMWORD[((160+80))+rbp] 1480 mov rax,QWORD[((8+160+0))+rbp] 1481 mov r9,rax 1482 mul r10 1483 add r14,rax 1484 adc rdx,0 1485 mov r10,rdx 1486 mov rax,QWORD[((8+160+0))+rbp] 1487 mul r11 1488 add r15,rax 1489 adc rdx,0 1490 movdqa XMMWORD[(160+80)+rbp],xmm11 1491 paddd xmm0,xmm4 1492 pxor xmm12,xmm0 1493 pshufb xmm12,XMMWORD[$L$rol16] 1494 paddd xmm8,xmm12 1495 pxor xmm4,xmm8 1496 movdqa xmm11,xmm4 1497 pslld xmm11,12 1498 psrld xmm4,20 1499 pxor xmm4,xmm11 1500 paddd xmm0,xmm4 1501 pxor xmm12,xmm0 1502 pshufb xmm12,XMMWORD[$L$rol8] 1503 paddd xmm8,xmm12 1504 pxor xmm4,xmm8 1505 movdqa xmm11,xmm4 1506 pslld xmm11,7 1507 psrld xmm4,25 1508 pxor xmm4,xmm11 1509DB 102,15,58,15,228,12 1510DB 102,69,15,58,15,192,8 1511DB 102,69,15,58,15,228,4 1512 paddd xmm1,xmm5 1513 pxor xmm13,xmm1 1514 pshufb xmm13,XMMWORD[$L$rol16] 1515 paddd xmm9,xmm13 1516 pxor xmm5,xmm9 1517 movdqa xmm11,xmm5 1518 pslld xmm11,12 1519 psrld xmm5,20 1520 pxor xmm5,xmm11 1521 paddd xmm1,xmm5 1522 pxor xmm13,xmm1 1523 pshufb xmm13,XMMWORD[$L$rol8] 1524 paddd xmm9,xmm13 1525 pxor xmm5,xmm9 1526 movdqa xmm11,xmm5 1527 pslld xmm11,7 1528 psrld xmm5,25 1529 pxor xmm5,xmm11 1530DB 102,15,58,15,237,12 1531DB 102,69,15,58,15,201,8 1532DB 102,69,15,58,15,237,4 1533 imul r9,r12 1534 add r15,r10 1535 adc r9,rdx 1536 paddd xmm2,xmm6 1537 pxor xmm14,xmm2 1538 pshufb 
xmm14,XMMWORD[$L$rol16] 1539 paddd xmm10,xmm14 1540 pxor xmm6,xmm10 1541 movdqa xmm11,xmm6 1542 pslld xmm11,12 1543 psrld xmm6,20 1544 pxor xmm6,xmm11 1545 paddd xmm2,xmm6 1546 pxor xmm14,xmm2 1547 pshufb xmm14,XMMWORD[$L$rol8] 1548 paddd xmm10,xmm14 1549 pxor xmm6,xmm10 1550 movdqa xmm11,xmm6 1551 pslld xmm11,7 1552 psrld xmm6,25 1553 pxor xmm6,xmm11 1554DB 102,15,58,15,246,12 1555DB 102,69,15,58,15,210,8 1556DB 102,69,15,58,15,246,4 1557 movdqa xmm11,XMMWORD[((160+80))+rbp] 1558 mov r10,r13 1559 mov r11,r14 1560 mov r12,r15 1561 and r12,3 1562 mov r13,r15 1563 and r13,-4 1564 mov r14,r9 1565 shrd r15,r9,2 1566 shr r9,2 1567 add r15,r13 1568 adc r9,r14 1569 add r10,r15 1570 adc r11,r9 1571 adc r12,0 1572 movdqa XMMWORD[(160+80)+rbp],xmm9 1573 paddd xmm3,xmm7 1574 pxor xmm15,xmm3 1575 pshufb xmm15,XMMWORD[$L$rol16] 1576 paddd xmm11,xmm15 1577 pxor xmm7,xmm11 1578 movdqa xmm9,xmm7 1579 pslld xmm9,12 1580 psrld xmm7,20 1581 pxor xmm7,xmm9 1582 paddd xmm3,xmm7 1583 pxor xmm15,xmm3 1584 pshufb xmm15,XMMWORD[$L$rol8] 1585 paddd xmm11,xmm15 1586 pxor xmm7,xmm11 1587 movdqa xmm9,xmm7 1588 pslld xmm9,7 1589 psrld xmm7,25 1590 pxor xmm7,xmm9 1591DB 102,15,58,15,255,12 1592DB 102,69,15,58,15,219,8 1593DB 102,69,15,58,15,255,4 1594 movdqa xmm9,XMMWORD[((160+80))+rbp] 1595 1596 add r8,16 1597 cmp r8,10*16 1598 jb NEAR $L$open_sse_tail_256_rounds_and_x1hash 1599 1600 mov rcx,rbx 1601 and rcx,-16 1602$L$open_sse_tail_256_hash: 1603 add r10,QWORD[((0+0))+r8*1+rsi] 1604 adc r11,QWORD[((8+0))+r8*1+rsi] 1605 adc r12,1 1606 mov rax,QWORD[((0+160+0))+rbp] 1607 mov r15,rax 1608 mul r10 1609 mov r13,rax 1610 mov r14,rdx 1611 mov rax,QWORD[((0+160+0))+rbp] 1612 mul r11 1613 imul r15,r12 1614 add r14,rax 1615 adc r15,rdx 1616 mov rax,QWORD[((8+160+0))+rbp] 1617 mov r9,rax 1618 mul r10 1619 add r14,rax 1620 adc rdx,0 1621 mov r10,rdx 1622 mov rax,QWORD[((8+160+0))+rbp] 1623 mul r11 1624 add r15,rax 1625 adc rdx,0 1626 imul r9,r12 1627 add r15,r10 1628 adc r9,rdx 1629 mov r10,r13 1630 mov 
r11,r14 1631 mov r12,r15 1632 and r12,3 1633 mov r13,r15 1634 and r13,-4 1635 mov r14,r9 1636 shrd r15,r9,2 1637 shr r9,2 1638 add r15,r13 1639 adc r9,r14 1640 add r10,r15 1641 adc r11,r9 1642 adc r12,0 1643 1644 add r8,16 1645 cmp r8,rcx 1646 jb NEAR $L$open_sse_tail_256_hash 1647 paddd xmm3,XMMWORD[$L$chacha20_consts] 1648 paddd xmm7,XMMWORD[((160+48))+rbp] 1649 paddd xmm11,XMMWORD[((160+64))+rbp] 1650 paddd xmm15,XMMWORD[((160+144))+rbp] 1651 paddd xmm2,XMMWORD[$L$chacha20_consts] 1652 paddd xmm6,XMMWORD[((160+48))+rbp] 1653 paddd xmm10,XMMWORD[((160+64))+rbp] 1654 paddd xmm14,XMMWORD[((160+128))+rbp] 1655 paddd xmm1,XMMWORD[$L$chacha20_consts] 1656 paddd xmm5,XMMWORD[((160+48))+rbp] 1657 paddd xmm9,XMMWORD[((160+64))+rbp] 1658 paddd xmm13,XMMWORD[((160+112))+rbp] 1659 paddd xmm0,XMMWORD[$L$chacha20_consts] 1660 paddd xmm4,XMMWORD[((160+48))+rbp] 1661 paddd xmm8,XMMWORD[((160+64))+rbp] 1662 paddd xmm12,XMMWORD[((160+96))+rbp] 1663 movdqa XMMWORD[(160+80)+rbp],xmm12 1664 movdqu xmm12,XMMWORD[((0 + 0))+rsi] 1665 pxor xmm12,xmm3 1666 movdqu XMMWORD[(0 + 0)+rdi],xmm12 1667 movdqu xmm12,XMMWORD[((16 + 0))+rsi] 1668 pxor xmm12,xmm7 1669 movdqu XMMWORD[(16 + 0)+rdi],xmm12 1670 movdqu xmm12,XMMWORD[((32 + 0))+rsi] 1671 pxor xmm12,xmm11 1672 movdqu XMMWORD[(32 + 0)+rdi],xmm12 1673 movdqu xmm12,XMMWORD[((48 + 0))+rsi] 1674 pxor xmm12,xmm15 1675 movdqu XMMWORD[(48 + 0)+rdi],xmm12 1676 movdqu xmm3,XMMWORD[((0 + 64))+rsi] 1677 movdqu xmm7,XMMWORD[((16 + 64))+rsi] 1678 movdqu xmm11,XMMWORD[((32 + 64))+rsi] 1679 movdqu xmm15,XMMWORD[((48 + 64))+rsi] 1680 pxor xmm2,xmm3 1681 pxor xmm6,xmm7 1682 pxor xmm10,xmm11 1683 pxor xmm15,xmm14 1684 movdqu XMMWORD[(0 + 64)+rdi],xmm2 1685 movdqu XMMWORD[(16 + 64)+rdi],xmm6 1686 movdqu XMMWORD[(32 + 64)+rdi],xmm10 1687 movdqu XMMWORD[(48 + 64)+rdi],xmm15 1688 movdqu xmm3,XMMWORD[((0 + 128))+rsi] 1689 movdqu xmm7,XMMWORD[((16 + 128))+rsi] 1690 movdqu xmm11,XMMWORD[((32 + 128))+rsi] 1691 movdqu xmm15,XMMWORD[((48 + 128))+rsi] 1692 pxor 
xmm1,xmm3 1693 pxor xmm5,xmm7 1694 pxor xmm9,xmm11 1695 pxor xmm15,xmm13 1696 movdqu XMMWORD[(0 + 128)+rdi],xmm1 1697 movdqu XMMWORD[(16 + 128)+rdi],xmm5 1698 movdqu XMMWORD[(32 + 128)+rdi],xmm9 1699 movdqu XMMWORD[(48 + 128)+rdi],xmm15 1700 1701 movdqa xmm12,XMMWORD[((160+80))+rbp] 1702 sub rbx,12*16 1703 lea rsi,[192+rsi] 1704 lea rdi,[192+rdi] 1705 1706 1707$L$open_sse_tail_64_dec_loop: 1708 cmp rbx,16 1709 jb NEAR $L$open_sse_tail_16_init 1710 sub rbx,16 1711 movdqu xmm3,XMMWORD[rsi] 1712 pxor xmm0,xmm3 1713 movdqu XMMWORD[rdi],xmm0 1714 lea rsi,[16+rsi] 1715 lea rdi,[16+rdi] 1716 movdqa xmm0,xmm4 1717 movdqa xmm4,xmm8 1718 movdqa xmm8,xmm12 1719 jmp NEAR $L$open_sse_tail_64_dec_loop 1720$L$open_sse_tail_16_init: 1721 movdqa xmm1,xmm0 1722 1723 1724$L$open_sse_tail_16: 1725 test rbx,rbx 1726 jz NEAR $L$open_sse_finalize 1727 1728 1729 1730 pxor xmm3,xmm3 1731 lea rsi,[((-1))+rbx*1+rsi] 1732 mov r8,rbx 1733$L$open_sse_tail_16_compose: 1734 pslldq xmm3,1 1735 pinsrb xmm3,BYTE[rsi],0 1736 sub rsi,1 1737 sub r8,1 1738 jnz NEAR $L$open_sse_tail_16_compose 1739 1740DB 102,73,15,126,221 1741 pextrq r14,xmm3,1 1742 1743 pxor xmm3,xmm1 1744 1745 1746$L$open_sse_tail_16_extract: 1747 pextrb XMMWORD[rdi],xmm3,0 1748 psrldq xmm3,1 1749 add rdi,1 1750 sub rbx,1 1751 jne NEAR $L$open_sse_tail_16_extract 1752 1753 add r10,r13 1754 adc r11,r14 1755 adc r12,1 1756 mov rax,QWORD[((0+160+0))+rbp] 1757 mov r15,rax 1758 mul r10 1759 mov r13,rax 1760 mov r14,rdx 1761 mov rax,QWORD[((0+160+0))+rbp] 1762 mul r11 1763 imul r15,r12 1764 add r14,rax 1765 adc r15,rdx 1766 mov rax,QWORD[((8+160+0))+rbp] 1767 mov r9,rax 1768 mul r10 1769 add r14,rax 1770 adc rdx,0 1771 mov r10,rdx 1772 mov rax,QWORD[((8+160+0))+rbp] 1773 mul r11 1774 add r15,rax 1775 adc rdx,0 1776 imul r9,r12 1777 add r15,r10 1778 adc r9,rdx 1779 mov r10,r13 1780 mov r11,r14 1781 mov r12,r15 1782 and r12,3 1783 mov r13,r15 1784 and r13,-4 1785 mov r14,r9 1786 shrd r15,r9,2 1787 shr r9,2 1788 add r15,r13 1789 adc r9,r14 
1790 add r10,r15 1791 adc r11,r9 1792 adc r12,0 1793 1794 1795$L$open_sse_finalize: 1796 add r10,QWORD[((0+160+32))+rbp] 1797 adc r11,QWORD[((8+160+32))+rbp] 1798 adc r12,1 1799 mov rax,QWORD[((0+160+0))+rbp] 1800 mov r15,rax 1801 mul r10 1802 mov r13,rax 1803 mov r14,rdx 1804 mov rax,QWORD[((0+160+0))+rbp] 1805 mul r11 1806 imul r15,r12 1807 add r14,rax 1808 adc r15,rdx 1809 mov rax,QWORD[((8+160+0))+rbp] 1810 mov r9,rax 1811 mul r10 1812 add r14,rax 1813 adc rdx,0 1814 mov r10,rdx 1815 mov rax,QWORD[((8+160+0))+rbp] 1816 mul r11 1817 add r15,rax 1818 adc rdx,0 1819 imul r9,r12 1820 add r15,r10 1821 adc r9,rdx 1822 mov r10,r13 1823 mov r11,r14 1824 mov r12,r15 1825 and r12,3 1826 mov r13,r15 1827 and r13,-4 1828 mov r14,r9 1829 shrd r15,r9,2 1830 shr r9,2 1831 add r15,r13 1832 adc r9,r14 1833 add r10,r15 1834 adc r11,r9 1835 adc r12,0 1836 1837 1838 mov r13,r10 1839 mov r14,r11 1840 mov r15,r12 1841 sub r10,-5 1842 sbb r11,-1 1843 sbb r12,3 1844 cmovc r10,r13 1845 cmovc r11,r14 1846 cmovc r12,r15 1847 1848 add r10,QWORD[((0+160+16))+rbp] 1849 adc r11,QWORD[((8+160+16))+rbp] 1850 1851 movaps xmm6,XMMWORD[((0+0))+rbp] 1852 movaps xmm7,XMMWORD[((16+0))+rbp] 1853 movaps xmm8,XMMWORD[((32+0))+rbp] 1854 movaps xmm9,XMMWORD[((48+0))+rbp] 1855 movaps xmm10,XMMWORD[((64+0))+rbp] 1856 movaps xmm11,XMMWORD[((80+0))+rbp] 1857 movaps xmm12,XMMWORD[((96+0))+rbp] 1858 movaps xmm13,XMMWORD[((112+0))+rbp] 1859 movaps xmm14,XMMWORD[((128+0))+rbp] 1860 movaps xmm15,XMMWORD[((144+0))+rbp] 1861 1862 1863 add rsp,288 + 160 + 32 1864 1865 1866 pop r9 1867 1868 mov QWORD[r9],r10 1869 mov QWORD[8+r9],r11 1870 pop r15 1871 1872 pop r14 1873 1874 pop r13 1875 1876 pop r12 1877 1878 pop rbx 1879 1880 pop rbp 1881 1882 mov rdi,QWORD[8+rsp] ;WIN64 epilogue 1883 mov rsi,QWORD[16+rsp] 1884 DB 0F3h,0C3h ;repret 1885 1886$L$open_sse_128: 1887 1888 movdqu xmm0,XMMWORD[$L$chacha20_consts] 1889 movdqa xmm1,xmm0 1890 movdqa xmm2,xmm0 1891 movdqu xmm4,XMMWORD[r9] 1892 movdqa xmm5,xmm4 1893 movdqa 
xmm6,xmm4 1894 movdqu xmm8,XMMWORD[16+r9] 1895 movdqa xmm9,xmm8 1896 movdqa xmm10,xmm8 1897 movdqu xmm12,XMMWORD[32+r9] 1898 movdqa xmm13,xmm12 1899 paddd xmm13,XMMWORD[$L$sse_inc] 1900 movdqa xmm14,xmm13 1901 paddd xmm14,XMMWORD[$L$sse_inc] 1902 movdqa xmm7,xmm4 1903 movdqa xmm11,xmm8 1904 movdqa xmm15,xmm13 1905 mov r10,10 1906 1907$L$open_sse_128_rounds: 1908 paddd xmm0,xmm4 1909 pxor xmm12,xmm0 1910 pshufb xmm12,XMMWORD[$L$rol16] 1911 paddd xmm8,xmm12 1912 pxor xmm4,xmm8 1913 movdqa xmm3,xmm4 1914 pslld xmm3,12 1915 psrld xmm4,20 1916 pxor xmm4,xmm3 1917 paddd xmm0,xmm4 1918 pxor xmm12,xmm0 1919 pshufb xmm12,XMMWORD[$L$rol8] 1920 paddd xmm8,xmm12 1921 pxor xmm4,xmm8 1922 movdqa xmm3,xmm4 1923 pslld xmm3,7 1924 psrld xmm4,25 1925 pxor xmm4,xmm3 1926DB 102,15,58,15,228,4 1927DB 102,69,15,58,15,192,8 1928DB 102,69,15,58,15,228,12 1929 paddd xmm1,xmm5 1930 pxor xmm13,xmm1 1931 pshufb xmm13,XMMWORD[$L$rol16] 1932 paddd xmm9,xmm13 1933 pxor xmm5,xmm9 1934 movdqa xmm3,xmm5 1935 pslld xmm3,12 1936 psrld xmm5,20 1937 pxor xmm5,xmm3 1938 paddd xmm1,xmm5 1939 pxor xmm13,xmm1 1940 pshufb xmm13,XMMWORD[$L$rol8] 1941 paddd xmm9,xmm13 1942 pxor xmm5,xmm9 1943 movdqa xmm3,xmm5 1944 pslld xmm3,7 1945 psrld xmm5,25 1946 pxor xmm5,xmm3 1947DB 102,15,58,15,237,4 1948DB 102,69,15,58,15,201,8 1949DB 102,69,15,58,15,237,12 1950 paddd xmm2,xmm6 1951 pxor xmm14,xmm2 1952 pshufb xmm14,XMMWORD[$L$rol16] 1953 paddd xmm10,xmm14 1954 pxor xmm6,xmm10 1955 movdqa xmm3,xmm6 1956 pslld xmm3,12 1957 psrld xmm6,20 1958 pxor xmm6,xmm3 1959 paddd xmm2,xmm6 1960 pxor xmm14,xmm2 1961 pshufb xmm14,XMMWORD[$L$rol8] 1962 paddd xmm10,xmm14 1963 pxor xmm6,xmm10 1964 movdqa xmm3,xmm6 1965 pslld xmm3,7 1966 psrld xmm6,25 1967 pxor xmm6,xmm3 1968DB 102,15,58,15,246,4 1969DB 102,69,15,58,15,210,8 1970DB 102,69,15,58,15,246,12 1971 paddd xmm0,xmm4 1972 pxor xmm12,xmm0 1973 pshufb xmm12,XMMWORD[$L$rol16] 1974 paddd xmm8,xmm12 1975 pxor xmm4,xmm8 1976 movdqa xmm3,xmm4 1977 pslld xmm3,12 1978 psrld xmm4,20 1979 
pxor xmm4,xmm3 1980 paddd xmm0,xmm4 1981 pxor xmm12,xmm0 1982 pshufb xmm12,XMMWORD[$L$rol8] 1983 paddd xmm8,xmm12 1984 pxor xmm4,xmm8 1985 movdqa xmm3,xmm4 1986 pslld xmm3,7 1987 psrld xmm4,25 1988 pxor xmm4,xmm3 1989DB 102,15,58,15,228,12 1990DB 102,69,15,58,15,192,8 1991DB 102,69,15,58,15,228,4 1992 paddd xmm1,xmm5 1993 pxor xmm13,xmm1 1994 pshufb xmm13,XMMWORD[$L$rol16] 1995 paddd xmm9,xmm13 1996 pxor xmm5,xmm9 1997 movdqa xmm3,xmm5 1998 pslld xmm3,12 1999 psrld xmm5,20 2000 pxor xmm5,xmm3 2001 paddd xmm1,xmm5 2002 pxor xmm13,xmm1 2003 pshufb xmm13,XMMWORD[$L$rol8] 2004 paddd xmm9,xmm13 2005 pxor xmm5,xmm9 2006 movdqa xmm3,xmm5 2007 pslld xmm3,7 2008 psrld xmm5,25 2009 pxor xmm5,xmm3 2010DB 102,15,58,15,237,12 2011DB 102,69,15,58,15,201,8 2012DB 102,69,15,58,15,237,4 2013 paddd xmm2,xmm6 2014 pxor xmm14,xmm2 2015 pshufb xmm14,XMMWORD[$L$rol16] 2016 paddd xmm10,xmm14 2017 pxor xmm6,xmm10 2018 movdqa xmm3,xmm6 2019 pslld xmm3,12 2020 psrld xmm6,20 2021 pxor xmm6,xmm3 2022 paddd xmm2,xmm6 2023 pxor xmm14,xmm2 2024 pshufb xmm14,XMMWORD[$L$rol8] 2025 paddd xmm10,xmm14 2026 pxor xmm6,xmm10 2027 movdqa xmm3,xmm6 2028 pslld xmm3,7 2029 psrld xmm6,25 2030 pxor xmm6,xmm3 2031DB 102,15,58,15,246,12 2032DB 102,69,15,58,15,210,8 2033DB 102,69,15,58,15,246,4 2034 2035 dec r10 2036 jnz NEAR $L$open_sse_128_rounds 2037 paddd xmm0,XMMWORD[$L$chacha20_consts] 2038 paddd xmm1,XMMWORD[$L$chacha20_consts] 2039 paddd xmm2,XMMWORD[$L$chacha20_consts] 2040 paddd xmm4,xmm7 2041 paddd xmm5,xmm7 2042 paddd xmm6,xmm7 2043 paddd xmm9,xmm11 2044 paddd xmm10,xmm11 2045 paddd xmm13,xmm15 2046 paddd xmm15,XMMWORD[$L$sse_inc] 2047 paddd xmm14,xmm15 2048 2049 pand xmm0,XMMWORD[$L$clamp] 2050 movdqa XMMWORD[(160+0)+rbp],xmm0 2051 movdqa XMMWORD[(160+16)+rbp],xmm4 2052 2053 mov r8,r8 2054 call poly_hash_ad_internal 2055$L$open_sse_128_xor_hash: 2056 cmp rbx,16 2057 jb NEAR $L$open_sse_tail_16 2058 sub rbx,16 2059 add r10,QWORD[((0+0))+rsi] 2060 adc r11,QWORD[((8+0))+rsi] 2061 adc r12,1 2062 2063 
2064 movdqu xmm3,XMMWORD[rsi] 2065 pxor xmm1,xmm3 2066 movdqu XMMWORD[rdi],xmm1 2067 lea rsi,[16+rsi] 2068 lea rdi,[16+rdi] 2069 mov rax,QWORD[((0+160+0))+rbp] 2070 mov r15,rax 2071 mul r10 2072 mov r13,rax 2073 mov r14,rdx 2074 mov rax,QWORD[((0+160+0))+rbp] 2075 mul r11 2076 imul r15,r12 2077 add r14,rax 2078 adc r15,rdx 2079 mov rax,QWORD[((8+160+0))+rbp] 2080 mov r9,rax 2081 mul r10 2082 add r14,rax 2083 adc rdx,0 2084 mov r10,rdx 2085 mov rax,QWORD[((8+160+0))+rbp] 2086 mul r11 2087 add r15,rax 2088 adc rdx,0 2089 imul r9,r12 2090 add r15,r10 2091 adc r9,rdx 2092 mov r10,r13 2093 mov r11,r14 2094 mov r12,r15 2095 and r12,3 2096 mov r13,r15 2097 and r13,-4 2098 mov r14,r9 2099 shrd r15,r9,2 2100 shr r9,2 2101 add r15,r13 2102 adc r9,r14 2103 add r10,r15 2104 adc r11,r9 2105 adc r12,0 2106 2107 2108 movdqa xmm1,xmm5 2109 movdqa xmm5,xmm9 2110 movdqa xmm9,xmm13 2111 movdqa xmm13,xmm2 2112 movdqa xmm2,xmm6 2113 movdqa xmm6,xmm10 2114 movdqa xmm10,xmm14 2115 jmp NEAR $L$open_sse_128_xor_hash 2116$L$SEH_end_GFp_chacha20_poly1305_open: 2117 2118 2119 2120 2121 2122 2123 2124global GFp_chacha20_poly1305_seal 2125 2126ALIGN 64 2127GFp_chacha20_poly1305_seal: 2128 mov QWORD[8+rsp],rdi ;WIN64 prologue 2129 mov QWORD[16+rsp],rsi 2130 mov rax,rsp 2131$L$SEH_begin_GFp_chacha20_poly1305_seal: 2132 mov rdi,rcx 2133 mov rsi,rdx 2134 mov rdx,r8 2135 mov rcx,r9 2136 mov r8,QWORD[40+rsp] 2137 mov r9,QWORD[48+rsp] 2138 2139 2140 2141 push rbp 2142 2143 push rbx 2144 2145 push r12 2146 2147 push r13 2148 2149 push r14 2150 2151 push r15 2152 2153 2154 2155 push r9 2156 2157 sub rsp,288 + 160 + 32 2158 2159 lea rbp,[32+rsp] 2160 and rbp,-32 2161 2162 movaps XMMWORD[(0+0)+rbp],xmm6 2163 movaps XMMWORD[(16+0)+rbp],xmm7 2164 movaps XMMWORD[(32+0)+rbp],xmm8 2165 movaps XMMWORD[(48+0)+rbp],xmm9 2166 movaps XMMWORD[(64+0)+rbp],xmm10 2167 movaps XMMWORD[(80+0)+rbp],xmm11 2168 movaps XMMWORD[(96+0)+rbp],xmm12 2169 movaps XMMWORD[(112+0)+rbp],xmm13 2170 movaps XMMWORD[(128+0)+rbp],xmm14 2171 
movaps XMMWORD[(144+0)+rbp],xmm15 2172 2173 mov rbx,QWORD[56+r9] 2174 add rbx,rdx 2175 mov QWORD[((0+160+32))+rbp],r8 2176 mov QWORD[((8+160+32))+rbp],rbx 2177 mov rbx,rdx 2178 2179 mov eax,DWORD[((GFp_ia32cap_P+8))] 2180 and eax,288 2181 xor eax,288 2182 jz NEAR chacha20_poly1305_seal_avx2 2183 2184 cmp rbx,128 2185 jbe NEAR $L$seal_sse_128 2186 2187 movdqa xmm0,XMMWORD[$L$chacha20_consts] 2188 movdqu xmm4,XMMWORD[r9] 2189 movdqu xmm8,XMMWORD[16+r9] 2190 movdqu xmm12,XMMWORD[32+r9] 2191 2192 movdqa xmm1,xmm0 2193 movdqa xmm2,xmm0 2194 movdqa xmm3,xmm0 2195 movdqa xmm5,xmm4 2196 movdqa xmm6,xmm4 2197 movdqa xmm7,xmm4 2198 movdqa xmm9,xmm8 2199 movdqa xmm10,xmm8 2200 movdqa xmm11,xmm8 2201 movdqa xmm15,xmm12 2202 paddd xmm12,XMMWORD[$L$sse_inc] 2203 movdqa xmm14,xmm12 2204 paddd xmm12,XMMWORD[$L$sse_inc] 2205 movdqa xmm13,xmm12 2206 paddd xmm12,XMMWORD[$L$sse_inc] 2207 2208 movdqa XMMWORD[(160+48)+rbp],xmm4 2209 movdqa XMMWORD[(160+64)+rbp],xmm8 2210 movdqa XMMWORD[(160+96)+rbp],xmm12 2211 movdqa XMMWORD[(160+112)+rbp],xmm13 2212 movdqa XMMWORD[(160+128)+rbp],xmm14 2213 movdqa XMMWORD[(160+144)+rbp],xmm15 2214 mov r10,10 2215$L$seal_sse_init_rounds: 2216 movdqa XMMWORD[(160+80)+rbp],xmm8 2217 movdqa xmm8,XMMWORD[$L$rol16] 2218 paddd xmm3,xmm7 2219 paddd xmm2,xmm6 2220 paddd xmm1,xmm5 2221 paddd xmm0,xmm4 2222 pxor xmm15,xmm3 2223 pxor xmm14,xmm2 2224 pxor xmm13,xmm1 2225 pxor xmm12,xmm0 2226DB 102,69,15,56,0,248 2227DB 102,69,15,56,0,240 2228DB 102,69,15,56,0,232 2229DB 102,69,15,56,0,224 2230 movdqa xmm8,XMMWORD[((160+80))+rbp] 2231 paddd xmm11,xmm15 2232 paddd xmm10,xmm14 2233 paddd xmm9,xmm13 2234 paddd xmm8,xmm12 2235 pxor xmm7,xmm11 2236 pxor xmm6,xmm10 2237 pxor xmm5,xmm9 2238 pxor xmm4,xmm8 2239 movdqa XMMWORD[(160+80)+rbp],xmm8 2240 movdqa xmm8,xmm7 2241 psrld xmm8,20 2242 pslld xmm7,32-20 2243 pxor xmm7,xmm8 2244 movdqa xmm8,xmm6 2245 psrld xmm8,20 2246 pslld xmm6,32-20 2247 pxor xmm6,xmm8 2248 movdqa xmm8,xmm5 2249 psrld xmm8,20 2250 pslld xmm5,32-20 2251 
pxor xmm5,xmm8 2252 movdqa xmm8,xmm4 2253 psrld xmm8,20 2254 pslld xmm4,32-20 2255 pxor xmm4,xmm8 2256 movdqa xmm8,XMMWORD[$L$rol8] 2257 paddd xmm3,xmm7 2258 paddd xmm2,xmm6 2259 paddd xmm1,xmm5 2260 paddd xmm0,xmm4 2261 pxor xmm15,xmm3 2262 pxor xmm14,xmm2 2263 pxor xmm13,xmm1 2264 pxor xmm12,xmm0 2265DB 102,69,15,56,0,248 2266DB 102,69,15,56,0,240 2267DB 102,69,15,56,0,232 2268DB 102,69,15,56,0,224 2269 movdqa xmm8,XMMWORD[((160+80))+rbp] 2270 paddd xmm11,xmm15 2271 paddd xmm10,xmm14 2272 paddd xmm9,xmm13 2273 paddd xmm8,xmm12 2274 pxor xmm7,xmm11 2275 pxor xmm6,xmm10 2276 pxor xmm5,xmm9 2277 pxor xmm4,xmm8 2278 movdqa XMMWORD[(160+80)+rbp],xmm8 2279 movdqa xmm8,xmm7 2280 psrld xmm8,25 2281 pslld xmm7,32-25 2282 pxor xmm7,xmm8 2283 movdqa xmm8,xmm6 2284 psrld xmm8,25 2285 pslld xmm6,32-25 2286 pxor xmm6,xmm8 2287 movdqa xmm8,xmm5 2288 psrld xmm8,25 2289 pslld xmm5,32-25 2290 pxor xmm5,xmm8 2291 movdqa xmm8,xmm4 2292 psrld xmm8,25 2293 pslld xmm4,32-25 2294 pxor xmm4,xmm8 2295 movdqa xmm8,XMMWORD[((160+80))+rbp] 2296DB 102,15,58,15,255,4 2297DB 102,69,15,58,15,219,8 2298DB 102,69,15,58,15,255,12 2299DB 102,15,58,15,246,4 2300DB 102,69,15,58,15,210,8 2301DB 102,69,15,58,15,246,12 2302DB 102,15,58,15,237,4 2303DB 102,69,15,58,15,201,8 2304DB 102,69,15,58,15,237,12 2305DB 102,15,58,15,228,4 2306DB 102,69,15,58,15,192,8 2307DB 102,69,15,58,15,228,12 2308 movdqa XMMWORD[(160+80)+rbp],xmm8 2309 movdqa xmm8,XMMWORD[$L$rol16] 2310 paddd xmm3,xmm7 2311 paddd xmm2,xmm6 2312 paddd xmm1,xmm5 2313 paddd xmm0,xmm4 2314 pxor xmm15,xmm3 2315 pxor xmm14,xmm2 2316 pxor xmm13,xmm1 2317 pxor xmm12,xmm0 2318DB 102,69,15,56,0,248 2319DB 102,69,15,56,0,240 2320DB 102,69,15,56,0,232 2321DB 102,69,15,56,0,224 2322 movdqa xmm8,XMMWORD[((160+80))+rbp] 2323 paddd xmm11,xmm15 2324 paddd xmm10,xmm14 2325 paddd xmm9,xmm13 2326 paddd xmm8,xmm12 2327 pxor xmm7,xmm11 2328 pxor xmm6,xmm10 2329 pxor xmm5,xmm9 2330 pxor xmm4,xmm8 2331 movdqa XMMWORD[(160+80)+rbp],xmm8 2332 movdqa xmm8,xmm7 2333 psrld 
xmm8,20 2334 pslld xmm7,32-20 2335 pxor xmm7,xmm8 2336 movdqa xmm8,xmm6 2337 psrld xmm8,20 2338 pslld xmm6,32-20 2339 pxor xmm6,xmm8 2340 movdqa xmm8,xmm5 2341 psrld xmm8,20 2342 pslld xmm5,32-20 2343 pxor xmm5,xmm8 2344 movdqa xmm8,xmm4 2345 psrld xmm8,20 2346 pslld xmm4,32-20 2347 pxor xmm4,xmm8 2348 movdqa xmm8,XMMWORD[$L$rol8] 2349 paddd xmm3,xmm7 2350 paddd xmm2,xmm6 2351 paddd xmm1,xmm5 2352 paddd xmm0,xmm4 2353 pxor xmm15,xmm3 2354 pxor xmm14,xmm2 2355 pxor xmm13,xmm1 2356 pxor xmm12,xmm0 2357DB 102,69,15,56,0,248 2358DB 102,69,15,56,0,240 2359DB 102,69,15,56,0,232 2360DB 102,69,15,56,0,224 2361 movdqa xmm8,XMMWORD[((160+80))+rbp] 2362 paddd xmm11,xmm15 2363 paddd xmm10,xmm14 2364 paddd xmm9,xmm13 2365 paddd xmm8,xmm12 2366 pxor xmm7,xmm11 2367 pxor xmm6,xmm10 2368 pxor xmm5,xmm9 2369 pxor xmm4,xmm8 2370 movdqa XMMWORD[(160+80)+rbp],xmm8 2371 movdqa xmm8,xmm7 2372 psrld xmm8,25 2373 pslld xmm7,32-25 2374 pxor xmm7,xmm8 2375 movdqa xmm8,xmm6 2376 psrld xmm8,25 2377 pslld xmm6,32-25 2378 pxor xmm6,xmm8 2379 movdqa xmm8,xmm5 2380 psrld xmm8,25 2381 pslld xmm5,32-25 2382 pxor xmm5,xmm8 2383 movdqa xmm8,xmm4 2384 psrld xmm8,25 2385 pslld xmm4,32-25 2386 pxor xmm4,xmm8 2387 movdqa xmm8,XMMWORD[((160+80))+rbp] 2388DB 102,15,58,15,255,12 2389DB 102,69,15,58,15,219,8 2390DB 102,69,15,58,15,255,4 2391DB 102,15,58,15,246,12 2392DB 102,69,15,58,15,210,8 2393DB 102,69,15,58,15,246,4 2394DB 102,15,58,15,237,12 2395DB 102,69,15,58,15,201,8 2396DB 102,69,15,58,15,237,4 2397DB 102,15,58,15,228,12 2398DB 102,69,15,58,15,192,8 2399DB 102,69,15,58,15,228,4 2400 2401 dec r10 2402 jnz NEAR $L$seal_sse_init_rounds 2403 paddd xmm3,XMMWORD[$L$chacha20_consts] 2404 paddd xmm7,XMMWORD[((160+48))+rbp] 2405 paddd xmm11,XMMWORD[((160+64))+rbp] 2406 paddd xmm15,XMMWORD[((160+144))+rbp] 2407 paddd xmm2,XMMWORD[$L$chacha20_consts] 2408 paddd xmm6,XMMWORD[((160+48))+rbp] 2409 paddd xmm10,XMMWORD[((160+64))+rbp] 2410 paddd xmm14,XMMWORD[((160+128))+rbp] 2411 paddd 
xmm1,XMMWORD[$L$chacha20_consts] 2412 paddd xmm5,XMMWORD[((160+48))+rbp] 2413 paddd xmm9,XMMWORD[((160+64))+rbp] 2414 paddd xmm13,XMMWORD[((160+112))+rbp] 2415 paddd xmm0,XMMWORD[$L$chacha20_consts] 2416 paddd xmm4,XMMWORD[((160+48))+rbp] 2417 paddd xmm8,XMMWORD[((160+64))+rbp] 2418 paddd xmm12,XMMWORD[((160+96))+rbp] 2419 2420 2421 pand xmm3,XMMWORD[$L$clamp] 2422 movdqa XMMWORD[(160+0)+rbp],xmm3 2423 movdqa XMMWORD[(160+16)+rbp],xmm7 2424 2425 mov r8,r8 2426 call poly_hash_ad_internal 2427 movdqu xmm3,XMMWORD[((0 + 0))+rsi] 2428 movdqu xmm7,XMMWORD[((16 + 0))+rsi] 2429 movdqu xmm11,XMMWORD[((32 + 0))+rsi] 2430 movdqu xmm15,XMMWORD[((48 + 0))+rsi] 2431 pxor xmm2,xmm3 2432 pxor xmm6,xmm7 2433 pxor xmm10,xmm11 2434 pxor xmm15,xmm14 2435 movdqu XMMWORD[(0 + 0)+rdi],xmm2 2436 movdqu XMMWORD[(16 + 0)+rdi],xmm6 2437 movdqu XMMWORD[(32 + 0)+rdi],xmm10 2438 movdqu XMMWORD[(48 + 0)+rdi],xmm15 2439 movdqu xmm3,XMMWORD[((0 + 64))+rsi] 2440 movdqu xmm7,XMMWORD[((16 + 64))+rsi] 2441 movdqu xmm11,XMMWORD[((32 + 64))+rsi] 2442 movdqu xmm15,XMMWORD[((48 + 64))+rsi] 2443 pxor xmm1,xmm3 2444 pxor xmm5,xmm7 2445 pxor xmm9,xmm11 2446 pxor xmm15,xmm13 2447 movdqu XMMWORD[(0 + 64)+rdi],xmm1 2448 movdqu XMMWORD[(16 + 64)+rdi],xmm5 2449 movdqu XMMWORD[(32 + 64)+rdi],xmm9 2450 movdqu XMMWORD[(48 + 64)+rdi],xmm15 2451 2452 cmp rbx,12*16 2453 ja NEAR $L$seal_sse_main_init 2454 mov rcx,8*16 2455 sub rbx,8*16 2456 lea rsi,[128+rsi] 2457 jmp NEAR $L$seal_sse_128_tail_hash 2458$L$seal_sse_main_init: 2459 movdqu xmm3,XMMWORD[((0 + 128))+rsi] 2460 movdqu xmm7,XMMWORD[((16 + 128))+rsi] 2461 movdqu xmm11,XMMWORD[((32 + 128))+rsi] 2462 movdqu xmm15,XMMWORD[((48 + 128))+rsi] 2463 pxor xmm0,xmm3 2464 pxor xmm4,xmm7 2465 pxor xmm8,xmm11 2466 pxor xmm15,xmm12 2467 movdqu XMMWORD[(0 + 128)+rdi],xmm0 2468 movdqu XMMWORD[(16 + 128)+rdi],xmm4 2469 movdqu XMMWORD[(32 + 128)+rdi],xmm8 2470 movdqu XMMWORD[(48 + 128)+rdi],xmm15 2471 2472 mov rcx,12*16 2473 sub rbx,12*16 2474 lea rsi,[192+rsi] 2475 mov rcx,2 
2476 mov r8,8 2477 cmp rbx,4*16 2478 jbe NEAR $L$seal_sse_tail_64 2479 cmp rbx,8*16 2480 jbe NEAR $L$seal_sse_tail_128 2481 cmp rbx,12*16 2482 jbe NEAR $L$seal_sse_tail_192 2483 2484$L$seal_sse_main_loop: 2485 movdqa xmm0,XMMWORD[$L$chacha20_consts] 2486 movdqa xmm4,XMMWORD[((160+48))+rbp] 2487 movdqa xmm8,XMMWORD[((160+64))+rbp] 2488 movdqa xmm1,xmm0 2489 movdqa xmm5,xmm4 2490 movdqa xmm9,xmm8 2491 movdqa xmm2,xmm0 2492 movdqa xmm6,xmm4 2493 movdqa xmm10,xmm8 2494 movdqa xmm3,xmm0 2495 movdqa xmm7,xmm4 2496 movdqa xmm11,xmm8 2497 movdqa xmm15,XMMWORD[((160+96))+rbp] 2498 paddd xmm15,XMMWORD[$L$sse_inc] 2499 movdqa xmm14,xmm15 2500 paddd xmm14,XMMWORD[$L$sse_inc] 2501 movdqa xmm13,xmm14 2502 paddd xmm13,XMMWORD[$L$sse_inc] 2503 movdqa xmm12,xmm13 2504 paddd xmm12,XMMWORD[$L$sse_inc] 2505 movdqa XMMWORD[(160+96)+rbp],xmm12 2506 movdqa XMMWORD[(160+112)+rbp],xmm13 2507 movdqa XMMWORD[(160+128)+rbp],xmm14 2508 movdqa XMMWORD[(160+144)+rbp],xmm15 2509 2510ALIGN 32 2511$L$seal_sse_main_rounds: 2512 movdqa XMMWORD[(160+80)+rbp],xmm8 2513 movdqa xmm8,XMMWORD[$L$rol16] 2514 paddd xmm3,xmm7 2515 paddd xmm2,xmm6 2516 paddd xmm1,xmm5 2517 paddd xmm0,xmm4 2518 pxor xmm15,xmm3 2519 pxor xmm14,xmm2 2520 pxor xmm13,xmm1 2521 pxor xmm12,xmm0 2522DB 102,69,15,56,0,248 2523DB 102,69,15,56,0,240 2524DB 102,69,15,56,0,232 2525DB 102,69,15,56,0,224 2526 movdqa xmm8,XMMWORD[((160+80))+rbp] 2527 paddd xmm11,xmm15 2528 paddd xmm10,xmm14 2529 paddd xmm9,xmm13 2530 paddd xmm8,xmm12 2531 pxor xmm7,xmm11 2532 add r10,QWORD[((0+0))+rdi] 2533 adc r11,QWORD[((8+0))+rdi] 2534 adc r12,1 2535 pxor xmm6,xmm10 2536 pxor xmm5,xmm9 2537 pxor xmm4,xmm8 2538 movdqa XMMWORD[(160+80)+rbp],xmm8 2539 movdqa xmm8,xmm7 2540 psrld xmm8,20 2541 pslld xmm7,32-20 2542 pxor xmm7,xmm8 2543 movdqa xmm8,xmm6 2544 psrld xmm8,20 2545 pslld xmm6,32-20 2546 pxor xmm6,xmm8 2547 movdqa xmm8,xmm5 2548 psrld xmm8,20 2549 pslld xmm5,32-20 2550 pxor xmm5,xmm8 2551 movdqa xmm8,xmm4 2552 psrld xmm8,20 2553 pslld xmm4,32-20 2554 
pxor xmm4,xmm8 2555 mov rax,QWORD[((0+160+0))+rbp] 2556 mov r15,rax 2557 mul r10 2558 mov r13,rax 2559 mov r14,rdx 2560 mov rax,QWORD[((0+160+0))+rbp] 2561 mul r11 2562 imul r15,r12 2563 add r14,rax 2564 adc r15,rdx 2565 movdqa xmm8,XMMWORD[$L$rol8] 2566 paddd xmm3,xmm7 2567 paddd xmm2,xmm6 2568 paddd xmm1,xmm5 2569 paddd xmm0,xmm4 2570 pxor xmm15,xmm3 2571 pxor xmm14,xmm2 2572 pxor xmm13,xmm1 2573 pxor xmm12,xmm0 2574DB 102,69,15,56,0,248 2575DB 102,69,15,56,0,240 2576DB 102,69,15,56,0,232 2577DB 102,69,15,56,0,224 2578 movdqa xmm8,XMMWORD[((160+80))+rbp] 2579 paddd xmm11,xmm15 2580 paddd xmm10,xmm14 2581 paddd xmm9,xmm13 2582 paddd xmm8,xmm12 2583 pxor xmm7,xmm11 2584 pxor xmm6,xmm10 2585 mov rax,QWORD[((8+160+0))+rbp] 2586 mov r9,rax 2587 mul r10 2588 add r14,rax 2589 adc rdx,0 2590 mov r10,rdx 2591 mov rax,QWORD[((8+160+0))+rbp] 2592 mul r11 2593 add r15,rax 2594 adc rdx,0 2595 pxor xmm5,xmm9 2596 pxor xmm4,xmm8 2597 movdqa XMMWORD[(160+80)+rbp],xmm8 2598 movdqa xmm8,xmm7 2599 psrld xmm8,25 2600 pslld xmm7,32-25 2601 pxor xmm7,xmm8 2602 movdqa xmm8,xmm6 2603 psrld xmm8,25 2604 pslld xmm6,32-25 2605 pxor xmm6,xmm8 2606 movdqa xmm8,xmm5 2607 psrld xmm8,25 2608 pslld xmm5,32-25 2609 pxor xmm5,xmm8 2610 movdqa xmm8,xmm4 2611 psrld xmm8,25 2612 pslld xmm4,32-25 2613 pxor xmm4,xmm8 2614 movdqa xmm8,XMMWORD[((160+80))+rbp] 2615 imul r9,r12 2616 add r15,r10 2617 adc r9,rdx 2618DB 102,15,58,15,255,4 2619DB 102,69,15,58,15,219,8 2620DB 102,69,15,58,15,255,12 2621DB 102,15,58,15,246,4 2622DB 102,69,15,58,15,210,8 2623DB 102,69,15,58,15,246,12 2624DB 102,15,58,15,237,4 2625DB 102,69,15,58,15,201,8 2626DB 102,69,15,58,15,237,12 2627DB 102,15,58,15,228,4 2628DB 102,69,15,58,15,192,8 2629DB 102,69,15,58,15,228,12 2630 movdqa XMMWORD[(160+80)+rbp],xmm8 2631 movdqa xmm8,XMMWORD[$L$rol16] 2632 paddd xmm3,xmm7 2633 paddd xmm2,xmm6 2634 paddd xmm1,xmm5 2635 paddd xmm0,xmm4 2636 pxor xmm15,xmm3 2637 pxor xmm14,xmm2 2638 mov r10,r13 2639 mov r11,r14 2640 mov r12,r15 2641 and r12,3 
2642 mov r13,r15 2643 and r13,-4 2644 mov r14,r9 2645 shrd r15,r9,2 2646 shr r9,2 2647 add r15,r13 2648 adc r9,r14 2649 add r10,r15 2650 adc r11,r9 2651 adc r12,0 2652 pxor xmm13,xmm1 2653 pxor xmm12,xmm0 2654DB 102,69,15,56,0,248 2655DB 102,69,15,56,0,240 2656DB 102,69,15,56,0,232 2657DB 102,69,15,56,0,224 2658 movdqa xmm8,XMMWORD[((160+80))+rbp] 2659 paddd xmm11,xmm15 2660 paddd xmm10,xmm14 2661 paddd xmm9,xmm13 2662 paddd xmm8,xmm12 2663 pxor xmm7,xmm11 2664 pxor xmm6,xmm10 2665 pxor xmm5,xmm9 2666 pxor xmm4,xmm8 2667 movdqa XMMWORD[(160+80)+rbp],xmm8 2668 movdqa xmm8,xmm7 2669 psrld xmm8,20 2670 pslld xmm7,32-20 2671 pxor xmm7,xmm8 2672 movdqa xmm8,xmm6 2673 psrld xmm8,20 2674 pslld xmm6,32-20 2675 pxor xmm6,xmm8 2676 movdqa xmm8,xmm5 2677 psrld xmm8,20 2678 pslld xmm5,32-20 2679 pxor xmm5,xmm8 2680 movdqa xmm8,xmm4 2681 psrld xmm8,20 2682 pslld xmm4,32-20 2683 pxor xmm4,xmm8 2684 movdqa xmm8,XMMWORD[$L$rol8] 2685 paddd xmm3,xmm7 2686 paddd xmm2,xmm6 2687 paddd xmm1,xmm5 2688 paddd xmm0,xmm4 2689 pxor xmm15,xmm3 2690 pxor xmm14,xmm2 2691 pxor xmm13,xmm1 2692 pxor xmm12,xmm0 2693DB 102,69,15,56,0,248 2694DB 102,69,15,56,0,240 2695DB 102,69,15,56,0,232 2696DB 102,69,15,56,0,224 2697 movdqa xmm8,XMMWORD[((160+80))+rbp] 2698 paddd xmm11,xmm15 2699 paddd xmm10,xmm14 2700 paddd xmm9,xmm13 2701 paddd xmm8,xmm12 2702 pxor xmm7,xmm11 2703 pxor xmm6,xmm10 2704 pxor xmm5,xmm9 2705 pxor xmm4,xmm8 2706 movdqa XMMWORD[(160+80)+rbp],xmm8 2707 movdqa xmm8,xmm7 2708 psrld xmm8,25 2709 pslld xmm7,32-25 2710 pxor xmm7,xmm8 2711 movdqa xmm8,xmm6 2712 psrld xmm8,25 2713 pslld xmm6,32-25 2714 pxor xmm6,xmm8 2715 movdqa xmm8,xmm5 2716 psrld xmm8,25 2717 pslld xmm5,32-25 2718 pxor xmm5,xmm8 2719 movdqa xmm8,xmm4 2720 psrld xmm8,25 2721 pslld xmm4,32-25 2722 pxor xmm4,xmm8 2723 movdqa xmm8,XMMWORD[((160+80))+rbp] 2724DB 102,15,58,15,255,12 2725DB 102,69,15,58,15,219,8 2726DB 102,69,15,58,15,255,4 2727DB 102,15,58,15,246,12 2728DB 102,69,15,58,15,210,8 2729DB 102,69,15,58,15,246,4 
2730DB 102,15,58,15,237,12 2731DB 102,69,15,58,15,201,8 2732DB 102,69,15,58,15,237,4 2733DB 102,15,58,15,228,12 2734DB 102,69,15,58,15,192,8 2735DB 102,69,15,58,15,228,4 2736 2737 lea rdi,[16+rdi] 2738 dec r8 2739 jge NEAR $L$seal_sse_main_rounds 2740 add r10,QWORD[((0+0))+rdi] 2741 adc r11,QWORD[((8+0))+rdi] 2742 adc r12,1 2743 mov rax,QWORD[((0+160+0))+rbp] 2744 mov r15,rax 2745 mul r10 2746 mov r13,rax 2747 mov r14,rdx 2748 mov rax,QWORD[((0+160+0))+rbp] 2749 mul r11 2750 imul r15,r12 2751 add r14,rax 2752 adc r15,rdx 2753 mov rax,QWORD[((8+160+0))+rbp] 2754 mov r9,rax 2755 mul r10 2756 add r14,rax 2757 adc rdx,0 2758 mov r10,rdx 2759 mov rax,QWORD[((8+160+0))+rbp] 2760 mul r11 2761 add r15,rax 2762 adc rdx,0 2763 imul r9,r12 2764 add r15,r10 2765 adc r9,rdx 2766 mov r10,r13 2767 mov r11,r14 2768 mov r12,r15 2769 and r12,3 2770 mov r13,r15 2771 and r13,-4 2772 mov r14,r9 2773 shrd r15,r9,2 2774 shr r9,2 2775 add r15,r13 2776 adc r9,r14 2777 add r10,r15 2778 adc r11,r9 2779 adc r12,0 2780 2781 lea rdi,[16+rdi] 2782 dec rcx 2783 jg NEAR $L$seal_sse_main_rounds 2784 paddd xmm3,XMMWORD[$L$chacha20_consts] 2785 paddd xmm7,XMMWORD[((160+48))+rbp] 2786 paddd xmm11,XMMWORD[((160+64))+rbp] 2787 paddd xmm15,XMMWORD[((160+144))+rbp] 2788 paddd xmm2,XMMWORD[$L$chacha20_consts] 2789 paddd xmm6,XMMWORD[((160+48))+rbp] 2790 paddd xmm10,XMMWORD[((160+64))+rbp] 2791 paddd xmm14,XMMWORD[((160+128))+rbp] 2792 paddd xmm1,XMMWORD[$L$chacha20_consts] 2793 paddd xmm5,XMMWORD[((160+48))+rbp] 2794 paddd xmm9,XMMWORD[((160+64))+rbp] 2795 paddd xmm13,XMMWORD[((160+112))+rbp] 2796 paddd xmm0,XMMWORD[$L$chacha20_consts] 2797 paddd xmm4,XMMWORD[((160+48))+rbp] 2798 paddd xmm8,XMMWORD[((160+64))+rbp] 2799 paddd xmm12,XMMWORD[((160+96))+rbp] 2800 2801 movdqa XMMWORD[(160+80)+rbp],xmm14 2802 movdqa XMMWORD[(160+80)+rbp],xmm14 2803 movdqu xmm14,XMMWORD[((0 + 0))+rsi] 2804 pxor xmm14,xmm3 2805 movdqu XMMWORD[(0 + 0)+rdi],xmm14 2806 movdqu xmm14,XMMWORD[((16 + 0))+rsi] 2807 pxor xmm14,xmm7 2808 
movdqu XMMWORD[(16 + 0)+rdi],xmm14 2809 movdqu xmm14,XMMWORD[((32 + 0))+rsi] 2810 pxor xmm14,xmm11 2811 movdqu XMMWORD[(32 + 0)+rdi],xmm14 2812 movdqu xmm14,XMMWORD[((48 + 0))+rsi] 2813 pxor xmm14,xmm15 2814 movdqu XMMWORD[(48 + 0)+rdi],xmm14 2815 2816 movdqa xmm14,XMMWORD[((160+80))+rbp] 2817 movdqu xmm3,XMMWORD[((0 + 64))+rsi] 2818 movdqu xmm7,XMMWORD[((16 + 64))+rsi] 2819 movdqu xmm11,XMMWORD[((32 + 64))+rsi] 2820 movdqu xmm15,XMMWORD[((48 + 64))+rsi] 2821 pxor xmm2,xmm3 2822 pxor xmm6,xmm7 2823 pxor xmm10,xmm11 2824 pxor xmm15,xmm14 2825 movdqu XMMWORD[(0 + 64)+rdi],xmm2 2826 movdqu XMMWORD[(16 + 64)+rdi],xmm6 2827 movdqu XMMWORD[(32 + 64)+rdi],xmm10 2828 movdqu XMMWORD[(48 + 64)+rdi],xmm15 2829 movdqu xmm3,XMMWORD[((0 + 128))+rsi] 2830 movdqu xmm7,XMMWORD[((16 + 128))+rsi] 2831 movdqu xmm11,XMMWORD[((32 + 128))+rsi] 2832 movdqu xmm15,XMMWORD[((48 + 128))+rsi] 2833 pxor xmm1,xmm3 2834 pxor xmm5,xmm7 2835 pxor xmm9,xmm11 2836 pxor xmm15,xmm13 2837 movdqu XMMWORD[(0 + 128)+rdi],xmm1 2838 movdqu XMMWORD[(16 + 128)+rdi],xmm5 2839 movdqu XMMWORD[(32 + 128)+rdi],xmm9 2840 movdqu XMMWORD[(48 + 128)+rdi],xmm15 2841 2842 cmp rbx,16*16 2843 ja NEAR $L$seal_sse_main_loop_xor 2844 2845 mov rcx,12*16 2846 sub rbx,12*16 2847 lea rsi,[192+rsi] 2848 jmp NEAR $L$seal_sse_128_tail_hash 2849$L$seal_sse_main_loop_xor: 2850 movdqu xmm3,XMMWORD[((0 + 192))+rsi] 2851 movdqu xmm7,XMMWORD[((16 + 192))+rsi] 2852 movdqu xmm11,XMMWORD[((32 + 192))+rsi] 2853 movdqu xmm15,XMMWORD[((48 + 192))+rsi] 2854 pxor xmm0,xmm3 2855 pxor xmm4,xmm7 2856 pxor xmm8,xmm11 2857 pxor xmm15,xmm12 2858 movdqu XMMWORD[(0 + 192)+rdi],xmm0 2859 movdqu XMMWORD[(16 + 192)+rdi],xmm4 2860 movdqu XMMWORD[(32 + 192)+rdi],xmm8 2861 movdqu XMMWORD[(48 + 192)+rdi],xmm15 2862 2863 lea rsi,[256+rsi] 2864 sub rbx,16*16 2865 mov rcx,6 2866 mov r8,4 2867 cmp rbx,12*16 2868 jg NEAR $L$seal_sse_main_loop 2869 mov rcx,rbx 2870 test rbx,rbx 2871 je NEAR $L$seal_sse_128_tail_hash 2872 mov rcx,6 2873 cmp rbx,8*16 2874 ja NEAR 
$L$seal_sse_tail_192 2875 cmp rbx,4*16 2876 ja NEAR $L$seal_sse_tail_128 2877 2878$L$seal_sse_tail_64: 2879 movdqa xmm0,XMMWORD[$L$chacha20_consts] 2880 movdqa xmm4,XMMWORD[((160+48))+rbp] 2881 movdqa xmm8,XMMWORD[((160+64))+rbp] 2882 movdqa xmm12,XMMWORD[((160+96))+rbp] 2883 paddd xmm12,XMMWORD[$L$sse_inc] 2884 movdqa XMMWORD[(160+96)+rbp],xmm12 2885 2886$L$seal_sse_tail_64_rounds_and_x2hash: 2887 add r10,QWORD[((0+0))+rdi] 2888 adc r11,QWORD[((8+0))+rdi] 2889 adc r12,1 2890 mov rax,QWORD[((0+160+0))+rbp] 2891 mov r15,rax 2892 mul r10 2893 mov r13,rax 2894 mov r14,rdx 2895 mov rax,QWORD[((0+160+0))+rbp] 2896 mul r11 2897 imul r15,r12 2898 add r14,rax 2899 adc r15,rdx 2900 mov rax,QWORD[((8+160+0))+rbp] 2901 mov r9,rax 2902 mul r10 2903 add r14,rax 2904 adc rdx,0 2905 mov r10,rdx 2906 mov rax,QWORD[((8+160+0))+rbp] 2907 mul r11 2908 add r15,rax 2909 adc rdx,0 2910 imul r9,r12 2911 add r15,r10 2912 adc r9,rdx 2913 mov r10,r13 2914 mov r11,r14 2915 mov r12,r15 2916 and r12,3 2917 mov r13,r15 2918 and r13,-4 2919 mov r14,r9 2920 shrd r15,r9,2 2921 shr r9,2 2922 add r15,r13 2923 adc r9,r14 2924 add r10,r15 2925 adc r11,r9 2926 adc r12,0 2927 2928 lea rdi,[16+rdi] 2929$L$seal_sse_tail_64_rounds_and_x1hash: 2930 paddd xmm0,xmm4 2931 pxor xmm12,xmm0 2932 pshufb xmm12,XMMWORD[$L$rol16] 2933 paddd xmm8,xmm12 2934 pxor xmm4,xmm8 2935 movdqa xmm3,xmm4 2936 pslld xmm3,12 2937 psrld xmm4,20 2938 pxor xmm4,xmm3 2939 paddd xmm0,xmm4 2940 pxor xmm12,xmm0 2941 pshufb xmm12,XMMWORD[$L$rol8] 2942 paddd xmm8,xmm12 2943 pxor xmm4,xmm8 2944 movdqa xmm3,xmm4 2945 pslld xmm3,7 2946 psrld xmm4,25 2947 pxor xmm4,xmm3 2948DB 102,15,58,15,228,4 2949DB 102,69,15,58,15,192,8 2950DB 102,69,15,58,15,228,12 2951 paddd xmm0,xmm4 2952 pxor xmm12,xmm0 2953 pshufb xmm12,XMMWORD[$L$rol16] 2954 paddd xmm8,xmm12 2955 pxor xmm4,xmm8 2956 movdqa xmm3,xmm4 2957 pslld xmm3,12 2958 psrld xmm4,20 2959 pxor xmm4,xmm3 2960 paddd xmm0,xmm4 2961 pxor xmm12,xmm0 2962 pshufb xmm12,XMMWORD[$L$rol8] 2963 paddd 
xmm8,xmm12 2964 pxor xmm4,xmm8 2965 movdqa xmm3,xmm4 2966 pslld xmm3,7 2967 psrld xmm4,25 2968 pxor xmm4,xmm3 2969DB 102,15,58,15,228,12 2970DB 102,69,15,58,15,192,8 2971DB 102,69,15,58,15,228,4 2972 add r10,QWORD[((0+0))+rdi] 2973 adc r11,QWORD[((8+0))+rdi] 2974 adc r12,1 2975 mov rax,QWORD[((0+160+0))+rbp] 2976 mov r15,rax 2977 mul r10 2978 mov r13,rax 2979 mov r14,rdx 2980 mov rax,QWORD[((0+160+0))+rbp] 2981 mul r11 2982 imul r15,r12 2983 add r14,rax 2984 adc r15,rdx 2985 mov rax,QWORD[((8+160+0))+rbp] 2986 mov r9,rax 2987 mul r10 2988 add r14,rax 2989 adc rdx,0 2990 mov r10,rdx 2991 mov rax,QWORD[((8+160+0))+rbp] 2992 mul r11 2993 add r15,rax 2994 adc rdx,0 2995 imul r9,r12 2996 add r15,r10 2997 adc r9,rdx 2998 mov r10,r13 2999 mov r11,r14 3000 mov r12,r15 3001 and r12,3 3002 mov r13,r15 3003 and r13,-4 3004 mov r14,r9 3005 shrd r15,r9,2 3006 shr r9,2 3007 add r15,r13 3008 adc r9,r14 3009 add r10,r15 3010 adc r11,r9 3011 adc r12,0 3012 3013 lea rdi,[16+rdi] 3014 dec rcx 3015 jg NEAR $L$seal_sse_tail_64_rounds_and_x2hash 3016 dec r8 3017 jge NEAR $L$seal_sse_tail_64_rounds_and_x1hash 3018 paddd xmm0,XMMWORD[$L$chacha20_consts] 3019 paddd xmm4,XMMWORD[((160+48))+rbp] 3020 paddd xmm8,XMMWORD[((160+64))+rbp] 3021 paddd xmm12,XMMWORD[((160+96))+rbp] 3022 3023 jmp NEAR $L$seal_sse_128_tail_xor 3024 3025$L$seal_sse_tail_128: 3026 movdqa xmm0,XMMWORD[$L$chacha20_consts] 3027 movdqa xmm4,XMMWORD[((160+48))+rbp] 3028 movdqa xmm8,XMMWORD[((160+64))+rbp] 3029 movdqa xmm1,xmm0 3030 movdqa xmm5,xmm4 3031 movdqa xmm9,xmm8 3032 movdqa xmm13,XMMWORD[((160+96))+rbp] 3033 paddd xmm13,XMMWORD[$L$sse_inc] 3034 movdqa xmm12,xmm13 3035 paddd xmm12,XMMWORD[$L$sse_inc] 3036 movdqa XMMWORD[(160+96)+rbp],xmm12 3037 movdqa XMMWORD[(160+112)+rbp],xmm13 3038 3039$L$seal_sse_tail_128_rounds_and_x2hash: 3040 add r10,QWORD[((0+0))+rdi] 3041 adc r11,QWORD[((8+0))+rdi] 3042 adc r12,1 3043 mov rax,QWORD[((0+160+0))+rbp] 3044 mov r15,rax 3045 mul r10 3046 mov r13,rax 3047 mov r14,rdx 3048 mov 
rax,QWORD[((0+160+0))+rbp] 3049 mul r11 3050 imul r15,r12 3051 add r14,rax 3052 adc r15,rdx 3053 mov rax,QWORD[((8+160+0))+rbp] 3054 mov r9,rax 3055 mul r10 3056 add r14,rax 3057 adc rdx,0 3058 mov r10,rdx 3059 mov rax,QWORD[((8+160+0))+rbp] 3060 mul r11 3061 add r15,rax 3062 adc rdx,0 3063 imul r9,r12 3064 add r15,r10 3065 adc r9,rdx 3066 mov r10,r13 3067 mov r11,r14 3068 mov r12,r15 3069 and r12,3 3070 mov r13,r15 3071 and r13,-4 3072 mov r14,r9 3073 shrd r15,r9,2 3074 shr r9,2 3075 add r15,r13 3076 adc r9,r14 3077 add r10,r15 3078 adc r11,r9 3079 adc r12,0 3080 3081 lea rdi,[16+rdi] 3082$L$seal_sse_tail_128_rounds_and_x1hash: 3083 paddd xmm0,xmm4 3084 pxor xmm12,xmm0 3085 pshufb xmm12,XMMWORD[$L$rol16] 3086 paddd xmm8,xmm12 3087 pxor xmm4,xmm8 3088 movdqa xmm3,xmm4 3089 pslld xmm3,12 3090 psrld xmm4,20 3091 pxor xmm4,xmm3 3092 paddd xmm0,xmm4 3093 pxor xmm12,xmm0 3094 pshufb xmm12,XMMWORD[$L$rol8] 3095 paddd xmm8,xmm12 3096 pxor xmm4,xmm8 3097 movdqa xmm3,xmm4 3098 pslld xmm3,7 3099 psrld xmm4,25 3100 pxor xmm4,xmm3 3101DB 102,15,58,15,228,4 3102DB 102,69,15,58,15,192,8 3103DB 102,69,15,58,15,228,12 3104 paddd xmm1,xmm5 3105 pxor xmm13,xmm1 3106 pshufb xmm13,XMMWORD[$L$rol16] 3107 paddd xmm9,xmm13 3108 pxor xmm5,xmm9 3109 movdqa xmm3,xmm5 3110 pslld xmm3,12 3111 psrld xmm5,20 3112 pxor xmm5,xmm3 3113 paddd xmm1,xmm5 3114 pxor xmm13,xmm1 3115 pshufb xmm13,XMMWORD[$L$rol8] 3116 paddd xmm9,xmm13 3117 pxor xmm5,xmm9 3118 movdqa xmm3,xmm5 3119 pslld xmm3,7 3120 psrld xmm5,25 3121 pxor xmm5,xmm3 3122DB 102,15,58,15,237,4 3123DB 102,69,15,58,15,201,8 3124DB 102,69,15,58,15,237,12 3125 add r10,QWORD[((0+0))+rdi] 3126 adc r11,QWORD[((8+0))+rdi] 3127 adc r12,1 3128 mov rax,QWORD[((0+160+0))+rbp] 3129 mov r15,rax 3130 mul r10 3131 mov r13,rax 3132 mov r14,rdx 3133 mov rax,QWORD[((0+160+0))+rbp] 3134 mul r11 3135 imul r15,r12 3136 add r14,rax 3137 adc r15,rdx 3138 mov rax,QWORD[((8+160+0))+rbp] 3139 mov r9,rax 3140 mul r10 3141 add r14,rax 3142 adc rdx,0 3143 mov r10,rdx 
3144 mov rax,QWORD[((8+160+0))+rbp] 3145 mul r11 3146 add r15,rax 3147 adc rdx,0 3148 imul r9,r12 3149 add r15,r10 3150 adc r9,rdx 3151 mov r10,r13 3152 mov r11,r14 3153 mov r12,r15 3154 and r12,3 3155 mov r13,r15 3156 and r13,-4 3157 mov r14,r9 3158 shrd r15,r9,2 3159 shr r9,2 3160 add r15,r13 3161 adc r9,r14 3162 add r10,r15 3163 adc r11,r9 3164 adc r12,0 3165 paddd xmm0,xmm4 3166 pxor xmm12,xmm0 3167 pshufb xmm12,XMMWORD[$L$rol16] 3168 paddd xmm8,xmm12 3169 pxor xmm4,xmm8 3170 movdqa xmm3,xmm4 3171 pslld xmm3,12 3172 psrld xmm4,20 3173 pxor xmm4,xmm3 3174 paddd xmm0,xmm4 3175 pxor xmm12,xmm0 3176 pshufb xmm12,XMMWORD[$L$rol8] 3177 paddd xmm8,xmm12 3178 pxor xmm4,xmm8 3179 movdqa xmm3,xmm4 3180 pslld xmm3,7 3181 psrld xmm4,25 3182 pxor xmm4,xmm3 3183DB 102,15,58,15,228,12 3184DB 102,69,15,58,15,192,8 3185DB 102,69,15,58,15,228,4 3186 paddd xmm1,xmm5 3187 pxor xmm13,xmm1 3188 pshufb xmm13,XMMWORD[$L$rol16] 3189 paddd xmm9,xmm13 3190 pxor xmm5,xmm9 3191 movdqa xmm3,xmm5 3192 pslld xmm3,12 3193 psrld xmm5,20 3194 pxor xmm5,xmm3 3195 paddd xmm1,xmm5 3196 pxor xmm13,xmm1 3197 pshufb xmm13,XMMWORD[$L$rol8] 3198 paddd xmm9,xmm13 3199 pxor xmm5,xmm9 3200 movdqa xmm3,xmm5 3201 pslld xmm3,7 3202 psrld xmm5,25 3203 pxor xmm5,xmm3 3204DB 102,15,58,15,237,12 3205DB 102,69,15,58,15,201,8 3206DB 102,69,15,58,15,237,4 3207 3208 lea rdi,[16+rdi] 3209 dec rcx 3210 jg NEAR $L$seal_sse_tail_128_rounds_and_x2hash 3211 dec r8 3212 jge NEAR $L$seal_sse_tail_128_rounds_and_x1hash 3213 paddd xmm1,XMMWORD[$L$chacha20_consts] 3214 paddd xmm5,XMMWORD[((160+48))+rbp] 3215 paddd xmm9,XMMWORD[((160+64))+rbp] 3216 paddd xmm13,XMMWORD[((160+112))+rbp] 3217 paddd xmm0,XMMWORD[$L$chacha20_consts] 3218 paddd xmm4,XMMWORD[((160+48))+rbp] 3219 paddd xmm8,XMMWORD[((160+64))+rbp] 3220 paddd xmm12,XMMWORD[((160+96))+rbp] 3221 movdqu xmm3,XMMWORD[((0 + 0))+rsi] 3222 movdqu xmm7,XMMWORD[((16 + 0))+rsi] 3223 movdqu xmm11,XMMWORD[((32 + 0))+rsi] 3224 movdqu xmm15,XMMWORD[((48 + 0))+rsi] 3225 pxor xmm1,xmm3 
3226 pxor xmm5,xmm7 3227 pxor xmm9,xmm11 3228 pxor xmm15,xmm13 3229 movdqu XMMWORD[(0 + 0)+rdi],xmm1 3230 movdqu XMMWORD[(16 + 0)+rdi],xmm5 3231 movdqu XMMWORD[(32 + 0)+rdi],xmm9 3232 movdqu XMMWORD[(48 + 0)+rdi],xmm15 3233 3234 mov rcx,4*16 3235 sub rbx,4*16 3236 lea rsi,[64+rsi] 3237 jmp NEAR $L$seal_sse_128_tail_hash 3238 3239$L$seal_sse_tail_192: 3240 movdqa xmm0,XMMWORD[$L$chacha20_consts] 3241 movdqa xmm4,XMMWORD[((160+48))+rbp] 3242 movdqa xmm8,XMMWORD[((160+64))+rbp] 3243 movdqa xmm1,xmm0 3244 movdqa xmm5,xmm4 3245 movdqa xmm9,xmm8 3246 movdqa xmm2,xmm0 3247 movdqa xmm6,xmm4 3248 movdqa xmm10,xmm8 3249 movdqa xmm14,XMMWORD[((160+96))+rbp] 3250 paddd xmm14,XMMWORD[$L$sse_inc] 3251 movdqa xmm13,xmm14 3252 paddd xmm13,XMMWORD[$L$sse_inc] 3253 movdqa xmm12,xmm13 3254 paddd xmm12,XMMWORD[$L$sse_inc] 3255 movdqa XMMWORD[(160+96)+rbp],xmm12 3256 movdqa XMMWORD[(160+112)+rbp],xmm13 3257 movdqa XMMWORD[(160+128)+rbp],xmm14 3258 3259$L$seal_sse_tail_192_rounds_and_x2hash: 3260 add r10,QWORD[((0+0))+rdi] 3261 adc r11,QWORD[((8+0))+rdi] 3262 adc r12,1 3263 mov rax,QWORD[((0+160+0))+rbp] 3264 mov r15,rax 3265 mul r10 3266 mov r13,rax 3267 mov r14,rdx 3268 mov rax,QWORD[((0+160+0))+rbp] 3269 mul r11 3270 imul r15,r12 3271 add r14,rax 3272 adc r15,rdx 3273 mov rax,QWORD[((8+160+0))+rbp] 3274 mov r9,rax 3275 mul r10 3276 add r14,rax 3277 adc rdx,0 3278 mov r10,rdx 3279 mov rax,QWORD[((8+160+0))+rbp] 3280 mul r11 3281 add r15,rax 3282 adc rdx,0 3283 imul r9,r12 3284 add r15,r10 3285 adc r9,rdx 3286 mov r10,r13 3287 mov r11,r14 3288 mov r12,r15 3289 and r12,3 3290 mov r13,r15 3291 and r13,-4 3292 mov r14,r9 3293 shrd r15,r9,2 3294 shr r9,2 3295 add r15,r13 3296 adc r9,r14 3297 add r10,r15 3298 adc r11,r9 3299 adc r12,0 3300 3301 lea rdi,[16+rdi] 3302$L$seal_sse_tail_192_rounds_and_x1hash: 3303 paddd xmm0,xmm4 3304 pxor xmm12,xmm0 3305 pshufb xmm12,XMMWORD[$L$rol16] 3306 paddd xmm8,xmm12 3307 pxor xmm4,xmm8 3308 movdqa xmm3,xmm4 3309 pslld xmm3,12 3310 psrld xmm4,20 3311 pxor 
xmm4,xmm3 3312 paddd xmm0,xmm4 3313 pxor xmm12,xmm0 3314 pshufb xmm12,XMMWORD[$L$rol8] 3315 paddd xmm8,xmm12 3316 pxor xmm4,xmm8 3317 movdqa xmm3,xmm4 3318 pslld xmm3,7 3319 psrld xmm4,25 3320 pxor xmm4,xmm3 3321DB 102,15,58,15,228,4 3322DB 102,69,15,58,15,192,8 3323DB 102,69,15,58,15,228,12 3324 paddd xmm1,xmm5 3325 pxor xmm13,xmm1 3326 pshufb xmm13,XMMWORD[$L$rol16] 3327 paddd xmm9,xmm13 3328 pxor xmm5,xmm9 3329 movdqa xmm3,xmm5 3330 pslld xmm3,12 3331 psrld xmm5,20 3332 pxor xmm5,xmm3 3333 paddd xmm1,xmm5 3334 pxor xmm13,xmm1 3335 pshufb xmm13,XMMWORD[$L$rol8] 3336 paddd xmm9,xmm13 3337 pxor xmm5,xmm9 3338 movdqa xmm3,xmm5 3339 pslld xmm3,7 3340 psrld xmm5,25 3341 pxor xmm5,xmm3 3342DB 102,15,58,15,237,4 3343DB 102,69,15,58,15,201,8 3344DB 102,69,15,58,15,237,12 3345 paddd xmm2,xmm6 3346 pxor xmm14,xmm2 3347 pshufb xmm14,XMMWORD[$L$rol16] 3348 paddd xmm10,xmm14 3349 pxor xmm6,xmm10 3350 movdqa xmm3,xmm6 3351 pslld xmm3,12 3352 psrld xmm6,20 3353 pxor xmm6,xmm3 3354 paddd xmm2,xmm6 3355 pxor xmm14,xmm2 3356 pshufb xmm14,XMMWORD[$L$rol8] 3357 paddd xmm10,xmm14 3358 pxor xmm6,xmm10 3359 movdqa xmm3,xmm6 3360 pslld xmm3,7 3361 psrld xmm6,25 3362 pxor xmm6,xmm3 3363DB 102,15,58,15,246,4 3364DB 102,69,15,58,15,210,8 3365DB 102,69,15,58,15,246,12 3366 add r10,QWORD[((0+0))+rdi] 3367 adc r11,QWORD[((8+0))+rdi] 3368 adc r12,1 3369 mov rax,QWORD[((0+160+0))+rbp] 3370 mov r15,rax 3371 mul r10 3372 mov r13,rax 3373 mov r14,rdx 3374 mov rax,QWORD[((0+160+0))+rbp] 3375 mul r11 3376 imul r15,r12 3377 add r14,rax 3378 adc r15,rdx 3379 mov rax,QWORD[((8+160+0))+rbp] 3380 mov r9,rax 3381 mul r10 3382 add r14,rax 3383 adc rdx,0 3384 mov r10,rdx 3385 mov rax,QWORD[((8+160+0))+rbp] 3386 mul r11 3387 add r15,rax 3388 adc rdx,0 3389 imul r9,r12 3390 add r15,r10 3391 adc r9,rdx 3392 mov r10,r13 3393 mov r11,r14 3394 mov r12,r15 3395 and r12,3 3396 mov r13,r15 3397 and r13,-4 3398 mov r14,r9 3399 shrd r15,r9,2 3400 shr r9,2 3401 add r15,r13 3402 adc r9,r14 3403 add r10,r15 3404 adc 
r11,r9 3405 adc r12,0 3406 paddd xmm0,xmm4 3407 pxor xmm12,xmm0 3408 pshufb xmm12,XMMWORD[$L$rol16] 3409 paddd xmm8,xmm12 3410 pxor xmm4,xmm8 3411 movdqa xmm3,xmm4 3412 pslld xmm3,12 3413 psrld xmm4,20 3414 pxor xmm4,xmm3 3415 paddd xmm0,xmm4 3416 pxor xmm12,xmm0 3417 pshufb xmm12,XMMWORD[$L$rol8] 3418 paddd xmm8,xmm12 3419 pxor xmm4,xmm8 3420 movdqa xmm3,xmm4 3421 pslld xmm3,7 3422 psrld xmm4,25 3423 pxor xmm4,xmm3 3424DB 102,15,58,15,228,12 3425DB 102,69,15,58,15,192,8 3426DB 102,69,15,58,15,228,4 3427 paddd xmm1,xmm5 3428 pxor xmm13,xmm1 3429 pshufb xmm13,XMMWORD[$L$rol16] 3430 paddd xmm9,xmm13 3431 pxor xmm5,xmm9 3432 movdqa xmm3,xmm5 3433 pslld xmm3,12 3434 psrld xmm5,20 3435 pxor xmm5,xmm3 3436 paddd xmm1,xmm5 3437 pxor xmm13,xmm1 3438 pshufb xmm13,XMMWORD[$L$rol8] 3439 paddd xmm9,xmm13 3440 pxor xmm5,xmm9 3441 movdqa xmm3,xmm5 3442 pslld xmm3,7 3443 psrld xmm5,25 3444 pxor xmm5,xmm3 3445DB 102,15,58,15,237,12 3446DB 102,69,15,58,15,201,8 3447DB 102,69,15,58,15,237,4 3448 paddd xmm2,xmm6 3449 pxor xmm14,xmm2 3450 pshufb xmm14,XMMWORD[$L$rol16] 3451 paddd xmm10,xmm14 3452 pxor xmm6,xmm10 3453 movdqa xmm3,xmm6 3454 pslld xmm3,12 3455 psrld xmm6,20 3456 pxor xmm6,xmm3 3457 paddd xmm2,xmm6 3458 pxor xmm14,xmm2 3459 pshufb xmm14,XMMWORD[$L$rol8] 3460 paddd xmm10,xmm14 3461 pxor xmm6,xmm10 3462 movdqa xmm3,xmm6 3463 pslld xmm3,7 3464 psrld xmm6,25 3465 pxor xmm6,xmm3 3466DB 102,15,58,15,246,12 3467DB 102,69,15,58,15,210,8 3468DB 102,69,15,58,15,246,4 3469 3470 lea rdi,[16+rdi] 3471 dec rcx 3472 jg NEAR $L$seal_sse_tail_192_rounds_and_x2hash 3473 dec r8 3474 jge NEAR $L$seal_sse_tail_192_rounds_and_x1hash 3475 paddd xmm2,XMMWORD[$L$chacha20_consts] 3476 paddd xmm6,XMMWORD[((160+48))+rbp] 3477 paddd xmm10,XMMWORD[((160+64))+rbp] 3478 paddd xmm14,XMMWORD[((160+128))+rbp] 3479 paddd xmm1,XMMWORD[$L$chacha20_consts] 3480 paddd xmm5,XMMWORD[((160+48))+rbp] 3481 paddd xmm9,XMMWORD[((160+64))+rbp] 3482 paddd xmm13,XMMWORD[((160+112))+rbp] 3483 paddd 
xmm0,XMMWORD[$L$chacha20_consts] 3484 paddd xmm4,XMMWORD[((160+48))+rbp] 3485 paddd xmm8,XMMWORD[((160+64))+rbp] 3486 paddd xmm12,XMMWORD[((160+96))+rbp] 3487 movdqu xmm3,XMMWORD[((0 + 0))+rsi] 3488 movdqu xmm7,XMMWORD[((16 + 0))+rsi] 3489 movdqu xmm11,XMMWORD[((32 + 0))+rsi] 3490 movdqu xmm15,XMMWORD[((48 + 0))+rsi] 3491 pxor xmm2,xmm3 3492 pxor xmm6,xmm7 3493 pxor xmm10,xmm11 3494 pxor xmm15,xmm14 3495 movdqu XMMWORD[(0 + 0)+rdi],xmm2 3496 movdqu XMMWORD[(16 + 0)+rdi],xmm6 3497 movdqu XMMWORD[(32 + 0)+rdi],xmm10 3498 movdqu XMMWORD[(48 + 0)+rdi],xmm15 3499 movdqu xmm3,XMMWORD[((0 + 64))+rsi] 3500 movdqu xmm7,XMMWORD[((16 + 64))+rsi] 3501 movdqu xmm11,XMMWORD[((32 + 64))+rsi] 3502 movdqu xmm15,XMMWORD[((48 + 64))+rsi] 3503 pxor xmm1,xmm3 3504 pxor xmm5,xmm7 3505 pxor xmm9,xmm11 3506 pxor xmm15,xmm13 3507 movdqu XMMWORD[(0 + 64)+rdi],xmm1 3508 movdqu XMMWORD[(16 + 64)+rdi],xmm5 3509 movdqu XMMWORD[(32 + 64)+rdi],xmm9 3510 movdqu XMMWORD[(48 + 64)+rdi],xmm15 3511 3512 mov rcx,8*16 3513 sub rbx,8*16 3514 lea rsi,[128+rsi] 3515 3516$L$seal_sse_128_tail_hash: 3517 cmp rcx,16 3518 jb NEAR $L$seal_sse_128_tail_xor 3519 add r10,QWORD[((0+0))+rdi] 3520 adc r11,QWORD[((8+0))+rdi] 3521 adc r12,1 3522 mov rax,QWORD[((0+160+0))+rbp] 3523 mov r15,rax 3524 mul r10 3525 mov r13,rax 3526 mov r14,rdx 3527 mov rax,QWORD[((0+160+0))+rbp] 3528 mul r11 3529 imul r15,r12 3530 add r14,rax 3531 adc r15,rdx 3532 mov rax,QWORD[((8+160+0))+rbp] 3533 mov r9,rax 3534 mul r10 3535 add r14,rax 3536 adc rdx,0 3537 mov r10,rdx 3538 mov rax,QWORD[((8+160+0))+rbp] 3539 mul r11 3540 add r15,rax 3541 adc rdx,0 3542 imul r9,r12 3543 add r15,r10 3544 adc r9,rdx 3545 mov r10,r13 3546 mov r11,r14 3547 mov r12,r15 3548 and r12,3 3549 mov r13,r15 3550 and r13,-4 3551 mov r14,r9 3552 shrd r15,r9,2 3553 shr r9,2 3554 add r15,r13 3555 adc r9,r14 3556 add r10,r15 3557 adc r11,r9 3558 adc r12,0 3559 3560 sub rcx,16 3561 lea rdi,[16+rdi] 3562 jmp NEAR $L$seal_sse_128_tail_hash 3563 3564$L$seal_sse_128_tail_xor: 
3565 cmp rbx,16 3566 jb NEAR $L$seal_sse_tail_16 3567 sub rbx,16 3568 3569 movdqu xmm3,XMMWORD[rsi] 3570 pxor xmm0,xmm3 3571 movdqu XMMWORD[rdi],xmm0 3572 3573 add r10,QWORD[rdi] 3574 adc r11,QWORD[8+rdi] 3575 adc r12,1 3576 lea rsi,[16+rsi] 3577 lea rdi,[16+rdi] 3578 mov rax,QWORD[((0+160+0))+rbp] 3579 mov r15,rax 3580 mul r10 3581 mov r13,rax 3582 mov r14,rdx 3583 mov rax,QWORD[((0+160+0))+rbp] 3584 mul r11 3585 imul r15,r12 3586 add r14,rax 3587 adc r15,rdx 3588 mov rax,QWORD[((8+160+0))+rbp] 3589 mov r9,rax 3590 mul r10 3591 add r14,rax 3592 adc rdx,0 3593 mov r10,rdx 3594 mov rax,QWORD[((8+160+0))+rbp] 3595 mul r11 3596 add r15,rax 3597 adc rdx,0 3598 imul r9,r12 3599 add r15,r10 3600 adc r9,rdx 3601 mov r10,r13 3602 mov r11,r14 3603 mov r12,r15 3604 and r12,3 3605 mov r13,r15 3606 and r13,-4 3607 mov r14,r9 3608 shrd r15,r9,2 3609 shr r9,2 3610 add r15,r13 3611 adc r9,r14 3612 add r10,r15 3613 adc r11,r9 3614 adc r12,0 3615 3616 3617 movdqa xmm0,xmm4 3618 movdqa xmm4,xmm8 3619 movdqa xmm8,xmm12 3620 movdqa xmm12,xmm1 3621 movdqa xmm1,xmm5 3622 movdqa xmm5,xmm9 3623 movdqa xmm9,xmm13 3624 jmp NEAR $L$seal_sse_128_tail_xor 3625 3626$L$seal_sse_tail_16: 3627 test rbx,rbx 3628 jz NEAR $L$process_blocks_of_extra_in 3629 3630 mov r8,rbx 3631 mov rcx,rbx 3632 lea rsi,[((-1))+rbx*1+rsi] 3633 pxor xmm15,xmm15 3634$L$seal_sse_tail_16_compose: 3635 pslldq xmm15,1 3636 pinsrb xmm15,BYTE[rsi],0 3637 lea rsi,[((-1))+rsi] 3638 dec rcx 3639 jne NEAR $L$seal_sse_tail_16_compose 3640 3641 3642 pxor xmm15,xmm0 3643 3644 3645 mov rcx,rbx 3646 movdqu xmm0,xmm15 3647$L$seal_sse_tail_16_extract: 3648 pextrb XMMWORD[rdi],xmm0,0 3649 psrldq xmm0,1 3650 add rdi,1 3651 sub rcx,1 3652 jnz NEAR $L$seal_sse_tail_16_extract 3653 3654 3655 3656 3657 3658 3659 3660 3661 mov r9,QWORD[((288 + 160 + 32))+rsp] 3662 mov r14,QWORD[56+r9] 3663 mov r13,QWORD[48+r9] 3664 test r14,r14 3665 jz NEAR $L$process_partial_block 3666 3667 mov r15,16 3668 sub r15,rbx 3669 cmp r14,r15 3670 3671 jge NEAR 
$L$load_extra_in 3672 mov r15,r14 3673 3674$L$load_extra_in: 3675 3676 3677 lea rsi,[((-1))+r15*1+r13] 3678 3679 3680 add r13,r15 3681 sub r14,r15 3682 mov QWORD[48+r9],r13 3683 mov QWORD[56+r9],r14 3684 3685 3686 3687 add r8,r15 3688 3689 3690 pxor xmm11,xmm11 3691$L$load_extra_load_loop: 3692 pslldq xmm11,1 3693 pinsrb xmm11,BYTE[rsi],0 3694 lea rsi,[((-1))+rsi] 3695 sub r15,1 3696 jnz NEAR $L$load_extra_load_loop 3697 3698 3699 3700 3701 mov r15,rbx 3702 3703$L$load_extra_shift_loop: 3704 pslldq xmm11,1 3705 sub r15,1 3706 jnz NEAR $L$load_extra_shift_loop 3707 3708 3709 3710 3711 lea r15,[$L$and_masks] 3712 shl rbx,4 3713 pand xmm15,XMMWORD[((-16))+rbx*1+r15] 3714 3715 3716 por xmm15,xmm11 3717 3718 3719 3720DB 102,77,15,126,253 3721 pextrq r14,xmm15,1 3722 add r10,r13 3723 adc r11,r14 3724 adc r12,1 3725 mov rax,QWORD[((0+160+0))+rbp] 3726 mov r15,rax 3727 mul r10 3728 mov r13,rax 3729 mov r14,rdx 3730 mov rax,QWORD[((0+160+0))+rbp] 3731 mul r11 3732 imul r15,r12 3733 add r14,rax 3734 adc r15,rdx 3735 mov rax,QWORD[((8+160+0))+rbp] 3736 mov r9,rax 3737 mul r10 3738 add r14,rax 3739 adc rdx,0 3740 mov r10,rdx 3741 mov rax,QWORD[((8+160+0))+rbp] 3742 mul r11 3743 add r15,rax 3744 adc rdx,0 3745 imul r9,r12 3746 add r15,r10 3747 adc r9,rdx 3748 mov r10,r13 3749 mov r11,r14 3750 mov r12,r15 3751 and r12,3 3752 mov r13,r15 3753 and r13,-4 3754 mov r14,r9 3755 shrd r15,r9,2 3756 shr r9,2 3757 add r15,r13 3758 adc r9,r14 3759 add r10,r15 3760 adc r11,r9 3761 adc r12,0 3762 3763 3764$L$process_blocks_of_extra_in: 3765 3766 mov r9,QWORD[((288+32+160 ))+rsp] 3767 mov rsi,QWORD[48+r9] 3768 mov r8,QWORD[56+r9] 3769 mov rcx,r8 3770 shr r8,4 3771 3772$L$process_extra_hash_loop: 3773 jz NEAR process_extra_in_trailer 3774 add r10,QWORD[((0+0))+rsi] 3775 adc r11,QWORD[((8+0))+rsi] 3776 adc r12,1 3777 mov rax,QWORD[((0+160+0))+rbp] 3778 mov r15,rax 3779 mul r10 3780 mov r13,rax 3781 mov r14,rdx 3782 mov rax,QWORD[((0+160+0))+rbp] 3783 mul r11 3784 imul r15,r12 3785 add r14,rax 
3786 adc r15,rdx 3787 mov rax,QWORD[((8+160+0))+rbp] 3788 mov r9,rax 3789 mul r10 3790 add r14,rax 3791 adc rdx,0 3792 mov r10,rdx 3793 mov rax,QWORD[((8+160+0))+rbp] 3794 mul r11 3795 add r15,rax 3796 adc rdx,0 3797 imul r9,r12 3798 add r15,r10 3799 adc r9,rdx 3800 mov r10,r13 3801 mov r11,r14 3802 mov r12,r15 3803 and r12,3 3804 mov r13,r15 3805 and r13,-4 3806 mov r14,r9 3807 shrd r15,r9,2 3808 shr r9,2 3809 add r15,r13 3810 adc r9,r14 3811 add r10,r15 3812 adc r11,r9 3813 adc r12,0 3814 3815 lea rsi,[16+rsi] 3816 sub r8,1 3817 jmp NEAR $L$process_extra_hash_loop 3818process_extra_in_trailer: 3819 and rcx,15 3820 mov rbx,rcx 3821 jz NEAR $L$do_length_block 3822 lea rsi,[((-1))+rcx*1+rsi] 3823 3824$L$process_extra_in_trailer_load: 3825 pslldq xmm15,1 3826 pinsrb xmm15,BYTE[rsi],0 3827 lea rsi,[((-1))+rsi] 3828 sub rcx,1 3829 jnz NEAR $L$process_extra_in_trailer_load 3830 3831$L$process_partial_block: 3832 3833 lea r15,[$L$and_masks] 3834 shl rbx,4 3835 pand xmm15,XMMWORD[((-16))+rbx*1+r15] 3836DB 102,77,15,126,253 3837 pextrq r14,xmm15,1 3838 add r10,r13 3839 adc r11,r14 3840 adc r12,1 3841 mov rax,QWORD[((0+160+0))+rbp] 3842 mov r15,rax 3843 mul r10 3844 mov r13,rax 3845 mov r14,rdx 3846 mov rax,QWORD[((0+160+0))+rbp] 3847 mul r11 3848 imul r15,r12 3849 add r14,rax 3850 adc r15,rdx 3851 mov rax,QWORD[((8+160+0))+rbp] 3852 mov r9,rax 3853 mul r10 3854 add r14,rax 3855 adc rdx,0 3856 mov r10,rdx 3857 mov rax,QWORD[((8+160+0))+rbp] 3858 mul r11 3859 add r15,rax 3860 adc rdx,0 3861 imul r9,r12 3862 add r15,r10 3863 adc r9,rdx 3864 mov r10,r13 3865 mov r11,r14 3866 mov r12,r15 3867 and r12,3 3868 mov r13,r15 3869 and r13,-4 3870 mov r14,r9 3871 shrd r15,r9,2 3872 shr r9,2 3873 add r15,r13 3874 adc r9,r14 3875 add r10,r15 3876 adc r11,r9 3877 adc r12,0 3878 3879 3880$L$do_length_block: 3881 add r10,QWORD[((0+160+32))+rbp] 3882 adc r11,QWORD[((8+160+32))+rbp] 3883 adc r12,1 3884 mov rax,QWORD[((0+160+0))+rbp] 3885 mov r15,rax 3886 mul r10 3887 mov r13,rax 3888 mov 
r14,rdx 3889 mov rax,QWORD[((0+160+0))+rbp] 3890 mul r11 3891 imul r15,r12 3892 add r14,rax 3893 adc r15,rdx 3894 mov rax,QWORD[((8+160+0))+rbp] 3895 mov r9,rax 3896 mul r10 3897 add r14,rax 3898 adc rdx,0 3899 mov r10,rdx 3900 mov rax,QWORD[((8+160+0))+rbp] 3901 mul r11 3902 add r15,rax 3903 adc rdx,0 3904 imul r9,r12 3905 add r15,r10 3906 adc r9,rdx 3907 mov r10,r13 3908 mov r11,r14 3909 mov r12,r15 3910 and r12,3 3911 mov r13,r15 3912 and r13,-4 3913 mov r14,r9 3914 shrd r15,r9,2 3915 shr r9,2 3916 add r15,r13 3917 adc r9,r14 3918 add r10,r15 3919 adc r11,r9 3920 adc r12,0 3921 3922 3923 mov r13,r10 3924 mov r14,r11 3925 mov r15,r12 3926 sub r10,-5 3927 sbb r11,-1 3928 sbb r12,3 3929 cmovc r10,r13 3930 cmovc r11,r14 3931 cmovc r12,r15 3932 3933 add r10,QWORD[((0+160+16))+rbp] 3934 adc r11,QWORD[((8+160+16))+rbp] 3935 3936 movaps xmm6,XMMWORD[((0+0))+rbp] 3937 movaps xmm7,XMMWORD[((16+0))+rbp] 3938 movaps xmm8,XMMWORD[((32+0))+rbp] 3939 movaps xmm9,XMMWORD[((48+0))+rbp] 3940 movaps xmm10,XMMWORD[((64+0))+rbp] 3941 movaps xmm11,XMMWORD[((80+0))+rbp] 3942 movaps xmm12,XMMWORD[((96+0))+rbp] 3943 movaps xmm13,XMMWORD[((112+0))+rbp] 3944 movaps xmm14,XMMWORD[((128+0))+rbp] 3945 movaps xmm15,XMMWORD[((144+0))+rbp] 3946 3947 3948 add rsp,288 + 160 + 32 3949 3950 3951 pop r9 3952 3953 mov QWORD[r9],r10 3954 mov QWORD[8+r9],r11 3955 pop r15 3956 3957 pop r14 3958 3959 pop r13 3960 3961 pop r12 3962 3963 pop rbx 3964 3965 pop rbp 3966 3967 mov rdi,QWORD[8+rsp] ;WIN64 epilogue 3968 mov rsi,QWORD[16+rsp] 3969 DB 0F3h,0C3h ;repret 3970 3971$L$seal_sse_128: 3972 3973 movdqu xmm0,XMMWORD[$L$chacha20_consts] 3974 movdqa xmm1,xmm0 3975 movdqa xmm2,xmm0 3976 movdqu xmm4,XMMWORD[r9] 3977 movdqa xmm5,xmm4 3978 movdqa xmm6,xmm4 3979 movdqu xmm8,XMMWORD[16+r9] 3980 movdqa xmm9,xmm8 3981 movdqa xmm10,xmm8 3982 movdqu xmm14,XMMWORD[32+r9] 3983 movdqa xmm12,xmm14 3984 paddd xmm12,XMMWORD[$L$sse_inc] 3985 movdqa xmm13,xmm12 3986 paddd xmm13,XMMWORD[$L$sse_inc] 3987 movdqa xmm7,xmm4 3988 
movdqa xmm11,xmm8 3989 movdqa xmm15,xmm12 3990 mov r10,10 3991 3992$L$seal_sse_128_rounds: 3993 paddd xmm0,xmm4 3994 pxor xmm12,xmm0 3995 pshufb xmm12,XMMWORD[$L$rol16] 3996 paddd xmm8,xmm12 3997 pxor xmm4,xmm8 3998 movdqa xmm3,xmm4 3999 pslld xmm3,12 4000 psrld xmm4,20 4001 pxor xmm4,xmm3 4002 paddd xmm0,xmm4 4003 pxor xmm12,xmm0 4004 pshufb xmm12,XMMWORD[$L$rol8] 4005 paddd xmm8,xmm12 4006 pxor xmm4,xmm8 4007 movdqa xmm3,xmm4 4008 pslld xmm3,7 4009 psrld xmm4,25 4010 pxor xmm4,xmm3 4011DB 102,15,58,15,228,4 4012DB 102,69,15,58,15,192,8 4013DB 102,69,15,58,15,228,12 4014 paddd xmm1,xmm5 4015 pxor xmm13,xmm1 4016 pshufb xmm13,XMMWORD[$L$rol16] 4017 paddd xmm9,xmm13 4018 pxor xmm5,xmm9 4019 movdqa xmm3,xmm5 4020 pslld xmm3,12 4021 psrld xmm5,20 4022 pxor xmm5,xmm3 4023 paddd xmm1,xmm5 4024 pxor xmm13,xmm1 4025 pshufb xmm13,XMMWORD[$L$rol8] 4026 paddd xmm9,xmm13 4027 pxor xmm5,xmm9 4028 movdqa xmm3,xmm5 4029 pslld xmm3,7 4030 psrld xmm5,25 4031 pxor xmm5,xmm3 4032DB 102,15,58,15,237,4 4033DB 102,69,15,58,15,201,8 4034DB 102,69,15,58,15,237,12 4035 paddd xmm2,xmm6 4036 pxor xmm14,xmm2 4037 pshufb xmm14,XMMWORD[$L$rol16] 4038 paddd xmm10,xmm14 4039 pxor xmm6,xmm10 4040 movdqa xmm3,xmm6 4041 pslld xmm3,12 4042 psrld xmm6,20 4043 pxor xmm6,xmm3 4044 paddd xmm2,xmm6 4045 pxor xmm14,xmm2 4046 pshufb xmm14,XMMWORD[$L$rol8] 4047 paddd xmm10,xmm14 4048 pxor xmm6,xmm10 4049 movdqa xmm3,xmm6 4050 pslld xmm3,7 4051 psrld xmm6,25 4052 pxor xmm6,xmm3 4053DB 102,15,58,15,246,4 4054DB 102,69,15,58,15,210,8 4055DB 102,69,15,58,15,246,12 4056 paddd xmm0,xmm4 4057 pxor xmm12,xmm0 4058 pshufb xmm12,XMMWORD[$L$rol16] 4059 paddd xmm8,xmm12 4060 pxor xmm4,xmm8 4061 movdqa xmm3,xmm4 4062 pslld xmm3,12 4063 psrld xmm4,20 4064 pxor xmm4,xmm3 4065 paddd xmm0,xmm4 4066 pxor xmm12,xmm0 4067 pshufb xmm12,XMMWORD[$L$rol8] 4068 paddd xmm8,xmm12 4069 pxor xmm4,xmm8 4070 movdqa xmm3,xmm4 4071 pslld xmm3,7 4072 psrld xmm4,25 4073 pxor xmm4,xmm3 4074DB 102,15,58,15,228,12 4075DB 102,69,15,58,15,192,8 
4076DB 102,69,15,58,15,228,4 4077 paddd xmm1,xmm5 4078 pxor xmm13,xmm1 4079 pshufb xmm13,XMMWORD[$L$rol16] 4080 paddd xmm9,xmm13 4081 pxor xmm5,xmm9 4082 movdqa xmm3,xmm5 4083 pslld xmm3,12 4084 psrld xmm5,20 4085 pxor xmm5,xmm3 4086 paddd xmm1,xmm5 4087 pxor xmm13,xmm1 4088 pshufb xmm13,XMMWORD[$L$rol8] 4089 paddd xmm9,xmm13 4090 pxor xmm5,xmm9 4091 movdqa xmm3,xmm5 4092 pslld xmm3,7 4093 psrld xmm5,25 4094 pxor xmm5,xmm3 4095DB 102,15,58,15,237,12 4096DB 102,69,15,58,15,201,8 4097DB 102,69,15,58,15,237,4 4098 paddd xmm2,xmm6 4099 pxor xmm14,xmm2 4100 pshufb xmm14,XMMWORD[$L$rol16] 4101 paddd xmm10,xmm14 4102 pxor xmm6,xmm10 4103 movdqa xmm3,xmm6 4104 pslld xmm3,12 4105 psrld xmm6,20 4106 pxor xmm6,xmm3 4107 paddd xmm2,xmm6 4108 pxor xmm14,xmm2 4109 pshufb xmm14,XMMWORD[$L$rol8] 4110 paddd xmm10,xmm14 4111 pxor xmm6,xmm10 4112 movdqa xmm3,xmm6 4113 pslld xmm3,7 4114 psrld xmm6,25 4115 pxor xmm6,xmm3 4116DB 102,15,58,15,246,12 4117DB 102,69,15,58,15,210,8 4118DB 102,69,15,58,15,246,4 4119 4120 dec r10 4121 jnz NEAR $L$seal_sse_128_rounds 4122 paddd xmm0,XMMWORD[$L$chacha20_consts] 4123 paddd xmm1,XMMWORD[$L$chacha20_consts] 4124 paddd xmm2,XMMWORD[$L$chacha20_consts] 4125 paddd xmm4,xmm7 4126 paddd xmm5,xmm7 4127 paddd xmm6,xmm7 4128 paddd xmm8,xmm11 4129 paddd xmm9,xmm11 4130 paddd xmm12,xmm15 4131 paddd xmm15,XMMWORD[$L$sse_inc] 4132 paddd xmm13,xmm15 4133 4134 pand xmm2,XMMWORD[$L$clamp] 4135 movdqa XMMWORD[(160+0)+rbp],xmm2 4136 movdqa XMMWORD[(160+16)+rbp],xmm6 4137 4138 mov r8,r8 4139 call poly_hash_ad_internal 4140 jmp NEAR $L$seal_sse_128_tail_xor 4141$L$SEH_end_GFp_chacha20_poly1305_seal: 4142 4143 4144 4145 4146ALIGN 64 4147chacha20_poly1305_open_avx2: 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 vzeroupper 4161 vmovdqa ymm0,YMMWORD[$L$chacha20_consts] 4162 vbroadcasti128 ymm4,XMMWORD[r9] 4163 vbroadcasti128 ymm8,XMMWORD[16+r9] 4164 vbroadcasti128 ymm12,XMMWORD[32+r9] 4165 vpaddd ymm12,ymm12,YMMWORD[$L$avx2_init] 4166 cmp rbx,6*32 4167 
jbe NEAR $L$open_avx2_192 4168 cmp rbx,10*32 4169 jbe NEAR $L$open_avx2_320 4170 4171 vmovdqa YMMWORD[(160+64)+rbp],ymm4 4172 vmovdqa YMMWORD[(160+96)+rbp],ymm8 4173 vmovdqa YMMWORD[(160+160)+rbp],ymm12 4174 mov r10,10 4175$L$open_avx2_init_rounds: 4176 vpaddd ymm0,ymm0,ymm4 4177 vpxor ymm12,ymm12,ymm0 4178 vpshufb ymm12,ymm12,YMMWORD[$L$rol16] 4179 vpaddd ymm8,ymm8,ymm12 4180 vpxor ymm4,ymm4,ymm8 4181 vpsrld ymm3,ymm4,20 4182 vpslld ymm4,ymm4,12 4183 vpxor ymm4,ymm4,ymm3 4184 vpaddd ymm0,ymm0,ymm4 4185 vpxor ymm12,ymm12,ymm0 4186 vpshufb ymm12,ymm12,YMMWORD[$L$rol8] 4187 vpaddd ymm8,ymm8,ymm12 4188 vpxor ymm4,ymm4,ymm8 4189 vpslld ymm3,ymm4,7 4190 vpsrld ymm4,ymm4,25 4191 vpxor ymm4,ymm4,ymm3 4192 vpalignr ymm12,ymm12,ymm12,12 4193 vpalignr ymm8,ymm8,ymm8,8 4194 vpalignr ymm4,ymm4,ymm4,4 4195 vpaddd ymm0,ymm0,ymm4 4196 vpxor ymm12,ymm12,ymm0 4197 vpshufb ymm12,ymm12,YMMWORD[$L$rol16] 4198 vpaddd ymm8,ymm8,ymm12 4199 vpxor ymm4,ymm4,ymm8 4200 vpsrld ymm3,ymm4,20 4201 vpslld ymm4,ymm4,12 4202 vpxor ymm4,ymm4,ymm3 4203 vpaddd ymm0,ymm0,ymm4 4204 vpxor ymm12,ymm12,ymm0 4205 vpshufb ymm12,ymm12,YMMWORD[$L$rol8] 4206 vpaddd ymm8,ymm8,ymm12 4207 vpxor ymm4,ymm4,ymm8 4208 vpslld ymm3,ymm4,7 4209 vpsrld ymm4,ymm4,25 4210 vpxor ymm4,ymm4,ymm3 4211 vpalignr ymm12,ymm12,ymm12,4 4212 vpalignr ymm8,ymm8,ymm8,8 4213 vpalignr ymm4,ymm4,ymm4,12 4214 4215 dec r10 4216 jne NEAR $L$open_avx2_init_rounds 4217 vpaddd ymm0,ymm0,YMMWORD[$L$chacha20_consts] 4218 vpaddd ymm4,ymm4,YMMWORD[((160+64))+rbp] 4219 vpaddd ymm8,ymm8,YMMWORD[((160+96))+rbp] 4220 vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp] 4221 4222 vperm2i128 ymm3,ymm4,ymm0,0x02 4223 4224 vpand ymm3,ymm3,YMMWORD[$L$clamp] 4225 vmovdqa YMMWORD[(160+0)+rbp],ymm3 4226 4227 vperm2i128 ymm0,ymm4,ymm0,0x13 4228 vperm2i128 ymm4,ymm12,ymm8,0x13 4229 4230 mov r8,r8 4231 call poly_hash_ad_internal 4232 4233 xor rcx,rcx 4234$L$open_avx2_init_hash: 4235 add r10,QWORD[((0+0))+rcx*1+rsi] 4236 adc r11,QWORD[((8+0))+rcx*1+rsi] 4237 adc r12,1 4238 
mov rax,QWORD[((0+160+0))+rbp] 4239 mov r15,rax 4240 mul r10 4241 mov r13,rax 4242 mov r14,rdx 4243 mov rax,QWORD[((0+160+0))+rbp] 4244 mul r11 4245 imul r15,r12 4246 add r14,rax 4247 adc r15,rdx 4248 mov rax,QWORD[((8+160+0))+rbp] 4249 mov r9,rax 4250 mul r10 4251 add r14,rax 4252 adc rdx,0 4253 mov r10,rdx 4254 mov rax,QWORD[((8+160+0))+rbp] 4255 mul r11 4256 add r15,rax 4257 adc rdx,0 4258 imul r9,r12 4259 add r15,r10 4260 adc r9,rdx 4261 mov r10,r13 4262 mov r11,r14 4263 mov r12,r15 4264 and r12,3 4265 mov r13,r15 4266 and r13,-4 4267 mov r14,r9 4268 shrd r15,r9,2 4269 shr r9,2 4270 add r15,r13 4271 adc r9,r14 4272 add r10,r15 4273 adc r11,r9 4274 adc r12,0 4275 4276 add rcx,16 4277 cmp rcx,2*32 4278 jne NEAR $L$open_avx2_init_hash 4279 4280 vpxor ymm0,ymm0,YMMWORD[rsi] 4281 vpxor ymm4,ymm4,YMMWORD[32+rsi] 4282 4283 vmovdqu YMMWORD[rdi],ymm0 4284 vmovdqu YMMWORD[32+rdi],ymm4 4285 lea rsi,[64+rsi] 4286 lea rdi,[64+rdi] 4287 sub rbx,2*32 4288$L$open_avx2_main_loop: 4289 4290 cmp rbx,16*32 4291 jb NEAR $L$open_avx2_main_loop_done 4292 vmovdqa ymm0,YMMWORD[$L$chacha20_consts] 4293 vmovdqa ymm4,YMMWORD[((160+64))+rbp] 4294 vmovdqa ymm8,YMMWORD[((160+96))+rbp] 4295 vmovdqa ymm1,ymm0 4296 vmovdqa ymm5,ymm4 4297 vmovdqa ymm9,ymm8 4298 vmovdqa ymm2,ymm0 4299 vmovdqa ymm6,ymm4 4300 vmovdqa ymm10,ymm8 4301 vmovdqa ymm3,ymm0 4302 vmovdqa ymm7,ymm4 4303 vmovdqa ymm11,ymm8 4304 vmovdqa ymm12,YMMWORD[$L$avx2_inc] 4305 vpaddd ymm15,ymm12,YMMWORD[((160+160))+rbp] 4306 vpaddd ymm14,ymm12,ymm15 4307 vpaddd ymm13,ymm12,ymm14 4308 vpaddd ymm12,ymm12,ymm13 4309 vmovdqa YMMWORD[(160+256)+rbp],ymm15 4310 vmovdqa YMMWORD[(160+224)+rbp],ymm14 4311 vmovdqa YMMWORD[(160+192)+rbp],ymm13 4312 vmovdqa YMMWORD[(160+160)+rbp],ymm12 4313 4314 xor rcx,rcx 4315$L$open_avx2_main_loop_rounds: 4316 add r10,QWORD[((0+0))+rcx*1+rsi] 4317 adc r11,QWORD[((8+0))+rcx*1+rsi] 4318 adc r12,1 4319 vmovdqa YMMWORD[(160+128)+rbp],ymm8 4320 vmovdqa ymm8,YMMWORD[$L$rol16] 4321 vpaddd ymm3,ymm3,ymm7 4322 vpaddd 
ymm2,ymm2,ymm6 4323 vpaddd ymm1,ymm1,ymm5 4324 vpaddd ymm0,ymm0,ymm4 4325 vpxor ymm15,ymm15,ymm3 4326 vpxor ymm14,ymm14,ymm2 4327 vpxor ymm13,ymm13,ymm1 4328 vpxor ymm12,ymm12,ymm0 4329 mov rdx,QWORD[((0+160+0))+rbp] 4330 mov r15,rdx 4331 mulx r14,r13,r10 4332 mulx rdx,rax,r11 4333 imul r15,r12 4334 add r14,rax 4335 adc r15,rdx 4336 vpshufb ymm15,ymm15,ymm8 4337 vpshufb ymm14,ymm14,ymm8 4338 vpshufb ymm13,ymm13,ymm8 4339 vpshufb ymm12,ymm12,ymm8 4340 vpaddd ymm11,ymm11,ymm15 4341 vpaddd ymm10,ymm10,ymm14 4342 vpaddd ymm9,ymm9,ymm13 4343 vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] 4344 vpxor ymm7,ymm7,ymm11 4345 mov rdx,QWORD[((8+160+0))+rbp] 4346 mulx rax,r10,r10 4347 add r14,r10 4348 mulx r9,r11,r11 4349 adc r15,r11 4350 adc r9,0 4351 imul rdx,r12 4352 vpxor ymm6,ymm6,ymm10 4353 vpxor ymm5,ymm5,ymm9 4354 vpxor ymm4,ymm4,ymm8 4355 vmovdqa YMMWORD[(160+128)+rbp],ymm8 4356 vpsrld ymm8,ymm7,20 4357 vpslld ymm7,ymm7,32-20 4358 vpxor ymm7,ymm7,ymm8 4359 vpsrld ymm8,ymm6,20 4360 vpslld ymm6,ymm6,32-20 4361 vpxor ymm6,ymm6,ymm8 4362 vpsrld ymm8,ymm5,20 4363 vpslld ymm5,ymm5,32-20 4364 add r15,rax 4365 adc r9,rdx 4366 vpxor ymm5,ymm5,ymm8 4367 vpsrld ymm8,ymm4,20 4368 vpslld ymm4,ymm4,32-20 4369 vpxor ymm4,ymm4,ymm8 4370 vmovdqa ymm8,YMMWORD[$L$rol8] 4371 vpaddd ymm3,ymm3,ymm7 4372 vpaddd ymm2,ymm2,ymm6 4373 vpaddd ymm1,ymm1,ymm5 4374 vpaddd ymm0,ymm0,ymm4 4375 vpxor ymm15,ymm15,ymm3 4376 mov r10,r13 4377 mov r11,r14 4378 mov r12,r15 4379 and r12,3 4380 mov r13,r15 4381 and r13,-4 4382 mov r14,r9 4383 shrd r15,r9,2 4384 shr r9,2 4385 add r15,r13 4386 adc r9,r14 4387 add r10,r15 4388 adc r11,r9 4389 adc r12,0 4390 vpxor ymm14,ymm14,ymm2 4391 vpxor ymm13,ymm13,ymm1 4392 vpxor ymm12,ymm12,ymm0 4393 vpshufb ymm15,ymm15,ymm8 4394 vpshufb ymm14,ymm14,ymm8 4395 vpshufb ymm13,ymm13,ymm8 4396 vpshufb ymm12,ymm12,ymm8 4397 vpaddd ymm11,ymm11,ymm15 4398 vpaddd ymm10,ymm10,ymm14 4399 add r10,QWORD[((0+16))+rcx*1+rsi] 4400 adc r11,QWORD[((8+16))+rcx*1+rsi] 4401 adc r12,1 4402 vpaddd 
ymm9,ymm9,ymm13 4403 vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] 4404 vpxor ymm7,ymm7,ymm11 4405 vpxor ymm6,ymm6,ymm10 4406 vpxor ymm5,ymm5,ymm9 4407 vpxor ymm4,ymm4,ymm8 4408 vmovdqa YMMWORD[(160+128)+rbp],ymm8 4409 vpsrld ymm8,ymm7,25 4410 mov rdx,QWORD[((0+160+0))+rbp] 4411 mov r15,rdx 4412 mulx r14,r13,r10 4413 mulx rdx,rax,r11 4414 imul r15,r12 4415 add r14,rax 4416 adc r15,rdx 4417 vpslld ymm7,ymm7,32-25 4418 vpxor ymm7,ymm7,ymm8 4419 vpsrld ymm8,ymm6,25 4420 vpslld ymm6,ymm6,32-25 4421 vpxor ymm6,ymm6,ymm8 4422 vpsrld ymm8,ymm5,25 4423 vpslld ymm5,ymm5,32-25 4424 vpxor ymm5,ymm5,ymm8 4425 vpsrld ymm8,ymm4,25 4426 vpslld ymm4,ymm4,32-25 4427 vpxor ymm4,ymm4,ymm8 4428 vmovdqa ymm8,YMMWORD[((160+128))+rbp] 4429 vpalignr ymm7,ymm7,ymm7,4 4430 vpalignr ymm11,ymm11,ymm11,8 4431 vpalignr ymm15,ymm15,ymm15,12 4432 vpalignr ymm6,ymm6,ymm6,4 4433 vpalignr ymm10,ymm10,ymm10,8 4434 vpalignr ymm14,ymm14,ymm14,12 4435 mov rdx,QWORD[((8+160+0))+rbp] 4436 mulx rax,r10,r10 4437 add r14,r10 4438 mulx r9,r11,r11 4439 adc r15,r11 4440 adc r9,0 4441 imul rdx,r12 4442 vpalignr ymm5,ymm5,ymm5,4 4443 vpalignr ymm9,ymm9,ymm9,8 4444 vpalignr ymm13,ymm13,ymm13,12 4445 vpalignr ymm4,ymm4,ymm4,4 4446 vpalignr ymm8,ymm8,ymm8,8 4447 vpalignr ymm12,ymm12,ymm12,12 4448 vmovdqa YMMWORD[(160+128)+rbp],ymm8 4449 vmovdqa ymm8,YMMWORD[$L$rol16] 4450 vpaddd ymm3,ymm3,ymm7 4451 vpaddd ymm2,ymm2,ymm6 4452 vpaddd ymm1,ymm1,ymm5 4453 vpaddd ymm0,ymm0,ymm4 4454 vpxor ymm15,ymm15,ymm3 4455 vpxor ymm14,ymm14,ymm2 4456 vpxor ymm13,ymm13,ymm1 4457 vpxor ymm12,ymm12,ymm0 4458 vpshufb ymm15,ymm15,ymm8 4459 vpshufb ymm14,ymm14,ymm8 4460 add r15,rax 4461 adc r9,rdx 4462 vpshufb ymm13,ymm13,ymm8 4463 vpshufb ymm12,ymm12,ymm8 4464 vpaddd ymm11,ymm11,ymm15 4465 vpaddd ymm10,ymm10,ymm14 4466 vpaddd ymm9,ymm9,ymm13 4467 vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] 4468 vpxor ymm7,ymm7,ymm11 4469 vpxor ymm6,ymm6,ymm10 4470 vpxor ymm5,ymm5,ymm9 4471 mov r10,r13 4472 mov r11,r14 4473 mov r12,r15 4474 and r12,3 4475 mov 
r13,r15 4476 and r13,-4 4477 mov r14,r9 4478 shrd r15,r9,2 4479 shr r9,2 4480 add r15,r13 4481 adc r9,r14 4482 add r10,r15 4483 adc r11,r9 4484 adc r12,0 4485 vpxor ymm4,ymm4,ymm8 4486 vmovdqa YMMWORD[(160+128)+rbp],ymm8 4487 vpsrld ymm8,ymm7,20 4488 vpslld ymm7,ymm7,32-20 4489 vpxor ymm7,ymm7,ymm8 4490 vpsrld ymm8,ymm6,20 4491 vpslld ymm6,ymm6,32-20 4492 vpxor ymm6,ymm6,ymm8 4493 add r10,QWORD[((0+32))+rcx*1+rsi] 4494 adc r11,QWORD[((8+32))+rcx*1+rsi] 4495 adc r12,1 4496 4497 lea rcx,[48+rcx] 4498 vpsrld ymm8,ymm5,20 4499 vpslld ymm5,ymm5,32-20 4500 vpxor ymm5,ymm5,ymm8 4501 vpsrld ymm8,ymm4,20 4502 vpslld ymm4,ymm4,32-20 4503 vpxor ymm4,ymm4,ymm8 4504 vmovdqa ymm8,YMMWORD[$L$rol8] 4505 vpaddd ymm3,ymm3,ymm7 4506 vpaddd ymm2,ymm2,ymm6 4507 vpaddd ymm1,ymm1,ymm5 4508 vpaddd ymm0,ymm0,ymm4 4509 vpxor ymm15,ymm15,ymm3 4510 vpxor ymm14,ymm14,ymm2 4511 vpxor ymm13,ymm13,ymm1 4512 vpxor ymm12,ymm12,ymm0 4513 vpshufb ymm15,ymm15,ymm8 4514 vpshufb ymm14,ymm14,ymm8 4515 vpshufb ymm13,ymm13,ymm8 4516 mov rdx,QWORD[((0+160+0))+rbp] 4517 mov r15,rdx 4518 mulx r14,r13,r10 4519 mulx rdx,rax,r11 4520 imul r15,r12 4521 add r14,rax 4522 adc r15,rdx 4523 vpshufb ymm12,ymm12,ymm8 4524 vpaddd ymm11,ymm11,ymm15 4525 vpaddd ymm10,ymm10,ymm14 4526 vpaddd ymm9,ymm9,ymm13 4527 vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] 4528 vpxor ymm7,ymm7,ymm11 4529 vpxor ymm6,ymm6,ymm10 4530 vpxor ymm5,ymm5,ymm9 4531 mov rdx,QWORD[((8+160+0))+rbp] 4532 mulx rax,r10,r10 4533 add r14,r10 4534 mulx r9,r11,r11 4535 adc r15,r11 4536 adc r9,0 4537 imul rdx,r12 4538 vpxor ymm4,ymm4,ymm8 4539 vmovdqa YMMWORD[(160+128)+rbp],ymm8 4540 vpsrld ymm8,ymm7,25 4541 vpslld ymm7,ymm7,32-25 4542 vpxor ymm7,ymm7,ymm8 4543 vpsrld ymm8,ymm6,25 4544 vpslld ymm6,ymm6,32-25 4545 vpxor ymm6,ymm6,ymm8 4546 add r15,rax 4547 adc r9,rdx 4548 vpsrld ymm8,ymm5,25 4549 vpslld ymm5,ymm5,32-25 4550 vpxor ymm5,ymm5,ymm8 4551 vpsrld ymm8,ymm4,25 4552 vpslld ymm4,ymm4,32-25 4553 vpxor ymm4,ymm4,ymm8 4554 vmovdqa 
ymm8,YMMWORD[((160+128))+rbp] 4555 vpalignr ymm7,ymm7,ymm7,12 4556 vpalignr ymm11,ymm11,ymm11,8 4557 vpalignr ymm15,ymm15,ymm15,4 4558 vpalignr ymm6,ymm6,ymm6,12 4559 vpalignr ymm10,ymm10,ymm10,8 4560 vpalignr ymm14,ymm14,ymm14,4 4561 vpalignr ymm5,ymm5,ymm5,12 4562 vpalignr ymm9,ymm9,ymm9,8 4563 vpalignr ymm13,ymm13,ymm13,4 4564 vpalignr ymm4,ymm4,ymm4,12 4565 vpalignr ymm8,ymm8,ymm8,8 4566 mov r10,r13 4567 mov r11,r14 4568 mov r12,r15 4569 and r12,3 4570 mov r13,r15 4571 and r13,-4 4572 mov r14,r9 4573 shrd r15,r9,2 4574 shr r9,2 4575 add r15,r13 4576 adc r9,r14 4577 add r10,r15 4578 adc r11,r9 4579 adc r12,0 4580 vpalignr ymm12,ymm12,ymm12,4 4581 4582 cmp rcx,10*6*8 4583 jne NEAR $L$open_avx2_main_loop_rounds 4584 vpaddd ymm3,ymm3,YMMWORD[$L$chacha20_consts] 4585 vpaddd ymm7,ymm7,YMMWORD[((160+64))+rbp] 4586 vpaddd ymm11,ymm11,YMMWORD[((160+96))+rbp] 4587 vpaddd ymm15,ymm15,YMMWORD[((160+256))+rbp] 4588 vpaddd ymm2,ymm2,YMMWORD[$L$chacha20_consts] 4589 vpaddd ymm6,ymm6,YMMWORD[((160+64))+rbp] 4590 vpaddd ymm10,ymm10,YMMWORD[((160+96))+rbp] 4591 vpaddd ymm14,ymm14,YMMWORD[((160+224))+rbp] 4592 vpaddd ymm1,ymm1,YMMWORD[$L$chacha20_consts] 4593 vpaddd ymm5,ymm5,YMMWORD[((160+64))+rbp] 4594 vpaddd ymm9,ymm9,YMMWORD[((160+96))+rbp] 4595 vpaddd ymm13,ymm13,YMMWORD[((160+192))+rbp] 4596 vpaddd ymm0,ymm0,YMMWORD[$L$chacha20_consts] 4597 vpaddd ymm4,ymm4,YMMWORD[((160+64))+rbp] 4598 vpaddd ymm8,ymm8,YMMWORD[((160+96))+rbp] 4599 vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp] 4600 4601 vmovdqa YMMWORD[(160+128)+rbp],ymm0 4602 add r10,QWORD[((0+480))+rsi] 4603 adc r11,QWORD[((8+480))+rsi] 4604 adc r12,1 4605 vperm2i128 ymm0,ymm7,ymm3,0x02 4606 vperm2i128 ymm7,ymm7,ymm3,0x13 4607 vperm2i128 ymm3,ymm15,ymm11,0x02 4608 vperm2i128 ymm11,ymm15,ymm11,0x13 4609 vpxor ymm0,ymm0,YMMWORD[((0+0))+rsi] 4610 vpxor ymm3,ymm3,YMMWORD[((32+0))+rsi] 4611 vpxor ymm7,ymm7,YMMWORD[((64+0))+rsi] 4612 vpxor ymm11,ymm11,YMMWORD[((96+0))+rsi] 4613 vmovdqu YMMWORD[(0+0)+rdi],ymm0 4614 vmovdqu 
YMMWORD[(32+0)+rdi],ymm3 4615 vmovdqu YMMWORD[(64+0)+rdi],ymm7 4616 vmovdqu YMMWORD[(96+0)+rdi],ymm11 4617 4618 vmovdqa ymm0,YMMWORD[((160+128))+rbp] 4619 mov rax,QWORD[((0+160+0))+rbp] 4620 mov r15,rax 4621 mul r10 4622 mov r13,rax 4623 mov r14,rdx 4624 mov rax,QWORD[((0+160+0))+rbp] 4625 mul r11 4626 imul r15,r12 4627 add r14,rax 4628 adc r15,rdx 4629 mov rax,QWORD[((8+160+0))+rbp] 4630 mov r9,rax 4631 mul r10 4632 add r14,rax 4633 adc rdx,0 4634 mov r10,rdx 4635 mov rax,QWORD[((8+160+0))+rbp] 4636 mul r11 4637 add r15,rax 4638 adc rdx,0 4639 imul r9,r12 4640 add r15,r10 4641 adc r9,rdx 4642 mov r10,r13 4643 mov r11,r14 4644 mov r12,r15 4645 and r12,3 4646 mov r13,r15 4647 and r13,-4 4648 mov r14,r9 4649 shrd r15,r9,2 4650 shr r9,2 4651 add r15,r13 4652 adc r9,r14 4653 add r10,r15 4654 adc r11,r9 4655 adc r12,0 4656 vperm2i128 ymm3,ymm6,ymm2,0x02 4657 vperm2i128 ymm6,ymm6,ymm2,0x13 4658 vperm2i128 ymm2,ymm14,ymm10,0x02 4659 vperm2i128 ymm10,ymm14,ymm10,0x13 4660 vpxor ymm3,ymm3,YMMWORD[((0+128))+rsi] 4661 vpxor ymm2,ymm2,YMMWORD[((32+128))+rsi] 4662 vpxor ymm6,ymm6,YMMWORD[((64+128))+rsi] 4663 vpxor ymm10,ymm10,YMMWORD[((96+128))+rsi] 4664 vmovdqu YMMWORD[(0+128)+rdi],ymm3 4665 vmovdqu YMMWORD[(32+128)+rdi],ymm2 4666 vmovdqu YMMWORD[(64+128)+rdi],ymm6 4667 vmovdqu YMMWORD[(96+128)+rdi],ymm10 4668 add r10,QWORD[((0+480+16))+rsi] 4669 adc r11,QWORD[((8+480+16))+rsi] 4670 adc r12,1 4671 vperm2i128 ymm3,ymm5,ymm1,0x02 4672 vperm2i128 ymm5,ymm5,ymm1,0x13 4673 vperm2i128 ymm1,ymm13,ymm9,0x02 4674 vperm2i128 ymm9,ymm13,ymm9,0x13 4675 vpxor ymm3,ymm3,YMMWORD[((0+256))+rsi] 4676 vpxor ymm1,ymm1,YMMWORD[((32+256))+rsi] 4677 vpxor ymm5,ymm5,YMMWORD[((64+256))+rsi] 4678 vpxor ymm9,ymm9,YMMWORD[((96+256))+rsi] 4679 vmovdqu YMMWORD[(0+256)+rdi],ymm3 4680 vmovdqu YMMWORD[(32+256)+rdi],ymm1 4681 vmovdqu YMMWORD[(64+256)+rdi],ymm5 4682 vmovdqu YMMWORD[(96+256)+rdi],ymm9 4683 mov rax,QWORD[((0+160+0))+rbp] 4684 mov r15,rax 4685 mul r10 4686 mov r13,rax 4687 mov r14,rdx 4688 mov 
rax,QWORD[((0+160+0))+rbp] 4689 mul r11 4690 imul r15,r12 4691 add r14,rax 4692 adc r15,rdx 4693 mov rax,QWORD[((8+160+0))+rbp] 4694 mov r9,rax 4695 mul r10 4696 add r14,rax 4697 adc rdx,0 4698 mov r10,rdx 4699 mov rax,QWORD[((8+160+0))+rbp] 4700 mul r11 4701 add r15,rax 4702 adc rdx,0 4703 imul r9,r12 4704 add r15,r10 4705 adc r9,rdx 4706 mov r10,r13 4707 mov r11,r14 4708 mov r12,r15 4709 and r12,3 4710 mov r13,r15 4711 and r13,-4 4712 mov r14,r9 4713 shrd r15,r9,2 4714 shr r9,2 4715 add r15,r13 4716 adc r9,r14 4717 add r10,r15 4718 adc r11,r9 4719 adc r12,0 4720 vperm2i128 ymm3,ymm4,ymm0,0x02 4721 vperm2i128 ymm4,ymm4,ymm0,0x13 4722 vperm2i128 ymm0,ymm12,ymm8,0x02 4723 vperm2i128 ymm8,ymm12,ymm8,0x13 4724 vpxor ymm3,ymm3,YMMWORD[((0+384))+rsi] 4725 vpxor ymm0,ymm0,YMMWORD[((32+384))+rsi] 4726 vpxor ymm4,ymm4,YMMWORD[((64+384))+rsi] 4727 vpxor ymm8,ymm8,YMMWORD[((96+384))+rsi] 4728 vmovdqu YMMWORD[(0+384)+rdi],ymm3 4729 vmovdqu YMMWORD[(32+384)+rdi],ymm0 4730 vmovdqu YMMWORD[(64+384)+rdi],ymm4 4731 vmovdqu YMMWORD[(96+384)+rdi],ymm8 4732 4733 lea rsi,[512+rsi] 4734 lea rdi,[512+rdi] 4735 sub rbx,16*32 4736 jmp NEAR $L$open_avx2_main_loop 4737$L$open_avx2_main_loop_done: 4738 test rbx,rbx 4739 vzeroupper 4740 je NEAR $L$open_sse_finalize 4741 4742 cmp rbx,12*32 4743 ja NEAR $L$open_avx2_tail_512 4744 cmp rbx,8*32 4745 ja NEAR $L$open_avx2_tail_384 4746 cmp rbx,4*32 4747 ja NEAR $L$open_avx2_tail_256 4748 vmovdqa ymm0,YMMWORD[$L$chacha20_consts] 4749 vmovdqa ymm4,YMMWORD[((160+64))+rbp] 4750 vmovdqa ymm8,YMMWORD[((160+96))+rbp] 4751 vmovdqa ymm12,YMMWORD[$L$avx2_inc] 4752 vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp] 4753 vmovdqa YMMWORD[(160+160)+rbp],ymm12 4754 4755 xor r8,r8 4756 mov rcx,rbx 4757 and rcx,-16 4758 test rcx,rcx 4759 je NEAR $L$open_avx2_tail_128_rounds 4760$L$open_avx2_tail_128_rounds_and_x1hash: 4761 add r10,QWORD[((0+0))+r8*1+rsi] 4762 adc r11,QWORD[((8+0))+r8*1+rsi] 4763 adc r12,1 4764 mov rax,QWORD[((0+160+0))+rbp] 4765 mov r15,rax 4766 mul r10 
4767 mov r13,rax 4768 mov r14,rdx 4769 mov rax,QWORD[((0+160+0))+rbp] 4770 mul r11 4771 imul r15,r12 4772 add r14,rax 4773 adc r15,rdx 4774 mov rax,QWORD[((8+160+0))+rbp] 4775 mov r9,rax 4776 mul r10 4777 add r14,rax 4778 adc rdx,0 4779 mov r10,rdx 4780 mov rax,QWORD[((8+160+0))+rbp] 4781 mul r11 4782 add r15,rax 4783 adc rdx,0 4784 imul r9,r12 4785 add r15,r10 4786 adc r9,rdx 4787 mov r10,r13 4788 mov r11,r14 4789 mov r12,r15 4790 and r12,3 4791 mov r13,r15 4792 and r13,-4 4793 mov r14,r9 4794 shrd r15,r9,2 4795 shr r9,2 4796 add r15,r13 4797 adc r9,r14 4798 add r10,r15 4799 adc r11,r9 4800 adc r12,0 4801 4802$L$open_avx2_tail_128_rounds: 4803 add r8,16 4804 vpaddd ymm0,ymm0,ymm4 4805 vpxor ymm12,ymm12,ymm0 4806 vpshufb ymm12,ymm12,YMMWORD[$L$rol16] 4807 vpaddd ymm8,ymm8,ymm12 4808 vpxor ymm4,ymm4,ymm8 4809 vpsrld ymm3,ymm4,20 4810 vpslld ymm4,ymm4,12 4811 vpxor ymm4,ymm4,ymm3 4812 vpaddd ymm0,ymm0,ymm4 4813 vpxor ymm12,ymm12,ymm0 4814 vpshufb ymm12,ymm12,YMMWORD[$L$rol8] 4815 vpaddd ymm8,ymm8,ymm12 4816 vpxor ymm4,ymm4,ymm8 4817 vpslld ymm3,ymm4,7 4818 vpsrld ymm4,ymm4,25 4819 vpxor ymm4,ymm4,ymm3 4820 vpalignr ymm12,ymm12,ymm12,12 4821 vpalignr ymm8,ymm8,ymm8,8 4822 vpalignr ymm4,ymm4,ymm4,4 4823 vpaddd ymm0,ymm0,ymm4 4824 vpxor ymm12,ymm12,ymm0 4825 vpshufb ymm12,ymm12,YMMWORD[$L$rol16] 4826 vpaddd ymm8,ymm8,ymm12 4827 vpxor ymm4,ymm4,ymm8 4828 vpsrld ymm3,ymm4,20 4829 vpslld ymm4,ymm4,12 4830 vpxor ymm4,ymm4,ymm3 4831 vpaddd ymm0,ymm0,ymm4 4832 vpxor ymm12,ymm12,ymm0 4833 vpshufb ymm12,ymm12,YMMWORD[$L$rol8] 4834 vpaddd ymm8,ymm8,ymm12 4835 vpxor ymm4,ymm4,ymm8 4836 vpslld ymm3,ymm4,7 4837 vpsrld ymm4,ymm4,25 4838 vpxor ymm4,ymm4,ymm3 4839 vpalignr ymm12,ymm12,ymm12,4 4840 vpalignr ymm8,ymm8,ymm8,8 4841 vpalignr ymm4,ymm4,ymm4,12 4842 4843 cmp r8,rcx 4844 jb NEAR $L$open_avx2_tail_128_rounds_and_x1hash 4845 cmp r8,160 4846 jne NEAR $L$open_avx2_tail_128_rounds 4847 vpaddd ymm0,ymm0,YMMWORD[$L$chacha20_consts] 4848 vpaddd ymm4,ymm4,YMMWORD[((160+64))+rbp] 4849 
vpaddd ymm8,ymm8,YMMWORD[((160+96))+rbp] 4850 vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp] 4851 vperm2i128 ymm3,ymm4,ymm0,0x13 4852 vperm2i128 ymm0,ymm4,ymm0,0x02 4853 vperm2i128 ymm4,ymm12,ymm8,0x02 4854 vperm2i128 ymm12,ymm12,ymm8,0x13 4855 vmovdqa ymm8,ymm3 4856 4857 jmp NEAR $L$open_avx2_tail_128_xor 4858 4859$L$open_avx2_tail_256: 4860 vmovdqa ymm0,YMMWORD[$L$chacha20_consts] 4861 vmovdqa ymm4,YMMWORD[((160+64))+rbp] 4862 vmovdqa ymm8,YMMWORD[((160+96))+rbp] 4863 vmovdqa ymm1,ymm0 4864 vmovdqa ymm5,ymm4 4865 vmovdqa ymm9,ymm8 4866 vmovdqa ymm12,YMMWORD[$L$avx2_inc] 4867 vpaddd ymm13,ymm12,YMMWORD[((160+160))+rbp] 4868 vpaddd ymm12,ymm12,ymm13 4869 vmovdqa YMMWORD[(160+160)+rbp],ymm12 4870 vmovdqa YMMWORD[(160+192)+rbp],ymm13 4871 4872 mov QWORD[((160+128))+rbp],rbx 4873 mov rcx,rbx 4874 sub rcx,4*32 4875 shr rcx,4 4876 mov r8,10 4877 cmp rcx,10 4878 cmovg rcx,r8 4879 mov rbx,rsi 4880 xor r8,r8 4881$L$open_avx2_tail_256_rounds_and_x1hash: 4882 add r10,QWORD[((0+0))+rbx] 4883 adc r11,QWORD[((8+0))+rbx] 4884 adc r12,1 4885 mov rdx,QWORD[((0+160+0))+rbp] 4886 mov r15,rdx 4887 mulx r14,r13,r10 4888 mulx rdx,rax,r11 4889 imul r15,r12 4890 add r14,rax 4891 adc r15,rdx 4892 mov rdx,QWORD[((8+160+0))+rbp] 4893 mulx rax,r10,r10 4894 add r14,r10 4895 mulx r9,r11,r11 4896 adc r15,r11 4897 adc r9,0 4898 imul rdx,r12 4899 add r15,rax 4900 adc r9,rdx 4901 mov r10,r13 4902 mov r11,r14 4903 mov r12,r15 4904 and r12,3 4905 mov r13,r15 4906 and r13,-4 4907 mov r14,r9 4908 shrd r15,r9,2 4909 shr r9,2 4910 add r15,r13 4911 adc r9,r14 4912 add r10,r15 4913 adc r11,r9 4914 adc r12,0 4915 4916 lea rbx,[16+rbx] 4917$L$open_avx2_tail_256_rounds: 4918 vpaddd ymm0,ymm0,ymm4 4919 vpxor ymm12,ymm12,ymm0 4920 vpshufb ymm12,ymm12,YMMWORD[$L$rol16] 4921 vpaddd ymm8,ymm8,ymm12 4922 vpxor ymm4,ymm4,ymm8 4923 vpsrld ymm3,ymm4,20 4924 vpslld ymm4,ymm4,12 4925 vpxor ymm4,ymm4,ymm3 4926 vpaddd ymm0,ymm0,ymm4 4927 vpxor ymm12,ymm12,ymm0 4928 vpshufb ymm12,ymm12,YMMWORD[$L$rol8] 4929 vpaddd 
ymm8,ymm8,ymm12 4930 vpxor ymm4,ymm4,ymm8 4931 vpslld ymm3,ymm4,7 4932 vpsrld ymm4,ymm4,25 4933 vpxor ymm4,ymm4,ymm3 4934 vpalignr ymm12,ymm12,ymm12,12 4935 vpalignr ymm8,ymm8,ymm8,8 4936 vpalignr ymm4,ymm4,ymm4,4 4937 vpaddd ymm1,ymm1,ymm5 4938 vpxor ymm13,ymm13,ymm1 4939 vpshufb ymm13,ymm13,YMMWORD[$L$rol16] 4940 vpaddd ymm9,ymm9,ymm13 4941 vpxor ymm5,ymm5,ymm9 4942 vpsrld ymm3,ymm5,20 4943 vpslld ymm5,ymm5,12 4944 vpxor ymm5,ymm5,ymm3 4945 vpaddd ymm1,ymm1,ymm5 4946 vpxor ymm13,ymm13,ymm1 4947 vpshufb ymm13,ymm13,YMMWORD[$L$rol8] 4948 vpaddd ymm9,ymm9,ymm13 4949 vpxor ymm5,ymm5,ymm9 4950 vpslld ymm3,ymm5,7 4951 vpsrld ymm5,ymm5,25 4952 vpxor ymm5,ymm5,ymm3 4953 vpalignr ymm13,ymm13,ymm13,12 4954 vpalignr ymm9,ymm9,ymm9,8 4955 vpalignr ymm5,ymm5,ymm5,4 4956 4957 inc r8 4958 vpaddd ymm0,ymm0,ymm4 4959 vpxor ymm12,ymm12,ymm0 4960 vpshufb ymm12,ymm12,YMMWORD[$L$rol16] 4961 vpaddd ymm8,ymm8,ymm12 4962 vpxor ymm4,ymm4,ymm8 4963 vpsrld ymm3,ymm4,20 4964 vpslld ymm4,ymm4,12 4965 vpxor ymm4,ymm4,ymm3 4966 vpaddd ymm0,ymm0,ymm4 4967 vpxor ymm12,ymm12,ymm0 4968 vpshufb ymm12,ymm12,YMMWORD[$L$rol8] 4969 vpaddd ymm8,ymm8,ymm12 4970 vpxor ymm4,ymm4,ymm8 4971 vpslld ymm3,ymm4,7 4972 vpsrld ymm4,ymm4,25 4973 vpxor ymm4,ymm4,ymm3 4974 vpalignr ymm12,ymm12,ymm12,4 4975 vpalignr ymm8,ymm8,ymm8,8 4976 vpalignr ymm4,ymm4,ymm4,12 4977 vpaddd ymm1,ymm1,ymm5 4978 vpxor ymm13,ymm13,ymm1 4979 vpshufb ymm13,ymm13,YMMWORD[$L$rol16] 4980 vpaddd ymm9,ymm9,ymm13 4981 vpxor ymm5,ymm5,ymm9 4982 vpsrld ymm3,ymm5,20 4983 vpslld ymm5,ymm5,12 4984 vpxor ymm5,ymm5,ymm3 4985 vpaddd ymm1,ymm1,ymm5 4986 vpxor ymm13,ymm13,ymm1 4987 vpshufb ymm13,ymm13,YMMWORD[$L$rol8] 4988 vpaddd ymm9,ymm9,ymm13 4989 vpxor ymm5,ymm5,ymm9 4990 vpslld ymm3,ymm5,7 4991 vpsrld ymm5,ymm5,25 4992 vpxor ymm5,ymm5,ymm3 4993 vpalignr ymm13,ymm13,ymm13,4 4994 vpalignr ymm9,ymm9,ymm9,8 4995 vpalignr ymm5,ymm5,ymm5,12 4996 vpaddd ymm2,ymm2,ymm6 4997 vpxor ymm14,ymm14,ymm2 4998 vpshufb ymm14,ymm14,YMMWORD[$L$rol16] 4999 vpaddd 
ymm10,ymm10,ymm14 5000 vpxor ymm6,ymm6,ymm10 5001 vpsrld ymm3,ymm6,20 5002 vpslld ymm6,ymm6,12 5003 vpxor ymm6,ymm6,ymm3 5004 vpaddd ymm2,ymm2,ymm6 5005 vpxor ymm14,ymm14,ymm2 5006 vpshufb ymm14,ymm14,YMMWORD[$L$rol8] 5007 vpaddd ymm10,ymm10,ymm14 5008 vpxor ymm6,ymm6,ymm10 5009 vpslld ymm3,ymm6,7 5010 vpsrld ymm6,ymm6,25 5011 vpxor ymm6,ymm6,ymm3 5012 vpalignr ymm14,ymm14,ymm14,4 5013 vpalignr ymm10,ymm10,ymm10,8 5014 vpalignr ymm6,ymm6,ymm6,12 5015 5016 cmp r8,rcx 5017 jb NEAR $L$open_avx2_tail_256_rounds_and_x1hash 5018 cmp r8,10 5019 jne NEAR $L$open_avx2_tail_256_rounds 5020 mov r8,rbx 5021 sub rbx,rsi 5022 mov rcx,rbx 5023 mov rbx,QWORD[((160+128))+rbp] 5024$L$open_avx2_tail_256_hash: 5025 add rcx,16 5026 cmp rcx,rbx 5027 jg NEAR $L$open_avx2_tail_256_done 5028 add r10,QWORD[((0+0))+r8] 5029 adc r11,QWORD[((8+0))+r8] 5030 adc r12,1 5031 mov rdx,QWORD[((0+160+0))+rbp] 5032 mov r15,rdx 5033 mulx r14,r13,r10 5034 mulx rdx,rax,r11 5035 imul r15,r12 5036 add r14,rax 5037 adc r15,rdx 5038 mov rdx,QWORD[((8+160+0))+rbp] 5039 mulx rax,r10,r10 5040 add r14,r10 5041 mulx r9,r11,r11 5042 adc r15,r11 5043 adc r9,0 5044 imul rdx,r12 5045 add r15,rax 5046 adc r9,rdx 5047 mov r10,r13 5048 mov r11,r14 5049 mov r12,r15 5050 and r12,3 5051 mov r13,r15 5052 and r13,-4 5053 mov r14,r9 5054 shrd r15,r9,2 5055 shr r9,2 5056 add r15,r13 5057 adc r9,r14 5058 add r10,r15 5059 adc r11,r9 5060 adc r12,0 5061 5062 lea r8,[16+r8] 5063 jmp NEAR $L$open_avx2_tail_256_hash 5064$L$open_avx2_tail_256_done: 5065 vpaddd ymm1,ymm1,YMMWORD[$L$chacha20_consts] 5066 vpaddd ymm5,ymm5,YMMWORD[((160+64))+rbp] 5067 vpaddd ymm9,ymm9,YMMWORD[((160+96))+rbp] 5068 vpaddd ymm13,ymm13,YMMWORD[((160+192))+rbp] 5069 vpaddd ymm0,ymm0,YMMWORD[$L$chacha20_consts] 5070 vpaddd ymm4,ymm4,YMMWORD[((160+64))+rbp] 5071 vpaddd ymm8,ymm8,YMMWORD[((160+96))+rbp] 5072 vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp] 5073 vperm2i128 ymm3,ymm5,ymm1,0x02 5074 vperm2i128 ymm5,ymm5,ymm1,0x13 5075 vperm2i128 ymm1,ymm13,ymm9,0x02 5076 
vperm2i128 ymm9,ymm13,ymm9,0x13 5077 vpxor ymm3,ymm3,YMMWORD[((0+0))+rsi] 5078 vpxor ymm1,ymm1,YMMWORD[((32+0))+rsi] 5079 vpxor ymm5,ymm5,YMMWORD[((64+0))+rsi] 5080 vpxor ymm9,ymm9,YMMWORD[((96+0))+rsi] 5081 vmovdqu YMMWORD[(0+0)+rdi],ymm3 5082 vmovdqu YMMWORD[(32+0)+rdi],ymm1 5083 vmovdqu YMMWORD[(64+0)+rdi],ymm5 5084 vmovdqu YMMWORD[(96+0)+rdi],ymm9 5085 vperm2i128 ymm3,ymm4,ymm0,0x13 5086 vperm2i128 ymm0,ymm4,ymm0,0x02 5087 vperm2i128 ymm4,ymm12,ymm8,0x02 5088 vperm2i128 ymm12,ymm12,ymm8,0x13 5089 vmovdqa ymm8,ymm3 5090 5091 lea rsi,[128+rsi] 5092 lea rdi,[128+rdi] 5093 sub rbx,4*32 5094 jmp NEAR $L$open_avx2_tail_128_xor 5095 5096$L$open_avx2_tail_384: 5097 vmovdqa ymm0,YMMWORD[$L$chacha20_consts] 5098 vmovdqa ymm4,YMMWORD[((160+64))+rbp] 5099 vmovdqa ymm8,YMMWORD[((160+96))+rbp] 5100 vmovdqa ymm1,ymm0 5101 vmovdqa ymm5,ymm4 5102 vmovdqa ymm9,ymm8 5103 vmovdqa ymm2,ymm0 5104 vmovdqa ymm6,ymm4 5105 vmovdqa ymm10,ymm8 5106 vmovdqa ymm12,YMMWORD[$L$avx2_inc] 5107 vpaddd ymm14,ymm12,YMMWORD[((160+160))+rbp] 5108 vpaddd ymm13,ymm12,ymm14 5109 vpaddd ymm12,ymm12,ymm13 5110 vmovdqa YMMWORD[(160+160)+rbp],ymm12 5111 vmovdqa YMMWORD[(160+192)+rbp],ymm13 5112 vmovdqa YMMWORD[(160+224)+rbp],ymm14 5113 5114 mov QWORD[((160+128))+rbp],rbx 5115 mov rcx,rbx 5116 sub rcx,8*32 5117 shr rcx,4 5118 add rcx,6 5119 mov r8,10 5120 cmp rcx,10 5121 cmovg rcx,r8 5122 mov rbx,rsi 5123 xor r8,r8 5124$L$open_avx2_tail_384_rounds_and_x2hash: 5125 add r10,QWORD[((0+0))+rbx] 5126 adc r11,QWORD[((8+0))+rbx] 5127 adc r12,1 5128 mov rdx,QWORD[((0+160+0))+rbp] 5129 mov r15,rdx 5130 mulx r14,r13,r10 5131 mulx rdx,rax,r11 5132 imul r15,r12 5133 add r14,rax 5134 adc r15,rdx 5135 mov rdx,QWORD[((8+160+0))+rbp] 5136 mulx rax,r10,r10 5137 add r14,r10 5138 mulx r9,r11,r11 5139 adc r15,r11 5140 adc r9,0 5141 imul rdx,r12 5142 add r15,rax 5143 adc r9,rdx 5144 mov r10,r13 5145 mov r11,r14 5146 mov r12,r15 5147 and r12,3 5148 mov r13,r15 5149 and r13,-4 5150 mov r14,r9 5151 shrd r15,r9,2 5152 shr r9,2 5153 
add r15,r13 5154 adc r9,r14 5155 add r10,r15 5156 adc r11,r9 5157 adc r12,0 5158 5159 lea rbx,[16+rbx] 5160$L$open_avx2_tail_384_rounds_and_x1hash: 5161 vpaddd ymm2,ymm2,ymm6 5162 vpxor ymm14,ymm14,ymm2 5163 vpshufb ymm14,ymm14,YMMWORD[$L$rol16] 5164 vpaddd ymm10,ymm10,ymm14 5165 vpxor ymm6,ymm6,ymm10 5166 vpsrld ymm3,ymm6,20 5167 vpslld ymm6,ymm6,12 5168 vpxor ymm6,ymm6,ymm3 5169 vpaddd ymm2,ymm2,ymm6 5170 vpxor ymm14,ymm14,ymm2 5171 vpshufb ymm14,ymm14,YMMWORD[$L$rol8] 5172 vpaddd ymm10,ymm10,ymm14 5173 vpxor ymm6,ymm6,ymm10 5174 vpslld ymm3,ymm6,7 5175 vpsrld ymm6,ymm6,25 5176 vpxor ymm6,ymm6,ymm3 5177 vpalignr ymm14,ymm14,ymm14,12 5178 vpalignr ymm10,ymm10,ymm10,8 5179 vpalignr ymm6,ymm6,ymm6,4 5180 vpaddd ymm1,ymm1,ymm5 5181 vpxor ymm13,ymm13,ymm1 5182 vpshufb ymm13,ymm13,YMMWORD[$L$rol16] 5183 vpaddd ymm9,ymm9,ymm13 5184 vpxor ymm5,ymm5,ymm9 5185 vpsrld ymm3,ymm5,20 5186 vpslld ymm5,ymm5,12 5187 vpxor ymm5,ymm5,ymm3 5188 vpaddd ymm1,ymm1,ymm5 5189 vpxor ymm13,ymm13,ymm1 5190 vpshufb ymm13,ymm13,YMMWORD[$L$rol8] 5191 vpaddd ymm9,ymm9,ymm13 5192 vpxor ymm5,ymm5,ymm9 5193 vpslld ymm3,ymm5,7 5194 vpsrld ymm5,ymm5,25 5195 vpxor ymm5,ymm5,ymm3 5196 vpalignr ymm13,ymm13,ymm13,12 5197 vpalignr ymm9,ymm9,ymm9,8 5198 vpalignr ymm5,ymm5,ymm5,4 5199 vpaddd ymm0,ymm0,ymm4 5200 vpxor ymm12,ymm12,ymm0 5201 vpshufb ymm12,ymm12,YMMWORD[$L$rol16] 5202 vpaddd ymm8,ymm8,ymm12 5203 vpxor ymm4,ymm4,ymm8 5204 vpsrld ymm3,ymm4,20 5205 vpslld ymm4,ymm4,12 5206 vpxor ymm4,ymm4,ymm3 5207 vpaddd ymm0,ymm0,ymm4 5208 vpxor ymm12,ymm12,ymm0 5209 vpshufb ymm12,ymm12,YMMWORD[$L$rol8] 5210 vpaddd ymm8,ymm8,ymm12 5211 vpxor ymm4,ymm4,ymm8 5212 vpslld ymm3,ymm4,7 5213 vpsrld ymm4,ymm4,25 5214 vpxor ymm4,ymm4,ymm3 5215 vpalignr ymm12,ymm12,ymm12,12 5216 vpalignr ymm8,ymm8,ymm8,8 5217 vpalignr ymm4,ymm4,ymm4,4 5218 add r10,QWORD[((0+0))+rbx] 5219 adc r11,QWORD[((8+0))+rbx] 5220 adc r12,1 5221 mov rax,QWORD[((0+160+0))+rbp] 5222 mov r15,rax 5223 mul r10 5224 mov r13,rax 5225 mov r14,rdx 5226 mov 
rax,QWORD[((0+160+0))+rbp] 5227 mul r11 5228 imul r15,r12 5229 add r14,rax 5230 adc r15,rdx 5231 mov rax,QWORD[((8+160+0))+rbp] 5232 mov r9,rax 5233 mul r10 5234 add r14,rax 5235 adc rdx,0 5236 mov r10,rdx 5237 mov rax,QWORD[((8+160+0))+rbp] 5238 mul r11 5239 add r15,rax 5240 adc rdx,0 5241 imul r9,r12 5242 add r15,r10 5243 adc r9,rdx 5244 mov r10,r13 5245 mov r11,r14 5246 mov r12,r15 5247 and r12,3 5248 mov r13,r15 5249 and r13,-4 5250 mov r14,r9 5251 shrd r15,r9,2 5252 shr r9,2 5253 add r15,r13 5254 adc r9,r14 5255 add r10,r15 5256 adc r11,r9 5257 adc r12,0 5258 5259 lea rbx,[16+rbx] 5260 inc r8 5261 vpaddd ymm2,ymm2,ymm6 5262 vpxor ymm14,ymm14,ymm2 5263 vpshufb ymm14,ymm14,YMMWORD[$L$rol16] 5264 vpaddd ymm10,ymm10,ymm14 5265 vpxor ymm6,ymm6,ymm10 5266 vpsrld ymm3,ymm6,20 5267 vpslld ymm6,ymm6,12 5268 vpxor ymm6,ymm6,ymm3 5269 vpaddd ymm2,ymm2,ymm6 5270 vpxor ymm14,ymm14,ymm2 5271 vpshufb ymm14,ymm14,YMMWORD[$L$rol8] 5272 vpaddd ymm10,ymm10,ymm14 5273 vpxor ymm6,ymm6,ymm10 5274 vpslld ymm3,ymm6,7 5275 vpsrld ymm6,ymm6,25 5276 vpxor ymm6,ymm6,ymm3 5277 vpalignr ymm14,ymm14,ymm14,4 5278 vpalignr ymm10,ymm10,ymm10,8 5279 vpalignr ymm6,ymm6,ymm6,12 5280 vpaddd ymm1,ymm1,ymm5 5281 vpxor ymm13,ymm13,ymm1 5282 vpshufb ymm13,ymm13,YMMWORD[$L$rol16] 5283 vpaddd ymm9,ymm9,ymm13 5284 vpxor ymm5,ymm5,ymm9 5285 vpsrld ymm3,ymm5,20 5286 vpslld ymm5,ymm5,12 5287 vpxor ymm5,ymm5,ymm3 5288 vpaddd ymm1,ymm1,ymm5 5289 vpxor ymm13,ymm13,ymm1 5290 vpshufb ymm13,ymm13,YMMWORD[$L$rol8] 5291 vpaddd ymm9,ymm9,ymm13 5292 vpxor ymm5,ymm5,ymm9 5293 vpslld ymm3,ymm5,7 5294 vpsrld ymm5,ymm5,25 5295 vpxor ymm5,ymm5,ymm3 5296 vpalignr ymm13,ymm13,ymm13,4 5297 vpalignr ymm9,ymm9,ymm9,8 5298 vpalignr ymm5,ymm5,ymm5,12 5299 vpaddd ymm0,ymm0,ymm4 5300 vpxor ymm12,ymm12,ymm0 5301 vpshufb ymm12,ymm12,YMMWORD[$L$rol16] 5302 vpaddd ymm8,ymm8,ymm12 5303 vpxor ymm4,ymm4,ymm8 5304 vpsrld ymm3,ymm4,20 5305 vpslld ymm4,ymm4,12 5306 vpxor ymm4,ymm4,ymm3 5307 vpaddd ymm0,ymm0,ymm4 5308 vpxor ymm12,ymm12,ymm0 
5309 vpshufb ymm12,ymm12,YMMWORD[$L$rol8] 5310 vpaddd ymm8,ymm8,ymm12 5311 vpxor ymm4,ymm4,ymm8 5312 vpslld ymm3,ymm4,7 5313 vpsrld ymm4,ymm4,25 5314 vpxor ymm4,ymm4,ymm3 5315 vpalignr ymm12,ymm12,ymm12,4 5316 vpalignr ymm8,ymm8,ymm8,8 5317 vpalignr ymm4,ymm4,ymm4,12 5318 5319 cmp r8,rcx 5320 jb NEAR $L$open_avx2_tail_384_rounds_and_x2hash 5321 cmp r8,10 5322 jne NEAR $L$open_avx2_tail_384_rounds_and_x1hash 5323 mov r8,rbx 5324 sub rbx,rsi 5325 mov rcx,rbx 5326 mov rbx,QWORD[((160+128))+rbp] 5327$L$open_avx2_384_tail_hash: 5328 add rcx,16 5329 cmp rcx,rbx 5330 jg NEAR $L$open_avx2_384_tail_done 5331 add r10,QWORD[((0+0))+r8] 5332 adc r11,QWORD[((8+0))+r8] 5333 adc r12,1 5334 mov rdx,QWORD[((0+160+0))+rbp] 5335 mov r15,rdx 5336 mulx r14,r13,r10 5337 mulx rdx,rax,r11 5338 imul r15,r12 5339 add r14,rax 5340 adc r15,rdx 5341 mov rdx,QWORD[((8+160+0))+rbp] 5342 mulx rax,r10,r10 5343 add r14,r10 5344 mulx r9,r11,r11 5345 adc r15,r11 5346 adc r9,0 5347 imul rdx,r12 5348 add r15,rax 5349 adc r9,rdx 5350 mov r10,r13 5351 mov r11,r14 5352 mov r12,r15 5353 and r12,3 5354 mov r13,r15 5355 and r13,-4 5356 mov r14,r9 5357 shrd r15,r9,2 5358 shr r9,2 5359 add r15,r13 5360 adc r9,r14 5361 add r10,r15 5362 adc r11,r9 5363 adc r12,0 5364 5365 lea r8,[16+r8] 5366 jmp NEAR $L$open_avx2_384_tail_hash 5367$L$open_avx2_384_tail_done: 5368 vpaddd ymm2,ymm2,YMMWORD[$L$chacha20_consts] 5369 vpaddd ymm6,ymm6,YMMWORD[((160+64))+rbp] 5370 vpaddd ymm10,ymm10,YMMWORD[((160+96))+rbp] 5371 vpaddd ymm14,ymm14,YMMWORD[((160+224))+rbp] 5372 vpaddd ymm1,ymm1,YMMWORD[$L$chacha20_consts] 5373 vpaddd ymm5,ymm5,YMMWORD[((160+64))+rbp] 5374 vpaddd ymm9,ymm9,YMMWORD[((160+96))+rbp] 5375 vpaddd ymm13,ymm13,YMMWORD[((160+192))+rbp] 5376 vpaddd ymm0,ymm0,YMMWORD[$L$chacha20_consts] 5377 vpaddd ymm4,ymm4,YMMWORD[((160+64))+rbp] 5378 vpaddd ymm8,ymm8,YMMWORD[((160+96))+rbp] 5379 vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp] 5380 vperm2i128 ymm3,ymm6,ymm2,0x02 5381 vperm2i128 ymm6,ymm6,ymm2,0x13 5382 vperm2i128 
ymm2,ymm14,ymm10,0x02 5383 vperm2i128 ymm10,ymm14,ymm10,0x13 5384 vpxor ymm3,ymm3,YMMWORD[((0+0))+rsi] 5385 vpxor ymm2,ymm2,YMMWORD[((32+0))+rsi] 5386 vpxor ymm6,ymm6,YMMWORD[((64+0))+rsi] 5387 vpxor ymm10,ymm10,YMMWORD[((96+0))+rsi] 5388 vmovdqu YMMWORD[(0+0)+rdi],ymm3 5389 vmovdqu YMMWORD[(32+0)+rdi],ymm2 5390 vmovdqu YMMWORD[(64+0)+rdi],ymm6 5391 vmovdqu YMMWORD[(96+0)+rdi],ymm10 5392 vperm2i128 ymm3,ymm5,ymm1,0x02 5393 vperm2i128 ymm5,ymm5,ymm1,0x13 5394 vperm2i128 ymm1,ymm13,ymm9,0x02 5395 vperm2i128 ymm9,ymm13,ymm9,0x13 5396 vpxor ymm3,ymm3,YMMWORD[((0+128))+rsi] 5397 vpxor ymm1,ymm1,YMMWORD[((32+128))+rsi] 5398 vpxor ymm5,ymm5,YMMWORD[((64+128))+rsi] 5399 vpxor ymm9,ymm9,YMMWORD[((96+128))+rsi] 5400 vmovdqu YMMWORD[(0+128)+rdi],ymm3 5401 vmovdqu YMMWORD[(32+128)+rdi],ymm1 5402 vmovdqu YMMWORD[(64+128)+rdi],ymm5 5403 vmovdqu YMMWORD[(96+128)+rdi],ymm9 5404 vperm2i128 ymm3,ymm4,ymm0,0x13 5405 vperm2i128 ymm0,ymm4,ymm0,0x02 5406 vperm2i128 ymm4,ymm12,ymm8,0x02 5407 vperm2i128 ymm12,ymm12,ymm8,0x13 5408 vmovdqa ymm8,ymm3 5409 5410 lea rsi,[256+rsi] 5411 lea rdi,[256+rdi] 5412 sub rbx,8*32 5413 jmp NEAR $L$open_avx2_tail_128_xor 5414 5415$L$open_avx2_tail_512: 5416 vmovdqa ymm0,YMMWORD[$L$chacha20_consts] 5417 vmovdqa ymm4,YMMWORD[((160+64))+rbp] 5418 vmovdqa ymm8,YMMWORD[((160+96))+rbp] 5419 vmovdqa ymm1,ymm0 5420 vmovdqa ymm5,ymm4 5421 vmovdqa ymm9,ymm8 5422 vmovdqa ymm2,ymm0 5423 vmovdqa ymm6,ymm4 5424 vmovdqa ymm10,ymm8 5425 vmovdqa ymm3,ymm0 5426 vmovdqa ymm7,ymm4 5427 vmovdqa ymm11,ymm8 5428 vmovdqa ymm12,YMMWORD[$L$avx2_inc] 5429 vpaddd ymm15,ymm12,YMMWORD[((160+160))+rbp] 5430 vpaddd ymm14,ymm12,ymm15 5431 vpaddd ymm13,ymm12,ymm14 5432 vpaddd ymm12,ymm12,ymm13 5433 vmovdqa YMMWORD[(160+256)+rbp],ymm15 5434 vmovdqa YMMWORD[(160+224)+rbp],ymm14 5435 vmovdqa YMMWORD[(160+192)+rbp],ymm13 5436 vmovdqa YMMWORD[(160+160)+rbp],ymm12 5437 5438 xor rcx,rcx 5439 mov r8,rsi 5440$L$open_avx2_tail_512_rounds_and_x2hash: 5441 add r10,QWORD[((0+0))+r8] 5442 adc 
r11,QWORD[((8+0))+r8] 5443 adc r12,1 5444 mov rax,QWORD[((0+160+0))+rbp] 5445 mov r15,rax 5446 mul r10 5447 mov r13,rax 5448 mov r14,rdx 5449 mov rax,QWORD[((0+160+0))+rbp] 5450 mul r11 5451 imul r15,r12 5452 add r14,rax 5453 adc r15,rdx 5454 mov rax,QWORD[((8+160+0))+rbp] 5455 mov r9,rax 5456 mul r10 5457 add r14,rax 5458 adc rdx,0 5459 mov r10,rdx 5460 mov rax,QWORD[((8+160+0))+rbp] 5461 mul r11 5462 add r15,rax 5463 adc rdx,0 5464 imul r9,r12 5465 add r15,r10 5466 adc r9,rdx 5467 mov r10,r13 5468 mov r11,r14 5469 mov r12,r15 5470 and r12,3 5471 mov r13,r15 5472 and r13,-4 5473 mov r14,r9 5474 shrd r15,r9,2 5475 shr r9,2 5476 add r15,r13 5477 adc r9,r14 5478 add r10,r15 5479 adc r11,r9 5480 adc r12,0 5481 5482 lea r8,[16+r8] 5483$L$open_avx2_tail_512_rounds_and_x1hash: 5484 vmovdqa YMMWORD[(160+128)+rbp],ymm8 5485 vmovdqa ymm8,YMMWORD[$L$rol16] 5486 vpaddd ymm3,ymm3,ymm7 5487 vpaddd ymm2,ymm2,ymm6 5488 vpaddd ymm1,ymm1,ymm5 5489 vpaddd ymm0,ymm0,ymm4 5490 vpxor ymm15,ymm15,ymm3 5491 vpxor ymm14,ymm14,ymm2 5492 vpxor ymm13,ymm13,ymm1 5493 vpxor ymm12,ymm12,ymm0 5494 vpshufb ymm15,ymm15,ymm8 5495 vpshufb ymm14,ymm14,ymm8 5496 vpshufb ymm13,ymm13,ymm8 5497 vpshufb ymm12,ymm12,ymm8 5498 vpaddd ymm11,ymm11,ymm15 5499 vpaddd ymm10,ymm10,ymm14 5500 vpaddd ymm9,ymm9,ymm13 5501 vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] 5502 vpxor ymm7,ymm7,ymm11 5503 vpxor ymm6,ymm6,ymm10 5504 vpxor ymm5,ymm5,ymm9 5505 vpxor ymm4,ymm4,ymm8 5506 vmovdqa YMMWORD[(160+128)+rbp],ymm8 5507 vpsrld ymm8,ymm7,20 5508 vpslld ymm7,ymm7,32-20 5509 vpxor ymm7,ymm7,ymm8 5510 vpsrld ymm8,ymm6,20 5511 vpslld ymm6,ymm6,32-20 5512 vpxor ymm6,ymm6,ymm8 5513 vpsrld ymm8,ymm5,20 5514 vpslld ymm5,ymm5,32-20 5515 vpxor ymm5,ymm5,ymm8 5516 vpsrld ymm8,ymm4,20 5517 vpslld ymm4,ymm4,32-20 5518 vpxor ymm4,ymm4,ymm8 5519 vmovdqa ymm8,YMMWORD[$L$rol8] 5520 vpaddd ymm3,ymm3,ymm7 5521 add r10,QWORD[((0+0))+r8] 5522 adc r11,QWORD[((8+0))+r8] 5523 adc r12,1 5524 mov rdx,QWORD[((0+160+0))+rbp] 5525 mov r15,rdx 5526 mulx 
r14,r13,r10 5527 mulx rdx,rax,r11 5528 imul r15,r12 5529 add r14,rax 5530 adc r15,rdx 5531 mov rdx,QWORD[((8+160+0))+rbp] 5532 mulx rax,r10,r10 5533 add r14,r10 5534 mulx r9,r11,r11 5535 adc r15,r11 5536 adc r9,0 5537 imul rdx,r12 5538 add r15,rax 5539 adc r9,rdx 5540 mov r10,r13 5541 mov r11,r14 5542 mov r12,r15 5543 and r12,3 5544 mov r13,r15 5545 and r13,-4 5546 mov r14,r9 5547 shrd r15,r9,2 5548 shr r9,2 5549 add r15,r13 5550 adc r9,r14 5551 add r10,r15 5552 adc r11,r9 5553 adc r12,0 5554 vpaddd ymm2,ymm2,ymm6 5555 vpaddd ymm1,ymm1,ymm5 5556 vpaddd ymm0,ymm0,ymm4 5557 vpxor ymm15,ymm15,ymm3 5558 vpxor ymm14,ymm14,ymm2 5559 vpxor ymm13,ymm13,ymm1 5560 vpxor ymm12,ymm12,ymm0 5561 vpshufb ymm15,ymm15,ymm8 5562 vpshufb ymm14,ymm14,ymm8 5563 vpshufb ymm13,ymm13,ymm8 5564 vpshufb ymm12,ymm12,ymm8 5565 vpaddd ymm11,ymm11,ymm15 5566 vpaddd ymm10,ymm10,ymm14 5567 vpaddd ymm9,ymm9,ymm13 5568 vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] 5569 vpxor ymm7,ymm7,ymm11 5570 vpxor ymm6,ymm6,ymm10 5571 vpxor ymm5,ymm5,ymm9 5572 vpxor ymm4,ymm4,ymm8 5573 vmovdqa YMMWORD[(160+128)+rbp],ymm8 5574 vpsrld ymm8,ymm7,25 5575 vpslld ymm7,ymm7,32-25 5576 vpxor ymm7,ymm7,ymm8 5577 vpsrld ymm8,ymm6,25 5578 vpslld ymm6,ymm6,32-25 5579 vpxor ymm6,ymm6,ymm8 5580 vpsrld ymm8,ymm5,25 5581 vpslld ymm5,ymm5,32-25 5582 vpxor ymm5,ymm5,ymm8 5583 vpsrld ymm8,ymm4,25 5584 vpslld ymm4,ymm4,32-25 5585 vpxor ymm4,ymm4,ymm8 5586 vmovdqa ymm8,YMMWORD[((160+128))+rbp] 5587 vpalignr ymm7,ymm7,ymm7,4 5588 vpalignr ymm11,ymm11,ymm11,8 5589 vpalignr ymm15,ymm15,ymm15,12 5590 vpalignr ymm6,ymm6,ymm6,4 5591 vpalignr ymm10,ymm10,ymm10,8 5592 vpalignr ymm14,ymm14,ymm14,12 5593 vpalignr ymm5,ymm5,ymm5,4 5594 vpalignr ymm9,ymm9,ymm9,8 5595 vpalignr ymm13,ymm13,ymm13,12 5596 vpalignr ymm4,ymm4,ymm4,4 5597 vpalignr ymm8,ymm8,ymm8,8 5598 vpalignr ymm12,ymm12,ymm12,12 5599 vmovdqa YMMWORD[(160+128)+rbp],ymm8 5600 vmovdqa ymm8,YMMWORD[$L$rol16] 5601 vpaddd ymm3,ymm3,ymm7 5602 add r10,QWORD[((0+16))+r8] 5603 adc 
r11,QWORD[((8+16))+r8] 5604 adc r12,1 5605 mov rdx,QWORD[((0+160+0))+rbp] 5606 mov r15,rdx 5607 mulx r14,r13,r10 5608 mulx rdx,rax,r11 5609 imul r15,r12 5610 add r14,rax 5611 adc r15,rdx 5612 mov rdx,QWORD[((8+160+0))+rbp] 5613 mulx rax,r10,r10 5614 add r14,r10 5615 mulx r9,r11,r11 5616 adc r15,r11 5617 adc r9,0 5618 imul rdx,r12 5619 add r15,rax 5620 adc r9,rdx 5621 mov r10,r13 5622 mov r11,r14 5623 mov r12,r15 5624 and r12,3 5625 mov r13,r15 5626 and r13,-4 5627 mov r14,r9 5628 shrd r15,r9,2 5629 shr r9,2 5630 add r15,r13 5631 adc r9,r14 5632 add r10,r15 5633 adc r11,r9 5634 adc r12,0 5635 5636 lea r8,[32+r8] 5637 vpaddd ymm2,ymm2,ymm6 5638 vpaddd ymm1,ymm1,ymm5 5639 vpaddd ymm0,ymm0,ymm4 5640 vpxor ymm15,ymm15,ymm3 5641 vpxor ymm14,ymm14,ymm2 5642 vpxor ymm13,ymm13,ymm1 5643 vpxor ymm12,ymm12,ymm0 5644 vpshufb ymm15,ymm15,ymm8 5645 vpshufb ymm14,ymm14,ymm8 5646 vpshufb ymm13,ymm13,ymm8 5647 vpshufb ymm12,ymm12,ymm8 5648 vpaddd ymm11,ymm11,ymm15 5649 vpaddd ymm10,ymm10,ymm14 5650 vpaddd ymm9,ymm9,ymm13 5651 vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] 5652 vpxor ymm7,ymm7,ymm11 5653 vpxor ymm6,ymm6,ymm10 5654 vpxor ymm5,ymm5,ymm9 5655 vpxor ymm4,ymm4,ymm8 5656 vmovdqa YMMWORD[(160+128)+rbp],ymm8 5657 vpsrld ymm8,ymm7,20 5658 vpslld ymm7,ymm7,32-20 5659 vpxor ymm7,ymm7,ymm8 5660 vpsrld ymm8,ymm6,20 5661 vpslld ymm6,ymm6,32-20 5662 vpxor ymm6,ymm6,ymm8 5663 vpsrld ymm8,ymm5,20 5664 vpslld ymm5,ymm5,32-20 5665 vpxor ymm5,ymm5,ymm8 5666 vpsrld ymm8,ymm4,20 5667 vpslld ymm4,ymm4,32-20 5668 vpxor ymm4,ymm4,ymm8 5669 vmovdqa ymm8,YMMWORD[$L$rol8] 5670 vpaddd ymm3,ymm3,ymm7 5671 vpaddd ymm2,ymm2,ymm6 5672 vpaddd ymm1,ymm1,ymm5 5673 vpaddd ymm0,ymm0,ymm4 5674 vpxor ymm15,ymm15,ymm3 5675 vpxor ymm14,ymm14,ymm2 5676 vpxor ymm13,ymm13,ymm1 5677 vpxor ymm12,ymm12,ymm0 5678 vpshufb ymm15,ymm15,ymm8 5679 vpshufb ymm14,ymm14,ymm8 5680 vpshufb ymm13,ymm13,ymm8 5681 vpshufb ymm12,ymm12,ymm8 5682 vpaddd ymm11,ymm11,ymm15 5683 vpaddd ymm10,ymm10,ymm14 5684 vpaddd ymm9,ymm9,ymm13 5685 
vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] 5686 vpxor ymm7,ymm7,ymm11 5687 vpxor ymm6,ymm6,ymm10 5688 vpxor ymm5,ymm5,ymm9 5689 vpxor ymm4,ymm4,ymm8 5690 vmovdqa YMMWORD[(160+128)+rbp],ymm8 5691 vpsrld ymm8,ymm7,25 5692 vpslld ymm7,ymm7,32-25 5693 vpxor ymm7,ymm7,ymm8 5694 vpsrld ymm8,ymm6,25 5695 vpslld ymm6,ymm6,32-25 5696 vpxor ymm6,ymm6,ymm8 5697 vpsrld ymm8,ymm5,25 5698 vpslld ymm5,ymm5,32-25 5699 vpxor ymm5,ymm5,ymm8 5700 vpsrld ymm8,ymm4,25 5701 vpslld ymm4,ymm4,32-25 5702 vpxor ymm4,ymm4,ymm8 5703 vmovdqa ymm8,YMMWORD[((160+128))+rbp] 5704 vpalignr ymm7,ymm7,ymm7,12 5705 vpalignr ymm11,ymm11,ymm11,8 5706 vpalignr ymm15,ymm15,ymm15,4 5707 vpalignr ymm6,ymm6,ymm6,12 5708 vpalignr ymm10,ymm10,ymm10,8 5709 vpalignr ymm14,ymm14,ymm14,4 5710 vpalignr ymm5,ymm5,ymm5,12 5711 vpalignr ymm9,ymm9,ymm9,8 5712 vpalignr ymm13,ymm13,ymm13,4 5713 vpalignr ymm4,ymm4,ymm4,12 5714 vpalignr ymm8,ymm8,ymm8,8 5715 vpalignr ymm12,ymm12,ymm12,4 5716 5717 inc rcx 5718 cmp rcx,4 5719 jl NEAR $L$open_avx2_tail_512_rounds_and_x2hash 5720 cmp rcx,10 5721 jne NEAR $L$open_avx2_tail_512_rounds_and_x1hash 5722 mov rcx,rbx 5723 sub rcx,12*32 5724 and rcx,-16 5725$L$open_avx2_tail_512_hash: 5726 test rcx,rcx 5727 je NEAR $L$open_avx2_tail_512_done 5728 add r10,QWORD[((0+0))+r8] 5729 adc r11,QWORD[((8+0))+r8] 5730 adc r12,1 5731 mov rdx,QWORD[((0+160+0))+rbp] 5732 mov r15,rdx 5733 mulx r14,r13,r10 5734 mulx rdx,rax,r11 5735 imul r15,r12 5736 add r14,rax 5737 adc r15,rdx 5738 mov rdx,QWORD[((8+160+0))+rbp] 5739 mulx rax,r10,r10 5740 add r14,r10 5741 mulx r9,r11,r11 5742 adc r15,r11 5743 adc r9,0 5744 imul rdx,r12 5745 add r15,rax 5746 adc r9,rdx 5747 mov r10,r13 5748 mov r11,r14 5749 mov r12,r15 5750 and r12,3 5751 mov r13,r15 5752 and r13,-4 5753 mov r14,r9 5754 shrd r15,r9,2 5755 shr r9,2 5756 add r15,r13 5757 adc r9,r14 5758 add r10,r15 5759 adc r11,r9 5760 adc r12,0 5761 5762 lea r8,[16+r8] 5763 sub rcx,2*8 5764 jmp NEAR $L$open_avx2_tail_512_hash 5765$L$open_avx2_tail_512_done: 5766 
vpaddd ymm3,ymm3,YMMWORD[$L$chacha20_consts] 5767 vpaddd ymm7,ymm7,YMMWORD[((160+64))+rbp] 5768 vpaddd ymm11,ymm11,YMMWORD[((160+96))+rbp] 5769 vpaddd ymm15,ymm15,YMMWORD[((160+256))+rbp] 5770 vpaddd ymm2,ymm2,YMMWORD[$L$chacha20_consts] 5771 vpaddd ymm6,ymm6,YMMWORD[((160+64))+rbp] 5772 vpaddd ymm10,ymm10,YMMWORD[((160+96))+rbp] 5773 vpaddd ymm14,ymm14,YMMWORD[((160+224))+rbp] 5774 vpaddd ymm1,ymm1,YMMWORD[$L$chacha20_consts] 5775 vpaddd ymm5,ymm5,YMMWORD[((160+64))+rbp] 5776 vpaddd ymm9,ymm9,YMMWORD[((160+96))+rbp] 5777 vpaddd ymm13,ymm13,YMMWORD[((160+192))+rbp] 5778 vpaddd ymm0,ymm0,YMMWORD[$L$chacha20_consts] 5779 vpaddd ymm4,ymm4,YMMWORD[((160+64))+rbp] 5780 vpaddd ymm8,ymm8,YMMWORD[((160+96))+rbp] 5781 vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp] 5782 5783 vmovdqa YMMWORD[(160+128)+rbp],ymm0 5784 vperm2i128 ymm0,ymm7,ymm3,0x02 5785 vperm2i128 ymm7,ymm7,ymm3,0x13 5786 vperm2i128 ymm3,ymm15,ymm11,0x02 5787 vperm2i128 ymm11,ymm15,ymm11,0x13 5788 vpxor ymm0,ymm0,YMMWORD[((0+0))+rsi] 5789 vpxor ymm3,ymm3,YMMWORD[((32+0))+rsi] 5790 vpxor ymm7,ymm7,YMMWORD[((64+0))+rsi] 5791 vpxor ymm11,ymm11,YMMWORD[((96+0))+rsi] 5792 vmovdqu YMMWORD[(0+0)+rdi],ymm0 5793 vmovdqu YMMWORD[(32+0)+rdi],ymm3 5794 vmovdqu YMMWORD[(64+0)+rdi],ymm7 5795 vmovdqu YMMWORD[(96+0)+rdi],ymm11 5796 5797 vmovdqa ymm0,YMMWORD[((160+128))+rbp] 5798 vperm2i128 ymm3,ymm6,ymm2,0x02 5799 vperm2i128 ymm6,ymm6,ymm2,0x13 5800 vperm2i128 ymm2,ymm14,ymm10,0x02 5801 vperm2i128 ymm10,ymm14,ymm10,0x13 5802 vpxor ymm3,ymm3,YMMWORD[((0+128))+rsi] 5803 vpxor ymm2,ymm2,YMMWORD[((32+128))+rsi] 5804 vpxor ymm6,ymm6,YMMWORD[((64+128))+rsi] 5805 vpxor ymm10,ymm10,YMMWORD[((96+128))+rsi] 5806 vmovdqu YMMWORD[(0+128)+rdi],ymm3 5807 vmovdqu YMMWORD[(32+128)+rdi],ymm2 5808 vmovdqu YMMWORD[(64+128)+rdi],ymm6 5809 vmovdqu YMMWORD[(96+128)+rdi],ymm10 5810 vperm2i128 ymm3,ymm5,ymm1,0x02 5811 vperm2i128 ymm5,ymm5,ymm1,0x13 5812 vperm2i128 ymm1,ymm13,ymm9,0x02 5813 vperm2i128 ymm9,ymm13,ymm9,0x13 5814 vpxor 
ymm3,ymm3,YMMWORD[((0+256))+rsi] 5815 vpxor ymm1,ymm1,YMMWORD[((32+256))+rsi] 5816 vpxor ymm5,ymm5,YMMWORD[((64+256))+rsi] 5817 vpxor ymm9,ymm9,YMMWORD[((96+256))+rsi] 5818 vmovdqu YMMWORD[(0+256)+rdi],ymm3 5819 vmovdqu YMMWORD[(32+256)+rdi],ymm1 5820 vmovdqu YMMWORD[(64+256)+rdi],ymm5 5821 vmovdqu YMMWORD[(96+256)+rdi],ymm9 5822 vperm2i128 ymm3,ymm4,ymm0,0x13 5823 vperm2i128 ymm0,ymm4,ymm0,0x02 5824 vperm2i128 ymm4,ymm12,ymm8,0x02 5825 vperm2i128 ymm12,ymm12,ymm8,0x13 5826 vmovdqa ymm8,ymm3 5827 5828 lea rsi,[384+rsi] 5829 lea rdi,[384+rdi] 5830 sub rbx,12*32 5831$L$open_avx2_tail_128_xor: 5832 cmp rbx,32 5833 jb NEAR $L$open_avx2_tail_32_xor 5834 sub rbx,32 5835 vpxor ymm0,ymm0,YMMWORD[rsi] 5836 vmovdqu YMMWORD[rdi],ymm0 5837 lea rsi,[32+rsi] 5838 lea rdi,[32+rdi] 5839 vmovdqa ymm0,ymm4 5840 vmovdqa ymm4,ymm8 5841 vmovdqa ymm8,ymm12 5842 jmp NEAR $L$open_avx2_tail_128_xor 5843$L$open_avx2_tail_32_xor: 5844 cmp rbx,16 5845 vmovdqa xmm1,xmm0 5846 jb NEAR $L$open_avx2_exit 5847 sub rbx,16 5848 5849 vpxor xmm1,xmm0,XMMWORD[rsi] 5850 vmovdqu XMMWORD[rdi],xmm1 5851 lea rsi,[16+rsi] 5852 lea rdi,[16+rdi] 5853 vperm2i128 ymm0,ymm0,ymm0,0x11 5854 vmovdqa xmm1,xmm0 5855$L$open_avx2_exit: 5856 vzeroupper 5857 jmp NEAR $L$open_sse_tail_16 5858 5859$L$open_avx2_192: 5860 vmovdqa ymm1,ymm0 5861 vmovdqa ymm2,ymm0 5862 vmovdqa ymm5,ymm4 5863 vmovdqa ymm6,ymm4 5864 vmovdqa ymm9,ymm8 5865 vmovdqa ymm10,ymm8 5866 vpaddd ymm13,ymm12,YMMWORD[$L$avx2_inc] 5867 vmovdqa ymm11,ymm12 5868 vmovdqa ymm15,ymm13 5869 mov r10,10 5870$L$open_avx2_192_rounds: 5871 vpaddd ymm0,ymm0,ymm4 5872 vpxor ymm12,ymm12,ymm0 5873 vpshufb ymm12,ymm12,YMMWORD[$L$rol16] 5874 vpaddd ymm8,ymm8,ymm12 5875 vpxor ymm4,ymm4,ymm8 5876 vpsrld ymm3,ymm4,20 5877 vpslld ymm4,ymm4,12 5878 vpxor ymm4,ymm4,ymm3 5879 vpaddd ymm0,ymm0,ymm4 5880 vpxor ymm12,ymm12,ymm0 5881 vpshufb ymm12,ymm12,YMMWORD[$L$rol8] 5882 vpaddd ymm8,ymm8,ymm12 5883 vpxor ymm4,ymm4,ymm8 5884 vpslld ymm3,ymm4,7 5885 vpsrld ymm4,ymm4,25 5886 vpxor 
ymm4,ymm4,ymm3 5887 vpalignr ymm12,ymm12,ymm12,12 5888 vpalignr ymm8,ymm8,ymm8,8 5889 vpalignr ymm4,ymm4,ymm4,4 5890 vpaddd ymm1,ymm1,ymm5 5891 vpxor ymm13,ymm13,ymm1 5892 vpshufb ymm13,ymm13,YMMWORD[$L$rol16] 5893 vpaddd ymm9,ymm9,ymm13 5894 vpxor ymm5,ymm5,ymm9 5895 vpsrld ymm3,ymm5,20 5896 vpslld ymm5,ymm5,12 5897 vpxor ymm5,ymm5,ymm3 5898 vpaddd ymm1,ymm1,ymm5 5899 vpxor ymm13,ymm13,ymm1 5900 vpshufb ymm13,ymm13,YMMWORD[$L$rol8] 5901 vpaddd ymm9,ymm9,ymm13 5902 vpxor ymm5,ymm5,ymm9 5903 vpslld ymm3,ymm5,7 5904 vpsrld ymm5,ymm5,25 5905 vpxor ymm5,ymm5,ymm3 5906 vpalignr ymm13,ymm13,ymm13,12 5907 vpalignr ymm9,ymm9,ymm9,8 5908 vpalignr ymm5,ymm5,ymm5,4 5909 vpaddd ymm0,ymm0,ymm4 5910 vpxor ymm12,ymm12,ymm0 5911 vpshufb ymm12,ymm12,YMMWORD[$L$rol16] 5912 vpaddd ymm8,ymm8,ymm12 5913 vpxor ymm4,ymm4,ymm8 5914 vpsrld ymm3,ymm4,20 5915 vpslld ymm4,ymm4,12 5916 vpxor ymm4,ymm4,ymm3 5917 vpaddd ymm0,ymm0,ymm4 5918 vpxor ymm12,ymm12,ymm0 5919 vpshufb ymm12,ymm12,YMMWORD[$L$rol8] 5920 vpaddd ymm8,ymm8,ymm12 5921 vpxor ymm4,ymm4,ymm8 5922 vpslld ymm3,ymm4,7 5923 vpsrld ymm4,ymm4,25 5924 vpxor ymm4,ymm4,ymm3 5925 vpalignr ymm12,ymm12,ymm12,4 5926 vpalignr ymm8,ymm8,ymm8,8 5927 vpalignr ymm4,ymm4,ymm4,12 5928 vpaddd ymm1,ymm1,ymm5 5929 vpxor ymm13,ymm13,ymm1 5930 vpshufb ymm13,ymm13,YMMWORD[$L$rol16] 5931 vpaddd ymm9,ymm9,ymm13 5932 vpxor ymm5,ymm5,ymm9 5933 vpsrld ymm3,ymm5,20 5934 vpslld ymm5,ymm5,12 5935 vpxor ymm5,ymm5,ymm3 5936 vpaddd ymm1,ymm1,ymm5 5937 vpxor ymm13,ymm13,ymm1 5938 vpshufb ymm13,ymm13,YMMWORD[$L$rol8] 5939 vpaddd ymm9,ymm9,ymm13 5940 vpxor ymm5,ymm5,ymm9 5941 vpslld ymm3,ymm5,7 5942 vpsrld ymm5,ymm5,25 5943 vpxor ymm5,ymm5,ymm3 5944 vpalignr ymm13,ymm13,ymm13,4 5945 vpalignr ymm9,ymm9,ymm9,8 5946 vpalignr ymm5,ymm5,ymm5,12 5947 5948 dec r10 5949 jne NEAR $L$open_avx2_192_rounds 5950 vpaddd ymm0,ymm0,ymm2 5951 vpaddd ymm1,ymm1,ymm2 5952 vpaddd ymm4,ymm4,ymm6 5953 vpaddd ymm5,ymm5,ymm6 5954 vpaddd ymm8,ymm8,ymm10 5955 vpaddd ymm9,ymm9,ymm10 5956 vpaddd 
ymm12,ymm12,ymm11 5957 vpaddd ymm13,ymm13,ymm15 5958 vperm2i128 ymm3,ymm4,ymm0,0x02 5959 5960 vpand ymm3,ymm3,YMMWORD[$L$clamp] 5961 vmovdqa YMMWORD[(160+0)+rbp],ymm3 5962 5963 vperm2i128 ymm0,ymm4,ymm0,0x13 5964 vperm2i128 ymm4,ymm12,ymm8,0x13 5965 vperm2i128 ymm8,ymm5,ymm1,0x02 5966 vperm2i128 ymm12,ymm13,ymm9,0x02 5967 vperm2i128 ymm1,ymm5,ymm1,0x13 5968 vperm2i128 ymm5,ymm13,ymm9,0x13 5969$L$open_avx2_short: 5970 mov r8,r8 5971 call poly_hash_ad_internal 5972$L$open_avx2_short_hash_and_xor_loop: 5973 cmp rbx,32 5974 jb NEAR $L$open_avx2_short_tail_32 5975 sub rbx,32 5976 add r10,QWORD[((0+0))+rsi] 5977 adc r11,QWORD[((8+0))+rsi] 5978 adc r12,1 5979 mov rax,QWORD[((0+160+0))+rbp] 5980 mov r15,rax 5981 mul r10 5982 mov r13,rax 5983 mov r14,rdx 5984 mov rax,QWORD[((0+160+0))+rbp] 5985 mul r11 5986 imul r15,r12 5987 add r14,rax 5988 adc r15,rdx 5989 mov rax,QWORD[((8+160+0))+rbp] 5990 mov r9,rax 5991 mul r10 5992 add r14,rax 5993 adc rdx,0 5994 mov r10,rdx 5995 mov rax,QWORD[((8+160+0))+rbp] 5996 mul r11 5997 add r15,rax 5998 adc rdx,0 5999 imul r9,r12 6000 add r15,r10 6001 adc r9,rdx 6002 mov r10,r13 6003 mov r11,r14 6004 mov r12,r15 6005 and r12,3 6006 mov r13,r15 6007 and r13,-4 6008 mov r14,r9 6009 shrd r15,r9,2 6010 shr r9,2 6011 add r15,r13 6012 adc r9,r14 6013 add r10,r15 6014 adc r11,r9 6015 adc r12,0 6016 add r10,QWORD[((0+16))+rsi] 6017 adc r11,QWORD[((8+16))+rsi] 6018 adc r12,1 6019 mov rax,QWORD[((0+160+0))+rbp] 6020 mov r15,rax 6021 mul r10 6022 mov r13,rax 6023 mov r14,rdx 6024 mov rax,QWORD[((0+160+0))+rbp] 6025 mul r11 6026 imul r15,r12 6027 add r14,rax 6028 adc r15,rdx 6029 mov rax,QWORD[((8+160+0))+rbp] 6030 mov r9,rax 6031 mul r10 6032 add r14,rax 6033 adc rdx,0 6034 mov r10,rdx 6035 mov rax,QWORD[((8+160+0))+rbp] 6036 mul r11 6037 add r15,rax 6038 adc rdx,0 6039 imul r9,r12 6040 add r15,r10 6041 adc r9,rdx 6042 mov r10,r13 6043 mov r11,r14 6044 mov r12,r15 6045 and r12,3 6046 mov r13,r15 6047 and r13,-4 6048 mov r14,r9 6049 shrd r15,r9,2 6050 
shr r9,2 6051 add r15,r13 6052 adc r9,r14 6053 add r10,r15 6054 adc r11,r9 6055 adc r12,0 6056 6057 6058 vpxor ymm0,ymm0,YMMWORD[rsi] 6059 vmovdqu YMMWORD[rdi],ymm0 6060 lea rsi,[32+rsi] 6061 lea rdi,[32+rdi] 6062 6063 vmovdqa ymm0,ymm4 6064 vmovdqa ymm4,ymm8 6065 vmovdqa ymm8,ymm12 6066 vmovdqa ymm12,ymm1 6067 vmovdqa ymm1,ymm5 6068 vmovdqa ymm5,ymm9 6069 vmovdqa ymm9,ymm13 6070 vmovdqa ymm13,ymm2 6071 vmovdqa ymm2,ymm6 6072 jmp NEAR $L$open_avx2_short_hash_and_xor_loop 6073$L$open_avx2_short_tail_32: 6074 cmp rbx,16 6075 vmovdqa xmm1,xmm0 6076 jb NEAR $L$open_avx2_short_tail_32_exit 6077 sub rbx,16 6078 add r10,QWORD[((0+0))+rsi] 6079 adc r11,QWORD[((8+0))+rsi] 6080 adc r12,1 6081 mov rax,QWORD[((0+160+0))+rbp] 6082 mov r15,rax 6083 mul r10 6084 mov r13,rax 6085 mov r14,rdx 6086 mov rax,QWORD[((0+160+0))+rbp] 6087 mul r11 6088 imul r15,r12 6089 add r14,rax 6090 adc r15,rdx 6091 mov rax,QWORD[((8+160+0))+rbp] 6092 mov r9,rax 6093 mul r10 6094 add r14,rax 6095 adc rdx,0 6096 mov r10,rdx 6097 mov rax,QWORD[((8+160+0))+rbp] 6098 mul r11 6099 add r15,rax 6100 adc rdx,0 6101 imul r9,r12 6102 add r15,r10 6103 adc r9,rdx 6104 mov r10,r13 6105 mov r11,r14 6106 mov r12,r15 6107 and r12,3 6108 mov r13,r15 6109 and r13,-4 6110 mov r14,r9 6111 shrd r15,r9,2 6112 shr r9,2 6113 add r15,r13 6114 adc r9,r14 6115 add r10,r15 6116 adc r11,r9 6117 adc r12,0 6118 6119 vpxor xmm3,xmm0,XMMWORD[rsi] 6120 vmovdqu XMMWORD[rdi],xmm3 6121 lea rsi,[16+rsi] 6122 lea rdi,[16+rdi] 6123 vextracti128 xmm1,ymm0,1 6124$L$open_avx2_short_tail_32_exit: 6125 vzeroupper 6126 jmp NEAR $L$open_sse_tail_16 6127 6128$L$open_avx2_320: 6129 vmovdqa ymm1,ymm0 6130 vmovdqa ymm2,ymm0 6131 vmovdqa ymm5,ymm4 6132 vmovdqa ymm6,ymm4 6133 vmovdqa ymm9,ymm8 6134 vmovdqa ymm10,ymm8 6135 vpaddd ymm13,ymm12,YMMWORD[$L$avx2_inc] 6136 vpaddd ymm14,ymm13,YMMWORD[$L$avx2_inc] 6137 vmovdqa ymm7,ymm4 6138 vmovdqa ymm11,ymm8 6139 vmovdqa YMMWORD[(160+160)+rbp],ymm12 6140 vmovdqa YMMWORD[(160+192)+rbp],ymm13 6141 vmovdqa 
YMMWORD[(160+224)+rbp],ymm14 6142 mov r10,10 6143$L$open_avx2_320_rounds: 6144 vpaddd ymm0,ymm0,ymm4 6145 vpxor ymm12,ymm12,ymm0 6146 vpshufb ymm12,ymm12,YMMWORD[$L$rol16] 6147 vpaddd ymm8,ymm8,ymm12 6148 vpxor ymm4,ymm4,ymm8 6149 vpsrld ymm3,ymm4,20 6150 vpslld ymm4,ymm4,12 6151 vpxor ymm4,ymm4,ymm3 6152 vpaddd ymm0,ymm0,ymm4 6153 vpxor ymm12,ymm12,ymm0 6154 vpshufb ymm12,ymm12,YMMWORD[$L$rol8] 6155 vpaddd ymm8,ymm8,ymm12 6156 vpxor ymm4,ymm4,ymm8 6157 vpslld ymm3,ymm4,7 6158 vpsrld ymm4,ymm4,25 6159 vpxor ymm4,ymm4,ymm3 6160 vpalignr ymm12,ymm12,ymm12,12 6161 vpalignr ymm8,ymm8,ymm8,8 6162 vpalignr ymm4,ymm4,ymm4,4 6163 vpaddd ymm1,ymm1,ymm5 6164 vpxor ymm13,ymm13,ymm1 6165 vpshufb ymm13,ymm13,YMMWORD[$L$rol16] 6166 vpaddd ymm9,ymm9,ymm13 6167 vpxor ymm5,ymm5,ymm9 6168 vpsrld ymm3,ymm5,20 6169 vpslld ymm5,ymm5,12 6170 vpxor ymm5,ymm5,ymm3 6171 vpaddd ymm1,ymm1,ymm5 6172 vpxor ymm13,ymm13,ymm1 6173 vpshufb ymm13,ymm13,YMMWORD[$L$rol8] 6174 vpaddd ymm9,ymm9,ymm13 6175 vpxor ymm5,ymm5,ymm9 6176 vpslld ymm3,ymm5,7 6177 vpsrld ymm5,ymm5,25 6178 vpxor ymm5,ymm5,ymm3 6179 vpalignr ymm13,ymm13,ymm13,12 6180 vpalignr ymm9,ymm9,ymm9,8 6181 vpalignr ymm5,ymm5,ymm5,4 6182 vpaddd ymm2,ymm2,ymm6 6183 vpxor ymm14,ymm14,ymm2 6184 vpshufb ymm14,ymm14,YMMWORD[$L$rol16] 6185 vpaddd ymm10,ymm10,ymm14 6186 vpxor ymm6,ymm6,ymm10 6187 vpsrld ymm3,ymm6,20 6188 vpslld ymm6,ymm6,12 6189 vpxor ymm6,ymm6,ymm3 6190 vpaddd ymm2,ymm2,ymm6 6191 vpxor ymm14,ymm14,ymm2 6192 vpshufb ymm14,ymm14,YMMWORD[$L$rol8] 6193 vpaddd ymm10,ymm10,ymm14 6194 vpxor ymm6,ymm6,ymm10 6195 vpslld ymm3,ymm6,7 6196 vpsrld ymm6,ymm6,25 6197 vpxor ymm6,ymm6,ymm3 6198 vpalignr ymm14,ymm14,ymm14,12 6199 vpalignr ymm10,ymm10,ymm10,8 6200 vpalignr ymm6,ymm6,ymm6,4 6201 vpaddd ymm0,ymm0,ymm4 6202 vpxor ymm12,ymm12,ymm0 6203 vpshufb ymm12,ymm12,YMMWORD[$L$rol16] 6204 vpaddd ymm8,ymm8,ymm12 6205 vpxor ymm4,ymm4,ymm8 6206 vpsrld ymm3,ymm4,20 6207 vpslld ymm4,ymm4,12 6208 vpxor ymm4,ymm4,ymm3 6209 vpaddd ymm0,ymm0,ymm4 6210 
vpxor ymm12,ymm12,ymm0 6211 vpshufb ymm12,ymm12,YMMWORD[$L$rol8] 6212 vpaddd ymm8,ymm8,ymm12 6213 vpxor ymm4,ymm4,ymm8 6214 vpslld ymm3,ymm4,7 6215 vpsrld ymm4,ymm4,25 6216 vpxor ymm4,ymm4,ymm3 6217 vpalignr ymm12,ymm12,ymm12,4 6218 vpalignr ymm8,ymm8,ymm8,8 6219 vpalignr ymm4,ymm4,ymm4,12 6220 vpaddd ymm1,ymm1,ymm5 6221 vpxor ymm13,ymm13,ymm1 6222 vpshufb ymm13,ymm13,YMMWORD[$L$rol16] 6223 vpaddd ymm9,ymm9,ymm13 6224 vpxor ymm5,ymm5,ymm9 6225 vpsrld ymm3,ymm5,20 6226 vpslld ymm5,ymm5,12 6227 vpxor ymm5,ymm5,ymm3 6228 vpaddd ymm1,ymm1,ymm5 6229 vpxor ymm13,ymm13,ymm1 6230 vpshufb ymm13,ymm13,YMMWORD[$L$rol8] 6231 vpaddd ymm9,ymm9,ymm13 6232 vpxor ymm5,ymm5,ymm9 6233 vpslld ymm3,ymm5,7 6234 vpsrld ymm5,ymm5,25 6235 vpxor ymm5,ymm5,ymm3 6236 vpalignr ymm13,ymm13,ymm13,4 6237 vpalignr ymm9,ymm9,ymm9,8 6238 vpalignr ymm5,ymm5,ymm5,12 6239 vpaddd ymm2,ymm2,ymm6 6240 vpxor ymm14,ymm14,ymm2 6241 vpshufb ymm14,ymm14,YMMWORD[$L$rol16] 6242 vpaddd ymm10,ymm10,ymm14 6243 vpxor ymm6,ymm6,ymm10 6244 vpsrld ymm3,ymm6,20 6245 vpslld ymm6,ymm6,12 6246 vpxor ymm6,ymm6,ymm3 6247 vpaddd ymm2,ymm2,ymm6 6248 vpxor ymm14,ymm14,ymm2 6249 vpshufb ymm14,ymm14,YMMWORD[$L$rol8] 6250 vpaddd ymm10,ymm10,ymm14 6251 vpxor ymm6,ymm6,ymm10 6252 vpslld ymm3,ymm6,7 6253 vpsrld ymm6,ymm6,25 6254 vpxor ymm6,ymm6,ymm3 6255 vpalignr ymm14,ymm14,ymm14,4 6256 vpalignr ymm10,ymm10,ymm10,8 6257 vpalignr ymm6,ymm6,ymm6,12 6258 6259 dec r10 6260 jne NEAR $L$open_avx2_320_rounds 6261 vpaddd ymm0,ymm0,YMMWORD[$L$chacha20_consts] 6262 vpaddd ymm1,ymm1,YMMWORD[$L$chacha20_consts] 6263 vpaddd ymm2,ymm2,YMMWORD[$L$chacha20_consts] 6264 vpaddd ymm4,ymm4,ymm7 6265 vpaddd ymm5,ymm5,ymm7 6266 vpaddd ymm6,ymm6,ymm7 6267 vpaddd ymm8,ymm8,ymm11 6268 vpaddd ymm9,ymm9,ymm11 6269 vpaddd ymm10,ymm10,ymm11 6270 vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp] 6271 vpaddd ymm13,ymm13,YMMWORD[((160+192))+rbp] 6272 vpaddd ymm14,ymm14,YMMWORD[((160+224))+rbp] 6273 vperm2i128 ymm3,ymm4,ymm0,0x02 6274 6275 vpand 
ymm3,ymm3,YMMWORD[$L$clamp] 6276 vmovdqa YMMWORD[(160+0)+rbp],ymm3 6277 6278 vperm2i128 ymm0,ymm4,ymm0,0x13 6279 vperm2i128 ymm4,ymm12,ymm8,0x13 6280 vperm2i128 ymm8,ymm5,ymm1,0x02 6281 vperm2i128 ymm12,ymm13,ymm9,0x02 6282 vperm2i128 ymm1,ymm5,ymm1,0x13 6283 vperm2i128 ymm5,ymm13,ymm9,0x13 6284 vperm2i128 ymm9,ymm6,ymm2,0x02 6285 vperm2i128 ymm13,ymm14,ymm10,0x02 6286 vperm2i128 ymm2,ymm6,ymm2,0x13 6287 vperm2i128 ymm6,ymm14,ymm10,0x13 6288 jmp NEAR $L$open_avx2_short 6289 6290 6291 6292 6293 6294ALIGN 64 6295chacha20_poly1305_seal_avx2: 6296 6297 6298 6299 6300 6301 6302 6303 6304 6305 6306 6307 6308 vzeroupper 6309 vmovdqa ymm0,YMMWORD[$L$chacha20_consts] 6310 vbroadcasti128 ymm4,XMMWORD[r9] 6311 vbroadcasti128 ymm8,XMMWORD[16+r9] 6312 vbroadcasti128 ymm12,XMMWORD[32+r9] 6313 vpaddd ymm12,ymm12,YMMWORD[$L$avx2_init] 6314 cmp rbx,6*32 6315 jbe NEAR $L$seal_avx2_192 6316 cmp rbx,10*32 6317 jbe NEAR $L$seal_avx2_320 6318 vmovdqa ymm1,ymm0 6319 vmovdqa ymm2,ymm0 6320 vmovdqa ymm3,ymm0 6321 vmovdqa ymm5,ymm4 6322 vmovdqa ymm6,ymm4 6323 vmovdqa ymm7,ymm4 6324 vmovdqa YMMWORD[(160+64)+rbp],ymm4 6325 vmovdqa ymm9,ymm8 6326 vmovdqa ymm10,ymm8 6327 vmovdqa ymm11,ymm8 6328 vmovdqa YMMWORD[(160+96)+rbp],ymm8 6329 vmovdqa ymm15,ymm12 6330 vpaddd ymm14,ymm15,YMMWORD[$L$avx2_inc] 6331 vpaddd ymm13,ymm14,YMMWORD[$L$avx2_inc] 6332 vpaddd ymm12,ymm13,YMMWORD[$L$avx2_inc] 6333 vmovdqa YMMWORD[(160+160)+rbp],ymm12 6334 vmovdqa YMMWORD[(160+192)+rbp],ymm13 6335 vmovdqa YMMWORD[(160+224)+rbp],ymm14 6336 vmovdqa YMMWORD[(160+256)+rbp],ymm15 6337 mov r10,10 6338$L$seal_avx2_init_rounds: 6339 vmovdqa YMMWORD[(160+128)+rbp],ymm8 6340 vmovdqa ymm8,YMMWORD[$L$rol16] 6341 vpaddd ymm3,ymm3,ymm7 6342 vpaddd ymm2,ymm2,ymm6 6343 vpaddd ymm1,ymm1,ymm5 6344 vpaddd ymm0,ymm0,ymm4 6345 vpxor ymm15,ymm15,ymm3 6346 vpxor ymm14,ymm14,ymm2 6347 vpxor ymm13,ymm13,ymm1 6348 vpxor ymm12,ymm12,ymm0 6349 vpshufb ymm15,ymm15,ymm8 6350 vpshufb ymm14,ymm14,ymm8 6351 vpshufb ymm13,ymm13,ymm8 6352 vpshufb 
ymm12,ymm12,ymm8 6353 vpaddd ymm11,ymm11,ymm15 6354 vpaddd ymm10,ymm10,ymm14 6355 vpaddd ymm9,ymm9,ymm13 6356 vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] 6357 vpxor ymm7,ymm7,ymm11 6358 vpxor ymm6,ymm6,ymm10 6359 vpxor ymm5,ymm5,ymm9 6360 vpxor ymm4,ymm4,ymm8 6361 vmovdqa YMMWORD[(160+128)+rbp],ymm8 6362 vpsrld ymm8,ymm7,20 6363 vpslld ymm7,ymm7,32-20 6364 vpxor ymm7,ymm7,ymm8 6365 vpsrld ymm8,ymm6,20 6366 vpslld ymm6,ymm6,32-20 6367 vpxor ymm6,ymm6,ymm8 6368 vpsrld ymm8,ymm5,20 6369 vpslld ymm5,ymm5,32-20 6370 vpxor ymm5,ymm5,ymm8 6371 vpsrld ymm8,ymm4,20 6372 vpslld ymm4,ymm4,32-20 6373 vpxor ymm4,ymm4,ymm8 6374 vmovdqa ymm8,YMMWORD[$L$rol8] 6375 vpaddd ymm3,ymm3,ymm7 6376 vpaddd ymm2,ymm2,ymm6 6377 vpaddd ymm1,ymm1,ymm5 6378 vpaddd ymm0,ymm0,ymm4 6379 vpxor ymm15,ymm15,ymm3 6380 vpxor ymm14,ymm14,ymm2 6381 vpxor ymm13,ymm13,ymm1 6382 vpxor ymm12,ymm12,ymm0 6383 vpshufb ymm15,ymm15,ymm8 6384 vpshufb ymm14,ymm14,ymm8 6385 vpshufb ymm13,ymm13,ymm8 6386 vpshufb ymm12,ymm12,ymm8 6387 vpaddd ymm11,ymm11,ymm15 6388 vpaddd ymm10,ymm10,ymm14 6389 vpaddd ymm9,ymm9,ymm13 6390 vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] 6391 vpxor ymm7,ymm7,ymm11 6392 vpxor ymm6,ymm6,ymm10 6393 vpxor ymm5,ymm5,ymm9 6394 vpxor ymm4,ymm4,ymm8 6395 vmovdqa YMMWORD[(160+128)+rbp],ymm8 6396 vpsrld ymm8,ymm7,25 6397 vpslld ymm7,ymm7,32-25 6398 vpxor ymm7,ymm7,ymm8 6399 vpsrld ymm8,ymm6,25 6400 vpslld ymm6,ymm6,32-25 6401 vpxor ymm6,ymm6,ymm8 6402 vpsrld ymm8,ymm5,25 6403 vpslld ymm5,ymm5,32-25 6404 vpxor ymm5,ymm5,ymm8 6405 vpsrld ymm8,ymm4,25 6406 vpslld ymm4,ymm4,32-25 6407 vpxor ymm4,ymm4,ymm8 6408 vmovdqa ymm8,YMMWORD[((160+128))+rbp] 6409 vpalignr ymm7,ymm7,ymm7,4 6410 vpalignr ymm11,ymm11,ymm11,8 6411 vpalignr ymm15,ymm15,ymm15,12 6412 vpalignr ymm6,ymm6,ymm6,4 6413 vpalignr ymm10,ymm10,ymm10,8 6414 vpalignr ymm14,ymm14,ymm14,12 6415 vpalignr ymm5,ymm5,ymm5,4 6416 vpalignr ymm9,ymm9,ymm9,8 6417 vpalignr ymm13,ymm13,ymm13,12 6418 vpalignr ymm4,ymm4,ymm4,4 6419 vpalignr ymm8,ymm8,ymm8,8 6420 
vpalignr ymm12,ymm12,ymm12,12 6421 vmovdqa YMMWORD[(160+128)+rbp],ymm8 6422 vmovdqa ymm8,YMMWORD[$L$rol16] 6423 vpaddd ymm3,ymm3,ymm7 6424 vpaddd ymm2,ymm2,ymm6 6425 vpaddd ymm1,ymm1,ymm5 6426 vpaddd ymm0,ymm0,ymm4 6427 vpxor ymm15,ymm15,ymm3 6428 vpxor ymm14,ymm14,ymm2 6429 vpxor ymm13,ymm13,ymm1 6430 vpxor ymm12,ymm12,ymm0 6431 vpshufb ymm15,ymm15,ymm8 6432 vpshufb ymm14,ymm14,ymm8 6433 vpshufb ymm13,ymm13,ymm8 6434 vpshufb ymm12,ymm12,ymm8 6435 vpaddd ymm11,ymm11,ymm15 6436 vpaddd ymm10,ymm10,ymm14 6437 vpaddd ymm9,ymm9,ymm13 6438 vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] 6439 vpxor ymm7,ymm7,ymm11 6440 vpxor ymm6,ymm6,ymm10 6441 vpxor ymm5,ymm5,ymm9 6442 vpxor ymm4,ymm4,ymm8 6443 vmovdqa YMMWORD[(160+128)+rbp],ymm8 6444 vpsrld ymm8,ymm7,20 6445 vpslld ymm7,ymm7,32-20 6446 vpxor ymm7,ymm7,ymm8 6447 vpsrld ymm8,ymm6,20 6448 vpslld ymm6,ymm6,32-20 6449 vpxor ymm6,ymm6,ymm8 6450 vpsrld ymm8,ymm5,20 6451 vpslld ymm5,ymm5,32-20 6452 vpxor ymm5,ymm5,ymm8 6453 vpsrld ymm8,ymm4,20 6454 vpslld ymm4,ymm4,32-20 6455 vpxor ymm4,ymm4,ymm8 6456 vmovdqa ymm8,YMMWORD[$L$rol8] 6457 vpaddd ymm3,ymm3,ymm7 6458 vpaddd ymm2,ymm2,ymm6 6459 vpaddd ymm1,ymm1,ymm5 6460 vpaddd ymm0,ymm0,ymm4 6461 vpxor ymm15,ymm15,ymm3 6462 vpxor ymm14,ymm14,ymm2 6463 vpxor ymm13,ymm13,ymm1 6464 vpxor ymm12,ymm12,ymm0 6465 vpshufb ymm15,ymm15,ymm8 6466 vpshufb ymm14,ymm14,ymm8 6467 vpshufb ymm13,ymm13,ymm8 6468 vpshufb ymm12,ymm12,ymm8 6469 vpaddd ymm11,ymm11,ymm15 6470 vpaddd ymm10,ymm10,ymm14 6471 vpaddd ymm9,ymm9,ymm13 6472 vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] 6473 vpxor ymm7,ymm7,ymm11 6474 vpxor ymm6,ymm6,ymm10 6475 vpxor ymm5,ymm5,ymm9 6476 vpxor ymm4,ymm4,ymm8 6477 vmovdqa YMMWORD[(160+128)+rbp],ymm8 6478 vpsrld ymm8,ymm7,25 6479 vpslld ymm7,ymm7,32-25 6480 vpxor ymm7,ymm7,ymm8 6481 vpsrld ymm8,ymm6,25 6482 vpslld ymm6,ymm6,32-25 6483 vpxor ymm6,ymm6,ymm8 6484 vpsrld ymm8,ymm5,25 6485 vpslld ymm5,ymm5,32-25 6486 vpxor ymm5,ymm5,ymm8 6487 vpsrld ymm8,ymm4,25 6488 vpslld ymm4,ymm4,32-25 6489 
vpxor ymm4,ymm4,ymm8 6490 vmovdqa ymm8,YMMWORD[((160+128))+rbp] 6491 vpalignr ymm7,ymm7,ymm7,12 6492 vpalignr ymm11,ymm11,ymm11,8 6493 vpalignr ymm15,ymm15,ymm15,4 6494 vpalignr ymm6,ymm6,ymm6,12 6495 vpalignr ymm10,ymm10,ymm10,8 6496 vpalignr ymm14,ymm14,ymm14,4 6497 vpalignr ymm5,ymm5,ymm5,12 6498 vpalignr ymm9,ymm9,ymm9,8 6499 vpalignr ymm13,ymm13,ymm13,4 6500 vpalignr ymm4,ymm4,ymm4,12 6501 vpalignr ymm8,ymm8,ymm8,8 6502 vpalignr ymm12,ymm12,ymm12,4 6503 6504 dec r10 6505 jnz NEAR $L$seal_avx2_init_rounds 6506 vpaddd ymm3,ymm3,YMMWORD[$L$chacha20_consts] 6507 vpaddd ymm7,ymm7,YMMWORD[((160+64))+rbp] 6508 vpaddd ymm11,ymm11,YMMWORD[((160+96))+rbp] 6509 vpaddd ymm15,ymm15,YMMWORD[((160+256))+rbp] 6510 vpaddd ymm2,ymm2,YMMWORD[$L$chacha20_consts] 6511 vpaddd ymm6,ymm6,YMMWORD[((160+64))+rbp] 6512 vpaddd ymm10,ymm10,YMMWORD[((160+96))+rbp] 6513 vpaddd ymm14,ymm14,YMMWORD[((160+224))+rbp] 6514 vpaddd ymm1,ymm1,YMMWORD[$L$chacha20_consts] 6515 vpaddd ymm5,ymm5,YMMWORD[((160+64))+rbp] 6516 vpaddd ymm9,ymm9,YMMWORD[((160+96))+rbp] 6517 vpaddd ymm13,ymm13,YMMWORD[((160+192))+rbp] 6518 vpaddd ymm0,ymm0,YMMWORD[$L$chacha20_consts] 6519 vpaddd ymm4,ymm4,YMMWORD[((160+64))+rbp] 6520 vpaddd ymm8,ymm8,YMMWORD[((160+96))+rbp] 6521 vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp] 6522 6523 vperm2i128 ymm11,ymm15,ymm11,0x13 6524 vperm2i128 ymm15,ymm7,ymm3,0x02 6525 vperm2i128 ymm3,ymm7,ymm3,0x13 6526 vpand ymm15,ymm15,YMMWORD[$L$clamp] 6527 vmovdqa YMMWORD[(160+0)+rbp],ymm15 6528 mov r8,r8 6529 call poly_hash_ad_internal 6530 6531 vpxor ymm3,ymm3,YMMWORD[rsi] 6532 vpxor ymm11,ymm11,YMMWORD[32+rsi] 6533 vmovdqu YMMWORD[rdi],ymm3 6534 vmovdqu YMMWORD[32+rdi],ymm11 6535 vperm2i128 ymm15,ymm6,ymm2,0x02 6536 vperm2i128 ymm6,ymm6,ymm2,0x13 6537 vperm2i128 ymm2,ymm14,ymm10,0x02 6538 vperm2i128 ymm10,ymm14,ymm10,0x13 6539 vpxor ymm15,ymm15,YMMWORD[((0+64))+rsi] 6540 vpxor ymm2,ymm2,YMMWORD[((32+64))+rsi] 6541 vpxor ymm6,ymm6,YMMWORD[((64+64))+rsi] 6542 vpxor 
ymm10,ymm10,YMMWORD[((96+64))+rsi] 6543 vmovdqu YMMWORD[(0+64)+rdi],ymm15 6544 vmovdqu YMMWORD[(32+64)+rdi],ymm2 6545 vmovdqu YMMWORD[(64+64)+rdi],ymm6 6546 vmovdqu YMMWORD[(96+64)+rdi],ymm10 6547 vperm2i128 ymm15,ymm5,ymm1,0x02 6548 vperm2i128 ymm5,ymm5,ymm1,0x13 6549 vperm2i128 ymm1,ymm13,ymm9,0x02 6550 vperm2i128 ymm9,ymm13,ymm9,0x13 6551 vpxor ymm15,ymm15,YMMWORD[((0+192))+rsi] 6552 vpxor ymm1,ymm1,YMMWORD[((32+192))+rsi] 6553 vpxor ymm5,ymm5,YMMWORD[((64+192))+rsi] 6554 vpxor ymm9,ymm9,YMMWORD[((96+192))+rsi] 6555 vmovdqu YMMWORD[(0+192)+rdi],ymm15 6556 vmovdqu YMMWORD[(32+192)+rdi],ymm1 6557 vmovdqu YMMWORD[(64+192)+rdi],ymm5 6558 vmovdqu YMMWORD[(96+192)+rdi],ymm9 6559 vperm2i128 ymm15,ymm4,ymm0,0x13 6560 vperm2i128 ymm0,ymm4,ymm0,0x02 6561 vperm2i128 ymm4,ymm12,ymm8,0x02 6562 vperm2i128 ymm12,ymm12,ymm8,0x13 6563 vmovdqa ymm8,ymm15 6564 6565 lea rsi,[320+rsi] 6566 sub rbx,10*32 6567 mov rcx,10*32 6568 cmp rbx,4*32 6569 jbe NEAR $L$seal_avx2_short_hash_remainder 6570 vpxor ymm0,ymm0,YMMWORD[rsi] 6571 vpxor ymm4,ymm4,YMMWORD[32+rsi] 6572 vpxor ymm8,ymm8,YMMWORD[64+rsi] 6573 vpxor ymm12,ymm12,YMMWORD[96+rsi] 6574 vmovdqu YMMWORD[320+rdi],ymm0 6575 vmovdqu YMMWORD[352+rdi],ymm4 6576 vmovdqu YMMWORD[384+rdi],ymm8 6577 vmovdqu YMMWORD[416+rdi],ymm12 6578 lea rsi,[128+rsi] 6579 sub rbx,4*32 6580 mov rcx,8 6581 mov r8,2 6582 cmp rbx,4*32 6583 jbe NEAR $L$seal_avx2_tail_128 6584 cmp rbx,8*32 6585 jbe NEAR $L$seal_avx2_tail_256 6586 cmp rbx,12*32 6587 jbe NEAR $L$seal_avx2_tail_384 6588 cmp rbx,16*32 6589 jbe NEAR $L$seal_avx2_tail_512 6590 vmovdqa ymm0,YMMWORD[$L$chacha20_consts] 6591 vmovdqa ymm4,YMMWORD[((160+64))+rbp] 6592 vmovdqa ymm8,YMMWORD[((160+96))+rbp] 6593 vmovdqa ymm1,ymm0 6594 vmovdqa ymm5,ymm4 6595 vmovdqa ymm9,ymm8 6596 vmovdqa ymm2,ymm0 6597 vmovdqa ymm6,ymm4 6598 vmovdqa ymm10,ymm8 6599 vmovdqa ymm3,ymm0 6600 vmovdqa ymm7,ymm4 6601 vmovdqa ymm11,ymm8 6602 vmovdqa ymm12,YMMWORD[$L$avx2_inc] 6603 vpaddd ymm15,ymm12,YMMWORD[((160+160))+rbp] 6604 vpaddd 
ymm14,ymm12,ymm15 6605 vpaddd ymm13,ymm12,ymm14 6606 vpaddd ymm12,ymm12,ymm13 6607 vmovdqa YMMWORD[(160+256)+rbp],ymm15 6608 vmovdqa YMMWORD[(160+224)+rbp],ymm14 6609 vmovdqa YMMWORD[(160+192)+rbp],ymm13 6610 vmovdqa YMMWORD[(160+160)+rbp],ymm12 6611 vmovdqa YMMWORD[(160+128)+rbp],ymm8 6612 vmovdqa ymm8,YMMWORD[$L$rol16] 6613 vpaddd ymm3,ymm3,ymm7 6614 vpaddd ymm2,ymm2,ymm6 6615 vpaddd ymm1,ymm1,ymm5 6616 vpaddd ymm0,ymm0,ymm4 6617 vpxor ymm15,ymm15,ymm3 6618 vpxor ymm14,ymm14,ymm2 6619 vpxor ymm13,ymm13,ymm1 6620 vpxor ymm12,ymm12,ymm0 6621 vpshufb ymm15,ymm15,ymm8 6622 vpshufb ymm14,ymm14,ymm8 6623 vpshufb ymm13,ymm13,ymm8 6624 vpshufb ymm12,ymm12,ymm8 6625 vpaddd ymm11,ymm11,ymm15 6626 vpaddd ymm10,ymm10,ymm14 6627 vpaddd ymm9,ymm9,ymm13 6628 vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] 6629 vpxor ymm7,ymm7,ymm11 6630 vpxor ymm6,ymm6,ymm10 6631 vpxor ymm5,ymm5,ymm9 6632 vpxor ymm4,ymm4,ymm8 6633 vmovdqa YMMWORD[(160+128)+rbp],ymm8 6634 vpsrld ymm8,ymm7,20 6635 vpslld ymm7,ymm7,32-20 6636 vpxor ymm7,ymm7,ymm8 6637 vpsrld ymm8,ymm6,20 6638 vpslld ymm6,ymm6,32-20 6639 vpxor ymm6,ymm6,ymm8 6640 vpsrld ymm8,ymm5,20 6641 vpslld ymm5,ymm5,32-20 6642 vpxor ymm5,ymm5,ymm8 6643 vpsrld ymm8,ymm4,20 6644 vpslld ymm4,ymm4,32-20 6645 vpxor ymm4,ymm4,ymm8 6646 vmovdqa ymm8,YMMWORD[$L$rol8] 6647 vpaddd ymm3,ymm3,ymm7 6648 vpaddd ymm2,ymm2,ymm6 6649 vpaddd ymm1,ymm1,ymm5 6650 vpaddd ymm0,ymm0,ymm4 6651 vpxor ymm15,ymm15,ymm3 6652 vpxor ymm14,ymm14,ymm2 6653 vpxor ymm13,ymm13,ymm1 6654 vpxor ymm12,ymm12,ymm0 6655 vpshufb ymm15,ymm15,ymm8 6656 vpshufb ymm14,ymm14,ymm8 6657 vpshufb ymm13,ymm13,ymm8 6658 vpshufb ymm12,ymm12,ymm8 6659 vpaddd ymm11,ymm11,ymm15 6660 vpaddd ymm10,ymm10,ymm14 6661 vpaddd ymm9,ymm9,ymm13 6662 vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] 6663 vpxor ymm7,ymm7,ymm11 6664 vpxor ymm6,ymm6,ymm10 6665 vpxor ymm5,ymm5,ymm9 6666 vpxor ymm4,ymm4,ymm8 6667 vmovdqa YMMWORD[(160+128)+rbp],ymm8 6668 vpsrld ymm8,ymm7,25 6669 vpslld ymm7,ymm7,32-25 6670 vpxor ymm7,ymm7,ymm8 
6671 vpsrld ymm8,ymm6,25 6672 vpslld ymm6,ymm6,32-25 6673 vpxor ymm6,ymm6,ymm8 6674 vpsrld ymm8,ymm5,25 6675 vpslld ymm5,ymm5,32-25 6676 vpxor ymm5,ymm5,ymm8 6677 vpsrld ymm8,ymm4,25 6678 vpslld ymm4,ymm4,32-25 6679 vpxor ymm4,ymm4,ymm8 6680 vmovdqa ymm8,YMMWORD[((160+128))+rbp] 6681 vpalignr ymm7,ymm7,ymm7,4 6682 vpalignr ymm11,ymm11,ymm11,8 6683 vpalignr ymm15,ymm15,ymm15,12 6684 vpalignr ymm6,ymm6,ymm6,4 6685 vpalignr ymm10,ymm10,ymm10,8 6686 vpalignr ymm14,ymm14,ymm14,12 6687 vpalignr ymm5,ymm5,ymm5,4 6688 vpalignr ymm9,ymm9,ymm9,8 6689 vpalignr ymm13,ymm13,ymm13,12 6690 vpalignr ymm4,ymm4,ymm4,4 6691 vpalignr ymm8,ymm8,ymm8,8 6692 vpalignr ymm12,ymm12,ymm12,12 6693 vmovdqa YMMWORD[(160+128)+rbp],ymm8 6694 vmovdqa ymm8,YMMWORD[$L$rol16] 6695 vpaddd ymm3,ymm3,ymm7 6696 vpaddd ymm2,ymm2,ymm6 6697 vpaddd ymm1,ymm1,ymm5 6698 vpaddd ymm0,ymm0,ymm4 6699 vpxor ymm15,ymm15,ymm3 6700 vpxor ymm14,ymm14,ymm2 6701 vpxor ymm13,ymm13,ymm1 6702 vpxor ymm12,ymm12,ymm0 6703 vpshufb ymm15,ymm15,ymm8 6704 vpshufb ymm14,ymm14,ymm8 6705 vpshufb ymm13,ymm13,ymm8 6706 vpshufb ymm12,ymm12,ymm8 6707 vpaddd ymm11,ymm11,ymm15 6708 vpaddd ymm10,ymm10,ymm14 6709 vpaddd ymm9,ymm9,ymm13 6710 vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] 6711 vpxor ymm7,ymm7,ymm11 6712 vpxor ymm6,ymm6,ymm10 6713 vpxor ymm5,ymm5,ymm9 6714 vpxor ymm4,ymm4,ymm8 6715 vmovdqa YMMWORD[(160+128)+rbp],ymm8 6716 vpsrld ymm8,ymm7,20 6717 vpslld ymm7,ymm7,32-20 6718 vpxor ymm7,ymm7,ymm8 6719 vpsrld ymm8,ymm6,20 6720 vpslld ymm6,ymm6,32-20 6721 vpxor ymm6,ymm6,ymm8 6722 vpsrld ymm8,ymm5,20 6723 vpslld ymm5,ymm5,32-20 6724 vpxor ymm5,ymm5,ymm8 6725 vpsrld ymm8,ymm4,20 6726 vpslld ymm4,ymm4,32-20 6727 vpxor ymm4,ymm4,ymm8 6728 vmovdqa ymm8,YMMWORD[$L$rol8] 6729 vpaddd ymm3,ymm3,ymm7 6730 vpaddd ymm2,ymm2,ymm6 6731 vpaddd ymm1,ymm1,ymm5 6732 vpaddd ymm0,ymm0,ymm4 6733 vpxor ymm15,ymm15,ymm3 6734 vpxor ymm14,ymm14,ymm2 6735 vpxor ymm13,ymm13,ymm1 6736 vpxor ymm12,ymm12,ymm0 6737 vpshufb ymm15,ymm15,ymm8 6738 vpshufb 
ymm14,ymm14,ymm8 6739 vpshufb ymm13,ymm13,ymm8 6740 vpshufb ymm12,ymm12,ymm8 6741 vpaddd ymm11,ymm11,ymm15 6742 vpaddd ymm10,ymm10,ymm14 6743 vpaddd ymm9,ymm9,ymm13 6744 vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] 6745 vpxor ymm7,ymm7,ymm11 6746 vpxor ymm6,ymm6,ymm10 6747 vpxor ymm5,ymm5,ymm9 6748 vpxor ymm4,ymm4,ymm8 6749 vmovdqa YMMWORD[(160+128)+rbp],ymm8 6750 vpsrld ymm8,ymm7,25 6751 vpslld ymm7,ymm7,32-25 6752 vpxor ymm7,ymm7,ymm8 6753 vpsrld ymm8,ymm6,25 6754 vpslld ymm6,ymm6,32-25 6755 vpxor ymm6,ymm6,ymm8 6756 vpsrld ymm8,ymm5,25 6757 vpslld ymm5,ymm5,32-25 6758 vpxor ymm5,ymm5,ymm8 6759 vpsrld ymm8,ymm4,25 6760 vpslld ymm4,ymm4,32-25 6761 vpxor ymm4,ymm4,ymm8 6762 vmovdqa ymm8,YMMWORD[((160+128))+rbp] 6763 vpalignr ymm7,ymm7,ymm7,12 6764 vpalignr ymm11,ymm11,ymm11,8 6765 vpalignr ymm15,ymm15,ymm15,4 6766 vpalignr ymm6,ymm6,ymm6,12 6767 vpalignr ymm10,ymm10,ymm10,8 6768 vpalignr ymm14,ymm14,ymm14,4 6769 vpalignr ymm5,ymm5,ymm5,12 6770 vpalignr ymm9,ymm9,ymm9,8 6771 vpalignr ymm13,ymm13,ymm13,4 6772 vpalignr ymm4,ymm4,ymm4,12 6773 vpalignr ymm8,ymm8,ymm8,8 6774 vpalignr ymm12,ymm12,ymm12,4 6775 vmovdqa YMMWORD[(160+128)+rbp],ymm8 6776 vmovdqa ymm8,YMMWORD[$L$rol16] 6777 vpaddd ymm3,ymm3,ymm7 6778 vpaddd ymm2,ymm2,ymm6 6779 vpaddd ymm1,ymm1,ymm5 6780 vpaddd ymm0,ymm0,ymm4 6781 vpxor ymm15,ymm15,ymm3 6782 vpxor ymm14,ymm14,ymm2 6783 vpxor ymm13,ymm13,ymm1 6784 vpxor ymm12,ymm12,ymm0 6785 vpshufb ymm15,ymm15,ymm8 6786 vpshufb ymm14,ymm14,ymm8 6787 vpshufb ymm13,ymm13,ymm8 6788 vpshufb ymm12,ymm12,ymm8 6789 vpaddd ymm11,ymm11,ymm15 6790 vpaddd ymm10,ymm10,ymm14 6791 vpaddd ymm9,ymm9,ymm13 6792 vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] 6793 vpxor ymm7,ymm7,ymm11 6794 vpxor ymm6,ymm6,ymm10 6795 vpxor ymm5,ymm5,ymm9 6796 vpxor ymm4,ymm4,ymm8 6797 vmovdqa YMMWORD[(160+128)+rbp],ymm8 6798 vpsrld ymm8,ymm7,20 6799 vpslld ymm7,ymm7,32-20 6800 vpxor ymm7,ymm7,ymm8 6801 vpsrld ymm8,ymm6,20 6802 vpslld ymm6,ymm6,32-20 6803 vpxor ymm6,ymm6,ymm8 6804 vpsrld ymm8,ymm5,20 
6805 vpslld ymm5,ymm5,32-20 6806 vpxor ymm5,ymm5,ymm8 6807 vpsrld ymm8,ymm4,20 6808 vpslld ymm4,ymm4,32-20 6809 vpxor ymm4,ymm4,ymm8 6810 vmovdqa ymm8,YMMWORD[$L$rol8] 6811 vpaddd ymm3,ymm3,ymm7 6812 vpaddd ymm2,ymm2,ymm6 6813 vpaddd ymm1,ymm1,ymm5 6814 vpaddd ymm0,ymm0,ymm4 6815 vpxor ymm15,ymm15,ymm3 6816 6817 sub rdi,16 6818 mov rcx,9 6819 jmp NEAR $L$seal_avx2_main_loop_rounds_entry 6820ALIGN 32 6821$L$seal_avx2_main_loop: 6822 vmovdqa ymm0,YMMWORD[$L$chacha20_consts] 6823 vmovdqa ymm4,YMMWORD[((160+64))+rbp] 6824 vmovdqa ymm8,YMMWORD[((160+96))+rbp] 6825 vmovdqa ymm1,ymm0 6826 vmovdqa ymm5,ymm4 6827 vmovdqa ymm9,ymm8 6828 vmovdqa ymm2,ymm0 6829 vmovdqa ymm6,ymm4 6830 vmovdqa ymm10,ymm8 6831 vmovdqa ymm3,ymm0 6832 vmovdqa ymm7,ymm4 6833 vmovdqa ymm11,ymm8 6834 vmovdqa ymm12,YMMWORD[$L$avx2_inc] 6835 vpaddd ymm15,ymm12,YMMWORD[((160+160))+rbp] 6836 vpaddd ymm14,ymm12,ymm15 6837 vpaddd ymm13,ymm12,ymm14 6838 vpaddd ymm12,ymm12,ymm13 6839 vmovdqa YMMWORD[(160+256)+rbp],ymm15 6840 vmovdqa YMMWORD[(160+224)+rbp],ymm14 6841 vmovdqa YMMWORD[(160+192)+rbp],ymm13 6842 vmovdqa YMMWORD[(160+160)+rbp],ymm12 6843 6844 mov rcx,10 6845ALIGN 32 6846$L$seal_avx2_main_loop_rounds: 6847 add r10,QWORD[((0+0))+rdi] 6848 adc r11,QWORD[((8+0))+rdi] 6849 adc r12,1 6850 vmovdqa YMMWORD[(160+128)+rbp],ymm8 6851 vmovdqa ymm8,YMMWORD[$L$rol16] 6852 vpaddd ymm3,ymm3,ymm7 6853 vpaddd ymm2,ymm2,ymm6 6854 vpaddd ymm1,ymm1,ymm5 6855 vpaddd ymm0,ymm0,ymm4 6856 vpxor ymm15,ymm15,ymm3 6857 vpxor ymm14,ymm14,ymm2 6858 vpxor ymm13,ymm13,ymm1 6859 vpxor ymm12,ymm12,ymm0 6860 mov rdx,QWORD[((0+160+0))+rbp] 6861 mov r15,rdx 6862 mulx r14,r13,r10 6863 mulx rdx,rax,r11 6864 imul r15,r12 6865 add r14,rax 6866 adc r15,rdx 6867 vpshufb ymm15,ymm15,ymm8 6868 vpshufb ymm14,ymm14,ymm8 6869 vpshufb ymm13,ymm13,ymm8 6870 vpshufb ymm12,ymm12,ymm8 6871 vpaddd ymm11,ymm11,ymm15 6872 vpaddd ymm10,ymm10,ymm14 6873 vpaddd ymm9,ymm9,ymm13 6874 vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] 6875 vpxor ymm7,ymm7,ymm11 6876 
mov rdx,QWORD[((8+160+0))+rbp] 6877 mulx rax,r10,r10 6878 add r14,r10 6879 mulx r9,r11,r11 6880 adc r15,r11 6881 adc r9,0 6882 imul rdx,r12 6883 vpxor ymm6,ymm6,ymm10 6884 vpxor ymm5,ymm5,ymm9 6885 vpxor ymm4,ymm4,ymm8 6886 vmovdqa YMMWORD[(160+128)+rbp],ymm8 6887 vpsrld ymm8,ymm7,20 6888 vpslld ymm7,ymm7,32-20 6889 vpxor ymm7,ymm7,ymm8 6890 vpsrld ymm8,ymm6,20 6891 vpslld ymm6,ymm6,32-20 6892 vpxor ymm6,ymm6,ymm8 6893 vpsrld ymm8,ymm5,20 6894 vpslld ymm5,ymm5,32-20 6895 add r15,rax 6896 adc r9,rdx 6897 vpxor ymm5,ymm5,ymm8 6898 vpsrld ymm8,ymm4,20 6899 vpslld ymm4,ymm4,32-20 6900 vpxor ymm4,ymm4,ymm8 6901 vmovdqa ymm8,YMMWORD[$L$rol8] 6902 vpaddd ymm3,ymm3,ymm7 6903 vpaddd ymm2,ymm2,ymm6 6904 vpaddd ymm1,ymm1,ymm5 6905 vpaddd ymm0,ymm0,ymm4 6906 vpxor ymm15,ymm15,ymm3 6907 mov r10,r13 6908 mov r11,r14 6909 mov r12,r15 6910 and r12,3 6911 mov r13,r15 6912 and r13,-4 6913 mov r14,r9 6914 shrd r15,r9,2 6915 shr r9,2 6916 add r15,r13 6917 adc r9,r14 6918 add r10,r15 6919 adc r11,r9 6920 adc r12,0 6921 6922$L$seal_avx2_main_loop_rounds_entry: 6923 vpxor ymm14,ymm14,ymm2 6924 vpxor ymm13,ymm13,ymm1 6925 vpxor ymm12,ymm12,ymm0 6926 vpshufb ymm15,ymm15,ymm8 6927 vpshufb ymm14,ymm14,ymm8 6928 vpshufb ymm13,ymm13,ymm8 6929 vpshufb ymm12,ymm12,ymm8 6930 vpaddd ymm11,ymm11,ymm15 6931 vpaddd ymm10,ymm10,ymm14 6932 add r10,QWORD[((0+16))+rdi] 6933 adc r11,QWORD[((8+16))+rdi] 6934 adc r12,1 6935 vpaddd ymm9,ymm9,ymm13 6936 vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] 6937 vpxor ymm7,ymm7,ymm11 6938 vpxor ymm6,ymm6,ymm10 6939 vpxor ymm5,ymm5,ymm9 6940 vpxor ymm4,ymm4,ymm8 6941 vmovdqa YMMWORD[(160+128)+rbp],ymm8 6942 vpsrld ymm8,ymm7,25 6943 mov rdx,QWORD[((0+160+0))+rbp] 6944 mov r15,rdx 6945 mulx r14,r13,r10 6946 mulx rdx,rax,r11 6947 imul r15,r12 6948 add r14,rax 6949 adc r15,rdx 6950 vpslld ymm7,ymm7,32-25 6951 vpxor ymm7,ymm7,ymm8 6952 vpsrld ymm8,ymm6,25 6953 vpslld ymm6,ymm6,32-25 6954 vpxor ymm6,ymm6,ymm8 6955 vpsrld ymm8,ymm5,25 6956 vpslld ymm5,ymm5,32-25 6957 vpxor 
ymm5,ymm5,ymm8 6958 vpsrld ymm8,ymm4,25 6959 vpslld ymm4,ymm4,32-25 6960 vpxor ymm4,ymm4,ymm8 6961 vmovdqa ymm8,YMMWORD[((160+128))+rbp] 6962 vpalignr ymm7,ymm7,ymm7,4 6963 vpalignr ymm11,ymm11,ymm11,8 6964 vpalignr ymm15,ymm15,ymm15,12 6965 vpalignr ymm6,ymm6,ymm6,4 6966 vpalignr ymm10,ymm10,ymm10,8 6967 vpalignr ymm14,ymm14,ymm14,12 6968 mov rdx,QWORD[((8+160+0))+rbp] 6969 mulx rax,r10,r10 6970 add r14,r10 6971 mulx r9,r11,r11 6972 adc r15,r11 6973 adc r9,0 6974 imul rdx,r12 6975 vpalignr ymm5,ymm5,ymm5,4 6976 vpalignr ymm9,ymm9,ymm9,8 6977 vpalignr ymm13,ymm13,ymm13,12 6978 vpalignr ymm4,ymm4,ymm4,4 6979 vpalignr ymm8,ymm8,ymm8,8 6980 vpalignr ymm12,ymm12,ymm12,12 6981 vmovdqa YMMWORD[(160+128)+rbp],ymm8 6982 vmovdqa ymm8,YMMWORD[$L$rol16] 6983 vpaddd ymm3,ymm3,ymm7 6984 vpaddd ymm2,ymm2,ymm6 6985 vpaddd ymm1,ymm1,ymm5 6986 vpaddd ymm0,ymm0,ymm4 6987 vpxor ymm15,ymm15,ymm3 6988 vpxor ymm14,ymm14,ymm2 6989 vpxor ymm13,ymm13,ymm1 6990 vpxor ymm12,ymm12,ymm0 6991 vpshufb ymm15,ymm15,ymm8 6992 vpshufb ymm14,ymm14,ymm8 6993 add r15,rax 6994 adc r9,rdx 6995 vpshufb ymm13,ymm13,ymm8 6996 vpshufb ymm12,ymm12,ymm8 6997 vpaddd ymm11,ymm11,ymm15 6998 vpaddd ymm10,ymm10,ymm14 6999 vpaddd ymm9,ymm9,ymm13 7000 vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] 7001 vpxor ymm7,ymm7,ymm11 7002 vpxor ymm6,ymm6,ymm10 7003 vpxor ymm5,ymm5,ymm9 7004 mov r10,r13 7005 mov r11,r14 7006 mov r12,r15 7007 and r12,3 7008 mov r13,r15 7009 and r13,-4 7010 mov r14,r9 7011 shrd r15,r9,2 7012 shr r9,2 7013 add r15,r13 7014 adc r9,r14 7015 add r10,r15 7016 adc r11,r9 7017 adc r12,0 7018 vpxor ymm4,ymm4,ymm8 7019 vmovdqa YMMWORD[(160+128)+rbp],ymm8 7020 vpsrld ymm8,ymm7,20 7021 vpslld ymm7,ymm7,32-20 7022 vpxor ymm7,ymm7,ymm8 7023 vpsrld ymm8,ymm6,20 7024 vpslld ymm6,ymm6,32-20 7025 vpxor ymm6,ymm6,ymm8 7026 add r10,QWORD[((0+32))+rdi] 7027 adc r11,QWORD[((8+32))+rdi] 7028 adc r12,1 7029 7030 lea rdi,[48+rdi] 7031 vpsrld ymm8,ymm5,20 7032 vpslld ymm5,ymm5,32-20 7033 vpxor ymm5,ymm5,ymm8 7034 vpsrld 
ymm8,ymm4,20 7035 vpslld ymm4,ymm4,32-20 7036 vpxor ymm4,ymm4,ymm8 7037 vmovdqa ymm8,YMMWORD[$L$rol8] 7038 vpaddd ymm3,ymm3,ymm7 7039 vpaddd ymm2,ymm2,ymm6 7040 vpaddd ymm1,ymm1,ymm5 7041 vpaddd ymm0,ymm0,ymm4 7042 vpxor ymm15,ymm15,ymm3 7043 vpxor ymm14,ymm14,ymm2 7044 vpxor ymm13,ymm13,ymm1 7045 vpxor ymm12,ymm12,ymm0 7046 vpshufb ymm15,ymm15,ymm8 7047 vpshufb ymm14,ymm14,ymm8 7048 vpshufb ymm13,ymm13,ymm8 7049 mov rdx,QWORD[((0+160+0))+rbp] 7050 mov r15,rdx 7051 mulx r14,r13,r10 7052 mulx rdx,rax,r11 7053 imul r15,r12 7054 add r14,rax 7055 adc r15,rdx 7056 vpshufb ymm12,ymm12,ymm8 7057 vpaddd ymm11,ymm11,ymm15 7058 vpaddd ymm10,ymm10,ymm14 7059 vpaddd ymm9,ymm9,ymm13 7060 vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] 7061 vpxor ymm7,ymm7,ymm11 7062 vpxor ymm6,ymm6,ymm10 7063 vpxor ymm5,ymm5,ymm9 7064 mov rdx,QWORD[((8+160+0))+rbp] 7065 mulx rax,r10,r10 7066 add r14,r10 7067 mulx r9,r11,r11 7068 adc r15,r11 7069 adc r9,0 7070 imul rdx,r12 7071 vpxor ymm4,ymm4,ymm8 7072 vmovdqa YMMWORD[(160+128)+rbp],ymm8 7073 vpsrld ymm8,ymm7,25 7074 vpslld ymm7,ymm7,32-25 7075 vpxor ymm7,ymm7,ymm8 7076 vpsrld ymm8,ymm6,25 7077 vpslld ymm6,ymm6,32-25 7078 vpxor ymm6,ymm6,ymm8 7079 add r15,rax 7080 adc r9,rdx 7081 vpsrld ymm8,ymm5,25 7082 vpslld ymm5,ymm5,32-25 7083 vpxor ymm5,ymm5,ymm8 7084 vpsrld ymm8,ymm4,25 7085 vpslld ymm4,ymm4,32-25 7086 vpxor ymm4,ymm4,ymm8 7087 vmovdqa ymm8,YMMWORD[((160+128))+rbp] 7088 vpalignr ymm7,ymm7,ymm7,12 7089 vpalignr ymm11,ymm11,ymm11,8 7090 vpalignr ymm15,ymm15,ymm15,4 7091 vpalignr ymm6,ymm6,ymm6,12 7092 vpalignr ymm10,ymm10,ymm10,8 7093 vpalignr ymm14,ymm14,ymm14,4 7094 vpalignr ymm5,ymm5,ymm5,12 7095 vpalignr ymm9,ymm9,ymm9,8 7096 vpalignr ymm13,ymm13,ymm13,4 7097 vpalignr ymm4,ymm4,ymm4,12 7098 vpalignr ymm8,ymm8,ymm8,8 7099 mov r10,r13 7100 mov r11,r14 7101 mov r12,r15 7102 and r12,3 7103 mov r13,r15 7104 and r13,-4 7105 mov r14,r9 7106 shrd r15,r9,2 7107 shr r9,2 7108 add r15,r13 7109 adc r9,r14 7110 add r10,r15 7111 adc r11,r9 7112 adc r12,0 
7113 vpalignr ymm12,ymm12,ymm12,4 7114 7115 dec rcx 7116 jne NEAR $L$seal_avx2_main_loop_rounds 7117 vpaddd ymm3,ymm3,YMMWORD[$L$chacha20_consts] 7118 vpaddd ymm7,ymm7,YMMWORD[((160+64))+rbp] 7119 vpaddd ymm11,ymm11,YMMWORD[((160+96))+rbp] 7120 vpaddd ymm15,ymm15,YMMWORD[((160+256))+rbp] 7121 vpaddd ymm2,ymm2,YMMWORD[$L$chacha20_consts] 7122 vpaddd ymm6,ymm6,YMMWORD[((160+64))+rbp] 7123 vpaddd ymm10,ymm10,YMMWORD[((160+96))+rbp] 7124 vpaddd ymm14,ymm14,YMMWORD[((160+224))+rbp] 7125 vpaddd ymm1,ymm1,YMMWORD[$L$chacha20_consts] 7126 vpaddd ymm5,ymm5,YMMWORD[((160+64))+rbp] 7127 vpaddd ymm9,ymm9,YMMWORD[((160+96))+rbp] 7128 vpaddd ymm13,ymm13,YMMWORD[((160+192))+rbp] 7129 vpaddd ymm0,ymm0,YMMWORD[$L$chacha20_consts] 7130 vpaddd ymm4,ymm4,YMMWORD[((160+64))+rbp] 7131 vpaddd ymm8,ymm8,YMMWORD[((160+96))+rbp] 7132 vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp] 7133 7134 vmovdqa YMMWORD[(160+128)+rbp],ymm0 7135 add r10,QWORD[((0+0))+rdi] 7136 adc r11,QWORD[((8+0))+rdi] 7137 adc r12,1 7138 mov rdx,QWORD[((0+160+0))+rbp] 7139 mov r15,rdx 7140 mulx r14,r13,r10 7141 mulx rdx,rax,r11 7142 imul r15,r12 7143 add r14,rax 7144 adc r15,rdx 7145 mov rdx,QWORD[((8+160+0))+rbp] 7146 mulx rax,r10,r10 7147 add r14,r10 7148 mulx r9,r11,r11 7149 adc r15,r11 7150 adc r9,0 7151 imul rdx,r12 7152 add r15,rax 7153 adc r9,rdx 7154 mov r10,r13 7155 mov r11,r14 7156 mov r12,r15 7157 and r12,3 7158 mov r13,r15 7159 and r13,-4 7160 mov r14,r9 7161 shrd r15,r9,2 7162 shr r9,2 7163 add r15,r13 7164 adc r9,r14 7165 add r10,r15 7166 adc r11,r9 7167 adc r12,0 7168 add r10,QWORD[((0+16))+rdi] 7169 adc r11,QWORD[((8+16))+rdi] 7170 adc r12,1 7171 mov rdx,QWORD[((0+160+0))+rbp] 7172 mov r15,rdx 7173 mulx r14,r13,r10 7174 mulx rdx,rax,r11 7175 imul r15,r12 7176 add r14,rax 7177 adc r15,rdx 7178 mov rdx,QWORD[((8+160+0))+rbp] 7179 mulx rax,r10,r10 7180 add r14,r10 7181 mulx r9,r11,r11 7182 adc r15,r11 7183 adc r9,0 7184 imul rdx,r12 7185 add r15,rax 7186 adc r9,rdx 7187 mov r10,r13 7188 mov r11,r14 7189 mov 
r12,r15 7190 and r12,3 7191 mov r13,r15 7192 and r13,-4 7193 mov r14,r9 7194 shrd r15,r9,2 7195 shr r9,2 7196 add r15,r13 7197 adc r9,r14 7198 add r10,r15 7199 adc r11,r9 7200 adc r12,0 7201 7202 lea rdi,[32+rdi] 7203 vperm2i128 ymm0,ymm7,ymm3,0x02 7204 vperm2i128 ymm7,ymm7,ymm3,0x13 7205 vperm2i128 ymm3,ymm15,ymm11,0x02 7206 vperm2i128 ymm11,ymm15,ymm11,0x13 7207 vpxor ymm0,ymm0,YMMWORD[((0+0))+rsi] 7208 vpxor ymm3,ymm3,YMMWORD[((32+0))+rsi] 7209 vpxor ymm7,ymm7,YMMWORD[((64+0))+rsi] 7210 vpxor ymm11,ymm11,YMMWORD[((96+0))+rsi] 7211 vmovdqu YMMWORD[(0+0)+rdi],ymm0 7212 vmovdqu YMMWORD[(32+0)+rdi],ymm3 7213 vmovdqu YMMWORD[(64+0)+rdi],ymm7 7214 vmovdqu YMMWORD[(96+0)+rdi],ymm11 7215 7216 vmovdqa ymm0,YMMWORD[((160+128))+rbp] 7217 vperm2i128 ymm3,ymm6,ymm2,0x02 7218 vperm2i128 ymm6,ymm6,ymm2,0x13 7219 vperm2i128 ymm2,ymm14,ymm10,0x02 7220 vperm2i128 ymm10,ymm14,ymm10,0x13 7221 vpxor ymm3,ymm3,YMMWORD[((0+128))+rsi] 7222 vpxor ymm2,ymm2,YMMWORD[((32+128))+rsi] 7223 vpxor ymm6,ymm6,YMMWORD[((64+128))+rsi] 7224 vpxor ymm10,ymm10,YMMWORD[((96+128))+rsi] 7225 vmovdqu YMMWORD[(0+128)+rdi],ymm3 7226 vmovdqu YMMWORD[(32+128)+rdi],ymm2 7227 vmovdqu YMMWORD[(64+128)+rdi],ymm6 7228 vmovdqu YMMWORD[(96+128)+rdi],ymm10 7229 vperm2i128 ymm3,ymm5,ymm1,0x02 7230 vperm2i128 ymm5,ymm5,ymm1,0x13 7231 vperm2i128 ymm1,ymm13,ymm9,0x02 7232 vperm2i128 ymm9,ymm13,ymm9,0x13 7233 vpxor ymm3,ymm3,YMMWORD[((0+256))+rsi] 7234 vpxor ymm1,ymm1,YMMWORD[((32+256))+rsi] 7235 vpxor ymm5,ymm5,YMMWORD[((64+256))+rsi] 7236 vpxor ymm9,ymm9,YMMWORD[((96+256))+rsi] 7237 vmovdqu YMMWORD[(0+256)+rdi],ymm3 7238 vmovdqu YMMWORD[(32+256)+rdi],ymm1 7239 vmovdqu YMMWORD[(64+256)+rdi],ymm5 7240 vmovdqu YMMWORD[(96+256)+rdi],ymm9 7241 vperm2i128 ymm3,ymm4,ymm0,0x02 7242 vperm2i128 ymm4,ymm4,ymm0,0x13 7243 vperm2i128 ymm0,ymm12,ymm8,0x02 7244 vperm2i128 ymm8,ymm12,ymm8,0x13 7245 vpxor ymm3,ymm3,YMMWORD[((0+384))+rsi] 7246 vpxor ymm0,ymm0,YMMWORD[((32+384))+rsi] 7247 vpxor ymm4,ymm4,YMMWORD[((64+384))+rsi] 7248 vpxor 
ymm8,ymm8,YMMWORD[((96+384))+rsi] 7249 vmovdqu YMMWORD[(0+384)+rdi],ymm3 7250 vmovdqu YMMWORD[(32+384)+rdi],ymm0 7251 vmovdqu YMMWORD[(64+384)+rdi],ymm4 7252 vmovdqu YMMWORD[(96+384)+rdi],ymm8 7253 7254 lea rsi,[512+rsi] 7255 sub rbx,16*32 7256 cmp rbx,16*32 7257 jg NEAR $L$seal_avx2_main_loop 7258 7259 add r10,QWORD[((0+0))+rdi] 7260 adc r11,QWORD[((8+0))+rdi] 7261 adc r12,1 7262 mov rdx,QWORD[((0+160+0))+rbp] 7263 mov r15,rdx 7264 mulx r14,r13,r10 7265 mulx rdx,rax,r11 7266 imul r15,r12 7267 add r14,rax 7268 adc r15,rdx 7269 mov rdx,QWORD[((8+160+0))+rbp] 7270 mulx rax,r10,r10 7271 add r14,r10 7272 mulx r9,r11,r11 7273 adc r15,r11 7274 adc r9,0 7275 imul rdx,r12 7276 add r15,rax 7277 adc r9,rdx 7278 mov r10,r13 7279 mov r11,r14 7280 mov r12,r15 7281 and r12,3 7282 mov r13,r15 7283 and r13,-4 7284 mov r14,r9 7285 shrd r15,r9,2 7286 shr r9,2 7287 add r15,r13 7288 adc r9,r14 7289 add r10,r15 7290 adc r11,r9 7291 adc r12,0 7292 add r10,QWORD[((0+16))+rdi] 7293 adc r11,QWORD[((8+16))+rdi] 7294 adc r12,1 7295 mov rdx,QWORD[((0+160+0))+rbp] 7296 mov r15,rdx 7297 mulx r14,r13,r10 7298 mulx rdx,rax,r11 7299 imul r15,r12 7300 add r14,rax 7301 adc r15,rdx 7302 mov rdx,QWORD[((8+160+0))+rbp] 7303 mulx rax,r10,r10 7304 add r14,r10 7305 mulx r9,r11,r11 7306 adc r15,r11 7307 adc r9,0 7308 imul rdx,r12 7309 add r15,rax 7310 adc r9,rdx 7311 mov r10,r13 7312 mov r11,r14 7313 mov r12,r15 7314 and r12,3 7315 mov r13,r15 7316 and r13,-4 7317 mov r14,r9 7318 shrd r15,r9,2 7319 shr r9,2 7320 add r15,r13 7321 adc r9,r14 7322 add r10,r15 7323 adc r11,r9 7324 adc r12,0 7325 7326 lea rdi,[32+rdi] 7327 mov rcx,10 7328 xor r8,r8 7329 7330 cmp rbx,12*32 7331 ja NEAR $L$seal_avx2_tail_512 7332 cmp rbx,8*32 7333 ja NEAR $L$seal_avx2_tail_384 7334 cmp rbx,4*32 7335 ja NEAR $L$seal_avx2_tail_256 7336 7337$L$seal_avx2_tail_128: 7338 vmovdqa ymm0,YMMWORD[$L$chacha20_consts] 7339 vmovdqa ymm4,YMMWORD[((160+64))+rbp] 7340 vmovdqa ymm8,YMMWORD[((160+96))+rbp] 7341 vmovdqa ymm12,YMMWORD[$L$avx2_inc] 
7342 vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp] 7343 vmovdqa YMMWORD[(160+160)+rbp],ymm12 7344 7345$L$seal_avx2_tail_128_rounds_and_3xhash: 7346 add r10,QWORD[((0+0))+rdi] 7347 adc r11,QWORD[((8+0))+rdi] 7348 adc r12,1 7349 mov rdx,QWORD[((0+160+0))+rbp] 7350 mov r15,rdx 7351 mulx r14,r13,r10 7352 mulx rdx,rax,r11 7353 imul r15,r12 7354 add r14,rax 7355 adc r15,rdx 7356 mov rdx,QWORD[((8+160+0))+rbp] 7357 mulx rax,r10,r10 7358 add r14,r10 7359 mulx r9,r11,r11 7360 adc r15,r11 7361 adc r9,0 7362 imul rdx,r12 7363 add r15,rax 7364 adc r9,rdx 7365 mov r10,r13 7366 mov r11,r14 7367 mov r12,r15 7368 and r12,3 7369 mov r13,r15 7370 and r13,-4 7371 mov r14,r9 7372 shrd r15,r9,2 7373 shr r9,2 7374 add r15,r13 7375 adc r9,r14 7376 add r10,r15 7377 adc r11,r9 7378 adc r12,0 7379 7380 lea rdi,[16+rdi] 7381$L$seal_avx2_tail_128_rounds_and_2xhash: 7382 vpaddd ymm0,ymm0,ymm4 7383 vpxor ymm12,ymm12,ymm0 7384 vpshufb ymm12,ymm12,YMMWORD[$L$rol16] 7385 vpaddd ymm8,ymm8,ymm12 7386 vpxor ymm4,ymm4,ymm8 7387 vpsrld ymm3,ymm4,20 7388 vpslld ymm4,ymm4,12 7389 vpxor ymm4,ymm4,ymm3 7390 vpaddd ymm0,ymm0,ymm4 7391 vpxor ymm12,ymm12,ymm0 7392 vpshufb ymm12,ymm12,YMMWORD[$L$rol8] 7393 vpaddd ymm8,ymm8,ymm12 7394 vpxor ymm4,ymm4,ymm8 7395 vpslld ymm3,ymm4,7 7396 vpsrld ymm4,ymm4,25 7397 vpxor ymm4,ymm4,ymm3 7398 vpalignr ymm12,ymm12,ymm12,12 7399 vpalignr ymm8,ymm8,ymm8,8 7400 vpalignr ymm4,ymm4,ymm4,4 7401 add r10,QWORD[((0+0))+rdi] 7402 adc r11,QWORD[((8+0))+rdi] 7403 adc r12,1 7404 mov rdx,QWORD[((0+160+0))+rbp] 7405 mov r15,rdx 7406 mulx r14,r13,r10 7407 mulx rdx,rax,r11 7408 imul r15,r12 7409 add r14,rax 7410 adc r15,rdx 7411 mov rdx,QWORD[((8+160+0))+rbp] 7412 mulx rax,r10,r10 7413 add r14,r10 7414 mulx r9,r11,r11 7415 adc r15,r11 7416 adc r9,0 7417 imul rdx,r12 7418 add r15,rax 7419 adc r9,rdx 7420 mov r10,r13 7421 mov r11,r14 7422 mov r12,r15 7423 and r12,3 7424 mov r13,r15 7425 and r13,-4 7426 mov r14,r9 7427 shrd r15,r9,2 7428 shr r9,2 7429 add r15,r13 7430 adc r9,r14 7431 add 
r10,r15 7432 adc r11,r9 7433 adc r12,0 7434 vpaddd ymm0,ymm0,ymm4 7435 vpxor ymm12,ymm12,ymm0 7436 vpshufb ymm12,ymm12,YMMWORD[$L$rol16] 7437 vpaddd ymm8,ymm8,ymm12 7438 vpxor ymm4,ymm4,ymm8 7439 vpsrld ymm3,ymm4,20 7440 vpslld ymm4,ymm4,12 7441 vpxor ymm4,ymm4,ymm3 7442 vpaddd ymm0,ymm0,ymm4 7443 vpxor ymm12,ymm12,ymm0 7444 vpshufb ymm12,ymm12,YMMWORD[$L$rol8] 7445 vpaddd ymm8,ymm8,ymm12 7446 vpxor ymm4,ymm4,ymm8 7447 vpslld ymm3,ymm4,7 7448 vpsrld ymm4,ymm4,25 7449 vpxor ymm4,ymm4,ymm3 7450 vpalignr ymm12,ymm12,ymm12,4 7451 vpalignr ymm8,ymm8,ymm8,8 7452 vpalignr ymm4,ymm4,ymm4,12 7453 add r10,QWORD[((0+16))+rdi] 7454 adc r11,QWORD[((8+16))+rdi] 7455 adc r12,1 7456 mov rdx,QWORD[((0+160+0))+rbp] 7457 mov r15,rdx 7458 mulx r14,r13,r10 7459 mulx rdx,rax,r11 7460 imul r15,r12 7461 add r14,rax 7462 adc r15,rdx 7463 mov rdx,QWORD[((8+160+0))+rbp] 7464 mulx rax,r10,r10 7465 add r14,r10 7466 mulx r9,r11,r11 7467 adc r15,r11 7468 adc r9,0 7469 imul rdx,r12 7470 add r15,rax 7471 adc r9,rdx 7472 mov r10,r13 7473 mov r11,r14 7474 mov r12,r15 7475 and r12,3 7476 mov r13,r15 7477 and r13,-4 7478 mov r14,r9 7479 shrd r15,r9,2 7480 shr r9,2 7481 add r15,r13 7482 adc r9,r14 7483 add r10,r15 7484 adc r11,r9 7485 adc r12,0 7486 7487 lea rdi,[32+rdi] 7488 dec rcx 7489 jg NEAR $L$seal_avx2_tail_128_rounds_and_3xhash 7490 dec r8 7491 jge NEAR $L$seal_avx2_tail_128_rounds_and_2xhash 7492 vpaddd ymm0,ymm0,YMMWORD[$L$chacha20_consts] 7493 vpaddd ymm4,ymm4,YMMWORD[((160+64))+rbp] 7494 vpaddd ymm8,ymm8,YMMWORD[((160+96))+rbp] 7495 vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp] 7496 vperm2i128 ymm3,ymm4,ymm0,0x13 7497 vperm2i128 ymm0,ymm4,ymm0,0x02 7498 vperm2i128 ymm4,ymm12,ymm8,0x02 7499 vperm2i128 ymm12,ymm12,ymm8,0x13 7500 vmovdqa ymm8,ymm3 7501 7502 jmp NEAR $L$seal_avx2_short_loop 7503 7504$L$seal_avx2_tail_256: 7505 vmovdqa ymm0,YMMWORD[$L$chacha20_consts] 7506 vmovdqa ymm4,YMMWORD[((160+64))+rbp] 7507 vmovdqa ymm8,YMMWORD[((160+96))+rbp] 7508 vmovdqa ymm1,ymm0 7509 vmovdqa ymm5,ymm4 
7510 vmovdqa ymm9,ymm8 7511 vmovdqa ymm12,YMMWORD[$L$avx2_inc] 7512 vpaddd ymm13,ymm12,YMMWORD[((160+160))+rbp] 7513 vpaddd ymm12,ymm12,ymm13 7514 vmovdqa YMMWORD[(160+160)+rbp],ymm12 7515 vmovdqa YMMWORD[(160+192)+rbp],ymm13 7516 7517$L$seal_avx2_tail_256_rounds_and_3xhash: 7518 add r10,QWORD[((0+0))+rdi] 7519 adc r11,QWORD[((8+0))+rdi] 7520 adc r12,1 7521 mov rax,QWORD[((0+160+0))+rbp] 7522 mov r15,rax 7523 mul r10 7524 mov r13,rax 7525 mov r14,rdx 7526 mov rax,QWORD[((0+160+0))+rbp] 7527 mul r11 7528 imul r15,r12 7529 add r14,rax 7530 adc r15,rdx 7531 mov rax,QWORD[((8+160+0))+rbp] 7532 mov r9,rax 7533 mul r10 7534 add r14,rax 7535 adc rdx,0 7536 mov r10,rdx 7537 mov rax,QWORD[((8+160+0))+rbp] 7538 mul r11 7539 add r15,rax 7540 adc rdx,0 7541 imul r9,r12 7542 add r15,r10 7543 adc r9,rdx 7544 mov r10,r13 7545 mov r11,r14 7546 mov r12,r15 7547 and r12,3 7548 mov r13,r15 7549 and r13,-4 7550 mov r14,r9 7551 shrd r15,r9,2 7552 shr r9,2 7553 add r15,r13 7554 adc r9,r14 7555 add r10,r15 7556 adc r11,r9 7557 adc r12,0 7558 7559 lea rdi,[16+rdi] 7560$L$seal_avx2_tail_256_rounds_and_2xhash: 7561 vpaddd ymm0,ymm0,ymm4 7562 vpxor ymm12,ymm12,ymm0 7563 vpshufb ymm12,ymm12,YMMWORD[$L$rol16] 7564 vpaddd ymm8,ymm8,ymm12 7565 vpxor ymm4,ymm4,ymm8 7566 vpsrld ymm3,ymm4,20 7567 vpslld ymm4,ymm4,12 7568 vpxor ymm4,ymm4,ymm3 7569 vpaddd ymm0,ymm0,ymm4 7570 vpxor ymm12,ymm12,ymm0 7571 vpshufb ymm12,ymm12,YMMWORD[$L$rol8] 7572 vpaddd ymm8,ymm8,ymm12 7573 vpxor ymm4,ymm4,ymm8 7574 vpslld ymm3,ymm4,7 7575 vpsrld ymm4,ymm4,25 7576 vpxor ymm4,ymm4,ymm3 7577 vpalignr ymm12,ymm12,ymm12,12 7578 vpalignr ymm8,ymm8,ymm8,8 7579 vpalignr ymm4,ymm4,ymm4,4 7580 vpaddd ymm1,ymm1,ymm5 7581 vpxor ymm13,ymm13,ymm1 7582 vpshufb ymm13,ymm13,YMMWORD[$L$rol16] 7583 vpaddd ymm9,ymm9,ymm13 7584 vpxor ymm5,ymm5,ymm9 7585 vpsrld ymm3,ymm5,20 7586 vpslld ymm5,ymm5,12 7587 vpxor ymm5,ymm5,ymm3 7588 vpaddd ymm1,ymm1,ymm5 7589 vpxor ymm13,ymm13,ymm1 7590 vpshufb ymm13,ymm13,YMMWORD[$L$rol8] 7591 vpaddd 
ymm9,ymm9,ymm13 7592 vpxor ymm5,ymm5,ymm9 7593 vpslld ymm3,ymm5,7 7594 vpsrld ymm5,ymm5,25 7595 vpxor ymm5,ymm5,ymm3 7596 vpalignr ymm13,ymm13,ymm13,12 7597 vpalignr ymm9,ymm9,ymm9,8 7598 vpalignr ymm5,ymm5,ymm5,4 7599 add r10,QWORD[((0+0))+rdi] 7600 adc r11,QWORD[((8+0))+rdi] 7601 adc r12,1 7602 mov rax,QWORD[((0+160+0))+rbp] 7603 mov r15,rax 7604 mul r10 7605 mov r13,rax 7606 mov r14,rdx 7607 mov rax,QWORD[((0+160+0))+rbp] 7608 mul r11 7609 imul r15,r12 7610 add r14,rax 7611 adc r15,rdx 7612 mov rax,QWORD[((8+160+0))+rbp] 7613 mov r9,rax 7614 mul r10 7615 add r14,rax 7616 adc rdx,0 7617 mov r10,rdx 7618 mov rax,QWORD[((8+160+0))+rbp] 7619 mul r11 7620 add r15,rax 7621 adc rdx,0 7622 imul r9,r12 7623 add r15,r10 7624 adc r9,rdx 7625 mov r10,r13 7626 mov r11,r14 7627 mov r12,r15 7628 and r12,3 7629 mov r13,r15 7630 and r13,-4 7631 mov r14,r9 7632 shrd r15,r9,2 7633 shr r9,2 7634 add r15,r13 7635 adc r9,r14 7636 add r10,r15 7637 adc r11,r9 7638 adc r12,0 7639 vpaddd ymm0,ymm0,ymm4 7640 vpxor ymm12,ymm12,ymm0 7641 vpshufb ymm12,ymm12,YMMWORD[$L$rol16] 7642 vpaddd ymm8,ymm8,ymm12 7643 vpxor ymm4,ymm4,ymm8 7644 vpsrld ymm3,ymm4,20 7645 vpslld ymm4,ymm4,12 7646 vpxor ymm4,ymm4,ymm3 7647 vpaddd ymm0,ymm0,ymm4 7648 vpxor ymm12,ymm12,ymm0 7649 vpshufb ymm12,ymm12,YMMWORD[$L$rol8] 7650 vpaddd ymm8,ymm8,ymm12 7651 vpxor ymm4,ymm4,ymm8 7652 vpslld ymm3,ymm4,7 7653 vpsrld ymm4,ymm4,25 7654 vpxor ymm4,ymm4,ymm3 7655 vpalignr ymm12,ymm12,ymm12,4 7656 vpalignr ymm8,ymm8,ymm8,8 7657 vpalignr ymm4,ymm4,ymm4,12 7658 vpaddd ymm1,ymm1,ymm5 7659 vpxor ymm13,ymm13,ymm1 7660 vpshufb ymm13,ymm13,YMMWORD[$L$rol16] 7661 vpaddd ymm9,ymm9,ymm13 7662 vpxor ymm5,ymm5,ymm9 7663 vpsrld ymm3,ymm5,20 7664 vpslld ymm5,ymm5,12 7665 vpxor ymm5,ymm5,ymm3 7666 vpaddd ymm1,ymm1,ymm5 7667 vpxor ymm13,ymm13,ymm1 7668 vpshufb ymm13,ymm13,YMMWORD[$L$rol8] 7669 vpaddd ymm9,ymm9,ymm13 7670 vpxor ymm5,ymm5,ymm9 7671 vpslld ymm3,ymm5,7 7672 vpsrld ymm5,ymm5,25 7673 vpxor ymm5,ymm5,ymm3 7674 vpalignr 
ymm13,ymm13,ymm13,4 7675 vpalignr ymm9,ymm9,ymm9,8 7676 vpalignr ymm5,ymm5,ymm5,12 7677 add r10,QWORD[((0+16))+rdi] 7678 adc r11,QWORD[((8+16))+rdi] 7679 adc r12,1 7680 mov rax,QWORD[((0+160+0))+rbp] 7681 mov r15,rax 7682 mul r10 7683 mov r13,rax 7684 mov r14,rdx 7685 mov rax,QWORD[((0+160+0))+rbp] 7686 mul r11 7687 imul r15,r12 7688 add r14,rax 7689 adc r15,rdx 7690 mov rax,QWORD[((8+160+0))+rbp] 7691 mov r9,rax 7692 mul r10 7693 add r14,rax 7694 adc rdx,0 7695 mov r10,rdx 7696 mov rax,QWORD[((8+160+0))+rbp] 7697 mul r11 7698 add r15,rax 7699 adc rdx,0 7700 imul r9,r12 7701 add r15,r10 7702 adc r9,rdx 7703 mov r10,r13 7704 mov r11,r14 7705 mov r12,r15 7706 and r12,3 7707 mov r13,r15 7708 and r13,-4 7709 mov r14,r9 7710 shrd r15,r9,2 7711 shr r9,2 7712 add r15,r13 7713 adc r9,r14 7714 add r10,r15 7715 adc r11,r9 7716 adc r12,0 7717 7718 lea rdi,[32+rdi] 7719 dec rcx 7720 jg NEAR $L$seal_avx2_tail_256_rounds_and_3xhash 7721 dec r8 7722 jge NEAR $L$seal_avx2_tail_256_rounds_and_2xhash 7723 vpaddd ymm1,ymm1,YMMWORD[$L$chacha20_consts] 7724 vpaddd ymm5,ymm5,YMMWORD[((160+64))+rbp] 7725 vpaddd ymm9,ymm9,YMMWORD[((160+96))+rbp] 7726 vpaddd ymm13,ymm13,YMMWORD[((160+192))+rbp] 7727 vpaddd ymm0,ymm0,YMMWORD[$L$chacha20_consts] 7728 vpaddd ymm4,ymm4,YMMWORD[((160+64))+rbp] 7729 vpaddd ymm8,ymm8,YMMWORD[((160+96))+rbp] 7730 vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp] 7731 vperm2i128 ymm3,ymm5,ymm1,0x02 7732 vperm2i128 ymm5,ymm5,ymm1,0x13 7733 vperm2i128 ymm1,ymm13,ymm9,0x02 7734 vperm2i128 ymm9,ymm13,ymm9,0x13 7735 vpxor ymm3,ymm3,YMMWORD[((0+0))+rsi] 7736 vpxor ymm1,ymm1,YMMWORD[((32+0))+rsi] 7737 vpxor ymm5,ymm5,YMMWORD[((64+0))+rsi] 7738 vpxor ymm9,ymm9,YMMWORD[((96+0))+rsi] 7739 vmovdqu YMMWORD[(0+0)+rdi],ymm3 7740 vmovdqu YMMWORD[(32+0)+rdi],ymm1 7741 vmovdqu YMMWORD[(64+0)+rdi],ymm5 7742 vmovdqu YMMWORD[(96+0)+rdi],ymm9 7743 vperm2i128 ymm3,ymm4,ymm0,0x13 7744 vperm2i128 ymm0,ymm4,ymm0,0x02 7745 vperm2i128 ymm4,ymm12,ymm8,0x02 7746 vperm2i128 ymm12,ymm12,ymm8,0x13 7747 
vmovdqa ymm8,ymm3 7748 7749 mov rcx,4*32 7750 lea rsi,[128+rsi] 7751 sub rbx,4*32 7752 jmp NEAR $L$seal_avx2_short_hash_remainder 7753 7754$L$seal_avx2_tail_384: 7755 vmovdqa ymm0,YMMWORD[$L$chacha20_consts] 7756 vmovdqa ymm4,YMMWORD[((160+64))+rbp] 7757 vmovdqa ymm8,YMMWORD[((160+96))+rbp] 7758 vmovdqa ymm1,ymm0 7759 vmovdqa ymm5,ymm4 7760 vmovdqa ymm9,ymm8 7761 vmovdqa ymm2,ymm0 7762 vmovdqa ymm6,ymm4 7763 vmovdqa ymm10,ymm8 7764 vmovdqa ymm12,YMMWORD[$L$avx2_inc] 7765 vpaddd ymm14,ymm12,YMMWORD[((160+160))+rbp] 7766 vpaddd ymm13,ymm12,ymm14 7767 vpaddd ymm12,ymm12,ymm13 7768 vmovdqa YMMWORD[(160+160)+rbp],ymm12 7769 vmovdqa YMMWORD[(160+192)+rbp],ymm13 7770 vmovdqa YMMWORD[(160+224)+rbp],ymm14 7771 7772$L$seal_avx2_tail_384_rounds_and_3xhash: 7773 add r10,QWORD[((0+0))+rdi] 7774 adc r11,QWORD[((8+0))+rdi] 7775 adc r12,1 7776 mov rax,QWORD[((0+160+0))+rbp] 7777 mov r15,rax 7778 mul r10 7779 mov r13,rax 7780 mov r14,rdx 7781 mov rax,QWORD[((0+160+0))+rbp] 7782 mul r11 7783 imul r15,r12 7784 add r14,rax 7785 adc r15,rdx 7786 mov rax,QWORD[((8+160+0))+rbp] 7787 mov r9,rax 7788 mul r10 7789 add r14,rax 7790 adc rdx,0 7791 mov r10,rdx 7792 mov rax,QWORD[((8+160+0))+rbp] 7793 mul r11 7794 add r15,rax 7795 adc rdx,0 7796 imul r9,r12 7797 add r15,r10 7798 adc r9,rdx 7799 mov r10,r13 7800 mov r11,r14 7801 mov r12,r15 7802 and r12,3 7803 mov r13,r15 7804 and r13,-4 7805 mov r14,r9 7806 shrd r15,r9,2 7807 shr r9,2 7808 add r15,r13 7809 adc r9,r14 7810 add r10,r15 7811 adc r11,r9 7812 adc r12,0 7813 7814 lea rdi,[16+rdi] 7815$L$seal_avx2_tail_384_rounds_and_2xhash: 7816 vpaddd ymm0,ymm0,ymm4 7817 vpxor ymm12,ymm12,ymm0 7818 vpshufb ymm12,ymm12,YMMWORD[$L$rol16] 7819 vpaddd ymm8,ymm8,ymm12 7820 vpxor ymm4,ymm4,ymm8 7821 vpsrld ymm3,ymm4,20 7822 vpslld ymm4,ymm4,12 7823 vpxor ymm4,ymm4,ymm3 7824 vpaddd ymm0,ymm0,ymm4 7825 vpxor ymm12,ymm12,ymm0 7826 vpshufb ymm12,ymm12,YMMWORD[$L$rol8] 7827 vpaddd ymm8,ymm8,ymm12 7828 vpxor ymm4,ymm4,ymm8 7829 vpslld ymm3,ymm4,7 7830 vpsrld 
ymm4,ymm4,25 7831 vpxor ymm4,ymm4,ymm3 7832 vpalignr ymm12,ymm12,ymm12,12 7833 vpalignr ymm8,ymm8,ymm8,8 7834 vpalignr ymm4,ymm4,ymm4,4 7835 vpaddd ymm1,ymm1,ymm5 7836 vpxor ymm13,ymm13,ymm1 7837 vpshufb ymm13,ymm13,YMMWORD[$L$rol16] 7838 vpaddd ymm9,ymm9,ymm13 7839 vpxor ymm5,ymm5,ymm9 7840 vpsrld ymm3,ymm5,20 7841 vpslld ymm5,ymm5,12 7842 vpxor ymm5,ymm5,ymm3 7843 vpaddd ymm1,ymm1,ymm5 7844 vpxor ymm13,ymm13,ymm1 7845 vpshufb ymm13,ymm13,YMMWORD[$L$rol8] 7846 vpaddd ymm9,ymm9,ymm13 7847 vpxor ymm5,ymm5,ymm9 7848 vpslld ymm3,ymm5,7 7849 vpsrld ymm5,ymm5,25 7850 vpxor ymm5,ymm5,ymm3 7851 vpalignr ymm13,ymm13,ymm13,12 7852 vpalignr ymm9,ymm9,ymm9,8 7853 vpalignr ymm5,ymm5,ymm5,4 7854 add r10,QWORD[((0+0))+rdi] 7855 adc r11,QWORD[((8+0))+rdi] 7856 adc r12,1 7857 mov rax,QWORD[((0+160+0))+rbp] 7858 mov r15,rax 7859 mul r10 7860 mov r13,rax 7861 mov r14,rdx 7862 mov rax,QWORD[((0+160+0))+rbp] 7863 mul r11 7864 imul r15,r12 7865 add r14,rax 7866 adc r15,rdx 7867 mov rax,QWORD[((8+160+0))+rbp] 7868 mov r9,rax 7869 mul r10 7870 add r14,rax 7871 adc rdx,0 7872 mov r10,rdx 7873 mov rax,QWORD[((8+160+0))+rbp] 7874 mul r11 7875 add r15,rax 7876 adc rdx,0 7877 imul r9,r12 7878 add r15,r10 7879 adc r9,rdx 7880 mov r10,r13 7881 mov r11,r14 7882 mov r12,r15 7883 and r12,3 7884 mov r13,r15 7885 and r13,-4 7886 mov r14,r9 7887 shrd r15,r9,2 7888 shr r9,2 7889 add r15,r13 7890 adc r9,r14 7891 add r10,r15 7892 adc r11,r9 7893 adc r12,0 7894 vpaddd ymm2,ymm2,ymm6 7895 vpxor ymm14,ymm14,ymm2 7896 vpshufb ymm14,ymm14,YMMWORD[$L$rol16] 7897 vpaddd ymm10,ymm10,ymm14 7898 vpxor ymm6,ymm6,ymm10 7899 vpsrld ymm3,ymm6,20 7900 vpslld ymm6,ymm6,12 7901 vpxor ymm6,ymm6,ymm3 7902 vpaddd ymm2,ymm2,ymm6 7903 vpxor ymm14,ymm14,ymm2 7904 vpshufb ymm14,ymm14,YMMWORD[$L$rol8] 7905 vpaddd ymm10,ymm10,ymm14 7906 vpxor ymm6,ymm6,ymm10 7907 vpslld ymm3,ymm6,7 7908 vpsrld ymm6,ymm6,25 7909 vpxor ymm6,ymm6,ymm3 7910 vpalignr ymm14,ymm14,ymm14,12 7911 vpalignr ymm10,ymm10,ymm10,8 7912 vpalignr 
ymm6,ymm6,ymm6,4 7913 vpaddd ymm0,ymm0,ymm4 7914 vpxor ymm12,ymm12,ymm0 7915 vpshufb ymm12,ymm12,YMMWORD[$L$rol16] 7916 vpaddd ymm8,ymm8,ymm12 7917 vpxor ymm4,ymm4,ymm8 7918 vpsrld ymm3,ymm4,20 7919 vpslld ymm4,ymm4,12 7920 vpxor ymm4,ymm4,ymm3 7921 vpaddd ymm0,ymm0,ymm4 7922 vpxor ymm12,ymm12,ymm0 7923 vpshufb ymm12,ymm12,YMMWORD[$L$rol8] 7924 vpaddd ymm8,ymm8,ymm12 7925 vpxor ymm4,ymm4,ymm8 7926 vpslld ymm3,ymm4,7 7927 vpsrld ymm4,ymm4,25 7928 vpxor ymm4,ymm4,ymm3 7929 vpalignr ymm12,ymm12,ymm12,4 7930 vpalignr ymm8,ymm8,ymm8,8 7931 vpalignr ymm4,ymm4,ymm4,12 7932 add r10,QWORD[((0+16))+rdi] 7933 adc r11,QWORD[((8+16))+rdi] 7934 adc r12,1 7935 mov rax,QWORD[((0+160+0))+rbp] 7936 mov r15,rax 7937 mul r10 7938 mov r13,rax 7939 mov r14,rdx 7940 mov rax,QWORD[((0+160+0))+rbp] 7941 mul r11 7942 imul r15,r12 7943 add r14,rax 7944 adc r15,rdx 7945 mov rax,QWORD[((8+160+0))+rbp] 7946 mov r9,rax 7947 mul r10 7948 add r14,rax 7949 adc rdx,0 7950 mov r10,rdx 7951 mov rax,QWORD[((8+160+0))+rbp] 7952 mul r11 7953 add r15,rax 7954 adc rdx,0 7955 imul r9,r12 7956 add r15,r10 7957 adc r9,rdx 7958 mov r10,r13 7959 mov r11,r14 7960 mov r12,r15 7961 and r12,3 7962 mov r13,r15 7963 and r13,-4 7964 mov r14,r9 7965 shrd r15,r9,2 7966 shr r9,2 7967 add r15,r13 7968 adc r9,r14 7969 add r10,r15 7970 adc r11,r9 7971 adc r12,0 7972 vpaddd ymm1,ymm1,ymm5 7973 vpxor ymm13,ymm13,ymm1 7974 vpshufb ymm13,ymm13,YMMWORD[$L$rol16] 7975 vpaddd ymm9,ymm9,ymm13 7976 vpxor ymm5,ymm5,ymm9 7977 vpsrld ymm3,ymm5,20 7978 vpslld ymm5,ymm5,12 7979 vpxor ymm5,ymm5,ymm3 7980 vpaddd ymm1,ymm1,ymm5 7981 vpxor ymm13,ymm13,ymm1 7982 vpshufb ymm13,ymm13,YMMWORD[$L$rol8] 7983 vpaddd ymm9,ymm9,ymm13 7984 vpxor ymm5,ymm5,ymm9 7985 vpslld ymm3,ymm5,7 7986 vpsrld ymm5,ymm5,25 7987 vpxor ymm5,ymm5,ymm3 7988 vpalignr ymm13,ymm13,ymm13,4 7989 vpalignr ymm9,ymm9,ymm9,8 7990 vpalignr ymm5,ymm5,ymm5,12 7991 vpaddd ymm2,ymm2,ymm6 7992 vpxor ymm14,ymm14,ymm2 7993 vpshufb ymm14,ymm14,YMMWORD[$L$rol16] 7994 vpaddd 
ymm10,ymm10,ymm14 7995 vpxor ymm6,ymm6,ymm10 7996 vpsrld ymm3,ymm6,20 7997 vpslld ymm6,ymm6,12 7998 vpxor ymm6,ymm6,ymm3 7999 vpaddd ymm2,ymm2,ymm6 8000 vpxor ymm14,ymm14,ymm2 8001 vpshufb ymm14,ymm14,YMMWORD[$L$rol8] 8002 vpaddd ymm10,ymm10,ymm14 8003 vpxor ymm6,ymm6,ymm10 8004 vpslld ymm3,ymm6,7 8005 vpsrld ymm6,ymm6,25 8006 vpxor ymm6,ymm6,ymm3 8007 vpalignr ymm14,ymm14,ymm14,4 8008 vpalignr ymm10,ymm10,ymm10,8 8009 vpalignr ymm6,ymm6,ymm6,12 8010 8011 lea rdi,[32+rdi] 8012 dec rcx 8013 jg NEAR $L$seal_avx2_tail_384_rounds_and_3xhash 8014 dec r8 8015 jge NEAR $L$seal_avx2_tail_384_rounds_and_2xhash 8016 vpaddd ymm2,ymm2,YMMWORD[$L$chacha20_consts] 8017 vpaddd ymm6,ymm6,YMMWORD[((160+64))+rbp] 8018 vpaddd ymm10,ymm10,YMMWORD[((160+96))+rbp] 8019 vpaddd ymm14,ymm14,YMMWORD[((160+224))+rbp] 8020 vpaddd ymm1,ymm1,YMMWORD[$L$chacha20_consts] 8021 vpaddd ymm5,ymm5,YMMWORD[((160+64))+rbp] 8022 vpaddd ymm9,ymm9,YMMWORD[((160+96))+rbp] 8023 vpaddd ymm13,ymm13,YMMWORD[((160+192))+rbp] 8024 vpaddd ymm0,ymm0,YMMWORD[$L$chacha20_consts] 8025 vpaddd ymm4,ymm4,YMMWORD[((160+64))+rbp] 8026 vpaddd ymm8,ymm8,YMMWORD[((160+96))+rbp] 8027 vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp] 8028 vperm2i128 ymm3,ymm6,ymm2,0x02 8029 vperm2i128 ymm6,ymm6,ymm2,0x13 8030 vperm2i128 ymm2,ymm14,ymm10,0x02 8031 vperm2i128 ymm10,ymm14,ymm10,0x13 8032 vpxor ymm3,ymm3,YMMWORD[((0+0))+rsi] 8033 vpxor ymm2,ymm2,YMMWORD[((32+0))+rsi] 8034 vpxor ymm6,ymm6,YMMWORD[((64+0))+rsi] 8035 vpxor ymm10,ymm10,YMMWORD[((96+0))+rsi] 8036 vmovdqu YMMWORD[(0+0)+rdi],ymm3 8037 vmovdqu YMMWORD[(32+0)+rdi],ymm2 8038 vmovdqu YMMWORD[(64+0)+rdi],ymm6 8039 vmovdqu YMMWORD[(96+0)+rdi],ymm10 8040 vperm2i128 ymm3,ymm5,ymm1,0x02 8041 vperm2i128 ymm5,ymm5,ymm1,0x13 8042 vperm2i128 ymm1,ymm13,ymm9,0x02 8043 vperm2i128 ymm9,ymm13,ymm9,0x13 8044 vpxor ymm3,ymm3,YMMWORD[((0+128))+rsi] 8045 vpxor ymm1,ymm1,YMMWORD[((32+128))+rsi] 8046 vpxor ymm5,ymm5,YMMWORD[((64+128))+rsi] 8047 vpxor ymm9,ymm9,YMMWORD[((96+128))+rsi] 8048 vmovdqu 
YMMWORD[(0+128)+rdi],ymm3 8049 vmovdqu YMMWORD[(32+128)+rdi],ymm1 8050 vmovdqu YMMWORD[(64+128)+rdi],ymm5 8051 vmovdqu YMMWORD[(96+128)+rdi],ymm9 8052 vperm2i128 ymm3,ymm4,ymm0,0x13 8053 vperm2i128 ymm0,ymm4,ymm0,0x02 8054 vperm2i128 ymm4,ymm12,ymm8,0x02 8055 vperm2i128 ymm12,ymm12,ymm8,0x13 8056 vmovdqa ymm8,ymm3 8057 8058 mov rcx,8*32 8059 lea rsi,[256+rsi] 8060 sub rbx,8*32 8061 jmp NEAR $L$seal_avx2_short_hash_remainder 8062 8063$L$seal_avx2_tail_512: 8064 vmovdqa ymm0,YMMWORD[$L$chacha20_consts] 8065 vmovdqa ymm4,YMMWORD[((160+64))+rbp] 8066 vmovdqa ymm8,YMMWORD[((160+96))+rbp] 8067 vmovdqa ymm1,ymm0 8068 vmovdqa ymm5,ymm4 8069 vmovdqa ymm9,ymm8 8070 vmovdqa ymm2,ymm0 8071 vmovdqa ymm6,ymm4 8072 vmovdqa ymm10,ymm8 8073 vmovdqa ymm3,ymm0 8074 vmovdqa ymm7,ymm4 8075 vmovdqa ymm11,ymm8 8076 vmovdqa ymm12,YMMWORD[$L$avx2_inc] 8077 vpaddd ymm15,ymm12,YMMWORD[((160+160))+rbp] 8078 vpaddd ymm14,ymm12,ymm15 8079 vpaddd ymm13,ymm12,ymm14 8080 vpaddd ymm12,ymm12,ymm13 8081 vmovdqa YMMWORD[(160+256)+rbp],ymm15 8082 vmovdqa YMMWORD[(160+224)+rbp],ymm14 8083 vmovdqa YMMWORD[(160+192)+rbp],ymm13 8084 vmovdqa YMMWORD[(160+160)+rbp],ymm12 8085 8086$L$seal_avx2_tail_512_rounds_and_3xhash: 8087 add r10,QWORD[((0+0))+rdi] 8088 adc r11,QWORD[((8+0))+rdi] 8089 adc r12,1 8090 mov rdx,QWORD[((0+160+0))+rbp] 8091 mov r15,rdx 8092 mulx r14,r13,r10 8093 mulx rdx,rax,r11 8094 imul r15,r12 8095 add r14,rax 8096 adc r15,rdx 8097 mov rdx,QWORD[((8+160+0))+rbp] 8098 mulx rax,r10,r10 8099 add r14,r10 8100 mulx r9,r11,r11 8101 adc r15,r11 8102 adc r9,0 8103 imul rdx,r12 8104 add r15,rax 8105 adc r9,rdx 8106 mov r10,r13 8107 mov r11,r14 8108 mov r12,r15 8109 and r12,3 8110 mov r13,r15 8111 and r13,-4 8112 mov r14,r9 8113 shrd r15,r9,2 8114 shr r9,2 8115 add r15,r13 8116 adc r9,r14 8117 add r10,r15 8118 adc r11,r9 8119 adc r12,0 8120 8121 lea rdi,[16+rdi] 8122$L$seal_avx2_tail_512_rounds_and_2xhash: 8123 vmovdqa YMMWORD[(160+128)+rbp],ymm8 8124 vmovdqa ymm8,YMMWORD[$L$rol16] 8125 vpaddd 
; ---------------------------------------------------------------------------
; Body of $L$seal_avx2_tail_512_rounds_and_2xhash (the label itself and the
; first mnemonic are on the previous line).
;
; Vector work: the four register sets ymm0/4/8/12, ymm1/5/9/13,
; ymm2/6/10/14 and ymm3/7/11/15 each go through add / xor / rotate steps.
; Rotations by 16 and 8 bits use vpshufb with the $L$rol16 / $L$rol8 masks;
; rotate-left by 12 and by 7 use vpsrld + vpslld + vpxor (shift counts
; 20 / 32-20 and 25 / 32-25).
; ymm8 doubles as the mask/scratch register, so its live value is parked at
; [rbp+160+128] and folded back in through the memory operand of vpaddd.
; After the first pass, vpalignr rotates ymm4-7 by 4 bytes, ymm8-11 by 8 and
; ymm12-15 by 12 within each 128-bit lane.
;
; Scalar work, interleaved with the vector code: the 16 bytes at [rdi] are
; added into r10:r11:r12 (plus 1 in the third limb), multiplied by the qwords
; at [rbp+160] and [rbp+168] with mulx, and reduced with the
; and 3 / and -4 / shrd 2 sequence. NOTE(review): the interleaving is
; presumably for instruction-level parallelism; statement order matters
; because ymm8 and rdx are reused.
; ---------------------------------------------------------------------------
ymm3,ymm3,ymm7 8126 vpaddd ymm2,ymm2,ymm6 8127 vpaddd ymm1,ymm1,ymm5 8128 vpaddd ymm0,ymm0,ymm4 8129 vpxor ymm15,ymm15,ymm3 8130 vpxor ymm14,ymm14,ymm2 8131 vpxor ymm13,ymm13,ymm1 8132 vpxor ymm12,ymm12,ymm0 8133 vpshufb ymm15,ymm15,ymm8 8134 vpshufb ymm14,ymm14,ymm8 8135 vpshufb ymm13,ymm13,ymm8 8136 vpshufb ymm12,ymm12,ymm8 8137 vpaddd ymm11,ymm11,ymm15 8138 vpaddd ymm10,ymm10,ymm14 8139 vpaddd ymm9,ymm9,ymm13 8140 vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] 8141 vpxor ymm7,ymm7,ymm11 8142 vpxor ymm6,ymm6,ymm10 8143 add r10,QWORD[((0+0))+rdi] 8144 adc r11,QWORD[((8+0))+rdi] 8145 adc r12,1 8146 vpxor ymm5,ymm5,ymm9 8147 vpxor ymm4,ymm4,ymm8 8148 vmovdqa YMMWORD[(160+128)+rbp],ymm8 8149 vpsrld ymm8,ymm7,20 8150 vpslld ymm7,ymm7,32-20 8151 vpxor ymm7,ymm7,ymm8 8152 vpsrld ymm8,ymm6,20 8153 vpslld ymm6,ymm6,32-20 8154 vpxor ymm6,ymm6,ymm8 8155 vpsrld ymm8,ymm5,20 8156 vpslld ymm5,ymm5,32-20 8157 vpxor ymm5,ymm5,ymm8 8158 vpsrld ymm8,ymm4,20 8159 vpslld ymm4,ymm4,32-20 8160 vpxor ymm4,ymm4,ymm8 8161 vmovdqa ymm8,YMMWORD[$L$rol8] 8162 vpaddd ymm3,ymm3,ymm7 8163 vpaddd ymm2,ymm2,ymm6 8164 vpaddd ymm1,ymm1,ymm5 8165 vpaddd ymm0,ymm0,ymm4 8166 mov rdx,QWORD[((0+160+0))+rbp] 8167 mov r15,rdx 8168 mulx r14,r13,r10 8169 mulx rdx,rax,r11 8170 imul r15,r12 8171 add r14,rax 8172 adc r15,rdx 8173 vpxor ymm15,ymm15,ymm3 8174 vpxor ymm14,ymm14,ymm2 8175 vpxor ymm13,ymm13,ymm1 8176 vpxor ymm12,ymm12,ymm0 8177 vpshufb ymm15,ymm15,ymm8 8178 vpshufb ymm14,ymm14,ymm8 8179 vpshufb ymm13,ymm13,ymm8 8180 vpshufb ymm12,ymm12,ymm8 8181 vpaddd ymm11,ymm11,ymm15 8182 vpaddd ymm10,ymm10,ymm14 8183 vpaddd ymm9,ymm9,ymm13 8184 vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] 8185 vpxor ymm7,ymm7,ymm11 8186 vpxor ymm6,ymm6,ymm10 8187 vpxor ymm5,ymm5,ymm9 8188 vpxor ymm4,ymm4,ymm8 8189 vmovdqa YMMWORD[(160+128)+rbp],ymm8 8190 vpsrld ymm8,ymm7,25 8191 vpslld ymm7,ymm7,32-25 8192 vpxor ymm7,ymm7,ymm8 8193 mov rdx,QWORD[((8+160+0))+rbp] 8194 mulx rax,r10,r10 8195 add r14,r10 8196 mulx r9,r11,r11 8197 adc
r15,r11 8198 adc r9,0 8199 imul rdx,r12 8200 vpsrld ymm8,ymm6,25 8201 vpslld ymm6,ymm6,32-25 8202 vpxor ymm6,ymm6,ymm8 8203 vpsrld ymm8,ymm5,25 8204 vpslld ymm5,ymm5,32-25 8205 vpxor ymm5,ymm5,ymm8 8206 vpsrld ymm8,ymm4,25 8207 vpslld ymm4,ymm4,32-25 8208 vpxor ymm4,ymm4,ymm8 8209 vmovdqa ymm8,YMMWORD[((160+128))+rbp] 8210 vpalignr ymm7,ymm7,ymm7,4 8211 vpalignr ymm11,ymm11,ymm11,8 8212 vpalignr ymm15,ymm15,ymm15,12 8213 vpalignr ymm6,ymm6,ymm6,4 8214 vpalignr ymm10,ymm10,ymm10,8 8215 vpalignr ymm14,ymm14,ymm14,12 8216 vpalignr ymm5,ymm5,ymm5,4 8217 vpalignr ymm9,ymm9,ymm9,8 8218 vpalignr ymm13,ymm13,ymm13,12 8219 vpalignr ymm4,ymm4,ymm4,4 8220 add r15,rax 8221 adc r9,rdx 8222 vpalignr ymm8,ymm8,ymm8,8 8223 vpalignr ymm12,ymm12,ymm12,12 8224 vmovdqa YMMWORD[(160+128)+rbp],ymm8 8225 vmovdqa ymm8,YMMWORD[$L$rol16] 8226 vpaddd ymm3,ymm3,ymm7 8227 vpaddd ymm2,ymm2,ymm6 8228 vpaddd ymm1,ymm1,ymm5 8229 vpaddd ymm0,ymm0,ymm4 8230 vpxor ymm15,ymm15,ymm3 8231 vpxor ymm14,ymm14,ymm2 8232 vpxor ymm13,ymm13,ymm1 8233 vpxor ymm12,ymm12,ymm0 8234 vpshufb ymm15,ymm15,ymm8 8235 vpshufb ymm14,ymm14,ymm8 8236 vpshufb ymm13,ymm13,ymm8 8237 vpshufb ymm12,ymm12,ymm8 8238 vpaddd ymm11,ymm11,ymm15 8239 vpaddd ymm10,ymm10,ymm14 8240 vpaddd ymm9,ymm9,ymm13 8241 vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] 8242 mov r10,r13 8243 mov r11,r14 8244 mov r12,r15 8245 and r12,3 8246 mov r13,r15 8247 and r13,-4 8248 mov r14,r9 8249 shrd r15,r9,2 8250 shr r9,2 8251 add r15,r13 8252 adc r9,r14 8253 add r10,r15 8254 adc r11,r9 8255 adc r12,0 8256 vpxor ymm7,ymm7,ymm11 8257 vpxor ymm6,ymm6,ymm10 8258 vpxor ymm5,ymm5,ymm9 8259 vpxor ymm4,ymm4,ymm8 8260 vmovdqa YMMWORD[(160+128)+rbp],ymm8 8261 vpsrld ymm8,ymm7,20 8262 vpslld ymm7,ymm7,32-20 8263 vpxor ymm7,ymm7,ymm8 8264 vpsrld ymm8,ymm6,20 8265 vpslld ymm6,ymm6,32-20 8266 vpxor ymm6,ymm6,ymm8 8267 vpsrld ymm8,ymm5,20 8268 vpslld ymm5,ymm5,32-20 8269 vpxor ymm5,ymm5,ymm8 8270 vpsrld ymm8,ymm4,20 8271 vpslld ymm4,ymm4,32-20 8272 vpxor ymm4,ymm4,ymm8 8273
; Second pass, continued: rotate-by-8 ($L$rol8) and rotate-left-by-7 steps.
; A second 16-byte block, at [rdi+16], is added into r10:r11:r12 and
; multiplied by [rbp+160] / [rbp+168]. The closing vpalignr group uses
; 12 / 8 / 4 bytes for ymm4-7 / ymm8-11 / ymm12-15, which brings the lanes
; back to where they were before the 4 / 8 / 12 rotation of the first pass.
vmovdqa ymm8,YMMWORD[$L$rol8] 8274 vpaddd ymm3,ymm3,ymm7 8275 vpaddd ymm2,ymm2,ymm6 8276 add r10,QWORD[((0+16))+rdi] 8277 adc r11,QWORD[((8+16))+rdi] 8278 adc r12,1 8279 vpaddd ymm1,ymm1,ymm5 8280 vpaddd ymm0,ymm0,ymm4 8281 vpxor ymm15,ymm15,ymm3 8282 vpxor ymm14,ymm14,ymm2 8283 vpxor ymm13,ymm13,ymm1 8284 vpxor ymm12,ymm12,ymm0 8285 vpshufb ymm15,ymm15,ymm8 8286 vpshufb ymm14,ymm14,ymm8 8287 vpshufb ymm13,ymm13,ymm8 8288 vpshufb ymm12,ymm12,ymm8 8289 vpaddd ymm11,ymm11,ymm15 8290 vpaddd ymm10,ymm10,ymm14 8291 vpaddd ymm9,ymm9,ymm13 8292 vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] 8293 vpxor ymm7,ymm7,ymm11 8294 vpxor ymm6,ymm6,ymm10 8295 vpxor ymm5,ymm5,ymm9 8296 vpxor ymm4,ymm4,ymm8 8297 vmovdqa YMMWORD[(160+128)+rbp],ymm8 8298 vpsrld ymm8,ymm7,25 8299 mov rdx,QWORD[((0+160+0))+rbp] 8300 mov r15,rdx 8301 mulx r14,r13,r10 8302 mulx rdx,rax,r11 8303 imul r15,r12 8304 add r14,rax 8305 adc r15,rdx 8306 vpslld ymm7,ymm7,32-25 8307 vpxor ymm7,ymm7,ymm8 8308 vpsrld ymm8,ymm6,25 8309 vpslld ymm6,ymm6,32-25 8310 vpxor ymm6,ymm6,ymm8 8311 vpsrld ymm8,ymm5,25 8312 vpslld ymm5,ymm5,32-25 8313 vpxor ymm5,ymm5,ymm8 8314 vpsrld ymm8,ymm4,25 8315 vpslld ymm4,ymm4,32-25 8316 vpxor ymm4,ymm4,ymm8 8317 vmovdqa ymm8,YMMWORD[((160+128))+rbp] 8318 vpalignr ymm7,ymm7,ymm7,12 8319 vpalignr ymm11,ymm11,ymm11,8 8320 vpalignr ymm15,ymm15,ymm15,4 8321 vpalignr ymm6,ymm6,ymm6,12 8322 vpalignr ymm10,ymm10,ymm10,8 8323 vpalignr ymm14,ymm14,ymm14,4 8324 vpalignr ymm5,ymm5,ymm5,12 8325 vpalignr ymm9,ymm9,ymm9,8 8326 mov rdx,QWORD[((8+160+0))+rbp] 8327 mulx rax,r10,r10 8328 add r14,r10 8329 mulx r9,r11,r11 8330 adc r15,r11 8331 adc r9,0 8332 imul rdx,r12 8333 vpalignr ymm13,ymm13,ymm13,4 8334 vpalignr ymm4,ymm4,ymm4,12 8335 vpalignr ymm8,ymm8,ymm8,8 8336 vpalignr ymm12,ymm12,ymm12,4 8337 8338 8339 8340 8341 8342 8343 8344 8345 8346 8347 8348 8349 8350 8351 8352 8353 add r15,rax 8354 adc r9,rdx 8355 8356 8357 8358 8359 8360 8361 8362 8363 8364 8365 8366 8367 8368 8369 8370 8371 8372 8373 8374 8375
; Loop tail and start of output.
; - The second multiply is reduced (and 3 / and -4 / shrd 2) and rdi advances
;   by 32, i.e. past the two 16-byte blocks absorbed in this iteration.
; - dec rcx / jg repeats from ..._rounds_and_3xhash while rcx is still
;   positive after the decrement; after that, dec r8 / jge repeats from
;   ..._rounds_and_2xhash while r8 is still non-negative.
; - After the loops each register set gets its starting values added back:
;   $L$chacha20_consts, [rbp+160+64], [rbp+160+96], and the per-set value
;   saved at [rbp+160+256], +224, +192, +160 (for ymm15, ymm14, ymm13,
;   ymm12 respectively).
; - ymm0 is spilled to [rbp+160+128] to free a register; vperm2i128
;   (0x02 = low halves, 0x13 = high halves) regroups lanes, and the result
;   is XORed with the input at [rsi+0..127] and stored to [rdi+0..127].
;   The next 128 bytes are started the same way.
mov r10,r13 8376 mov r11,r14 8377 mov r12,r15 8378 and r12,3 8379 mov r13,r15 8380 and r13,-4 8381 mov r14,r9 8382 shrd r15,r9,2 8383 shr r9,2 8384 add r15,r13 8385 adc r9,r14 8386 add r10,r15 8387 adc r11,r9 8388 adc r12,0 8389 8390 lea rdi,[32+rdi] 8391 dec rcx 8392 jg NEAR $L$seal_avx2_tail_512_rounds_and_3xhash 8393 dec r8 8394 jge NEAR $L$seal_avx2_tail_512_rounds_and_2xhash 8395 vpaddd ymm3,ymm3,YMMWORD[$L$chacha20_consts] 8396 vpaddd ymm7,ymm7,YMMWORD[((160+64))+rbp] 8397 vpaddd ymm11,ymm11,YMMWORD[((160+96))+rbp] 8398 vpaddd ymm15,ymm15,YMMWORD[((160+256))+rbp] 8399 vpaddd ymm2,ymm2,YMMWORD[$L$chacha20_consts] 8400 vpaddd ymm6,ymm6,YMMWORD[((160+64))+rbp] 8401 vpaddd ymm10,ymm10,YMMWORD[((160+96))+rbp] 8402 vpaddd ymm14,ymm14,YMMWORD[((160+224))+rbp] 8403 vpaddd ymm1,ymm1,YMMWORD[$L$chacha20_consts] 8404 vpaddd ymm5,ymm5,YMMWORD[((160+64))+rbp] 8405 vpaddd ymm9,ymm9,YMMWORD[((160+96))+rbp] 8406 vpaddd ymm13,ymm13,YMMWORD[((160+192))+rbp] 8407 vpaddd ymm0,ymm0,YMMWORD[$L$chacha20_consts] 8408 vpaddd ymm4,ymm4,YMMWORD[((160+64))+rbp] 8409 vpaddd ymm8,ymm8,YMMWORD[((160+96))+rbp] 8410 vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp] 8411 8412 vmovdqa YMMWORD[(160+128)+rbp],ymm0 8413 vperm2i128 ymm0,ymm7,ymm3,0x02 8414 vperm2i128 ymm7,ymm7,ymm3,0x13 8415 vperm2i128 ymm3,ymm15,ymm11,0x02 8416 vperm2i128 ymm11,ymm15,ymm11,0x13 8417 vpxor ymm0,ymm0,YMMWORD[((0+0))+rsi] 8418 vpxor ymm3,ymm3,YMMWORD[((32+0))+rsi] 8419 vpxor ymm7,ymm7,YMMWORD[((64+0))+rsi] 8420 vpxor ymm11,ymm11,YMMWORD[((96+0))+rsi] 8421 vmovdqu YMMWORD[(0+0)+rdi],ymm0 8422 vmovdqu YMMWORD[(32+0)+rdi],ymm3 8423 vmovdqu YMMWORD[(64+0)+rdi],ymm7 8424 vmovdqu YMMWORD[(96+0)+rdi],ymm11 8425 8426 vmovdqa ymm0,YMMWORD[((160+128))+rbp] 8427 vperm2i128 ymm3,ymm6,ymm2,0x02 8428 vperm2i128 ymm6,ymm6,ymm2,0x13 8429 vperm2i128 ymm2,ymm14,ymm10,0x02 8430 vperm2i128 ymm10,ymm14,ymm10,0x13 8431 vpxor ymm3,ymm3,YMMWORD[((0+128))+rsi] 8432 vpxor ymm2,ymm2,YMMWORD[((32+128))+rsi] 8433 vpxor
; ---------------------------------------------------------------------------
; 1. End of the $L$seal_avx2_tail_512 output sequence (it starts on the
;    previous lines). Bytes 128..383 are XORed against [rsi+...] and stored
;    to [rdi+...]. The last register set (ymm0/ymm4/ymm8/ymm12) is only
;    regrouped with vperm2i128 and left in registers. Then rcx = 12*32,
;    rsi += 384, rbx -= 12*32, and control jumps to
;    $L$seal_avx2_short_hash_remainder.
;
; 2. $L$seal_avx2_320: expects a state in ymm0/ymm4/ymm8/ymm12 (set up by
;    code not visible here). It is duplicated into ymm1/5/9/13 and
;    ymm2/6/10/14, with ymm13 = ymm12 + $L$avx2_inc and
;    ymm14 = ymm13 + $L$avx2_inc. ymm7 and ymm11 keep the starting ymm4 and
;    ymm8; the starting ymm12/ymm13/ymm14 are saved at [rbp+160+160],
;    [rbp+160+192], [rbp+160+224]. r10 = 10 is the loop counter.
;
; 3. $L$seal_avx2_320_rounds: per iteration each of the three sets runs an
;    add / xor / rotate pass (vpshufb with $L$rol16 / $L$rol8; shifts 20+12
;    and 7+25 combined by vpxor, with ymm3 as scratch), then vpalignr by
;    12 / 8 / 4 bytes on the ymm12-14 / ymm8-10 / ymm4-6 registers. A second
;    pass follows, ending with vpalignr by 4 / 8 / 12, which restores the
;    lane order.
; ---------------------------------------------------------------------------
ymm6,ymm6,YMMWORD[((64+128))+rsi] 8434 vpxor ymm10,ymm10,YMMWORD[((96+128))+rsi] 8435 vmovdqu YMMWORD[(0+128)+rdi],ymm3 8436 vmovdqu YMMWORD[(32+128)+rdi],ymm2 8437 vmovdqu YMMWORD[(64+128)+rdi],ymm6 8438 vmovdqu YMMWORD[(96+128)+rdi],ymm10 8439 vperm2i128 ymm3,ymm5,ymm1,0x02 8440 vperm2i128 ymm5,ymm5,ymm1,0x13 8441 vperm2i128 ymm1,ymm13,ymm9,0x02 8442 vperm2i128 ymm9,ymm13,ymm9,0x13 8443 vpxor ymm3,ymm3,YMMWORD[((0+256))+rsi] 8444 vpxor ymm1,ymm1,YMMWORD[((32+256))+rsi] 8445 vpxor ymm5,ymm5,YMMWORD[((64+256))+rsi] 8446 vpxor ymm9,ymm9,YMMWORD[((96+256))+rsi] 8447 vmovdqu YMMWORD[(0+256)+rdi],ymm3 8448 vmovdqu YMMWORD[(32+256)+rdi],ymm1 8449 vmovdqu YMMWORD[(64+256)+rdi],ymm5 8450 vmovdqu YMMWORD[(96+256)+rdi],ymm9 8451 vperm2i128 ymm3,ymm4,ymm0,0x13 8452 vperm2i128 ymm0,ymm4,ymm0,0x02 8453 vperm2i128 ymm4,ymm12,ymm8,0x02 8454 vperm2i128 ymm12,ymm12,ymm8,0x13 8455 vmovdqa ymm8,ymm3 8456 8457 mov rcx,12*32 8458 lea rsi,[384+rsi] 8459 sub rbx,12*32 8460 jmp NEAR $L$seal_avx2_short_hash_remainder 8461 8462$L$seal_avx2_320: 8463 vmovdqa ymm1,ymm0 8464 vmovdqa ymm2,ymm0 8465 vmovdqa ymm5,ymm4 8466 vmovdqa ymm6,ymm4 8467 vmovdqa ymm9,ymm8 8468 vmovdqa ymm10,ymm8 8469 vpaddd ymm13,ymm12,YMMWORD[$L$avx2_inc] 8470 vpaddd ymm14,ymm13,YMMWORD[$L$avx2_inc] 8471 vmovdqa ymm7,ymm4 8472 vmovdqa ymm11,ymm8 8473 vmovdqa YMMWORD[(160+160)+rbp],ymm12 8474 vmovdqa YMMWORD[(160+192)+rbp],ymm13 8475 vmovdqa YMMWORD[(160+224)+rbp],ymm14 8476 mov r10,10 8477$L$seal_avx2_320_rounds: 8478 vpaddd ymm0,ymm0,ymm4 8479 vpxor ymm12,ymm12,ymm0 8480 vpshufb ymm12,ymm12,YMMWORD[$L$rol16] 8481 vpaddd ymm8,ymm8,ymm12 8482 vpxor ymm4,ymm4,ymm8 8483 vpsrld ymm3,ymm4,20 8484 vpslld ymm4,ymm4,12 8485 vpxor ymm4,ymm4,ymm3 8486 vpaddd ymm0,ymm0,ymm4 8487 vpxor ymm12,ymm12,ymm0 8488 vpshufb ymm12,ymm12,YMMWORD[$L$rol8] 8489 vpaddd ymm8,ymm8,ymm12 8490 vpxor ymm4,ymm4,ymm8 8491 vpslld ymm3,ymm4,7 8492 vpsrld ymm4,ymm4,25 8493 vpxor ymm4,ymm4,ymm3 8494 vpalignr ymm12,ymm12,ymm12,12 8495 vpalignr
ymm8,ymm8,ymm8,8 8496 vpalignr ymm4,ymm4,ymm4,4 8497 vpaddd ymm1,ymm1,ymm5 8498 vpxor ymm13,ymm13,ymm1 8499 vpshufb ymm13,ymm13,YMMWORD[$L$rol16] 8500 vpaddd ymm9,ymm9,ymm13 8501 vpxor ymm5,ymm5,ymm9 8502 vpsrld ymm3,ymm5,20 8503 vpslld ymm5,ymm5,12 8504 vpxor ymm5,ymm5,ymm3 8505 vpaddd ymm1,ymm1,ymm5 8506 vpxor ymm13,ymm13,ymm1 8507 vpshufb ymm13,ymm13,YMMWORD[$L$rol8] 8508 vpaddd ymm9,ymm9,ymm13 8509 vpxor ymm5,ymm5,ymm9 8510 vpslld ymm3,ymm5,7 8511 vpsrld ymm5,ymm5,25 8512 vpxor ymm5,ymm5,ymm3 8513 vpalignr ymm13,ymm13,ymm13,12 8514 vpalignr ymm9,ymm9,ymm9,8 8515 vpalignr ymm5,ymm5,ymm5,4 8516 vpaddd ymm2,ymm2,ymm6 8517 vpxor ymm14,ymm14,ymm2 8518 vpshufb ymm14,ymm14,YMMWORD[$L$rol16] 8519 vpaddd ymm10,ymm10,ymm14 8520 vpxor ymm6,ymm6,ymm10 8521 vpsrld ymm3,ymm6,20 8522 vpslld ymm6,ymm6,12 8523 vpxor ymm6,ymm6,ymm3 8524 vpaddd ymm2,ymm2,ymm6 8525 vpxor ymm14,ymm14,ymm2 8526 vpshufb ymm14,ymm14,YMMWORD[$L$rol8] 8527 vpaddd ymm10,ymm10,ymm14 8528 vpxor ymm6,ymm6,ymm10 8529 vpslld ymm3,ymm6,7 8530 vpsrld ymm6,ymm6,25 8531 vpxor ymm6,ymm6,ymm3 8532 vpalignr ymm14,ymm14,ymm14,12 8533 vpalignr ymm10,ymm10,ymm10,8 8534 vpalignr ymm6,ymm6,ymm6,4 8535 vpaddd ymm0,ymm0,ymm4 8536 vpxor ymm12,ymm12,ymm0 8537 vpshufb ymm12,ymm12,YMMWORD[$L$rol16] 8538 vpaddd ymm8,ymm8,ymm12 8539 vpxor ymm4,ymm4,ymm8 8540 vpsrld ymm3,ymm4,20 8541 vpslld ymm4,ymm4,12 8542 vpxor ymm4,ymm4,ymm3 8543 vpaddd ymm0,ymm0,ymm4 8544 vpxor ymm12,ymm12,ymm0 8545 vpshufb ymm12,ymm12,YMMWORD[$L$rol8] 8546 vpaddd ymm8,ymm8,ymm12 8547 vpxor ymm4,ymm4,ymm8 8548 vpslld ymm3,ymm4,7 8549 vpsrld ymm4,ymm4,25 8550 vpxor ymm4,ymm4,ymm3 8551 vpalignr ymm12,ymm12,ymm12,4 8552 vpalignr ymm8,ymm8,ymm8,8 8553 vpalignr ymm4,ymm4,ymm4,12 8554 vpaddd ymm1,ymm1,ymm5 8555 vpxor ymm13,ymm13,ymm1 8556 vpshufb ymm13,ymm13,YMMWORD[$L$rol16] 8557 vpaddd ymm9,ymm9,ymm13 8558 vpxor ymm5,ymm5,ymm9 8559 vpsrld ymm3,ymm5,20 8560 vpslld ymm5,ymm5,12 8561 vpxor ymm5,ymm5,ymm3 8562 vpaddd ymm1,ymm1,ymm5 8563 vpxor ymm13,ymm13,ymm1 8564
; Rest of the second pass, then loop control: dec r10 / jne repeats
; $L$seal_avx2_320_rounds until r10 reaches zero (10 iterations).
; After the loop the starting values are added back: $L$chacha20_consts to
; ymm0-2, ymm7 to ymm4-6, ymm11 to ymm8-10, and the saved
; [rbp+160+160/192/224] to ymm12/13/14.
; vperm2i128 ymm3,ymm4,ymm0,0x02 gathers the low 128-bit halves of ymm0 and
; ymm4; that value is ANDed with $L$clamp and stored at [rbp+160+0], the
; location the multiply sequences read their multiplier from
; ([rbp+160] and [rbp+168]). The remaining vperm2i128s regroup the other
; halves across ymm0/4/8/12/1/5/9/13/2/6, then control jumps to
; $L$seal_avx2_short.
; The line ends with the label $L$seal_avx2_192 and its first four copies.
vpshufb ymm13,ymm13,YMMWORD[$L$rol8] 8565 vpaddd ymm9,ymm9,ymm13 8566 vpxor ymm5,ymm5,ymm9 8567 vpslld ymm3,ymm5,7 8568 vpsrld ymm5,ymm5,25 8569 vpxor ymm5,ymm5,ymm3 8570 vpalignr ymm13,ymm13,ymm13,4 8571 vpalignr ymm9,ymm9,ymm9,8 8572 vpalignr ymm5,ymm5,ymm5,12 8573 vpaddd ymm2,ymm2,ymm6 8574 vpxor ymm14,ymm14,ymm2 8575 vpshufb ymm14,ymm14,YMMWORD[$L$rol16] 8576 vpaddd ymm10,ymm10,ymm14 8577 vpxor ymm6,ymm6,ymm10 8578 vpsrld ymm3,ymm6,20 8579 vpslld ymm6,ymm6,12 8580 vpxor ymm6,ymm6,ymm3 8581 vpaddd ymm2,ymm2,ymm6 8582 vpxor ymm14,ymm14,ymm2 8583 vpshufb ymm14,ymm14,YMMWORD[$L$rol8] 8584 vpaddd ymm10,ymm10,ymm14 8585 vpxor ymm6,ymm6,ymm10 8586 vpslld ymm3,ymm6,7 8587 vpsrld ymm6,ymm6,25 8588 vpxor ymm6,ymm6,ymm3 8589 vpalignr ymm14,ymm14,ymm14,4 8590 vpalignr ymm10,ymm10,ymm10,8 8591 vpalignr ymm6,ymm6,ymm6,12 8592 8593 dec r10 8594 jne NEAR $L$seal_avx2_320_rounds 8595 vpaddd ymm0,ymm0,YMMWORD[$L$chacha20_consts] 8596 vpaddd ymm1,ymm1,YMMWORD[$L$chacha20_consts] 8597 vpaddd ymm2,ymm2,YMMWORD[$L$chacha20_consts] 8598 vpaddd ymm4,ymm4,ymm7 8599 vpaddd ymm5,ymm5,ymm7 8600 vpaddd ymm6,ymm6,ymm7 8601 vpaddd ymm8,ymm8,ymm11 8602 vpaddd ymm9,ymm9,ymm11 8603 vpaddd ymm10,ymm10,ymm11 8604 vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp] 8605 vpaddd ymm13,ymm13,YMMWORD[((160+192))+rbp] 8606 vpaddd ymm14,ymm14,YMMWORD[((160+224))+rbp] 8607 vperm2i128 ymm3,ymm4,ymm0,0x02 8608 8609 vpand ymm3,ymm3,YMMWORD[$L$clamp] 8610 vmovdqa YMMWORD[(160+0)+rbp],ymm3 8611 8612 vperm2i128 ymm0,ymm4,ymm0,0x13 8613 vperm2i128 ymm4,ymm12,ymm8,0x13 8614 vperm2i128 ymm8,ymm5,ymm1,0x02 8615 vperm2i128 ymm12,ymm13,ymm9,0x02 8616 vperm2i128 ymm1,ymm5,ymm1,0x13 8617 vperm2i128 ymm5,ymm13,ymm9,0x13 8618 vperm2i128 ymm9,ymm6,ymm2,0x02 8619 vperm2i128 ymm13,ymm14,ymm10,0x02 8620 vperm2i128 ymm2,ymm6,ymm2,0x13 8621 vperm2i128 ymm6,ymm14,ymm10,0x13 8622 jmp NEAR $L$seal_avx2_short 8623 8624$L$seal_avx2_192: 8625 vmovdqa ymm1,ymm0 8626 vmovdqa ymm2,ymm0 8627 vmovdqa ymm5,ymm4 8628 vmovdqa ymm6,ymm4 8629
; ---------------------------------------------------------------------------
; 1. $L$seal_avx2_192, continued (the label and the copies into
;    ymm1/ymm2/ymm5/ymm6 are on the previous line). This is the two-set
;    variant: ymm0/4/8/12 and ymm1/5/9/13, with ymm13 = ymm12 + $L$avx2_inc.
;    ymm2/ymm6/ymm10 are not part of a working set here; they hold the
;    starting ymm0/ymm4/ymm8, and ymm11/ymm15 hold the starting ymm12/ymm13,
;    all for the add-back after the loop. r10 = 10 is the loop counter.
;
; 2. $L$seal_avx2_192_rounds: per iteration each set runs an add / xor /
;    rotate pass (vpshufb $L$rol16 / $L$rol8; shifts 20+12 and 7+25 with
;    ymm3 as scratch), vpalignr by 12 / 8 / 4 bytes, a second pass, and
;    vpalignr by 4 / 8 / 12 bytes. dec r10 / jne closes the loop.
;
; 3. After the loop the saved starting values are added back. The low
;    128-bit halves of ymm0 and ymm4 (vperm2i128 ...,0x02) are ANDed with
;    $L$clamp and stored at [rbp+160+0]. The remaining halves are regrouped
;    into ymm0/4/8/12/1/5.
;
; 4. $L$seal_avx2_short: `mov r8,r8` leaves r8 unchanged. It then calls
;    poly_hash_ad_internal, whose code at the top of the file reads r8 as a
;    byte count and rcx as the data pointer. rcx is zeroed afterwards.
;
; 5. $L$seal_avx2_short_hash_remainder: while rcx >= 16 (unsigned, jb
;    exits), the 16 bytes at [rdi] are added into r10:r11:r12 and run
;    through the mul-based multiply and reduce against [rbp+160] /
;    [rbp+168]; then rcx -= 16 and rdi += 16. When rcx < 16, control goes to
;    $L$seal_avx2_short_loop.
; ---------------------------------------------------------------------------
vmovdqa ymm9,ymm8 8630 vmovdqa ymm10,ymm8 8631 vpaddd ymm13,ymm12,YMMWORD[$L$avx2_inc] 8632 vmovdqa ymm11,ymm12 8633 vmovdqa ymm15,ymm13 8634 mov r10,10 8635$L$seal_avx2_192_rounds: 8636 vpaddd ymm0,ymm0,ymm4 8637 vpxor ymm12,ymm12,ymm0 8638 vpshufb ymm12,ymm12,YMMWORD[$L$rol16] 8639 vpaddd ymm8,ymm8,ymm12 8640 vpxor ymm4,ymm4,ymm8 8641 vpsrld ymm3,ymm4,20 8642 vpslld ymm4,ymm4,12 8643 vpxor ymm4,ymm4,ymm3 8644 vpaddd ymm0,ymm0,ymm4 8645 vpxor ymm12,ymm12,ymm0 8646 vpshufb ymm12,ymm12,YMMWORD[$L$rol8] 8647 vpaddd ymm8,ymm8,ymm12 8648 vpxor ymm4,ymm4,ymm8 8649 vpslld ymm3,ymm4,7 8650 vpsrld ymm4,ymm4,25 8651 vpxor ymm4,ymm4,ymm3 8652 vpalignr ymm12,ymm12,ymm12,12 8653 vpalignr ymm8,ymm8,ymm8,8 8654 vpalignr ymm4,ymm4,ymm4,4 8655 vpaddd ymm1,ymm1,ymm5 8656 vpxor ymm13,ymm13,ymm1 8657 vpshufb ymm13,ymm13,YMMWORD[$L$rol16] 8658 vpaddd ymm9,ymm9,ymm13 8659 vpxor ymm5,ymm5,ymm9 8660 vpsrld ymm3,ymm5,20 8661 vpslld ymm5,ymm5,12 8662 vpxor ymm5,ymm5,ymm3 8663 vpaddd ymm1,ymm1,ymm5 8664 vpxor ymm13,ymm13,ymm1 8665 vpshufb ymm13,ymm13,YMMWORD[$L$rol8] 8666 vpaddd ymm9,ymm9,ymm13 8667 vpxor ymm5,ymm5,ymm9 8668 vpslld ymm3,ymm5,7 8669 vpsrld ymm5,ymm5,25 8670 vpxor ymm5,ymm5,ymm3 8671 vpalignr ymm13,ymm13,ymm13,12 8672 vpalignr ymm9,ymm9,ymm9,8 8673 vpalignr ymm5,ymm5,ymm5,4 8674 vpaddd ymm0,ymm0,ymm4 8675 vpxor ymm12,ymm12,ymm0 8676 vpshufb ymm12,ymm12,YMMWORD[$L$rol16] 8677 vpaddd ymm8,ymm8,ymm12 8678 vpxor ymm4,ymm4,ymm8 8679 vpsrld ymm3,ymm4,20 8680 vpslld ymm4,ymm4,12 8681 vpxor ymm4,ymm4,ymm3 8682 vpaddd ymm0,ymm0,ymm4 8683 vpxor ymm12,ymm12,ymm0 8684 vpshufb ymm12,ymm12,YMMWORD[$L$rol8] 8685 vpaddd ymm8,ymm8,ymm12 8686 vpxor ymm4,ymm4,ymm8 8687 vpslld ymm3,ymm4,7 8688 vpsrld ymm4,ymm4,25 8689 vpxor ymm4,ymm4,ymm3 8690 vpalignr ymm12,ymm12,ymm12,4 8691 vpalignr ymm8,ymm8,ymm8,8 8692 vpalignr ymm4,ymm4,ymm4,12 8693 vpaddd ymm1,ymm1,ymm5 8694 vpxor ymm13,ymm13,ymm1 8695 vpshufb ymm13,ymm13,YMMWORD[$L$rol16] 8696 vpaddd ymm9,ymm9,ymm13 8697 vpxor ymm5,ymm5,ymm9 8698 vpsrld
ymm3,ymm5,20 8699 vpslld ymm5,ymm5,12 8700 vpxor ymm5,ymm5,ymm3 8701 vpaddd ymm1,ymm1,ymm5 8702 vpxor ymm13,ymm13,ymm1 8703 vpshufb ymm13,ymm13,YMMWORD[$L$rol8] 8704 vpaddd ymm9,ymm9,ymm13 8705 vpxor ymm5,ymm5,ymm9 8706 vpslld ymm3,ymm5,7 8707 vpsrld ymm5,ymm5,25 8708 vpxor ymm5,ymm5,ymm3 8709 vpalignr ymm13,ymm13,ymm13,4 8710 vpalignr ymm9,ymm9,ymm9,8 8711 vpalignr ymm5,ymm5,ymm5,12 8712 8713 dec r10 8714 jne NEAR $L$seal_avx2_192_rounds 8715 vpaddd ymm0,ymm0,ymm2 8716 vpaddd ymm1,ymm1,ymm2 8717 vpaddd ymm4,ymm4,ymm6 8718 vpaddd ymm5,ymm5,ymm6 8719 vpaddd ymm8,ymm8,ymm10 8720 vpaddd ymm9,ymm9,ymm10 8721 vpaddd ymm12,ymm12,ymm11 8722 vpaddd ymm13,ymm13,ymm15 8723 vperm2i128 ymm3,ymm4,ymm0,0x02 8724 8725 vpand ymm3,ymm3,YMMWORD[$L$clamp] 8726 vmovdqa YMMWORD[(160+0)+rbp],ymm3 8727 8728 vperm2i128 ymm0,ymm4,ymm0,0x13 8729 vperm2i128 ymm4,ymm12,ymm8,0x13 8730 vperm2i128 ymm8,ymm5,ymm1,0x02 8731 vperm2i128 ymm12,ymm13,ymm9,0x02 8732 vperm2i128 ymm1,ymm5,ymm1,0x13 8733 vperm2i128 ymm5,ymm13,ymm9,0x13 8734$L$seal_avx2_short: 8735 mov r8,r8 8736 call poly_hash_ad_internal 8737 xor rcx,rcx 8738$L$seal_avx2_short_hash_remainder: 8739 cmp rcx,16 8740 jb NEAR $L$seal_avx2_short_loop 8741 add r10,QWORD[((0+0))+rdi] 8742 adc r11,QWORD[((8+0))+rdi] 8743 adc r12,1 8744 mov rax,QWORD[((0+160+0))+rbp] 8745 mov r15,rax 8746 mul r10 8747 mov r13,rax 8748 mov r14,rdx 8749 mov rax,QWORD[((0+160+0))+rbp] 8750 mul r11 8751 imul r15,r12 8752 add r14,rax 8753 adc r15,rdx 8754 mov rax,QWORD[((8+160+0))+rbp] 8755 mov r9,rax 8756 mul r10 8757 add r14,rax 8758 adc rdx,0 8759 mov r10,rdx 8760 mov rax,QWORD[((8+160+0))+rbp] 8761 mul r11 8762 add r15,rax 8763 adc rdx,0 8764 imul r9,r12 8765 add r15,r10 8766 adc r9,rdx 8767 mov r10,r13 8768 mov r11,r14 8769 mov r12,r15 8770 and r12,3 8771 mov r13,r15 8772 and r13,-4 8773 mov r14,r9 8774 shrd r15,r9,2 8775 shr r9,2 8776 add r15,r13 8777 adc r9,r14 8778 add r10,r15 8779 adc r11,r9 8780 adc r12,0 8781 8782 sub rcx,16 8783 add rdi,16 8784 jmp NEAR
; ---------------------------------------------------------------------------
; The leading symbol is the target of the `jmp NEAR` that ends the previous
; line (back edge of the hash-remainder loop).
;
; $L$seal_avx2_short_loop: handles 32 bytes per iteration while rbx >= 32
; (unsigned compare; jb leaves for $L$seal_avx2_short_tail).
;   - rbx -= 32; the 32 bytes at [rsi] are XORed with ymm0 and stored to
;     [rdi]; rsi += 32.
;   - The 32 bytes just written are absorbed as two 16-byte blocks
;     ([rdi] then [rdi+16]) into r10:r11:r12. Each block is added with a 1
;     in the third limb, multiplied by [rbp+160] / [rbp+168] with mul, and
;     reduced via and 3 / and -4 / shrd 2.
;   - rdi += 32.
;   - The register queue shifts by one so the next 32 bytes of pad land in
;     ymm0: ymm0<-ymm4<-ymm8<-ymm12<-ymm1<-ymm5<-ymm9<-ymm13<-ymm2<-ymm6.
; ---------------------------------------------------------------------------
$L$seal_avx2_short_hash_remainder 8785$L$seal_avx2_short_loop: 8786 cmp rbx,32 8787 jb NEAR $L$seal_avx2_short_tail 8788 sub rbx,32 8789 8790 vpxor ymm0,ymm0,YMMWORD[rsi] 8791 vmovdqu YMMWORD[rdi],ymm0 8792 lea rsi,[32+rsi] 8793 8794 add r10,QWORD[((0+0))+rdi] 8795 adc r11,QWORD[((8+0))+rdi] 8796 adc r12,1 8797 mov rax,QWORD[((0+160+0))+rbp] 8798 mov r15,rax 8799 mul r10 8800 mov r13,rax 8801 mov r14,rdx 8802 mov rax,QWORD[((0+160+0))+rbp] 8803 mul r11 8804 imul r15,r12 8805 add r14,rax 8806 adc r15,rdx 8807 mov rax,QWORD[((8+160+0))+rbp] 8808 mov r9,rax 8809 mul r10 8810 add r14,rax 8811 adc rdx,0 8812 mov r10,rdx 8813 mov rax,QWORD[((8+160+0))+rbp] 8814 mul r11 8815 add r15,rax 8816 adc rdx,0 8817 imul r9,r12 8818 add r15,r10 8819 adc r9,rdx 8820 mov r10,r13 8821 mov r11,r14 8822 mov r12,r15 8823 and r12,3 8824 mov r13,r15 8825 and r13,-4 8826 mov r14,r9 8827 shrd r15,r9,2 8828 shr r9,2 8829 add r15,r13 8830 adc r9,r14 8831 add r10,r15 8832 adc r11,r9 8833 adc r12,0 8834 add r10,QWORD[((0+16))+rdi] 8835 adc r11,QWORD[((8+16))+rdi] 8836 adc r12,1 8837 mov rax,QWORD[((0+160+0))+rbp] 8838 mov r15,rax 8839 mul r10 8840 mov r13,rax 8841 mov r14,rdx 8842 mov rax,QWORD[((0+160+0))+rbp] 8843 mul r11 8844 imul r15,r12 8845 add r14,rax 8846 adc r15,rdx 8847 mov rax,QWORD[((8+160+0))+rbp] 8848 mov r9,rax 8849 mul r10 8850 add r14,rax 8851 adc rdx,0 8852 mov r10,rdx 8853 mov rax,QWORD[((8+160+0))+rbp] 8854 mul r11 8855 add r15,rax 8856 adc rdx,0 8857 imul r9,r12 8858 add r15,r10 8859 adc r9,rdx 8860 mov r10,r13 8861 mov r11,r14 8862 mov r12,r15 8863 and r12,3 8864 mov r13,r15 8865 and r13,-4 8866 mov r14,r9 8867 shrd r15,r9,2 8868 shr r9,2 8869 add r15,r13 8870 adc r9,r14 8871 add r10,r15 8872 adc r11,r9 8873 adc r12,0 8874 8875 lea rdi,[32+rdi] 8876 8877 vmovdqa ymm0,ymm4 8878 vmovdqa ymm4,ymm8 8879 vmovdqa ymm8,ymm12 8880 vmovdqa ymm12,ymm1 8881 vmovdqa ymm1,ymm5 8882 vmovdqa ymm5,ymm9 8883 vmovdqa ymm9,ymm13 8884 vmovdqa ymm13,ymm2 8885 vmovdqa ymm2,ymm6 8886 jmp NEAR
; ---------------------------------------------------------------------------
; The leading symbol is the target of the `jmp NEAR` that ends the previous
; line (back edge of $L$seal_avx2_short_loop).
;
; $L$seal_avx2_short_tail: if rbx >= 16 (unsigned; jb skips to the exit),
; one more 16-byte block is handled. rbx -= 16; the 16 bytes at [rsi] are
; XORed with xmm0 into xmm3 and stored to [rdi]; rsi += 16. The stored
; block is absorbed into r10:r11:r12 (mul-based multiply by [rbp+160] /
; [rbp+168], then the and 3 / and -4 / shrd 2 reduction); rdi += 16.
; vextracti128 then moves the upper 128 bits of ymm0 into xmm0.
;
; $L$seal_avx2_exit: vzeroupper clears the upper halves of the ymm
; registers before jumping to $L$seal_sse_tail_16, which is defined outside
; this chunk. NOTE(review): presumably that path finishes the last <16
; bytes with SSE code - confirm at the target.
; ---------------------------------------------------------------------------
$L$seal_avx2_short_loop 8887$L$seal_avx2_short_tail: 8888 cmp rbx,16 8889 jb NEAR $L$seal_avx2_exit 8890 sub rbx,16 8891 vpxor xmm3,xmm0,XMMWORD[rsi] 8892 vmovdqu XMMWORD[rdi],xmm3 8893 lea rsi,[16+rsi] 8894 add r10,QWORD[((0+0))+rdi] 8895 adc r11,QWORD[((8+0))+rdi] 8896 adc r12,1 8897 mov rax,QWORD[((0+160+0))+rbp] 8898 mov r15,rax 8899 mul r10 8900 mov r13,rax 8901 mov r14,rdx 8902 mov rax,QWORD[((0+160+0))+rbp] 8903 mul r11 8904 imul r15,r12 8905 add r14,rax 8906 adc r15,rdx 8907 mov rax,QWORD[((8+160+0))+rbp] 8908 mov r9,rax 8909 mul r10 8910 add r14,rax 8911 adc rdx,0 8912 mov r10,rdx 8913 mov rax,QWORD[((8+160+0))+rbp] 8914 mul r11 8915 add r15,rax 8916 adc rdx,0 8917 imul r9,r12 8918 add r15,r10 8919 adc r9,rdx 8920 mov r10,r13 8921 mov r11,r14 8922 mov r12,r15 8923 and r12,3 8924 mov r13,r15 8925 and r13,-4 8926 mov r14,r9 8927 shrd r15,r9,2 8928 shr r9,2 8929 add r15,r13 8930 adc r9,r14 8931 add r10,r15 8932 adc r11,r9 8933 adc r12,0 8934 8935 lea rdi,[16+rdi] 8936 vextracti128 xmm0,ymm0,1 8937$L$seal_avx2_exit: 8938 vzeroupper 8939 jmp NEAR $L$seal_sse_tail_16 8940 8941 8942