/* $FreeBSD$ */
/* Do not modify. This file is auto-generated from aesni-mb-x86_64.pl. */
/*
 * Multi-buffer AES-CBC for x86-64.  GNU as, AT&T syntax, System V AMD64
 * calling convention (arguments arrive in %rdi, %rsi, %edx).
 *
 * Argument usage, as read by the code below:
 *   %rdi  array of 40-byte stream descriptors:
 *           +0   pointer the lane loads blocks from
 *           +8   pointer the lane stores blocks to
 *           +16  signed 32-bit count of 16-byte blocks for the lane
 *           +24  16-byte IV
 *   %rsi  key schedule: 16-byte round keys starting at offset 0 and a
 *         32-bit round count at offset 240.
 *         NOTE(review): presumably an OpenSSL AES_KEY -- confirm with caller.
 *   %edx  SSE paths: number of 4-descriptor groups to process.
 *         AVX paths: only used for dispatch (taken when %edx >= 2); one
 *         group of eight descriptors is processed.
 *
 * Entry points:
 *   aesni_multi_cbc_encrypt / aesni_multi_cbc_decrypt     exported
 *   aesni_multi_cbc_encrypt_avx / _decrypt_avx            file-local, reached
 *       through the _avx_cbc_{enc,dec}_shortcut labels.
 *
 * All four save and restore %rbx, %rbp, %r12-%r15 and realign %rsp; the
 * caller's %rsp is kept in %rax and spilled at 16(%rsp).
 *
 * Legend for the hand-encoded instructions (66 0F 38 xx /r):
 *   .byte 102,15,56,220,M   aesenc       .byte 102,15,56,221,M   aesenclast
 *   .byte 102,15,56,222,M   aesdec       .byte 102,15,56,223,M   aesdeclast
 *   M = 209/217/225/233 -> key %xmm1, state %xmm2/3/4/5
 *   M = 208/216/224/232 -> key %xmm0, state %xmm2/3/4/5
 *   .byte 0xf3,0xc3         rep ret
 *
 * NOTE(review): the input-prefetch correction in .Loop_enc4x below is a
 * change relative to the generator output and should also be applied to
 * aesni-mb-x86_64.pl, otherwise regeneration will drop it.
 */
.text

/*
 * aesni_multi_cbc_encrypt: CBC-encrypt up to four streams in parallel
 * with SSE/AES-NI.  Dispatches to the 8-lane AVX code when %edx >= 2 and
 * bit 28 (0x10000000) of the second OPENSSL_ia32cap_P word is set.
 * NOTE(review): bit 28 there is the AVX flag per OpenSSL's capability
 * vector layout -- confirm against the OPENSSL_ia32cap documentation.
 */
.globl	aesni_multi_cbc_encrypt
.type	aesni_multi_cbc_encrypt,@function
.align	32
aesni_multi_cbc_encrypt:
.cfi_startproc
	cmpl	$2,%edx
	jb	.Lenc_non_avx
	movl	OPENSSL_ia32cap_P+4(%rip),%ecx
	testl	$268435456,%ecx
	jnz	_avx_cbc_enc_shortcut
	jmp	.Lenc_non_avx
.align	16
.Lenc_non_avx:
	movq	%rsp,%rax
.cfi_def_cfa_register	%rax
	pushq	%rbx
.cfi_offset	%rbx,-16
	pushq	%rbp
.cfi_offset	%rbp,-24
	pushq	%r12
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_offset	%r15,-56

	/* Frame: 0..15 scratch, 16 saved %rax, 24 group count, 32..47 lane counters. */
	subq	$48,%rsp
	andq	$-64,%rsp
	movq	%rax,16(%rsp)
.cfi_escape	0x0f,0x05,0x77,0x10,0x06,0x23,0x08

.Lenc4x_body:
	movdqu	(%rsi),%xmm12
	leaq	120(%rsi),%rsi
	leaq	80(%rdi),%rdi

	/*
	 * Per group: load the four descriptors, keep the largest block count
	 * in %edx, and point lanes whose count is <= 0 at the stack so their
	 * loads stay harmless.
	 */
.Lenc4x_loop_grande:
	movl	%edx,24(%rsp)
	xorl	%edx,%edx
	movl	-64(%rdi),%ecx
	movq	-80(%rdi),%r8
	cmpl	%edx,%ecx
	movq	-72(%rdi),%r12
	cmovgl	%ecx,%edx
	testl	%ecx,%ecx
	movdqu	-56(%rdi),%xmm2
	movl	%ecx,32(%rsp)
	cmovleq	%rsp,%r8
	movl	-24(%rdi),%ecx
	movq	-40(%rdi),%r9
	cmpl	%edx,%ecx
	movq	-32(%rdi),%r13
	cmovgl	%ecx,%edx
	testl	%ecx,%ecx
	movdqu	-16(%rdi),%xmm3
	movl	%ecx,36(%rsp)
	cmovleq	%rsp,%r9
	movl	16(%rdi),%ecx
	movq	0(%rdi),%r10
	cmpl	%edx,%ecx
	movq	8(%rdi),%r14
	cmovgl	%ecx,%edx
	testl	%ecx,%ecx
	movdqu	24(%rdi),%xmm4
	movl	%ecx,40(%rsp)
	cmovleq	%rsp,%r10
	movl	56(%rdi),%ecx
	movq	40(%rdi),%r11
	cmpl	%edx,%ecx
	movq	48(%rdi),%r15
	cmovgl	%ecx,%edx
	testl	%ecx,%ecx
	movdqu	64(%rdi),%xmm5
	movl	%ecx,44(%rsp)
	cmovleq	%rsp,%r11
	testl	%edx,%edx
	jz	.Lenc4x_done

	/* state = IV ^ round key 0 ^ first input block, per lane */
	movups	16-120(%rsi),%xmm1
	pxor	%xmm12,%xmm2
	movups	32-120(%rsi),%xmm0
	pxor	%xmm12,%xmm3
	movl	240-120(%rsi),%eax
	pxor	%xmm12,%xmm4
	movdqu	(%r8),%xmm6
	pxor	%xmm12,%xmm5
	movdqu	(%r9),%xmm7
	pxor	%xmm6,%xmm2
	movdqu	(%r10),%xmm8
	pxor	%xmm7,%xmm3
	movdqu	(%r11),%xmm9
	pxor	%xmm8,%xmm4
	pxor	%xmm9,%xmm5
	movdqa	32(%rsp),%xmm10
	xorq	%rbx,%rbx
	jmp	.Loop_enc4x

	/*
	 * One 16-byte block per lane per iteration; %rbx is the running byte
	 * offset.  %rbp = %rsp+16-%rbx, so a lane redirected to %rbp reads
	 * from and writes to the stack frame instead of caller memory.
	 */
.align	32
.Loop_enc4x:
	addq	$16,%rbx
	leaq	16(%rsp),%rbp
	movl	$1,%ecx
	subq	%rbx,%rbp

.byte	102,15,56,220,209
	prefetcht0	31(%r8,%rbx,1)
	prefetcht0	31(%r9,%rbx,1)
.byte	102,15,56,220,217
	prefetcht0	31(%r10,%rbx,1)
	/* Fixed: was a second prefetch of %r10; lane 3 input is %r11. */
	prefetcht0	31(%r11,%rbx,1)
.byte	102,15,56,220,225
.byte	102,15,56,220,233
	movups	48-120(%rsi),%xmm1
	cmpl	32(%rsp),%ecx
.byte	102,15,56,220,208
.byte	102,15,56,220,216
.byte	102,15,56,220,224
	cmovgeq	%rbp,%r8
	cmovgq	%rbp,%r12
.byte	102,15,56,220,232
	movups	-56(%rsi),%xmm0
	cmpl	36(%rsp),%ecx
.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
	cmovgeq	%rbp,%r9
	cmovgq	%rbp,%r13
.byte	102,15,56,220,233
	movups	-40(%rsi),%xmm1
	cmpl	40(%rsp),%ecx
.byte	102,15,56,220,208
.byte	102,15,56,220,216
.byte	102,15,56,220,224
	cmovgeq	%rbp,%r10
	cmovgq	%rbp,%r14
.byte	102,15,56,220,232
	movups	-24(%rsi),%xmm0
	cmpl	44(%rsp),%ecx
.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
	cmovgeq	%rbp,%r11
	cmovgq	%rbp,%r15
.byte	102,15,56,220,233
	movups	-8(%rsi),%xmm1
	movdqa	%xmm10,%xmm11
.byte	102,15,56,220,208
	prefetcht0	15(%r12,%rbx,1)
	prefetcht0	15(%r13,%rbx,1)
.byte	102,15,56,220,216
	prefetcht0	15(%r14,%rbx,1)
	prefetcht0	15(%r15,%rbx,1)
.byte	102,15,56,220,224
.byte	102,15,56,220,232
	movups	128-120(%rsi),%xmm0
	pxor	%xmm12,%xmm12

	/* Decrement every lane counter that is still positive. */
.byte	102,15,56,220,209
	pcmpgtd	%xmm12,%xmm11
	movdqu	-120(%rsi),%xmm12
.byte	102,15,56,220,217
	paddd	%xmm11,%xmm10
	movdqa	%xmm10,32(%rsp)
.byte	102,15,56,220,225
.byte	102,15,56,220,233
	movups	144-120(%rsi),%xmm1

	/* Flags from this compare feed both the jb and the later je. */
	cmpl	$11,%eax

.byte	102,15,56,220,208
.byte	102,15,56,220,216
.byte	102,15,56,220,224
.byte	102,15,56,220,232
	movups	160-120(%rsi),%xmm0

	jb	.Lenc4x_tail

.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.byte	102,15,56,220,233
	movups	176-120(%rsi),%xmm1

.byte	102,15,56,220,208
.byte	102,15,56,220,216
.byte	102,15,56,220,224
.byte	102,15,56,220,232
	movups	192-120(%rsi),%xmm0

	je	.Lenc4x_tail

.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.byte	102,15,56,220,233
	movups	208-120(%rsi),%xmm1

.byte	102,15,56,220,208
.byte	102,15,56,220,216
.byte	102,15,56,220,224
.byte	102,15,56,220,232
	movups	224-120(%rsi),%xmm0
	jmp	.Lenc4x_tail

	/* Last two rounds, store ciphertext, and chain it into the next block. */
.align	32
.Lenc4x_tail:
.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.byte	102,15,56,220,233
	movdqu	(%r8,%rbx,1),%xmm6
	movdqu	16-120(%rsi),%xmm1

.byte	102,15,56,221,208
	movdqu	(%r9,%rbx,1),%xmm7
	pxor	%xmm12,%xmm6
.byte	102,15,56,221,216
	movdqu	(%r10,%rbx,1),%xmm8
	pxor	%xmm12,%xmm7
.byte	102,15,56,221,224
	movdqu	(%r11,%rbx,1),%xmm9
	pxor	%xmm12,%xmm8
.byte	102,15,56,221,232
	movdqu	32-120(%rsi),%xmm0
	pxor	%xmm12,%xmm9

	movups	%xmm2,-16(%r12,%rbx,1)
	pxor	%xmm6,%xmm2
	movups	%xmm3,-16(%r13,%rbx,1)
	pxor	%xmm7,%xmm3
	movups	%xmm4,-16(%r14,%rbx,1)
	pxor	%xmm8,%xmm4
	movups	%xmm5,-16(%r15,%rbx,1)
	pxor	%xmm9,%xmm5

	decl	%edx
	jnz	.Loop_enc4x

	/* %eax held the round count; reload the saved stack pointer. */
	movq	16(%rsp),%rax
.cfi_def_cfa	%rax,8
	movl	24(%rsp),%edx

	leaq	160(%rdi),%rdi
	decl	%edx
	jnz	.Lenc4x_loop_grande

.Lenc4x_done:
	movq	-48(%rax),%r15
.cfi_restore	%r15
	movq	-40(%rax),%r14
.cfi_restore	%r14
	movq	-32(%rax),%r13
.cfi_restore	%r13
	movq	-24(%rax),%r12
.cfi_restore	%r12
	movq	-16(%rax),%rbp
.cfi_restore	%rbp
	movq	-8(%rax),%rbx
.cfi_restore	%rbx
	leaq	(%rax),%rsp
.cfi_def_cfa_register	%rsp
.Lenc4x_epilogue:
	.byte	0xf3,0xc3
.cfi_endproc
.size	aesni_multi_cbc_encrypt,.-aesni_multi_cbc_encrypt

/*
 * aesni_multi_cbc_decrypt: CBC-decrypt up to four streams in parallel
 * with SSE/AES-NI.  Same arguments, dispatch rule and stack frame as
 * aesni_multi_cbc_encrypt.  %xmm6-%xmm9 carry the block each lane's next
 * output is XORed with (the IV at first, then the previous input block).
 */
.globl	aesni_multi_cbc_decrypt
.type	aesni_multi_cbc_decrypt,@function
.align	32
aesni_multi_cbc_decrypt:
.cfi_startproc
	cmpl	$2,%edx
	jb	.Ldec_non_avx
	movl	OPENSSL_ia32cap_P+4(%rip),%ecx
	testl	$268435456,%ecx
	jnz	_avx_cbc_dec_shortcut
	jmp	.Ldec_non_avx
.align	16
.Ldec_non_avx:
	movq	%rsp,%rax
.cfi_def_cfa_register	%rax
	pushq	%rbx
.cfi_offset	%rbx,-16
	pushq	%rbp
.cfi_offset	%rbp,-24
	pushq	%r12
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_offset	%r15,-56

	subq	$48,%rsp
	andq	$-64,%rsp
	movq	%rax,16(%rsp)
.cfi_escape	0x0f,0x05,0x77,0x10,0x06,0x23,0x08

.Ldec4x_body:
	movdqu	(%rsi),%xmm12
	leaq	120(%rsi),%rsi
	leaq	80(%rdi),%rdi

.Ldec4x_loop_grande:
	movl	%edx,24(%rsp)
	xorl	%edx,%edx
	movl	-64(%rdi),%ecx
	movq	-80(%rdi),%r8
	cmpl	%edx,%ecx
	movq	-72(%rdi),%r12
	cmovgl	%ecx,%edx
	testl	%ecx,%ecx
	movdqu	-56(%rdi),%xmm6
	movl	%ecx,32(%rsp)
	cmovleq	%rsp,%r8
	movl	-24(%rdi),%ecx
	movq	-40(%rdi),%r9
	cmpl	%edx,%ecx
	movq	-32(%rdi),%r13
	cmovgl	%ecx,%edx
	testl	%ecx,%ecx
	movdqu	-16(%rdi),%xmm7
	movl	%ecx,36(%rsp)
	cmovleq	%rsp,%r9
	movl	16(%rdi),%ecx
	movq	0(%rdi),%r10
	cmpl	%edx,%ecx
	movq	8(%rdi),%r14
	cmovgl	%ecx,%edx
	testl	%ecx,%ecx
	movdqu	24(%rdi),%xmm8
	movl	%ecx,40(%rsp)
	cmovleq	%rsp,%r10
	movl	56(%rdi),%ecx
	movq	40(%rdi),%r11
	cmpl	%edx,%ecx
	movq	48(%rdi),%r15
	cmovgl	%ecx,%edx
	testl	%ecx,%ecx
	movdqu	64(%rdi),%xmm9
	movl	%ecx,44(%rsp)
	cmovleq	%rsp,%r11
	testl	%edx,%edx
	jz	.Ldec4x_done

	/* state = first input block ^ round key 0, per lane */
	movups	16-120(%rsi),%xmm1
	movups	32-120(%rsi),%xmm0
	movl	240-120(%rsi),%eax
	movdqu	(%r8),%xmm2
	movdqu	(%r9),%xmm3
	pxor	%xmm12,%xmm2
	movdqu	(%r10),%xmm4
	pxor	%xmm12,%xmm3
	movdqu	(%r11),%xmm5
	pxor	%xmm12,%xmm4
	pxor	%xmm12,%xmm5
	movdqa	32(%rsp),%xmm10
	xorq	%rbx,%rbx
	jmp	.Loop_dec4x

.align	32
.Loop_dec4x:
	addq	$16,%rbx
	leaq	16(%rsp),%rbp
	movl	$1,%ecx
	subq	%rbx,%rbp

.byte	102,15,56,222,209
	prefetcht0	31(%r8,%rbx,1)
	prefetcht0	31(%r9,%rbx,1)
.byte	102,15,56,222,217
	prefetcht0	31(%r10,%rbx,1)
	prefetcht0	31(%r11,%rbx,1)
.byte	102,15,56,222,225
.byte	102,15,56,222,233
	movups	48-120(%rsi),%xmm1
	cmpl	32(%rsp),%ecx
.byte	102,15,56,222,208
.byte	102,15,56,222,216
.byte	102,15,56,222,224
	cmovgeq	%rbp,%r8
	cmovgq	%rbp,%r12
.byte	102,15,56,222,232
	movups	-56(%rsi),%xmm0
	cmpl	36(%rsp),%ecx
.byte	102,15,56,222,209
.byte	102,15,56,222,217
.byte	102,15,56,222,225
	cmovgeq	%rbp,%r9
	cmovgq	%rbp,%r13
.byte	102,15,56,222,233
	movups	-40(%rsi),%xmm1
	cmpl	40(%rsp),%ecx
.byte	102,15,56,222,208
.byte	102,15,56,222,216
.byte	102,15,56,222,224
	cmovgeq	%rbp,%r10
	cmovgq	%rbp,%r14
.byte	102,15,56,222,232
	movups	-24(%rsi),%xmm0
	cmpl	44(%rsp),%ecx
.byte	102,15,56,222,209
.byte	102,15,56,222,217
.byte	102,15,56,222,225
	cmovgeq	%rbp,%r11
	cmovgq	%rbp,%r15
.byte	102,15,56,222,233
	movups	-8(%rsi),%xmm1
	movdqa	%xmm10,%xmm11
.byte	102,15,56,222,208
	prefetcht0	15(%r12,%rbx,1)
	prefetcht0	15(%r13,%rbx,1)
.byte	102,15,56,222,216
	prefetcht0	15(%r14,%rbx,1)
	prefetcht0	15(%r15,%rbx,1)
.byte	102,15,56,222,224
.byte	102,15,56,222,232
	movups	128-120(%rsi),%xmm0
	pxor	%xmm12,%xmm12

	/* Decrement every lane counter that is still positive. */
.byte	102,15,56,222,209
	pcmpgtd	%xmm12,%xmm11
	movdqu	-120(%rsi),%xmm12
.byte	102,15,56,222,217
	paddd	%xmm11,%xmm10
	movdqa	%xmm10,32(%rsp)
.byte	102,15,56,222,225
.byte	102,15,56,222,233
	movups	144-120(%rsi),%xmm1

	/* Flags from this compare feed both the jb and the later je. */
	cmpl	$11,%eax

.byte	102,15,56,222,208
.byte	102,15,56,222,216
.byte	102,15,56,222,224
.byte	102,15,56,222,232
	movups	160-120(%rsi),%xmm0

	jb	.Ldec4x_tail

.byte	102,15,56,222,209
.byte	102,15,56,222,217
.byte	102,15,56,222,225
.byte	102,15,56,222,233
	movups	176-120(%rsi),%xmm1

.byte	102,15,56,222,208
.byte	102,15,56,222,216
.byte	102,15,56,222,224
.byte	102,15,56,222,232
	movups	192-120(%rsi),%xmm0

	je	.Ldec4x_tail

.byte	102,15,56,222,209
.byte	102,15,56,222,217
.byte	102,15,56,222,225
.byte	102,15,56,222,233
	movups	208-120(%rsi),%xmm1

.byte	102,15,56,222,208
.byte	102,15,56,222,216
.byte	102,15,56,222,224
.byte	102,15,56,222,232
	movups	224-120(%rsi),%xmm0
	jmp	.Ldec4x_tail

	/*
	 * The last round key is folded into the chaining blocks (%xmm6-9) so
	 * that aesdeclast with those as "key" also performs the CBC XOR:
	 *   102,15,56,223,214 / ,223     aesdeclast %xmm6,%xmm2 / %xmm7,%xmm3
	 *   102,65,15,56,223,224 / ,233  aesdeclast %xmm8,%xmm4 / %xmm9,%xmm5
	 */
.align	32
.Ldec4x_tail:
.byte	102,15,56,222,209
.byte	102,15,56,222,217
.byte	102,15,56,222,225
	pxor	%xmm0,%xmm6
	pxor	%xmm0,%xmm7
.byte	102,15,56,222,233
	movdqu	16-120(%rsi),%xmm1
	pxor	%xmm0,%xmm8
	pxor	%xmm0,%xmm9
	movdqu	32-120(%rsi),%xmm0

.byte	102,15,56,223,214
.byte	102,15,56,223,223
	movdqu	-16(%r8,%rbx,1),%xmm6
	movdqu	-16(%r9,%rbx,1),%xmm7
.byte	102,65,15,56,223,224
.byte	102,65,15,56,223,233
	movdqu	-16(%r10,%rbx,1),%xmm8
	movdqu	-16(%r11,%rbx,1),%xmm9

	movups	%xmm2,-16(%r12,%rbx,1)
	movdqu	(%r8,%rbx,1),%xmm2
	movups	%xmm3,-16(%r13,%rbx,1)
	movdqu	(%r9,%rbx,1),%xmm3
	pxor	%xmm12,%xmm2
	movups	%xmm4,-16(%r14,%rbx,1)
	movdqu	(%r10,%rbx,1),%xmm4
	pxor	%xmm12,%xmm3
	movups	%xmm5,-16(%r15,%rbx,1)
	movdqu	(%r11,%rbx,1),%xmm5
	pxor	%xmm12,%xmm4
	pxor	%xmm12,%xmm5

	decl	%edx
	jnz	.Loop_dec4x

	movq	16(%rsp),%rax
.cfi_def_cfa	%rax,8
	movl	24(%rsp),%edx

	leaq	160(%rdi),%rdi
	decl	%edx
	jnz	.Ldec4x_loop_grande

.Ldec4x_done:
	movq	-48(%rax),%r15
.cfi_restore	%r15
	movq	-40(%rax),%r14
.cfi_restore	%r14
	movq	-32(%rax),%r13
.cfi_restore	%r13
	movq	-24(%rax),%r12
.cfi_restore	%r12
	movq	-16(%rax),%rbp
.cfi_restore	%rbp
	movq	-8(%rax),%rbx
.cfi_restore	%rbx
	leaq	(%rax),%rsp
.cfi_def_cfa_register	%rsp
.Ldec4x_epilogue:
	.byte	0xf3,0xc3
.cfi_endproc
.size	aesni_multi_cbc_decrypt,.-aesni_multi_cbc_decrypt

/*
 * aesni_multi_cbc_encrypt_avx: 8-lane AVX variant, entered through
 * _avx_cbc_enc_shortcut.  Processes one group of eight descriptors; the
 * halved %edx is overwritten right away by the per-group maximum count.
 *
 * Stack frame: 16 saved %rax, 32..63 eight lane counters, 64..127 eight
 * (destination - source) offsets, 128..191 next-input staging for lanes
 * 0-3 (lanes 4-7 keep theirs in %xmm10-%xmm13).
 * Lane source pointers live in %r8-%r15 and advance by 16 per iteration.
 */
.type	aesni_multi_cbc_encrypt_avx,@function
.align	32
aesni_multi_cbc_encrypt_avx:
.cfi_startproc
_avx_cbc_enc_shortcut:
	movq	%rsp,%rax
.cfi_def_cfa_register	%rax
	pushq	%rbx
.cfi_offset	%rbx,-16
	pushq	%rbp
.cfi_offset	%rbp,-24
	pushq	%r12
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_offset	%r15,-56

	subq	$192,%rsp
	andq	$-128,%rsp
	movq	%rax,16(%rsp)
.cfi_escape	0x0f,0x05,0x77,0x10,0x06,0x23,0x08

.Lenc8x_body:
	vzeroupper
	vmovdqu	(%rsi),%xmm15
	leaq	120(%rsi),%rsi
	leaq	160(%rdi),%rdi
	shrl	$1,%edx

.Lenc8x_loop_grande:

	xorl	%edx,%edx
	/* descriptor 0 */
	movl	-144(%rdi),%ecx
	movq	-160(%rdi),%r8
	cmpl	%edx,%ecx
	movq	-152(%rdi),%rbx
	cmovgl	%ecx,%edx
	testl	%ecx,%ecx
	vmovdqu	-136(%rdi),%xmm2
	movl	%ecx,32(%rsp)
	cmovleq	%rsp,%r8
	subq	%r8,%rbx
	movq	%rbx,64(%rsp)
	/* descriptor 1 */
	movl	-104(%rdi),%ecx
	movq	-120(%rdi),%r9
	cmpl	%edx,%ecx
	movq	-112(%rdi),%rbp
	cmovgl	%ecx,%edx
	testl	%ecx,%ecx
	vmovdqu	-96(%rdi),%xmm3
	movl	%ecx,36(%rsp)
	cmovleq	%rsp,%r9
	subq	%r9,%rbp
	movq	%rbp,72(%rsp)
	/* descriptor 2 */
	movl	-64(%rdi),%ecx
	movq	-80(%rdi),%r10
	cmpl	%edx,%ecx
	movq	-72(%rdi),%rbp
	cmovgl	%ecx,%edx
	testl	%ecx,%ecx
	vmovdqu	-56(%rdi),%xmm4
	movl	%ecx,40(%rsp)
	cmovleq	%rsp,%r10
	subq	%r10,%rbp
	movq	%rbp,80(%rsp)
	/* descriptor 3 */
	movl	-24(%rdi),%ecx
	movq	-40(%rdi),%r11
	cmpl	%edx,%ecx
	movq	-32(%rdi),%rbp
	cmovgl	%ecx,%edx
	testl	%ecx,%ecx
	vmovdqu	-16(%rdi),%xmm5
	movl	%ecx,44(%rsp)
	cmovleq	%rsp,%r11
	subq	%r11,%rbp
	movq	%rbp,88(%rsp)
	/* descriptor 4 */
	movl	16(%rdi),%ecx
	movq	0(%rdi),%r12
	cmpl	%edx,%ecx
	movq	8(%rdi),%rbp
	cmovgl	%ecx,%edx
	testl	%ecx,%ecx
	vmovdqu	24(%rdi),%xmm6
	movl	%ecx,48(%rsp)
	cmovleq	%rsp,%r12
	subq	%r12,%rbp
	movq	%rbp,96(%rsp)
	/* descriptor 5 */
	movl	56(%rdi),%ecx
	movq	40(%rdi),%r13
	cmpl	%edx,%ecx
	movq	48(%rdi),%rbp
	cmovgl	%ecx,%edx
	testl	%ecx,%ecx
	vmovdqu	64(%rdi),%xmm7
	movl	%ecx,52(%rsp)
	cmovleq	%rsp,%r13
	subq	%r13,%rbp
	movq	%rbp,104(%rsp)
	/* descriptor 6 */
	movl	96(%rdi),%ecx
	movq	80(%rdi),%r14
	cmpl	%edx,%ecx
	movq	88(%rdi),%rbp
	cmovgl	%ecx,%edx
	testl	%ecx,%ecx
	vmovdqu	104(%rdi),%xmm8
	movl	%ecx,56(%rsp)
	cmovleq	%rsp,%r14
	subq	%r14,%rbp
	movq	%rbp,112(%rsp)
	/* descriptor 7 */
	movl	136(%rdi),%ecx
	movq	120(%rdi),%r15
	cmpl	%edx,%ecx
	movq	128(%rdi),%rbp
	cmovgl	%ecx,%edx
	testl	%ecx,%ecx
	vmovdqu	144(%rdi),%xmm9
	movl	%ecx,60(%rsp)
	cmovleq	%rsp,%r15
	subq	%r15,%rbp
	movq	%rbp,120(%rsp)
	testl	%edx,%edx
	jz	.Lenc8x_done

	vmovups	16-120(%rsi),%xmm1
	vmovups	32-120(%rsi),%xmm0
	movl	240-120(%rsi),%eax

	/* state = IV ^ round key 0 ^ first input block, per lane */
	vpxor	(%r8),%xmm15,%xmm10
	leaq	128(%rsp),%rbp
	vpxor	(%r9),%xmm15,%xmm11
	vpxor	(%r10),%xmm15,%xmm12
	vpxor	(%r11),%xmm15,%xmm13
	vpxor	%xmm10,%xmm2,%xmm2
	vpxor	(%r12),%xmm15,%xmm10
	vpxor	%xmm11,%xmm3,%xmm3
	vpxor	(%r13),%xmm15,%xmm11
	vpxor	%xmm12,%xmm4,%xmm4
	vpxor	(%r14),%xmm15,%xmm12
	vpxor	%xmm13,%xmm5,%xmm5
	vpxor	(%r15),%xmm15,%xmm13
	vpxor	%xmm10,%xmm6,%xmm6
	movl	$1,%ecx
	vpxor	%xmm11,%xmm7,%xmm7
	vpxor	%xmm12,%xmm8,%xmm8
	vpxor	%xmm13,%xmm9,%xmm9
	jmp	.Loop_enc8x

	/*
	 * Each of the eight sections below runs one AES round on all lanes and
	 * does the pointer bookkeeping for one lane: redirect a finished lane
	 * to the stack, fetch its next input block (pre-XORed with round key
	 * 0), and leave the lane pointer at the destination of this block+16.
	 */
.align	32
.Loop_enc8x:
	/* lane 0 */
	vaesenc	%xmm1,%xmm2,%xmm2
	cmpl	32+0(%rsp),%ecx
	vaesenc	%xmm1,%xmm3,%xmm3
	prefetcht0	31(%r8)
	vaesenc	%xmm1,%xmm4,%xmm4
	vaesenc	%xmm1,%xmm5,%xmm5
	leaq	(%r8,%rbx,1),%rbx
	cmovgeq	%rsp,%r8
	vaesenc	%xmm1,%xmm6,%xmm6
	cmovgq	%rsp,%rbx
	vaesenc	%xmm1,%xmm7,%xmm7
	subq	%r8,%rbx
	vaesenc	%xmm1,%xmm8,%xmm8
	vpxor	16(%r8),%xmm15,%xmm10
	movq	%rbx,64+0(%rsp)
	vaesenc	%xmm1,%xmm9,%xmm9
	vmovups	-72(%rsi),%xmm1
	leaq	16(%r8,%rbx,1),%r8
	vmovdqu	%xmm10,0(%rbp)
	/* lane 1 */
	vaesenc	%xmm0,%xmm2,%xmm2
	cmpl	32+4(%rsp),%ecx
	movq	64+8(%rsp),%rbx
	vaesenc	%xmm0,%xmm3,%xmm3
	prefetcht0	31(%r9)
	vaesenc	%xmm0,%xmm4,%xmm4
	vaesenc	%xmm0,%xmm5,%xmm5
	leaq	(%r9,%rbx,1),%rbx
	cmovgeq	%rsp,%r9
	vaesenc	%xmm0,%xmm6,%xmm6
	cmovgq	%rsp,%rbx
	vaesenc	%xmm0,%xmm7,%xmm7
	subq	%r9,%rbx
	vaesenc	%xmm0,%xmm8,%xmm8
	vpxor	16(%r9),%xmm15,%xmm11
	movq	%rbx,64+8(%rsp)
	vaesenc	%xmm0,%xmm9,%xmm9
	vmovups	-56(%rsi),%xmm0
	leaq	16(%r9,%rbx,1),%r9
	vmovdqu	%xmm11,16(%rbp)
	/* lane 2 */
	vaesenc	%xmm1,%xmm2,%xmm2
	cmpl	32+8(%rsp),%ecx
	movq	64+16(%rsp),%rbx
	vaesenc	%xmm1,%xmm3,%xmm3
	prefetcht0	31(%r10)
	vaesenc	%xmm1,%xmm4,%xmm4
	prefetcht0	15(%r8)
	vaesenc	%xmm1,%xmm5,%xmm5
	leaq	(%r10,%rbx,1),%rbx
	cmovgeq	%rsp,%r10
	vaesenc	%xmm1,%xmm6,%xmm6
	cmovgq	%rsp,%rbx
	vaesenc	%xmm1,%xmm7,%xmm7
	subq	%r10,%rbx
	vaesenc	%xmm1,%xmm8,%xmm8
	vpxor	16(%r10),%xmm15,%xmm12
	movq	%rbx,64+16(%rsp)
	vaesenc	%xmm1,%xmm9,%xmm9
	vmovups	-40(%rsi),%xmm1
	leaq	16(%r10,%rbx,1),%r10
	vmovdqu	%xmm12,32(%rbp)
	/* lane 3 */
	vaesenc	%xmm0,%xmm2,%xmm2
	cmpl	32+12(%rsp),%ecx
	movq	64+24(%rsp),%rbx
	vaesenc	%xmm0,%xmm3,%xmm3
	prefetcht0	31(%r11)
	vaesenc	%xmm0,%xmm4,%xmm4
	prefetcht0	15(%r9)
	vaesenc	%xmm0,%xmm5,%xmm5
	leaq	(%r11,%rbx,1),%rbx
	cmovgeq	%rsp,%r11
	vaesenc	%xmm0,%xmm6,%xmm6
	cmovgq	%rsp,%rbx
	vaesenc	%xmm0,%xmm7,%xmm7
	subq	%r11,%rbx
	vaesenc	%xmm0,%xmm8,%xmm8
	vpxor	16(%r11),%xmm15,%xmm13
	movq	%rbx,64+24(%rsp)
	vaesenc	%xmm0,%xmm9,%xmm9
	vmovups	-24(%rsi),%xmm0
	leaq	16(%r11,%rbx,1),%r11
	vmovdqu	%xmm13,48(%rbp)
	/* lane 4 */
	vaesenc	%xmm1,%xmm2,%xmm2
	cmpl	32+16(%rsp),%ecx
	movq	64+32(%rsp),%rbx
	vaesenc	%xmm1,%xmm3,%xmm3
	prefetcht0	31(%r12)
	vaesenc	%xmm1,%xmm4,%xmm4
	prefetcht0	15(%r10)
	vaesenc	%xmm1,%xmm5,%xmm5
	leaq	(%r12,%rbx,1),%rbx
	cmovgeq	%rsp,%r12
	vaesenc	%xmm1,%xmm6,%xmm6
	cmovgq	%rsp,%rbx
	vaesenc	%xmm1,%xmm7,%xmm7
	subq	%r12,%rbx
	vaesenc	%xmm1,%xmm8,%xmm8
	vpxor	16(%r12),%xmm15,%xmm10
	movq	%rbx,64+32(%rsp)
	vaesenc	%xmm1,%xmm9,%xmm9
	vmovups	-8(%rsi),%xmm1
	leaq	16(%r12,%rbx,1),%r12
	/* lane 5 */
	vaesenc	%xmm0,%xmm2,%xmm2
	cmpl	32+20(%rsp),%ecx
	movq	64+40(%rsp),%rbx
	vaesenc	%xmm0,%xmm3,%xmm3
	prefetcht0	31(%r13)
	vaesenc	%xmm0,%xmm4,%xmm4
	prefetcht0	15(%r11)
	vaesenc	%xmm0,%xmm5,%xmm5
	leaq	(%rbx,%r13,1),%rbx
	cmovgeq	%rsp,%r13
	vaesenc	%xmm0,%xmm6,%xmm6
	cmovgq	%rsp,%rbx
	vaesenc	%xmm0,%xmm7,%xmm7
	subq	%r13,%rbx
	vaesenc	%xmm0,%xmm8,%xmm8
	vpxor	16(%r13),%xmm15,%xmm11
	movq	%rbx,64+40(%rsp)
	vaesenc	%xmm0,%xmm9,%xmm9
	vmovups	8(%rsi),%xmm0
	leaq	16(%r13,%rbx,1),%r13
	/* lane 6 */
	vaesenc	%xmm1,%xmm2,%xmm2
	cmpl	32+24(%rsp),%ecx
	movq	64+48(%rsp),%rbx
	vaesenc	%xmm1,%xmm3,%xmm3
	prefetcht0	31(%r14)
	vaesenc	%xmm1,%xmm4,%xmm4
	prefetcht0	15(%r12)
	vaesenc	%xmm1,%xmm5,%xmm5
	leaq	(%r14,%rbx,1),%rbx
	cmovgeq	%rsp,%r14
	vaesenc	%xmm1,%xmm6,%xmm6
	cmovgq	%rsp,%rbx
	vaesenc	%xmm1,%xmm7,%xmm7
	subq	%r14,%rbx
	vaesenc	%xmm1,%xmm8,%xmm8
	vpxor	16(%r14),%xmm15,%xmm12
	movq	%rbx,64+48(%rsp)
	vaesenc	%xmm1,%xmm9,%xmm9
	vmovups	24(%rsi),%xmm1
	leaq	16(%r14,%rbx,1),%r14
	/* lane 7 */
	vaesenc	%xmm0,%xmm2,%xmm2
	cmpl	32+28(%rsp),%ecx
	movq	64+56(%rsp),%rbx
	vaesenc	%xmm0,%xmm3,%xmm3
	prefetcht0	31(%r15)
	vaesenc	%xmm0,%xmm4,%xmm4
	prefetcht0	15(%r13)
	vaesenc	%xmm0,%xmm5,%xmm5
	leaq	(%r15,%rbx,1),%rbx
	cmovgeq	%rsp,%r15
	vaesenc	%xmm0,%xmm6,%xmm6
	cmovgq	%rsp,%rbx
	vaesenc	%xmm0,%xmm7,%xmm7
	subq	%r15,%rbx
	vaesenc	%xmm0,%xmm8,%xmm8
	vpxor	16(%r15),%xmm15,%xmm13
	movq	%rbx,64+56(%rsp)
	vaesenc	%xmm0,%xmm9,%xmm9
	vmovups	40(%rsi),%xmm0
	leaq	16(%r15,%rbx,1),%r15
	vmovdqu	32(%rsp),%xmm14
	prefetcht0	15(%r14)
	prefetcht0	15(%r15)
	/* Flags from this compare feed both the jb and the later je. */
	cmpl	$11,%eax
	jb	.Lenc8x_tail

	vaesenc	%xmm1,%xmm2,%xmm2
	vaesenc	%xmm1,%xmm3,%xmm3
	vaesenc	%xmm1,%xmm4,%xmm4
	vaesenc	%xmm1,%xmm5,%xmm5
	vaesenc	%xmm1,%xmm6,%xmm6
	vaesenc	%xmm1,%xmm7,%xmm7
	vaesenc	%xmm1,%xmm8,%xmm8
	vaesenc	%xmm1,%xmm9,%xmm9
	vmovups	176-120(%rsi),%xmm1

	vaesenc	%xmm0,%xmm2,%xmm2
	vaesenc	%xmm0,%xmm3,%xmm3
	vaesenc	%xmm0,%xmm4,%xmm4
	vaesenc	%xmm0,%xmm5,%xmm5
	vaesenc	%xmm0,%xmm6,%xmm6
	vaesenc	%xmm0,%xmm7,%xmm7
	vaesenc	%xmm0,%xmm8,%xmm8
	vaesenc	%xmm0,%xmm9,%xmm9
	vmovups	192-120(%rsi),%xmm0
	je	.Lenc8x_tail

	vaesenc	%xmm1,%xmm2,%xmm2
	vaesenc	%xmm1,%xmm3,%xmm3
	vaesenc	%xmm1,%xmm4,%xmm4
	vaesenc	%xmm1,%xmm5,%xmm5
	vaesenc	%xmm1,%xmm6,%xmm6
	vaesenc	%xmm1,%xmm7,%xmm7
	vaesenc	%xmm1,%xmm8,%xmm8
	vaesenc	%xmm1,%xmm9,%xmm9
	vmovups	208-120(%rsi),%xmm1

	vaesenc	%xmm0,%xmm2,%xmm2
	vaesenc	%xmm0,%xmm3,%xmm3
	vaesenc	%xmm0,%xmm4,%xmm4
	vaesenc	%xmm0,%xmm5,%xmm5
	vaesenc	%xmm0,%xmm6,%xmm6
	vaesenc	%xmm0,%xmm7,%xmm7
	vaesenc	%xmm0,%xmm8,%xmm8
	vaesenc	%xmm0,%xmm9,%xmm9
	vmovups	224-120(%rsi),%xmm0

	/*
	 * Last two rounds, interleaved with decrementing the positive lane
	 * counters (two groups of four), then store the ciphertext, turn
	 * each lane pointer back into a source pointer, and XOR in the
	 * staged next input block.
	 */
.Lenc8x_tail:
	vaesenc	%xmm1,%xmm2,%xmm2
	vpxor	%xmm15,%xmm15,%xmm15
	vaesenc	%xmm1,%xmm3,%xmm3
	vaesenc	%xmm1,%xmm4,%xmm4
	vpcmpgtd	%xmm15,%xmm14,%xmm15
	vaesenc	%xmm1,%xmm5,%xmm5
	vaesenc	%xmm1,%xmm6,%xmm6
	vpaddd	%xmm14,%xmm15,%xmm15
	vmovdqu	48(%rsp),%xmm14
	vaesenc	%xmm1,%xmm7,%xmm7
	movq	64(%rsp),%rbx
	vaesenc	%xmm1,%xmm8,%xmm8
	vaesenc	%xmm1,%xmm9,%xmm9
	vmovups	16-120(%rsi),%xmm1

	vaesenclast	%xmm0,%xmm2,%xmm2
	vmovdqa	%xmm15,32(%rsp)
	vpxor	%xmm15,%xmm15,%xmm15
	vaesenclast	%xmm0,%xmm3,%xmm3
	vaesenclast	%xmm0,%xmm4,%xmm4
	vpcmpgtd	%xmm15,%xmm14,%xmm15
	vaesenclast	%xmm0,%xmm5,%xmm5
	vaesenclast	%xmm0,%xmm6,%xmm6
	vpaddd	%xmm15,%xmm14,%xmm14
	vmovdqu	-120(%rsi),%xmm15
	vaesenclast	%xmm0,%xmm7,%xmm7
	vaesenclast	%xmm0,%xmm8,%xmm8
	vmovdqa	%xmm14,48(%rsp)
	vaesenclast	%xmm0,%xmm9,%xmm9
	vmovups	32-120(%rsi),%xmm0

	vmovups	%xmm2,-16(%r8)
	subq	%rbx,%r8
	vpxor	0(%rbp),%xmm2,%xmm2
	vmovups	%xmm3,-16(%r9)
	subq	72(%rsp),%r9
	vpxor	16(%rbp),%xmm3,%xmm3
	vmovups	%xmm4,-16(%r10)
	subq	80(%rsp),%r10
	vpxor	32(%rbp),%xmm4,%xmm4
	vmovups	%xmm5,-16(%r11)
	subq	88(%rsp),%r11
	vpxor	48(%rbp),%xmm5,%xmm5
	vmovups	%xmm6,-16(%r12)
	subq	96(%rsp),%r12
	vpxor	%xmm10,%xmm6,%xmm6
	vmovups	%xmm7,-16(%r13)
	subq	104(%rsp),%r13
	vpxor	%xmm11,%xmm7,%xmm7
	vmovups	%xmm8,-16(%r14)
	subq	112(%rsp),%r14
	vpxor	%xmm12,%xmm8,%xmm8
	vmovups	%xmm9,-16(%r15)
	subq	120(%rsp),%r15
	vpxor	%xmm13,%xmm9,%xmm9

	decl	%edx
	jnz	.Loop_enc8x

	movq	16(%rsp),%rax
.cfi_def_cfa	%rax,8

.Lenc8x_done:
	vzeroupper
	movq	-48(%rax),%r15
.cfi_restore	%r15
	movq	-40(%rax),%r14
.cfi_restore	%r14
	movq	-32(%rax),%r13
.cfi_restore	%r13
	movq	-24(%rax),%r12
.cfi_restore	%r12
	movq	-16(%rax),%rbp
.cfi_restore	%rbp
	movq	-8(%rax),%rbx
.cfi_restore	%rbx
	leaq	(%rax),%rsp
.cfi_def_cfa_register	%rsp
.Lenc8x_epilogue:
	.byte	0xf3,0xc3
.cfi_endproc
.size	aesni_multi_cbc_encrypt_avx,.-aesni_multi_cbc_encrypt_avx

/*
 * aesni_multi_cbc_decrypt_avx: 8-lane AVX variant, entered through
 * _avx_cbc_dec_shortcut.  Processes one group of eight descriptors.
 *
 * Stack frame: 16 saved %rax, 32..63 lane counters, 64..127
 * (destination - source) offsets, 128..191 next-input staging for lanes
 * 0-3, and from 192 two 128-byte banks of chaining blocks.  %rbp selects
 * the bank and is toggled with xor 128 each iteration; the frame is
 * placed at a 256-byte boundary minus 192 so the toggle stays in range.
 */
.type	aesni_multi_cbc_decrypt_avx,@function
.align	32
aesni_multi_cbc_decrypt_avx:
.cfi_startproc
_avx_cbc_dec_shortcut:
	movq	%rsp,%rax
.cfi_def_cfa_register	%rax
	pushq	%rbx
.cfi_offset	%rbx,-16
	pushq	%rbp
.cfi_offset	%rbp,-24
	pushq	%r12
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_offset	%r15,-56

	subq	$256,%rsp
	andq	$-256,%rsp
	subq	$192,%rsp
	movq	%rax,16(%rsp)
.cfi_escape	0x0f,0x05,0x77,0x10,0x06,0x23,0x08

.Ldec8x_body:
	vzeroupper
	vmovdqu	(%rsi),%xmm15
	leaq	120(%rsi),%rsi
	leaq	160(%rdi),%rdi
	shrl	$1,%edx

.Ldec8x_loop_grande:

	xorl	%edx,%edx
	/* descriptor 0 */
	movl	-144(%rdi),%ecx
	movq	-160(%rdi),%r8
	cmpl	%edx,%ecx
	movq	-152(%rdi),%rbx
	cmovgl	%ecx,%edx
	testl	%ecx,%ecx
	vmovdqu	-136(%rdi),%xmm2
	movl	%ecx,32(%rsp)
	cmovleq	%rsp,%r8
	subq	%r8,%rbx
	movq	%rbx,64(%rsp)
	vmovdqu	%xmm2,192(%rsp)
	/* descriptor 1 */
	movl	-104(%rdi),%ecx
	movq	-120(%rdi),%r9
	cmpl	%edx,%ecx
	movq	-112(%rdi),%rbp
	cmovgl	%ecx,%edx
	testl	%ecx,%ecx
	vmovdqu	-96(%rdi),%xmm3
	movl	%ecx,36(%rsp)
	cmovleq	%rsp,%r9
	subq	%r9,%rbp
	movq	%rbp,72(%rsp)
	vmovdqu	%xmm3,208(%rsp)
	/* descriptor 2 */
	movl	-64(%rdi),%ecx
	movq	-80(%rdi),%r10
	cmpl	%edx,%ecx
	movq	-72(%rdi),%rbp
	cmovgl	%ecx,%edx
	testl	%ecx,%ecx
	vmovdqu	-56(%rdi),%xmm4
	movl	%ecx,40(%rsp)
	cmovleq	%rsp,%r10
	subq	%r10,%rbp
	movq	%rbp,80(%rsp)
	vmovdqu	%xmm4,224(%rsp)
	/* descriptor 3 */
	movl	-24(%rdi),%ecx
	movq	-40(%rdi),%r11
	cmpl	%edx,%ecx
	movq	-32(%rdi),%rbp
	cmovgl	%ecx,%edx
	testl	%ecx,%ecx
	vmovdqu	-16(%rdi),%xmm5
	movl	%ecx,44(%rsp)
	cmovleq	%rsp,%r11
	subq	%r11,%rbp
	movq	%rbp,88(%rsp)
	vmovdqu	%xmm5,240(%rsp)
	/* descriptor 4 */
	movl	16(%rdi),%ecx
	movq	0(%rdi),%r12
	cmpl	%edx,%ecx
	movq	8(%rdi),%rbp
	cmovgl	%ecx,%edx
	testl	%ecx,%ecx
	vmovdqu	24(%rdi),%xmm6
	movl	%ecx,48(%rsp)
	cmovleq	%rsp,%r12
	subq	%r12,%rbp
	movq	%rbp,96(%rsp)
	vmovdqu	%xmm6,256(%rsp)
	/* descriptor 5 */
	movl	56(%rdi),%ecx
	movq	40(%rdi),%r13
	cmpl	%edx,%ecx
	movq	48(%rdi),%rbp
	cmovgl	%ecx,%edx
	testl	%ecx,%ecx
	vmovdqu	64(%rdi),%xmm7
	movl	%ecx,52(%rsp)
	cmovleq	%rsp,%r13
	subq	%r13,%rbp
	movq	%rbp,104(%rsp)
	vmovdqu	%xmm7,272(%rsp)
	/* descriptor 6 */
	movl	96(%rdi),%ecx
	movq	80(%rdi),%r14
	cmpl	%edx,%ecx
	movq	88(%rdi),%rbp
	cmovgl	%ecx,%edx
	testl	%ecx,%ecx
	vmovdqu	104(%rdi),%xmm8
	movl	%ecx,56(%rsp)
	cmovleq	%rsp,%r14
	subq	%r14,%rbp
	movq	%rbp,112(%rsp)
	vmovdqu	%xmm8,288(%rsp)
	/* descriptor 7 */
	movl	136(%rdi),%ecx
	movq	120(%rdi),%r15
	cmpl	%edx,%ecx
	movq	128(%rdi),%rbp
	cmovgl	%ecx,%edx
	testl	%ecx,%ecx
	vmovdqu	144(%rdi),%xmm9
	movl	%ecx,60(%rsp)
	cmovleq	%rsp,%r15
	subq	%r15,%rbp
	movq	%rbp,120(%rsp)
	vmovdqu	%xmm9,304(%rsp)
	testl	%edx,%edx
	jz	.Ldec8x_done

	vmovups	16-120(%rsi),%xmm1
	vmovups	32-120(%rsi),%xmm0
	movl	240-120(%rsi),%eax
	leaq	192+128(%rsp),%rbp

	/*
	 * Load the first input block of each lane, save it in the second
	 * bank as the next chaining value, and XOR in round key 0.  The xor
	 * 0x80 then points %rbp at the first bank, which holds the IVs.
	 */
	vmovdqu	(%r8),%xmm2
	vmovdqu	(%r9),%xmm3
	vmovdqu	(%r10),%xmm4
	vmovdqu	(%r11),%xmm5
	vmovdqu	(%r12),%xmm6
	vmovdqu	(%r13),%xmm7
	vmovdqu	(%r14),%xmm8
	vmovdqu	(%r15),%xmm9
	vmovdqu	%xmm2,0(%rbp)
	vpxor	%xmm15,%xmm2,%xmm2
	vmovdqu	%xmm3,16(%rbp)
	vpxor	%xmm15,%xmm3,%xmm3
	vmovdqu	%xmm4,32(%rbp)
	vpxor	%xmm15,%xmm4,%xmm4
	vmovdqu	%xmm5,48(%rbp)
	vpxor	%xmm15,%xmm5,%xmm5
	vmovdqu	%xmm6,64(%rbp)
	vpxor	%xmm15,%xmm6,%xmm6
	vmovdqu	%xmm7,80(%rbp)
	vpxor	%xmm15,%xmm7,%xmm7
	vmovdqu	%xmm8,96(%rbp)
	vpxor	%xmm15,%xmm8,%xmm8
	vmovdqu	%xmm9,112(%rbp)
	vpxor	%xmm15,%xmm9,%xmm9
	xorq	$0x80,%rbp
	movl	$1,%ecx
	jmp	.Loop_dec8x

	/*
	 * Same shape as .Loop_enc8x: one AES round on all lanes per section,
	 * plus pointer bookkeeping and the raw next-input load for one lane.
	 */
.align	32
.Loop_dec8x:
	/* lane 0 */
	vaesdec	%xmm1,%xmm2,%xmm2
	cmpl	32+0(%rsp),%ecx
	vaesdec	%xmm1,%xmm3,%xmm3
	prefetcht0	31(%r8)
	vaesdec	%xmm1,%xmm4,%xmm4
	vaesdec	%xmm1,%xmm5,%xmm5
	leaq	(%r8,%rbx,1),%rbx
	cmovgeq	%rsp,%r8
	vaesdec	%xmm1,%xmm6,%xmm6
	cmovgq	%rsp,%rbx
	vaesdec	%xmm1,%xmm7,%xmm7
	subq	%r8,%rbx
	vaesdec	%xmm1,%xmm8,%xmm8
	vmovdqu	16(%r8),%xmm10
	movq	%rbx,64+0(%rsp)
	vaesdec	%xmm1,%xmm9,%xmm9
	vmovups	-72(%rsi),%xmm1
	leaq	16(%r8,%rbx,1),%r8
	vmovdqu	%xmm10,128(%rsp)
	/* lane 1 */
	vaesdec	%xmm0,%xmm2,%xmm2
	cmpl	32+4(%rsp),%ecx
	movq	64+8(%rsp),%rbx
	vaesdec	%xmm0,%xmm3,%xmm3
	prefetcht0	31(%r9)
	vaesdec	%xmm0,%xmm4,%xmm4
	vaesdec	%xmm0,%xmm5,%xmm5
	leaq	(%r9,%rbx,1),%rbx
	cmovgeq	%rsp,%r9
	vaesdec	%xmm0,%xmm6,%xmm6
	cmovgq	%rsp,%rbx
	vaesdec	%xmm0,%xmm7,%xmm7
	subq	%r9,%rbx
	vaesdec	%xmm0,%xmm8,%xmm8
	vmovdqu	16(%r9),%xmm11
	movq	%rbx,64+8(%rsp)
	vaesdec	%xmm0,%xmm9,%xmm9
	vmovups	-56(%rsi),%xmm0
	leaq	16(%r9,%rbx,1),%r9
	vmovdqu	%xmm11,144(%rsp)
	/* lane 2 */
	vaesdec	%xmm1,%xmm2,%xmm2
	cmpl	32+8(%rsp),%ecx
	movq	64+16(%rsp),%rbx
	vaesdec	%xmm1,%xmm3,%xmm3
	prefetcht0	31(%r10)
	vaesdec	%xmm1,%xmm4,%xmm4
	prefetcht0	15(%r8)
	vaesdec	%xmm1,%xmm5,%xmm5
	leaq	(%r10,%rbx,1),%rbx
	cmovgeq	%rsp,%r10
	vaesdec	%xmm1,%xmm6,%xmm6
	cmovgq	%rsp,%rbx
	vaesdec	%xmm1,%xmm7,%xmm7
	subq	%r10,%rbx
	vaesdec	%xmm1,%xmm8,%xmm8
	vmovdqu	16(%r10),%xmm12
	movq	%rbx,64+16(%rsp)
	vaesdec	%xmm1,%xmm9,%xmm9
	vmovups	-40(%rsi),%xmm1
	leaq	16(%r10,%rbx,1),%r10
	vmovdqu	%xmm12,160(%rsp)
	/* lane 3 */
	vaesdec	%xmm0,%xmm2,%xmm2
	cmpl	32+12(%rsp),%ecx
	movq	64+24(%rsp),%rbx
	vaesdec	%xmm0,%xmm3,%xmm3
	prefetcht0	31(%r11)
	vaesdec	%xmm0,%xmm4,%xmm4
	prefetcht0	15(%r9)
	vaesdec	%xmm0,%xmm5,%xmm5
	leaq	(%r11,%rbx,1),%rbx
	cmovgeq	%rsp,%r11
	vaesdec	%xmm0,%xmm6,%xmm6
	cmovgq	%rsp,%rbx
	vaesdec	%xmm0,%xmm7,%xmm7
	subq	%r11,%rbx
	vaesdec	%xmm0,%xmm8,%xmm8
	vmovdqu	16(%r11),%xmm13
	movq	%rbx,64+24(%rsp)
	vaesdec	%xmm0,%xmm9,%xmm9
	vmovups	-24(%rsi),%xmm0
	leaq	16(%r11,%rbx,1),%r11
	vmovdqu	%xmm13,176(%rsp)
	/* lane 4 */
	vaesdec	%xmm1,%xmm2,%xmm2
	cmpl	32+16(%rsp),%ecx
	movq	64+32(%rsp),%rbx
	vaesdec	%xmm1,%xmm3,%xmm3
	prefetcht0	31(%r12)
	vaesdec	%xmm1,%xmm4,%xmm4
	prefetcht0	15(%r10)
	vaesdec	%xmm1,%xmm5,%xmm5
	leaq	(%r12,%rbx,1),%rbx
	cmovgeq	%rsp,%r12
	vaesdec	%xmm1,%xmm6,%xmm6
	cmovgq	%rsp,%rbx
	vaesdec	%xmm1,%xmm7,%xmm7
	subq	%r12,%rbx
	vaesdec	%xmm1,%xmm8,%xmm8
	vmovdqu	16(%r12),%xmm10
	movq	%rbx,64+32(%rsp)
	vaesdec	%xmm1,%xmm9,%xmm9
	vmovups	-8(%rsi),%xmm1
	leaq	16(%r12,%rbx,1),%r12
	/* lane 5 */
	vaesdec	%xmm0,%xmm2,%xmm2
	cmpl	32+20(%rsp),%ecx
	movq	64+40(%rsp),%rbx
	vaesdec	%xmm0,%xmm3,%xmm3
	prefetcht0	31(%r13)
	vaesdec	%xmm0,%xmm4,%xmm4
	prefetcht0	15(%r11)
	vaesdec	%xmm0,%xmm5,%xmm5
	leaq	(%rbx,%r13,1),%rbx
	cmovgeq	%rsp,%r13
	vaesdec	%xmm0,%xmm6,%xmm6
	cmovgq	%rsp,%rbx
	vaesdec	%xmm0,%xmm7,%xmm7
	subq	%r13,%rbx
	vaesdec	%xmm0,%xmm8,%xmm8
	vmovdqu	16(%r13),%xmm11
	movq	%rbx,64+40(%rsp)
	vaesdec	%xmm0,%xmm9,%xmm9
	vmovups	8(%rsi),%xmm0
	leaq	16(%r13,%rbx,1),%r13
	/* lane 6 */
	vaesdec	%xmm1,%xmm2,%xmm2
	cmpl	32+24(%rsp),%ecx
	movq	64+48(%rsp),%rbx
	vaesdec	%xmm1,%xmm3,%xmm3
	prefetcht0	31(%r14)
	vaesdec	%xmm1,%xmm4,%xmm4
	prefetcht0	15(%r12)
	vaesdec	%xmm1,%xmm5,%xmm5
	leaq	(%r14,%rbx,1),%rbx
	cmovgeq	%rsp,%r14
	vaesdec	%xmm1,%xmm6,%xmm6
	cmovgq	%rsp,%rbx
	vaesdec	%xmm1,%xmm7,%xmm7
	subq	%r14,%rbx
	vaesdec	%xmm1,%xmm8,%xmm8
	vmovdqu	16(%r14),%xmm12
	movq	%rbx,64+48(%rsp)
	vaesdec	%xmm1,%xmm9,%xmm9
	vmovups	24(%rsi),%xmm1
	leaq	16(%r14,%rbx,1),%r14
	/* lane 7 */
	vaesdec	%xmm0,%xmm2,%xmm2
	cmpl	32+28(%rsp),%ecx
	movq	64+56(%rsp),%rbx
	vaesdec	%xmm0,%xmm3,%xmm3
	prefetcht0	31(%r15)
	vaesdec	%xmm0,%xmm4,%xmm4
	prefetcht0	15(%r13)
	vaesdec	%xmm0,%xmm5,%xmm5
	leaq	(%r15,%rbx,1),%rbx
	cmovgeq	%rsp,%r15
	vaesdec	%xmm0,%xmm6,%xmm6
	cmovgq	%rsp,%rbx
	vaesdec	%xmm0,%xmm7,%xmm7
	subq	%r15,%rbx
	vaesdec	%xmm0,%xmm8,%xmm8
	vmovdqu	16(%r15),%xmm13
	movq	%rbx,64+56(%rsp)
	vaesdec	%xmm0,%xmm9,%xmm9
	vmovups	40(%rsi),%xmm0
	leaq	16(%r15,%rbx,1),%r15
	vmovdqu	32(%rsp),%xmm14
	prefetcht0	15(%r14)
	prefetcht0	15(%r15)
	/* Flags from this compare feed both the jb and the later je. */
	cmpl	$11,%eax
	jb	.Ldec8x_tail

	vaesdec	%xmm1,%xmm2,%xmm2
	vaesdec	%xmm1,%xmm3,%xmm3
	vaesdec	%xmm1,%xmm4,%xmm4
	vaesdec	%xmm1,%xmm5,%xmm5
	vaesdec	%xmm1,%xmm6,%xmm6
	vaesdec	%xmm1,%xmm7,%xmm7
	vaesdec	%xmm1,%xmm8,%xmm8
	vaesdec	%xmm1,%xmm9,%xmm9
	vmovups	176-120(%rsi),%xmm1

	vaesdec	%xmm0,%xmm2,%xmm2
	vaesdec	%xmm0,%xmm3,%xmm3
	vaesdec	%xmm0,%xmm4,%xmm4
	vaesdec	%xmm0,%xmm5,%xmm5
	vaesdec	%xmm0,%xmm6,%xmm6
	vaesdec	%xmm0,%xmm7,%xmm7
	vaesdec	%xmm0,%xmm8,%xmm8
	vaesdec	%xmm0,%xmm9,%xmm9
	vmovups	192-120(%rsi),%xmm0
	je	.Ldec8x_tail

	vaesdec	%xmm1,%xmm2,%xmm2
	vaesdec	%xmm1,%xmm3,%xmm3
	vaesdec	%xmm1,%xmm4,%xmm4
	vaesdec	%xmm1,%xmm5,%xmm5
	vaesdec	%xmm1,%xmm6,%xmm6
	vaesdec	%xmm1,%xmm7,%xmm7
	vaesdec	%xmm1,%xmm8,%xmm8
	vaesdec	%xmm1,%xmm9,%xmm9
	vmovups	208-120(%rsi),%xmm1

	vaesdec	%xmm0,%xmm2,%xmm2
	vaesdec	%xmm0,%xmm3,%xmm3
	vaesdec	%xmm0,%xmm4,%xmm4
	vaesdec	%xmm0,%xmm5,%xmm5
	vaesdec	%xmm0,%xmm6,%xmm6
	vaesdec	%xmm0,%xmm7,%xmm7
	vaesdec	%xmm0,%xmm8,%xmm8
	vaesdec	%xmm0,%xmm9,%xmm9
	vmovups	224-120(%rsi),%xmm0

	/*
	 * Last two rounds, counter decrement, CBC XOR against the current
	 * bank at (%rbp), then store plaintext and write the just-loaded
	 * input blocks into that same bank before %rbp is toggled.
	 */
.Ldec8x_tail:
	vaesdec	%xmm1,%xmm2,%xmm2
	vpxor	%xmm15,%xmm15,%xmm15
	vaesdec	%xmm1,%xmm3,%xmm3
	vaesdec	%xmm1,%xmm4,%xmm4
	vpcmpgtd	%xmm15,%xmm14,%xmm15
	vaesdec	%xmm1,%xmm5,%xmm5
	vaesdec	%xmm1,%xmm6,%xmm6
	vpaddd	%xmm14,%xmm15,%xmm15
	vmovdqu	48(%rsp),%xmm14
	vaesdec	%xmm1,%xmm7,%xmm7
	movq	64(%rsp),%rbx
	vaesdec	%xmm1,%xmm8,%xmm8
	vaesdec	%xmm1,%xmm9,%xmm9
	vmovups	16-120(%rsi),%xmm1

	vaesdeclast	%xmm0,%xmm2,%xmm2
	vmovdqa	%xmm15,32(%rsp)
	vpxor	%xmm15,%xmm15,%xmm15
	vaesdeclast	%xmm0,%xmm3,%xmm3
	vpxor	0(%rbp),%xmm2,%xmm2
	vaesdeclast	%xmm0,%xmm4,%xmm4
	vpxor	16(%rbp),%xmm3,%xmm3
	vpcmpgtd	%xmm15,%xmm14,%xmm15
	vaesdeclast	%xmm0,%xmm5,%xmm5
	vpxor	32(%rbp),%xmm4,%xmm4
	vaesdeclast	%xmm0,%xmm6,%xmm6
	vpxor	48(%rbp),%xmm5,%xmm5
	vpaddd	%xmm15,%xmm14,%xmm14
	vmovdqu	-120(%rsi),%xmm15
	vaesdeclast	%xmm0,%xmm7,%xmm7
	vpxor	64(%rbp),%xmm6,%xmm6
	vaesdeclast	%xmm0,%xmm8,%xmm8
	vpxor	80(%rbp),%xmm7,%xmm7
	vmovdqa	%xmm14,48(%rsp)
	vaesdeclast	%xmm0,%xmm9,%xmm9
	vpxor	96(%rbp),%xmm8,%xmm8
	vmovups	32-120(%rsi),%xmm0

	vmovups	%xmm2,-16(%r8)
	subq	%rbx,%r8
	vmovdqu	128+0(%rsp),%xmm2
	vpxor	112(%rbp),%xmm9,%xmm9
	vmovups	%xmm3,-16(%r9)
	subq	72(%rsp),%r9
	vmovdqu	%xmm2,0(%rbp)
	vpxor	%xmm15,%xmm2,%xmm2
	vmovdqu	128+16(%rsp),%xmm3
	vmovups	%xmm4,-16(%r10)
	subq	80(%rsp),%r10
	vmovdqu	%xmm3,16(%rbp)
	vpxor	%xmm15,%xmm3,%xmm3
	vmovdqu	128+32(%rsp),%xmm4
	vmovups	%xmm5,-16(%r11)
	subq	88(%rsp),%r11
	vmovdqu	%xmm4,32(%rbp)
	vpxor	%xmm15,%xmm4,%xmm4
	vmovdqu	128+48(%rsp),%xmm5
	vmovups	%xmm6,-16(%r12)
	subq	96(%rsp),%r12
	vmovdqu	%xmm5,48(%rbp)
	vpxor	%xmm15,%xmm5,%xmm5
	vmovdqu	%xmm10,64(%rbp)
	vpxor	%xmm10,%xmm15,%xmm6
	vmovups	%xmm7,-16(%r13)
	subq	104(%rsp),%r13
	vmovdqu	%xmm11,80(%rbp)
	vpxor	%xmm11,%xmm15,%xmm7
	vmovups	%xmm8,-16(%r14)
	subq	112(%rsp),%r14
	vmovdqu	%xmm12,96(%rbp)
	vpxor	%xmm12,%xmm15,%xmm8
	vmovups	%xmm9,-16(%r15)
	subq	120(%rsp),%r15
	vmovdqu	%xmm13,112(%rbp)
	vpxor	%xmm13,%xmm15,%xmm9

	xorq	$128,%rbp
	decl	%edx
	jnz	.Loop_dec8x

	movq	16(%rsp),%rax
.cfi_def_cfa	%rax,8

.Ldec8x_done:
	vzeroupper
	movq	-48(%rax),%r15
.cfi_restore	%r15
	movq	-40(%rax),%r14
.cfi_restore	%r14
	movq	-32(%rax),%r13
.cfi_restore	%r13
	movq	-24(%rax),%r12
.cfi_restore	%r12
	movq	-16(%rax),%rbp
.cfi_restore	%rbp
	movq	-8(%rax),%rbx
.cfi_restore	%rbx
	leaq	(%rax),%rsp
.cfi_def_cfa_register	%rsp
.Ldec8x_epilogue:
	.byte	0xf3,0xc3
.cfi_endproc
.size	aesni_multi_cbc_decrypt_avx,.-aesni_multi_cbc_decrypt_avx