/*
 * Core routines of a vector-permute AES implementation (x86-64, SSSE3),
 * AT&T syntax.  SSSE3 instructions are emitted as raw .byte sequences;
 * the decoded mnemonic is given in the comment next to each one.
 * ".byte 0xf3,0xc3" is the two-byte "rep ret" return.
 *
 * The routines in this section are internal helpers with a private
 * register contract; they are not declared .globl.
 */
.text

/*
 * _vpaes_encrypt_core
 *
 * In:   %xmm0        16-byte block to transform
 *       %rdx         key material; the 32-bit loop count is read from
 *                    byte offset 240, 16-byte round values are read
 *                    sequentially starting at offset 0
 *       %xmm9-%xmm15 lookup constants; this routine only reads them
 *                    (_vpaes_preheat is the routine that loads them)
 * Out:  %xmm0        transformed block
 * Clobbers: %rax, %r9, %r10, %r11, %xmm1-%xmm5, flags
 */
.p2align	4
_vpaes_encrypt_core:
	movq	%rdx,%r9			/* r9 = cursor over the round values */
	movq	$16,%r11			/* r11 = table offset, advanced by 16 and masked with 0x30 per round */
	movl	240(%rdx),%eax			/* eax = loop counter */
	movdqa	%xmm9,%xmm1
	movdqa	L$k_ipt(%rip),%xmm2
	pandn	%xmm0,%xmm1			/* xmm1 = xmm0 & ~xmm9 */
	movdqu	(%r9),%xmm5			/* first round value */
	psrld	$4,%xmm1			/* move the selected bits down by 4 */
	pand	%xmm9,%xmm0			/* xmm0 = xmm0 & xmm9 */
	.byte	102,15,56,0,208			/* pshufb %xmm0,%xmm2 */
	movdqa	L$k_ipt+16(%rip),%xmm0
	.byte	102,15,56,0,193			/* pshufb %xmm1,%xmm0 */
	pxor	%xmm5,%xmm2
	addq	$16,%r9
	pxor	%xmm2,%xmm0
	leaq	L$k_mc_backward(%rip),%r10	/* r10 = base for the displaced table loads below */
	jmp	L$enc_entry

.p2align	4
L$enc_loop:
	/* Middle of a round: combine the table lookups selected by xmm2/xmm3. */
	movdqa	%xmm13,%xmm4
	movdqa	%xmm12,%xmm0
	.byte	102,15,56,0,226			/* pshufb %xmm2,%xmm4 */
	.byte	102,15,56,0,195			/* pshufb %xmm3,%xmm0 */
	pxor	%xmm5,%xmm4			/* fold in the round value loaded at the end of L$enc_entry */
	movdqa	%xmm15,%xmm5
	pxor	%xmm4,%xmm0
	movdqa	-64(%r11,%r10,1),%xmm1		/* with the table layout in _vpaes_consts: L$k_mc_forward + r11 */
	.byte	102,15,56,0,234			/* pshufb %xmm2,%xmm5 */
	movdqa	(%r11,%r10,1),%xmm4		/* L$k_mc_backward + r11 */
	movdqa	%xmm14,%xmm2
	.byte	102,15,56,0,211			/* pshufb %xmm3,%xmm2 */
	movdqa	%xmm0,%xmm3
	pxor	%xmm5,%xmm2
	.byte	102,15,56,0,193			/* pshufb %xmm1,%xmm0 */
	addq	$16,%r9				/* advance to the next round value */
	pxor	%xmm2,%xmm0
	.byte	102,15,56,0,220			/* pshufb %xmm4,%xmm3 */
	addq	$16,%r11
	pxor	%xmm0,%xmm3
	.byte	102,15,56,0,193			/* pshufb %xmm1,%xmm0 */
	andq	$0x30,%r11			/* keep r11 in {0,16,32,48} */
	subq	$1,%rax				/* sets ZF for the jnz at the end of L$enc_entry */
	pxor	%xmm3,%xmm0

L$enc_entry:
	/*
	 * Top of a round.  Only SSE instructions follow until the jnz, and
	 * they do not modify EFLAGS, so the jnz tests the subq above (or,
	 * on the first pass, the addq $16,%r9 executed before the jmp).
	 */
	movdqa	%xmm9,%xmm1
	movdqa	%xmm11,%xmm5
	pandn	%xmm0,%xmm1
	psrld	$4,%xmm1
	pand	%xmm9,%xmm0
	.byte	102,15,56,0,232			/* pshufb %xmm0,%xmm5 */
	movdqa	%xmm10,%xmm3
	pxor	%xmm1,%xmm0
	.byte	102,15,56,0,217			/* pshufb %xmm1,%xmm3 */
	movdqa	%xmm10,%xmm4
	pxor	%xmm5,%xmm3
	.byte	102,15,56,0,224			/* pshufb %xmm0,%xmm4 */
	movdqa	%xmm10,%xmm2
	pxor	%xmm5,%xmm4
	.byte	102,15,56,0,211			/* pshufb %xmm3,%xmm2 */
	movdqa	%xmm10,%xmm3
	pxor	%xmm0,%xmm2
	.byte	102,15,56,0,220			/* pshufb %xmm4,%xmm3 */
	movdqu	(%r9),%xmm5			/* next round value */
	pxor	%xmm1,%xmm3
	jnz	L$enc_loop

	/* Final step: different tables, then one last byte permutation. */
	movdqa	-96(%r10),%xmm4			/* with the table layout in _vpaes_consts: L$k_sbo */
	movdqa	-80(%r10),%xmm0			/* L$k_sbo + 16 */
	.byte	102,15,56,0,226			/* pshufb %xmm2,%xmm4 */
	pxor	%xmm5,%xmm4
	.byte	102,15,56,0,195			/* pshufb %xmm3,%xmm0 */
	movdqa	64(%r11,%r10,1),%xmm1		/* L$k_sr + r11 */
	pxor	%xmm4,%xmm0
	.byte	102,15,56,0,193			/* pshufb %xmm1,%xmm0 */
	.byte	0xf3,0xc3			/* rep ret */

/*
 * _vpaes_decrypt_core
 *
 * In:   %xmm0        16-byte block to transform
 *       %rdx         key material; loop count at byte offset 240,
 *                    round values read sequentially from offset 0
 *       %xmm9-%xmm11 lookup constants (read only)
 * Out:  %xmm0        transformed block
 * Clobbers: %rax, %r9, %r10, %r11, %xmm1-%xmm5, flags
 */
.p2align	4
_vpaes_decrypt_core:
	movq	%rdx,%r9			/* r9 = cursor over the round values */
	movl	240(%rdx),%eax			/* eax = loop counter */
	movdqa	%xmm9,%xmm1
	movdqa	L$k_dipt(%rip),%xmm2
	pandn	%xmm0,%xmm1
	movq	%rax,%r11
	psrld	$4,%xmm1
	movdqu	(%r9),%xmm5			/* first round value */
	shlq	$4,%r11
	pand	%xmm9,%xmm0
	.byte	102,15,56,0,208			/* pshufb %xmm0,%xmm2 */
	movdqa	L$k_dipt+16(%rip),%xmm0
	xorq	$0x30,%r11
	leaq	L$k_dsbd(%rip),%r10		/* r10 = base for the displaced table loads below */
	.byte	102,15,56,0,193			/* pshufb %xmm1,%xmm0 */
	andq	$0x30,%r11			/* r11 = ((count << 4) ^ 0x30) & 0x30 */
	pxor	%xmm5,%xmm2
	movdqa	L$k_mc_forward+48(%rip),%xmm5	/* xmm5 = byte permutation, rotated once per loop pass */
	pxor	%xmm2,%xmm0
	addq	$16,%r9
	addq	%r10,%r11			/* r11 = L$k_dsbd + offset; used only by the final -352(%r11) load */
	jmp	L$dec_entry

.p2align	4
L$dec_loop:
	/*
	 * Four table pairs at -32, 0, 32 and 64 bytes from r10; with the
	 * layout in _vpaes_consts these are L$k_dsb9, L$k_dsbd, L$k_dsbb
	 * and L$k_dsbe.  xmm0 enters holding the round value loaded at the
	 * end of L$dec_entry.
	 */
	movdqa	-32(%r10),%xmm4
	movdqa	-16(%r10),%xmm1
	.byte	102,15,56,0,226			/* pshufb %xmm2,%xmm4 */
	.byte	102,15,56,0,203			/* pshufb %xmm3,%xmm1 */
	pxor	%xmm4,%xmm0
	movdqa	0(%r10),%xmm4
	pxor	%xmm1,%xmm0
	movdqa	16(%r10),%xmm1

	.byte	102,15,56,0,226			/* pshufb %xmm2,%xmm4 */
	.byte	102,15,56,0,197			/* pshufb %xmm5,%xmm0 */
	.byte	102,15,56,0,203			/* pshufb %xmm3,%xmm1 */
	pxor	%xmm4,%xmm0
	movdqa	32(%r10),%xmm4
	pxor	%xmm1,%xmm0
	movdqa	48(%r10),%xmm1

	.byte	102,15,56,0,226			/* pshufb %xmm2,%xmm4 */
	.byte	102,15,56,0,197			/* pshufb %xmm5,%xmm0 */
	.byte	102,15,56,0,203			/* pshufb %xmm3,%xmm1 */
	pxor	%xmm4,%xmm0
	movdqa	64(%r10),%xmm4
	pxor	%xmm1,%xmm0
	movdqa	80(%r10),%xmm1

	.byte	102,15,56,0,226			/* pshufb %xmm2,%xmm4 */
	.byte	102,15,56,0,197			/* pshufb %xmm5,%xmm0 */
	.byte	102,15,56,0,203			/* pshufb %xmm3,%xmm1 */
	pxor	%xmm4,%xmm0
	addq	$16,%r9				/* advance to the next round value */
	.byte	102,15,58,15,237,12		/* palignr $12,%xmm5,%xmm5 (byte-rotate xmm5) */
	pxor	%xmm1,%xmm0
	subq	$1,%rax				/* sets ZF for the jnz at the end of L$dec_entry */

L$dec_entry:
	/*
	 * Top of a round.  As in the encrypt core, no instruction between
	 * here and the jnz touches EFLAGS, so the jnz tests the subq above
	 * (or the addq %r10,%r11 on the first pass).
	 */
	movdqa	%xmm9,%xmm1
	pandn	%xmm0,%xmm1
	movdqa	%xmm11,%xmm2
	psrld	$4,%xmm1
	pand	%xmm9,%xmm0
	.byte	102,15,56,0,208			/* pshufb %xmm0,%xmm2 */
	movdqa	%xmm10,%xmm3
	pxor	%xmm1,%xmm0
	.byte	102,15,56,0,217			/* pshufb %xmm1,%xmm3 */
	movdqa	%xmm10,%xmm4
	pxor	%xmm2,%xmm3
	.byte	102,15,56,0,224			/* pshufb %xmm0,%xmm4 */
	pxor	%xmm2,%xmm4
	movdqa	%xmm10,%xmm2
	.byte	102,15,56,0,211			/* pshufb %xmm3,%xmm2 */
	movdqa	%xmm10,%xmm3
	pxor	%xmm0,%xmm2
	.byte	102,15,56,0,220			/* pshufb %xmm4,%xmm3 */
	movdqu	(%r9),%xmm0			/* next round value goes straight into the accumulator */
	pxor	%xmm1,%xmm3
	jnz	L$dec_loop

	/* Final step. */
	movdqa	96(%r10),%xmm4			/* with the table layout in _vpaes_consts: L$k_dsbo */
	.byte	102,15,56,0,226			/* pshufb %xmm2,%xmm4 */
	pxor	%xmm0,%xmm4
	movdqa	112(%r10),%xmm0			/* L$k_dsbo + 16 */
	movdqa	-352(%r11),%xmm2		/* with the same layout: L$k_sr + (r11 - L$k_dsbd) */
	.byte	102,15,56,0,195			/* pshufb %xmm3,%xmm0 */
	pxor	%xmm4,%xmm0
	.byte	102,15,56,0,194			/* pshufb %xmm2,%xmm0 */
	.byte	0xf3,0xc3			/* rep ret */

/*
 * _vpaes_schedule_core
 *
 * Expands a user key into the round-value array used by the cores.
 *
 * In:   %rdi  user key bytes (16 read at offset 0; the 192 path also
 *             reads 16 at offset 8, the 256 path 16 at offset 16)
 *       %esi  key size selector: ==192 takes the 192 path, >192
 *             (unsigned) the 256 path, anything else the 128 path
 *       %rdx  where the first round value is stored; moved by +16 or
 *             -16 per stored value by _vpaes_schedule_mangle
 *       %rcx  zero = forward ("encrypting") layout, non-zero = the
 *             L$schedule_am_decrypting / *_dec paths
 *       %r8   byte offset into L$k_sr selecting a permutation
 * Out:  schedule written through %rdx; %xmm0-%xmm7 are zeroed before
 *       returning so no key-derived data is left in them
 * Clobbers: %rsi, %rdx, %r8, %r10, %r11, %xmm0-%xmm15 (via
 *           _vpaes_preheat and the helpers), flags
 */
.p2align	4
_vpaes_schedule_core:
	call	_vpaes_preheat			/* load the shared lookup constants */
	movdqa	L$k_rcon(%rip),%xmm8		/* xmm8 is consumed byte by byte in _vpaes_schedule_round */
	movdqu	(%rdi),%xmm0			/* first 16 key bytes */

	movdqa	%xmm0,%xmm3			/* keep the untransformed copy for the decrypting path */
	leaq	L$k_ipt(%rip),%r11
	call	_vpaes_schedule_transform
	movdqa	%xmm0,%xmm7			/* xmm7 = running state used by the round helper */

	leaq	L$k_sr(%rip),%r10		/* r10 stays = L$k_sr for every (%r8,%r10,1) load */
	testq	%rcx,%rcx
	jnz	L$schedule_am_decrypting

	movdqu	%xmm0,(%rdx)			/* forward layout: store the transformed key */
	jmp	L$schedule_go

L$schedule_am_decrypting:
	movdqa	(%r8,%r10,1),%xmm1
	.byte	102,15,56,0,217			/* pshufb %xmm1,%xmm3 */
	movdqu	%xmm3,(%rdx)			/* store the permuted, untransformed key */
	xorq	$0x30,%r8

L$schedule_go:
	cmpl	$192,%esi
	ja	L$schedule_256
	je	L$schedule_192
	/* fall through for every other value of %esi */

L$schedule_128:
	movl	$10,%esi			/* esi is reused as the round counter */

L$oop_schedule_128:
	call	_vpaes_schedule_round
	decq	%rsi
	jz	L$schedule_mangle_last		/* last value takes the special output path */
	call	_vpaes_schedule_mangle
	jmp	L$oop_schedule_128

.p2align	4
L$schedule_192:
	movdqu	8(%rdi),%xmm0			/* key bytes 8..23 */
	call	_vpaes_schedule_transform
	movdqa	%xmm0,%xmm6
	pxor	%xmm4,%xmm4
	movhlps	%xmm4,%xmm6			/* zero the low 8 bytes of xmm6, keep the high 8 */
	movl	$4,%esi

L$oop_schedule_192:
	call	_vpaes_schedule_round
	.byte	102,15,58,15,198,8		/* palignr $8,%xmm6,%xmm0 */
	call	_vpaes_schedule_mangle
	call	_vpaes_schedule_192_smear
	call	_vpaes_schedule_mangle
	call	_vpaes_schedule_round
	decq	%rsi
	jz	L$schedule_mangle_last
	call	_vpaes_schedule_mangle
	call	_vpaes_schedule_192_smear
	jmp	L$oop_schedule_192

.p2align	4
L$schedule_256:
	movdqu	16(%rdi),%xmm0			/* key bytes 16..31 */
	call	_vpaes_schedule_transform
	movl	$7,%esi

L$oop_schedule_256:
	call	_vpaes_schedule_mangle
	movdqa	%xmm0,%xmm6			/* remember this value for the low round below */

	call	_vpaes_schedule_round
	decq	%rsi
	jz	L$schedule_mangle_last
	call	_vpaes_schedule_mangle

	pshufd	$0xFF,%xmm0,%xmm0		/* broadcast the top dword */
	movdqa	%xmm7,%xmm5			/* save xmm7 around the call */
	movdqa	%xmm6,%xmm7
	call	_vpaes_schedule_low_round
	movdqa	%xmm5,%xmm7			/* restore xmm7 */

	jmp	L$oop_schedule_256

.p2align	4
L$schedule_mangle_last:
	leaq	L$k_deskew(%rip),%r11		/* table used when %rcx is non-zero */
	testq	%rcx,%rcx
	jnz	L$schedule_mangle_last_dec

	movdqa	(%r8,%r10,1),%xmm1
	.byte	102,15,56,0,193			/* pshufb %xmm1,%xmm0 */
	leaq	L$k_opt(%rip),%r11		/* forward layout uses L$k_opt instead */
	addq	$32,%rdx			/* net +16 after the -16 below */

L$schedule_mangle_last_dec:
	addq	$-16,%rdx
	pxor	L$k_s63(%rip),%xmm0
	call	_vpaes_schedule_transform
	movdqu	%xmm0,(%rdx)			/* store the final round value */

	/* Wipe the registers that held key-derived data. */
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	pxor	%xmm6,%xmm6
	pxor	%xmm7,%xmm7
	.byte	0xf3,0xc3			/* rep ret */

/*
 * _vpaes_schedule_192_smear
 *
 * Helper for the 192 path of _vpaes_schedule_core.
 * In:   %xmm6, %xmm7
 * Out:  %xmm0 = xmm6 ^ pshufd(0x80, xmm6) ^ pshufd(0xFE, xmm7)
 *       %xmm6 = that same value with its low 8 bytes zeroed
 * Clobbers: %xmm1 (left zero)
 */
.p2align	4
_vpaes_schedule_192_smear:
	pshufd	$0x80,%xmm6,%xmm1
	pshufd	$0xFE,%xmm7,%xmm0
	pxor	%xmm1,%xmm6
	pxor	%xmm1,%xmm1
	pxor	%xmm0,%xmm6
	movdqa	%xmm6,%xmm0
	movhlps	%xmm1,%xmm6			/* low 8 bytes of xmm6 := 0 */
	.byte	0xf3,0xc3			/* rep ret */

/*
 * _vpaes_schedule_round / _vpaes_schedule_low_round
 *
 * One step of the schedule.  _vpaes_schedule_round first folds the top
 * byte of %xmm8 into %xmm7, rotates %xmm8 by one byte, and broadcasts
 * and byte-rotates the top dword of %xmm0; it then falls through into
 * _vpaes_schedule_low_round, which is also called directly by the 256
 * path.
 *
 * In:   %xmm0, %xmm7, %xmm8 (round entry only), %xmm9-%xmm13
 * Out:  %xmm0 = %xmm7 = new value
 * Clobbers: %xmm1-%xmm4; %xmm8 is rotated by the round entry
 */
.p2align	4
_vpaes_schedule_round:
	pxor	%xmm1,%xmm1
	.byte	102,65,15,58,15,200,15		/* palignr $15,%xmm8,%xmm1 (byte 0 of xmm1 = top byte of xmm8) */
	.byte	102,69,15,58,15,192,15		/* palignr $15,%xmm8,%xmm8 (byte-rotate xmm8) */
	pxor	%xmm1,%xmm7

	pshufd	$0xFF,%xmm0,%xmm0		/* broadcast the top dword */
	.byte	102,15,58,15,192,1		/* palignr $1,%xmm0,%xmm0 (byte-rotate xmm0) */

	/* fall through */

_vpaes_schedule_low_round:
	/* xmm7 ^= xmm7 << 32; xmm7 ^= xmm7 << 64; xmm7 ^= L$k_s63 */
	movdqa	%xmm7,%xmm1
	pslldq	$4,%xmm7
	pxor	%xmm1,%xmm7
	movdqa	%xmm7,%xmm1
	pslldq	$8,%xmm7
	pxor	%xmm1,%xmm7
	pxor	L$k_s63(%rip),%xmm7

	/* Table-lookup substitution of xmm0, same shape as L$enc_entry. */
	movdqa	%xmm9,%xmm1
	pandn	%xmm0,%xmm1
	psrld	$4,%xmm1
	pand	%xmm9,%xmm0
	movdqa	%xmm11,%xmm2
	.byte	102,15,56,0,208			/* pshufb %xmm0,%xmm2 */
	pxor	%xmm1,%xmm0
	movdqa	%xmm10,%xmm3
	.byte	102,15,56,0,217			/* pshufb %xmm1,%xmm3 */
	pxor	%xmm2,%xmm3
	movdqa	%xmm10,%xmm4
	.byte	102,15,56,0,224			/* pshufb %xmm0,%xmm4 */
	pxor	%xmm2,%xmm4
	movdqa	%xmm10,%xmm2
	.byte	102,15,56,0,211			/* pshufb %xmm3,%xmm2 */
	pxor	%xmm0,%xmm2
	movdqa	%xmm10,%xmm3
	.byte	102,15,56,0,220			/* pshufb %xmm4,%xmm3 */
	pxor	%xmm1,%xmm3
	movdqa	%xmm13,%xmm4
	.byte	102,15,56,0,226			/* pshufb %xmm2,%xmm4 */
	movdqa	%xmm12,%xmm0
	.byte	102,15,56,0,195			/* pshufb %xmm3,%xmm0 */
	pxor	%xmm4,%xmm0

	/* Combine with the smeared xmm7 and publish in both registers. */
	pxor	%xmm7,%xmm0
	movdqa	%xmm0,%xmm7
	.byte	0xf3,0xc3			/* rep ret */

.p2align	4
/*
 * _vpaes_schedule_transform
 *
 * Byte-wise lookup through a 32-byte table pair.
 *
 * In:   %xmm0  input
 *       %r11   table: 16 bytes at offset 0 and 16 bytes at offset 16
 *              (loaded with movdqa, so 16-byte alignment is required)
 *       %xmm9  bit mask used to split each byte; _vpaes_preheat loads
 *              it from L$k_s0F
 * Out:  %xmm0 = table0[input & mask] ^ table1[(input & ~mask) >> 4],
 *       indexed per byte with pshufb
 * Clobbers: %xmm1, %xmm2
 */
_vpaes_schedule_transform:
	movdqa	%xmm9,%xmm1
	pandn	%xmm0,%xmm1			/* xmm1 = xmm0 & ~xmm9 */
	psrld	$4,%xmm1
	pand	%xmm9,%xmm0			/* xmm0 = xmm0 & xmm9 */
	movdqa	(%r11),%xmm2
	.byte	102,15,56,0,208			/* pshufb %xmm0,%xmm2 */
	movdqa	16(%r11),%xmm0
	.byte	102,15,56,0,193			/* pshufb %xmm1,%xmm0 */
	pxor	%xmm2,%xmm0
	.byte	0xf3,0xc3			/* rep ret */

/*
 * _vpaes_schedule_mangle
 *
 * Converts the value in %xmm0 to its stored form and writes it to the
 * schedule.
 *
 * In:   %xmm0  value to store (left unmodified)
 *       %rcx   zero: forward path, %rdx advances by +16 before the store
 *              non-zero: L$schedule_mangle_dec path, %rdx moves by -16
 *       %rdx   schedule cursor
 *       %r8    offset into the table at %r10 for the final permutation
 *       %r10   permutation table base (_vpaes_schedule_core sets it to
 *              L$k_sr)
 *       %xmm9  split mask (used on the non-zero %rcx path)
 * Out:  16 bytes stored at the updated %rdx;
 *       %r8 = (%r8 - 16) & 0x30
 * Clobbers: %r11 (non-zero %rcx path), %xmm1-%xmm5, flags
 */
.p2align	4
_vpaes_schedule_mangle:
	movdqa	%xmm0,%xmm4			/* work on a copy so xmm0 survives */
	movdqa	L$k_mc_forward(%rip),%xmm5
	testq	%rcx,%rcx
	jnz	L$schedule_mangle_dec

	/* Forward path: xor with L$k_s63, then xor of three successive permutations. */
	addq	$16,%rdx
	pxor	L$k_s63(%rip),%xmm4
	.byte	102,15,56,0,229			/* pshufb %xmm5,%xmm4 */
	movdqa	%xmm4,%xmm3
	.byte	102,15,56,0,229			/* pshufb %xmm5,%xmm4 */
	pxor	%xmm4,%xmm3
	.byte	102,15,56,0,229			/* pshufb %xmm5,%xmm4 */
	pxor	%xmm4,%xmm3

	jmp	L$schedule_mangle_both
.p2align	4
L$schedule_mangle_dec:
	/*
	 * Four table pairs starting at L$k_dksd, 32 bytes apart.  Each
	 * stage looks up both split halves and accumulates into xmm3; the
	 * first three stages then permute xmm3 with xmm5.
	 */
	leaq	L$k_dksd(%rip),%r11
	movdqa	%xmm9,%xmm1
	pandn	%xmm4,%xmm1
	psrld	$4,%xmm1			/* xmm1 = (xmm4 & ~xmm9) >> 4 */
	pand	%xmm9,%xmm4			/* xmm4 = xmm4 & xmm9 */

	movdqa	0(%r11),%xmm2
	.byte	102,15,56,0,212			/* pshufb %xmm4,%xmm2 */
	movdqa	16(%r11),%xmm3
	.byte	102,15,56,0,217			/* pshufb %xmm1,%xmm3 */
	pxor	%xmm2,%xmm3
	.byte	102,15,56,0,221			/* pshufb %xmm5,%xmm3 */

	movdqa	32(%r11),%xmm2
	.byte	102,15,56,0,212			/* pshufb %xmm4,%xmm2 */
	pxor	%xmm3,%xmm2
	movdqa	48(%r11),%xmm3
	.byte	102,15,56,0,217			/* pshufb %xmm1,%xmm3 */
	pxor	%xmm2,%xmm3
	.byte	102,15,56,0,221			/* pshufb %xmm5,%xmm3 */

	movdqa	64(%r11),%xmm2
	.byte	102,15,56,0,212			/* pshufb %xmm4,%xmm2 */
	pxor	%xmm3,%xmm2
	movdqa	80(%r11),%xmm3
	.byte	102,15,56,0,217			/* pshufb %xmm1,%xmm3 */
	pxor	%xmm2,%xmm3
	.byte	102,15,56,0,221			/* pshufb %xmm5,%xmm3 */

	movdqa	96(%r11),%xmm2
	.byte	102,15,56,0,212			/* pshufb %xmm4,%xmm2 */
	pxor	%xmm3,%xmm2
	movdqa	112(%r11),%xmm3
	.byte	102,15,56,0,217			/* pshufb %xmm1,%xmm3 */
	pxor	%xmm2,%xmm3

	addq	$-16,%rdx			/* this path fills the schedule backwards */

L$schedule_mangle_both:
	movdqa	(%r8,%r10,1),%xmm1
	.byte	102,15,56,0,217			/* pshufb %xmm1,%xmm3 */
	addq	$-16,%r8
	andq	$0x30,%r8			/* step the permutation offset, wrapping within 0..0x30 */
	movdqu	%xmm3,(%rdx)
	.byte	0xf3,0xc3			/* rep ret */

/*
 * _vpaes_set_encrypt_key   (exported)
 *
 * In:   %rdi  user key bytes (consumed by _vpaes_schedule_core)
 *       %esi  key size in bits
 *       %rdx  key structure to fill; (bits >> 5) + 5 is stored as a
 *             32-bit value at byte offset 240
 * Out:  %eax = 0 (always)
 */
.globl	_vpaes_set_encrypt_key

.p2align	4
_vpaes_set_encrypt_key:
	movl	%esi,%eax
	shrl	$5,%eax
	addl	$5,%eax				/* eax = bits/32 + 5 */
	movl	%eax,240(%rdx)

	movl	$0,%ecx				/* rcx = 0: forward schedule layout */
	movl	$0x30,%r8d			/* initial permutation offset */
	call	_vpaes_schedule_core
	xorl	%eax,%eax
	.byte	0xf3,0xc3			/* rep ret */

/*
 * _vpaes_set_decrypt_key   (exported)
 *
 * Same inputs as _vpaes_set_encrypt_key.  The schedule is produced
 * with %rcx = 1, starting at %rdx + 16 + 16 * count (count is the
 * value stored at offset 240) and moving downwards.
 * Out:  %eax = 0 (always)
 */
.globl	_vpaes_set_decrypt_key

.p2align	4
_vpaes_set_decrypt_key:
	movl	%esi,%eax
	shrl	$5,%eax
	addl	$5,%eax				/* eax = bits/32 + 5 */
	movl	%eax,240(%rdx)
	shll	$4,%eax
	leaq	16(%rdx,%rax,1),%rdx		/* rdx = rdx + 16 + 16 * count */

	movl	$1,%ecx				/* rcx != 0: decrypting layout */
	movl	%esi,%r8d
	shrl	$1,%r8d
	andl	$32,%r8d
	xorl	$32,%r8d			/* r8 = 32 unless bit 6 of the bit count is set, then 0 */
	call	_vpaes_schedule_core
	xorl	%eax,%eax
	.byte	0xf3,0xc3			/* rep ret */

/*
 * _vpaes_encrypt   (exported)
 *
 * In:   %rdi  16 input bytes (unaligned load)
 *       %rsi  16 output bytes (unaligned store)
 *       %rdx  key structure, passed through to _vpaes_encrypt_core
 */
.globl	_vpaes_encrypt

.p2align	4
_vpaes_encrypt:
	movdqu	(%rdi),%xmm0
	call	_vpaes_preheat
	call	_vpaes_encrypt_core
	movdqu	%xmm0,(%rsi)
	.byte	0xf3,0xc3			/* rep ret */

/*
 * _vpaes_decrypt   (exported)
 *
 * Same register interface as _vpaes_encrypt, using
 * _vpaes_decrypt_core.
 */
.globl	_vpaes_decrypt

.p2align	4
_vpaes_decrypt:
	movdqu	(%rdi),%xmm0
	call	_vpaes_preheat
	call	_vpaes_decrypt_core
	movdqu	%xmm0,(%rsi)
	.byte	0xf3,0xc3			/* rep ret */

/*
 * _vpaes_cbc_encrypt   (exported)
 *
 * In:   %rdi  input buffer
 *       %rsi  output buffer
 *       %rdx  length in bytes
 *       %rcx  key structure
 *       %r8   16-byte chaining value, read on entry and written back
 *       %r9d  non-zero: encrypt loop; zero: decrypt loop
 *
 * Only whole 16-byte blocks are processed; a trailing partial block
 * is ignored.  If the length is below 16 the routine returns at once
 * without touching the chaining value.
 */
.globl	_vpaes_cbc_encrypt

.p2align	4
_vpaes_cbc_encrypt:
	xchgq	%rcx,%rdx			/* rdx = key (where the cores expect it), rcx = length */
	subq	$16,%rcx
	jc	L$cbc_abort			/* fewer than 16 bytes */
	movdqu	(%r8),%xmm6			/* xmm6 = chaining value */
	subq	%rdi,%rsi			/* rsi = out - in, so (%rsi,%rdi,1) addresses the output */
	call	_vpaes_preheat
	cmpl	$0,%r9d
	je	L$cbc_dec_loop
	jmp	L$cbc_enc_loop			/* skip the alignment padding */
.p2align	4
L$cbc_enc_loop:
	movdqu	(%rdi),%xmm0
	pxor	%xmm6,%xmm0			/* xor the input with the chaining value */
	call	_vpaes_encrypt_core
	movdqa	%xmm0,%xmm6			/* the output becomes the next chaining value */
	movdqu	%xmm0,(%rsi,%rdi,1)
	leaq	16(%rdi),%rdi			/* lea keeps the flags untouched */
	subq	$16,%rcx
	jnc	L$cbc_enc_loop			/* continue while another full block remains */
	jmp	L$cbc_done
.p2align	4
L$cbc_dec_loop:
	movdqu	(%rdi),%xmm0
	movdqa	%xmm0,%xmm7			/* keep the input: it is the next chaining value */
	call	_vpaes_decrypt_core
	pxor	%xmm6,%xmm0
	movdqa	%xmm7,%xmm6
	movdqu	%xmm0,(%rsi,%rdi,1)
	leaq	16(%rdi),%rdi
	subq	$16,%rcx
	jnc	L$cbc_dec_loop
L$cbc_done:
	movdqu	%xmm6,(%r8)			/* write the final chaining value back */
L$cbc_abort:
	.byte	0xf3,0xc3			/* rep ret */

/*
 * _vpaes_preheat
 *
 * Loads the shared lookup constants.  All loads are displacements from
 * L$k_s0F, so they depend on the order and size of the tables in
 * _vpaes_consts; with that layout:
 *   %xmm10 = L$k_inv        %xmm11 = L$k_inv + 16
 *   %xmm9  = L$k_s0F
 *   %xmm13 = L$k_sb1        %xmm12 = L$k_sb1 + 16
 *   %xmm15 = L$k_sb2        %xmm14 = L$k_sb2 + 16
 * Clobbers: %r10 (left pointing at L$k_s0F)
 */
.p2align	4
_vpaes_preheat:
	leaq	L$k_s0F(%rip),%r10
	movdqa	-32(%r10),%xmm10
	movdqa	-16(%r10),%xmm11
	movdqa	0(%r10),%xmm9
	movdqa	48(%r10),%xmm13
	movdqa	64(%r10),%xmm12
	movdqa	80(%r10),%xmm15
	movdqa	96(%r10),%xmm14
	.byte	0xf3,0xc3			/* rep ret */

/*
 * _vpaes_consts: lookup tables shared by every routine in this file.
 *
 * The tables are read with movdqa, which requires 16-byte alignment;
 * the .p2align 6 below provides it and every table is a multiple of
 * 16 bytes long.
 *
 * DO NOT reorder, resize or insert tables.  Several routines address a
 * table by a fixed displacement from a neighbouring label rather than
 * by its own name:
 *   - _vpaes_preheat uses -32, -16, 0, 48, 64, 80, 96 from L$k_s0F
 *   - _vpaes_encrypt_core uses -96, -80, -64+r11, r11, 64+r11 from
 *     L$k_mc_backward
 *   - _vpaes_decrypt_core uses -32 .. 112 from L$k_dsbd and
 *     -352 from (L$k_dsbd + offset)
 *   - _vpaes_schedule_mangle uses 0 .. 112 from L$k_dksd
 */
.p2align	6
_vpaes_consts:
L$k_inv:
	.quad	0x0E05060F0D080180, 0x040703090A0B0C02
	.quad	0x01040A060F0B0780, 0x030D0E0C02050809

L$k_s0F:	/* 0x0F in every byte: the split mask kept in %xmm9 */
	.quad	0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F

L$k_ipt:
	.quad	0xC2B2E8985A2A7000, 0xCABAE09052227808
	.quad	0x4C01307D317C4D00, 0xCD80B1FCB0FDCC81

L$k_sb1:
	.quad	0xB19BE18FCB503E00, 0xA5DF7A6E142AF544
	.quad	0x3618D415FAE22300, 0x3BF7CCC10D2ED9EF
L$k_sb2:
	.quad	0xE27A93C60B712400, 0x5EB7E955BC982FCD
	.quad	0x69EB88400AE12900, 0xC2A163C8AB82234A
L$k_sbo:
	.quad	0xD0D26D176FBDC700, 0x15AABF7AC502A878
	.quad	0xCFE474A55FBB6A00, 0x8E1E90D1412B35FA

L$k_mc_forward:	/* four 16-byte byte-permutation masks */
	.quad	0x0407060500030201, 0x0C0F0E0D080B0A09
	.quad	0x080B0A0904070605, 0x000302010C0F0E0D
	.quad	0x0C0F0E0D080B0A09, 0x0407060500030201
	.quad	0x000302010C0F0E0D, 0x080B0A0904070605

L$k_mc_backward:	/* four 16-byte byte-permutation masks */
	.quad	0x0605040702010003, 0x0E0D0C0F0A09080B
	.quad	0x020100030E0D0C0F, 0x0A09080B06050407
	.quad	0x0E0D0C0F0A09080B, 0x0605040702010003
	.quad	0x0A09080B06050407, 0x020100030E0D0C0F

L$k_sr:	/* four 16-byte byte-permutation masks; the first is the identity */
	.quad	0x0706050403020100, 0x0F0E0D0C0B0A0908
	.quad	0x030E09040F0A0500, 0x0B06010C07020D08
	.quad	0x0F060D040B020900, 0x070E050C030A0108
	.quad	0x0B0E0104070A0D00, 0x0306090C0F020508

L$k_rcon:
	.quad	0x1F8391B9AF9DEEB6, 0x702A98084D7C7D81

L$k_s63:	/* 0x5B in every byte */
	.quad	0x5B5B5B5B5B5B5B5B, 0x5B5B5B5B5B5B5B5B

L$k_opt:
	.quad	0xFF9F4929D6B66000, 0xF7974121DEBE6808
	.quad	0x01EDBD5150BCEC00, 0xE10D5DB1B05C0CE0

L$k_deskew:
	.quad	0x07E4A34047A4E300, 0x1DFEB95A5DBEF91A
	.quad	0x5F36B5DC83EA6900, 0x2841C2ABF49D1E77

/* Tables used by the non-zero %rcx path of _vpaes_schedule_mangle. */
L$k_dksd:
	.quad	0xFEB91A5DA3E44700, 0x0740E3A45A1DBEF9
	.quad	0x41C277F4B5368300, 0x5FDC69EAAB289D1E
L$k_dksb:
	.quad	0x9A4FCA1F8550D500, 0x03D653861CC94C99
	.quad	0x115BEDA7B6FC4A00, 0xD993256F7E3482C8
L$k_dkse:
	.quad	0xD5031CCA1FC9D600, 0x53859A4C994F5086
	.quad	0xA23196054FDC7BE8, 0xCD5EF96A20B31487
L$k_dks9:
	.quad	0xB6116FC87ED9A700, 0x4AED933482255BFC
	.quad	0x4576516227143300, 0x8BB89FACE9DAFDCE

/* Tables used by _vpaes_decrypt_core. */
L$k_dipt:
	.quad	0x0F505B040B545F00, 0x154A411E114E451A
	.quad	0x86E383E660056500, 0x12771772F491F194

L$k_dsb9:
	.quad	0x851C03539A86D600, 0xCAD51F504F994CC9
	.quad	0xC03B1789ECD74900, 0x725E2C9EB2FBA565
L$k_dsbd:
	.quad	0x7D57CCDFE6B1A200, 0xF56E9B13882A4439
	.quad	0x3CE2FAF724C6CB00, 0x2931180D15DEEFD3
L$k_dsbb:
	.quad	0xD022649296B44200, 0x602646F6B0F2D404
	.quad	0xC19498A6CD596700, 0xF3FF0C3E3255AA6B
L$k_dsbe:
	.quad	0x46F2929626D4D000, 0x2242600464B4F6B0
	.quad	0x0C55A6CDFFAAC100, 0x9467F36B98593E32
L$k_dsbo:
	.quad	0x1387EA537EF94000, 0xC7AA6DB9D4943E2D
	.quad	0x12D7560F93441D00, 0xCA4B8159D8C58E9C

/*
 * NUL-terminated ASCII identification string:
 * "Vector Permutation AES for x86_64/SSSE3, Mike Hamburg (Stanford University)"
 */
	.byte	86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105,111,110,32,65,69,83,32,102,111,114,32,120,56,54,95,54,52,47,83,83,83,69,51,44,32,77,105,107,101,32,72,97,109,98,117,114,103,32,40,83,116,97,110,102,111,114,100,32,85,110,105,118,101,114,115,105,116,121,41,0
.p2align	6
