# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
#
# Montgomery multiplication for x86-64 (AT&T/GAS syntax, SysV AMD64 ABI).
# NOTE(review): per the standard OpenSSL bn_mul_mont contract -- confirm
# against the generating perlasm script:
#   int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
#                   const BN_ULONG *np, const BN_ULONG *n0, int num)
#   %rdi = rp (result), %rsi = ap, %rdx = bp, %rcx = np (modulus),
#   %r8  = &n0 (-n^-1 mod 2^64), %r9 = num (limb count)
# The final "subtract modulus / select" steps below use branch-free
# masking, which keeps the reduction constant-time with respect to the
# secret-dependent borrow.

#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#endif

#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text

.extern	OPENSSL_ia32cap_P
.hidden OPENSSL_ia32cap_P

# bn_mul_mont: entry point / dispatcher.  Small or odd sizes fall through to
# the generic 1-limb-at-a-time loop; num divisible by 4 and >= 8 goes to the
# 4x-unrolled path, and the squaring case (ap == bp, num divisible by 8) goes
# to the dedicated sqr8x path.
.globl	bn_mul_mont
.hidden bn_mul_mont
.type	bn_mul_mont,@function
.align	16
bn_mul_mont:
.cfi_startproc
	movl	%r9d,%r9d		# zero-extend num to 64 bits
	movq	%rsp,%rax		# %rax = original %rsp (saved frame anchor)
.cfi_def_cfa_register	%rax
	testl	$3,%r9d
	jnz	.Lmul_enter		# num % 4 != 0 -> generic loop
	cmpl	$8,%r9d
	jb	.Lmul_enter		# num < 8 -> generic loop
	leaq	OPENSSL_ia32cap_P(%rip),%r11
	movl	8(%r11),%r11d		# capability word used later for BMI2/ADX dispatch
	cmpq	%rsi,%rdx
	jne	.Lmul4x_enter		# ap != bp -> 4x multiply
	testl	$7,%r9d
	jz	.Lsqr8x_enter		# ap == bp, num % 8 == 0 -> squaring path
	jmp	.Lmul4x_enter

.align	16
.Lmul_enter:
	pushq	%rbx			# save all callee-saved registers
.cfi_offset	%rbx,-16
	pushq	%rbp
.cfi_offset	%rbp,-24
	pushq	%r12
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_offset	%r15,-56

	# Carve out a (num+2)-qword scratch area below the stack, 1K-aligned.
	negq	%r9
	movq	%rsp,%r11
	leaq	-16(%rsp,%r9,8),%r10	# %r10 = prospective bottom of scratch
	negq	%r9			# restore num
	andq	$-1024,%r10		# minimize cache-timing aliasing of the scratch area

	# Walk %rsp down one page at a time so every new stack page is
	# touched in order (keeps guard-page/stack-probe semantics intact).
	subq	%r10,%r11
	andq	$-4096,%r11
	leaq	(%r10,%r11,1),%rsp
	movq	(%rsp),%r11		# probe
	cmpq	%r10,%rsp
	ja	.Lmul_page_walk
	jmp	.Lmul_page_walk_done

.align	16
.Lmul_page_walk:
	leaq	-4096(%rsp),%rsp
	movq	(%rsp),%r11		# probe each page
	cmpq	%r10,%rsp
	ja	.Lmul_page_walk
.Lmul_page_walk_done:

	movq	%rax,8(%rsp,%r9,8)	# stash original %rsp above the num-qword tp[] area
.cfi_escape	0x0f,0x0a,0x77,0x08,0x79,0x00,0x38,0x1e,0x22,0x06,0x23,0x08
.Lmul_body:
	# Register roles in the scalar loop:
	#   %r12 = bp, %rbx = bp[i], %r8 = n0, %rbp = m = (tp[0]+ap[0]*bp[i])*n0
	#   %r14 = outer index i, %r15 = inner index j, (%rsp) = tp[]
	movq	%rdx,%r12		# move bp out of %rdx (mulq clobbers %rdx)
	movq	(%r8),%r8		# n0 value
	movq	(%r12),%rbx		# bp[0]
	movq	(%rsi),%rax		# ap[0]

	xorq	%r14,%r14		# i = 0
	xorq	%r15,%r15		# j = 0

	movq	%r8,%rbp
	mulq	%rbx			# ap[0] * bp[0]
	movq	%rax,%r10
	movq	(%rcx),%rax		# np[0]

	imulq	%r10,%rbp		# m = lo(ap[0]*bp[0]) * n0
	movq	%rdx,%r11

	mulq	%rbp			# np[0] * m
	addq	%rax,%r10		# low limb cancels by construction
	movq	8(%rsi),%rax
	adcq	$0,%rdx
	movq	%rdx,%r13

	leaq	1(%r15),%r15		# j = 1
	jmp	.L1st_enter

# First outer iteration: tp[] = ap[] * bp[0], reduced by one Montgomery step.
.align	16
.L1st:
	addq	%rax,%r13
	movq	(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r11,%r13
	movq	%r10,%r11
	adcq	$0,%rdx
	movq	%r13,-16(%rsp,%r15,8)
	movq	%rdx,%r13

.L1st_enter:
	mulq	%rbx			# ap[j] * bp[0]
	addq	%rax,%r11
	movq	(%rcx,%r15,8),%rax	# np[j]
	adcq	$0,%rdx
	leaq	1(%r15),%r15
	movq	%rdx,%r10

	mulq	%rbp			# np[j] * m
	cmpq	%r9,%r15
	jne	.L1st			# loop while j != num

	addq	%rax,%r13		# final inner step, then fold the two top carries
	movq	(%rsi),%rax		# reload ap[0] for the next outer iteration
	adcq	$0,%rdx
	addq	%r11,%r13
	adcq	$0,%rdx
	movq	%r13,-16(%rsp,%r15,8)
	movq	%rdx,%r13
	movq	%r10,%r11

	xorq	%rdx,%rdx
	addq	%r11,%r13
	adcq	$0,%rdx
	movq	%r13,-8(%rsp,%r9,8)	# tp[num-1]
	movq	%rdx,(%rsp,%r9,8)	# tp[num] = top carry

	leaq	1(%r14),%r14		# i = 1
	jmp	.Louter
# Remaining outer iterations: tp[] = (tp[] + ap[]*bp[i]) / 2^64 mod-reduced.
.align	16
.Louter:
	movq	(%r12,%r14,8),%rbx	# bp[i]
	xorq	%r15,%r15		# j = 0
	movq	%r8,%rbp
	movq	(%rsp),%r10		# tp[0]
	mulq	%rbx			# ap[0] * bp[i]
	addq	%rax,%r10
	movq	(%rcx),%rax
	adcq	$0,%rdx

	imulq	%r10,%rbp		# m = tp[0] * n0
	movq	%rdx,%r11

	mulq	%rbp			# np[0] * m
	addq	%rax,%r10		# low limb cancels
	movq	8(%rsi),%rax
	adcq	$0,%rdx
	movq	8(%rsp),%r10		# tp[1]
	movq	%rdx,%r13

	leaq	1(%r15),%r15		# j = 1
	jmp	.Linner_enter

.align	16
.Linner:
	addq	%rax,%r13
	movq	(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r10,%r13		# += tp[j]
	movq	(%rsp,%r15,8),%r10
	adcq	$0,%rdx
	movq	%r13,-16(%rsp,%r15,8)
	movq	%rdx,%r13

.Linner_enter:
	mulq	%rbx			# ap[j] * bp[i]
	addq	%rax,%r11
	movq	(%rcx,%r15,8),%rax	# np[j]
	adcq	$0,%rdx
	addq	%r11,%r10		# accumulate into tp[j] limb
	movq	%rdx,%r11
	adcq	$0,%r11
	leaq	1(%r15),%r15

	mulq	%rbp			# np[j] * m
	cmpq	%r9,%r15
	jne	.Linner			# loop while j != num

	addq	%rax,%r13
	movq	(%rsi),%rax		# reload ap[0]
	adcq	$0,%rdx
	addq	%r10,%r13
	movq	(%rsp,%r15,8),%r10
	adcq	$0,%rdx
	movq	%r13,-16(%rsp,%r15,8)
	movq	%rdx,%r13

	xorq	%rdx,%rdx
	addq	%r11,%r13
	adcq	$0,%rdx
	addq	%r10,%r13		# += previous top carry
	adcq	$0,%rdx
	movq	%r13,-8(%rsp,%r9,8)
	movq	%rdx,(%rsp,%r9,8)	# new top carry

	leaq	1(%r14),%r14
	cmpq	%r9,%r14
	jb	.Louter			# loop while i < num

	# Conditional final subtraction: rp[] = tp[] - np[] (with borrow),
	# then a branch-free select between tp[] and rp[] below.
	xorq	%r14,%r14		# clears CF for the first sbbq
	movq	(%rsp),%rax		# tp[0]
	movq	%r9,%r15		# counter = num

.align	16
.Lsub:	sbbq	(%rcx,%r14,8),%rax
	movq	%rax,(%rdi,%r14,8)	# rp[j] = tp[j] - np[j] (tentative)
	movq	8(%rsp,%r14,8),%rax	# next tp limb; does not disturb CF
	leaq	1(%r14),%r14
	decq	%r15
	jnz	.Lsub

	sbbq	$0,%rax			# subtract the saved top carry; %rax = 0 or -1 mask
	movq	$-1,%rbx
	xorq	%rax,%rbx		# %rbx = ~%rax (complementary mask)
	xorq	%r14,%r14
	movq	%r9,%r15

# Constant-time select: rp[] = borrow ? tp[] : rp[], wiping tp[] as we go.
.Lcopy:
	movq	(%rdi,%r14,8),%rcx
	movq	(%rsp,%r14,8),%rdx
	andq	%rbx,%rcx		# keep subtracted limb if no borrow
	andq	%rax,%rdx		# keep original tp limb if borrow
	movq	%r9,(%rsp,%r14,8)	# scrub scratch (overwrite with num, not secrets)
	orq	%rcx,%rdx
	movq	%rdx,(%rdi,%r14,8)
	leaq	1(%r14),%r14
	subq	$1,%r15
	jnz	.Lcopy

	movq	8(%rsp,%r9,8),%rsi	# recover original %rsp
.cfi_def_cfa	%rsi,8
	movq	$1,%rax			# return 1 (success)
	movq	-48(%rsi),%r15
.cfi_restore	%r15
	movq	-40(%rsi),%r14
.cfi_restore	%r14
	movq	-32(%rsi),%r13
.cfi_restore	%r13
	movq	-24(%rsi),%r12
.cfi_restore	%r12
	movq	-16(%rsi),%rbp
.cfi_restore	%rbp
	movq	-8(%rsi),%rbx
.cfi_restore	%rbx
	leaq	(%rsi),%rsp
.cfi_def_cfa_register	%rsp
.Lmul_epilogue:
	.byte	0xf3,0xc3		# rep ret (repz-ret branch-predictor idiom)
.cfi_endproc
.size	bn_mul_mont,.-bn_mul_mont

# bn_mul4x_mont: same contract as bn_mul_mont, inner loops unrolled 4x.
# Entered via .Lmul4x_enter with %r11d holding OPENSSL_ia32cap_P[2]; if the
# 0x80100 bits are set it re-dispatches to the MULX/ADX path below.
.type	bn_mul4x_mont,@function
.align	16
bn_mul4x_mont:
.cfi_startproc
	movl	%r9d,%r9d		# zero-extend num
	movq	%rsp,%rax
.cfi_def_cfa_register	%rax
.Lmul4x_enter:
	andl	$0x80100,%r11d		# NOTE(review): presumably BMI2+ADX feature bits -- confirm
	cmpl	$0x80100,%r11d
	je	.Lmulx4x_enter
	pushq	%rbx
.cfi_offset	%rbx,-16
	pushq	%rbp
.cfi_offset	%rbp,-24
	pushq	%r12
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_offset	%r15,-56

	# Scratch allocation + page walk, as in bn_mul_mont (one extra qword
	# here to also stash rp).
	negq	%r9
	movq	%rsp,%r11
	leaq	-32(%rsp,%r9,8),%r10
	negq	%r9
	andq	$-1024,%r10

	subq	%r10,%r11
	andq	$-4096,%r11
	leaq	(%r10,%r11,1),%rsp
	movq	(%rsp),%r11
	cmpq	%r10,%rsp
	ja	.Lmul4x_page_walk
	jmp	.Lmul4x_page_walk_done

.Lmul4x_page_walk:
	leaq	-4096(%rsp),%rsp
	movq	(%rsp),%r11
	cmpq	%r10,%rsp
	ja	.Lmul4x_page_walk
.Lmul4x_page_walk_done:

	movq	%rax,8(%rsp,%r9,8)	# save original %rsp
.cfi_escape	0x0f,0x0a,0x77,0x08,0x79,0x00,0x38,0x1e,0x22,0x06,0x23,0x08
.Lmul4x_body:
	movq	%rdi,16(%rsp,%r9,8)	# save rp (%rdi is reused as a scratch limb below)
	movq	%rdx,%r12		# %r12 = bp
	movq	(%r8),%r8		# n0
	movq	(%r12),%rbx		# bp[0]
	movq	(%rsi),%rax		# ap[0]

	xorq	%r14,%r14		# i = 0
	xorq	%r15,%r15		# j = 0

	movq	%r8,%rbp
	mulq	%rbx			# ap[0]*bp[0]
	movq	%rax,%r10
	movq	(%rcx),%rax

	imulq	%r10,%rbp		# m = lo * n0
	movq	%rdx,%r11

	mulq	%rbp			# np[0]*m
	addq	%rax,%r10
	movq	8(%rsi),%rax
	adcq	$0,%rdx
	movq	%rdx,%rdi

	mulq	%rbx			# ap[1]*bp[0]
	addq	%rax,%r11
	movq	8(%rcx),%rax
	adcq	$0,%rdx
	movq	%rdx,%r10

	mulq	%rbp			# np[1]*m
	addq	%rax,%rdi
	movq	16(%rsi),%rax
	adcq	$0,%rdx
	addq	%r11,%rdi
	leaq	4(%r15),%r15		# j = 4
	adcq	$0,%rdx
	movq	%rdi,(%rsp)		# tp[0]
	movq	%rdx,%r13
	jmp	.L1st4x
# First outer pass, four limbs per iteration (alternating ap[j]*bp[0] and
# np[j]*m with the running carries in %r10/%r11/%r13/%rdi).
.align	16
.L1st4x:
	mulq	%rbx
	addq	%rax,%r10
	movq	-16(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%rbp
	addq	%rax,%r13
	movq	-8(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r10,%r13
	adcq	$0,%rdx
	movq	%r13,-24(%rsp,%r15,8)
	movq	%rdx,%rdi

	mulq	%rbx
	addq	%rax,%r11
	movq	-8(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	movq	%rdx,%r10

	mulq	%rbp
	addq	%rax,%rdi
	movq	(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r11,%rdi
	adcq	$0,%rdx
	movq	%rdi,-16(%rsp,%r15,8)
	movq	%rdx,%r13

	mulq	%rbx
	addq	%rax,%r10
	movq	(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%rbp
	addq	%rax,%r13
	movq	8(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r10,%r13
	adcq	$0,%rdx
	movq	%r13,-8(%rsp,%r15,8)
	movq	%rdx,%rdi

	mulq	%rbx
	addq	%rax,%r11
	movq	8(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	leaq	4(%r15),%r15		# j += 4
	movq	%rdx,%r10

	mulq	%rbp
	addq	%rax,%rdi
	movq	-16(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r11,%rdi
	adcq	$0,%rdx
	movq	%rdi,-32(%rsp,%r15,8)
	movq	%rdx,%r13
	cmpq	%r9,%r15
	jb	.L1st4x

	# Tail: last two limb pairs of the first pass.
	mulq	%rbx
	addq	%rax,%r10
	movq	-16(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%rbp
	addq	%rax,%r13
	movq	-8(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r10,%r13
	adcq	$0,%rdx
	movq	%r13,-24(%rsp,%r15,8)
	movq	%rdx,%rdi

	mulq	%rbx
	addq	%rax,%r11
	movq	-8(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	movq	%rdx,%r10

	mulq	%rbp
	addq	%rax,%rdi
	movq	(%rsi),%rax		# reload ap[0]
	adcq	$0,%rdx
	addq	%r11,%rdi
	adcq	$0,%rdx
	movq	%rdi,-16(%rsp,%r15,8)
	movq	%rdx,%r13

	xorq	%rdi,%rdi
	addq	%r10,%r13
	adcq	$0,%rdi
	movq	%r13,-8(%rsp,%r15,8)
	movq	%rdi,(%rsp,%r15,8)	# tp[num] = top carry

	leaq	1(%r14),%r14		# i = 1
.align	4
.Louter4x:
	movq	(%r12,%r14,8),%rbx	# bp[i]
	xorq	%r15,%r15
	movq	(%rsp),%r10		# tp[0]
	movq	%r8,%rbp
	mulq	%rbx			# ap[0]*bp[i]
	addq	%rax,%r10
	movq	(%rcx),%rax
	adcq	$0,%rdx

	imulq	%r10,%rbp		# m = tp[0]*n0
	movq	%rdx,%r11

	mulq	%rbp			# np[0]*m
	addq	%rax,%r10
	movq	8(%rsi),%rax
	adcq	$0,%rdx
	movq	%rdx,%rdi

	mulq	%rbx			# ap[1]*bp[i]
	addq	%rax,%r11
	movq	8(%rcx),%rax
	adcq	$0,%rdx
	addq	8(%rsp),%r11		# += tp[1]
	adcq	$0,%rdx
	movq	%rdx,%r10

	mulq	%rbp			# np[1]*m
	addq	%rax,%rdi
	movq	16(%rsi),%rax
	adcq	$0,%rdx
	addq	%r11,%rdi
	leaq	4(%r15),%r15
	adcq	$0,%rdx
	movq	%rdi,(%rsp)		# tp[0] for this pass
	movq	%rdx,%r13
	jmp	.Linner4x
# Inner loop of subsequent passes: like .L1st4x but also folds in the
# previous tp[] limbs.
.align	16
.Linner4x:
	mulq	%rbx
	addq	%rax,%r10
	movq	-16(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	addq	-16(%rsp,%r15,8),%r10
	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%rbp
	addq	%rax,%r13
	movq	-8(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r10,%r13
	adcq	$0,%rdx
	movq	%r13,-24(%rsp,%r15,8)
	movq	%rdx,%rdi

	mulq	%rbx
	addq	%rax,%r11
	movq	-8(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	addq	-8(%rsp,%r15,8),%r11
	adcq	$0,%rdx
	movq	%rdx,%r10

	mulq	%rbp
	addq	%rax,%rdi
	movq	(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r11,%rdi
	adcq	$0,%rdx
	movq	%rdi,-16(%rsp,%r15,8)
	movq	%rdx,%r13

	mulq	%rbx
	addq	%rax,%r10
	movq	(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	addq	(%rsp,%r15,8),%r10
	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%rbp
	addq	%rax,%r13
	movq	8(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r10,%r13
	adcq	$0,%rdx
	movq	%r13,-8(%rsp,%r15,8)
	movq	%rdx,%rdi

	mulq	%rbx
	addq	%rax,%r11
	movq	8(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	addq	8(%rsp,%r15,8),%r11
	adcq	$0,%rdx
	leaq	4(%r15),%r15		# j += 4
	movq	%rdx,%r10

	mulq	%rbp
	addq	%rax,%rdi
	movq	-16(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r11,%rdi
	adcq	$0,%rdx
	movq	%rdi,-32(%rsp,%r15,8)
	movq	%rdx,%r13
	cmpq	%r9,%r15
	jb	.Linner4x

	# Tail of the pass.
	mulq	%rbx
	addq	%rax,%r10
	movq	-16(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	addq	-16(%rsp,%r15,8),%r10
	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%rbp
	addq	%rax,%r13
	movq	-8(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r10,%r13
	adcq	$0,%rdx
	movq	%r13,-24(%rsp,%r15,8)
	movq	%rdx,%rdi

	mulq	%rbx
	addq	%rax,%r11
	movq	-8(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	addq	-8(%rsp,%r15,8),%r11
	adcq	$0,%rdx
	leaq	1(%r14),%r14		# i += 1
	movq	%rdx,%r10

	mulq	%rbp
	addq	%rax,%rdi
	movq	(%rsi),%rax		# reload ap[0]
	adcq	$0,%rdx
	addq	%r11,%rdi
	adcq	$0,%rdx
	movq	%rdi,-16(%rsp,%r15,8)
	movq	%rdx,%r13

	xorq	%rdi,%rdi
	addq	%r10,%r13
	adcq	$0,%rdi
	addq	(%rsp,%r9,8),%r13	# += previous top carry
	adcq	$0,%rdi
	movq	%r13,-8(%rsp,%r15,8)
	movq	%rdi,(%rsp,%r15,8)	# new top carry

	cmpq	%r9,%r14
	jb	.Louter4x
	# Conditional subtraction of the modulus, 4 limbs per iteration.
	movq	16(%rsp,%r9,8),%rdi	# restore rp
	leaq	-4(%r9),%r15
	movq	0(%rsp),%rax		# tp[0]
	movq	8(%rsp),%rdx		# tp[1]
	shrq	$2,%r15			# counter = num/4 - 1
	leaq	(%rsp),%rsi		# %rsi = tp
	xorq	%r14,%r14

	subq	0(%rcx),%rax		# start the borrow chain
	movq	16(%rsi),%rbx
	movq	24(%rsi),%rbp
	sbbq	8(%rcx),%rdx

.Lsub4x:
	movq	%rax,0(%rdi,%r14,8)
	movq	%rdx,8(%rdi,%r14,8)
	sbbq	16(%rcx,%r14,8),%rbx
	movq	32(%rsi,%r14,8),%rax
	movq	40(%rsi,%r14,8),%rdx
	sbbq	24(%rcx,%r14,8),%rbp
	movq	%rbx,16(%rdi,%r14,8)
	movq	%rbp,24(%rdi,%r14,8)
	sbbq	32(%rcx,%r14,8),%rax
	movq	48(%rsi,%r14,8),%rbx
	movq	56(%rsi,%r14,8),%rbp
	sbbq	40(%rcx,%r14,8),%rdx
	leaq	4(%r14),%r14
	decq	%r15			# preserves CF for the sbbq chain
	jnz	.Lsub4x

	movq	%rax,0(%rdi,%r14,8)
	movq	32(%rsi,%r14,8),%rax	# load tp[num] (top carry)
	sbbq	16(%rcx,%r14,8),%rbx
	movq	%rdx,8(%rdi,%r14,8)
	sbbq	24(%rcx,%r14,8),%rbp
	movq	%rbx,16(%rdi,%r14,8)

	sbbq	$0,%rax			# %rax = 0 or -1 borrow mask
	movq	%rbp,24(%rdi,%r14,8)
	pxor	%xmm0,%xmm0
.byte	102,72,15,110,224		# movq %rax,%xmm4 (hand-encoded REX movd)
	pcmpeqd	%xmm5,%xmm5		# %xmm5 = all-ones
	pshufd	$0,%xmm4,%xmm4		# broadcast borrow mask
	movq	%r9,%r15
	pxor	%xmm4,%xmm5		# %xmm5 = ~mask
	shrq	$2,%r15			# counter = num/4 (two 16-byte chunks/iter)
	xorl	%eax,%eax		# byte offset = 0

	jmp	.Lcopy4x
# Constant-time select between tp[] and the subtracted result, 32 bytes per
# iteration, zeroing the scratch as it goes.
.align	16
.Lcopy4x:
	movdqa	(%rsp,%rax,1),%xmm1
	movdqu	(%rdi,%rax,1),%xmm2
	pand	%xmm4,%xmm1		# tp limbs if borrow
	pand	%xmm5,%xmm2		# subtracted limbs if no borrow
	movdqa	16(%rsp,%rax,1),%xmm3
	movdqa	%xmm0,(%rsp,%rax,1)	# scrub scratch
	por	%xmm2,%xmm1
	movdqu	16(%rdi,%rax,1),%xmm2
	movdqu	%xmm1,(%rdi,%rax,1)
	pand	%xmm4,%xmm3
	pand	%xmm5,%xmm2
	movdqa	%xmm0,16(%rsp,%rax,1)	# scrub scratch
	por	%xmm2,%xmm3
	movdqu	%xmm3,16(%rdi,%rax,1)
	leaq	32(%rax),%rax
	decq	%r15
	jnz	.Lcopy4x
	movq	8(%rsp,%r9,8),%rsi	# recover original %rsp
.cfi_def_cfa	%rsi, 8
	movq	$1,%rax			# return 1
	movq	-48(%rsi),%r15
.cfi_restore	%r15
	movq	-40(%rsi),%r14
.cfi_restore	%r14
	movq	-32(%rsi),%r13
.cfi_restore	%r13
	movq	-24(%rsi),%r12
.cfi_restore	%r12
	movq	-16(%rsi),%rbp
.cfi_restore	%rbp
	movq	-8(%rsi),%rbx
.cfi_restore	%rbx
	leaq	(%rsi),%rsp
.cfi_def_cfa_register	%rsp
.Lmul4x_epilogue:
	.byte	0xf3,0xc3		# rep ret
.cfi_endproc
.size	bn_mul4x_mont,.-bn_mul4x_mont
.extern	bn_sqrx8x_internal
.hidden bn_sqrx8x_internal
.extern	bn_sqr8x_internal
.hidden bn_sqr8x_internal

# bn_sqr8x_mont: squaring path (ap == bp, num % 8 == 0).  Allocates a
# 2*num-limb frame, calls the out-of-line bn_sqr(x)8x_internal kernel, then
# performs the conditional final subtraction and a constant-time copy-back.
.type	bn_sqr8x_mont,@function
.align	32
bn_sqr8x_mont:
.cfi_startproc
	movq	%rsp,%rax		# save original %rsp
.cfi_def_cfa_register	%rax
.Lsqr8x_enter:
	pushq	%rbx
.cfi_offset	%rbx,-16
	pushq	%rbp
.cfi_offset	%rbp,-24
	pushq	%r12
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_offset	%r15,-56
.Lsqr8x_prologue:

	movl	%r9d,%r10d
	shll	$3,%r9d			# %r9 = num in bytes
	shlq	$3+2,%r10		# %r10 = num*32
	negq	%r9

	# Choose a frame so the scratch does not alias ap modulo 4K
	# (minimizes cache-bank/page conflicts between source and scratch).
	leaq	-64(%rsp,%r9,2),%r11
	movq	%rsp,%rbp
	movq	(%r8),%r8		# n0
	subq	%rsi,%r11
	andq	$4095,%r11
	cmpq	%r11,%r10
	jb	.Lsqr8x_sp_alt
	subq	%r11,%rbp
	leaq	-64(%rbp,%r9,2),%rbp	# frame = 2*num limbs + 64
	jmp	.Lsqr8x_sp_done

.align	32
.Lsqr8x_sp_alt:
	leaq	4096-64(,%r9,2),%r10	# alternative displacement when num*32 is small
	leaq	-64(%rbp,%r9,2),%rbp
	subq	%r10,%r11
	movq	$0,%r10
	cmovcq	%r10,%r11
	subq	%r11,%rbp
.Lsqr8x_sp_done:
	andq	$-64,%rbp		# 64-byte align the frame
	movq	%rsp,%r11
	subq	%rbp,%r11
	andq	$-4096,%r11
	leaq	(%r11,%rbp,1),%rsp
	movq	(%rsp),%r10		# probe
	cmpq	%rbp,%rsp
	ja	.Lsqr8x_page_walk
	jmp	.Lsqr8x_page_walk_done

.align	16
.Lsqr8x_page_walk:
	leaq	-4096(%rsp),%rsp
	movq	(%rsp),%r10		# touch every page on the way down
	cmpq	%rbp,%rsp
	ja	.Lsqr8x_page_walk
.Lsqr8x_page_walk_done:

	movq	%r9,%r10
	negq	%r9			# %r9 = +num in bytes again

	movq	%r8,32(%rsp)		# stash n0
	movq	%rax,40(%rsp)		# stash original %rsp
.cfi_escape	0x0f,0x05,0x77,0x28,0x06,0x23,0x08
.Lsqr8x_body:

.byte	102,72,15,110,209		# movq %rcx,%xmm2 (np, for the kernel)
	pxor	%xmm0,%xmm0
.byte	102,72,15,110,207		# movq %rdi,%xmm1 (rp)
.byte	102,73,15,110,218		# movq %r10,%xmm3 (-num)
	leaq	OPENSSL_ia32cap_P(%rip),%rax
	movl	8(%rax),%eax
	andl	$0x80100,%eax		# NOTE(review): presumably BMI2+ADX bits -- confirm
	cmpl	$0x80100,%eax
	jne	.Lsqr8x_nox

	call	bn_sqrx8x_internal	# MULX/ADX squaring kernel

	# Kernel returns with %rcx = -num (see kernel's contract); set up the
	# tail-subtraction pointers from it.
	leaq	(%r8,%rcx,1),%rbx
	movq	%rcx,%r9
	movq	%rcx,%rdx
.byte	102,72,15,126,207		# movq %xmm1,%rdi (recover rp)
	sarq	$3+2,%rcx		# iteration count = num/32 (negative)
	jmp	.Lsqr8x_sub

.align	32
.Lsqr8x_nox:
	call	bn_sqr8x_internal	# classic mulq squaring kernel

	leaq	(%rdi,%r9,1),%rbx
	movq	%r9,%rcx
	movq	%r9,%rdx
.byte	102,72,15,126,207		# movq %xmm1,%rdi
	sarq	$3+2,%rcx
	jmp	.Lsqr8x_sub

# Conditional subtraction of np[] from the upper half of the product.
.align	32
.Lsqr8x_sub:
	movq	0(%rbx),%r12
	movq	8(%rbx),%r13
	movq	16(%rbx),%r14
	movq	24(%rbx),%r15
	leaq	32(%rbx),%rbx
	sbbq	0(%rbp),%r12
	sbbq	8(%rbp),%r13
	sbbq	16(%rbp),%r14
	sbbq	24(%rbp),%r15
	leaq	32(%rbp),%rbp
	movq	%r12,0(%rdi)
	movq	%r13,8(%rdi)
	movq	%r14,16(%rdi)
	movq	%r15,24(%rdi)
	leaq	32(%rdi),%rdi
	incq	%rcx			# preserves CF
	jnz	.Lsqr8x_sub

	sbbq	$0,%rax			# %rax = borrow mask (0 or -1)
	leaq	(%rbx,%r9,1),%rbx	# rewind pointers by num
	leaq	(%rdi,%r9,1),%rdi

.byte	102,72,15,110,200		# movq %rax,%xmm1 (broadcast mask below)
	pxor	%xmm0,%xmm0
	pshufd	$0,%xmm1,%xmm1
	movq	40(%rsp),%rsi		# original %rsp
.cfi_def_cfa	%rsi,8
	jmp	.Lsqr8x_cond_copy

# Constant-time select between the unsubtracted and subtracted halves,
# zeroing both scratch copies.
.align	32
.Lsqr8x_cond_copy:
	movdqa	0(%rbx),%xmm2
	movdqa	16(%rbx),%xmm3
	leaq	32(%rbx),%rbx
	movdqu	0(%rdi),%xmm4
	movdqu	16(%rdi),%xmm5
	leaq	32(%rdi),%rdi
	movdqa	%xmm0,-32(%rbx)		# scrub scratch (lower copy)
	movdqa	%xmm0,-16(%rbx)
	movdqa	%xmm0,-32(%rbx,%rdx,1)	# scrub scratch (upper copy)
	movdqa	%xmm0,-16(%rbx,%rdx,1)
	pcmpeqd	%xmm1,%xmm0		# %xmm0 = ~mask while %xmm1 = mask
	pand	%xmm1,%xmm2
	pand	%xmm1,%xmm3
	pand	%xmm0,%xmm4
	pand	%xmm0,%xmm5
	pxor	%xmm0,%xmm0
	por	%xmm2,%xmm4
	por	%xmm3,%xmm5
	movdqu	%xmm4,-32(%rdi)
	movdqu	%xmm5,-16(%rdi)
	addq	$32,%r9			# %r9 runs from -num*8 up to 0
	jnz	.Lsqr8x_cond_copy

	movq	$1,%rax			# return 1
	movq	-48(%rsi),%r15
.cfi_restore	%r15
	movq	-40(%rsi),%r14
.cfi_restore	%r14
	movq	-32(%rsi),%r13
.cfi_restore	%r13
	movq	-24(%rsi),%r12
.cfi_restore	%r12
	movq	-16(%rsi),%rbp
.cfi_restore	%rbp
	movq	-8(%rsi),%rbx
.cfi_restore	%rbx
	leaq	(%rsi),%rsp
.cfi_def_cfa_register	%rsp
.Lsqr8x_epilogue:
	.byte	0xf3,0xc3		# rep ret
.cfi_endproc
.size	bn_sqr8x_mont,.-bn_sqr8x_mont

# bn_mulx4x_mont: 4x-unrolled multiplication using BMI2 MULX plus the ADX
# ADCX/ADOX dual-carry-chain instructions.  Reached only when the
# corresponding CPU feature bits are present.
.type	bn_mulx4x_mont,@function
.align	32
bn_mulx4x_mont:
.cfi_startproc
	movq	%rsp,%rax		# save original %rsp
.cfi_def_cfa_register	%rax
.Lmulx4x_enter:
	pushq	%rbx
.cfi_offset	%rbx,-16
	pushq	%rbp
.cfi_offset	%rbp,-24
	pushq	%r12
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_offset	%r15,-56
.Lmulx4x_prologue:

	shll	$3,%r9d			# num in bytes
	xorq	%r10,%r10
	subq	%r9,%r10		# -num*8
	movq	(%r8),%r8		# n0
	leaq	-72(%rsp,%r10,1),%rbp	# frame: num*8 + 72 bytes
	andq	$-128,%rbp		# 128-byte align
	movq	%rsp,%r11
	subq	%rbp,%r11
	andq	$-4096,%r11
	leaq	(%r11,%rbp,1),%rsp
	movq	(%rsp),%r10		# probe
	cmpq	%rbp,%rsp
	ja	.Lmulx4x_page_walk
	jmp	.Lmulx4x_page_walk_done

.align	16
.Lmulx4x_page_walk:
	leaq	-4096(%rsp),%rsp
	movq	(%rsp),%r10		# touch every page on the way down
	cmpq	%rbp,%rsp
	ja	.Lmulx4x_page_walk
.Lmulx4x_page_walk_done:

	leaq	(%rdx,%r9,1),%r10	# &bp[num] (end-of-bp sentinel)

	# Frame layout (offsets from %rsp):
	#   0: num*8   8: current bp pointer  16: &bp[num]  24: n0
	#  32: rp     40: original %rsp      48: inner counter  64+: tp[]
	movq	%r9,0(%rsp)
	shrq	$5,%r9
	movq	%r10,16(%rsp)
	subq	$1,%r9			# num/4 - 1 inner iterations
	movq	%r8,24(%rsp)
	movq	%rdi,32(%rsp)
	movq	%rax,40(%rsp)
.cfi_escape	0x0f,0x05,0x77,0x28,0x06,0x23,0x08
	movq	%r9,48(%rsp)
	jmp	.Lmulx4x_body

.align	32
.Lmulx4x_body:
	leaq	8(%rdx),%rdi		# %rdi = &bp[1]
	movq	(%rdx),%rdx		# %rdx = bp[0] (implicit mulx multiplicand)
	leaq	64+32(%rsp),%rbx	# %rbx = tp write pointer
	movq	%rdx,%r9		# keep bp[0] for reloads

	mulxq	0(%rsi),%r8,%rax	# ap[0]*bp[0]
	mulxq	8(%rsi),%r11,%r14	# ap[1]*bp[0]
	addq	%rax,%r11
	movq	%rdi,8(%rsp)		# save bp cursor
	mulxq	16(%rsi),%r12,%r13	# ap[2]*bp[0]
	adcq	%r14,%r12
	adcq	$0,%r13

	movq	%r8,%rdi		# %rdi = low limb (to be cancelled)
	imulq	24(%rsp),%r8		# m = lo * n0
	xorq	%rbp,%rbp		# %rbp = constant 0 and clears CF/OF

	mulxq	24(%rsi),%rax,%r14	# ap[3]*bp[0]
	movq	%r8,%rdx		# switch multiplicand to m
	leaq	32(%rsi),%rsi
	adcxq	%rax,%r13
	adcxq	%rbp,%r14

	mulxq	0(%rcx),%rax,%r10	# np[0]*m -- dual ADCX/ADOX carry chains from here
	adcxq	%rax,%rdi		# cancels the low limb
	adoxq	%r11,%r10
	mulxq	8(%rcx),%rax,%r11
	adcxq	%rax,%r10
	adoxq	%r12,%r11
.byte	0xc4,0x62,0xfb,0xf6,0xa1,0x10,0x00,0x00,0x00	# NOTE(review): hand-encoded mulxq 16(%rcx),%rax,%r12 -- confirm
	movq	48(%rsp),%rdi		# inner counter
	movq	%r10,-32(%rbx)
	adcxq	%rax,%r11
	adoxq	%r13,%r12
	mulxq	24(%rcx),%rax,%r15
	movq	%r9,%rdx		# back to bp[0]
	movq	%r11,-24(%rbx)
	adcxq	%rax,%r12
	adoxq	%rbp,%r15
	leaq	32(%rcx),%rcx
	movq	%r12,-16(%rbx)

	jmp	.Lmulx4x_1st

# First pass: tp[] = ap[]*bp[0] with one interleaved reduction step,
# four limbs per iteration, ADCX and ADOX carrying independently.
.align	32
.Lmulx4x_1st:
	adcxq	%rbp,%r15
	mulxq	0(%rsi),%r10,%rax
	adcxq	%r14,%r10
	mulxq	8(%rsi),%r11,%r14
	adcxq	%rax,%r11
	mulxq	16(%rsi),%r12,%rax
	adcxq	%r14,%r12
	mulxq	24(%rsi),%r13,%r14
.byte	0x67,0x67			# NOTE(review): addr-size prefixes as alignment padding -- confirm
	movq	%r8,%rdx		# multiplicand = m
	adcxq	%rax,%r13
	adcxq	%rbp,%r14
	leaq	32(%rsi),%rsi
	leaq	32(%rbx),%rbx

	adoxq	%r15,%r10
	mulxq	0(%rcx),%rax,%r15
	adcxq	%rax,%r10
	adoxq	%r15,%r11
	mulxq	8(%rcx),%rax,%r15
	adcxq	%rax,%r11
	adoxq	%r15,%r12
	mulxq	16(%rcx),%rax,%r15
	movq	%r10,-40(%rbx)
	adcxq	%rax,%r12
	movq	%r11,-32(%rbx)
	adoxq	%r15,%r13
	mulxq	24(%rcx),%rax,%r15
	movq	%r9,%rdx		# back to bp[0]
	movq	%r12,-24(%rbx)
	adcxq	%rax,%r13
	adoxq	%rbp,%r15
	leaq	32(%rcx),%rcx
	movq	%r13,-16(%rbx)

	decq	%rdi			# inner counter
	jnz	.Lmulx4x_1st

	movq	0(%rsp),%rax		# num*8
	movq	8(%rsp),%rdi		# bp cursor
	adcq	%rbp,%r15
	addq	%r15,%r14
	sbbq	%r15,%r15		# top carry as full-width mask
	movq	%r14,-8(%rbx)
	jmp	.Lmulx4x_outer

# Subsequent passes: tp[] = (tp[] + ap[]*bp[i]) with interleaved reduction.
.align	32
.Lmulx4x_outer:
	movq	(%rdi),%rdx		# bp[i]
	leaq	8(%rdi),%rdi
	subq	%rax,%rsi		# rewind ap
	movq	%r15,(%rbx)		# store top carry at tp[num]
	leaq	64+32(%rsp),%rbx
	subq	%rax,%rcx		# rewind np

	mulxq	0(%rsi),%r8,%r11
	xorl	%ebp,%ebp		# zero %rbp, clear CF/OF
	movq	%rdx,%r9
	mulxq	8(%rsi),%r14,%r12
	adoxq	-32(%rbx),%r8		# fold in previous tp limbs via OF chain
	adcxq	%r14,%r11
	mulxq	16(%rsi),%r15,%r13
	adoxq	-24(%rbx),%r11
	adcxq	%r15,%r12
	adoxq	-16(%rbx),%r12
	adcxq	%rbp,%r13
	adoxq	%rbp,%r13

	movq	%rdi,8(%rsp)		# save bp cursor
	movq	%r8,%r15
	imulq	24(%rsp),%r8		# m = lo * n0
	xorl	%ebp,%ebp

	mulxq	24(%rsi),%rax,%r14
	movq	%r8,%rdx		# multiplicand = m
	adcxq	%rax,%r13
	adoxq	-8(%rbx),%r13
	adcxq	%rbp,%r14
	leaq	32(%rsi),%rsi
	adoxq	%rbp,%r14

	mulxq	0(%rcx),%rax,%r10
	adcxq	%rax,%r15		# cancels the low limb
	adoxq	%r11,%r10
	mulxq	8(%rcx),%rax,%r11
	adcxq	%rax,%r10
	adoxq	%r12,%r11
	mulxq	16(%rcx),%rax,%r12
	movq	%r10,-32(%rbx)
	adcxq	%rax,%r11
	adoxq	%r13,%r12
	mulxq	24(%rcx),%rax,%r15
	movq	%r9,%rdx		# back to bp[i]
	movq	%r11,-24(%rbx)
	leaq	32(%rcx),%rcx
	adcxq	%rax,%r12
	adoxq	%rbp,%r15
	movq	48(%rsp),%rdi		# inner counter
	movq	%r12,-16(%rbx)

	jmp	.Lmulx4x_inner

.align	32
.Lmulx4x_inner:
	mulxq	0(%rsi),%r10,%rax
	adcxq	%rbp,%r15
	adoxq	%r14,%r10
	mulxq	8(%rsi),%r11,%r14
	adcxq	0(%rbx),%r10		# CF chain folds in tp[]
	adoxq	%rax,%r11
	mulxq	16(%rsi),%r12,%rax
	adcxq	8(%rbx),%r11
	adoxq	%r14,%r12
	mulxq	24(%rsi),%r13,%r14
	movq	%r8,%rdx		# multiplicand = m
	adcxq	16(%rbx),%r12
	adoxq	%rax,%r13
	adcxq	24(%rbx),%r13
	adoxq	%rbp,%r14
	leaq	32(%rsi),%rsi
	leaq	32(%rbx),%rbx
	adcxq	%rbp,%r14

	adoxq	%r15,%r10
	mulxq	0(%rcx),%rax,%r15
	adcxq	%rax,%r10
	adoxq	%r15,%r11
	mulxq	8(%rcx),%rax,%r15
	adcxq	%rax,%r11
	adoxq	%r15,%r12
	mulxq	16(%rcx),%rax,%r15
	movq	%r10,-40(%rbx)
	adcxq	%rax,%r12
	adoxq	%r15,%r13
	mulxq	24(%rcx),%rax,%r15
	movq	%r9,%rdx		# back to bp[i]
	movq	%r11,-32(%rbx)
	movq	%r12,-24(%rbx)
	adcxq	%rax,%r13
	adoxq	%rbp,%r15
	leaq	32(%rcx),%rcx
	movq	%r13,-16(%rbx)

	decq	%rdi
	jnz	.Lmulx4x_inner

	movq	0(%rsp),%rax		# num*8
	movq	8(%rsp),%rdi		# bp cursor
	adcq	%rbp,%r15
	subq	0(%rbx),%rbp		# sets CF from previous top carry
	adcq	%r15,%r14
	sbbq	%r15,%r15		# new top-carry mask
	movq	%r14,-8(%rbx)

	cmpq	16(%rsp),%rdi		# reached &bp[num]?
	jne	.Lmulx4x_outer

	# Conditional final subtraction, driven by the top-carry mask in %r15.
	leaq	64(%rsp),%rbx		# tp[]
	subq	%rax,%rcx		# rewind np
	negq	%r15			# CF = (top carry != 0)
	movq	%rax,%rdx
	shrq	$3+2,%rax		# counter = num/4
	movq	32(%rsp),%rdi		# rp
	jmp	.Lmulx4x_sub

.align	32
.Lmulx4x_sub:
	movq	0(%rbx),%r11
	movq	8(%rbx),%r12
	movq	16(%rbx),%r13
	movq	24(%rbx),%r14
	leaq	32(%rbx),%rbx
	sbbq	0(%rcx),%r11
	sbbq	8(%rcx),%r12
	sbbq	16(%rcx),%r13
	sbbq	24(%rcx),%r14
	leaq	32(%rcx),%rcx
	movq	%r11,0(%rdi)
	movq	%r12,8(%rdi)
	movq	%r13,16(%rdi)
	movq	%r14,24(%rdi)
	leaq	32(%rdi),%rdi
	decq	%rax			# preserves CF
	jnz	.Lmulx4x_sub

	sbbq	$0,%r15			# %r15 = final borrow mask (0 or -1)
	leaq	64(%rsp),%rbx
	subq	%rdx,%rdi		# rewind rp

.byte	102,73,15,110,207		# movq %r15,%xmm1 (broadcast mask below)
	pxor	%xmm0,%xmm0
	pshufd	$0,%xmm1,%xmm1
	movq	40(%rsp),%rsi		# original %rsp
.cfi_def_cfa	%rsi,8
	jmp	.Lmulx4x_cond_copy

# Constant-time select between tp[] and the subtracted result, scrubbing tp[].
.align	32
.Lmulx4x_cond_copy:
	movdqa	0(%rbx),%xmm2
	movdqa	16(%rbx),%xmm3
	leaq	32(%rbx),%rbx
	movdqu	0(%rdi),%xmm4
	movdqu	16(%rdi),%xmm5
	leaq	32(%rdi),%rdi
	movdqa	%xmm0,-32(%rbx)		# scrub scratch
	movdqa	%xmm0,-16(%rbx)
	pcmpeqd	%xmm1,%xmm0		# %xmm0 = ~mask
	pand	%xmm1,%xmm2
	pand	%xmm1,%xmm3
	pand	%xmm0,%xmm4
	pand	%xmm0,%xmm5
	pxor	%xmm0,%xmm0
	por	%xmm2,%xmm4
	por	%xmm3,%xmm5
	movdqu	%xmm4,-32(%rdi)
	movdqu	%xmm5,-16(%rdi)
	subq	$32,%rdx		# %rdx counts num*8 bytes down to 0
	jnz	.Lmulx4x_cond_copy

	movq	%rdx,(%rbx)		# final scrub of the last scratch slot (zero)

	movq	$1,%rax			# return 1
	movq	-48(%rsi),%r15
.cfi_restore	%r15
	movq	-40(%rsi),%r14
.cfi_restore	%r14
	movq	-32(%rsi),%r13
.cfi_restore	%r13
	movq	-24(%rsi),%r12
.cfi_restore	%r12
	movq	-16(%rsi),%rbp
.cfi_restore	%rbp
	movq	-8(%rsi),%rbx
.cfi_restore	%rbx
	leaq	(%rsi),%rsp
.cfi_def_cfa_register	%rsp
.Lmulx4x_epilogue:
	.byte	0xf3,0xc3		# rep ret
.cfi_endproc
.size	bn_mulx4x_mont,.-bn_mulx4x_mont
# ASCII banner: "Montgomery Multiplication for x86_64, CRYPTOGAMS by <appro@openssl.org>"
.byte	77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align	16
#endif
.section	.note.GNU-stack,"",@progbits