; PROLOGUE(mpn_mul_basecase)

; Copyright 2009 Jason Moxham
;
; Windows Conversion Copyright 2008 Brian Gladman
;
; This file is part of the MPIR Library.
;
; The MPIR Library is free software; you can redistribute it and/or modify
; it under the terms of the GNU Lesser General Public License as published
; by the Free Software Foundation; either version 2.1 of the License, or (at
; your option) any later version.

; The MPIR Library is distributed in the hope that it will be useful, but
; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
; License for more details.

; You should have received a copy of the GNU Lesser General Public License
; along with the MPIR Library; see the file COPYING.LIB. If not, write
; to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
; Boston, MA 02110-1301, USA.
;
; mp_limb_t mpn_mul_basecase(mp_ptr, mp_ptr, mp_size_t, mp_ptr, mp_size_t)
; rax                        rdi     rsi     rdx        rcx     r8
; rax                        rcx     rdx     r8         r9      [rsp+40]

%include "yasm_mac.inc"

%macro addmul2lp 0
    xalign 16
%%1:mov rax, [rsi+rbx*8]
    mul r8
    add r9, rax
    mov rax, [rsi+rbx*8+8]
    adc r10, rdx
    mov r11, 0
    mul rcx
    add [rdi+rbx*8], r12
    adc r9, rax
    mov r12, 0
    adc r10, rdx
    mov rax, [rsi+rbx*8+8]
    adc r11, 0
    mul r8
    add [rdi+rbx*8+8], r9
    adc r10, rax
    adc r11, rdx
    mov rax, [rsi+rbx*8+16]
    mul rcx
    add r10, rax
    mov rax, [rsi+rbx*8+16]
    adc r11, rdx
    adc r12, 0
    mul r8
    add [rdi+rbx*8+16], r10
    mov r9, 0
    adc r11, rax
    mov r10, 0
    mov rax, [rsi+rbx*8+24]
    adc r12, rdx
    mov r15, r15
    mul rcx
    add r11, rax
    mov rax, [rsi+rbx*8+24]
    adc r12, rdx
    adc r9, 0
    mul r8
    add [rdi+rbx*8+24], r11
    adc r12, rax
    adc r9, rdx
    mov rax, [rsi+rbx*8+32]
    mul rcx
    add r12, rax
    adc r9, rdx
    adc r10, 0
    add rbx, 4
    jnc %%1
%endmacro

%macro addmul2pro0 0
    mov rcx, [r13+r15*8]
    mul rcx
    mov r12, rax
    mov r9, rdx
    mov r10, 0
    mov r8, [r13+r15*8+8]
%endmacro

%macro addmul2epi0 0
    mov rbx, r14
    mov rax, [rsi+24]
    mul r8
    add [rdi+24], r12
    adc r9, rax
    adc r10, rdx
    add r15, 2
    mov rax, [rsi+r14*8]
    mov [rdi+32], r9
    lea rdi, [rdi+16]
    mov [rdi+24], r10
%endmacro

%macro addmul2pro1 0
    mov rcx, [r13+r15*8]
    mul rcx
    mov r12, rax
    mov r10, 0
    mov r9, rdx
    mov r8, [r13+r15*8+8]
%endmacro

%macro addmul2epi1 0
    mov rax, [rsi+16]
    lea rdi, [rdi+16]
    mul r8
    add r9, rax
    mov rax, [rsi+24]
    mov r11, 0
    adc r10, rdx
    mul rcx
    add [rdi], r12
    adc r9, rax
    adc r10, rdx
    adc r11, 0
    mov rax, [rsi+24]
    mul r8
    add [rdi+8], r9
    adc r10, rax
    adc r11, rdx
    add r15, 2
    mov rbx, r14
    mov rax, [rsi+r14*8]
    mov [rdi+24], r11
    mov [rdi+16], r10
%endmacro

%macro addmul2pro2 0
    mov rcx, [r13+r15*8]
    mul rcx
    mov r10, 0
    mov r12, rax
    mov r9, rdx
    mov r8, [r13+r15*8+8]
%endmacro

%macro addmul2epi2 0
    mov rax, [rsi+8]
    lea rdi, [rdi+16]
    mul r8
    add r9, rax
    mov rax, [rsi+16]
    adc r10, rdx
    mov r11, 0
    mul rcx
    add [rdi-8], r12
    adc r9, rax
    mov r12, 0
    adc r10, rdx
    mov rax, [rsi+16]
    adc r11, 0
    mul r8
    add [rdi], r9
    adc r10, rax
    adc r11, rdx
    mov rax, [rsi+24]
    mul rcx
    add r10, rax
    mov rax, [rsi+24]
    adc r11, rdx
    adc r12, 0
    mul r8
    add [rdi+8], r10
    adc r11, rax
    adc r12, rdx
    mov rax, [rsi+r14*8]
    mov [rdi+16], r11
    mov [rdi+24], r12
    add r15, 2
    mov rbx, r14
%endmacro

%macro addmul2pro3 0
    mov rcx, [r13+r15*8]
    mul rcx
    mov r12, rax
    mov r9, rdx
    mov r8, [r13+r15*8+8]
    mov r10, 0
%endmacro

%macro addmul2epi3 0
    mov rax, [rsi]
    lea rdi, [rdi+16]
    mul r8
    add r9, rax
    mov rax, [rsi+8]
    adc r10, rdx
    mov r11, 0
    mul rcx
    add [rdi-16], r12
    adc r9, rax
    mov r12, 0
    adc r10, rdx
    mov rax, [rsi+8]
    adc r11, 0
    mul r8
    add [rdi-8], r9
    adc r10, rax
    adc r11, rdx
    mov rax, [rsi+16]
    mul rcx
    add r10, rax
    mov rax, [rsi+16]
    adc r11, rdx
    adc r12, 0
    mul r8
    add [rdi], r10
    mov r9, 0
    adc r11, rax
    mov r10, 0
    mov rax, [rsi+24]
    adc r12, rdx
    mov r15, r15
    mul rcx
    add r11, rax
    mov rax, [rsi+24]
    adc r12, rdx
    adc r9, 0
    mul r8
    add [rdi+8], r11
    adc r12, rax
    adc r9, rdx
    mov rax, [rsi+r14*8]
    mov [rdi+16], r12
    mov [rdi+24], r9
    add r15, 2
    mov rbx, r14
%endmacro
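
; The addmul2pro?/addmul2lp/addmul2epi? macros above form an addmul_2 pass:
; they multiply the source limbs at rsi by two multiplier limbs (loaded from
; [r13+r15*8] and [r13+r15*8+8] into rcx and r8) and add the products into
; the destination at rdi, four source limbs per loop iteration, with one
; prologue/epilogue pair for each possible leftover count (0 to 3).  The
; mul2* macros that follow are the write-out counterparts used for the very
; first pass over the destination: they store the products rather than
; accumulate them, but are otherwise laid out identically.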

%macro mul2lp 0
    xalign 16
%%1:mov rax, [rsi+rbx*8]
    mul r8
    add r9, rax
    mov rax, [rsi+rbx*8+8]
    adc r10, rdx
    mov r11, 0
    mul rcx
    mov [rdi+rbx*8], r12
    add r9, rax
    mov r12, 0
    adc r10, rdx
    mov rax, [rsi+rbx*8+8]
    adc r11, 0
    mul r8
    mov [rdi+rbx*8+8], r9
    add r10, rax
    adc r11, rdx
    mov rax, [rsi+rbx*8+16]
    mul rcx
    add r10, rax
    mov rax, [rsi+rbx*8+16]
    adc r11, rdx
    adc r12, 0
    mul r8
    mov [rdi+rbx*8+16], r10
    mov r9, 0
    add r11, rax
    mov r10, 0
    mov rax, [rsi+rbx*8+24]
    adc r12, rdx
    mov r15, r15
    mul rcx
    add r11, rax
    mov rax, [rsi+rbx*8+24]
    adc r12, rdx
    adc r9, 0
    mul r8
    mov [rdi+rbx*8+24], r11
    add r12, rax
    adc r9, rdx
    mov rax, [rsi+rbx*8+32]
    mul rcx
    add r12, rax
    adc r9, rdx
    adc r10, 0
    add rbx, 4
    jnc %%1
%endmacro

%macro mul2pro0 0
    mov rcx, [r13+r15*8]
    mul rcx
    mov r12, rax
    mov r9, rdx
    mov r10, 0
    mov r8, [r13+r15*8+8]
%endmacro

%macro mul2epi0 0
    mov rbx, r14
    mov rax, [rsi+24]
    mul r8
    mov [rdi+24], r12
    add r9, rax
    adc r10, rdx
    add r15, 2
    mov rax, [rsi+r14*8]
    mov [rdi+32], r9
    lea rdi, [rdi+16]
    mov [rdi+24], r10
%endmacro

%macro mul2pro1 0
    mov rcx, [r13+r15*8]
    mul rcx
    mov r12, rax
    mov r10, 0
    mov r9, rdx
    mov r8, [r13+r15*8+8]
%endmacro

%macro mul2epi1 0
    mov rax, [rsi+16]
    lea rdi, [rdi+16]
    mul r8
    add r9, rax
    mov rax, [rsi+24]
    mov r11, 0
    adc r10, rdx
    mul rcx
    mov [rdi], r12
    add r9, rax
    adc r10, rdx
    adc r11, 0
    mov rax, [rsi+24]
    mul r8
    mov [rdi+8], r9
    add r10, rax
    adc r11, rdx
    add r15, 2
    mov rbx, r14
    mov rax, [rsi+r14*8]
    mov [rdi+24], r11
    mov [rdi+16], r10
%endmacro

%macro mul2pro2 0
    mov rcx, [r13+r15*8]
    mul rcx
    mov r10, 0
    mov r12, rax
    mov r9, rdx
    mov r8, [r13+r15*8+8]
%endmacro

%macro mul2epi2 0
    mov rax, [rsi+8]
    lea rdi, [rdi+16]
    mul r8
    add r9, rax
    mov rax, [rsi+16]
    adc r10, rdx
    mov r11, 0
    mul rcx
    mov [rdi-8], r12
    add r9, rax
    mov r12, 0
    adc r10, rdx
    mov rax, [rsi+16]
    adc r11, 0
    mul r8
    mov [rdi], r9
    add r10, rax
    adc r11, rdx
    mov rax, [rsi+24]
    mul rcx
    add r10, rax
    mov rax, [rsi+24]
    adc r11, rdx
    adc r12, 0
    mul r8
    mov [rdi+8], r10
    add r11, rax
    adc r12, rdx
    mov rax, [rsi+r14*8]
    mov [rdi+16], r11
    mov [rdi+24], r12
    add r15, 2
    mov rbx, r14
%endmacro

%macro mul2pro3 0
    mov rcx, [r13+r15*8]
    mul rcx
    mov r12, rax
    mov r9, rdx
    mov r8, [r13+r15*8+8]
    mov r10, 0
%endmacro

%macro mul2epi3 0
    mov rax, [rsi]
    lea rdi, [rdi+16]
    mul r8
    add r9, rax
    mov rax, [rsi+8]
    adc r10, rdx
    mov r11, 0
    mul rcx
    mov [rdi-16], r12
    add r9, rax
    mov r12, 0
    adc r10, rdx
    mov rax, [rsi+8]
    adc r11, 0
    mul r8
    mov [rdi-8], r9
    add r10, rax
    adc r11, rdx
    mov rax, [rsi+16]
    mul rcx
    add r10, rax
    mov rax, [rsi+16]
    adc r11, rdx
    adc r12, 0
    mul r8
    mov [rdi], r10
    mov r9, 0
    add r11, rax
    mov r10, 0
    mov rax, [rsi+24]
    adc r12, rdx
    mov r15, r15
    mul rcx
    add r11, rax
    mov rax, [rsi+24]
    adc r12, rdx
    adc r9, 0
    mul r8
    mov [rdi+8], r11
    add r12, rax
    adc r9, rdx
    mov rax, [rsi+r14*8]
    mov [rdi+16], r12
    mov [rdi+24], r9
    add r15, 2
    mov rbx, r14
%endmacro
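
; The mul1lp/mulnext? macros below implement a single-limb (mul_1) pass: the
; source is multiplied by one multiplier limb in r8 and the products are
; stored to rdi, four limbs per iteration, with mulnext0..3 finishing off the
; 0..3 leftover limbs.  This pass is taken when the multiplier count is odd
; (see the "bt r15, 0" dispatch near the end of the file), so the remaining
; passes can then run two limbs at a time.  The "db 0x26" bytes emit an ES
; segment-override prefix, which is ignored in 64-bit mode and serves only as
; single-byte padding to tune code alignment for the target core.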

%macro mul1lp 0
    xalign 16
%%1:
    mov r10, 0
    mul r8
    mov [rdi+rbx*8-8], r12
    add r9, rax
    db 0x26
    adc r10, rdx
    mov rax, [rsi+rbx*8+8]
    mul r8
    mov [rdi+rbx*8], r9
    add r10, rax
    mov r11d, 0
    adc r11, rdx
    mov rax, [rsi+rbx*8+16]
    mov r12, 0
    mov r9, 0
    mul r8
    mov [rdi+rbx*8+8], r10
    db 0x26
    add r11, rax
    db 0x26
    adc r12, rdx
    mov rax, [rsi+rbx*8+24]
    mul r8
    mov [rdi+rbx*8+16], r11
    db 0x26
    add r12, rax
    db 0x26
    adc r9, rdx
    add rbx, 4
    mov rax, [rsi+rbx*8]
    jnc %%1
%endmacro

; rbx is 0
%macro mulnext0 0
    mov rax, [rsi+8]
    mul r8
    mov [rdi], r9
    add r10, rax
    mov r11d, 0
    adc r11, rdx
    mov rax, [rsi+16]
    mov r12d, 0
    mul r8
    mov [rdi+8], r10
    add r11, rax
    adc r12, rdx
    mov rax, [rsi+24]
    mul r8
    mov [rdi+16], r11
    add r12, rax
    adc rdx, 0
    mov [rdi+24], r12
    mov rax, [rsi+r14*8]
    mov [rdi+32], rdx
    inc r15
    lea rdi, [rdi+8]
    mov rbx, r14
%endmacro

; rbx is 1
%macro mulnext1 0
    mov rax, [rsi+16]
    mul r8
    mov [rdi+8], r9
    add r10, rax
    mov r12d, 0
    adc r12, rdx
    mov rax, [rsi+24]
    mul r8
    mov [rdi+16], r10
    add r12, rax
    adc rdx, 0
    mov [rdi+24], r12
    mov [rdi+32], rdx
    inc r15
    lea rdi, [rdi+8]
    mov rbx, r14
    mov rax, [rsi+r14*8]
%endmacro

; rbx is 2
%macro mulnext2 0
    mov rax, [rsi+24]
    mul r8
    mov [rdi+16], r9
    add r10, rax
    mov r11d, 0
    adc r11, rdx
    mov [rdi+24], r10
    mov [rdi+32], r11
    inc r15
    lea rdi, [rdi+8]
    mov rax, [rsi+r14*8]
    mov rbx, r14
%endmacro

; rbx is 3
%macro mulnext3 0
    mov [rdi+24], r9
    mov [rdi+32], r10
    inc r15
    lea rdi, [rdi+8]
    mov rax, [rsi+r14*8]
    mov rbx, r14
%endmacro

%macro mpn_addmul_2_int 1
    jz %%2
    xalign 16
%%1:addmul2pro%1
    addmul2lp
    addmul2epi%1
    jnz %%1
%%2:
%endmacro
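
; The old* macros below are the mul_1/addmul_1 building blocks kept for the
; small-operand entry point (mpn_bobcat_mbc1): oldmulnext? finishes the first
; mul_1 row and oldaddmulpro?/oldaddmulnext? then add one further row per
; multiplier limb, again with one variant per leftover count.  The
; oldmpn_muladdmul_1_int macro glues a finishing variant to its matching
; addmul loop.  This path is only reached when the size checked on entry
; ("cmp r8d, 4" in mpn_mul_basecase) is at most 4.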

%macro oldmulnext0 0
    mov rax, [rsi+r11*8+16]
    mul r13
    mov [rdi+r11*8+8], r9
    add r10, rax
    mov ebx, 0
    adc rbx, rdx
    mov rax, [rsi+r11*8+24]
    mov r12d, 0
    mul r13
    mov [rdi+r11*8+16], r10
    add rbx, rax
    adc r12, rdx
    mov rax, [rsi+r11*8+32]
    mul r13
    mov [rdi+r11*8+24], rbx
    add r12, rax
    adc rdx, 0
    mov [rdi+r11*8+32], r12
    mov rax, [rsi+r14*8]
    mov [rdi+r11*8+40], rdx
    inc r8
    mov r11, r14
%endmacro

%macro oldmulnext1 0
    mov rax, [rsi+r11*8+16]
    mul r13
    mov [rdi+r11*8+8], r9
    add r10, rax
    mov r12d, 0
    adc r12, rdx
    mov rax, [rsi+r11*8+24]
    mul r13
    mov [rdi+r11*8+16], r10
    add r12, rax
    adc rdx, 0
    mov [rdi+r11*8+24], r12
    mov [rdi+r11*8+32], rdx
    inc r8
    lea rdi, [rdi+8]
    mov r11, r14
    mov rax, [rsi+r14*8]
%endmacro

%macro oldmulnext2 0
    mov rax, [rsi+r11*8+16]
    mul r13
    mov [rdi+r11*8+8], r9
    add r10, rax
    mov ebx, 0
    adc rbx, rdx
    mov [rdi+r11*8+16], r10
    mov [rdi+r11*8+24], rbx
    inc r8
    mov rax, [rsi+r14*8]
    mov r11, r14
%endmacro

%macro oldmulnext3 0
    mov [rdi+r11*8+8], r9
    mov [rdi+r11*8+16], r10
    inc r8
    mov rax, [rsi+r14*8]
    mov r11, r14
%endmacro

%macro oldaddmulpro0 0
    mov r13, [rcx+r8*8]
    db 0x26
    mul r13
    db 0x26
    mov r12, rax
    mov rax, [rsi+r14*8+8]
    db 0x26
    mov r9, rdx
    lea rdi, [rdi+8]
%endmacro

%macro oldaddmulnext0 0
    mov r10d, 0
    mul r13
    add [rdi], r12
    adc r9, rax
    adc r10, rdx
    mov rax, [rsi+16]
    mul r13
    add [rdi+8], r9
    adc r10, rax
    mov ebx, 0
    adc rbx, rdx
    mov rax, [rsi+24]
    mov r12d, 0
    mov r11, r14
    mul r13
    add [rdi+16], r10
    adc rbx, rax
    adc r12, rdx
    mov rax, [rsi+32]
    mul r13
    add [rdi+24], rbx
    adc r12, rax
    adc rdx, 0
    add [rdi+32], r12
    mov rax, [rsi+r14*8]
    adc rdx, 0
    inc r8
    mov [rdi+40], rdx
%endmacro

%macro oldaddmulpro1 0
    mov r13, [rcx+r8*8]
    mul r13
    mov r12, rax
    mov rax, [rsi+r14*8+8]
    mov r9, rdx
%endmacro

%macro oldaddmulnext1 0
    mov r10d, 0
    mul r13
    add [rdi+8], r12
    adc r9, rax
    adc r10, rdx
    mov rax, [rsi+24]
    mul r13
    lea rdi, [rdi+8]
    add [rdi+8], r9
    adc r10, rax
    mov r12d, 0
    mov rax, [rsi+32]
    adc r12, rdx
    mov r11, r14
    mul r13
    add [rdi+16], r10
    adc r12, rax
    adc rdx, 0
    add [rdi+24], r12
    adc rdx, 0
    mov [rdi+32], rdx
    inc r8
    mov rax, [rsi+r14*8]
%endmacro

%macro oldaddmulpro2 0
    mov r13, [rcx+r8*8]
    lea rdi, [rdi+8]
    mul r13
    mov r12, rax
    mov rax, [rsi+r14*8+8]
    mov r9, rdx
%endmacro

%macro oldaddmulnext2 0
    mov r10d, 0
    mul r13
    add [rdi+r11*8], r12
    adc r9, rax
    adc r10, rdx
    mov rax, [rsi+r11*8+16]
    mul r13
    mov ebx, 0
    add [rdi+r11*8+8], r9
    adc r10, rax
    adc rbx, rdx
    mov rax, [rsi+r14*8]
    add [rdi+r11*8+16], r10
    adc rbx, 0
    mov [rdi+r11*8+24], rbx
    inc r8
    mov r11, r14
%endmacro

%macro oldaddmulpro3 0
    mov r13, [rcx+r8*8]
    db 0x26
    mul r13
    db 0x26
    mov r12, rax
    db 0x26
    lea rdi, [rdi+8]
    db 0x26
    mov r9, rdx
    mov rax, [rsi+r14*8+8]
%endmacro

%macro oldaddmulnext3 0
    mov r11, r14
    mul r13
    add [rdi+24], r12
    adc r9, rax
    adc rdx, 0
    add [rdi+32], r9
    mov rax, [rsi+r14*8]
    adc rdx, 0
    inc r8
    mov [rdi+40], rdx
%endmacro

%macro oldmpn_muladdmul_1_int 1
    oldmulnext%1
    jz %%2
    xalign 16
%%1:oldaddmulpro%1
    oldaddmulnext%1
    jnz %%1
%%2:
%endmacro

    CPU Core2
    BITS 64

; mp_limb_t mpn_mul_basecase(mp_ptr, mp_ptr, mp_size_t, mp_ptr, mp_size_t)
; rax                        rdi     rsi     rdx        rcx     r8
; rax                        rcx     rdx     r8         r9      [rsp+40]
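
; For reference, a rough C equivalent of the operation both entry points
; below perform (illustrative only, not part of the build; it assumes 64-bit
; limbs and a compiler providing unsigned __int128, and the helper name is
; made up for this sketch):
;
;   typedef unsigned long long mp_limb_t;
;
;   void ref_mul_basecase(mp_limb_t *rp, const mp_limb_t *up, long un,
;                         const mp_limb_t *vp, long vn)
;   {
;       long i, j;
;       for (i = 0; i < un; i++)
;           rp[i] = 0;
;       for (j = 0; j < vn; j++) {          /* one row per multiplier limb */
;           mp_limb_t cy = 0;
;           for (i = 0; i < un; i++) {
;               unsigned __int128 t = (unsigned __int128) up[i] * vp[j]
;                                     + rp[i + j] + cy;
;               rp[i + j] = (mp_limb_t) t;  /* low limb of the partial sum */
;               cy = (mp_limb_t) (t >> 64); /* carry into the next column */
;           }
;           rp[un + j] = cy;
;       }
;   }
;
; The code below differs only in schedule: the large-operand path processes
; two multiplier limbs per row (mul_2/addmul_2) after an optional mul_1 row.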
%define reg_save_list rbx, rsi, rdi, r12, r13, r14

    LEAF_PROC mpn_mul_basecase
    ; the current mul code does not handle the n = 1 case
    cmp r8d, 4
    jg fiveormore
    cmp r8d, 1
    je one

    WIN64_GCC_PROC mpn_bobcat_mbc1, 5, frame

    mov r14, 5
    sub r14, rdx
    lea rdi, [rdi+rdx*8-40]
    lea rcx, [rcx+r8*8]
    neg r8
    lea rsi, [rsi+rdx*8-40]
    mov rax, [rsi+r14*8]
    mov r13, [rcx+r8*8]
    mov r11, r14
    mul r13
    mov r12, rax
    mov rax, [rsi+r14*8+8]
    mov r9, rdx
    mov r10d, 0
    mul r13
    mov [rdi+r11*8], r12
    add r9, rax
    adc r10, rdx
    cmp r11, 2
    ja .4
    jz .3
    jp .2
.1: oldmpn_muladdmul_1_int 0
    jmp .5
.2: oldmpn_muladdmul_1_int 1
    jmp .5
.3: oldmpn_muladdmul_1_int 2
    jmp .5
.4: oldmpn_muladdmul_1_int 3
.5: WIN64_GCC_END frame

; rdx >= 5 as we don't have an inner jump
; (rdi, rdx+r8) = (rsi, rdx)*(rcx, r8)

%undef reg_save_list
%define reg_save_list rbx, rsi, rdi, r12, r13, r14, r15

    xalign 16
fiveormore:
    WIN64_GCC_PROC mpn_bobcat_mbc2, 5, frame
    movsxd rdx, edx
    movsxd r8, r8d

    mov r14, 4
    sub r14, rdx
    lea rdi, [rdi+rdx*8-32]
    lea rsi, [rsi+rdx*8-32]
    mov r13, rcx
    mov r15, r8
    lea r13, [r13+r15*8]
    neg r15
    mov rbx, r14
    mov rax, [rsi+r14*8]
    bt r15, 0
    jnc .12
.6: inc rbx
    mov r8, [r13+r15*8]
    mul r8
    mov r12, rax
    mov rax, [rsi+r14*8+8]
    mov r9, rdx
    cmp rbx, 0
    jge .7
    mul1lp
.7: mov r10d, 0
    mul r8
    mov [rdi+rbx*8-8], r12
    add r9, rax
    adc r10, rdx
    cmp rbx, 2
    ja .11
    jz .10
    jp .9
.8: mulnext0
    jmp .20
.9: mulnext1
    jmp .14
.10:mulnext2
    jmp .16
.11:mulnext3
    jmp .18
    ; as all the mul2pro? are the same
.12:mul2pro0
    mul2lp
    cmp rbx, 2
    ja .19
    jz .17
    jp .15
.13:mul2epi3
.14:mpn_addmul_2_int 3
    WIN64_GCC_EXIT frame

.15:mul2epi2
.16:mpn_addmul_2_int 2
    WIN64_GCC_EXIT frame

.17:mul2epi1
.18:mpn_addmul_2_int 1
    WIN64_GCC_EXIT frame

.19:mul2epi0
.20:mpn_addmul_2_int 0

    xalign 16
.21:WIN64_GCC_END frame

    xalign 16
one:mov rax, [rdx]
    mul qword [r9]
    mov [rcx], rax
    mov [rcx+8], rdx
    ret

    end