; This file is generated from a similarly-named Perl script in the BoringSSL
; source tree. Do not edit by hand.
;
; Layout note: one instruction per line, NASM syntax, Win64 calling
; convention. Byte sequences emitted with DB are kept as bytes; where the
; bytes decode to an SSE2 MOVQ the decoded form is given in a comment.

default	rel
%define XMMWORD
%define YMMWORD
%define ZMMWORD
section	.text code align=64


EXTERN	GFp_ia32cap_P

global	GFp_bn_mul_mont_gather5

;-----------------------------------------------------------------------
; GFp_bn_mul_mont_gather5
; ABI:  Win64 on entry. rdi/rsi are spilled to the caller's home space,
;       then rcx, rdx, r8, r9, [40+rsp], [48+rsp] are moved into
;       rdi, rsi, rdx, rcx, r8, r9 for the body below.
; A further 32-bit argument is read from [56+rsp]; it is broadcast and
; compared against counters to build select masks (presumably the table
; index to gather -- confirm against the C prototype).
; Dispatch: when the low three bits of r9d are zero, control transfers to
; $L$mul4x_enter with r11d = dword at GFp_ia32cap_P+8; otherwise the
; generic path at $L$mul_enter runs.
; Returns rax = 1 on the generic path.
;-----------------------------------------------------------------------
ALIGN	64
GFp_bn_mul_mont_gather5:
	mov	QWORD[8+rsp],rdi	;WIN64 prologue
	mov	QWORD[16+rsp],rsi
	mov	rax,rsp
$L$SEH_begin_GFp_bn_mul_mont_gather5:
	mov	rdi,rcx
	mov	rsi,rdx
	mov	rdx,r8
	mov	rcx,r9
	mov	r8,QWORD[40+rsp]
	mov	r9,QWORD[48+rsp]

	mov	r9d,r9d			; zero-extend the 32-bit count into r9
	mov	rax,rsp			; rax = stack pointer at entry

	test	r9d,7
	jnz	NEAR $L$mul_enter
	lea	r11,[GFp_ia32cap_P]
	mov	r11d,DWORD[8+r11]
	jmp	NEAR $L$mul4x_enter

ALIGN	16
$L$mul_enter:
	movd	xmm5,DWORD[56+rsp]
	push	rbx
	push	rbp
	push	r12
	push	r13
	push	r14
	push	r15

; Carve out the frame: r10 = (rsp - 280 - 8*r9) rounded down to 1024.
	neg	r9
	mov	r11,rsp
	lea	r10,[((-280))+r9*8+rsp]
	neg	r9
	and	r10,-1024

; Move rsp down to r10, reading one qword per 4096-byte step on the way.
	sub	r11,r10
	and	r11,-4096
	lea	rsp,[r11*1+r10]
	mov	r11,QWORD[rsp]
	cmp	rsp,r10
	ja	NEAR $L$mul_page_walk
	jmp	NEAR $L$mul_page_walk_done

$L$mul_page_walk:
	lea	rsp,[((-4096))+rsp]
	mov	r11,QWORD[rsp]
	cmp	rsp,r10
	ja	NEAR $L$mul_page_walk
$L$mul_page_walk_done:

	lea	r10,[$L$inc]
	mov	QWORD[8+r9*8+rsp],rax	; remember the entry rsp for the epilogue

$L$mul_body:

	lea	r12,[128+rdx]
	movdqa	xmm0,XMMWORD[r10]
	movdqa	xmm1,XMMWORD[16+r10]
	lea	r10,[((24-112))+r9*8+rsp]
	and	r10,-16

; Build sixteen XMMWORD masks at 112..352[r10]: a running counter pair is
; advanced with paddd and each step is compared (pcmpeqd) with the
; broadcast value in xmm5.
	pshufd	xmm5,xmm5,0
	movdqa	xmm4,xmm1
	movdqa	xmm2,xmm1
	paddd	xmm1,xmm0
	pcmpeqd	xmm0,xmm5
DB	0x67
	movdqa	xmm3,xmm4
	paddd	xmm2,xmm1
	pcmpeqd	xmm1,xmm5
	movdqa	XMMWORD[112+r10],xmm0
	movdqa	xmm0,xmm4

	paddd	xmm3,xmm2
	pcmpeqd	xmm2,xmm5
	movdqa	XMMWORD[128+r10],xmm1
	movdqa	xmm1,xmm4

	paddd	xmm0,xmm3
	pcmpeqd	xmm3,xmm5
	movdqa	XMMWORD[144+r10],xmm2
	movdqa	xmm2,xmm4

	paddd	xmm1,xmm0
	pcmpeqd	xmm0,xmm5
	movdqa	XMMWORD[160+r10],xmm3
	movdqa	xmm3,xmm4
	paddd	xmm2,xmm1
	pcmpeqd	xmm1,xmm5
	movdqa	XMMWORD[176+r10],xmm0
	movdqa	xmm0,xmm4

	paddd	xmm3,xmm2
	pcmpeqd	xmm2,xmm5
	movdqa	XMMWORD[192+r10],xmm1
	movdqa	xmm1,xmm4

	paddd	xmm0,xmm3
	pcmpeqd	xmm3,xmm5
	movdqa	XMMWORD[208+r10],xmm2
	movdqa	xmm2,xmm4

	paddd	xmm1,xmm0
	pcmpeqd	xmm0,xmm5
	movdqa	XMMWORD[224+r10],xmm3
	movdqa	xmm3,xmm4
	paddd	xmm2,xmm1
	pcmpeqd	xmm1,xmm5
	movdqa	XMMWORD[240+r10],xmm0
	movdqa	xmm0,xmm4

	paddd	xmm3,xmm2
	pcmpeqd	xmm2,xmm5
	movdqa	XMMWORD[256+r10],xmm1
	movdqa	xmm1,xmm4

	paddd	xmm0,xmm3
	pcmpeqd	xmm3,xmm5
	movdqa	XMMWORD[272+r10],xmm2
	movdqa	xmm2,xmm4

	paddd	xmm1,xmm0
	pcmpeqd	xmm0,xmm5
	movdqa	XMMWORD[288+r10],xmm3
	movdqa	xmm3,xmm4
	paddd	xmm2,xmm1
	pcmpeqd	xmm1,xmm5
	movdqa	XMMWORD[304+r10],xmm0

	paddd	xmm3,xmm2
DB	0x67
	pcmpeqd	xmm2,xmm5
	movdqa	XMMWORD[320+r10],xmm1

	pcmpeqd	xmm3,xmm5
	movdqa	XMMWORD[336+r10],xmm2

; Gather: every one of the sixteen XMMWORDs at -128..112[r12] is read and
; ANDed with its mask, then all are ORed together, so the set of addresses
; touched does not depend on the index.
	pand	xmm0,XMMWORD[64+r12]

	pand	xmm1,XMMWORD[80+r12]
	pand	xmm2,XMMWORD[96+r12]
	movdqa	XMMWORD[352+r10],xmm3
	pand	xmm3,XMMWORD[112+r12]
	por	xmm0,xmm2
	por	xmm1,xmm3
	movdqa	xmm4,XMMWORD[((-128))+r12]
	movdqa	xmm5,XMMWORD[((-112))+r12]
	movdqa	xmm2,XMMWORD[((-96))+r12]
	pand	xmm4,XMMWORD[112+r10]
	movdqa	xmm3,XMMWORD[((-80))+r12]
	pand	xmm5,XMMWORD[128+r10]
	por	xmm0,xmm4
	pand	xmm2,XMMWORD[144+r10]
	por	xmm1,xmm5
	pand	xmm3,XMMWORD[160+r10]
	por	xmm0,xmm2
	por	xmm1,xmm3
	movdqa	xmm4,XMMWORD[((-64))+r12]
	movdqa	xmm5,XMMWORD[((-48))+r12]
	movdqa	xmm2,XMMWORD[((-32))+r12]
	pand	xmm4,XMMWORD[176+r10]
	movdqa	xmm3,XMMWORD[((-16))+r12]
	pand	xmm5,XMMWORD[192+r10]
	por	xmm0,xmm4
	pand	xmm2,XMMWORD[208+r10]
	por	xmm1,xmm5
	pand	xmm3,XMMWORD[224+r10]
	por	xmm0,xmm2
	por	xmm1,xmm3
	movdqa	xmm4,XMMWORD[r12]
	movdqa	xmm5,XMMWORD[16+r12]
	movdqa	xmm2,XMMWORD[32+r12]
	pand	xmm4,XMMWORD[240+r10]
	movdqa	xmm3,XMMWORD[48+r12]
	pand	xmm5,XMMWORD[256+r10]
	por	xmm0,xmm4
	pand	xmm2,XMMWORD[272+r10]
	por	xmm1,xmm5
	pand	xmm3,XMMWORD[288+r10]
	por	xmm0,xmm2
	por	xmm1,xmm3
	por	xmm0,xmm1
	pshufd	xmm1,xmm0,0x4e		; swap the two qword halves
	por	xmm0,xmm1
	lea	r12,[256+r12]
DB	102,72,15,126,195		; movq rbx,xmm0

	mov	r8,QWORD[r8]
	mov	rax,QWORD[rsi]

	xor	r14,r14			; r14 = outer counter
	xor	r15,r15			; r15 = inner counter

; First pass: products by rbx and by rbp accumulate into the qword array
; at [rsp].
	mov	rbp,r8
	mul	rbx
	mov	r10,rax
	mov	rax,QWORD[rcx]

	imul	rbp,r10
	mov	r11,rdx

	mul	rbp
	add	r10,rax
	mov	rax,QWORD[8+rsi]
	adc	rdx,0
	mov	r13,rdx

	lea	r15,[1+r15]
	jmp	NEAR $L$1st_enter

ALIGN	16
$L$1st:
	add	r13,rax
	mov	rax,QWORD[r15*8+rsi]
	adc	rdx,0
	add	r13,r11
	mov	r11,r10
	adc	rdx,0
	mov	QWORD[((-16))+r15*8+rsp],r13
	mov	r13,rdx

$L$1st_enter:
	mul	rbx
	add	r11,rax
	mov	rax,QWORD[r15*8+rcx]
	adc	rdx,0
	lea	r15,[1+r15]
	mov	r10,rdx

	mul	rbp
	cmp	r15,r9
	jne	NEAR $L$1st

	add	r13,rax
	adc	rdx,0
	add	r13,r11
	adc	rdx,0
	mov	QWORD[((-16))+r9*8+rsp],r13
	mov	r13,rdx
	mov	r11,r10

	xor	rdx,rdx
	add	r13,r11
	adc	rdx,0
	mov	QWORD[((-8))+r9*8+rsp],r13
	mov	QWORD[r9*8+rsp],rdx	; top carry word

	lea	r14,[1+r14]
	jmp	NEAR $L$outer

; Outer loop: runs while r14 < r9. Each pass gathers the next 256-byte
; table row through the stored masks (rdx points 128 bytes into them).
ALIGN	16
$L$outer:
	lea	rdx,[((24+128))+r9*8+rsp]
	and	rdx,-16
	pxor	xmm4,xmm4
	pxor	xmm5,xmm5
	movdqa	xmm0,XMMWORD[((-128))+r12]
	movdqa	xmm1,XMMWORD[((-112))+r12]
	movdqa	xmm2,XMMWORD[((-96))+r12]
	movdqa	xmm3,XMMWORD[((-80))+r12]
	pand	xmm0,XMMWORD[((-128))+rdx]
	pand	xmm1,XMMWORD[((-112))+rdx]
	por	xmm4,xmm0
	pand	xmm2,XMMWORD[((-96))+rdx]
	por	xmm5,xmm1
	pand	xmm3,XMMWORD[((-80))+rdx]
	por	xmm4,xmm2
	por	xmm5,xmm3
	movdqa	xmm0,XMMWORD[((-64))+r12]
	movdqa	xmm1,XMMWORD[((-48))+r12]
	movdqa	xmm2,XMMWORD[((-32))+r12]
	movdqa	xmm3,XMMWORD[((-16))+r12]
	pand	xmm0,XMMWORD[((-64))+rdx]
	pand	xmm1,XMMWORD[((-48))+rdx]
	por	xmm4,xmm0
	pand	xmm2,XMMWORD[((-32))+rdx]
	por	xmm5,xmm1
	pand	xmm3,XMMWORD[((-16))+rdx]
	por	xmm4,xmm2
	por	xmm5,xmm3
	movdqa	xmm0,XMMWORD[r12]
	movdqa	xmm1,XMMWORD[16+r12]
	movdqa	xmm2,XMMWORD[32+r12]
	movdqa	xmm3,XMMWORD[48+r12]
	pand	xmm0,XMMWORD[rdx]
	pand	xmm1,XMMWORD[16+rdx]
	por	xmm4,xmm0
	pand	xmm2,XMMWORD[32+rdx]
	por	xmm5,xmm1
	pand	xmm3,XMMWORD[48+rdx]
	por	xmm4,xmm2
	por	xmm5,xmm3
	movdqa	xmm0,XMMWORD[64+r12]
	movdqa	xmm1,XMMWORD[80+r12]
	movdqa	xmm2,XMMWORD[96+r12]
	movdqa	xmm3,XMMWORD[112+r12]
	pand	xmm0,XMMWORD[64+rdx]
	pand	xmm1,XMMWORD[80+rdx]
	por	xmm4,xmm0
	pand	xmm2,XMMWORD[96+rdx]
	por	xmm5,xmm1
	pand	xmm3,XMMWORD[112+rdx]
	por	xmm4,xmm2
	por	xmm5,xmm3
	por	xmm4,xmm5
	pshufd	xmm0,xmm4,0x4e
	por	xmm0,xmm4
	lea	r12,[256+r12]

	mov	rax,QWORD[rsi]
DB	102,72,15,126,195		; movq rbx,xmm0

	xor	r15,r15
	mov	rbp,r8
	mov	r10,QWORD[rsp]

	mul	rbx
	add	r10,rax
	mov	rax,QWORD[rcx]
	adc	rdx,0

	imul	rbp,r10
	mov	r11,rdx

	mul	rbp
	add	r10,rax
	mov	rax,QWORD[8+rsi]
	adc	rdx,0
	mov	r10,QWORD[8+rsp]
	mov	r13,rdx

	lea	r15,[1+r15]
	jmp	NEAR $L$inner_enter

ALIGN	16
$L$inner:
	add	r13,rax
	mov	rax,QWORD[r15*8+rsi]
	adc	rdx,0
	add	r13,r10
	mov	r10,QWORD[r15*8+rsp]
	adc	rdx,0
	mov	QWORD[((-16))+r15*8+rsp],r13
	mov	r13,rdx

$L$inner_enter:
	mul	rbx
	add	r11,rax
	mov	rax,QWORD[r15*8+rcx]
	adc	rdx,0
	add	r10,r11
	mov	r11,rdx
	adc	r11,0
	lea	r15,[1+r15]

	mul	rbp
	cmp	r15,r9
	jne	NEAR $L$inner

	add	r13,rax
	adc	rdx,0
	add	r13,r10
	mov	r10,QWORD[r9*8+rsp]
	adc	rdx,0
	mov	QWORD[((-16))+r9*8+rsp],r13
	mov	r13,rdx

	xor	rdx,rdx
	add	r13,r11
	adc	rdx,0
	add	r13,r10
	adc	rdx,0
	mov	QWORD[((-8))+r9*8+rsp],r13
	mov	QWORD[r9*8+rsp],rdx

	lea	r14,[1+r14]
	cmp	r14,r9
	jb	NEAR $L$outer

; Subtract the words at [rcx] from the stack array, with borrow, writing
; the difference to [rdi]. The xor below also clears CF for the first sbb;
; lea, mov and dec leave CF untouched between iterations.
	xor	r14,r14
	mov	rax,QWORD[rsp]
	lea	rsi,[rsp]
	mov	r15,r9
	jmp	NEAR $L$sub
ALIGN	16
$L$sub:	sbb	rax,QWORD[r14*8+rcx]
	mov	QWORD[r14*8+rdi],rax
	mov	rax,QWORD[8+r14*8+rsi]
	lea	r14,[1+r14]
	dec	r15
	jnz	NEAR $L$sub

; rax = top word minus the final borrow, rbx = ~rax. The copy loop merges
; ([rdi] & rbx) | (stack & rax) into [rdi] without branching on the
; result, and overwrites each stack word with the loop index.
	sbb	rax,0
	mov	rbx,-1
	xor	rbx,rax
	xor	r14,r14
	mov	r15,r9

$L$copy:
	mov	rcx,QWORD[r14*8+rdi]
	mov	rdx,QWORD[r14*8+rsp]
	and	rcx,rbx
	and	rdx,rax
	mov	QWORD[r14*8+rsp],r14
	or	rdx,rcx
	mov	QWORD[r14*8+rdi],rdx
	lea	r14,[1+r14]
	sub	r15,1
	jnz	NEAR $L$copy

; Epilogue: reload the entry rsp saved above and restore the six pushed
; registers relative to it.
	mov	rsi,QWORD[8+r9*8+rsp]

	mov	rax,1

	mov	r15,QWORD[((-48))+rsi]
	mov	r14,QWORD[((-40))+rsi]
	mov	r13,QWORD[((-32))+rsi]
	mov	r12,QWORD[((-24))+rsi]
	mov	rbp,QWORD[((-16))+rsi]
	mov	rbx,QWORD[((-8))+rsi]
	lea	rsp,[rsi]

$L$mul_epilogue:
	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
	mov	rsi,QWORD[16+rsp]
	DB	0F3h,0C3h		;repret

$L$SEH_end_GFp_bn_mul_mont_gather5:

;-----------------------------------------------------------------------
; bn_mul4x_mont_gather5 (not declared global in this file section)
; Same Win64 argument shuffle as above. $L$mul4x_enter is also reached by
; a jump from GFp_bn_mul_mont_gather5 with r11d already loaded from
; GFp_ia32cap_P+8; when (r11d & 0x80108) == 0x80108 control leaves for
; $L$mulx4x_enter, which is defined outside this block.
; The arithmetic is done by mul4x_internal. Returns rax = 1.
;-----------------------------------------------------------------------
ALIGN	32
bn_mul4x_mont_gather5:
	mov	QWORD[8+rsp],rdi	;WIN64 prologue
	mov	QWORD[16+rsp],rsi
	mov	rax,rsp
$L$SEH_begin_bn_mul4x_mont_gather5:
	mov	rdi,rcx
	mov	rsi,rdx
	mov	rdx,r8
	mov	rcx,r9
	mov	r8,QWORD[40+rsp]
	mov	r9,QWORD[48+rsp]

DB	0x67
	mov	rax,rsp

$L$mul4x_enter:
	and	r11d,0x80108
	cmp	r11d,0x80108
	je	NEAR $L$mulx4x_enter
	push	rbx
	push	rbp
	push	r12
	push	r13
	push	r14
	push	r15

$L$mul4x_prologue:

DB	0x67
	shl	r9d,3			; r9 = count * 8 (bytes)
	lea	r10,[r9*2+r9]		; r10 = 3 * r9
	neg	r9

; Choose the frame address in rbp from the low 12 bits of
; (rsp - 320 - 2*bytes - rdi); the two branches differ in how much extra
; is subtracted.
	lea	r11,[((-320))+r9*2+rsp]
	mov	rbp,rsp
	sub	r11,rdi
	and	r11,4095
	cmp	r10,r11
	jb	NEAR $L$mul4xsp_alt
	sub	rbp,r11
	lea	rbp,[((-320))+r9*2+rbp]
	jmp	NEAR $L$mul4xsp_done

ALIGN	32
$L$mul4xsp_alt:
	lea	r10,[((4096-320))+r9*2]
	lea	rbp,[((-320))+r9*2+rbp]
	sub	r11,r10
	mov	r10,0			; mov leaves CF from the sub intact for cmovc
	cmovc	r11,r10
	sub	rbp,r11
$L$mul4xsp_done:
	and	rbp,-64
	mov	r11,rsp
	sub	r11,rbp
	and	r11,-4096
	lea	rsp,[rbp*1+r11]
	mov	r10,QWORD[rsp]
	cmp	rsp,rbp
	ja	NEAR $L$mul4x_page_walk
	jmp	NEAR $L$mul4x_page_walk_done

$L$mul4x_page_walk:
	lea	rsp,[((-4096))+rsp]
	mov	r10,QWORD[rsp]
	cmp	rsp,rbp
	ja	NEAR $L$mul4x_page_walk
$L$mul4x_page_walk_done:

	neg	r9

	mov	QWORD[40+rsp],rax	; entry rsp, reloaded after the call

$L$mul4x_body:

	call	mul4x_internal

	mov	rsi,QWORD[40+rsp]

	mov	rax,1

	mov	r15,QWORD[((-48))+rsi]
	mov	r14,QWORD[((-40))+rsi]
	mov	r13,QWORD[((-32))+rsi]
	mov	r12,QWORD[((-24))+rsi]
	mov	rbp,QWORD[((-16))+rsi]
	mov	rbx,QWORD[((-8))+rsi]
	lea	rsp,[rsi]

$L$mul4x_epilogue:
	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
	mov	rsi,QWORD[16+rsp]
	DB	0F3h,0C3h		;repret

$L$SEH_end_bn_mul4x_mont_gather5:


;-----------------------------------------------------------------------
; mul4x_internal -- private helper, entered by CALL from
; bn_mul4x_mont_gather5 and GFp_bn_power5.
; In (as used below): rax = caller's entry rsp (a dword is read from
;   [56+rax]), rdx = table base, rsi/rcx = operand pointers, r8 = pointer
;   whose qword is loaded, r9 = byte count, rdi = destination (spilled to
;   [56+8+rsp] and reloaded before the exit jump).
; Stack offsets carry +8 because of the return address pushed by CALL.
; Exit: tail-jumps to $L$sqr4x_sub_entry, defined outside this block.
;-----------------------------------------------------------------------
ALIGN	32
mul4x_internal:

	shl	r9,5
	movd	xmm5,DWORD[56+rax]
	lea	rax,[$L$inc]
	lea	r13,[128+r9*1+rdx]	; r13 = end marker for r12 (table + 128 + 32*bytes)
	shr	r9,5
	movdqa	xmm0,XMMWORD[rax]
	movdqa	xmm1,XMMWORD[16+rax]
	lea	r10,[((88-112))+r9*1+rsp]
	lea	r12,[128+rdx]

; Sixteen compare masks at 112..352[r10], built as in the routine above.
	pshufd	xmm5,xmm5,0
	movdqa	xmm4,xmm1
DB	0x67,0x67
	movdqa	xmm2,xmm1
	paddd	xmm1,xmm0
	pcmpeqd	xmm0,xmm5
DB	0x67
	movdqa	xmm3,xmm4
	paddd	xmm2,xmm1
	pcmpeqd	xmm1,xmm5
	movdqa	XMMWORD[112+r10],xmm0
	movdqa	xmm0,xmm4

	paddd	xmm3,xmm2
	pcmpeqd	xmm2,xmm5
	movdqa	XMMWORD[128+r10],xmm1
	movdqa	xmm1,xmm4

	paddd	xmm0,xmm3
	pcmpeqd	xmm3,xmm5
	movdqa	XMMWORD[144+r10],xmm2
	movdqa	xmm2,xmm4

	paddd	xmm1,xmm0
	pcmpeqd	xmm0,xmm5
	movdqa	XMMWORD[160+r10],xmm3
	movdqa	xmm3,xmm4
	paddd	xmm2,xmm1
	pcmpeqd	xmm1,xmm5
	movdqa	XMMWORD[176+r10],xmm0
	movdqa	xmm0,xmm4

	paddd	xmm3,xmm2
	pcmpeqd	xmm2,xmm5
	movdqa	XMMWORD[192+r10],xmm1
	movdqa	xmm1,xmm4

	paddd	xmm0,xmm3
	pcmpeqd	xmm3,xmm5
	movdqa	XMMWORD[208+r10],xmm2
	movdqa	xmm2,xmm4

	paddd	xmm1,xmm0
	pcmpeqd	xmm0,xmm5
	movdqa	XMMWORD[224+r10],xmm3
	movdqa	xmm3,xmm4
	paddd	xmm2,xmm1
	pcmpeqd	xmm1,xmm5
	movdqa	XMMWORD[240+r10],xmm0
	movdqa	xmm0,xmm4

	paddd	xmm3,xmm2
	pcmpeqd	xmm2,xmm5
	movdqa	XMMWORD[256+r10],xmm1
	movdqa	xmm1,xmm4

	paddd	xmm0,xmm3
	pcmpeqd	xmm3,xmm5
	movdqa	XMMWORD[272+r10],xmm2
	movdqa	xmm2,xmm4

	paddd	xmm1,xmm0
	pcmpeqd	xmm0,xmm5
	movdqa	XMMWORD[288+r10],xmm3
	movdqa	xmm3,xmm4
	paddd	xmm2,xmm1
	pcmpeqd	xmm1,xmm5
	movdqa	XMMWORD[304+r10],xmm0

	paddd	xmm3,xmm2
DB	0x67
	pcmpeqd	xmm2,xmm5
	movdqa	XMMWORD[320+r10],xmm1

	pcmpeqd	xmm3,xmm5
	movdqa	XMMWORD[336+r10],xmm2

; Masked gather of the first table row into rbx.
	pand	xmm0,XMMWORD[64+r12]

	pand	xmm1,XMMWORD[80+r12]
	pand	xmm2,XMMWORD[96+r12]
	movdqa	XMMWORD[352+r10],xmm3
	pand	xmm3,XMMWORD[112+r12]
	por	xmm0,xmm2
	por	xmm1,xmm3
	movdqa	xmm4,XMMWORD[((-128))+r12]
	movdqa	xmm5,XMMWORD[((-112))+r12]
	movdqa	xmm2,XMMWORD[((-96))+r12]
	pand	xmm4,XMMWORD[112+r10]
	movdqa	xmm3,XMMWORD[((-80))+r12]
	pand	xmm5,XMMWORD[128+r10]
	por	xmm0,xmm4
	pand	xmm2,XMMWORD[144+r10]
	por	xmm1,xmm5
	pand	xmm3,XMMWORD[160+r10]
	por	xmm0,xmm2
	por	xmm1,xmm3
	movdqa	xmm4,XMMWORD[((-64))+r12]
	movdqa	xmm5,XMMWORD[((-48))+r12]
	movdqa	xmm2,XMMWORD[((-32))+r12]
	pand	xmm4,XMMWORD[176+r10]
	movdqa	xmm3,XMMWORD[((-16))+r12]
	pand	xmm5,XMMWORD[192+r10]
	por	xmm0,xmm4
	pand	xmm2,XMMWORD[208+r10]
	por	xmm1,xmm5
	pand	xmm3,XMMWORD[224+r10]
	por	xmm0,xmm2
	por	xmm1,xmm3
	movdqa	xmm4,XMMWORD[r12]
	movdqa	xmm5,XMMWORD[16+r12]
	movdqa	xmm2,XMMWORD[32+r12]
	pand	xmm4,XMMWORD[240+r10]
	movdqa	xmm3,XMMWORD[48+r12]
	pand	xmm5,XMMWORD[256+r10]
	por	xmm0,xmm4
	pand	xmm2,XMMWORD[272+r10]
	por	xmm1,xmm5
	pand	xmm3,XMMWORD[288+r10]
	por	xmm0,xmm2
	por	xmm1,xmm3
	por	xmm0,xmm1
	pshufd	xmm1,xmm0,0x4e
	por	xmm0,xmm1
	lea	r12,[256+r12]
DB	102,72,15,126,195		; movq rbx,xmm0

	mov	QWORD[((16+8))+rsp],r13	; spill the table end marker
	mov	QWORD[((56+8))+rsp],rdi	; spill the destination pointer

	mov	r8,QWORD[r8]
	mov	rax,QWORD[rsi]
	lea	rsi,[r9*1+rsi]		; rsi = end of the operand; indexed with negative r9/r15
	neg	r9

	mov	rbp,r8
	mul	rbx
	mov	r10,rax
	mov	rax,QWORD[rcx]

	imul	rbp,r10
	lea	r14,[((64+8))+rsp]	; r14 = result cursor on the stack
	mov	r11,rdx

	mul	rbp
	add	r10,rax
	mov	rax,QWORD[8+r9*1+rsi]
	adc	rdx,0
	mov	rdi,rdx

	mul	rbx
	add	r11,rax
	mov	rax,QWORD[8+rcx]
	adc	rdx,0
	mov	r10,rdx

	mul	rbp
	add	rdi,rax
	mov	rax,QWORD[16+r9*1+rsi]
	adc	rdx,0
	add	rdi,r11
	lea	r15,[32+r9]
	lea	rcx,[32+rcx]
	adc	rdx,0
	mov	QWORD[r14],rdi
	mov	r13,rdx
	jmp	NEAR $L$1st4x

; First pass, four words per iteration; r15 counts up by 32 towards zero.
ALIGN	32
$L$1st4x:
	mul	rbx
	add	r10,rax
	mov	rax,QWORD[((-16))+rcx]
	lea	r14,[32+r14]
	adc	rdx,0
	mov	r11,rdx

	mul	rbp
	add	r13,rax
	mov	rax,QWORD[((-8))+r15*1+rsi]
	adc	rdx,0
	add	r13,r10
	adc	rdx,0
	mov	QWORD[((-24))+r14],r13
	mov	rdi,rdx

	mul	rbx
	add	r11,rax
	mov	rax,QWORD[((-8))+rcx]
	adc	rdx,0
	mov	r10,rdx

	mul	rbp
	add	rdi,rax
	mov	rax,QWORD[r15*1+rsi]
	adc	rdx,0
	add	rdi,r11
	adc	rdx,0
	mov	QWORD[((-16))+r14],rdi
	mov	r13,rdx

	mul	rbx
	add	r10,rax
	mov	rax,QWORD[rcx]
	adc	rdx,0
	mov	r11,rdx

	mul	rbp
	add	r13,rax
	mov	rax,QWORD[8+r15*1+rsi]
	adc	rdx,0
	add	r13,r10
	adc	rdx,0
	mov	QWORD[((-8))+r14],r13
	mov	rdi,rdx

	mul	rbx
	add	r11,rax
	mov	rax,QWORD[8+rcx]
	adc	rdx,0
	mov	r10,rdx

	mul	rbp
	add	rdi,rax
	mov	rax,QWORD[16+r15*1+rsi]
	adc	rdx,0
	add	rdi,r11
	lea	rcx,[32+rcx]
	adc	rdx,0
	mov	QWORD[r14],rdi
	mov	r13,rdx

	add	r15,32
	jnz	NEAR $L$1st4x

	mul	rbx
	add	r10,rax
	mov	rax,QWORD[((-16))+rcx]
	lea	r14,[32+r14]
	adc	rdx,0
	mov	r11,rdx

	mul	rbp
	add	r13,rax
	mov	rax,QWORD[((-8))+rsi]
	adc	rdx,0
	add	r13,r10
	adc	rdx,0
	mov	QWORD[((-24))+r14],r13
	mov	rdi,rdx

	mul	rbx
	add	r11,rax
	mov	rax,QWORD[((-8))+rcx]
	adc	rdx,0
	mov	r10,rdx

	mul	rbp
	add	rdi,rax
	mov	rax,QWORD[r9*1+rsi]
	adc	rdx,0
	add	rdi,r11
	adc	rdx,0
	mov	QWORD[((-16))+r14],rdi
	mov	r13,rdx

	lea	rcx,[r9*1+rcx]		; rewind rcx by the operand length

	xor	rdi,rdi
	add	r13,r10
	adc	rdi,0
	mov	QWORD[((-8))+r14],r13

	jmp	NEAR $L$outer4x

; Outer loop: repeats while r12 is below the end marker spilled at
; [16+8+rsp]; each pass gathers the next table row into rbx.
ALIGN	32
$L$outer4x:
	lea	rdx,[((16+128))+r14]
	pxor	xmm4,xmm4
	pxor	xmm5,xmm5
	movdqa	xmm0,XMMWORD[((-128))+r12]
	movdqa	xmm1,XMMWORD[((-112))+r12]
	movdqa	xmm2,XMMWORD[((-96))+r12]
	movdqa	xmm3,XMMWORD[((-80))+r12]
	pand	xmm0,XMMWORD[((-128))+rdx]
	pand	xmm1,XMMWORD[((-112))+rdx]
	por	xmm4,xmm0
	pand	xmm2,XMMWORD[((-96))+rdx]
	por	xmm5,xmm1
	pand	xmm3,XMMWORD[((-80))+rdx]
	por	xmm4,xmm2
	por	xmm5,xmm3
	movdqa	xmm0,XMMWORD[((-64))+r12]
	movdqa	xmm1,XMMWORD[((-48))+r12]
	movdqa	xmm2,XMMWORD[((-32))+r12]
	movdqa	xmm3,XMMWORD[((-16))+r12]
	pand	xmm0,XMMWORD[((-64))+rdx]
	pand	xmm1,XMMWORD[((-48))+rdx]
	por	xmm4,xmm0
	pand	xmm2,XMMWORD[((-32))+rdx]
	por	xmm5,xmm1
	pand	xmm3,XMMWORD[((-16))+rdx]
	por	xmm4,xmm2
	por	xmm5,xmm3
	movdqa	xmm0,XMMWORD[r12]
	movdqa	xmm1,XMMWORD[16+r12]
	movdqa	xmm2,XMMWORD[32+r12]
	movdqa	xmm3,XMMWORD[48+r12]
	pand	xmm0,XMMWORD[rdx]
	pand	xmm1,XMMWORD[16+rdx]
	por	xmm4,xmm0
	pand	xmm2,XMMWORD[32+rdx]
	por	xmm5,xmm1
	pand	xmm3,XMMWORD[48+rdx]
	por	xmm4,xmm2
	por	xmm5,xmm3
	movdqa	xmm0,XMMWORD[64+r12]
	movdqa	xmm1,XMMWORD[80+r12]
	movdqa	xmm2,XMMWORD[96+r12]
	movdqa	xmm3,XMMWORD[112+r12]
	pand	xmm0,XMMWORD[64+rdx]
	pand	xmm1,XMMWORD[80+rdx]
	por	xmm4,xmm0
	pand	xmm2,XMMWORD[96+rdx]
	por	xmm5,xmm1
	pand	xmm3,XMMWORD[112+rdx]
	por	xmm4,xmm2
	por	xmm5,xmm3
	por	xmm4,xmm5
	pshufd	xmm0,xmm4,0x4e
	por	xmm0,xmm4
	lea	r12,[256+r12]
DB	102,72,15,126,195		; movq rbx,xmm0

	mov	r10,QWORD[r9*1+r14]
	mov	rbp,r8
	mul	rbx
	add	r10,rax
	mov	rax,QWORD[rcx]
	adc	rdx,0

	imul	rbp,r10
	mov	r11,rdx
	mov	QWORD[r14],rdi		; store the carry word left by the previous pass

	lea	r14,[r9*1+r14]		; rewind the result cursor

	mul	rbp
	add	r10,rax
	mov	rax,QWORD[8+r9*1+rsi]
	adc	rdx,0
	mov	rdi,rdx

	mul	rbx
	add	r11,rax
	mov	rax,QWORD[8+rcx]
	adc	rdx,0
	add	r11,QWORD[8+r14]
	adc	rdx,0
	mov	r10,rdx

	mul	rbp
	add	rdi,rax
	mov	rax,QWORD[16+r9*1+rsi]
	adc	rdx,0
	add	rdi,r11
	lea	r15,[32+r9]
	lea	rcx,[32+rcx]
	adc	rdx,0
	mov	r13,rdx
	jmp	NEAR $L$inner4x

ALIGN	32
$L$inner4x:
	mul	rbx
	add	r10,rax
	mov	rax,QWORD[((-16))+rcx]
	adc	rdx,0
	add	r10,QWORD[16+r14]
	lea	r14,[32+r14]
	adc	rdx,0
	mov	r11,rdx

	mul	rbp
	add	r13,rax
	mov	rax,QWORD[((-8))+r15*1+rsi]
	adc	rdx,0
	add	r13,r10
	adc	rdx,0
	mov	QWORD[((-32))+r14],rdi
	mov	rdi,rdx

	mul	rbx
	add	r11,rax
	mov	rax,QWORD[((-8))+rcx]
	adc	rdx,0
	add	r11,QWORD[((-8))+r14]
	adc	rdx,0
	mov	r10,rdx

	mul	rbp
	add	rdi,rax
	mov	rax,QWORD[r15*1+rsi]
	adc	rdx,0
	add	rdi,r11
	adc	rdx,0
	mov	QWORD[((-24))+r14],r13
	mov	r13,rdx

	mul	rbx
	add	r10,rax
	mov	rax,QWORD[rcx]
	adc	rdx,0
	add	r10,QWORD[r14]
	adc	rdx,0
	mov	r11,rdx

	mul	rbp
	add	r13,rax
	mov	rax,QWORD[8+r15*1+rsi]
	adc	rdx,0
	add	r13,r10
	adc	rdx,0
	mov	QWORD[((-16))+r14],rdi
	mov	rdi,rdx

	mul	rbx
	add	r11,rax
	mov	rax,QWORD[8+rcx]
	adc	rdx,0
	add	r11,QWORD[8+r14]
	adc	rdx,0
	mov	r10,rdx

	mul	rbp
	add	rdi,rax
	mov	rax,QWORD[16+r15*1+rsi]
	adc	rdx,0
	add	rdi,r11
	lea	rcx,[32+rcx]
	adc	rdx,0
	mov	QWORD[((-8))+r14],r13
	mov	r13,rdx

	add	r15,32
	jnz	NEAR $L$inner4x

	mul	rbx
	add	r10,rax
	mov	rax,QWORD[((-16))+rcx]
	adc	rdx,0
	add	r10,QWORD[16+r14]
	lea	r14,[32+r14]
	adc	rdx,0
	mov	r11,rdx

	mul	rbp
	add	r13,rax
	mov	rax,QWORD[((-8))+rsi]
	adc	rdx,0
	add	r13,r10
	adc	rdx,0
	mov	QWORD[((-32))+r14],rdi
	mov	rdi,rdx

	mul	rbx
	add	r11,rax
	mov	rax,rbp
	mov	rbp,QWORD[((-8))+rcx]	; rbp now holds the last word read through rcx
	adc	rdx,0
	add	r11,QWORD[((-8))+r14]
	adc	rdx,0
	mov	r10,rdx

	mul	rbp
	add	rdi,rax
	mov	rax,QWORD[r9*1+rsi]
	adc	rdx,0
	add	rdi,r11
	adc	rdx,0
	mov	QWORD[((-24))+r14],r13
	mov	r13,rdx

	mov	QWORD[((-16))+r14],rdi
	lea	rcx,[r9*1+rcx]

	xor	rdi,rdi
	add	r13,r10
	adc	rdi,0
	add	r13,QWORD[r14]
	adc	rdi,0
	mov	QWORD[((-8))+r14],r13

	cmp	r12,QWORD[((16+8))+rsp]
	jb	NEAR $L$outer4x

; Set up registers for the shared subtraction tail and jump to it:
; rax = 0 - (rdi | borrow of rbp - r13), rbx = start of the stack result,
; rbp = rcx, rcx = r9 >> 5 (arithmetic), rdi = spilled destination.
	xor	rax,rax
	sub	rbp,r13
	adc	r15,r15
	or	rdi,r15
	sub	rax,rdi
	lea	rbx,[r9*1+r14]
	mov	r12,QWORD[rcx]
	lea	rbp,[rcx]
	mov	rcx,r9
	sar	rcx,3+2
	mov	rdi,QWORD[((56+8))+rsp]
	dec	r12
	xor	r10,r10
	mov	r13,QWORD[8+rbp]
	mov	r14,QWORD[16+rbp]
	mov	r15,QWORD[24+rbp]
	jmp	NEAR $L$sqr4x_sub_entry


global	GFp_bn_power5

;-----------------------------------------------------------------------
; GFp_bn_power5
; ABI:  Win64 on entry, same argument shuffle as the routines above.
; When (dword at GFp_ia32cap_P+8 & 0x80108) == 0x80108 control leaves for
; $L$powerx5_enter (defined outside this block). Otherwise the body calls
; __bn_sqr8x_internal followed by __bn_post4x_internal five times, then
; mul4x_internal once. Returns rax = 1.
;-----------------------------------------------------------------------
ALIGN	32
GFp_bn_power5:
	mov	QWORD[8+rsp],rdi	;WIN64 prologue
	mov	QWORD[16+rsp],rsi
	mov	rax,rsp
$L$SEH_begin_GFp_bn_power5:
	mov	rdi,rcx
	mov	rsi,rdx
	mov	rdx,r8
	mov	rcx,r9
	mov	r8,QWORD[40+rsp]
	mov	r9,QWORD[48+rsp]

	mov	rax,rsp

	lea	r11,[GFp_ia32cap_P]
	mov	r11d,DWORD[8+r11]
	and	r11d,0x80108
	cmp	r11d,0x80108
	je	NEAR $L$powerx5_enter
	push	rbx
	push	rbp
	push	r12
	push	r13
	push	r14
	push	r15

$L$power5_prologue:

	shl	r9d,3			; r9 = count * 8 (bytes)
	lea	r10d,[r9*2+r9]		; r10 = 3 * r9
	neg	r9
	mov	r8,QWORD[r8]

; Frame placement and page walk, same scheme as bn_mul4x_mont_gather5.
	lea	r11,[((-320))+r9*2+rsp]
	mov	rbp,rsp
	sub	r11,rdi
	and	r11,4095
	cmp	r10,r11
	jb	NEAR $L$pwr_sp_alt
	sub	rbp,r11
	lea	rbp,[((-320))+r9*2+rbp]
	jmp	NEAR $L$pwr_sp_done

ALIGN	32
$L$pwr_sp_alt:
	lea	r10,[((4096-320))+r9*2]
	lea	rbp,[((-320))+r9*2+rbp]
	sub	r11,r10
	mov	r10,0			; mov leaves CF from the sub intact for cmovc
	cmovc	r11,r10
	sub	rbp,r11
$L$pwr_sp_done:
	and	rbp,-64
	mov	r11,rsp
	sub	r11,rbp
	and	r11,-4096
	lea	rsp,[rbp*1+r11]
	mov	r10,QWORD[rsp]
	cmp	rsp,rbp
	ja	NEAR $L$pwr_page_walk
	jmp	NEAR $L$pwr_page_walk_done

$L$pwr_page_walk:
	lea	rsp,[((-4096))+rsp]
	mov	r10,QWORD[rsp]
	cmp	rsp,rbp
	ja	NEAR $L$pwr_page_walk
$L$pwr_page_walk_done:

	mov	r10,r9			; r10 = -bytes
	neg	r9			; r9  = +bytes

	mov	QWORD[32+rsp],r8	; qword loaded from [r8] above
	mov	QWORD[40+rsp],rax	; entry rsp

$L$power5_body:
; Park four registers in xmm1..xmm4 across the calls below.
DB	102,72,15,110,207		; movq xmm1,rdi
DB	102,72,15,110,209		; movq xmm2,rcx
DB	102,73,15,110,218		; movq xmm3,r10
DB	102,72,15,110,226		; movq xmm4,rdx

	call	__bn_sqr8x_internal
	call	__bn_post4x_internal
	call	__bn_sqr8x_internal
	call	__bn_post4x_internal
	call	__bn_sqr8x_internal
	call	__bn_post4x_internal
	call	__bn_sqr8x_internal
	call	__bn_post4x_internal
	call	__bn_sqr8x_internal
	call	__bn_post4x_internal

DB	102,72,15,126,209		; movq rcx,xmm2
DB	102,72,15,126,226		; movq rdx,xmm4
	mov	rdi,rsi
	mov	rax,QWORD[40+rsp]	; mul4x_internal reads [56+rax]
	lea	r8,[32+rsp]		; points at the qword stored above

	call	mul4x_internal

	mov	rsi,QWORD[40+rsp]

	mov	rax,1
	mov	r15,QWORD[((-48))+rsi]
	mov	r14,QWORD[((-40))+rsi]
	mov	r13,QWORD[((-32))+rsi]
	mov	r12,QWORD[((-24))+rsi]
	mov	rbp,QWORD[((-16))+rsi]
	mov	rbx,QWORD[((-8))+rsi]
	lea	rsp,[rsi]

$L$power5_epilogue:
	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
	mov	rsi,QWORD[16+rsp]
	DB	0F3h,0C3h		;repret

$L$SEH_end_GFp_bn_power5:

global	GFp_bn_sqr8x_internal


ALIGN	32
GFp_bn_sqr8x_internal:
__bn_sqr8x_internal:
1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 lea rbp,[32+r10] 1345 lea rsi,[r9*1+rsi] 1346 1347 mov rcx,r9 1348 1349 1350 mov r14,QWORD[((-32))+rbp*1+rsi] 1351 lea rdi,[((48+8))+r9*2+rsp] 1352 mov rax,QWORD[((-24))+rbp*1+rsi] 1353 lea rdi,[((-32))+rbp*1+rdi] 1354 mov rbx,QWORD[((-16))+rbp*1+rsi] 1355 mov r15,rax 1356 1357 mul r14 1358 mov r10,rax 1359 mov rax,rbx 1360 mov r11,rdx 1361 mov QWORD[((-24))+rbp*1+rdi],r10 1362 1363 mul r14 1364 add r11,rax 1365 mov rax,rbx 1366 adc rdx,0 1367 mov QWORD[((-16))+rbp*1+rdi],r11 1368 mov r10,rdx 1369 1370 1371 mov rbx,QWORD[((-8))+rbp*1+rsi] 1372 mul r15 1373 mov r12,rax 1374 mov rax,rbx 1375 mov r13,rdx 1376 1377 lea rcx,[rbp] 1378 mul r14 1379 add r10,rax 1380 mov rax,rbx 1381 mov r11,rdx 1382 adc r11,0 1383 add r10,r12 1384 adc r11,0 1385 mov QWORD[((-8))+rcx*1+rdi],r10 1386 jmp NEAR $L$sqr4x_1st 1387 1388ALIGN 32 1389$L$sqr4x_1st: 1390 mov rbx,QWORD[rcx*1+rsi] 1391 mul r15 1392 add r13,rax 1393 mov rax,rbx 1394 mov r12,rdx 1395 adc r12,0 1396 1397 mul r14 1398 add r11,rax 1399 mov rax,rbx 1400 mov rbx,QWORD[8+rcx*1+rsi] 1401 mov r10,rdx 1402 adc r10,0 1403 add r11,r13 1404 adc r10,0 1405 1406 1407 mul r15 1408 add r12,rax 1409 mov rax,rbx 1410 mov QWORD[rcx*1+rdi],r11 1411 mov r13,rdx 1412 adc r13,0 1413 1414 mul r14 1415 add r10,rax 1416 mov rax,rbx 1417 mov rbx,QWORD[16+rcx*1+rsi] 1418 mov r11,rdx 1419 adc r11,0 1420 add r10,r12 1421 adc r11,0 1422 1423 mul r15 1424 add r13,rax 1425 mov rax,rbx 1426 mov QWORD[8+rcx*1+rdi],r10 1427 mov r12,rdx 1428 adc r12,0 1429 1430 mul r14 1431 add r11,rax 1432 mov rax,rbx 1433 mov rbx,QWORD[24+rcx*1+rsi] 1434 mov r10,rdx 1435 adc r10,0 1436 add r11,r13 1437 adc r10,0 
1438 1439 1440 mul r15 1441 add r12,rax 1442 mov rax,rbx 1443 mov QWORD[16+rcx*1+rdi],r11 1444 mov r13,rdx 1445 adc r13,0 1446 lea rcx,[32+rcx] 1447 1448 mul r14 1449 add r10,rax 1450 mov rax,rbx 1451 mov r11,rdx 1452 adc r11,0 1453 add r10,r12 1454 adc r11,0 1455 mov QWORD[((-8))+rcx*1+rdi],r10 1456 1457 cmp rcx,0 1458 jne NEAR $L$sqr4x_1st 1459 1460 mul r15 1461 add r13,rax 1462 lea rbp,[16+rbp] 1463 adc rdx,0 1464 add r13,r11 1465 adc rdx,0 1466 1467 mov QWORD[rdi],r13 1468 mov r12,rdx 1469 mov QWORD[8+rdi],rdx 1470 jmp NEAR $L$sqr4x_outer 1471 1472ALIGN 32 1473$L$sqr4x_outer: 1474 mov r14,QWORD[((-32))+rbp*1+rsi] 1475 lea rdi,[((48+8))+r9*2+rsp] 1476 mov rax,QWORD[((-24))+rbp*1+rsi] 1477 lea rdi,[((-32))+rbp*1+rdi] 1478 mov rbx,QWORD[((-16))+rbp*1+rsi] 1479 mov r15,rax 1480 1481 mul r14 1482 mov r10,QWORD[((-24))+rbp*1+rdi] 1483 add r10,rax 1484 mov rax,rbx 1485 adc rdx,0 1486 mov QWORD[((-24))+rbp*1+rdi],r10 1487 mov r11,rdx 1488 1489 mul r14 1490 add r11,rax 1491 mov rax,rbx 1492 adc rdx,0 1493 add r11,QWORD[((-16))+rbp*1+rdi] 1494 mov r10,rdx 1495 adc r10,0 1496 mov QWORD[((-16))+rbp*1+rdi],r11 1497 1498 xor r12,r12 1499 1500 mov rbx,QWORD[((-8))+rbp*1+rsi] 1501 mul r15 1502 add r12,rax 1503 mov rax,rbx 1504 adc rdx,0 1505 add r12,QWORD[((-8))+rbp*1+rdi] 1506 mov r13,rdx 1507 adc r13,0 1508 1509 mul r14 1510 add r10,rax 1511 mov rax,rbx 1512 adc rdx,0 1513 add r10,r12 1514 mov r11,rdx 1515 adc r11,0 1516 mov QWORD[((-8))+rbp*1+rdi],r10 1517 1518 lea rcx,[rbp] 1519 jmp NEAR $L$sqr4x_inner 1520 1521ALIGN 32 1522$L$sqr4x_inner: 1523 mov rbx,QWORD[rcx*1+rsi] 1524 mul r15 1525 add r13,rax 1526 mov rax,rbx 1527 mov r12,rdx 1528 adc r12,0 1529 add r13,QWORD[rcx*1+rdi] 1530 adc r12,0 1531 1532DB 0x67 1533 mul r14 1534 add r11,rax 1535 mov rax,rbx 1536 mov rbx,QWORD[8+rcx*1+rsi] 1537 mov r10,rdx 1538 adc r10,0 1539 add r11,r13 1540 adc r10,0 1541 1542 mul r15 1543 add r12,rax 1544 mov QWORD[rcx*1+rdi],r11 1545 mov rax,rbx 1546 mov r13,rdx 1547 adc r13,0 1548 add 
r12,QWORD[8+rcx*1+rdi] 1549 lea rcx,[16+rcx] 1550 adc r13,0 1551 1552 mul r14 1553 add r10,rax 1554 mov rax,rbx 1555 adc rdx,0 1556 add r10,r12 1557 mov r11,rdx 1558 adc r11,0 1559 mov QWORD[((-8))+rcx*1+rdi],r10 1560 1561 cmp rcx,0 1562 jne NEAR $L$sqr4x_inner 1563 1564DB 0x67 1565 mul r15 1566 add r13,rax 1567 adc rdx,0 1568 add r13,r11 1569 adc rdx,0 1570 1571 mov QWORD[rdi],r13 1572 mov r12,rdx 1573 mov QWORD[8+rdi],rdx 1574 1575 add rbp,16 1576 jnz NEAR $L$sqr4x_outer 1577 1578 1579 mov r14,QWORD[((-32))+rsi] 1580 lea rdi,[((48+8))+r9*2+rsp] 1581 mov rax,QWORD[((-24))+rsi] 1582 lea rdi,[((-32))+rbp*1+rdi] 1583 mov rbx,QWORD[((-16))+rsi] 1584 mov r15,rax 1585 1586 mul r14 1587 add r10,rax 1588 mov rax,rbx 1589 mov r11,rdx 1590 adc r11,0 1591 1592 mul r14 1593 add r11,rax 1594 mov rax,rbx 1595 mov QWORD[((-24))+rdi],r10 1596 mov r10,rdx 1597 adc r10,0 1598 add r11,r13 1599 mov rbx,QWORD[((-8))+rsi] 1600 adc r10,0 1601 1602 mul r15 1603 add r12,rax 1604 mov rax,rbx 1605 mov QWORD[((-16))+rdi],r11 1606 mov r13,rdx 1607 adc r13,0 1608 1609 mul r14 1610 add r10,rax 1611 mov rax,rbx 1612 mov r11,rdx 1613 adc r11,0 1614 add r10,r12 1615 adc r11,0 1616 mov QWORD[((-8))+rdi],r10 1617 1618 mul r15 1619 add r13,rax 1620 mov rax,QWORD[((-16))+rsi] 1621 adc rdx,0 1622 add r13,r11 1623 adc rdx,0 1624 1625 mov QWORD[rdi],r13 1626 mov r12,rdx 1627 mov QWORD[8+rdi],rdx 1628 1629 mul rbx 1630 add rbp,16 1631 xor r14,r14 1632 sub rbp,r9 1633 xor r15,r15 1634 1635 add rax,r12 1636 adc rdx,0 1637 mov QWORD[8+rdi],rax 1638 mov QWORD[16+rdi],rdx 1639 mov QWORD[24+rdi],r15 1640 1641 mov rax,QWORD[((-16))+rbp*1+rsi] 1642 lea rdi,[((48+8))+rsp] 1643 xor r10,r10 1644 mov r11,QWORD[8+rdi] 1645 1646 lea r12,[r10*2+r14] 1647 shr r10,63 1648 lea r13,[r11*2+rcx] 1649 shr r11,63 1650 or r13,r10 1651 mov r10,QWORD[16+rdi] 1652 mov r14,r11 1653 mul rax 1654 neg r15 1655 mov r11,QWORD[24+rdi] 1656 adc r12,rax 1657 mov rax,QWORD[((-8))+rbp*1+rsi] 1658 mov QWORD[rdi],r12 1659 adc r13,rdx 1660 1661 
lea rbx,[r10*2+r14] 1662 mov QWORD[8+rdi],r13 1663 sbb r15,r15 1664 shr r10,63 1665 lea r8,[r11*2+rcx] 1666 shr r11,63 1667 or r8,r10 1668 mov r10,QWORD[32+rdi] 1669 mov r14,r11 1670 mul rax 1671 neg r15 1672 mov r11,QWORD[40+rdi] 1673 adc rbx,rax 1674 mov rax,QWORD[rbp*1+rsi] 1675 mov QWORD[16+rdi],rbx 1676 adc r8,rdx 1677 lea rbp,[16+rbp] 1678 mov QWORD[24+rdi],r8 1679 sbb r15,r15 1680 lea rdi,[64+rdi] 1681 jmp NEAR $L$sqr4x_shift_n_add 1682 1683ALIGN 32 1684$L$sqr4x_shift_n_add: 1685 lea r12,[r10*2+r14] 1686 shr r10,63 1687 lea r13,[r11*2+rcx] 1688 shr r11,63 1689 or r13,r10 1690 mov r10,QWORD[((-16))+rdi] 1691 mov r14,r11 1692 mul rax 1693 neg r15 1694 mov r11,QWORD[((-8))+rdi] 1695 adc r12,rax 1696 mov rax,QWORD[((-8))+rbp*1+rsi] 1697 mov QWORD[((-32))+rdi],r12 1698 adc r13,rdx 1699 1700 lea rbx,[r10*2+r14] 1701 mov QWORD[((-24))+rdi],r13 1702 sbb r15,r15 1703 shr r10,63 1704 lea r8,[r11*2+rcx] 1705 shr r11,63 1706 or r8,r10 1707 mov r10,QWORD[rdi] 1708 mov r14,r11 1709 mul rax 1710 neg r15 1711 mov r11,QWORD[8+rdi] 1712 adc rbx,rax 1713 mov rax,QWORD[rbp*1+rsi] 1714 mov QWORD[((-16))+rdi],rbx 1715 adc r8,rdx 1716 1717 lea r12,[r10*2+r14] 1718 mov QWORD[((-8))+rdi],r8 1719 sbb r15,r15 1720 shr r10,63 1721 lea r13,[r11*2+rcx] 1722 shr r11,63 1723 or r13,r10 1724 mov r10,QWORD[16+rdi] 1725 mov r14,r11 1726 mul rax 1727 neg r15 1728 mov r11,QWORD[24+rdi] 1729 adc r12,rax 1730 mov rax,QWORD[8+rbp*1+rsi] 1731 mov QWORD[rdi],r12 1732 adc r13,rdx 1733 1734 lea rbx,[r10*2+r14] 1735 mov QWORD[8+rdi],r13 1736 sbb r15,r15 1737 shr r10,63 1738 lea r8,[r11*2+rcx] 1739 shr r11,63 1740 or r8,r10 1741 mov r10,QWORD[32+rdi] 1742 mov r14,r11 1743 mul rax 1744 neg r15 1745 mov r11,QWORD[40+rdi] 1746 adc rbx,rax 1747 mov rax,QWORD[16+rbp*1+rsi] 1748 mov QWORD[16+rdi],rbx 1749 adc r8,rdx 1750 mov QWORD[24+rdi],r8 1751 sbb r15,r15 1752 lea rdi,[64+rdi] 1753 add rbp,32 1754 jnz NEAR $L$sqr4x_shift_n_add 1755 1756 lea r12,[r10*2+r14] 1757DB 0x67 1758 shr r10,63 1759 lea 
; ---------------------------------------------------------------------------
; NOTE: the text below opens in the middle of an instruction: its leading
; operands "r13,[r11*2+rcx]" belong to the "lea" that ends the previous
; line, and the first instructions are the code that follows the
; $L$sqr4x_shift_n_add loop. The routine __bn_sqr8x_reduction starts at its
; label further down.
;
; __bn_sqr8x_reduction
;   DB 102,72,15,126,213 right before the label encodes `movq rbp,xmm2`.
;   Setup:  rax = 0; QWORD[8+rsp] = rbp+r9 (limit later compared with rbp);
;           QWORD[16+rsp] = rsp+56+2*r9 (also kept in rdx, limit later
;           compared with rdi); rdi = rsp+56+r9; r9 is then negated.
;   $L$8x_reduction_loop: adds r9 to rdi, loads eight qwords
;           [rdi]..[56+rdi] into rbx,r9..r15, stores rax to [rdx], advances
;           rdi by 64, copies the first word to r8 and forms the multiplier
;           rbx = rbx * QWORD[40+rsp] (imul, low 64 bits).
;   $L$8x_reduce: runs 8 times (ecx counts down from 8). Each pass multiplies
;           rbx by the eight qwords [rbp]..[56+rbp], folds the products into
;           r8..r15, saves the multiplier at [48+rcx*8+rsp] and prepares the
;           next multiplier as QWORD[40+rsp] * r8 (held in rsi, then rbx).
;   After the loop rbp advances by 64; if rbp >= QWORD[8+rsp] control goes to
;           $L$8x_no_tail, otherwise the next eight qwords at [rdi] are added
;           in (carry kept as a mask in rsi) and $L$8x_tail runs.
;   $L$8x_tail: 8 passes per 64-byte group at [rbp], reloading the saved
;           multipliers from [40+rcx*8+rsp] and storing one finished word to
;           [rdi] per pass. The loop's closing branch is on the next line.
;   DB 0x66 / DB 0x67 are bare prefix bytes emitted in front of the following
;   instruction (presumably padding -- confirm against the generator).
; ---------------------------------------------------------------------------
r13,[r11*2+rcx] 1760 shr r11,63 1761 or r13,r10 1762 mov r10,QWORD[((-16))+rdi] 1763 mov r14,r11 1764 mul rax 1765 neg r15 1766 mov r11,QWORD[((-8))+rdi] 1767 adc r12,rax 1768 mov rax,QWORD[((-8))+rsi] 1769 mov QWORD[((-32))+rdi],r12 1770 adc r13,rdx 1771 1772 lea rbx,[r10*2+r14] 1773 mov QWORD[((-24))+rdi],r13 1774 sbb r15,r15 1775 shr r10,63 1776 lea r8,[r11*2+rcx] 1777 shr r11,63 1778 or r8,r10 1779 mul rax 1780 neg r15 1781 adc rbx,rax 1782 adc r8,rdx 1783 mov QWORD[((-16))+rdi],rbx 1784 mov QWORD[((-8))+rdi],r8 1785DB 102,72,15,126,213 1786__bn_sqr8x_reduction: 1787 xor rax,rax 1788 lea rcx,[rbp*1+r9] 1789 lea rdx,[((48+8))+r9*2+rsp] 1790 mov QWORD[((0+8))+rsp],rcx 1791 lea rdi,[((48+8))+r9*1+rsp] 1792 mov QWORD[((8+8))+rsp],rdx 1793 neg r9 1794 jmp NEAR $L$8x_reduction_loop 1795 1796ALIGN 32 1797$L$8x_reduction_loop: 1798 lea rdi,[r9*1+rdi] 1799DB 0x66 1800 mov rbx,QWORD[rdi] 1801 mov r9,QWORD[8+rdi] 1802 mov r10,QWORD[16+rdi] 1803 mov r11,QWORD[24+rdi] 1804 mov r12,QWORD[32+rdi] 1805 mov r13,QWORD[40+rdi] 1806 mov r14,QWORD[48+rdi] 1807 mov r15,QWORD[56+rdi] 1808 mov QWORD[rdx],rax 1809 lea rdi,[64+rdi] 1810 1811DB 0x67 1812 mov r8,rbx 1813 imul rbx,QWORD[((32+8))+rsp] 1814 mov rax,QWORD[rbp] 1815 mov ecx,8 1816 jmp NEAR $L$8x_reduce 1817 1818ALIGN 32 1819$L$8x_reduce: 1820 mul rbx 1821 mov rax,QWORD[8+rbp] 1822 neg r8 1823 mov r8,rdx 1824 adc r8,0 1825 1826 mul rbx 1827 add r9,rax 1828 mov rax,QWORD[16+rbp] 1829 adc rdx,0 1830 add r8,r9 1831 mov QWORD[((48-8+8))+rcx*8+rsp],rbx 1832 mov r9,rdx 1833 adc r9,0 1834 1835 mul rbx 1836 add r10,rax 1837 mov rax,QWORD[24+rbp] 1838 adc rdx,0 1839 add r9,r10 1840 mov rsi,QWORD[((32+8))+rsp] 1841 mov r10,rdx 1842 adc r10,0 1843 1844 mul rbx 1845 add r11,rax 1846 mov rax,QWORD[32+rbp] 1847 adc rdx,0 1848 imul rsi,r8 1849 add r10,r11 1850 mov r11,rdx 1851 adc r11,0 1852 1853 mul rbx 1854 add r12,rax 1855 mov rax,QWORD[40+rbp] 1856 adc rdx,0 1857 add r11,r12 1858 mov r12,rdx 1859 adc r12,0 1860 1861 mul rbx 1862 add
r13,rax 1863 mov rax,QWORD[48+rbp] 1864 adc rdx,0 1865 add r12,r13 1866 mov r13,rdx 1867 adc r13,0 1868 1869 mul rbx 1870 add r14,rax 1871 mov rax,QWORD[56+rbp] 1872 adc rdx,0 1873 add r13,r14 1874 mov r14,rdx 1875 adc r14,0 1876 1877 mul rbx 1878 mov rbx,rsi 1879 add r15,rax 1880 mov rax,QWORD[rbp] 1881 adc rdx,0 1882 add r14,r15 1883 mov r15,rdx 1884 adc r15,0 1885 1886 dec ecx 1887 jnz NEAR $L$8x_reduce 1888 1889 lea rbp,[64+rbp] 1890 xor rax,rax 1891 mov rdx,QWORD[((8+8))+rsp] 1892 cmp rbp,QWORD[((0+8))+rsp] 1893 jae NEAR $L$8x_no_tail 1894 1895DB 0x66 1896 add r8,QWORD[rdi] 1897 adc r9,QWORD[8+rdi] 1898 adc r10,QWORD[16+rdi] 1899 adc r11,QWORD[24+rdi] 1900 adc r12,QWORD[32+rdi] 1901 adc r13,QWORD[40+rdi] 1902 adc r14,QWORD[48+rdi] 1903 adc r15,QWORD[56+rdi] 1904 sbb rsi,rsi 1905 1906 mov rbx,QWORD[((48+56+8))+rsp] 1907 mov ecx,8 1908 mov rax,QWORD[rbp] 1909 jmp NEAR $L$8x_tail 1910 1911ALIGN 32 1912$L$8x_tail: 1913 mul rbx 1914 add r8,rax 1915 mov rax,QWORD[8+rbp] 1916 mov QWORD[rdi],r8 1917 mov r8,rdx 1918 adc r8,0 1919 1920 mul rbx 1921 add r9,rax 1922 mov rax,QWORD[16+rbp] 1923 adc rdx,0 1924 add r8,r9 1925 lea rdi,[8+rdi] 1926 mov r9,rdx 1927 adc r9,0 1928 1929 mul rbx 1930 add r10,rax 1931 mov rax,QWORD[24+rbp] 1932 adc rdx,0 1933 add r9,r10 1934 mov r10,rdx 1935 adc r10,0 1936 1937 mul rbx 1938 add r11,rax 1939 mov rax,QWORD[32+rbp] 1940 adc rdx,0 1941 add r10,r11 1942 mov r11,rdx 1943 adc r11,0 1944 1945 mul rbx 1946 add r12,rax 1947 mov rax,QWORD[40+rbp] 1948 adc rdx,0 1949 add r11,r12 1950 mov r12,rdx 1951 adc r12,0 1952 1953 mul rbx 1954 add r13,rax 1955 mov rax,QWORD[48+rbp] 1956 adc rdx,0 1957 add r12,r13 1958 mov r13,rdx 1959 adc r13,0 1960 1961 mul rbx 1962 add r14,rax 1963 mov rax,QWORD[56+rbp] 1964 adc rdx,0 1965 add r13,r14 1966 mov r14,rdx 1967 adc r14,0 1968 1969 mul rbx 1970 mov rbx,QWORD[((48-16+8))+rcx*8+rsp] 1971 add r15,rax 1972 adc rdx,0 1973 add r14,r15 1974 mov rax,QWORD[rbp] 1975 mov r15,rdx 1976 adc r15,0 1977 1978 dec ecx 1979 jnz
; ---------------------------------------------------------------------------
; NOTE: the text below opens with the operand of the "jnz" that ends the
; previous line (the back-edge of $L$8x_tail), finishes __bn_sqr8x_reduction
; and then starts __bn_post4x_internal.
;
; Remainder of __bn_sqr8x_reduction:
;   After $L$8x_tail: rbp += 64, rdx = QWORD[16+rsp]; while rbp is below
;     QWORD[8+rsp] the next eight qwords at [rdi] are added in (`neg rsi`
;     turns the saved mask back into CF, `sbb rsi,rsi` saves the new carry)
;     and $L$8x_tail runs again with ecx = 8.
;   $L$8x_tail_done: adds QWORD[rdx] into r8 and ripples the carry through
;     r9..r15 into rax, then restores the saved carry with `neg rsi`.
;   $L$8x_no_tail: adds [rdi]..[56+rdi] with carry, collects the final carry
;     in rax, loads rcx = QWORD[-8+rbp], stores r8..r15 to [rdi]..[56+rdi],
;     advances rdi by 64 and loops to $L$8x_reduction_loop while rdi < rdx.
;     DB 102,72,15,126,213 encodes `movq rbp,xmm2`;
;     DB 102,73,15,126,217 encodes `movq r9,xmm3`.
;   DB 0F3h,0C3h is `rep ret`.
;
; __bn_post4x_internal:
;   rbx = rdi+r9 (source), rcx = r9 >> 5 (arithmetic; counted up to zero by
;   `inc rcx`), rax is negated and used as an AND mask, r10 carries the
;   borrow/carry between iterations.
;   DB 102,72,15,126,207 encodes `movq rdi,xmm1` (destination);
;   DB 102,72,15,126,206 encodes `movq rsi,xmm1`.
;   Each $L$sqr4x_sub pass takes four qwords from [rbp], complements them,
;   masks them with rax, adds them with carry to four qwords at [rbx] and
;   stores the sums at [rdi]. `dec r12` before the first `not r12` makes the
;   first word -x instead of ~x, which supplies the +1 of a two's-complement
;   subtraction. The loop body continues on the next line.
; ---------------------------------------------------------------------------
NEAR $L$8x_tail 1980 1981 lea rbp,[64+rbp] 1982 mov rdx,QWORD[((8+8))+rsp] 1983 cmp rbp,QWORD[((0+8))+rsp] 1984 jae NEAR $L$8x_tail_done 1985 1986 mov rbx,QWORD[((48+56+8))+rsp] 1987 neg rsi 1988 mov rax,QWORD[rbp] 1989 adc r8,QWORD[rdi] 1990 adc r9,QWORD[8+rdi] 1991 adc r10,QWORD[16+rdi] 1992 adc r11,QWORD[24+rdi] 1993 adc r12,QWORD[32+rdi] 1994 adc r13,QWORD[40+rdi] 1995 adc r14,QWORD[48+rdi] 1996 adc r15,QWORD[56+rdi] 1997 sbb rsi,rsi 1998 1999 mov ecx,8 2000 jmp NEAR $L$8x_tail 2001 2002ALIGN 32 2003$L$8x_tail_done: 2004 xor rax,rax 2005 add r8,QWORD[rdx] 2006 adc r9,0 2007 adc r10,0 2008 adc r11,0 2009 adc r12,0 2010 adc r13,0 2011 adc r14,0 2012 adc r15,0 2013 adc rax,0 2014 2015 neg rsi 2016$L$8x_no_tail: 2017 adc r8,QWORD[rdi] 2018 adc r9,QWORD[8+rdi] 2019 adc r10,QWORD[16+rdi] 2020 adc r11,QWORD[24+rdi] 2021 adc r12,QWORD[32+rdi] 2022 adc r13,QWORD[40+rdi] 2023 adc r14,QWORD[48+rdi] 2024 adc r15,QWORD[56+rdi] 2025 adc rax,0 2026 mov rcx,QWORD[((-8))+rbp] 2027 xor rsi,rsi 2028 2029DB 102,72,15,126,213 2030 2031 mov QWORD[rdi],r8 2032 mov QWORD[8+rdi],r9 2033DB 102,73,15,126,217 2034 mov QWORD[16+rdi],r10 2035 mov QWORD[24+rdi],r11 2036 mov QWORD[32+rdi],r12 2037 mov QWORD[40+rdi],r13 2038 mov QWORD[48+rdi],r14 2039 mov QWORD[56+rdi],r15 2040 lea rdi,[64+rdi] 2041 2042 cmp rdi,rdx 2043 jb NEAR $L$8x_reduction_loop 2044 DB 0F3h,0C3h ;repret 2045 2046 2047 2048ALIGN 32 2049__bn_post4x_internal: 2050 2051 mov r12,QWORD[rbp] 2052 lea rbx,[r9*1+rdi] 2053 mov rcx,r9 2054DB 102,72,15,126,207 2055 neg rax 2056DB 102,72,15,126,206 2057 sar rcx,3+2 2058 dec r12 2059 xor r10,r10 2060 mov r13,QWORD[8+rbp] 2061 mov r14,QWORD[16+rbp] 2062 mov r15,QWORD[24+rbp] 2063 jmp NEAR $L$sqr4x_sub_entry 2064 2065ALIGN 16 2066$L$sqr4x_sub: 2067 mov r12,QWORD[rbp] 2068 mov r13,QWORD[8+rbp] 2069 mov r14,QWORD[16+rbp] 2070 mov r15,QWORD[24+rbp] 2071$L$sqr4x_sub_entry: 2072 lea rbp,[32+rbp] 2073 not r12 2074 not r13 2075 not r14 2076 not r15 2077 and r12,rax 2078 and r13,rax 2079 and
; ---------------------------------------------------------------------------
; NOTE: the text below opens with the operands of the "and" that ends the
; previous line and first finishes the $L$sqr4x_sub loop of
; __bn_post4x_internal (masked add with carry, 32 bytes per pass, then
; r10 = r9, r9 = -r9, `rep ret`).
;
; GFp_bn_from_montgomery (global):
;   Tests the low three bits of DWORD[48+rsp]. If they are all zero it jumps
;   to bn_from_mont8x; otherwise it returns with eax = 0.
;
; bn_from_mont8x:
;   WIN64 prologue: saves rdi/rsi in the caller's home slots and moves the
;     register arguments rcx,rdx,r8,r9 into rdi,rsi,rdx,rcx; r8 and r9 are
;     reloaded from [40+rsp] and [48+rsp]. rax keeps the entry rsp.
;   $L$from_prologue: r9d <<= 3, r10 = 3*r9, r9 = -r9, r8 = QWORD[r8].
;     A new frame pointer rbp is chosen below rsp from
;     (rsp-320+2*r9-rdi) & 4095 and aligned down to 64 bytes.
;   $L$from_page_walk: lowers rsp towards rbp one 4096-byte page at a time,
;     reading one qword from each page on the way.
;   Frame slots written here: [32+rsp] = r8, [40+rsp] = rax (entry rsp).
;   $L$mul_by_1: copies 64 bytes per pass from [rsi] to [rax] (rax starts at
;     rsp+48) and writes zeros at [rax+r9]; r11 counts the bytes down.
;     DB 0x48,0x8d,0xb6,0x40,0,0,0 encodes `lea rsi,[64+rsi]`.
;   DB 102,72,15,110,207 / 102,72,15,110,209 / 102,73,15,110,218 encode
;     `movq xmm1,rdi`, `movq xmm2,rcx`, `movq xmm3,r10`.
;   Dispatch: if (DWORD[8+GFp_ia32cap_P] & 0x80108) == 0x80108 the pair
;     __bn_sqrx8x_reduction / __bn_postx4x_internal is called, otherwise
;     __bn_sqr8x_reduction / __bn_post4x_internal ($L$from_mont_nox).
;   $L$from_mont_zero: overwrites the scratch area starting at rsp+48 with
;     zeros, 64 bytes per pass, r9 reduced by 32 each pass.
;   Exit: rax = 1; r15,r14,r13,r12,rbp,rbx are reloaded from [-48+rsi]..
;     [-8+rsi] with rsi = QWORD[40+rsp]; rsp = rsi; the WIN64 epilogue
;     restores rdi/rsi. The final "DB 0F3h,0C3h" (`rep ret`) ends this text.
; ---------------------------------------------------------------------------
r14,rax 2080 and r15,rax 2081 2082 neg r10 2083 adc r12,QWORD[rbx] 2084 adc r13,QWORD[8+rbx] 2085 adc r14,QWORD[16+rbx] 2086 adc r15,QWORD[24+rbx] 2087 mov QWORD[rdi],r12 2088 lea rbx,[32+rbx] 2089 mov QWORD[8+rdi],r13 2090 sbb r10,r10 2091 mov QWORD[16+rdi],r14 2092 mov QWORD[24+rdi],r15 2093 lea rdi,[32+rdi] 2094 2095 inc rcx 2096 jnz NEAR $L$sqr4x_sub 2097 2098 mov r10,r9 2099 neg r9 2100 DB 0F3h,0C3h ;repret 2101 2102 2103global GFp_bn_from_montgomery 2104 2105ALIGN 32 2106GFp_bn_from_montgomery: 2107 2108 test DWORD[48+rsp],7 2109 jz NEAR bn_from_mont8x 2110 xor eax,eax 2111 DB 0F3h,0C3h ;repret 2112 2113 2114 2115 2116ALIGN 32 2117bn_from_mont8x: 2118 mov QWORD[8+rsp],rdi ;WIN64 prologue 2119 mov QWORD[16+rsp],rsi 2120 mov rax,rsp 2121$L$SEH_begin_bn_from_mont8x: 2122 mov rdi,rcx 2123 mov rsi,rdx 2124 mov rdx,r8 2125 mov rcx,r9 2126 mov r8,QWORD[40+rsp] 2127 mov r9,QWORD[48+rsp] 2128 2129 2130 2131DB 0x67 2132 mov rax,rsp 2133 2134 push rbx 2135 2136 push rbp 2137 2138 push r12 2139 2140 push r13 2141 2142 push r14 2143 2144 push r15 2145 2146$L$from_prologue: 2147 2148 shl r9d,3 2149 lea r10,[r9*2+r9] 2150 neg r9 2151 mov r8,QWORD[r8] 2152 2153 2154 2155 2156 2157 2158 2159 2160 lea r11,[((-320))+r9*2+rsp] 2161 mov rbp,rsp 2162 sub r11,rdi 2163 and r11,4095 2164 cmp r10,r11 2165 jb NEAR $L$from_sp_alt 2166 sub rbp,r11 2167 lea rbp,[((-320))+r9*2+rbp] 2168 jmp NEAR $L$from_sp_done 2169 2170ALIGN 32 2171$L$from_sp_alt: 2172 lea r10,[((4096-320))+r9*2] 2173 lea rbp,[((-320))+r9*2+rbp] 2174 sub r11,r10 2175 mov r10,0 2176 cmovc r11,r10 2177 sub rbp,r11 2178$L$from_sp_done: 2179 and rbp,-64 2180 mov r11,rsp 2181 sub r11,rbp 2182 and r11,-4096 2183 lea rsp,[rbp*1+r11] 2184 mov r10,QWORD[rsp] 2185 cmp rsp,rbp 2186 ja NEAR $L$from_page_walk 2187 jmp NEAR $L$from_page_walk_done 2188 2189$L$from_page_walk: 2190 lea rsp,[((-4096))+rsp] 2191 mov r10,QWORD[rsp] 2192 cmp rsp,rbp 2193 ja NEAR $L$from_page_walk 2194$L$from_page_walk_done: 2195 2196 mov r10,r9 2197 neg r9
2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 mov QWORD[32+rsp],r8 2209 mov QWORD[40+rsp],rax 2210 2211$L$from_body: 2212 mov r11,r9 2213 lea rax,[48+rsp] 2214 pxor xmm0,xmm0 2215 jmp NEAR $L$mul_by_1 2216 2217ALIGN 32 2218$L$mul_by_1: 2219 movdqu xmm1,XMMWORD[rsi] 2220 movdqu xmm2,XMMWORD[16+rsi] 2221 movdqu xmm3,XMMWORD[32+rsi] 2222 movdqa XMMWORD[r9*1+rax],xmm0 2223 movdqu xmm4,XMMWORD[48+rsi] 2224 movdqa XMMWORD[16+r9*1+rax],xmm0 2225DB 0x48,0x8d,0xb6,0x40,0x00,0x00,0x00 2226 movdqa XMMWORD[rax],xmm1 2227 movdqa XMMWORD[32+r9*1+rax],xmm0 2228 movdqa XMMWORD[16+rax],xmm2 2229 movdqa XMMWORD[48+r9*1+rax],xmm0 2230 movdqa XMMWORD[32+rax],xmm3 2231 movdqa XMMWORD[48+rax],xmm4 2232 lea rax,[64+rax] 2233 sub r11,64 2234 jnz NEAR $L$mul_by_1 2235 2236DB 102,72,15,110,207 2237DB 102,72,15,110,209 2238DB 0x67 2239 mov rbp,rcx 2240DB 102,73,15,110,218 2241 lea r11,[GFp_ia32cap_P] 2242 mov r11d,DWORD[8+r11] 2243 and r11d,0x80108 2244 cmp r11d,0x80108 2245 jne NEAR $L$from_mont_nox 2246 2247 lea rdi,[r9*1+rax] 2248 call __bn_sqrx8x_reduction 2249 call __bn_postx4x_internal 2250 2251 pxor xmm0,xmm0 2252 lea rax,[48+rsp] 2253 jmp NEAR $L$from_mont_zero 2254 2255ALIGN 32 2256$L$from_mont_nox: 2257 call __bn_sqr8x_reduction 2258 call __bn_post4x_internal 2259 2260 pxor xmm0,xmm0 2261 lea rax,[48+rsp] 2262 jmp NEAR $L$from_mont_zero 2263 2264ALIGN 32 2265$L$from_mont_zero: 2266 mov rsi,QWORD[40+rsp] 2267 2268 movdqa XMMWORD[rax],xmm0 2269 movdqa XMMWORD[16+rax],xmm0 2270 movdqa XMMWORD[32+rax],xmm0 2271 movdqa XMMWORD[48+rax],xmm0 2272 lea rax,[64+rax] 2273 sub r9,32 2274 jnz NEAR $L$from_mont_zero 2275 2276 mov rax,1 2277 mov r15,QWORD[((-48))+rsi] 2278 2279 mov r14,QWORD[((-40))+rsi] 2280 2281 mov r13,QWORD[((-32))+rsi] 2282 2283 mov r12,QWORD[((-24))+rsi] 2284 2285 mov rbp,QWORD[((-16))+rsi] 2286 2287 mov rbx,QWORD[((-8))+rsi] 2288 2289 lea rsp,[rsi] 2290 2291$L$from_epilogue: 2292 mov rdi,QWORD[8+rsp] ;WIN64 epilogue 2293 mov rsi,QWORD[16+rsp] 2294 DB 0F3h,0C3h
; ---------------------------------------------------------------------------
; NOTE: the text below opens with the ";repret" remark that belongs to the
; "DB 0F3h,0C3h" ending the previous line, followed by the
; $L$SEH_end_bn_from_mont8x marker.
;
; bn_mulx4x_mont_gather5:
;   WIN64 prologue: saves rdi/rsi in the caller's home slots, moves the
;     register arguments rcx,rdx,r8,r9 into rdi,rsi,rdx,rcx and reloads r8
;     and r9 from [40+rsp] and [48+rsp]. rax keeps the entry rsp.
;   $L$mulx4x_enter: pushes rbx,rbp,r12..r15.
;   $L$mulx4x_prologue: r9d <<= 3, r10 = 3*r9, r9 = -r9, r8 = QWORD[r8];
;     a frame pointer rbp is chosen below rsp from
;     (rsp-320+2*r9-rdi) & 4095 and aligned down to 64 bytes.
;   $L$mulx4x_page_walk: lowers rsp towards rbp one 4096-byte page at a time,
;     reading one qword from each page.
;   Frame slots written here: [32+rsp] = r8, [40+rsp] = rax (entry rsp).
;   $L$mulx4x_body: calls mulx4x_internal, then sets rax = 1, reloads
;     r15,r14,r13,r12,rbp,rbx from [-48+rsi]..[-8+rsi] (rsi = QWORD[40+rsp])
;     and restores rsp from rsi.
;   $L$mulx4x_epilogue: restores rdi/rsi; the closing `rep ret` bytes are at
;     the start of the next line.
; ---------------------------------------------------------------------------
;repret 2295 2296$L$SEH_end_bn_from_mont8x: 2297 2298ALIGN 32 2299bn_mulx4x_mont_gather5: 2300 mov QWORD[8+rsp],rdi ;WIN64 prologue 2301 mov QWORD[16+rsp],rsi 2302 mov rax,rsp 2303$L$SEH_begin_bn_mulx4x_mont_gather5: 2304 mov rdi,rcx 2305 mov rsi,rdx 2306 mov rdx,r8 2307 mov rcx,r9 2308 mov r8,QWORD[40+rsp] 2309 mov r9,QWORD[48+rsp] 2310 2311 2312 2313 mov rax,rsp 2314 2315$L$mulx4x_enter: 2316 push rbx 2317 2318 push rbp 2319 2320 push r12 2321 2322 push r13 2323 2324 push r14 2325 2326 push r15 2327 2328$L$mulx4x_prologue: 2329 2330 shl r9d,3 2331 lea r10,[r9*2+r9] 2332 neg r9 2333 mov r8,QWORD[r8] 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 lea r11,[((-320))+r9*2+rsp] 2345 mov rbp,rsp 2346 sub r11,rdi 2347 and r11,4095 2348 cmp r10,r11 2349 jb NEAR $L$mulx4xsp_alt 2350 sub rbp,r11 2351 lea rbp,[((-320))+r9*2+rbp] 2352 jmp NEAR $L$mulx4xsp_done 2353 2354$L$mulx4xsp_alt: 2355 lea r10,[((4096-320))+r9*2] 2356 lea rbp,[((-320))+r9*2+rbp] 2357 sub r11,r10 2358 mov r10,0 2359 cmovc r11,r10 2360 sub rbp,r11 2361$L$mulx4xsp_done: 2362 and rbp,-64 2363 mov r11,rsp 2364 sub r11,rbp 2365 and r11,-4096 2366 lea rsp,[rbp*1+r11] 2367 mov r10,QWORD[rsp] 2368 cmp rsp,rbp 2369 ja NEAR $L$mulx4x_page_walk 2370 jmp NEAR $L$mulx4x_page_walk_done 2371 2372$L$mulx4x_page_walk: 2373 lea rsp,[((-4096))+rsp] 2374 mov r10,QWORD[rsp] 2375 cmp rsp,rbp 2376 ja NEAR $L$mulx4x_page_walk 2377$L$mulx4x_page_walk_done: 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 mov QWORD[32+rsp],r8 2392 mov QWORD[40+rsp],rax 2393 2394$L$mulx4x_body: 2395 call mulx4x_internal 2396 2397 mov rsi,QWORD[40+rsp] 2398 2399 mov rax,1 2400 2401 mov r15,QWORD[((-48))+rsi] 2402 2403 mov r14,QWORD[((-40))+rsi] 2404 2405 mov r13,QWORD[((-32))+rsi] 2406 2407 mov r12,QWORD[((-24))+rsi] 2408 2409 mov rbp,QWORD[((-16))+rsi] 2410 2411 mov rbx,QWORD[((-8))+rsi] 2412 2413 lea rsp,[rsi] 2414 2415$L$mulx4x_epilogue: 2416 mov rdi,QWORD[8+rsp] ;WIN64 epilogue 2417 mov rsi,QWORD[16+rsp] 2418 DB
; ---------------------------------------------------------------------------
; NOTE: the text below opens with the operand bytes of the "DB" that ends
; the previous line (0F3h,0C3h = `rep ret`, the return of
; bn_mulx4x_mont_gather5) and then defines mulx4x_internal.
;
; mulx4x_internal (reached by `call`, so its frame offsets carry an extra +8
; for the return address):
;   Entry bookkeeping: [8+rsp] = r9 as passed; r13 = rdx+128+((-r9)<<5) is
;     stored at [16+8+rsp] and later compared with rdi to end the outer loop;
;     ((-r9)<<5 >> 10) - 1 is stored at [24+8+rsp] and used as the inner
;     loop count; rdi as passed is stored at [56+8+rsp].
;   Selector masks: xmm5 = DWORD[56+rax] broadcast to all four lanes. Sixteen
;     16-byte masks are produced by comparing xmm5 (pcmpeqd) with counters
;     that start at the two vectors read from $L$inc and are advanced with
;     paddd; they are stored at [112+r10]..[352+r10].
;   Gather: the 256 bytes at [rdi-128]..[rdi+127] (rdi = rdx+128) are ANDed
;     with the sixteen masks and ORed together, the two 64-bit halves are
;     folded with pshufd 0x4e, and DB 102,72,15,126,194 (`movq rdx,xmm0`)
;     moves the selected qword into rdx. rdi then advances by 256.
;   First pass: rdx (copied to r9) is multiplied by four qwords at [rsi] with
;     mulx; the reduction multiplier is r8 = low(r8 * QWORD[32+8+rsp]); it is
;     multiplied by the qwords at [rcx] and the results are written through
;     rbx, which starts at rsp+64+32+8.
;   $L$mulx4x_1st: the same two multiply-accumulate chains (adcx/adox) for
;     the remaining words, four qwords of [rsi] and [rcx] per pass, rdi
;     counting passes.
;   $L$mulx4x_outer: repeats the masked gather for the next table entry
;     (masks read at [256+r10]..[496+r10] with r10 = rbx+16-256), rewinds rbx
;     and rcx by adding rax = QWORD[8+rsp], and starts the next row, this
;     time also adding the previous results read through rbx.
;   The outer-loop setup is cut mid-instruction at the end of this text; the
;   inner loop and the routine's exit are on the following line.
;   DB 0x67 bytes are bare prefix bytes in front of the next instruction
;   (presumably padding -- confirm against the generator).
; ---------------------------------------------------------------------------
0F3h,0C3h ;repret 2419 2420$L$SEH_end_bn_mulx4x_mont_gather5: 2421 2422 2423ALIGN 32 2424mulx4x_internal: 2425 2426 mov QWORD[8+rsp],r9 2427 mov r10,r9 2428 neg r9 2429 shl r9,5 2430 neg r10 2431 lea r13,[128+r9*1+rdx] 2432 shr r9,5+5 2433 movd xmm5,DWORD[56+rax] 2434 sub r9,1 2435 lea rax,[$L$inc] 2436 mov QWORD[((16+8))+rsp],r13 2437 mov QWORD[((24+8))+rsp],r9 2438 mov QWORD[((56+8))+rsp],rdi 2439 movdqa xmm0,XMMWORD[rax] 2440 movdqa xmm1,XMMWORD[16+rax] 2441 lea r10,[((88-112))+r10*1+rsp] 2442 lea rdi,[128+rdx] 2443 2444 pshufd xmm5,xmm5,0 2445 movdqa xmm4,xmm1 2446DB 0x67 2447 movdqa xmm2,xmm1 2448DB 0x67 2449 paddd xmm1,xmm0 2450 pcmpeqd xmm0,xmm5 2451 movdqa xmm3,xmm4 2452 paddd xmm2,xmm1 2453 pcmpeqd xmm1,xmm5 2454 movdqa XMMWORD[112+r10],xmm0 2455 movdqa xmm0,xmm4 2456 2457 paddd xmm3,xmm2 2458 pcmpeqd xmm2,xmm5 2459 movdqa XMMWORD[128+r10],xmm1 2460 movdqa xmm1,xmm4 2461 2462 paddd xmm0,xmm3 2463 pcmpeqd xmm3,xmm5 2464 movdqa XMMWORD[144+r10],xmm2 2465 movdqa xmm2,xmm4 2466 2467 paddd xmm1,xmm0 2468 pcmpeqd xmm0,xmm5 2469 movdqa XMMWORD[160+r10],xmm3 2470 movdqa xmm3,xmm4 2471 paddd xmm2,xmm1 2472 pcmpeqd xmm1,xmm5 2473 movdqa XMMWORD[176+r10],xmm0 2474 movdqa xmm0,xmm4 2475 2476 paddd xmm3,xmm2 2477 pcmpeqd xmm2,xmm5 2478 movdqa XMMWORD[192+r10],xmm1 2479 movdqa xmm1,xmm4 2480 2481 paddd xmm0,xmm3 2482 pcmpeqd xmm3,xmm5 2483 movdqa XMMWORD[208+r10],xmm2 2484 movdqa xmm2,xmm4 2485 2486 paddd xmm1,xmm0 2487 pcmpeqd xmm0,xmm5 2488 movdqa XMMWORD[224+r10],xmm3 2489 movdqa xmm3,xmm4 2490 paddd xmm2,xmm1 2491 pcmpeqd xmm1,xmm5 2492 movdqa XMMWORD[240+r10],xmm0 2493 movdqa xmm0,xmm4 2494 2495 paddd xmm3,xmm2 2496 pcmpeqd xmm2,xmm5 2497 movdqa XMMWORD[256+r10],xmm1 2498 movdqa xmm1,xmm4 2499 2500 paddd xmm0,xmm3 2501 pcmpeqd xmm3,xmm5 2502 movdqa XMMWORD[272+r10],xmm2 2503 movdqa xmm2,xmm4 2504 2505 paddd xmm1,xmm0 2506 pcmpeqd xmm0,xmm5 2507 movdqa XMMWORD[288+r10],xmm3 2508 movdqa xmm3,xmm4 2509DB 0x67 2510 paddd xmm2,xmm1 2511 pcmpeqd xmm1,xmm5 2512 movdqa
XMMWORD[304+r10],xmm0 2513 2514 paddd xmm3,xmm2 2515 pcmpeqd xmm2,xmm5 2516 movdqa XMMWORD[320+r10],xmm1 2517 2518 pcmpeqd xmm3,xmm5 2519 movdqa XMMWORD[336+r10],xmm2 2520 2521 pand xmm0,XMMWORD[64+rdi] 2522 pand xmm1,XMMWORD[80+rdi] 2523 pand xmm2,XMMWORD[96+rdi] 2524 movdqa XMMWORD[352+r10],xmm3 2525 pand xmm3,XMMWORD[112+rdi] 2526 por xmm0,xmm2 2527 por xmm1,xmm3 2528 movdqa xmm4,XMMWORD[((-128))+rdi] 2529 movdqa xmm5,XMMWORD[((-112))+rdi] 2530 movdqa xmm2,XMMWORD[((-96))+rdi] 2531 pand xmm4,XMMWORD[112+r10] 2532 movdqa xmm3,XMMWORD[((-80))+rdi] 2533 pand xmm5,XMMWORD[128+r10] 2534 por xmm0,xmm4 2535 pand xmm2,XMMWORD[144+r10] 2536 por xmm1,xmm5 2537 pand xmm3,XMMWORD[160+r10] 2538 por xmm0,xmm2 2539 por xmm1,xmm3 2540 movdqa xmm4,XMMWORD[((-64))+rdi] 2541 movdqa xmm5,XMMWORD[((-48))+rdi] 2542 movdqa xmm2,XMMWORD[((-32))+rdi] 2543 pand xmm4,XMMWORD[176+r10] 2544 movdqa xmm3,XMMWORD[((-16))+rdi] 2545 pand xmm5,XMMWORD[192+r10] 2546 por xmm0,xmm4 2547 pand xmm2,XMMWORD[208+r10] 2548 por xmm1,xmm5 2549 pand xmm3,XMMWORD[224+r10] 2550 por xmm0,xmm2 2551 por xmm1,xmm3 2552 movdqa xmm4,XMMWORD[rdi] 2553 movdqa xmm5,XMMWORD[16+rdi] 2554 movdqa xmm2,XMMWORD[32+rdi] 2555 pand xmm4,XMMWORD[240+r10] 2556 movdqa xmm3,XMMWORD[48+rdi] 2557 pand xmm5,XMMWORD[256+r10] 2558 por xmm0,xmm4 2559 pand xmm2,XMMWORD[272+r10] 2560 por xmm1,xmm5 2561 pand xmm3,XMMWORD[288+r10] 2562 por xmm0,xmm2 2563 por xmm1,xmm3 2564 pxor xmm0,xmm1 2565 pshufd xmm1,xmm0,0x4e 2566 por xmm0,xmm1 2567 lea rdi,[256+rdi] 2568DB 102,72,15,126,194 2569 lea rbx,[((64+32+8))+rsp] 2570 2571 mov r9,rdx 2572 mulx rax,r8,QWORD[rsi] 2573 mulx r12,r11,QWORD[8+rsi] 2574 add r11,rax 2575 mulx r13,rax,QWORD[16+rsi] 2576 adc r12,rax 2577 adc r13,0 2578 mulx r14,rax,QWORD[24+rsi] 2579 2580 mov r15,r8 2581 imul r8,QWORD[((32+8))+rsp] 2582 xor rbp,rbp 2583 mov rdx,r8 2584 2585 mov QWORD[((8+8))+rsp],rdi 2586 2587 lea rsi,[32+rsi] 2588 adcx r13,rax 2589 adcx r14,rbp 2590 2591 mulx r10,rax,QWORD[rcx] 2592 adcx r15,rax 2593
adox r10,r11 2594 mulx r11,rax,QWORD[8+rcx] 2595 adcx r10,rax 2596 adox r11,r12 2597 mulx r12,rax,QWORD[16+rcx] 2598 mov rdi,QWORD[((24+8))+rsp] 2599 mov QWORD[((-32))+rbx],r10 2600 adcx r11,rax 2601 adox r12,r13 2602 mulx r15,rax,QWORD[24+rcx] 2603 mov rdx,r9 2604 mov QWORD[((-24))+rbx],r11 2605 adcx r12,rax 2606 adox r15,rbp 2607 lea rcx,[32+rcx] 2608 mov QWORD[((-16))+rbx],r12 2609 jmp NEAR $L$mulx4x_1st 2610 2611ALIGN 32 2612$L$mulx4x_1st: 2613 adcx r15,rbp 2614 mulx rax,r10,QWORD[rsi] 2615 adcx r10,r14 2616 mulx r14,r11,QWORD[8+rsi] 2617 adcx r11,rax 2618 mulx rax,r12,QWORD[16+rsi] 2619 adcx r12,r14 2620 mulx r14,r13,QWORD[24+rsi] 2621DB 0x67,0x67 2622 mov rdx,r8 2623 adcx r13,rax 2624 adcx r14,rbp 2625 lea rsi,[32+rsi] 2626 lea rbx,[32+rbx] 2627 2628 adox r10,r15 2629 mulx r15,rax,QWORD[rcx] 2630 adcx r10,rax 2631 adox r11,r15 2632 mulx r15,rax,QWORD[8+rcx] 2633 adcx r11,rax 2634 adox r12,r15 2635 mulx r15,rax,QWORD[16+rcx] 2636 mov QWORD[((-40))+rbx],r10 2637 adcx r12,rax 2638 mov QWORD[((-32))+rbx],r11 2639 adox r13,r15 2640 mulx r15,rax,QWORD[24+rcx] 2641 mov rdx,r9 2642 mov QWORD[((-24))+rbx],r12 2643 adcx r13,rax 2644 adox r15,rbp 2645 lea rcx,[32+rcx] 2646 mov QWORD[((-16))+rbx],r13 2647 2648 dec rdi 2649 jnz NEAR $L$mulx4x_1st 2650 2651 mov rax,QWORD[8+rsp] 2652 adc r15,rbp 2653 lea rsi,[rax*1+rsi] 2654 add r14,r15 2655 mov rdi,QWORD[((8+8))+rsp] 2656 adc rbp,rbp 2657 mov QWORD[((-8))+rbx],r14 2658 jmp NEAR $L$mulx4x_outer 2659 2660ALIGN 32 2661$L$mulx4x_outer: 2662 lea r10,[((16-256))+rbx] 2663 pxor xmm4,xmm4 2664DB 0x67,0x67 2665 pxor xmm5,xmm5 2666 movdqa xmm0,XMMWORD[((-128))+rdi] 2667 movdqa xmm1,XMMWORD[((-112))+rdi] 2668 movdqa xmm2,XMMWORD[((-96))+rdi] 2669 pand xmm0,XMMWORD[256+r10] 2670 movdqa xmm3,XMMWORD[((-80))+rdi] 2671 pand xmm1,XMMWORD[272+r10] 2672 por xmm4,xmm0 2673 pand xmm2,XMMWORD[288+r10] 2674 por xmm5,xmm1 2675 pand xmm3,XMMWORD[304+r10] 2676 por xmm4,xmm2 2677 por xmm5,xmm3 2678 movdqa xmm0,XMMWORD[((-64))+rdi] 2679 movdqa
xmm1,XMMWORD[((-48))+rdi] 2680 movdqa xmm2,XMMWORD[((-32))+rdi] 2681 pand xmm0,XMMWORD[320+r10] 2682 movdqa xmm3,XMMWORD[((-16))+rdi] 2683 pand xmm1,XMMWORD[336+r10] 2684 por xmm4,xmm0 2685 pand xmm2,XMMWORD[352+r10] 2686 por xmm5,xmm1 2687 pand xmm3,XMMWORD[368+r10] 2688 por xmm4,xmm2 2689 por xmm5,xmm3 2690 movdqa xmm0,XMMWORD[rdi] 2691 movdqa xmm1,XMMWORD[16+rdi] 2692 movdqa xmm2,XMMWORD[32+rdi] 2693 pand xmm0,XMMWORD[384+r10] 2694 movdqa xmm3,XMMWORD[48+rdi] 2695 pand xmm1,XMMWORD[400+r10] 2696 por xmm4,xmm0 2697 pand xmm2,XMMWORD[416+r10] 2698 por xmm5,xmm1 2699 pand xmm3,XMMWORD[432+r10] 2700 por xmm4,xmm2 2701 por xmm5,xmm3 2702 movdqa xmm0,XMMWORD[64+rdi] 2703 movdqa xmm1,XMMWORD[80+rdi] 2704 movdqa xmm2,XMMWORD[96+rdi] 2705 pand xmm0,XMMWORD[448+r10] 2706 movdqa xmm3,XMMWORD[112+rdi] 2707 pand xmm1,XMMWORD[464+r10] 2708 por xmm4,xmm0 2709 pand xmm2,XMMWORD[480+r10] 2710 por xmm5,xmm1 2711 pand xmm3,XMMWORD[496+r10] 2712 por xmm4,xmm2 2713 por xmm5,xmm3 2714 por xmm4,xmm5 2715 pshufd xmm0,xmm4,0x4e 2716 por xmm0,xmm4 2717 lea rdi,[256+rdi] 2718DB 102,72,15,126,194 2719 2720 mov QWORD[rbx],rbp 2721 lea rbx,[32+rax*1+rbx] 2722 mulx r11,r8,QWORD[rsi] 2723 xor rbp,rbp 2724 mov r9,rdx 2725 mulx r12,r14,QWORD[8+rsi] 2726 adox r8,QWORD[((-32))+rbx] 2727 adcx r11,r14 2728 mulx r13,r15,QWORD[16+rsi] 2729 adox r11,QWORD[((-24))+rbx] 2730 adcx r12,r15 2731 mulx r14,rdx,QWORD[24+rsi] 2732 adox r12,QWORD[((-16))+rbx] 2733 adcx r13,rdx 2734 lea rcx,[rax*1+rcx] 2735 lea rsi,[32+rsi] 2736 adox r13,QWORD[((-8))+rbx] 2737 adcx r14,rbp 2738 adox r14,rbp 2739 2740 mov r15,r8 2741 imul r8,QWORD[((32+8))+rsp] 2742 2743 mov rdx,r8 2744 xor rbp,rbp 2745 mov QWORD[((8+8))+rsp],rdi 2746 2747 mulx r10,rax,QWORD[rcx] 2748 adcx r15,rax 2749 adox r10,r11 2750 mulx r11,rax,QWORD[8+rcx] 2751 adcx r10,rax 2752 adox r11,r12 2753 mulx r12,rax,QWORD[16+rcx] 2754 adcx r11,rax 2755 adox r12,r13 2756 mulx r15,rax,QWORD[24+rcx] 2757 mov rdx,r9 2758 mov rdi,QWORD[((24+8))+rsp] 2759 mov
; ---------------------------------------------------------------------------
; NOTE: the text below opens with the operands of the "mov" that ends the
; previous line and first finishes mulx4x_internal, then defines bn_powerx5.
;
; Remainder of mulx4x_internal:
;   $L$mulx4x_inner: per pass multiplies rdx (= r9, the gathered word) by
;     four qwords at [rsi], adds four previous result qwords read at [rbx],
;     then multiplies rdx (= r8, the reduction multiplier) by four qwords at
;     [rcx] and stores four result qwords at [-40+rbx]..[-16+rbx]; rdi counts
;     passes.
;   After the inner loop: rax = QWORD[0+8+rsp]; `sub rdi,QWORD[rbx]` is
;     executed only for its carry flag (rdi is 0 there and is reloaded by the
;     next instruction); rsi is rewound by adding rax; the loop repeats while
;     rdi (next table position, from [8+8+rsp]) is below QWORD[16+8+rsp].
;   Exit: prepares rbp = rcx+rax, rdi = rbx+rax, rcx = rax >> 5 (arithmetic),
;     a 0 / -1 style value in rax (0 minus the OR of the top carry in rbp and
;     the borrow of `sub r10,r14`), rdx = QWORD[56+8+rsp], r12..r15 from
;     [rbp]..[24+rbp] with r12 decremented, and jumps to $L$sqrx4x_sub_entry.
;
; bn_powerx5:
;   WIN64 prologue, six pushes, frame selection and page walk follow the same
;     pattern as the other entry points in this file: r9d <<= 3, r10 = 3*r9,
;     r9 = -r9, r8 = QWORD[r8]; rbp chosen below rsp and aligned to 64; rsp
;     lowered page by page with a read per page.
;   After the walk: r10 = r9 (negative byte count), r9 = -r9, xmm0 = 0.
;   DB 102,72,15,110,207 / 102,72,15,110,209 / 102,73,15,110,218 /
;     102,72,15,110,226 encode `movq xmm1,rdi`, `movq xmm2,rcx`,
;     `movq xmm3,r10`, `movq xmm4,rdx`.
;   Frame slots written here: [32+rsp] = r8, [40+rsp] = rax (entry rsp).
;   $L$powerx5_body: calls __bn_sqrx8x_internal followed by
;     __bn_postx4x_internal five times in a row, then sets r9 = r10,
;     rdi = rsi, restores rcx and rdx from xmm2 and xmm4
;     (DB 102,72,15,126,209 and DB 102,72,15,126,226), reloads
;     rax = QWORD[40+rsp] and calls mulx4x_internal.
;   The epilogue starts with the "mov" that ends this text and continues on
;   the next line.
; ---------------------------------------------------------------------------
QWORD[((-32))+rbx],r10 2760 adcx r12,rax 2761 mov QWORD[((-24))+rbx],r11 2762 adox r15,rbp 2763 mov QWORD[((-16))+rbx],r12 2764 lea rcx,[32+rcx] 2765 jmp NEAR $L$mulx4x_inner 2766 2767ALIGN 32 2768$L$mulx4x_inner: 2769 mulx rax,r10,QWORD[rsi] 2770 adcx r15,rbp 2771 adox r10,r14 2772 mulx r14,r11,QWORD[8+rsi] 2773 adcx r10,QWORD[rbx] 2774 adox r11,rax 2775 mulx rax,r12,QWORD[16+rsi] 2776 adcx r11,QWORD[8+rbx] 2777 adox r12,r14 2778 mulx r14,r13,QWORD[24+rsi] 2779 mov rdx,r8 2780 adcx r12,QWORD[16+rbx] 2781 adox r13,rax 2782 adcx r13,QWORD[24+rbx] 2783 adox r14,rbp 2784 lea rsi,[32+rsi] 2785 lea rbx,[32+rbx] 2786 adcx r14,rbp 2787 2788 adox r10,r15 2789 mulx r15,rax,QWORD[rcx] 2790 adcx r10,rax 2791 adox r11,r15 2792 mulx r15,rax,QWORD[8+rcx] 2793 adcx r11,rax 2794 adox r12,r15 2795 mulx r15,rax,QWORD[16+rcx] 2796 mov QWORD[((-40))+rbx],r10 2797 adcx r12,rax 2798 adox r13,r15 2799 mov QWORD[((-32))+rbx],r11 2800 mulx r15,rax,QWORD[24+rcx] 2801 mov rdx,r9 2802 lea rcx,[32+rcx] 2803 mov QWORD[((-24))+rbx],r12 2804 adcx r13,rax 2805 adox r15,rbp 2806 mov QWORD[((-16))+rbx],r13 2807 2808 dec rdi 2809 jnz NEAR $L$mulx4x_inner 2810 2811 mov rax,QWORD[((0+8))+rsp] 2812 adc r15,rbp 2813 sub rdi,QWORD[rbx] 2814 mov rdi,QWORD[((8+8))+rsp] 2815 mov r10,QWORD[((16+8))+rsp] 2816 adc r14,r15 2817 lea rsi,[rax*1+rsi] 2818 adc rbp,rbp 2819 mov QWORD[((-8))+rbx],r14 2820 2821 cmp rdi,r10 2822 jb NEAR $L$mulx4x_outer 2823 2824 mov r10,QWORD[((-8))+rcx] 2825 mov r8,rbp 2826 mov r12,QWORD[rax*1+rcx] 2827 lea rbp,[rax*1+rcx] 2828 mov rcx,rax 2829 lea rdi,[rax*1+rbx] 2830 xor eax,eax 2831 xor r15,r15 2832 sub r10,r14 2833 adc r15,r15 2834 or r8,r15 2835 sar rcx,3+2 2836 sub rax,r8 2837 mov rdx,QWORD[((56+8))+rsp] 2838 dec r12 2839 mov r13,QWORD[8+rbp] 2840 xor r8,r8 2841 mov r14,QWORD[16+rbp] 2842 mov r15,QWORD[24+rbp] 2843 jmp NEAR $L$sqrx4x_sub_entry 2844 2845 2846 2847ALIGN 32 2848bn_powerx5: 2849 mov QWORD[8+rsp],rdi ;WIN64 prologue 2850 mov QWORD[16+rsp],rsi 2851 mov rax,rsp
2852$L$SEH_begin_bn_powerx5: 2853 mov rdi,rcx 2854 mov rsi,rdx 2855 mov rdx,r8 2856 mov rcx,r9 2857 mov r8,QWORD[40+rsp] 2858 mov r9,QWORD[48+rsp] 2859 2860 2861 2862 mov rax,rsp 2863 2864$L$powerx5_enter: 2865 push rbx 2866 2867 push rbp 2868 2869 push r12 2870 2871 push r13 2872 2873 push r14 2874 2875 push r15 2876 2877$L$powerx5_prologue: 2878 2879 shl r9d,3 2880 lea r10,[r9*2+r9] 2881 neg r9 2882 mov r8,QWORD[r8] 2883 2884 2885 2886 2887 2888 2889 2890 2891 lea r11,[((-320))+r9*2+rsp] 2892 mov rbp,rsp 2893 sub r11,rdi 2894 and r11,4095 2895 cmp r10,r11 2896 jb NEAR $L$pwrx_sp_alt 2897 sub rbp,r11 2898 lea rbp,[((-320))+r9*2+rbp] 2899 jmp NEAR $L$pwrx_sp_done 2900 2901ALIGN 32 2902$L$pwrx_sp_alt: 2903 lea r10,[((4096-320))+r9*2] 2904 lea rbp,[((-320))+r9*2+rbp] 2905 sub r11,r10 2906 mov r10,0 2907 cmovc r11,r10 2908 sub rbp,r11 2909$L$pwrx_sp_done: 2910 and rbp,-64 2911 mov r11,rsp 2912 sub r11,rbp 2913 and r11,-4096 2914 lea rsp,[rbp*1+r11] 2915 mov r10,QWORD[rsp] 2916 cmp rsp,rbp 2917 ja NEAR $L$pwrx_page_walk 2918 jmp NEAR $L$pwrx_page_walk_done 2919 2920$L$pwrx_page_walk: 2921 lea rsp,[((-4096))+rsp] 2922 mov r10,QWORD[rsp] 2923 cmp rsp,rbp 2924 ja NEAR $L$pwrx_page_walk 2925$L$pwrx_page_walk_done: 2926 2927 mov r10,r9 2928 neg r9 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 pxor xmm0,xmm0 2942DB 102,72,15,110,207 2943DB 102,72,15,110,209 2944DB 102,73,15,110,218 2945DB 102,72,15,110,226 2946 mov QWORD[32+rsp],r8 2947 mov QWORD[40+rsp],rax 2948 2949$L$powerx5_body: 2950 2951 call __bn_sqrx8x_internal 2952 call __bn_postx4x_internal 2953 call __bn_sqrx8x_internal 2954 call __bn_postx4x_internal 2955 call __bn_sqrx8x_internal 2956 call __bn_postx4x_internal 2957 call __bn_sqrx8x_internal 2958 call __bn_postx4x_internal 2959 call __bn_sqrx8x_internal 2960 call __bn_postx4x_internal 2961 2962 mov r9,r10 2963 mov rdi,rsi 2964DB 102,72,15,126,209 2965DB 102,72,15,126,226 2966 mov rax,QWORD[40+rsp] 2967 2968 call mulx4x_internal 2969 2970 mov
; ---------------------------------------------------------------------------
; NOTE: the text below opens with the operands of the "mov" that ends the
; previous line and first finishes the epilogue of bn_powerx5 (rax = 1,
; callee-saved registers reloaded from [-48+rsi]..[-8+rsi], rsp = rsi,
; WIN64 restore of rdi/rsi, `rep ret`).
;
; GFp_bn_sqrx8x_internal (global) and __bn_sqrx8x_internal are two labels for
; the same entry point. It is reached by `call`, so frame offsets are written
; as (n+8).
;   Setup: rdi = rsp+48+8 (result area), rbp = rsi+r9 (end of input),
;     [0+8+rsp] = r9, [8+8+rsp] = rbp.
;   $L$sqrx8x_zero: clears the result area with xmm0, 128 bytes per pass,
;     r9 reduced by 64 per pass. The loop is entered at $L$sqr8x_zero_start,
;     so the very first pass skips the four stores at [rdi]..[48+rdi].
;     The DB line in front of the label (0x66,0x66,0x66,0x2e,0x0f,0x1f,...)
;     is a multi-byte NOP; DB 0x3e is a bare prefix byte.
;   $L$sqrx8x_outer_loop: with rdx = QWORD[rsi], forms the cross products of
;     the first eight input qwords with mulx and two carry chains (adcx/adox),
;     stepping rdx through [8+rsi]..[48+rsi]; finished qwords are stored at
;     [8+rdi]..[64+rdi]. The raw DB 0xc4,... lines are VEX-encoded mulx
;     instructions emitted as bytes (for example
;     0xc4,0xe2,0xab,0xf6,0x86,0x18,0,0,0 is `mulx rax,r10,QWORD[24+rsi]`).
;     rsi advances by 64; if it equals QWORD[8+8+rsp] control goes to
;     $L$sqrx8x_outer_break.
;   $L$sqrx8x_loop: multiplies the previous eight qwords (rdx reloaded from
;     [8+rcx*8+rsi], rcx running -8..-1) by eight qwords at [rbp], adding
;     into r8..r15 and storing one qword per pass at [rcx*8+rdi]. The carry
;     between 64-byte groups is kept as a mask at [16+8+rsp];
;     [24+8+rsp] remembers the rdi to resume from.
;   $L$sqrx8x_break: folds the saved carry in, stores r8, and either loops to
;     $L$sqrx8x_outer_loop directly (rdi == rcx) or first stores r9..r15 and
;     reloads them from [8+rcx]..[56+rcx].
;   $L$sqrx8x_outer_break: stores the last partial products,
;     DB 102,72,15,126,217 (`movq rcx,xmm3`), rewinds rdi to rsp+48+8 and
;     loads rdx = QWORD[rsi+rcx], r9 = QWORD[0+8+rsp].
;   $L$sqrx4x_shift_n_add: squares one input qword per step
;     (`mulx rbx,rax,rdx`), doubles the stored cross products with
;     `adox reg,reg` and adds the two with adcx. The two raw DB lines encode
;     `mov rdx,QWORD[8+rcx*1+rsi]` and `mov r10,QWORD[32+rdi]`. The loop body
;     is cut mid-instruction at the end of this text.
; ---------------------------------------------------------------------------
rsi,QWORD[40+rsp] 2971 2972 mov rax,1 2973 2974 mov r15,QWORD[((-48))+rsi] 2975 2976 mov r14,QWORD[((-40))+rsi] 2977 2978 mov r13,QWORD[((-32))+rsi] 2979 2980 mov r12,QWORD[((-24))+rsi] 2981 2982 mov rbp,QWORD[((-16))+rsi] 2983 2984 mov rbx,QWORD[((-8))+rsi] 2985 2986 lea rsp,[rsi] 2987 2988$L$powerx5_epilogue: 2989 mov rdi,QWORD[8+rsp] ;WIN64 epilogue 2990 mov rsi,QWORD[16+rsp] 2991 DB 0F3h,0C3h ;repret 2992 2993$L$SEH_end_bn_powerx5: 2994 2995global GFp_bn_sqrx8x_internal 2996 2997ALIGN 32 2998GFp_bn_sqrx8x_internal: 2999__bn_sqrx8x_internal: 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 lea rdi,[((48+8))+rsp] 3042 lea rbp,[r9*1+rsi] 3043 mov QWORD[((0+8))+rsp],r9 3044 mov QWORD[((8+8))+rsp],rbp 3045 jmp NEAR $L$sqr8x_zero_start 3046 3047ALIGN 32 3048DB 0x66,0x66,0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00 3049$L$sqrx8x_zero: 3050DB 0x3e 3051 movdqa XMMWORD[rdi],xmm0 3052 movdqa XMMWORD[16+rdi],xmm0 3053 movdqa XMMWORD[32+rdi],xmm0 3054 movdqa XMMWORD[48+rdi],xmm0 3055$L$sqr8x_zero_start: 3056 movdqa XMMWORD[64+rdi],xmm0 3057 movdqa XMMWORD[80+rdi],xmm0 3058 movdqa XMMWORD[96+rdi],xmm0 3059 movdqa XMMWORD[112+rdi],xmm0 3060 lea rdi,[128+rdi] 3061 sub r9,64 3062 jnz NEAR $L$sqrx8x_zero 3063 3064 mov rdx,QWORD[rsi] 3065 3066 xor r10,r10 3067 xor r11,r11 3068 xor r12,r12 3069 xor r13,r13 3070 xor r14,r14 3071 xor r15,r15 3072 lea rdi,[((48+8))+rsp] 3073 xor rbp,rbp 3074 jmp NEAR $L$sqrx8x_outer_loop 3075 3076ALIGN 32 3077$L$sqrx8x_outer_loop: 3078 mulx rax,r8,QWORD[8+rsi] 3079 adcx r8,r9 3080 adox r10,rax 3081 mulx rax,r9,QWORD[16+rsi] 3082 adcx r9,r10 3083 adox r11,rax 3084DB 0xc4,0xe2,0xab,0xf6,0x86,0x18,0x00,0x00,0x00 3085 adcx r10,r11 3086 adox r12,rax 3087DB 0xc4,0xe2,0xa3,0xf6,0x86,0x20,0x00,0x00,0x00 3088 adcx r11,r12 3089 adox r13,rax 3090 mulx rax,r12,QWORD[40+rsi] 3091 adcx r12,r13 3092
adox r14,rax 3093 mulx rax,r13,QWORD[48+rsi] 3094 adcx r13,r14 3095 adox rax,r15 3096 mulx r15,r14,QWORD[56+rsi] 3097 mov rdx,QWORD[8+rsi] 3098 adcx r14,rax 3099 adox r15,rbp 3100 adc r15,QWORD[64+rdi] 3101 mov QWORD[8+rdi],r8 3102 mov QWORD[16+rdi],r9 3103 sbb rcx,rcx 3104 xor rbp,rbp 3105 3106 3107 mulx rbx,r8,QWORD[16+rsi] 3108 mulx rax,r9,QWORD[24+rsi] 3109 adcx r8,r10 3110 adox r9,rbx 3111 mulx rbx,r10,QWORD[32+rsi] 3112 adcx r9,r11 3113 adox r10,rax 3114DB 0xc4,0xe2,0xa3,0xf6,0x86,0x28,0x00,0x00,0x00 3115 adcx r10,r12 3116 adox r11,rbx 3117DB 0xc4,0xe2,0x9b,0xf6,0x9e,0x30,0x00,0x00,0x00 3118 adcx r11,r13 3119 adox r12,r14 3120DB 0xc4,0x62,0x93,0xf6,0xb6,0x38,0x00,0x00,0x00 3121 mov rdx,QWORD[16+rsi] 3122 adcx r12,rax 3123 adox r13,rbx 3124 adcx r13,r15 3125 adox r14,rbp 3126 adcx r14,rbp 3127 3128 mov QWORD[24+rdi],r8 3129 mov QWORD[32+rdi],r9 3130 3131 mulx rbx,r8,QWORD[24+rsi] 3132 mulx rax,r9,QWORD[32+rsi] 3133 adcx r8,r10 3134 adox r9,rbx 3135 mulx rbx,r10,QWORD[40+rsi] 3136 adcx r9,r11 3137 adox r10,rax 3138DB 0xc4,0xe2,0xa3,0xf6,0x86,0x30,0x00,0x00,0x00 3139 adcx r10,r12 3140 adox r11,r13 3141DB 0xc4,0x62,0x9b,0xf6,0xae,0x38,0x00,0x00,0x00 3142DB 0x3e 3143 mov rdx,QWORD[24+rsi] 3144 adcx r11,rbx 3145 adox r12,rax 3146 adcx r12,r14 3147 mov QWORD[40+rdi],r8 3148 mov QWORD[48+rdi],r9 3149 mulx rax,r8,QWORD[32+rsi] 3150 adox r13,rbp 3151 adcx r13,rbp 3152 3153 mulx rbx,r9,QWORD[40+rsi] 3154 adcx r8,r10 3155 adox r9,rax 3156 mulx rax,r10,QWORD[48+rsi] 3157 adcx r9,r11 3158 adox r10,r12 3159 mulx r12,r11,QWORD[56+rsi] 3160 mov rdx,QWORD[32+rsi] 3161 mov r14,QWORD[40+rsi] 3162 adcx r10,rbx 3163 adox r11,rax 3164 mov r15,QWORD[48+rsi] 3165 adcx r11,r13 3166 adox r12,rbp 3167 adcx r12,rbp 3168 3169 mov QWORD[56+rdi],r8 3170 mov QWORD[64+rdi],r9 3171 3172 mulx rax,r9,r14 3173 mov r8,QWORD[56+rsi] 3174 adcx r9,r10 3175 mulx rbx,r10,r15 3176 adox r10,rax 3177 adcx r10,r11 3178 mulx rax,r11,r8 3179 mov rdx,r14 3180 adox r11,rbx 3181 adcx r11,r12 3182 3183 adcx
rax,rbp 3184 3185 mulx rbx,r14,r15 3186 mulx r13,r12,r8 3187 mov rdx,r15 3188 lea rsi,[64+rsi] 3189 adcx r11,r14 3190 adox r12,rbx 3191 adcx r12,rax 3192 adox r13,rbp 3193 3194DB 0x67,0x67 3195 mulx r14,r8,r8 3196 adcx r13,r8 3197 adcx r14,rbp 3198 3199 cmp rsi,QWORD[((8+8))+rsp] 3200 je NEAR $L$sqrx8x_outer_break 3201 3202 neg rcx 3203 mov rcx,-8 3204 mov r15,rbp 3205 mov r8,QWORD[64+rdi] 3206 adcx r9,QWORD[72+rdi] 3207 adcx r10,QWORD[80+rdi] 3208 adcx r11,QWORD[88+rdi] 3209 adc r12,QWORD[96+rdi] 3210 adc r13,QWORD[104+rdi] 3211 adc r14,QWORD[112+rdi] 3212 adc r15,QWORD[120+rdi] 3213 lea rbp,[rsi] 3214 lea rdi,[128+rdi] 3215 sbb rax,rax 3216 3217 mov rdx,QWORD[((-64))+rsi] 3218 mov QWORD[((16+8))+rsp],rax 3219 mov QWORD[((24+8))+rsp],rdi 3220 3221 3222 xor eax,eax 3223 jmp NEAR $L$sqrx8x_loop 3224 3225ALIGN 32 3226$L$sqrx8x_loop: 3227 mov rbx,r8 3228 mulx r8,rax,QWORD[rbp] 3229 adcx rbx,rax 3230 adox r8,r9 3231 3232 mulx r9,rax,QWORD[8+rbp] 3233 adcx r8,rax 3234 adox r9,r10 3235 3236 mulx r10,rax,QWORD[16+rbp] 3237 adcx r9,rax 3238 adox r10,r11 3239 3240 mulx r11,rax,QWORD[24+rbp] 3241 adcx r10,rax 3242 adox r11,r12 3243 3244DB 0xc4,0x62,0xfb,0xf6,0xa5,0x20,0x00,0x00,0x00 3245 adcx r11,rax 3246 adox r12,r13 3247 3248 mulx r13,rax,QWORD[40+rbp] 3249 adcx r12,rax 3250 adox r13,r14 3251 3252 mulx r14,rax,QWORD[48+rbp] 3253 mov QWORD[rcx*8+rdi],rbx 3254 mov ebx,0 3255 adcx r13,rax 3256 adox r14,r15 3257 3258DB 0xc4,0x62,0xfb,0xf6,0xbd,0x38,0x00,0x00,0x00 3259 mov rdx,QWORD[8+rcx*8+rsi] 3260 adcx r14,rax 3261 adox r15,rbx 3262 adcx r15,rbx 3263 3264DB 0x67 3265 inc rcx 3266 jnz NEAR $L$sqrx8x_loop 3267 3268 lea rbp,[64+rbp] 3269 mov rcx,-8 3270 cmp rbp,QWORD[((8+8))+rsp] 3271 je NEAR $L$sqrx8x_break 3272 3273 sub rbx,QWORD[((16+8))+rsp] 3274DB 0x66 3275 mov rdx,QWORD[((-64))+rsi] 3276 adcx r8,QWORD[rdi] 3277 adcx r9,QWORD[8+rdi] 3278 adc r10,QWORD[16+rdi] 3279 adc r11,QWORD[24+rdi] 3280 adc r12,QWORD[32+rdi] 3281 adc r13,QWORD[40+rdi] 3282 adc r14,QWORD[48+rdi] 3283
adc r15,QWORD[56+rdi] 3284 lea rdi,[64+rdi] 3285DB 0x67 3286 sbb rax,rax 3287 xor ebx,ebx 3288 mov QWORD[((16+8))+rsp],rax 3289 jmp NEAR $L$sqrx8x_loop 3290 3291ALIGN 32 3292$L$sqrx8x_break: 3293 xor rbp,rbp 3294 sub rbx,QWORD[((16+8))+rsp] 3295 adcx r8,rbp 3296 mov rcx,QWORD[((24+8))+rsp] 3297 adcx r9,rbp 3298 mov rdx,QWORD[rsi] 3299 adc r10,0 3300 mov QWORD[rdi],r8 3301 adc r11,0 3302 adc r12,0 3303 adc r13,0 3304 adc r14,0 3305 adc r15,0 3306 cmp rdi,rcx 3307 je NEAR $L$sqrx8x_outer_loop 3308 3309 mov QWORD[8+rdi],r9 3310 mov r9,QWORD[8+rcx] 3311 mov QWORD[16+rdi],r10 3312 mov r10,QWORD[16+rcx] 3313 mov QWORD[24+rdi],r11 3314 mov r11,QWORD[24+rcx] 3315 mov QWORD[32+rdi],r12 3316 mov r12,QWORD[32+rcx] 3317 mov QWORD[40+rdi],r13 3318 mov r13,QWORD[40+rcx] 3319 mov QWORD[48+rdi],r14 3320 mov r14,QWORD[48+rcx] 3321 mov QWORD[56+rdi],r15 3322 mov r15,QWORD[56+rcx] 3323 mov rdi,rcx 3324 jmp NEAR $L$sqrx8x_outer_loop 3325 3326ALIGN 32 3327$L$sqrx8x_outer_break: 3328 mov QWORD[72+rdi],r9 3329DB 102,72,15,126,217 3330 mov QWORD[80+rdi],r10 3331 mov QWORD[88+rdi],r11 3332 mov QWORD[96+rdi],r12 3333 mov QWORD[104+rdi],r13 3334 mov QWORD[112+rdi],r14 3335 lea rdi,[((48+8))+rsp] 3336 mov rdx,QWORD[rcx*1+rsi] 3337 3338 mov r11,QWORD[8+rdi] 3339 xor r10,r10 3340 mov r9,QWORD[((0+8))+rsp] 3341 adox r11,r11 3342 mov r12,QWORD[16+rdi] 3343 mov r13,QWORD[24+rdi] 3344 3345 3346ALIGN 32 3347$L$sqrx4x_shift_n_add: 3348 mulx rbx,rax,rdx 3349 adox r12,r12 3350 adcx rax,r10 3351DB 0x48,0x8b,0x94,0x0e,0x08,0x00,0x00,0x00 3352DB 0x4c,0x8b,0x97,0x20,0x00,0x00,0x00 3353 adox r13,r13 3354 adcx rbx,r11 3355 mov r11,QWORD[40+rdi] 3356 mov QWORD[rdi],rax 3357 mov QWORD[8+rdi],rbx 3358 3359 mulx rbx,rax,rdx 3360 adox r10,r10 3361 adcx rax,r12 3362 mov rdx,QWORD[16+rcx*1+rsi] 3363 mov r12,QWORD[48+rdi] 3364 adox r11,r11 3365 adcx rbx,r13 3366 mov r13,QWORD[56+rdi] 3367 mov QWORD[16+rdi],rax 3368 mov QWORD[24+rdi],rbx 3369 3370 mulx rbx,rax,rdx 3371 adox r12,r12 3372 adcx rax,r10 3373 mov
; ---------------------------------------------------------------------------
; NOTE: the text below opens with the operands of the "mov" that ends the
; previous line and first finishes the $L$sqrx4x_shift_n_add loop: rcx
; advances by 32 per pass, `jrcxz` leaves the loop when rcx reaches zero,
; DB 0x48,0x8b,0x94,0x0e,0,0,0,0 encodes `mov rdx,QWORD[rcx*1+rsi]`, and
; $L$sqrx4x_shift_n_add_break stores the last two qwords and advances rdi.
;
; __bn_sqrx8x_reduction (entered by fall-through and by `call`):
;   DB 102,72,15,126,213 right before the label encodes `movq rbp,xmm2`.
;   Setup: eax = 0, rbx = QWORD[32+8+rsp] (per-word multiplier factor),
;     rdx = QWORD[48+8+rsp] (first result qword),
;     [0+8+rsp] = rbp+r9-64 (limit compared with rbp),
;     [8+8+rsp] = rdi as passed in (limit compared with r8 at the end),
;     rdi = rsp+48+8.
;   $L$sqrx8x_reduction_loop: loads r9..r15 from [8+rdi]..[56+rdi], keeps the
;     first word in r8, forms the multiplier rdx = rdx * rbx (imul), saves
;     rax at [24+8+rsp], advances rdi by 64, clears rsi, rcx = -8.
;   $L$sqrx8x_reduce: 8 passes (rcx -8..-1). Each pass multiplies rdx by the
;     eight qwords at [rbp]..[56+rbp] with mulx and two carry chains, saves
;     the multiplier at [64+48+8+rcx*8+rsp] and computes the next one as the
;     low half of r8 * QWORD[32+8+rsp] (the `mulx rdx,rbx,...` line; its high
;     half in rdx is overwritten by the next instruction). The raw
;     DB 0xc4,0x62,0xe3,0xf6,0xa5,0x20,... line is
;     `mulx r12,rbx,QWORD[32+rbp]`.
;   If rbp has reached QWORD[0+8+rsp] control goes to $L$sqrx8x_no_tail;
;     otherwise the next eight result qwords at [rdi] are added in, the carry
;     is saved as a mask at [16+8+rsp], and $L$sqrx8x_tail runs.
;   $L$sqrx8x_tail: 8 passes per 64-byte group at [rbp], reloading the saved
;     multipliers from [72+48+8+rcx*8+rsp] and storing one finished qword at
;     [rcx*8+rdi] per pass. The code after the loop is cut mid-instruction at
;     the end of this text and continues on the next line.
;   DB 0x67,0x67,0x67 are bare prefix bytes in front of `inc rcx`
;   (presumably padding -- confirm against the generator).
; ---------------------------------------------------------------------------
rdx,QWORD[24+rcx*1+rsi] 3374 lea rcx,[32+rcx] 3375 mov r10,QWORD[64+rdi] 3376 adox r13,r13 3377 adcx rbx,r11 3378 mov r11,QWORD[72+rdi] 3379 mov QWORD[32+rdi],rax 3380 mov QWORD[40+rdi],rbx 3381 3382 mulx rbx,rax,rdx 3383 adox r10,r10 3384 adcx rax,r12 3385 jrcxz $L$sqrx4x_shift_n_add_break 3386DB 0x48,0x8b,0x94,0x0e,0x00,0x00,0x00,0x00 3387 adox r11,r11 3388 adcx rbx,r13 3389 mov r12,QWORD[80+rdi] 3390 mov r13,QWORD[88+rdi] 3391 mov QWORD[48+rdi],rax 3392 mov QWORD[56+rdi],rbx 3393 lea rdi,[64+rdi] 3394 nop 3395 jmp NEAR $L$sqrx4x_shift_n_add 3396 3397ALIGN 32 3398$L$sqrx4x_shift_n_add_break: 3399 adcx rbx,r13 3400 mov QWORD[48+rdi],rax 3401 mov QWORD[56+rdi],rbx 3402 lea rdi,[64+rdi] 3403DB 102,72,15,126,213 3404__bn_sqrx8x_reduction: 3405 xor eax,eax 3406 mov rbx,QWORD[((32+8))+rsp] 3407 mov rdx,QWORD[((48+8))+rsp] 3408 lea rcx,[((-64))+r9*1+rbp] 3409 3410 mov QWORD[((0+8))+rsp],rcx 3411 mov QWORD[((8+8))+rsp],rdi 3412 3413 lea rdi,[((48+8))+rsp] 3414 jmp NEAR $L$sqrx8x_reduction_loop 3415 3416ALIGN 32 3417$L$sqrx8x_reduction_loop: 3418 mov r9,QWORD[8+rdi] 3419 mov r10,QWORD[16+rdi] 3420 mov r11,QWORD[24+rdi] 3421 mov r12,QWORD[32+rdi] 3422 mov r8,rdx 3423 imul rdx,rbx 3424 mov r13,QWORD[40+rdi] 3425 mov r14,QWORD[48+rdi] 3426 mov r15,QWORD[56+rdi] 3427 mov QWORD[((24+8))+rsp],rax 3428 3429 lea rdi,[64+rdi] 3430 xor rsi,rsi 3431 mov rcx,-8 3432 jmp NEAR $L$sqrx8x_reduce 3433 3434ALIGN 32 3435$L$sqrx8x_reduce: 3436 mov rbx,r8 3437 mulx r8,rax,QWORD[rbp] 3438 adcx rax,rbx 3439 adox r8,r9 3440 3441 mulx r9,rbx,QWORD[8+rbp] 3442 adcx r8,rbx 3443 adox r9,r10 3444 3445 mulx r10,rbx,QWORD[16+rbp] 3446 adcx r9,rbx 3447 adox r10,r11 3448 3449 mulx r11,rbx,QWORD[24+rbp] 3450 adcx r10,rbx 3451 adox r11,r12 3452 3453DB 0xc4,0x62,0xe3,0xf6,0xa5,0x20,0x00,0x00,0x00 3454 mov rax,rdx 3455 mov rdx,r8 3456 adcx r11,rbx 3457 adox r12,r13 3458 3459 mulx rdx,rbx,QWORD[((32+8))+rsp] 3460 mov rdx,rax 3461 mov QWORD[((64+48+8))+rcx*8+rsp],rax 3462 3463 mulx r13,rax,QWORD[40+rbp] 3464
adcx r12,rax 3465 adox r13,r14 3466 3467 mulx r14,rax,QWORD[48+rbp] 3468 adcx r13,rax 3469 adox r14,r15 3470 3471 mulx r15,rax,QWORD[56+rbp] 3472 mov rdx,rbx 3473 adcx r14,rax 3474 adox r15,rsi 3475 adcx r15,rsi 3476 3477DB 0x67,0x67,0x67 3478 inc rcx 3479 jnz NEAR $L$sqrx8x_reduce 3480 3481 mov rax,rsi 3482 cmp rbp,QWORD[((0+8))+rsp] 3483 jae NEAR $L$sqrx8x_no_tail 3484 3485 mov rdx,QWORD[((48+8))+rsp] 3486 add r8,QWORD[rdi] 3487 lea rbp,[64+rbp] 3488 mov rcx,-8 3489 adcx r9,QWORD[8+rdi] 3490 adcx r10,QWORD[16+rdi] 3491 adc r11,QWORD[24+rdi] 3492 adc r12,QWORD[32+rdi] 3493 adc r13,QWORD[40+rdi] 3494 adc r14,QWORD[48+rdi] 3495 adc r15,QWORD[56+rdi] 3496 lea rdi,[64+rdi] 3497 sbb rax,rax 3498 3499 xor rsi,rsi 3500 mov QWORD[((16+8))+rsp],rax 3501 jmp NEAR $L$sqrx8x_tail 3502 3503ALIGN 32 3504$L$sqrx8x_tail: 3505 mov rbx,r8 3506 mulx r8,rax,QWORD[rbp] 3507 adcx rbx,rax 3508 adox r8,r9 3509 3510 mulx r9,rax,QWORD[8+rbp] 3511 adcx r8,rax 3512 adox r9,r10 3513 3514 mulx r10,rax,QWORD[16+rbp] 3515 adcx r9,rax 3516 adox r10,r11 3517 3518 mulx r11,rax,QWORD[24+rbp] 3519 adcx r10,rax 3520 adox r11,r12 3521 3522DB 0xc4,0x62,0xfb,0xf6,0xa5,0x20,0x00,0x00,0x00 3523 adcx r11,rax 3524 adox r12,r13 3525 3526 mulx r13,rax,QWORD[40+rbp] 3527 adcx r12,rax 3528 adox r13,r14 3529 3530 mulx r14,rax,QWORD[48+rbp] 3531 adcx r13,rax 3532 adox r14,r15 3533 3534 mulx r15,rax,QWORD[56+rbp] 3535 mov rdx,QWORD[((72+48+8))+rcx*8+rsp] 3536 adcx r14,rax 3537 adox r15,rsi 3538 mov QWORD[rcx*8+rdi],rbx 3539 mov rbx,r8 3540 adcx r15,rsi 3541 3542 inc rcx 3543 jnz NEAR $L$sqrx8x_tail 3544 3545 cmp rbp,QWORD[((0+8))+rsp] 3546 jae NEAR $L$sqrx8x_tail_done 3547 3548 sub rsi,QWORD[((16+8))+rsp] 3549 mov rdx,QWORD[((48+8))+rsp] 3550 lea rbp,[64+rbp] 3551 adc r8,QWORD[rdi] 3552 adc r9,QWORD[8+rdi] 3553 adc r10,QWORD[16+rdi] 3554 adc r11,QWORD[24+rdi] 3555 adc r12,QWORD[32+rdi] 3556 adc r13,QWORD[40+rdi] 3557 adc r14,QWORD[48+rdi] 3558 adc r15,QWORD[56+rdi] 3559 lea rdi,[64+rdi] 3560 sbb rax,rax 3561 sub
rcx,8 3562 3563 xor rsi,rsi 3564 mov QWORD[((16+8))+rsp],rax 3565 jmp NEAR $L$sqrx8x_tail 3566 3567ALIGN 32 3568$L$sqrx8x_tail_done: 3569 xor rax,rax 3570 add r8,QWORD[((24+8))+rsp] 3571 adc r9,0 3572 adc r10,0 3573 adc r11,0 3574 adc r12,0 3575 adc r13,0 3576 adc r14,0 3577 adc r15,0 3578 adc rax,0 3579 3580 sub rsi,QWORD[((16+8))+rsp] 3581$L$sqrx8x_no_tail: 3582 adc r8,QWORD[rdi] 3583DB 102,72,15,126,217 3584 adc r9,QWORD[8+rdi] 3585 mov rsi,QWORD[56+rbp] 3586DB 102,72,15,126,213 3587 adc r10,QWORD[16+rdi] 3588 adc r11,QWORD[24+rdi] 3589 adc r12,QWORD[32+rdi] 3590 adc r13,QWORD[40+rdi] 3591 adc r14,QWORD[48+rdi] 3592 adc r15,QWORD[56+rdi] 3593 adc rax,0 3594 3595 mov rbx,QWORD[((32+8))+rsp] 3596 mov rdx,QWORD[64+rcx*1+rdi] 3597 3598 mov QWORD[rdi],r8 3599 lea r8,[64+rdi] 3600 mov QWORD[8+rdi],r9 3601 mov QWORD[16+rdi],r10 3602 mov QWORD[24+rdi],r11 3603 mov QWORD[32+rdi],r12 3604 mov QWORD[40+rdi],r13 3605 mov QWORD[48+rdi],r14 3606 mov QWORD[56+rdi],r15 3607 3608 lea rdi,[64+rcx*1+rdi] 3609 cmp r8,QWORD[((8+8))+rsp] 3610 jb NEAR $L$sqrx8x_reduction_loop 3611 DB 0F3h,0C3h ;repret 3612 3613 3614ALIGN 32 3615 3616__bn_postx4x_internal: 3617 3618 mov r12,QWORD[rbp] 3619 mov r10,rcx 3620 mov r9,rcx 3621 neg rax 3622 sar rcx,3+2 3623 3624DB 102,72,15,126,202 3625DB 102,72,15,126,206 3626 dec r12 3627 mov r13,QWORD[8+rbp] 3628 xor r8,r8 3629 mov r14,QWORD[16+rbp] 3630 mov r15,QWORD[24+rbp] 3631 jmp NEAR $L$sqrx4x_sub_entry 3632 3633ALIGN 16 3634$L$sqrx4x_sub: 3635 mov r12,QWORD[rbp] 3636 mov r13,QWORD[8+rbp] 3637 mov r14,QWORD[16+rbp] 3638 mov r15,QWORD[24+rbp] 3639$L$sqrx4x_sub_entry: 3640 andn r12,r12,rax 3641 lea rbp,[32+rbp] 3642 andn r13,r13,rax 3643 andn r14,r14,rax 3644 andn r15,r15,rax 3645 3646 neg r8 3647 adc r12,QWORD[rdi] 3648 adc r13,QWORD[8+rdi] 3649 adc r14,QWORD[16+rdi] 3650 adc r15,QWORD[24+rdi] 3651 mov QWORD[rdx],r12 3652 lea rdi,[32+rdi] 3653 mov QWORD[8+rdx],r13 3654 sbb r8,r8 3655 mov QWORD[16+rdx],r14 3656 mov QWORD[24+rdx],r15 3657 lea 
rdx,[32+rdx] 3658 3659 inc rcx 3660 jnz NEAR $L$sqrx4x_sub 3661 3662 neg r9 3663 3664 DB 0F3h,0C3h ;repret 3665 3666 3667global GFp_bn_scatter5 3668 3669ALIGN 16 3670GFp_bn_scatter5: 3671 3672 cmp edx,0 3673 jz NEAR $L$scatter_epilogue 3674 lea r8,[r9*8+r8] 3675$L$scatter: 3676 mov rax,QWORD[rcx] 3677 lea rcx,[8+rcx] 3678 mov QWORD[r8],rax 3679 lea r8,[256+r8] 3680 sub edx,1 3681 jnz NEAR $L$scatter 3682$L$scatter_epilogue: 3683 DB 0F3h,0C3h ;repret 3684 3685 3686 3687global GFp_bn_gather5 3688 3689ALIGN 32 3690GFp_bn_gather5: 3691 3692$L$SEH_begin_GFp_bn_gather5: 3693 3694DB 0x4c,0x8d,0x14,0x24 3695 3696DB 0x48,0x81,0xec,0x08,0x01,0x00,0x00 3697 lea rax,[$L$inc] 3698 and rsp,-16 3699 3700 movd xmm5,r9d 3701 movdqa xmm0,XMMWORD[rax] 3702 movdqa xmm1,XMMWORD[16+rax] 3703 lea r11,[128+r8] 3704 lea rax,[128+rsp] 3705 3706 pshufd xmm5,xmm5,0 3707 movdqa xmm4,xmm1 3708 movdqa xmm2,xmm1 3709 paddd xmm1,xmm0 3710 pcmpeqd xmm0,xmm5 3711 movdqa xmm3,xmm4 3712 3713 paddd xmm2,xmm1 3714 pcmpeqd xmm1,xmm5 3715 movdqa XMMWORD[(-128)+rax],xmm0 3716 movdqa xmm0,xmm4 3717 3718 paddd xmm3,xmm2 3719 pcmpeqd xmm2,xmm5 3720 movdqa XMMWORD[(-112)+rax],xmm1 3721 movdqa xmm1,xmm4 3722 3723 paddd xmm0,xmm3 3724 pcmpeqd xmm3,xmm5 3725 movdqa XMMWORD[(-96)+rax],xmm2 3726 movdqa xmm2,xmm4 3727 paddd xmm1,xmm0 3728 pcmpeqd xmm0,xmm5 3729 movdqa XMMWORD[(-80)+rax],xmm3 3730 movdqa xmm3,xmm4 3731 3732 paddd xmm2,xmm1 3733 pcmpeqd xmm1,xmm5 3734 movdqa XMMWORD[(-64)+rax],xmm0 3735 movdqa xmm0,xmm4 3736 3737 paddd xmm3,xmm2 3738 pcmpeqd xmm2,xmm5 3739 movdqa XMMWORD[(-48)+rax],xmm1 3740 movdqa xmm1,xmm4 3741 3742 paddd xmm0,xmm3 3743 pcmpeqd xmm3,xmm5 3744 movdqa XMMWORD[(-32)+rax],xmm2 3745 movdqa xmm2,xmm4 3746 paddd xmm1,xmm0 3747 pcmpeqd xmm0,xmm5 3748 movdqa XMMWORD[(-16)+rax],xmm3 3749 movdqa xmm3,xmm4 3750 3751 paddd xmm2,xmm1 3752 pcmpeqd xmm1,xmm5 3753 movdqa XMMWORD[rax],xmm0 3754 movdqa xmm0,xmm4 3755 3756 paddd xmm3,xmm2 3757 pcmpeqd xmm2,xmm5 3758 movdqa XMMWORD[16+rax],xmm1 3759 
movdqa xmm1,xmm4 3760 3761 paddd xmm0,xmm3 3762 pcmpeqd xmm3,xmm5 3763 movdqa XMMWORD[32+rax],xmm2 3764 movdqa xmm2,xmm4 3765 paddd xmm1,xmm0 3766 pcmpeqd xmm0,xmm5 3767 movdqa XMMWORD[48+rax],xmm3 3768 movdqa xmm3,xmm4 3769 3770 paddd xmm2,xmm1 3771 pcmpeqd xmm1,xmm5 3772 movdqa XMMWORD[64+rax],xmm0 3773 movdqa xmm0,xmm4 3774 3775 paddd xmm3,xmm2 3776 pcmpeqd xmm2,xmm5 3777 movdqa XMMWORD[80+rax],xmm1 3778 movdqa xmm1,xmm4 3779 3780 paddd xmm0,xmm3 3781 pcmpeqd xmm3,xmm5 3782 movdqa XMMWORD[96+rax],xmm2 3783 movdqa xmm2,xmm4 3784 movdqa XMMWORD[112+rax],xmm3 3785 jmp NEAR $L$gather 3786 3787ALIGN 32 3788$L$gather: 3789 pxor xmm4,xmm4 3790 pxor xmm5,xmm5 3791 movdqa xmm0,XMMWORD[((-128))+r11] 3792 movdqa xmm1,XMMWORD[((-112))+r11] 3793 movdqa xmm2,XMMWORD[((-96))+r11] 3794 pand xmm0,XMMWORD[((-128))+rax] 3795 movdqa xmm3,XMMWORD[((-80))+r11] 3796 pand xmm1,XMMWORD[((-112))+rax] 3797 por xmm4,xmm0 3798 pand xmm2,XMMWORD[((-96))+rax] 3799 por xmm5,xmm1 3800 pand xmm3,XMMWORD[((-80))+rax] 3801 por xmm4,xmm2 3802 por xmm5,xmm3 3803 movdqa xmm0,XMMWORD[((-64))+r11] 3804 movdqa xmm1,XMMWORD[((-48))+r11] 3805 movdqa xmm2,XMMWORD[((-32))+r11] 3806 pand xmm0,XMMWORD[((-64))+rax] 3807 movdqa xmm3,XMMWORD[((-16))+r11] 3808 pand xmm1,XMMWORD[((-48))+rax] 3809 por xmm4,xmm0 3810 pand xmm2,XMMWORD[((-32))+rax] 3811 por xmm5,xmm1 3812 pand xmm3,XMMWORD[((-16))+rax] 3813 por xmm4,xmm2 3814 por xmm5,xmm3 3815 movdqa xmm0,XMMWORD[r11] 3816 movdqa xmm1,XMMWORD[16+r11] 3817 movdqa xmm2,XMMWORD[32+r11] 3818 pand xmm0,XMMWORD[rax] 3819 movdqa xmm3,XMMWORD[48+r11] 3820 pand xmm1,XMMWORD[16+rax] 3821 por xmm4,xmm0 3822 pand xmm2,XMMWORD[32+rax] 3823 por xmm5,xmm1 3824 pand xmm3,XMMWORD[48+rax] 3825 por xmm4,xmm2 3826 por xmm5,xmm3 3827 movdqa xmm0,XMMWORD[64+r11] 3828 movdqa xmm1,XMMWORD[80+r11] 3829 movdqa xmm2,XMMWORD[96+r11] 3830 pand xmm0,XMMWORD[64+rax] 3831 movdqa xmm3,XMMWORD[112+r11] 3832 pand xmm1,XMMWORD[80+rax] 3833 por xmm4,xmm0 3834 pand xmm2,XMMWORD[96+rax] 3835 por 
xmm5,xmm1 3836 pand xmm3,XMMWORD[112+rax] 3837 por xmm4,xmm2 3838 por xmm5,xmm3 3839 por xmm4,xmm5 3840 lea r11,[256+r11] 3841 pshufd xmm0,xmm4,0x4e 3842 por xmm0,xmm4 3843 movq QWORD[rcx],xmm0 3844 lea rcx,[8+rcx] 3845 sub edx,1 3846 jnz NEAR $L$gather 3847 3848 lea rsp,[r10] 3849 3850 DB 0F3h,0C3h ;repret 3851$L$SEH_end_GFp_bn_gather5: 3852 3853 3854ALIGN 64 3855$L$inc: 3856 DD 0,0,1,1 3857 DD 2,2,2,2 3858DB 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105 3859DB 112,108,105,99,97,116,105,111,110,32,119,105,116,104,32,115 3860DB 99,97,116,116,101,114,47,103,97,116,104,101,114,32,102,111 3861DB 114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79 3862DB 71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111 3863DB 112,101,110,115,115,108,46,111,114,103,62,0 3864EXTERN __imp_RtlVirtualUnwind 3865 3866ALIGN 16 3867mul_handler: 3868 push rsi 3869 push rdi 3870 push rbx 3871 push rbp 3872 push r12 3873 push r13 3874 push r14 3875 push r15 3876 pushfq 3877 sub rsp,64 3878 3879 mov rax,QWORD[120+r8] 3880 mov rbx,QWORD[248+r8] 3881 3882 mov rsi,QWORD[8+r9] 3883 mov r11,QWORD[56+r9] 3884 3885 mov r10d,DWORD[r11] 3886 lea r10,[r10*1+rsi] 3887 cmp rbx,r10 3888 jb NEAR $L$common_seh_tail 3889 3890 mov r10d,DWORD[4+r11] 3891 lea r10,[r10*1+rsi] 3892 cmp rbx,r10 3893 jb NEAR $L$common_pop_regs 3894 3895 mov rax,QWORD[152+r8] 3896 3897 mov r10d,DWORD[8+r11] 3898 lea r10,[r10*1+rsi] 3899 cmp rbx,r10 3900 jae NEAR $L$common_seh_tail 3901 3902 lea r10,[$L$mul_epilogue] 3903 cmp rbx,r10 3904 ja NEAR $L$body_40 3905 3906 mov r10,QWORD[192+r8] 3907 mov rax,QWORD[8+r10*8+rax] 3908 3909 jmp NEAR $L$common_pop_regs 3910 3911$L$body_40: 3912 mov rax,QWORD[40+rax] 3913$L$common_pop_regs: 3914 mov rbx,QWORD[((-8))+rax] 3915 mov rbp,QWORD[((-16))+rax] 3916 mov r12,QWORD[((-24))+rax] 3917 mov r13,QWORD[((-32))+rax] 3918 mov r14,QWORD[((-40))+rax] 3919 mov r15,QWORD[((-48))+rax] 3920 mov QWORD[144+r8],rbx 3921 mov QWORD[160+r8],rbp 3922 mov QWORD[216+r8],r12 3923 mov QWORD[224+r8],r13 
3924 mov QWORD[232+r8],r14 3925 mov QWORD[240+r8],r15 3926 3927$L$common_seh_tail: 3928 mov rdi,QWORD[8+rax] 3929 mov rsi,QWORD[16+rax] 3930 mov QWORD[152+r8],rax 3931 mov QWORD[168+r8],rsi 3932 mov QWORD[176+r8],rdi 3933 3934 mov rdi,QWORD[40+r9] 3935 mov rsi,r8 3936 mov ecx,154 3937 DD 0xa548f3fc 3938 3939 mov rsi,r9 3940 xor rcx,rcx 3941 mov rdx,QWORD[8+rsi] 3942 mov r8,QWORD[rsi] 3943 mov r9,QWORD[16+rsi] 3944 mov r10,QWORD[40+rsi] 3945 lea r11,[56+rsi] 3946 lea r12,[24+rsi] 3947 mov QWORD[32+rsp],r10 3948 mov QWORD[40+rsp],r11 3949 mov QWORD[48+rsp],r12 3950 mov QWORD[56+rsp],rcx 3951 call QWORD[__imp_RtlVirtualUnwind] 3952 3953 mov eax,1 3954 add rsp,64 3955 popfq 3956 pop r15 3957 pop r14 3958 pop r13 3959 pop r12 3960 pop rbp 3961 pop rbx 3962 pop rdi 3963 pop rsi 3964 DB 0F3h,0C3h ;repret 3965 3966 3967section .pdata rdata align=4 3968ALIGN 4 3969 DD $L$SEH_begin_GFp_bn_mul_mont_gather5 wrt ..imagebase 3970 DD $L$SEH_end_GFp_bn_mul_mont_gather5 wrt ..imagebase 3971 DD $L$SEH_info_GFp_bn_mul_mont_gather5 wrt ..imagebase 3972 3973 DD $L$SEH_begin_bn_mul4x_mont_gather5 wrt ..imagebase 3974 DD $L$SEH_end_bn_mul4x_mont_gather5 wrt ..imagebase 3975 DD $L$SEH_info_bn_mul4x_mont_gather5 wrt ..imagebase 3976 3977 DD $L$SEH_begin_GFp_bn_power5 wrt ..imagebase 3978 DD $L$SEH_end_GFp_bn_power5 wrt ..imagebase 3979 DD $L$SEH_info_GFp_bn_power5 wrt ..imagebase 3980 3981 DD $L$SEH_begin_bn_from_mont8x wrt ..imagebase 3982 DD $L$SEH_end_bn_from_mont8x wrt ..imagebase 3983 DD $L$SEH_info_bn_from_mont8x wrt ..imagebase 3984 DD $L$SEH_begin_bn_mulx4x_mont_gather5 wrt ..imagebase 3985 DD $L$SEH_end_bn_mulx4x_mont_gather5 wrt ..imagebase 3986 DD $L$SEH_info_bn_mulx4x_mont_gather5 wrt ..imagebase 3987 3988 DD $L$SEH_begin_bn_powerx5 wrt ..imagebase 3989 DD $L$SEH_end_bn_powerx5 wrt ..imagebase 3990 DD $L$SEH_info_GFp_bn_powerx5 wrt ..imagebase 3991 DD $L$SEH_begin_GFp_bn_gather5 wrt ..imagebase 3992 DD $L$SEH_end_GFp_bn_gather5 wrt ..imagebase 3993 DD $L$SEH_info_GFp_bn_gather5 
wrt ..imagebase 3994 3995section .xdata rdata align=8 3996ALIGN 8 3997$L$SEH_info_GFp_bn_mul_mont_gather5: 3998DB 9,0,0,0 3999 DD mul_handler wrt ..imagebase 4000 DD $L$mul_body wrt ..imagebase,$L$mul_body wrt ..imagebase,$L$mul_epilogue wrt ..imagebase 4001ALIGN 8 4002$L$SEH_info_bn_mul4x_mont_gather5: 4003DB 9,0,0,0 4004 DD mul_handler wrt ..imagebase 4005 DD $L$mul4x_prologue wrt ..imagebase,$L$mul4x_body wrt ..imagebase,$L$mul4x_epilogue wrt ..imagebase 4006ALIGN 8 4007$L$SEH_info_GFp_bn_power5: 4008DB 9,0,0,0 4009 DD mul_handler wrt ..imagebase 4010 DD $L$power5_prologue wrt ..imagebase,$L$power5_body wrt ..imagebase,$L$power5_epilogue wrt ..imagebase 4011ALIGN 8 4012$L$SEH_info_bn_from_mont8x: 4013DB 9,0,0,0 4014 DD mul_handler wrt ..imagebase 4015 DD $L$from_prologue wrt ..imagebase,$L$from_body wrt ..imagebase,$L$from_epilogue wrt ..imagebase 4016ALIGN 8 4017$L$SEH_info_bn_mulx4x_mont_gather5: 4018DB 9,0,0,0 4019 DD mul_handler wrt ..imagebase 4020 DD $L$mulx4x_prologue wrt ..imagebase,$L$mulx4x_body wrt ..imagebase,$L$mulx4x_epilogue wrt ..imagebase 4021ALIGN 8 4022$L$SEH_info_GFp_bn_powerx5: 4023DB 9,0,0,0 4024 DD mul_handler wrt ..imagebase 4025 DD $L$powerx5_prologue wrt ..imagebase,$L$powerx5_body wrt ..imagebase,$L$powerx5_epilogue wrt ..imagebase 4026ALIGN 8 4027$L$SEH_info_GFp_bn_gather5: 4028DB 0x01,0x0b,0x03,0x0a 4029DB 0x0b,0x01,0x21,0x00 4030DB 0x04,0xa3,0x00,0x00 4031ALIGN 8 4032