/* $FreeBSD$ */
/* Do not modify. This file is auto-generated from rsaz-x86_64.pl. */
.text



/*
 * rsaz_512_sqr
 *
 * Repeated 8-limb (512-bit) squaring with a reduce/subtract step after
 * every pass.  Register roles below are what the code itself reads:
 *
 *   %rdi  not read by the squaring code; on the mulx path it is parked in
 *         %xmm0 (it is reused as scratch) and restored before the reduce
 *         call.  After each pass it is copied into %rsi, so the next pass
 *         squares whatever lives at (%rdi) -- presumably the result
 *         written by __rsaz_512_subtract (defined elsewhere; confirm).
 *   %rsi  pointer to eight 64-bit input limbs (offsets 0..56 are read).
 *   %rdx  parked in %xmm1 and moved to %rbp right before the reduce call
 *         (presumably the modulus pointer; confirm against the helpers).
 *   %rcx  saved at 128(%rsp); reloaded into %rdx before
 *         __rsaz_512_reducex (presumably the Montgomery n0 word; confirm).
 *   %r8d  pass counter, kept at 128+8(%rsp); loops until it reaches zero.
 *
 * Frame: 128+24 bytes.  0..127(%rsp) receives the 16-limb square; the low
 * eight limbs are loaded into %r8..%r15 for the reduce helper and the high
 * eight limbs (64..120(%rsp)) are added afterwards.  The carry of that
 * addition is turned into an all-ones/zero mask in %rcx via sbb before
 * __rsaz_512_subtract is called.
 *
 * Dispatch: if both bits of 0x80100 are set in the word at
 * OPENSSL_ia32cap_P+8 the mulx/adcx/adox loop (.Loop_sqrx) is used,
 * otherwise the mulq loop (.Loop_sqr).
 */
.globl	rsaz_512_sqr
.type	rsaz_512_sqr,@function
.align	32
rsaz_512_sqr:
.cfi_startproc
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-16
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56

	subq	$128+24,%rsp
.cfi_adjust_cfa_offset	128+24
.Lsqr_body:
.byte	102,72,15,110,202	/* movq %rdx,%xmm1 */
	movq	(%rsi),%rdx
	movq	8(%rsi),%rax
	movq	%rcx,128(%rsp)
	movl	$0x80100,%r11d
	andl	OPENSSL_ia32cap_P+8(%rip),%r11d
	cmpl	$0x80100,%r11d
	je	.Loop_sqrx
	jmp	.Loop_sqr

.align	32
.Loop_sqr:
	movl	%r8d,128+8(%rsp)

	movq	%rdx,%rbx
	movq	%rax,%rbp
	mulq	%rdx
	movq	%rax,%r8
	movq	16(%rsi),%rax
	movq	%rdx,%r9

	mulq	%rbx
	addq	%rax,%r9
	movq	24(%rsi),%rax
	movq	%rdx,%r10
	adcq	$0,%r10

	mulq	%rbx
	addq	%rax,%r10
	movq	32(%rsi),%rax
	movq	%rdx,%r11
	adcq	$0,%r11

	mulq	%rbx
	addq	%rax,%r11
	movq	40(%rsi),%rax
	movq	%rdx,%r12
	adcq	$0,%r12

	mulq	%rbx
	addq	%rax,%r12
	movq	48(%rsi),%rax
	movq	%rdx,%r13
	adcq	$0,%r13

	mulq	%rbx
	addq	%rax,%r13
	movq	56(%rsi),%rax
	movq	%rdx,%r14
	adcq	$0,%r14

	mulq	%rbx
	addq	%rax,%r14
	movq	%rbx,%rax
	adcq	$0,%rdx

	xorq	%rcx,%rcx
	addq	%r8,%r8
	movq	%rdx,%r15
	adcq	$0,%rcx

	mulq	%rax
	addq	%r8,%rdx
	adcq	$0,%rcx

	movq	%rax,(%rsp)
	movq	%rdx,8(%rsp)


	movq	16(%rsi),%rax
	mulq	%rbp
	addq	%rax,%r10
	movq	24(%rsi),%rax
	movq	%rdx,%rbx
	adcq	$0,%rbx

	mulq	%rbp
	addq	%rax,%r11
	movq	32(%rsi),%rax
	adcq	$0,%rdx
	addq	%rbx,%r11
	movq	%rdx,%rbx
	adcq	$0,%rbx

	mulq	%rbp
	addq	%rax,%r12
	movq	40(%rsi),%rax
	adcq	$0,%rdx
	addq	%rbx,%r12
	movq	%rdx,%rbx
	adcq	$0,%rbx

	mulq	%rbp
	addq	%rax,%r13
	movq	48(%rsi),%rax
	adcq	$0,%rdx
	addq	%rbx,%r13
	movq	%rdx,%rbx
	adcq	$0,%rbx

	mulq	%rbp
	addq	%rax,%r14
	movq	56(%rsi),%rax
	adcq	$0,%rdx
	addq	%rbx,%r14
	movq	%rdx,%rbx
	adcq	$0,%rbx

	mulq	%rbp
	addq	%rax,%r15
	movq	%rbp,%rax
	adcq	$0,%rdx
	addq	%rbx,%r15
	adcq	$0,%rdx

	xorq	%rbx,%rbx
	addq	%r9,%r9
	movq	%rdx,%r8
	adcq	%r10,%r10
	adcq	$0,%rbx

	mulq	%rax

	addq	%rcx,%rax
	movq	16(%rsi),%rbp
	addq	%rax,%r9
	movq	24(%rsi),%rax
	adcq	%rdx,%r10
	adcq	$0,%rbx

	movq	%r9,16(%rsp)
	movq	%r10,24(%rsp)


	mulq	%rbp
	addq	%rax,%r12
	movq	32(%rsi),%rax
	movq	%rdx,%rcx
	adcq	$0,%rcx

	mulq	%rbp
	addq	%rax,%r13
	movq	40(%rsi),%rax
	adcq	$0,%rdx
	addq	%rcx,%r13
	movq	%rdx,%rcx
	adcq	$0,%rcx

	mulq	%rbp
	addq	%rax,%r14
	movq	48(%rsi),%rax
	adcq	$0,%rdx
	addq	%rcx,%r14
	movq	%rdx,%rcx
	adcq	$0,%rcx

	mulq	%rbp
	addq	%rax,%r15
	movq	56(%rsi),%rax
	adcq	$0,%rdx
	addq	%rcx,%r15
	movq	%rdx,%rcx
	adcq	$0,%rcx

	mulq	%rbp
	addq	%rax,%r8
	movq	%rbp,%rax
	adcq	$0,%rdx
	addq	%rcx,%r8
	adcq	$0,%rdx

	xorq	%rcx,%rcx
	addq	%r11,%r11
	movq	%rdx,%r9
	adcq	%r12,%r12
	adcq	$0,%rcx

	mulq	%rax

	addq	%rbx,%rax
	movq	24(%rsi),%r10
	addq	%rax,%r11
	movq	32(%rsi),%rax
	adcq	%rdx,%r12
	adcq	$0,%rcx

	movq	%r11,32(%rsp)
	movq	%r12,40(%rsp)


	movq	%rax,%r11
	mulq	%r10
	addq	%rax,%r14
	movq	40(%rsi),%rax
	movq	%rdx,%rbx
	adcq	$0,%rbx

	movq	%rax,%r12
	mulq	%r10
	addq	%rax,%r15
	movq	48(%rsi),%rax
	adcq	$0,%rdx
	addq	%rbx,%r15
	movq	%rdx,%rbx
	adcq	$0,%rbx

	movq	%rax,%rbp
	mulq	%r10
	addq	%rax,%r8
	movq	56(%rsi),%rax
	adcq	$0,%rdx
	addq	%rbx,%r8
	movq	%rdx,%rbx
	adcq	$0,%rbx

	mulq	%r10
	addq	%rax,%r9
	movq	%r10,%rax
	adcq	$0,%rdx
	addq	%rbx,%r9
	adcq	$0,%rdx

	xorq	%rbx,%rbx
	addq	%r13,%r13
	movq	%rdx,%r10
	adcq	%r14,%r14
	adcq	$0,%rbx

	mulq	%rax

	addq	%rcx,%rax
	addq	%rax,%r13
	movq	%r12,%rax
	adcq	%rdx,%r14
	adcq	$0,%rbx

	movq	%r13,48(%rsp)
	movq	%r14,56(%rsp)


	mulq	%r11
	addq	%rax,%r8
	movq	%rbp,%rax
	movq	%rdx,%rcx
	adcq	$0,%rcx

	mulq	%r11
	addq	%rax,%r9
	movq	56(%rsi),%rax
	adcq	$0,%rdx
	addq	%rcx,%r9
	movq	%rdx,%rcx
	adcq	$0,%rcx

	movq	%rax,%r14
	mulq	%r11
	addq	%rax,%r10
	movq	%r11,%rax
	adcq	$0,%rdx
	addq	%rcx,%r10
	adcq	$0,%rdx

	xorq	%rcx,%rcx
	addq	%r15,%r15
	movq	%rdx,%r11
	adcq	%r8,%r8
	adcq	$0,%rcx

	mulq	%rax

	addq	%rbx,%rax
	addq	%rax,%r15
	movq	%rbp,%rax
	adcq	%rdx,%r8
	adcq	$0,%rcx

	movq	%r15,64(%rsp)
	movq	%r8,72(%rsp)


	mulq	%r12
	addq	%rax,%r10
	movq	%r14,%rax
	movq	%rdx,%rbx
	adcq	$0,%rbx

	mulq	%r12
	addq	%rax,%r11
	movq	%r12,%rax
	adcq	$0,%rdx
	addq	%rbx,%r11
	adcq	$0,%rdx

	xorq	%rbx,%rbx
	addq	%r9,%r9
	movq	%rdx,%r12
	adcq	%r10,%r10
	adcq	$0,%rbx

	mulq	%rax

	addq	%rcx,%rax
	addq	%rax,%r9
	movq	%r14,%rax
	adcq	%rdx,%r10
	adcq	$0,%rbx

	movq	%r9,80(%rsp)
	movq	%r10,88(%rsp)


	mulq	%rbp
	addq	%rax,%r12
	movq	%rbp,%rax
	adcq	$0,%rdx

	xorq	%rcx,%rcx
	addq	%r11,%r11
	movq	%rdx,%r13
	adcq	%r12,%r12
	adcq	$0,%rcx

	mulq	%rax

	addq	%rbx,%rax
	addq	%rax,%r11
	movq	%r14,%rax
	adcq	%rdx,%r12
	adcq	$0,%rcx

	movq	%r11,96(%rsp)
	movq	%r12,104(%rsp)


	xorq	%rbx,%rbx
	addq	%r13,%r13
	adcq	$0,%rbx

	mulq	%rax

	addq	%rcx,%rax
	addq	%r13,%rax
	adcq	%rbx,%rdx

	movq	(%rsp),%r8
	movq	8(%rsp),%r9
	movq	16(%rsp),%r10
	movq	24(%rsp),%r11
	movq	32(%rsp),%r12
	movq	40(%rsp),%r13
	movq	48(%rsp),%r14
	movq	56(%rsp),%r15
.byte	102,72,15,126,205	/* movq %xmm1,%rbp */

	movq	%rax,112(%rsp)
	movq	%rdx,120(%rsp)

	call	__rsaz_512_reduce

	addq	64(%rsp),%r8
	adcq	72(%rsp),%r9
	adcq	80(%rsp),%r10
	adcq	88(%rsp),%r11
	adcq	96(%rsp),%r12
	adcq	104(%rsp),%r13
	adcq	112(%rsp),%r14
	adcq	120(%rsp),%r15
	sbbq	%rcx,%rcx

	call	__rsaz_512_subtract

	movq	%r8,%rdx
	movq	%r9,%rax
	movl	128+8(%rsp),%r8d
	movq	%rdi,%rsi

	decl	%r8d
	jnz	.Loop_sqr
	jmp	.Lsqr_tail

.align	32
.Loop_sqrx:
	movl	%r8d,128+8(%rsp)
.byte	102,72,15,110,199	/* movq %rdi,%xmm0 */

	mulxq	%rax,%r8,%r9
	movq	%rax,%rbx

	mulxq	16(%rsi),%rcx,%r10
	xorq	%rbp,%rbp

	mulxq	24(%rsi),%rax,%r11
	adcxq	%rcx,%r9

.byte	0xc4,0x62,0xf3,0xf6,0xa6,0x20,0x00,0x00,0x00
	adcxq	%rax,%r10

.byte	0xc4,0x62,0xfb,0xf6,0xae,0x28,0x00,0x00,0x00
	adcxq	%rcx,%r11

	mulxq	48(%rsi),%rcx,%r14
	adcxq	%rax,%r12
	adcxq	%rcx,%r13

	mulxq	56(%rsi),%rax,%r15
	adcxq	%rax,%r14
	adcxq	%rbp,%r15

	mulxq	%rdx,%rax,%rdi
	movq	%rbx,%rdx
	xorq	%rcx,%rcx
	adoxq	%r8,%r8
	adcxq	%rdi,%r8
	adoxq	%rbp,%rcx
	adcxq	%rbp,%rcx

	movq	%rax,(%rsp)
	movq	%r8,8(%rsp)


.byte	0xc4,0xe2,0xfb,0xf6,0x9e,0x10,0x00,0x00,0x00
	adoxq	%rax,%r10
	adcxq	%rbx,%r11

	mulxq	24(%rsi),%rdi,%r8
	adoxq	%rdi,%r11
.byte	0x66
	adcxq	%r8,%r12

	mulxq	32(%rsi),%rax,%rbx
	adoxq	%rax,%r12
	adcxq	%rbx,%r13

	mulxq	40(%rsi),%rdi,%r8
	adoxq	%rdi,%r13
	adcxq	%r8,%r14

.byte	0xc4,0xe2,0xfb,0xf6,0x9e,0x30,0x00,0x00,0x00
	adoxq	%rax,%r14
	adcxq	%rbx,%r15

.byte	0xc4,0x62,0xc3,0xf6,0x86,0x38,0x00,0x00,0x00
	adoxq	%rdi,%r15
	adcxq	%rbp,%r8
	mulxq	%rdx,%rax,%rdi
	adoxq	%rbp,%r8
.byte	0x48,0x8b,0x96,0x10,0x00,0x00,0x00

	xorq	%rbx,%rbx
	adoxq	%r9,%r9

	adcxq	%rcx,%rax
	adoxq	%r10,%r10
	adcxq	%rax,%r9
	adoxq	%rbp,%rbx
	adcxq	%rdi,%r10
	adcxq	%rbp,%rbx

	movq	%r9,16(%rsp)
.byte	0x4c,0x89,0x94,0x24,0x18,0x00,0x00,0x00


	mulxq	24(%rsi),%rdi,%r9
	adoxq	%rdi,%r12
	adcxq	%r9,%r13

	mulxq	32(%rsi),%rax,%rcx
	adoxq	%rax,%r13
	adcxq	%rcx,%r14

.byte	0xc4,0x62,0xc3,0xf6,0x8e,0x28,0x00,0x00,0x00
	adoxq	%rdi,%r14
	adcxq	%r9,%r15

.byte	0xc4,0xe2,0xfb,0xf6,0x8e,0x30,0x00,0x00,0x00
	adoxq	%rax,%r15
	adcxq	%rcx,%r8

	mulxq	56(%rsi),%rdi,%r9
	adoxq	%rdi,%r8
	adcxq	%rbp,%r9
	mulxq	%rdx,%rax,%rdi
	adoxq	%rbp,%r9
	movq	24(%rsi),%rdx

	xorq	%rcx,%rcx
	adoxq	%r11,%r11

	adcxq	%rbx,%rax
	adoxq	%r12,%r12
	adcxq	%rax,%r11
	adoxq	%rbp,%rcx
	adcxq	%rdi,%r12
	adcxq	%rbp,%rcx

	movq	%r11,32(%rsp)
	movq	%r12,40(%rsp)


	mulxq	32(%rsi),%rax,%rbx
	adoxq	%rax,%r14
	adcxq	%rbx,%r15

	mulxq	40(%rsi),%rdi,%r10
	adoxq	%rdi,%r15
	adcxq	%r10,%r8

	mulxq	48(%rsi),%rax,%rbx
	adoxq	%rax,%r8
	adcxq	%rbx,%r9

	mulxq	56(%rsi),%rdi,%r10
	adoxq	%rdi,%r9
	adcxq	%rbp,%r10
	mulxq	%rdx,%rax,%rdi
	adoxq	%rbp,%r10
	movq	32(%rsi),%rdx

	xorq	%rbx,%rbx
	adoxq	%r13,%r13

	adcxq	%rcx,%rax
	adoxq	%r14,%r14
	adcxq	%rax,%r13
	adoxq	%rbp,%rbx
	adcxq	%rdi,%r14
	adcxq	%rbp,%rbx

	movq	%r13,48(%rsp)
	movq	%r14,56(%rsp)


	mulxq	40(%rsi),%rdi,%r11
	adoxq	%rdi,%r8
	adcxq	%r11,%r9

	mulxq	48(%rsi),%rax,%rcx
	adoxq	%rax,%r9
	adcxq	%rcx,%r10

	mulxq	56(%rsi),%rdi,%r11
	adoxq	%rdi,%r10
	adcxq	%rbp,%r11
	mulxq	%rdx,%rax,%rdi
	movq	40(%rsi),%rdx
	adoxq	%rbp,%r11

	xorq	%rcx,%rcx
	adoxq	%r15,%r15

	adcxq	%rbx,%rax
	adoxq	%r8,%r8
	adcxq	%rax,%r15
	adoxq	%rbp,%rcx
	adcxq	%rdi,%r8
	adcxq	%rbp,%rcx

	movq	%r15,64(%rsp)
	movq	%r8,72(%rsp)


.byte	0xc4,0xe2,0xfb,0xf6,0x9e,0x30,0x00,0x00,0x00
	adoxq	%rax,%r10
	adcxq	%rbx,%r11

.byte	0xc4,0x62,0xc3,0xf6,0xa6,0x38,0x00,0x00,0x00
	adoxq	%rdi,%r11
	adcxq	%rbp,%r12
	mulxq	%rdx,%rax,%rdi
	adoxq	%rbp,%r12
	movq	48(%rsi),%rdx

	xorq	%rbx,%rbx
	adoxq	%r9,%r9

	adcxq	%rcx,%rax
	adoxq	%r10,%r10
	adcxq	%rax,%r9
	adcxq	%rdi,%r10
	adoxq	%rbp,%rbx
	adcxq	%rbp,%rbx

	movq	%r9,80(%rsp)
	movq	%r10,88(%rsp)


.byte	0xc4,0x62,0xfb,0xf6,0xae,0x38,0x00,0x00,0x00
	adoxq	%rax,%r12
	adoxq	%rbp,%r13

	mulxq	%rdx,%rax,%rdi
	xorq	%rcx,%rcx
	movq	56(%rsi),%rdx
	adoxq	%r11,%r11

	adcxq	%rbx,%rax
	adoxq	%r12,%r12
	adcxq	%rax,%r11
	adoxq	%rbp,%rcx
	adcxq	%rdi,%r12
	adcxq	%rbp,%rcx

.byte	0x4c,0x89,0x9c,0x24,0x60,0x00,0x00,0x00
.byte	0x4c,0x89,0xa4,0x24,0x68,0x00,0x00,0x00


	mulxq	%rdx,%rax,%rdx
	xorq	%rbx,%rbx
	adoxq	%r13,%r13

	adcxq	%rcx,%rax
	adoxq	%rbp,%rbx
	adcxq	%r13,%rax
	adcxq	%rdx,%rbx

.byte	102,72,15,126,199	/* movq %xmm0,%rdi */
.byte	102,72,15,126,205	/* movq %xmm1,%rbp */

	movq	128(%rsp),%rdx
	movq	(%rsp),%r8
	movq	8(%rsp),%r9
	movq	16(%rsp),%r10
	movq	24(%rsp),%r11
	movq	32(%rsp),%r12
	movq	40(%rsp),%r13
	movq	48(%rsp),%r14
	movq	56(%rsp),%r15

	movq	%rax,112(%rsp)
	movq	%rbx,120(%rsp)

	call	__rsaz_512_reducex

	addq	64(%rsp),%r8
	adcq	72(%rsp),%r9
	adcq	80(%rsp),%r10
	adcq	88(%rsp),%r11
	adcq	96(%rsp),%r12
	adcq	104(%rsp),%r13
	adcq	112(%rsp),%r14
	adcq	120(%rsp),%r15
	sbbq	%rcx,%rcx

	call	__rsaz_512_subtract

	movq	%r8,%rdx
	movq	%r9,%rax
	movl	128+8(%rsp),%r8d
	movq	%rdi,%rsi

	decl	%r8d
	jnz	.Loop_sqrx

.Lsqr_tail:

	leaq	128+24+48(%rsp),%rax
.cfi_def_cfa	%rax,8
	movq	-48(%rax),%r15
.cfi_restore	%r15
	movq	-40(%rax),%r14
.cfi_restore	%r14
	movq	-32(%rax),%r13
.cfi_restore	%r13
	movq	-24(%rax),%r12
.cfi_restore	%r12
	movq	-16(%rax),%rbp
.cfi_restore	%rbp
	movq	-8(%rax),%rbx
.cfi_restore	%rbx
	leaq	(%rax),%rsp
.cfi_def_cfa_register	%rsp
.Lsqr_epilogue:
	.byte	0xf3,0xc3	/* rep ret */
.cfi_endproc
.size	rsaz_512_sqr,.-rsaz_512_sqr
/*
 * rsaz_512_mul
 *
 * One 512-bit multiply followed by a reduce/subtract step.  Register
 * roles below are what the code itself reads:
 *
 *   %rdi  parked in %xmm0 across the multiply helper and restored
 *         afterwards (presumably the result pointer used by
 *         __rsaz_512_subtract; confirm).
 *   %rsi  passed through untouched to __rsaz_512_mul / __rsaz_512_mulx.
 *   %rdx  pointer to the second operand: its first limb is preloaded
 *         (into %rbx for the mulq helper, %rdx for the mulx helper) and
 *         the pointer itself is handed over in %rbp.
 *   %rcx  parked in %xmm1 and moved to %rbp before the reduce call
 *         (presumably the modulus pointer; confirm).
 *   %r8   saved at 128(%rsp); reloaded into %rdx before
 *         __rsaz_512_reducex (presumably the Montgomery n0 word; confirm).
 *
 * Frame and post-processing mirror rsaz_512_sqr: low eight product limbs
 * are loaded from 0..56(%rsp), reduced, then the high eight limbs at
 * 64..120(%rsp) are added and the carry mask in %rcx is passed to
 * __rsaz_512_subtract.  The mulx helper is chosen when both bits of
 * 0x80100 are set in the word at OPENSSL_ia32cap_P+8.
 */
.globl	rsaz_512_mul
.type	rsaz_512_mul,@function
.align	32
rsaz_512_mul:
.cfi_startproc
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-16
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56

	subq	$128+24,%rsp
.cfi_adjust_cfa_offset	128+24
.Lmul_body:
.byte	102,72,15,110,199	/* movq %rdi,%xmm0 */
.byte	102,72,15,110,201	/* movq %rcx,%xmm1 */
	movq	%r8,128(%rsp)
	movl	$0x80100,%r11d
	andl	OPENSSL_ia32cap_P+8(%rip),%r11d
	cmpl	$0x80100,%r11d
	je	.Lmulx
	movq	(%rdx),%rbx
	movq	%rdx,%rbp
	call	__rsaz_512_mul

.byte	102,72,15,126,199	/* movq %xmm0,%rdi */
.byte	102,72,15,126,205	/* movq %xmm1,%rbp */

	movq	(%rsp),%r8
	movq	8(%rsp),%r9
	movq	16(%rsp),%r10
	movq	24(%rsp),%r11
	movq	32(%rsp),%r12
	movq	40(%rsp),%r13
	movq	48(%rsp),%r14
	movq	56(%rsp),%r15

	call	__rsaz_512_reduce
	jmp	.Lmul_tail

.align	32
.Lmulx:
	movq	%rdx,%rbp
	movq	(%rdx),%rdx
	call	__rsaz_512_mulx

.byte	102,72,15,126,199	/* movq %xmm0,%rdi */
.byte	102,72,15,126,205	/* movq %xmm1,%rbp */

	movq	128(%rsp),%rdx
	movq	(%rsp),%r8
	movq	8(%rsp),%r9
	movq	16(%rsp),%r10
	movq	24(%rsp),%r11
	movq	32(%rsp),%r12
	movq	40(%rsp),%r13
	movq	48(%rsp),%r14
	movq	56(%rsp),%r15

	call	__rsaz_512_reducex
.Lmul_tail:
	addq	64(%rsp),%r8
	adcq	72(%rsp),%r9
	adcq	80(%rsp),%r10
	adcq	88(%rsp),%r11
	adcq	96(%rsp),%r12
	adcq	104(%rsp),%r13
	adcq	112(%rsp),%r14
	adcq	120(%rsp),%r15
	sbbq	%rcx,%rcx

	call	__rsaz_512_subtract

	leaq	128+24+48(%rsp),%rax
.cfi_def_cfa	%rax,8
	movq	-48(%rax),%r15
.cfi_restore	%r15
	movq	-40(%rax),%r14
.cfi_restore	%r14
	movq	-32(%rax),%r13
.cfi_restore	%r13
	movq	-24(%rax),%r12
.cfi_restore	%r12
	movq	-16(%rax),%rbp
.cfi_restore	%rbp
	movq	-8(%rax),%rbx
.cfi_restore	%rbx
	leaq	(%rax),%rsp
.cfi_def_cfa_register	%rsp
.Lmul_epilogue:
	.byte	0xf3,0xc3	/* rep ret */
.cfi_endproc
.size	rsaz_512_mul,.-rsaz_512_mul
.globl	rsaz_512_mul_gather4
.type	rsaz_512_mul_gather4,@function
.align	32
rsaz_512_mul_gather4:
.cfi_startproc
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-16
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56

	subq	$152,%rsp
.cfi_adjust_cfa_offset	152
.Lmul_gather4_body:
	movd	%r9d,%xmm8
	movdqa	.Linc+16(%rip),%xmm1
	movdqa	.Linc(%rip),%xmm0

	pshufd	$0,%xmm8,%xmm8
	movdqa	%xmm1,%xmm7
	movdqa	%xmm1,%xmm2
	paddd	%xmm0,%xmm1
	pcmpeqd	%xmm8,%xmm0
	movdqa	%xmm7,%xmm3
	paddd	%xmm1,%xmm2
	pcmpeqd	%xmm8,%xmm1
	movdqa	%xmm7,%xmm4
	paddd	%xmm2,%xmm3
	pcmpeqd	%xmm8,%xmm2
	movdqa	%xmm7,%xmm5
	paddd	%xmm3,%xmm4
	pcmpeqd	%xmm8,%xmm3
	movdqa	%xmm7,%xmm6
	paddd	%xmm4,%xmm5
	pcmpeqd	%xmm8,%xmm4
	paddd	%xmm5,%xmm6
	pcmpeqd	%xmm8,%xmm5
	paddd	%xmm6,%xmm7
	pcmpeqd	%xmm8,%xmm6
	pcmpeqd	%xmm8,%xmm7

	movdqa	0(%rdx),%xmm8
	movdqa	16(%rdx),%xmm9
	movdqa	32(%rdx),%xmm10
	movdqa	48(%rdx),%xmm11
	pand	%xmm0,%xmm8
	movdqa	64(%rdx),%xmm12
	pand	%xmm1,%xmm9
	movdqa	80(%rdx),%xmm13
	pand	%xmm2,%xmm10
	movdqa	96(%rdx),%xmm14
	pand	%xmm3,%xmm11
	movdqa	112(%rdx),%xmm15
	leaq	128(%rdx),%rbp
	pand	%xmm4,%xmm12
	pand	%xmm5,%xmm13
	pand	%xmm6,%xmm14
	pand	%xmm7,%xmm15
	por	%xmm10,%xmm8
	por	%xmm11,%xmm9
	por	%xmm12,%xmm8
	por	%xmm13,%xmm9
	por	%xmm14,%xmm8
	por	%xmm15,%xmm9

	por	%xmm9,%xmm8
	pshufd	$0x4e,%xmm8,%xmm9
	por	%xmm9,%xmm8
	movl	$0x80100,%r11d
	andl	OPENSSL_ia32cap_P+8(%rip),%r11d
	cmpl	$0x80100,%r11d
	je	.Lmulx_gather
.byte	102,76,15,126,195

	movq	%r8,128(%rsp)
	movq	%rdi,128+8(%rsp)
	movq	%rcx,128+16(%rsp)

	movq	(%rsi),%rax
	movq	8(%rsi),%rcx
	mulq	%rbx
	movq	%rax,(%rsp)
	movq	%rcx,%rax
	movq	%rdx,%r8

	mulq	%rbx
	addq	%rax,%r8
movq 16(%rsi),%rax 916bc3d5698SJohn Baldwin movq %rdx,%r9 917bc3d5698SJohn Baldwin adcq $0,%r9 918bc3d5698SJohn Baldwin 919bc3d5698SJohn Baldwin mulq %rbx 920bc3d5698SJohn Baldwin addq %rax,%r9 921bc3d5698SJohn Baldwin movq 24(%rsi),%rax 922bc3d5698SJohn Baldwin movq %rdx,%r10 923bc3d5698SJohn Baldwin adcq $0,%r10 924bc3d5698SJohn Baldwin 925bc3d5698SJohn Baldwin mulq %rbx 926bc3d5698SJohn Baldwin addq %rax,%r10 927bc3d5698SJohn Baldwin movq 32(%rsi),%rax 928bc3d5698SJohn Baldwin movq %rdx,%r11 929bc3d5698SJohn Baldwin adcq $0,%r11 930bc3d5698SJohn Baldwin 931bc3d5698SJohn Baldwin mulq %rbx 932bc3d5698SJohn Baldwin addq %rax,%r11 933bc3d5698SJohn Baldwin movq 40(%rsi),%rax 934bc3d5698SJohn Baldwin movq %rdx,%r12 935bc3d5698SJohn Baldwin adcq $0,%r12 936bc3d5698SJohn Baldwin 937bc3d5698SJohn Baldwin mulq %rbx 938bc3d5698SJohn Baldwin addq %rax,%r12 939bc3d5698SJohn Baldwin movq 48(%rsi),%rax 940bc3d5698SJohn Baldwin movq %rdx,%r13 941bc3d5698SJohn Baldwin adcq $0,%r13 942bc3d5698SJohn Baldwin 943bc3d5698SJohn Baldwin mulq %rbx 944bc3d5698SJohn Baldwin addq %rax,%r13 945bc3d5698SJohn Baldwin movq 56(%rsi),%rax 946bc3d5698SJohn Baldwin movq %rdx,%r14 947bc3d5698SJohn Baldwin adcq $0,%r14 948bc3d5698SJohn Baldwin 949bc3d5698SJohn Baldwin mulq %rbx 950bc3d5698SJohn Baldwin addq %rax,%r14 951bc3d5698SJohn Baldwin movq (%rsi),%rax 952bc3d5698SJohn Baldwin movq %rdx,%r15 953bc3d5698SJohn Baldwin adcq $0,%r15 954bc3d5698SJohn Baldwin 955bc3d5698SJohn Baldwin leaq 8(%rsp),%rdi 956bc3d5698SJohn Baldwin movl $7,%ecx 957bc3d5698SJohn Baldwin jmp .Loop_mul_gather 958bc3d5698SJohn Baldwin 959bc3d5698SJohn Baldwin.align 32 960bc3d5698SJohn Baldwin.Loop_mul_gather: 961bc3d5698SJohn Baldwin movdqa 0(%rbp),%xmm8 962bc3d5698SJohn Baldwin movdqa 16(%rbp),%xmm9 963bc3d5698SJohn Baldwin movdqa 32(%rbp),%xmm10 964bc3d5698SJohn Baldwin movdqa 48(%rbp),%xmm11 965bc3d5698SJohn Baldwin pand %xmm0,%xmm8 966bc3d5698SJohn Baldwin movdqa 64(%rbp),%xmm12 967bc3d5698SJohn Baldwin pand %xmm1,%xmm9 
968bc3d5698SJohn Baldwin movdqa 80(%rbp),%xmm13 969bc3d5698SJohn Baldwin pand %xmm2,%xmm10 970bc3d5698SJohn Baldwin movdqa 96(%rbp),%xmm14 971bc3d5698SJohn Baldwin pand %xmm3,%xmm11 972bc3d5698SJohn Baldwin movdqa 112(%rbp),%xmm15 973bc3d5698SJohn Baldwin leaq 128(%rbp),%rbp 974bc3d5698SJohn Baldwin pand %xmm4,%xmm12 975bc3d5698SJohn Baldwin pand %xmm5,%xmm13 976bc3d5698SJohn Baldwin pand %xmm6,%xmm14 977bc3d5698SJohn Baldwin pand %xmm7,%xmm15 978bc3d5698SJohn Baldwin por %xmm10,%xmm8 979bc3d5698SJohn Baldwin por %xmm11,%xmm9 980bc3d5698SJohn Baldwin por %xmm12,%xmm8 981bc3d5698SJohn Baldwin por %xmm13,%xmm9 982bc3d5698SJohn Baldwin por %xmm14,%xmm8 983bc3d5698SJohn Baldwin por %xmm15,%xmm9 984bc3d5698SJohn Baldwin 985bc3d5698SJohn Baldwin por %xmm9,%xmm8 986bc3d5698SJohn Baldwin pshufd $0x4e,%xmm8,%xmm9 987bc3d5698SJohn Baldwin por %xmm9,%xmm8 988bc3d5698SJohn Baldwin.byte 102,76,15,126,195 989bc3d5698SJohn Baldwin 990bc3d5698SJohn Baldwin mulq %rbx 991bc3d5698SJohn Baldwin addq %rax,%r8 992bc3d5698SJohn Baldwin movq 8(%rsi),%rax 993bc3d5698SJohn Baldwin movq %r8,(%rdi) 994bc3d5698SJohn Baldwin movq %rdx,%r8 995bc3d5698SJohn Baldwin adcq $0,%r8 996bc3d5698SJohn Baldwin 997bc3d5698SJohn Baldwin mulq %rbx 998bc3d5698SJohn Baldwin addq %rax,%r9 999bc3d5698SJohn Baldwin movq 16(%rsi),%rax 1000bc3d5698SJohn Baldwin adcq $0,%rdx 1001bc3d5698SJohn Baldwin addq %r9,%r8 1002bc3d5698SJohn Baldwin movq %rdx,%r9 1003bc3d5698SJohn Baldwin adcq $0,%r9 1004bc3d5698SJohn Baldwin 1005bc3d5698SJohn Baldwin mulq %rbx 1006bc3d5698SJohn Baldwin addq %rax,%r10 1007bc3d5698SJohn Baldwin movq 24(%rsi),%rax 1008bc3d5698SJohn Baldwin adcq $0,%rdx 1009bc3d5698SJohn Baldwin addq %r10,%r9 1010bc3d5698SJohn Baldwin movq %rdx,%r10 1011bc3d5698SJohn Baldwin adcq $0,%r10 1012bc3d5698SJohn Baldwin 1013bc3d5698SJohn Baldwin mulq %rbx 1014bc3d5698SJohn Baldwin addq %rax,%r11 1015bc3d5698SJohn Baldwin movq 32(%rsi),%rax 1016bc3d5698SJohn Baldwin adcq $0,%rdx 1017bc3d5698SJohn Baldwin addq %r11,%r10 
1018bc3d5698SJohn Baldwin movq %rdx,%r11 1019bc3d5698SJohn Baldwin adcq $0,%r11 1020bc3d5698SJohn Baldwin 1021bc3d5698SJohn Baldwin mulq %rbx 1022bc3d5698SJohn Baldwin addq %rax,%r12 1023bc3d5698SJohn Baldwin movq 40(%rsi),%rax 1024bc3d5698SJohn Baldwin adcq $0,%rdx 1025bc3d5698SJohn Baldwin addq %r12,%r11 1026bc3d5698SJohn Baldwin movq %rdx,%r12 1027bc3d5698SJohn Baldwin adcq $0,%r12 1028bc3d5698SJohn Baldwin 1029bc3d5698SJohn Baldwin mulq %rbx 1030bc3d5698SJohn Baldwin addq %rax,%r13 1031bc3d5698SJohn Baldwin movq 48(%rsi),%rax 1032bc3d5698SJohn Baldwin adcq $0,%rdx 1033bc3d5698SJohn Baldwin addq %r13,%r12 1034bc3d5698SJohn Baldwin movq %rdx,%r13 1035bc3d5698SJohn Baldwin adcq $0,%r13 1036bc3d5698SJohn Baldwin 1037bc3d5698SJohn Baldwin mulq %rbx 1038bc3d5698SJohn Baldwin addq %rax,%r14 1039bc3d5698SJohn Baldwin movq 56(%rsi),%rax 1040bc3d5698SJohn Baldwin adcq $0,%rdx 1041bc3d5698SJohn Baldwin addq %r14,%r13 1042bc3d5698SJohn Baldwin movq %rdx,%r14 1043bc3d5698SJohn Baldwin adcq $0,%r14 1044bc3d5698SJohn Baldwin 1045bc3d5698SJohn Baldwin mulq %rbx 1046bc3d5698SJohn Baldwin addq %rax,%r15 1047bc3d5698SJohn Baldwin movq (%rsi),%rax 1048bc3d5698SJohn Baldwin adcq $0,%rdx 1049bc3d5698SJohn Baldwin addq %r15,%r14 1050bc3d5698SJohn Baldwin movq %rdx,%r15 1051bc3d5698SJohn Baldwin adcq $0,%r15 1052bc3d5698SJohn Baldwin 1053bc3d5698SJohn Baldwin leaq 8(%rdi),%rdi 1054bc3d5698SJohn Baldwin 1055bc3d5698SJohn Baldwin decl %ecx 1056bc3d5698SJohn Baldwin jnz .Loop_mul_gather 1057bc3d5698SJohn Baldwin 1058bc3d5698SJohn Baldwin movq %r8,(%rdi) 1059bc3d5698SJohn Baldwin movq %r9,8(%rdi) 1060bc3d5698SJohn Baldwin movq %r10,16(%rdi) 1061bc3d5698SJohn Baldwin movq %r11,24(%rdi) 1062bc3d5698SJohn Baldwin movq %r12,32(%rdi) 1063bc3d5698SJohn Baldwin movq %r13,40(%rdi) 1064bc3d5698SJohn Baldwin movq %r14,48(%rdi) 1065bc3d5698SJohn Baldwin movq %r15,56(%rdi) 1066bc3d5698SJohn Baldwin 1067bc3d5698SJohn Baldwin movq 128+8(%rsp),%rdi 1068bc3d5698SJohn Baldwin movq 128+16(%rsp),%rbp 
1069bc3d5698SJohn Baldwin 1070bc3d5698SJohn Baldwin movq (%rsp),%r8 1071bc3d5698SJohn Baldwin movq 8(%rsp),%r9 1072bc3d5698SJohn Baldwin movq 16(%rsp),%r10 1073bc3d5698SJohn Baldwin movq 24(%rsp),%r11 1074bc3d5698SJohn Baldwin movq 32(%rsp),%r12 1075bc3d5698SJohn Baldwin movq 40(%rsp),%r13 1076bc3d5698SJohn Baldwin movq 48(%rsp),%r14 1077bc3d5698SJohn Baldwin movq 56(%rsp),%r15 1078bc3d5698SJohn Baldwin 1079bc3d5698SJohn Baldwin call __rsaz_512_reduce 1080bc3d5698SJohn Baldwin jmp .Lmul_gather_tail 1081bc3d5698SJohn Baldwin 1082bc3d5698SJohn Baldwin.align 32 1083bc3d5698SJohn Baldwin.Lmulx_gather: 1084bc3d5698SJohn Baldwin.byte 102,76,15,126,194 1085bc3d5698SJohn Baldwin 1086bc3d5698SJohn Baldwin movq %r8,128(%rsp) 1087bc3d5698SJohn Baldwin movq %rdi,128+8(%rsp) 1088bc3d5698SJohn Baldwin movq %rcx,128+16(%rsp) 1089bc3d5698SJohn Baldwin 1090bc3d5698SJohn Baldwin mulxq (%rsi),%rbx,%r8 1091bc3d5698SJohn Baldwin movq %rbx,(%rsp) 1092bc3d5698SJohn Baldwin xorl %edi,%edi 1093bc3d5698SJohn Baldwin 1094bc3d5698SJohn Baldwin mulxq 8(%rsi),%rax,%r9 1095bc3d5698SJohn Baldwin 1096bc3d5698SJohn Baldwin mulxq 16(%rsi),%rbx,%r10 1097bc3d5698SJohn Baldwin adcxq %rax,%r8 1098bc3d5698SJohn Baldwin 1099bc3d5698SJohn Baldwin mulxq 24(%rsi),%rax,%r11 1100bc3d5698SJohn Baldwin adcxq %rbx,%r9 1101bc3d5698SJohn Baldwin 1102bc3d5698SJohn Baldwin mulxq 32(%rsi),%rbx,%r12 1103bc3d5698SJohn Baldwin adcxq %rax,%r10 1104bc3d5698SJohn Baldwin 1105bc3d5698SJohn Baldwin mulxq 40(%rsi),%rax,%r13 1106bc3d5698SJohn Baldwin adcxq %rbx,%r11 1107bc3d5698SJohn Baldwin 1108bc3d5698SJohn Baldwin mulxq 48(%rsi),%rbx,%r14 1109bc3d5698SJohn Baldwin adcxq %rax,%r12 1110bc3d5698SJohn Baldwin 1111bc3d5698SJohn Baldwin mulxq 56(%rsi),%rax,%r15 1112bc3d5698SJohn Baldwin adcxq %rbx,%r13 1113bc3d5698SJohn Baldwin adcxq %rax,%r14 1114bc3d5698SJohn Baldwin.byte 0x67 1115bc3d5698SJohn Baldwin movq %r8,%rbx 1116bc3d5698SJohn Baldwin adcxq %rdi,%r15 1117bc3d5698SJohn Baldwin 1118bc3d5698SJohn Baldwin movq $-7,%rcx 
1119bc3d5698SJohn Baldwin jmp .Loop_mulx_gather 1120bc3d5698SJohn Baldwin 1121bc3d5698SJohn Baldwin.align 32 1122bc3d5698SJohn Baldwin.Loop_mulx_gather: 1123bc3d5698SJohn Baldwin movdqa 0(%rbp),%xmm8 1124bc3d5698SJohn Baldwin movdqa 16(%rbp),%xmm9 1125bc3d5698SJohn Baldwin movdqa 32(%rbp),%xmm10 1126bc3d5698SJohn Baldwin movdqa 48(%rbp),%xmm11 1127bc3d5698SJohn Baldwin pand %xmm0,%xmm8 1128bc3d5698SJohn Baldwin movdqa 64(%rbp),%xmm12 1129bc3d5698SJohn Baldwin pand %xmm1,%xmm9 1130bc3d5698SJohn Baldwin movdqa 80(%rbp),%xmm13 1131bc3d5698SJohn Baldwin pand %xmm2,%xmm10 1132bc3d5698SJohn Baldwin movdqa 96(%rbp),%xmm14 1133bc3d5698SJohn Baldwin pand %xmm3,%xmm11 1134bc3d5698SJohn Baldwin movdqa 112(%rbp),%xmm15 1135bc3d5698SJohn Baldwin leaq 128(%rbp),%rbp 1136bc3d5698SJohn Baldwin pand %xmm4,%xmm12 1137bc3d5698SJohn Baldwin pand %xmm5,%xmm13 1138bc3d5698SJohn Baldwin pand %xmm6,%xmm14 1139bc3d5698SJohn Baldwin pand %xmm7,%xmm15 1140bc3d5698SJohn Baldwin por %xmm10,%xmm8 1141bc3d5698SJohn Baldwin por %xmm11,%xmm9 1142bc3d5698SJohn Baldwin por %xmm12,%xmm8 1143bc3d5698SJohn Baldwin por %xmm13,%xmm9 1144bc3d5698SJohn Baldwin por %xmm14,%xmm8 1145bc3d5698SJohn Baldwin por %xmm15,%xmm9 1146bc3d5698SJohn Baldwin 1147bc3d5698SJohn Baldwin por %xmm9,%xmm8 1148bc3d5698SJohn Baldwin pshufd $0x4e,%xmm8,%xmm9 1149bc3d5698SJohn Baldwin por %xmm9,%xmm8 1150bc3d5698SJohn Baldwin.byte 102,76,15,126,194 1151bc3d5698SJohn Baldwin 1152bc3d5698SJohn Baldwin.byte 0xc4,0x62,0xfb,0xf6,0x86,0x00,0x00,0x00,0x00 1153bc3d5698SJohn Baldwin adcxq %rax,%rbx 1154bc3d5698SJohn Baldwin adoxq %r9,%r8 1155bc3d5698SJohn Baldwin 1156bc3d5698SJohn Baldwin mulxq 8(%rsi),%rax,%r9 1157bc3d5698SJohn Baldwin adcxq %rax,%r8 1158bc3d5698SJohn Baldwin adoxq %r10,%r9 1159bc3d5698SJohn Baldwin 1160bc3d5698SJohn Baldwin mulxq 16(%rsi),%rax,%r10 1161bc3d5698SJohn Baldwin adcxq %rax,%r9 1162bc3d5698SJohn Baldwin adoxq %r11,%r10 1163bc3d5698SJohn Baldwin 1164bc3d5698SJohn Baldwin.byte 
0xc4,0x62,0xfb,0xf6,0x9e,0x18,0x00,0x00,0x00 1165bc3d5698SJohn Baldwin adcxq %rax,%r10 1166bc3d5698SJohn Baldwin adoxq %r12,%r11 1167bc3d5698SJohn Baldwin 1168bc3d5698SJohn Baldwin mulxq 32(%rsi),%rax,%r12 1169bc3d5698SJohn Baldwin adcxq %rax,%r11 1170bc3d5698SJohn Baldwin adoxq %r13,%r12 1171bc3d5698SJohn Baldwin 1172bc3d5698SJohn Baldwin mulxq 40(%rsi),%rax,%r13 1173bc3d5698SJohn Baldwin adcxq %rax,%r12 1174bc3d5698SJohn Baldwin adoxq %r14,%r13 1175bc3d5698SJohn Baldwin 1176bc3d5698SJohn Baldwin.byte 0xc4,0x62,0xfb,0xf6,0xb6,0x30,0x00,0x00,0x00 1177bc3d5698SJohn Baldwin adcxq %rax,%r13 1178bc3d5698SJohn Baldwin.byte 0x67 1179bc3d5698SJohn Baldwin adoxq %r15,%r14 1180bc3d5698SJohn Baldwin 1181bc3d5698SJohn Baldwin mulxq 56(%rsi),%rax,%r15 1182bc3d5698SJohn Baldwin movq %rbx,64(%rsp,%rcx,8) 1183bc3d5698SJohn Baldwin adcxq %rax,%r14 1184bc3d5698SJohn Baldwin adoxq %rdi,%r15 1185bc3d5698SJohn Baldwin movq %r8,%rbx 1186bc3d5698SJohn Baldwin adcxq %rdi,%r15 1187bc3d5698SJohn Baldwin 1188bc3d5698SJohn Baldwin incq %rcx 1189bc3d5698SJohn Baldwin jnz .Loop_mulx_gather 1190bc3d5698SJohn Baldwin 1191bc3d5698SJohn Baldwin movq %r8,64(%rsp) 1192bc3d5698SJohn Baldwin movq %r9,64+8(%rsp) 1193bc3d5698SJohn Baldwin movq %r10,64+16(%rsp) 1194bc3d5698SJohn Baldwin movq %r11,64+24(%rsp) 1195bc3d5698SJohn Baldwin movq %r12,64+32(%rsp) 1196bc3d5698SJohn Baldwin movq %r13,64+40(%rsp) 1197bc3d5698SJohn Baldwin movq %r14,64+48(%rsp) 1198bc3d5698SJohn Baldwin movq %r15,64+56(%rsp) 1199bc3d5698SJohn Baldwin 1200bc3d5698SJohn Baldwin movq 128(%rsp),%rdx 1201bc3d5698SJohn Baldwin movq 128+8(%rsp),%rdi 1202bc3d5698SJohn Baldwin movq 128+16(%rsp),%rbp 1203bc3d5698SJohn Baldwin 1204bc3d5698SJohn Baldwin movq (%rsp),%r8 1205bc3d5698SJohn Baldwin movq 8(%rsp),%r9 1206bc3d5698SJohn Baldwin movq 16(%rsp),%r10 1207bc3d5698SJohn Baldwin movq 24(%rsp),%r11 1208bc3d5698SJohn Baldwin movq 32(%rsp),%r12 1209bc3d5698SJohn Baldwin movq 40(%rsp),%r13 1210bc3d5698SJohn Baldwin movq 48(%rsp),%r14 
1211bc3d5698SJohn Baldwin movq 56(%rsp),%r15 1212bc3d5698SJohn Baldwin 1213bc3d5698SJohn Baldwin call __rsaz_512_reducex 1214bc3d5698SJohn Baldwin 1215bc3d5698SJohn Baldwin.Lmul_gather_tail: 1216bc3d5698SJohn Baldwin addq 64(%rsp),%r8 1217bc3d5698SJohn Baldwin adcq 72(%rsp),%r9 1218bc3d5698SJohn Baldwin adcq 80(%rsp),%r10 1219bc3d5698SJohn Baldwin adcq 88(%rsp),%r11 1220bc3d5698SJohn Baldwin adcq 96(%rsp),%r12 1221bc3d5698SJohn Baldwin adcq 104(%rsp),%r13 1222bc3d5698SJohn Baldwin adcq 112(%rsp),%r14 1223bc3d5698SJohn Baldwin adcq 120(%rsp),%r15 1224bc3d5698SJohn Baldwin sbbq %rcx,%rcx 1225bc3d5698SJohn Baldwin 1226bc3d5698SJohn Baldwin call __rsaz_512_subtract 1227bc3d5698SJohn Baldwin 1228bc3d5698SJohn Baldwin leaq 128+24+48(%rsp),%rax 1229bc3d5698SJohn Baldwin.cfi_def_cfa %rax,8 1230bc3d5698SJohn Baldwin movq -48(%rax),%r15 1231bc3d5698SJohn Baldwin.cfi_restore %r15 1232bc3d5698SJohn Baldwin movq -40(%rax),%r14 1233bc3d5698SJohn Baldwin.cfi_restore %r14 1234bc3d5698SJohn Baldwin movq -32(%rax),%r13 1235bc3d5698SJohn Baldwin.cfi_restore %r13 1236bc3d5698SJohn Baldwin movq -24(%rax),%r12 1237bc3d5698SJohn Baldwin.cfi_restore %r12 1238bc3d5698SJohn Baldwin movq -16(%rax),%rbp 1239bc3d5698SJohn Baldwin.cfi_restore %rbp 1240bc3d5698SJohn Baldwin movq -8(%rax),%rbx 1241bc3d5698SJohn Baldwin.cfi_restore %rbx 1242bc3d5698SJohn Baldwin leaq (%rax),%rsp 1243bc3d5698SJohn Baldwin.cfi_def_cfa_register %rsp 1244bc3d5698SJohn Baldwin.Lmul_gather4_epilogue: 1245bc3d5698SJohn Baldwin .byte 0xf3,0xc3 1246bc3d5698SJohn Baldwin.cfi_endproc 1247bc3d5698SJohn Baldwin.size rsaz_512_mul_gather4,.-rsaz_512_mul_gather4 1248bc3d5698SJohn Baldwin.globl rsaz_512_mul_scatter4 1249bc3d5698SJohn Baldwin.type rsaz_512_mul_scatter4,@function 1250bc3d5698SJohn Baldwin.align 32 1251bc3d5698SJohn Baldwinrsaz_512_mul_scatter4: 1252bc3d5698SJohn Baldwin.cfi_startproc 1253bc3d5698SJohn Baldwin pushq %rbx 1254bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset 8 1255bc3d5698SJohn Baldwin.cfi_offset %rbx,-16 
1256bc3d5698SJohn Baldwin pushq %rbp 1257bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset 8 1258bc3d5698SJohn Baldwin.cfi_offset %rbp,-24 1259bc3d5698SJohn Baldwin pushq %r12 1260bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset 8 1261bc3d5698SJohn Baldwin.cfi_offset %r12,-32 1262bc3d5698SJohn Baldwin pushq %r13 1263bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset 8 1264bc3d5698SJohn Baldwin.cfi_offset %r13,-40 1265bc3d5698SJohn Baldwin pushq %r14 1266bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset 8 1267bc3d5698SJohn Baldwin.cfi_offset %r14,-48 1268bc3d5698SJohn Baldwin pushq %r15 1269bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset 8 1270bc3d5698SJohn Baldwin.cfi_offset %r15,-56 1271bc3d5698SJohn Baldwin 1272bc3d5698SJohn Baldwin movl %r9d,%r9d 1273bc3d5698SJohn Baldwin subq $128+24,%rsp 1274bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset 128+24 1275bc3d5698SJohn Baldwin.Lmul_scatter4_body: 1276bc3d5698SJohn Baldwin leaq (%r8,%r9,8),%r8 1277bc3d5698SJohn Baldwin.byte 102,72,15,110,199 1278bc3d5698SJohn Baldwin.byte 102,72,15,110,202 1279bc3d5698SJohn Baldwin.byte 102,73,15,110,208 1280bc3d5698SJohn Baldwin movq %rcx,128(%rsp) 1281bc3d5698SJohn Baldwin 1282bc3d5698SJohn Baldwin movq %rdi,%rbp 1283bc3d5698SJohn Baldwin movl $0x80100,%r11d 1284bc3d5698SJohn Baldwin andl OPENSSL_ia32cap_P+8(%rip),%r11d 1285bc3d5698SJohn Baldwin cmpl $0x80100,%r11d 1286bc3d5698SJohn Baldwin je .Lmulx_scatter 1287bc3d5698SJohn Baldwin movq (%rdi),%rbx 1288bc3d5698SJohn Baldwin call __rsaz_512_mul 1289bc3d5698SJohn Baldwin 1290bc3d5698SJohn Baldwin.byte 102,72,15,126,199 1291bc3d5698SJohn Baldwin.byte 102,72,15,126,205 1292bc3d5698SJohn Baldwin 1293bc3d5698SJohn Baldwin movq (%rsp),%r8 1294bc3d5698SJohn Baldwin movq 8(%rsp),%r9 1295bc3d5698SJohn Baldwin movq 16(%rsp),%r10 1296bc3d5698SJohn Baldwin movq 24(%rsp),%r11 1297bc3d5698SJohn Baldwin movq 32(%rsp),%r12 1298bc3d5698SJohn Baldwin movq 40(%rsp),%r13 1299bc3d5698SJohn Baldwin movq 48(%rsp),%r14 1300bc3d5698SJohn Baldwin movq 56(%rsp),%r15 1301bc3d5698SJohn 
Baldwin 1302bc3d5698SJohn Baldwin call __rsaz_512_reduce 1303bc3d5698SJohn Baldwin jmp .Lmul_scatter_tail 1304bc3d5698SJohn Baldwin 1305bc3d5698SJohn Baldwin.align 32 1306bc3d5698SJohn Baldwin.Lmulx_scatter: 1307bc3d5698SJohn Baldwin movq (%rdi),%rdx 1308bc3d5698SJohn Baldwin call __rsaz_512_mulx 1309bc3d5698SJohn Baldwin 1310bc3d5698SJohn Baldwin.byte 102,72,15,126,199 1311bc3d5698SJohn Baldwin.byte 102,72,15,126,205 1312bc3d5698SJohn Baldwin 1313bc3d5698SJohn Baldwin movq 128(%rsp),%rdx 1314bc3d5698SJohn Baldwin movq (%rsp),%r8 1315bc3d5698SJohn Baldwin movq 8(%rsp),%r9 1316bc3d5698SJohn Baldwin movq 16(%rsp),%r10 1317bc3d5698SJohn Baldwin movq 24(%rsp),%r11 1318bc3d5698SJohn Baldwin movq 32(%rsp),%r12 1319bc3d5698SJohn Baldwin movq 40(%rsp),%r13 1320bc3d5698SJohn Baldwin movq 48(%rsp),%r14 1321bc3d5698SJohn Baldwin movq 56(%rsp),%r15 1322bc3d5698SJohn Baldwin 1323bc3d5698SJohn Baldwin call __rsaz_512_reducex 1324bc3d5698SJohn Baldwin 1325bc3d5698SJohn Baldwin.Lmul_scatter_tail: 1326bc3d5698SJohn Baldwin addq 64(%rsp),%r8 1327bc3d5698SJohn Baldwin adcq 72(%rsp),%r9 1328bc3d5698SJohn Baldwin adcq 80(%rsp),%r10 1329bc3d5698SJohn Baldwin adcq 88(%rsp),%r11 1330bc3d5698SJohn Baldwin adcq 96(%rsp),%r12 1331bc3d5698SJohn Baldwin adcq 104(%rsp),%r13 1332bc3d5698SJohn Baldwin adcq 112(%rsp),%r14 1333bc3d5698SJohn Baldwin adcq 120(%rsp),%r15 1334bc3d5698SJohn Baldwin.byte 102,72,15,126,214 1335bc3d5698SJohn Baldwin sbbq %rcx,%rcx 1336bc3d5698SJohn Baldwin 1337bc3d5698SJohn Baldwin call __rsaz_512_subtract 1338bc3d5698SJohn Baldwin 1339bc3d5698SJohn Baldwin movq %r8,0(%rsi) 1340bc3d5698SJohn Baldwin movq %r9,128(%rsi) 1341bc3d5698SJohn Baldwin movq %r10,256(%rsi) 1342bc3d5698SJohn Baldwin movq %r11,384(%rsi) 1343bc3d5698SJohn Baldwin movq %r12,512(%rsi) 1344bc3d5698SJohn Baldwin movq %r13,640(%rsi) 1345bc3d5698SJohn Baldwin movq %r14,768(%rsi) 1346bc3d5698SJohn Baldwin movq %r15,896(%rsi) 1347bc3d5698SJohn Baldwin 1348bc3d5698SJohn Baldwin leaq 128+24+48(%rsp),%rax 
1349bc3d5698SJohn Baldwin.cfi_def_cfa %rax,8 1350bc3d5698SJohn Baldwin movq -48(%rax),%r15 1351bc3d5698SJohn Baldwin.cfi_restore %r15 1352bc3d5698SJohn Baldwin movq -40(%rax),%r14 1353bc3d5698SJohn Baldwin.cfi_restore %r14 1354bc3d5698SJohn Baldwin movq -32(%rax),%r13 1355bc3d5698SJohn Baldwin.cfi_restore %r13 1356bc3d5698SJohn Baldwin movq -24(%rax),%r12 1357bc3d5698SJohn Baldwin.cfi_restore %r12 1358bc3d5698SJohn Baldwin movq -16(%rax),%rbp 1359bc3d5698SJohn Baldwin.cfi_restore %rbp 1360bc3d5698SJohn Baldwin movq -8(%rax),%rbx 1361bc3d5698SJohn Baldwin.cfi_restore %rbx 1362bc3d5698SJohn Baldwin leaq (%rax),%rsp 1363bc3d5698SJohn Baldwin.cfi_def_cfa_register %rsp 1364bc3d5698SJohn Baldwin.Lmul_scatter4_epilogue: 1365bc3d5698SJohn Baldwin .byte 0xf3,0xc3 1366bc3d5698SJohn Baldwin.cfi_endproc 1367bc3d5698SJohn Baldwin.size rsaz_512_mul_scatter4,.-rsaz_512_mul_scatter4 1368bc3d5698SJohn Baldwin.globl rsaz_512_mul_by_one 1369bc3d5698SJohn Baldwin.type rsaz_512_mul_by_one,@function 1370bc3d5698SJohn Baldwin.align 32 1371bc3d5698SJohn Baldwinrsaz_512_mul_by_one: 1372bc3d5698SJohn Baldwin.cfi_startproc 1373bc3d5698SJohn Baldwin pushq %rbx 1374bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset 8 1375bc3d5698SJohn Baldwin.cfi_offset %rbx,-16 1376bc3d5698SJohn Baldwin pushq %rbp 1377bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset 8 1378bc3d5698SJohn Baldwin.cfi_offset %rbp,-24 1379bc3d5698SJohn Baldwin pushq %r12 1380bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset 8 1381bc3d5698SJohn Baldwin.cfi_offset %r12,-32 1382bc3d5698SJohn Baldwin pushq %r13 1383bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset 8 1384bc3d5698SJohn Baldwin.cfi_offset %r13,-40 1385bc3d5698SJohn Baldwin pushq %r14 1386bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset 8 1387bc3d5698SJohn Baldwin.cfi_offset %r14,-48 1388bc3d5698SJohn Baldwin pushq %r15 1389bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset 8 1390bc3d5698SJohn Baldwin.cfi_offset %r15,-56 1391bc3d5698SJohn Baldwin 1392bc3d5698SJohn Baldwin subq $128+24,%rsp 1393bc3d5698SJohn 
Baldwin.cfi_adjust_cfa_offset 128+24 1394bc3d5698SJohn Baldwin.Lmul_by_one_body: 1395bc3d5698SJohn Baldwin movl OPENSSL_ia32cap_P+8(%rip),%eax 1396bc3d5698SJohn Baldwin movq %rdx,%rbp 1397bc3d5698SJohn Baldwin movq %rcx,128(%rsp) 1398bc3d5698SJohn Baldwin 1399bc3d5698SJohn Baldwin movq (%rsi),%r8 1400bc3d5698SJohn Baldwin pxor %xmm0,%xmm0 1401bc3d5698SJohn Baldwin movq 8(%rsi),%r9 1402bc3d5698SJohn Baldwin movq 16(%rsi),%r10 1403bc3d5698SJohn Baldwin movq 24(%rsi),%r11 1404bc3d5698SJohn Baldwin movq 32(%rsi),%r12 1405bc3d5698SJohn Baldwin movq 40(%rsi),%r13 1406bc3d5698SJohn Baldwin movq 48(%rsi),%r14 1407bc3d5698SJohn Baldwin movq 56(%rsi),%r15 1408bc3d5698SJohn Baldwin 1409bc3d5698SJohn Baldwin movdqa %xmm0,(%rsp) 1410bc3d5698SJohn Baldwin movdqa %xmm0,16(%rsp) 1411bc3d5698SJohn Baldwin movdqa %xmm0,32(%rsp) 1412bc3d5698SJohn Baldwin movdqa %xmm0,48(%rsp) 1413bc3d5698SJohn Baldwin movdqa %xmm0,64(%rsp) 1414bc3d5698SJohn Baldwin movdqa %xmm0,80(%rsp) 1415bc3d5698SJohn Baldwin movdqa %xmm0,96(%rsp) 1416bc3d5698SJohn Baldwin andl $0x80100,%eax 1417bc3d5698SJohn Baldwin cmpl $0x80100,%eax 1418bc3d5698SJohn Baldwin je .Lby_one_callx 1419bc3d5698SJohn Baldwin call __rsaz_512_reduce 1420bc3d5698SJohn Baldwin jmp .Lby_one_tail 1421bc3d5698SJohn Baldwin.align 32 1422bc3d5698SJohn Baldwin.Lby_one_callx: 1423bc3d5698SJohn Baldwin movq 128(%rsp),%rdx 1424bc3d5698SJohn Baldwin call __rsaz_512_reducex 1425bc3d5698SJohn Baldwin.Lby_one_tail: 1426bc3d5698SJohn Baldwin movq %r8,(%rdi) 1427bc3d5698SJohn Baldwin movq %r9,8(%rdi) 1428bc3d5698SJohn Baldwin movq %r10,16(%rdi) 1429bc3d5698SJohn Baldwin movq %r11,24(%rdi) 1430bc3d5698SJohn Baldwin movq %r12,32(%rdi) 1431bc3d5698SJohn Baldwin movq %r13,40(%rdi) 1432bc3d5698SJohn Baldwin movq %r14,48(%rdi) 1433bc3d5698SJohn Baldwin movq %r15,56(%rdi) 1434bc3d5698SJohn Baldwin 1435bc3d5698SJohn Baldwin leaq 128+24+48(%rsp),%rax 1436bc3d5698SJohn Baldwin.cfi_def_cfa %rax,8 1437bc3d5698SJohn Baldwin movq -48(%rax),%r15 1438bc3d5698SJohn 
Baldwin.cfi_restore %r15 1439bc3d5698SJohn Baldwin movq -40(%rax),%r14 1440bc3d5698SJohn Baldwin.cfi_restore %r14 1441bc3d5698SJohn Baldwin movq -32(%rax),%r13 1442bc3d5698SJohn Baldwin.cfi_restore %r13 1443bc3d5698SJohn Baldwin movq -24(%rax),%r12 1444bc3d5698SJohn Baldwin.cfi_restore %r12 1445bc3d5698SJohn Baldwin movq -16(%rax),%rbp 1446bc3d5698SJohn Baldwin.cfi_restore %rbp 1447bc3d5698SJohn Baldwin movq -8(%rax),%rbx 1448bc3d5698SJohn Baldwin.cfi_restore %rbx 1449bc3d5698SJohn Baldwin leaq (%rax),%rsp 1450bc3d5698SJohn Baldwin.cfi_def_cfa_register %rsp 1451bc3d5698SJohn Baldwin.Lmul_by_one_epilogue: 1452bc3d5698SJohn Baldwin .byte 0xf3,0xc3 1453bc3d5698SJohn Baldwin.cfi_endproc 1454bc3d5698SJohn Baldwin.size rsaz_512_mul_by_one,.-rsaz_512_mul_by_one 1455bc3d5698SJohn Baldwin.type __rsaz_512_reduce,@function 1456bc3d5698SJohn Baldwin.align 32 1457bc3d5698SJohn Baldwin__rsaz_512_reduce: 1458bc3d5698SJohn Baldwin.cfi_startproc 1459bc3d5698SJohn Baldwin movq %r8,%rbx 1460bc3d5698SJohn Baldwin imulq 128+8(%rsp),%rbx 1461bc3d5698SJohn Baldwin movq 0(%rbp),%rax 1462bc3d5698SJohn Baldwin movl $8,%ecx 1463bc3d5698SJohn Baldwin jmp .Lreduction_loop 1464bc3d5698SJohn Baldwin 1465bc3d5698SJohn Baldwin.align 32 1466bc3d5698SJohn Baldwin.Lreduction_loop: 1467bc3d5698SJohn Baldwin mulq %rbx 1468bc3d5698SJohn Baldwin movq 8(%rbp),%rax 1469bc3d5698SJohn Baldwin negq %r8 1470bc3d5698SJohn Baldwin movq %rdx,%r8 1471bc3d5698SJohn Baldwin adcq $0,%r8 1472bc3d5698SJohn Baldwin 1473bc3d5698SJohn Baldwin mulq %rbx 1474bc3d5698SJohn Baldwin addq %rax,%r9 1475bc3d5698SJohn Baldwin movq 16(%rbp),%rax 1476bc3d5698SJohn Baldwin adcq $0,%rdx 1477bc3d5698SJohn Baldwin addq %r9,%r8 1478bc3d5698SJohn Baldwin movq %rdx,%r9 1479bc3d5698SJohn Baldwin adcq $0,%r9 1480bc3d5698SJohn Baldwin 1481bc3d5698SJohn Baldwin mulq %rbx 1482bc3d5698SJohn Baldwin addq %rax,%r10 1483bc3d5698SJohn Baldwin movq 24(%rbp),%rax 1484bc3d5698SJohn Baldwin adcq $0,%rdx 1485bc3d5698SJohn Baldwin addq %r10,%r9 
1486bc3d5698SJohn Baldwin movq %rdx,%r10 1487bc3d5698SJohn Baldwin adcq $0,%r10 1488bc3d5698SJohn Baldwin 1489bc3d5698SJohn Baldwin mulq %rbx 1490bc3d5698SJohn Baldwin addq %rax,%r11 1491bc3d5698SJohn Baldwin movq 32(%rbp),%rax 1492bc3d5698SJohn Baldwin adcq $0,%rdx 1493bc3d5698SJohn Baldwin addq %r11,%r10 1494bc3d5698SJohn Baldwin movq 128+8(%rsp),%rsi 1495bc3d5698SJohn Baldwin 1496bc3d5698SJohn Baldwin 1497bc3d5698SJohn Baldwin adcq $0,%rdx 1498bc3d5698SJohn Baldwin movq %rdx,%r11 1499bc3d5698SJohn Baldwin 1500bc3d5698SJohn Baldwin mulq %rbx 1501bc3d5698SJohn Baldwin addq %rax,%r12 1502bc3d5698SJohn Baldwin movq 40(%rbp),%rax 1503bc3d5698SJohn Baldwin adcq $0,%rdx 1504bc3d5698SJohn Baldwin imulq %r8,%rsi 1505bc3d5698SJohn Baldwin addq %r12,%r11 1506bc3d5698SJohn Baldwin movq %rdx,%r12 1507bc3d5698SJohn Baldwin adcq $0,%r12 1508bc3d5698SJohn Baldwin 1509bc3d5698SJohn Baldwin mulq %rbx 1510bc3d5698SJohn Baldwin addq %rax,%r13 1511bc3d5698SJohn Baldwin movq 48(%rbp),%rax 1512bc3d5698SJohn Baldwin adcq $0,%rdx 1513bc3d5698SJohn Baldwin addq %r13,%r12 1514bc3d5698SJohn Baldwin movq %rdx,%r13 1515bc3d5698SJohn Baldwin adcq $0,%r13 1516bc3d5698SJohn Baldwin 1517bc3d5698SJohn Baldwin mulq %rbx 1518bc3d5698SJohn Baldwin addq %rax,%r14 1519bc3d5698SJohn Baldwin movq 56(%rbp),%rax 1520bc3d5698SJohn Baldwin adcq $0,%rdx 1521bc3d5698SJohn Baldwin addq %r14,%r13 1522bc3d5698SJohn Baldwin movq %rdx,%r14 1523bc3d5698SJohn Baldwin adcq $0,%r14 1524bc3d5698SJohn Baldwin 1525bc3d5698SJohn Baldwin mulq %rbx 1526bc3d5698SJohn Baldwin movq %rsi,%rbx 1527bc3d5698SJohn Baldwin addq %rax,%r15 1528bc3d5698SJohn Baldwin movq 0(%rbp),%rax 1529bc3d5698SJohn Baldwin adcq $0,%rdx 1530bc3d5698SJohn Baldwin addq %r15,%r14 1531bc3d5698SJohn Baldwin movq %rdx,%r15 1532bc3d5698SJohn Baldwin adcq $0,%r15 1533bc3d5698SJohn Baldwin 1534bc3d5698SJohn Baldwin decl %ecx 1535bc3d5698SJohn Baldwin jne .Lreduction_loop 1536bc3d5698SJohn Baldwin 1537bc3d5698SJohn Baldwin .byte 0xf3,0xc3 1538bc3d5698SJohn 
Baldwin.cfi_endproc 1539bc3d5698SJohn Baldwin.size __rsaz_512_reduce,.-__rsaz_512_reduce 1540bc3d5698SJohn Baldwin.type __rsaz_512_reducex,@function 1541bc3d5698SJohn Baldwin.align 32 1542bc3d5698SJohn Baldwin__rsaz_512_reducex: 1543bc3d5698SJohn Baldwin.cfi_startproc 1544bc3d5698SJohn Baldwin 1545bc3d5698SJohn Baldwin imulq %r8,%rdx 1546bc3d5698SJohn Baldwin xorq %rsi,%rsi 1547bc3d5698SJohn Baldwin movl $8,%ecx 1548bc3d5698SJohn Baldwin jmp .Lreduction_loopx 1549bc3d5698SJohn Baldwin 1550bc3d5698SJohn Baldwin.align 32 1551bc3d5698SJohn Baldwin.Lreduction_loopx: 1552bc3d5698SJohn Baldwin movq %r8,%rbx 1553bc3d5698SJohn Baldwin mulxq 0(%rbp),%rax,%r8 1554bc3d5698SJohn Baldwin adcxq %rbx,%rax 1555bc3d5698SJohn Baldwin adoxq %r9,%r8 1556bc3d5698SJohn Baldwin 1557bc3d5698SJohn Baldwin mulxq 8(%rbp),%rax,%r9 1558bc3d5698SJohn Baldwin adcxq %rax,%r8 1559bc3d5698SJohn Baldwin adoxq %r10,%r9 1560bc3d5698SJohn Baldwin 1561bc3d5698SJohn Baldwin mulxq 16(%rbp),%rbx,%r10 1562bc3d5698SJohn Baldwin adcxq %rbx,%r9 1563bc3d5698SJohn Baldwin adoxq %r11,%r10 1564bc3d5698SJohn Baldwin 1565bc3d5698SJohn Baldwin mulxq 24(%rbp),%rbx,%r11 1566bc3d5698SJohn Baldwin adcxq %rbx,%r10 1567bc3d5698SJohn Baldwin adoxq %r12,%r11 1568bc3d5698SJohn Baldwin 1569bc3d5698SJohn Baldwin.byte 0xc4,0x62,0xe3,0xf6,0xa5,0x20,0x00,0x00,0x00 1570bc3d5698SJohn Baldwin movq %rdx,%rax 1571bc3d5698SJohn Baldwin movq %r8,%rdx 1572bc3d5698SJohn Baldwin adcxq %rbx,%r11 1573bc3d5698SJohn Baldwin adoxq %r13,%r12 1574bc3d5698SJohn Baldwin 1575bc3d5698SJohn Baldwin mulxq 128+8(%rsp),%rbx,%rdx 1576bc3d5698SJohn Baldwin movq %rax,%rdx 1577bc3d5698SJohn Baldwin 1578bc3d5698SJohn Baldwin mulxq 40(%rbp),%rax,%r13 1579bc3d5698SJohn Baldwin adcxq %rax,%r12 1580bc3d5698SJohn Baldwin adoxq %r14,%r13 1581bc3d5698SJohn Baldwin 1582bc3d5698SJohn Baldwin.byte 0xc4,0x62,0xfb,0xf6,0xb5,0x30,0x00,0x00,0x00 1583bc3d5698SJohn Baldwin adcxq %rax,%r13 1584bc3d5698SJohn Baldwin adoxq %r15,%r14 1585bc3d5698SJohn Baldwin 1586bc3d5698SJohn 
Baldwin mulxq 56(%rbp),%rax,%r15
/*
 * The records from here down to the first .size directive are the tail of
 * __rsaz_512_reducex; the start of that routine precedes this span.
 * Visible here: the last mulx/adcx/adox step of the loop body, the loop
 * counter in %ecx being counted down, and the return.
 */
1587bc3d5698SJohn Baldwin movq %rbx,%rdx
1588bc3d5698SJohn Baldwin adcxq %rax,%r14
1589bc3d5698SJohn Baldwin adoxq %rsi,%r15
1590bc3d5698SJohn Baldwin adcxq %rsi,%r15
1591bc3d5698SJohn Baldwin
1592bc3d5698SJohn Baldwin decl %ecx
1593bc3d5698SJohn Baldwin jne .Lreduction_loopx
1594bc3d5698SJohn Baldwin
/* 0xf3,0xc3 is the two-byte encoding of "rep ret". */
1595bc3d5698SJohn Baldwin .byte 0xf3,0xc3
1596bc3d5698SJohn Baldwin.cfi_endproc
1597bc3d5698SJohn Baldwin.size __rsaz_512_reducex,.-__rsaz_512_reducex
/*
 * __rsaz_512_subtract -- file-local helper (no .globl directive).
 *
 * In:    %r8..%r15  eight 64-bit limbs, least significant limb in %r8
 *        %rdi       64-byte output buffer
 *        %rbp       eight-limb operand (read only)
 *        %rcx       mask that is ANDed into every limb derived from (%rbp)
 * Out:   (%rdi)..56(%rdi) and %r8..%r15 hold the eight-limb sum of the
 *        input and the masked, limb-wise negated operand.
 * Clobb: %r8..%r15, flags.  %rdi, %rbp and %rcx are only read.
 *
 * The operand is negated limb by limb: negq on limb 0, notq on limbs 1..7.
 * NOTE(review): that matches a full 512-bit two's-complement negation only
 * when limb 0 is non-zero (e.g. an odd modulus) -- confirm against callers.
 * NOTE(review): %rcx is presumably either zero or all-ones, making this a
 * branch-free conditional subtract; the callers are outside this span.
 * The carry out of the final adcq is not consumed by this routine.
 */
1598bc3d5698SJohn Baldwin.type __rsaz_512_subtract,@function
1599bc3d5698SJohn Baldwin.align 32
1600bc3d5698SJohn Baldwin__rsaz_512_subtract:
1601bc3d5698SJohn Baldwin.cfi_startproc
/* Spill the input limbs so that %r8..%r15 can be reused for the operand. */
1602bc3d5698SJohn Baldwin movq %r8,(%rdi)
1603bc3d5698SJohn Baldwin movq %r9,8(%rdi)
1604bc3d5698SJohn Baldwin movq %r10,16(%rdi)
1605bc3d5698SJohn Baldwin movq %r11,24(%rdi)
1606bc3d5698SJohn Baldwin movq %r12,32(%rdi)
1607bc3d5698SJohn Baldwin movq %r13,40(%rdi)
1608bc3d5698SJohn Baldwin movq %r14,48(%rdi)
1609bc3d5698SJohn Baldwin movq %r15,56(%rdi)
1610bc3d5698SJohn Baldwin
/* Load the operand, negate (limb 0) or complement (limbs 1..7), then mask. */
1611bc3d5698SJohn Baldwin movq 0(%rbp),%r8
1612bc3d5698SJohn Baldwin movq 8(%rbp),%r9
1613bc3d5698SJohn Baldwin negq %r8
1614bc3d5698SJohn Baldwin notq %r9
1615bc3d5698SJohn Baldwin andq %rcx,%r8
1616bc3d5698SJohn Baldwin movq 16(%rbp),%r10
1617bc3d5698SJohn Baldwin andq %rcx,%r9
1618bc3d5698SJohn Baldwin notq %r10
1619bc3d5698SJohn Baldwin movq 24(%rbp),%r11
1620bc3d5698SJohn Baldwin andq %rcx,%r10
1621bc3d5698SJohn Baldwin notq %r11
1622bc3d5698SJohn Baldwin movq 32(%rbp),%r12
1623bc3d5698SJohn Baldwin andq %rcx,%r11
1624bc3d5698SJohn Baldwin notq %r12
1625bc3d5698SJohn Baldwin movq 40(%rbp),%r13
1626bc3d5698SJohn Baldwin andq %rcx,%r12
1627bc3d5698SJohn Baldwin notq %r13
1628bc3d5698SJohn Baldwin movq 48(%rbp),%r14
1629bc3d5698SJohn Baldwin andq %rcx,%r13
1630bc3d5698SJohn Baldwin notq %r14
1631bc3d5698SJohn Baldwin movq 56(%rbp),%r15
1632bc3d5698SJohn Baldwin andq %rcx,%r14
1633bc3d5698SJohn Baldwin notq %r15
1634bc3d5698SJohn Baldwin andq %rcx,%r15
1635bc3d5698SJohn Baldwin
/* Eight-limb add with carry propagating from limb 0 up to limb 7. */
1636bc3d5698SJohn Baldwin addq (%rdi),%r8
1637bc3d5698SJohn Baldwin adcq 8(%rdi),%r9
1638bc3d5698SJohn Baldwin adcq 16(%rdi),%r10
1639bc3d5698SJohn Baldwin adcq 24(%rdi),%r11
1640bc3d5698SJohn Baldwin adcq 32(%rdi),%r12
1641bc3d5698SJohn Baldwin adcq 40(%rdi),%r13
1642bc3d5698SJohn Baldwin adcq 48(%rdi),%r14
1643bc3d5698SJohn Baldwin adcq 56(%rdi),%r15
1644bc3d5698SJohn Baldwin
/* Write the sum back over the spilled input. */
1645bc3d5698SJohn Baldwin movq %r8,(%rdi)
1646bc3d5698SJohn Baldwin movq %r9,8(%rdi)
1647bc3d5698SJohn Baldwin movq %r10,16(%rdi)
1648bc3d5698SJohn Baldwin movq %r11,24(%rdi)
1649bc3d5698SJohn Baldwin movq %r12,32(%rdi)
1650bc3d5698SJohn Baldwin movq %r13,40(%rdi)
1651bc3d5698SJohn Baldwin movq %r14,48(%rdi)
1652bc3d5698SJohn Baldwin movq %r15,56(%rdi)
1653bc3d5698SJohn Baldwin
/* rep ret */
1654bc3d5698SJohn Baldwin .byte 0xf3,0xc3
1655bc3d5698SJohn Baldwin.cfi_endproc
1656bc3d5698SJohn Baldwin.size __rsaz_512_subtract,.-__rsaz_512_subtract
/*
 * __rsaz_512_mul -- file-local helper: 8-limb by 8-limb multiplication done
 * row by row with the one-operand mulq.
 *
 * In:    %rsi  multiplicand, eight 64-bit limbs
 *        %rbx  limb 0 of the multiplier, already loaded by the caller
 *        %rbp  multiplier; limbs 1..7 are read from 8(%rbp)..56(%rbp)
 * Out:   sixteen result limbs at 8(%rsp)..135(%rsp), with %rsp as seen
 *        inside this routine; the upper eight limbs also stay in %r8..%r15.
 * Clobb: %rax, %rbx, %rcx, %rdx, %rdi, %rbp, %r8..%r15, flags.
 */
1657bc3d5698SJohn Baldwin.type __rsaz_512_mul,@function
1658bc3d5698SJohn Baldwin.align 32
1659bc3d5698SJohn Baldwin__rsaz_512_mul:
1660bc3d5698SJohn Baldwin.cfi_startproc
/* %rdi = write cursor into the result area. */
1661bc3d5698SJohn Baldwin leaq 8(%rsp),%rdi
1662bc3d5698SJohn Baldwin
/* First row: multiplicand times %rbx; result limb 0 goes straight to memory. */
1663bc3d5698SJohn Baldwin movq (%rsi),%rax
1664bc3d5698SJohn Baldwin mulq %rbx
1665bc3d5698SJohn Baldwin movq %rax,(%rdi)
1666bc3d5698SJohn Baldwin movq 8(%rsi),%rax
1667bc3d5698SJohn Baldwin movq %rdx,%r8
1668bc3d5698SJohn Baldwin
1669bc3d5698SJohn Baldwin mulq %rbx
1670bc3d5698SJohn Baldwin addq %rax,%r8
1671bc3d5698SJohn Baldwin movq 16(%rsi),%rax
1672bc3d5698SJohn Baldwin movq %rdx,%r9
1673bc3d5698SJohn Baldwin adcq $0,%r9
1674bc3d5698SJohn Baldwin
1675bc3d5698SJohn Baldwin mulq %rbx
1676bc3d5698SJohn Baldwin addq %rax,%r9
1677bc3d5698SJohn Baldwin movq 24(%rsi),%rax
1678bc3d5698SJohn Baldwin movq %rdx,%r10
1679bc3d5698SJohn Baldwin adcq $0,%r10
1680bc3d5698SJohn Baldwin
1681bc3d5698SJohn Baldwin mulq %rbx
1682bc3d5698SJohn Baldwin addq %rax,%r10
1683bc3d5698SJohn Baldwin movq 32(%rsi),%rax
1684bc3d5698SJohn Baldwin movq %rdx,%r11
1685bc3d5698SJohn Baldwin adcq $0,%r11
1686bc3d5698SJohn Baldwin
1687bc3d5698SJohn Baldwin mulq %rbx
1688bc3d5698SJohn Baldwin addq %rax,%r11
1689bc3d5698SJohn Baldwin movq 40(%rsi),%rax
1690bc3d5698SJohn Baldwin movq %rdx,%r12
1691bc3d5698SJohn Baldwin adcq $0,%r12
1692bc3d5698SJohn Baldwin
1693bc3d5698SJohn Baldwin mulq %rbx
1694bc3d5698SJohn Baldwin addq %rax,%r12
1695bc3d5698SJohn Baldwin movq 48(%rsi),%rax
1696bc3d5698SJohn Baldwin movq %rdx,%r13
1697bc3d5698SJohn Baldwin adcq $0,%r13
1698bc3d5698SJohn Baldwin
1699bc3d5698SJohn Baldwin mulq %rbx
1700bc3d5698SJohn Baldwin addq %rax,%r13
1701bc3d5698SJohn Baldwin movq 56(%rsi),%rax
1702bc3d5698SJohn Baldwin movq %rdx,%r14
1703bc3d5698SJohn Baldwin adcq $0,%r14
1704bc3d5698SJohn Baldwin
1705bc3d5698SJohn Baldwin mulq %rbx
1706bc3d5698SJohn Baldwin addq %rax,%r14
/* Reload multiplicand limb 0 for the first mulq of the next row. */
1707bc3d5698SJohn Baldwin movq (%rsi),%rax
1708bc3d5698SJohn Baldwin movq %rdx,%r15
1709bc3d5698SJohn Baldwin adcq $0,%r15
1710bc3d5698SJohn Baldwin
/* Advance to multiplier limb 1 and to result limb 1. */
1711bc3d5698SJohn Baldwin leaq 8(%rbp),%rbp
1712bc3d5698SJohn Baldwin leaq 8(%rdi),%rdi
1713bc3d5698SJohn Baldwin
/* Seven remaining multiplier limbs; the jmp skips any .align padding. */
1714bc3d5698SJohn Baldwin movl $7,%ecx
1715bc3d5698SJohn Baldwin jmp .Loop_mul
1716bc3d5698SJohn Baldwin
/*
 * One pass per multiplier limb.  Each pass adds a new row into the running
 * sum, stores the finished lowest limb at (%rdi), and moves the sum down by
 * one register so that %r8..%r15 again hold the eight live limbs.
 */
1717bc3d5698SJohn Baldwin.align 32
1718bc3d5698SJohn Baldwin.Loop_mul:
1719bc3d5698SJohn Baldwin movq (%rbp),%rbx
1720bc3d5698SJohn Baldwin mulq %rbx
1721bc3d5698SJohn Baldwin addq %rax,%r8
1722bc3d5698SJohn Baldwin movq 8(%rsi),%rax
1723bc3d5698SJohn Baldwin movq %r8,(%rdi)
1724bc3d5698SJohn Baldwin movq %rdx,%r8
1725bc3d5698SJohn Baldwin adcq $0,%r8
1726bc3d5698SJohn Baldwin
1727bc3d5698SJohn Baldwin mulq %rbx
1728bc3d5698SJohn Baldwin addq %rax,%r9
1729bc3d5698SJohn Baldwin movq 16(%rsi),%rax
1730bc3d5698SJohn Baldwin adcq $0,%rdx
1731bc3d5698SJohn Baldwin addq %r9,%r8
1732bc3d5698SJohn Baldwin movq %rdx,%r9
1733bc3d5698SJohn Baldwin adcq $0,%r9
1734bc3d5698SJohn Baldwin
1735bc3d5698SJohn Baldwin mulq %rbx
1736bc3d5698SJohn Baldwin addq %rax,%r10
1737bc3d5698SJohn Baldwin movq 24(%rsi),%rax
1738bc3d5698SJohn Baldwin adcq $0,%rdx
1739bc3d5698SJohn Baldwin addq %r10,%r9
1740bc3d5698SJohn Baldwin movq %rdx,%r10
1741bc3d5698SJohn Baldwin adcq $0,%r10
1742bc3d5698SJohn Baldwin
1743bc3d5698SJohn Baldwin mulq %rbx
1744bc3d5698SJohn Baldwin addq %rax,%r11
1745bc3d5698SJohn Baldwin movq 32(%rsi),%rax
1746bc3d5698SJohn Baldwin adcq $0,%rdx
1747bc3d5698SJohn Baldwin addq %r11,%r10
1748bc3d5698SJohn Baldwin movq %rdx,%r11
1749bc3d5698SJohn Baldwin adcq $0,%r11
1750bc3d5698SJohn Baldwin
1751bc3d5698SJohn Baldwin mulq %rbx
1752bc3d5698SJohn Baldwin addq %rax,%r12
1753bc3d5698SJohn Baldwin movq 40(%rsi),%rax
1754bc3d5698SJohn Baldwin adcq $0,%rdx
1755bc3d5698SJohn Baldwin addq %r12,%r11
1756bc3d5698SJohn Baldwin movq %rdx,%r12
1757bc3d5698SJohn Baldwin adcq $0,%r12
1758bc3d5698SJohn Baldwin
1759bc3d5698SJohn Baldwin mulq %rbx
1760bc3d5698SJohn Baldwin addq %rax,%r13
1761bc3d5698SJohn Baldwin movq 48(%rsi),%rax
1762bc3d5698SJohn Baldwin adcq $0,%rdx
1763bc3d5698SJohn Baldwin addq %r13,%r12
1764bc3d5698SJohn Baldwin movq %rdx,%r13
1765bc3d5698SJohn Baldwin adcq $0,%r13
1766bc3d5698SJohn Baldwin
1767bc3d5698SJohn Baldwin mulq %rbx
1768bc3d5698SJohn Baldwin addq %rax,%r14
1769bc3d5698SJohn Baldwin movq 56(%rsi),%rax
1770bc3d5698SJohn Baldwin adcq $0,%rdx
1771bc3d5698SJohn Baldwin addq %r14,%r13
1772bc3d5698SJohn Baldwin movq %rdx,%r14
/* leaq leaves the flags alone, so the carry pending for the adcq below survives. */
1773bc3d5698SJohn Baldwin leaq 8(%rbp),%rbp
1774bc3d5698SJohn Baldwin adcq $0,%r14
1775bc3d5698SJohn Baldwin
1776bc3d5698SJohn Baldwin mulq %rbx
1777bc3d5698SJohn Baldwin addq %rax,%r15
/* Multiplicand limb 0 again, ready for the next pass. */
1778bc3d5698SJohn Baldwin movq (%rsi),%rax
1779bc3d5698SJohn Baldwin adcq $0,%rdx
1780bc3d5698SJohn Baldwin addq %r15,%r14
1781bc3d5698SJohn Baldwin movq %rdx,%r15
1782bc3d5698SJohn Baldwin adcq $0,%r15
1783bc3d5698SJohn Baldwin
1784bc3d5698SJohn Baldwin leaq 8(%rdi),%rdi
1785bc3d5698SJohn Baldwin
1786bc3d5698SJohn Baldwin decl %ecx
1787bc3d5698SJohn Baldwin jnz .Loop_mul
1788bc3d5698SJohn Baldwin
/* Store the upper eight limbs of the product. */
1789bc3d5698SJohn Baldwin movq %r8,(%rdi)
1790bc3d5698SJohn Baldwin movq %r9,8(%rdi)
1791bc3d5698SJohn Baldwin movq %r10,16(%rdi)
1792bc3d5698SJohn Baldwin movq %r11,24(%rdi)
1793bc3d5698SJohn Baldwin movq %r12,32(%rdi)
1794bc3d5698SJohn Baldwin movq %r13,40(%rdi)
1795bc3d5698SJohn Baldwin movq %r14,48(%rdi)
1796bc3d5698SJohn Baldwin movq %r15,56(%rdi)
1797bc3d5698SJohn Baldwin
/* rep ret */
1798bc3d5698SJohn Baldwin .byte 0xf3,0xc3
1799bc3d5698SJohn Baldwin.cfi_endproc
1800bc3d5698SJohn Baldwin.size __rsaz_512_mul,.-__rsaz_512_mul
/*
 * __rsaz_512_mulx -- file-local helper: 8-limb by 8-limb multiplication
 * using mulx (implicit source %rdx, flags untouched) together with the two
 * separate carry chains of adcx (CF) and adox (OF).
 *
 * In:    %rsi  multiplicand, eight 64-bit limbs
 *        %rdx  limb 0 of the multiplier
 *        %rbp  multiplier; limbs 1..7 are read from 8(%rbp)..56(%rbp)
 * Out:   sixteen result limbs at 8(%rsp)..135(%rsp), with %rsp as seen
 *        inside this routine; the upper eight limbs also stay in %r8..%r15.
 * Clobb: %rax, %rbx, %rcx, %rdx, %rdi, %r8..%r15, flags.
 *
 * NOTE(review): the first adcq below consumes CF exactly as it is on entry
 * (mulx and movq do not write flags, and nothing here clears CF first).
 * Presumably every caller arrives with CF = 0 -- verify at the call sites.
 */
1801bc3d5698SJohn Baldwin.type __rsaz_512_mulx,@function
1802bc3d5698SJohn Baldwin.align 32
1803bc3d5698SJohn Baldwin__rsaz_512_mulx:
1804bc3d5698SJohn Baldwin.cfi_startproc
/* First row; %rax and %rbx alternate as the low-half destination. */
1805bc3d5698SJohn Baldwin mulxq (%rsi),%rbx,%r8
/* Loop index: runs -6..-1 and addresses both the multiplier and the result. */
1806bc3d5698SJohn Baldwin movq $-6,%rcx
1807bc3d5698SJohn Baldwin
1808bc3d5698SJohn Baldwin mulxq 8(%rsi),%rax,%r9
/* Result limb 0. */
1809bc3d5698SJohn Baldwin movq %rbx,8(%rsp)
1810bc3d5698SJohn Baldwin
1811bc3d5698SJohn Baldwin mulxq 16(%rsi),%rbx,%r10
1812bc3d5698SJohn Baldwin adcq %rax,%r8
1813bc3d5698SJohn Baldwin
1814bc3d5698SJohn Baldwin mulxq 24(%rsi),%rax,%r11
1815bc3d5698SJohn Baldwin adcq %rbx,%r9
1816bc3d5698SJohn Baldwin
1817bc3d5698SJohn Baldwin mulxq 32(%rsi),%rbx,%r12
1818bc3d5698SJohn Baldwin adcq %rax,%r10
1819bc3d5698SJohn Baldwin
1820bc3d5698SJohn Baldwin mulxq 40(%rsi),%rax,%r13
1821bc3d5698SJohn Baldwin adcq %rbx,%r11
1822bc3d5698SJohn Baldwin
1823bc3d5698SJohn Baldwin mulxq 48(%rsi),%rbx,%r14
1824bc3d5698SJohn Baldwin adcq %rax,%r12
1825bc3d5698SJohn Baldwin
1826bc3d5698SJohn Baldwin mulxq 56(%rsi),%rax,%r15
/* Multiplier limb 1 for the first loop pass. */
1827bc3d5698SJohn Baldwin movq 8(%rbp),%rdx
1828bc3d5698SJohn Baldwin adcq %rbx,%r13
1829bc3d5698SJohn Baldwin adcq %rax,%r14
1830bc3d5698SJohn Baldwin adcq $0,%r15
1831bc3d5698SJohn Baldwin
/* %rdi = 0 is the zero addend for the chain folds; xor also clears CF and OF. */
1832bc3d5698SJohn Baldwin xorq %rdi,%rdi
1833bc3d5698SJohn Baldwin jmp .Loop_mulx
1834bc3d5698SJohn Baldwin
/*
 * Six passes (multiplier limbs 1..6).  In each step adcx adds the low half
 * of the new product on the CF chain while adox adds the previous sum limb
 * on the OF chain, so the running sum moves down by one register per pass.
 */
1835bc3d5698SJohn Baldwin.align 32
1836bc3d5698SJohn Baldwin.Loop_mulx:
1837bc3d5698SJohn Baldwin movq %r8,%rbx
1838bc3d5698SJohn Baldwin mulxq (%rsi),%rax,%r8
1839bc3d5698SJohn Baldwin adcxq %rax,%rbx
1840bc3d5698SJohn Baldwin adoxq %r9,%r8
1841bc3d5698SJohn Baldwin
1842bc3d5698SJohn Baldwin mulxq 8(%rsi),%rax,%r9
1843bc3d5698SJohn Baldwin adcxq %rax,%r8
1844bc3d5698SJohn Baldwin adoxq %r10,%r9
1845bc3d5698SJohn Baldwin
1846bc3d5698SJohn Baldwin mulxq 16(%rsi),%rax,%r10
1847bc3d5698SJohn Baldwin adcxq %rax,%r9
1848bc3d5698SJohn Baldwin adoxq %r11,%r10
1849bc3d5698SJohn Baldwin
1850bc3d5698SJohn Baldwin mulxq 24(%rsi),%rax,%r11
1851bc3d5698SJohn Baldwin adcxq %rax,%r10
1852bc3d5698SJohn Baldwin adoxq %r12,%r11
1853bc3d5698SJohn Baldwin
/* Hand-encoded mulxq 32(%rsi),%rax,%r12: 0x3e prefix, VEX form, 32-bit displacement. */
1854bc3d5698SJohn Baldwin.byte 0x3e,0xc4,0x62,0xfb,0xf6,0xa6,0x20,0x00,0x00,0x00
1855bc3d5698SJohn Baldwin adcxq %rax,%r11
1856bc3d5698SJohn Baldwin adoxq %r13,%r12
1857bc3d5698SJohn Baldwin
1858bc3d5698SJohn Baldwin mulxq 40(%rsi),%rax,%r13
1859bc3d5698SJohn Baldwin adcxq %rax,%r12
1860bc3d5698SJohn Baldwin adoxq %r14,%r13
1861bc3d5698SJohn Baldwin
1862bc3d5698SJohn Baldwin mulxq 48(%rsi),%rax,%r14
1863bc3d5698SJohn Baldwin adcxq %rax,%r13
1864bc3d5698SJohn Baldwin adoxq %r15,%r14
1865bc3d5698SJohn Baldwin
1866bc3d5698SJohn Baldwin mulxq 56(%rsi),%rax,%r15
/* Next multiplier limb: with %rcx = -6..-1 this reads 16(%rbp)..56(%rbp). */
1867bc3d5698SJohn Baldwin movq 64(%rbp,%rcx,8),%rdx
/* Finished result limb: with %rcx = -6..-1 this writes 16(%rsp)..56(%rsp). */
1868bc3d5698SJohn Baldwin movq %rbx,8+64-8(%rsp,%rcx,8)
1869bc3d5698SJohn Baldwin adcxq %rax,%r14
/* Fold what is left on the OF chain, then the CF chain, into the top limb. */
1870bc3d5698SJohn Baldwin adoxq %rdi,%r15
1871bc3d5698SJohn Baldwin adcxq %rdi,%r15
1872bc3d5698SJohn Baldwin
1873bc3d5698SJohn Baldwin incq %rcx
1874bc3d5698SJohn Baldwin jnz .Loop_mulx
1875bc3d5698SJohn Baldwin
/* Last row (multiplier limb 7, loaded by the final loop pass), unrolled. */
1876bc3d5698SJohn Baldwin movq %r8,%rbx
1877bc3d5698SJohn Baldwin mulxq (%rsi),%rax,%r8
1878bc3d5698SJohn Baldwin adcxq %rax,%rbx
1879bc3d5698SJohn Baldwin adoxq %r9,%r8
1880bc3d5698SJohn Baldwin
/* Hand-encoded mulxq 8(%rsi),%rax,%r9 with a 32-bit displacement. */
1881bc3d5698SJohn Baldwin.byte 0xc4,0x62,0xfb,0xf6,0x8e,0x08,0x00,0x00,0x00
1882bc3d5698SJohn Baldwin adcxq %rax,%r8
1883bc3d5698SJohn Baldwin adoxq %r10,%r9
1884bc3d5698SJohn Baldwin
/* Hand-encoded mulxq 16(%rsi),%rax,%r10 with a 32-bit displacement. */
1885bc3d5698SJohn Baldwin.byte 0xc4,0x62,0xfb,0xf6,0x96,0x10,0x00,0x00,0x00
1886bc3d5698SJohn Baldwin adcxq %rax,%r9
1887bc3d5698SJohn Baldwin adoxq %r11,%r10
1888bc3d5698SJohn Baldwin
1889bc3d5698SJohn Baldwin mulxq 24(%rsi),%rax,%r11
1890bc3d5698SJohn Baldwin adcxq %rax,%r10
1891bc3d5698SJohn Baldwin adoxq %r12,%r11
1892bc3d5698SJohn Baldwin
1893bc3d5698SJohn Baldwin mulxq 32(%rsi),%rax,%r12
1894bc3d5698SJohn Baldwin adcxq %rax,%r11
1895bc3d5698SJohn Baldwin adoxq %r13,%r12
1896bc3d5698SJohn Baldwin
1897bc3d5698SJohn Baldwin mulxq 40(%rsi),%rax,%r13
1898bc3d5698SJohn Baldwin adcxq %rax,%r12
1899bc3d5698SJohn Baldwin adoxq %r14,%r13
1900bc3d5698SJohn Baldwin
/* Hand-encoded mulxq 48(%rsi),%rax,%r14 with a 32-bit displacement. */
1901bc3d5698SJohn Baldwin.byte 0xc4,0x62,0xfb,0xf6,0xb6,0x30,0x00,0x00,0x00
1902bc3d5698SJohn Baldwin adcxq %rax,%r13
1903bc3d5698SJohn Baldwin adoxq %r15,%r14
1904bc3d5698SJohn Baldwin
/* Hand-encoded mulxq 56(%rsi),%rax,%r15 with a 32-bit displacement. */
1905bc3d5698SJohn Baldwin.byte 0xc4,0x62,0xfb,0xf6,0xbe,0x38,0x00,0x00,0x00
1906bc3d5698SJohn Baldwin adcxq %rax,%r14
1907bc3d5698SJohn Baldwin adoxq %rdi,%r15
1908bc3d5698SJohn Baldwin adcxq %rdi,%r15
1909bc3d5698SJohn Baldwin
/* Result limb 7 (%rbx) followed by the upper eight limbs. */
1910bc3d5698SJohn Baldwin movq %rbx,8+64-8(%rsp)
1911bc3d5698SJohn Baldwin movq %r8,8+64(%rsp)
1912bc3d5698SJohn Baldwin movq %r9,8+64+8(%rsp)
1913bc3d5698SJohn Baldwin movq %r10,8+64+16(%rsp)
1914bc3d5698SJohn Baldwin movq %r11,8+64+24(%rsp)
1915bc3d5698SJohn Baldwin movq %r12,8+64+32(%rsp)
1916bc3d5698SJohn Baldwin movq %r13,8+64+40(%rsp)
1917bc3d5698SJohn Baldwin movq %r14,8+64+48(%rsp)
1918bc3d5698SJohn Baldwin movq %r15,8+64+56(%rsp)
1919bc3d5698SJohn Baldwin
/* rep ret */
1920bc3d5698SJohn Baldwin .byte 0xf3,0xc3
1921bc3d5698SJohn Baldwin.cfi_endproc
1922bc3d5698SJohn Baldwin.size __rsaz_512_mulx,.-__rsaz_512_mulx
/*
 * rsaz_512_scatter4 -- exported (.globl).
 *
 * In:    %rdi  table base
 *        %rsi  source, eight 64-bit limbs
 *        %rdx  slot index (scaled by 8 bytes)
 * Does:  limb i of the source is stored at %rdi + 8*%rdx + 128*i, i = 0..7,
 *        i.e. the limbs of one value are spread 128 bytes apart.
 * Clobb: %rax, %rsi, %rdi, %r9, flags.
 */
1923bc3d5698SJohn Baldwin.globl rsaz_512_scatter4
1924bc3d5698SJohn Baldwin.type rsaz_512_scatter4,@function
1925bc3d5698SJohn Baldwin.align 16
1926bc3d5698SJohn Baldwinrsaz_512_scatter4:
1927bc3d5698SJohn Baldwin.cfi_startproc
/* Point %rdi at the selected slot of the first 128-byte row. */
1928bc3d5698SJohn Baldwin leaq (%rdi,%rdx,8),%rdi
/* Eight limbs to copy. */
1929bc3d5698SJohn Baldwin movl $8,%r9d
1930bc3d5698SJohn Baldwin jmp .Loop_scatter
1931bc3d5698SJohn Baldwin.align 16
1932bc3d5698SJohn Baldwin.Loop_scatter:
1933bc3d5698SJohn Baldwin movq (%rsi),%rax
1934bc3d5698SJohn Baldwin leaq 8(%rsi),%rsi
1935bc3d5698SJohn Baldwin movq %rax,(%rdi)
1936bc3d5698SJohn Baldwin leaq 128(%rdi),%rdi
1937bc3d5698SJohn Baldwin decl %r9d
1938bc3d5698SJohn Baldwin jnz .Loop_scatter
/* rep ret */
1939bc3d5698SJohn Baldwin .byte 0xf3,0xc3
1940bc3d5698SJohn Baldwin.cfi_endproc
1941bc3d5698SJohn Baldwin.size rsaz_512_scatter4,.-rsaz_512_scatter4
1942bc3d5698SJohn Baldwin
/*
 * rsaz_512_gather4 -- exported (.globl).
 *
 * In:    %rdi  destination, eight 64-bit limbs
 *        %rsi  table base; read with movdqa, so it must be 16-byte aligned
 *        %edx  slot index
 * Does:  for each of eight 128-byte rows, loads all sixteen qword slots,
 *        ANDs them with per-slot equality masks, ORs the results together
 *        and stores the surviving qword.  Every slot of every row is read
 *        whatever the index is; the selection is done purely by masking.
 *        An index that equals none of 0..15 leaves all masks zero, so
 *        zeros are stored.
 * Clobb: %rsi, %rdi, %r9, %xmm0..%xmm15, flags.
 */
1943bc3d5698SJohn Baldwin.globl rsaz_512_gather4
1944bc3d5698SJohn Baldwin.type rsaz_512_gather4,@function
1945bc3d5698SJohn Baldwin.align 16
1946bc3d5698SJohn Baldwinrsaz_512_gather4:
1947bc3d5698SJohn Baldwin.cfi_startproc
1948bc3d5698SJohn Baldwin movd %edx,%xmm8
/* %xmm1 = step (2,2,2,2), %xmm0 = first counter (0,0,1,1). */
1949bc3d5698SJohn Baldwin movdqa .Linc+16(%rip),%xmm1
1950bc3d5698SJohn Baldwin movdqa .Linc(%rip),%xmm0
1951bc3d5698SJohn Baldwin
/* Broadcast the index to all four dword lanes. */
1952bc3d5698SJohn Baldwin pshufd $0,%xmm8,%xmm8
/*
 * Build eight masks.  Each paddd produces the next counter (2k,2k,2k+1,2k+1)
 * and each pcmpeqd turns the previous counter into a mask in place, so that
 * %xmm<k> ends up all-ones in its low qword when index == 2k and in its
 * high qword when index == 2k+1, for k = 0..7.
 */
1953bc3d5698SJohn Baldwin movdqa %xmm1,%xmm7
1954bc3d5698SJohn Baldwin movdqa %xmm1,%xmm2
1955bc3d5698SJohn Baldwin paddd %xmm0,%xmm1
1956bc3d5698SJohn Baldwin pcmpeqd %xmm8,%xmm0
1957bc3d5698SJohn Baldwin movdqa %xmm7,%xmm3
1958bc3d5698SJohn Baldwin paddd %xmm1,%xmm2
1959bc3d5698SJohn Baldwin pcmpeqd %xmm8,%xmm1
1960bc3d5698SJohn Baldwin movdqa %xmm7,%xmm4
1961bc3d5698SJohn Baldwin paddd %xmm2,%xmm3
1962bc3d5698SJohn Baldwin pcmpeqd %xmm8,%xmm2
1963bc3d5698SJohn Baldwin movdqa %xmm7,%xmm5
1964bc3d5698SJohn Baldwin paddd %xmm3,%xmm4
1965bc3d5698SJohn Baldwin pcmpeqd %xmm8,%xmm3
1966bc3d5698SJohn Baldwin movdqa %xmm7,%xmm6
1967bc3d5698SJohn Baldwin paddd %xmm4,%xmm5
1968bc3d5698SJohn Baldwin pcmpeqd %xmm8,%xmm4
1969bc3d5698SJohn Baldwin paddd %xmm5,%xmm6
1970bc3d5698SJohn Baldwin pcmpeqd %xmm8,%xmm5
1971bc3d5698SJohn Baldwin paddd %xmm6,%xmm7
1972bc3d5698SJohn Baldwin pcmpeqd %xmm8,%xmm6
1973bc3d5698SJohn Baldwin pcmpeqd %xmm8,%xmm7
/* Eight rows, one output limb per row. */
1974bc3d5698SJohn Baldwin movl $8,%r9d
1975bc3d5698SJohn Baldwin jmp .Loop_gather
1976bc3d5698SJohn Baldwin.align 16
1977bc3d5698SJohn Baldwin.Loop_gather:
/* Load the whole 128-byte row and mask each 16-byte pair of slots. */
1978bc3d5698SJohn Baldwin movdqa 0(%rsi),%xmm8
1979bc3d5698SJohn Baldwin movdqa 16(%rsi),%xmm9
1980bc3d5698SJohn Baldwin movdqa 32(%rsi),%xmm10
1981bc3d5698SJohn Baldwin movdqa 48(%rsi),%xmm11
1982bc3d5698SJohn Baldwin pand %xmm0,%xmm8
1983bc3d5698SJohn Baldwin movdqa 64(%rsi),%xmm12
1984bc3d5698SJohn Baldwin pand %xmm1,%xmm9
1985bc3d5698SJohn Baldwin movdqa 80(%rsi),%xmm13
1986bc3d5698SJohn Baldwin pand %xmm2,%xmm10
1987bc3d5698SJohn Baldwin movdqa 96(%rsi),%xmm14
1988bc3d5698SJohn Baldwin pand %xmm3,%xmm11
1989bc3d5698SJohn Baldwin movdqa 112(%rsi),%xmm15
1990bc3d5698SJohn Baldwin leaq 128(%rsi),%rsi
1991bc3d5698SJohn Baldwin pand %xmm4,%xmm12
1992bc3d5698SJohn Baldwin pand %xmm5,%xmm13
1993bc3d5698SJohn Baldwin pand %xmm6,%xmm14
1994bc3d5698SJohn Baldwin pand %xmm7,%xmm15
/* OR the eight masked vectors together, using two accumulators. */
1995bc3d5698SJohn Baldwin por %xmm10,%xmm8
1996bc3d5698SJohn Baldwin por %xmm11,%xmm9
1997bc3d5698SJohn Baldwin por %xmm12,%xmm8
1998bc3d5698SJohn Baldwin por %xmm13,%xmm9
1999bc3d5698SJohn Baldwin por %xmm14,%xmm8
2000bc3d5698SJohn Baldwin por %xmm15,%xmm9
2001bc3d5698SJohn Baldwin
2002bc3d5698SJohn Baldwin por %xmm9,%xmm8
/* pshufd 0x4e swaps the two qwords; OR folds the high qword into the low one. */
2003bc3d5698SJohn Baldwin pshufd $0x4e,%xmm8,%xmm9
2004bc3d5698SJohn Baldwin por %xmm9,%xmm8
/* Store the low qword as the next output limb. */
2005bc3d5698SJohn Baldwin movq %xmm8,(%rdi)
2006bc3d5698SJohn Baldwin leaq 8(%rdi),%rdi
2007bc3d5698SJohn Baldwin decl %r9d
2008bc3d5698SJohn Baldwin jnz .Loop_gather
/* rep ret */
2009bc3d5698SJohn Baldwin .byte 0xf3,0xc3
/* Local end marker; nothing inside this span refers to it. */
2010bc3d5698SJohn Baldwin.LSEH_end_rsaz_512_gather4:
2011bc3d5698SJohn Baldwin.cfi_endproc
2012bc3d5698SJohn Baldwin.size rsaz_512_gather4,.-rsaz_512_gather4
2013bc3d5698SJohn Baldwin
/*
 * Constants for rsaz_512_gather4: the first 16 bytes are the initial
 * per-lane counter (0,0,1,1); the next 16 bytes are the step (2,2,2,2).
 */
2014bc3d5698SJohn Baldwin.align 64
2015bc3d5698SJohn Baldwin.Linc:
2016bc3d5698SJohn Baldwin.long 0,0, 1,1
2017bc3d5698SJohn Baldwin.long 2,2, 2,2
2018