#ifndef __ASSEMBLER__
# define __ASSEMBLER__ 1
#endif
#include "crypto/sparc_arch.h"

! NOTE(review): machine-generated SPARC T4 Montgomery-multiplication code
! (the ".word"-encoded opcodes below spell out the T4 montmul/montsqr and
! VIS fsrc2/movxtod instructions for assemblers that do not know them).
! This copy was reformatted from a line-fused extraction; the instruction
! stream itself is unchanged.  Comment syntax is "!" to end of line.
!
! Common shape of every bn_mul_mont_t4_N below:
!   in:  %i0 = result pointer, %i1/%i2/%i3 = three operand pointers,
!        %i4 = pointer to 64-bit n0 (loaded into %f0:%f1/%f60);
!        presumably (rp, ap, bp, np, n0) of the usual bn_mul_mont()
!        signature — confirm against the C caller.
!   out: %i0 = 1 on success, 0 on failure (caller falls back).
!   The operands are preloaded into several register windows (one "save"
!   per window), the single montmul/montsqr instruction consumes them,
!   and the result is read back via movxtod/fsrc2 and stored 32 bits at
!   a time (little-end word at offset +4, big-end word at +0).
!   On 32-bit builds, %g5 = 0xffffffff00000000 is OR-ed into every %fp;
!   NOTE(review): apparently a canary — if the OS spills/refills the
!   windows (clobbering the montmul state) the tag is lost, the
!   "and %fp,%g5,%g5 / brz" checks see zero, and the routine reports
!   failure.  Confirm against the generator's commentary.

#ifdef __arch64__
.register	%g2,#scratch
.register	%g3,#scratch
#endif

.section	".text",#alloc,#execinstr

#ifdef __PIC__
SPARC_PIC_THUNK(%g1)
#endif

! bn_mul_mont_t4_8 — Montgomery multiply, fixed size of 8 64-bit limbs.
.globl	bn_mul_mont_t4_8
.align	32
bn_mul_mont_t4_8:
#ifdef __arch64__
	mov	0,%g5
	mov	-128,%g4
#elif defined(SPARCV9_64BIT_STACK)
	SPARC_LOAD_ADDRESS_LEAF(OPENSSL_sparcv9cap_P,%g1,%g5)
	ld	[%g1+0],%g1	! OPENSSL_sparcv9_P[0]
	mov	-2047,%g4
	and	%g1,SPARCV9_64BIT_STACK,%g1
	movrz	%g1,0,%g4
	mov	-1,%g5
	add	%g4,-128,%g4
#else
	mov	-1,%g5
	mov	-128,%g4
#endif
	sllx	%g5,32,%g5	! %g5 = 0 (64-bit) or 0xffffffff00000000 mask
	save	%sp,%g4,%sp
#ifndef __arch64__
	save	%sp,-128,%sp	! warm it up
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	restore
	restore
	restore
	restore
	restore
	restore
#endif
	and	%sp,1,%g4
	or	%g5,%fp,%fp
	or	%g4,%g5,%g5

	! copy arguments to global registers
	mov	%i0,%g1
	mov	%i1,%g2
	mov	%i2,%g3
	mov	%i3,%g4
	ld	[%i4+0],%f1	! load *n0
	ld	[%i4+4],%f0
	.word	0xbbb00f00 !fsrc2 %f0,%f0,%f60
	! first operand (%g2): 8 limbs, each assembled from two 32-bit halves
	save	%sp,-128,%sp;	or	%g5,%fp,%fp
	ld	[%g2+0*8+0],%l1
	ld	[%g2+0*8+4],%l0
	sllx	%l0,32,%l0
	or	%l1,%l0,%l0
	ld	[%g2+1*8+0],%l2
	ld	[%g2+1*8+4],%l1
	sllx	%l1,32,%l1
	or	%l2,%l1,%l1
	ld	[%g2+2*8+0],%l3
	ld	[%g2+2*8+4],%l2
	sllx	%l2,32,%l2
	or	%l3,%l2,%l2
	ld	[%g2+3*8+0],%l4
	ld	[%g2+3*8+4],%l3
	sllx	%l3,32,%l3
	or	%l4,%l3,%l3
	ld	[%g2+4*8+0],%l5
	ld	[%g2+4*8+4],%l4
	sllx	%l4,32,%l4
	or	%l5,%l4,%l4
	ld	[%g2+5*8+0],%l6
	ld	[%g2+5*8+4],%l5
	sllx	%l5,32,%l5
	or	%l6,%l5,%l5
	ld	[%g2+6*8+0],%l7
	ld	[%g2+6*8+4],%l6
	sllx	%l6,32,%l6
	or	%l7,%l6,%l6
	ld	[%g2+7*8+0],%o0
	ld	[%g2+7*8+4],%l7
	sllx	%l7,32,%l7
	or	%o0,%l7,%l7
	! modulus (%g4): next window
	save	%sp,-128,%sp;	or	%g5,%fp,%fp
	ld	[%g4+0*8+0],%l1
	ld	[%g4+0*8+4],%l0
	sllx	%l0,32,%l0
	or	%l1,%l0,%l0
	ld	[%g4+1*8+0],%l2
	ld	[%g4+1*8+4],%l1
	sllx	%l1,32,%l1
	or	%l2,%l1,%l1
	ld	[%g4+2*8+0],%l3
	ld	[%g4+2*8+4],%l2
	sllx	%l2,32,%l2
	or	%l3,%l2,%l2
	ld	[%g4+3*8+0],%l4
	ld	[%g4+3*8+4],%l3
	sllx	%l3,32,%l3
	or	%l4,%l3,%l3
	ld	[%g4+4*8+0],%l5
	ld	[%g4+4*8+4],%l4
	sllx	%l4,32,%l4
	or	%l5,%l4,%l4
	ld	[%g4+5*8+0],%l6
	ld	[%g4+5*8+4],%l5
	sllx	%l5,32,%l5
	or	%l6,%l5,%l5
	ld	[%g4+6*8+0],%l7
	ld	[%g4+6*8+4],%l6
	sllx	%l6,32,%l6
	or	%l7,%l6,%l6
	ld	[%g4+7*8+0],%o0
	ld	[%g4+7*8+4],%l7
	sllx	%l7,32,%l7
	or	%o0,%l7,%l7
	save	%sp,-128,%sp;	or	%g5,%fp,%fp
	save	%sp,-128,%sp;	or	%g5,%fp,%fp
	cmp	%g2,%g3		! same pointer for both operands => squaring
	be	SIZE_T_CC,.Lmsquare_8
	nop
	! second operand (%g3): own window
	save	%sp,-128,%sp;	or	%g5,%fp,%fp
	ld	[%g3+0*8+0],%i1
	ld	[%g3+0*8+4],%i0
	sllx	%i0,32,%i0
	or	%i1,%i0,%i0
	ld	[%g3+1*8+0],%i2
	ld	[%g3+1*8+4],%i1
	sllx	%i1,32,%i1
	or	%i2,%i1,%i1
	ld	[%g3+2*8+0],%i3
	ld	[%g3+2*8+4],%i2
	sllx	%i2,32,%i2
	or	%i3,%i2,%i2
	ld	[%g3+3*8+0],%i4
	ld	[%g3+3*8+4],%i3
	sllx	%i3,32,%i3
	or	%i4,%i3,%i3
	ld	[%g3+4*8+0],%i5
	ld	[%g3+4*8+4],%i4
	sllx	%i4,32,%i4
	or	%i5,%i4,%i4
	ld	[%g3+5*8+0],%l0
	ld	[%g3+5*8+4],%i5
	sllx	%i5,32,%i5
	or	%l0,%i5,%i5
	ld	[%g3+6*8+0],%l1
	ld	[%g3+6*8+4],%l0
	sllx	%l0,32,%l0
	or	%l1,%l0,%l0
	ld	[%g3+7*8+0],%l2
	ld	[%g3+7*8+4],%l1
	sllx	%l1,32,%l1
	or	%l2,%l1,%l1
	save	%sp,-128,%sp;	or	%g5,%fp,%fp
	.word	0x81b02920+8-1	! montmul	8-1
.Lmresume_8:
	fbu,pn	%fcc3,.Lmabort_8	! montmul signals failure via %fcc3
#ifndef __arch64__
	and	%fp,%g5,%g5
	brz,pn	%g5,.Lmabort_8	! window canary lost => state clobbered
#endif
	nop			! (delay slot)
#ifdef __arch64__
	restore
	restore
	restore
	restore
	restore
#else
	restore;	and	%fp,%g5,%g5
	restore;	and	%fp,%g5,%g5
	restore;	and	%fp,%g5,%g5
	restore;	and	%fp,%g5,%g5
	 brz,pn	%g5,.Lmabort1_8
	restore
#endif
	! pull the 8-limb result out of %l0-%l7 into the FP registers
	.word	0x81b02310 !movxtod	%l0,%f0
	.word	0x85b02311 !movxtod	%l1,%f2
	.word	0x89b02312 !movxtod	%l2,%f4
	.word	0x8db02313 !movxtod	%l3,%f6
	.word	0x91b02314 !movxtod	%l4,%f8
	.word	0x95b02315 !movxtod	%l5,%f10
	.word	0x99b02316 !movxtod	%l6,%f12
	.word	0x9db02317 !movxtod	%l7,%f14
#ifdef __arch64__
	restore
#else
	and	%fp,%g5,%g5
	restore
	and	%g5,1,%o7
	and	%fp,%g5,%g5
	srl	%fp,0,%fp	! just in case?
	or	%o7,%g5,%g5
	brz,a,pn %g5,.Lmdone_8
	mov	0,%i0	! return failure
#endif
	st	%f1,[%g1+0*8+0]
	st	%f0,[%g1+0*8+4]
	st	%f3,[%g1+1*8+0]
	st	%f2,[%g1+1*8+4]
	st	%f5,[%g1+2*8+0]
	st	%f4,[%g1+2*8+4]
	st	%f7,[%g1+3*8+0]
	st	%f6,[%g1+3*8+4]
	st	%f9,[%g1+4*8+0]
	st	%f8,[%g1+4*8+4]
	st	%f11,[%g1+5*8+0]
	st	%f10,[%g1+5*8+4]
	st	%f13,[%g1+6*8+0]
	st	%f12,[%g1+6*8+4]
	st	%f15,[%g1+7*8+0]
	st	%f14,[%g1+7*8+4]
	mov	1,%i0	! return success
.Lmdone_8:
	ret
	restore

.Lmabort_8:
	restore
	restore
	restore
	restore
	restore
.Lmabort1_8:
	restore

	mov	0,%i0	! return failure
	ret
	restore

.align	32
.Lmsquare_8:
	save	%sp,-128,%sp;	or	%g5,%fp,%fp
	save	%sp,-128,%sp;	or	%g5,%fp,%fp
	.word	0x81b02940+8-1	! montsqr	8-1
	ba	.Lmresume_8
	nop
.type	bn_mul_mont_t4_8, #function
.size	bn_mul_mont_t4_8, .-bn_mul_mont_t4_8

! bn_mul_mont_t4_16 — Montgomery multiply, fixed size of 16 64-bit limbs.
! Same structure as the 8-limb variant; limbs beyond the %l registers
! spill into %o0-%o5/%o7 and (for the first operand) into FP registers
! %f24/%f26 via fsrc2.
.globl	bn_mul_mont_t4_16
.align	32
bn_mul_mont_t4_16:
#ifdef __arch64__
	mov	0,%g5
	mov	-128,%g4
#elif defined(SPARCV9_64BIT_STACK)
	SPARC_LOAD_ADDRESS_LEAF(OPENSSL_sparcv9cap_P,%g1,%g5)
	ld	[%g1+0],%g1	! OPENSSL_sparcv9_P[0]
	mov	-2047,%g4
	and	%g1,SPARCV9_64BIT_STACK,%g1
	movrz	%g1,0,%g4
	mov	-1,%g5
	add	%g4,-128,%g4
#else
	mov	-1,%g5
	mov	-128,%g4
#endif
	sllx	%g5,32,%g5
	save	%sp,%g4,%sp
#ifndef __arch64__
	save	%sp,-128,%sp	! warm it up
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	restore
	restore
	restore
	restore
	restore
	restore
#endif
	and	%sp,1,%g4
	or	%g5,%fp,%fp
	or	%g4,%g5,%g5

	! copy arguments to global registers
	mov	%i0,%g1
	mov	%i1,%g2
	mov	%i2,%g3
	mov	%i3,%g4
	ld	[%i4+0],%f1	! load *n0
	ld	[%i4+4],%f0
	.word	0xbbb00f00 !fsrc2 %f0,%f0,%f60
	save	%sp,-128,%sp;	or	%g5,%fp,%fp
	ld	[%g2+0*8+0],%l1
	ld	[%g2+0*8+4],%l0
	sllx	%l0,32,%l0
	or	%l1,%l0,%l0
	ld	[%g2+1*8+0],%l2
	ld	[%g2+1*8+4],%l1
	sllx	%l1,32,%l1
	or	%l2,%l1,%l1
	ld	[%g2+2*8+0],%l3
	ld	[%g2+2*8+4],%l2
	sllx	%l2,32,%l2
	or	%l3,%l2,%l2
	ld	[%g2+3*8+0],%l4
	ld	[%g2+3*8+4],%l3
	sllx	%l3,32,%l3
	or	%l4,%l3,%l3
	ld	[%g2+4*8+0],%l5
	ld	[%g2+4*8+4],%l4
	sllx	%l4,32,%l4
	or	%l5,%l4,%l4
	ld	[%g2+5*8+0],%l6
	ld	[%g2+5*8+4],%l5
	sllx	%l5,32,%l5
	or	%l6,%l5,%l5
	ld	[%g2+6*8+0],%l7
	ld	[%g2+6*8+4],%l6
	sllx	%l6,32,%l6
	or	%l7,%l6,%l6
	ld	[%g2+7*8+0],%o0
	ld	[%g2+7*8+4],%l7
	sllx	%l7,32,%l7
	or	%o0,%l7,%l7
	ld	[%g2+8*8+0],%o1
	ld	[%g2+8*8+4],%o0
	sllx	%o0,32,%o0
	or	%o1,%o0,%o0
	ld	[%g2+9*8+0],%o2
	ld	[%g2+9*8+4],%o1
	sllx	%o1,32,%o1
	or	%o2,%o1,%o1
	ld	[%g2+10*8+0],%o3
	ld	[%g2+10*8+4],%o2
	sllx	%o2,32,%o2
	or	%o3,%o2,%o2
	ld	[%g2+11*8+0],%o4
	ld	[%g2+11*8+4],%o3
	sllx	%o3,32,%o3
	or	%o4,%o3,%o3
	ld	[%g2+12*8+0],%o5
	ld	[%g2+12*8+4],%o4
	sllx	%o4,32,%o4
	or	%o5,%o4,%o4
	ld	[%g2+13*8+0],%o7
	ld	[%g2+13*8+4],%o5
	sllx	%o5,32,%o5
	or	%o7,%o5,%o5
	ld	[%g2+14*8+0],%f5
	ld	[%g2+14*8+4],%f4
	.word	0xb1b00f04 !fsrc2 %f0,%f4,%f24
	ld	[%g2+15*8+0],%f7
	ld	[%g2+15*8+4],%f6
	.word	0xb5b00f06 !fsrc2 %f0,%f6,%f26
	save	%sp,-128,%sp;	or	%g5,%fp,%fp
	ld	[%g4+0*8+0],%l1
	ld	[%g4+0*8+4],%l0
	sllx	%l0,32,%l0
	or	%l1,%l0,%l0
	ld	[%g4+1*8+0],%l2
	ld	[%g4+1*8+4],%l1
	sllx	%l1,32,%l1
	or	%l2,%l1,%l1
	ld	[%g4+2*8+0],%l3
	ld	[%g4+2*8+4],%l2
	sllx	%l2,32,%l2
	or	%l3,%l2,%l2
	ld	[%g4+3*8+0],%l4
	ld	[%g4+3*8+4],%l3
	sllx	%l3,32,%l3
	or	%l4,%l3,%l3
	ld	[%g4+4*8+0],%l5
	ld	[%g4+4*8+4],%l4
	sllx	%l4,32,%l4
	or	%l5,%l4,%l4
	ld	[%g4+5*8+0],%l6
	ld	[%g4+5*8+4],%l5
	sllx	%l5,32,%l5
	or	%l6,%l5,%l5
	ld	[%g4+6*8+0],%l7
	ld	[%g4+6*8+4],%l6
	sllx	%l6,32,%l6
	or	%l7,%l6,%l6
	ld	[%g4+7*8+0],%o0
	ld	[%g4+7*8+4],%l7
	sllx	%l7,32,%l7
	or	%o0,%l7,%l7
	ld	[%g4+8*8+0],%o1
	ld	[%g4+8*8+4],%o0
	sllx	%o0,32,%o0
	or	%o1,%o0,%o0
	ld	[%g4+9*8+0],%o2
	ld	[%g4+9*8+4],%o1
	sllx	%o1,32,%o1
	or	%o2,%o1,%o1
	ld	[%g4+10*8+0],%o3
	ld	[%g4+10*8+4],%o2
	sllx	%o2,32,%o2
	or	%o3,%o2,%o2
	ld	[%g4+11*8+0],%o4
	ld	[%g4+11*8+4],%o3
	sllx	%o3,32,%o3
	or	%o4,%o3,%o3
	ld	[%g4+12*8+0],%o5
	ld	[%g4+12*8+4],%o4
	sllx	%o4,32,%o4
	or	%o5,%o4,%o4
	ld	[%g4+13*8+0],%o7
	ld	[%g4+13*8+4],%o5
	sllx	%o5,32,%o5
	or	%o7,%o5,%o5
	save	%sp,-128,%sp;	or	%g5,%fp,%fp
	ld	[%g4+14*8+0],%l1
	ld	[%g4+14*8+4],%l0
	sllx	%l0,32,%l0
	or	%l1,%l0,%l0
	ld	[%g4+15*8+0],%l2
	ld	[%g4+15*8+4],%l1
	sllx	%l1,32,%l1
	or	%l2,%l1,%l1
	save	%sp,-128,%sp;	or	%g5,%fp,%fp
	cmp	%g2,%g3
	be	SIZE_T_CC,.Lmsquare_16
	nop
	save	%sp,-128,%sp;	or	%g5,%fp,%fp
	ld	[%g3+0*8+0],%i1
	ld	[%g3+0*8+4],%i0
	sllx	%i0,32,%i0
	or	%i1,%i0,%i0
	ld	[%g3+1*8+0],%i2
	ld	[%g3+1*8+4],%i1
	sllx	%i1,32,%i1
	or	%i2,%i1,%i1
	ld	[%g3+2*8+0],%i3
	ld	[%g3+2*8+4],%i2
	sllx	%i2,32,%i2
	or	%i3,%i2,%i2
	ld	[%g3+3*8+0],%i4
	ld	[%g3+3*8+4],%i3
	sllx	%i3,32,%i3
	or	%i4,%i3,%i3
	ld	[%g3+4*8+0],%i5
	ld	[%g3+4*8+4],%i4
	sllx	%i4,32,%i4
	or	%i5,%i4,%i4
	ld	[%g3+5*8+0],%l0
	ld	[%g3+5*8+4],%i5
	sllx	%i5,32,%i5
	or	%l0,%i5,%i5
	ld	[%g3+6*8+0],%l1
	ld	[%g3+6*8+4],%l0
	sllx	%l0,32,%l0
	or	%l1,%l0,%l0
	ld	[%g3+7*8+0],%l2
	ld	[%g3+7*8+4],%l1
	sllx	%l1,32,%l1
	or	%l2,%l1,%l1
	ld	[%g3+8*8+0],%l3
	ld	[%g3+8*8+4],%l2
	sllx	%l2,32,%l2
	or	%l3,%l2,%l2
	ld	[%g3+9*8+0],%l4
	ld	[%g3+9*8+4],%l3
	sllx	%l3,32,%l3
	or	%l4,%l3,%l3
	ld	[%g3+10*8+0],%l5
	ld	[%g3+10*8+4],%l4
	sllx	%l4,32,%l4
	or	%l5,%l4,%l4
	ld	[%g3+11*8+0],%l6
	ld	[%g3+11*8+4],%l5
	sllx	%l5,32,%l5
	or	%l6,%l5,%l5
	ld	[%g3+12*8+0],%l7
	ld	[%g3+12*8+4],%l6
	sllx	%l6,32,%l6
	or	%l7,%l6,%l6
	ld	[%g3+13*8+0],%o7
	ld	[%g3+13*8+4],%l7
	sllx	%l7,32,%l7
	or	%o7,%l7,%l7
	save	%sp,-128,%sp;	or	%g5,%fp,%fp
	ld	[%g3+14*8+0],%i1
	ld	[%g3+14*8+4],%i0
	sllx	%i0,32,%i0
	or	%i1,%i0,%i0
	ld	[%g3+15*8+0],%o7
	ld	[%g3+15*8+4],%i1
	sllx	%i1,32,%i1
	or	%o7,%i1,%i1
	.word	0x81b02920+16-1	! montmul	16-1
.Lmresume_16:
	fbu,pn	%fcc3,.Lmabort_16
#ifndef __arch64__
	and	%fp,%g5,%g5
	brz,pn	%g5,.Lmabort_16
#endif
	nop
#ifdef __arch64__
	restore
	restore
	restore
	restore
	restore
#else
	restore;	and	%fp,%g5,%g5
	restore;	and	%fp,%g5,%g5
	restore;	and	%fp,%g5,%g5
	restore;	and	%fp,%g5,%g5
	 brz,pn	%g5,.Lmabort1_16
	restore
#endif
	.word	0x81b02310 !movxtod	%l0,%f0
	.word	0x85b02311 !movxtod	%l1,%f2
	.word	0x89b02312 !movxtod	%l2,%f4
	.word	0x8db02313 !movxtod	%l3,%f6
	.word	0x91b02314 !movxtod	%l4,%f8
	.word	0x95b02315 !movxtod	%l5,%f10
	.word	0x99b02316 !movxtod	%l6,%f12
	.word	0x9db02317 !movxtod	%l7,%f14
	.word	0xa1b02308 !movxtod	%o0,%f16
	.word	0xa5b02309 !movxtod	%o1,%f18
	.word	0xa9b0230a !movxtod	%o2,%f20
	.word	0xadb0230b !movxtod	%o3,%f22
	.word	0xbbb0230c !movxtod	%o4,%f60
	.word	0xbfb0230d !movxtod	%o5,%f62
#ifdef __arch64__
	restore
#else
	and	%fp,%g5,%g5
	restore
	and	%g5,1,%o7
	and	%fp,%g5,%g5
	srl	%fp,0,%fp	! just in case?
	or	%o7,%g5,%g5
	brz,a,pn %g5,.Lmdone_16
	mov	0,%i0	! return failure
#endif
	st	%f1,[%g1+0*8+0]
	st	%f0,[%g1+0*8+4]
	st	%f3,[%g1+1*8+0]
	st	%f2,[%g1+1*8+4]
	st	%f5,[%g1+2*8+0]
	st	%f4,[%g1+2*8+4]
	st	%f7,[%g1+3*8+0]
	st	%f6,[%g1+3*8+4]
	st	%f9,[%g1+4*8+0]
	st	%f8,[%g1+4*8+4]
	st	%f11,[%g1+5*8+0]
	st	%f10,[%g1+5*8+4]
	st	%f13,[%g1+6*8+0]
	st	%f12,[%g1+6*8+4]
	st	%f15,[%g1+7*8+0]
	st	%f14,[%g1+7*8+4]
	st	%f17,[%g1+8*8+0]
	st	%f16,[%g1+8*8+4]
	st	%f19,[%g1+9*8+0]
	st	%f18,[%g1+9*8+4]
	st	%f21,[%g1+10*8+0]
	st	%f20,[%g1+10*8+4]
	st	%f23,[%g1+11*8+0]
	st	%f22,[%g1+11*8+4]
	! limbs 12-15 live in the upper FP registers; copy down, then store
	.word	0x81b00f1d !fsrc2 %f0,%f60,%f0
	st	%f1,[%g1+12*8+0]
	st	%f0,[%g1+12*8+4]
	.word	0x85b00f1f !fsrc2 %f0,%f62,%f2
	st	%f3,[%g1+13*8+0]
	st	%f2,[%g1+13*8+4]
	.word	0x89b00f18 !fsrc2 %f0,%f24,%f4
	st	%f5,[%g1+14*8+0]
	st	%f4,[%g1+14*8+4]
	.word	0x8db00f1a !fsrc2 %f0,%f26,%f6
	st	%f7,[%g1+15*8+0]
	st	%f6,[%g1+15*8+4]
	mov	1,%i0	! return success
.Lmdone_16:
	ret
	restore

.Lmabort_16:
	restore
	restore
	restore
	restore
	restore
.Lmabort1_16:
	restore

	mov	0,%i0	! return failure
	ret
	restore

.align	32
.Lmsquare_16:
	save	%sp,-128,%sp;	or	%g5,%fp,%fp
	save	%sp,-128,%sp;	or	%g5,%fp,%fp
	.word	0x81b02940+16-1	! montsqr	16-1
	ba	.Lmresume_16
	nop
.type	bn_mul_mont_t4_16, #function
.size	bn_mul_mont_t4_16, .-bn_mul_mont_t4_16

! bn_mul_mont_t4_24 — Montgomery multiply, fixed size of 24 64-bit limbs.
! First operand's limbs 14-23 are parked in %f24-%f42.
.globl	bn_mul_mont_t4_24
.align	32
bn_mul_mont_t4_24:
#ifdef __arch64__
	mov	0,%g5
	mov	-128,%g4
#elif defined(SPARCV9_64BIT_STACK)
	SPARC_LOAD_ADDRESS_LEAF(OPENSSL_sparcv9cap_P,%g1,%g5)
	ld	[%g1+0],%g1	! OPENSSL_sparcv9_P[0]
	mov	-2047,%g4
	and	%g1,SPARCV9_64BIT_STACK,%g1
	movrz	%g1,0,%g4
	mov	-1,%g5
	add	%g4,-128,%g4
#else
	mov	-1,%g5
	mov	-128,%g4
#endif
	sllx	%g5,32,%g5
	save	%sp,%g4,%sp
#ifndef __arch64__
	save	%sp,-128,%sp	! warm it up
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	restore
	restore
	restore
	restore
	restore
	restore
#endif
	and	%sp,1,%g4
	or	%g5,%fp,%fp
	or	%g4,%g5,%g5

	! copy arguments to global registers
	mov	%i0,%g1
	mov	%i1,%g2
	mov	%i2,%g3
	mov	%i3,%g4
	ld	[%i4+0],%f1	! load *n0
	ld	[%i4+4],%f0
	.word	0xbbb00f00 !fsrc2 %f0,%f0,%f60
	save	%sp,-128,%sp;	or	%g5,%fp,%fp
	ld	[%g2+0*8+0],%l1
	ld	[%g2+0*8+4],%l0
	sllx	%l0,32,%l0
	or	%l1,%l0,%l0
	ld	[%g2+1*8+0],%l2
	ld	[%g2+1*8+4],%l1
	sllx	%l1,32,%l1
	or	%l2,%l1,%l1
	ld	[%g2+2*8+0],%l3
	ld	[%g2+2*8+4],%l2
	sllx	%l2,32,%l2
	or	%l3,%l2,%l2
	ld	[%g2+3*8+0],%l4
	ld	[%g2+3*8+4],%l3
	sllx	%l3,32,%l3
	or	%l4,%l3,%l3
	ld	[%g2+4*8+0],%l5
	ld	[%g2+4*8+4],%l4
	sllx	%l4,32,%l4
	or	%l5,%l4,%l4
	ld	[%g2+5*8+0],%l6
	ld	[%g2+5*8+4],%l5
	sllx	%l5,32,%l5
	or	%l6,%l5,%l5
	ld	[%g2+6*8+0],%l7
	ld	[%g2+6*8+4],%l6
	sllx	%l6,32,%l6
	or	%l7,%l6,%l6
	ld	[%g2+7*8+0],%o0
	ld	[%g2+7*8+4],%l7
	sllx	%l7,32,%l7
	or	%o0,%l7,%l7
	ld	[%g2+8*8+0],%o1
	ld	[%g2+8*8+4],%o0
	sllx	%o0,32,%o0
	or	%o1,%o0,%o0
	ld	[%g2+9*8+0],%o2
	ld	[%g2+9*8+4],%o1
	sllx	%o1,32,%o1
	or	%o2,%o1,%o1
	ld	[%g2+10*8+0],%o3
	ld	[%g2+10*8+4],%o2
	sllx	%o2,32,%o2
	or	%o3,%o2,%o2
	ld	[%g2+11*8+0],%o4
	ld	[%g2+11*8+4],%o3
	sllx	%o3,32,%o3
	or	%o4,%o3,%o3
	ld	[%g2+12*8+0],%o5
	ld	[%g2+12*8+4],%o4
	sllx	%o4,32,%o4
	or	%o5,%o4,%o4
	ld	[%g2+13*8+0],%o7
	ld	[%g2+13*8+4],%o5
	sllx	%o5,32,%o5
	or	%o7,%o5,%o5
	ld	[%g2+14*8+0],%f5
	ld	[%g2+14*8+4],%f4
	.word	0xb1b00f04 !fsrc2 %f0,%f4,%f24
	ld	[%g2+15*8+0],%f7
	ld	[%g2+15*8+4],%f6
	.word	0xb5b00f06 !fsrc2 %f0,%f6,%f26
	ld	[%g2+16*8+0],%f1
	ld	[%g2+16*8+4],%f0
	.word	0xb9b00f00 !fsrc2 %f0,%f0,%f28
	ld	[%g2+17*8+0],%f3
	ld	[%g2+17*8+4],%f2
	.word	0xbdb00f02 !fsrc2 %f0,%f2,%f30
	ld	[%g2+18*8+0],%f5
	ld	[%g2+18*8+4],%f4
	.word	0x83b00f04 !fsrc2 %f0,%f4,%f32
	ld	[%g2+19*8+0],%f7
	ld	[%g2+19*8+4],%f6
	.word	0x87b00f06 !fsrc2 %f0,%f6,%f34
	ld	[%g2+20*8+0],%f1
	ld	[%g2+20*8+4],%f0
	.word	0x8bb00f00 !fsrc2 %f0,%f0,%f36
	ld	[%g2+21*8+0],%f3
	ld	[%g2+21*8+4],%f2
	.word	0x8fb00f02 !fsrc2 %f0,%f2,%f38
	ld	[%g2+22*8+0],%f5
	ld	[%g2+22*8+4],%f4
	.word	0x93b00f04 !fsrc2 %f0,%f4,%f40
	ld	[%g2+23*8+0],%f7
	ld	[%g2+23*8+4],%f6
	.word	0x97b00f06 !fsrc2 %f0,%f6,%f42
	save	%sp,-128,%sp;	or	%g5,%fp,%fp
	ld	[%g4+0*8+0],%l1
	ld	[%g4+0*8+4],%l0
	sllx	%l0,32,%l0
	or	%l1,%l0,%l0
	ld	[%g4+1*8+0],%l2
	ld	[%g4+1*8+4],%l1
	sllx	%l1,32,%l1
	or	%l2,%l1,%l1
	ld	[%g4+2*8+0],%l3
	ld	[%g4+2*8+4],%l2
	sllx	%l2,32,%l2
	or	%l3,%l2,%l2
	ld	[%g4+3*8+0],%l4
	ld	[%g4+3*8+4],%l3
	sllx	%l3,32,%l3
	or	%l4,%l3,%l3
	ld	[%g4+4*8+0],%l5
	ld	[%g4+4*8+4],%l4
	sllx	%l4,32,%l4
	or	%l5,%l4,%l4
	ld	[%g4+5*8+0],%l6
	ld	[%g4+5*8+4],%l5
	sllx	%l5,32,%l5
	or	%l6,%l5,%l5
	ld	[%g4+6*8+0],%l7
	ld	[%g4+6*8+4],%l6
	sllx	%l6,32,%l6
	or	%l7,%l6,%l6
	ld	[%g4+7*8+0],%o0
	ld	[%g4+7*8+4],%l7
	sllx	%l7,32,%l7
	or	%o0,%l7,%l7
	ld	[%g4+8*8+0],%o1
	ld	[%g4+8*8+4],%o0
	sllx	%o0,32,%o0
	or	%o1,%o0,%o0
	ld	[%g4+9*8+0],%o2
	ld	[%g4+9*8+4],%o1
	sllx	%o1,32,%o1
	or	%o2,%o1,%o1
	ld	[%g4+10*8+0],%o3
	ld	[%g4+10*8+4],%o2
	sllx	%o2,32,%o2
	or	%o3,%o2,%o2
	ld	[%g4+11*8+0],%o4
	ld	[%g4+11*8+4],%o3
	sllx	%o3,32,%o3
	or	%o4,%o3,%o3
	ld	[%g4+12*8+0],%o5
	ld	[%g4+12*8+4],%o4
	sllx	%o4,32,%o4
	or	%o5,%o4,%o4
	ld	[%g4+13*8+0],%o7
	ld	[%g4+13*8+4],%o5
	sllx	%o5,32,%o5
	or	%o7,%o5,%o5
	save	%sp,-128,%sp;	or	%g5,%fp,%fp
	ld	[%g4+14*8+0],%l1
	ld	[%g4+14*8+4],%l0
	sllx	%l0,32,%l0
	or	%l1,%l0,%l0
	ld	[%g4+15*8+0],%l2
	ld	[%g4+15*8+4],%l1
	sllx	%l1,32,%l1
	or	%l2,%l1,%l1
	ld	[%g4+16*8+0],%l3
	ld	[%g4+16*8+4],%l2
	sllx	%l2,32,%l2
	or	%l3,%l2,%l2
	ld	[%g4+17*8+0],%l4
	ld	[%g4+17*8+4],%l3
	sllx	%l3,32,%l3
	or	%l4,%l3,%l3
	ld	[%g4+18*8+0],%l5
	ld	[%g4+18*8+4],%l4
	sllx	%l4,32,%l4
	or	%l5,%l4,%l4
	ld	[%g4+19*8+0],%l6
	ld	[%g4+19*8+4],%l5
	sllx	%l5,32,%l5
	or	%l6,%l5,%l5
	ld	[%g4+20*8+0],%l7
	ld	[%g4+20*8+4],%l6
	sllx	%l6,32,%l6
	or	%l7,%l6,%l6
	ld	[%g4+21*8+0],%o0
	ld	[%g4+21*8+4],%l7
	sllx	%l7,32,%l7
	or	%o0,%l7,%l7
	ld	[%g4+22*8+0],%o1
	ld	[%g4+22*8+4],%o0
	sllx	%o0,32,%o0
	or	%o1,%o0,%o0
	ld	[%g4+23*8+0],%o2
	ld	[%g4+23*8+4],%o1
	sllx	%o1,32,%o1
	or	%o2,%o1,%o1
	save	%sp,-128,%sp;	or	%g5,%fp,%fp
	cmp	%g2,%g3
	be	SIZE_T_CC,.Lmsquare_24
	nop
	save	%sp,-128,%sp;	or	%g5,%fp,%fp
	ld	[%g3+0*8+0],%i1
	ld	[%g3+0*8+4],%i0
	sllx	%i0,32,%i0
	or	%i1,%i0,%i0
	ld	[%g3+1*8+0],%i2
	ld	[%g3+1*8+4],%i1
	sllx	%i1,32,%i1
	or	%i2,%i1,%i1
	ld	[%g3+2*8+0],%i3
	ld	[%g3+2*8+4],%i2
	sllx	%i2,32,%i2
	or	%i3,%i2,%i2
	ld	[%g3+3*8+0],%i4
	ld	[%g3+3*8+4],%i3
	sllx	%i3,32,%i3
	or	%i4,%i3,%i3
	ld	[%g3+4*8+0],%i5
	ld	[%g3+4*8+4],%i4
	sllx	%i4,32,%i4
	or	%i5,%i4,%i4
	ld	[%g3+5*8+0],%l0
	ld	[%g3+5*8+4],%i5
	sllx	%i5,32,%i5
	or	%l0,%i5,%i5
	ld	[%g3+6*8+0],%l1
	ld	[%g3+6*8+4],%l0
	sllx	%l0,32,%l0
	or	%l1,%l0,%l0
	ld	[%g3+7*8+0],%l2
	ld	[%g3+7*8+4],%l1
	sllx	%l1,32,%l1
	or	%l2,%l1,%l1
	ld	[%g3+8*8+0],%l3
	ld	[%g3+8*8+4],%l2
	sllx	%l2,32,%l2
	or	%l3,%l2,%l2
	ld	[%g3+9*8+0],%l4
	ld	[%g3+9*8+4],%l3
	sllx	%l3,32,%l3
	or	%l4,%l3,%l3
	ld	[%g3+10*8+0],%l5
	ld	[%g3+10*8+4],%l4
	sllx	%l4,32,%l4
	or	%l5,%l4,%l4
	ld	[%g3+11*8+0],%l6
	ld	[%g3+11*8+4],%l5
	sllx	%l5,32,%l5
	or	%l6,%l5,%l5
	ld	[%g3+12*8+0],%l7
	ld	[%g3+12*8+4],%l6
	sllx	%l6,32,%l6
	or	%l7,%l6,%l6
	ld	[%g3+13*8+0],%o7
	ld	[%g3+13*8+4],%l7
	sllx	%l7,32,%l7
	or	%o7,%l7,%l7
	save	%sp,-128,%sp;	or	%g5,%fp,%fp
	ld	[%g3+14*8+0],%i1
	ld	[%g3+14*8+4],%i0
	sllx	%i0,32,%i0
	or	%i1,%i0,%i0
	ld	[%g3+15*8+0],%i2
	ld	[%g3+15*8+4],%i1
	sllx	%i1,32,%i1
	or	%i2,%i1,%i1
	ld	[%g3+16*8+0],%i3
	ld	[%g3+16*8+4],%i2
	sllx	%i2,32,%i2
	or	%i3,%i2,%i2
	ld	[%g3+17*8+0],%i4
	ld	[%g3+17*8+4],%i3
	sllx	%i3,32,%i3
	or	%i4,%i3,%i3
	ld	[%g3+18*8+0],%i5
	ld	[%g3+18*8+4],%i4
	sllx	%i4,32,%i4
	or	%i5,%i4,%i4
	ld	[%g3+19*8+0],%l0
	ld	[%g3+19*8+4],%i5
	sllx	%i5,32,%i5
	or	%l0,%i5,%i5
	ld	[%g3+20*8+0],%l1
	ld	[%g3+20*8+4],%l0
	sllx	%l0,32,%l0
	or	%l1,%l0,%l0
	ld	[%g3+21*8+0],%l2
	ld	[%g3+21*8+4],%l1
	sllx	%l1,32,%l1
	or	%l2,%l1,%l1
	ld	[%g3+22*8+0],%l3
	ld	[%g3+22*8+4],%l2
	sllx	%l2,32,%l2
	or	%l3,%l2,%l2
	ld	[%g3+23*8+0],%o7
	ld	[%g3+23*8+4],%l3
	sllx	%l3,32,%l3
	or	%o7,%l3,%l3
	.word	0x81b02920+24-1	! montmul	24-1
.Lmresume_24:
	fbu,pn	%fcc3,.Lmabort_24
#ifndef __arch64__
	and	%fp,%g5,%g5
	brz,pn	%g5,.Lmabort_24
#endif
	nop
#ifdef __arch64__
	restore
	restore
	restore
	restore
	restore
#else
	restore;	and	%fp,%g5,%g5
	restore;	and	%fp,%g5,%g5
	restore;	and	%fp,%g5,%g5
	restore;	and	%fp,%g5,%g5
	 brz,pn	%g5,.Lmabort1_24
	restore
#endif
	.word	0x81b02310 !movxtod	%l0,%f0
	.word	0x85b02311 !movxtod	%l1,%f2
	.word	0x89b02312 !movxtod	%l2,%f4
	.word	0x8db02313 !movxtod	%l3,%f6
	.word	0x91b02314 !movxtod	%l4,%f8
	.word	0x95b02315 !movxtod	%l5,%f10
	.word	0x99b02316 !movxtod	%l6,%f12
	.word	0x9db02317 !movxtod	%l7,%f14
	.word	0xa1b02308 !movxtod	%o0,%f16
	.word	0xa5b02309 !movxtod	%o1,%f18
	.word	0xa9b0230a !movxtod	%o2,%f20
	.word	0xadb0230b !movxtod	%o3,%f22
	.word	0xbbb0230c !movxtod	%o4,%f60
	.word	0xbfb0230d !movxtod	%o5,%f62
#ifdef __arch64__
	restore
#else
	and	%fp,%g5,%g5
	restore
	and	%g5,1,%o7
	and	%fp,%g5,%g5
	srl	%fp,0,%fp	! just in case?
	or	%o7,%g5,%g5
	brz,a,pn %g5,.Lmdone_24
	mov	0,%i0	! return failure
#endif
	st	%f1,[%g1+0*8+0]
	st	%f0,[%g1+0*8+4]
	st	%f3,[%g1+1*8+0]
	st	%f2,[%g1+1*8+4]
	st	%f5,[%g1+2*8+0]
	st	%f4,[%g1+2*8+4]
	st	%f7,[%g1+3*8+0]
	st	%f6,[%g1+3*8+4]
	st	%f9,[%g1+4*8+0]
	st	%f8,[%g1+4*8+4]
	st	%f11,[%g1+5*8+0]
	st	%f10,[%g1+5*8+4]
	st	%f13,[%g1+6*8+0]
	st	%f12,[%g1+6*8+4]
	st	%f15,[%g1+7*8+0]
	st	%f14,[%g1+7*8+4]
	st	%f17,[%g1+8*8+0]
	st	%f16,[%g1+8*8+4]
	st	%f19,[%g1+9*8+0]
	st	%f18,[%g1+9*8+4]
	st	%f21,[%g1+10*8+0]
	st	%f20,[%g1+10*8+4]
	st	%f23,[%g1+11*8+0]
	st	%f22,[%g1+11*8+4]
	.word	0x81b00f1d !fsrc2 %f0,%f60,%f0
	st	%f1,[%g1+12*8+0]
	st	%f0,[%g1+12*8+4]
	.word	0x85b00f1f !fsrc2 %f0,%f62,%f2
	st	%f3,[%g1+13*8+0]
	st	%f2,[%g1+13*8+4]
	.word	0x89b00f18 !fsrc2 %f0,%f24,%f4
	st	%f5,[%g1+14*8+0]
	st	%f4,[%g1+14*8+4]
	.word	0x8db00f1a !fsrc2 %f0,%f26,%f6
	st	%f7,[%g1+15*8+0]
	st	%f6,[%g1+15*8+4]
	.word	0x81b00f1c !fsrc2 %f0,%f28,%f0
	st	%f1,[%g1+16*8+0]
	st	%f0,[%g1+16*8+4]
	.word	0x85b00f1e !fsrc2 %f0,%f30,%f2
	st	%f3,[%g1+17*8+0]
	st	%f2,[%g1+17*8+4]
	.word	0x89b00f01 !fsrc2 %f0,%f32,%f4
	st	%f5,[%g1+18*8+0]
	st	%f4,[%g1+18*8+4]
	.word	0x8db00f03 !fsrc2 %f0,%f34,%f6
	st	%f7,[%g1+19*8+0]
	st	%f6,[%g1+19*8+4]
	.word	0x81b00f05 !fsrc2 %f0,%f36,%f0
	st	%f1,[%g1+20*8+0]
	st	%f0,[%g1+20*8+4]
	.word	0x85b00f07 !fsrc2 %f0,%f38,%f2
	st	%f3,[%g1+21*8+0]
	st	%f2,[%g1+21*8+4]
	.word	0x89b00f09 !fsrc2 %f0,%f40,%f4
	st	%f5,[%g1+22*8+0]
	st	%f4,[%g1+22*8+4]
	.word	0x8db00f0b !fsrc2 %f0,%f42,%f6
	st	%f7,[%g1+23*8+0]
	st	%f6,[%g1+23*8+4]
	mov	1,%i0	! return success
.Lmdone_24:
	ret
	restore

.Lmabort_24:
	restore
	restore
	restore
	restore
	restore
.Lmabort1_24:
	restore

	mov	0,%i0	! return failure
	ret
	restore

.align	32
.Lmsquare_24:
	save	%sp,-128,%sp;	or	%g5,%fp,%fp
	save	%sp,-128,%sp;	or	%g5,%fp,%fp
	.word	0x81b02940+24-1	! montsqr	24-1
	ba	.Lmresume_24
	nop
.type	bn_mul_mont_t4_24, #function
.size	bn_mul_mont_t4_24, .-bn_mul_mont_t4_24

! bn_mul_mont_t4_32 — Montgomery multiply, fixed size of 32 64-bit limbs.
! NOTE(review): this function continues past the end of the visible chunk;
! the remainder (stores/abort paths/.Lmsquare_32) is not shown here.
.globl	bn_mul_mont_t4_32
.align	32
bn_mul_mont_t4_32:
#ifdef __arch64__
	mov	0,%g5
	mov	-128,%g4
#elif defined(SPARCV9_64BIT_STACK)
	SPARC_LOAD_ADDRESS_LEAF(OPENSSL_sparcv9cap_P,%g1,%g5)
	ld	[%g1+0],%g1	! OPENSSL_sparcv9_P[0]
	mov	-2047,%g4
	and	%g1,SPARCV9_64BIT_STACK,%g1
	movrz	%g1,0,%g4
	mov	-1,%g5
	add	%g4,-128,%g4
#else
	mov	-1,%g5
	mov	-128,%g4
#endif
	sllx	%g5,32,%g5
	save	%sp,%g4,%sp
#ifndef __arch64__
	save	%sp,-128,%sp	! warm it up
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	restore
	restore
	restore
	restore
	restore
	restore
#endif
	and	%sp,1,%g4
	or	%g5,%fp,%fp
	or	%g4,%g5,%g5

	! copy arguments to global registers
	mov	%i0,%g1
	mov	%i1,%g2
	mov	%i2,%g3
	mov	%i3,%g4
	ld	[%i4+0],%f1	! load *n0
	ld	[%i4+4],%f0
	.word	0xbbb00f00 !fsrc2 %f0,%f0,%f60
	save	%sp,-128,%sp;	or	%g5,%fp,%fp
	ld	[%g2+0*8+0],%l1
	ld	[%g2+0*8+4],%l0
	sllx	%l0,32,%l0
	or	%l1,%l0,%l0
	ld	[%g2+1*8+0],%l2
	ld	[%g2+1*8+4],%l1
	sllx	%l1,32,%l1
	or	%l2,%l1,%l1
	ld	[%g2+2*8+0],%l3
	ld	[%g2+2*8+4],%l2
	sllx	%l2,32,%l2
	or	%l3,%l2,%l2
	ld	[%g2+3*8+0],%l4
	ld	[%g2+3*8+4],%l3
	sllx	%l3,32,%l3
	or	%l4,%l3,%l3
	ld	[%g2+4*8+0],%l5
	ld	[%g2+4*8+4],%l4
	sllx	%l4,32,%l4
	or	%l5,%l4,%l4
	ld	[%g2+5*8+0],%l6
	ld	[%g2+5*8+4],%l5
	sllx	%l5,32,%l5
	or	%l6,%l5,%l5
	ld	[%g2+6*8+0],%l7
	ld	[%g2+6*8+4],%l6
	sllx	%l6,32,%l6
	or	%l7,%l6,%l6
	ld	[%g2+7*8+0],%o0
	ld	[%g2+7*8+4],%l7
	sllx	%l7,32,%l7
	or	%o0,%l7,%l7
	ld	[%g2+8*8+0],%o1
	ld	[%g2+8*8+4],%o0
	sllx	%o0,32,%o0
	or	%o1,%o0,%o0
	ld	[%g2+9*8+0],%o2
	ld	[%g2+9*8+4],%o1
	sllx	%o1,32,%o1
	or	%o2,%o1,%o1
	ld	[%g2+10*8+0],%o3
	ld	[%g2+10*8+4],%o2
	sllx	%o2,32,%o2
	or	%o3,%o2,%o2
	ld	[%g2+11*8+0],%o4
	ld	[%g2+11*8+4],%o3
	sllx	%o3,32,%o3
	or	%o4,%o3,%o3
	ld	[%g2+12*8+0],%o5
	ld	[%g2+12*8+4],%o4
	sllx	%o4,32,%o4
	or	%o5,%o4,%o4
	ld	[%g2+13*8+0],%o7
	ld	[%g2+13*8+4],%o5
	sllx	%o5,32,%o5
	or	%o7,%o5,%o5
	ld	[%g2+14*8+0],%f5
	ld	[%g2+14*8+4],%f4
	.word	0xb1b00f04 !fsrc2 %f0,%f4,%f24
	ld	[%g2+15*8+0],%f7
	ld	[%g2+15*8+4],%f6
	.word	0xb5b00f06 !fsrc2 %f0,%f6,%f26
	ld	[%g2+16*8+0],%f1
	ld	[%g2+16*8+4],%f0
	.word	0xb9b00f00 !fsrc2 %f0,%f0,%f28
	ld	[%g2+17*8+0],%f3
	ld	[%g2+17*8+4],%f2
	.word	0xbdb00f02 !fsrc2 %f0,%f2,%f30
	ld	[%g2+18*8+0],%f5
	ld	[%g2+18*8+4],%f4
	.word	0x83b00f04 !fsrc2 %f0,%f4,%f32
	ld	[%g2+19*8+0],%f7
	ld	[%g2+19*8+4],%f6
	.word	0x87b00f06 !fsrc2 %f0,%f6,%f34
	ld	[%g2+20*8+0],%f1
	ld	[%g2+20*8+4],%f0
	.word	0x8bb00f00 !fsrc2 %f0,%f0,%f36
	ld	[%g2+21*8+0],%f3
	ld	[%g2+21*8+4],%f2
	.word	0x8fb00f02 !fsrc2 %f0,%f2,%f38
	ld	[%g2+22*8+0],%f5
	ld	[%g2+22*8+4],%f4
	.word	0x93b00f04 !fsrc2 %f0,%f4,%f40
	ld	[%g2+23*8+0],%f7
	ld	[%g2+23*8+4],%f6
	.word	0x97b00f06 !fsrc2 %f0,%f6,%f42
	ld	[%g2+24*8+0],%f1
	ld	[%g2+24*8+4],%f0
	.word	0x9bb00f00 !fsrc2 %f0,%f0,%f44
	ld	[%g2+25*8+0],%f3
	ld	[%g2+25*8+4],%f2
	.word	0x9fb00f02 !fsrc2 %f0,%f2,%f46
	ld	[%g2+26*8+0],%f5
	ld	[%g2+26*8+4],%f4
	.word	0xa3b00f04 !fsrc2 %f0,%f4,%f48
	ld	[%g2+27*8+0],%f7
	ld	[%g2+27*8+4],%f6
	.word	0xa7b00f06 !fsrc2 %f0,%f6,%f50
	ld	[%g2+28*8+0],%f1
	ld	[%g2+28*8+4],%f0
	.word	0xabb00f00 !fsrc2 %f0,%f0,%f52
	ld	[%g2+29*8+0],%f3
	ld	[%g2+29*8+4],%f2
	.word	0xafb00f02 !fsrc2 %f0,%f2,%f54
	ld	[%g2+30*8+0],%f5
	ld	[%g2+30*8+4],%f4
	.word	0xb3b00f04 !fsrc2 %f0,%f4,%f56
	ld	[%g2+31*8+0],%f7
	ld	[%g2+31*8+4],%f6
	.word	0xb7b00f06 !fsrc2 %f0,%f6,%f58
	save	%sp,-128,%sp;	or	%g5,%fp,%fp
	ld	[%g4+0*8+0],%l1
	ld	[%g4+0*8+4],%l0
	sllx	%l0,32,%l0
	or	%l1,%l0,%l0
	ld	[%g4+1*8+0],%l2
	ld	[%g4+1*8+4],%l1
	sllx	%l1,32,%l1
	or	%l2,%l1,%l1
	ld	[%g4+2*8+0],%l3
	ld	[%g4+2*8+4],%l2
	sllx	%l2,32,%l2
	or	%l3,%l2,%l2
	ld	[%g4+3*8+0],%l4
	ld	[%g4+3*8+4],%l3
	sllx	%l3,32,%l3
	or	%l4,%l3,%l3
	ld	[%g4+4*8+0],%l5
	ld	[%g4+4*8+4],%l4
	sllx	%l4,32,%l4
	or	%l5,%l4,%l4
	ld	[%g4+5*8+0],%l6
	ld	[%g4+5*8+4],%l5
	sllx	%l5,32,%l5
	or	%l6,%l5,%l5
	ld	[%g4+6*8+0],%l7
	ld	[%g4+6*8+4],%l6
	sllx	%l6,32,%l6
	or	%l7,%l6,%l6
	ld	[%g4+7*8+0],%o0
	ld	[%g4+7*8+4],%l7
	sllx	%l7,32,%l7
	or	%o0,%l7,%l7
	ld	[%g4+8*8+0],%o1
	ld	[%g4+8*8+4],%o0
	sllx	%o0,32,%o0
	or	%o1,%o0,%o0
	ld	[%g4+9*8+0],%o2
	ld	[%g4+9*8+4],%o1
	sllx	%o1,32,%o1
	or	%o2,%o1,%o1
	ld	[%g4+10*8+0],%o3
	ld	[%g4+10*8+4],%o2
	sllx	%o2,32,%o2
	or	%o3,%o2,%o2
	ld	[%g4+11*8+0],%o4
	ld	[%g4+11*8+4],%o3
	sllx	%o3,32,%o3
	or	%o4,%o3,%o3
	ld	[%g4+12*8+0],%o5
	ld	[%g4+12*8+4],%o4
	sllx	%o4,32,%o4
	or	%o5,%o4,%o4
	ld	[%g4+13*8+0],%o7
	ld	[%g4+13*8+4],%o5
	sllx	%o5,32,%o5
	or	%o7,%o5,%o5
	save	%sp,-128,%sp;	or	%g5,%fp,%fp
	ld	[%g4+14*8+0],%l1
	ld	[%g4+14*8+4],%l0
	sllx	%l0,32,%l0
	or	%l1,%l0,%l0
	ld	[%g4+15*8+0],%l2
	ld	[%g4+15*8+4],%l1
	sllx	%l1,32,%l1
	or	%l2,%l1,%l1
	ld	[%g4+16*8+0],%l3
	ld	[%g4+16*8+4],%l2
	sllx	%l2,32,%l2
	or	%l3,%l2,%l2
	ld	[%g4+17*8+0],%l4
	ld	[%g4+17*8+4],%l3
	sllx	%l3,32,%l3
	or	%l4,%l3,%l3
	ld	[%g4+18*8+0],%l5
	ld	[%g4+18*8+4],%l4
	sllx	%l4,32,%l4
	or	%l5,%l4,%l4
	ld	[%g4+19*8+0],%l6
	ld	[%g4+19*8+4],%l5
	sllx	%l5,32,%l5
	or	%l6,%l5,%l5
	ld	[%g4+20*8+0],%l7
	ld	[%g4+20*8+4],%l6
	sllx	%l6,32,%l6
	or	%l7,%l6,%l6
	ld	[%g4+21*8+0],%o0
	ld	[%g4+21*8+4],%l7
	sllx	%l7,32,%l7
	or	%o0,%l7,%l7
	ld	[%g4+22*8+0],%o1
	ld	[%g4+22*8+4],%o0
	sllx	%o0,32,%o0
	or	%o1,%o0,%o0
	ld	[%g4+23*8+0],%o2
	ld	[%g4+23*8+4],%o1
	sllx	%o1,32,%o1
	or	%o2,%o1,%o1
	ld	[%g4+24*8+0],%o3
	ld	[%g4+24*8+4],%o2
	sllx	%o2,32,%o2
	or	%o3,%o2,%o2
	ld	[%g4+25*8+0],%o4
	ld	[%g4+25*8+4],%o3
	sllx	%o3,32,%o3
	or	%o4,%o3,%o3
	ld	[%g4+26*8+0],%o5
	ld	[%g4+26*8+4],%o4
	sllx	%o4,32,%o4
	or	%o5,%o4,%o4
	ld	[%g4+27*8+0],%o7
	ld	[%g4+27*8+4],%o5
	sllx	%o5,32,%o5
	or	%o7,%o5,%o5
	save	%sp,-128,%sp;	or	%g5,%fp,%fp
	ld	[%g4+28*8+0],%l1
	ld	[%g4+28*8+4],%l0
	sllx	%l0,32,%l0
	or	%l1,%l0,%l0
	ld	[%g4+29*8+0],%l2
	ld	[%g4+29*8+4],%l1
	sllx	%l1,32,%l1
	or	%l2,%l1,%l1
	ld	[%g4+30*8+0],%l3
	ld	[%g4+30*8+4],%l2
	sllx	%l2,32,%l2
	or	%l3,%l2,%l2
	ld	[%g4+31*8+0],%o7
	ld	[%g4+31*8+4],%l3
	sllx	%l3,32,%l3
	or	%o7,%l3,%l3
	cmp	%g2,%g3
	be	SIZE_T_CC,.Lmsquare_32
	nop
	save	%sp,-128,%sp;	or	%g5,%fp,%fp
	ld	[%g3+0*8+0],%i1
	ld	[%g3+0*8+4],%i0
	sllx	%i0,32,%i0
	or	%i1,%i0,%i0
	ld	[%g3+1*8+0],%i2
	ld	[%g3+1*8+4],%i1
	sllx	%i1,32,%i1
	or	%i2,%i1,%i1
	ld	[%g3+2*8+0],%i3
	ld	[%g3+2*8+4],%i2
	sllx	%i2,32,%i2
	or	%i3,%i2,%i2
	ld	[%g3+3*8+0],%i4
	ld	[%g3+3*8+4],%i3
	sllx	%i3,32,%i3
	or	%i4,%i3,%i3
	ld	[%g3+4*8+0],%i5
	ld	[%g3+4*8+4],%i4
	sllx	%i4,32,%i4
	or	%i5,%i4,%i4
	ld	[%g3+5*8+0],%l0
	ld	[%g3+5*8+4],%i5
	sllx	%i5,32,%i5
	or	%l0,%i5,%i5
	ld	[%g3+6*8+0],%l1
	ld	[%g3+6*8+4],%l0
	sllx	%l0,32,%l0
	or	%l1,%l0,%l0
	ld	[%g3+7*8+0],%l2
	ld	[%g3+7*8+4],%l1
	sllx	%l1,32,%l1
	or	%l2,%l1,%l1
	ld	[%g3+8*8+0],%l3
	ld	[%g3+8*8+4],%l2
	sllx	%l2,32,%l2
	or	%l3,%l2,%l2
	ld	[%g3+9*8+0],%l4
	ld	[%g3+9*8+4],%l3
	sllx	%l3,32,%l3
	or	%l4,%l3,%l3
	ld	[%g3+10*8+0],%l5
	ld	[%g3+10*8+4],%l4
	sllx	%l4,32,%l4
	or	%l5,%l4,%l4
	ld	[%g3+11*8+0],%l6
	ld	[%g3+11*8+4],%l5
	sllx	%l5,32,%l5
	or	%l6,%l5,%l5
	ld	[%g3+12*8+0],%l7
	ld	[%g3+12*8+4],%l6
	sllx	%l6,32,%l6
	or	%l7,%l6,%l6
	ld	[%g3+13*8+0],%o7
	ld	[%g3+13*8+4],%l7
	sllx	%l7,32,%l7
	or	%o7,%l7,%l7
	save	%sp,-128,%sp;	or	%g5,%fp,%fp
	ld	[%g3+14*8+0],%i1
	ld	[%g3+14*8+4],%i0
	sllx	%i0,32,%i0
	or	%i1,%i0,%i0
	ld	[%g3+15*8+0],%i2
	ld	[%g3+15*8+4],%i1
	sllx	%i1,32,%i1
	or	%i2,%i1,%i1
	ld	[%g3+16*8+0],%i3
	ld	[%g3+16*8+4],%i2
	sllx	%i2,32,%i2
	or	%i3,%i2,%i2
	ld	[%g3+17*8+0],%i4
	ld	[%g3+17*8+4],%i3
	sllx	%i3,32,%i3
	or	%i4,%i3,%i3
	ld	[%g3+18*8+0],%i5
	ld	[%g3+18*8+4],%i4
	sllx	%i4,32,%i4
	or	%i5,%i4,%i4
	ld	[%g3+19*8+0],%l0
	ld	[%g3+19*8+4],%i5
	sllx	%i5,32,%i5
	or	%l0,%i5,%i5
	ld	[%g3+20*8+0],%l1
	ld	[%g3+20*8+4],%l0
	sllx	%l0,32,%l0
	or	%l1,%l0,%l0
	ld	[%g3+21*8+0],%l2
	ld	[%g3+21*8+4],%l1
	sllx	%l1,32,%l1
	or	%l2,%l1,%l1
	ld	[%g3+22*8+0],%l3
	ld	[%g3+22*8+4],%l2
	sllx	%l2,32,%l2
	or	%l3,%l2,%l2
	ld	[%g3+23*8+0],%l4
	ld	[%g3+23*8+4],%l3
	sllx	%l3,32,%l3
	or	%l4,%l3,%l3
	ld	[%g3+24*8+0],%l5
	ld	[%g3+24*8+4],%l4
	sllx	%l4,32,%l4
	or	%l5,%l4,%l4
	ld	[%g3+25*8+0],%l6
	ld	[%g3+25*8+4],%l5
	sllx	%l5,32,%l5
	or	%l6,%l5,%l5
	ld	[%g3+26*8+0],%l7
	ld	[%g3+26*8+4],%l6
	sllx	%l6,32,%l6
	or	%l7,%l6,%l6
	ld	[%g3+27*8+0],%o0
	ld	[%g3+27*8+4],%l7
	sllx	%l7,32,%l7
	or	%o0,%l7,%l7
	ld	[%g3+28*8+0],%o1
	ld	[%g3+28*8+4],%o0
	sllx	%o0,32,%o0
	or	%o1,%o0,%o0
	ld	[%g3+29*8+0],%o2
	ld	[%g3+29*8+4],%o1
	sllx	%o1,32,%o1
	or	%o2,%o1,%o1
	ld	[%g3+30*8+0],%o3
	ld	[%g3+30*8+4],%o2
	sllx	%o2,32,%o2
	or	%o3,%o2,%o2
	ld	[%g3+31*8+0],%o7
	ld	[%g3+31*8+4],%o3
	sllx	%o3,32,%o3
	or	%o7,%o3,%o3
	.word	0x81b02920+32-1	! montmul	32-1
.Lmresume_32:
	fbu,pn	%fcc3,.Lmabort_32
#ifndef __arch64__
	and	%fp,%g5,%g5
	brz,pn	%g5,.Lmabort_32
#endif
	nop
#ifdef __arch64__
	restore
	restore
	restore
	restore
	restore
#else
	restore;	and	%fp,%g5,%g5
	restore;	and	%fp,%g5,%g5
	restore;	and	%fp,%g5,%g5
	restore;	and	%fp,%g5,%g5
	 brz,pn	%g5,.Lmabort1_32
	restore
#endif
	.word	0x81b02310 !movxtod	%l0,%f0
	.word	0x85b02311 !movxtod	%l1,%f2
	.word	0x89b02312 !movxtod	%l2,%f4
	.word	0x8db02313 !movxtod	%l3,%f6
	.word	0x91b02314 !movxtod	%l4,%f8
	.word	0x95b02315 !movxtod	%l5,%f10
	.word	0x99b02316 !movxtod	%l6,%f12
	.word	0x9db02317 !movxtod	%l7,%f14
	.word	0xa1b02308 !movxtod	%o0,%f16
	.word	0xa5b02309 !movxtod	%o1,%f18
	.word	0xa9b0230a !movxtod	%o2,%f20
	.word	0xadb0230b !movxtod	%o3,%f22
	.word	0xbbb0230c !movxtod	%o4,%f60
	.word	0xbfb0230d !movxtod	%o5,%f62
#ifdef __arch64__
	restore
#else
	and	%fp,%g5,%g5
	restore
	and	%g5,1,%o7
	and	%fp,%g5,%g5
	srl	%fp,0,%fp	! just in case?
	or	%o7,%g5,%g5
	brz,a,pn %g5,.Lmdone_32
	mov	0,%i0	! 
return failure 1544#endif 1545 st %f1,[%g1+0*8+0] 1546 st %f0,[%g1+0*8+4] 1547 st %f3,[%g1+1*8+0] 1548 st %f2,[%g1+1*8+4] 1549 st %f5,[%g1+2*8+0] 1550 st %f4,[%g1+2*8+4] 1551 st %f7,[%g1+3*8+0] 1552 st %f6,[%g1+3*8+4] 1553 st %f9,[%g1+4*8+0] 1554 st %f8,[%g1+4*8+4] 1555 st %f11,[%g1+5*8+0] 1556 st %f10,[%g1+5*8+4] 1557 st %f13,[%g1+6*8+0] 1558 st %f12,[%g1+6*8+4] 1559 st %f15,[%g1+7*8+0] 1560 st %f14,[%g1+7*8+4] 1561 st %f17,[%g1+8*8+0] 1562 st %f16,[%g1+8*8+4] 1563 st %f19,[%g1+9*8+0] 1564 st %f18,[%g1+9*8+4] 1565 st %f21,[%g1+10*8+0] 1566 st %f20,[%g1+10*8+4] 1567 st %f23,[%g1+11*8+0] 1568 st %f22,[%g1+11*8+4] 1569 .word 0x81b00f1d !fsrc2 %f0,%f60,%f0 1570 st %f1,[%g1+12*8+0] 1571 st %f0,[%g1+12*8+4] 1572 .word 0x85b00f1f !fsrc2 %f0,%f62,%f2 1573 st %f3,[%g1+13*8+0] 1574 st %f2,[%g1+13*8+4] 1575 .word 0x89b00f18 !fsrc2 %f0,%f24,%f4 1576 st %f5,[%g1+14*8+0] 1577 st %f4,[%g1+14*8+4] 1578 .word 0x8db00f1a !fsrc2 %f0,%f26,%f6 1579 st %f7,[%g1+15*8+0] 1580 st %f6,[%g1+15*8+4] 1581 .word 0x81b00f1c !fsrc2 %f0,%f28,%f0 1582 st %f1,[%g1+16*8+0] 1583 st %f0,[%g1+16*8+4] 1584 .word 0x85b00f1e !fsrc2 %f0,%f30,%f2 1585 st %f3,[%g1+17*8+0] 1586 st %f2,[%g1+17*8+4] 1587 .word 0x89b00f01 !fsrc2 %f0,%f32,%f4 1588 st %f5,[%g1+18*8+0] 1589 st %f4,[%g1+18*8+4] 1590 .word 0x8db00f03 !fsrc2 %f0,%f34,%f6 1591 st %f7,[%g1+19*8+0] 1592 st %f6,[%g1+19*8+4] 1593 .word 0x81b00f05 !fsrc2 %f0,%f36,%f0 1594 st %f1,[%g1+20*8+0] 1595 st %f0,[%g1+20*8+4] 1596 .word 0x85b00f07 !fsrc2 %f0,%f38,%f2 1597 st %f3,[%g1+21*8+0] 1598 st %f2,[%g1+21*8+4] 1599 .word 0x89b00f09 !fsrc2 %f0,%f40,%f4 1600 st %f5,[%g1+22*8+0] 1601 st %f4,[%g1+22*8+4] 1602 .word 0x8db00f0b !fsrc2 %f0,%f42,%f6 1603 st %f7,[%g1+23*8+0] 1604 st %f6,[%g1+23*8+4] 1605 .word 0x81b00f0d !fsrc2 %f0,%f44,%f0 1606 st %f1,[%g1+24*8+0] 1607 st %f0,[%g1+24*8+4] 1608 .word 0x85b00f0f !fsrc2 %f0,%f46,%f2 1609 st %f3,[%g1+25*8+0] 1610 st %f2,[%g1+25*8+4] 1611 .word 0x89b00f11 !fsrc2 %f0,%f48,%f4 1612 st %f5,[%g1+26*8+0] 1613 st %f4,[%g1+26*8+4] 
! ---- tail of bn_mul_mont_t4_32 (function entry is earlier in the file) ----
! Move the remaining result limbs (parked in %f24-%f58) down into %f0-%f7
! with fsrc2 and store them to the result vector at %g1; each 64-bit limb
! is stored as two 32-bit words, high word at +0, low word at +4.
	.word	0x8db00f13	!fsrc2 %f0,%f50,%f6
	st	%f7,[%g1+27*8+0]
	st	%f6,[%g1+27*8+4]
	.word	0x81b00f15	!fsrc2 %f0,%f52,%f0
	st	%f1,[%g1+28*8+0]
	st	%f0,[%g1+28*8+4]
	.word	0x85b00f17	!fsrc2 %f0,%f54,%f2
	st	%f3,[%g1+29*8+0]
	st	%f2,[%g1+29*8+4]
	.word	0x89b00f19	!fsrc2 %f0,%f56,%f4
	st	%f5,[%g1+30*8+0]
	st	%f4,[%g1+30*8+4]
	.word	0x8db00f1b	!fsrc2 %f0,%f58,%f6
	st	%f7,[%g1+31*8+0]
	st	%f6,[%g1+31*8+4]
	mov	1,%i0		! return success
.Lmdone_32:
	ret
	restore

.Lmabort_32:			! failure path: unwind the extra register windows
	restore
	restore
	restore
	restore
	restore
.Lmabort1_32:
	restore

	mov	0,%i0		! return failure
	ret
	restore

.align	32
.Lmsquare_32:			! taken when ap == bp: use the squaring form
	save	%sp,-128,%sp; or %g5,%fp,%fp
	save	%sp,-128,%sp; or %g5,%fp,%fp
	.word	0x81b02940+32-1	! montsqr	32-1
	ba	.Lmresume_32
	nop
.type	bn_mul_mont_t4_32, #function
.size	bn_mul_mont_t4_32, .-bn_mul_mont_t4_32

!-----------------------------------------------------------------------
! bn_pwr5_mont_t4_8 - Montgomery exponentiation kernel for 8-limb
! (512-bit) operands, built on the SPARC T4 MONTMUL/MONTSQR instructions
! (emitted as raw .word encodings; the decoded mnemonic follows each one
! in a comment).  Each pass over .Lstride_8 performs 5 Montgomery
! squarings followed by one Montgomery multiplication by a table entry,
! i.e. processes one 5-bit window of the exponent.
!
! In:  %i0 - 8 x 64-bit limb vector; read at entry, overwritten with the
!            result on success
!      %i1 - 8-limb vector loaded into the second operand windows
!            (presumably the modulus -- TODO confirm against caller)
!      %i2 - pointer to two 32-bit words, loaded as n0 into %f1:%f0
!      %i3 - power table, scanned in 32-byte strides
!      %i4, %i5 - two 32-bit values packed into %g4: high half is a
!            window counter (goes negative when done), low half holds
!            selector bits (NOTE(review): roles inferred from use --
!            confirm against caller)
! Out: %i0 = 1 on success, 0 on failure (32-bit ABI only: failure means
!      the register-window integrity check tripped)
!
! The T4 instructions take operands spread across several register
! windows, hence the save/restore chains.  On the 32-bit ABI every %fp
! is tagged with the signature kept in %g5 so that a window disturbed by
! a spill/fill can be detected afterwards.
!-----------------------------------------------------------------------
.globl	bn_pwr5_mont_t4_8
.align	32
bn_pwr5_mont_t4_8:
#ifdef	__arch64__
	mov	0,%g5		! 64-bit ABI: no frame-pointer tag needed
	mov	-128,%g4
#elif defined(SPARCV9_64BIT_STACK)
	SPARC_LOAD_ADDRESS_LEAF(OPENSSL_sparcv9cap_P,%g1,%g5)
	ld	[%g1+0],%g1	! OPENSSL_sparcv9_P[0]
	mov	-2047,%g4
	and	%g1,SPARCV9_64BIT_STACK,%g1
	movrz	%g1,0,%g4	! choose frame bias by stack flavour
	mov	-1,%g5
	add	%g4,-128,%g4
#else
	mov	-1,%g5
	mov	-128,%g4
#endif
	sllx	%g5,32,%g5	! %g5 = window-tag mask (0 on 64-bit)
	save	%sp,%g4,%sp
#ifndef	__arch64__
	save	%sp,-128,%sp	! warm it up
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	restore
	restore
	restore
	restore
	restore
	restore
#endif
	and	%sp,1,%g4
	or	%g5,%fp,%fp	! tag this frame pointer
	or	%g4,%g5,%g5

	! copy arguments to global registers
	mov	%i0,%g1
	mov	%i1,%g2
	ld	[%i2+0],%f1	! load *n0
	ld	[%i2+4],%f0
	mov	%i3,%g3
	srl	%i4,%g0,%i4	! pack last arguments
	sllx	%i5,32,%g4
	or	%i4,%g4,%g4
	.word	0xbbb00f00	!fsrc2 %f0,%f0,%f60
	save	%sp,-128,%sp; or %g5,%fp,%fp
	! first operand window: limbs of the %g1 vector
	ldx	[%g1+0*8],%l0
	ldx	[%g1+1*8],%l1
	ldx	[%g1+2*8],%l2
	ldx	[%g1+3*8],%l3
	ldx	[%g1+4*8],%l4
	ldx	[%g1+5*8],%l5
	ldx	[%g1+6*8],%l6
	ldx	[%g1+7*8],%l7
	save	%sp,-128,%sp; or %g5,%fp,%fp
	! next window: limbs of the %g2 vector
	ldx	[%g2+0*8],%l0
	ldx	[%g2+1*8],%l1
	ldx	[%g2+2*8],%l2
	ldx	[%g2+3*8],%l3
	ldx	[%g2+4*8],%l4
	ldx	[%g2+5*8],%l5
	ldx	[%g2+6*8],%l6
	ldx	[%g2+7*8],%l7
	save	%sp,-128,%sp; or %g5,%fp,%fp
	save	%sp,-128,%sp; or %g5,%fp,%fp
	save	%sp,-128,%sp; or %g5,%fp,%fp

	! peel 5 exponent bits and derive the %ccr selector mask used by
	! the constant-time table lookup below
	srlx	%g4, 32, %o4	! unpack %g4
	srl	%g4, %g0, %o5
	sub	%o4, 5, %o4
	mov	%g3, %o7
	sllx	%o4, 32, %g4	! re-pack %g4
	or	%o5, %g4, %g4
	srl	%o5, %o4, %o5
	srl	%o5, 2, %o4
	and	%o5, 3, %o5
	and	%o4, 7, %o4
	sll	%o5, 3, %o5	! offset within first cache line
	add	%o5, %o7, %o7	! of the pwrtbl
	or	%g0, 1, %o5
	sll	%o5, %o4, %o4
	wr	%o4, %g0, %ccr
	b	.Lstride_8
	nop
.align	16
.Lstride_8:
	! Gather the selected power from the table without data-dependent
	! addressing: load all 16 interleaved candidates per limb pair and
	! keep one via conditional moves keyed off the %ccr bits set above.
	! Limb pair -> %i0:%i1
	ldx	[%o7+0*32], %i0
	ldx	[%o7+8*32], %i1
	ldx	[%o7+1*32], %o4
	ldx	[%o7+9*32], %o5
	movvs	%icc, %o4, %i0
	ldx	[%o7+2*32], %o4
	movvs	%icc, %o5, %i1
	ldx	[%o7+10*32],%o5
	move	%icc, %o4, %i0
	ldx	[%o7+3*32], %o4
	move	%icc, %o5, %i1
	ldx	[%o7+11*32],%o5
	movneg	%icc, %o4, %i0
	ldx	[%o7+4*32], %o4
	movneg	%icc, %o5, %i1
	ldx	[%o7+12*32],%o5
	movcs	%xcc, %o4, %i0
	ldx	[%o7+5*32],%o4
	movcs	%xcc, %o5, %i1
	ldx	[%o7+13*32],%o5
	movvs	%xcc, %o4, %i0
	ldx	[%o7+6*32], %o4
	movvs	%xcc, %o5, %i1
	ldx	[%o7+14*32],%o5
	move	%xcc, %o4, %i0
	ldx	[%o7+7*32], %o4
	move	%xcc, %o5, %i1
	ldx	[%o7+15*32],%o5
	movneg	%xcc, %o4, %i0
	add	%o7,16*32, %o7
	movneg	%xcc, %o5, %i1
	! Limb pair -> %i2:%i3
	ldx	[%o7+0*32], %i2
	ldx	[%o7+8*32], %i3
	ldx	[%o7+1*32], %o4
	ldx	[%o7+9*32], %o5
	movvs	%icc, %o4, %i2
	ldx	[%o7+2*32], %o4
	movvs	%icc, %o5, %i3
	ldx	[%o7+10*32],%o5
	move	%icc, %o4, %i2
	ldx	[%o7+3*32], %o4
	move	%icc, %o5, %i3
	ldx	[%o7+11*32],%o5
	movneg	%icc, %o4, %i2
	ldx	[%o7+4*32], %o4
	movneg	%icc, %o5, %i3
	ldx	[%o7+12*32],%o5
	movcs	%xcc, %o4, %i2
	ldx	[%o7+5*32],%o4
	movcs	%xcc, %o5, %i3
	ldx	[%o7+13*32],%o5
	movvs	%xcc, %o4, %i2
	ldx	[%o7+6*32], %o4
	movvs	%xcc, %o5, %i3
	ldx	[%o7+14*32],%o5
	move	%xcc, %o4, %i2
	ldx	[%o7+7*32], %o4
	move	%xcc, %o5, %i3
	ldx	[%o7+15*32],%o5
	movneg	%xcc, %o4, %i2
	add	%o7,16*32, %o7
	movneg	%xcc, %o5, %i3
	! Limb pair -> %i4:%i5
	ldx	[%o7+0*32], %i4
	ldx	[%o7+8*32], %i5
	ldx	[%o7+1*32], %o4
	ldx	[%o7+9*32], %o5
	movvs	%icc, %o4, %i4
	ldx	[%o7+2*32], %o4
	movvs	%icc, %o5, %i5
	ldx	[%o7+10*32],%o5
	move	%icc, %o4, %i4
	ldx	[%o7+3*32], %o4
	move	%icc, %o5, %i5
	ldx	[%o7+11*32],%o5
	movneg	%icc, %o4, %i4
	ldx	[%o7+4*32], %o4
	movneg	%icc, %o5, %i5
	ldx	[%o7+12*32],%o5
	movcs	%xcc, %o4, %i4
	ldx	[%o7+5*32],%o4
	movcs	%xcc, %o5, %i5
	ldx	[%o7+13*32],%o5
	movvs	%xcc, %o4, %i4
	ldx	[%o7+6*32], %o4
	movvs	%xcc, %o5, %i5
	ldx	[%o7+14*32],%o5
	move	%xcc, %o4, %i4
	ldx	[%o7+7*32], %o4
	move	%xcc, %o5, %i5
	ldx	[%o7+15*32],%o5
	movneg	%xcc, %o4, %i4
	add	%o7,16*32, %o7
	movneg	%xcc, %o5, %i5
	! Limb pair -> %l0:%l1
	ldx	[%o7+0*32], %l0
	ldx	[%o7+8*32], %l1
	ldx	[%o7+1*32], %o4
	ldx	[%o7+9*32], %o5
	movvs	%icc, %o4, %l0
	ldx	[%o7+2*32], %o4
	movvs	%icc, %o5, %l1
	ldx	[%o7+10*32],%o5
	move	%icc, %o4, %l0
	ldx	[%o7+3*32], %o4
	move	%icc, %o5, %l1
	ldx	[%o7+11*32],%o5
	movneg	%icc, %o4, %l0
	ldx	[%o7+4*32], %o4
	movneg	%icc, %o5, %l1
	ldx	[%o7+12*32],%o5
	movcs	%xcc, %o4, %l0
	ldx	[%o7+5*32],%o4
	movcs	%xcc, %o5, %l1
	ldx	[%o7+13*32],%o5
	movvs	%xcc, %o4, %l0
	ldx	[%o7+6*32], %o4
	movvs	%xcc, %o5, %l1
	ldx	[%o7+14*32],%o5
	move	%xcc, %o4, %l0
	ldx	[%o7+7*32], %o4
	move	%xcc, %o5, %l1
	ldx	[%o7+15*32],%o5
	movneg	%xcc, %o4, %l0
	add	%o7,16*32, %o7
	movneg	%xcc, %o5, %l1
	save	%sp,-128,%sp; or %g5,%fp,%fp
	! prepare the selector for the NEXT window while this one computes
	srax	%g4, 32, %o4	! unpack %g4
	srl	%g4, %g0, %o5
	sub	%o4, 5, %o4
	mov	%g3, %i7
	sllx	%o4, 32, %g4	! re-pack %g4
	or	%o5, %g4, %g4
	srl	%o5, %o4, %o5
	srl	%o5, 2, %o4
	and	%o5, 3, %o5
	and	%o4, 7, %o4
	sll	%o5, 3, %o5	! offset within first cache line
	add	%o5, %i7, %i7	! of the pwrtbl
	or	%g0, 1, %o5
	sll	%o5, %o4, %o4
	! 5 Montgomery squarings; %fcc3 unordered after the op signals
	! failure, and on 32-bit a clobbered window tag also aborts
	.word	0x81b02940+8-1	! montsqr	8-1
	fbu,pn	%fcc3,.Labort_8
#ifndef	__arch64__
	and	%fp,%g5,%g5
	brz,pn	%g5,.Labort_8
#endif
	nop
	.word	0x81b02940+8-1	! montsqr	8-1
	fbu,pn	%fcc3,.Labort_8
#ifndef	__arch64__
	and	%fp,%g5,%g5
	brz,pn	%g5,.Labort_8
#endif
	nop
	.word	0x81b02940+8-1	! montsqr	8-1
	fbu,pn	%fcc3,.Labort_8
#ifndef	__arch64__
	and	%fp,%g5,%g5
	brz,pn	%g5,.Labort_8
#endif
	nop
	.word	0x81b02940+8-1	! montsqr	8-1
	fbu,pn	%fcc3,.Labort_8
#ifndef	__arch64__
	and	%fp,%g5,%g5
	brz,pn	%g5,.Labort_8
#endif
	nop
	.word	0x81b02940+8-1	! montsqr	8-1
	fbu,pn	%fcc3,.Labort_8
#ifndef	__arch64__
	and	%fp,%g5,%g5
	brz,pn	%g5,.Labort_8
#endif
	nop
	wr	%o4, %g0, %ccr	! arm selector for the gather in the next pass
	.word	0x81b02920+8-1	! montmul	8-1
! montmul 8-1 -- tail of bn_pwr5_mont_t4_8: check the multiply, loop or exit
	fbu,pn	%fcc3,.Labort_8
#ifndef	__arch64__
	and	%fp,%g5,%g5
	brz,pn	%g5,.Labort_8
#endif

	srax	%g4, 32, %o4	! window counter went negative => done
#ifdef	__arch64__
	brgez	%o4,.Lstride_8
	restore
	restore
	restore
	restore
	restore
#else
	brgez	%o4,.Lstride_8
	restore; and %fp,%g5,%g5	! re-check window tags while unwinding
	restore; and %fp,%g5,%g5
	restore; and %fp,%g5,%g5
	restore; and %fp,%g5,%g5
	brz,pn	%g5,.Labort1_8
	restore
#endif
	! result limbs come back in %l0-%l7; move to FP regs for the stores
	.word	0x81b02310	!movxtod %l0,%f0
	.word	0x85b02311	!movxtod %l1,%f2
	.word	0x89b02312	!movxtod %l2,%f4
	.word	0x8db02313	!movxtod %l3,%f6
	.word	0x91b02314	!movxtod %l4,%f8
	.word	0x95b02315	!movxtod %l5,%f10
	.word	0x99b02316	!movxtod %l6,%f12
	.word	0x9db02317	!movxtod %l7,%f14
#ifdef	__arch64__
	restore
#else
	and	%fp,%g5,%g5	! final window-integrity verdict
	restore
	and	%g5,1,%o7
	and	%fp,%g5,%g5
	srl	%fp,0,%fp	! just in case?
	or	%o7,%g5,%g5
	brz,a,pn %g5,.Ldone_8
	mov	0,%i0		! return failure
#endif
	std	%f0,[%g1+0*8]
	std	%f2,[%g1+1*8]
	std	%f4,[%g1+2*8]
	std	%f6,[%g1+3*8]
	std	%f8,[%g1+4*8]
	std	%f10,[%g1+5*8]
	std	%f12,[%g1+6*8]
	std	%f14,[%g1+7*8]
	mov	1,%i0		! return success
.Ldone_8:
	ret
	restore

.Labort_8:			! failure inside the stride loop: unwind and fail
	restore
	restore
	restore
	restore
	restore
.Labort1_8:
	restore

	mov	0,%i0		! return failure
	ret
	restore
.type	bn_pwr5_mont_t4_8, #function
.size	bn_pwr5_mont_t4_8, .-bn_pwr5_mont_t4_8

!-----------------------------------------------------------------------
! bn_pwr5_mont_t4_16 - same kernel as bn_pwr5_mont_t4_8 but for 16-limb
! (1024-bit) operands: per 5-bit exponent window, 5 x MONTSQR 16 followed
! by one MONTMUL 16 against a constant-time table lookup.  Register/FP
! argument conventions and the 32-bit window-tag integrity scheme are
! identical to bn_pwr5_mont_t4_8 above; the wider operand additionally
! parks limbs 14-15 of the first vector in %f24/%f26 and returns result
! limbs 12-13 in %f60/%f62.
!-----------------------------------------------------------------------
.globl	bn_pwr5_mont_t4_16
.align	32
bn_pwr5_mont_t4_16:
#ifdef	__arch64__
	mov	0,%g5		! 64-bit ABI: no frame-pointer tag needed
	mov	-128,%g4
#elif defined(SPARCV9_64BIT_STACK)
	SPARC_LOAD_ADDRESS_LEAF(OPENSSL_sparcv9cap_P,%g1,%g5)
	ld	[%g1+0],%g1	! OPENSSL_sparcv9_P[0]
	mov	-2047,%g4
	and	%g1,SPARCV9_64BIT_STACK,%g1
	movrz	%g1,0,%g4	! choose frame bias by stack flavour
	mov	-1,%g5
	add	%g4,-128,%g4
#else
	mov	-1,%g5
	mov	-128,%g4
#endif
	sllx	%g5,32,%g5	! %g5 = window-tag mask (0 on 64-bit)
	save	%sp,%g4,%sp
#ifndef	__arch64__
	save	%sp,-128,%sp	! warm it up
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	restore
	restore
	restore
	restore
	restore
	restore
#endif
	and	%sp,1,%g4
	or	%g5,%fp,%fp	! tag this frame pointer
	or	%g4,%g5,%g5

	! copy arguments to global registers
	mov	%i0,%g1
	mov	%i1,%g2
	ld	[%i2+0],%f1	! load *n0
	ld	[%i2+4],%f0
	mov	%i3,%g3
	srl	%i4,%g0,%i4	! pack last arguments
	sllx	%i5,32,%g4
	or	%i4,%g4,%g4
	.word	0xbbb00f00	!fsrc2 %f0,%f0,%f60
	save	%sp,-128,%sp; or %g5,%fp,%fp
	! first operand window: limbs 0-13 in %l/%o regs, 14-15 in %f24/%f26
	ldx	[%g1+0*8],%l0
	ldx	[%g1+1*8],%l1
	ldx	[%g1+2*8],%l2
	ldx	[%g1+3*8],%l3
	ldx	[%g1+4*8],%l4
	ldx	[%g1+5*8],%l5
	ldx	[%g1+6*8],%l6
	ldx	[%g1+7*8],%l7
	ldx	[%g1+8*8],%o0
	ldx	[%g1+9*8],%o1
	ldx	[%g1+10*8],%o2
	ldx	[%g1+11*8],%o3
	ldx	[%g1+12*8],%o4
	ldx	[%g1+13*8],%o5
	ldd	[%g1+14*8],%f24
	ldd	[%g1+15*8],%f26
	save	%sp,-128,%sp; or %g5,%fp,%fp
	! second vector: limbs 0-13 in this window ...
	ldx	[%g2+0*8],%l0
	ldx	[%g2+1*8],%l1
	ldx	[%g2+2*8],%l2
	ldx	[%g2+3*8],%l3
	ldx	[%g2+4*8],%l4
	ldx	[%g2+5*8],%l5
	ldx	[%g2+6*8],%l6
	ldx	[%g2+7*8],%l7
	ldx	[%g2+8*8],%o0
	ldx	[%g2+9*8],%o1
	ldx	[%g2+10*8],%o2
	ldx	[%g2+11*8],%o3
	ldx	[%g2+12*8],%o4
	ldx	[%g2+13*8],%o5
	save	%sp,-128,%sp; or %g5,%fp,%fp
	! ... and limbs 14-15 in the next one
	ldx	[%g2+14*8],%l0
	ldx	[%g2+15*8],%l1
	save	%sp,-128,%sp; or %g5,%fp,%fp
	save	%sp,-128,%sp; or %g5,%fp,%fp

	! peel 5 exponent bits and derive the %ccr selector mask for the
	! constant-time table gather
	srlx	%g4, 32, %o4	! unpack %g4
	srl	%g4, %g0, %o5
	sub	%o4, 5, %o4
	mov	%g3, %o7
	sllx	%o4, 32, %g4	! re-pack %g4
	or	%o5, %g4, %g4
	srl	%o5, %o4, %o5
	srl	%o5, 2, %o4
	and	%o5, 3, %o5
	and	%o4, 7, %o4
	sll	%o5, 3, %o5	! offset within first cache line
	add	%o5, %o7, %o7	! of the pwrtbl
	or	%g0, 1, %o5
	sll	%o5, %o4, %o4
	wr	%o4, %g0, %ccr
	b	.Lstride_16
	nop
.align	16
.Lstride_16:
	! Constant-time gather of the selected power: all 16 interleaved
	! candidates are loaded per limb pair; cmovs keyed off %ccr keep one.
	! Limb pair -> %i0:%i1
	ldx	[%o7+0*32], %i0
	ldx	[%o7+8*32], %i1
	ldx	[%o7+1*32], %o4
	ldx	[%o7+9*32], %o5
	movvs	%icc, %o4, %i0
	ldx	[%o7+2*32], %o4
	movvs	%icc, %o5, %i1
	ldx	[%o7+10*32],%o5
	move	%icc, %o4, %i0
	ldx	[%o7+3*32], %o4
	move	%icc, %o5, %i1
	ldx	[%o7+11*32],%o5
	movneg	%icc, %o4, %i0
	ldx	[%o7+4*32], %o4
	movneg	%icc, %o5, %i1
	ldx	[%o7+12*32],%o5
	movcs	%xcc, %o4, %i0
	ldx	[%o7+5*32],%o4
	movcs	%xcc, %o5, %i1
	ldx	[%o7+13*32],%o5
	movvs	%xcc, %o4, %i0
	ldx	[%o7+6*32], %o4
	movvs	%xcc, %o5, %i1
	ldx	[%o7+14*32],%o5
	move	%xcc, %o4, %i0
	ldx	[%o7+7*32], %o4
	move	%xcc, %o5, %i1
	ldx	[%o7+15*32],%o5
	movneg	%xcc, %o4, %i0
	add	%o7,16*32, %o7
	movneg	%xcc, %o5, %i1
	! Limb pair -> %i2:%i3
	ldx	[%o7+0*32], %i2
	ldx	[%o7+8*32], %i3
	ldx	[%o7+1*32], %o4
	ldx	[%o7+9*32], %o5
	movvs	%icc, %o4, %i2
	ldx	[%o7+2*32], %o4
	movvs	%icc, %o5, %i3
	ldx	[%o7+10*32],%o5
	move	%icc, %o4, %i2
	ldx	[%o7+3*32], %o4
	move	%icc, %o5, %i3
	ldx	[%o7+11*32],%o5
	movneg	%icc, %o4, %i2
	ldx	[%o7+4*32], %o4
	movneg	%icc, %o5, %i3
	ldx	[%o7+12*32],%o5
	movcs	%xcc, %o4, %i2
	ldx	[%o7+5*32],%o4
	movcs	%xcc, %o5, %i3
	ldx	[%o7+13*32],%o5
	movvs	%xcc, %o4, %i2
	ldx	[%o7+6*32], %o4
	movvs	%xcc, %o5, %i3
	ldx	[%o7+14*32],%o5
	move	%xcc, %o4, %i2
	ldx	[%o7+7*32], %o4
	move	%xcc, %o5, %i3
	ldx	[%o7+15*32],%o5
	movneg	%xcc, %o4, %i2
	add	%o7,16*32, %o7
	movneg	%xcc, %o5, %i3
	! Limb pair -> %i4:%i5
	ldx	[%o7+0*32], %i4
	ldx	[%o7+8*32], %i5
	ldx	[%o7+1*32], %o4
	ldx	[%o7+9*32], %o5
	movvs	%icc, %o4, %i4
	ldx	[%o7+2*32], %o4
	movvs	%icc, %o5, %i5
	ldx	[%o7+10*32],%o5
	move	%icc, %o4, %i4
	ldx	[%o7+3*32], %o4
	move	%icc, %o5, %i5
	ldx	[%o7+11*32],%o5
	movneg	%icc, %o4, %i4
	ldx	[%o7+4*32], %o4
	movneg	%icc, %o5, %i5
	ldx	[%o7+12*32],%o5
	movcs	%xcc, %o4, %i4
	ldx	[%o7+5*32],%o4
	movcs	%xcc, %o5, %i5
	ldx	[%o7+13*32],%o5
	movvs	%xcc, %o4, %i4
	ldx	[%o7+6*32], %o4
	movvs	%xcc, %o5, %i5
	ldx	[%o7+14*32],%o5
	move	%xcc, %o4, %i4
	ldx	[%o7+7*32], %o4
	move	%xcc, %o5, %i5
	ldx	[%o7+15*32],%o5
	movneg	%xcc, %o4, %i4
	add	%o7,16*32, %o7
	movneg	%xcc, %o5, %i5
	! Limb pair -> %l0:%l1
	ldx	[%o7+0*32], %l0
	ldx	[%o7+8*32], %l1
	ldx	[%o7+1*32], %o4
	ldx	[%o7+9*32], %o5
	movvs	%icc, %o4, %l0
	ldx	[%o7+2*32], %o4
	movvs	%icc, %o5, %l1
	ldx	[%o7+10*32],%o5
	move	%icc, %o4, %l0
	ldx	[%o7+3*32], %o4
	move	%icc, %o5, %l1
	ldx	[%o7+11*32],%o5
	movneg	%icc, %o4, %l0
	ldx	[%o7+4*32], %o4
	movneg	%icc, %o5, %l1
	ldx	[%o7+12*32],%o5
	movcs	%xcc, %o4, %l0
	ldx	[%o7+5*32],%o4
	movcs	%xcc, %o5, %l1
	ldx	[%o7+13*32],%o5
	movvs	%xcc, %o4, %l0
	ldx	[%o7+6*32], %o4
	movvs	%xcc, %o5, %l1
	ldx	[%o7+14*32],%o5
	move	%xcc, %o4, %l0
	ldx	[%o7+7*32], %o4
	move	%xcc, %o5, %l1
	ldx	[%o7+15*32],%o5
	movneg	%xcc, %o4, %l0
	add	%o7,16*32, %o7
	movneg	%xcc, %o5, %l1
	! Limb pair -> %l2:%l3
	ldx	[%o7+0*32], %l2
	ldx	[%o7+8*32], %l3
	ldx	[%o7+1*32], %o4
	ldx	[%o7+9*32], %o5
	movvs	%icc, %o4, %l2
	ldx	[%o7+2*32], %o4
	movvs	%icc, %o5, %l3
	ldx	[%o7+10*32],%o5
	move	%icc, %o4, %l2
	ldx	[%o7+3*32], %o4
	move	%icc, %o5, %l3
	ldx	[%o7+11*32],%o5
	movneg	%icc, %o4, %l2
	ldx	[%o7+4*32], %o4
	movneg	%icc, %o5, %l3
	ldx	[%o7+12*32],%o5
	movcs	%xcc, %o4, %l2
	ldx	[%o7+5*32],%o4
	movcs	%xcc, %o5, %l3
	ldx	[%o7+13*32],%o5
	movvs	%xcc, %o4, %l2
	ldx	[%o7+6*32], %o4
	movvs	%xcc, %o5, %l3
	ldx	[%o7+14*32],%o5
	move	%xcc, %o4, %l2
	ldx	[%o7+7*32], %o4
	move	%xcc, %o5, %l3
	ldx	[%o7+15*32],%o5
	movneg	%xcc, %o4, %l2
	add	%o7,16*32, %o7
	movneg	%xcc, %o5, %l3
	! Limb pair -> %l4:%l5
	ldx	[%o7+0*32], %l4
	ldx	[%o7+8*32], %l5
	ldx	[%o7+1*32], %o4
	ldx	[%o7+9*32], %o5
	movvs	%icc, %o4, %l4
	ldx	[%o7+2*32], %o4
	movvs	%icc, %o5, %l5
	ldx	[%o7+10*32],%o5
	move	%icc, %o4, %l4
	ldx	[%o7+3*32], %o4
	move	%icc, %o5, %l5
	ldx	[%o7+11*32],%o5
	movneg	%icc, %o4, %l4
	ldx	[%o7+4*32], %o4
	movneg	%icc, %o5, %l5
	ldx	[%o7+12*32],%o5
	movcs	%xcc, %o4, %l4
	ldx	[%o7+5*32],%o4
	movcs	%xcc, %o5, %l5
	ldx	[%o7+13*32],%o5
	movvs	%xcc, %o4, %l4
	ldx	[%o7+6*32], %o4
	movvs	%xcc, %o5, %l5
	ldx	[%o7+14*32],%o5
	move	%xcc, %o4, %l4
	ldx	[%o7+7*32], %o4
	move	%xcc, %o5, %l5
	ldx	[%o7+15*32],%o5
	movneg	%xcc, %o4, %l4
	add	%o7,16*32, %o7
	movneg	%xcc, %o5, %l5
	! Limb pair -> %l6:%l7
	ldx	[%o7+0*32], %l6
	ldx	[%o7+8*32], %l7
	ldx	[%o7+1*32], %o4
	ldx	[%o7+9*32], %o5
	movvs	%icc, %o4, %l6
	ldx	[%o7+2*32], %o4
	movvs	%icc, %o5, %l7
	ldx	[%o7+10*32],%o5
	move	%icc, %o4, %l6
	ldx	[%o7+3*32], %o4
	move	%icc, %o5, %l7
	ldx	[%o7+11*32],%o5
	movneg	%icc, %o4, %l6
	ldx	[%o7+4*32], %o4
	movneg	%icc, %o5, %l7
	ldx	[%o7+12*32],%o5
	movcs	%xcc, %o4, %l6
	ldx	[%o7+5*32],%o4
	movcs	%xcc, %o5, %l7
	ldx	[%o7+13*32],%o5
	movvs	%xcc, %o4, %l6
	ldx	[%o7+6*32], %o4
	movvs	%xcc, %o5, %l7
	ldx	[%o7+14*32],%o5
	move	%xcc, %o4, %l6
	ldx	[%o7+7*32], %o4
	move	%xcc, %o5, %l7
	ldx	[%o7+15*32],%o5
	movneg	%xcc, %o4, %l6
	add	%o7,16*32, %o7
	movneg	%xcc, %o5, %l7
	save	%sp,-128,%sp; or %g5,%fp,%fp
	! last limb pair: after save, the old %o7 table pointer is visible
	! here as %i7 -- limb pair -> %i0:%i1 of the new window
	ldx	[%i7+0*32], %i0
	ldx	[%i7+8*32], %i1
	ldx	[%i7+1*32], %o4
	ldx	[%i7+9*32], %o5
	movvs	%icc, %o4, %i0
	ldx	[%i7+2*32], %o4
	movvs	%icc, %o5, %i1
	ldx	[%i7+10*32],%o5
	move	%icc, %o4, %i0
	ldx	[%i7+3*32], %o4
	move	%icc, %o5, %i1
	ldx	[%i7+11*32],%o5
	movneg	%icc, %o4, %i0
	ldx	[%i7+4*32], %o4
	movneg	%icc, %o5, %i1
	ldx	[%i7+12*32],%o5
	movcs	%xcc, %o4, %i0
	ldx	[%i7+5*32],%o4
	movcs	%xcc, %o5, %i1
	ldx	[%i7+13*32],%o5
	movvs	%xcc, %o4, %i0
	ldx	[%i7+6*32], %o4
	movvs	%xcc, %o5, %i1
	ldx	[%i7+14*32],%o5
	move	%xcc, %o4, %i0
	ldx	[%i7+7*32], %o4
	move	%xcc, %o5, %i1
	ldx	[%i7+15*32],%o5
	movneg	%xcc, %o4, %i0
	add	%i7,16*32, %i7
	movneg	%xcc, %o5, %i1
	! prepare the selector for the NEXT window
	srax	%g4, 32, %o4	! unpack %g4
	srl	%g4, %g0, %o5
	sub	%o4, 5, %o4
	mov	%g3, %i7
	sllx	%o4, 32, %g4	! re-pack %g4
	or	%o5, %g4, %g4
	srl	%o5, %o4, %o5
	srl	%o5, 2, %o4
	and	%o5, 3, %o5
	and	%o4, 7, %o4
	sll	%o5, 3, %o5	! offset within first cache line
	add	%o5, %i7, %i7	! of the pwrtbl
	or	%g0, 1, %o5
	sll	%o5, %o4, %o4
	! 5 Montgomery squarings; abort on %fcc3 unordered or (32-bit) on a
	! clobbered frame-pointer tag
	.word	0x81b02940+16-1	! montsqr	16-1
	fbu,pn	%fcc3,.Labort_16
#ifndef	__arch64__
	and	%fp,%g5,%g5
	brz,pn	%g5,.Labort_16
#endif
	nop
	.word	0x81b02940+16-1	! montsqr	16-1
	fbu,pn	%fcc3,.Labort_16
#ifndef	__arch64__
	and	%fp,%g5,%g5
	brz,pn	%g5,.Labort_16
#endif
	nop
	.word	0x81b02940+16-1	! montsqr	16-1
	fbu,pn	%fcc3,.Labort_16
#ifndef	__arch64__
	and	%fp,%g5,%g5
	brz,pn	%g5,.Labort_16
#endif
	nop
	.word	0x81b02940+16-1	! montsqr	16-1
	fbu,pn	%fcc3,.Labort_16
#ifndef	__arch64__
	and	%fp,%g5,%g5
	brz,pn	%g5,.Labort_16
#endif
	nop
	.word	0x81b02940+16-1	! montsqr	16-1
	fbu,pn	%fcc3,.Labort_16
#ifndef	__arch64__
	and	%fp,%g5,%g5
	brz,pn	%g5,.Labort_16
#endif
	nop
	wr	%o4, %g0, %ccr	! arm selector for the gather in the next pass
	.word	0x81b02920+16-1	! montmul	16-1
	fbu,pn	%fcc3,.Labort_16
#ifndef	__arch64__
	and	%fp,%g5,%g5
	brz,pn	%g5,.Labort_16
#endif

	srax	%g4, 32, %o4	! window counter went negative => done
#ifdef	__arch64__
	brgez	%o4,.Lstride_16
	restore
	restore
	restore
	restore
	restore
#else
	brgez	%o4,.Lstride_16
	restore; and %fp,%g5,%g5	! re-check window tags while unwinding
	restore; and %fp,%g5,%g5
	restore; and %fp,%g5,%g5
	restore; and %fp,%g5,%g5
	brz,pn	%g5,.Labort1_16
	restore
#endif
	! result limbs 0-13 come back in %l0-%l7/%o0-%o5; move to FP regs
	.word	0x81b02310	!movxtod %l0,%f0
	.word	0x85b02311	!movxtod %l1,%f2
	.word	0x89b02312	!movxtod %l2,%f4
	.word	0x8db02313	!movxtod %l3,%f6
	.word	0x91b02314	!movxtod %l4,%f8
	.word	0x95b02315	!movxtod %l5,%f10
	.word	0x99b02316	!movxtod %l6,%f12
	.word	0x9db02317	!movxtod %l7,%f14
	.word	0xa1b02308	!movxtod %o0,%f16
	.word	0xa5b02309	!movxtod %o1,%f18
	.word	0xa9b0230a	!movxtod %o2,%f20
	.word	0xadb0230b	!movxtod %o3,%f22
	.word	0xbbb0230c	!movxtod %o4,%f60
	.word	0xbfb0230d	!movxtod %o5,%f62
#ifdef	__arch64__
	restore
#else
	and	%fp,%g5,%g5	! final window-integrity verdict
	restore
	and	%g5,1,%o7
	and	%fp,%g5,%g5
	srl	%fp,0,%fp	! just in case?
	or	%o7,%g5,%g5
	brz,a,pn %g5,.Ldone_16
	mov	0,%i0		! return failure
#endif
	std	%f0,[%g1+0*8]
	std	%f2,[%g1+1*8]
	std	%f4,[%g1+2*8]
	std	%f6,[%g1+3*8]
	std	%f8,[%g1+4*8]
	std	%f10,[%g1+5*8]
	std	%f12,[%g1+6*8]
	std	%f14,[%g1+7*8]
	std	%f16,[%g1+8*8]
	std	%f18,[%g1+9*8]
	std	%f20,[%g1+10*8]
	std	%f22,[%g1+11*8]
	std	%f60,[%g1+12*8]
	std	%f62,[%g1+13*8]
	std	%f24,[%g1+14*8]	! limbs 14-15 were carried in %f24/%f26
	std	%f26,[%g1+15*8]
	mov	1,%i0		! return success
.Ldone_16:
	ret
	restore

.Labort_16:			! failure inside the stride loop: unwind and fail
	restore
	restore
	restore
	restore
	restore
.Labort1_16:
	restore

	mov	0,%i0		! return failure
	ret
	restore
.type	bn_pwr5_mont_t4_16, #function
.size	bn_pwr5_mont_t4_16, .-bn_pwr5_mont_t4_16

! ---- bn_pwr5_mont_t4_24: 24-limb flavour of the same kernel; the body
! ---- continues past this chunk of the file
.globl	bn_pwr5_mont_t4_24
.align	32
bn_pwr5_mont_t4_24:
#ifdef	__arch64__
	mov	0,%g5		! 64-bit ABI: no frame-pointer tag needed
	mov	-128,%g4
#elif defined(SPARCV9_64BIT_STACK)
	SPARC_LOAD_ADDRESS_LEAF(OPENSSL_sparcv9cap_P,%g1,%g5)
	ld	[%g1+0],%g1	! OPENSSL_sparcv9_P[0]
	mov	-2047,%g4
	and	%g1,SPARCV9_64BIT_STACK,%g1
	movrz	%g1,0,%g4	! choose frame bias by stack flavour
	mov	-1,%g5
	add	%g4,-128,%g4
#else
	mov	-1,%g5
	mov	-128,%g4
#endif
	sllx	%g5,32,%g5	! %g5 = window-tag mask (0 on 64-bit)
	save	%sp,%g4,%sp
#ifndef	__arch64__
	save	%sp,-128,%sp	! warm it up
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	restore
	restore
	restore
	restore
	restore
	restore
#endif
	and	%sp,1,%g4
	or	%g5,%fp,%fp	! tag this frame pointer
	or	%g4,%g5,%g5

	! copy arguments to global registers
	mov	%i0,%g1
	mov	%i1,%g2
	ld	[%i2+0],%f1	! load *n0
	ld	[%i2+4],%f0
	mov	%i3,%g3
	srl	%i4,%g0,%i4	! pack last arguments
pack last arguments 2525 sllx %i5,32,%g4 2526 or %i4,%g4,%g4 2527 .word 0xbbb00f00 !fsrc2 %f0,%f0,%f60 2528 save %sp,-128,%sp; or %g5,%fp,%fp 2529 ldx [%g1+0*8],%l0 2530 ldx [%g1+1*8],%l1 2531 ldx [%g1+2*8],%l2 2532 ldx [%g1+3*8],%l3 2533 ldx [%g1+4*8],%l4 2534 ldx [%g1+5*8],%l5 2535 ldx [%g1+6*8],%l6 2536 ldx [%g1+7*8],%l7 2537 ldx [%g1+8*8],%o0 2538 ldx [%g1+9*8],%o1 2539 ldx [%g1+10*8],%o2 2540 ldx [%g1+11*8],%o3 2541 ldx [%g1+12*8],%o4 2542 ldx [%g1+13*8],%o5 2543 ldd [%g1+14*8],%f24 2544 ldd [%g1+15*8],%f26 2545 ldd [%g1+16*8],%f28 2546 ldd [%g1+17*8],%f30 2547 ldd [%g1+18*8],%f32 2548 ldd [%g1+19*8],%f34 2549 ldd [%g1+20*8],%f36 2550 ldd [%g1+21*8],%f38 2551 ldd [%g1+22*8],%f40 2552 ldd [%g1+23*8],%f42 2553 save %sp,-128,%sp; or %g5,%fp,%fp 2554 ldx [%g2+0*8],%l0 2555 ldx [%g2+1*8],%l1 2556 ldx [%g2+2*8],%l2 2557 ldx [%g2+3*8],%l3 2558 ldx [%g2+4*8],%l4 2559 ldx [%g2+5*8],%l5 2560 ldx [%g2+6*8],%l6 2561 ldx [%g2+7*8],%l7 2562 ldx [%g2+8*8],%o0 2563 ldx [%g2+9*8],%o1 2564 ldx [%g2+10*8],%o2 2565 ldx [%g2+11*8],%o3 2566 ldx [%g2+12*8],%o4 2567 ldx [%g2+13*8],%o5 2568 save %sp,-128,%sp; or %g5,%fp,%fp 2569 ldx [%g2+14*8],%l0 2570 ldx [%g2+15*8],%l1 2571 ldx [%g2+16*8],%l2 2572 ldx [%g2+17*8],%l3 2573 ldx [%g2+18*8],%l4 2574 ldx [%g2+19*8],%l5 2575 ldx [%g2+20*8],%l6 2576 ldx [%g2+21*8],%l7 2577 ldx [%g2+22*8],%o0 2578 ldx [%g2+23*8],%o1 2579 save %sp,-128,%sp; or %g5,%fp,%fp 2580 save %sp,-128,%sp; or %g5,%fp,%fp 2581 2582 srlx %g4, 32, %o4 ! unpack %g4 2583 srl %g4, %g0, %o5 2584 sub %o4, 5, %o4 2585 mov %g3, %o7 2586 sllx %o4, 32, %g4 ! re-pack %g4 2587 or %o5, %g4, %g4 2588 srl %o5, %o4, %o5 2589 srl %o5, 2, %o4 2590 and %o5, 3, %o5 2591 and %o4, 7, %o4 2592 sll %o5, 3, %o5 ! offset within first cache line 2593 add %o5, %o7, %o7 ! 
of the pwrtbl 2594 or %g0, 1, %o5 2595 sll %o5, %o4, %o4 2596 wr %o4, %g0, %ccr 2597 b .Lstride_24 2598 nop 2599.align 16 2600.Lstride_24: 2601 ldx [%o7+0*32], %i0 2602 ldx [%o7+8*32], %i1 2603 ldx [%o7+1*32], %o4 2604 ldx [%o7+9*32], %o5 2605 movvs %icc, %o4, %i0 2606 ldx [%o7+2*32], %o4 2607 movvs %icc, %o5, %i1 2608 ldx [%o7+10*32],%o5 2609 move %icc, %o4, %i0 2610 ldx [%o7+3*32], %o4 2611 move %icc, %o5, %i1 2612 ldx [%o7+11*32],%o5 2613 movneg %icc, %o4, %i0 2614 ldx [%o7+4*32], %o4 2615 movneg %icc, %o5, %i1 2616 ldx [%o7+12*32],%o5 2617 movcs %xcc, %o4, %i0 2618 ldx [%o7+5*32],%o4 2619 movcs %xcc, %o5, %i1 2620 ldx [%o7+13*32],%o5 2621 movvs %xcc, %o4, %i0 2622 ldx [%o7+6*32], %o4 2623 movvs %xcc, %o5, %i1 2624 ldx [%o7+14*32],%o5 2625 move %xcc, %o4, %i0 2626 ldx [%o7+7*32], %o4 2627 move %xcc, %o5, %i1 2628 ldx [%o7+15*32],%o5 2629 movneg %xcc, %o4, %i0 2630 add %o7,16*32, %o7 2631 movneg %xcc, %o5, %i1 2632 ldx [%o7+0*32], %i2 2633 ldx [%o7+8*32], %i3 2634 ldx [%o7+1*32], %o4 2635 ldx [%o7+9*32], %o5 2636 movvs %icc, %o4, %i2 2637 ldx [%o7+2*32], %o4 2638 movvs %icc, %o5, %i3 2639 ldx [%o7+10*32],%o5 2640 move %icc, %o4, %i2 2641 ldx [%o7+3*32], %o4 2642 move %icc, %o5, %i3 2643 ldx [%o7+11*32],%o5 2644 movneg %icc, %o4, %i2 2645 ldx [%o7+4*32], %o4 2646 movneg %icc, %o5, %i3 2647 ldx [%o7+12*32],%o5 2648 movcs %xcc, %o4, %i2 2649 ldx [%o7+5*32],%o4 2650 movcs %xcc, %o5, %i3 2651 ldx [%o7+13*32],%o5 2652 movvs %xcc, %o4, %i2 2653 ldx [%o7+6*32], %o4 2654 movvs %xcc, %o5, %i3 2655 ldx [%o7+14*32],%o5 2656 move %xcc, %o4, %i2 2657 ldx [%o7+7*32], %o4 2658 move %xcc, %o5, %i3 2659 ldx [%o7+15*32],%o5 2660 movneg %xcc, %o4, %i2 2661 add %o7,16*32, %o7 2662 movneg %xcc, %o5, %i3 2663 ldx [%o7+0*32], %i4 2664 ldx [%o7+8*32], %i5 2665 ldx [%o7+1*32], %o4 2666 ldx [%o7+9*32], %o5 2667 movvs %icc, %o4, %i4 2668 ldx [%o7+2*32], %o4 2669 movvs %icc, %o5, %i5 2670 ldx [%o7+10*32],%o5 2671 move %icc, %o4, %i4 2672 ldx [%o7+3*32], %o4 2673 move %icc, %o5, %i5 2674 ldx 
[%o7+11*32],%o5 2675 movneg %icc, %o4, %i4 2676 ldx [%o7+4*32], %o4 2677 movneg %icc, %o5, %i5 2678 ldx [%o7+12*32],%o5 2679 movcs %xcc, %o4, %i4 2680 ldx [%o7+5*32],%o4 2681 movcs %xcc, %o5, %i5 2682 ldx [%o7+13*32],%o5 2683 movvs %xcc, %o4, %i4 2684 ldx [%o7+6*32], %o4 2685 movvs %xcc, %o5, %i5 2686 ldx [%o7+14*32],%o5 2687 move %xcc, %o4, %i4 2688 ldx [%o7+7*32], %o4 2689 move %xcc, %o5, %i5 2690 ldx [%o7+15*32],%o5 2691 movneg %xcc, %o4, %i4 2692 add %o7,16*32, %o7 2693 movneg %xcc, %o5, %i5 2694 ldx [%o7+0*32], %l0 2695 ldx [%o7+8*32], %l1 2696 ldx [%o7+1*32], %o4 2697 ldx [%o7+9*32], %o5 2698 movvs %icc, %o4, %l0 2699 ldx [%o7+2*32], %o4 2700 movvs %icc, %o5, %l1 2701 ldx [%o7+10*32],%o5 2702 move %icc, %o4, %l0 2703 ldx [%o7+3*32], %o4 2704 move %icc, %o5, %l1 2705 ldx [%o7+11*32],%o5 2706 movneg %icc, %o4, %l0 2707 ldx [%o7+4*32], %o4 2708 movneg %icc, %o5, %l1 2709 ldx [%o7+12*32],%o5 2710 movcs %xcc, %o4, %l0 2711 ldx [%o7+5*32],%o4 2712 movcs %xcc, %o5, %l1 2713 ldx [%o7+13*32],%o5 2714 movvs %xcc, %o4, %l0 2715 ldx [%o7+6*32], %o4 2716 movvs %xcc, %o5, %l1 2717 ldx [%o7+14*32],%o5 2718 move %xcc, %o4, %l0 2719 ldx [%o7+7*32], %o4 2720 move %xcc, %o5, %l1 2721 ldx [%o7+15*32],%o5 2722 movneg %xcc, %o4, %l0 2723 add %o7,16*32, %o7 2724 movneg %xcc, %o5, %l1 2725 ldx [%o7+0*32], %l2 2726 ldx [%o7+8*32], %l3 2727 ldx [%o7+1*32], %o4 2728 ldx [%o7+9*32], %o5 2729 movvs %icc, %o4, %l2 2730 ldx [%o7+2*32], %o4 2731 movvs %icc, %o5, %l3 2732 ldx [%o7+10*32],%o5 2733 move %icc, %o4, %l2 2734 ldx [%o7+3*32], %o4 2735 move %icc, %o5, %l3 2736 ldx [%o7+11*32],%o5 2737 movneg %icc, %o4, %l2 2738 ldx [%o7+4*32], %o4 2739 movneg %icc, %o5, %l3 2740 ldx [%o7+12*32],%o5 2741 movcs %xcc, %o4, %l2 2742 ldx [%o7+5*32],%o4 2743 movcs %xcc, %o5, %l3 2744 ldx [%o7+13*32],%o5 2745 movvs %xcc, %o4, %l2 2746 ldx [%o7+6*32], %o4 2747 movvs %xcc, %o5, %l3 2748 ldx [%o7+14*32],%o5 2749 move %xcc, %o4, %l2 2750 ldx [%o7+7*32], %o4 2751 move %xcc, %o5, %l3 2752 ldx [%o7+15*32],%o5 
2753 movneg %xcc, %o4, %l2 2754 add %o7,16*32, %o7 2755 movneg %xcc, %o5, %l3 2756 ldx [%o7+0*32], %l4 2757 ldx [%o7+8*32], %l5 2758 ldx [%o7+1*32], %o4 2759 ldx [%o7+9*32], %o5 2760 movvs %icc, %o4, %l4 2761 ldx [%o7+2*32], %o4 2762 movvs %icc, %o5, %l5 2763 ldx [%o7+10*32],%o5 2764 move %icc, %o4, %l4 2765 ldx [%o7+3*32], %o4 2766 move %icc, %o5, %l5 2767 ldx [%o7+11*32],%o5 2768 movneg %icc, %o4, %l4 2769 ldx [%o7+4*32], %o4 2770 movneg %icc, %o5, %l5 2771 ldx [%o7+12*32],%o5 2772 movcs %xcc, %o4, %l4 2773 ldx [%o7+5*32],%o4 2774 movcs %xcc, %o5, %l5 2775 ldx [%o7+13*32],%o5 2776 movvs %xcc, %o4, %l4 2777 ldx [%o7+6*32], %o4 2778 movvs %xcc, %o5, %l5 2779 ldx [%o7+14*32],%o5 2780 move %xcc, %o4, %l4 2781 ldx [%o7+7*32], %o4 2782 move %xcc, %o5, %l5 2783 ldx [%o7+15*32],%o5 2784 movneg %xcc, %o4, %l4 2785 add %o7,16*32, %o7 2786 movneg %xcc, %o5, %l5 2787 ldx [%o7+0*32], %l6 2788 ldx [%o7+8*32], %l7 2789 ldx [%o7+1*32], %o4 2790 ldx [%o7+9*32], %o5 2791 movvs %icc, %o4, %l6 2792 ldx [%o7+2*32], %o4 2793 movvs %icc, %o5, %l7 2794 ldx [%o7+10*32],%o5 2795 move %icc, %o4, %l6 2796 ldx [%o7+3*32], %o4 2797 move %icc, %o5, %l7 2798 ldx [%o7+11*32],%o5 2799 movneg %icc, %o4, %l6 2800 ldx [%o7+4*32], %o4 2801 movneg %icc, %o5, %l7 2802 ldx [%o7+12*32],%o5 2803 movcs %xcc, %o4, %l6 2804 ldx [%o7+5*32],%o4 2805 movcs %xcc, %o5, %l7 2806 ldx [%o7+13*32],%o5 2807 movvs %xcc, %o4, %l6 2808 ldx [%o7+6*32], %o4 2809 movvs %xcc, %o5, %l7 2810 ldx [%o7+14*32],%o5 2811 move %xcc, %o4, %l6 2812 ldx [%o7+7*32], %o4 2813 move %xcc, %o5, %l7 2814 ldx [%o7+15*32],%o5 2815 movneg %xcc, %o4, %l6 2816 add %o7,16*32, %o7 2817 movneg %xcc, %o5, %l7 2818 save %sp,-128,%sp; or %g5,%fp,%fp 2819 ldx [%i7+0*32], %i0 2820 ldx [%i7+8*32], %i1 2821 ldx [%i7+1*32], %o4 2822 ldx [%i7+9*32], %o5 2823 movvs %icc, %o4, %i0 2824 ldx [%i7+2*32], %o4 2825 movvs %icc, %o5, %i1 2826 ldx [%i7+10*32],%o5 2827 move %icc, %o4, %i0 2828 ldx [%i7+3*32], %o4 2829 move %icc, %o5, %i1 2830 ldx [%i7+11*32],%o5 2831 
movneg %icc, %o4, %i0 2832 ldx [%i7+4*32], %o4 2833 movneg %icc, %o5, %i1 2834 ldx [%i7+12*32],%o5 2835 movcs %xcc, %o4, %i0 2836 ldx [%i7+5*32],%o4 2837 movcs %xcc, %o5, %i1 2838 ldx [%i7+13*32],%o5 2839 movvs %xcc, %o4, %i0 2840 ldx [%i7+6*32], %o4 2841 movvs %xcc, %o5, %i1 2842 ldx [%i7+14*32],%o5 2843 move %xcc, %o4, %i0 2844 ldx [%i7+7*32], %o4 2845 move %xcc, %o5, %i1 2846 ldx [%i7+15*32],%o5 2847 movneg %xcc, %o4, %i0 2848 add %i7,16*32, %i7 2849 movneg %xcc, %o5, %i1 2850 ldx [%i7+0*32], %i2 2851 ldx [%i7+8*32], %i3 2852 ldx [%i7+1*32], %o4 2853 ldx [%i7+9*32], %o5 2854 movvs %icc, %o4, %i2 2855 ldx [%i7+2*32], %o4 2856 movvs %icc, %o5, %i3 2857 ldx [%i7+10*32],%o5 2858 move %icc, %o4, %i2 2859 ldx [%i7+3*32], %o4 2860 move %icc, %o5, %i3 2861 ldx [%i7+11*32],%o5 2862 movneg %icc, %o4, %i2 2863 ldx [%i7+4*32], %o4 2864 movneg %icc, %o5, %i3 2865 ldx [%i7+12*32],%o5 2866 movcs %xcc, %o4, %i2 2867 ldx [%i7+5*32],%o4 2868 movcs %xcc, %o5, %i3 2869 ldx [%i7+13*32],%o5 2870 movvs %xcc, %o4, %i2 2871 ldx [%i7+6*32], %o4 2872 movvs %xcc, %o5, %i3 2873 ldx [%i7+14*32],%o5 2874 move %xcc, %o4, %i2 2875 ldx [%i7+7*32], %o4 2876 move %xcc, %o5, %i3 2877 ldx [%i7+15*32],%o5 2878 movneg %xcc, %o4, %i2 2879 add %i7,16*32, %i7 2880 movneg %xcc, %o5, %i3 2881 ldx [%i7+0*32], %i4 2882 ldx [%i7+8*32], %i5 2883 ldx [%i7+1*32], %o4 2884 ldx [%i7+9*32], %o5 2885 movvs %icc, %o4, %i4 2886 ldx [%i7+2*32], %o4 2887 movvs %icc, %o5, %i5 2888 ldx [%i7+10*32],%o5 2889 move %icc, %o4, %i4 2890 ldx [%i7+3*32], %o4 2891 move %icc, %o5, %i5 2892 ldx [%i7+11*32],%o5 2893 movneg %icc, %o4, %i4 2894 ldx [%i7+4*32], %o4 2895 movneg %icc, %o5, %i5 2896 ldx [%i7+12*32],%o5 2897 movcs %xcc, %o4, %i4 2898 ldx [%i7+5*32],%o4 2899 movcs %xcc, %o5, %i5 2900 ldx [%i7+13*32],%o5 2901 movvs %xcc, %o4, %i4 2902 ldx [%i7+6*32], %o4 2903 movvs %xcc, %o5, %i5 2904 ldx [%i7+14*32],%o5 2905 move %xcc, %o4, %i4 2906 ldx [%i7+7*32], %o4 2907 move %xcc, %o5, %i5 2908 ldx [%i7+15*32],%o5 2909 movneg %xcc, %o4, 
%i4 2910 add %i7,16*32, %i7 2911 movneg %xcc, %o5, %i5 2912 ldx [%i7+0*32], %l0 2913 ldx [%i7+8*32], %l1 2914 ldx [%i7+1*32], %o4 2915 ldx [%i7+9*32], %o5 2916 movvs %icc, %o4, %l0 2917 ldx [%i7+2*32], %o4 2918 movvs %icc, %o5, %l1 2919 ldx [%i7+10*32],%o5 2920 move %icc, %o4, %l0 2921 ldx [%i7+3*32], %o4 2922 move %icc, %o5, %l1 2923 ldx [%i7+11*32],%o5 2924 movneg %icc, %o4, %l0 2925 ldx [%i7+4*32], %o4 2926 movneg %icc, %o5, %l1 2927 ldx [%i7+12*32],%o5 2928 movcs %xcc, %o4, %l0 2929 ldx [%i7+5*32],%o4 2930 movcs %xcc, %o5, %l1 2931 ldx [%i7+13*32],%o5 2932 movvs %xcc, %o4, %l0 2933 ldx [%i7+6*32], %o4 2934 movvs %xcc, %o5, %l1 2935 ldx [%i7+14*32],%o5 2936 move %xcc, %o4, %l0 2937 ldx [%i7+7*32], %o4 2938 move %xcc, %o5, %l1 2939 ldx [%i7+15*32],%o5 2940 movneg %xcc, %o4, %l0 2941 add %i7,16*32, %i7 2942 movneg %xcc, %o5, %l1 2943 ldx [%i7+0*32], %l2 2944 ldx [%i7+8*32], %l3 2945 ldx [%i7+1*32], %o4 2946 ldx [%i7+9*32], %o5 2947 movvs %icc, %o4, %l2 2948 ldx [%i7+2*32], %o4 2949 movvs %icc, %o5, %l3 2950 ldx [%i7+10*32],%o5 2951 move %icc, %o4, %l2 2952 ldx [%i7+3*32], %o4 2953 move %icc, %o5, %l3 2954 ldx [%i7+11*32],%o5 2955 movneg %icc, %o4, %l2 2956 ldx [%i7+4*32], %o4 2957 movneg %icc, %o5, %l3 2958 ldx [%i7+12*32],%o5 2959 movcs %xcc, %o4, %l2 2960 ldx [%i7+5*32],%o4 2961 movcs %xcc, %o5, %l3 2962 ldx [%i7+13*32],%o5 2963 movvs %xcc, %o4, %l2 2964 ldx [%i7+6*32], %o4 2965 movvs %xcc, %o5, %l3 2966 ldx [%i7+14*32],%o5 2967 move %xcc, %o4, %l2 2968 ldx [%i7+7*32], %o4 2969 move %xcc, %o5, %l3 2970 ldx [%i7+15*32],%o5 2971 movneg %xcc, %o4, %l2 2972 add %i7,16*32, %i7 2973 movneg %xcc, %o5, %l3 2974 srax %g4, 32, %o4 ! unpack %g4 2975 srl %g4, %g0, %o5 2976 sub %o4, 5, %o4 2977 mov %g3, %i7 2978 sllx %o4, 32, %g4 ! re-pack %g4 2979 or %o5, %g4, %g4 2980 srl %o5, %o4, %o5 2981 srl %o5, 2, %o4 2982 and %o5, 3, %o5 2983 and %o4, 7, %o4 2984 sll %o5, 3, %o5 ! offset within first cache line 2985 add %o5, %i7, %i7 ! 
of the pwrtbl 2986 or %g0, 1, %o5 2987 sll %o5, %o4, %o4 2988 .word 0x81b02940+24-1 ! montsqr 24-1 2989 fbu,pn %fcc3,.Labort_24 2990#ifndef __arch64__ 2991 and %fp,%g5,%g5 2992 brz,pn %g5,.Labort_24 2993#endif 2994 nop 2995 .word 0x81b02940+24-1 ! montsqr 24-1 2996 fbu,pn %fcc3,.Labort_24 2997#ifndef __arch64__ 2998 and %fp,%g5,%g5 2999 brz,pn %g5,.Labort_24 3000#endif 3001 nop 3002 .word 0x81b02940+24-1 ! montsqr 24-1 3003 fbu,pn %fcc3,.Labort_24 3004#ifndef __arch64__ 3005 and %fp,%g5,%g5 3006 brz,pn %g5,.Labort_24 3007#endif 3008 nop 3009 .word 0x81b02940+24-1 ! montsqr 24-1 3010 fbu,pn %fcc3,.Labort_24 3011#ifndef __arch64__ 3012 and %fp,%g5,%g5 3013 brz,pn %g5,.Labort_24 3014#endif 3015 nop 3016 .word 0x81b02940+24-1 ! montsqr 24-1 3017 fbu,pn %fcc3,.Labort_24 3018#ifndef __arch64__ 3019 and %fp,%g5,%g5 3020 brz,pn %g5,.Labort_24 3021#endif 3022 nop 3023 wr %o4, %g0, %ccr 3024 .word 0x81b02920+24-1 ! montmul 24-1 3025 fbu,pn %fcc3,.Labort_24 3026#ifndef __arch64__ 3027 and %fp,%g5,%g5 3028 brz,pn %g5,.Labort_24 3029#endif 3030 3031 srax %g4, 32, %o4 3032#ifdef __arch64__ 3033 brgez %o4,.Lstride_24 3034 restore 3035 restore 3036 restore 3037 restore 3038 restore 3039#else 3040 brgez %o4,.Lstride_24 3041 restore; and %fp,%g5,%g5 3042 restore; and %fp,%g5,%g5 3043 restore; and %fp,%g5,%g5 3044 restore; and %fp,%g5,%g5 3045 brz,pn %g5,.Labort1_24 3046 restore 3047#endif 3048 .word 0x81b02310 !movxtod %l0,%f0 3049 .word 0x85b02311 !movxtod %l1,%f2 3050 .word 0x89b02312 !movxtod %l2,%f4 3051 .word 0x8db02313 !movxtod %l3,%f6 3052 .word 0x91b02314 !movxtod %l4,%f8 3053 .word 0x95b02315 !movxtod %l5,%f10 3054 .word 0x99b02316 !movxtod %l6,%f12 3055 .word 0x9db02317 !movxtod %l7,%f14 3056 .word 0xa1b02308 !movxtod %o0,%f16 3057 .word 0xa5b02309 !movxtod %o1,%f18 3058 .word 0xa9b0230a !movxtod %o2,%f20 3059 .word 0xadb0230b !movxtod %o3,%f22 3060 .word 0xbbb0230c !movxtod %o4,%f60 3061 .word 0xbfb0230d !movxtod %o5,%f62 3062#ifdef __arch64__ 3063 restore 3064#else 3065 
and %fp,%g5,%g5 3066 restore 3067 and %g5,1,%o7 3068 and %fp,%g5,%g5 3069 srl %fp,0,%fp ! just in case? 3070 or %o7,%g5,%g5 3071 brz,a,pn %g5,.Ldone_24 3072 mov 0,%i0 ! return failure 3073#endif 3074 std %f0,[%g1+0*8] 3075 std %f2,[%g1+1*8] 3076 std %f4,[%g1+2*8] 3077 std %f6,[%g1+3*8] 3078 std %f8,[%g1+4*8] 3079 std %f10,[%g1+5*8] 3080 std %f12,[%g1+6*8] 3081 std %f14,[%g1+7*8] 3082 std %f16,[%g1+8*8] 3083 std %f18,[%g1+9*8] 3084 std %f20,[%g1+10*8] 3085 std %f22,[%g1+11*8] 3086 std %f60,[%g1+12*8] 3087 std %f62,[%g1+13*8] 3088 std %f24,[%g1+14*8] 3089 std %f26,[%g1+15*8] 3090 std %f28,[%g1+16*8] 3091 std %f30,[%g1+17*8] 3092 std %f32,[%g1+18*8] 3093 std %f34,[%g1+19*8] 3094 std %f36,[%g1+20*8] 3095 std %f38,[%g1+21*8] 3096 std %f40,[%g1+22*8] 3097 std %f42,[%g1+23*8] 3098 mov 1,%i0 ! return success 3099.Ldone_24: 3100 ret 3101 restore 3102 3103.Labort_24: 3104 restore 3105 restore 3106 restore 3107 restore 3108 restore 3109.Labort1_24: 3110 restore 3111 3112 mov 0,%i0 ! return failure 3113 ret 3114 restore 3115.type bn_pwr5_mont_t4_24, #function 3116.size bn_pwr5_mont_t4_24, .-bn_pwr5_mont_t4_24 3117.globl bn_pwr5_mont_t4_32 3118.align 32 3119bn_pwr5_mont_t4_32: 3120#ifdef __arch64__ 3121 mov 0,%g5 3122 mov -128,%g4 3123#elif defined(SPARCV9_64BIT_STACK) 3124 SPARC_LOAD_ADDRESS_LEAF(OPENSSL_sparcv9cap_P,%g1,%g5) 3125 ld [%g1+0],%g1 ! OPENSSL_sparcv9_P[0] 3126 mov -2047,%g4 3127 and %g1,SPARCV9_64BIT_STACK,%g1 3128 movrz %g1,0,%g4 3129 mov -1,%g5 3130 add %g4,-128,%g4 3131#else 3132 mov -1,%g5 3133 mov -128,%g4 3134#endif 3135 sllx %g5,32,%g5 3136 save %sp,%g4,%sp 3137#ifndef __arch64__ 3138 save %sp,-128,%sp ! warm it up 3139 save %sp,-128,%sp 3140 save %sp,-128,%sp 3141 save %sp,-128,%sp 3142 save %sp,-128,%sp 3143 save %sp,-128,%sp 3144 restore 3145 restore 3146 restore 3147 restore 3148 restore 3149 restore 3150#endif 3151 and %sp,1,%g4 3152 or %g5,%fp,%fp 3153 or %g4,%g5,%g5 3154 3155 ! 
copy arguments to global registers 3156 mov %i0,%g1 3157 mov %i1,%g2 3158 ld [%i2+0],%f1 ! load *n0 3159 ld [%i2+4],%f0 3160 mov %i3,%g3 3161 srl %i4,%g0,%i4 ! pack last arguments 3162 sllx %i5,32,%g4 3163 or %i4,%g4,%g4 3164 .word 0xbbb00f00 !fsrc2 %f0,%f0,%f60 3165 save %sp,-128,%sp; or %g5,%fp,%fp 3166 ldx [%g1+0*8],%l0 3167 ldx [%g1+1*8],%l1 3168 ldx [%g1+2*8],%l2 3169 ldx [%g1+3*8],%l3 3170 ldx [%g1+4*8],%l4 3171 ldx [%g1+5*8],%l5 3172 ldx [%g1+6*8],%l6 3173 ldx [%g1+7*8],%l7 3174 ldx [%g1+8*8],%o0 3175 ldx [%g1+9*8],%o1 3176 ldx [%g1+10*8],%o2 3177 ldx [%g1+11*8],%o3 3178 ldx [%g1+12*8],%o4 3179 ldx [%g1+13*8],%o5 3180 ldd [%g1+14*8],%f24 3181 ldd [%g1+15*8],%f26 3182 ldd [%g1+16*8],%f28 3183 ldd [%g1+17*8],%f30 3184 ldd [%g1+18*8],%f32 3185 ldd [%g1+19*8],%f34 3186 ldd [%g1+20*8],%f36 3187 ldd [%g1+21*8],%f38 3188 ldd [%g1+22*8],%f40 3189 ldd [%g1+23*8],%f42 3190 ldd [%g1+24*8],%f44 3191 ldd [%g1+25*8],%f46 3192 ldd [%g1+26*8],%f48 3193 ldd [%g1+27*8],%f50 3194 ldd [%g1+28*8],%f52 3195 ldd [%g1+29*8],%f54 3196 ldd [%g1+30*8],%f56 3197 ldd [%g1+31*8],%f58 3198 save %sp,-128,%sp; or %g5,%fp,%fp 3199 ldx [%g2+0*8],%l0 3200 ldx [%g2+1*8],%l1 3201 ldx [%g2+2*8],%l2 3202 ldx [%g2+3*8],%l3 3203 ldx [%g2+4*8],%l4 3204 ldx [%g2+5*8],%l5 3205 ldx [%g2+6*8],%l6 3206 ldx [%g2+7*8],%l7 3207 ldx [%g2+8*8],%o0 3208 ldx [%g2+9*8],%o1 3209 ldx [%g2+10*8],%o2 3210 ldx [%g2+11*8],%o3 3211 ldx [%g2+12*8],%o4 3212 ldx [%g2+13*8],%o5 3213 save %sp,-128,%sp; or %g5,%fp,%fp 3214 ldx [%g2+14*8],%l0 3215 ldx [%g2+15*8],%l1 3216 ldx [%g2+16*8],%l2 3217 ldx [%g2+17*8],%l3 3218 ldx [%g2+18*8],%l4 3219 ldx [%g2+19*8],%l5 3220 ldx [%g2+20*8],%l6 3221 ldx [%g2+21*8],%l7 3222 ldx [%g2+22*8],%o0 3223 ldx [%g2+23*8],%o1 3224 ldx [%g2+24*8],%o2 3225 ldx [%g2+25*8],%o3 3226 ldx [%g2+26*8],%o4 3227 ldx [%g2+27*8],%o5 3228 save %sp,-128,%sp; or %g5,%fp,%fp 3229 ldx [%g2+28*8],%l0 3230 ldx [%g2+29*8],%l1 3231 ldx [%g2+30*8],%l2 3232 ldx [%g2+31*8],%l3 3233 save %sp,-128,%sp; or %g5,%fp,%fp 3234 
3235 srlx %g4, 32, %o4 ! unpack %g4 3236 srl %g4, %g0, %o5 3237 sub %o4, 5, %o4 3238 mov %g3, %o7 3239 sllx %o4, 32, %g4 ! re-pack %g4 3240 or %o5, %g4, %g4 3241 srl %o5, %o4, %o5 3242 srl %o5, 2, %o4 3243 and %o5, 3, %o5 3244 and %o4, 7, %o4 3245 sll %o5, 3, %o5 ! offset within first cache line 3246 add %o5, %o7, %o7 ! of the pwrtbl 3247 or %g0, 1, %o5 3248 sll %o5, %o4, %o4 3249 wr %o4, %g0, %ccr 3250 b .Lstride_32 3251 nop 3252.align 16 3253.Lstride_32: 3254 ldx [%o7+0*32], %i0 3255 ldx [%o7+8*32], %i1 3256 ldx [%o7+1*32], %o4 3257 ldx [%o7+9*32], %o5 3258 movvs %icc, %o4, %i0 3259 ldx [%o7+2*32], %o4 3260 movvs %icc, %o5, %i1 3261 ldx [%o7+10*32],%o5 3262 move %icc, %o4, %i0 3263 ldx [%o7+3*32], %o4 3264 move %icc, %o5, %i1 3265 ldx [%o7+11*32],%o5 3266 movneg %icc, %o4, %i0 3267 ldx [%o7+4*32], %o4 3268 movneg %icc, %o5, %i1 3269 ldx [%o7+12*32],%o5 3270 movcs %xcc, %o4, %i0 3271 ldx [%o7+5*32],%o4 3272 movcs %xcc, %o5, %i1 3273 ldx [%o7+13*32],%o5 3274 movvs %xcc, %o4, %i0 3275 ldx [%o7+6*32], %o4 3276 movvs %xcc, %o5, %i1 3277 ldx [%o7+14*32],%o5 3278 move %xcc, %o4, %i0 3279 ldx [%o7+7*32], %o4 3280 move %xcc, %o5, %i1 3281 ldx [%o7+15*32],%o5 3282 movneg %xcc, %o4, %i0 3283 add %o7,16*32, %o7 3284 movneg %xcc, %o5, %i1 3285 ldx [%o7+0*32], %i2 3286 ldx [%o7+8*32], %i3 3287 ldx [%o7+1*32], %o4 3288 ldx [%o7+9*32], %o5 3289 movvs %icc, %o4, %i2 3290 ldx [%o7+2*32], %o4 3291 movvs %icc, %o5, %i3 3292 ldx [%o7+10*32],%o5 3293 move %icc, %o4, %i2 3294 ldx [%o7+3*32], %o4 3295 move %icc, %o5, %i3 3296 ldx [%o7+11*32],%o5 3297 movneg %icc, %o4, %i2 3298 ldx [%o7+4*32], %o4 3299 movneg %icc, %o5, %i3 3300 ldx [%o7+12*32],%o5 3301 movcs %xcc, %o4, %i2 3302 ldx [%o7+5*32],%o4 3303 movcs %xcc, %o5, %i3 3304 ldx [%o7+13*32],%o5 3305 movvs %xcc, %o4, %i2 3306 ldx [%o7+6*32], %o4 3307 movvs %xcc, %o5, %i3 3308 ldx [%o7+14*32],%o5 3309 move %xcc, %o4, %i2 3310 ldx [%o7+7*32], %o4 3311 move %xcc, %o5, %i3 3312 ldx [%o7+15*32],%o5 3313 movneg %xcc, %o4, %i2 3314 add 
%o7,16*32, %o7 3315 movneg %xcc, %o5, %i3 3316 ldx [%o7+0*32], %i4 3317 ldx [%o7+8*32], %i5 3318 ldx [%o7+1*32], %o4 3319 ldx [%o7+9*32], %o5 3320 movvs %icc, %o4, %i4 3321 ldx [%o7+2*32], %o4 3322 movvs %icc, %o5, %i5 3323 ldx [%o7+10*32],%o5 3324 move %icc, %o4, %i4 3325 ldx [%o7+3*32], %o4 3326 move %icc, %o5, %i5 3327 ldx [%o7+11*32],%o5 3328 movneg %icc, %o4, %i4 3329 ldx [%o7+4*32], %o4 3330 movneg %icc, %o5, %i5 3331 ldx [%o7+12*32],%o5 3332 movcs %xcc, %o4, %i4 3333 ldx [%o7+5*32],%o4 3334 movcs %xcc, %o5, %i5 3335 ldx [%o7+13*32],%o5 3336 movvs %xcc, %o4, %i4 3337 ldx [%o7+6*32], %o4 3338 movvs %xcc, %o5, %i5 3339 ldx [%o7+14*32],%o5 3340 move %xcc, %o4, %i4 3341 ldx [%o7+7*32], %o4 3342 move %xcc, %o5, %i5 3343 ldx [%o7+15*32],%o5 3344 movneg %xcc, %o4, %i4 3345 add %o7,16*32, %o7 3346 movneg %xcc, %o5, %i5 3347 ldx [%o7+0*32], %l0 3348 ldx [%o7+8*32], %l1 3349 ldx [%o7+1*32], %o4 3350 ldx [%o7+9*32], %o5 3351 movvs %icc, %o4, %l0 3352 ldx [%o7+2*32], %o4 3353 movvs %icc, %o5, %l1 3354 ldx [%o7+10*32],%o5 3355 move %icc, %o4, %l0 3356 ldx [%o7+3*32], %o4 3357 move %icc, %o5, %l1 3358 ldx [%o7+11*32],%o5 3359 movneg %icc, %o4, %l0 3360 ldx [%o7+4*32], %o4 3361 movneg %icc, %o5, %l1 3362 ldx [%o7+12*32],%o5 3363 movcs %xcc, %o4, %l0 3364 ldx [%o7+5*32],%o4 3365 movcs %xcc, %o5, %l1 3366 ldx [%o7+13*32],%o5 3367 movvs %xcc, %o4, %l0 3368 ldx [%o7+6*32], %o4 3369 movvs %xcc, %o5, %l1 3370 ldx [%o7+14*32],%o5 3371 move %xcc, %o4, %l0 3372 ldx [%o7+7*32], %o4 3373 move %xcc, %o5, %l1 3374 ldx [%o7+15*32],%o5 3375 movneg %xcc, %o4, %l0 3376 add %o7,16*32, %o7 3377 movneg %xcc, %o5, %l1 3378 ldx [%o7+0*32], %l2 3379 ldx [%o7+8*32], %l3 3380 ldx [%o7+1*32], %o4 3381 ldx [%o7+9*32], %o5 3382 movvs %icc, %o4, %l2 3383 ldx [%o7+2*32], %o4 3384 movvs %icc, %o5, %l3 3385 ldx [%o7+10*32],%o5 3386 move %icc, %o4, %l2 3387 ldx [%o7+3*32], %o4 3388 move %icc, %o5, %l3 3389 ldx [%o7+11*32],%o5 3390 movneg %icc, %o4, %l2 3391 ldx [%o7+4*32], %o4 3392 movneg %icc, %o5, %l3 
3393 ldx [%o7+12*32],%o5 3394 movcs %xcc, %o4, %l2 3395 ldx [%o7+5*32],%o4 3396 movcs %xcc, %o5, %l3 3397 ldx [%o7+13*32],%o5 3398 movvs %xcc, %o4, %l2 3399 ldx [%o7+6*32], %o4 3400 movvs %xcc, %o5, %l3 3401 ldx [%o7+14*32],%o5 3402 move %xcc, %o4, %l2 3403 ldx [%o7+7*32], %o4 3404 move %xcc, %o5, %l3 3405 ldx [%o7+15*32],%o5 3406 movneg %xcc, %o4, %l2 3407 add %o7,16*32, %o7 3408 movneg %xcc, %o5, %l3 3409 ldx [%o7+0*32], %l4 3410 ldx [%o7+8*32], %l5 3411 ldx [%o7+1*32], %o4 3412 ldx [%o7+9*32], %o5 3413 movvs %icc, %o4, %l4 3414 ldx [%o7+2*32], %o4 3415 movvs %icc, %o5, %l5 3416 ldx [%o7+10*32],%o5 3417 move %icc, %o4, %l4 3418 ldx [%o7+3*32], %o4 3419 move %icc, %o5, %l5 3420 ldx [%o7+11*32],%o5 3421 movneg %icc, %o4, %l4 3422 ldx [%o7+4*32], %o4 3423 movneg %icc, %o5, %l5 3424 ldx [%o7+12*32],%o5 3425 movcs %xcc, %o4, %l4 3426 ldx [%o7+5*32],%o4 3427 movcs %xcc, %o5, %l5 3428 ldx [%o7+13*32],%o5 3429 movvs %xcc, %o4, %l4 3430 ldx [%o7+6*32], %o4 3431 movvs %xcc, %o5, %l5 3432 ldx [%o7+14*32],%o5 3433 move %xcc, %o4, %l4 3434 ldx [%o7+7*32], %o4 3435 move %xcc, %o5, %l5 3436 ldx [%o7+15*32],%o5 3437 movneg %xcc, %o4, %l4 3438 add %o7,16*32, %o7 3439 movneg %xcc, %o5, %l5 3440 ldx [%o7+0*32], %l6 3441 ldx [%o7+8*32], %l7 3442 ldx [%o7+1*32], %o4 3443 ldx [%o7+9*32], %o5 3444 movvs %icc, %o4, %l6 3445 ldx [%o7+2*32], %o4 3446 movvs %icc, %o5, %l7 3447 ldx [%o7+10*32],%o5 3448 move %icc, %o4, %l6 3449 ldx [%o7+3*32], %o4 3450 move %icc, %o5, %l7 3451 ldx [%o7+11*32],%o5 3452 movneg %icc, %o4, %l6 3453 ldx [%o7+4*32], %o4 3454 movneg %icc, %o5, %l7 3455 ldx [%o7+12*32],%o5 3456 movcs %xcc, %o4, %l6 3457 ldx [%o7+5*32],%o4 3458 movcs %xcc, %o5, %l7 3459 ldx [%o7+13*32],%o5 3460 movvs %xcc, %o4, %l6 3461 ldx [%o7+6*32], %o4 3462 movvs %xcc, %o5, %l7 3463 ldx [%o7+14*32],%o5 3464 move %xcc, %o4, %l6 3465 ldx [%o7+7*32], %o4 3466 move %xcc, %o5, %l7 3467 ldx [%o7+15*32],%o5 3468 movneg %xcc, %o4, %l6 3469 add %o7,16*32, %o7 3470 movneg %xcc, %o5, %l7 3471 save 
%sp,-128,%sp; or %g5,%fp,%fp 3472 ldx [%i7+0*32], %i0 3473 ldx [%i7+8*32], %i1 3474 ldx [%i7+1*32], %o4 3475 ldx [%i7+9*32], %o5 3476 movvs %icc, %o4, %i0 3477 ldx [%i7+2*32], %o4 3478 movvs %icc, %o5, %i1 3479 ldx [%i7+10*32],%o5 3480 move %icc, %o4, %i0 3481 ldx [%i7+3*32], %o4 3482 move %icc, %o5, %i1 3483 ldx [%i7+11*32],%o5 3484 movneg %icc, %o4, %i0 3485 ldx [%i7+4*32], %o4 3486 movneg %icc, %o5, %i1 3487 ldx [%i7+12*32],%o5 3488 movcs %xcc, %o4, %i0 3489 ldx [%i7+5*32],%o4 3490 movcs %xcc, %o5, %i1 3491 ldx [%i7+13*32],%o5 3492 movvs %xcc, %o4, %i0 3493 ldx [%i7+6*32], %o4 3494 movvs %xcc, %o5, %i1 3495 ldx [%i7+14*32],%o5 3496 move %xcc, %o4, %i0 3497 ldx [%i7+7*32], %o4 3498 move %xcc, %o5, %i1 3499 ldx [%i7+15*32],%o5 3500 movneg %xcc, %o4, %i0 3501 add %i7,16*32, %i7 3502 movneg %xcc, %o5, %i1 3503 ldx [%i7+0*32], %i2 3504 ldx [%i7+8*32], %i3 3505 ldx [%i7+1*32], %o4 3506 ldx [%i7+9*32], %o5 3507 movvs %icc, %o4, %i2 3508 ldx [%i7+2*32], %o4 3509 movvs %icc, %o5, %i3 3510 ldx [%i7+10*32],%o5 3511 move %icc, %o4, %i2 3512 ldx [%i7+3*32], %o4 3513 move %icc, %o5, %i3 3514 ldx [%i7+11*32],%o5 3515 movneg %icc, %o4, %i2 3516 ldx [%i7+4*32], %o4 3517 movneg %icc, %o5, %i3 3518 ldx [%i7+12*32],%o5 3519 movcs %xcc, %o4, %i2 3520 ldx [%i7+5*32],%o4 3521 movcs %xcc, %o5, %i3 3522 ldx [%i7+13*32],%o5 3523 movvs %xcc, %o4, %i2 3524 ldx [%i7+6*32], %o4 3525 movvs %xcc, %o5, %i3 3526 ldx [%i7+14*32],%o5 3527 move %xcc, %o4, %i2 3528 ldx [%i7+7*32], %o4 3529 move %xcc, %o5, %i3 3530 ldx [%i7+15*32],%o5 3531 movneg %xcc, %o4, %i2 3532 add %i7,16*32, %i7 3533 movneg %xcc, %o5, %i3 3534 ldx [%i7+0*32], %i4 3535 ldx [%i7+8*32], %i5 3536 ldx [%i7+1*32], %o4 3537 ldx [%i7+9*32], %o5 3538 movvs %icc, %o4, %i4 3539 ldx [%i7+2*32], %o4 3540 movvs %icc, %o5, %i5 3541 ldx [%i7+10*32],%o5 3542 move %icc, %o4, %i4 3543 ldx [%i7+3*32], %o4 3544 move %icc, %o5, %i5 3545 ldx [%i7+11*32],%o5 3546 movneg %icc, %o4, %i4 3547 ldx [%i7+4*32], %o4 3548 movneg %icc, %o5, %i5 3549 ldx 
[%i7+12*32],%o5 3550 movcs %xcc, %o4, %i4 3551 ldx [%i7+5*32],%o4 3552 movcs %xcc, %o5, %i5 3553 ldx [%i7+13*32],%o5 3554 movvs %xcc, %o4, %i4 3555 ldx [%i7+6*32], %o4 3556 movvs %xcc, %o5, %i5 3557 ldx [%i7+14*32],%o5 3558 move %xcc, %o4, %i4 3559 ldx [%i7+7*32], %o4 3560 move %xcc, %o5, %i5 3561 ldx [%i7+15*32],%o5 3562 movneg %xcc, %o4, %i4 3563 add %i7,16*32, %i7 3564 movneg %xcc, %o5, %i5 3565 ldx [%i7+0*32], %l0 3566 ldx [%i7+8*32], %l1 3567 ldx [%i7+1*32], %o4 3568 ldx [%i7+9*32], %o5 3569 movvs %icc, %o4, %l0 3570 ldx [%i7+2*32], %o4 3571 movvs %icc, %o5, %l1 3572 ldx [%i7+10*32],%o5 3573 move %icc, %o4, %l0 3574 ldx [%i7+3*32], %o4 3575 move %icc, %o5, %l1 3576 ldx [%i7+11*32],%o5 3577 movneg %icc, %o4, %l0 3578 ldx [%i7+4*32], %o4 3579 movneg %icc, %o5, %l1 3580 ldx [%i7+12*32],%o5 3581 movcs %xcc, %o4, %l0 3582 ldx [%i7+5*32],%o4 3583 movcs %xcc, %o5, %l1 3584 ldx [%i7+13*32],%o5 3585 movvs %xcc, %o4, %l0 3586 ldx [%i7+6*32], %o4 3587 movvs %xcc, %o5, %l1 3588 ldx [%i7+14*32],%o5 3589 move %xcc, %o4, %l0 3590 ldx [%i7+7*32], %o4 3591 move %xcc, %o5, %l1 3592 ldx [%i7+15*32],%o5 3593 movneg %xcc, %o4, %l0 3594 add %i7,16*32, %i7 3595 movneg %xcc, %o5, %l1 3596 ldx [%i7+0*32], %l2 3597 ldx [%i7+8*32], %l3 3598 ldx [%i7+1*32], %o4 3599 ldx [%i7+9*32], %o5 3600 movvs %icc, %o4, %l2 3601 ldx [%i7+2*32], %o4 3602 movvs %icc, %o5, %l3 3603 ldx [%i7+10*32],%o5 3604 move %icc, %o4, %l2 3605 ldx [%i7+3*32], %o4 3606 move %icc, %o5, %l3 3607 ldx [%i7+11*32],%o5 3608 movneg %icc, %o4, %l2 3609 ldx [%i7+4*32], %o4 3610 movneg %icc, %o5, %l3 3611 ldx [%i7+12*32],%o5 3612 movcs %xcc, %o4, %l2 3613 ldx [%i7+5*32],%o4 3614 movcs %xcc, %o5, %l3 3615 ldx [%i7+13*32],%o5 3616 movvs %xcc, %o4, %l2 3617 ldx [%i7+6*32], %o4 3618 movvs %xcc, %o5, %l3 3619 ldx [%i7+14*32],%o5 3620 move %xcc, %o4, %l2 3621 ldx [%i7+7*32], %o4 3622 move %xcc, %o5, %l3 3623 ldx [%i7+15*32],%o5 3624 movneg %xcc, %o4, %l2 3625 add %i7,16*32, %i7 3626 movneg %xcc, %o5, %l3 3627 ldx [%i7+0*32], %l4 
3628 ldx [%i7+8*32], %l5 3629 ldx [%i7+1*32], %o4 3630 ldx [%i7+9*32], %o5 3631 movvs %icc, %o4, %l4 3632 ldx [%i7+2*32], %o4 3633 movvs %icc, %o5, %l5 3634 ldx [%i7+10*32],%o5 3635 move %icc, %o4, %l4 3636 ldx [%i7+3*32], %o4 3637 move %icc, %o5, %l5 3638 ldx [%i7+11*32],%o5 3639 movneg %icc, %o4, %l4 3640 ldx [%i7+4*32], %o4 3641 movneg %icc, %o5, %l5 3642 ldx [%i7+12*32],%o5 3643 movcs %xcc, %o4, %l4 3644 ldx [%i7+5*32],%o4 3645 movcs %xcc, %o5, %l5 3646 ldx [%i7+13*32],%o5 3647 movvs %xcc, %o4, %l4 3648 ldx [%i7+6*32], %o4 3649 movvs %xcc, %o5, %l5 3650 ldx [%i7+14*32],%o5 3651 move %xcc, %o4, %l4 3652 ldx [%i7+7*32], %o4 3653 move %xcc, %o5, %l5 3654 ldx [%i7+15*32],%o5 3655 movneg %xcc, %o4, %l4 3656 add %i7,16*32, %i7 3657 movneg %xcc, %o5, %l5 3658 ldx [%i7+0*32], %l6 3659 ldx [%i7+8*32], %l7 3660 ldx [%i7+1*32], %o4 3661 ldx [%i7+9*32], %o5 3662 movvs %icc, %o4, %l6 3663 ldx [%i7+2*32], %o4 3664 movvs %icc, %o5, %l7 3665 ldx [%i7+10*32],%o5 3666 move %icc, %o4, %l6 3667 ldx [%i7+3*32], %o4 3668 move %icc, %o5, %l7 3669 ldx [%i7+11*32],%o5 3670 movneg %icc, %o4, %l6 3671 ldx [%i7+4*32], %o4 3672 movneg %icc, %o5, %l7 3673 ldx [%i7+12*32],%o5 3674 movcs %xcc, %o4, %l6 3675 ldx [%i7+5*32],%o4 3676 movcs %xcc, %o5, %l7 3677 ldx [%i7+13*32],%o5 3678 movvs %xcc, %o4, %l6 3679 ldx [%i7+6*32], %o4 3680 movvs %xcc, %o5, %l7 3681 ldx [%i7+14*32],%o5 3682 move %xcc, %o4, %l6 3683 ldx [%i7+7*32], %o4 3684 move %xcc, %o5, %l7 3685 ldx [%i7+15*32],%o5 3686 movneg %xcc, %o4, %l6 3687 add %i7,16*32, %i7 3688 movneg %xcc, %o5, %l7 3689 ldx [%i7+0*32], %o0 3690 ldx [%i7+8*32], %o1 3691 ldx [%i7+1*32], %o4 3692 ldx [%i7+9*32], %o5 3693 movvs %icc, %o4, %o0 3694 ldx [%i7+2*32], %o4 3695 movvs %icc, %o5, %o1 3696 ldx [%i7+10*32],%o5 3697 move %icc, %o4, %o0 3698 ldx [%i7+3*32], %o4 3699 move %icc, %o5, %o1 3700 ldx [%i7+11*32],%o5 3701 movneg %icc, %o4, %o0 3702 ldx [%i7+4*32], %o4 3703 movneg %icc, %o5, %o1 3704 ldx [%i7+12*32],%o5 3705 movcs %xcc, %o4, %o0 3706 ldx 
[%i7+5*32],%o4 3707 movcs %xcc, %o5, %o1 3708 ldx [%i7+13*32],%o5 3709 movvs %xcc, %o4, %o0 3710 ldx [%i7+6*32], %o4 3711 movvs %xcc, %o5, %o1 3712 ldx [%i7+14*32],%o5 3713 move %xcc, %o4, %o0 3714 ldx [%i7+7*32], %o4 3715 move %xcc, %o5, %o1 3716 ldx [%i7+15*32],%o5 3717 movneg %xcc, %o4, %o0 3718 add %i7,16*32, %i7 3719 movneg %xcc, %o5, %o1 3720 ldx [%i7+0*32], %o2 3721 ldx [%i7+8*32], %o3 3722 ldx [%i7+1*32], %o4 3723 ldx [%i7+9*32], %o5 3724 movvs %icc, %o4, %o2 3725 ldx [%i7+2*32], %o4 3726 movvs %icc, %o5, %o3 3727 ldx [%i7+10*32],%o5 3728 move %icc, %o4, %o2 3729 ldx [%i7+3*32], %o4 3730 move %icc, %o5, %o3 3731 ldx [%i7+11*32],%o5 3732 movneg %icc, %o4, %o2 3733 ldx [%i7+4*32], %o4 3734 movneg %icc, %o5, %o3 3735 ldx [%i7+12*32],%o5 3736 movcs %xcc, %o4, %o2 3737 ldx [%i7+5*32],%o4 3738 movcs %xcc, %o5, %o3 3739 ldx [%i7+13*32],%o5 3740 movvs %xcc, %o4, %o2 3741 ldx [%i7+6*32], %o4 3742 movvs %xcc, %o5, %o3 3743 ldx [%i7+14*32],%o5 3744 move %xcc, %o4, %o2 3745 ldx [%i7+7*32], %o4 3746 move %xcc, %o5, %o3 3747 ldx [%i7+15*32],%o5 3748 movneg %xcc, %o4, %o2 3749 add %i7,16*32, %i7 3750 movneg %xcc, %o5, %o3 3751 srax %g4, 32, %o4 ! unpack %g4 3752 srl %g4, %g0, %o5 3753 sub %o4, 5, %o4 3754 mov %g3, %i7 3755 sllx %o4, 32, %g4 ! re-pack %g4 3756 or %o5, %g4, %g4 3757 srl %o5, %o4, %o5 3758 srl %o5, 2, %o4 3759 and %o5, 3, %o5 3760 and %o4, 7, %o4 3761 sll %o5, 3, %o5 ! offset within first cache line 3762 add %o5, %i7, %i7 ! of the pwrtbl 3763 or %g0, 1, %o5 3764 sll %o5, %o4, %o4 3765 .word 0x81b02940+32-1 ! montsqr 32-1 3766 fbu,pn %fcc3,.Labort_32 3767#ifndef __arch64__ 3768 and %fp,%g5,%g5 3769 brz,pn %g5,.Labort_32 3770#endif 3771 nop 3772 .word 0x81b02940+32-1 ! montsqr 32-1 3773 fbu,pn %fcc3,.Labort_32 3774#ifndef __arch64__ 3775 and %fp,%g5,%g5 3776 brz,pn %g5,.Labort_32 3777#endif 3778 nop 3779 .word 0x81b02940+32-1 ! 
montsqr 32-1 3780 fbu,pn %fcc3,.Labort_32 3781#ifndef __arch64__ 3782 and %fp,%g5,%g5 3783 brz,pn %g5,.Labort_32 3784#endif 3785 nop 3786 .word 0x81b02940+32-1 ! montsqr 32-1 3787 fbu,pn %fcc3,.Labort_32 3788#ifndef __arch64__ 3789 and %fp,%g5,%g5 3790 brz,pn %g5,.Labort_32 3791#endif 3792 nop 3793 .word 0x81b02940+32-1 ! montsqr 32-1 3794 fbu,pn %fcc3,.Labort_32 3795#ifndef __arch64__ 3796 and %fp,%g5,%g5 3797 brz,pn %g5,.Labort_32 3798#endif 3799 nop 3800 wr %o4, %g0, %ccr 3801 .word 0x81b02920+32-1 ! montmul 32-1 3802 fbu,pn %fcc3,.Labort_32 3803#ifndef __arch64__ 3804 and %fp,%g5,%g5 3805 brz,pn %g5,.Labort_32 3806#endif 3807 3808 srax %g4, 32, %o4 3809#ifdef __arch64__ 3810 brgez %o4,.Lstride_32 3811 restore 3812 restore 3813 restore 3814 restore 3815 restore 3816#else 3817 brgez %o4,.Lstride_32 3818 restore; and %fp,%g5,%g5 3819 restore; and %fp,%g5,%g5 3820 restore; and %fp,%g5,%g5 3821 restore; and %fp,%g5,%g5 3822 brz,pn %g5,.Labort1_32 3823 restore 3824#endif 3825 .word 0x81b02310 !movxtod %l0,%f0 3826 .word 0x85b02311 !movxtod %l1,%f2 3827 .word 0x89b02312 !movxtod %l2,%f4 3828 .word 0x8db02313 !movxtod %l3,%f6 3829 .word 0x91b02314 !movxtod %l4,%f8 3830 .word 0x95b02315 !movxtod %l5,%f10 3831 .word 0x99b02316 !movxtod %l6,%f12 3832 .word 0x9db02317 !movxtod %l7,%f14 3833 .word 0xa1b02308 !movxtod %o0,%f16 3834 .word 0xa5b02309 !movxtod %o1,%f18 3835 .word 0xa9b0230a !movxtod %o2,%f20 3836 .word 0xadb0230b !movxtod %o3,%f22 3837 .word 0xbbb0230c !movxtod %o4,%f60 3838 .word 0xbfb0230d !movxtod %o5,%f62 3839#ifdef __arch64__ 3840 restore 3841#else 3842 and %fp,%g5,%g5 3843 restore 3844 and %g5,1,%o7 3845 and %fp,%g5,%g5 3846 srl %fp,0,%fp ! just in case? 3847 or %o7,%g5,%g5 3848 brz,a,pn %g5,.Ldone_32 3849 mov 0,%i0 ! 
return failure 3850#endif 3851 std %f0,[%g1+0*8] 3852 std %f2,[%g1+1*8] 3853 std %f4,[%g1+2*8] 3854 std %f6,[%g1+3*8] 3855 std %f8,[%g1+4*8] 3856 std %f10,[%g1+5*8] 3857 std %f12,[%g1+6*8] 3858 std %f14,[%g1+7*8] 3859 std %f16,[%g1+8*8] 3860 std %f18,[%g1+9*8] 3861 std %f20,[%g1+10*8] 3862 std %f22,[%g1+11*8] 3863 std %f60,[%g1+12*8] 3864 std %f62,[%g1+13*8] 3865 std %f24,[%g1+14*8] 3866 std %f26,[%g1+15*8] 3867 std %f28,[%g1+16*8] 3868 std %f30,[%g1+17*8] 3869 std %f32,[%g1+18*8] 3870 std %f34,[%g1+19*8] 3871 std %f36,[%g1+20*8] 3872 std %f38,[%g1+21*8] 3873 std %f40,[%g1+22*8] 3874 std %f42,[%g1+23*8] 3875 std %f44,[%g1+24*8] 3876 std %f46,[%g1+25*8] 3877 std %f48,[%g1+26*8] 3878 std %f50,[%g1+27*8] 3879 std %f52,[%g1+28*8] 3880 std %f54,[%g1+29*8] 3881 std %f56,[%g1+30*8] 3882 std %f58,[%g1+31*8] 3883 mov 1,%i0 ! return success 3884.Ldone_32: 3885 ret 3886 restore 3887 3888.Labort_32: 3889 restore 3890 restore 3891 restore 3892 restore 3893 restore 3894.Labort1_32: 3895 restore 3896 3897 mov 0,%i0 ! return failure 3898 ret 3899 restore 3900.type bn_pwr5_mont_t4_32, #function 3901.size bn_pwr5_mont_t4_32, .-bn_pwr5_mont_t4_32 3902.globl bn_mul_mont_t4 3903.align 32 3904bn_mul_mont_t4: 3905 add %sp, STACK_BIAS, %g4 ! real top of stack 3906 sll %o5, 3, %o5 ! size in bytes 3907 add %o5, 63, %g1 3908 andn %g1, 63, %g1 ! buffer size rounded up to 64 bytes 3909 sub %g4, %g1, %g1 3910 andn %g1, 63, %g1 ! align at 64 byte 3911 sub %g1, STACK_FRAME, %g1 ! new top of stack 3912 sub %g1, %g4, %g1 3913 3914 save %sp, %g1, %sp 3915 ld [%i4+0], %l0 ! pull n0[0..1] value 3916 ld [%i4+4], %l1 3917 add %sp, STACK_BIAS+STACK_FRAME, %l5 3918 ldx [%i2+0], %g2 ! m0=bp[0] 3919 sllx %l1, 32, %g1 3920 add %i2, 8, %i2 3921 or %l0, %g1, %g1 3922 3923 ldx [%i1+0], %o2 ! ap[0] 3924 3925 mulx %o2, %g2, %g4 ! ap[0]*bp[0] 3926 .word 0x8bb282c2 !umulxhi %o2,%g2,%g5 3927 3928 ldx [%i1+8], %o2 ! ap[1] 3929 add %i1, 16, %i1 3930 ldx [%i3+0], %o4 ! np[0] 3931 3932 mulx %g4, %g1, %g3 ! 
"tp[0]"*n0 3933 3934 mulx %o2, %g2, %o3 ! ap[1]*bp[0] 3935 .word 0x95b282c2 !umulxhi %o2,%g2,%o2 ! ahi=aj 3936 3937 mulx %o4, %g3, %o0 ! np[0]*m1 3938 .word 0x93b302c3 !umulxhi %o4,%g3,%o1 3939 3940 ldx [%i3+8], %o4 ! np[1] 3941 3942 addcc %g4, %o0, %o0 3943 add %i3, 16, %i3 3944 .word 0x93b00229 !addxc %g0,%o1,%o1 3945 3946 mulx %o4, %g3, %o5 ! np[1]*m1 3947 .word 0x99b302c3 !umulxhi %o4,%g3,%o4 ! nhi=nj 3948 3949 ba .L1st 3950 sub %i5, 24, %l4 ! cnt=num-3 3951 3952.align 16 3953.L1st: 3954 addcc %o3, %g5, %g4 3955 .word 0x8bb28220 !addxc %o2,%g0,%g5 3956 3957 ldx [%i1+0], %o2 ! ap[j] 3958 addcc %o5, %o1, %o0 3959 add %i1, 8, %i1 3960 .word 0x93b30220 !addxc %o4,%g0,%o1 ! nhi=nj 3961 3962 ldx [%i3+0], %o4 ! np[j] 3963 mulx %o2, %g2, %o3 ! ap[j]*bp[0] 3964 add %i3, 8, %i3 3965 .word 0x95b282c2 !umulxhi %o2,%g2,%o2 ! ahi=aj 3966 3967 mulx %o4, %g3, %o5 ! np[j]*m1 3968 addcc %g4, %o0, %o0 ! np[j]*m1+ap[j]*bp[0] 3969 .word 0x99b302c3 !umulxhi %o4,%g3,%o4 ! nhi=nj 3970 .word 0x93b00229 !addxc %g0,%o1,%o1 3971 stxa %o0, [%l5]0xe2 ! tp[j-1] 3972 add %l5, 8, %l5 ! tp++ 3973 3974 brnz,pt %l4, .L1st 3975 sub %l4, 8, %l4 ! j-- 3976!.L1st 3977 addcc %o3, %g5, %g4 3978 .word 0x8bb28220 !addxc %o2,%g0,%g5 ! ahi=aj 3979 3980 addcc %o5, %o1, %o0 3981 .word 0x93b30220 !addxc %o4,%g0,%o1 3982 addcc %g4, %o0, %o0 ! np[j]*m1+ap[j]*bp[0] 3983 .word 0x93b00229 !addxc %g0,%o1,%o1 3984 stxa %o0, [%l5]0xe2 ! tp[j-1] 3985 add %l5, 8, %l5 3986 3987 addcc %g5, %o1, %o1 3988 .word 0xa1b00220 !addxc %g0,%g0,%l0 ! upmost overflow bit 3989 stxa %o1, [%l5]0xe2 3990 add %l5, 8, %l5 3991 3992 ba .Louter 3993 sub %i5, 16, %l1 ! i=num-2 3994 3995.align 16 3996.Louter: 3997 ldx [%i2+0], %g2 ! m0=bp[i] 3998 add %i2, 8, %i2 3999 4000 sub %i1, %i5, %i1 ! rewind 4001 sub %i3, %i5, %i3 4002 sub %l5, %i5, %l5 4003 4004 ldx [%i1+0], %o2 ! ap[0] 4005 ldx [%i3+0], %o4 ! np[0] 4006 4007 mulx %o2, %g2, %g4 ! ap[0]*bp[i] 4008 ldx [%l5], %o7 ! 
tp[0] 4009 .word 0x8bb282c2 !umulxhi %o2,%g2,%g5 4010 ldx [%i1+8], %o2 ! ap[1] 4011 addcc %g4, %o7, %g4 ! ap[0]*bp[i]+tp[0] 4012 mulx %o2, %g2, %o3 ! ap[1]*bp[i] 4013 .word 0x8bb00225 !addxc %g0,%g5,%g5 4014 mulx %g4, %g1, %g3 ! tp[0]*n0 4015 .word 0x95b282c2 !umulxhi %o2,%g2,%o2 ! ahi=aj 4016 mulx %o4, %g3, %o0 ! np[0]*m1 4017 add %i1, 16, %i1 4018 .word 0x93b302c3 !umulxhi %o4,%g3,%o1 4019 ldx [%i3+8], %o4 ! np[1] 4020 add %i3, 16, %i3 4021 addcc %o0, %g4, %o0 4022 mulx %o4, %g3, %o5 ! np[1]*m1 4023 .word 0x93b00229 !addxc %g0,%o1,%o1 4024 .word 0x99b302c3 !umulxhi %o4,%g3,%o4 ! nhi=nj 4025 4026 ba .Linner 4027 sub %i5, 24, %l4 ! cnt=num-3 4028.align 16 4029.Linner: 4030 addcc %o3, %g5, %g4 4031 ldx [%l5+8], %o7 ! tp[j] 4032 .word 0x8bb28220 !addxc %o2,%g0,%g5 ! ahi=aj 4033 ldx [%i1+0], %o2 ! ap[j] 4034 add %i1, 8, %i1 4035 addcc %o5, %o1, %o0 4036 mulx %o2, %g2, %o3 ! ap[j]*bp[i] 4037 .word 0x93b30220 !addxc %o4,%g0,%o1 ! nhi=nj 4038 ldx [%i3+0], %o4 ! np[j] 4039 add %i3, 8, %i3 4040 .word 0x95b282c2 !umulxhi %o2,%g2,%o2 ! ahi=aj 4041 addcc %g4, %o7, %g4 ! ap[j]*bp[i]+tp[j] 4042 mulx %o4, %g3, %o5 ! np[j]*m1 4043 .word 0x8bb00225 !addxc %g0,%g5,%g5 4044 .word 0x99b302c3 !umulxhi %o4,%g3,%o4 ! nhi=nj 4045 addcc %o0, %g4, %o0 ! np[j]*m1+ap[j]*bp[i]+tp[j] 4046 .word 0x93b00229 !addxc %g0,%o1,%o1 4047 stx %o0, [%l5] ! tp[j-1] 4048 add %l5, 8, %l5 4049 brnz,pt %l4, .Linner 4050 sub %l4, 8, %l4 4051!.Linner 4052 ldx [%l5+8], %o7 ! tp[j] 4053 addcc %o3, %g5, %g4 4054 .word 0x8bb28220 !addxc %o2,%g0,%g5 ! ahi=aj 4055 addcc %g4, %o7, %g4 ! ap[j]*bp[i]+tp[j] 4056 .word 0x8bb00225 !addxc %g0,%g5,%g5 4057 4058 addcc %o5, %o1, %o0 4059 .word 0x93b30220 !addxc %o4,%g0,%o1 ! nhi=nj 4060 addcc %o0, %g4, %o0 ! np[j]*m1+ap[j]*bp[i]+tp[j] 4061 .word 0x93b00229 !addxc %g0,%o1,%o1 4062 stx %o0, [%l5] ! tp[j-1] 4063 4064 subcc %g0, %l0, %g0 ! 
! move upmost overflow to CCR.xcc (comment tail of the preceding subcc)
	.word	0x93b24265	!addxccc %o1,%g5,%o1	! fold carried-in overflow into top word
	.word	0xa1b00220	!addxc %g0,%g0,%l0	! %l0 = new upmost overflow bit
	stx	%o1, [%l5+8]
	add	%l5, 16, %l5

	brnz,pt	%l1, .Louter
	sub	%l1, 8, %l1			! i-- (delay slot)

	sub	%i1, %i5, %i1			! rewind ap, np, tp to their starts
	sub	%i3, %i5, %i3
	sub	%l5, %i5, %l5
	ba	.Lsub
	subcc	%i5, 8, %l4			! cnt=num-1 and clear CCR.xcc

! rp[] = tp[] - np[], borrow chained through CCR.xcc (subccc), stored as
! 32-bit halves in swapped order (see bn_flip_t4 below for the format).
.align	16
.Lsub:
	ldx	[%l5], %o7
	add	%l5, 8, %l5
	ldx	[%i3+0], %o4
	add	%i3, 8, %i3
	subccc	%o7, %o4, %l2			! tp[j]-np[j]
	srlx	%o7, 32, %o7
	srlx	%o4, 32, %o4
	subccc	%o7, %o4, %l3
	add	%i0, 8, %i0
	st	%l2, [%i0-4]			! reverse order
	st	%l3, [%i0-8]
	brnz,pt	%l4, .Lsub
	sub	%l4, 8, %l4

	sub	%i3, %i5, %i3			! rewind
	sub	%l5, %i5, %l5
	sub	%i0, %i5, %i0

	subccc	%l0, %g0, %l0			! handle upmost overflow bit
	ba	.Lcopy
	sub	%i5, 8, %l4

! Conditional copy: rp[] already holds tp-np (stored by .Lsub). If the full
! subtraction borrowed (carry set), movcs restores the original tp[j]
! instead. tp[] is zeroed either way.
.align	16
.Lcopy:					! conditional copy
	ldx	[%l5], %o7
	ldx	[%i0+0], %l2
	stx	%g0, [%l5]			! zap tp[j]
	add	%l5, 8, %l5
	movcs	%icc, %o7, %l2			! borrow => keep tp[j]
	stx	%l2, [%i0+0]
	add	%i0, 8, %i0
	brnz	%l4, .Lcopy
	sub	%l4, 8, %l4

	mov	1, %o0				! return 1 (success)
	ret
	restore
.type	bn_mul_mont_t4, #function
.size	bn_mul_mont_t4, .-bn_mul_mont_t4

! bn_mul_mont_gather5_t4 -- Montgomery multiplication where each 64-bit
! word of bp is gathered per outer iteration from a 32-way interleaved
! power table via conditional moves keyed off %ccr, never via a
! power-dependent load address (cache-timing defense; NOTE(review):
! inferred from the movcc selection pattern -- confirm against the
! companion bn_flip_n_scatter5_t4 layout).
! In (after save): %i0=rp, %i1=ap, %i2=pwrtbl, %i3=np, %i4=&n0[0..1],
!	%i5=num*8 (bytes; caller's 6th arg shifted below),
!	7th stack argument = power index (0..31).
! STACK_BIAS/STACK_FRAME/STACK_7thARG/LDPTR come from crypto/sparc_arch.h.
.globl	bn_mul_mont_gather5_t4
.align	32
bn_mul_mont_gather5_t4:
	add	%sp, STACK_BIAS, %g4	! real top of stack
	sll	%o5, 3, %o5		! size in bytes
	add	%o5, 63, %g1
	andn	%g1, 63, %g1		! buffer size rounded up to 64 bytes
	sub	%g4, %g1, %g1
	andn	%g1, 63, %g1		! align at 64 byte
	sub	%g1, STACK_FRAME, %g1	! new top of stack
	sub	%g1, %g4, %g1
	LDPTR	[%sp+STACK_7thARG], %g4	! load power, 7th argument

	save	%sp, %g1, %sp
	! split power into cache-line offset (power%4) and flag bit (power/4)
	srl	%g4, 2, %o4
	and	%g4, 3, %o5
	and	%o4, 7, %o4
	sll	%o5, 3, %o5		! offset within first cache line
	add	%o5, %i2, %i2		! of the pwrtbl
	or	%g0, 1, %o5
	sll	%o5, %o4, %l7		! %l7 = CCR mask, reused each outer pass
	wr	%l7, %g0, %ccr
	! gather bp[0]: exactly one CCR flag is set, so exactly one of the
	! eight candidates below survives into %g2 (entry 0 if none fires)
	ldx	[%i2+0*32], %g2
	ldx	[%i2+1*32], %o4
	ldx	[%i2+2*32], %o5
	movvs	%icc, %o4, %g2
	ldx	[%i2+3*32], %o4
	move	%icc, %o5, %g2
	ldx	[%i2+4*32], %o5
	movneg	%icc, %o4, %g2
	ldx	[%i2+5*32], %o4
	movcs	%xcc, %o5, %g2
	ldx	[%i2+6*32], %o5
	movvs	%xcc, %o4, %g2
	ldx	[%i2+7*32], %o4
	move	%xcc, %o5, %g2
	add	%i2, 8*32, %i2
	movneg	%xcc, %o4, %g2
	ld	[%i4+0], %l0		! pull n0[0..1] value
	ld	[%i4+4], %l1
	add	%sp, STACK_BIAS+STACK_FRAME, %l5	! tp on our own frame
	sllx	%l1, 32, %g1
	or	%l0, %g1, %g1		! %g1 = 64-bit n0

	ldx	[%i1+0], %o2		! ap[0]

	mulx	%o2, %g2, %g4		! ap[0]*bp[0]
	.word	0x8bb282c2	!umulxhi %o2,%g2,%g5

	ldx	[%i1+8], %o2		! ap[1]
	add	%i1, 16, %i1
	ldx	[%i3+0], %o4		! np[0]

	mulx	%g4, %g1, %g3		! "tp[0]"*n0

	mulx	%o2, %g2, %o3		! ap[1]*bp[0]
	.word	0x95b282c2	!umulxhi %o2,%g2,%o2	! ahi=aj

	mulx	%o4, %g3, %o0		! np[0]*m1
	.word	0x93b302c3	!umulxhi %o4,%g3,%o1

	ldx	[%i3+8], %o4		! np[1]

	addcc	%g4, %o0, %o0
	add	%i3, 16, %i3
	.word	0x93b00229	!addxc %g0,%o1,%o1

	mulx	%o4, %g3, %o5		! np[1]*m1
	.word	0x99b302c3	!umulxhi %o4,%g3,%o4	! nhi=nj

	ba	.L1st_g5
	sub	%i5, 24, %l4		! cnt=num-3

! First-pass inner loop: tp[] = ap[]*bp[0] + np[]*m1, one word per pass
.align	16
.L1st_g5:
	addcc	%o3, %g5, %g4
	.word	0x8bb28220	!addxc %o2,%g0,%g5

	ldx	[%i1+0], %o2		! ap[j]
	addcc	%o5, %o1, %o0
	add	%i1, 8, %i1
	.word	0x93b30220	!addxc %o4,%g0,%o1	! nhi=nj

	ldx	[%i3+0], %o4		! np[j]
	mulx	%o2, %g2, %o3		! ap[j]*bp[0]
	add	%i3, 8, %i3
	.word	0x95b282c2	!umulxhi %o2,%g2,%o2	! ahi=aj

	mulx	%o4, %g3, %o5		! np[j]*m1
	addcc	%g4, %o0, %o0		! np[j]*m1+ap[j]*bp[0]
	.word	0x99b302c3	!umulxhi %o4,%g3,%o4	! nhi=nj
	.word	0x93b00229	!addxc %g0,%o1,%o1
	stxa	%o0, [%l5]0xe2		! tp[j-1]
	add	%l5, 8, %l5		! tp++

	brnz,pt	%l4, .L1st_g5
	sub	%l4, 8, %l4		! j--
!.L1st_g5 epilogue: flush the last two words and capture the overflow bit
	addcc	%o3, %g5, %g4
	.word	0x8bb28220	!addxc %o2,%g0,%g5	! ahi=aj

	addcc	%o5, %o1, %o0
	.word	0x93b30220	!addxc %o4,%g0,%o1
	addcc	%g4, %o0, %o0		! np[j]*m1+ap[j]*bp[0]
	.word	0x93b00229	!addxc %g0,%o1,%o1
	stxa	%o0, [%l5]0xe2		! tp[j-1]
	add	%l5, 8, %l5

	addcc	%g5, %o1, %o1
	.word	0xa1b00220	!addxc %g0,%g0,%l0	! upmost overflow bit
	stxa	%o1, [%l5]0xe2
	add	%l5, 8, %l5

	ba	.Louter_g5
	sub	%i5, 16, %l1		! i=num-2

! Outer loop: re-gather bp[i] from the next table row, then accumulate
! tp[] += ap[]*bp[i] + np[]*m1
.align	16
.Louter_g5:
	wr	%l7, %g0, %ccr		! same mask, next 8*32-byte table row
	ldx	[%i2+0*32], %g2
	ldx	[%i2+1*32], %o4
	ldx	[%i2+2*32], %o5
	movvs	%icc, %o4, %g2
	ldx	[%i2+3*32], %o4
	move	%icc, %o5, %g2
	ldx	[%i2+4*32], %o5
	movneg	%icc, %o4, %g2
	ldx	[%i2+5*32], %o4
	movcs	%xcc, %o5, %g2
	ldx	[%i2+6*32], %o5
	movvs	%xcc, %o4, %g2
	ldx	[%i2+7*32], %o4
	move	%xcc, %o5, %g2
	add	%i2, 8*32, %i2
	movneg	%xcc, %o4, %g2
	sub	%i1, %i5, %i1		! rewind
	sub	%i3, %i5, %i3
	sub	%l5, %i5, %l5

	ldx	[%i1+0], %o2		! ap[0]
	ldx	[%i3+0], %o4		! np[0]

	mulx	%o2, %g2, %g4		! ap[0]*bp[i]
	ldx	[%l5], %o7		! tp[0]
	.word	0x8bb282c2	!umulxhi %o2,%g2,%g5
	ldx	[%i1+8], %o2		! ap[1]
	addcc	%g4, %o7, %g4		! ap[0]*bp[i]+tp[0]
	mulx	%o2, %g2, %o3		! ap[1]*bp[i]
	.word	0x8bb00225	!addxc %g0,%g5,%g5
	mulx	%g4, %g1, %g3		! tp[0]*n0
	.word	0x95b282c2	!umulxhi %o2,%g2,%o2	! ahi=aj
	mulx	%o4, %g3, %o0		! np[0]*m1
	add	%i1, 16, %i1
	.word	0x93b302c3	!umulxhi %o4,%g3,%o1
	ldx	[%i3+8], %o4		! np[1]
	add	%i3, 16, %i3
	addcc	%o0, %g4, %o0
	mulx	%o4, %g3, %o5		! np[1]*m1
	.word	0x93b00229	!addxc %g0,%o1,%o1
	.word	0x99b302c3	!umulxhi %o4,%g3,%o4	! nhi=nj

	ba	.Linner_g5
	sub	%i5, 24, %l4		! cnt=num-3
.align	16
.Linner_g5:
	addcc	%o3, %g5, %g4
	ldx	[%l5+8], %o7		! tp[j]
	.word	0x8bb28220	!addxc %o2,%g0,%g5	! ahi=aj
	ldx	[%i1+0], %o2		! ap[j]
	add	%i1, 8, %i1
	addcc	%o5, %o1, %o0
	mulx	%o2, %g2, %o3		! ap[j]*bp[i]
	.word	0x93b30220	!addxc %o4,%g0,%o1	! nhi=nj
	ldx	[%i3+0], %o4		! np[j]
	add	%i3, 8, %i3
	.word	0x95b282c2	!umulxhi %o2,%g2,%o2	! ahi=aj
	addcc	%g4, %o7, %g4		! ap[j]*bp[i]+tp[j]
	mulx	%o4, %g3, %o5		! np[j]*m1
	.word	0x8bb00225	!addxc %g0,%g5,%g5
	.word	0x99b302c3	!umulxhi %o4,%g3,%o4	! nhi=nj
	addcc	%o0, %g4, %o0		! np[j]*m1+ap[j]*bp[i]+tp[j]
	.word	0x93b00229	!addxc %g0,%o1,%o1
	stx	%o0, [%l5]		! tp[j-1]
	add	%l5, 8, %l5
	brnz,pt	%l4, .Linner_g5
	sub	%l4, 8, %l4
!.Linner_g5 epilogue: last two words plus carried overflow bit
	ldx	[%l5+8], %o7		! tp[j]
	addcc	%o3, %g5, %g4
	.word	0x8bb28220	!addxc %o2,%g0,%g5	! ahi=aj
	addcc	%g4, %o7, %g4		! ap[j]*bp[i]+tp[j]
	.word	0x8bb00225	!addxc %g0,%g5,%g5

	addcc	%o5, %o1, %o0
	.word	0x93b30220	!addxc %o4,%g0,%o1	! nhi=nj
	addcc	%o0, %g4, %o0		! np[j]*m1+ap[j]*bp[i]+tp[j]
	.word	0x93b00229	!addxc %g0,%o1,%o1
	stx	%o0, [%l5]		! tp[j-1]

	subcc	%g0, %l0, %g0		! move upmost overflow to CCR.xcc
	.word	0x93b24265	!addxccc %o1,%g5,%o1
	.word	0xa1b00220	!addxc %g0,%g0,%l0
	stx	%o1, [%l5+8]
	add	%l5, 16, %l5

	brnz,pt	%l1, .Louter_g5
	sub	%l1, 8, %l1

	sub	%i1, %i5, %i1		! rewind
	sub	%i3, %i5, %i3
	sub	%l5, %i5, %l5
	ba	.Lsub_g5
	subcc	%i5, 8, %l4		! cnt=num-1 and clear CCR.xcc

! Final reduction: rp[] = tp[] - np[], same scheme as .Lsub above
.align	16
.Lsub_g5:
	ldx	[%l5], %o7
	add	%l5, 8, %l5
	ldx	[%i3+0], %o4
	add	%i3, 8, %i3
	subccc	%o7, %o4, %l2		! tp[j]-np[j]
	srlx	%o7, 32, %o7
	srlx	%o4, 32, %o4
	subccc	%o7, %o4, %l3
	add	%i0, 8, %i0
	st	%l2, [%i0-4]		! reverse order
	st	%l3, [%i0-8]
	brnz,pt	%l4, .Lsub_g5
	sub	%l4, 8, %l4

	sub	%i3, %i5, %i3		! rewind
	sub	%l5, %i5, %l5
	sub	%i0, %i5, %i0

	subccc	%l0, %g0, %l0		! handle upmost overflow bit
	ba	.Lcopy_g5
	sub	%i5, 8, %l4

! Conditional copy, mirror of .Lcopy: borrow => keep tp[j]; tp is zapped
.align	16
.Lcopy_g5:				! conditional copy
	ldx	[%l5], %o7
	ldx	[%i0+0], %l2
	stx	%g0, [%l5]		! zap
	add	%l5, 8, %l5
	movcs	%icc, %o7, %l2
	stx	%l2, [%i0+0]
	add	%i0, 8, %i0
	brnz	%l4, .Lcopy_g5
	sub	%l4, 8, %l4

	mov	1, %o0			! return 1 (success)
	ret
	restore
.type	bn_mul_mont_gather5_t4, #function
.size	bn_mul_mont_gather5_t4, .-bn_mul_mont_gather5_t4

! bn_flip_t4(dst, src, num) -- leaf routine: copy num 64-bit units from
! src to dst, swapping the two 32-bit halves of each unit (converts
! between 32-bit word order and 64-bit word order).
.globl	bn_flip_t4
.align	32
bn_flip_t4:
.Loop_flip:
	ld	[%o1+0], %o4
	sub	%o2, 1, %o2
	ld	[%o1+4], %o5
	add	%o1, 8, %o1
	st	%o5, [%o0+0]		! halves stored swapped
	st	%o4, [%o0+4]
	brnz	%o2, .Loop_flip
	add	%o0, 8, %o0
	retl
	nop
.type	bn_flip_t4, #function
.size	bn_flip_t4, .-bn_flip_t4

! bn_flip_n_scatter5_t4(inp, num, pwrtbl, pwr) -- leaf routine: flip each
! 64-bit word's halves and scatter it into column pwr of the power table,
! stepping 32*8 bytes per word (table interleaves 32 powers).
! NOTE(review): num (%o1) is halved first, so it is presumably a count of
! 32-bit words -- confirm against the caller.
.globl	bn_flip_n_scatter5_t4
.align	32
bn_flip_n_scatter5_t4:
	sll	%o3, 3, %o3
	srl	%o1, 1, %o1		! num/2 = number of 64-bit words
	add	%o3, %o2, %o2		! &pwrtbl[pwr]
	sub	%o1, 1, %o1
.Loop_flip_n_scatter5:
	ld	[%o0+0], %o4		! inp[i]
	ld	[%o0+4], %o5
	add	%o0, 8, %o0
	sllx	%o5, 32, %o5
	or	%o4, %o5, %o5		! assemble 64-bit word, halves swapped
	stx	%o5, [%o2]
	add	%o2, 32*8, %o2		! next row of the 32-way table
	brnz	%o1, .Loop_flip_n_scatter5
	sub	%o1, 1, %o1
	retl
	nop
.type	bn_flip_n_scatter5_t4, #function
.size	bn_flip_n_scatter5_t4, .-bn_flip_n_scatter5_t4

! bn_gather5_t4(out, num, pwrtbl, power) -- leaf routine: extract num
! 64-bit words of entry `power` from the 32-way interleaved table.
! power%4 selects the 8-byte offset within a 32-byte group; a single CCR
! flag bit (1 << (power/4 & 7)) decides which of the eight candidates
! survives the movcc chain below (entry 0 when the set bit is icc.c,
! which no movcc here tests). No power-dependent addresses are formed.
.globl	bn_gather5_t4
.align	32
bn_gather5_t4:
	srl	%o3, 2, %o4
	and	%o3, 3, %o5
	and	%o4, 7, %o4
	sll	%o5, 3, %o5		! offset within first cache line
	add	%o5, %o2, %o2		! of the pwrtbl
	or	%g0, 1, %o5
	sll	%o5, %o4, %g1
	wr	%g1, %g0, %ccr		! one flag bit = selector
	sub	%o1, 1, %o1
.Loop_gather5:
	ldx	[%o2+0*32], %g1
	ldx	[%o2+1*32], %o4
	ldx	[%o2+2*32], %o5
	movvs	%icc, %o4, %g1
	ldx	[%o2+3*32], %o4
	move	%icc, %o5, %g1
	ldx	[%o2+4*32], %o5
	movneg	%icc, %o4, %g1
	ldx	[%o2+5*32], %o4
	movcs	%xcc, %o5, %g1
	ldx	[%o2+6*32], %o5
	movvs	%xcc, %o4, %g1
	ldx	[%o2+7*32], %o4
	move	%xcc, %o5, %g1
	add	%o2, 8*32, %o2
	movneg	%xcc, %o4, %g1
	stx	%g1, [%o0]
	add	%o0, 8, %o0
	brnz	%o1, .Loop_gather5
	sub	%o1, 1, %o1

	retl
	nop
.type	bn_gather5_t4, #function
.size	bn_gather5_t4, .-bn_gather5_t4

.asciz	"Montgomery Multiplication for SPARC T4, David S. Miller, Andy Polyakov"
.align	4