1#include "sparc_arch.h" 2 3#ifdef __arch64__ 4.register %g2,#scratch 5.register %g3,#scratch 6#endif 7 8.section ".text",#alloc,#execinstr 9 10#ifdef __PIC__ 11SPARC_PIC_THUNK(%g1) 12#endif 13.globl bn_mul_mont_t4_8 14.align 32 15bn_mul_mont_t4_8: 16#ifdef __arch64__ 17 mov 0,%g5 18 mov -128,%g4 19#elif defined(SPARCV9_64BIT_STACK) 20 SPARC_LOAD_ADDRESS_LEAF(OPENSSL_sparcv9cap_P,%g1,%g5) 21 ld [%g1+0],%g1 ! OPENSSL_sparcv9_P[0] 22 mov -2047,%g4 23 and %g1,SPARCV9_64BIT_STACK,%g1 24 movrz %g1,0,%g4 25 mov -1,%g5 26 add %g4,-128,%g4 27#else 28 mov -1,%g5 29 mov -128,%g4 30#endif 31 sllx %g5,32,%g5 32 save %sp,%g4,%sp 33#ifndef __arch64__ 34 save %sp,-128,%sp ! warm it up 35 save %sp,-128,%sp 36 save %sp,-128,%sp 37 save %sp,-128,%sp 38 save %sp,-128,%sp 39 save %sp,-128,%sp 40 restore 41 restore 42 restore 43 restore 44 restore 45 restore 46#endif 47 and %sp,1,%g4 48 or %g5,%fp,%fp 49 or %g4,%g5,%g5 50 51 ! copy arguments to global registers 52 mov %i0,%g1 53 mov %i1,%g2 54 mov %i2,%g3 55 mov %i3,%g4 56 ld [%i4+0],%f1 ! load *n0 57 ld [%i4+4],%f0 58 .word 0xbbb00f00 !fsrc2 %f0,%f0,%f60 59 save %sp,-128,%sp; or %g5,%fp,%fp 60 ld [%g2+0*8+0],%l1 61 ld [%g2+0*8+4],%l0 62 sllx %l0,32,%l0 63 or %l1,%l0,%l0 64 ld [%g2+1*8+0],%l2 65 ld [%g2+1*8+4],%l1 66 sllx %l1,32,%l1 67 or %l2,%l1,%l1 68 ld [%g2+2*8+0],%l3 69 ld [%g2+2*8+4],%l2 70 sllx %l2,32,%l2 71 or %l3,%l2,%l2 72 ld [%g2+3*8+0],%l4 73 ld [%g2+3*8+4],%l3 74 sllx %l3,32,%l3 75 or %l4,%l3,%l3 76 ld [%g2+4*8+0],%l5 77 ld [%g2+4*8+4],%l4 78 sllx %l4,32,%l4 79 or %l5,%l4,%l4 80 ld [%g2+5*8+0],%l6 81 ld [%g2+5*8+4],%l5 82 sllx %l5,32,%l5 83 or %l6,%l5,%l5 84 ld [%g2+6*8+0],%l7 85 ld [%g2+6*8+4],%l6 86 sllx %l6,32,%l6 87 or %l7,%l6,%l6 88 ld [%g2+7*8+0],%o0 89 ld [%g2+7*8+4],%l7 90 sllx %l7,32,%l7 91 or %o0,%l7,%l7 92 save %sp,-128,%sp; or %g5,%fp,%fp 93 ld [%g4+0*8+0],%l1 94 ld [%g4+0*8+4],%l0 95 sllx %l0,32,%l0 96 or %l1,%l0,%l0 97 ld [%g4+1*8+0],%l2 98 ld [%g4+1*8+4],%l1 99 sllx %l1,32,%l1 100 or %l2,%l1,%l1 101 ld [%g4+2*8+0],%l3 102 ld [%g4+2*8+4],%l2 103 sllx %l2,32,%l2 104 or %l3,%l2,%l2 105 ld [%g4+3*8+0],%l4 106 ld [%g4+3*8+4],%l3 107 sllx %l3,32,%l3 108 or %l4,%l3,%l3 109 ld [%g4+4*8+0],%l5 110 ld [%g4+4*8+4],%l4 111 sllx %l4,32,%l4 112 or %l5,%l4,%l4 113 ld [%g4+5*8+0],%l6 114 ld [%g4+5*8+4],%l5 115 sllx %l5,32,%l5 116 or %l6,%l5,%l5 117 ld [%g4+6*8+0],%l7 118 ld [%g4+6*8+4],%l6 119 sllx %l6,32,%l6 120 or %l7,%l6,%l6 121 ld [%g4+7*8+0],%o0 122 ld [%g4+7*8+4],%l7 123 sllx %l7,32,%l7 124 or %o0,%l7,%l7 125 save %sp,-128,%sp; or %g5,%fp,%fp 126 save %sp,-128,%sp; or %g5,%fp,%fp 127 cmp %g2,%g3 128 be SIZE_T_CC,.Lmsquare_8 129 nop 130 save %sp,-128,%sp; or %g5,%fp,%fp 131 ld [%g3+0*8+0],%i1 132 ld [%g3+0*8+4],%i0 133 sllx %i0,32,%i0 134 or %i1,%i0,%i0 135 ld [%g3+1*8+0],%i2 136 ld [%g3+1*8+4],%i1 137 sllx %i1,32,%i1 138 or %i2,%i1,%i1 139 ld [%g3+2*8+0],%i3 140 ld [%g3+2*8+4],%i2 141 sllx %i2,32,%i2 142 or %i3,%i2,%i2 143 ld [%g3+3*8+0],%i4 144 ld [%g3+3*8+4],%i3 145 sllx %i3,32,%i3 146 or %i4,%i3,%i3 147 ld [%g3+4*8+0],%i5 148 ld [%g3+4*8+4],%i4 149 sllx %i4,32,%i4 150 or %i5,%i4,%i4 151 ld [%g3+5*8+0],%l0 152 ld [%g3+5*8+4],%i5 153 sllx %i5,32,%i5 154 or %l0,%i5,%i5 155 ld [%g3+6*8+0],%l1 156 ld [%g3+6*8+4],%l0 157 sllx %l0,32,%l0 158 or %l1,%l0,%l0 159 ld [%g3+7*8+0],%l2 160 ld [%g3+7*8+4],%l1 161 sllx %l1,32,%l1 162 or %l2,%l1,%l1 163 save %sp,-128,%sp; or %g5,%fp,%fp 164 .word 0x81b02920+8-1 ! 
montmul 8-1 165.Lmresume_8: 166 fbu,pn %fcc3,.Lmabort_8 167#ifndef __arch64__ 168 and %fp,%g5,%g5 169 brz,pn %g5,.Lmabort_8 170#endif 171 nop 172#ifdef __arch64__ 173 restore 174 restore 175 restore 176 restore 177 restore 178#else 179 restore; and %fp,%g5,%g5 180 restore; and %fp,%g5,%g5 181 restore; and %fp,%g5,%g5 182 restore; and %fp,%g5,%g5 183 brz,pn %g5,.Lmabort1_8 184 restore 185#endif 186 .word 0x81b02310 !movxtod %l0,%f0 187 .word 0x85b02311 !movxtod %l1,%f2 188 .word 0x89b02312 !movxtod %l2,%f4 189 .word 0x8db02313 !movxtod %l3,%f6 190 .word 0x91b02314 !movxtod %l4,%f8 191 .word 0x95b02315 !movxtod %l5,%f10 192 .word 0x99b02316 !movxtod %l6,%f12 193 .word 0x9db02317 !movxtod %l7,%f14 194#ifdef __arch64__ 195 restore 196#else 197 and %fp,%g5,%g5 198 restore 199 and %g5,1,%o7 200 and %fp,%g5,%g5 201 srl %fp,0,%fp ! just in case? 202 or %o7,%g5,%g5 203 brz,a,pn %g5,.Lmdone_8 204 mov 0,%i0 ! return failure 205#endif 206 st %f1,[%g1+0*8+0] 207 st %f0,[%g1+0*8+4] 208 st %f3,[%g1+1*8+0] 209 st %f2,[%g1+1*8+4] 210 st %f5,[%g1+2*8+0] 211 st %f4,[%g1+2*8+4] 212 st %f7,[%g1+3*8+0] 213 st %f6,[%g1+3*8+4] 214 st %f9,[%g1+4*8+0] 215 st %f8,[%g1+4*8+4] 216 st %f11,[%g1+5*8+0] 217 st %f10,[%g1+5*8+4] 218 st %f13,[%g1+6*8+0] 219 st %f12,[%g1+6*8+4] 220 st %f15,[%g1+7*8+0] 221 st %f14,[%g1+7*8+4] 222 mov 1,%i0 ! return success 223.Lmdone_8: 224 ret 225 restore 226 227.Lmabort_8: 228 restore 229 restore 230 restore 231 restore 232 restore 233.Lmabort1_8: 234 restore 235 236 mov 0,%i0 ! return failure 237 ret 238 restore 239 240.align 32 241.Lmsquare_8: 242 save %sp,-128,%sp; or %g5,%fp,%fp 243 save %sp,-128,%sp; or %g5,%fp,%fp 244 .word 0x81b02940+8-1 ! montsqr 8-1 245 ba .Lmresume_8 246 nop 247.type bn_mul_mont_t4_8, #function 248.size bn_mul_mont_t4_8, .-bn_mul_mont_t4_8 249.globl bn_mul_mont_t4_16 250.align 32 251bn_mul_mont_t4_16: 252#ifdef __arch64__ 253 mov 0,%g5 254 mov -128,%g4 255#elif defined(SPARCV9_64BIT_STACK) 256 SPARC_LOAD_ADDRESS_LEAF(OPENSSL_sparcv9cap_P,%g1,%g5) 257 ld [%g1+0],%g1 ! OPENSSL_sparcv9_P[0] 258 mov -2047,%g4 259 and %g1,SPARCV9_64BIT_STACK,%g1 260 movrz %g1,0,%g4 261 mov -1,%g5 262 add %g4,-128,%g4 263#else 264 mov -1,%g5 265 mov -128,%g4 266#endif 267 sllx %g5,32,%g5 268 save %sp,%g4,%sp 269#ifndef __arch64__ 270 save %sp,-128,%sp ! warm it up 271 save %sp,-128,%sp 272 save %sp,-128,%sp 273 save %sp,-128,%sp 274 save %sp,-128,%sp 275 save %sp,-128,%sp 276 restore 277 restore 278 restore 279 restore 280 restore 281 restore 282#endif 283 and %sp,1,%g4 284 or %g5,%fp,%fp 285 or %g4,%g5,%g5 286 287 ! copy arguments to global registers 288 mov %i0,%g1 289 mov %i1,%g2 290 mov %i2,%g3 291 mov %i3,%g4 292 ld [%i4+0],%f1 ! 
load *n0 293 ld [%i4+4],%f0 294 .word 0xbbb00f00 !fsrc2 %f0,%f0,%f60 295 save %sp,-128,%sp; or %g5,%fp,%fp 296 ld [%g2+0*8+0],%l1 297 ld [%g2+0*8+4],%l0 298 sllx %l0,32,%l0 299 or %l1,%l0,%l0 300 ld [%g2+1*8+0],%l2 301 ld [%g2+1*8+4],%l1 302 sllx %l1,32,%l1 303 or %l2,%l1,%l1 304 ld [%g2+2*8+0],%l3 305 ld [%g2+2*8+4],%l2 306 sllx %l2,32,%l2 307 or %l3,%l2,%l2 308 ld [%g2+3*8+0],%l4 309 ld [%g2+3*8+4],%l3 310 sllx %l3,32,%l3 311 or %l4,%l3,%l3 312 ld [%g2+4*8+0],%l5 313 ld [%g2+4*8+4],%l4 314 sllx %l4,32,%l4 315 or %l5,%l4,%l4 316 ld [%g2+5*8+0],%l6 317 ld [%g2+5*8+4],%l5 318 sllx %l5,32,%l5 319 or %l6,%l5,%l5 320 ld [%g2+6*8+0],%l7 321 ld [%g2+6*8+4],%l6 322 sllx %l6,32,%l6 323 or %l7,%l6,%l6 324 ld [%g2+7*8+0],%o0 325 ld [%g2+7*8+4],%l7 326 sllx %l7,32,%l7 327 or %o0,%l7,%l7 328 ld [%g2+8*8+0],%o1 329 ld [%g2+8*8+4],%o0 330 sllx %o0,32,%o0 331 or %o1,%o0,%o0 332 ld [%g2+9*8+0],%o2 333 ld [%g2+9*8+4],%o1 334 sllx %o1,32,%o1 335 or %o2,%o1,%o1 336 ld [%g2+10*8+0],%o3 337 ld [%g2+10*8+4],%o2 338 sllx %o2,32,%o2 339 or %o3,%o2,%o2 340 ld [%g2+11*8+0],%o4 341 ld [%g2+11*8+4],%o3 342 sllx %o3,32,%o3 343 or %o4,%o3,%o3 344 ld [%g2+12*8+0],%o5 345 ld [%g2+12*8+4],%o4 346 sllx %o4,32,%o4 347 or %o5,%o4,%o4 348 ld [%g2+13*8+0],%o7 349 ld [%g2+13*8+4],%o5 350 sllx %o5,32,%o5 351 or %o7,%o5,%o5 352 ld [%g2+14*8+0],%f5 353 ld [%g2+14*8+4],%f4 354 .word 0xb1b00f04 !fsrc2 %f0,%f4,%f24 355 ld [%g2+15*8+0],%f7 356 ld [%g2+15*8+4],%f6 357 .word 0xb5b00f06 !fsrc2 %f0,%f6,%f26 358 save %sp,-128,%sp; or %g5,%fp,%fp 359 ld [%g4+0*8+0],%l1 360 ld [%g4+0*8+4],%l0 361 sllx %l0,32,%l0 362 or %l1,%l0,%l0 363 ld [%g4+1*8+0],%l2 364 ld [%g4+1*8+4],%l1 365 sllx %l1,32,%l1 366 or %l2,%l1,%l1 367 ld [%g4+2*8+0],%l3 368 ld [%g4+2*8+4],%l2 369 sllx %l2,32,%l2 370 or %l3,%l2,%l2 371 ld [%g4+3*8+0],%l4 372 ld [%g4+3*8+4],%l3 373 sllx %l3,32,%l3 374 or %l4,%l3,%l3 375 ld [%g4+4*8+0],%l5 376 ld [%g4+4*8+4],%l4 377 sllx %l4,32,%l4 378 or %l5,%l4,%l4 379 ld [%g4+5*8+0],%l6 380 ld [%g4+5*8+4],%l5 381 sllx %l5,32,%l5 382 or %l6,%l5,%l5 383 ld [%g4+6*8+0],%l7 384 ld [%g4+6*8+4],%l6 385 sllx %l6,32,%l6 386 or %l7,%l6,%l6 387 ld [%g4+7*8+0],%o0 388 ld [%g4+7*8+4],%l7 389 sllx %l7,32,%l7 390 or %o0,%l7,%l7 391 ld [%g4+8*8+0],%o1 392 ld [%g4+8*8+4],%o0 393 sllx %o0,32,%o0 394 or %o1,%o0,%o0 395 ld [%g4+9*8+0],%o2 396 ld [%g4+9*8+4],%o1 397 sllx %o1,32,%o1 398 or %o2,%o1,%o1 399 ld [%g4+10*8+0],%o3 400 ld [%g4+10*8+4],%o2 401 sllx %o2,32,%o2 402 or %o3,%o2,%o2 403 ld [%g4+11*8+0],%o4 404 ld [%g4+11*8+4],%o3 405 sllx %o3,32,%o3 406 or %o4,%o3,%o3 407 ld [%g4+12*8+0],%o5 408 ld [%g4+12*8+4],%o4 409 sllx %o4,32,%o4 410 or %o5,%o4,%o4 411 ld [%g4+13*8+0],%o7 412 ld [%g4+13*8+4],%o5 413 sllx %o5,32,%o5 414 or %o7,%o5,%o5 415 save %sp,-128,%sp; or %g5,%fp,%fp 416 ld [%g4+14*8+0],%l1 417 ld [%g4+14*8+4],%l0 418 sllx %l0,32,%l0 419 or %l1,%l0,%l0 420 ld [%g4+15*8+0],%l2 421 ld [%g4+15*8+4],%l1 422 sllx %l1,32,%l1 423 or %l2,%l1,%l1 424 save %sp,-128,%sp; or %g5,%fp,%fp 425 cmp %g2,%g3 426 be SIZE_T_CC,.Lmsquare_16 427 nop 428 save %sp,-128,%sp; or %g5,%fp,%fp 429 ld [%g3+0*8+0],%i1 430 ld [%g3+0*8+4],%i0 431 sllx %i0,32,%i0 432 or %i1,%i0,%i0 433 ld [%g3+1*8+0],%i2 434 ld [%g3+1*8+4],%i1 435 sllx %i1,32,%i1 436 or %i2,%i1,%i1 437 ld [%g3+2*8+0],%i3 438 ld [%g3+2*8+4],%i2 439 sllx %i2,32,%i2 440 or %i3,%i2,%i2 441 ld [%g3+3*8+0],%i4 442 ld [%g3+3*8+4],%i3 443 sllx %i3,32,%i3 444 or %i4,%i3,%i3 445 ld [%g3+4*8+0],%i5 446 ld [%g3+4*8+4],%i4 447 sllx %i4,32,%i4 448 or %i5,%i4,%i4 449 ld [%g3+5*8+0],%l0 450 ld [%g3+5*8+4],%i5 451 sllx %i5,32,%i5 
452 or %l0,%i5,%i5 453 ld [%g3+6*8+0],%l1 454 ld [%g3+6*8+4],%l0 455 sllx %l0,32,%l0 456 or %l1,%l0,%l0 457 ld [%g3+7*8+0],%l2 458 ld [%g3+7*8+4],%l1 459 sllx %l1,32,%l1 460 or %l2,%l1,%l1 461 ld [%g3+8*8+0],%l3 462 ld [%g3+8*8+4],%l2 463 sllx %l2,32,%l2 464 or %l3,%l2,%l2 465 ld [%g3+9*8+0],%l4 466 ld [%g3+9*8+4],%l3 467 sllx %l3,32,%l3 468 or %l4,%l3,%l3 469 ld [%g3+10*8+0],%l5 470 ld [%g3+10*8+4],%l4 471 sllx %l4,32,%l4 472 or %l5,%l4,%l4 473 ld [%g3+11*8+0],%l6 474 ld [%g3+11*8+4],%l5 475 sllx %l5,32,%l5 476 or %l6,%l5,%l5 477 ld [%g3+12*8+0],%l7 478 ld [%g3+12*8+4],%l6 479 sllx %l6,32,%l6 480 or %l7,%l6,%l6 481 ld [%g3+13*8+0],%o7 482 ld [%g3+13*8+4],%l7 483 sllx %l7,32,%l7 484 or %o7,%l7,%l7 485 save %sp,-128,%sp; or %g5,%fp,%fp 486 ld [%g3+14*8+0],%i1 487 ld [%g3+14*8+4],%i0 488 sllx %i0,32,%i0 489 or %i1,%i0,%i0 490 ld [%g3+15*8+0],%o7 491 ld [%g3+15*8+4],%i1 492 sllx %i1,32,%i1 493 or %o7,%i1,%i1 494 .word 0x81b02920+16-1 ! montmul 16-1 495.Lmresume_16: 496 fbu,pn %fcc3,.Lmabort_16 497#ifndef __arch64__ 498 and %fp,%g5,%g5 499 brz,pn %g5,.Lmabort_16 500#endif 501 nop 502#ifdef __arch64__ 503 restore 504 restore 505 restore 506 restore 507 restore 508#else 509 restore; and %fp,%g5,%g5 510 restore; and %fp,%g5,%g5 511 restore; and %fp,%g5,%g5 512 restore; and %fp,%g5,%g5 513 brz,pn %g5,.Lmabort1_16 514 restore 515#endif 516 .word 0x81b02310 !movxtod %l0,%f0 517 .word 0x85b02311 !movxtod %l1,%f2 518 .word 0x89b02312 !movxtod %l2,%f4 519 .word 0x8db02313 !movxtod %l3,%f6 520 .word 0x91b02314 !movxtod %l4,%f8 521 .word 0x95b02315 !movxtod %l5,%f10 522 .word 0x99b02316 !movxtod %l6,%f12 523 .word 0x9db02317 !movxtod %l7,%f14 524 .word 0xa1b02308 !movxtod %o0,%f16 525 .word 0xa5b02309 !movxtod %o1,%f18 526 .word 0xa9b0230a !movxtod %o2,%f20 527 .word 0xadb0230b !movxtod %o3,%f22 528 .word 0xbbb0230c !movxtod %o4,%f60 529 .word 0xbfb0230d !movxtod %o5,%f62 530#ifdef __arch64__ 531 restore 532#else 533 and %fp,%g5,%g5 534 restore 535 and %g5,1,%o7 536 and %fp,%g5,%g5 537 srl %fp,0,%fp ! just in case? 538 or %o7,%g5,%g5 539 brz,a,pn %g5,.Lmdone_16 540 mov 0,%i0 ! return failure 541#endif 542 st %f1,[%g1+0*8+0] 543 st %f0,[%g1+0*8+4] 544 st %f3,[%g1+1*8+0] 545 st %f2,[%g1+1*8+4] 546 st %f5,[%g1+2*8+0] 547 st %f4,[%g1+2*8+4] 548 st %f7,[%g1+3*8+0] 549 st %f6,[%g1+3*8+4] 550 st %f9,[%g1+4*8+0] 551 st %f8,[%g1+4*8+4] 552 st %f11,[%g1+5*8+0] 553 st %f10,[%g1+5*8+4] 554 st %f13,[%g1+6*8+0] 555 st %f12,[%g1+6*8+4] 556 st %f15,[%g1+7*8+0] 557 st %f14,[%g1+7*8+4] 558 st %f17,[%g1+8*8+0] 559 st %f16,[%g1+8*8+4] 560 st %f19,[%g1+9*8+0] 561 st %f18,[%g1+9*8+4] 562 st %f21,[%g1+10*8+0] 563 st %f20,[%g1+10*8+4] 564 st %f23,[%g1+11*8+0] 565 st %f22,[%g1+11*8+4] 566 .word 0x81b00f1d !fsrc2 %f0,%f60,%f0 567 st %f1,[%g1+12*8+0] 568 st %f0,[%g1+12*8+4] 569 .word 0x85b00f1f !fsrc2 %f0,%f62,%f2 570 st %f3,[%g1+13*8+0] 571 st %f2,[%g1+13*8+4] 572 .word 0x89b00f18 !fsrc2 %f0,%f24,%f4 573 st %f5,[%g1+14*8+0] 574 st %f4,[%g1+14*8+4] 575 .word 0x8db00f1a !fsrc2 %f0,%f26,%f6 576 st %f7,[%g1+15*8+0] 577 st %f6,[%g1+15*8+4] 578 mov 1,%i0 ! return success 579.Lmdone_16: 580 ret 581 restore 582 583.Lmabort_16: 584 restore 585 restore 586 restore 587 restore 588 restore 589.Lmabort1_16: 590 restore 591 592 mov 0,%i0 ! return failure 593 ret 594 restore 595 596.align 32 597.Lmsquare_16: 598 save %sp,-128,%sp; or %g5,%fp,%fp 599 save %sp,-128,%sp; or %g5,%fp,%fp 600 .word 0x81b02940+16-1 ! 
montsqr 16-1 601 ba .Lmresume_16 602 nop 603.type bn_mul_mont_t4_16, #function 604.size bn_mul_mont_t4_16, .-bn_mul_mont_t4_16 605.globl bn_mul_mont_t4_24 606.align 32 607bn_mul_mont_t4_24: 608#ifdef __arch64__ 609 mov 0,%g5 610 mov -128,%g4 611#elif defined(SPARCV9_64BIT_STACK) 612 SPARC_LOAD_ADDRESS_LEAF(OPENSSL_sparcv9cap_P,%g1,%g5) 613 ld [%g1+0],%g1 ! OPENSSL_sparcv9_P[0] 614 mov -2047,%g4 615 and %g1,SPARCV9_64BIT_STACK,%g1 616 movrz %g1,0,%g4 617 mov -1,%g5 618 add %g4,-128,%g4 619#else 620 mov -1,%g5 621 mov -128,%g4 622#endif 623 sllx %g5,32,%g5 624 save %sp,%g4,%sp 625#ifndef __arch64__ 626 save %sp,-128,%sp ! warm it up 627 save %sp,-128,%sp 628 save %sp,-128,%sp 629 save %sp,-128,%sp 630 save %sp,-128,%sp 631 save %sp,-128,%sp 632 restore 633 restore 634 restore 635 restore 636 restore 637 restore 638#endif 639 and %sp,1,%g4 640 or %g5,%fp,%fp 641 or %g4,%g5,%g5 642 643 ! copy arguments to global registers 644 mov %i0,%g1 645 mov %i1,%g2 646 mov %i2,%g3 647 mov %i3,%g4 648 ld [%i4+0],%f1 ! load *n0 649 ld [%i4+4],%f0 650 .word 0xbbb00f00 !fsrc2 %f0,%f0,%f60 651 save %sp,-128,%sp; or %g5,%fp,%fp 652 ld [%g2+0*8+0],%l1 653 ld [%g2+0*8+4],%l0 654 sllx %l0,32,%l0 655 or %l1,%l0,%l0 656 ld [%g2+1*8+0],%l2 657 ld [%g2+1*8+4],%l1 658 sllx %l1,32,%l1 659 or %l2,%l1,%l1 660 ld [%g2+2*8+0],%l3 661 ld [%g2+2*8+4],%l2 662 sllx %l2,32,%l2 663 or %l3,%l2,%l2 664 ld [%g2+3*8+0],%l4 665 ld [%g2+3*8+4],%l3 666 sllx %l3,32,%l3 667 or %l4,%l3,%l3 668 ld [%g2+4*8+0],%l5 669 ld [%g2+4*8+4],%l4 670 sllx %l4,32,%l4 671 or %l5,%l4,%l4 672 ld [%g2+5*8+0],%l6 673 ld [%g2+5*8+4],%l5 674 sllx %l5,32,%l5 675 or %l6,%l5,%l5 676 ld [%g2+6*8+0],%l7 677 ld [%g2+6*8+4],%l6 678 sllx %l6,32,%l6 679 or %l7,%l6,%l6 680 ld [%g2+7*8+0],%o0 681 ld [%g2+7*8+4],%l7 682 sllx %l7,32,%l7 683 or %o0,%l7,%l7 684 ld [%g2+8*8+0],%o1 685 ld [%g2+8*8+4],%o0 686 sllx %o0,32,%o0 687 or %o1,%o0,%o0 688 ld [%g2+9*8+0],%o2 689 ld [%g2+9*8+4],%o1 690 sllx %o1,32,%o1 691 or %o2,%o1,%o1 692 ld [%g2+10*8+0],%o3 693 ld [%g2+10*8+4],%o2 694 sllx %o2,32,%o2 695 or %o3,%o2,%o2 696 ld [%g2+11*8+0],%o4 697 ld [%g2+11*8+4],%o3 698 sllx %o3,32,%o3 699 or %o4,%o3,%o3 700 ld [%g2+12*8+0],%o5 701 ld [%g2+12*8+4],%o4 702 sllx %o4,32,%o4 703 or %o5,%o4,%o4 704 ld [%g2+13*8+0],%o7 705 ld [%g2+13*8+4],%o5 706 sllx %o5,32,%o5 707 or %o7,%o5,%o5 708 ld [%g2+14*8+0],%f5 709 ld [%g2+14*8+4],%f4 710 .word 0xb1b00f04 !fsrc2 %f0,%f4,%f24 711 ld [%g2+15*8+0],%f7 712 ld [%g2+15*8+4],%f6 713 .word 0xb5b00f06 !fsrc2 %f0,%f6,%f26 714 ld [%g2+16*8+0],%f1 715 ld [%g2+16*8+4],%f0 716 .word 0xb9b00f00 !fsrc2 %f0,%f0,%f28 717 ld [%g2+17*8+0],%f3 718 ld [%g2+17*8+4],%f2 719 .word 0xbdb00f02 !fsrc2 %f0,%f2,%f30 720 ld [%g2+18*8+0],%f5 721 ld [%g2+18*8+4],%f4 722 .word 0x83b00f04 !fsrc2 %f0,%f4,%f32 723 ld [%g2+19*8+0],%f7 724 ld [%g2+19*8+4],%f6 725 .word 0x87b00f06 !fsrc2 %f0,%f6,%f34 726 ld [%g2+20*8+0],%f1 727 ld [%g2+20*8+4],%f0 728 .word 0x8bb00f00 !fsrc2 %f0,%f0,%f36 729 ld [%g2+21*8+0],%f3 730 ld [%g2+21*8+4],%f2 731 .word 0x8fb00f02 !fsrc2 %f0,%f2,%f38 732 ld [%g2+22*8+0],%f5 733 ld [%g2+22*8+4],%f4 734 .word 0x93b00f04 !fsrc2 %f0,%f4,%f40 735 ld [%g2+23*8+0],%f7 736 ld [%g2+23*8+4],%f6 737 .word 0x97b00f06 !fsrc2 %f0,%f6,%f42 738 save %sp,-128,%sp; or %g5,%fp,%fp 739 ld [%g4+0*8+0],%l1 740 ld [%g4+0*8+4],%l0 741 sllx %l0,32,%l0 742 or %l1,%l0,%l0 743 ld [%g4+1*8+0],%l2 744 ld [%g4+1*8+4],%l1 745 sllx %l1,32,%l1 746 or %l2,%l1,%l1 747 ld [%g4+2*8+0],%l3 748 ld [%g4+2*8+4],%l2 749 sllx %l2,32,%l2 750 or %l3,%l2,%l2 751 ld [%g4+3*8+0],%l4 752 ld [%g4+3*8+4],%l3 753 
sllx %l3,32,%l3 754 or %l4,%l3,%l3 755 ld [%g4+4*8+0],%l5 756 ld [%g4+4*8+4],%l4 757 sllx %l4,32,%l4 758 or %l5,%l4,%l4 759 ld [%g4+5*8+0],%l6 760 ld [%g4+5*8+4],%l5 761 sllx %l5,32,%l5 762 or %l6,%l5,%l5 763 ld [%g4+6*8+0],%l7 764 ld [%g4+6*8+4],%l6 765 sllx %l6,32,%l6 766 or %l7,%l6,%l6 767 ld [%g4+7*8+0],%o0 768 ld [%g4+7*8+4],%l7 769 sllx %l7,32,%l7 770 or %o0,%l7,%l7 771 ld [%g4+8*8+0],%o1 772 ld [%g4+8*8+4],%o0 773 sllx %o0,32,%o0 774 or %o1,%o0,%o0 775 ld [%g4+9*8+0],%o2 776 ld [%g4+9*8+4],%o1 777 sllx %o1,32,%o1 778 or %o2,%o1,%o1 779 ld [%g4+10*8+0],%o3 780 ld [%g4+10*8+4],%o2 781 sllx %o2,32,%o2 782 or %o3,%o2,%o2 783 ld [%g4+11*8+0],%o4 784 ld [%g4+11*8+4],%o3 785 sllx %o3,32,%o3 786 or %o4,%o3,%o3 787 ld [%g4+12*8+0],%o5 788 ld [%g4+12*8+4],%o4 789 sllx %o4,32,%o4 790 or %o5,%o4,%o4 791 ld [%g4+13*8+0],%o7 792 ld [%g4+13*8+4],%o5 793 sllx %o5,32,%o5 794 or %o7,%o5,%o5 795 save %sp,-128,%sp; or %g5,%fp,%fp 796 ld [%g4+14*8+0],%l1 797 ld [%g4+14*8+4],%l0 798 sllx %l0,32,%l0 799 or %l1,%l0,%l0 800 ld [%g4+15*8+0],%l2 801 ld [%g4+15*8+4],%l1 802 sllx %l1,32,%l1 803 or %l2,%l1,%l1 804 ld [%g4+16*8+0],%l3 805 ld [%g4+16*8+4],%l2 806 sllx %l2,32,%l2 807 or %l3,%l2,%l2 808 ld [%g4+17*8+0],%l4 809 ld [%g4+17*8+4],%l3 810 sllx %l3,32,%l3 811 or %l4,%l3,%l3 812 ld [%g4+18*8+0],%l5 813 ld [%g4+18*8+4],%l4 814 sllx %l4,32,%l4 815 or %l5,%l4,%l4 816 ld [%g4+19*8+0],%l6 817 ld [%g4+19*8+4],%l5 818 sllx %l5,32,%l5 819 or %l6,%l5,%l5 820 ld [%g4+20*8+0],%l7 821 ld [%g4+20*8+4],%l6 822 sllx %l6,32,%l6 823 or %l7,%l6,%l6 824 ld [%g4+21*8+0],%o0 825 ld [%g4+21*8+4],%l7 826 sllx %l7,32,%l7 827 or %o0,%l7,%l7 828 ld [%g4+22*8+0],%o1 829 ld [%g4+22*8+4],%o0 830 sllx %o0,32,%o0 831 or %o1,%o0,%o0 832 ld [%g4+23*8+0],%o2 833 ld [%g4+23*8+4],%o1 834 sllx %o1,32,%o1 835 or %o2,%o1,%o1 836 save %sp,-128,%sp; or %g5,%fp,%fp 837 cmp %g2,%g3 838 be SIZE_T_CC,.Lmsquare_24 839 nop 840 save %sp,-128,%sp; or %g5,%fp,%fp 841 ld [%g3+0*8+0],%i1 842 ld [%g3+0*8+4],%i0 843 sllx %i0,32,%i0 844 or %i1,%i0,%i0 845 ld [%g3+1*8+0],%i2 846 ld [%g3+1*8+4],%i1 847 sllx %i1,32,%i1 848 or %i2,%i1,%i1 849 ld [%g3+2*8+0],%i3 850 ld [%g3+2*8+4],%i2 851 sllx %i2,32,%i2 852 or %i3,%i2,%i2 853 ld [%g3+3*8+0],%i4 854 ld [%g3+3*8+4],%i3 855 sllx %i3,32,%i3 856 or %i4,%i3,%i3 857 ld [%g3+4*8+0],%i5 858 ld [%g3+4*8+4],%i4 859 sllx %i4,32,%i4 860 or %i5,%i4,%i4 861 ld [%g3+5*8+0],%l0 862 ld [%g3+5*8+4],%i5 863 sllx %i5,32,%i5 864 or %l0,%i5,%i5 865 ld [%g3+6*8+0],%l1 866 ld [%g3+6*8+4],%l0 867 sllx %l0,32,%l0 868 or %l1,%l0,%l0 869 ld [%g3+7*8+0],%l2 870 ld [%g3+7*8+4],%l1 871 sllx %l1,32,%l1 872 or %l2,%l1,%l1 873 ld [%g3+8*8+0],%l3 874 ld [%g3+8*8+4],%l2 875 sllx %l2,32,%l2 876 or %l3,%l2,%l2 877 ld [%g3+9*8+0],%l4 878 ld [%g3+9*8+4],%l3 879 sllx %l3,32,%l3 880 or %l4,%l3,%l3 881 ld [%g3+10*8+0],%l5 882 ld [%g3+10*8+4],%l4 883 sllx %l4,32,%l4 884 or %l5,%l4,%l4 885 ld [%g3+11*8+0],%l6 886 ld [%g3+11*8+4],%l5 887 sllx %l5,32,%l5 888 or %l6,%l5,%l5 889 ld [%g3+12*8+0],%l7 890 ld [%g3+12*8+4],%l6 891 sllx %l6,32,%l6 892 or %l7,%l6,%l6 893 ld [%g3+13*8+0],%o7 894 ld [%g3+13*8+4],%l7 895 sllx %l7,32,%l7 896 or %o7,%l7,%l7 897 save %sp,-128,%sp; or %g5,%fp,%fp 898 ld [%g3+14*8+0],%i1 899 ld [%g3+14*8+4],%i0 900 sllx %i0,32,%i0 901 or %i1,%i0,%i0 902 ld [%g3+15*8+0],%i2 903 ld [%g3+15*8+4],%i1 904 sllx %i1,32,%i1 905 or %i2,%i1,%i1 906 ld [%g3+16*8+0],%i3 907 ld [%g3+16*8+4],%i2 908 sllx %i2,32,%i2 909 or %i3,%i2,%i2 910 ld [%g3+17*8+0],%i4 911 ld [%g3+17*8+4],%i3 912 sllx %i3,32,%i3 913 or %i4,%i3,%i3 914 ld [%g3+18*8+0],%i5 915 ld 
[%g3+18*8+4],%i4 916 sllx %i4,32,%i4 917 or %i5,%i4,%i4 918 ld [%g3+19*8+0],%l0 919 ld [%g3+19*8+4],%i5 920 sllx %i5,32,%i5 921 or %l0,%i5,%i5 922 ld [%g3+20*8+0],%l1 923 ld [%g3+20*8+4],%l0 924 sllx %l0,32,%l0 925 or %l1,%l0,%l0 926 ld [%g3+21*8+0],%l2 927 ld [%g3+21*8+4],%l1 928 sllx %l1,32,%l1 929 or %l2,%l1,%l1 930 ld [%g3+22*8+0],%l3 931 ld [%g3+22*8+4],%l2 932 sllx %l2,32,%l2 933 or %l3,%l2,%l2 934 ld [%g3+23*8+0],%o7 935 ld [%g3+23*8+4],%l3 936 sllx %l3,32,%l3 937 or %o7,%l3,%l3 938 .word 0x81b02920+24-1 ! montmul 24-1 939.Lmresume_24: 940 fbu,pn %fcc3,.Lmabort_24 941#ifndef __arch64__ 942 and %fp,%g5,%g5 943 brz,pn %g5,.Lmabort_24 944#endif 945 nop 946#ifdef __arch64__ 947 restore 948 restore 949 restore 950 restore 951 restore 952#else 953 restore; and %fp,%g5,%g5 954 restore; and %fp,%g5,%g5 955 restore; and %fp,%g5,%g5 956 restore; and %fp,%g5,%g5 957 brz,pn %g5,.Lmabort1_24 958 restore 959#endif 960 .word 0x81b02310 !movxtod %l0,%f0 961 .word 0x85b02311 !movxtod %l1,%f2 962 .word 0x89b02312 !movxtod %l2,%f4 963 .word 0x8db02313 !movxtod %l3,%f6 964 .word 0x91b02314 !movxtod %l4,%f8 965 .word 0x95b02315 !movxtod %l5,%f10 966 .word 0x99b02316 !movxtod %l6,%f12 967 .word 0x9db02317 !movxtod %l7,%f14 968 .word 0xa1b02308 !movxtod %o0,%f16 969 .word 0xa5b02309 !movxtod %o1,%f18 970 .word 0xa9b0230a !movxtod %o2,%f20 971 .word 0xadb0230b !movxtod %o3,%f22 972 .word 0xbbb0230c !movxtod %o4,%f60 973 .word 0xbfb0230d !movxtod %o5,%f62 974#ifdef __arch64__ 975 restore 976#else 977 and %fp,%g5,%g5 978 restore 979 and %g5,1,%o7 980 and %fp,%g5,%g5 981 srl %fp,0,%fp ! just in case? 982 or %o7,%g5,%g5 983 brz,a,pn %g5,.Lmdone_24 984 mov 0,%i0 ! return failure 985#endif 986 st %f1,[%g1+0*8+0] 987 st %f0,[%g1+0*8+4] 988 st %f3,[%g1+1*8+0] 989 st %f2,[%g1+1*8+4] 990 st %f5,[%g1+2*8+0] 991 st %f4,[%g1+2*8+4] 992 st %f7,[%g1+3*8+0] 993 st %f6,[%g1+3*8+4] 994 st %f9,[%g1+4*8+0] 995 st %f8,[%g1+4*8+4] 996 st %f11,[%g1+5*8+0] 997 st %f10,[%g1+5*8+4] 998 st %f13,[%g1+6*8+0] 999 st %f12,[%g1+6*8+4] 1000 st %f15,[%g1+7*8+0] 1001 st %f14,[%g1+7*8+4] 1002 st %f17,[%g1+8*8+0] 1003 st %f16,[%g1+8*8+4] 1004 st %f19,[%g1+9*8+0] 1005 st %f18,[%g1+9*8+4] 1006 st %f21,[%g1+10*8+0] 1007 st %f20,[%g1+10*8+4] 1008 st %f23,[%g1+11*8+0] 1009 st %f22,[%g1+11*8+4] 1010 .word 0x81b00f1d !fsrc2 %f0,%f60,%f0 1011 st %f1,[%g1+12*8+0] 1012 st %f0,[%g1+12*8+4] 1013 .word 0x85b00f1f !fsrc2 %f0,%f62,%f2 1014 st %f3,[%g1+13*8+0] 1015 st %f2,[%g1+13*8+4] 1016 .word 0x89b00f18 !fsrc2 %f0,%f24,%f4 1017 st %f5,[%g1+14*8+0] 1018 st %f4,[%g1+14*8+4] 1019 .word 0x8db00f1a !fsrc2 %f0,%f26,%f6 1020 st %f7,[%g1+15*8+0] 1021 st %f6,[%g1+15*8+4] 1022 .word 0x81b00f1c !fsrc2 %f0,%f28,%f0 1023 st %f1,[%g1+16*8+0] 1024 st %f0,[%g1+16*8+4] 1025 .word 0x85b00f1e !fsrc2 %f0,%f30,%f2 1026 st %f3,[%g1+17*8+0] 1027 st %f2,[%g1+17*8+4] 1028 .word 0x89b00f01 !fsrc2 %f0,%f32,%f4 1029 st %f5,[%g1+18*8+0] 1030 st %f4,[%g1+18*8+4] 1031 .word 0x8db00f03 !fsrc2 %f0,%f34,%f6 1032 st %f7,[%g1+19*8+0] 1033 st %f6,[%g1+19*8+4] 1034 .word 0x81b00f05 !fsrc2 %f0,%f36,%f0 1035 st %f1,[%g1+20*8+0] 1036 st %f0,[%g1+20*8+4] 1037 .word 0x85b00f07 !fsrc2 %f0,%f38,%f2 1038 st %f3,[%g1+21*8+0] 1039 st %f2,[%g1+21*8+4] 1040 .word 0x89b00f09 !fsrc2 %f0,%f40,%f4 1041 st %f5,[%g1+22*8+0] 1042 st %f4,[%g1+22*8+4] 1043 .word 0x8db00f0b !fsrc2 %f0,%f42,%f6 1044 st %f7,[%g1+23*8+0] 1045 st %f6,[%g1+23*8+4] 1046 mov 1,%i0 ! 
return success 1047.Lmdone_24: 1048 ret 1049 restore 1050 1051.Lmabort_24: 1052 restore 1053 restore 1054 restore 1055 restore 1056 restore 1057.Lmabort1_24: 1058 restore 1059 1060 mov 0,%i0 ! return failure 1061 ret 1062 restore 1063 1064.align 32 1065.Lmsquare_24: 1066 save %sp,-128,%sp; or %g5,%fp,%fp 1067 save %sp,-128,%sp; or %g5,%fp,%fp 1068 .word 0x81b02940+24-1 ! montsqr 24-1 1069 ba .Lmresume_24 1070 nop 1071.type bn_mul_mont_t4_24, #function 1072.size bn_mul_mont_t4_24, .-bn_mul_mont_t4_24 1073.globl bn_mul_mont_t4_32 1074.align 32 1075bn_mul_mont_t4_32: 1076#ifdef __arch64__ 1077 mov 0,%g5 1078 mov -128,%g4 1079#elif defined(SPARCV9_64BIT_STACK) 1080 SPARC_LOAD_ADDRESS_LEAF(OPENSSL_sparcv9cap_P,%g1,%g5) 1081 ld [%g1+0],%g1 ! OPENSSL_sparcv9_P[0] 1082 mov -2047,%g4 1083 and %g1,SPARCV9_64BIT_STACK,%g1 1084 movrz %g1,0,%g4 1085 mov -1,%g5 1086 add %g4,-128,%g4 1087#else 1088 mov -1,%g5 1089 mov -128,%g4 1090#endif 1091 sllx %g5,32,%g5 1092 save %sp,%g4,%sp 1093#ifndef __arch64__ 1094 save %sp,-128,%sp ! warm it up 1095 save %sp,-128,%sp 1096 save %sp,-128,%sp 1097 save %sp,-128,%sp 1098 save %sp,-128,%sp 1099 save %sp,-128,%sp 1100 restore 1101 restore 1102 restore 1103 restore 1104 restore 1105 restore 1106#endif 1107 and %sp,1,%g4 1108 or %g5,%fp,%fp 1109 or %g4,%g5,%g5 1110 1111 ! copy arguments to global registers 1112 mov %i0,%g1 1113 mov %i1,%g2 1114 mov %i2,%g3 1115 mov %i3,%g4 1116 ld [%i4+0],%f1 ! load *n0 1117 ld [%i4+4],%f0 1118 .word 0xbbb00f00 !fsrc2 %f0,%f0,%f60 1119 save %sp,-128,%sp; or %g5,%fp,%fp 1120 ld [%g2+0*8+0],%l1 1121 ld [%g2+0*8+4],%l0 1122 sllx %l0,32,%l0 1123 or %l1,%l0,%l0 1124 ld [%g2+1*8+0],%l2 1125 ld [%g2+1*8+4],%l1 1126 sllx %l1,32,%l1 1127 or %l2,%l1,%l1 1128 ld [%g2+2*8+0],%l3 1129 ld [%g2+2*8+4],%l2 1130 sllx %l2,32,%l2 1131 or %l3,%l2,%l2 1132 ld [%g2+3*8+0],%l4 1133 ld [%g2+3*8+4],%l3 1134 sllx %l3,32,%l3 1135 or %l4,%l3,%l3 1136 ld [%g2+4*8+0],%l5 1137 ld [%g2+4*8+4],%l4 1138 sllx %l4,32,%l4 1139 or %l5,%l4,%l4 1140 ld [%g2+5*8+0],%l6 1141 ld [%g2+5*8+4],%l5 1142 sllx %l5,32,%l5 1143 or %l6,%l5,%l5 1144 ld [%g2+6*8+0],%l7 1145 ld [%g2+6*8+4],%l6 1146 sllx %l6,32,%l6 1147 or %l7,%l6,%l6 1148 ld [%g2+7*8+0],%o0 1149 ld [%g2+7*8+4],%l7 1150 sllx %l7,32,%l7 1151 or %o0,%l7,%l7 1152 ld [%g2+8*8+0],%o1 1153 ld [%g2+8*8+4],%o0 1154 sllx %o0,32,%o0 1155 or %o1,%o0,%o0 1156 ld [%g2+9*8+0],%o2 1157 ld [%g2+9*8+4],%o1 1158 sllx %o1,32,%o1 1159 or %o2,%o1,%o1 1160 ld [%g2+10*8+0],%o3 1161 ld [%g2+10*8+4],%o2 1162 sllx %o2,32,%o2 1163 or %o3,%o2,%o2 1164 ld [%g2+11*8+0],%o4 1165 ld [%g2+11*8+4],%o3 1166 sllx %o3,32,%o3 1167 or %o4,%o3,%o3 1168 ld [%g2+12*8+0],%o5 1169 ld [%g2+12*8+4],%o4 1170 sllx %o4,32,%o4 1171 or %o5,%o4,%o4 1172 ld [%g2+13*8+0],%o7 1173 ld [%g2+13*8+4],%o5 1174 sllx %o5,32,%o5 1175 or %o7,%o5,%o5 1176 ld [%g2+14*8+0],%f5 1177 ld [%g2+14*8+4],%f4 1178 .word 0xb1b00f04 !fsrc2 %f0,%f4,%f24 1179 ld [%g2+15*8+0],%f7 1180 ld [%g2+15*8+4],%f6 1181 .word 0xb5b00f06 !fsrc2 %f0,%f6,%f26 1182 ld [%g2+16*8+0],%f1 1183 ld [%g2+16*8+4],%f0 1184 .word 0xb9b00f00 !fsrc2 %f0,%f0,%f28 1185 ld [%g2+17*8+0],%f3 1186 ld [%g2+17*8+4],%f2 1187 .word 0xbdb00f02 !fsrc2 %f0,%f2,%f30 1188 ld [%g2+18*8+0],%f5 1189 ld [%g2+18*8+4],%f4 1190 .word 0x83b00f04 !fsrc2 %f0,%f4,%f32 1191 ld [%g2+19*8+0],%f7 1192 ld [%g2+19*8+4],%f6 1193 .word 0x87b00f06 !fsrc2 %f0,%f6,%f34 1194 ld [%g2+20*8+0],%f1 1195 ld [%g2+20*8+4],%f0 1196 .word 0x8bb00f00 !fsrc2 %f0,%f0,%f36 1197 ld [%g2+21*8+0],%f3 1198 ld [%g2+21*8+4],%f2 1199 .word 0x8fb00f02 !fsrc2 %f0,%f2,%f38 1200 ld 
[%g2+22*8+0],%f5 1201 ld [%g2+22*8+4],%f4 1202 .word 0x93b00f04 !fsrc2 %f0,%f4,%f40 1203 ld [%g2+23*8+0],%f7 1204 ld [%g2+23*8+4],%f6 1205 .word 0x97b00f06 !fsrc2 %f0,%f6,%f42 1206 ld [%g2+24*8+0],%f1 1207 ld [%g2+24*8+4],%f0 1208 .word 0x9bb00f00 !fsrc2 %f0,%f0,%f44 1209 ld [%g2+25*8+0],%f3 1210 ld [%g2+25*8+4],%f2 1211 .word 0x9fb00f02 !fsrc2 %f0,%f2,%f46 1212 ld [%g2+26*8+0],%f5 1213 ld [%g2+26*8+4],%f4 1214 .word 0xa3b00f04 !fsrc2 %f0,%f4,%f48 1215 ld [%g2+27*8+0],%f7 1216 ld [%g2+27*8+4],%f6 1217 .word 0xa7b00f06 !fsrc2 %f0,%f6,%f50 1218 ld [%g2+28*8+0],%f1 1219 ld [%g2+28*8+4],%f0 1220 .word 0xabb00f00 !fsrc2 %f0,%f0,%f52 1221 ld [%g2+29*8+0],%f3 1222 ld [%g2+29*8+4],%f2 1223 .word 0xafb00f02 !fsrc2 %f0,%f2,%f54 1224 ld [%g2+30*8+0],%f5 1225 ld [%g2+30*8+4],%f4 1226 .word 0xb3b00f04 !fsrc2 %f0,%f4,%f56 1227 ld [%g2+31*8+0],%f7 1228 ld [%g2+31*8+4],%f6 1229 .word 0xb7b00f06 !fsrc2 %f0,%f6,%f58 1230 save %sp,-128,%sp; or %g5,%fp,%fp 1231 ld [%g4+0*8+0],%l1 1232 ld [%g4+0*8+4],%l0 1233 sllx %l0,32,%l0 1234 or %l1,%l0,%l0 1235 ld [%g4+1*8+0],%l2 1236 ld [%g4+1*8+4],%l1 1237 sllx %l1,32,%l1 1238 or %l2,%l1,%l1 1239 ld [%g4+2*8+0],%l3 1240 ld [%g4+2*8+4],%l2 1241 sllx %l2,32,%l2 1242 or %l3,%l2,%l2 1243 ld [%g4+3*8+0],%l4 1244 ld [%g4+3*8+4],%l3 1245 sllx %l3,32,%l3 1246 or %l4,%l3,%l3 1247 ld [%g4+4*8+0],%l5 1248 ld [%g4+4*8+4],%l4 1249 sllx %l4,32,%l4 1250 or %l5,%l4,%l4 1251 ld [%g4+5*8+0],%l6 1252 ld [%g4+5*8+4],%l5 1253 sllx %l5,32,%l5 1254 or %l6,%l5,%l5 1255 ld [%g4+6*8+0],%l7 1256 ld [%g4+6*8+4],%l6 1257 sllx %l6,32,%l6 1258 or %l7,%l6,%l6 1259 ld [%g4+7*8+0],%o0 1260 ld [%g4+7*8+4],%l7 1261 sllx %l7,32,%l7 1262 or %o0,%l7,%l7 1263 ld [%g4+8*8+0],%o1 1264 ld [%g4+8*8+4],%o0 1265 sllx %o0,32,%o0 1266 or %o1,%o0,%o0 1267 ld [%g4+9*8+0],%o2 1268 ld [%g4+9*8+4],%o1 1269 sllx %o1,32,%o1 1270 or %o2,%o1,%o1 1271 ld [%g4+10*8+0],%o3 1272 ld [%g4+10*8+4],%o2 1273 sllx %o2,32,%o2 1274 or %o3,%o2,%o2 1275 ld [%g4+11*8+0],%o4 1276 ld [%g4+11*8+4],%o3 1277 sllx %o3,32,%o3 1278 or %o4,%o3,%o3 1279 ld [%g4+12*8+0],%o5 1280 ld [%g4+12*8+4],%o4 1281 sllx %o4,32,%o4 1282 or %o5,%o4,%o4 1283 ld [%g4+13*8+0],%o7 1284 ld [%g4+13*8+4],%o5 1285 sllx %o5,32,%o5 1286 or %o7,%o5,%o5 1287 save %sp,-128,%sp; or %g5,%fp,%fp 1288 ld [%g4+14*8+0],%l1 1289 ld [%g4+14*8+4],%l0 1290 sllx %l0,32,%l0 1291 or %l1,%l0,%l0 1292 ld [%g4+15*8+0],%l2 1293 ld [%g4+15*8+4],%l1 1294 sllx %l1,32,%l1 1295 or %l2,%l1,%l1 1296 ld [%g4+16*8+0],%l3 1297 ld [%g4+16*8+4],%l2 1298 sllx %l2,32,%l2 1299 or %l3,%l2,%l2 1300 ld [%g4+17*8+0],%l4 1301 ld [%g4+17*8+4],%l3 1302 sllx %l3,32,%l3 1303 or %l4,%l3,%l3 1304 ld [%g4+18*8+0],%l5 1305 ld [%g4+18*8+4],%l4 1306 sllx %l4,32,%l4 1307 or %l5,%l4,%l4 1308 ld [%g4+19*8+0],%l6 1309 ld [%g4+19*8+4],%l5 1310 sllx %l5,32,%l5 1311 or %l6,%l5,%l5 1312 ld [%g4+20*8+0],%l7 1313 ld [%g4+20*8+4],%l6 1314 sllx %l6,32,%l6 1315 or %l7,%l6,%l6 1316 ld [%g4+21*8+0],%o0 1317 ld [%g4+21*8+4],%l7 1318 sllx %l7,32,%l7 1319 or %o0,%l7,%l7 1320 ld [%g4+22*8+0],%o1 1321 ld [%g4+22*8+4],%o0 1322 sllx %o0,32,%o0 1323 or %o1,%o0,%o0 1324 ld [%g4+23*8+0],%o2 1325 ld [%g4+23*8+4],%o1 1326 sllx %o1,32,%o1 1327 or %o2,%o1,%o1 1328 ld [%g4+24*8+0],%o3 1329 ld [%g4+24*8+4],%o2 1330 sllx %o2,32,%o2 1331 or %o3,%o2,%o2 1332 ld [%g4+25*8+0],%o4 1333 ld [%g4+25*8+4],%o3 1334 sllx %o3,32,%o3 1335 or %o4,%o3,%o3 1336 ld [%g4+26*8+0],%o5 1337 ld [%g4+26*8+4],%o4 1338 sllx %o4,32,%o4 1339 or %o5,%o4,%o4 1340 ld [%g4+27*8+0],%o7 1341 ld [%g4+27*8+4],%o5 1342 sllx %o5,32,%o5 1343 or %o7,%o5,%o5 1344 save %sp,-128,%sp; or 
%g5,%fp,%fp 1345 ld [%g4+28*8+0],%l1 1346 ld [%g4+28*8+4],%l0 1347 sllx %l0,32,%l0 1348 or %l1,%l0,%l0 1349 ld [%g4+29*8+0],%l2 1350 ld [%g4+29*8+4],%l1 1351 sllx %l1,32,%l1 1352 or %l2,%l1,%l1 1353 ld [%g4+30*8+0],%l3 1354 ld [%g4+30*8+4],%l2 1355 sllx %l2,32,%l2 1356 or %l3,%l2,%l2 1357 ld [%g4+31*8+0],%o7 1358 ld [%g4+31*8+4],%l3 1359 sllx %l3,32,%l3 1360 or %o7,%l3,%l3 1361 cmp %g2,%g3 1362 be SIZE_T_CC,.Lmsquare_32 1363 nop 1364 save %sp,-128,%sp; or %g5,%fp,%fp 1365 ld [%g3+0*8+0],%i1 1366 ld [%g3+0*8+4],%i0 1367 sllx %i0,32,%i0 1368 or %i1,%i0,%i0 1369 ld [%g3+1*8+0],%i2 1370 ld [%g3+1*8+4],%i1 1371 sllx %i1,32,%i1 1372 or %i2,%i1,%i1 1373 ld [%g3+2*8+0],%i3 1374 ld [%g3+2*8+4],%i2 1375 sllx %i2,32,%i2 1376 or %i3,%i2,%i2 1377 ld [%g3+3*8+0],%i4 1378 ld [%g3+3*8+4],%i3 1379 sllx %i3,32,%i3 1380 or %i4,%i3,%i3 1381 ld [%g3+4*8+0],%i5 1382 ld [%g3+4*8+4],%i4 1383 sllx %i4,32,%i4 1384 or %i5,%i4,%i4 1385 ld [%g3+5*8+0],%l0 1386 ld [%g3+5*8+4],%i5 1387 sllx %i5,32,%i5 1388 or %l0,%i5,%i5 1389 ld [%g3+6*8+0],%l1 1390 ld [%g3+6*8+4],%l0 1391 sllx %l0,32,%l0 1392 or %l1,%l0,%l0 1393 ld [%g3+7*8+0],%l2 1394 ld [%g3+7*8+4],%l1 1395 sllx %l1,32,%l1 1396 or %l2,%l1,%l1 1397 ld [%g3+8*8+0],%l3 1398 ld [%g3+8*8+4],%l2 1399 sllx %l2,32,%l2 1400 or %l3,%l2,%l2 1401 ld [%g3+9*8+0],%l4 1402 ld [%g3+9*8+4],%l3 1403 sllx %l3,32,%l3 1404 or %l4,%l3,%l3 1405 ld [%g3+10*8+0],%l5 1406 ld [%g3+10*8+4],%l4 1407 sllx %l4,32,%l4 1408 or %l5,%l4,%l4 1409 ld [%g3+11*8+0],%l6 1410 ld [%g3+11*8+4],%l5 1411 sllx %l5,32,%l5 1412 or %l6,%l5,%l5 1413 ld [%g3+12*8+0],%l7 1414 ld [%g3+12*8+4],%l6 1415 sllx %l6,32,%l6 1416 or %l7,%l6,%l6 1417 ld [%g3+13*8+0],%o7 1418 ld [%g3+13*8+4],%l7 1419 sllx %l7,32,%l7 1420 or %o7,%l7,%l7 1421 save %sp,-128,%sp; or %g5,%fp,%fp 1422 ld [%g3+14*8+0],%i1 1423 ld [%g3+14*8+4],%i0 1424 sllx %i0,32,%i0 1425 or %i1,%i0,%i0 1426 ld [%g3+15*8+0],%i2 1427 ld [%g3+15*8+4],%i1 1428 sllx %i1,32,%i1 1429 or %i2,%i1,%i1 1430 ld [%g3+16*8+0],%i3 1431 ld [%g3+16*8+4],%i2 1432 sllx %i2,32,%i2 1433 or %i3,%i2,%i2 1434 ld [%g3+17*8+0],%i4 1435 ld [%g3+17*8+4],%i3 1436 sllx %i3,32,%i3 1437 or %i4,%i3,%i3 1438 ld [%g3+18*8+0],%i5 1439 ld [%g3+18*8+4],%i4 1440 sllx %i4,32,%i4 1441 or %i5,%i4,%i4 1442 ld [%g3+19*8+0],%l0 1443 ld [%g3+19*8+4],%i5 1444 sllx %i5,32,%i5 1445 or %l0,%i5,%i5 1446 ld [%g3+20*8+0],%l1 1447 ld [%g3+20*8+4],%l0 1448 sllx %l0,32,%l0 1449 or %l1,%l0,%l0 1450 ld [%g3+21*8+0],%l2 1451 ld [%g3+21*8+4],%l1 1452 sllx %l1,32,%l1 1453 or %l2,%l1,%l1 1454 ld [%g3+22*8+0],%l3 1455 ld [%g3+22*8+4],%l2 1456 sllx %l2,32,%l2 1457 or %l3,%l2,%l2 1458 ld [%g3+23*8+0],%l4 1459 ld [%g3+23*8+4],%l3 1460 sllx %l3,32,%l3 1461 or %l4,%l3,%l3 1462 ld [%g3+24*8+0],%l5 1463 ld [%g3+24*8+4],%l4 1464 sllx %l4,32,%l4 1465 or %l5,%l4,%l4 1466 ld [%g3+25*8+0],%l6 1467 ld [%g3+25*8+4],%l5 1468 sllx %l5,32,%l5 1469 or %l6,%l5,%l5 1470 ld [%g3+26*8+0],%l7 1471 ld [%g3+26*8+4],%l6 1472 sllx %l6,32,%l6 1473 or %l7,%l6,%l6 1474 ld [%g3+27*8+0],%o0 1475 ld [%g3+27*8+4],%l7 1476 sllx %l7,32,%l7 1477 or %o0,%l7,%l7 1478 ld [%g3+28*8+0],%o1 1479 ld [%g3+28*8+4],%o0 1480 sllx %o0,32,%o0 1481 or %o1,%o0,%o0 1482 ld [%g3+29*8+0],%o2 1483 ld [%g3+29*8+4],%o1 1484 sllx %o1,32,%o1 1485 or %o2,%o1,%o1 1486 ld [%g3+30*8+0],%o3 1487 ld [%g3+30*8+4],%o2 1488 sllx %o2,32,%o2 1489 or %o3,%o2,%o2 1490 ld [%g3+31*8+0],%o7 1491 ld [%g3+31*8+4],%o3 1492 sllx %o3,32,%o3 1493 or %o7,%o3,%o3 1494 .word 0x81b02920+32-1 ! 
montmul 32-1 1495.Lmresume_32: 1496 fbu,pn %fcc3,.Lmabort_32 1497#ifndef __arch64__ 1498 and %fp,%g5,%g5 1499 brz,pn %g5,.Lmabort_32 1500#endif 1501 nop 1502#ifdef __arch64__ 1503 restore 1504 restore 1505 restore 1506 restore 1507 restore 1508#else 1509 restore; and %fp,%g5,%g5 1510 restore; and %fp,%g5,%g5 1511 restore; and %fp,%g5,%g5 1512 restore; and %fp,%g5,%g5 1513 brz,pn %g5,.Lmabort1_32 1514 restore 1515#endif 1516 .word 0x81b02310 !movxtod %l0,%f0 1517 .word 0x85b02311 !movxtod %l1,%f2 1518 .word 0x89b02312 !movxtod %l2,%f4 1519 .word 0x8db02313 !movxtod %l3,%f6 1520 .word 0x91b02314 !movxtod %l4,%f8 1521 .word 0x95b02315 !movxtod %l5,%f10 1522 .word 0x99b02316 !movxtod %l6,%f12 1523 .word 0x9db02317 !movxtod %l7,%f14 1524 .word 0xa1b02308 !movxtod %o0,%f16 1525 .word 0xa5b02309 !movxtod %o1,%f18 1526 .word 0xa9b0230a !movxtod %o2,%f20 1527 .word 0xadb0230b !movxtod %o3,%f22 1528 .word 0xbbb0230c !movxtod %o4,%f60 1529 .word 0xbfb0230d !movxtod %o5,%f62 1530#ifdef __arch64__ 1531 restore 1532#else 1533 and %fp,%g5,%g5 1534 restore 1535 and %g5,1,%o7 1536 and %fp,%g5,%g5 1537 srl %fp,0,%fp ! just in case? 1538 or %o7,%g5,%g5 1539 brz,a,pn %g5,.Lmdone_32 1540 mov 0,%i0 ! return failure 1541#endif 1542 st %f1,[%g1+0*8+0] 1543 st %f0,[%g1+0*8+4] 1544 st %f3,[%g1+1*8+0] 1545 st %f2,[%g1+1*8+4] 1546 st %f5,[%g1+2*8+0] 1547 st %f4,[%g1+2*8+4] 1548 st %f7,[%g1+3*8+0] 1549 st %f6,[%g1+3*8+4] 1550 st %f9,[%g1+4*8+0] 1551 st %f8,[%g1+4*8+4] 1552 st %f11,[%g1+5*8+0] 1553 st %f10,[%g1+5*8+4] 1554 st %f13,[%g1+6*8+0] 1555 st %f12,[%g1+6*8+4] 1556 st %f15,[%g1+7*8+0] 1557 st %f14,[%g1+7*8+4] 1558 st %f17,[%g1+8*8+0] 1559 st %f16,[%g1+8*8+4] 1560 st %f19,[%g1+9*8+0] 1561 st %f18,[%g1+9*8+4] 1562 st %f21,[%g1+10*8+0] 1563 st %f20,[%g1+10*8+4] 1564 st %f23,[%g1+11*8+0] 1565 st %f22,[%g1+11*8+4] 1566 .word 0x81b00f1d !fsrc2 %f0,%f60,%f0 1567 st %f1,[%g1+12*8+0] 1568 st %f0,[%g1+12*8+4] 1569 .word 0x85b00f1f !fsrc2 %f0,%f62,%f2 1570 st %f3,[%g1+13*8+0] 1571 st %f2,[%g1+13*8+4] 1572 .word 0x89b00f18 !fsrc2 %f0,%f24,%f4 1573 st %f5,[%g1+14*8+0] 1574 st %f4,[%g1+14*8+4] 1575 .word 0x8db00f1a !fsrc2 %f0,%f26,%f6 1576 st %f7,[%g1+15*8+0] 1577 st %f6,[%g1+15*8+4] 1578 .word 0x81b00f1c !fsrc2 %f0,%f28,%f0 1579 st %f1,[%g1+16*8+0] 1580 st %f0,[%g1+16*8+4] 1581 .word 0x85b00f1e !fsrc2 %f0,%f30,%f2 1582 st %f3,[%g1+17*8+0] 1583 st %f2,[%g1+17*8+4] 1584 .word 0x89b00f01 !fsrc2 %f0,%f32,%f4 1585 st %f5,[%g1+18*8+0] 1586 st %f4,[%g1+18*8+4] 1587 .word 0x8db00f03 !fsrc2 %f0,%f34,%f6 1588 st %f7,[%g1+19*8+0] 1589 st %f6,[%g1+19*8+4] 1590 .word 0x81b00f05 !fsrc2 %f0,%f36,%f0 1591 st %f1,[%g1+20*8+0] 1592 st %f0,[%g1+20*8+4] 1593 .word 0x85b00f07 !fsrc2 %f0,%f38,%f2 1594 st %f3,[%g1+21*8+0] 1595 st %f2,[%g1+21*8+4] 1596 .word 0x89b00f09 !fsrc2 %f0,%f40,%f4 1597 st %f5,[%g1+22*8+0] 1598 st %f4,[%g1+22*8+4] 1599 .word 0x8db00f0b !fsrc2 %f0,%f42,%f6 1600 st %f7,[%g1+23*8+0] 1601 st %f6,[%g1+23*8+4] 1602 .word 0x81b00f0d !fsrc2 %f0,%f44,%f0 1603 st %f1,[%g1+24*8+0] 1604 st %f0,[%g1+24*8+4] 1605 .word 0x85b00f0f !fsrc2 %f0,%f46,%f2 1606 st %f3,[%g1+25*8+0] 1607 st %f2,[%g1+25*8+4] 1608 .word 0x89b00f11 !fsrc2 %f0,%f48,%f4 1609 st %f5,[%g1+26*8+0] 1610 st %f4,[%g1+26*8+4] 1611 .word 0x8db00f13 !fsrc2 %f0,%f50,%f6 1612 st %f7,[%g1+27*8+0] 1613 st %f6,[%g1+27*8+4] 1614 .word 0x81b00f15 !fsrc2 %f0,%f52,%f0 1615 st %f1,[%g1+28*8+0] 1616 st %f0,[%g1+28*8+4] 1617 .word 0x85b00f17 !fsrc2 %f0,%f54,%f2 1618 st %f3,[%g1+29*8+0] 1619 st %f2,[%g1+29*8+4] 1620 .word 0x89b00f19 !fsrc2 %f0,%f56,%f4 1621 st %f5,[%g1+30*8+0] 1622 st 
%f4,[%g1+30*8+4] 1623 .word 0x8db00f1b !fsrc2 %f0,%f58,%f6 1624 st %f7,[%g1+31*8+0] 1625 st %f6,[%g1+31*8+4] 1626 mov 1,%i0 ! return success 1627.Lmdone_32: 1628 ret 1629 restore 1630 1631.Lmabort_32: 1632 restore 1633 restore 1634 restore 1635 restore 1636 restore 1637.Lmabort1_32: 1638 restore 1639 1640 mov 0,%i0 ! return failure 1641 ret 1642 restore 1643 1644.align 32 1645.Lmsquare_32: 1646 save %sp,-128,%sp; or %g5,%fp,%fp 1647 save %sp,-128,%sp; or %g5,%fp,%fp 1648 .word 0x81b02940+32-1 ! montsqr 32-1 1649 ba .Lmresume_32 1650 nop 1651.type bn_mul_mont_t4_32, #function 1652.size bn_mul_mont_t4_32, .-bn_mul_mont_t4_32 1653.globl bn_pwr5_mont_t4_8 1654.align 32 1655bn_pwr5_mont_t4_8: 1656#ifdef __arch64__ 1657 mov 0,%g5 1658 mov -128,%g4 1659#elif defined(SPARCV9_64BIT_STACK) 1660 SPARC_LOAD_ADDRESS_LEAF(OPENSSL_sparcv9cap_P,%g1,%g5) 1661 ld [%g1+0],%g1 ! OPENSSL_sparcv9_P[0] 1662 mov -2047,%g4 1663 and %g1,SPARCV9_64BIT_STACK,%g1 1664 movrz %g1,0,%g4 1665 mov -1,%g5 1666 add %g4,-128,%g4 1667#else 1668 mov -1,%g5 1669 mov -128,%g4 1670#endif 1671 sllx %g5,32,%g5 1672 save %sp,%g4,%sp 1673#ifndef __arch64__ 1674 save %sp,-128,%sp ! warm it up 1675 save %sp,-128,%sp 1676 save %sp,-128,%sp 1677 save %sp,-128,%sp 1678 save %sp,-128,%sp 1679 save %sp,-128,%sp 1680 restore 1681 restore 1682 restore 1683 restore 1684 restore 1685 restore 1686#endif 1687 and %sp,1,%g4 1688 or %g5,%fp,%fp 1689 or %g4,%g5,%g5 1690 1691 ! copy arguments to global registers 1692 mov %i0,%g1 1693 mov %i1,%g2 1694 ld [%i2+0],%f1 ! load *n0 1695 ld [%i2+4],%f0 1696 mov %i3,%g3 1697 srl %i4,%g0,%i4 ! pack last arguments 1698 sllx %i5,32,%g4 1699 or %i4,%g4,%g4 1700 .word 0xbbb00f00 !fsrc2 %f0,%f0,%f60 1701 save %sp,-128,%sp; or %g5,%fp,%fp 1702 ldx [%g1+0*8],%l0 1703 ldx [%g1+1*8],%l1 1704 ldx [%g1+2*8],%l2 1705 ldx [%g1+3*8],%l3 1706 ldx [%g1+4*8],%l4 1707 ldx [%g1+5*8],%l5 1708 ldx [%g1+6*8],%l6 1709 ldx [%g1+7*8],%l7 1710 save %sp,-128,%sp; or %g5,%fp,%fp 1711 ldx [%g2+0*8],%l0 1712 ldx [%g2+1*8],%l1 1713 ldx [%g2+2*8],%l2 1714 ldx [%g2+3*8],%l3 1715 ldx [%g2+4*8],%l4 1716 ldx [%g2+5*8],%l5 1717 ldx [%g2+6*8],%l6 1718 ldx [%g2+7*8],%l7 1719 save %sp,-128,%sp; or %g5,%fp,%fp 1720 save %sp,-128,%sp; or %g5,%fp,%fp 1721 save %sp,-128,%sp; or %g5,%fp,%fp 1722 1723 srlx %g4, 32, %o4 ! unpack %g4 1724 srl %g4, %g0, %o5 1725 sub %o4, 5, %o4 1726 mov %g3, %o7 1727 sllx %o4, 32, %g4 ! re-pack %g4 1728 or %o5, %g4, %g4 1729 srl %o5, %o4, %o5 1730 srl %o5, 2, %o4 1731 and %o5, 3, %o5 1732 and %o4, 7, %o4 1733 sll %o5, 3, %o5 ! offset within first cache line 1734 add %o5, %o7, %o7 ! 
of the pwrtbl 1735 or %g0, 1, %o5 1736 sll %o5, %o4, %o4 1737 wr %o4, %g0, %ccr 1738 b .Lstride_8 1739 nop 1740.align 16 1741.Lstride_8: 1742 ldx [%o7+0*32], %i0 1743 ldx [%o7+8*32], %i1 1744 ldx [%o7+1*32], %o4 1745 ldx [%o7+9*32], %o5 1746 movvs %icc, %o4, %i0 1747 ldx [%o7+2*32], %o4 1748 movvs %icc, %o5, %i1 1749 ldx [%o7+10*32],%o5 1750 move %icc, %o4, %i0 1751 ldx [%o7+3*32], %o4 1752 move %icc, %o5, %i1 1753 ldx [%o7+11*32],%o5 1754 movneg %icc, %o4, %i0 1755 ldx [%o7+4*32], %o4 1756 movneg %icc, %o5, %i1 1757 ldx [%o7+12*32],%o5 1758 movcs %xcc, %o4, %i0 1759 ldx [%o7+5*32],%o4 1760 movcs %xcc, %o5, %i1 1761 ldx [%o7+13*32],%o5 1762 movvs %xcc, %o4, %i0 1763 ldx [%o7+6*32], %o4 1764 movvs %xcc, %o5, %i1 1765 ldx [%o7+14*32],%o5 1766 move %xcc, %o4, %i0 1767 ldx [%o7+7*32], %o4 1768 move %xcc, %o5, %i1 1769 ldx [%o7+15*32],%o5 1770 movneg %xcc, %o4, %i0 1771 add %o7,16*32, %o7 1772 movneg %xcc, %o5, %i1 1773 ldx [%o7+0*32], %i2 1774 ldx [%o7+8*32], %i3 1775 ldx [%o7+1*32], %o4 1776 ldx [%o7+9*32], %o5 1777 movvs %icc, %o4, %i2 1778 ldx [%o7+2*32], %o4 1779 movvs %icc, %o5, %i3 1780 ldx [%o7+10*32],%o5 1781 move %icc, %o4, %i2 1782 ldx [%o7+3*32], %o4 1783 move %icc, %o5, %i3 1784 ldx [%o7+11*32],%o5 1785 movneg %icc, %o4, %i2 1786 ldx [%o7+4*32], %o4 1787 movneg %icc, %o5, %i3 1788 ldx [%o7+12*32],%o5 1789 movcs %xcc, %o4, %i2 1790 ldx [%o7+5*32],%o4 1791 movcs %xcc, %o5, %i3 1792 ldx [%o7+13*32],%o5 1793 movvs %xcc, %o4, %i2 1794 ldx [%o7+6*32], %o4 1795 movvs %xcc, %o5, %i3 1796 ldx [%o7+14*32],%o5 1797 move %xcc, %o4, %i2 1798 ldx [%o7+7*32], %o4 1799 move %xcc, %o5, %i3 1800 ldx [%o7+15*32],%o5 1801 movneg %xcc, %o4, %i2 1802 add %o7,16*32, %o7 1803 movneg %xcc, %o5, %i3 1804 ldx [%o7+0*32], %i4 1805 ldx [%o7+8*32], %i5 1806 ldx [%o7+1*32], %o4 1807 ldx [%o7+9*32], %o5 1808 movvs %icc, %o4, %i4 1809 ldx [%o7+2*32], %o4 1810 movvs %icc, %o5, %i5 1811 ldx [%o7+10*32],%o5 1812 move %icc, %o4, %i4 1813 ldx [%o7+3*32], %o4 1814 move %icc, %o5, %i5 1815 ldx [%o7+11*32],%o5 1816 movneg %icc, %o4, %i4 1817 ldx [%o7+4*32], %o4 1818 movneg %icc, %o5, %i5 1819 ldx [%o7+12*32],%o5 1820 movcs %xcc, %o4, %i4 1821 ldx [%o7+5*32],%o4 1822 movcs %xcc, %o5, %i5 1823 ldx [%o7+13*32],%o5 1824 movvs %xcc, %o4, %i4 1825 ldx [%o7+6*32], %o4 1826 movvs %xcc, %o5, %i5 1827 ldx [%o7+14*32],%o5 1828 move %xcc, %o4, %i4 1829 ldx [%o7+7*32], %o4 1830 move %xcc, %o5, %i5 1831 ldx [%o7+15*32],%o5 1832 movneg %xcc, %o4, %i4 1833 add %o7,16*32, %o7 1834 movneg %xcc, %o5, %i5 1835 ldx [%o7+0*32], %l0 1836 ldx [%o7+8*32], %l1 1837 ldx [%o7+1*32], %o4 1838 ldx [%o7+9*32], %o5 1839 movvs %icc, %o4, %l0 1840 ldx [%o7+2*32], %o4 1841 movvs %icc, %o5, %l1 1842 ldx [%o7+10*32],%o5 1843 move %icc, %o4, %l0 1844 ldx [%o7+3*32], %o4 1845 move %icc, %o5, %l1 1846 ldx [%o7+11*32],%o5 1847 movneg %icc, %o4, %l0 1848 ldx [%o7+4*32], %o4 1849 movneg %icc, %o5, %l1 1850 ldx [%o7+12*32],%o5 1851 movcs %xcc, %o4, %l0 1852 ldx [%o7+5*32],%o4 1853 movcs %xcc, %o5, %l1 1854 ldx [%o7+13*32],%o5 1855 movvs %xcc, %o4, %l0 1856 ldx [%o7+6*32], %o4 1857 movvs %xcc, %o5, %l1 1858 ldx [%o7+14*32],%o5 1859 move %xcc, %o4, %l0 1860 ldx [%o7+7*32], %o4 1861 move %xcc, %o5, %l1 1862 ldx [%o7+15*32],%o5 1863 movneg %xcc, %o4, %l0 1864 add %o7,16*32, %o7 1865 movneg %xcc, %o5, %l1 1866 save %sp,-128,%sp; or %g5,%fp,%fp 1867 srax %g4, 32, %o4 ! unpack %g4 1868 srl %g4, %g0, %o5 1869 sub %o4, 5, %o4 1870 mov %g3, %i7 1871 sllx %o4, 32, %g4 ! 
re-pack %g4 1872 or %o5, %g4, %g4 1873 srl %o5, %o4, %o5 1874 srl %o5, 2, %o4 1875 and %o5, 3, %o5 1876 and %o4, 7, %o4 1877 sll %o5, 3, %o5 ! offset within first cache line 1878 add %o5, %i7, %i7 ! of the pwrtbl 1879 or %g0, 1, %o5 1880 sll %o5, %o4, %o4 1881 .word 0x81b02940+8-1 ! montsqr 8-1 1882 fbu,pn %fcc3,.Labort_8 1883#ifndef __arch64__ 1884 and %fp,%g5,%g5 1885 brz,pn %g5,.Labort_8 1886#endif 1887 nop 1888 .word 0x81b02940+8-1 ! montsqr 8-1 1889 fbu,pn %fcc3,.Labort_8 1890#ifndef __arch64__ 1891 and %fp,%g5,%g5 1892 brz,pn %g5,.Labort_8 1893#endif 1894 nop 1895 .word 0x81b02940+8-1 ! montsqr 8-1 1896 fbu,pn %fcc3,.Labort_8 1897#ifndef __arch64__ 1898 and %fp,%g5,%g5 1899 brz,pn %g5,.Labort_8 1900#endif 1901 nop 1902 .word 0x81b02940+8-1 ! montsqr 8-1 1903 fbu,pn %fcc3,.Labort_8 1904#ifndef __arch64__ 1905 and %fp,%g5,%g5 1906 brz,pn %g5,.Labort_8 1907#endif 1908 nop 1909 .word 0x81b02940+8-1 ! montsqr 8-1 1910 fbu,pn %fcc3,.Labort_8 1911#ifndef __arch64__ 1912 and %fp,%g5,%g5 1913 brz,pn %g5,.Labort_8 1914#endif 1915 nop 1916 wr %o4, %g0, %ccr 1917 .word 0x81b02920+8-1 ! montmul 8-1 1918 fbu,pn %fcc3,.Labort_8 1919#ifndef __arch64__ 1920 and %fp,%g5,%g5 1921 brz,pn %g5,.Labort_8 1922#endif 1923 1924 srax %g4, 32, %o4 1925#ifdef __arch64__ 1926 brgez %o4,.Lstride_8 1927 restore 1928 restore 1929 restore 1930 restore 1931 restore 1932#else 1933 brgez %o4,.Lstride_8 1934 restore; and %fp,%g5,%g5 1935 restore; and %fp,%g5,%g5 1936 restore; and %fp,%g5,%g5 1937 restore; and %fp,%g5,%g5 1938 brz,pn %g5,.Labort1_8 1939 restore 1940#endif 1941 .word 0x81b02310 !movxtod %l0,%f0 1942 .word 0x85b02311 !movxtod %l1,%f2 1943 .word 0x89b02312 !movxtod %l2,%f4 1944 .word 0x8db02313 !movxtod %l3,%f6 1945 .word 0x91b02314 !movxtod %l4,%f8 1946 .word 0x95b02315 !movxtod %l5,%f10 1947 .word 0x99b02316 !movxtod %l6,%f12 1948 .word 0x9db02317 !movxtod %l7,%f14 1949#ifdef __arch64__ 1950 restore 1951#else 1952 and %fp,%g5,%g5 1953 restore 1954 and %g5,1,%o7 1955 and %fp,%g5,%g5 1956 srl %fp,0,%fp ! just in case? 1957 or %o7,%g5,%g5 1958 brz,a,pn %g5,.Ldone_8 1959 mov 0,%i0 ! return failure 1960#endif 1961 std %f0,[%g1+0*8] 1962 std %f2,[%g1+1*8] 1963 std %f4,[%g1+2*8] 1964 std %f6,[%g1+3*8] 1965 std %f8,[%g1+4*8] 1966 std %f10,[%g1+5*8] 1967 std %f12,[%g1+6*8] 1968 std %f14,[%g1+7*8] 1969 mov 1,%i0 ! return success 1970.Ldone_8: 1971 ret 1972 restore 1973 1974.Labort_8: 1975 restore 1976 restore 1977 restore 1978 restore 1979 restore 1980.Labort1_8: 1981 restore 1982 1983 mov 0,%i0 ! return failure 1984 ret 1985 restore 1986.type bn_pwr5_mont_t4_8, #function 1987.size bn_pwr5_mont_t4_8, .-bn_pwr5_mont_t4_8 1988.globl bn_pwr5_mont_t4_16 1989.align 32 1990bn_pwr5_mont_t4_16: 1991#ifdef __arch64__ 1992 mov 0,%g5 1993 mov -128,%g4 1994#elif defined(SPARCV9_64BIT_STACK) 1995 SPARC_LOAD_ADDRESS_LEAF(OPENSSL_sparcv9cap_P,%g1,%g5) 1996 ld [%g1+0],%g1 ! OPENSSL_sparcv9_P[0] 1997 mov -2047,%g4 1998 and %g1,SPARCV9_64BIT_STACK,%g1 1999 movrz %g1,0,%g4 2000 mov -1,%g5 2001 add %g4,-128,%g4 2002#else 2003 mov -1,%g5 2004 mov -128,%g4 2005#endif 2006 sllx %g5,32,%g5 2007 save %sp,%g4,%sp 2008#ifndef __arch64__ 2009 save %sp,-128,%sp ! warm it up 2010 save %sp,-128,%sp 2011 save %sp,-128,%sp 2012 save %sp,-128,%sp 2013 save %sp,-128,%sp 2014 save %sp,-128,%sp 2015 restore 2016 restore 2017 restore 2018 restore 2019 restore 2020 restore 2021#endif 2022 and %sp,1,%g4 2023 or %g5,%fp,%fp 2024 or %g4,%g5,%g5 2025 2026 ! copy arguments to global registers 2027 mov %i0,%g1 2028 mov %i1,%g2 2029 ld [%i2+0],%f1 ! 
load *n0 2030 ld [%i2+4],%f0 2031 mov %i3,%g3 2032 srl %i4,%g0,%i4 ! pack last arguments 2033 sllx %i5,32,%g4 2034 or %i4,%g4,%g4 2035 .word 0xbbb00f00 !fsrc2 %f0,%f0,%f60 2036 save %sp,-128,%sp; or %g5,%fp,%fp 2037 ldx [%g1+0*8],%l0 2038 ldx [%g1+1*8],%l1 2039 ldx [%g1+2*8],%l2 2040 ldx [%g1+3*8],%l3 2041 ldx [%g1+4*8],%l4 2042 ldx [%g1+5*8],%l5 2043 ldx [%g1+6*8],%l6 2044 ldx [%g1+7*8],%l7 2045 ldx [%g1+8*8],%o0 2046 ldx [%g1+9*8],%o1 2047 ldx [%g1+10*8],%o2 2048 ldx [%g1+11*8],%o3 2049 ldx [%g1+12*8],%o4 2050 ldx [%g1+13*8],%o5 2051 ldd [%g1+14*8],%f24 2052 ldd [%g1+15*8],%f26 2053 save %sp,-128,%sp; or %g5,%fp,%fp 2054 ldx [%g2+0*8],%l0 2055 ldx [%g2+1*8],%l1 2056 ldx [%g2+2*8],%l2 2057 ldx [%g2+3*8],%l3 2058 ldx [%g2+4*8],%l4 2059 ldx [%g2+5*8],%l5 2060 ldx [%g2+6*8],%l6 2061 ldx [%g2+7*8],%l7 2062 ldx [%g2+8*8],%o0 2063 ldx [%g2+9*8],%o1 2064 ldx [%g2+10*8],%o2 2065 ldx [%g2+11*8],%o3 2066 ldx [%g2+12*8],%o4 2067 ldx [%g2+13*8],%o5 2068 save %sp,-128,%sp; or %g5,%fp,%fp 2069 ldx [%g2+14*8],%l0 2070 ldx [%g2+15*8],%l1 2071 save %sp,-128,%sp; or %g5,%fp,%fp 2072 save %sp,-128,%sp; or %g5,%fp,%fp 2073 2074 srlx %g4, 32, %o4 ! unpack %g4 2075 srl %g4, %g0, %o5 2076 sub %o4, 5, %o4 2077 mov %g3, %o7 2078 sllx %o4, 32, %g4 ! re-pack %g4 2079 or %o5, %g4, %g4 2080 srl %o5, %o4, %o5 2081 srl %o5, 2, %o4 2082 and %o5, 3, %o5 2083 and %o4, 7, %o4 2084 sll %o5, 3, %o5 ! offset within first cache line 2085 add %o5, %o7, %o7 ! of the pwrtbl 2086 or %g0, 1, %o5 2087 sll %o5, %o4, %o4 2088 wr %o4, %g0, %ccr 2089 b .Lstride_16 2090 nop 2091.align 16 2092.Lstride_16: 2093 ldx [%o7+0*32], %i0 2094 ldx [%o7+8*32], %i1 2095 ldx [%o7+1*32], %o4 2096 ldx [%o7+9*32], %o5 2097 movvs %icc, %o4, %i0 2098 ldx [%o7+2*32], %o4 2099 movvs %icc, %o5, %i1 2100 ldx [%o7+10*32],%o5 2101 move %icc, %o4, %i0 2102 ldx [%o7+3*32], %o4 2103 move %icc, %o5, %i1 2104 ldx [%o7+11*32],%o5 2105 movneg %icc, %o4, %i0 2106 ldx [%o7+4*32], %o4 2107 movneg %icc, %o5, %i1 2108 ldx [%o7+12*32],%o5 2109 movcs %xcc, %o4, %i0 2110 ldx [%o7+5*32],%o4 2111 movcs %xcc, %o5, %i1 2112 ldx [%o7+13*32],%o5 2113 movvs %xcc, %o4, %i0 2114 ldx [%o7+6*32], %o4 2115 movvs %xcc, %o5, %i1 2116 ldx [%o7+14*32],%o5 2117 move %xcc, %o4, %i0 2118 ldx [%o7+7*32], %o4 2119 move %xcc, %o5, %i1 2120 ldx [%o7+15*32],%o5 2121 movneg %xcc, %o4, %i0 2122 add %o7,16*32, %o7 2123 movneg %xcc, %o5, %i1 2124 ldx [%o7+0*32], %i2 2125 ldx [%o7+8*32], %i3 2126 ldx [%o7+1*32], %o4 2127 ldx [%o7+9*32], %o5 2128 movvs %icc, %o4, %i2 2129 ldx [%o7+2*32], %o4 2130 movvs %icc, %o5, %i3 2131 ldx [%o7+10*32],%o5 2132 move %icc, %o4, %i2 2133 ldx [%o7+3*32], %o4 2134 move %icc, %o5, %i3 2135 ldx [%o7+11*32],%o5 2136 movneg %icc, %o4, %i2 2137 ldx [%o7+4*32], %o4 2138 movneg %icc, %o5, %i3 2139 ldx [%o7+12*32],%o5 2140 movcs %xcc, %o4, %i2 2141 ldx [%o7+5*32],%o4 2142 movcs %xcc, %o5, %i3 2143 ldx [%o7+13*32],%o5 2144 movvs %xcc, %o4, %i2 2145 ldx [%o7+6*32], %o4 2146 movvs %xcc, %o5, %i3 2147 ldx [%o7+14*32],%o5 2148 move %xcc, %o4, %i2 2149 ldx [%o7+7*32], %o4 2150 move %xcc, %o5, %i3 2151 ldx [%o7+15*32],%o5 2152 movneg %xcc, %o4, %i2 2153 add %o7,16*32, %o7 2154 movneg %xcc, %o5, %i3 2155 ldx [%o7+0*32], %i4 2156 ldx [%o7+8*32], %i5 2157 ldx [%o7+1*32], %o4 2158 ldx [%o7+9*32], %o5 2159 movvs %icc, %o4, %i4 2160 ldx [%o7+2*32], %o4 2161 movvs %icc, %o5, %i5 2162 ldx [%o7+10*32],%o5 2163 move %icc, %o4, %i4 2164 ldx [%o7+3*32], %o4 2165 move %icc, %o5, %i5 2166 ldx [%o7+11*32],%o5 2167 movneg %icc, %o4, %i4 2168 ldx [%o7+4*32], %o4 2169 movneg %icc, %o5, %i5 2170 ldx 
[%o7+12*32],%o5 2171 movcs %xcc, %o4, %i4 2172 ldx [%o7+5*32],%o4 2173 movcs %xcc, %o5, %i5 2174 ldx [%o7+13*32],%o5 2175 movvs %xcc, %o4, %i4 2176 ldx [%o7+6*32], %o4 2177 movvs %xcc, %o5, %i5 2178 ldx [%o7+14*32],%o5 2179 move %xcc, %o4, %i4 2180 ldx [%o7+7*32], %o4 2181 move %xcc, %o5, %i5 2182 ldx [%o7+15*32],%o5 2183 movneg %xcc, %o4, %i4 2184 add %o7,16*32, %o7 2185 movneg %xcc, %o5, %i5 2186 ldx [%o7+0*32], %l0 2187 ldx [%o7+8*32], %l1 2188 ldx [%o7+1*32], %o4 2189 ldx [%o7+9*32], %o5 2190 movvs %icc, %o4, %l0 2191 ldx [%o7+2*32], %o4 2192 movvs %icc, %o5, %l1 2193 ldx [%o7+10*32],%o5 2194 move %icc, %o4, %l0 2195 ldx [%o7+3*32], %o4 2196 move %icc, %o5, %l1 2197 ldx [%o7+11*32],%o5 2198 movneg %icc, %o4, %l0 2199 ldx [%o7+4*32], %o4 2200 movneg %icc, %o5, %l1 2201 ldx [%o7+12*32],%o5 2202 movcs %xcc, %o4, %l0 2203 ldx [%o7+5*32],%o4 2204 movcs %xcc, %o5, %l1 2205 ldx [%o7+13*32],%o5 2206 movvs %xcc, %o4, %l0 2207 ldx [%o7+6*32], %o4 2208 movvs %xcc, %o5, %l1 2209 ldx [%o7+14*32],%o5 2210 move %xcc, %o4, %l0 2211 ldx [%o7+7*32], %o4 2212 move %xcc, %o5, %l1 2213 ldx [%o7+15*32],%o5 2214 movneg %xcc, %o4, %l0 2215 add %o7,16*32, %o7 2216 movneg %xcc, %o5, %l1 2217 ldx [%o7+0*32], %l2 2218 ldx [%o7+8*32], %l3 2219 ldx [%o7+1*32], %o4 2220 ldx [%o7+9*32], %o5 2221 movvs %icc, %o4, %l2 2222 ldx [%o7+2*32], %o4 2223 movvs %icc, %o5, %l3 2224 ldx [%o7+10*32],%o5 2225 move %icc, %o4, %l2 2226 ldx [%o7+3*32], %o4 2227 move %icc, %o5, %l3 2228 ldx [%o7+11*32],%o5 2229 movneg %icc, %o4, %l2 2230 ldx [%o7+4*32], %o4 2231 movneg %icc, %o5, %l3 2232 ldx [%o7+12*32],%o5 2233 movcs %xcc, %o4, %l2 2234 ldx [%o7+5*32],%o4 2235 movcs %xcc, %o5, %l3 2236 ldx [%o7+13*32],%o5 2237 movvs %xcc, %o4, %l2 2238 ldx [%o7+6*32], %o4 2239 movvs %xcc, %o5, %l3 2240 ldx [%o7+14*32],%o5 2241 move %xcc, %o4, %l2 2242 ldx [%o7+7*32], %o4 2243 move %xcc, %o5, %l3 2244 ldx [%o7+15*32],%o5 2245 movneg %xcc, %o4, %l2 2246 add %o7,16*32, %o7 2247 movneg %xcc, %o5, %l3 2248 ldx [%o7+0*32], %l4 2249 ldx [%o7+8*32], %l5 2250 ldx [%o7+1*32], %o4 2251 ldx [%o7+9*32], %o5 2252 movvs %icc, %o4, %l4 2253 ldx [%o7+2*32], %o4 2254 movvs %icc, %o5, %l5 2255 ldx [%o7+10*32],%o5 2256 move %icc, %o4, %l4 2257 ldx [%o7+3*32], %o4 2258 move %icc, %o5, %l5 2259 ldx [%o7+11*32],%o5 2260 movneg %icc, %o4, %l4 2261 ldx [%o7+4*32], %o4 2262 movneg %icc, %o5, %l5 2263 ldx [%o7+12*32],%o5 2264 movcs %xcc, %o4, %l4 2265 ldx [%o7+5*32],%o4 2266 movcs %xcc, %o5, %l5 2267 ldx [%o7+13*32],%o5 2268 movvs %xcc, %o4, %l4 2269 ldx [%o7+6*32], %o4 2270 movvs %xcc, %o5, %l5 2271 ldx [%o7+14*32],%o5 2272 move %xcc, %o4, %l4 2273 ldx [%o7+7*32], %o4 2274 move %xcc, %o5, %l5 2275 ldx [%o7+15*32],%o5 2276 movneg %xcc, %o4, %l4 2277 add %o7,16*32, %o7 2278 movneg %xcc, %o5, %l5 2279 ldx [%o7+0*32], %l6 2280 ldx [%o7+8*32], %l7 2281 ldx [%o7+1*32], %o4 2282 ldx [%o7+9*32], %o5 2283 movvs %icc, %o4, %l6 2284 ldx [%o7+2*32], %o4 2285 movvs %icc, %o5, %l7 2286 ldx [%o7+10*32],%o5 2287 move %icc, %o4, %l6 2288 ldx [%o7+3*32], %o4 2289 move %icc, %o5, %l7 2290 ldx [%o7+11*32],%o5 2291 movneg %icc, %o4, %l6 2292 ldx [%o7+4*32], %o4 2293 movneg %icc, %o5, %l7 2294 ldx [%o7+12*32],%o5 2295 movcs %xcc, %o4, %l6 2296 ldx [%o7+5*32],%o4 2297 movcs %xcc, %o5, %l7 2298 ldx [%o7+13*32],%o5 2299 movvs %xcc, %o4, %l6 2300 ldx [%o7+6*32], %o4 2301 movvs %xcc, %o5, %l7 2302 ldx [%o7+14*32],%o5 2303 move %xcc, %o4, %l6 2304 ldx [%o7+7*32], %o4 2305 move %xcc, %o5, %l7 2306 ldx [%o7+15*32],%o5 2307 movneg %xcc, %o4, %l6 2308 add %o7,16*32, %o7 2309 movneg %xcc, %o5, %l7 2310 save 
%sp,-128,%sp; or %g5,%fp,%fp 2311 ldx [%i7+0*32], %i0 2312 ldx [%i7+8*32], %i1 2313 ldx [%i7+1*32], %o4 2314 ldx [%i7+9*32], %o5 2315 movvs %icc, %o4, %i0 2316 ldx [%i7+2*32], %o4 2317 movvs %icc, %o5, %i1 2318 ldx [%i7+10*32],%o5 2319 move %icc, %o4, %i0 2320 ldx [%i7+3*32], %o4 2321 move %icc, %o5, %i1 2322 ldx [%i7+11*32],%o5 2323 movneg %icc, %o4, %i0 2324 ldx [%i7+4*32], %o4 2325 movneg %icc, %o5, %i1 2326 ldx [%i7+12*32],%o5 2327 movcs %xcc, %o4, %i0 2328 ldx [%i7+5*32],%o4 2329 movcs %xcc, %o5, %i1 2330 ldx [%i7+13*32],%o5 2331 movvs %xcc, %o4, %i0 2332 ldx [%i7+6*32], %o4 2333 movvs %xcc, %o5, %i1 2334 ldx [%i7+14*32],%o5 2335 move %xcc, %o4, %i0 2336 ldx [%i7+7*32], %o4 2337 move %xcc, %o5, %i1 2338 ldx [%i7+15*32],%o5 2339 movneg %xcc, %o4, %i0 2340 add %i7,16*32, %i7 2341 movneg %xcc, %o5, %i1 2342 srax %g4, 32, %o4 ! unpack %g4 2343 srl %g4, %g0, %o5 2344 sub %o4, 5, %o4 2345 mov %g3, %i7 2346 sllx %o4, 32, %g4 ! re-pack %g4 2347 or %o5, %g4, %g4 2348 srl %o5, %o4, %o5 2349 srl %o5, 2, %o4 2350 and %o5, 3, %o5 2351 and %o4, 7, %o4 2352 sll %o5, 3, %o5 ! offset within first cache line 2353 add %o5, %i7, %i7 ! of the pwrtbl 2354 or %g0, 1, %o5 2355 sll %o5, %o4, %o4 2356 .word 0x81b02940+16-1 ! montsqr 16-1 2357 fbu,pn %fcc3,.Labort_16 2358#ifndef __arch64__ 2359 and %fp,%g5,%g5 2360 brz,pn %g5,.Labort_16 2361#endif 2362 nop 2363 .word 0x81b02940+16-1 ! montsqr 16-1 2364 fbu,pn %fcc3,.Labort_16 2365#ifndef __arch64__ 2366 and %fp,%g5,%g5 2367 brz,pn %g5,.Labort_16 2368#endif 2369 nop 2370 .word 0x81b02940+16-1 ! montsqr 16-1 2371 fbu,pn %fcc3,.Labort_16 2372#ifndef __arch64__ 2373 and %fp,%g5,%g5 2374 brz,pn %g5,.Labort_16 2375#endif 2376 nop 2377 .word 0x81b02940+16-1 ! montsqr 16-1 2378 fbu,pn %fcc3,.Labort_16 2379#ifndef __arch64__ 2380 and %fp,%g5,%g5 2381 brz,pn %g5,.Labort_16 2382#endif 2383 nop 2384 .word 0x81b02940+16-1 ! montsqr 16-1 2385 fbu,pn %fcc3,.Labort_16 2386#ifndef __arch64__ 2387 and %fp,%g5,%g5 2388 brz,pn %g5,.Labort_16 2389#endif 2390 nop 2391 wr %o4, %g0, %ccr 2392 .word 0x81b02920+16-1 ! montmul 16-1 2393 fbu,pn %fcc3,.Labort_16 2394#ifndef __arch64__ 2395 and %fp,%g5,%g5 2396 brz,pn %g5,.Labort_16 2397#endif 2398 2399 srax %g4, 32, %o4 2400#ifdef __arch64__ 2401 brgez %o4,.Lstride_16 2402 restore 2403 restore 2404 restore 2405 restore 2406 restore 2407#else 2408 brgez %o4,.Lstride_16 2409 restore; and %fp,%g5,%g5 2410 restore; and %fp,%g5,%g5 2411 restore; and %fp,%g5,%g5 2412 restore; and %fp,%g5,%g5 2413 brz,pn %g5,.Labort1_16 2414 restore 2415#endif 2416 .word 0x81b02310 !movxtod %l0,%f0 2417 .word 0x85b02311 !movxtod %l1,%f2 2418 .word 0x89b02312 !movxtod %l2,%f4 2419 .word 0x8db02313 !movxtod %l3,%f6 2420 .word 0x91b02314 !movxtod %l4,%f8 2421 .word 0x95b02315 !movxtod %l5,%f10 2422 .word 0x99b02316 !movxtod %l6,%f12 2423 .word 0x9db02317 !movxtod %l7,%f14 2424 .word 0xa1b02308 !movxtod %o0,%f16 2425 .word 0xa5b02309 !movxtod %o1,%f18 2426 .word 0xa9b0230a !movxtod %o2,%f20 2427 .word 0xadb0230b !movxtod %o3,%f22 2428 .word 0xbbb0230c !movxtod %o4,%f60 2429 .word 0xbfb0230d !movxtod %o5,%f62 2430#ifdef __arch64__ 2431 restore 2432#else 2433 and %fp,%g5,%g5 2434 restore 2435 and %g5,1,%o7 2436 and %fp,%g5,%g5 2437 srl %fp,0,%fp ! just in case? 2438 or %o7,%g5,%g5 2439 brz,a,pn %g5,.Ldone_16 2440 mov 0,%i0 ! 
return failure 2441#endif 2442 std %f0,[%g1+0*8] 2443 std %f2,[%g1+1*8] 2444 std %f4,[%g1+2*8] 2445 std %f6,[%g1+3*8] 2446 std %f8,[%g1+4*8] 2447 std %f10,[%g1+5*8] 2448 std %f12,[%g1+6*8] 2449 std %f14,[%g1+7*8] 2450 std %f16,[%g1+8*8] 2451 std %f18,[%g1+9*8] 2452 std %f20,[%g1+10*8] 2453 std %f22,[%g1+11*8] 2454 std %f60,[%g1+12*8] 2455 std %f62,[%g1+13*8] 2456 std %f24,[%g1+14*8] 2457 std %f26,[%g1+15*8] 2458 mov 1,%i0 ! return success 2459.Ldone_16: 2460 ret 2461 restore 2462 2463.Labort_16: 2464 restore 2465 restore 2466 restore 2467 restore 2468 restore 2469.Labort1_16: 2470 restore 2471 2472 mov 0,%i0 ! return failure 2473 ret 2474 restore 2475.type bn_pwr5_mont_t4_16, #function 2476.size bn_pwr5_mont_t4_16, .-bn_pwr5_mont_t4_16 2477.globl bn_pwr5_mont_t4_24 2478.align 32 2479bn_pwr5_mont_t4_24: 2480#ifdef __arch64__ 2481 mov 0,%g5 2482 mov -128,%g4 2483#elif defined(SPARCV9_64BIT_STACK) 2484 SPARC_LOAD_ADDRESS_LEAF(OPENSSL_sparcv9cap_P,%g1,%g5) 2485 ld [%g1+0],%g1 ! OPENSSL_sparcv9_P[0] 2486 mov -2047,%g4 2487 and %g1,SPARCV9_64BIT_STACK,%g1 2488 movrz %g1,0,%g4 2489 mov -1,%g5 2490 add %g4,-128,%g4 2491#else 2492 mov -1,%g5 2493 mov -128,%g4 2494#endif 2495 sllx %g5,32,%g5 2496 save %sp,%g4,%sp 2497#ifndef __arch64__ 2498 save %sp,-128,%sp ! warm it up 2499 save %sp,-128,%sp 2500 save %sp,-128,%sp 2501 save %sp,-128,%sp 2502 save %sp,-128,%sp 2503 save %sp,-128,%sp 2504 restore 2505 restore 2506 restore 2507 restore 2508 restore 2509 restore 2510#endif 2511 and %sp,1,%g4 2512 or %g5,%fp,%fp 2513 or %g4,%g5,%g5 2514 2515 ! copy arguments to global registers 2516 mov %i0,%g1 2517 mov %i1,%g2 2518 ld [%i2+0],%f1 ! load *n0 2519 ld [%i2+4],%f0 2520 mov %i3,%g3 2521 srl %i4,%g0,%i4 ! pack last arguments 2522 sllx %i5,32,%g4 2523 or %i4,%g4,%g4 2524 .word 0xbbb00f00 !fsrc2 %f0,%f0,%f60 2525 save %sp,-128,%sp; or %g5,%fp,%fp 2526 ldx [%g1+0*8],%l0 2527 ldx [%g1+1*8],%l1 2528 ldx [%g1+2*8],%l2 2529 ldx [%g1+3*8],%l3 2530 ldx [%g1+4*8],%l4 2531 ldx [%g1+5*8],%l5 2532 ldx [%g1+6*8],%l6 2533 ldx [%g1+7*8],%l7 2534 ldx [%g1+8*8],%o0 2535 ldx [%g1+9*8],%o1 2536 ldx [%g1+10*8],%o2 2537 ldx [%g1+11*8],%o3 2538 ldx [%g1+12*8],%o4 2539 ldx [%g1+13*8],%o5 2540 ldd [%g1+14*8],%f24 2541 ldd [%g1+15*8],%f26 2542 ldd [%g1+16*8],%f28 2543 ldd [%g1+17*8],%f30 2544 ldd [%g1+18*8],%f32 2545 ldd [%g1+19*8],%f34 2546 ldd [%g1+20*8],%f36 2547 ldd [%g1+21*8],%f38 2548 ldd [%g1+22*8],%f40 2549 ldd [%g1+23*8],%f42 2550 save %sp,-128,%sp; or %g5,%fp,%fp 2551 ldx [%g2+0*8],%l0 2552 ldx [%g2+1*8],%l1 2553 ldx [%g2+2*8],%l2 2554 ldx [%g2+3*8],%l3 2555 ldx [%g2+4*8],%l4 2556 ldx [%g2+5*8],%l5 2557 ldx [%g2+6*8],%l6 2558 ldx [%g2+7*8],%l7 2559 ldx [%g2+8*8],%o0 2560 ldx [%g2+9*8],%o1 2561 ldx [%g2+10*8],%o2 2562 ldx [%g2+11*8],%o3 2563 ldx [%g2+12*8],%o4 2564 ldx [%g2+13*8],%o5 2565 save %sp,-128,%sp; or %g5,%fp,%fp 2566 ldx [%g2+14*8],%l0 2567 ldx [%g2+15*8],%l1 2568 ldx [%g2+16*8],%l2 2569 ldx [%g2+17*8],%l3 2570 ldx [%g2+18*8],%l4 2571 ldx [%g2+19*8],%l5 2572 ldx [%g2+20*8],%l6 2573 ldx [%g2+21*8],%l7 2574 ldx [%g2+22*8],%o0 2575 ldx [%g2+23*8],%o1 2576 save %sp,-128,%sp; or %g5,%fp,%fp 2577 save %sp,-128,%sp; or %g5,%fp,%fp 2578 2579 srlx %g4, 32, %o4 ! unpack %g4 2580 srl %g4, %g0, %o5 2581 sub %o4, 5, %o4 2582 mov %g3, %o7 2583 sllx %o4, 32, %g4 ! re-pack %g4 2584 or %o5, %g4, %g4 2585 srl %o5, %o4, %o5 2586 srl %o5, 2, %o4 2587 and %o5, 3, %o5 2588 and %o4, 7, %o4 2589 sll %o5, 3, %o5 ! offset within first cache line 2590 add %o5, %o7, %o7 ! 
of the pwrtbl 2591 or %g0, 1, %o5 2592 sll %o5, %o4, %o4 2593 wr %o4, %g0, %ccr 2594 b .Lstride_24 2595 nop 2596.align 16 2597.Lstride_24: 2598 ldx [%o7+0*32], %i0 2599 ldx [%o7+8*32], %i1 2600 ldx [%o7+1*32], %o4 2601 ldx [%o7+9*32], %o5 2602 movvs %icc, %o4, %i0 2603 ldx [%o7+2*32], %o4 2604 movvs %icc, %o5, %i1 2605 ldx [%o7+10*32],%o5 2606 move %icc, %o4, %i0 2607 ldx [%o7+3*32], %o4 2608 move %icc, %o5, %i1 2609 ldx [%o7+11*32],%o5 2610 movneg %icc, %o4, %i0 2611 ldx [%o7+4*32], %o4 2612 movneg %icc, %o5, %i1 2613 ldx [%o7+12*32],%o5 2614 movcs %xcc, %o4, %i0 2615 ldx [%o7+5*32],%o4 2616 movcs %xcc, %o5, %i1 2617 ldx [%o7+13*32],%o5 2618 movvs %xcc, %o4, %i0 2619 ldx [%o7+6*32], %o4 2620 movvs %xcc, %o5, %i1 2621 ldx [%o7+14*32],%o5 2622 move %xcc, %o4, %i0 2623 ldx [%o7+7*32], %o4 2624 move %xcc, %o5, %i1 2625 ldx [%o7+15*32],%o5 2626 movneg %xcc, %o4, %i0 2627 add %o7,16*32, %o7 2628 movneg %xcc, %o5, %i1 2629 ldx [%o7+0*32], %i2 2630 ldx [%o7+8*32], %i3 2631 ldx [%o7+1*32], %o4 2632 ldx [%o7+9*32], %o5 2633 movvs %icc, %o4, %i2 2634 ldx [%o7+2*32], %o4 2635 movvs %icc, %o5, %i3 2636 ldx [%o7+10*32],%o5 2637 move %icc, %o4, %i2 2638 ldx [%o7+3*32], %o4 2639 move %icc, %o5, %i3 2640 ldx [%o7+11*32],%o5 2641 movneg %icc, %o4, %i2 2642 ldx [%o7+4*32], %o4 2643 movneg %icc, %o5, %i3 2644 ldx [%o7+12*32],%o5 2645 movcs %xcc, %o4, %i2 2646 ldx [%o7+5*32],%o4 2647 movcs %xcc, %o5, %i3 2648 ldx [%o7+13*32],%o5 2649 movvs %xcc, %o4, %i2 2650 ldx [%o7+6*32], %o4 2651 movvs %xcc, %o5, %i3 2652 ldx [%o7+14*32],%o5 2653 move %xcc, %o4, %i2 2654 ldx [%o7+7*32], %o4 2655 move %xcc, %o5, %i3 2656 ldx [%o7+15*32],%o5 2657 movneg %xcc, %o4, %i2 2658 add %o7,16*32, %o7 2659 movneg %xcc, %o5, %i3 2660 ldx [%o7+0*32], %i4 2661 ldx [%o7+8*32], %i5 2662 ldx [%o7+1*32], %o4 2663 ldx [%o7+9*32], %o5 2664 movvs %icc, %o4, %i4 2665 ldx [%o7+2*32], %o4 2666 movvs %icc, %o5, %i5 2667 ldx [%o7+10*32],%o5 2668 move %icc, %o4, %i4 2669 ldx [%o7+3*32], %o4 2670 move %icc, %o5, %i5 2671 ldx [%o7+11*32],%o5 2672 movneg %icc, %o4, %i4 2673 ldx [%o7+4*32], %o4 2674 movneg %icc, %o5, %i5 2675 ldx [%o7+12*32],%o5 2676 movcs %xcc, %o4, %i4 2677 ldx [%o7+5*32],%o4 2678 movcs %xcc, %o5, %i5 2679 ldx [%o7+13*32],%o5 2680 movvs %xcc, %o4, %i4 2681 ldx [%o7+6*32], %o4 2682 movvs %xcc, %o5, %i5 2683 ldx [%o7+14*32],%o5 2684 move %xcc, %o4, %i4 2685 ldx [%o7+7*32], %o4 2686 move %xcc, %o5, %i5 2687 ldx [%o7+15*32],%o5 2688 movneg %xcc, %o4, %i4 2689 add %o7,16*32, %o7 2690 movneg %xcc, %o5, %i5 2691 ldx [%o7+0*32], %l0 2692 ldx [%o7+8*32], %l1 2693 ldx [%o7+1*32], %o4 2694 ldx [%o7+9*32], %o5 2695 movvs %icc, %o4, %l0 2696 ldx [%o7+2*32], %o4 2697 movvs %icc, %o5, %l1 2698 ldx [%o7+10*32],%o5 2699 move %icc, %o4, %l0 2700 ldx [%o7+3*32], %o4 2701 move %icc, %o5, %l1 2702 ldx [%o7+11*32],%o5 2703 movneg %icc, %o4, %l0 2704 ldx [%o7+4*32], %o4 2705 movneg %icc, %o5, %l1 2706 ldx [%o7+12*32],%o5 2707 movcs %xcc, %o4, %l0 2708 ldx [%o7+5*32],%o4 2709 movcs %xcc, %o5, %l1 2710 ldx [%o7+13*32],%o5 2711 movvs %xcc, %o4, %l0 2712 ldx [%o7+6*32], %o4 2713 movvs %xcc, %o5, %l1 2714 ldx [%o7+14*32],%o5 2715 move %xcc, %o4, %l0 2716 ldx [%o7+7*32], %o4 2717 move %xcc, %o5, %l1 2718 ldx [%o7+15*32],%o5 2719 movneg %xcc, %o4, %l0 2720 add %o7,16*32, %o7 2721 movneg %xcc, %o5, %l1 2722 ldx [%o7+0*32], %l2 2723 ldx [%o7+8*32], %l3 2724 ldx [%o7+1*32], %o4 2725 ldx [%o7+9*32], %o5 2726 movvs %icc, %o4, %l2 2727 ldx [%o7+2*32], %o4 2728 movvs %icc, %o5, %l3 2729 ldx [%o7+10*32],%o5 2730 move %icc, %o4, %l2 2731 ldx [%o7+3*32], %o4 2732 move %icc, 
%o5, %l3 2733 ldx [%o7+11*32],%o5 2734 movneg %icc, %o4, %l2 2735 ldx [%o7+4*32], %o4 2736 movneg %icc, %o5, %l3 2737 ldx [%o7+12*32],%o5 2738 movcs %xcc, %o4, %l2 2739 ldx [%o7+5*32],%o4 2740 movcs %xcc, %o5, %l3 2741 ldx [%o7+13*32],%o5 2742 movvs %xcc, %o4, %l2 2743 ldx [%o7+6*32], %o4 2744 movvs %xcc, %o5, %l3 2745 ldx [%o7+14*32],%o5 2746 move %xcc, %o4, %l2 2747 ldx [%o7+7*32], %o4 2748 move %xcc, %o5, %l3 2749 ldx [%o7+15*32],%o5 2750 movneg %xcc, %o4, %l2 2751 add %o7,16*32, %o7 2752 movneg %xcc, %o5, %l3 2753 ldx [%o7+0*32], %l4 2754 ldx [%o7+8*32], %l5 2755 ldx [%o7+1*32], %o4 2756 ldx [%o7+9*32], %o5 2757 movvs %icc, %o4, %l4 2758 ldx [%o7+2*32], %o4 2759 movvs %icc, %o5, %l5 2760 ldx [%o7+10*32],%o5 2761 move %icc, %o4, %l4 2762 ldx [%o7+3*32], %o4 2763 move %icc, %o5, %l5 2764 ldx [%o7+11*32],%o5 2765 movneg %icc, %o4, %l4 2766 ldx [%o7+4*32], %o4 2767 movneg %icc, %o5, %l5 2768 ldx [%o7+12*32],%o5 2769 movcs %xcc, %o4, %l4 2770 ldx [%o7+5*32],%o4 2771 movcs %xcc, %o5, %l5 2772 ldx [%o7+13*32],%o5 2773 movvs %xcc, %o4, %l4 2774 ldx [%o7+6*32], %o4 2775 movvs %xcc, %o5, %l5 2776 ldx [%o7+14*32],%o5 2777 move %xcc, %o4, %l4 2778 ldx [%o7+7*32], %o4 2779 move %xcc, %o5, %l5 2780 ldx [%o7+15*32],%o5 2781 movneg %xcc, %o4, %l4 2782 add %o7,16*32, %o7 2783 movneg %xcc, %o5, %l5 2784 ldx [%o7+0*32], %l6 2785 ldx [%o7+8*32], %l7 2786 ldx [%o7+1*32], %o4 2787 ldx [%o7+9*32], %o5 2788 movvs %icc, %o4, %l6 2789 ldx [%o7+2*32], %o4 2790 movvs %icc, %o5, %l7 2791 ldx [%o7+10*32],%o5 2792 move %icc, %o4, %l6 2793 ldx [%o7+3*32], %o4 2794 move %icc, %o5, %l7 2795 ldx [%o7+11*32],%o5 2796 movneg %icc, %o4, %l6 2797 ldx [%o7+4*32], %o4 2798 movneg %icc, %o5, %l7 2799 ldx [%o7+12*32],%o5 2800 movcs %xcc, %o4, %l6 2801 ldx [%o7+5*32],%o4 2802 movcs %xcc, %o5, %l7 2803 ldx [%o7+13*32],%o5 2804 movvs %xcc, %o4, %l6 2805 ldx [%o7+6*32], %o4 2806 movvs %xcc, %o5, %l7 2807 ldx [%o7+14*32],%o5 2808 move %xcc, %o4, %l6 2809 ldx [%o7+7*32], %o4 2810 move %xcc, %o5, %l7 2811 ldx [%o7+15*32],%o5 2812 movneg %xcc, %o4, %l6 2813 add %o7,16*32, %o7 2814 movneg %xcc, %o5, %l7 2815 save %sp,-128,%sp; or %g5,%fp,%fp 2816 ldx [%i7+0*32], %i0 2817 ldx [%i7+8*32], %i1 2818 ldx [%i7+1*32], %o4 2819 ldx [%i7+9*32], %o5 2820 movvs %icc, %o4, %i0 2821 ldx [%i7+2*32], %o4 2822 movvs %icc, %o5, %i1 2823 ldx [%i7+10*32],%o5 2824 move %icc, %o4, %i0 2825 ldx [%i7+3*32], %o4 2826 move %icc, %o5, %i1 2827 ldx [%i7+11*32],%o5 2828 movneg %icc, %o4, %i0 2829 ldx [%i7+4*32], %o4 2830 movneg %icc, %o5, %i1 2831 ldx [%i7+12*32],%o5 2832 movcs %xcc, %o4, %i0 2833 ldx [%i7+5*32],%o4 2834 movcs %xcc, %o5, %i1 2835 ldx [%i7+13*32],%o5 2836 movvs %xcc, %o4, %i0 2837 ldx [%i7+6*32], %o4 2838 movvs %xcc, %o5, %i1 2839 ldx [%i7+14*32],%o5 2840 move %xcc, %o4, %i0 2841 ldx [%i7+7*32], %o4 2842 move %xcc, %o5, %i1 2843 ldx [%i7+15*32],%o5 2844 movneg %xcc, %o4, %i0 2845 add %i7,16*32, %i7 2846 movneg %xcc, %o5, %i1 2847 ldx [%i7+0*32], %i2 2848 ldx [%i7+8*32], %i3 2849 ldx [%i7+1*32], %o4 2850 ldx [%i7+9*32], %o5 2851 movvs %icc, %o4, %i2 2852 ldx [%i7+2*32], %o4 2853 movvs %icc, %o5, %i3 2854 ldx [%i7+10*32],%o5 2855 move %icc, %o4, %i2 2856 ldx [%i7+3*32], %o4 2857 move %icc, %o5, %i3 2858 ldx [%i7+11*32],%o5 2859 movneg %icc, %o4, %i2 2860 ldx [%i7+4*32], %o4 2861 movneg %icc, %o5, %i3 2862 ldx [%i7+12*32],%o5 2863 movcs %xcc, %o4, %i2 2864 ldx [%i7+5*32],%o4 2865 movcs %xcc, %o5, %i3 2866 ldx [%i7+13*32],%o5 2867 movvs %xcc, %o4, %i2 2868 ldx [%i7+6*32], %o4 2869 movvs %xcc, %o5, %i3 2870 ldx [%i7+14*32],%o5 2871 move %xcc, %o4, %i2 
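
! Constant-time gather: each run of loads above and below reads all eight
! candidate entries of a pwrtbl column (and of the +8*32 column into a
! second register) and lets at most one conditional move survive.  The
! selector is the single bit written to %ccr beforehand: movvs/move/movneg
! test %icc for entries 1-3, movcs/movvs/move/movneg test %xcc for entries
! 4-7, and the unconditional first ldx (entry 0) stands when the bit is
! %icc.c, which no predicate examines.  Every entry is read with a fixed
! address pattern whatever the secret window value -- evidently the usual
! defence against cache-timing leaks of the table index.
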
2872 ldx [%i7+7*32], %o4 2873 move %xcc, %o5, %i3 2874 ldx [%i7+15*32],%o5 2875 movneg %xcc, %o4, %i2 2876 add %i7,16*32, %i7 2877 movneg %xcc, %o5, %i3 2878 ldx [%i7+0*32], %i4 2879 ldx [%i7+8*32], %i5 2880 ldx [%i7+1*32], %o4 2881 ldx [%i7+9*32], %o5 2882 movvs %icc, %o4, %i4 2883 ldx [%i7+2*32], %o4 2884 movvs %icc, %o5, %i5 2885 ldx [%i7+10*32],%o5 2886 move %icc, %o4, %i4 2887 ldx [%i7+3*32], %o4 2888 move %icc, %o5, %i5 2889 ldx [%i7+11*32],%o5 2890 movneg %icc, %o4, %i4 2891 ldx [%i7+4*32], %o4 2892 movneg %icc, %o5, %i5 2893 ldx [%i7+12*32],%o5 2894 movcs %xcc, %o4, %i4 2895 ldx [%i7+5*32],%o4 2896 movcs %xcc, %o5, %i5 2897 ldx [%i7+13*32],%o5 2898 movvs %xcc, %o4, %i4 2899 ldx [%i7+6*32], %o4 2900 movvs %xcc, %o5, %i5 2901 ldx [%i7+14*32],%o5 2902 move %xcc, %o4, %i4 2903 ldx [%i7+7*32], %o4 2904 move %xcc, %o5, %i5 2905 ldx [%i7+15*32],%o5 2906 movneg %xcc, %o4, %i4 2907 add %i7,16*32, %i7 2908 movneg %xcc, %o5, %i5 2909 ldx [%i7+0*32], %l0 2910 ldx [%i7+8*32], %l1 2911 ldx [%i7+1*32], %o4 2912 ldx [%i7+9*32], %o5 2913 movvs %icc, %o4, %l0 2914 ldx [%i7+2*32], %o4 2915 movvs %icc, %o5, %l1 2916 ldx [%i7+10*32],%o5 2917 move %icc, %o4, %l0 2918 ldx [%i7+3*32], %o4 2919 move %icc, %o5, %l1 2920 ldx [%i7+11*32],%o5 2921 movneg %icc, %o4, %l0 2922 ldx [%i7+4*32], %o4 2923 movneg %icc, %o5, %l1 2924 ldx [%i7+12*32],%o5 2925 movcs %xcc, %o4, %l0 2926 ldx [%i7+5*32],%o4 2927 movcs %xcc, %o5, %l1 2928 ldx [%i7+13*32],%o5 2929 movvs %xcc, %o4, %l0 2930 ldx [%i7+6*32], %o4 2931 movvs %xcc, %o5, %l1 2932 ldx [%i7+14*32],%o5 2933 move %xcc, %o4, %l0 2934 ldx [%i7+7*32], %o4 2935 move %xcc, %o5, %l1 2936 ldx [%i7+15*32],%o5 2937 movneg %xcc, %o4, %l0 2938 add %i7,16*32, %i7 2939 movneg %xcc, %o5, %l1 2940 ldx [%i7+0*32], %l2 2941 ldx [%i7+8*32], %l3 2942 ldx [%i7+1*32], %o4 2943 ldx [%i7+9*32], %o5 2944 movvs %icc, %o4, %l2 2945 ldx [%i7+2*32], %o4 2946 movvs %icc, %o5, %l3 2947 ldx [%i7+10*32],%o5 2948 move %icc, %o4, %l2 2949 ldx [%i7+3*32], %o4 2950 move %icc, %o5, %l3 2951 ldx [%i7+11*32],%o5 2952 movneg %icc, %o4, %l2 2953 ldx [%i7+4*32], %o4 2954 movneg %icc, %o5, %l3 2955 ldx [%i7+12*32],%o5 2956 movcs %xcc, %o4, %l2 2957 ldx [%i7+5*32],%o4 2958 movcs %xcc, %o5, %l3 2959 ldx [%i7+13*32],%o5 2960 movvs %xcc, %o4, %l2 2961 ldx [%i7+6*32], %o4 2962 movvs %xcc, %o5, %l3 2963 ldx [%i7+14*32],%o5 2964 move %xcc, %o4, %l2 2965 ldx [%i7+7*32], %o4 2966 move %xcc, %o5, %l3 2967 ldx [%i7+15*32],%o5 2968 movneg %xcc, %o4, %l2 2969 add %i7,16*32, %i7 2970 movneg %xcc, %o5, %l3 2971 srax %g4, 32, %o4 ! unpack %g4 2972 srl %g4, %g0, %o5 2973 sub %o4, 5, %o4 2974 mov %g3, %i7 2975 sllx %o4, 32, %g4 ! re-pack %g4 2976 or %o5, %g4, %g4 2977 srl %o5, %o4, %o5 2978 srl %o5, 2, %o4 2979 and %o5, 3, %o5 2980 and %o4, 7, %o4 2981 sll %o5, 3, %o5 ! offset within first cache line 2982 add %o5, %i7, %i7 ! of the pwrtbl 2983 or %g0, 1, %o5 2984 sll %o5, %o4, %o4 2985 .word 0x81b02940+24-1 ! montsqr 24-1 2986 fbu,pn %fcc3,.Labort_24 2987#ifndef __arch64__ 2988 and %fp,%g5,%g5 2989 brz,pn %g5,.Labort_24 2990#endif 2991 nop 2992 .word 0x81b02940+24-1 ! montsqr 24-1 2993 fbu,pn %fcc3,.Labort_24 2994#ifndef __arch64__ 2995 and %fp,%g5,%g5 2996 brz,pn %g5,.Labort_24 2997#endif 2998 nop 2999 .word 0x81b02940+24-1 ! montsqr 24-1 3000 fbu,pn %fcc3,.Labort_24 3001#ifndef __arch64__ 3002 and %fp,%g5,%g5 3003 brz,pn %g5,.Labort_24 3004#endif 3005 nop 3006 .word 0x81b02940+24-1 ! 
montsqr 24-1 3007 fbu,pn %fcc3,.Labort_24 3008#ifndef __arch64__ 3009 and %fp,%g5,%g5 3010 brz,pn %g5,.Labort_24 3011#endif 3012 nop 3013 .word 0x81b02940+24-1 ! montsqr 24-1 3014 fbu,pn %fcc3,.Labort_24 3015#ifndef __arch64__ 3016 and %fp,%g5,%g5 3017 brz,pn %g5,.Labort_24 3018#endif 3019 nop 3020 wr %o4, %g0, %ccr 3021 .word 0x81b02920+24-1 ! montmul 24-1 3022 fbu,pn %fcc3,.Labort_24 3023#ifndef __arch64__ 3024 and %fp,%g5,%g5 3025 brz,pn %g5,.Labort_24 3026#endif 3027 3028 srax %g4, 32, %o4 3029#ifdef __arch64__ 3030 brgez %o4,.Lstride_24 3031 restore 3032 restore 3033 restore 3034 restore 3035 restore 3036#else 3037 brgez %o4,.Lstride_24 3038 restore; and %fp,%g5,%g5 3039 restore; and %fp,%g5,%g5 3040 restore; and %fp,%g5,%g5 3041 restore; and %fp,%g5,%g5 3042 brz,pn %g5,.Labort1_24 3043 restore 3044#endif 3045 .word 0x81b02310 !movxtod %l0,%f0 3046 .word 0x85b02311 !movxtod %l1,%f2 3047 .word 0x89b02312 !movxtod %l2,%f4 3048 .word 0x8db02313 !movxtod %l3,%f6 3049 .word 0x91b02314 !movxtod %l4,%f8 3050 .word 0x95b02315 !movxtod %l5,%f10 3051 .word 0x99b02316 !movxtod %l6,%f12 3052 .word 0x9db02317 !movxtod %l7,%f14 3053 .word 0xa1b02308 !movxtod %o0,%f16 3054 .word 0xa5b02309 !movxtod %o1,%f18 3055 .word 0xa9b0230a !movxtod %o2,%f20 3056 .word 0xadb0230b !movxtod %o3,%f22 3057 .word 0xbbb0230c !movxtod %o4,%f60 3058 .word 0xbfb0230d !movxtod %o5,%f62 3059#ifdef __arch64__ 3060 restore 3061#else 3062 and %fp,%g5,%g5 3063 restore 3064 and %g5,1,%o7 3065 and %fp,%g5,%g5 3066 srl %fp,0,%fp ! just in case? 3067 or %o7,%g5,%g5 3068 brz,a,pn %g5,.Ldone_24 3069 mov 0,%i0 ! return failure 3070#endif 3071 std %f0,[%g1+0*8] 3072 std %f2,[%g1+1*8] 3073 std %f4,[%g1+2*8] 3074 std %f6,[%g1+3*8] 3075 std %f8,[%g1+4*8] 3076 std %f10,[%g1+5*8] 3077 std %f12,[%g1+6*8] 3078 std %f14,[%g1+7*8] 3079 std %f16,[%g1+8*8] 3080 std %f18,[%g1+9*8] 3081 std %f20,[%g1+10*8] 3082 std %f22,[%g1+11*8] 3083 std %f60,[%g1+12*8] 3084 std %f62,[%g1+13*8] 3085 std %f24,[%g1+14*8] 3086 std %f26,[%g1+15*8] 3087 std %f28,[%g1+16*8] 3088 std %f30,[%g1+17*8] 3089 std %f32,[%g1+18*8] 3090 std %f34,[%g1+19*8] 3091 std %f36,[%g1+20*8] 3092 std %f38,[%g1+21*8] 3093 std %f40,[%g1+22*8] 3094 std %f42,[%g1+23*8] 3095 mov 1,%i0 ! return success 3096.Ldone_24: 3097 ret 3098 restore 3099 3100.Labort_24: 3101 restore 3102 restore 3103 restore 3104 restore 3105 restore 3106.Labort1_24: 3107 restore 3108 3109 mov 0,%i0 ! return failure 3110 ret 3111 restore 3112.type bn_pwr5_mont_t4_24, #function 3113.size bn_pwr5_mont_t4_24, .-bn_pwr5_mont_t4_24 3114.globl bn_pwr5_mont_t4_32 3115.align 32 3116bn_pwr5_mont_t4_32: 3117#ifdef __arch64__ 3118 mov 0,%g5 3119 mov -128,%g4 3120#elif defined(SPARCV9_64BIT_STACK) 3121 SPARC_LOAD_ADDRESS_LEAF(OPENSSL_sparcv9cap_P,%g1,%g5) 3122 ld [%g1+0],%g1 ! OPENSSL_sparcv9_P[0] 3123 mov -2047,%g4 3124 and %g1,SPARCV9_64BIT_STACK,%g1 3125 movrz %g1,0,%g4 3126 mov -1,%g5 3127 add %g4,-128,%g4 3128#else 3129 mov -1,%g5 3130 mov -128,%g4 3131#endif 3132 sllx %g5,32,%g5 3133 save %sp,%g4,%sp 3134#ifndef __arch64__ 3135 save %sp,-128,%sp ! warm it up 3136 save %sp,-128,%sp 3137 save %sp,-128,%sp 3138 save %sp,-128,%sp 3139 save %sp,-128,%sp 3140 save %sp,-128,%sp 3141 restore 3142 restore 3143 restore 3144 restore 3145 restore 3146 restore 3147#endif 3148 and %sp,1,%g4 3149 or %g5,%fp,%fp 3150 or %g4,%g5,%g5 3151 3152 ! copy arguments to global registers 3153 mov %i0,%g1 3154 mov %i1,%g2 3155 ld [%i2+0],%f1 ! load *n0 3156 ld [%i2+4],%f0 3157 mov %i3,%g3 3158 srl %i4,%g0,%i4 ! 
pack last arguments 3159 sllx %i5,32,%g4 3160 or %i4,%g4,%g4 3161 .word 0xbbb00f00 !fsrc2 %f0,%f0,%f60 3162 save %sp,-128,%sp; or %g5,%fp,%fp 3163 ldx [%g1+0*8],%l0 3164 ldx [%g1+1*8],%l1 3165 ldx [%g1+2*8],%l2 3166 ldx [%g1+3*8],%l3 3167 ldx [%g1+4*8],%l4 3168 ldx [%g1+5*8],%l5 3169 ldx [%g1+6*8],%l6 3170 ldx [%g1+7*8],%l7 3171 ldx [%g1+8*8],%o0 3172 ldx [%g1+9*8],%o1 3173 ldx [%g1+10*8],%o2 3174 ldx [%g1+11*8],%o3 3175 ldx [%g1+12*8],%o4 3176 ldx [%g1+13*8],%o5 3177 ldd [%g1+14*8],%f24 3178 ldd [%g1+15*8],%f26 3179 ldd [%g1+16*8],%f28 3180 ldd [%g1+17*8],%f30 3181 ldd [%g1+18*8],%f32 3182 ldd [%g1+19*8],%f34 3183 ldd [%g1+20*8],%f36 3184 ldd [%g1+21*8],%f38 3185 ldd [%g1+22*8],%f40 3186 ldd [%g1+23*8],%f42 3187 ldd [%g1+24*8],%f44 3188 ldd [%g1+25*8],%f46 3189 ldd [%g1+26*8],%f48 3190 ldd [%g1+27*8],%f50 3191 ldd [%g1+28*8],%f52 3192 ldd [%g1+29*8],%f54 3193 ldd [%g1+30*8],%f56 3194 ldd [%g1+31*8],%f58 3195 save %sp,-128,%sp; or %g5,%fp,%fp 3196 ldx [%g2+0*8],%l0 3197 ldx [%g2+1*8],%l1 3198 ldx [%g2+2*8],%l2 3199 ldx [%g2+3*8],%l3 3200 ldx [%g2+4*8],%l4 3201 ldx [%g2+5*8],%l5 3202 ldx [%g2+6*8],%l6 3203 ldx [%g2+7*8],%l7 3204 ldx [%g2+8*8],%o0 3205 ldx [%g2+9*8],%o1 3206 ldx [%g2+10*8],%o2 3207 ldx [%g2+11*8],%o3 3208 ldx [%g2+12*8],%o4 3209 ldx [%g2+13*8],%o5 3210 save %sp,-128,%sp; or %g5,%fp,%fp 3211 ldx [%g2+14*8],%l0 3212 ldx [%g2+15*8],%l1 3213 ldx [%g2+16*8],%l2 3214 ldx [%g2+17*8],%l3 3215 ldx [%g2+18*8],%l4 3216 ldx [%g2+19*8],%l5 3217 ldx [%g2+20*8],%l6 3218 ldx [%g2+21*8],%l7 3219 ldx [%g2+22*8],%o0 3220 ldx [%g2+23*8],%o1 3221 ldx [%g2+24*8],%o2 3222 ldx [%g2+25*8],%o3 3223 ldx [%g2+26*8],%o4 3224 ldx [%g2+27*8],%o5 3225 save %sp,-128,%sp; or %g5,%fp,%fp 3226 ldx [%g2+28*8],%l0 3227 ldx [%g2+29*8],%l1 3228 ldx [%g2+30*8],%l2 3229 ldx [%g2+31*8],%l3 3230 save %sp,-128,%sp; or %g5,%fp,%fp 3231 3232 srlx %g4, 32, %o4 ! unpack %g4 3233 srl %g4, %g0, %o5 3234 sub %o4, 5, %o4 3235 mov %g3, %o7 3236 sllx %o4, 32, %g4 ! re-pack %g4 3237 or %o5, %g4, %g4 3238 srl %o5, %o4, %o5 3239 srl %o5, 2, %o4 3240 and %o5, 3, %o5 3241 and %o4, 7, %o4 3242 sll %o5, 3, %o5 ! offset within first cache line 3243 add %o5, %o7, %o7 ! 
of the pwrtbl 3244 or %g0, 1, %o5 3245 sll %o5, %o4, %o4 3246 wr %o4, %g0, %ccr 3247 b .Lstride_32 3248 nop 3249.align 16 3250.Lstride_32: 3251 ldx [%o7+0*32], %i0 3252 ldx [%o7+8*32], %i1 3253 ldx [%o7+1*32], %o4 3254 ldx [%o7+9*32], %o5 3255 movvs %icc, %o4, %i0 3256 ldx [%o7+2*32], %o4 3257 movvs %icc, %o5, %i1 3258 ldx [%o7+10*32],%o5 3259 move %icc, %o4, %i0 3260 ldx [%o7+3*32], %o4 3261 move %icc, %o5, %i1 3262 ldx [%o7+11*32],%o5 3263 movneg %icc, %o4, %i0 3264 ldx [%o7+4*32], %o4 3265 movneg %icc, %o5, %i1 3266 ldx [%o7+12*32],%o5 3267 movcs %xcc, %o4, %i0 3268 ldx [%o7+5*32],%o4 3269 movcs %xcc, %o5, %i1 3270 ldx [%o7+13*32],%o5 3271 movvs %xcc, %o4, %i0 3272 ldx [%o7+6*32], %o4 3273 movvs %xcc, %o5, %i1 3274 ldx [%o7+14*32],%o5 3275 move %xcc, %o4, %i0 3276 ldx [%o7+7*32], %o4 3277 move %xcc, %o5, %i1 3278 ldx [%o7+15*32],%o5 3279 movneg %xcc, %o4, %i0 3280 add %o7,16*32, %o7 3281 movneg %xcc, %o5, %i1 3282 ldx [%o7+0*32], %i2 3283 ldx [%o7+8*32], %i3 3284 ldx [%o7+1*32], %o4 3285 ldx [%o7+9*32], %o5 3286 movvs %icc, %o4, %i2 3287 ldx [%o7+2*32], %o4 3288 movvs %icc, %o5, %i3 3289 ldx [%o7+10*32],%o5 3290 move %icc, %o4, %i2 3291 ldx [%o7+3*32], %o4 3292 move %icc, %o5, %i3 3293 ldx [%o7+11*32],%o5 3294 movneg %icc, %o4, %i2 3295 ldx [%o7+4*32], %o4 3296 movneg %icc, %o5, %i3 3297 ldx [%o7+12*32],%o5 3298 movcs %xcc, %o4, %i2 3299 ldx [%o7+5*32],%o4 3300 movcs %xcc, %o5, %i3 3301 ldx [%o7+13*32],%o5 3302 movvs %xcc, %o4, %i2 3303 ldx [%o7+6*32], %o4 3304 movvs %xcc, %o5, %i3 3305 ldx [%o7+14*32],%o5 3306 move %xcc, %o4, %i2 3307 ldx [%o7+7*32], %o4 3308 move %xcc, %o5, %i3 3309 ldx [%o7+15*32],%o5 3310 movneg %xcc, %o4, %i2 3311 add %o7,16*32, %o7 3312 movneg %xcc, %o5, %i3 3313 ldx [%o7+0*32], %i4 3314 ldx [%o7+8*32], %i5 3315 ldx [%o7+1*32], %o4 3316 ldx [%o7+9*32], %o5 3317 movvs %icc, %o4, %i4 3318 ldx [%o7+2*32], %o4 3319 movvs %icc, %o5, %i5 3320 ldx [%o7+10*32],%o5 3321 move %icc, %o4, %i4 3322 ldx [%o7+3*32], %o4 3323 move %icc, %o5, %i5 3324 ldx [%o7+11*32],%o5 3325 movneg %icc, %o4, %i4 3326 ldx [%o7+4*32], %o4 3327 movneg %icc, %o5, %i5 3328 ldx [%o7+12*32],%o5 3329 movcs %xcc, %o4, %i4 3330 ldx [%o7+5*32],%o4 3331 movcs %xcc, %o5, %i5 3332 ldx [%o7+13*32],%o5 3333 movvs %xcc, %o4, %i4 3334 ldx [%o7+6*32], %o4 3335 movvs %xcc, %o5, %i5 3336 ldx [%o7+14*32],%o5 3337 move %xcc, %o4, %i4 3338 ldx [%o7+7*32], %o4 3339 move %xcc, %o5, %i5 3340 ldx [%o7+15*32],%o5 3341 movneg %xcc, %o4, %i4 3342 add %o7,16*32, %o7 3343 movneg %xcc, %o5, %i5 3344 ldx [%o7+0*32], %l0 3345 ldx [%o7+8*32], %l1 3346 ldx [%o7+1*32], %o4 3347 ldx [%o7+9*32], %o5 3348 movvs %icc, %o4, %l0 3349 ldx [%o7+2*32], %o4 3350 movvs %icc, %o5, %l1 3351 ldx [%o7+10*32],%o5 3352 move %icc, %o4, %l0 3353 ldx [%o7+3*32], %o4 3354 move %icc, %o5, %l1 3355 ldx [%o7+11*32],%o5 3356 movneg %icc, %o4, %l0 3357 ldx [%o7+4*32], %o4 3358 movneg %icc, %o5, %l1 3359 ldx [%o7+12*32],%o5 3360 movcs %xcc, %o4, %l0 3361 ldx [%o7+5*32],%o4 3362 movcs %xcc, %o5, %l1 3363 ldx [%o7+13*32],%o5 3364 movvs %xcc, %o4, %l0 3365 ldx [%o7+6*32], %o4 3366 movvs %xcc, %o5, %l1 3367 ldx [%o7+14*32],%o5 3368 move %xcc, %o4, %l0 3369 ldx [%o7+7*32], %o4 3370 move %xcc, %o5, %l1 3371 ldx [%o7+15*32],%o5 3372 movneg %xcc, %o4, %l0 3373 add %o7,16*32, %o7 3374 movneg %xcc, %o5, %l1 3375 ldx [%o7+0*32], %l2 3376 ldx [%o7+8*32], %l3 3377 ldx [%o7+1*32], %o4 3378 ldx [%o7+9*32], %o5 3379 movvs %icc, %o4, %l2 3380 ldx [%o7+2*32], %o4 3381 movvs %icc, %o5, %l3 3382 ldx [%o7+10*32],%o5 3383 move %icc, %o4, %l2 3384 ldx [%o7+3*32], %o4 3385 move %icc, 
%o5, %l3 3386 ldx [%o7+11*32],%o5 3387 movneg %icc, %o4, %l2 3388 ldx [%o7+4*32], %o4 3389 movneg %icc, %o5, %l3 3390 ldx [%o7+12*32],%o5 3391 movcs %xcc, %o4, %l2 3392 ldx [%o7+5*32],%o4 3393 movcs %xcc, %o5, %l3 3394 ldx [%o7+13*32],%o5 3395 movvs %xcc, %o4, %l2 3396 ldx [%o7+6*32], %o4 3397 movvs %xcc, %o5, %l3 3398 ldx [%o7+14*32],%o5 3399 move %xcc, %o4, %l2 3400 ldx [%o7+7*32], %o4 3401 move %xcc, %o5, %l3 3402 ldx [%o7+15*32],%o5 3403 movneg %xcc, %o4, %l2 3404 add %o7,16*32, %o7 3405 movneg %xcc, %o5, %l3 3406 ldx [%o7+0*32], %l4 3407 ldx [%o7+8*32], %l5 3408 ldx [%o7+1*32], %o4 3409 ldx [%o7+9*32], %o5 3410 movvs %icc, %o4, %l4 3411 ldx [%o7+2*32], %o4 3412 movvs %icc, %o5, %l5 3413 ldx [%o7+10*32],%o5 3414 move %icc, %o4, %l4 3415 ldx [%o7+3*32], %o4 3416 move %icc, %o5, %l5 3417 ldx [%o7+11*32],%o5 3418 movneg %icc, %o4, %l4 3419 ldx [%o7+4*32], %o4 3420 movneg %icc, %o5, %l5 3421 ldx [%o7+12*32],%o5 3422 movcs %xcc, %o4, %l4 3423 ldx [%o7+5*32],%o4 3424 movcs %xcc, %o5, %l5 3425 ldx [%o7+13*32],%o5 3426 movvs %xcc, %o4, %l4 3427 ldx [%o7+6*32], %o4 3428 movvs %xcc, %o5, %l5 3429 ldx [%o7+14*32],%o5 3430 move %xcc, %o4, %l4 3431 ldx [%o7+7*32], %o4 3432 move %xcc, %o5, %l5 3433 ldx [%o7+15*32],%o5 3434 movneg %xcc, %o4, %l4 3435 add %o7,16*32, %o7 3436 movneg %xcc, %o5, %l5 3437 ldx [%o7+0*32], %l6 3438 ldx [%o7+8*32], %l7 3439 ldx [%o7+1*32], %o4 3440 ldx [%o7+9*32], %o5 3441 movvs %icc, %o4, %l6 3442 ldx [%o7+2*32], %o4 3443 movvs %icc, %o5, %l7 3444 ldx [%o7+10*32],%o5 3445 move %icc, %o4, %l6 3446 ldx [%o7+3*32], %o4 3447 move %icc, %o5, %l7 3448 ldx [%o7+11*32],%o5 3449 movneg %icc, %o4, %l6 3450 ldx [%o7+4*32], %o4 3451 movneg %icc, %o5, %l7 3452 ldx [%o7+12*32],%o5 3453 movcs %xcc, %o4, %l6 3454 ldx [%o7+5*32],%o4 3455 movcs %xcc, %o5, %l7 3456 ldx [%o7+13*32],%o5 3457 movvs %xcc, %o4, %l6 3458 ldx [%o7+6*32], %o4 3459 movvs %xcc, %o5, %l7 3460 ldx [%o7+14*32],%o5 3461 move %xcc, %o4, %l6 3462 ldx [%o7+7*32], %o4 3463 move %xcc, %o5, %l7 3464 ldx [%o7+15*32],%o5 3465 movneg %xcc, %o4, %l6 3466 add %o7,16*32, %o7 3467 movneg %xcc, %o5, %l7 3468 save %sp,-128,%sp; or %g5,%fp,%fp 3469 ldx [%i7+0*32], %i0 3470 ldx [%i7+8*32], %i1 3471 ldx [%i7+1*32], %o4 3472 ldx [%i7+9*32], %o5 3473 movvs %icc, %o4, %i0 3474 ldx [%i7+2*32], %o4 3475 movvs %icc, %o5, %i1 3476 ldx [%i7+10*32],%o5 3477 move %icc, %o4, %i0 3478 ldx [%i7+3*32], %o4 3479 move %icc, %o5, %i1 3480 ldx [%i7+11*32],%o5 3481 movneg %icc, %o4, %i0 3482 ldx [%i7+4*32], %o4 3483 movneg %icc, %o5, %i1 3484 ldx [%i7+12*32],%o5 3485 movcs %xcc, %o4, %i0 3486 ldx [%i7+5*32],%o4 3487 movcs %xcc, %o5, %i1 3488 ldx [%i7+13*32],%o5 3489 movvs %xcc, %o4, %i0 3490 ldx [%i7+6*32], %o4 3491 movvs %xcc, %o5, %i1 3492 ldx [%i7+14*32],%o5 3493 move %xcc, %o4, %i0 3494 ldx [%i7+7*32], %o4 3495 move %xcc, %o5, %i1 3496 ldx [%i7+15*32],%o5 3497 movneg %xcc, %o4, %i0 3498 add %i7,16*32, %i7 3499 movneg %xcc, %o5, %i1 3500 ldx [%i7+0*32], %i2 3501 ldx [%i7+8*32], %i3 3502 ldx [%i7+1*32], %o4 3503 ldx [%i7+9*32], %o5 3504 movvs %icc, %o4, %i2 3505 ldx [%i7+2*32], %o4 3506 movvs %icc, %o5, %i3 3507 ldx [%i7+10*32],%o5 3508 move %icc, %o4, %i2 3509 ldx [%i7+3*32], %o4 3510 move %icc, %o5, %i3 3511 ldx [%i7+11*32],%o5 3512 movneg %icc, %o4, %i2 3513 ldx [%i7+4*32], %o4 3514 movneg %icc, %o5, %i3 3515 ldx [%i7+12*32],%o5 3516 movcs %xcc, %o4, %i2 3517 ldx [%i7+5*32],%o4 3518 movcs %xcc, %o5, %i3 3519 ldx [%i7+13*32],%o5 3520 movvs %xcc, %o4, %i2 3521 ldx [%i7+6*32], %o4 3522 movvs %xcc, %o5, %i3 3523 ldx [%i7+14*32],%o5 3524 move %xcc, %o4, %i2 
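
! Loop bookkeeping: %g4 packs the whole exponent-window state into one
! register -- the bit position still to be processed in the upper 32 bits,
! the current exponent word in the lower 32.  The "unpack %g4"/"re-pack %g4"
! sequences (srax/srl, sub 5, sllx/or) peel off the next 5-bit window before
! each group of squarings: its low two bits give the 8-byte offset inside
! the 32-byte pwrtbl cache line, and its upper three bits select which of
! the eight 32-byte lines to keep via the one-hot mask written to %ccr for
! the gather.  Each .Lstride_32 pass then performs five montsqr operations
! and one montmul by the gathered power, i.e. one step of fixed
! 5-bit-window modular exponentiation; brgez on the unpacked counter
! decides whether another pass follows.
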
3525 ldx [%i7+7*32], %o4 3526 move %xcc, %o5, %i3 3527 ldx [%i7+15*32],%o5 3528 movneg %xcc, %o4, %i2 3529 add %i7,16*32, %i7 3530 movneg %xcc, %o5, %i3 3531 ldx [%i7+0*32], %i4 3532 ldx [%i7+8*32], %i5 3533 ldx [%i7+1*32], %o4 3534 ldx [%i7+9*32], %o5 3535 movvs %icc, %o4, %i4 3536 ldx [%i7+2*32], %o4 3537 movvs %icc, %o5, %i5 3538 ldx [%i7+10*32],%o5 3539 move %icc, %o4, %i4 3540 ldx [%i7+3*32], %o4 3541 move %icc, %o5, %i5 3542 ldx [%i7+11*32],%o5 3543 movneg %icc, %o4, %i4 3544 ldx [%i7+4*32], %o4 3545 movneg %icc, %o5, %i5 3546 ldx [%i7+12*32],%o5 3547 movcs %xcc, %o4, %i4 3548 ldx [%i7+5*32],%o4 3549 movcs %xcc, %o5, %i5 3550 ldx [%i7+13*32],%o5 3551 movvs %xcc, %o4, %i4 3552 ldx [%i7+6*32], %o4 3553 movvs %xcc, %o5, %i5 3554 ldx [%i7+14*32],%o5 3555 move %xcc, %o4, %i4 3556 ldx [%i7+7*32], %o4 3557 move %xcc, %o5, %i5 3558 ldx [%i7+15*32],%o5 3559 movneg %xcc, %o4, %i4 3560 add %i7,16*32, %i7 3561 movneg %xcc, %o5, %i5 3562 ldx [%i7+0*32], %l0 3563 ldx [%i7+8*32], %l1 3564 ldx [%i7+1*32], %o4 3565 ldx [%i7+9*32], %o5 3566 movvs %icc, %o4, %l0 3567 ldx [%i7+2*32], %o4 3568 movvs %icc, %o5, %l1 3569 ldx [%i7+10*32],%o5 3570 move %icc, %o4, %l0 3571 ldx [%i7+3*32], %o4 3572 move %icc, %o5, %l1 3573 ldx [%i7+11*32],%o5 3574 movneg %icc, %o4, %l0 3575 ldx [%i7+4*32], %o4 3576 movneg %icc, %o5, %l1 3577 ldx [%i7+12*32],%o5 3578 movcs %xcc, %o4, %l0 3579 ldx [%i7+5*32],%o4 3580 movcs %xcc, %o5, %l1 3581 ldx [%i7+13*32],%o5 3582 movvs %xcc, %o4, %l0 3583 ldx [%i7+6*32], %o4 3584 movvs %xcc, %o5, %l1 3585 ldx [%i7+14*32],%o5 3586 move %xcc, %o4, %l0 3587 ldx [%i7+7*32], %o4 3588 move %xcc, %o5, %l1 3589 ldx [%i7+15*32],%o5 3590 movneg %xcc, %o4, %l0 3591 add %i7,16*32, %i7 3592 movneg %xcc, %o5, %l1 3593 ldx [%i7+0*32], %l2 3594 ldx [%i7+8*32], %l3 3595 ldx [%i7+1*32], %o4 3596 ldx [%i7+9*32], %o5 3597 movvs %icc, %o4, %l2 3598 ldx [%i7+2*32], %o4 3599 movvs %icc, %o5, %l3 3600 ldx [%i7+10*32],%o5 3601 move %icc, %o4, %l2 3602 ldx [%i7+3*32], %o4 3603 move %icc, %o5, %l3 3604 ldx [%i7+11*32],%o5 3605 movneg %icc, %o4, %l2 3606 ldx [%i7+4*32], %o4 3607 movneg %icc, %o5, %l3 3608 ldx [%i7+12*32],%o5 3609 movcs %xcc, %o4, %l2 3610 ldx [%i7+5*32],%o4 3611 movcs %xcc, %o5, %l3 3612 ldx [%i7+13*32],%o5 3613 movvs %xcc, %o4, %l2 3614 ldx [%i7+6*32], %o4 3615 movvs %xcc, %o5, %l3 3616 ldx [%i7+14*32],%o5 3617 move %xcc, %o4, %l2 3618 ldx [%i7+7*32], %o4 3619 move %xcc, %o5, %l3 3620 ldx [%i7+15*32],%o5 3621 movneg %xcc, %o4, %l2 3622 add %i7,16*32, %i7 3623 movneg %xcc, %o5, %l3 3624 ldx [%i7+0*32], %l4 3625 ldx [%i7+8*32], %l5 3626 ldx [%i7+1*32], %o4 3627 ldx [%i7+9*32], %o5 3628 movvs %icc, %o4, %l4 3629 ldx [%i7+2*32], %o4 3630 movvs %icc, %o5, %l5 3631 ldx [%i7+10*32],%o5 3632 move %icc, %o4, %l4 3633 ldx [%i7+3*32], %o4 3634 move %icc, %o5, %l5 3635 ldx [%i7+11*32],%o5 3636 movneg %icc, %o4, %l4 3637 ldx [%i7+4*32], %o4 3638 movneg %icc, %o5, %l5 3639 ldx [%i7+12*32],%o5 3640 movcs %xcc, %o4, %l4 3641 ldx [%i7+5*32],%o4 3642 movcs %xcc, %o5, %l5 3643 ldx [%i7+13*32],%o5 3644 movvs %xcc, %o4, %l4 3645 ldx [%i7+6*32], %o4 3646 movvs %xcc, %o5, %l5 3647 ldx [%i7+14*32],%o5 3648 move %xcc, %o4, %l4 3649 ldx [%i7+7*32], %o4 3650 move %xcc, %o5, %l5 3651 ldx [%i7+15*32],%o5 3652 movneg %xcc, %o4, %l4 3653 add %i7,16*32, %i7 3654 movneg %xcc, %o5, %l5 3655 ldx [%i7+0*32], %l6 3656 ldx [%i7+8*32], %l7 3657 ldx [%i7+1*32], %o4 3658 ldx [%i7+9*32], %o5 3659 movvs %icc, %o4, %l6 3660 ldx [%i7+2*32], %o4 3661 movvs %icc, %o5, %l7 3662 ldx [%i7+10*32],%o5 3663 move %icc, %o4, %l6 3664 ldx [%i7+3*32], %o4 
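
! About the hand-assembled opcodes used below: ".word 0x81b02940+n-1" and
! ".word 0x81b02920+n-1" encode the T4 montsqr/montmul instructions with the
! operand length (n 64-bit limbs) folded into the low bits, emitted as raw
! words presumably so that assemblers without T4 support still accept the
! file.  Each one is followed by "fbu,pn %fcc3,.Labort_*", which treats an
! unordered %fcc3 as the instruction having failed; on 32-bit builds the
! extra "and %fp,%g5,%g5 / brz,pn" marker test evidently also catches the
! case where the register windows pre-loaded with operands were disturbed.
! Either way the routine unwinds its windows and returns 0, leaving the
! caller to fall back to the plain software Montgomery path
! (cf. bn_mul_mont_t4 later in this file).
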
3665 move %icc, %o5, %l7 3666 ldx [%i7+11*32],%o5 3667 movneg %icc, %o4, %l6 3668 ldx [%i7+4*32], %o4 3669 movneg %icc, %o5, %l7 3670 ldx [%i7+12*32],%o5 3671 movcs %xcc, %o4, %l6 3672 ldx [%i7+5*32],%o4 3673 movcs %xcc, %o5, %l7 3674 ldx [%i7+13*32],%o5 3675 movvs %xcc, %o4, %l6 3676 ldx [%i7+6*32], %o4 3677 movvs %xcc, %o5, %l7 3678 ldx [%i7+14*32],%o5 3679 move %xcc, %o4, %l6 3680 ldx [%i7+7*32], %o4 3681 move %xcc, %o5, %l7 3682 ldx [%i7+15*32],%o5 3683 movneg %xcc, %o4, %l6 3684 add %i7,16*32, %i7 3685 movneg %xcc, %o5, %l7 3686 ldx [%i7+0*32], %o0 3687 ldx [%i7+8*32], %o1 3688 ldx [%i7+1*32], %o4 3689 ldx [%i7+9*32], %o5 3690 movvs %icc, %o4, %o0 3691 ldx [%i7+2*32], %o4 3692 movvs %icc, %o5, %o1 3693 ldx [%i7+10*32],%o5 3694 move %icc, %o4, %o0 3695 ldx [%i7+3*32], %o4 3696 move %icc, %o5, %o1 3697 ldx [%i7+11*32],%o5 3698 movneg %icc, %o4, %o0 3699 ldx [%i7+4*32], %o4 3700 movneg %icc, %o5, %o1 3701 ldx [%i7+12*32],%o5 3702 movcs %xcc, %o4, %o0 3703 ldx [%i7+5*32],%o4 3704 movcs %xcc, %o5, %o1 3705 ldx [%i7+13*32],%o5 3706 movvs %xcc, %o4, %o0 3707 ldx [%i7+6*32], %o4 3708 movvs %xcc, %o5, %o1 3709 ldx [%i7+14*32],%o5 3710 move %xcc, %o4, %o0 3711 ldx [%i7+7*32], %o4 3712 move %xcc, %o5, %o1 3713 ldx [%i7+15*32],%o5 3714 movneg %xcc, %o4, %o0 3715 add %i7,16*32, %i7 3716 movneg %xcc, %o5, %o1 3717 ldx [%i7+0*32], %o2 3718 ldx [%i7+8*32], %o3 3719 ldx [%i7+1*32], %o4 3720 ldx [%i7+9*32], %o5 3721 movvs %icc, %o4, %o2 3722 ldx [%i7+2*32], %o4 3723 movvs %icc, %o5, %o3 3724 ldx [%i7+10*32],%o5 3725 move %icc, %o4, %o2 3726 ldx [%i7+3*32], %o4 3727 move %icc, %o5, %o3 3728 ldx [%i7+11*32],%o5 3729 movneg %icc, %o4, %o2 3730 ldx [%i7+4*32], %o4 3731 movneg %icc, %o5, %o3 3732 ldx [%i7+12*32],%o5 3733 movcs %xcc, %o4, %o2 3734 ldx [%i7+5*32],%o4 3735 movcs %xcc, %o5, %o3 3736 ldx [%i7+13*32],%o5 3737 movvs %xcc, %o4, %o2 3738 ldx [%i7+6*32], %o4 3739 movvs %xcc, %o5, %o3 3740 ldx [%i7+14*32],%o5 3741 move %xcc, %o4, %o2 3742 ldx [%i7+7*32], %o4 3743 move %xcc, %o5, %o3 3744 ldx [%i7+15*32],%o5 3745 movneg %xcc, %o4, %o2 3746 add %i7,16*32, %i7 3747 movneg %xcc, %o5, %o3 3748 srax %g4, 32, %o4 ! unpack %g4 3749 srl %g4, %g0, %o5 3750 sub %o4, 5, %o4 3751 mov %g3, %i7 3752 sllx %o4, 32, %g4 ! re-pack %g4 3753 or %o5, %g4, %g4 3754 srl %o5, %o4, %o5 3755 srl %o5, 2, %o4 3756 and %o5, 3, %o5 3757 and %o4, 7, %o4 3758 sll %o5, 3, %o5 ! offset within first cache line 3759 add %o5, %i7, %i7 ! of the pwrtbl 3760 or %g0, 1, %o5 3761 sll %o5, %o4, %o4 3762 .word 0x81b02940+32-1 ! montsqr 32-1 3763 fbu,pn %fcc3,.Labort_32 3764#ifndef __arch64__ 3765 and %fp,%g5,%g5 3766 brz,pn %g5,.Labort_32 3767#endif 3768 nop 3769 .word 0x81b02940+32-1 ! montsqr 32-1 3770 fbu,pn %fcc3,.Labort_32 3771#ifndef __arch64__ 3772 and %fp,%g5,%g5 3773 brz,pn %g5,.Labort_32 3774#endif 3775 nop 3776 .word 0x81b02940+32-1 ! montsqr 32-1 3777 fbu,pn %fcc3,.Labort_32 3778#ifndef __arch64__ 3779 and %fp,%g5,%g5 3780 brz,pn %g5,.Labort_32 3781#endif 3782 nop 3783 .word 0x81b02940+32-1 ! montsqr 32-1 3784 fbu,pn %fcc3,.Labort_32 3785#ifndef __arch64__ 3786 and %fp,%g5,%g5 3787 brz,pn %g5,.Labort_32 3788#endif 3789 nop 3790 .word 0x81b02940+32-1 ! montsqr 32-1 3791 fbu,pn %fcc3,.Labort_32 3792#ifndef __arch64__ 3793 and %fp,%g5,%g5 3794 brz,pn %g5,.Labort_32 3795#endif 3796 nop 3797 wr %o4, %g0, %ccr 3798 .word 0x81b02920+32-1 ! 
montmul 32-1 3799 fbu,pn %fcc3,.Labort_32 3800#ifndef __arch64__ 3801 and %fp,%g5,%g5 3802 brz,pn %g5,.Labort_32 3803#endif 3804 3805 srax %g4, 32, %o4 3806#ifdef __arch64__ 3807 brgez %o4,.Lstride_32 3808 restore 3809 restore 3810 restore 3811 restore 3812 restore 3813#else 3814 brgez %o4,.Lstride_32 3815 restore; and %fp,%g5,%g5 3816 restore; and %fp,%g5,%g5 3817 restore; and %fp,%g5,%g5 3818 restore; and %fp,%g5,%g5 3819 brz,pn %g5,.Labort1_32 3820 restore 3821#endif 3822 .word 0x81b02310 !movxtod %l0,%f0 3823 .word 0x85b02311 !movxtod %l1,%f2 3824 .word 0x89b02312 !movxtod %l2,%f4 3825 .word 0x8db02313 !movxtod %l3,%f6 3826 .word 0x91b02314 !movxtod %l4,%f8 3827 .word 0x95b02315 !movxtod %l5,%f10 3828 .word 0x99b02316 !movxtod %l6,%f12 3829 .word 0x9db02317 !movxtod %l7,%f14 3830 .word 0xa1b02308 !movxtod %o0,%f16 3831 .word 0xa5b02309 !movxtod %o1,%f18 3832 .word 0xa9b0230a !movxtod %o2,%f20 3833 .word 0xadb0230b !movxtod %o3,%f22 3834 .word 0xbbb0230c !movxtod %o4,%f60 3835 .word 0xbfb0230d !movxtod %o5,%f62 3836#ifdef __arch64__ 3837 restore 3838#else 3839 and %fp,%g5,%g5 3840 restore 3841 and %g5,1,%o7 3842 and %fp,%g5,%g5 3843 srl %fp,0,%fp ! just in case? 3844 or %o7,%g5,%g5 3845 brz,a,pn %g5,.Ldone_32 3846 mov 0,%i0 ! return failure 3847#endif 3848 std %f0,[%g1+0*8] 3849 std %f2,[%g1+1*8] 3850 std %f4,[%g1+2*8] 3851 std %f6,[%g1+3*8] 3852 std %f8,[%g1+4*8] 3853 std %f10,[%g1+5*8] 3854 std %f12,[%g1+6*8] 3855 std %f14,[%g1+7*8] 3856 std %f16,[%g1+8*8] 3857 std %f18,[%g1+9*8] 3858 std %f20,[%g1+10*8] 3859 std %f22,[%g1+11*8] 3860 std %f60,[%g1+12*8] 3861 std %f62,[%g1+13*8] 3862 std %f24,[%g1+14*8] 3863 std %f26,[%g1+15*8] 3864 std %f28,[%g1+16*8] 3865 std %f30,[%g1+17*8] 3866 std %f32,[%g1+18*8] 3867 std %f34,[%g1+19*8] 3868 std %f36,[%g1+20*8] 3869 std %f38,[%g1+21*8] 3870 std %f40,[%g1+22*8] 3871 std %f42,[%g1+23*8] 3872 std %f44,[%g1+24*8] 3873 std %f46,[%g1+25*8] 3874 std %f48,[%g1+26*8] 3875 std %f50,[%g1+27*8] 3876 std %f52,[%g1+28*8] 3877 std %f54,[%g1+29*8] 3878 std %f56,[%g1+30*8] 3879 std %f58,[%g1+31*8] 3880 mov 1,%i0 ! return success 3881.Ldone_32: 3882 ret 3883 restore 3884 3885.Labort_32: 3886 restore 3887 restore 3888 restore 3889 restore 3890 restore 3891.Labort1_32: 3892 restore 3893 3894 mov 0,%i0 ! return failure 3895 ret 3896 restore 3897.type bn_pwr5_mont_t4_32, #function 3898.size bn_pwr5_mont_t4_32, .-bn_pwr5_mont_t4_32 3899.globl bn_mul_mont_t4 3900.align 32 3901bn_mul_mont_t4: 3902 add %sp, STACK_BIAS, %g4 ! real top of stack 3903 sll %o5, 3, %o5 ! size in bytes 3904 add %o5, 63, %g1 3905 andn %g1, 63, %g1 ! buffer size rounded up to 64 bytes 3906 sub %g4, %g1, %g1 3907 andn %g1, 63, %g1 ! align at 64 byte 3908 sub %g1, STACK_FRAME, %g1 ! new top of stack 3909 sub %g1, %g4, %g1 3910 3911 save %sp, %g1, %sp 3912 ld [%i4+0], %l0 ! pull n0[0..1] value 3913 ld [%i4+4], %l1 3914 add %sp, STACK_BIAS+STACK_FRAME, %l5 3915 ldx [%i2+0], %g2 ! m0=bp[0] 3916 sllx %l1, 32, %g1 3917 add %i2, 8, %i2 3918 or %l0, %g1, %g1 3919 3920 ldx [%i1+0], %o2 ! ap[0] 3921 3922 mulx %o2, %g2, %g4 ! ap[0]*bp[0] 3923 .word 0x8bb282c2 !umulxhi %o2,%g2,%g5 3924 3925 ldx [%i1+8], %o2 ! ap[1] 3926 add %i1, 16, %i1 3927 ldx [%i3+0], %o4 ! np[0] 3928 3929 mulx %g4, %g1, %g3 ! "tp[0]"*n0 3930 3931 mulx %o2, %g2, %o3 ! ap[1]*bp[0] 3932 .word 0x95b282c2 !umulxhi %o2,%g2,%o2 ! ahi=aj 3933 3934 mulx %o4, %g3, %o0 ! np[0]*m1 3935 .word 0x93b302c3 !umulxhi %o4,%g3,%o1 3936 3937 ldx [%i3+8], %o4 ! 
np[1] 3938 3939 addcc %g4, %o0, %o0 3940 add %i3, 16, %i3 3941 .word 0x93b00229 !addxc %g0,%o1,%o1 3942 3943 mulx %o4, %g3, %o5 ! np[1]*m1 3944 .word 0x99b302c3 !umulxhi %o4,%g3,%o4 ! nhi=nj 3945 3946 ba .L1st 3947 sub %i5, 24, %l4 ! cnt=num-3 3948 3949.align 16 3950.L1st: 3951 addcc %o3, %g5, %g4 3952 .word 0x8bb28220 !addxc %o2,%g0,%g5 3953 3954 ldx [%i1+0], %o2 ! ap[j] 3955 addcc %o5, %o1, %o0 3956 add %i1, 8, %i1 3957 .word 0x93b30220 !addxc %o4,%g0,%o1 ! nhi=nj 3958 3959 ldx [%i3+0], %o4 ! np[j] 3960 mulx %o2, %g2, %o3 ! ap[j]*bp[0] 3961 add %i3, 8, %i3 3962 .word 0x95b282c2 !umulxhi %o2,%g2,%o2 ! ahi=aj 3963 3964 mulx %o4, %g3, %o5 ! np[j]*m1 3965 addcc %g4, %o0, %o0 ! np[j]*m1+ap[j]*bp[0] 3966 .word 0x99b302c3 !umulxhi %o4,%g3,%o4 ! nhi=nj 3967 .word 0x93b00229 !addxc %g0,%o1,%o1 3968 stxa %o0, [%l5]0xe2 ! tp[j-1] 3969 add %l5, 8, %l5 ! tp++ 3970 3971 brnz,pt %l4, .L1st 3972 sub %l4, 8, %l4 ! j-- 3973!.L1st 3974 addcc %o3, %g5, %g4 3975 .word 0x8bb28220 !addxc %o2,%g0,%g5 ! ahi=aj 3976 3977 addcc %o5, %o1, %o0 3978 .word 0x93b30220 !addxc %o4,%g0,%o1 3979 addcc %g4, %o0, %o0 ! np[j]*m1+ap[j]*bp[0] 3980 .word 0x93b00229 !addxc %g0,%o1,%o1 3981 stxa %o0, [%l5]0xe2 ! tp[j-1] 3982 add %l5, 8, %l5 3983 3984 addcc %g5, %o1, %o1 3985 .word 0xa1b00220 !addxc %g0,%g0,%l0 ! upmost overflow bit 3986 stxa %o1, [%l5]0xe2 3987 add %l5, 8, %l5 3988 3989 ba .Louter 3990 sub %i5, 16, %l1 ! i=num-2 3991 3992.align 16 3993.Louter: 3994 ldx [%i2+0], %g2 ! m0=bp[i] 3995 add %i2, 8, %i2 3996 3997 sub %i1, %i5, %i1 ! rewind 3998 sub %i3, %i5, %i3 3999 sub %l5, %i5, %l5 4000 4001 ldx [%i1+0], %o2 ! ap[0] 4002 ldx [%i3+0], %o4 ! np[0] 4003 4004 mulx %o2, %g2, %g4 ! ap[0]*bp[i] 4005 ldx [%l5], %o7 ! tp[0] 4006 .word 0x8bb282c2 !umulxhi %o2,%g2,%g5 4007 ldx [%i1+8], %o2 ! ap[1] 4008 addcc %g4, %o7, %g4 ! ap[0]*bp[i]+tp[0] 4009 mulx %o2, %g2, %o3 ! ap[1]*bp[i] 4010 .word 0x8bb00225 !addxc %g0,%g5,%g5 4011 mulx %g4, %g1, %g3 ! tp[0]*n0 4012 .word 0x95b282c2 !umulxhi %o2,%g2,%o2 ! ahi=aj 4013 mulx %o4, %g3, %o0 ! np[0]*m1 4014 add %i1, 16, %i1 4015 .word 0x93b302c3 !umulxhi %o4,%g3,%o1 4016 ldx [%i3+8], %o4 ! np[1] 4017 add %i3, 16, %i3 4018 addcc %o0, %g4, %o0 4019 mulx %o4, %g3, %o5 ! np[1]*m1 4020 .word 0x93b00229 !addxc %g0,%o1,%o1 4021 .word 0x99b302c3 !umulxhi %o4,%g3,%o4 ! nhi=nj 4022 4023 ba .Linner 4024 sub %i5, 24, %l4 ! cnt=num-3 4025.align 16 4026.Linner: 4027 addcc %o3, %g5, %g4 4028 ldx [%l5+8], %o7 ! tp[j] 4029 .word 0x8bb28220 !addxc %o2,%g0,%g5 ! ahi=aj 4030 ldx [%i1+0], %o2 ! ap[j] 4031 add %i1, 8, %i1 4032 addcc %o5, %o1, %o0 4033 mulx %o2, %g2, %o3 ! ap[j]*bp[i] 4034 .word 0x93b30220 !addxc %o4,%g0,%o1 ! nhi=nj 4035 ldx [%i3+0], %o4 ! np[j] 4036 add %i3, 8, %i3 4037 .word 0x95b282c2 !umulxhi %o2,%g2,%o2 ! ahi=aj 4038 addcc %g4, %o7, %g4 ! ap[j]*bp[i]+tp[j] 4039 mulx %o4, %g3, %o5 ! np[j]*m1 4040 .word 0x8bb00225 !addxc %g0,%g5,%g5 4041 .word 0x99b302c3 !umulxhi %o4,%g3,%o4 ! nhi=nj 4042 addcc %o0, %g4, %o0 ! np[j]*m1+ap[j]*bp[i]+tp[j] 4043 .word 0x93b00229 !addxc %g0,%o1,%o1 4044 stx %o0, [%l5] ! tp[j-1] 4045 add %l5, 8, %l5 4046 brnz,pt %l4, .Linner 4047 sub %l4, 8, %l4 4048!.Linner 4049 ldx [%l5+8], %o7 ! tp[j] 4050 addcc %o3, %g5, %g4 4051 .word 0x8bb28220 !addxc %o2,%g0,%g5 ! ahi=aj 4052 addcc %g4, %o7, %g4 ! ap[j]*bp[i]+tp[j] 4053 .word 0x8bb00225 !addxc %g0,%g5,%g5 4054 4055 addcc %o5, %o1, %o0 4056 .word 0x93b30220 !addxc %o4,%g0,%o1 ! nhi=nj 4057 addcc %o0, %g4, %o0 ! np[j]*m1+ap[j]*bp[i]+tp[j] 4058 .word 0x93b00229 !addxc %g0,%o1,%o1 4059 stx %o0, [%l5] ! 
tp[j-1] 4060 4061 subcc %g0, %l0, %g0 ! move upmost overflow to CCR.xcc 4062 .word 0x93b24265 !addxccc %o1,%g5,%o1 4063 .word 0xa1b00220 !addxc %g0,%g0,%l0 4064 stx %o1, [%l5+8] 4065 add %l5, 16, %l5 4066 4067 brnz,pt %l1, .Louter 4068 sub %l1, 8, %l1 4069 4070 sub %i1, %i5, %i1 ! rewind 4071 sub %i3, %i5, %i3 4072 sub %l5, %i5, %l5 4073 ba .Lsub 4074 subcc %i5, 8, %l4 ! cnt=num-1 and clear CCR.xcc 4075 4076.align 16 4077.Lsub: 4078 ldx [%l5], %o7 4079 add %l5, 8, %l5 4080 ldx [%i3+0], %o4 4081 add %i3, 8, %i3 4082 subccc %o7, %o4, %l2 ! tp[j]-np[j] 4083 srlx %o7, 32, %o7 4084 srlx %o4, 32, %o4 4085 subccc %o7, %o4, %l3 4086 add %i0, 8, %i0 4087 st %l2, [%i0-4] ! reverse order 4088 st %l3, [%i0-8] 4089 brnz,pt %l4, .Lsub 4090 sub %l4, 8, %l4 4091 4092 sub %i3, %i5, %i3 ! rewind 4093 sub %l5, %i5, %l5 4094 sub %i0, %i5, %i0 4095 4096 subccc %l0, %g0, %l0 ! handle upmost overflow bit 4097 ba .Lcopy 4098 sub %i5, 8, %l4 4099 4100.align 16 4101.Lcopy: ! conditional copy 4102 ldx [%l5], %o7 4103 ldx [%i0+0], %l2 4104 stx %g0, [%l5] ! zap 4105 add %l5, 8, %l5 4106 movcs %icc, %o7, %l2 4107 stx %l2, [%i0+0] 4108 add %i0, 8, %i0 4109 brnz %l4, .Lcopy 4110 sub %l4, 8, %l4 4111 4112 mov 1, %o0 4113 ret 4114 restore 4115.type bn_mul_mont_t4, #function 4116.size bn_mul_mont_t4, .-bn_mul_mont_t4 4117.globl bn_mul_mont_gather5_t4 4118.align 32 4119bn_mul_mont_gather5_t4: 4120 add %sp, STACK_BIAS, %g4 ! real top of stack 4121 sll %o5, 3, %o5 ! size in bytes 4122 add %o5, 63, %g1 4123 andn %g1, 63, %g1 ! buffer size rounded up to 64 bytes 4124 sub %g4, %g1, %g1 4125 andn %g1, 63, %g1 ! align at 64 byte 4126 sub %g1, STACK_FRAME, %g1 ! new top of stack 4127 sub %g1, %g4, %g1 4128 LDPTR [%sp+STACK_7thARG], %g4 ! load power, 7th argument 4129 4130 save %sp, %g1, %sp 4131 srl %g4, 2, %o4 4132 and %g4, 3, %o5 4133 and %o4, 7, %o4 4134 sll %o5, 3, %o5 ! offset within first cache line 4135 add %o5, %i2, %i2 ! of the pwrtbl 4136 or %g0, 1, %o5 4137 sll %o5, %o4, %l7 4138 wr %l7, %g0, %ccr 4139 ldx [%i2+0*32], %g2 4140 ldx [%i2+1*32], %o4 4141 ldx [%i2+2*32], %o5 4142 movvs %icc, %o4, %g2 4143 ldx [%i2+3*32], %o4 4144 move %icc, %o5, %g2 4145 ldx [%i2+4*32], %o5 4146 movneg %icc, %o4, %g2 4147 ldx [%i2+5*32], %o4 4148 movcs %xcc, %o5, %g2 4149 ldx [%i2+6*32], %o5 4150 movvs %xcc, %o4, %g2 4151 ldx [%i2+7*32], %o4 4152 move %xcc, %o5, %g2 4153 add %i2,8*32, %i2 4154 movneg %xcc, %o4, %g2 4155 ld [%i4+0], %l0 ! pull n0[0..1] value 4156 ld [%i4+4], %l1 4157 add %sp, STACK_BIAS+STACK_FRAME, %l5 4158 sllx %l1, 32, %g1 4159 or %l0, %g1, %g1 4160 4161 ldx [%i1+0], %o2 ! ap[0] 4162 4163 mulx %o2, %g2, %g4 ! ap[0]*bp[0] 4164 .word 0x8bb282c2 !umulxhi %o2,%g2,%g5 4165 4166 ldx [%i1+8], %o2 ! ap[1] 4167 add %i1, 16, %i1 4168 ldx [%i3+0], %o4 ! np[0] 4169 4170 mulx %g4, %g1, %g3 ! "tp[0]"*n0 4171 4172 mulx %o2, %g2, %o3 ! ap[1]*bp[0] 4173 .word 0x95b282c2 !umulxhi %o2,%g2,%o2 ! ahi=aj 4174 4175 mulx %o4, %g3, %o0 ! np[0]*m1 4176 .word 0x93b302c3 !umulxhi %o4,%g3,%o1 4177 4178 ldx [%i3+8], %o4 ! np[1] 4179 4180 addcc %g4, %o0, %o0 4181 add %i3, 16, %i3 4182 .word 0x93b00229 !addxc %g0,%o1,%o1 4183 4184 mulx %o4, %g3, %o5 ! np[1]*m1 4185 .word 0x99b302c3 !umulxhi %o4,%g3,%o4 ! nhi=nj 4186 4187 ba .L1st_g5 4188 sub %i5, 24, %l4 ! cnt=num-3 4189 4190.align 16 4191.L1st_g5: 4192 addcc %o3, %g5, %g4 4193 .word 0x8bb28220 !addxc %o2,%g0,%g5 4194 4195 ldx [%i1+0], %o2 ! ap[j] 4196 addcc %o5, %o1, %o0 4197 add %i1, 8, %i1 4198 .word 0x93b30220 !addxc %o4,%g0,%o1 ! nhi=nj 4199 4200 ldx [%i3+0], %o4 ! np[j] 4201 mulx %o2, %g2, %o3 ! 
ap[j]*bp[0] 4202 add %i3, 8, %i3 4203 .word 0x95b282c2 !umulxhi %o2,%g2,%o2 ! ahi=aj 4204 4205 mulx %o4, %g3, %o5 ! np[j]*m1 4206 addcc %g4, %o0, %o0 ! np[j]*m1+ap[j]*bp[0] 4207 .word 0x99b302c3 !umulxhi %o4,%g3,%o4 ! nhi=nj 4208 .word 0x93b00229 !addxc %g0,%o1,%o1 4209 stxa %o0, [%l5]0xe2 ! tp[j-1] 4210 add %l5, 8, %l5 ! tp++ 4211 4212 brnz,pt %l4, .L1st_g5 4213 sub %l4, 8, %l4 ! j-- 4214!.L1st_g5 4215 addcc %o3, %g5, %g4 4216 .word 0x8bb28220 !addxc %o2,%g0,%g5 ! ahi=aj 4217 4218 addcc %o5, %o1, %o0 4219 .word 0x93b30220 !addxc %o4,%g0,%o1 4220 addcc %g4, %o0, %o0 ! np[j]*m1+ap[j]*bp[0] 4221 .word 0x93b00229 !addxc %g0,%o1,%o1 4222 stxa %o0, [%l5]0xe2 ! tp[j-1] 4223 add %l5, 8, %l5 4224 4225 addcc %g5, %o1, %o1 4226 .word 0xa1b00220 !addxc %g0,%g0,%l0 ! upmost overflow bit 4227 stxa %o1, [%l5]0xe2 4228 add %l5, 8, %l5 4229 4230 ba .Louter_g5 4231 sub %i5, 16, %l1 ! i=num-2 4232 4233.align 16 4234.Louter_g5: 4235 wr %l7, %g0, %ccr 4236 ldx [%i2+0*32], %g2 4237 ldx [%i2+1*32], %o4 4238 ldx [%i2+2*32], %o5 4239 movvs %icc, %o4, %g2 4240 ldx [%i2+3*32], %o4 4241 move %icc, %o5, %g2 4242 ldx [%i2+4*32], %o5 4243 movneg %icc, %o4, %g2 4244 ldx [%i2+5*32], %o4 4245 movcs %xcc, %o5, %g2 4246 ldx [%i2+6*32], %o5 4247 movvs %xcc, %o4, %g2 4248 ldx [%i2+7*32], %o4 4249 move %xcc, %o5, %g2 4250 add %i2,8*32, %i2 4251 movneg %xcc, %o4, %g2 4252 sub %i1, %i5, %i1 ! rewind 4253 sub %i3, %i5, %i3 4254 sub %l5, %i5, %l5 4255 4256 ldx [%i1+0], %o2 ! ap[0] 4257 ldx [%i3+0], %o4 ! np[0] 4258 4259 mulx %o2, %g2, %g4 ! ap[0]*bp[i] 4260 ldx [%l5], %o7 ! tp[0] 4261 .word 0x8bb282c2 !umulxhi %o2,%g2,%g5 4262 ldx [%i1+8], %o2 ! ap[1] 4263 addcc %g4, %o7, %g4 ! ap[0]*bp[i]+tp[0] 4264 mulx %o2, %g2, %o3 ! ap[1]*bp[i] 4265 .word 0x8bb00225 !addxc %g0,%g5,%g5 4266 mulx %g4, %g1, %g3 ! tp[0]*n0 4267 .word 0x95b282c2 !umulxhi %o2,%g2,%o2 ! ahi=aj 4268 mulx %o4, %g3, %o0 ! np[0]*m1 4269 add %i1, 16, %i1 4270 .word 0x93b302c3 !umulxhi %o4,%g3,%o1 4271 ldx [%i3+8], %o4 ! np[1] 4272 add %i3, 16, %i3 4273 addcc %o0, %g4, %o0 4274 mulx %o4, %g3, %o5 ! np[1]*m1 4275 .word 0x93b00229 !addxc %g0,%o1,%o1 4276 .word 0x99b302c3 !umulxhi %o4,%g3,%o4 ! nhi=nj 4277 4278 ba .Linner_g5 4279 sub %i5, 24, %l4 ! cnt=num-3 4280.align 16 4281.Linner_g5: 4282 addcc %o3, %g5, %g4 4283 ldx [%l5+8], %o7 ! tp[j] 4284 .word 0x8bb28220 !addxc %o2,%g0,%g5 ! ahi=aj 4285 ldx [%i1+0], %o2 ! ap[j] 4286 add %i1, 8, %i1 4287 addcc %o5, %o1, %o0 4288 mulx %o2, %g2, %o3 ! ap[j]*bp[i] 4289 .word 0x93b30220 !addxc %o4,%g0,%o1 ! nhi=nj 4290 ldx [%i3+0], %o4 ! np[j] 4291 add %i3, 8, %i3 4292 .word 0x95b282c2 !umulxhi %o2,%g2,%o2 ! ahi=aj 4293 addcc %g4, %o7, %g4 ! ap[j]*bp[i]+tp[j] 4294 mulx %o4, %g3, %o5 ! np[j]*m1 4295 .word 0x8bb00225 !addxc %g0,%g5,%g5 4296 .word 0x99b302c3 !umulxhi %o4,%g3,%o4 ! nhi=nj 4297 addcc %o0, %g4, %o0 ! np[j]*m1+ap[j]*bp[i]+tp[j] 4298 .word 0x93b00229 !addxc %g0,%o1,%o1 4299 stx %o0, [%l5] ! tp[j-1] 4300 add %l5, 8, %l5 4301 brnz,pt %l4, .Linner_g5 4302 sub %l4, 8, %l4 4303!.Linner_g5 4304 ldx [%l5+8], %o7 ! tp[j] 4305 addcc %o3, %g5, %g4 4306 .word 0x8bb28220 !addxc %o2,%g0,%g5 ! ahi=aj 4307 addcc %g4, %o7, %g4 ! ap[j]*bp[i]+tp[j] 4308 .word 0x8bb00225 !addxc %g0,%g5,%g5 4309 4310 addcc %o5, %o1, %o0 4311 .word 0x93b30220 !addxc %o4,%g0,%o1 ! nhi=nj 4312 addcc %o0, %g4, %o0 ! np[j]*m1+ap[j]*bp[i]+tp[j] 4313 .word 0x93b00229 !addxc %g0,%o1,%o1 4314 stx %o0, [%l5] ! tp[j-1] 4315 4316 subcc %g0, %l0, %g0 ! 
	! move upmost overflow to CCR.xcc
	.word	0x93b24265 !addxccc	%o1,%g5,%o1
	.word	0xa1b00220 !addxc	%g0,%g0,%l0
	stx	%o1, [%l5+8]
	add	%l5, 16, %l5

	brnz,pt	%l1, .Louter_g5
	sub	%l1, 8, %l1

	sub	%i1, %i5, %i1		! rewind
	sub	%i3, %i5, %i3
	sub	%l5, %i5, %l5
	ba	.Lsub_g5
	subcc	%i5, 8, %l4		! cnt=num-1 and clear CCR.xcc

.align	16
.Lsub_g5:
	ldx	[%l5], %o7
	add	%l5, 8, %l5
	ldx	[%i3+0], %o4
	add	%i3, 8, %i3
	subccc	%o7, %o4, %l2		! tp[j]-np[j]
	srlx	%o7, 32, %o7
	srlx	%o4, 32, %o4
	subccc	%o7, %o4, %l3
	add	%i0, 8, %i0
	st	%l2, [%i0-4]		! reverse order
	st	%l3, [%i0-8]
	brnz,pt	%l4, .Lsub_g5
	sub	%l4, 8, %l4

	sub	%i3, %i5, %i3		! rewind
	sub	%l5, %i5, %l5
	sub	%i0, %i5, %i0

	subccc	%l0, %g0, %l0		! handle upmost overflow bit
	ba	.Lcopy_g5
	sub	%i5, 8, %l4

.align	16
.Lcopy_g5:				! conditional copy
	ldx	[%l5], %o7
	ldx	[%i0+0], %l2
	stx	%g0, [%l5]		! zap
	add	%l5, 8, %l5
	movcs	%icc, %o7, %l2
	stx	%l2, [%i0+0]
	add	%i0, 8, %i0
	brnz	%l4, .Lcopy_g5
	sub	%l4, 8, %l4

	mov	1, %o0
	ret
	restore
.type	bn_mul_mont_gather5_t4, #function
.size	bn_mul_mont_gather5_t4, .-bn_mul_mont_gather5_t4
.globl	bn_flip_t4
.align	32
bn_flip_t4:
.Loop_flip:
	ld	[%o1+0], %o4
	sub	%o2, 1, %o2
	ld	[%o1+4], %o5
	add	%o1, 8, %o1
	st	%o5, [%o0+0]
	st	%o4, [%o0+4]
	brnz	%o2, .Loop_flip
	add	%o0, 8, %o0
	retl
	nop
.type	bn_flip_t4, #function
.size	bn_flip_t4, .-bn_flip_t4

.globl	bn_flip_n_scatter5_t4
.align	32
bn_flip_n_scatter5_t4:
	sll	%o3, 3, %o3
	srl	%o1, 1, %o1
	add	%o3, %o2, %o2		! &pwrtbl[pwr]
	sub	%o1, 1, %o1
.Loop_flip_n_scatter5:
	ld	[%o0+0], %o4		! inp[i]
	ld	[%o0+4], %o5
	add	%o0, 8, %o0
	sllx	%o5, 32, %o5
	or	%o4, %o5, %o5
	stx	%o5, [%o2]
	add	%o2, 32*8, %o2
	brnz	%o1, .Loop_flip_n_scatter5
	sub	%o1, 1, %o1
	retl
	nop
.type	bn_flip_n_scatter5_t4, #function
.size	bn_flip_n_scatter5_t4, .-bn_flip_n_scatter5_t4

.globl	bn_gather5_t4
.align	32
bn_gather5_t4:
	srl	%o3, 2, %o4
	and	%o3, 3, %o5
	and	%o4, 7, %o4
	sll	%o5, 3, %o5		! offset within first cache line
	add	%o5, %o2, %o2		! of the pwrtbl
	or	%g0, 1, %o5
	sll	%o5, %o4, %g1
	wr	%g1, %g0, %ccr
	sub	%o1, 1, %o1
.Loop_gather5:
	ldx	[%o2+0*32], %g1
	ldx	[%o2+1*32], %o4
	ldx	[%o2+2*32], %o5
	movvs	%icc, %o4, %g1
	ldx	[%o2+3*32], %o4
	move	%icc, %o5, %g1
	ldx	[%o2+4*32], %o5
	movneg	%icc, %o4, %g1
	ldx	[%o2+5*32], %o4
	movcs	%xcc, %o5, %g1
	ldx	[%o2+6*32], %o5
	movvs	%xcc, %o4, %g1
	ldx	[%o2+7*32], %o4
	move	%xcc, %o5, %g1
	add	%o2,8*32, %o2
	movneg	%xcc, %o4, %g1
	stx	%g1, [%o0]
	add	%o0, 8, %o0
	brnz	%o1, .Loop_gather5
	sub	%o1, 1, %o1

	retl
	nop
.type	bn_gather5_t4, #function
.size	bn_gather5_t4, .-bn_gather5_t4

.asciz	"Montgomery Multiplication for SPARC T4, David S. Miller, Andy Polyakov"
.align	4
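
! The software fallback bn_mul_mont_t4 above is a word-by-word Montgomery
! multiplication.  The commented-out C below is a minimal reference sketch
! of that algorithm, not OpenSSL code: the name mont_mul_ref is hypothetical,
! it assumes 64-bit limbs and a C99 compiler with unsigned __int128
! (gcc/clang), and it trades the assembly's carry-flag tricks for clarity.
!
!   #include <stdint.h>
!   #include <stdlib.h>
!   typedef uint64_t u64;
!
!   /* rp = ap * bp * 2^(-64*num) mod np;  n0 = -np[0]^(-1) mod 2^64 */
!   static int mont_mul_ref(u64 *rp, const u64 *ap, const u64 *bp,
!                           const u64 *np, u64 n0, int num)
!   {
!       u64 *tp = calloc((size_t)num + 1, sizeof(u64));
!       u64 top = 0;                            /* "upmost overflow bit" */
!       if (tp == NULL)
!           return 0;
!
!       for (int i = 0; i < num; i++) {
!           unsigned __int128 c = 0;
!           for (int j = 0; j < num; j++) {     /* tp += bp[i]*ap */
!               c += (unsigned __int128)ap[j] * bp[i] + tp[j];
!               tp[j] = (u64)c;
!               c >>= 64;
!           }
!           c += tp[num];
!           tp[num] = (u64)c;
!           top = (u64)(c >> 64);
!
!           u64 m = tp[0] * n0;                 /* "tp[0]*n0" */
!           c = (unsigned __int128)np[0] * m + tp[0];
!           c >>= 64;                           /* low word cancels */
!           for (int j = 1; j < num; j++) {     /* tp = (tp + m*np)/2^64 */
!               c += (unsigned __int128)np[j] * m + tp[j];
!               tp[j - 1] = (u64)c;
!               c >>= 64;
!           }
!           c += tp[num];
!           tp[num - 1] = (u64)c;
!           tp[num] = top + (u64)(c >> 64);
!       }
!
!       /* conditional subtraction and copy, as in .Lsub/.Lcopy */
!       u64 borrow = 0;
!       for (int j = 0; j < num; j++) {
!           unsigned __int128 d = (unsigned __int128)tp[j] - np[j] - borrow;
!           rp[j] = (u64)d;
!           borrow = (u64)(d >> 64) & 1;
!       }
!       u64 mask = 0 - (u64)(tp[num] < borrow); /* all-ones: keep unreduced tp */
!       for (int j = 0; j < num; j++)
!           rp[j] = (tp[j] & mask) | (rp[j] & ~mask);
!       free(tp);
!       return 1;
!   }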