/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

	.ident	"@(#)__vsin.S 1.9 06/01/23 SMI"

	.file	"__vsin.S"

#include "libm.h"

! Read-only constant pool for __vsin, aligned to 64 bytes.
!
! Each entry below is a pair of 32-bit words and occupies 8 bytes, with
! the first word at the lower address.  The byte offset of every entry
! is given by the offset macro of the same name defined right after
! this table (p4 = 0x0 ... thresh = 0xa8); the trailing comment on each
! entry repeats that offset and name.
!
! __vsin loads most entries whole with ldd.  Keep the order and size of
! the entries unchanged: the code addresses them only through those
! fixed offsets.

	RO_DATA
	.align	64
constants:
	.word	0x3ec718e3,0xa6972785	! 0x00 p4
	.word	0x3ef9fd39,0x94293940	! 0x08 q4
	.word	0xbf2a019f,0x75ee4be1	! 0x10 p3
	.word	0xbf56c16b,0xba552569	! 0x18 q3
	.word	0x3f811111,0x1108c703	! 0x20 p2
	.word	0x3fa55555,0x554f5b35	! 0x28 q2
	.word	0xbfc55555,0x555554d0	! 0x30 p1
	.word	0xbfdfffff,0xffffff85	! 0x38 q1
	.word	0x3ff00000,0x00000000	! 0x40 one (bit pattern of 1.0)
	.word	0xbfc55555,0x5551fc28	! 0x48 pp1
	.word	0x3f811107,0x62eacc9d	! 0x50 pp2
	.word	0xbfdfffff,0xffff6328	! 0x58 qq1
	.word	0x3fa55551,0x5f7acf0c	! 0x60 qq2
	.word	0x3fe45f30,0x6dc9c883	! 0x68 invpio2
	.word	0x43380000,0x00000000	! 0x70 round
	.word	0x3ff921fb,0x54400000	! 0x78 pio2_1
	.word	0x3dd0b461,0x1a600000	! 0x80 pio2_2
	.word	0x3ba3198a,0x2e000000	! 0x88 pio2_3
	.word	0x397b839a,0x252049c1	! 0x90 pio2_3t
	.word	0x80000000,0x00004000	! 0x98 f30val: %f30 = 0x80000000, %f31 = 0x4000
! The remaining three entries sit at 0xa0 (mask), 0xa8 (thresh) and
! 0xb0 (thresh+8).  Besides the ldd of mask, the medium-range code
! reads single words from them with ld at thresh, thresh+4 and
! thresh-4 plus an index derived from bit 0 or bit 1 of n, so the
! low-order words here are data too, not padding.
	.word	0xffff8000,0x00000000	! N.B.: low-order words used
	.word	0x3fc90000,0x80000000	! for sign bit hacking; see
	.word	0x3fc40000,0x00000000	!
references to "thresh" below 58 59#define p4 0x0 60#define q4 0x08 61#define p3 0x10 62#define q3 0x18 63#define p2 0x20 64#define q2 0x28 65#define p1 0x30 66#define q1 0x38 67#define one 0x40 68#define pp1 0x48 69#define pp2 0x50 70#define qq1 0x58 71#define qq2 0x60 72#define invpio2 0x68 73#define round 0x70 74#define pio2_1 0x78 75#define pio2_2 0x80 76#define pio2_3 0x88 77#define pio2_3t 0x90 78#define f30val 0x98 79#define mask 0xa0 80#define thresh 0xa8 81 82! local storage indices 83 84#define xsave STACK_BIAS-0x8 85#define ysave STACK_BIAS-0x10 86#define nsave STACK_BIAS-0x14 87#define sxsave STACK_BIAS-0x18 88#define sysave STACK_BIAS-0x1c 89#define biguns STACK_BIAS-0x20 90#define n2 STACK_BIAS-0x24 91#define n1 STACK_BIAS-0x28 92#define n0 STACK_BIAS-0x2c 93#define x2_1 STACK_BIAS-0x40 94#define x1_1 STACK_BIAS-0x50 95#define x0_1 STACK_BIAS-0x60 96#define y2_0 STACK_BIAS-0x70 97#define y1_0 STACK_BIAS-0x80 98#define y0_0 STACK_BIAS-0x90 99! sizeof temp storage - must be a multiple of 16 for V9 100#define tmps 0x90 101 102!-------------------------------------------------------------- 103! Some defines to keep code more readable 104#define LIM_l6 %l6 105! in primary range, contains |x| upper limit when cos(x)=1. 106! in transferring to medium range, denotes what loop was active. 107!-------------------------------------------------------------- 108 109 ENTRY(__vsin) 110 save %sp,-SA(MINFRAME)-tmps,%sp 111 PIC_SETUP(g5) 112 PIC_SET(g5,__vlibm_TBL_sincos_hi,l3) 113 PIC_SET(g5,__vlibm_TBL_sincos_lo,l4) 114 PIC_SET(g5,constants,l5) 115 mov %l5,%g1 116 wr %g0,0x82,%asi ! set %asi for non-faulting loads 117 118! ========== primary range ========== 119 120! register use 121 122! i0 n 123! i1 x 124! i2 stridex 125! i3 y 126! i4 stridey 127! i5 0x80000000 128 129! l0 hx0 130! l1 hx1 131! l2 hx2 132! l3 __vlibm_TBL_sincos_hi 133! l4 __vlibm_TBL_sincos_lo 134! l5 0x3fc90000 135! l6 0x3e400000 136! l7 0x3fe921fb 137 138! 
the following are 64-bit registers in both V8+ and V9 139 140! g1 scratch 141! g5 142 143! o0 py0 144! o1 py1 145! o2 py2 146! o3 oy0 147! o4 oy1 148! o5 oy2 149! o7 scratch 150 151! f0 x0 152! f2 153! f4 154! f6 155! f8 scratch for table base 156! f9 signbit0 157! f10 x1 158! f12 159! f14 160! f16 161! f18 scratch for table base 162! f19 signbit1 163! f20 x2 164! f22 165! f24 166! f26 167! f28 scratch for table base 168! f29 signbit2 169! f30 0x80000000 170! f31 0x4000 171! f32 172! f34 173! f36 174! f38 175! f40 176! f42 177! f44 0xffff800000000000 178! f46 p1 179! f48 p2 180! f50 p3 181! f52 p4 182! f54 one 183! f56 pp1 184! f58 pp2 185! f60 qq1 186! f62 qq2 187 188#ifdef __sparcv9 189 stx %i1,[%fp+xsave] ! save arguments 190 stx %i3,[%fp+ysave] 191#else 192 st %i1,[%fp+xsave] ! save arguments 193 st %i3,[%fp+ysave] 194#endif 195 st %i0,[%fp+nsave] 196 st %i2,[%fp+sxsave] 197 st %i4,[%fp+sysave] 198 sethi %hi(0x80000000),%i5 ! load/set up constants 199 sethi %hi(0x3fc90000),%l5 200 sethi %hi(0x3e400000),LIM_l6 201 sethi %hi(0x3fe921fb),%l7 202 or %l7,%lo(0x3fe921fb),%l7 203 ldd [%g1+f30val],%f30 204 ldd [%g1+mask],%f44 205 ldd [%g1+p1],%f46 206 ldd [%g1+p2],%f48 207 ldd [%g1+p3],%f50 208 ldd [%g1+p4],%f52 209 ldd [%g1+one],%f54 210 ldd [%g1+pp1],%f56 211 ldd [%g1+pp2],%f58 212 ldd [%g1+qq1],%f60 213 ldd [%g1+qq2],%f62 214 sll %i2,3,%i2 ! scale strides 215 sll %i4,3,%i4 216 add %fp,x0_1,%o3 ! precondition loop 217 add %fp,x0_1,%o4 218 add %fp,x0_1,%o5 219 ld [%i1],%l0 ! hx = *x 220 ld [%i1],%f0 221 ld [%i1+4],%f1 222 andn %l0,%i5,%l0 ! hx &= ~0x80000000 223 add %i1,%i2,%i1 ! x += stridex 224 225 ba,pt %icc,.loop0 226! delay slot 227 nop 228 229 .align 32 230.loop0: 231 lda [%i1]%asi,%l1 ! preload next argument 232 sub %l0,LIM_l6,%g1 233 sub %l7,%l0,%o7 234 fands %f0,%f30,%f9 ! save signbit 235 236 lda [%i1]%asi,%f10 237 orcc %o7,%g1,%g0 238 mov %i3,%o0 ! py0 = y 239 bl,pn %icc,.range0 ! if hx < 0x3e400000 or > 0x3fe921fb 240 241! 
delay slot 242 lda [%i1+4]%asi,%f11 243 addcc %i0,-1,%i0 244 add %i3,%i4,%i3 ! y += stridey 245 ble,pn %icc,.endloop1 246 247! delay slot 248 andn %l1,%i5,%l1 249 add %i1,%i2,%i1 ! x += stridex 250 fabsd %f0,%f0 251 fmuld %f54,%f54,%f54 ! one*one; a nop for alignment only 252 253.loop1: 254 lda [%i1]%asi,%l2 ! preload next argument 255 sub %l1,LIM_l6,%g1 256 sub %l7,%l1,%o7 257 fands %f10,%f30,%f19 ! save signbit 258 259 lda [%i1]%asi,%f20 260 orcc %o7,%g1,%g0 261 mov %i3,%o1 ! py1 = y 262 bl,pn %icc,.range1 ! if hx < 0x3e400000 or > 0x3fe921fb 263 264! delay slot 265 lda [%i1+4]%asi,%f21 266 addcc %i0,-1,%i0 267 add %i3,%i4,%i3 ! y += stridey 268 ble,pn %icc,.endloop2 269 270! delay slot 271 andn %l2,%i5,%l2 272 add %i1,%i2,%i1 ! x += stridex 273 fabsd %f10,%f10 274 fmuld %f54,%f54,%f54 ! one*one; a nop for alignment only 275 276.loop2: 277 st %f6,[%o3] 278 sub %l2,LIM_l6,%g1 279 sub %l7,%l2,%o7 280 fands %f20,%f30,%f29 ! save signbit 281 282 st %f7,[%o3+4] 283 orcc %g1,%o7,%g0 284 mov %i3,%o2 ! py2 = y 285 bl,pn %icc,.range2 ! if hx < 0x3e400000 or > 0x3fe921fb 286 287! delay slot 288 add %i3,%i4,%i3 ! y += stridey 289 cmp %l0,%l5 290 fabsd %f20,%f20 291 bl,pn %icc,.case4 292 293! delay slot 294 st %f16,[%o4] 295 cmp %l1,%l5 296 fpadd32s %f0,%f31,%f8 297 bl,pn %icc,.case2 298 299! delay slot 300 st %f17,[%o4+4] 301 cmp %l2,%l5 302 fpadd32s %f10,%f31,%f18 303 bl,pn %icc,.case1 304 305! 
delay slot 306 st %f26,[%o5] 307 mov %o0,%o3 308 sethi %hi(0x3fc3c000),%o7 309 fpadd32s %f20,%f31,%f28 310 311 st %f27,[%o5+4] 312 fand %f8,%f44,%f2 313 mov %o1,%o4 314 315 fand %f18,%f44,%f12 316 mov %o2,%o5 317 sub %l0,%o7,%l0 318 319 fand %f28,%f44,%f22 320 sub %l1,%o7,%l1 321 sub %l2,%o7,%l2 322 323 fsubd %f0,%f2,%f0 324 srl %l0,10,%l0 325 add %l3,8,%g1 326 327 fsubd %f10,%f12,%f10 328 srl %l1,10,%l1 329 330 fsubd %f20,%f22,%f20 331 srl %l2,10,%l2 332 333 fmuld %f0,%f0,%f2 334 andn %l0,0x1f,%l0 335 336 fmuld %f10,%f10,%f12 337 andn %l1,0x1f,%l1 338 339 fmuld %f20,%f20,%f22 340 andn %l2,0x1f,%l2 341 342 fmuld %f2,%f58,%f6 343 ldd [%l3+%l0],%f32 344 345 fmuld %f12,%f58,%f16 346 ldd [%l3+%l1],%f36 347 348 fmuld %f22,%f58,%f26 349 ldd [%l3+%l2],%f40 350 351 faddd %f6,%f56,%f6 352 fmuld %f2,%f62,%f4 353 ldd [%g1+%l0],%f34 354 355 faddd %f16,%f56,%f16 356 fmuld %f12,%f62,%f14 357 ldd [%g1+%l1],%f38 358 359 faddd %f26,%f56,%f26 360 fmuld %f22,%f62,%f24 361 ldd [%g1+%l2],%f42 362 363 fmuld %f2,%f6,%f6 364 faddd %f4,%f60,%f4 365 366 fmuld %f12,%f16,%f16 367 faddd %f14,%f60,%f14 368 369 fmuld %f22,%f26,%f26 370 faddd %f24,%f60,%f24 371 372 faddd %f6,%f54,%f6 373 fmuld %f2,%f4,%f4 374 375 faddd %f16,%f54,%f16 376 fmuld %f12,%f14,%f14 377 378 faddd %f26,%f54,%f26 379 fmuld %f22,%f24,%f24 380 381 fmuld %f0,%f6,%f6 382 ldd [%l4+%l0],%f2 383 384 fmuld %f10,%f16,%f16 385 ldd [%l4+%l1],%f12 386 387 fmuld %f20,%f26,%f26 388 ldd [%l4+%l2],%f22 389 390 fmuld %f4,%f32,%f4 391 lda [%i1]%asi,%l0 ! preload next argument 392 393 fmuld %f14,%f36,%f14 394 lda [%i1]%asi,%f0 395 396 fmuld %f24,%f40,%f24 397 lda [%i1+4]%asi,%f1 398 399 fmuld %f6,%f34,%f6 400 add %i1,%i2,%i1 ! 
x += stridex 401 402 fmuld %f16,%f38,%f16 403 404 fmuld %f26,%f42,%f26 405 406 faddd %f6,%f4,%f6 407 408 faddd %f16,%f14,%f16 409 410 faddd %f26,%f24,%f26 411 412 faddd %f6,%f2,%f6 413 414 faddd %f16,%f12,%f16 415 416 faddd %f26,%f22,%f26 417 418 faddd %f6,%f32,%f6 419 420 faddd %f16,%f36,%f16 421 422 faddd %f26,%f40,%f26 423 andn %l0,%i5,%l0 ! hx &= ~0x80000000 424 425 fors %f6,%f9,%f6 426 addcc %i0,-1,%i0 427 428 fors %f16,%f19,%f16 429 bg,pt %icc,.loop0 430 431! delay slot 432 fors %f26,%f29,%f26 433 434 ba,pt %icc,.endloop0 435! delay slot 436 nop 437 438 .align 32 439.case1: 440 st %f27,[%o5+4] 441 sethi %hi(0x3fc3c000),%o7 442 add %l3,8,%g1 443 fand %f8,%f44,%f2 444 445 sub %l0,%o7,%l0 446 sub %l1,%o7,%l1 447 fand %f18,%f44,%f12 448 fmuld %f20,%f20,%f22 449 450 fsubd %f0,%f2,%f0 451 srl %l0,10,%l0 452 mov %o0,%o3 453 454 fsubd %f10,%f12,%f10 455 srl %l1,10,%l1 456 mov %o1,%o4 457 458 fmuld %f22,%f52,%f24 459 mov %o2,%o5 460 461 fmuld %f0,%f0,%f2 462 andn %l0,0x1f,%l0 463 464 fmuld %f10,%f10,%f12 465 andn %l1,0x1f,%l1 466 467 faddd %f24,%f50,%f24 468 469 fmuld %f2,%f58,%f6 470 ldd [%l3+%l0],%f32 471 472 fmuld %f12,%f58,%f16 473 ldd [%l3+%l1],%f36 474 475 fmuld %f22,%f24,%f24 476 477 faddd %f6,%f56,%f6 478 fmuld %f2,%f62,%f4 479 ldd [%g1+%l0],%f34 480 481 faddd %f16,%f56,%f16 482 fmuld %f12,%f62,%f14 483 ldd [%g1+%l1],%f38 484 485 faddd %f24,%f48,%f24 486 487 fmuld %f2,%f6,%f6 488 faddd %f4,%f60,%f4 489 490 fmuld %f12,%f16,%f16 491 faddd %f14,%f60,%f14 492 493 fmuld %f22,%f24,%f24 494 495 faddd %f6,%f54,%f6 496 fmuld %f2,%f4,%f4 497 498 faddd %f16,%f54,%f16 499 fmuld %f12,%f14,%f14 500 501 faddd %f24,%f46,%f24 502 503 fmuld %f0,%f6,%f6 504 ldd [%l4+%l0],%f2 505 506 fmuld %f10,%f16,%f16 507 ldd [%l4+%l1],%f12 508 509 fmuld %f4,%f32,%f4 510 lda [%i1]%asi,%l0 ! preload next argument 511 512 fmuld %f14,%f36,%f14 513 lda [%i1]%asi,%f0 514 515 fmuld %f6,%f34,%f6 516 lda [%i1+4]%asi,%f1 517 518 fmuld %f16,%f38,%f16 519 add %i1,%i2,%i1 ! 
x += stridex 520 521 fmuld %f22,%f24,%f24 522 523 faddd %f6,%f4,%f6 524 525 faddd %f16,%f14,%f16 526 527 fmuld %f20,%f24,%f24 528 529 faddd %f6,%f2,%f6 530 531 faddd %f16,%f12,%f16 532 533 faddd %f20,%f24,%f26 534 535 faddd %f6,%f32,%f6 536 537 faddd %f16,%f36,%f16 538 andn %l0,%i5,%l0 ! hx &= ~0x80000000 539 540 fors %f26,%f29,%f26 541 addcc %i0,-1,%i0 542 543 fors %f6,%f9,%f6 544 bg,pt %icc,.loop0 545 546! delay slot 547 fors %f16,%f19,%f16 548 549 ba,pt %icc,.endloop0 550! delay slot 551 nop 552 553 .align 32 554.case2: 555 st %f26,[%o5] 556 cmp %l2,%l5 557 fpadd32s %f20,%f31,%f28 558 bl,pn %icc,.case3 559 560! delay slot 561 st %f27,[%o5+4] 562 sethi %hi(0x3fc3c000),%o7 563 add %l3,8,%g1 564 fand %f8,%f44,%f2 565 566 sub %l0,%o7,%l0 567 sub %l2,%o7,%l2 568 fand %f28,%f44,%f22 569 fmuld %f10,%f10,%f12 570 571 fsubd %f0,%f2,%f0 572 srl %l0,10,%l0 573 mov %o0,%o3 574 575 fsubd %f20,%f22,%f20 576 srl %l2,10,%l2 577 mov %o2,%o5 578 579 fmuld %f12,%f52,%f14 580 mov %o1,%o4 581 582 fmuld %f0,%f0,%f2 583 andn %l0,0x1f,%l0 584 585 fmuld %f20,%f20,%f22 586 andn %l2,0x1f,%l2 587 588 faddd %f14,%f50,%f14 589 590 fmuld %f2,%f58,%f6 591 ldd [%l3+%l0],%f32 592 593 fmuld %f22,%f58,%f26 594 ldd [%l3+%l2],%f40 595 596 fmuld %f12,%f14,%f14 597 598 faddd %f6,%f56,%f6 599 fmuld %f2,%f62,%f4 600 ldd [%g1+%l0],%f34 601 602 faddd %f26,%f56,%f26 603 fmuld %f22,%f62,%f24 604 ldd [%g1+%l2],%f42 605 606 faddd %f14,%f48,%f14 607 608 fmuld %f2,%f6,%f6 609 faddd %f4,%f60,%f4 610 611 fmuld %f22,%f26,%f26 612 faddd %f24,%f60,%f24 613 614 fmuld %f12,%f14,%f14 615 616 faddd %f6,%f54,%f6 617 fmuld %f2,%f4,%f4 618 619 faddd %f26,%f54,%f26 620 fmuld %f22,%f24,%f24 621 622 faddd %f14,%f46,%f14 623 624 fmuld %f0,%f6,%f6 625 ldd [%l4+%l0],%f2 626 627 fmuld %f20,%f26,%f26 628 ldd [%l4+%l2],%f22 629 630 fmuld %f4,%f32,%f4 631 lda [%i1]%asi,%l0 ! 
preload next argument 632 633 fmuld %f24,%f40,%f24 634 lda [%i1]%asi,%f0 635 636 fmuld %f6,%f34,%f6 637 lda [%i1+4]%asi,%f1 638 639 fmuld %f26,%f42,%f26 640 add %i1,%i2,%i1 ! x += stridex 641 642 fmuld %f12,%f14,%f14 643 644 faddd %f6,%f4,%f6 645 646 faddd %f26,%f24,%f26 647 648 fmuld %f10,%f14,%f14 649 650 faddd %f6,%f2,%f6 651 652 faddd %f26,%f22,%f26 653 654 faddd %f10,%f14,%f16 655 656 faddd %f6,%f32,%f6 657 658 faddd %f26,%f40,%f26 659 andn %l0,%i5,%l0 ! hx &= ~0x80000000 660 661 fors %f16,%f19,%f16 662 addcc %i0,-1,%i0 663 664 fors %f6,%f9,%f6 665 bg,pt %icc,.loop0 666 667! delay slot 668 fors %f26,%f29,%f26 669 670 ba,pt %icc,.endloop0 671! delay slot 672 nop 673 674 .align 32 675.case3: 676 sethi %hi(0x3fc3c000),%o7 677 add %l3,8,%g1 678 fand %f8,%f44,%f2 679 fmuld %f10,%f10,%f12 680 681 sub %l0,%o7,%l0 682 fmuld %f20,%f20,%f22 683 684 fsubd %f0,%f2,%f0 685 srl %l0,10,%l0 686 mov %o0,%o3 687 688 fmuld %f12,%f52,%f14 689 mov %o1,%o4 690 691 fmuld %f22,%f52,%f24 692 mov %o2,%o5 693 694 fmuld %f0,%f0,%f2 695 andn %l0,0x1f,%l0 696 697 faddd %f14,%f50,%f14 698 699 faddd %f24,%f50,%f24 700 701 fmuld %f2,%f58,%f6 702 ldd [%l3+%l0],%f32 703 704 fmuld %f12,%f14,%f14 705 706 fmuld %f22,%f24,%f24 707 708 faddd %f6,%f56,%f6 709 fmuld %f2,%f62,%f4 710 ldd [%g1+%l0],%f34 711 712 faddd %f14,%f48,%f14 713 714 faddd %f24,%f48,%f24 715 716 fmuld %f2,%f6,%f6 717 faddd %f4,%f60,%f4 718 719 fmuld %f12,%f14,%f14 720 721 fmuld %f22,%f24,%f24 722 723 faddd %f6,%f54,%f6 724 fmuld %f2,%f4,%f4 725 726 faddd %f14,%f46,%f14 727 728 faddd %f24,%f46,%f24 729 730 fmuld %f0,%f6,%f6 731 ldd [%l4+%l0],%f2 732 733 fmuld %f4,%f32,%f4 734 lda [%i1]%asi,%l0 ! preload next argument 735 736 fmuld %f12,%f14,%f14 737 lda [%i1]%asi,%f0 738 739 fmuld %f6,%f34,%f6 740 lda [%i1+4]%asi,%f1 741 742 fmuld %f22,%f24,%f24 743 add %i1,%i2,%i1 ! 
x += stridex 744 745 fmuld %f10,%f14,%f14 746 747 faddd %f6,%f4,%f6 748 749 fmuld %f20,%f24,%f24 750 751 faddd %f10,%f14,%f16 752 753 faddd %f6,%f2,%f6 754 755 faddd %f20,%f24,%f26 756 757 fors %f16,%f19,%f16 758 andn %l0,%i5,%l0 ! hx &= ~0x80000000 759 760 faddd %f6,%f32,%f6 761 addcc %i0,-1,%i0 762 763 fors %f26,%f29,%f26 764 bg,pt %icc,.loop0 765 766! delay slot 767 fors %f6,%f9,%f6 768 769 ba,pt %icc,.endloop0 770! delay slot 771 nop 772 773 .align 32 774.case4: 775 st %f17,[%o4+4] 776 cmp %l1,%l5 777 fpadd32s %f10,%f31,%f18 778 bl,pn %icc,.case6 779 780! delay slot 781 st %f26,[%o5] 782 cmp %l2,%l5 783 fpadd32s %f20,%f31,%f28 784 bl,pn %icc,.case5 785 786! delay slot 787 st %f27,[%o5+4] 788 sethi %hi(0x3fc3c000),%o7 789 add %l3,8,%g1 790 fand %f18,%f44,%f12 791 792 sub %l1,%o7,%l1 793 sub %l2,%o7,%l2 794 fand %f28,%f44,%f22 795 fmuld %f0,%f0,%f2 796 797 fsubd %f10,%f12,%f10 798 srl %l1,10,%l1 799 mov %o1,%o4 800 801 fsubd %f20,%f22,%f20 802 srl %l2,10,%l2 803 mov %o2,%o5 804 805 fmovd %f0,%f6 806 fmuld %f2,%f52,%f4 807 mov %o0,%o3 808 809 fmuld %f10,%f10,%f12 810 andn %l1,0x1f,%l1 811 812 fmuld %f20,%f20,%f22 813 andn %l2,0x1f,%l2 814 815 faddd %f4,%f50,%f4 816 817 fmuld %f12,%f58,%f16 818 ldd [%l3+%l1],%f36 819 820 fmuld %f22,%f58,%f26 821 ldd [%l3+%l2],%f40 822 823 fmuld %f2,%f4,%f4 824 825 faddd %f16,%f56,%f16 826 fmuld %f12,%f62,%f14 827 ldd [%g1+%l1],%f38 828 829 faddd %f26,%f56,%f26 830 fmuld %f22,%f62,%f24 831 ldd [%g1+%l2],%f42 832 833 faddd %f4,%f48,%f4 834 835 fmuld %f12,%f16,%f16 836 faddd %f14,%f60,%f14 837 838 fmuld %f22,%f26,%f26 839 faddd %f24,%f60,%f24 840 841 fmuld %f2,%f4,%f4 842 843 faddd %f16,%f54,%f16 844 fmuld %f12,%f14,%f14 845 846 faddd %f26,%f54,%f26 847 fmuld %f22,%f24,%f24 848 849 faddd %f4,%f46,%f4 850 851 fmuld %f10,%f16,%f16 852 ldd [%l4+%l1],%f12 853 854 fmuld %f20,%f26,%f26 855 ldd [%l4+%l2],%f22 856 857 fmuld %f14,%f36,%f14 858 lda [%i1]%asi,%l0 ! 
preload next argument 859 860 fmuld %f24,%f40,%f24 861 lda [%i1]%asi,%f0 862 863 fmuld %f16,%f38,%f16 864 lda [%i1+4]%asi,%f1 865 866 fmuld %f26,%f42,%f26 867 add %i1,%i2,%i1 ! x += stridex 868 869 fmuld %f2,%f4,%f4 870 871 faddd %f16,%f14,%f16 872 873 faddd %f26,%f24,%f26 874 875 fmuld %f6,%f4,%f4 876 877 faddd %f16,%f12,%f16 878 879 faddd %f26,%f22,%f26 880 881 faddd %f6,%f4,%f6 882 883 faddd %f16,%f36,%f16 884 885 faddd %f26,%f40,%f26 886 andn %l0,%i5,%l0 ! hx &= ~0x80000000 887 888 fors %f6,%f9,%f6 889 addcc %i0,-1,%i0 890 891 fors %f16,%f19,%f16 892 bg,pt %icc,.loop0 893 894! delay slot 895 fors %f26,%f29,%f26 896 897 ba,pt %icc,.endloop0 898! delay slot 899 nop 900 901 .align 32 902.case5: 903 sethi %hi(0x3fc3c000),%o7 904 add %l3,8,%g1 905 fand %f18,%f44,%f12 906 fmuld %f0,%f0,%f2 907 908 sub %l1,%o7,%l1 909 fmuld %f20,%f20,%f22 910 911 fsubd %f10,%f12,%f10 912 srl %l1,10,%l1 913 mov %o1,%o4 914 915 fmovd %f0,%f6 916 fmuld %f2,%f52,%f4 917 mov %o0,%o3 918 919 fmuld %f22,%f52,%f24 920 mov %o2,%o5 921 922 fmuld %f10,%f10,%f12 923 andn %l1,0x1f,%l1 924 925 faddd %f4,%f50,%f4 926 927 faddd %f24,%f50,%f24 928 929 fmuld %f12,%f58,%f16 930 ldd [%l3+%l1],%f36 931 932 fmuld %f2,%f4,%f4 933 934 fmuld %f22,%f24,%f24 935 936 faddd %f16,%f56,%f16 937 fmuld %f12,%f62,%f14 938 ldd [%g1+%l1],%f38 939 940 faddd %f4,%f48,%f4 941 942 faddd %f24,%f48,%f24 943 944 fmuld %f12,%f16,%f16 945 faddd %f14,%f60,%f14 946 947 fmuld %f2,%f4,%f4 948 949 fmuld %f22,%f24,%f24 950 951 faddd %f16,%f54,%f16 952 fmuld %f12,%f14,%f14 953 954 faddd %f4,%f46,%f4 955 956 faddd %f24,%f46,%f24 957 958 fmuld %f10,%f16,%f16 959 ldd [%l4+%l1],%f12 960 961 fmuld %f14,%f36,%f14 962 lda [%i1]%asi,%l0 ! preload next argument 963 964 fmuld %f2,%f4,%f4 965 lda [%i1]%asi,%f0 966 967 fmuld %f16,%f38,%f16 968 lda [%i1+4]%asi,%f1 969 970 fmuld %f22,%f24,%f24 971 add %i1,%i2,%i1 ! 
x += stridex 972 973 fmuld %f6,%f4,%f4 974 975 faddd %f16,%f14,%f16 976 977 fmuld %f20,%f24,%f24 978 979 faddd %f6,%f4,%f6 980 981 faddd %f16,%f12,%f16 982 983 faddd %f20,%f24,%f26 984 985 fors %f6,%f9,%f6 986 andn %l0,%i5,%l0 ! hx &= ~0x80000000 987 988 faddd %f16,%f36,%f16 989 addcc %i0,-1,%i0 990 991 fors %f26,%f29,%f26 992 bg,pt %icc,.loop0 993 994! delay slot 995 fors %f16,%f19,%f16 996 997 ba,pt %icc,.endloop0 998! delay slot 999 nop 1000 1001 .align 32 1002.case6: 1003 st %f27,[%o5+4] 1004 cmp %l2,%l5 1005 fpadd32s %f20,%f31,%f28 1006 bl,pn %icc,.case7 1007 1008! delay slot 1009 sethi %hi(0x3fc3c000),%o7 1010 add %l3,8,%g1 1011 fand %f28,%f44,%f22 1012 fmuld %f0,%f0,%f2 1013 1014 sub %l2,%o7,%l2 1015 fmuld %f10,%f10,%f12 1016 1017 fsubd %f20,%f22,%f20 1018 srl %l2,10,%l2 1019 mov %o2,%o5 1020 1021 fmovd %f0,%f6 1022 fmuld %f2,%f52,%f4 1023 mov %o0,%o3 1024 1025 fmuld %f12,%f52,%f14 1026 mov %o1,%o4 1027 1028 fmuld %f20,%f20,%f22 1029 andn %l2,0x1f,%l2 1030 1031 faddd %f4,%f50,%f4 1032 1033 faddd %f14,%f50,%f14 1034 1035 fmuld %f22,%f58,%f26 1036 ldd [%l3+%l2],%f40 1037 1038 fmuld %f2,%f4,%f4 1039 1040 fmuld %f12,%f14,%f14 1041 1042 faddd %f26,%f56,%f26 1043 fmuld %f22,%f62,%f24 1044 ldd [%g1+%l2],%f42 1045 1046 faddd %f4,%f48,%f4 1047 1048 faddd %f14,%f48,%f14 1049 1050 fmuld %f22,%f26,%f26 1051 faddd %f24,%f60,%f24 1052 1053 fmuld %f2,%f4,%f4 1054 1055 fmuld %f12,%f14,%f14 1056 1057 faddd %f26,%f54,%f26 1058 fmuld %f22,%f24,%f24 1059 1060 faddd %f4,%f46,%f4 1061 1062 faddd %f14,%f46,%f14 1063 1064 fmuld %f20,%f26,%f26 1065 ldd [%l4+%l2],%f22 1066 1067 fmuld %f24,%f40,%f24 1068 lda [%i1]%asi,%l0 ! preload next argument 1069 1070 fmuld %f2,%f4,%f4 1071 lda [%i1]%asi,%f0 1072 1073 fmuld %f26,%f42,%f26 1074 lda [%i1+4]%asi,%f1 1075 1076 fmuld %f12,%f14,%f14 1077 add %i1,%i2,%i1 ! 
x += stridex 1078 1079 fmuld %f6,%f4,%f4 1080 1081 faddd %f26,%f24,%f26 1082 1083 fmuld %f10,%f14,%f14 1084 1085 faddd %f6,%f4,%f6 1086 1087 faddd %f26,%f22,%f26 1088 1089 faddd %f10,%f14,%f16 1090 1091 fors %f6,%f9,%f6 1092 andn %l0,%i5,%l0 ! hx &= ~0x80000000 1093 1094 faddd %f26,%f40,%f26 1095 addcc %i0,-1,%i0 1096 1097 fors %f16,%f19,%f16 1098 bg,pt %icc,.loop0 1099 1100! delay slot 1101 fors %f26,%f29,%f26 1102 1103 ba,pt %icc,.endloop0 1104! delay slot 1105 nop 1106 1107 .align 32 1108.case7: 1109 fmuld %f0,%f0,%f2 1110 fmovd %f0,%f6 1111 mov %o0,%o3 1112 1113 fmuld %f10,%f10,%f12 1114 mov %o1,%o4 1115 1116 fmuld %f20,%f20,%f22 1117 mov %o2,%o5 1118 1119 fmuld %f2,%f52,%f4 1120 lda [%i1]%asi,%l0 ! preload next argument 1121 1122 fmuld %f12,%f52,%f14 1123 lda [%i1]%asi,%f0 1124 1125 fmuld %f22,%f52,%f24 1126 lda [%i1+4]%asi,%f1 1127 1128 faddd %f4,%f50,%f4 1129 add %i1,%i2,%i1 ! x += stridex 1130 1131 faddd %f14,%f50,%f14 1132 1133 faddd %f24,%f50,%f24 1134 1135 fmuld %f2,%f4,%f4 1136 1137 fmuld %f12,%f14,%f14 1138 1139 fmuld %f22,%f24,%f24 1140 1141 faddd %f4,%f48,%f4 1142 1143 faddd %f14,%f48,%f14 1144 1145 faddd %f24,%f48,%f24 1146 1147 fmuld %f2,%f4,%f4 1148 1149 fmuld %f12,%f14,%f14 1150 1151 fmuld %f22,%f24,%f24 1152 1153 faddd %f4,%f46,%f4 1154 1155 faddd %f14,%f46,%f14 1156 1157 faddd %f24,%f46,%f24 1158 1159 fmuld %f2,%f4,%f4 1160 1161 fmuld %f12,%f14,%f14 1162 1163 fmuld %f22,%f24,%f24 1164 1165 fmuld %f6,%f4,%f4 1166 1167 fmuld %f10,%f14,%f14 1168 1169 fmuld %f20,%f24,%f24 1170 1171 faddd %f6,%f4,%f6 1172 1173 faddd %f10,%f14,%f16 1174 1175 faddd %f20,%f24,%f26 1176 andn %l0,%i5,%l0 ! hx &= ~0x80000000 1177 1178 fors %f6,%f9,%f6 1179 addcc %i0,-1,%i0 1180 1181 fors %f16,%f19,%f16 1182 bg,pt %icc,.loop0 1183 1184! delay slot 1185 fors %f26,%f29,%f26 1186 1187 ba,pt %icc,.endloop0 1188! delay slot 1189 nop 1190 1191 1192 .align 32 1193.endloop2: 1194 cmp %l1,%l5 1195 bl,pn %icc,1f 1196! 
delay slot 1197 fabsd %f10,%f10 1198 sethi %hi(0x3fc3c000),%o7 1199 fpadd32s %f10,%f31,%f18 1200 add %l3,8,%g1 1201 fand %f18,%f44,%f12 1202 sub %l1,%o7,%l1 1203 fsubd %f10,%f12,%f10 1204 srl %l1,10,%l1 1205 fmuld %f10,%f10,%f12 1206 andn %l1,0x1f,%l1 1207 fmuld %f12,%f58,%f20 1208 ldd [%l3+%l1],%f36 1209 faddd %f20,%f56,%f20 1210 fmuld %f12,%f62,%f14 1211 ldd [%g1+%l1],%f38 1212 fmuld %f12,%f20,%f20 1213 faddd %f14,%f60,%f14 1214 faddd %f20,%f54,%f20 1215 fmuld %f12,%f14,%f14 1216 fmuld %f10,%f20,%f20 1217 ldd [%l4+%l1],%f12 1218 fmuld %f14,%f36,%f14 1219 fmuld %f20,%f38,%f20 1220 faddd %f20,%f14,%f20 1221 faddd %f20,%f12,%f20 1222 ba,pt %icc,2f 1223! delay slot 1224 faddd %f20,%f36,%f20 12251: 1226 fmuld %f10,%f10,%f12 1227 fmuld %f12,%f52,%f14 1228 faddd %f14,%f50,%f14 1229 fmuld %f12,%f14,%f14 1230 faddd %f14,%f48,%f14 1231 fmuld %f12,%f14,%f14 1232 faddd %f14,%f46,%f14 1233 fmuld %f12,%f14,%f14 1234 fmuld %f10,%f14,%f14 1235 faddd %f10,%f14,%f20 12362: 1237 fors %f20,%f19,%f20 1238 st %f20,[%o1] 1239 st %f21,[%o1+4] 1240 1241.endloop1: 1242 cmp %l0,%l5 1243 bl,pn %icc,1f 1244! delay slot 1245 fabsd %f0,%f0 1246 sethi %hi(0x3fc3c000),%o7 1247 fpadd32s %f0,%f31,%f8 1248 add %l3,8,%g1 1249 fand %f8,%f44,%f2 1250 sub %l0,%o7,%l0 1251 fsubd %f0,%f2,%f0 1252 srl %l0,10,%l0 1253 fmuld %f0,%f0,%f2 1254 andn %l0,0x1f,%l0 1255 fmuld %f2,%f58,%f20 1256 ldd [%l3+%l0],%f32 1257 faddd %f20,%f56,%f20 1258 fmuld %f2,%f62,%f4 1259 ldd [%g1+%l0],%f34 1260 fmuld %f2,%f20,%f20 1261 faddd %f4,%f60,%f4 1262 faddd %f20,%f54,%f20 1263 fmuld %f2,%f4,%f4 1264 fmuld %f0,%f20,%f20 1265 ldd [%l4+%l0],%f2 1266 fmuld %f4,%f32,%f4 1267 fmuld %f20,%f34,%f20 1268 faddd %f20,%f4,%f20 1269 faddd %f20,%f2,%f20 1270 ba,pt %icc,2f 1271! 
delay slot 1272 faddd %f20,%f32,%f20 12731: 1274 fmuld %f0,%f0,%f2 1275 fmuld %f2,%f52,%f4 1276 faddd %f4,%f50,%f4 1277 fmuld %f2,%f4,%f4 1278 faddd %f4,%f48,%f4 1279 fmuld %f2,%f4,%f4 1280 faddd %f4,%f46,%f4 1281 fmuld %f2,%f4,%f4 1282 fmuld %f0,%f4,%f4 1283 faddd %f0,%f4,%f20 12842: 1285 fors %f20,%f9,%f20 1286 st %f20,[%o0] 1287 st %f21,[%o0+4] 1288 1289.endloop0: 1290 st %f6,[%o3] 1291 st %f7,[%o3+4] 1292 st %f16,[%o4] 1293 st %f17,[%o4+4] 1294 st %f26,[%o5] 1295 st %f27,[%o5+4] 1296 1297! return. finished off with only primary range arguments. 1298 1299 ret 1300 restore 1301 1302 1303 .align 32 1304.range0: 1305 cmp %l0,LIM_l6 1306 bg,a,pt %icc,.MEDIUM ! branch if x is not tiny 1307! delay slot, annulled if branch not taken 1308 mov 0x1,LIM_l6 ! set "processing loop0" 1309 st %f0,[%o0] ! *y = *x with inexact if x nonzero 1310 st %f1,[%o0+4] 1311 fdtoi %f0,%f2 1312 addcc %i0,-1,%i0 1313 ble,pn %icc,.endloop0 1314! delay slot, harmless if branch taken 1315 add %i3,%i4,%i3 ! y += stridey 1316 andn %l1,%i5,%l0 ! hx &= ~0x80000000 1317 fmovd %f10,%f0 1318 ba,pt %icc,.loop0 1319! delay slot 1320 add %i1,%i2,%i1 ! x += stridex 1321 1322 1323 .align 32 1324.range1: 1325 cmp %l1,LIM_l6 1326 bg,a,pt %icc,.MEDIUM ! branch if x is not tiny 1327! delay slot, annulled if branch not taken 1328 mov 0x2,LIM_l6 ! set "processing loop1" 1329 st %f10,[%o1] ! *y = *x with inexact if x nonzero 1330 st %f11,[%o1+4] 1331 fdtoi %f10,%f12 1332 addcc %i0,-1,%i0 1333 ble,pn %icc,.endloop1 1334! delay slot, harmless if branch taken 1335 add %i3,%i4,%i3 ! y += stridey 1336 andn %l2,%i5,%l1 ! hx &= ~0x80000000 1337 fmovd %f20,%f10 1338 ba,pt %icc,.loop1 1339! delay slot 1340 add %i1,%i2,%i1 ! x += stridex 1341 1342 1343 .align 32 1344.range2: 1345 cmp %l2,LIM_l6 1346 bg,a,pt %icc,.MEDIUM ! branch if x is not tiny 1347! delay slot, annulled if branch not taken 1348 mov 0x3,LIM_l6 ! set "processing loop2" 1349 st %f20,[%o2] ! 
*y = *x with inexact if x nonzero 1350 st %f21,[%o2+4] 1351 fdtoi %f20,%f22 13521: 1353 addcc %i0,-1,%i0 1354 ble,pn %icc,.endloop2 1355! delay slot 1356 nop 1357 ld [%i1],%l2 1358 ld [%i1],%f20 1359 ld [%i1+4],%f21 1360 andn %l2,%i5,%l2 ! hx &= ~0x80000000 1361 ba,pt %icc,.loop2 1362! delay slot 1363 add %i1,%i2,%i1 ! x += stridex 1364 1365 1366 .align 32 1367.MEDIUM: 1368 1369! ========== medium range ========== 1370 1371! register use 1372 1373! i0 n 1374! i1 x 1375! i2 stridex 1376! i3 y 1377! i4 stridey 1378! i5 0x80000000 1379 1380! l0 hx0 1381! l1 hx1 1382! l2 hx2 1383! l3 __vlibm_TBL_sincos_hi 1384! l4 __vlibm_TBL_sincos_lo 1385! l5 constants 1386! l6 in transition from pri-range and here, use for biguns 1387! l7 0x413921fb 1388 1389! the following are 64-bit registers in both V8+ and V9 1390 1391! g1 scratch 1392! g5 1393 1394! o0 py0 1395! o1 py1 1396! o2 py2 1397! o3 n0 1398! o4 n1 1399! o5 n2 1400! o7 scratch 1401 1402! f0 x0 1403! f2 n0,y0 1404! f4 1405! f6 1406! f8 scratch for table base 1407! f9 signbit0 1408! f10 x1 1409! f12 n1,y1 1410! f14 1411! f16 1412! f18 scratch for table base 1413! f19 signbit1 1414! f20 x2 1415! f22 n2,y2 1416! f24 1417! f26 1418! f28 scratch for table base 1419! f29 signbit2 1420! f30 0x80000000 1421! f31 0x4000 1422! f32 1423! f34 1424! f36 1425! f38 1426! f40 invpio2 1427! f42 round 1428! f44 0xffff800000000000 1429! f46 pio2_1 1430! f48 pio2_2 1431! f50 pio2_3 1432! f52 pio2_3t 1433! f54 one 1434! f56 pp1 1435! f58 pp2 1436! f60 qq1 1437! f62 qq2 1438 1439 PIC_SET(g5,constants,l5) 1440 1441 ! %o3,%o4,%o5 need to be stored 1442 st %f6,[%o3] 1443 sethi %hi(0x413921fb),%l7 1444 st %f7,[%o3+4] 1445 or %l7,%lo(0x413921fb),%l7 1446 st %f16,[%o4] 1447 st %f17,[%o4+4] 1448 st %f26,[%o5] 1449 st %f27,[%o5+4] 1450 ldd [%l5+invpio2],%f40 1451 ldd [%l5+round],%f42 1452 ldd [%l5+pio2_1],%f46 1453 ldd [%l5+pio2_2],%f48 1454 ldd [%l5+pio2_3],%f50 1455 ldd [%l5+pio2_3t],%f52 1456 std %f54,[%fp+x0_1+8] ! 
set up stack data 1457 std %f54,[%fp+x1_1+8] 1458 std %f54,[%fp+x2_1+8] 1459 stx %g0,[%fp+y0_0+8] 1460 stx %g0,[%fp+y1_0+8] 1461 stx %g0,[%fp+y2_0+8] 1462 1463! branched here in the middle of the array. Need to adjust 1464! for the members of the triple that were selected in the primary 1465! loop. 1466 1467! no adjustment since all three selected here 1468 subcc LIM_l6,0x1,%g0 ! continue in LOOP0? 1469 bz,a %icc,.LOOP0 1470 mov 0x0,LIM_l6 ! delay slot set biguns=0 1471 1472! ajust 1st triple since 2d and 3d done here 1473 subcc LIM_l6,0x2,%g0 ! continue in LOOP1? 1474 fors %f0,%f9,%f0 ! restore sign bit 1475 fmuld %f0,%f40,%f2 ! adj LOOP0 1476 bz,a %icc,.LOOP1 1477 mov 0x0,LIM_l6 ! delay slot set biguns=0 1478 1479! ajust 1st and 2d triple since 3d done here 1480 subcc LIM_l6,0x3,%g0 ! continue in LOOP2? 1481 !done fmuld %f0,%f40,%f2 ! adj LOOP0 1482 sub %i3,%i4,%i3 ! adjust to not double increment 1483 fors %f10,%f19,%f10 ! restore sign bit 1484 fmuld %f10,%f40,%f12 ! adj LOOP1 1485 faddd %f2,%f42,%f2 ! adj LOOP1 1486 bz,a %icc,.LOOP2 1487 mov 0x0,LIM_l6 ! delay slot set biguns=0 1488 1489 .align 32 1490.LOOP0: 1491 lda [%i1]%asi,%l1 ! preload next argument 1492 mov %i3,%o0 ! py0 = y 1493 lda [%i1]%asi,%f10 1494 cmp %l0,%l7 1495 add %i3,%i4,%i3 ! y += stridey 1496 bg,pn %icc,.BIG0 ! if hx > 0x413921fb 1497 1498! delay slot 1499 lda [%i1+4]%asi,%f11 1500 addcc %i0,-1,%i0 1501 add %i1,%i2,%i1 ! x += stridex 1502 ble,pn %icc,.ENDLOOP1 1503 1504! delay slot 1505 andn %l1,%i5,%l1 1506 nop 1507 fmuld %f0,%f40,%f2 1508 fabsd %f54,%f54 ! a nop for alignment only 1509 1510.LOOP1: 1511 lda [%i1]%asi,%l2 ! preload next argument 1512 mov %i3,%o1 ! py1 = y 1513 1514 lda [%i1]%asi,%f20 1515 cmp %l1,%l7 1516 add %i3,%i4,%i3 ! y += stridey 1517 bg,pn %icc,.BIG1 ! if hx > 0x413921fb 1518 1519! delay slot 1520 lda [%i1+4]%asi,%f21 1521 addcc %i0,-1,%i0 1522 add %i1,%i2,%i1 ! x += stridex 1523 ble,pn %icc,.ENDLOOP2 1524 1525! 
delay slot 1526 andn %l2,%i5,%l2 1527 nop 1528 fmuld %f10,%f40,%f12 1529 faddd %f2,%f42,%f2 1530 1531.LOOP2: 1532 st %f3,[%fp+n0] 1533 mov %i3,%o2 ! py2 = y 1534 1535 cmp %l2,%l7 1536 add %i3,%i4,%i3 ! y += stridey 1537 fmuld %f20,%f40,%f22 1538 bg,pn %icc,.BIG2 ! if hx > 0x413921fb 1539 1540! delay slot 1541 add %l5,thresh+4,%o7 1542 faddd %f12,%f42,%f12 1543 st %f13,[%fp+n1] 1544 1545! - 1546 1547 add %l5,thresh,%g1 1548 faddd %f22,%f42,%f22 1549 st %f23,[%fp+n2] 1550 1551 fsubd %f2,%f42,%f2 ! n 1552 1553 fsubd %f12,%f42,%f12 ! n 1554 1555 fsubd %f22,%f42,%f22 ! n 1556 1557 fmuld %f2,%f46,%f4 1558 1559 fmuld %f12,%f46,%f14 1560 1561 fmuld %f22,%f46,%f24 1562 1563 fsubd %f0,%f4,%f4 1564 fmuld %f2,%f48,%f6 1565 1566 fsubd %f10,%f14,%f14 1567 fmuld %f12,%f48,%f16 1568 1569 fsubd %f20,%f24,%f24 1570 fmuld %f22,%f48,%f26 1571 1572 fsubd %f4,%f6,%f0 1573 ld [%fp+n0],%o3 1574 1575 fsubd %f14,%f16,%f10 1576 ld [%fp+n1],%o4 1577 1578 fsubd %f24,%f26,%f20 1579 ld [%fp+n2],%o5 1580 1581 fsubd %f4,%f0,%f32 1582 and %o3,1,%o3 1583 1584 fsubd %f14,%f10,%f34 1585 and %o4,1,%o4 1586 1587 fsubd %f24,%f20,%f36 1588 and %o5,1,%o5 1589 1590 fsubd %f32,%f6,%f32 1591 fmuld %f2,%f50,%f8 1592 sll %o3,3,%o3 1593 1594 fsubd %f34,%f16,%f34 1595 fmuld %f12,%f50,%f18 1596 sll %o4,3,%o4 1597 1598 fsubd %f36,%f26,%f36 1599 fmuld %f22,%f50,%f28 1600 sll %o5,3,%o5 1601 1602 fsubd %f8,%f32,%f8 1603 ld [%g1+%o3],%f6 1604 1605 fsubd %f18,%f34,%f18 1606 ld [%g1+%o4],%f16 1607 1608 fsubd %f28,%f36,%f28 1609 ld [%g1+%o5],%f26 1610 1611 fsubd %f0,%f8,%f4 1612 1613 fsubd %f10,%f18,%f14 1614 1615 fsubd %f20,%f28,%f24 1616 1617 fsubd %f0,%f4,%f32 1618 1619 fsubd %f10,%f14,%f34 1620 1621 fsubd %f20,%f24,%f36 1622 1623 fsubd %f32,%f8,%f32 1624 fmuld %f2,%f52,%f2 1625 1626 fsubd %f34,%f18,%f34 1627 fmuld %f12,%f52,%f12 1628 1629 fsubd %f36,%f28,%f36 1630 fmuld %f22,%f52,%f22 1631 1632 fsubd %f2,%f32,%f2 1633 ld [%o7+%o3],%f8 1634 1635 fsubd %f12,%f34,%f12 1636 ld [%o7+%o4],%f18 1637 1638 fsubd %f22,%f36,%f22 
1639 ld [%o7+%o5],%f28 1640 1641 fsubd %f4,%f2,%f0 ! x 1642 1643 fsubd %f14,%f12,%f10 ! x 1644 1645 fsubd %f24,%f22,%f20 ! x 1646 1647 fsubd %f4,%f0,%f4 1648 1649 fsubd %f14,%f10,%f14 1650 1651 fsubd %f24,%f20,%f24 1652 1653 fands %f0,%f30,%f9 ! save signbit 1654 1655 fands %f10,%f30,%f19 ! save signbit 1656 1657 fands %f20,%f30,%f29 ! save signbit 1658 1659 fabsd %f0,%f0 1660 std %f0,[%fp+x0_1] 1661 1662 fabsd %f10,%f10 1663 std %f10,[%fp+x1_1] 1664 1665 fabsd %f20,%f20 1666 std %f20,[%fp+x2_1] 1667 1668 fsubd %f4,%f2,%f2 ! y 1669 1670 fsubd %f14,%f12,%f12 ! y 1671 1672 fsubd %f24,%f22,%f22 ! y 1673 1674 fcmpgt32 %f6,%f0,%l0 1675 1676 fcmpgt32 %f16,%f10,%l1 1677 1678 fcmpgt32 %f26,%f20,%l2 1679 1680! -- 16 byte aligned 1681 fxors %f2,%f9,%f2 1682 1683 fxors %f12,%f19,%f12 1684 1685 fxors %f22,%f29,%f22 1686 1687 fands %f9,%f8,%f9 ! if (n & 1) clear sign bit 1688 andcc %l0,2,%g0 1689 bne,pn %icc,.CASE4 1690 1691! delay slot 1692 fands %f19,%f18,%f19 ! if (n & 1) clear sign bit 1693 andcc %l1,2,%g0 1694 bne,pn %icc,.CASE2 1695 1696! delay slot 1697 fands %f29,%f28,%f29 ! if (n & 1) clear sign bit 1698 andcc %l2,2,%g0 1699 bne,pn %icc,.CASE1 1700 1701! 
delay slot 1702 fpadd32s %f0,%f31,%f8 1703 sethi %hi(0x3fc3c000),%o7 1704 ld [%fp+x0_1],%l0 1705 1706 fpadd32s %f10,%f31,%f18 1707 add %l3,8,%g1 1708 ld [%fp+x1_1],%l1 1709 1710 fpadd32s %f20,%f31,%f28 1711 ld [%fp+x2_1],%l2 1712 1713 fand %f8,%f44,%f4 1714 sub %l0,%o7,%l0 1715 1716 fand %f18,%f44,%f14 1717 sub %l1,%o7,%l1 1718 1719 fand %f28,%f44,%f24 1720 sub %l2,%o7,%l2 1721 1722 fsubd %f0,%f4,%f0 1723 srl %l0,10,%l0 1724 1725 fsubd %f10,%f14,%f10 1726 srl %l1,10,%l1 1727 1728 fsubd %f20,%f24,%f20 1729 srl %l2,10,%l2 1730 1731 faddd %f0,%f2,%f0 1732 andn %l0,0x1f,%l0 1733 1734 faddd %f10,%f12,%f10 1735 andn %l1,0x1f,%l1 1736 1737 faddd %f20,%f22,%f20 1738 andn %l2,0x1f,%l2 1739 1740 fmuld %f0,%f0,%f2 1741 add %l0,%o3,%l0 1742 1743 fmuld %f10,%f10,%f12 1744 add %l1,%o4,%l1 1745 1746 fmuld %f20,%f20,%f22 1747 add %l2,%o5,%l2 1748 1749 fmuld %f2,%f58,%f6 1750 ldd [%l3+%l0],%f32 1751 1752 fmuld %f12,%f58,%f16 1753 ldd [%l3+%l1],%f34 1754 1755 fmuld %f22,%f58,%f26 1756 ldd [%l3+%l2],%f36 1757 1758 faddd %f6,%f56,%f6 1759 fmuld %f2,%f62,%f4 1760 1761 faddd %f16,%f56,%f16 1762 fmuld %f12,%f62,%f14 1763 1764 faddd %f26,%f56,%f26 1765 fmuld %f22,%f62,%f24 1766 1767 fmuld %f2,%f6,%f6 1768 faddd %f4,%f60,%f4 1769 1770 fmuld %f12,%f16,%f16 1771 faddd %f14,%f60,%f14 1772 1773 fmuld %f22,%f26,%f26 1774 faddd %f24,%f60,%f24 1775 1776 faddd %f6,%f54,%f6 1777 fmuld %f2,%f4,%f4 1778 1779 faddd %f16,%f54,%f16 1780 fmuld %f12,%f14,%f14 1781 1782 faddd %f26,%f54,%f26 1783 fmuld %f22,%f24,%f24 1784 1785 fmuld %f0,%f6,%f6 1786 ldd [%g1+%l0],%f2 1787 1788 fmuld %f10,%f16,%f16 1789 ldd [%g1+%l1],%f12 1790 1791 fmuld %f20,%f26,%f26 1792 ldd [%g1+%l2],%f22 1793 1794 fmuld %f4,%f32,%f4 1795 ldd [%l4+%l0],%f0 1796 1797 fmuld %f14,%f34,%f14 1798 ldd [%l4+%l1],%f10 1799 1800 fmuld %f24,%f36,%f24 1801 ldd [%l4+%l2],%f20 1802 1803 fmuld %f6,%f2,%f6 1804 1805 fmuld %f16,%f12,%f16 1806 1807 fmuld %f26,%f22,%f26 1808 1809 faddd %f6,%f4,%f6 1810 1811 faddd %f16,%f14,%f16 1812 1813 faddd 
%f26,%f24,%f26 1814 1815 faddd %f6,%f0,%f6 1816 1817 faddd %f16,%f10,%f16 1818 1819 faddd %f26,%f20,%f26 1820 1821 faddd %f6,%f32,%f6 1822 1823 faddd %f16,%f34,%f16 1824 1825 faddd %f26,%f36,%f26 1826 1827.FIXSIGN: 1828 ld [%fp+n0],%o3 1829 add %l5,thresh-4,%g1 1830 1831 ld [%fp+n1],%o4 1832 1833 ld [%fp+n2],%o5 1834 and %o3,2,%o3 1835 1836 sll %o3,2,%o3 1837 and %o4,2,%o4 1838 lda [%i1]%asi,%l0 ! preload next argument 1839 1840 sll %o4,2,%o4 1841 and %o5,2,%o5 1842 ld [%g1+%o3],%f8 1843 1844 sll %o5,2,%o5 1845 ld [%g1+%o4],%f18 1846 1847 ld [%g1+%o5],%f28 1848 fxors %f9,%f8,%f9 1849 1850 lda [%i1]%asi,%f0 1851 fxors %f29,%f28,%f29 1852 1853 lda [%i1+4]%asi,%f1 1854 fxors %f19,%f18,%f19 1855 1856 fors %f6,%f9,%f6 ! tack on sign 1857 add %i1,%i2,%i1 ! x += stridex 1858 st %f6,[%o0] 1859 1860 fors %f26,%f29,%f26 ! tack on sign 1861 st %f7,[%o0+4] 1862 1863 fors %f16,%f19,%f16 ! tack on sign 1864 st %f26,[%o2] 1865 1866 st %f27,[%o2+4] 1867 addcc %i0,-1,%i0 1868 1869 st %f16,[%o1] 1870 andn %l0,%i5,%l0 ! hx &= ~0x80000000 1871 bg,pt %icc,.LOOP0 1872 1873! delay slot 1874 st %f17,[%o1+4] 1875 1876 ba,pt %icc,.ENDLOOP0 1877! 
delay slot 1878 nop 1879 1880 .align 32 1881.CASE1: 1882 fpadd32s %f10,%f31,%f18 1883 sethi %hi(0x3fc3c000),%o7 1884 ld [%fp+x0_1],%l0 1885 1886 fand %f8,%f44,%f4 1887 add %l3,8,%g1 1888 ld [%fp+x1_1],%l1 1889 1890 fand %f18,%f44,%f14 1891 sub %l0,%o7,%l0 1892 1893 fsubd %f0,%f4,%f0 1894 srl %l0,10,%l0 1895 sub %l1,%o7,%l1 1896 1897 fsubd %f10,%f14,%f10 1898 srl %l1,10,%l1 1899 1900 fmuld %f20,%f20,%f20 1901 ldd [%l5+%o5],%f36 1902 add %l5,%o5,%l2 1903 1904 faddd %f0,%f2,%f0 1905 andn %l0,0x1f,%l0 1906 1907 faddd %f10,%f12,%f10 1908 andn %l1,0x1f,%l1 1909 1910 fmuld %f20,%f36,%f24 1911 ldd [%l2+0x10],%f26 1912 add %fp,%o5,%o5 1913 1914 fmuld %f0,%f0,%f2 1915 add %l0,%o3,%l0 1916 1917 fmuld %f10,%f10,%f12 1918 add %l1,%o4,%l1 1919 1920 faddd %f24,%f26,%f24 1921 ldd [%l2+0x20],%f36 1922 1923 fmuld %f2,%f58,%f6 1924 ldd [%l3+%l0],%f32 1925 1926 fmuld %f12,%f58,%f16 1927 ldd [%l3+%l1],%f34 1928 1929 fmuld %f20,%f24,%f24 1930 ldd [%l2+0x30],%f26 1931 1932 faddd %f6,%f56,%f6 1933 fmuld %f2,%f62,%f4 1934 1935 faddd %f16,%f56,%f16 1936 fmuld %f12,%f62,%f14 1937 1938 faddd %f24,%f36,%f24 1939 ldd [%o5+x2_1],%f36 1940 1941 fmuld %f2,%f6,%f6 1942 faddd %f4,%f60,%f4 1943 1944 fmuld %f12,%f16,%f16 1945 faddd %f14,%f60,%f14 1946 1947 fmuld %f20,%f24,%f24 1948 1949 faddd %f6,%f54,%f6 1950 fmuld %f2,%f4,%f4 1951 ldd [%g1+%l0],%f2 1952 1953 faddd %f16,%f54,%f16 1954 fmuld %f12,%f14,%f14 1955 ldd [%g1+%l1],%f12 1956 1957 faddd %f24,%f26,%f24 1958 1959 fmuld %f0,%f6,%f6 1960 ldd [%l4+%l0],%f0 1961 1962 fmuld %f10,%f16,%f16 1963 ldd [%l4+%l1],%f10 1964 1965 fmuld %f4,%f32,%f4 1966 std %f22,[%fp+y2_0] 1967 1968 fmuld %f14,%f34,%f14 1969 1970 fmuld %f6,%f2,%f6 1971 1972 fmuld %f16,%f12,%f16 1973 1974 fmuld %f20,%f24,%f24 1975 1976 faddd %f6,%f4,%f6 1977 1978 faddd %f16,%f14,%f16 1979 1980 fmuld %f36,%f24,%f24 1981 ldd [%o5+y2_0],%f22 1982 1983 faddd %f6,%f0,%f6 1984 1985 faddd %f16,%f10,%f16 1986 1987 faddd %f24,%f22,%f24 1988 1989 faddd %f6,%f32,%f6 1990 1991 faddd %f16,%f34,%f16 1992 
ba,pt %icc,.FIXSIGN 1993 1994! delay slot 1995 faddd %f36,%f24,%f26 1996 1997 .align 32 1998.CASE2: 1999 fpadd32s %f0,%f31,%f8 2000 ld [%fp+x0_1],%l0 2001 andcc %l2,2,%g0 2002 bne,pn %icc,.CASE3 2003 2004! delay slot 2005 sethi %hi(0x3fc3c000),%o7 2006 fpadd32s %f20,%f31,%f28 2007 ld [%fp+x2_1],%l2 2008 2009 fand %f8,%f44,%f4 2010 sub %l0,%o7,%l0 2011 add %l3,8,%g1 2012 2013 fand %f28,%f44,%f24 2014 sub %l2,%o7,%l2 2015 2016 fsubd %f0,%f4,%f0 2017 srl %l0,10,%l0 2018 2019 fsubd %f20,%f24,%f20 2020 srl %l2,10,%l2 2021 2022 fmuld %f10,%f10,%f10 2023 ldd [%l5+%o4],%f34 2024 add %l5,%o4,%l1 2025 2026 faddd %f0,%f2,%f0 2027 andn %l0,0x1f,%l0 2028 2029 faddd %f20,%f22,%f20 2030 andn %l2,0x1f,%l2 2031 2032 fmuld %f10,%f34,%f14 2033 ldd [%l1+0x10],%f16 2034 add %fp,%o4,%o4 2035 2036 fmuld %f0,%f0,%f2 2037 add %l0,%o3,%l0 2038 2039 fmuld %f20,%f20,%f22 2040 add %l2,%o5,%l2 2041 2042 faddd %f14,%f16,%f14 2043 ldd [%l1+0x20],%f34 2044 2045 fmuld %f2,%f58,%f6 2046 ldd [%l3+%l0],%f32 2047 2048 fmuld %f22,%f58,%f26 2049 ldd [%l3+%l2],%f36 2050 2051 fmuld %f10,%f14,%f14 2052 ldd [%l1+0x30],%f16 2053 2054 faddd %f6,%f56,%f6 2055 fmuld %f2,%f62,%f4 2056 2057 faddd %f26,%f56,%f26 2058 fmuld %f22,%f62,%f24 2059 2060 faddd %f14,%f34,%f14 2061 ldd [%o4+x1_1],%f34 2062 2063 fmuld %f2,%f6,%f6 2064 faddd %f4,%f60,%f4 2065 2066 fmuld %f22,%f26,%f26 2067 faddd %f24,%f60,%f24 2068 2069 fmuld %f10,%f14,%f14 2070 2071 faddd %f6,%f54,%f6 2072 fmuld %f2,%f4,%f4 2073 ldd [%g1+%l0],%f2 2074 2075 faddd %f26,%f54,%f26 2076 fmuld %f22,%f24,%f24 2077 ldd [%g1+%l2],%f22 2078 2079 faddd %f14,%f16,%f14 2080 2081 fmuld %f0,%f6,%f6 2082 ldd [%l4+%l0],%f0 2083 2084 fmuld %f20,%f26,%f26 2085 ldd [%l4+%l2],%f20 2086 2087 fmuld %f4,%f32,%f4 2088 std %f12,[%fp+y1_0] 2089 2090 fmuld %f24,%f36,%f24 2091 2092 fmuld %f6,%f2,%f6 2093 2094 fmuld %f26,%f22,%f26 2095 2096 fmuld %f10,%f14,%f14 2097 2098 faddd %f6,%f4,%f6 2099 2100 faddd %f26,%f24,%f26 2101 2102 fmuld %f34,%f14,%f14 2103 ldd [%o4+y1_0],%f12 2104 2105 
faddd %f6,%f0,%f6 2106 2107 faddd %f26,%f20,%f26 2108 2109 faddd %f14,%f12,%f14 2110 2111 faddd %f6,%f32,%f6 2112 2113 faddd %f26,%f36,%f26 2114 ba,pt %icc,.FIXSIGN 2115 2116! delay slot 2117 faddd %f34,%f14,%f16 2118 2119 .align 32 2120.CASE3: 2121 fand %f8,%f44,%f4 2122 add %l3,8,%g1 2123 sub %l0,%o7,%l0 2124 2125 fmuld %f10,%f10,%f10 2126 ldd [%l5+%o4],%f34 2127 add %l5,%o4,%l1 2128 2129 fsubd %f0,%f4,%f0 2130 srl %l0,10,%l0 2131 2132 fmuld %f20,%f20,%f20 2133 ldd [%l5+%o5],%f36 2134 add %l5,%o5,%l2 2135 2136 fmuld %f10,%f34,%f14 2137 ldd [%l1+0x10],%f16 2138 add %fp,%o4,%o4 2139 2140 faddd %f0,%f2,%f0 2141 andn %l0,0x1f,%l0 2142 2143 fmuld %f20,%f36,%f24 2144 ldd [%l2+0x10],%f26 2145 add %fp,%o5,%o5 2146 2147 faddd %f14,%f16,%f14 2148 ldd [%l1+0x20],%f34 2149 2150 fmuld %f0,%f0,%f2 2151 add %l0,%o3,%l0 2152 2153 faddd %f24,%f26,%f24 2154 ldd [%l2+0x20],%f36 2155 2156 fmuld %f10,%f14,%f14 2157 ldd [%l1+0x30],%f16 2158 2159 fmuld %f2,%f58,%f6 2160 ldd [%l3+%l0],%f32 2161 2162 fmuld %f20,%f24,%f24 2163 ldd [%l2+0x30],%f26 2164 2165 faddd %f14,%f34,%f14 2166 ldd [%o4+x1_1],%f34 2167 2168 faddd %f6,%f56,%f6 2169 fmuld %f2,%f62,%f4 2170 2171 faddd %f24,%f36,%f24 2172 ldd [%o5+x2_1],%f36 2173 2174 fmuld %f10,%f14,%f14 2175 std %f12,[%fp+y1_0] 2176 2177 fmuld %f2,%f6,%f6 2178 faddd %f4,%f60,%f4 2179 2180 fmuld %f20,%f24,%f24 2181 std %f22,[%fp+y2_0] 2182 2183 faddd %f14,%f16,%f14 2184 2185 faddd %f6,%f54,%f6 2186 fmuld %f2,%f4,%f4 2187 ldd [%g1+%l0],%f2 2188 2189 faddd %f24,%f26,%f24 2190 2191 fmuld %f10,%f14,%f14 2192 2193 fmuld %f0,%f6,%f6 2194 ldd [%l4+%l0],%f0 2195 2196 fmuld %f4,%f32,%f4 2197 2198 fmuld %f20,%f24,%f24 2199 2200 fmuld %f6,%f2,%f6 2201 2202 fmuld %f34,%f14,%f14 2203 ldd [%o4+y1_0],%f12 2204 2205 fmuld %f36,%f24,%f24 2206 ldd [%o5+y2_0],%f22 2207 2208 faddd %f6,%f4,%f6 2209 2210 faddd %f14,%f12,%f14 2211 2212 faddd %f24,%f22,%f24 2213 2214 faddd %f6,%f0,%f6 2215 2216 faddd %f34,%f14,%f16 2217 2218 faddd %f36,%f24,%f26 2219 ba,pt %icc,.FIXSIGN 2220 
2221! delay slot 2222 faddd %f6,%f32,%f6 2223 2224 .align 32 2225.CASE4: 2226 fands %f29,%f28,%f29 ! if (n & 1) clear sign bit 2227 sethi %hi(0x3fc3c000),%o7 2228 andcc %l1,2,%g0 2229 bne,pn %icc,.CASE6 2230 2231! delay slot 2232 andcc %l2,2,%g0 2233 fpadd32s %f10,%f31,%f18 2234 ld [%fp+x1_1],%l1 2235 bne,pn %icc,.CASE5 2236 2237! delay slot 2238 add %l3,8,%g1 2239 ld [%fp+x2_1],%l2 2240 fpadd32s %f20,%f31,%f28 2241 2242 fand %f18,%f44,%f14 2243 sub %l1,%o7,%l1 2244 2245 fand %f28,%f44,%f24 2246 sub %l2,%o7,%l2 2247 2248 fsubd %f10,%f14,%f10 2249 srl %l1,10,%l1 2250 2251 fsubd %f20,%f24,%f20 2252 srl %l2,10,%l2 2253 2254 fmuld %f0,%f0,%f0 2255 ldd [%l5+%o3],%f32 2256 add %l5,%o3,%l0 2257 2258 faddd %f10,%f12,%f10 2259 andn %l1,0x1f,%l1 2260 2261 faddd %f20,%f22,%f20 2262 andn %l2,0x1f,%l2 2263 2264 fmuld %f0,%f32,%f4 2265 ldd [%l0+0x10],%f6 2266 add %fp,%o3,%o3 2267 2268 fmuld %f10,%f10,%f12 2269 add %l1,%o4,%l1 2270 2271 fmuld %f20,%f20,%f22 2272 add %l2,%o5,%l2 2273 2274 faddd %f4,%f6,%f4 2275 ldd [%l0+0x20],%f32 2276 2277 fmuld %f12,%f58,%f16 2278 ldd [%l3+%l1],%f34 2279 2280 fmuld %f22,%f58,%f26 2281 ldd [%l3+%l2],%f36 2282 2283 fmuld %f0,%f4,%f4 2284 ldd [%l0+0x30],%f6 2285 2286 faddd %f16,%f56,%f16 2287 fmuld %f12,%f62,%f14 2288 2289 faddd %f26,%f56,%f26 2290 fmuld %f22,%f62,%f24 2291 2292 faddd %f4,%f32,%f4 2293 ldd [%o3+x0_1],%f32 2294 2295 fmuld %f12,%f16,%f16 2296 faddd %f14,%f60,%f14 2297 2298 fmuld %f22,%f26,%f26 2299 faddd %f24,%f60,%f24 2300 2301 fmuld %f0,%f4,%f4 2302 2303 faddd %f16,%f54,%f16 2304 fmuld %f12,%f14,%f14 2305 ldd [%g1+%l1],%f12 2306 2307 faddd %f26,%f54,%f26 2308 fmuld %f22,%f24,%f24 2309 ldd [%g1+%l2],%f22 2310 2311 faddd %f4,%f6,%f4 2312 2313 fmuld %f10,%f16,%f16 2314 ldd [%l4+%l1],%f10 2315 2316 fmuld %f20,%f26,%f26 2317 ldd [%l4+%l2],%f20 2318 2319 fmuld %f14,%f34,%f14 2320 std %f2,[%fp+y0_0] 2321 2322 fmuld %f24,%f36,%f24 2323 2324 fmuld %f0,%f4,%f4 2325 2326 fmuld %f16,%f12,%f16 2327 2328 fmuld %f26,%f22,%f26 2329 2330 fmuld 
%f32,%f4,%f4 2331 ldd [%o3+y0_0],%f2 2332 2333 faddd %f16,%f14,%f16 2334 2335 faddd %f26,%f24,%f26 2336 2337 faddd %f4,%f2,%f4 2338 2339 faddd %f16,%f10,%f16 2340 2341 faddd %f26,%f20,%f26 2342 2343 faddd %f32,%f4,%f6 2344 2345 faddd %f16,%f34,%f16 2346 ba,pt %icc,.FIXSIGN 2347 2348! delay slot 2349 faddd %f26,%f36,%f26 2350 2351 .align 32 2352.CASE5: 2353 fand %f18,%f44,%f14 2354 sub %l1,%o7,%l1 2355 2356 fmuld %f0,%f0,%f0 2357 ldd [%l5+%o3],%f32 2358 add %l5,%o3,%l0 2359 2360 fsubd %f10,%f14,%f10 2361 srl %l1,10,%l1 2362 2363 fmuld %f20,%f20,%f20 2364 ldd [%l5+%o5],%f36 2365 add %l5,%o5,%l2 2366 2367 fmuld %f0,%f32,%f4 2368 ldd [%l0+0x10],%f6 2369 add %fp,%o3,%o3 2370 2371 faddd %f10,%f12,%f10 2372 andn %l1,0x1f,%l1 2373 2374 fmuld %f20,%f36,%f24 2375 ldd [%l2+0x10],%f26 2376 add %fp,%o5,%o5 2377 2378 faddd %f4,%f6,%f4 2379 ldd [%l0+0x20],%f32 2380 2381 fmuld %f10,%f10,%f12 2382 add %l1,%o4,%l1 2383 2384 faddd %f24,%f26,%f24 2385 ldd [%l2+0x20],%f36 2386 2387 fmuld %f0,%f4,%f4 2388 ldd [%l0+0x30],%f6 2389 2390 fmuld %f12,%f58,%f16 2391 ldd [%l3+%l1],%f34 2392 2393 fmuld %f20,%f24,%f24 2394 ldd [%l2+0x30],%f26 2395 2396 faddd %f4,%f32,%f4 2397 ldd [%o3+x0_1],%f32 2398 2399 faddd %f16,%f56,%f16 2400 fmuld %f12,%f62,%f14 2401 2402 faddd %f24,%f36,%f24 2403 ldd [%o5+x2_1],%f36 2404 2405 fmuld %f0,%f4,%f4 2406 std %f2,[%fp+y0_0] 2407 2408 fmuld %f12,%f16,%f16 2409 faddd %f14,%f60,%f14 2410 2411 fmuld %f20,%f24,%f24 2412 std %f22,[%fp+y2_0] 2413 2414 faddd %f4,%f6,%f4 2415 2416 faddd %f16,%f54,%f16 2417 fmuld %f12,%f14,%f14 2418 ldd [%g1+%l1],%f12 2419 2420 faddd %f24,%f26,%f24 2421 2422 fmuld %f0,%f4,%f4 2423 2424 fmuld %f10,%f16,%f16 2425 ldd [%l4+%l1],%f10 2426 2427 fmuld %f14,%f34,%f14 2428 2429 fmuld %f20,%f24,%f24 2430 2431 fmuld %f16,%f12,%f16 2432 2433 fmuld %f32,%f4,%f4 2434 ldd [%o3+y0_0],%f2 2435 2436 fmuld %f36,%f24,%f24 2437 ldd [%o5+y2_0],%f22 2438 2439 faddd %f16,%f14,%f16 2440 2441 faddd %f4,%f2,%f4 2442 2443 faddd %f24,%f22,%f24 2444 2445 faddd 
%f16,%f10,%f16 2446 2447 faddd %f32,%f4,%f6 2448 2449 faddd %f36,%f24,%f26 2450 ba,pt %icc,.FIXSIGN 2451 2452! delay slot 2453 faddd %f16,%f34,%f16 2454 2455 .align 32 2456.CASE6: 2457 ld [%fp+x2_1],%l2 2458 add %l3,8,%g1 2459 bne,pn %icc,.CASE7 2460! delay slot 2461 fpadd32s %f20,%f31,%f28 2462 2463 fand %f28,%f44,%f24 2464 ldd [%l5+%o3],%f32 2465 add %l5,%o3,%l0 2466 2467 fmuld %f0,%f0,%f0 2468 sub %l2,%o7,%l2 2469 2470 fsubd %f20,%f24,%f20 2471 srl %l2,10,%l2 2472 2473 fmuld %f10,%f10,%f10 2474 ldd [%l5+%o4],%f34 2475 add %l5,%o4,%l1 2476 2477 fmuld %f0,%f32,%f4 2478 ldd [%l0+0x10],%f6 2479 add %fp,%o3,%o3 2480 2481 faddd %f20,%f22,%f20 2482 andn %l2,0x1f,%l2 2483 2484 fmuld %f10,%f34,%f14 2485 ldd [%l1+0x10],%f16 2486 add %fp,%o4,%o4 2487 2488 faddd %f4,%f6,%f4 2489 ldd [%l0+0x20],%f32 2490 2491 fmuld %f20,%f20,%f22 2492 add %l2,%o5,%l2 2493 2494 faddd %f14,%f16,%f14 2495 ldd [%l1+0x20],%f34 2496 2497 fmuld %f0,%f4,%f4 2498 ldd [%l0+0x30],%f6 2499 2500 fmuld %f22,%f58,%f26 2501 ldd [%l3+%l2],%f36 2502 2503 fmuld %f10,%f14,%f14 2504 ldd [%l1+0x30],%f16 2505 2506 faddd %f4,%f32,%f4 2507 ldd [%o3+x0_1],%f32 2508 2509 faddd %f26,%f56,%f26 2510 fmuld %f22,%f62,%f24 2511 2512 faddd %f14,%f34,%f14 2513 ldd [%o4+x1_1],%f34 2514 2515 fmuld %f0,%f4,%f4 2516 std %f2,[%fp+y0_0] 2517 2518 fmuld %f22,%f26,%f26 2519 faddd %f24,%f60,%f24 2520 2521 fmuld %f10,%f14,%f14 2522 std %f12,[%fp+y1_0] 2523 2524 faddd %f4,%f6,%f4 2525 2526 faddd %f26,%f54,%f26 2527 fmuld %f22,%f24,%f24 2528 ldd [%g1+%l2],%f22 2529 2530 faddd %f14,%f16,%f14 2531 2532 fmuld %f0,%f4,%f4 2533 2534 fmuld %f20,%f26,%f26 2535 ldd [%l4+%l2],%f20 2536 2537 fmuld %f24,%f36,%f24 2538 2539 fmuld %f10,%f14,%f14 2540 2541 fmuld %f26,%f22,%f26 2542 2543 fmuld %f32,%f4,%f4 2544 ldd [%o3+y0_0],%f2 2545 2546 fmuld %f34,%f14,%f14 2547 ldd [%o4+y1_0],%f12 2548 2549 faddd %f26,%f24,%f26 2550 2551 faddd %f4,%f2,%f4 2552 2553 faddd %f14,%f12,%f14 2554 2555 faddd %f26,%f20,%f26 2556 2557 faddd %f32,%f4,%f6 2558 2559 faddd 
%f34,%f14,%f16 2560 ba,pt %icc,.FIXSIGN 2561 2562! delay slot 2563 faddd %f26,%f36,%f26 2564 2565 .align 32 2566.CASE7: 2567 fmuld %f0,%f0,%f0 2568 ldd [%l5+%o3],%f32 2569 add %l5,%o3,%l0 2570 2571 fmuld %f10,%f10,%f10 2572 ldd [%l5+%o4],%f34 2573 add %l5,%o4,%l1 2574 2575 fmuld %f20,%f20,%f20 2576 ldd [%l5+%o5],%f36 2577 add %l5,%o5,%l2 2578 2579 fmuld %f0,%f32,%f4 2580 ldd [%l0+0x10],%f6 2581 add %fp,%o3,%o3 2582 2583 fmuld %f10,%f34,%f14 2584 ldd [%l1+0x10],%f16 2585 add %fp,%o4,%o4 2586 2587 fmuld %f20,%f36,%f24 2588 ldd [%l2+0x10],%f26 2589 add %fp,%o5,%o5 2590 2591 faddd %f4,%f6,%f4 2592 ldd [%l0+0x20],%f32 2593 2594 faddd %f14,%f16,%f14 2595 ldd [%l1+0x20],%f34 2596 2597 faddd %f24,%f26,%f24 2598 ldd [%l2+0x20],%f36 2599 2600 fmuld %f0,%f4,%f4 2601 ldd [%l0+0x30],%f6 2602 2603 fmuld %f10,%f14,%f14 2604 ldd [%l1+0x30],%f16 2605 2606 fmuld %f20,%f24,%f24 2607 ldd [%l2+0x30],%f26 2608 2609 faddd %f4,%f32,%f4 2610 ldd [%o3+x0_1],%f32 2611 2612 faddd %f14,%f34,%f14 2613 ldd [%o4+x1_1],%f34 2614 2615 faddd %f24,%f36,%f24 2616 ldd [%o5+x2_1],%f36 2617 2618 fmuld %f0,%f4,%f4 2619 std %f2,[%fp+y0_0] 2620 2621 fmuld %f10,%f14,%f14 2622 std %f12,[%fp+y1_0] 2623 2624 fmuld %f20,%f24,%f24 2625 std %f22,[%fp+y2_0] 2626 2627 faddd %f4,%f6,%f4 2628 2629 faddd %f14,%f16,%f14 2630 2631 faddd %f24,%f26,%f24 2632 2633 fmuld %f0,%f4,%f4 2634 2635 fmuld %f10,%f14,%f14 2636 2637 fmuld %f20,%f24,%f24 2638 2639 fmuld %f32,%f4,%f4 2640 ldd [%o3+y0_0],%f2 2641 2642 fmuld %f34,%f14,%f14 2643 ldd [%o4+y1_0],%f12 2644 2645 fmuld %f36,%f24,%f24 2646 ldd [%o5+y2_0],%f22 2647 2648 faddd %f4,%f2,%f4 2649 2650 faddd %f14,%f12,%f14 2651 2652 faddd %f24,%f22,%f24 2653 2654 faddd %f32,%f4,%f6 2655 2656 faddd %f34,%f14,%f16 2657 ba,pt %icc,.FIXSIGN 2658 2659! delay slot 2660 faddd %f36,%f24,%f26 2661 2662 2663 .align 32 2664.ENDLOOP2: 2665 fmuld %f10,%f40,%f12 2666 add %l5,thresh,%g1 2667 faddd %f12,%f42,%f12 2668 st %f13,[%fp+n1] 2669 fsubd %f12,%f42,%f12 ! 
n 2670 fmuld %f12,%f46,%f14 2671 fsubd %f10,%f14,%f14 2672 fmuld %f12,%f48,%f16 2673 fsubd %f14,%f16,%f10 2674 ld [%fp+n1],%o4 2675 fsubd %f14,%f10,%f34 2676 and %o4,1,%o4 2677 fsubd %f34,%f16,%f34 2678 fmuld %f12,%f50,%f18 2679 sll %o4,3,%o4 2680 fsubd %f18,%f34,%f18 2681 ld [%g1+%o4],%f16 2682 fsubd %f10,%f18,%f14 2683 fsubd %f10,%f14,%f34 2684 add %l5,thresh+4,%o7 2685 fsubd %f34,%f18,%f34 2686 fmuld %f12,%f52,%f12 2687 fsubd %f12,%f34,%f12 2688 ld [%o7+%o4],%f18 2689 fsubd %f14,%f12,%f10 ! x 2690 fsubd %f14,%f10,%f14 2691 fands %f10,%f30,%f19 ! save signbit 2692 fabsd %f10,%f10 2693 std %f10,[%fp+x1_1] 2694 fsubd %f14,%f12,%f12 ! y 2695 fcmpgt32 %f16,%f10,%l1 2696 fxors %f12,%f19,%f12 2697 fands %f19,%f18,%f19 ! if (n & 1) clear sign bit 2698 andcc %l1,2,%g0 2699 bne,pn %icc,1f 2700! delay slot 2701 nop 2702 fpadd32s %f10,%f31,%f18 2703 ld [%fp+x1_1],%l1 2704 fand %f18,%f44,%f14 2705 sethi %hi(0x3fc3c000),%o7 2706 add %l3,8,%g1 2707 fsubd %f10,%f14,%f10 2708 sub %l1,%o7,%l1 2709 srl %l1,10,%l1 2710 faddd %f10,%f12,%f10 2711 andn %l1,0x1f,%l1 2712 fmuld %f10,%f10,%f12 2713 add %l1,%o4,%l1 2714 fmuld %f12,%f58,%f16 2715 ldd [%l3+%l1],%f34 2716 faddd %f16,%f56,%f16 2717 fmuld %f12,%f62,%f14 2718 fmuld %f12,%f16,%f16 2719 faddd %f14,%f60,%f14 2720 faddd %f16,%f54,%f16 2721 fmuld %f12,%f14,%f14 2722 ldd [%g1+%l1],%f12 2723 fmuld %f10,%f16,%f16 2724 ldd [%l4+%l1],%f10 2725 fmuld %f14,%f34,%f14 2726 fmuld %f16,%f12,%f16 2727 faddd %f16,%f14,%f16 2728 faddd %f16,%f10,%f16 2729 ba,pt %icc,2f 2730 faddd %f16,%f34,%f16 27311: 2732 fmuld %f10,%f10,%f10 2733 ldd [%l5+%o4],%f34 2734 add %l5,%o4,%l1 2735 fmuld %f10,%f34,%f14 2736 ldd [%l1+0x10],%f16 2737 add %fp,%o4,%o4 2738 faddd %f14,%f16,%f14 2739 ldd [%l1+0x20],%f34 2740 fmuld %f10,%f14,%f14 2741 ldd [%l1+0x30],%f16 2742 faddd %f14,%f34,%f14 2743 ldd [%o4+x1_1],%f34 2744 fmuld %f10,%f14,%f14 2745 std %f12,[%fp+y1_0] 2746 faddd %f14,%f16,%f14 2747 fmuld %f10,%f14,%f14 2748 fmuld %f34,%f14,%f14 2749 ldd [%o4+y1_0],%f12 2750 
faddd %f14,%f12,%f14 2751 faddd %f34,%f14,%f16 27522: 2753 add %l5,thresh-4,%g1 2754 ld [%fp+n1],%o4 2755 and %o4,2,%o4 2756 sll %o4,2,%o4 2757 ld [%g1+%o4],%f18 2758 fxors %f19,%f18,%f19 2759 fors %f16,%f19,%f16 ! tack on sign 2760 st %f16,[%o1] 2761 st %f17,[%o1+4] 2762 2763.ENDLOOP1: 2764 fmuld %f0,%f40,%f2 2765 add %l5,thresh,%g1 2766 faddd %f2,%f42,%f2 2767 st %f3,[%fp+n0] 2768 fsubd %f2,%f42,%f2 ! n 2769 fmuld %f2,%f46,%f4 2770 fsubd %f0,%f4,%f4 2771 fmuld %f2,%f48,%f6 2772 fsubd %f4,%f6,%f0 2773 ld [%fp+n0],%o3 2774 fsubd %f4,%f0,%f32 2775 and %o3,1,%o3 2776 fsubd %f32,%f6,%f32 2777 fmuld %f2,%f50,%f8 2778 sll %o3,3,%o3 2779 fsubd %f8,%f32,%f8 2780 ld [%g1+%o3],%f6 2781 fsubd %f0,%f8,%f4 2782 fsubd %f0,%f4,%f32 2783 add %l5,thresh+4,%o7 2784 fsubd %f32,%f8,%f32 2785 fmuld %f2,%f52,%f2 2786 fsubd %f2,%f32,%f2 2787 ld [%o7+%o3],%f8 2788 fsubd %f4,%f2,%f0 ! x 2789 fsubd %f4,%f0,%f4 2790 fands %f0,%f30,%f9 ! save signbit 2791 fabsd %f0,%f0 2792 std %f0,[%fp+x0_1] 2793 fsubd %f4,%f2,%f2 ! y 2794 fcmpgt32 %f6,%f0,%l0 2795 fxors %f2,%f9,%f2 2796 fands %f9,%f8,%f9 ! if (n & 1) clear sign bit 2797 andcc %l0,2,%g0 2798 bne,pn %icc,1f 2799! 
delay slot 2800 nop 2801 fpadd32s %f0,%f31,%f8 2802 ld [%fp+x0_1],%l0 2803 fand %f8,%f44,%f4 2804 sethi %hi(0x3fc3c000),%o7 2805 add %l3,8,%g1 2806 fsubd %f0,%f4,%f0 2807 sub %l0,%o7,%l0 2808 srl %l0,10,%l0 2809 faddd %f0,%f2,%f0 2810 andn %l0,0x1f,%l0 2811 fmuld %f0,%f0,%f2 2812 add %l0,%o3,%l0 2813 fmuld %f2,%f58,%f6 2814 ldd [%l3+%l0],%f32 2815 faddd %f6,%f56,%f6 2816 fmuld %f2,%f62,%f4 2817 fmuld %f2,%f6,%f6 2818 faddd %f4,%f60,%f4 2819 faddd %f6,%f54,%f6 2820 fmuld %f2,%f4,%f4 2821 ldd [%g1+%l0],%f2 2822 fmuld %f0,%f6,%f6 2823 ldd [%l4+%l0],%f0 2824 fmuld %f4,%f32,%f4 2825 fmuld %f6,%f2,%f6 2826 faddd %f6,%f4,%f6 2827 faddd %f6,%f0,%f6 2828 ba,pt %icc,2f 2829 faddd %f6,%f32,%f6 28301: 2831 fmuld %f0,%f0,%f0 2832 ldd [%l5+%o3],%f32 2833 add %l5,%o3,%l0 2834 fmuld %f0,%f32,%f4 2835 ldd [%l0+0x10],%f6 2836 add %fp,%o3,%o3 2837 faddd %f4,%f6,%f4 2838 ldd [%l0+0x20],%f32 2839 fmuld %f0,%f4,%f4 2840 ldd [%l0+0x30],%f6 2841 faddd %f4,%f32,%f4 2842 ldd [%o3+x0_1],%f32 2843 fmuld %f0,%f4,%f4 2844 std %f2,[%fp+y0_0] 2845 faddd %f4,%f6,%f4 2846 fmuld %f0,%f4,%f4 2847 fmuld %f32,%f4,%f4 2848 ldd [%o3+y0_0],%f2 2849 faddd %f4,%f2,%f4 2850 faddd %f32,%f4,%f6 28512: 2852 add %l5,thresh-4,%g1 2853 ld [%fp+n0],%o3 2854 and %o3,2,%o3 2855 sll %o3,2,%o3 2856 ld [%g1+%o3],%f8 2857 fxors %f9,%f8,%f9 2858 fors %f6,%f9,%f6 ! tack on sign 2859 st %f6,[%o0] 2860 st %f7,[%o0+4] 2861 2862.ENDLOOP0: 2863 2864! check for huge arguments remaining 2865 2866 tst LIM_l6 2867 be,pt %icc,.exit 2868! delay slot 2869 nop 2870 2871! ========== huge range (use C code) ========== 2872 2873#ifdef __sparcv9 2874 ldx [%fp+xsave],%o1 2875 ldx [%fp+ysave],%o3 2876#else 2877 ld [%fp+xsave],%o1 2878 ld [%fp+ysave],%o3 2879#endif 2880 ld [%fp+nsave],%o0 2881 ld [%fp+sxsave],%o2 2882 ld [%fp+sysave],%o4 2883 sra %o2,0,%o2 ! sign-extend for V9 2884 sra %o4,0,%o4 2885 call __vlibm_vsin_big 2886 mov %l7,%o5 ! 
delay slot 2887 2888.exit: 2889 ret 2890 restore 2891 2892 2893 .align 32 2894.SKIP0: 2895 addcc %i0,-1,%i0 2896 ble,pn %icc,.ENDLOOP0 2897! delay slot, harmless if branch taken 2898 add %i3,%i4,%i3 ! y += stridey 2899 andn %l1,%i5,%l0 ! hx &= ~0x80000000 2900 fmovs %f10,%f0 2901 ld [%i1+4],%f1 2902 ba,pt %icc,.LOOP0 2903! delay slot 2904 add %i1,%i2,%i1 ! x += stridex 2905 2906 2907 .align 32 2908.SKIP1: 2909 addcc %i0,-1,%i0 2910 ble,pn %icc,.ENDLOOP1 2911! delay slot, harmless if branch taken 2912 add %i3,%i4,%i3 ! y += stridey 2913 andn %l2,%i5,%l1 ! hx &= ~0x80000000 2914 fmovs %f20,%f10 2915 ld [%i1+4],%f11 2916 ba,pt %icc,.LOOP1 2917! delay slot 2918 add %i1,%i2,%i1 ! x += stridex 2919 2920 2921 .align 32 2922.SKIP2: 2923 addcc %i0,-1,%i0 2924 ble,pn %icc,.ENDLOOP2 2925! delay slot, harmless if branch taken 2926 add %i3,%i4,%i3 ! y += stridey 2927 ld [%i1],%l2 2928 ld [%i1],%f20 2929 ld [%i1+4],%f21 2930 andn %l2,%i5,%l2 ! hx &= ~0x80000000 2931 ba,pt %icc,.LOOP2 2932! delay slot 2933 add %i1,%i2,%i1 ! x += stridex 2934 2935 2936 .align 32 2937.BIG0: 2938 sethi %hi(0x7ff00000),%o7 2939 cmp %l0,%o7 2940 bl,a,pt %icc,1f ! if hx < 0x7ff00000 2941! delay slot, annulled if branch not taken 2942 mov %l7,LIM_l6 ! set biguns flag or 2943 fsubd %f0,%f0,%f0 ! y = x - x 2944 st %f0,[%o0] 2945 st %f1,[%o0+4] 29461: 2947 addcc %i0,-1,%i0 2948 ble,pn %icc,.ENDLOOP0 2949! delay slot, harmless if branch taken 2950 andn %l1,%i5,%l0 ! hx &= ~0x80000000 2951 fmovd %f10,%f0 2952 ba,pt %icc,.LOOP0 2953! delay slot 2954 add %i1,%i2,%i1 ! x += stridex 2955 2956 2957 .align 32 2958.BIG1: 2959 sethi %hi(0x7ff00000),%o7 2960 cmp %l1,%o7 2961 bl,a,pt %icc,1f ! if hx < 0x7ff00000 2962! delay slot, annulled if branch not taken 2963 mov %l7,LIM_l6 ! set biguns flag or 2964 fsubd %f10,%f10,%f10 ! y = x - x 2965 st %f10,[%o1] 2966 st %f11,[%o1+4] 29671: 2968 addcc %i0,-1,%i0 2969 ble,pn %icc,.ENDLOOP1 2970! delay slot, harmless if branch taken 2971 andn %l2,%i5,%l1 ! 
hx &= ~0x80000000 2972 fmovd %f20,%f10 2973 ba,pt %icc,.LOOP1 2974! delay slot 2975 add %i1,%i2,%i1 ! x += stridex 2976 2977 2978 .align 32 2979.BIG2: 2980 sethi %hi(0x7ff00000),%o7 2981 cmp %l2,%o7 2982 bl,a,pt %icc,1f ! if hx < 0x7ff00000 2983! delay slot, annulled if branch not taken 2984 mov %l7,LIM_l6 ! set biguns flag or 2985 fsubd %f20,%f20,%f20 ! y = x - x 2986 st %f20,[%o2] 2987 st %f21,[%o2+4] 29881: 2989 addcc %i0,-1,%i0 2990 ble,pn %icc,.ENDLOOP2 2991! delay slot 2992 nop 2993 ld [%i1],%l2 2994 ld [%i1],%f20 2995 ld [%i1+4],%f21 2996 andn %l2,%i5,%l2 ! hx &= ~0x80000000 2997 ba,pt %icc,.LOOP2 2998! delay slot 2999 add %i1,%i2,%i1 ! x += stridex 3000 3001 SET_SIZE(__vsin) 3002 3003