1# 2# $NetBSD: fplsp.s,v 1.2 2003/02/05 00:02:32 perry Exp $ 3# 4 5#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 6# MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP 7# M68000 Hi-Performance Microprocessor Division 8# M68060 Software Package Production Release 9# 10# M68060 Software Package Copyright (C) 1993, 1994, 1995, 1996 Motorola Inc. 11# All rights reserved. 12# 13# THE SOFTWARE is provided on an "AS IS" basis and without warranty. 14# To the maximum extent permitted by applicable law, 15# MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED, 16# INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS 17# FOR A PARTICULAR PURPOSE and any warranty against infringement with 18# regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF) 19# and any accompanying written materials. 20# 21# To the maximum extent permitted by applicable law, 22# IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER 23# (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS, 24# BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR OTHER PECUNIARY LOSS) 25# ARISING OF THE USE OR INABILITY TO USE THE SOFTWARE. 26# 27# Motorola assumes no responsibility for the maintenance and support 28# of the SOFTWARE. 29# 30# You are hereby granted a copyright license to use, modify, and distribute the 31# SOFTWARE so long as this entire notice is retained without alteration 32# in any modified and/or redistributed versions, and that such modified 33# versions are clearly identified as such. 34# No licenses are granted by implication, estoppel or otherwise under any 35# patents or trademarks of Motorola, Inc. 36#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 37 38# 39# lfptop.s: 40# This file is appended to the top of the 060ILSP package 41# and contains the entry points into the package. The user, in 42# effect, branches to one of the branch table entries located here. 
#

#
# Entry-point branch table.
# One slot per routine, grouped in threes by argument format: the "s",
# "d" and "x" suffixes correspond to the sgl/dbl/ext inputs loaded by the
# entry points further down. Every slot is a long branch followed by a
# zero word.
# NOTE(review): the zero word presumably pads each slot to a fixed size so
# callers can index the table by a constant stride -- confirm against the
# code that calls into this table before changing slot order or size.
#
	bra.l	_facoss_
	short	0x0000
	bra.l	_facosd_
	short	0x0000
	bra.l	_facosx_
	short	0x0000

	bra.l	_fasins_
	short	0x0000
	bra.l	_fasind_
	short	0x0000
	bra.l	_fasinx_
	short	0x0000

	bra.l	_fatans_
	short	0x0000
	bra.l	_fatand_
	short	0x0000
	bra.l	_fatanx_
	short	0x0000

	bra.l	_fatanhs_
	short	0x0000
	bra.l	_fatanhd_
	short	0x0000
	bra.l	_fatanhx_
	short	0x0000

	bra.l	_fcoss_
	short	0x0000
	bra.l	_fcosd_
	short	0x0000
	bra.l	_fcosx_
	short	0x0000

	bra.l	_fcoshs_
	short	0x0000
	bra.l	_fcoshd_
	short	0x0000
	bra.l	_fcoshx_
	short	0x0000

	bra.l	_fetoxs_
	short	0x0000
	bra.l	_fetoxd_
	short	0x0000
	bra.l	_fetoxx_
	short	0x0000

	bra.l	_fetoxm1s_
	short	0x0000
	bra.l	_fetoxm1d_
	short	0x0000
	bra.l	_fetoxm1x_
	short	0x0000

	bra.l	_fgetexps_
	short	0x0000
	bra.l	_fgetexpd_
	short	0x0000
	bra.l	_fgetexpx_
	short	0x0000

	bra.l	_fgetmans_
	short	0x0000
	bra.l	_fgetmand_
	short	0x0000
	bra.l	_fgetmanx_
	short	0x0000

	bra.l	_flog10s_
	short	0x0000
	bra.l	_flog10d_
	short	0x0000
	bra.l	_flog10x_
	short	0x0000

	bra.l	_flog2s_
	short	0x0000
	bra.l	_flog2d_
	short	0x0000
	bra.l	_flog2x_
	short	0x0000

	bra.l	_flogns_
	short	0x0000
	bra.l	_flognd_
	short	0x0000
	bra.l	_flognx_
	short	0x0000

	bra.l	_flognp1s_
	short	0x0000
	bra.l	_flognp1d_
	short	0x0000
	bra.l	_flognp1x_
	short	0x0000

	bra.l	_fmods_
	short	0x0000
	bra.l	_fmodd_
	short	0x0000
	bra.l	_fmodx_
	short	0x0000

	bra.l	_frems_
	short	0x0000
	bra.l	_fremd_
	short	0x0000
	bra.l	_fremx_
	short	0x0000

	bra.l	_fscales_
	short	0x0000
	bra.l	_fscaled_
	short	0x0000
	bra.l	_fscalex_
	short	0x0000

	bra.l	_fsins_
	short	0x0000
	bra.l	_fsind_
	short	0x0000
	bra.l	_fsinx_
	short	0x0000

	bra.l	_fsincoss_
	short	0x0000
	bra.l	_fsincosd_
	short	0x0000
	bra.l	_fsincosx_
	short	0x0000

	bra.l	_fsinhs_
	short	0x0000
	bra.l	_fsinhd_
	short	0x0000
	bra.l	_fsinhx_
	short	0x0000

	bra.l	_ftans_
	short	0x0000
	bra.l	_ftand_
	short	0x0000
	bra.l	_ftanx_
	short	0x0000

	bra.l	_ftanhs_
	short	0x0000
	bra.l	_ftanhd_
	short	0x0000
	bra.l	_ftanhx_
	short	0x0000

	bra.l	_ftentoxs_
	short	0x0000
	bra.l	_ftentoxd_
	short	0x0000
	bra.l	_ftentoxx_
	short	0x0000

	bra.l	_ftwotoxs_
	short	0x0000
	bra.l	_ftwotoxd_
	short	0x0000
	bra.l	_ftwotoxx_
	short	0x0000

	bra.l	_fabss_
	short	0x0000
	bra.l	_fabsd_
	short	0x0000
	bra.l	_fabsx_
	short	0x0000

	bra.l	_fadds_
	short	0x0000
	bra.l	_faddd_
	short	0x0000
	bra.l	_faddx_
	short	0x0000

	bra.l	_fdivs_
	short	0x0000
	bra.l	_fdivd_
	short	0x0000
	bra.l	_fdivx_
	short	0x0000

	bra.l	_fints_
	short	0x0000
	bra.l	_fintd_
	short	0x0000
	bra.l	_fintx_
	short	0x0000

	bra.l	_fintrzs_
	short	0x0000
	bra.l	_fintrzd_
	short	0x0000
	bra.l	_fintrzx_
	short	0x0000

	bra.l	_fmuls_
	short	0x0000
	bra.l	_fmuld_
	short	0x0000
	bra.l	_fmulx_
	short	0x0000

	bra.l	_fnegs_
	short	0x0000
	bra.l	_fnegd_
	short	0x0000
	bra.l	_fnegx_
	short	0x0000

	bra.l	_fsqrts_
	short	0x0000
	bra.l	_fsqrtd_
	short	0x0000
	bra.l	_fsqrtx_
	short	0x0000

	bra.l	_fsubs_
	short	0x0000
	bra.l	_fsubd_
	short	0x0000
	bra.l	_fsubx_
	short	0x0000

# leave room for future possible additions
	align	0x400

#
# This file contains a set of define statements for constants
# in order to promote readability within the corecode itself.
#

#
# Stack frame layout. The entry points below execute
# "link %a6,&-LOCAL_SIZE" and then address every slot as OFFSET(%a6),
# so all offsets here are relative to %a6 and the locals are negative.
#
set LOCAL_SIZE,		192			# stack frame size(bytes)
set LV,			-LOCAL_SIZE		# stack offset

set EXC_SR,		0x4			# stack status register
set EXC_PC,		0x6			# stack pc
set EXC_VOFF,		0xa			# stacked vector offset
set EXC_EA,		0xc			# stacked <ea>

set EXC_FP,		0x0			# frame pointer

set EXC_AREGS,		-68			# offset of all address regs
set EXC_DREGS,		-100			# offset of all data regs
set EXC_FPREGS,		-36			# offset of all fp regs

# NOTE(review): OLD_A7 evaluates to the same offset as EXC_A6 (6*4), not to
# EXC_A7 (7*4), although its comment calls it a copy of a7 -- confirm this
# is intended before relying on either name.
set EXC_A7,		EXC_AREGS+(7*4)		# offset of saved a7
set OLD_A7,		EXC_AREGS+(6*4)		# extra copy of saved a7
set EXC_A6,		EXC_AREGS+(6*4)		# offset of saved a6
set EXC_A5,		EXC_AREGS+(5*4)
set EXC_A4,		EXC_AREGS+(4*4)
set EXC_A3,		EXC_AREGS+(3*4)
set EXC_A2,		EXC_AREGS+(2*4)
set EXC_A1,		EXC_AREGS+(1*4)
set EXC_A0,		EXC_AREGS+(0*4)
set EXC_D7,		EXC_DREGS+(7*4)
set EXC_D6,		EXC_DREGS+(6*4)
set EXC_D5,		EXC_DREGS+(5*4)
set EXC_D4,		EXC_DREGS+(4*4)
set EXC_D3,		EXC_DREGS+(3*4)
set EXC_D2,		EXC_DREGS+(2*4)
set EXC_D1,		EXC_DREGS+(1*4)
set EXC_D0,		EXC_DREGS+(0*4)

set EXC_FP0,		EXC_FPREGS+(0*12)	# offset of saved fp0
set EXC_FP1,		EXC_FPREGS+(1*12)	# offset of saved fp1
set EXC_FP2,		EXC_FPREGS+(2*12)	# offset of saved fp2 (not used)

set FP_SCR1,		LV+80			# fp scratch 1
set FP_SCR1_EX,		FP_SCR1+0
set FP_SCR1_SGN,	FP_SCR1+2
set FP_SCR1_HI,		FP_SCR1+4
set FP_SCR1_LO,		FP_SCR1+8

set FP_SCR0,		LV+68			# fp scratch 0
set FP_SCR0_EX,		FP_SCR0+0
set FP_SCR0_SGN,	FP_SCR0+2
set FP_SCR0_HI,		FP_SCR0+4
set FP_SCR0_LO,		FP_SCR0+8

set FP_DST,		LV+56			# fp destination operand
set FP_DST_EX,		FP_DST+0
set FP_DST_SGN,		FP_DST+2
set FP_DST_HI,		FP_DST+4
set FP_DST_LO,		FP_DST+8

set FP_SRC,		LV+44			# fp source operand
set FP_SRC_EX,		FP_SRC+0
set FP_SRC_SGN,		FP_SRC+2
set FP_SRC_HI,		FP_SRC+4
set FP_SRC_LO,		FP_SRC+8

set USER_FPIAR,		LV+40			# FP instr address register

set USER_FPSR,		LV+36			# FP status register
set FPSR_CC,		USER_FPSR+0		# FPSR condition codes
set FPSR_QBYTE,		USER_FPSR+1		# FPSR quotient byte
set FPSR_EXCEPT,	USER_FPSR+2		# FPSR exception status byte
set FPSR_AEXCEPT,	USER_FPSR+3		# FPSR accrued exception byte

set USER_FPCR,		LV+32			# FP control register
set FPCR_ENABLE,	USER_FPCR+2		# FPCR exception enable
set FPCR_MODE,		USER_FPCR+3		# FPCR rounding mode control

set L_SCR3,		LV+28			# integer scratch 3
set L_SCR2,		LV+24			# integer scratch 2
set L_SCR1,		LV+20			# integer scratch 1

set STORE_FLG,		LV+19			# flag: operand store (ie. not fcmp/ftst)

# NOTE(review): EXC_TEMP2 shares offset LV+24 with L_SCR2 -- the two names
# alias the same longword; confirm no routine uses both at once.
set EXC_TEMP2,		LV+24			# temporary space
set EXC_TEMP,		LV+16			# temporary space

set DTAG,		LV+15			# destination operand type
set STAG,		LV+14			# source operand type

set SPCOND_FLG,		LV+10			# flag: special case (see below)

set EXC_CC,		LV+8			# saved condition codes
set EXC_EXTWPTR,	LV+4			# saved current PC (active)
set EXC_EXTWORD,	LV+2			# saved extension word
set EXC_CMDREG,		LV+2			# saved extension word
set EXC_OPWORD,		LV+0			# saved operation word

################################

# Helpful macros

set FTEMP,		0			# offsets within an
set FTEMP_EX,		0			# extended precision
set FTEMP_SGN,		2			# value saved in memory.
set FTEMP_HI,		4
set FTEMP_LO,		8
set FTEMP_GRS,		12

set LOCAL,		0			# offsets within an
set LOCAL_EX,		0			# extended precision
set LOCAL_SGN,		2			# value saved in memory.
set LOCAL_HI,		4
set LOCAL_LO,		8
set LOCAL_GRS,		12

set DST,		0			# offsets within an
set DST_EX,		0			# extended precision
set DST_HI,		4			# value saved in memory.
set DST_LO,		8

set SRC,		0			# offsets within an
set SRC_EX,		0			# extended precision
set SRC_HI,		4			# value saved in memory.
set SRC_LO,		8

set SGL_LO,		0x3f81			# min sgl prec exponent
set SGL_HI,		0x407e			# max sgl prec exponent
set DBL_LO,		0x3c01			# min dbl prec exponent
set DBL_HI,		0x43fe			# max dbl prec exponent
set EXT_LO,		0x0			# min ext prec exponent
set EXT_HI,		0x7ffe			# max ext prec exponent

set EXT_BIAS,		0x3fff			# extended precision bias
set SGL_BIAS,		0x007f			# single precision bias
set DBL_BIAS,		0x03ff			# double precision bias

# Operand classes. The entry points below compare the byte returned by
# tag() in %d0 against these values (NORM is tested with tst.b, i.e. zero).
set NORM,		0x00			# operand type for STAG/DTAG
set ZERO,		0x01			# operand type for STAG/DTAG
set INF,		0x02			# operand type for STAG/DTAG
set QNAN,		0x03			# operand type for STAG/DTAG
set DENORM,		0x04			# operand type for STAG/DTAG
set SNAN,		0x05			# operand type for STAG/DTAG
set UNNORM,		0x06			# operand type for STAG/DTAG

##################
# FPSR/FPCR bits #
##################
set neg_bit,		0x3			# negative result
set z_bit,		0x2			# zero result
set inf_bit,		0x1			# infinite result
set nan_bit,		0x0			# NAN result

set q_sn_bit,		0x7			# sign bit of quotient byte

set bsun_bit,		7			# branch on unordered
set snan_bit,		6			# signalling NAN
set operr_bit,		5			# operand error
set ovfl_bit,		4			# overflow
set unfl_bit,		3			# underflow
set dz_bit,		2			# divide by zero
set inex2_bit,		1			# inexact result 2
set inex1_bit,		0			# inexact result 1

set aiop_bit,		7			# accrued invalid operation bit
set aovfl_bit,		6			# accrued overflow bit
set aunfl_bit,		5			# accrued underflow bit
set adz_bit,		4			# accrued dz bit
set ainex_bit,		3			# accrued inexact bit

#############################
# FPSR individual bit masks #
#############################
set neg_mask,		0x08000000		# negative bit mask (lw)
set inf_mask,		0x02000000		# infinity bit mask (lw)
set z_mask,		0x04000000		# zero bit mask (lw)
set nan_mask,		0x01000000		# nan bit mask (lw)

set neg_bmask,		0x08			# negative bit mask (byte)
set inf_bmask,		0x02			# infinity bit mask (byte)
set z_bmask,		0x04			# zero bit mask (byte)
set nan_bmask,		0x01			# nan bit mask (byte)

set bsun_mask,		0x00008000		# bsun exception mask
set snan_mask,		0x00004000		# snan exception mask
set operr_mask,		0x00002000		# operr exception mask
set ovfl_mask,		0x00001000		# overflow exception mask
set unfl_mask,		0x00000800		# underflow exception mask
set dz_mask,		0x00000400		# dz exception mask
set inex2_mask,		0x00000200		# inex2 exception mask
set inex1_mask,		0x00000100		# inex1 exception mask

set aiop_mask,		0x00000080		# accrued invalid operation
set aovfl_mask,		0x00000040		# accrued overflow
set aunfl_mask,		0x00000020		# accrued underflow
set adz_mask,		0x00000010		# accrued divide by zero
set ainex_mask,		0x00000008		# accrued inexact

######################################
# FPSR combinations used in the FPSP #
######################################
set dzinf_mask,		inf_mask+dz_mask+adz_mask
set opnan_mask,		nan_mask+operr_mask+aiop_mask
set nzi_mask,		0x01ffffff		# clears N, Z, and I
set unfinx_mask,	unfl_mask+inex2_mask+aunfl_mask+ainex_mask
set unf2inx_mask,	unfl_mask+inex2_mask+ainex_mask
set ovfinx_mask,	ovfl_mask+inex2_mask+aovfl_mask+ainex_mask
set inx1a_mask,		inex1_mask+ainex_mask
set inx2a_mask,		inex2_mask+ainex_mask
set snaniop_mask,	nan_mask+snan_mask+aiop_mask
set snaniop2_mask,	snan_mask+aiop_mask
set naniop_mask,	nan_mask+aiop_mask
set neginf_mask,	neg_mask+inf_mask
set infaiop_mask,	inf_mask+aiop_mask
set negz_mask,		neg_mask+z_mask
set opaop_mask,		operr_mask+aiop_mask
set unfl_inx_mask,	unfl_mask+aunfl_mask+ainex_mask
set ovfl_inx_mask,	ovfl_mask+aovfl_mask+ainex_mask

#########
# misc. #
#########
set rnd_stky_bit,	29			# stky bit pos in longword

set sign_bit,		0x7			# sign bit
set signan_bit,		0x6			# signalling nan bit

set sgl_thresh,		0x3f81			# minimum sgl exponent
set dbl_thresh,		0x3c01			# minimum dbl exponent

set x_mode,		0x0			# extended precision
set s_mode,		0x4			# single precision
set d_mode,		0x8			# double precision

set rn_mode,		0x0			# round-to-nearest
set rz_mode,		0x1			# round-to-zero
set rm_mode,		0x2			# round-to-minus-infinity
set rp_mode,		0x3			# round-to-plus-infinity

set mantissalen,	64			# length of mantissa in bits

set BYTE,		1			# len(byte) == 1 byte
set WORD,		2			# len(word) == 2 bytes
set LONG,		4			# len(longword) == 4 bytes

set BSUN_VEC,		0xc0			# bsun vector offset
set INEX_VEC,		0xc4			# inexact vector offset
set DZ_VEC,		0xc8			# dz vector offset
set UNFL_VEC,		0xcc			# unfl vector offset
set OPERR_VEC,		0xd0			# operr vector offset
set OVFL_VEC,		0xd4			# ovfl vector offset
set SNAN_VEC,		0xd8			# snan vector offset

###########################
# SPecial CONDition FLaGs #
###########################
set ftrapcc_flg,	0x01			# flag bit: ftrapcc exception
set fbsun_flg,		0x02			# flag bit: bsun exception
set mia7_flg,		0x04			# flag bit: (a7)+ <ea>
set mda7_flg,		0x08			# flag bit: -(a7) <ea>
set fmovm_flg,		0x40			# flag bit: fmovm instruction
set immed_flg,		0x80			# flag bit: &<data> <ea>

set ftrapcc_bit,	0x0
set fbsun_bit,		0x1
set mia7_bit,		0x2
set mda7_bit,		0x3
set immed_bit,		0x7

##################################
# TRANSCENDENTAL "LAST-OP" FLAGS #
##################################
set FMUL_OP,		0x0			# fmul instr performed last
set FDIV_OP,		0x1			# fdiv performed last
set FADD_OP,		0x2			# fadd performed last
set FMOV_OP,		0x3			# fmov performed last

#############
# CONSTANTS #
#############
T1:	long	0x40C62D38,0xD3D64634	# 16381 LOG2 LEAD
T2:	long	0x3D6F90AE,0xB1E75CC7	# 16381 LOG2 TRAIL

PI:	long	0x40000000,0xC90FDAA2,0x2168C235,0x00000000
PIBY2:	long	0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000

TWOBYPI:
	long	0x3FE45F30,0x6DC9C883

#########################################################################
# MONADIC TEMPLATE							#
#########################################################################
#
# _fsins_: entry point taking a single-precision argument at 0x8(%a6).
# The argument is converted to extended precision in FP_SRC, classified
# by tag(), and dispatched: NORM -> ssin, ZERO -> src_zero,
# INF -> t_operr, QNAN -> src_qnan, any other tag value -> ssind.
# d0-d1/a0-a1, FPCR/FPSR and fp1 are reloaded from the frame on exit;
# fp0 is not, so it carries the result back to the caller.
#
	global	_fsins_
_fsins_:
	link	%a6,&-LOCAL_SIZE

	movm.l	&0x0303,EXC_DREGS(%a6)		# save d0-d1/a0-a1
	fmovm.l	%fpcr,%fpsr,USER_FPCR(%a6)	# save ctrl regs
	fmovm.x	&0xc0,EXC_FP0(%a6)		# save fp0/fp1

	fmov.l	&0x0,%fpcr			# zero FPCR

#
#	copy, convert, and tag input argument
#
	fmov.s	0x8(%a6),%fp0			# load sgl input
	fmov.x	%fp0,FP_SRC(%a6)
	lea	FP_SRC(%a6),%a0
	bsr.l	tag				# fetch operand type
	mov.b	%d0,STAG(%a6)
	mov.b	%d0,%d1

# clear the FPSR_CC and FPSR_EXCEPT bytes of the saved FPSR image; the
# quotient and accrued-exception bytes are kept
	andi.l	&0x00ff00ff,USER_FPSR(%a6)

	clr.l	%d0
	mov.b	FPCR_MODE(%a6),%d0		# pass rnd mode,prec

	tst.b	%d1
	bne.b	_L0_2s
	bsr.l	ssin				# operand is a NORM
	bra.b	_L0_6s
_L0_2s:
	cmpi.b	%d1,&ZERO			# is operand a ZERO?
	bne.b	_L0_3s				# no
	bsr.l	src_zero			# yes
	bra.b	_L0_6s
_L0_3s:
	cmpi.b	%d1,&INF			# is operand an INF?
	bne.b	_L0_4s				# no
	bsr.l	t_operr				# yes
	bra.b	_L0_6s
_L0_4s:
	cmpi.b	%d1,&QNAN			# is operand a QNAN?
	bne.b	_L0_5s				# no
	bsr.l	src_qnan			# yes
	bra.b	_L0_6s
_L0_5s:
	bsr.l	ssind				# operand is a DENORM
_L0_6s:

#
#	Result is now in FP0
#
	movm.l	EXC_DREGS(%a6),&0x0303		# restore d0-d1/a0-a1
	fmovm.l	USER_FPCR(%a6),%fpcr,%fpsr	# restore ctrl regs
	fmovm.x	EXC_FP1(%a6),&0x40		# restore fp1
	unlk	%a6
	rts

#
# _fsind_: same sequence as _fsins_, but the argument at 0x8(%a6) is
# loaded as double precision. Dispatch: NORM -> ssin, ZERO -> src_zero,
# INF -> t_operr, QNAN -> src_qnan, any other tag value -> ssind.
#
	global	_fsind_
_fsind_:
	link	%a6,&-LOCAL_SIZE

	movm.l	&0x0303,EXC_DREGS(%a6)		# save d0-d1/a0-a1
	fmovm.l	%fpcr,%fpsr,USER_FPCR(%a6)	# save ctrl regs
	fmovm.x	&0xc0,EXC_FP0(%a6)		# save fp0/fp1

	fmov.l	&0x0,%fpcr			# zero FPCR

#
#	copy, convert, and tag input argument
#
	fmov.d	0x8(%a6),%fp0			# load dbl input
	fmov.x	%fp0,FP_SRC(%a6)
	lea	FP_SRC(%a6),%a0
	bsr.l	tag				# fetch operand type
	mov.b	%d0,STAG(%a6)
	mov.b	%d0,%d1

# clear the FPSR_CC and FPSR_EXCEPT bytes of the saved FPSR image
	andi.l	&0x00ff00ff,USER_FPSR(%a6)

	clr.l	%d0
	mov.b	FPCR_MODE(%a6),%d0		# pass rnd mode,prec

	mov.b	%d1,STAG(%a6)			# same tag byte already stored to STAG above
	tst.b	%d1
	bne.b	_L0_2d
	bsr.l	ssin				# operand is a NORM
	bra.b	_L0_6d
_L0_2d:
	cmpi.b	%d1,&ZERO			# is operand a ZERO?
	bne.b	_L0_3d				# no
	bsr.l	src_zero			# yes
	bra.b	_L0_6d
_L0_3d:
	cmpi.b	%d1,&INF			# is operand an INF?
	bne.b	_L0_4d				# no
	bsr.l	t_operr				# yes
	bra.b	_L0_6d
_L0_4d:
	cmpi.b	%d1,&QNAN			# is operand a QNAN?
	bne.b	_L0_5d				# no
	bsr.l	src_qnan			# yes
	bra.b	_L0_6d
_L0_5d:
	bsr.l	ssind				# operand is a DENORM
_L0_6d:

#
#	Result is now in FP0
#
	movm.l	EXC_DREGS(%a6),&0x0303		# restore d0-d1/a0-a1
	fmovm.l	USER_FPCR(%a6),%fpcr,%fpsr	# restore ctrl regs
	fmovm.x	EXC_FP1(%a6),&0x40		# restore fp1
	unlk	%a6
	rts

#
# _fsinx_: same sequence as _fsins_, but the 12-byte extended-precision
# argument at 0x8(%a6) is copied into FP_SRC with three longword moves
# instead of being loaded through fp0.
#
	global	_fsinx_
_fsinx_:
	link	%a6,&-LOCAL_SIZE

	movm.l	&0x0303,EXC_DREGS(%a6)		# save d0-d1/a0-a1
	fmovm.l	%fpcr,%fpsr,USER_FPCR(%a6)	# save ctrl regs
	fmovm.x	&0xc0,EXC_FP0(%a6)		# save fp0/fp1

	fmov.l	&0x0,%fpcr			# zero FPCR

#
#	copy, convert, and tag input argument
#
	lea	FP_SRC(%a6),%a0
	mov.l	0x8+0x0(%a6),0x0(%a0)		# load ext input
	mov.l	0x8+0x4(%a6),0x4(%a0)
	mov.l	0x8+0x8(%a6),0x8(%a0)
	bsr.l	tag				# fetch operand type
	mov.b	%d0,STAG(%a6)
	mov.b	%d0,%d1

# clear the FPSR_CC and FPSR_EXCEPT bytes of the saved FPSR image
	andi.l	&0x00ff00ff,USER_FPSR(%a6)

	clr.l	%d0
	mov.b	FPCR_MODE(%a6),%d0		# pass rnd mode,prec

	tst.b	%d1
	bne.b	_L0_2x
	bsr.l	ssin				# operand is a NORM
	bra.b	_L0_6x
_L0_2x:
	cmpi.b	%d1,&ZERO			# is operand a ZERO?
	bne.b	_L0_3x				# no
	bsr.l	src_zero			# yes
	bra.b	_L0_6x
_L0_3x:
	cmpi.b	%d1,&INF			# is operand an INF?
	bne.b	_L0_4x				# no
	bsr.l	t_operr				# yes
	bra.b	_L0_6x
_L0_4x:
	cmpi.b	%d1,&QNAN			# is operand a QNAN?
	bne.b	_L0_5x				# no
	bsr.l	src_qnan			# yes
	bra.b	_L0_6x
_L0_5x:
	bsr.l	ssind				# operand is a DENORM
_L0_6x:

#
#	Result is now in FP0
#
	movm.l	EXC_DREGS(%a6),&0x0303		# restore d0-d1/a0-a1
	fmovm.l	USER_FPCR(%a6),%fpcr,%fpsr	# restore ctrl regs
	fmovm.x	EXC_FP1(%a6),&0x40		# restore fp1
	unlk	%a6
	rts


#########################################################################
# MONADIC TEMPLATE							#
#########################################################################
#
# _fcoss_: entry point taking a single-precision argument at 0x8(%a6).
# Dispatch on the tag() result: NORM -> scos, ZERO -> ld_pone,
# INF -> t_operr, then the QNAN test that begins on the last line here.
#
	global	_fcoss_
_fcoss_:
	link	%a6,&-LOCAL_SIZE

	movm.l	&0x0303,EXC_DREGS(%a6)		# save d0-d1/a0-a1
	fmovm.l	%fpcr,%fpsr,USER_FPCR(%a6)	# save ctrl regs
	fmovm.x	&0xc0,EXC_FP0(%a6)		# save fp0/fp1

	fmov.l	&0x0,%fpcr			# zero FPCR

#
#	copy, convert, and tag input argument
#
	fmov.s	0x8(%a6),%fp0			# load sgl input
	fmov.x	%fp0,FP_SRC(%a6)
	lea	FP_SRC(%a6),%a0
	bsr.l	tag				# fetch operand type
	mov.b	%d0,STAG(%a6)
	mov.b	%d0,%d1

# clear the FPSR_CC and FPSR_EXCEPT bytes of the saved FPSR image
	andi.l	&0x00ff00ff,USER_FPSR(%a6)

	clr.l	%d0
	mov.b	FPCR_MODE(%a6),%d0		# pass rnd mode,prec

	tst.b	%d1
	bne.b	_L1_2s
	bsr.l	scos				# operand is a NORM
	bra.b	_L1_6s
_L1_2s:
	cmpi.b	%d1,&ZERO			# is operand a ZERO?
	bne.b	_L1_3s				# no
	bsr.l	ld_pone				# yes
	bra.b	_L1_6s
_L1_3s:
	cmpi.b	%d1,&INF			# is operand an INF?
	bne.b	_L1_4s				# no
	bsr.l	t_operr				# yes
	bra.b	_L1_6s
_L1_4s:
	cmpi.b	%d1,&QNAN			# is operand a QNAN?
# (_fcoss_ continued: acts on the QNAN comparison made just before this point)
	bne.b	_L1_5s				# no
	bsr.l	src_qnan			# yes
	bra.b	_L1_6s
_L1_5s:
	bsr.l	scosd				# operand is a DENORM
_L1_6s:

#
#	Result is now in FP0
#
	movm.l	EXC_DREGS(%a6),&0x0303		# restore d0-d1/a0-a1
	fmovm.l	USER_FPCR(%a6),%fpcr,%fpsr	# restore ctrl regs
	fmovm.x	EXC_FP1(%a6),&0x40		# restore fp1
	unlk	%a6
	rts

#
# _fcosd_: entry point taking a double-precision argument at 0x8(%a6).
# The argument is converted to extended precision in FP_SRC, classified
# by tag(), and dispatched: NORM -> scos, ZERO -> ld_pone,
# INF -> t_operr, QNAN -> src_qnan, any other tag value -> scosd.
# d0-d1/a0-a1, FPCR/FPSR and fp1 are reloaded from the frame on exit;
# fp0 is not, so it carries the result back to the caller.
#
	global	_fcosd_
_fcosd_:
	link	%a6,&-LOCAL_SIZE

	movm.l	&0x0303,EXC_DREGS(%a6)		# save d0-d1/a0-a1
	fmovm.l	%fpcr,%fpsr,USER_FPCR(%a6)	# save ctrl regs
	fmovm.x	&0xc0,EXC_FP0(%a6)		# save fp0/fp1

	fmov.l	&0x0,%fpcr			# zero FPCR

#
#	copy, convert, and tag input argument
#
	fmov.d	0x8(%a6),%fp0			# load dbl input
	fmov.x	%fp0,FP_SRC(%a6)
	lea	FP_SRC(%a6),%a0
	bsr.l	tag				# fetch operand type
	mov.b	%d0,STAG(%a6)
	mov.b	%d0,%d1

# clear the FPSR_CC and FPSR_EXCEPT bytes of the saved FPSR image; the
# quotient and accrued-exception bytes are kept
	andi.l	&0x00ff00ff,USER_FPSR(%a6)

	clr.l	%d0
	mov.b	FPCR_MODE(%a6),%d0		# pass rnd mode,prec

	mov.b	%d1,STAG(%a6)			# same tag byte already stored to STAG above
	tst.b	%d1
	bne.b	_L1_2d
	bsr.l	scos				# operand is a NORM
	bra.b	_L1_6d
_L1_2d:
	cmpi.b	%d1,&ZERO			# is operand a ZERO?
	bne.b	_L1_3d				# no
	bsr.l	ld_pone				# yes
	bra.b	_L1_6d
_L1_3d:
	cmpi.b	%d1,&INF			# is operand an INF?
	bne.b	_L1_4d				# no
	bsr.l	t_operr				# yes
	bra.b	_L1_6d
_L1_4d:
	cmpi.b	%d1,&QNAN			# is operand a QNAN?
	bne.b	_L1_5d				# no
	bsr.l	src_qnan			# yes
	bra.b	_L1_6d
_L1_5d:
	bsr.l	scosd				# operand is a DENORM
_L1_6d:

#
#	Result is now in FP0
#
	movm.l	EXC_DREGS(%a6),&0x0303		# restore d0-d1/a0-a1
	fmovm.l	USER_FPCR(%a6),%fpcr,%fpsr	# restore ctrl regs
	fmovm.x	EXC_FP1(%a6),&0x40		# restore fp1
	unlk	%a6
	rts

#
# _fcosx_: same sequence as _fcosd_, but the 12-byte extended-precision
# argument at 0x8(%a6) is copied into FP_SRC with three longword moves
# instead of being loaded through fp0.
#
	global	_fcosx_
_fcosx_:
	link	%a6,&-LOCAL_SIZE

	movm.l	&0x0303,EXC_DREGS(%a6)		# save d0-d1/a0-a1
	fmovm.l	%fpcr,%fpsr,USER_FPCR(%a6)	# save ctrl regs
	fmovm.x	&0xc0,EXC_FP0(%a6)		# save fp0/fp1

	fmov.l	&0x0,%fpcr			# zero FPCR

#
#	copy, convert, and tag input argument
#
	lea	FP_SRC(%a6),%a0
	mov.l	0x8+0x0(%a6),0x0(%a0)		# load ext input
	mov.l	0x8+0x4(%a6),0x4(%a0)
	mov.l	0x8+0x8(%a6),0x8(%a0)
	bsr.l	tag				# fetch operand type
	mov.b	%d0,STAG(%a6)
	mov.b	%d0,%d1

# clear the FPSR_CC and FPSR_EXCEPT bytes of the saved FPSR image
	andi.l	&0x00ff00ff,USER_FPSR(%a6)

	clr.l	%d0
	mov.b	FPCR_MODE(%a6),%d0		# pass rnd mode,prec

	tst.b	%d1
	bne.b	_L1_2x
	bsr.l	scos				# operand is a NORM
	bra.b	_L1_6x
_L1_2x:
	cmpi.b	%d1,&ZERO			# is operand a ZERO?
	bne.b	_L1_3x				# no
	bsr.l	ld_pone				# yes
	bra.b	_L1_6x
_L1_3x:
	cmpi.b	%d1,&INF			# is operand an INF?
	bne.b	_L1_4x				# no
	bsr.l	t_operr				# yes
	bra.b	_L1_6x
_L1_4x:
	cmpi.b	%d1,&QNAN			# is operand a QNAN?
	bne.b	_L1_5x				# no
	bsr.l	src_qnan			# yes
	bra.b	_L1_6x
_L1_5x:
	bsr.l	scosd				# operand is a DENORM
_L1_6x:

#
#	Result is now in FP0
#
	movm.l	EXC_DREGS(%a6),&0x0303		# restore d0-d1/a0-a1
	fmovm.l	USER_FPCR(%a6),%fpcr,%fpsr	# restore ctrl regs
	fmovm.x	EXC_FP1(%a6),&0x40		# restore fp1
	unlk	%a6
	rts


#########################################################################
# MONADIC TEMPLATE							#
#########################################################################
#
# _fsinhs_: entry point taking a single-precision argument at 0x8(%a6).
# Dispatch on the tag() result: NORM -> ssinh, ZERO -> src_zero,
# INF -> src_inf, then the QNAN test that begins on the last line here.
#
	global	_fsinhs_
_fsinhs_:
	link	%a6,&-LOCAL_SIZE

	movm.l	&0x0303,EXC_DREGS(%a6)		# save d0-d1/a0-a1
	fmovm.l	%fpcr,%fpsr,USER_FPCR(%a6)	# save ctrl regs
	fmovm.x	&0xc0,EXC_FP0(%a6)		# save fp0/fp1

	fmov.l	&0x0,%fpcr			# zero FPCR

#
#	copy, convert, and tag input argument
#
	fmov.s	0x8(%a6),%fp0			# load sgl input
	fmov.x	%fp0,FP_SRC(%a6)
	lea	FP_SRC(%a6),%a0
	bsr.l	tag				# fetch operand type
	mov.b	%d0,STAG(%a6)
	mov.b	%d0,%d1

# clear the FPSR_CC and FPSR_EXCEPT bytes of the saved FPSR image
	andi.l	&0x00ff00ff,USER_FPSR(%a6)

	clr.l	%d0
	mov.b	FPCR_MODE(%a6),%d0		# pass rnd mode,prec

	tst.b	%d1
	bne.b	_L2_2s
	bsr.l	ssinh				# operand is a NORM
	bra.b	_L2_6s
_L2_2s:
	cmpi.b	%d1,&ZERO			# is operand a ZERO?
	bne.b	_L2_3s				# no
	bsr.l	src_zero			# yes
	bra.b	_L2_6s
_L2_3s:
	cmpi.b	%d1,&INF			# is operand an INF?
	bne.b	_L2_4s				# no
	bsr.l	src_inf				# yes
	bra.b	_L2_6s
_L2_4s:
	cmpi.b	%d1,&QNAN			# is operand a QNAN?
# (_fsinhs_ continued: acts on the QNAN comparison made just before this point)
	bne.b	_L2_5s				# no
	bsr.l	src_qnan			# yes
	bra.b	_L2_6s
_L2_5s:
	bsr.l	ssinhd				# operand is a DENORM
_L2_6s:

#
#	Result is now in FP0
#
	movm.l	EXC_DREGS(%a6),&0x0303		# restore d0-d1/a0-a1
	fmovm.l	USER_FPCR(%a6),%fpcr,%fpsr	# restore ctrl regs
	fmovm.x	EXC_FP1(%a6),&0x40		# restore fp1
	unlk	%a6
	rts

#
# _fsinhd_: entry point taking a double-precision argument at 0x8(%a6).
# The argument is converted to extended precision in FP_SRC, classified
# by tag(), and dispatched: NORM -> ssinh, ZERO -> src_zero,
# INF -> src_inf, QNAN -> src_qnan, any other tag value -> ssinhd.
# d0-d1/a0-a1, FPCR/FPSR and fp1 are reloaded from the frame on exit;
# fp0 is not, so it carries the result back to the caller.
#
	global	_fsinhd_
_fsinhd_:
	link	%a6,&-LOCAL_SIZE

	movm.l	&0x0303,EXC_DREGS(%a6)		# save d0-d1/a0-a1
	fmovm.l	%fpcr,%fpsr,USER_FPCR(%a6)	# save ctrl regs
	fmovm.x	&0xc0,EXC_FP0(%a6)		# save fp0/fp1

	fmov.l	&0x0,%fpcr			# zero FPCR

#
#	copy, convert, and tag input argument
#
	fmov.d	0x8(%a6),%fp0			# load dbl input
	fmov.x	%fp0,FP_SRC(%a6)
	lea	FP_SRC(%a6),%a0
	bsr.l	tag				# fetch operand type
	mov.b	%d0,STAG(%a6)
	mov.b	%d0,%d1

# clear the FPSR_CC and FPSR_EXCEPT bytes of the saved FPSR image; the
# quotient and accrued-exception bytes are kept
	andi.l	&0x00ff00ff,USER_FPSR(%a6)

	clr.l	%d0
	mov.b	FPCR_MODE(%a6),%d0		# pass rnd mode,prec

	mov.b	%d1,STAG(%a6)			# same tag byte already stored to STAG above
	tst.b	%d1
	bne.b	_L2_2d
	bsr.l	ssinh				# operand is a NORM
	bra.b	_L2_6d
_L2_2d:
	cmpi.b	%d1,&ZERO			# is operand a ZERO?
	bne.b	_L2_3d				# no
	bsr.l	src_zero			# yes
	bra.b	_L2_6d
_L2_3d:
	cmpi.b	%d1,&INF			# is operand an INF?
	bne.b	_L2_4d				# no
	bsr.l	src_inf				# yes
	bra.b	_L2_6d
_L2_4d:
	cmpi.b	%d1,&QNAN			# is operand a QNAN?
	bne.b	_L2_5d				# no
	bsr.l	src_qnan			# yes
	bra.b	_L2_6d
_L2_5d:
	bsr.l	ssinhd				# operand is a DENORM
_L2_6d:

#
#	Result is now in FP0
#
	movm.l	EXC_DREGS(%a6),&0x0303		# restore d0-d1/a0-a1
	fmovm.l	USER_FPCR(%a6),%fpcr,%fpsr	# restore ctrl regs
	fmovm.x	EXC_FP1(%a6),&0x40		# restore fp1
	unlk	%a6
	rts

#
# _fsinhx_: same sequence as _fsinhd_, but the 12-byte extended-precision
# argument at 0x8(%a6) is copied into FP_SRC with three longword moves
# instead of being loaded through fp0.
#
	global	_fsinhx_
_fsinhx_:
	link	%a6,&-LOCAL_SIZE

	movm.l	&0x0303,EXC_DREGS(%a6)		# save d0-d1/a0-a1
	fmovm.l	%fpcr,%fpsr,USER_FPCR(%a6)	# save ctrl regs
	fmovm.x	&0xc0,EXC_FP0(%a6)		# save fp0/fp1

	fmov.l	&0x0,%fpcr			# zero FPCR

#
#	copy, convert, and tag input argument
#
	lea	FP_SRC(%a6),%a0
	mov.l	0x8+0x0(%a6),0x0(%a0)		# load ext input
	mov.l	0x8+0x4(%a6),0x4(%a0)
	mov.l	0x8+0x8(%a6),0x8(%a0)
	bsr.l	tag				# fetch operand type
	mov.b	%d0,STAG(%a6)
	mov.b	%d0,%d1

# clear the FPSR_CC and FPSR_EXCEPT bytes of the saved FPSR image
	andi.l	&0x00ff00ff,USER_FPSR(%a6)

	clr.l	%d0
	mov.b	FPCR_MODE(%a6),%d0		# pass rnd mode,prec

	tst.b	%d1
	bne.b	_L2_2x
	bsr.l	ssinh				# operand is a NORM
	bra.b	_L2_6x
_L2_2x:
	cmpi.b	%d1,&ZERO			# is operand a ZERO?
	bne.b	_L2_3x				# no
	bsr.l	src_zero			# yes
	bra.b	_L2_6x
_L2_3x:
	cmpi.b	%d1,&INF			# is operand an INF?
	bne.b	_L2_4x				# no
	bsr.l	src_inf				# yes
	bra.b	_L2_6x
_L2_4x:
	cmpi.b	%d1,&QNAN			# is operand a QNAN?
	bne.b	_L2_5x				# no
	bsr.l	src_qnan			# yes
	bra.b	_L2_6x
_L2_5x:
	bsr.l	ssinhd				# operand is a DENORM
_L2_6x:

#
#	Result is now in FP0
#
	movm.l	EXC_DREGS(%a6),&0x0303		# restore d0-d1/a0-a1
	fmovm.l	USER_FPCR(%a6),%fpcr,%fpsr	# restore ctrl regs
	fmovm.x	EXC_FP1(%a6),&0x40		# restore fp1
	unlk	%a6
	rts


#########################################################################
# MONADIC TEMPLATE							#
#########################################################################
#
# _flognp1s_: entry point taking a single-precision argument at 0x8(%a6).
# Dispatch on the tag() result: NORM -> slognp1, ZERO -> src_zero,
# INF -> sopr_inf, then the QNAN test that begins on the last line here.
#
	global	_flognp1s_
_flognp1s_:
	link	%a6,&-LOCAL_SIZE

	movm.l	&0x0303,EXC_DREGS(%a6)		# save d0-d1/a0-a1
	fmovm.l	%fpcr,%fpsr,USER_FPCR(%a6)	# save ctrl regs
	fmovm.x	&0xc0,EXC_FP0(%a6)		# save fp0/fp1

	fmov.l	&0x0,%fpcr			# zero FPCR

#
#	copy, convert, and tag input argument
#
	fmov.s	0x8(%a6),%fp0			# load sgl input
	fmov.x	%fp0,FP_SRC(%a6)
	lea	FP_SRC(%a6),%a0
	bsr.l	tag				# fetch operand type
	mov.b	%d0,STAG(%a6)
	mov.b	%d0,%d1

# clear the FPSR_CC and FPSR_EXCEPT bytes of the saved FPSR image
	andi.l	&0x00ff00ff,USER_FPSR(%a6)

	clr.l	%d0
	mov.b	FPCR_MODE(%a6),%d0		# pass rnd mode,prec

	tst.b	%d1
	bne.b	_L3_2s
	bsr.l	slognp1				# operand is a NORM
	bra.b	_L3_6s
_L3_2s:
	cmpi.b	%d1,&ZERO			# is operand a ZERO?
	bne.b	_L3_3s				# no
	bsr.l	src_zero			# yes
	bra.b	_L3_6s
_L3_3s:
	cmpi.b	%d1,&INF			# is operand an INF?
	bne.b	_L3_4s				# no
	bsr.l	sopr_inf			# yes
	bra.b	_L3_6s
_L3_4s:
	cmpi.b	%d1,&QNAN			# is operand a QNAN?
1142 bne.b _L3_5s # no 1143 bsr.l src_qnan # yes 1144 bra.b _L3_6s 1145_L3_5s: 1146 bsr.l slognp1d # operand is a DENORM 1147_L3_6s: 1148 1149# 1150# Result is now in FP0 1151# 1152 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 1153 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs 1154 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 1155 unlk %a6 1156 rts 1157 1158 global _flognp1d_ 1159_flognp1d_: 1160 link %a6,&-LOCAL_SIZE 1161 1162 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 1163 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs 1164 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 1165 1166 fmov.l &0x0,%fpcr # zero FPCR 1167 1168# 1169# copy, convert, and tag input argument 1170# 1171 fmov.d 0x8(%a6),%fp0 # load dbl input 1172 fmov.x %fp0,FP_SRC(%a6) 1173 lea FP_SRC(%a6),%a0 1174 bsr.l tag # fetch operand type 1175 mov.b %d0,STAG(%a6) 1176 mov.b %d0,%d1 1177 1178 andi.l &0x00ff00ff,USER_FPSR(%a6) 1179 1180 clr.l %d0 1181 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec 1182 1183 mov.b %d1,STAG(%a6) 1184 tst.b %d1 1185 bne.b _L3_2d 1186 bsr.l slognp1 # operand is a NORM 1187 bra.b _L3_6d 1188_L3_2d: 1189 cmpi.b %d1,&ZERO # is operand a ZERO? 1190 bne.b _L3_3d # no 1191 bsr.l src_zero # yes 1192 bra.b _L3_6d 1193_L3_3d: 1194 cmpi.b %d1,&INF # is operand an INF? 1195 bne.b _L3_4d # no 1196 bsr.l sopr_inf # yes 1197 bra.b _L3_6d 1198_L3_4d: 1199 cmpi.b %d1,&QNAN # is operand a QNAN? 
1200 bne.b _L3_5d # no 1201 bsr.l src_qnan # yes 1202 bra.b _L3_6d 1203_L3_5d: 1204 bsr.l slognp1d # operand is a DENORM 1205_L3_6d: 1206 1207# 1208# Result is now in FP0 1209# 1210 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 1211 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs 1212 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 1213 unlk %a6 1214 rts 1215 1216 global _flognp1x_ 1217_flognp1x_: 1218 link %a6,&-LOCAL_SIZE 1219 1220 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 1221 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs 1222 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 1223 1224 fmov.l &0x0,%fpcr # zero FPCR 1225 1226# 1227# copy, convert, and tag input argument 1228# 1229 lea FP_SRC(%a6),%a0 1230 mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input 1231 mov.l 0x8+0x4(%a6),0x4(%a0) 1232 mov.l 0x8+0x8(%a6),0x8(%a0) 1233 bsr.l tag # fetch operand type 1234 mov.b %d0,STAG(%a6) 1235 mov.b %d0,%d1 1236 1237 andi.l &0x00ff00ff,USER_FPSR(%a6) 1238 1239 clr.l %d0 1240 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec 1241 1242 tst.b %d1 1243 bne.b _L3_2x 1244 bsr.l slognp1 # operand is a NORM 1245 bra.b _L3_6x 1246_L3_2x: 1247 cmpi.b %d1,&ZERO # is operand a ZERO? 1248 bne.b _L3_3x # no 1249 bsr.l src_zero # yes 1250 bra.b _L3_6x 1251_L3_3x: 1252 cmpi.b %d1,&INF # is operand an INF? 1253 bne.b _L3_4x # no 1254 bsr.l sopr_inf # yes 1255 bra.b _L3_6x 1256_L3_4x: 1257 cmpi.b %d1,&QNAN # is operand a QNAN? 
# Continues the entry point begun above; acts on the QNAN compare just made.
	bne.b		_L3_5x			# no
	bsr.l		src_qnan		# yes
	bra.b		_L3_6x
_L3_5x:
	bsr.l		slognp1d		# operand is a DENORM (QNAN compare failed)
_L3_6x:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1 only; fp0 holds the result
	unlk		%a6
	rts


#########################################################################
# MONADIC TEMPLATE							#
#########################################################################
#
# _fetoxm1s_ (global): single-precision operand is read from 0x8(%a6) and
#	stored in extended format at FP_SRC.  d0-d1/a0-a1, FPCR/FPSR and
#	fp0/fp1 are saved in the frame; the handler picked by the operand tag
#	is called with a0 = &FP_SRC and d0 = FPCR_MODE byte.
#
	global		_fetoxm1s_
_fetoxm1s_:
	link		%a6,&-LOCAL_SIZE	# frame of LOCAL_SIZE bytes

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	fmov.s		0x8(%a6),%fp0		# load sgl input
	fmov.x		%fp0,FP_SRC(%a6)	# store as extended
	lea		FP_SRC(%a6),%a0		# a0 = &FP_SRC
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)		# record tag
	mov.b		%d0,%d1			# tag -> d1 (d0 is reused)

#	keep only bits 23-16 and 7-0 of the longword at USER_FPSR
	andi.l		&0x00ff00ff,USER_FPSR(%a6)

#	d0 = FPCR_MODE byte, zero-extended
	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	tst.b		%d1			# tag == 0: NORM
	bne.b		_L4_2s
	bsr.l		setoxm1			# operand is a NORM
	bra.b		_L4_6s
_L4_2s:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L4_3s			# no
	bsr.l		src_zero		# yes
	bra.b		_L4_6s
_L4_3s:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L4_4s			# no
	bsr.l		setoxm1i		# yes
	bra.b		_L4_6s
_L4_4s:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
# Continues the entry point begun above; acts on the QNAN compare just made.
	bne.b		_L4_5s			# no
	bsr.l		src_qnan		# yes
	bra.b		_L4_6s
_L4_5s:
	bsr.l		setoxm1d		# operand is a DENORM (QNAN compare failed)
_L4_6s:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1 only; fp0 holds the result
	unlk		%a6
	rts

#
# _fetoxm1d_ (global): double-precision operand is read from 0x8(%a6) and
#	stored in extended format at FP_SRC.  d0-d1/a0-a1, FPCR/FPSR and
#	fp0/fp1 are saved in the frame; the handler picked by the operand tag
#	is called with a0 = &FP_SRC and d0 = FPCR_MODE byte.
#
	global		_fetoxm1d_
_fetoxm1d_:
	link		%a6,&-LOCAL_SIZE	# frame of LOCAL_SIZE bytes

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	fmov.d		0x8(%a6),%fp0		# load dbl input
	fmov.x		%fp0,FP_SRC(%a6)	# store as extended
	lea		FP_SRC(%a6),%a0		# a0 = &FP_SRC
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)		# record tag
	mov.b		%d0,%d1			# tag -> d1 (d0 is reused)

#	keep only bits 23-16 and 7-0 of the longword at USER_FPSR
	andi.l		&0x00ff00ff,USER_FPSR(%a6)

#	d0 = FPCR_MODE byte, zero-extended
	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

#	NOTE(review): d1 still holds the tag already written to STAG above,
#	so this store looks redundant -- confirm before removing.
	mov.b		%d1,STAG(%a6)
	tst.b		%d1			# tag == 0: NORM
	bne.b		_L4_2d
	bsr.l		setoxm1			# operand is a NORM
	bra.b		_L4_6d
_L4_2d:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L4_3d			# no
	bsr.l		src_zero		# yes
	bra.b		_L4_6d
_L4_3d:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L4_4d			# no
	bsr.l		setoxm1i		# yes
	bra.b		_L4_6d
_L4_4d:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
# Continues the entry point begun above; acts on the QNAN compare just made.
	bne.b		_L4_5d			# no
	bsr.l		src_qnan		# yes
	bra.b		_L4_6d
_L4_5d:
	bsr.l		setoxm1d		# operand is a DENORM (QNAN compare failed)
_L4_6d:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1 only; fp0 holds the result
	unlk		%a6
	rts

#
# _fetoxm1x_ (global): the 12-byte extended operand at 0x8(%a6) is copied to
#	FP_SRC.  d0-d1/a0-a1, FPCR/FPSR and fp0/fp1 are saved in the frame;
#	the handler picked by the operand tag is called with a0 = &FP_SRC
#	and d0 = FPCR_MODE byte.
#
	global		_fetoxm1x_
_fetoxm1x_:
	link		%a6,&-LOCAL_SIZE	# frame of LOCAL_SIZE bytes

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	lea		FP_SRC(%a6),%a0		# a0 = &FP_SRC
	mov.l		0x8+0x0(%a6),0x0(%a0)	# load ext input
	mov.l		0x8+0x4(%a6),0x4(%a0)	# second longword
	mov.l		0x8+0x8(%a6),0x8(%a0)	# third longword
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)		# record tag
	mov.b		%d0,%d1			# tag -> d1 (d0 is reused)

#	keep only bits 23-16 and 7-0 of the longword at USER_FPSR
	andi.l		&0x00ff00ff,USER_FPSR(%a6)

#	d0 = FPCR_MODE byte, zero-extended
	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	tst.b		%d1			# tag == 0: NORM
	bne.b		_L4_2x
	bsr.l		setoxm1			# operand is a NORM
	bra.b		_L4_6x
_L4_2x:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L4_3x			# no
	bsr.l		src_zero		# yes
	bra.b		_L4_6x
_L4_3x:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L4_4x			# no
	bsr.l		setoxm1i		# yes
	bra.b		_L4_6x
_L4_4x:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
# Continues the entry point begun above; acts on the QNAN compare just made.
	bne.b		_L4_5x			# no
	bsr.l		src_qnan		# yes
	bra.b		_L4_6x
_L4_5x:
	bsr.l		setoxm1d		# operand is a DENORM (QNAN compare failed)
_L4_6x:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1 only; fp0 holds the result
	unlk		%a6
	rts


#########################################################################
# MONADIC TEMPLATE							#
#########################################################################
#
# _ftanhs_ (global): single-precision operand is read from 0x8(%a6) and
#	stored in extended format at FP_SRC.  d0-d1/a0-a1, FPCR/FPSR and
#	fp0/fp1 are saved in the frame; the handler picked by the operand tag
#	is called with a0 = &FP_SRC and d0 = FPCR_MODE byte.
#
	global		_ftanhs_
_ftanhs_:
	link		%a6,&-LOCAL_SIZE	# frame of LOCAL_SIZE bytes

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	fmov.s		0x8(%a6),%fp0		# load sgl input
	fmov.x		%fp0,FP_SRC(%a6)	# store as extended
	lea		FP_SRC(%a6),%a0		# a0 = &FP_SRC
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)		# record tag
	mov.b		%d0,%d1			# tag -> d1 (d0 is reused)

#	keep only bits 23-16 and 7-0 of the longword at USER_FPSR
	andi.l		&0x00ff00ff,USER_FPSR(%a6)

#	d0 = FPCR_MODE byte, zero-extended
	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	tst.b		%d1			# tag == 0: NORM
	bne.b		_L5_2s
	bsr.l		stanh			# operand is a NORM
	bra.b		_L5_6s
_L5_2s:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L5_3s			# no
	bsr.l		src_zero		# yes
	bra.b		_L5_6s
_L5_3s:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L5_4s			# no
	bsr.l		src_one			# yes
	bra.b		_L5_6s
_L5_4s:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
# Continues the entry point begun above; acts on the QNAN compare just made.
	bne.b		_L5_5s			# no
	bsr.l		src_qnan		# yes
	bra.b		_L5_6s
_L5_5s:
	bsr.l		stanhd			# operand is a DENORM (QNAN compare failed)
_L5_6s:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1 only; fp0 holds the result
	unlk		%a6
	rts

#
# _ftanhd_ (global): double-precision operand is read from 0x8(%a6) and
#	stored in extended format at FP_SRC.  d0-d1/a0-a1, FPCR/FPSR and
#	fp0/fp1 are saved in the frame; the handler picked by the operand tag
#	is called with a0 = &FP_SRC and d0 = FPCR_MODE byte.
#
	global		_ftanhd_
_ftanhd_:
	link		%a6,&-LOCAL_SIZE	# frame of LOCAL_SIZE bytes

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	fmov.d		0x8(%a6),%fp0		# load dbl input
	fmov.x		%fp0,FP_SRC(%a6)	# store as extended
	lea		FP_SRC(%a6),%a0		# a0 = &FP_SRC
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)		# record tag
	mov.b		%d0,%d1			# tag -> d1 (d0 is reused)

#	keep only bits 23-16 and 7-0 of the longword at USER_FPSR
	andi.l		&0x00ff00ff,USER_FPSR(%a6)

#	d0 = FPCR_MODE byte, zero-extended
	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

#	NOTE(review): d1 still holds the tag already written to STAG above,
#	so this store looks redundant -- confirm before removing.
	mov.b		%d1,STAG(%a6)
	tst.b		%d1			# tag == 0: NORM
	bne.b		_L5_2d
	bsr.l		stanh			# operand is a NORM
	bra.b		_L5_6d
_L5_2d:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L5_3d			# no
	bsr.l		src_zero		# yes
	bra.b		_L5_6d
_L5_3d:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L5_4d			# no
	bsr.l		src_one			# yes
	bra.b		_L5_6d
_L5_4d:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
# Continues the entry point begun above; acts on the QNAN compare just made.
	bne.b		_L5_5d			# no
	bsr.l		src_qnan		# yes
	bra.b		_L5_6d
_L5_5d:
	bsr.l		stanhd			# operand is a DENORM (QNAN compare failed)
_L5_6d:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1 only; fp0 holds the result
	unlk		%a6
	rts

#
# _ftanhx_ (global): the 12-byte extended operand at 0x8(%a6) is copied to
#	FP_SRC.  d0-d1/a0-a1, FPCR/FPSR and fp0/fp1 are saved in the frame;
#	the handler picked by the operand tag is called with a0 = &FP_SRC
#	and d0 = FPCR_MODE byte.
#
	global		_ftanhx_
_ftanhx_:
	link		%a6,&-LOCAL_SIZE	# frame of LOCAL_SIZE bytes

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	lea		FP_SRC(%a6),%a0		# a0 = &FP_SRC
	mov.l		0x8+0x0(%a6),0x0(%a0)	# load ext input
	mov.l		0x8+0x4(%a6),0x4(%a0)	# second longword
	mov.l		0x8+0x8(%a6),0x8(%a0)	# third longword
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)		# record tag
	mov.b		%d0,%d1			# tag -> d1 (d0 is reused)

#	keep only bits 23-16 and 7-0 of the longword at USER_FPSR
	andi.l		&0x00ff00ff,USER_FPSR(%a6)

#	d0 = FPCR_MODE byte, zero-extended
	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	tst.b		%d1			# tag == 0: NORM
	bne.b		_L5_2x
	bsr.l		stanh			# operand is a NORM
	bra.b		_L5_6x
_L5_2x:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L5_3x			# no
	bsr.l		src_zero		# yes
	bra.b		_L5_6x
_L5_3x:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L5_4x			# no
	bsr.l		src_one			# yes
	bra.b		_L5_6x
_L5_4x:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
# Continues the entry point begun above; acts on the QNAN compare just made.
	bne.b		_L5_5x			# no
	bsr.l		src_qnan		# yes
	bra.b		_L5_6x
_L5_5x:
	bsr.l		stanhd			# operand is a DENORM (QNAN compare failed)
_L5_6x:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1 only; fp0 holds the result
	unlk		%a6
	rts


#########################################################################
# MONADIC TEMPLATE							#
#########################################################################
#
# _fatans_ (global): single-precision operand is read from 0x8(%a6) and
#	stored in extended format at FP_SRC.  d0-d1/a0-a1, FPCR/FPSR and
#	fp0/fp1 are saved in the frame; the handler picked by the operand tag
#	is called with a0 = &FP_SRC and d0 = FPCR_MODE byte.
#
	global		_fatans_
_fatans_:
	link		%a6,&-LOCAL_SIZE	# frame of LOCAL_SIZE bytes

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	fmov.s		0x8(%a6),%fp0		# load sgl input
	fmov.x		%fp0,FP_SRC(%a6)	# store as extended
	lea		FP_SRC(%a6),%a0		# a0 = &FP_SRC
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)		# record tag
	mov.b		%d0,%d1			# tag -> d1 (d0 is reused)

#	keep only bits 23-16 and 7-0 of the longword at USER_FPSR
	andi.l		&0x00ff00ff,USER_FPSR(%a6)

#	d0 = FPCR_MODE byte, zero-extended
	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	tst.b		%d1			# tag == 0: NORM
	bne.b		_L6_2s
	bsr.l		satan			# operand is a NORM
	bra.b		_L6_6s
_L6_2s:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L6_3s			# no
	bsr.l		src_zero		# yes
	bra.b		_L6_6s
_L6_3s:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L6_4s			# no
	bsr.l		spi_2			# yes
	bra.b		_L6_6s
_L6_4s:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
# Continues the entry point begun above; acts on the QNAN compare just made.
	bne.b		_L6_5s			# no
	bsr.l		src_qnan		# yes
	bra.b		_L6_6s
_L6_5s:
	bsr.l		satand			# operand is a DENORM (QNAN compare failed)
_L6_6s:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1 only; fp0 holds the result
	unlk		%a6
	rts

#
# _fatand_ (global): double-precision operand is read from 0x8(%a6) and
#	stored in extended format at FP_SRC.  d0-d1/a0-a1, FPCR/FPSR and
#	fp0/fp1 are saved in the frame; the handler picked by the operand tag
#	is called with a0 = &FP_SRC and d0 = FPCR_MODE byte.
#
	global		_fatand_
_fatand_:
	link		%a6,&-LOCAL_SIZE	# frame of LOCAL_SIZE bytes

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	fmov.d		0x8(%a6),%fp0		# load dbl input
	fmov.x		%fp0,FP_SRC(%a6)	# store as extended
	lea		FP_SRC(%a6),%a0		# a0 = &FP_SRC
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)		# record tag
	mov.b		%d0,%d1			# tag -> d1 (d0 is reused)

#	keep only bits 23-16 and 7-0 of the longword at USER_FPSR
	andi.l		&0x00ff00ff,USER_FPSR(%a6)

#	d0 = FPCR_MODE byte, zero-extended
	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

#	NOTE(review): d1 still holds the tag already written to STAG above,
#	so this store looks redundant -- confirm before removing.
	mov.b		%d1,STAG(%a6)
	tst.b		%d1			# tag == 0: NORM
	bne.b		_L6_2d
	bsr.l		satan			# operand is a NORM
	bra.b		_L6_6d
_L6_2d:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L6_3d			# no
	bsr.l		src_zero		# yes
	bra.b		_L6_6d
_L6_3d:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L6_4d			# no
	bsr.l		spi_2			# yes
	bra.b		_L6_6d
_L6_4d:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
# Continues the entry point begun above; acts on the QNAN compare just made.
	bne.b		_L6_5d			# no
	bsr.l		src_qnan		# yes
	bra.b		_L6_6d
_L6_5d:
	bsr.l		satand			# operand is a DENORM (QNAN compare failed)
_L6_6d:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1 only; fp0 holds the result
	unlk		%a6
	rts

#
# _fatanx_ (global): the 12-byte extended operand at 0x8(%a6) is copied to
#	FP_SRC.  d0-d1/a0-a1, FPCR/FPSR and fp0/fp1 are saved in the frame;
#	the handler picked by the operand tag is called with a0 = &FP_SRC
#	and d0 = FPCR_MODE byte.
#
	global		_fatanx_
_fatanx_:
	link		%a6,&-LOCAL_SIZE	# frame of LOCAL_SIZE bytes

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	lea		FP_SRC(%a6),%a0		# a0 = &FP_SRC
	mov.l		0x8+0x0(%a6),0x0(%a0)	# load ext input
	mov.l		0x8+0x4(%a6),0x4(%a0)	# second longword
	mov.l		0x8+0x8(%a6),0x8(%a0)	# third longword
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)		# record tag
	mov.b		%d0,%d1			# tag -> d1 (d0 is reused)

#	keep only bits 23-16 and 7-0 of the longword at USER_FPSR
	andi.l		&0x00ff00ff,USER_FPSR(%a6)

#	d0 = FPCR_MODE byte, zero-extended
	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	tst.b		%d1			# tag == 0: NORM
	bne.b		_L6_2x
	bsr.l		satan			# operand is a NORM
	bra.b		_L6_6x
_L6_2x:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L6_3x			# no
	bsr.l		src_zero		# yes
	bra.b		_L6_6x
_L6_3x:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L6_4x			# no
	bsr.l		spi_2			# yes
	bra.b		_L6_6x
_L6_4x:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
# Continues the entry point begun above; acts on the QNAN compare just made.
	bne.b		_L6_5x			# no
	bsr.l		src_qnan		# yes
	bra.b		_L6_6x
_L6_5x:
	bsr.l		satand			# operand is a DENORM (QNAN compare failed)
_L6_6x:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1 only; fp0 holds the result
	unlk		%a6
	rts


#########################################################################
# MONADIC TEMPLATE							#
#########################################################################
#
# _fasins_ (global): single-precision operand is read from 0x8(%a6) and
#	stored in extended format at FP_SRC.  d0-d1/a0-a1, FPCR/FPSR and
#	fp0/fp1 are saved in the frame; the handler picked by the operand tag
#	is called with a0 = &FP_SRC and d0 = FPCR_MODE byte.
#
	global		_fasins_
_fasins_:
	link		%a6,&-LOCAL_SIZE	# frame of LOCAL_SIZE bytes

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	fmov.s		0x8(%a6),%fp0		# load sgl input
	fmov.x		%fp0,FP_SRC(%a6)	# store as extended
	lea		FP_SRC(%a6),%a0		# a0 = &FP_SRC
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)		# record tag
	mov.b		%d0,%d1			# tag -> d1 (d0 is reused)

#	keep only bits 23-16 and 7-0 of the longword at USER_FPSR
	andi.l		&0x00ff00ff,USER_FPSR(%a6)

#	d0 = FPCR_MODE byte, zero-extended
	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	tst.b		%d1			# tag == 0: NORM
	bne.b		_L7_2s
	bsr.l		sasin			# operand is a NORM
	bra.b		_L7_6s
_L7_2s:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L7_3s			# no
	bsr.l		src_zero		# yes
	bra.b		_L7_6s
_L7_3s:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L7_4s			# no
	bsr.l		t_operr			# yes
	bra.b		_L7_6s
_L7_4s:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
# Continues the entry point begun above; acts on the QNAN compare just made.
	bne.b		_L7_5s			# no
	bsr.l		src_qnan		# yes
	bra.b		_L7_6s
_L7_5s:
	bsr.l		sasind			# operand is a DENORM (QNAN compare failed)
_L7_6s:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1 only; fp0 holds the result
	unlk		%a6
	rts

#
# _fasind_ (global): double-precision operand is read from 0x8(%a6) and
#	stored in extended format at FP_SRC.  d0-d1/a0-a1, FPCR/FPSR and
#	fp0/fp1 are saved in the frame; the handler picked by the operand tag
#	is called with a0 = &FP_SRC and d0 = FPCR_MODE byte.
#
	global		_fasind_
_fasind_:
	link		%a6,&-LOCAL_SIZE	# frame of LOCAL_SIZE bytes

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	fmov.d		0x8(%a6),%fp0		# load dbl input
	fmov.x		%fp0,FP_SRC(%a6)	# store as extended
	lea		FP_SRC(%a6),%a0		# a0 = &FP_SRC
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)		# record tag
	mov.b		%d0,%d1			# tag -> d1 (d0 is reused)

#	keep only bits 23-16 and 7-0 of the longword at USER_FPSR
	andi.l		&0x00ff00ff,USER_FPSR(%a6)

#	d0 = FPCR_MODE byte, zero-extended
	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

#	NOTE(review): d1 still holds the tag already written to STAG above,
#	so this store looks redundant -- confirm before removing.
	mov.b		%d1,STAG(%a6)
	tst.b		%d1			# tag == 0: NORM
	bne.b		_L7_2d
	bsr.l		sasin			# operand is a NORM
	bra.b		_L7_6d
_L7_2d:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L7_3d			# no
	bsr.l		src_zero		# yes
	bra.b		_L7_6d
_L7_3d:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L7_4d			# no
	bsr.l		t_operr			# yes
	bra.b		_L7_6d
_L7_4d:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
# Continues the entry point begun above; acts on the QNAN compare just made.
	bne.b		_L7_5d			# no
	bsr.l		src_qnan		# yes
	bra.b		_L7_6d
_L7_5d:
	bsr.l		sasind			# operand is a DENORM (QNAN compare failed)
_L7_6d:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1 only; fp0 holds the result
	unlk		%a6
	rts

#
# _fasinx_ (global): the 12-byte extended operand at 0x8(%a6) is copied to
#	FP_SRC.  d0-d1/a0-a1, FPCR/FPSR and fp0/fp1 are saved in the frame;
#	the handler picked by the operand tag is called with a0 = &FP_SRC
#	and d0 = FPCR_MODE byte.
#
	global		_fasinx_
_fasinx_:
	link		%a6,&-LOCAL_SIZE	# frame of LOCAL_SIZE bytes

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	lea		FP_SRC(%a6),%a0		# a0 = &FP_SRC
	mov.l		0x8+0x0(%a6),0x0(%a0)	# load ext input
	mov.l		0x8+0x4(%a6),0x4(%a0)	# second longword
	mov.l		0x8+0x8(%a6),0x8(%a0)	# third longword
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)		# record tag
	mov.b		%d0,%d1			# tag -> d1 (d0 is reused)

#	keep only bits 23-16 and 7-0 of the longword at USER_FPSR
	andi.l		&0x00ff00ff,USER_FPSR(%a6)

#	d0 = FPCR_MODE byte, zero-extended
	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	tst.b		%d1			# tag == 0: NORM
	bne.b		_L7_2x
	bsr.l		sasin			# operand is a NORM
	bra.b		_L7_6x
_L7_2x:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L7_3x			# no
	bsr.l		src_zero		# yes
	bra.b		_L7_6x
_L7_3x:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L7_4x			# no
	bsr.l		t_operr			# yes
	bra.b		_L7_6x
_L7_4x:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
# Continues the entry point begun above; acts on the QNAN compare just made.
	bne.b		_L7_5x			# no
	bsr.l		src_qnan		# yes
	bra.b		_L7_6x
_L7_5x:
	bsr.l		sasind			# operand is a DENORM (QNAN compare failed)
_L7_6x:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1 only; fp0 holds the result
	unlk		%a6
	rts


#########################################################################
# MONADIC TEMPLATE							#
#########################################################################
#
# _fatanhs_ (global): single-precision operand is read from 0x8(%a6) and
#	stored in extended format at FP_SRC.  d0-d1/a0-a1, FPCR/FPSR and
#	fp0/fp1 are saved in the frame; the handler picked by the operand tag
#	is called with a0 = &FP_SRC and d0 = FPCR_MODE byte.
#
	global		_fatanhs_
_fatanhs_:
	link		%a6,&-LOCAL_SIZE	# frame of LOCAL_SIZE bytes

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	fmov.s		0x8(%a6),%fp0		# load sgl input
	fmov.x		%fp0,FP_SRC(%a6)	# store as extended
	lea		FP_SRC(%a6),%a0		# a0 = &FP_SRC
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)		# record tag
	mov.b		%d0,%d1			# tag -> d1 (d0 is reused)

#	keep only bits 23-16 and 7-0 of the longword at USER_FPSR
	andi.l		&0x00ff00ff,USER_FPSR(%a6)

#	d0 = FPCR_MODE byte, zero-extended
	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	tst.b		%d1			# tag == 0: NORM
	bne.b		_L8_2s
	bsr.l		satanh			# operand is a NORM
	bra.b		_L8_6s
_L8_2s:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L8_3s			# no
	bsr.l		src_zero		# yes
	bra.b		_L8_6s
_L8_3s:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L8_4s			# no
	bsr.l		t_operr			# yes
	bra.b		_L8_6s
_L8_4s:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
# Continues the entry point begun above; acts on the QNAN compare just made.
	bne.b		_L8_5s			# no
	bsr.l		src_qnan		# yes
	bra.b		_L8_6s
_L8_5s:
	bsr.l		satanhd			# operand is a DENORM (QNAN compare failed)
_L8_6s:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1 only; fp0 holds the result
	unlk		%a6
	rts

#
# _fatanhd_ (global): double-precision operand is read from 0x8(%a6) and
#	stored in extended format at FP_SRC.  d0-d1/a0-a1, FPCR/FPSR and
#	fp0/fp1 are saved in the frame; the handler picked by the operand tag
#	is called with a0 = &FP_SRC and d0 = FPCR_MODE byte.
#
	global		_fatanhd_
_fatanhd_:
	link		%a6,&-LOCAL_SIZE	# frame of LOCAL_SIZE bytes

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	fmov.d		0x8(%a6),%fp0		# load dbl input
	fmov.x		%fp0,FP_SRC(%a6)	# store as extended
	lea		FP_SRC(%a6),%a0		# a0 = &FP_SRC
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)		# record tag
	mov.b		%d0,%d1			# tag -> d1 (d0 is reused)

#	keep only bits 23-16 and 7-0 of the longword at USER_FPSR
	andi.l		&0x00ff00ff,USER_FPSR(%a6)

#	d0 = FPCR_MODE byte, zero-extended
	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

#	NOTE(review): d1 still holds the tag already written to STAG above,
#	so this store looks redundant -- confirm before removing.
	mov.b		%d1,STAG(%a6)
	tst.b		%d1			# tag == 0: NORM
	bne.b		_L8_2d
	bsr.l		satanh			# operand is a NORM
	bra.b		_L8_6d
_L8_2d:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L8_3d			# no
	bsr.l		src_zero		# yes
	bra.b		_L8_6d
_L8_3d:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L8_4d			# no
	bsr.l		t_operr			# yes
	bra.b		_L8_6d
_L8_4d:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
# Continues the entry point begun above; acts on the QNAN compare just made.
	bne.b		_L8_5d			# no
	bsr.l		src_qnan		# yes
	bra.b		_L8_6d
_L8_5d:
	bsr.l		satanhd			# operand is a DENORM (QNAN compare failed)
_L8_6d:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1 only; fp0 holds the result
	unlk		%a6
	rts

#
# _fatanhx_ (global): the 12-byte extended operand at 0x8(%a6) is copied to
#	FP_SRC.  d0-d1/a0-a1, FPCR/FPSR and fp0/fp1 are saved in the frame;
#	the handler picked by the operand tag is called with a0 = &FP_SRC
#	and d0 = FPCR_MODE byte.
#
	global		_fatanhx_
_fatanhx_:
	link		%a6,&-LOCAL_SIZE	# frame of LOCAL_SIZE bytes

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	lea		FP_SRC(%a6),%a0		# a0 = &FP_SRC
	mov.l		0x8+0x0(%a6),0x0(%a0)	# load ext input
	mov.l		0x8+0x4(%a6),0x4(%a0)	# second longword
	mov.l		0x8+0x8(%a6),0x8(%a0)	# third longword
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)		# record tag
	mov.b		%d0,%d1			# tag -> d1 (d0 is reused)

#	keep only bits 23-16 and 7-0 of the longword at USER_FPSR
	andi.l		&0x00ff00ff,USER_FPSR(%a6)

#	d0 = FPCR_MODE byte, zero-extended
	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	tst.b		%d1			# tag == 0: NORM
	bne.b		_L8_2x
	bsr.l		satanh			# operand is a NORM
	bra.b		_L8_6x
_L8_2x:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L8_3x			# no
	bsr.l		src_zero		# yes
	bra.b		_L8_6x
_L8_3x:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L8_4x			# no
	bsr.l		t_operr			# yes
	bra.b		_L8_6x
_L8_4x:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
# Continues the entry point begun above; acts on the QNAN compare just made.
	bne.b		_L8_5x			# no
	bsr.l		src_qnan		# yes
	bra.b		_L8_6x
_L8_5x:
	bsr.l		satanhd			# operand is a DENORM (QNAN compare failed)
_L8_6x:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1 only; fp0 holds the result
	unlk		%a6
	rts


#########################################################################
# MONADIC TEMPLATE							#
#########################################################################
#
# _ftans_ (global): single-precision operand is read from 0x8(%a6) and
#	stored in extended format at FP_SRC.  d0-d1/a0-a1, FPCR/FPSR and
#	fp0/fp1 are saved in the frame; the handler picked by the operand tag
#	is called with a0 = &FP_SRC and d0 = FPCR_MODE byte.
#
	global		_ftans_
_ftans_:
	link		%a6,&-LOCAL_SIZE	# frame of LOCAL_SIZE bytes

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	fmov.s		0x8(%a6),%fp0		# load sgl input
	fmov.x		%fp0,FP_SRC(%a6)	# store as extended
	lea		FP_SRC(%a6),%a0		# a0 = &FP_SRC
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)		# record tag
	mov.b		%d0,%d1			# tag -> d1 (d0 is reused)

#	keep only bits 23-16 and 7-0 of the longword at USER_FPSR
	andi.l		&0x00ff00ff,USER_FPSR(%a6)

#	d0 = FPCR_MODE byte, zero-extended
	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	tst.b		%d1			# tag == 0: NORM
	bne.b		_L9_2s
	bsr.l		stan			# operand is a NORM
	bra.b		_L9_6s
_L9_2s:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L9_3s			# no
	bsr.l		src_zero		# yes
	bra.b		_L9_6s
_L9_3s:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L9_4s			# no
	bsr.l		t_operr			# yes
	bra.b		_L9_6s
_L9_4s:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
# Continues the entry point begun above; acts on the QNAN compare just made.
	bne.b		_L9_5s			# no
	bsr.l		src_qnan		# yes
	bra.b		_L9_6s
_L9_5s:
	bsr.l		stand			# operand is a DENORM (QNAN compare failed)
_L9_6s:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1 only; fp0 holds the result
	unlk		%a6
	rts

#
# _ftand_ (global): double-precision operand is read from 0x8(%a6) and
#	stored in extended format at FP_SRC.  d0-d1/a0-a1, FPCR/FPSR and
#	fp0/fp1 are saved in the frame; the handler picked by the operand tag
#	is called with a0 = &FP_SRC and d0 = FPCR_MODE byte.
#
	global		_ftand_
_ftand_:
	link		%a6,&-LOCAL_SIZE	# frame of LOCAL_SIZE bytes

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	fmov.d		0x8(%a6),%fp0		# load dbl input
	fmov.x		%fp0,FP_SRC(%a6)	# store as extended
	lea		FP_SRC(%a6),%a0		# a0 = &FP_SRC
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)		# record tag
	mov.b		%d0,%d1			# tag -> d1 (d0 is reused)

#	keep only bits 23-16 and 7-0 of the longword at USER_FPSR
	andi.l		&0x00ff00ff,USER_FPSR(%a6)

#	d0 = FPCR_MODE byte, zero-extended
	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

#	NOTE(review): d1 still holds the tag already written to STAG above,
#	so this store looks redundant -- confirm before removing.
	mov.b		%d1,STAG(%a6)
	tst.b		%d1			# tag == 0: NORM
	bne.b		_L9_2d
	bsr.l		stan			# operand is a NORM
	bra.b		_L9_6d
_L9_2d:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L9_3d			# no
	bsr.l		src_zero		# yes
	bra.b		_L9_6d
_L9_3d:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L9_4d			# no
	bsr.l		t_operr			# yes
	bra.b		_L9_6d
_L9_4d:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
# Continues the entry point begun above; acts on the QNAN compare just made.
	bne.b		_L9_5d			# no
	bsr.l		src_qnan		# yes
	bra.b		_L9_6d
_L9_5d:
	bsr.l		stand			# operand is a DENORM (QNAN compare failed)
_L9_6d:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1 only; fp0 holds the result
	unlk		%a6
	rts

#
# _ftanx_ (global): the 12-byte extended operand at 0x8(%a6) is copied to
#	FP_SRC.  d0-d1/a0-a1, FPCR/FPSR and fp0/fp1 are saved in the frame;
#	the handler picked by the operand tag is called with a0 = &FP_SRC
#	and d0 = FPCR_MODE byte.
#
	global		_ftanx_
_ftanx_:
	link		%a6,&-LOCAL_SIZE	# frame of LOCAL_SIZE bytes

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	lea		FP_SRC(%a6),%a0		# a0 = &FP_SRC
	mov.l		0x8+0x0(%a6),0x0(%a0)	# load ext input
	mov.l		0x8+0x4(%a6),0x4(%a0)	# second longword
	mov.l		0x8+0x8(%a6),0x8(%a0)	# third longword
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)		# record tag
	mov.b		%d0,%d1			# tag -> d1 (d0 is reused)

#	keep only bits 23-16 and 7-0 of the longword at USER_FPSR
	andi.l		&0x00ff00ff,USER_FPSR(%a6)

#	d0 = FPCR_MODE byte, zero-extended
	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	tst.b		%d1			# tag == 0: NORM
	bne.b		_L9_2x
	bsr.l		stan			# operand is a NORM
	bra.b		_L9_6x
_L9_2x:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L9_3x			# no
	bsr.l		src_zero		# yes
	bra.b		_L9_6x
_L9_3x:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L9_4x			# no
	bsr.l		t_operr			# yes
	bra.b		_L9_6x
_L9_4x:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
# Continues the entry point begun above; acts on the QNAN compare just made.
	bne.b		_L9_5x			# no
	bsr.l		src_qnan		# yes
	bra.b		_L9_6x
_L9_5x:
	bsr.l		stand			# operand is a DENORM (QNAN compare failed)
_L9_6x:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1 only; fp0 holds the result
	unlk		%a6
	rts


#########################################################################
# MONADIC TEMPLATE							#
#########################################################################
#
# _fetoxs_ (global): single-precision operand is read from 0x8(%a6) and
#	stored in extended format at FP_SRC.  d0-d1/a0-a1, FPCR/FPSR and
#	fp0/fp1 are saved in the frame; the handler picked by the operand tag
#	is called with a0 = &FP_SRC and d0 = FPCR_MODE byte.
#
	global		_fetoxs_
_fetoxs_:
	link		%a6,&-LOCAL_SIZE	# frame of LOCAL_SIZE bytes

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	fmov.s		0x8(%a6),%fp0		# load sgl input
	fmov.x		%fp0,FP_SRC(%a6)	# store as extended
	lea		FP_SRC(%a6),%a0		# a0 = &FP_SRC
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)		# record tag
	mov.b		%d0,%d1			# tag -> d1 (d0 is reused)

#	keep only bits 23-16 and 7-0 of the longword at USER_FPSR
	andi.l		&0x00ff00ff,USER_FPSR(%a6)

#	d0 = FPCR_MODE byte, zero-extended
	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	tst.b		%d1			# tag == 0: NORM
	bne.b		_L10_2s
	bsr.l		setox			# operand is a NORM
	bra.b		_L10_6s
_L10_2s:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L10_3s			# no
	bsr.l		ld_pone			# yes
	bra.b		_L10_6s
_L10_3s:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L10_4s			# no
	bsr.l		szr_inf			# yes
	bra.b		_L10_6s
_L10_4s:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
# Continues the entry point begun above; acts on the QNAN compare just made.
	bne.b		_L10_5s			# no
	bsr.l		src_qnan		# yes
	bra.b		_L10_6s
_L10_5s:
	bsr.l		setoxd			# operand is a DENORM (QNAN compare failed)
_L10_6s:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1 only; fp0 holds the result
	unlk		%a6
	rts

#
# _fetoxd_ (global): double-precision operand is read from 0x8(%a6) and
#	stored in extended format at FP_SRC.  d0-d1/a0-a1, FPCR/FPSR and
#	fp0/fp1 are saved in the frame; the handler picked by the operand tag
#	is called with a0 = &FP_SRC and d0 = FPCR_MODE byte.
#
	global		_fetoxd_
_fetoxd_:
	link		%a6,&-LOCAL_SIZE	# frame of LOCAL_SIZE bytes

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	fmov.d		0x8(%a6),%fp0		# load dbl input
	fmov.x		%fp0,FP_SRC(%a6)	# store as extended
	lea		FP_SRC(%a6),%a0		# a0 = &FP_SRC
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)		# record tag
	mov.b		%d0,%d1			# tag -> d1 (d0 is reused)

#	keep only bits 23-16 and 7-0 of the longword at USER_FPSR
	andi.l		&0x00ff00ff,USER_FPSR(%a6)

#	d0 = FPCR_MODE byte, zero-extended
	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

#	NOTE(review): d1 still holds the tag already written to STAG above,
#	so this store looks redundant -- confirm before removing.
	mov.b		%d1,STAG(%a6)
	tst.b		%d1			# tag == 0: NORM
	bne.b		_L10_2d
	bsr.l		setox			# operand is a NORM
	bra.b		_L10_6d
_L10_2d:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L10_3d			# no
	bsr.l		ld_pone			# yes
	bra.b		_L10_6d
_L10_3d:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L10_4d			# no
	bsr.l		szr_inf			# yes
	bra.b		_L10_6d
_L10_4d:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
# Continues the entry point begun above; acts on the QNAN compare just made.
	bne.b		_L10_5d			# no
	bsr.l		src_qnan		# yes
	bra.b		_L10_6d
_L10_5d:
	bsr.l		setoxd			# operand is a DENORM (QNAN compare failed)
_L10_6d:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1 only; fp0 holds the result
	unlk		%a6
	rts

#
# _fetoxx_ (global): the 12-byte extended operand at 0x8(%a6) is copied to
#	FP_SRC.  d0-d1/a0-a1, FPCR/FPSR and fp0/fp1 are saved in the frame;
#	the handler picked by the operand tag is called with a0 = &FP_SRC
#	and d0 = FPCR_MODE byte.
#
	global		_fetoxx_
_fetoxx_:
	link		%a6,&-LOCAL_SIZE	# frame of LOCAL_SIZE bytes

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	lea		FP_SRC(%a6),%a0		# a0 = &FP_SRC
	mov.l		0x8+0x0(%a6),0x0(%a0)	# load ext input
	mov.l		0x8+0x4(%a6),0x4(%a0)	# second longword
	mov.l		0x8+0x8(%a6),0x8(%a0)	# third longword
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)		# record tag
	mov.b		%d0,%d1			# tag -> d1 (d0 is reused)

#	keep only bits 23-16 and 7-0 of the longword at USER_FPSR
	andi.l		&0x00ff00ff,USER_FPSR(%a6)

#	d0 = FPCR_MODE byte, zero-extended
	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	tst.b		%d1			# tag == 0: NORM
	bne.b		_L10_2x
	bsr.l		setox			# operand is a NORM
	bra.b		_L10_6x
_L10_2x:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L10_3x			# no
	bsr.l		ld_pone			# yes
	bra.b		_L10_6x
_L10_3x:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L10_4x			# no
	bsr.l		szr_inf			# yes
	bra.b		_L10_6x
_L10_4x:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
# Continues the entry point begun above; acts on the QNAN compare just made.
	bne.b		_L10_5x			# no
	bsr.l		src_qnan		# yes
	bra.b		_L10_6x
_L10_5x:
	bsr.l		setoxd			# operand is a DENORM (QNAN compare failed)
_L10_6x:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1 only; fp0 holds the result
	unlk		%a6
	rts


#########################################################################
# MONADIC TEMPLATE							#
#########################################################################
#
# _ftwotoxs_ (global): single-precision operand is read from 0x8(%a6) and
#	stored in extended format at FP_SRC.  d0-d1/a0-a1, FPCR/FPSR and
#	fp0/fp1 are saved in the frame; the handler picked by the operand tag
#	is called with a0 = &FP_SRC and d0 = FPCR_MODE byte.
#
	global		_ftwotoxs_
_ftwotoxs_:
	link		%a6,&-LOCAL_SIZE	# frame of LOCAL_SIZE bytes

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	fmov.s		0x8(%a6),%fp0		# load sgl input
	fmov.x		%fp0,FP_SRC(%a6)	# store as extended
	lea		FP_SRC(%a6),%a0		# a0 = &FP_SRC
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)		# record tag
	mov.b		%d0,%d1			# tag -> d1 (d0 is reused)

#	keep only bits 23-16 and 7-0 of the longword at USER_FPSR
	andi.l		&0x00ff00ff,USER_FPSR(%a6)

#	d0 = FPCR_MODE byte, zero-extended
	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	tst.b		%d1			# tag == 0: NORM
	bne.b		_L11_2s
	bsr.l		stwotox			# operand is a NORM
	bra.b		_L11_6s
_L11_2s:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L11_3s			# no
	bsr.l		ld_pone			# yes
	bra.b		_L11_6s
_L11_3s:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L11_4s			# no
	bsr.l		szr_inf			# yes
	bra.b		_L11_6s
_L11_4s:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
# Tail of _ftwotoxs_: outcome of the QNAN test, DENORM fall-through and
# the common epilogue.
	bne.b		_L11_5s			# no
	bsr.l		src_qnan		# yes
	bra.b		_L11_6s
_L11_5s:
	bsr.l		stwotoxd		# operand is a DENORM
_L11_6s:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1
	unlk		%a6			# release the frame
	rts

#
# _ftwotoxd_ -- double-precision entry point.
# Loads the dbl operand at 0x8(%a6), stores it in extended format in
# FP_SRC, classifies it with tag() and calls one handler per tag byte:
#	NORM -> stwotox		ZERO -> ld_pone		INF -> szr_inf
#	QNAN -> src_qnan	any other tag -> stwotoxd
# The handler's result stays in fp0; d0-d1/a0-a1, FPCR/FPSR and fp1 are
# reloaded from the frame before rts.
#
	global		_ftwotoxd_
_ftwotoxd_:
	link		%a6,&-LOCAL_SIZE	# open the local frame

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	fmov.d		0x8(%a6),%fp0		# load dbl input
	fmov.x		%fp0,FP_SRC(%a6)	# store it as ext in FP_SRC
	lea		FP_SRC(%a6),%a0		# a0 = address of FP_SRC
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)		# record tag in the frame
	mov.b		%d0,%d1			# keep tag; d0 is reloaded below

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # clear bits 31-24,15-8 of saved FPSR

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

# NOTE(review): d1 still holds the tag already stored to STAG above, so
# this second store looks redundant -- confirm before removing.
	mov.b		%d1,STAG(%a6)
	tst.b		%d1			# tag byte zero?
	bne.b		_L11_2d			# no
	bsr.l		stwotox			# operand is a NORM
	bra.b		_L11_6d
_L11_2d:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L11_3d			# no
	bsr.l		ld_pone			# yes
	bra.b		_L11_6d
_L11_3d:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L11_4d			# no
	bsr.l		szr_inf			# yes
	bra.b		_L11_6d
_L11_4d:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
	bne.b		_L11_5d			# no
	bsr.l		src_qnan		# yes
	bra.b		_L11_6d
_L11_5d:
	bsr.l		stwotoxd		# operand is a DENORM
_L11_6d:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1
	unlk		%a6			# release the frame
	rts

#
# _ftwotoxx_ -- extended-precision entry point.
# Copies the three-longword operand at 0x8(%a6) into FP_SRC, classifies it
# with tag() and calls one handler chosen by the tag byte:
#	NORM -> stwotox		ZERO -> ld_pone		INF -> szr_inf
#	QNAN -> src_qnan	any other tag -> stwotoxd
# The handler's result stays in fp0; d0-d1/a0-a1, FPCR/FPSR and fp1 are
# reloaded from the frame before rts.
#
	global		_ftwotoxx_
_ftwotoxx_:
	link		%a6,&-LOCAL_SIZE	# open the local frame

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	lea		FP_SRC(%a6),%a0		# a0 = address of FP_SRC
	mov.l		0x8+0x0(%a6),0x0(%a0)	# load ext input
	mov.l		0x8+0x4(%a6),0x4(%a0)
	mov.l		0x8+0x8(%a6),0x8(%a0)
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)		# record tag in the frame
	mov.b		%d0,%d1			# keep tag; d0 is reloaded below

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # clear bits 31-24,15-8 of saved FPSR

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	tst.b		%d1			# tag byte zero?
	bne.b		_L11_2x			# no
	bsr.l		stwotox			# operand is a NORM
	bra.b		_L11_6x
_L11_2x:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L11_3x			# no
	bsr.l		ld_pone			# yes
	bra.b		_L11_6x
_L11_3x:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L11_4x			# no
	bsr.l		szr_inf			# yes
	bra.b		_L11_6x
_L11_4x:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
# Tail of _ftwotoxx_: outcome of the QNAN test, DENORM fall-through and
# the common epilogue.
	bne.b		_L11_5x			# no
	bsr.l		src_qnan		# yes
	bra.b		_L11_6x
_L11_5x:
	bsr.l		stwotoxd		# operand is a DENORM
_L11_6x:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1
	unlk		%a6			# release the frame
	rts


#########################################################################
# MONADIC TEMPLATE							#
#########################################################################
#
# _ftentoxs_ -- single-precision entry point.
# Loads the sgl operand at 0x8(%a6), stores it in extended format in
# FP_SRC, classifies it with tag() and calls one handler per tag byte:
#	NORM -> stentox		ZERO -> ld_pone		INF -> szr_inf
#	QNAN -> src_qnan	any other tag -> stentoxd
# The handler's result stays in fp0; d0-d1/a0-a1, FPCR/FPSR and fp1 are
# reloaded from the frame before rts.
#
	global		_ftentoxs_
_ftentoxs_:
	link		%a6,&-LOCAL_SIZE	# open the local frame

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	fmov.s		0x8(%a6),%fp0		# load sgl input
	fmov.x		%fp0,FP_SRC(%a6)	# store it as ext in FP_SRC
	lea		FP_SRC(%a6),%a0		# a0 = address of FP_SRC
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)		# record tag in the frame
	mov.b		%d0,%d1			# keep tag; d0 is reloaded below

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # clear bits 31-24,15-8 of saved FPSR

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	tst.b		%d1			# tag byte zero?
	bne.b		_L12_2s			# no
	bsr.l		stentox			# operand is a NORM
	bra.b		_L12_6s
_L12_2s:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L12_3s			# no
	bsr.l		ld_pone			# yes
	bra.b		_L12_6s
_L12_3s:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L12_4s			# no
	bsr.l		szr_inf			# yes
	bra.b		_L12_6s
_L12_4s:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
	bne.b		_L12_5s			# no
	bsr.l		src_qnan		# yes
	bra.b		_L12_6s
_L12_5s:
	bsr.l		stentoxd		# operand is a DENORM
_L12_6s:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1
	unlk		%a6			# release the frame
	rts

#
# _ftentoxd_ -- double-precision entry point.
# Loads the dbl operand at 0x8(%a6), stores it in extended format in
# FP_SRC, classifies it with tag() and calls one handler per tag byte:
#	NORM -> stentox		ZERO -> ld_pone		INF -> szr_inf
#	QNAN -> src_qnan	any other tag -> stentoxd
# The handler's result stays in fp0; d0-d1/a0-a1, FPCR/FPSR and fp1 are
# reloaded from the frame before rts.
#
	global		_ftentoxd_
_ftentoxd_:
	link		%a6,&-LOCAL_SIZE	# open the local frame

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	fmov.d		0x8(%a6),%fp0		# load dbl input
	fmov.x		%fp0,FP_SRC(%a6)	# store it as ext in FP_SRC
	lea		FP_SRC(%a6),%a0		# a0 = address of FP_SRC
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)		# record tag in the frame
	mov.b		%d0,%d1			# keep tag; d0 is reloaded below

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # clear bits 31-24,15-8 of saved FPSR

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

# NOTE(review): d1 still holds the tag already stored to STAG above, so
# this second store looks redundant -- confirm before removing.
	mov.b		%d1,STAG(%a6)
	tst.b		%d1			# tag byte zero?
	bne.b		_L12_2d			# no
	bsr.l		stentox			# operand is a NORM
	bra.b		_L12_6d
_L12_2d:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L12_3d			# no
	bsr.l		ld_pone			# yes
	bra.b		_L12_6d
_L12_3d:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L12_4d			# no
	bsr.l		szr_inf			# yes
	bra.b		_L12_6d
_L12_4d:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
# Tail of _ftentoxd_: outcome of the QNAN test, DENORM fall-through and
# the common epilogue.
	bne.b		_L12_5d			# no
	bsr.l		src_qnan		# yes
	bra.b		_L12_6d
_L12_5d:
	bsr.l		stentoxd		# operand is a DENORM
_L12_6d:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1
	unlk		%a6			# release the frame
	rts

#
# _ftentoxx_ -- extended-precision entry point.
# Copies the three-longword operand at 0x8(%a6) into FP_SRC, classifies it
# with tag() and calls one handler chosen by the tag byte:
#	NORM -> stentox		ZERO -> ld_pone		INF -> szr_inf
#	QNAN -> src_qnan	any other tag -> stentoxd
# The handler's result stays in fp0; d0-d1/a0-a1, FPCR/FPSR and fp1 are
# reloaded from the frame before rts.
#
	global		_ftentoxx_
_ftentoxx_:
	link		%a6,&-LOCAL_SIZE	# open the local frame

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	lea		FP_SRC(%a6),%a0		# a0 = address of FP_SRC
	mov.l		0x8+0x0(%a6),0x0(%a0)	# load ext input
	mov.l		0x8+0x4(%a6),0x4(%a0)
	mov.l		0x8+0x8(%a6),0x8(%a0)
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)		# record tag in the frame
	mov.b		%d0,%d1			# keep tag; d0 is reloaded below

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # clear bits 31-24,15-8 of saved FPSR

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	tst.b		%d1			# tag byte zero?
	bne.b		_L12_2x			# no
	bsr.l		stentox			# operand is a NORM
	bra.b		_L12_6x
_L12_2x:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L12_3x			# no
	bsr.l		ld_pone			# yes
	bra.b		_L12_6x
_L12_3x:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L12_4x			# no
	bsr.l		szr_inf			# yes
	bra.b		_L12_6x
_L12_4x:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
	bne.b		_L12_5x			# no
	bsr.l		src_qnan		# yes
	bra.b		_L12_6x
_L12_5x:
	bsr.l		stentoxd		# operand is a DENORM
_L12_6x:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1
	unlk		%a6			# release the frame
	rts


#########################################################################
# MONADIC TEMPLATE							#
#########################################################################
#
# _flogns_ -- single-precision entry point.
# Loads the sgl operand at 0x8(%a6), stores it in extended format in
# FP_SRC, classifies it with tag() and calls one handler per tag byte:
#	NORM -> slogn		ZERO -> t_dz2		INF -> sopr_inf
#	QNAN -> src_qnan	any other tag -> slognd
# The handler's result stays in fp0; d0-d1/a0-a1, FPCR/FPSR and fp1 are
# reloaded from the frame before rts.
#
	global		_flogns_
_flogns_:
	link		%a6,&-LOCAL_SIZE	# open the local frame

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	fmov.s		0x8(%a6),%fp0		# load sgl input
	fmov.x		%fp0,FP_SRC(%a6)	# store it as ext in FP_SRC
	lea		FP_SRC(%a6),%a0		# a0 = address of FP_SRC
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)		# record tag in the frame
	mov.b		%d0,%d1			# keep tag; d0 is reloaded below

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # clear bits 31-24,15-8 of saved FPSR

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	tst.b		%d1			# tag byte zero?
	bne.b		_L13_2s			# no
	bsr.l		slogn			# operand is a NORM
	bra.b		_L13_6s
_L13_2s:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L13_3s			# no
	bsr.l		t_dz2			# yes
	bra.b		_L13_6s
_L13_3s:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L13_4s			# no
	bsr.l		sopr_inf		# yes
	bra.b		_L13_6s
_L13_4s:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
# Tail of _flogns_: outcome of the QNAN test, DENORM fall-through and
# the common epilogue.
	bne.b		_L13_5s			# no
	bsr.l		src_qnan		# yes
	bra.b		_L13_6s
_L13_5s:
	bsr.l		slognd			# operand is a DENORM
_L13_6s:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1
	unlk		%a6			# release the frame
	rts

#
# _flognd_ -- double-precision entry point.
# Loads the dbl operand at 0x8(%a6), stores it in extended format in
# FP_SRC, classifies it with tag() and calls one handler per tag byte:
#	NORM -> slogn		ZERO -> t_dz2		INF -> sopr_inf
#	QNAN -> src_qnan	any other tag -> slognd
# The handler's result stays in fp0; d0-d1/a0-a1, FPCR/FPSR and fp1 are
# reloaded from the frame before rts.
#
	global		_flognd_
_flognd_:
	link		%a6,&-LOCAL_SIZE	# open the local frame

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	fmov.d		0x8(%a6),%fp0		# load dbl input
	fmov.x		%fp0,FP_SRC(%a6)	# store it as ext in FP_SRC
	lea		FP_SRC(%a6),%a0		# a0 = address of FP_SRC
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)		# record tag in the frame
	mov.b		%d0,%d1			# keep tag; d0 is reloaded below

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # clear bits 31-24,15-8 of saved FPSR

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

# NOTE(review): d1 still holds the tag already stored to STAG above, so
# this second store looks redundant -- confirm before removing.
	mov.b		%d1,STAG(%a6)
	tst.b		%d1			# tag byte zero?
	bne.b		_L13_2d			# no
	bsr.l		slogn			# operand is a NORM
	bra.b		_L13_6d
_L13_2d:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L13_3d			# no
	bsr.l		t_dz2			# yes
	bra.b		_L13_6d
_L13_3d:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L13_4d			# no
	bsr.l		sopr_inf		# yes
	bra.b		_L13_6d
_L13_4d:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
	bne.b		_L13_5d			# no
	bsr.l		src_qnan		# yes
	bra.b		_L13_6d
_L13_5d:
	bsr.l		slognd			# operand is a DENORM
_L13_6d:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1
	unlk		%a6			# release the frame
	rts

#
# _flognx_ -- extended-precision entry point.
# Copies the three-longword operand at 0x8(%a6) into FP_SRC, classifies it
# with tag() and calls one handler chosen by the tag byte:
#	NORM -> slogn		ZERO -> t_dz2		INF -> sopr_inf
#	QNAN -> src_qnan	any other tag -> slognd
# The handler's result stays in fp0; d0-d1/a0-a1, FPCR/FPSR and fp1 are
# reloaded from the frame before rts.
#
	global		_flognx_
_flognx_:
	link		%a6,&-LOCAL_SIZE	# open the local frame

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	lea		FP_SRC(%a6),%a0		# a0 = address of FP_SRC
	mov.l		0x8+0x0(%a6),0x0(%a0)	# load ext input
	mov.l		0x8+0x4(%a6),0x4(%a0)
	mov.l		0x8+0x8(%a6),0x8(%a0)
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)		# record tag in the frame
	mov.b		%d0,%d1			# keep tag; d0 is reloaded below

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # clear bits 31-24,15-8 of saved FPSR

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	tst.b		%d1			# tag byte zero?
	bne.b		_L13_2x			# no
	bsr.l		slogn			# operand is a NORM
	bra.b		_L13_6x
_L13_2x:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L13_3x			# no
	bsr.l		t_dz2			# yes
	bra.b		_L13_6x
_L13_3x:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L13_4x			# no
	bsr.l		sopr_inf		# yes
	bra.b		_L13_6x
_L13_4x:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
# Tail of _flognx_: outcome of the QNAN test, DENORM fall-through and
# the common epilogue.
	bne.b		_L13_5x			# no
	bsr.l		src_qnan		# yes
	bra.b		_L13_6x
_L13_5x:
	bsr.l		slognd			# operand is a DENORM
_L13_6x:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1
	unlk		%a6			# release the frame
	rts


#########################################################################
# MONADIC TEMPLATE							#
#########################################################################
#
# _flog10s_ -- single-precision entry point.
# Loads the sgl operand at 0x8(%a6), stores it in extended format in
# FP_SRC, classifies it with tag() and calls one handler per tag byte:
#	NORM -> slog10		ZERO -> t_dz2		INF -> sopr_inf
#	QNAN -> src_qnan	any other tag -> slog10d
# The handler's result stays in fp0; d0-d1/a0-a1, FPCR/FPSR and fp1 are
# reloaded from the frame before rts.
#
	global		_flog10s_
_flog10s_:
	link		%a6,&-LOCAL_SIZE	# open the local frame

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	fmov.s		0x8(%a6),%fp0		# load sgl input
	fmov.x		%fp0,FP_SRC(%a6)	# store it as ext in FP_SRC
	lea		FP_SRC(%a6),%a0		# a0 = address of FP_SRC
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)		# record tag in the frame
	mov.b		%d0,%d1			# keep tag; d0 is reloaded below

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # clear bits 31-24,15-8 of saved FPSR

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	tst.b		%d1			# tag byte zero?
	bne.b		_L14_2s			# no
	bsr.l		slog10			# operand is a NORM
	bra.b		_L14_6s
_L14_2s:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L14_3s			# no
	bsr.l		t_dz2			# yes
	bra.b		_L14_6s
_L14_3s:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L14_4s			# no
	bsr.l		sopr_inf		# yes
	bra.b		_L14_6s
_L14_4s:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
	bne.b		_L14_5s			# no
	bsr.l		src_qnan		# yes
	bra.b		_L14_6s
_L14_5s:
	bsr.l		slog10d			# operand is a DENORM
_L14_6s:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1
	unlk		%a6			# release the frame
	rts

#
# _flog10d_ -- double-precision entry point.
# Loads the dbl operand at 0x8(%a6), stores it in extended format in
# FP_SRC, classifies it with tag() and calls one handler per tag byte:
#	NORM -> slog10		ZERO -> t_dz2		INF -> sopr_inf
#	QNAN -> src_qnan	any other tag -> slog10d
# The handler's result stays in fp0; d0-d1/a0-a1, FPCR/FPSR and fp1 are
# reloaded from the frame before rts.
#
	global		_flog10d_
_flog10d_:
	link		%a6,&-LOCAL_SIZE	# open the local frame

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	fmov.d		0x8(%a6),%fp0		# load dbl input
	fmov.x		%fp0,FP_SRC(%a6)	# store it as ext in FP_SRC
	lea		FP_SRC(%a6),%a0		# a0 = address of FP_SRC
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)		# record tag in the frame
	mov.b		%d0,%d1			# keep tag; d0 is reloaded below

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # clear bits 31-24,15-8 of saved FPSR

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

# NOTE(review): d1 still holds the tag already stored to STAG above, so
# this second store looks redundant -- confirm before removing.
	mov.b		%d1,STAG(%a6)
	tst.b		%d1			# tag byte zero?
	bne.b		_L14_2d			# no
	bsr.l		slog10			# operand is a NORM
	bra.b		_L14_6d
_L14_2d:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L14_3d			# no
	bsr.l		t_dz2			# yes
	bra.b		_L14_6d
_L14_3d:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L14_4d			# no
	bsr.l		sopr_inf		# yes
	bra.b		_L14_6d
_L14_4d:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
# Tail of _flog10d_: outcome of the QNAN test, DENORM fall-through and
# the common epilogue.
	bne.b		_L14_5d			# no
	bsr.l		src_qnan		# yes
	bra.b		_L14_6d
_L14_5d:
	bsr.l		slog10d			# operand is a DENORM
_L14_6d:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1
	unlk		%a6			# release the frame
	rts

#
# _flog10x_ -- extended-precision entry point.
# Copies the three-longword operand at 0x8(%a6) into FP_SRC, classifies it
# with tag() and calls one handler chosen by the tag byte:
#	NORM -> slog10		ZERO -> t_dz2		INF -> sopr_inf
#	QNAN -> src_qnan	any other tag -> slog10d
# The handler's result stays in fp0; d0-d1/a0-a1, FPCR/FPSR and fp1 are
# reloaded from the frame before rts.
#
	global		_flog10x_
_flog10x_:
	link		%a6,&-LOCAL_SIZE	# open the local frame

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	lea		FP_SRC(%a6),%a0		# a0 = address of FP_SRC
	mov.l		0x8+0x0(%a6),0x0(%a0)	# load ext input
	mov.l		0x8+0x4(%a6),0x4(%a0)
	mov.l		0x8+0x8(%a6),0x8(%a0)
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)		# record tag in the frame
	mov.b		%d0,%d1			# keep tag; d0 is reloaded below

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # clear bits 31-24,15-8 of saved FPSR

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	tst.b		%d1			# tag byte zero?
	bne.b		_L14_2x			# no
	bsr.l		slog10			# operand is a NORM
	bra.b		_L14_6x
_L14_2x:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L14_3x			# no
	bsr.l		t_dz2			# yes
	bra.b		_L14_6x
_L14_3x:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L14_4x			# no
	bsr.l		sopr_inf		# yes
	bra.b		_L14_6x
_L14_4x:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
	bne.b		_L14_5x			# no
	bsr.l		src_qnan		# yes
	bra.b		_L14_6x
_L14_5x:
	bsr.l		slog10d			# operand is a DENORM
_L14_6x:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1
	unlk		%a6			# release the frame
	rts


#########################################################################
# MONADIC TEMPLATE							#
#########################################################################
#
# _flog2s_ -- single-precision entry point.
# Loads the sgl operand at 0x8(%a6), stores it in extended format in
# FP_SRC, classifies it with tag() and calls one handler per tag byte:
#	NORM -> slog2		ZERO -> t_dz2		INF -> sopr_inf
#	QNAN -> src_qnan	any other tag -> slog2d
# The handler's result stays in fp0; d0-d1/a0-a1, FPCR/FPSR and fp1 are
# reloaded from the frame before rts.
#
	global		_flog2s_
_flog2s_:
	link		%a6,&-LOCAL_SIZE	# open the local frame

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	fmov.s		0x8(%a6),%fp0		# load sgl input
	fmov.x		%fp0,FP_SRC(%a6)	# store it as ext in FP_SRC
	lea		FP_SRC(%a6),%a0		# a0 = address of FP_SRC
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)		# record tag in the frame
	mov.b		%d0,%d1			# keep tag; d0 is reloaded below

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # clear bits 31-24,15-8 of saved FPSR

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	tst.b		%d1			# tag byte zero?
	bne.b		_L15_2s			# no
	bsr.l		slog2			# operand is a NORM
	bra.b		_L15_6s
_L15_2s:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L15_3s			# no
	bsr.l		t_dz2			# yes
	bra.b		_L15_6s
_L15_3s:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L15_4s			# no
	bsr.l		sopr_inf		# yes
	bra.b		_L15_6s
_L15_4s:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
# Tail of _flog2s_: outcome of the QNAN test, DENORM fall-through and
# the common epilogue.
	bne.b		_L15_5s			# no
	bsr.l		src_qnan		# yes
	bra.b		_L15_6s
_L15_5s:
	bsr.l		slog2d			# operand is a DENORM
_L15_6s:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1
	unlk		%a6			# release the frame
	rts

#
# _flog2d_ -- double-precision entry point.
# Loads the dbl operand at 0x8(%a6), stores it in extended format in
# FP_SRC, classifies it with tag() and calls one handler per tag byte:
#	NORM -> slog2		ZERO -> t_dz2		INF -> sopr_inf
#	QNAN -> src_qnan	any other tag -> slog2d
# The handler's result stays in fp0; d0-d1/a0-a1, FPCR/FPSR and fp1 are
# reloaded from the frame before rts.
#
	global		_flog2d_
_flog2d_:
	link		%a6,&-LOCAL_SIZE	# open the local frame

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	fmov.d		0x8(%a6),%fp0		# load dbl input
	fmov.x		%fp0,FP_SRC(%a6)	# store it as ext in FP_SRC
	lea		FP_SRC(%a6),%a0		# a0 = address of FP_SRC
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)		# record tag in the frame
	mov.b		%d0,%d1			# keep tag; d0 is reloaded below

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # clear bits 31-24,15-8 of saved FPSR

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

# NOTE(review): d1 still holds the tag already stored to STAG above, so
# this second store looks redundant -- confirm before removing.
	mov.b		%d1,STAG(%a6)
	tst.b		%d1			# tag byte zero?
	bne.b		_L15_2d			# no
	bsr.l		slog2			# operand is a NORM
	bra.b		_L15_6d
_L15_2d:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L15_3d			# no
	bsr.l		t_dz2			# yes
	bra.b		_L15_6d
_L15_3d:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L15_4d			# no
	bsr.l		sopr_inf		# yes
	bra.b		_L15_6d
_L15_4d:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
	bne.b		_L15_5d			# no
	bsr.l		src_qnan		# yes
	bra.b		_L15_6d
_L15_5d:
	bsr.l		slog2d			# operand is a DENORM
_L15_6d:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1
	unlk		%a6			# release the frame
	rts

#
# _flog2x_ -- extended-precision entry point.
# Copies the three-longword operand at 0x8(%a6) into FP_SRC, classifies it
# with tag() and calls one handler chosen by the tag byte:
#	NORM -> slog2		ZERO -> t_dz2		INF -> sopr_inf
#	QNAN -> src_qnan	any other tag -> slog2d
# The handler's result stays in fp0; d0-d1/a0-a1, FPCR/FPSR and fp1 are
# reloaded from the frame before rts.
#
	global		_flog2x_
_flog2x_:
	link		%a6,&-LOCAL_SIZE	# open the local frame

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	lea		FP_SRC(%a6),%a0		# a0 = address of FP_SRC
	mov.l		0x8+0x0(%a6),0x0(%a0)	# load ext input
	mov.l		0x8+0x4(%a6),0x4(%a0)
	mov.l		0x8+0x8(%a6),0x8(%a0)
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)		# record tag in the frame
	mov.b		%d0,%d1			# keep tag; d0 is reloaded below

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # clear bits 31-24,15-8 of saved FPSR

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	tst.b		%d1			# tag byte zero?
	bne.b		_L15_2x			# no
	bsr.l		slog2			# operand is a NORM
	bra.b		_L15_6x
_L15_2x:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L15_3x			# no
	bsr.l		t_dz2			# yes
	bra.b		_L15_6x
_L15_3x:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L15_4x			# no
	bsr.l		sopr_inf		# yes
	bra.b		_L15_6x
_L15_4x:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
# Tail of _flog2x_: outcome of the QNAN test, DENORM fall-through and
# the common epilogue.
	bne.b		_L15_5x			# no
	bsr.l		src_qnan		# yes
	bra.b		_L15_6x
_L15_5x:
	bsr.l		slog2d			# operand is a DENORM
_L15_6x:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1
	unlk		%a6			# release the frame
	rts


#########################################################################
# MONADIC TEMPLATE							#
#########################################################################
#
# _fcoshs_ -- single-precision entry point.
# Loads the sgl operand at 0x8(%a6), stores it in extended format in
# FP_SRC, classifies it with tag() and calls one handler per tag byte:
#	NORM -> scosh		ZERO -> ld_pone		INF -> ld_pinf
#	QNAN -> src_qnan	any other tag -> scoshd
# The handler's result stays in fp0; d0-d1/a0-a1, FPCR/FPSR and fp1 are
# reloaded from the frame before rts.
#
	global		_fcoshs_
_fcoshs_:
	link		%a6,&-LOCAL_SIZE	# open the local frame

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	fmov.s		0x8(%a6),%fp0		# load sgl input
	fmov.x		%fp0,FP_SRC(%a6)	# store it as ext in FP_SRC
	lea		FP_SRC(%a6),%a0		# a0 = address of FP_SRC
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)		# record tag in the frame
	mov.b		%d0,%d1			# keep tag; d0 is reloaded below

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # clear bits 31-24,15-8 of saved FPSR

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	tst.b		%d1			# tag byte zero?
	bne.b		_L16_2s			# no
	bsr.l		scosh			# operand is a NORM
	bra.b		_L16_6s
_L16_2s:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L16_3s			# no
	bsr.l		ld_pone			# yes
	bra.b		_L16_6s
_L16_3s:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L16_4s			# no
	bsr.l		ld_pinf			# yes
	bra.b		_L16_6s
_L16_4s:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
	bne.b		_L16_5s			# no
	bsr.l		src_qnan		# yes
	bra.b		_L16_6s
_L16_5s:
	bsr.l		scoshd			# operand is a DENORM
_L16_6s:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1
	unlk		%a6			# release the frame
	rts

#
# _fcoshd_ -- double-precision entry point.
# Loads the dbl operand at 0x8(%a6), stores it in extended format in
# FP_SRC, classifies it with tag() and calls one handler per tag byte:
#	NORM -> scosh		ZERO -> ld_pone		INF -> ld_pinf
#	QNAN -> src_qnan	any other tag -> scoshd
# The handler's result stays in fp0; d0-d1/a0-a1, FPCR/FPSR and fp1 are
# reloaded from the frame before rts.
#
	global		_fcoshd_
_fcoshd_:
	link		%a6,&-LOCAL_SIZE	# open the local frame

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	fmov.d		0x8(%a6),%fp0		# load dbl input
	fmov.x		%fp0,FP_SRC(%a6)	# store it as ext in FP_SRC
	lea		FP_SRC(%a6),%a0		# a0 = address of FP_SRC
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)		# record tag in the frame
	mov.b		%d0,%d1			# keep tag; d0 is reloaded below

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # clear bits 31-24,15-8 of saved FPSR

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

# NOTE(review): d1 still holds the tag already stored to STAG above, so
# this second store looks redundant -- confirm before removing.
	mov.b		%d1,STAG(%a6)
	tst.b		%d1			# tag byte zero?
	bne.b		_L16_2d			# no
	bsr.l		scosh			# operand is a NORM
	bra.b		_L16_6d
_L16_2d:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L16_3d			# no
	bsr.l		ld_pone			# yes
	bra.b		_L16_6d
_L16_3d:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L16_4d			# no
	bsr.l		ld_pinf			# yes
	bra.b		_L16_6d
_L16_4d:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
# Tail of _fcoshd_: outcome of the QNAN test, DENORM fall-through and
# the common epilogue.
	bne.b		_L16_5d			# no
	bsr.l		src_qnan		# yes
	bra.b		_L16_6d
_L16_5d:
	bsr.l		scoshd			# operand is a DENORM
_L16_6d:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1
	unlk		%a6			# release the frame
	rts

#
# _fcoshx_ -- extended-precision entry point.
# Copies the three-longword operand at 0x8(%a6) into FP_SRC, classifies it
# with tag() and calls one handler chosen by the tag byte:
#	NORM -> scosh		ZERO -> ld_pone		INF -> ld_pinf
#	QNAN -> src_qnan	any other tag -> scoshd
# The handler's result stays in fp0; d0-d1/a0-a1, FPCR/FPSR and fp1 are
# reloaded from the frame before rts.
#
	global		_fcoshx_
_fcoshx_:
	link		%a6,&-LOCAL_SIZE	# open the local frame

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	lea		FP_SRC(%a6),%a0		# a0 = address of FP_SRC
	mov.l		0x8+0x0(%a6),0x0(%a0)	# load ext input
	mov.l		0x8+0x4(%a6),0x4(%a0)
	mov.l		0x8+0x8(%a6),0x8(%a0)
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)		# record tag in the frame
	mov.b		%d0,%d1			# keep tag; d0 is reloaded below

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # clear bits 31-24,15-8 of saved FPSR

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	tst.b		%d1			# tag byte zero?
	bne.b		_L16_2x			# no
	bsr.l		scosh			# operand is a NORM
	bra.b		_L16_6x
_L16_2x:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L16_3x			# no
	bsr.l		ld_pone			# yes
	bra.b		_L16_6x
_L16_3x:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L16_4x			# no
	bsr.l		ld_pinf			# yes
	bra.b		_L16_6x
_L16_4x:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
	bne.b		_L16_5x			# no
	bsr.l		src_qnan		# yes
	bra.b		_L16_6x
_L16_5x:
	bsr.l		scoshd			# operand is a DENORM
_L16_6x:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1
	unlk		%a6			# release the frame
	rts


#########################################################################
# MONADIC TEMPLATE							#
#########################################################################
#
# _facoss_ -- single-precision entry point.
# Loads the sgl operand at 0x8(%a6), stores it in extended format in
# FP_SRC, classifies it with tag() and calls one handler per tag byte:
#	NORM -> sacos		ZERO -> ld_ppi2		INF -> t_operr
#	QNAN -> src_qnan	any other tag -> sacosd
# The handler's result stays in fp0; d0-d1/a0-a1, FPCR/FPSR and fp1 are
# reloaded from the frame before rts.
#
	global		_facoss_
_facoss_:
	link		%a6,&-LOCAL_SIZE	# open the local frame

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	fmov.s		0x8(%a6),%fp0		# load sgl input
	fmov.x		%fp0,FP_SRC(%a6)	# store it as ext in FP_SRC
	lea		FP_SRC(%a6),%a0		# a0 = address of FP_SRC
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)		# record tag in the frame
	mov.b		%d0,%d1			# keep tag; d0 is reloaded below

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # clear bits 31-24,15-8 of saved FPSR

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	tst.b		%d1			# tag byte zero?
	bne.b		_L17_2s			# no
	bsr.l		sacos			# operand is a NORM
	bra.b		_L17_6s
_L17_2s:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L17_3s			# no
	bsr.l		ld_ppi2			# yes
	bra.b		_L17_6s
_L17_3s:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L17_4s			# no
	bsr.l		t_operr			# yes
	bra.b		_L17_6s
_L17_4s:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
# Tail of _facoss_: outcome of the QNAN test, DENORM fall-through and
# the common epilogue.
	bne.b		_L17_5s			# no
	bsr.l		src_qnan		# yes
	bra.b		_L17_6s
_L17_5s:
	bsr.l		sacosd			# operand is a DENORM
_L17_6s:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1
	unlk		%a6			# release the frame
	rts

#
# _facosd_ -- double-precision entry point.
# Loads the dbl operand at 0x8(%a6), stores it in extended format in
# FP_SRC, classifies it with tag() and calls one handler per tag byte:
#	NORM -> sacos		ZERO -> ld_ppi2		INF -> t_operr
#	QNAN -> src_qnan	any other tag -> sacosd
# The handler's result stays in fp0; d0-d1/a0-a1, FPCR/FPSR and fp1 are
# reloaded from the frame before rts.
#
	global		_facosd_
_facosd_:
	link		%a6,&-LOCAL_SIZE	# open the local frame

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	fmov.d		0x8(%a6),%fp0		# load dbl input
	fmov.x		%fp0,FP_SRC(%a6)	# store it as ext in FP_SRC
	lea		FP_SRC(%a6),%a0		# a0 = address of FP_SRC
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)		# record tag in the frame
	mov.b		%d0,%d1			# keep tag; d0 is reloaded below

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # clear bits 31-24,15-8 of saved FPSR

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

# NOTE(review): d1 still holds the tag already stored to STAG above, so
# this second store looks redundant -- confirm before removing.
	mov.b		%d1,STAG(%a6)
	tst.b		%d1			# tag byte zero?
	bne.b		_L17_2d			# no
	bsr.l		sacos			# operand is a NORM
	bra.b		_L17_6d
_L17_2d:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L17_3d			# no
	bsr.l		ld_ppi2			# yes
	bra.b		_L17_6d
_L17_3d:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L17_4d			# no
	bsr.l		t_operr			# yes
	bra.b		_L17_6d
_L17_4d:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
	bne.b		_L17_5d			# no
	bsr.l		src_qnan		# yes
	bra.b		_L17_6d
_L17_5d:
	bsr.l		sacosd			# operand is a DENORM
_L17_6d:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1
	unlk		%a6			# release the frame
	rts

#
# _facosx_ -- extended-precision entry point.
# Copies the three-longword operand at 0x8(%a6) into FP_SRC, classifies it
# with tag() and calls one handler chosen by the tag byte:
#	NORM -> sacos		ZERO -> ld_ppi2		INF -> t_operr
#	QNAN -> src_qnan	any other tag -> sacosd
# The handler's result stays in fp0; d0-d1/a0-a1, FPCR/FPSR and fp1 are
# reloaded from the frame before rts.
#
	global		_facosx_
_facosx_:
	link		%a6,&-LOCAL_SIZE	# open the local frame

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	lea		FP_SRC(%a6),%a0		# a0 = address of FP_SRC
	mov.l		0x8+0x0(%a6),0x0(%a0)	# load ext input
	mov.l		0x8+0x4(%a6),0x4(%a0)
	mov.l		0x8+0x8(%a6),0x8(%a0)
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)		# record tag in the frame
	mov.b		%d0,%d1			# keep tag; d0 is reloaded below

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # clear bits 31-24,15-8 of saved FPSR

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	tst.b		%d1			# tag byte zero?
	bne.b		_L17_2x			# no
	bsr.l		sacos			# operand is a NORM
	bra.b		_L17_6x
_L17_2x:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L17_3x			# no
	bsr.l		ld_ppi2			# yes
	bra.b		_L17_6x
_L17_3x:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L17_4x			# no
	bsr.l		t_operr			# yes
	bra.b		_L17_6x
_L17_4x:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
# Tail of _facosx_: outcome of the QNAN test, DENORM fall-through and
# the common epilogue.
	bne.b		_L17_5x			# no
	bsr.l		src_qnan		# yes
	bra.b		_L17_6x
_L17_5x:
	bsr.l		sacosd			# operand is a DENORM
_L17_6x:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1
	unlk		%a6			# release the frame
	rts


#########################################################################
# MONADIC TEMPLATE							#
#########################################################################
#
# _fgetexps_ -- single-precision entry point.
# Loads the sgl operand at 0x8(%a6), stores it in extended format in
# FP_SRC, classifies it with tag() and calls one handler per tag byte:
#	NORM -> sgetexp		ZERO -> src_zero	INF -> t_operr
#	QNAN -> src_qnan	any other tag -> sgetexpd
# The handler's result stays in fp0; d0-d1/a0-a1, FPCR/FPSR and fp1 are
# reloaded from the frame before rts.
#
	global		_fgetexps_
_fgetexps_:
	link		%a6,&-LOCAL_SIZE	# open the local frame

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	fmov.s		0x8(%a6),%fp0		# load sgl input
	fmov.x		%fp0,FP_SRC(%a6)	# store it as ext in FP_SRC
	lea		FP_SRC(%a6),%a0		# a0 = address of FP_SRC
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)		# record tag in the frame
	mov.b		%d0,%d1			# keep tag; d0 is reloaded below

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # clear bits 31-24,15-8 of saved FPSR

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	tst.b		%d1			# tag byte zero?
	bne.b		_L18_2s			# no
	bsr.l		sgetexp			# operand is a NORM
	bra.b		_L18_6s
_L18_2s:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L18_3s			# no
	bsr.l		src_zero		# yes
	bra.b		_L18_6s
_L18_3s:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L18_4s			# no
	bsr.l		t_operr			# yes
	bra.b		_L18_6s
_L18_4s:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
	bne.b		_L18_5s			# no
	bsr.l		src_qnan		# yes
	bra.b		_L18_6s
_L18_5s:
	bsr.l		sgetexpd		# operand is a DENORM
_L18_6s:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1
	unlk		%a6			# release the frame
	rts

#
# _fgetexpd_ -- double-precision entry point.
# Loads the dbl operand at 0x8(%a6), stores it in extended format in
# FP_SRC, classifies it with tag() and calls one handler per tag byte:
#	NORM -> sgetexp		ZERO -> src_zero	INF -> t_operr
#	QNAN -> src_qnan	any other tag -> sgetexpd
# The handler's result stays in fp0; d0-d1/a0-a1, FPCR/FPSR and fp1 are
# reloaded from the frame before rts.
#
	global		_fgetexpd_
_fgetexpd_:
	link		%a6,&-LOCAL_SIZE	# open the local frame

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	fmov.d		0x8(%a6),%fp0		# load dbl input
	fmov.x		%fp0,FP_SRC(%a6)	# store it as ext in FP_SRC
	lea		FP_SRC(%a6),%a0		# a0 = address of FP_SRC
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)		# record tag in the frame
	mov.b		%d0,%d1			# keep tag; d0 is reloaded below

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # clear bits 31-24,15-8 of saved FPSR

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

# NOTE(review): d1 still holds the tag already stored to STAG above, so
# this second store looks redundant -- confirm before removing.
	mov.b		%d1,STAG(%a6)
	tst.b		%d1			# tag byte zero?
	bne.b		_L18_2d			# no
	bsr.l		sgetexp			# operand is a NORM
	bra.b		_L18_6d
_L18_2d:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L18_3d			# no
	bsr.l		src_zero		# yes
	bra.b		_L18_6d
_L18_3d:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L18_4d			# no
	bsr.l		t_operr			# yes
	bra.b		_L18_6d
_L18_4d:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
	bne.b	_L18_5d			# no
	bsr.l	src_qnan		# yes
	bra.b	_L18_6d
_L18_5d:
	bsr.l	sgetexpd		# operand is a DENORM
_L18_6d:

#
#	Result is now in FP0
#	(fp0 is therefore not reloaded; only fp1 is restored below)
#
	movm.l	EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l	USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x	EXC_FP1(%a6),&0x40	# restore fp1
	unlk	%a6
	rts

#
# _fgetexpx_(): extended-precision entry point wrapping sgetexp.
#	Copies the three longwords of the argument at 0x8(%a6) into FP_SRC
#	and classifies it with tag(). The tag byte (0 selects the NORM
#	handler) picks sgetexp, src_zero, t_operr, src_qnan or, for any
#	other value, sgetexpd. The handler is entered with a0 -> FP_SRC and
#	d0 = FPCR_MODE byte (rnd mode,prec).
#
	global	_fgetexpx_
_fgetexpx_:
	link	%a6,&-LOCAL_SIZE

	movm.l	&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l	%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x	&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l	&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	lea	FP_SRC(%a6),%a0
	mov.l	0x8+0x0(%a6),0x0(%a0)	# load ext input
	mov.l	0x8+0x4(%a6),0x4(%a0)
	mov.l	0x8+0x8(%a6),0x8(%a0)
	bsr.l	tag			# fetch operand type
	mov.b	%d0,STAG(%a6)
	mov.b	%d0,%d1			# keep tag in d1; d0 is reused below

	andi.l	&0x00ff00ff,USER_FPSR(%a6) # clear bits 31-24 and 15-8 of saved FPSR

	clr.l	%d0
	mov.b	FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	tst.b	%d1
	bne.b	_L18_2x
	bsr.l	sgetexp			# operand is a NORM
	bra.b	_L18_6x
_L18_2x:
	cmpi.b	%d1,&ZERO		# is operand a ZERO?
	bne.b	_L18_3x			# no
	bsr.l	src_zero		# yes
	bra.b	_L18_6x
_L18_3x:
	cmpi.b	%d1,&INF		# is operand an INF?
	bne.b	_L18_4x			# no
	bsr.l	t_operr			# yes
	bra.b	_L18_6x
_L18_4x:
	cmpi.b	%d1,&QNAN		# is operand a QNAN?
	bne.b	_L18_5x			# no
	bsr.l	src_qnan		# yes
	bra.b	_L18_6x
_L18_5x:
	bsr.l	sgetexpd		# operand is a DENORM
_L18_6x:

#
#	Result is now in FP0
#	(fp0 is therefore not reloaded; only fp1 is restored below)
#
	movm.l	EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l	USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x	EXC_FP1(%a6),&0x40	# restore fp1
	unlk	%a6
	rts


#########################################################################
# MONADIC TEMPLATE							#
#########################################################################
#
# _fgetmans_(): single-precision entry point wrapping sgetman.
#	Loads the sgl argument at 0x8(%a6), stores it to FP_SRC in extended
#	format and classifies it with tag(). The tag byte (0 selects the
#	NORM handler) picks sgetman, src_zero, t_operr, src_qnan or, for any
#	other value, sgetmand. The handler is entered with a0 -> FP_SRC and
#	d0 = FPCR_MODE byte (rnd mode,prec).
#
	global	_fgetmans_
_fgetmans_:
	link	%a6,&-LOCAL_SIZE

	movm.l	&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l	%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x	&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l	&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	fmov.s	0x8(%a6),%fp0		# load sgl input
	fmov.x	%fp0,FP_SRC(%a6)
	lea	FP_SRC(%a6),%a0
	bsr.l	tag			# fetch operand type
	mov.b	%d0,STAG(%a6)
	mov.b	%d0,%d1			# keep tag in d1; d0 is reused below

	andi.l	&0x00ff00ff,USER_FPSR(%a6) # clear bits 31-24 and 15-8 of saved FPSR

	clr.l	%d0
	mov.b	FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	tst.b	%d1
	bne.b	_L19_2s
	bsr.l	sgetman			# operand is a NORM
	bra.b	_L19_6s
_L19_2s:
	cmpi.b	%d1,&ZERO		# is operand a ZERO?
	bne.b	_L19_3s			# no
	bsr.l	src_zero		# yes
	bra.b	_L19_6s
_L19_3s:
	cmpi.b	%d1,&INF		# is operand an INF?
	bne.b	_L19_4s			# no
	bsr.l	t_operr			# yes
	bra.b	_L19_6s
_L19_4s:
	cmpi.b	%d1,&QNAN		# is operand a QNAN?
	bne.b	_L19_5s			# no
	bsr.l	src_qnan		# yes
	bra.b	_L19_6s
_L19_5s:
	bsr.l	sgetmand		# operand is a DENORM
_L19_6s:

#
#	Result is now in FP0
#	(fp0 is therefore not reloaded; only fp1 is restored below)
#
	movm.l	EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l	USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x	EXC_FP1(%a6),&0x40	# restore fp1
	unlk	%a6
	rts

#
# _fgetmand_(): double-precision entry point wrapping sgetman.
#	Loads the dbl argument at 0x8(%a6), stores it to FP_SRC in extended
#	format and classifies it with tag(). The tag byte (0 selects the
#	NORM handler) picks sgetman, src_zero, t_operr, src_qnan or, for any
#	other value, sgetmand. The handler is entered with a0 -> FP_SRC and
#	d0 = FPCR_MODE byte (rnd mode,prec).
#
	global	_fgetmand_
_fgetmand_:
	link	%a6,&-LOCAL_SIZE

	movm.l	&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l	%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x	&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l	&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	fmov.d	0x8(%a6),%fp0		# load dbl input
	fmov.x	%fp0,FP_SRC(%a6)
	lea	FP_SRC(%a6),%a0
	bsr.l	tag			# fetch operand type
	mov.b	%d0,STAG(%a6)
	mov.b	%d0,%d1			# keep tag in d1; d0 is reused below

	andi.l	&0x00ff00ff,USER_FPSR(%a6) # clear bits 31-24 and 15-8 of saved FPSR

	clr.l	%d0
	mov.b	FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	mov.b	%d1,STAG(%a6)		# re-stores the tag already written to STAG above
	tst.b	%d1
	bne.b	_L19_2d
	bsr.l	sgetman			# operand is a NORM
	bra.b	_L19_6d
_L19_2d:
	cmpi.b	%d1,&ZERO		# is operand a ZERO?
	bne.b	_L19_3d			# no
	bsr.l	src_zero		# yes
	bra.b	_L19_6d
_L19_3d:
	cmpi.b	%d1,&INF		# is operand an INF?
	bne.b	_L19_4d			# no
	bsr.l	t_operr			# yes
	bra.b	_L19_6d
_L19_4d:
	cmpi.b	%d1,&QNAN		# is operand a QNAN?
	bne.b	_L19_5d			# no
	bsr.l	src_qnan		# yes
	bra.b	_L19_6d
_L19_5d:
	bsr.l	sgetmand		# operand is a DENORM
_L19_6d:

#
#	Result is now in FP0
#	(fp0 is therefore not reloaded; only fp1 is restored below)
#
	movm.l	EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l	USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x	EXC_FP1(%a6),&0x40	# restore fp1
	unlk	%a6
	rts

#
# _fgetmanx_(): extended-precision entry point wrapping sgetman.
#	Copies the three longwords of the argument at 0x8(%a6) into FP_SRC
#	and classifies it with tag(). The tag byte (0 selects the NORM
#	handler) picks sgetman, src_zero, t_operr, src_qnan or, for any
#	other value, sgetmand. The handler is entered with a0 -> FP_SRC and
#	d0 = FPCR_MODE byte (rnd mode,prec).
#
	global	_fgetmanx_
_fgetmanx_:
	link	%a6,&-LOCAL_SIZE

	movm.l	&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l	%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x	&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l	&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	lea	FP_SRC(%a6),%a0
	mov.l	0x8+0x0(%a6),0x0(%a0)	# load ext input
	mov.l	0x8+0x4(%a6),0x4(%a0)
	mov.l	0x8+0x8(%a6),0x8(%a0)
	bsr.l	tag			# fetch operand type
	mov.b	%d0,STAG(%a6)
	mov.b	%d0,%d1			# keep tag in d1; d0 is reused below

	andi.l	&0x00ff00ff,USER_FPSR(%a6) # clear bits 31-24 and 15-8 of saved FPSR

	clr.l	%d0
	mov.b	FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	tst.b	%d1
	bne.b	_L19_2x
	bsr.l	sgetman			# operand is a NORM
	bra.b	_L19_6x
_L19_2x:
	cmpi.b	%d1,&ZERO		# is operand a ZERO?
	bne.b	_L19_3x			# no
	bsr.l	src_zero		# yes
	bra.b	_L19_6x
_L19_3x:
	cmpi.b	%d1,&INF		# is operand an INF?
	bne.b	_L19_4x			# no
	bsr.l	t_operr			# yes
	bra.b	_L19_6x
_L19_4x:
	cmpi.b	%d1,&QNAN		# is operand a QNAN?
	bne.b	_L19_5x			# no
	bsr.l	src_qnan		# yes
	bra.b	_L19_6x
_L19_5x:
	bsr.l	sgetmand		# operand is a DENORM
_L19_6x:

#
#	Result is now in FP0
#	(fp0 is therefore not reloaded; only fp1 is restored below)
#
	movm.l	EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l	USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x	EXC_FP1(%a6),&0x40	# restore fp1
	unlk	%a6
	rts


#########################################################################
# MONADIC TEMPLATE							#
#########################################################################
#
# _fsincoss_(): single-precision entry point wrapping ssincos.
#	Loads the sgl argument at 0x8(%a6), stores it to FP_SRC in extended
#	format and classifies it with tag(). The tag byte (0 selects the
#	NORM handler) picks ssincos, ssincosz, ssincosi, ssincosqnan or, for
#	any other value, ssincosd. The handler is entered with a0 -> FP_SRC
#	and d0 = FPCR_MODE byte (rnd mode,prec).
#
	global	_fsincoss_
_fsincoss_:
	link	%a6,&-LOCAL_SIZE

	movm.l	&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l	%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x	&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l	&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	fmov.s	0x8(%a6),%fp0		# load sgl input
	fmov.x	%fp0,FP_SRC(%a6)
	lea	FP_SRC(%a6),%a0
	bsr.l	tag			# fetch operand type
	mov.b	%d0,STAG(%a6)
	mov.b	%d0,%d1			# keep tag in d1; d0 is reused below

	andi.l	&0x00ff00ff,USER_FPSR(%a6) # clear bits 31-24 and 15-8 of saved FPSR

	clr.l	%d0
	mov.b	FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	tst.b	%d1
	bne.b	_L20_2s
	bsr.l	ssincos			# operand is a NORM
	bra.b	_L20_6s
_L20_2s:
	cmpi.b	%d1,&ZERO		# is operand a ZERO?
	bne.b	_L20_3s			# no
	bsr.l	ssincosz		# yes
	bra.b	_L20_6s
_L20_3s:
	cmpi.b	%d1,&INF		# is operand an INF?
	bne.b	_L20_4s			# no
	bsr.l	ssincosi		# yes
	bra.b	_L20_6s
_L20_4s:
	cmpi.b	%d1,&QNAN		# is operand a QNAN?
	bne.b	_L20_5s			# no
	bsr.l	ssincosqnan		# yes
	bra.b	_L20_6s
_L20_5s:
	bsr.l	ssincosd		# operand is a DENORM
_L20_6s:

#
#	Result is now in FP0
#	(and FP1: the two are exchanged through the stack below, and
#	neither is reloaded from EXC_FP0/EXC_FP1)
#
	movm.l	EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l	USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x	&0x03,-(%sp)		# store off fp0/fp1
	fmovm.x	(%sp)+,&0x40		# fp0 now in fp1
	fmovm.x	(%sp)+,&0x80		# fp1 now in fp0
	unlk	%a6
	rts

#
# _fsincosd_(): double-precision entry point wrapping ssincos.
#	Loads the dbl argument at 0x8(%a6), stores it to FP_SRC in extended
#	format and classifies it with tag(). The tag byte (0 selects the
#	NORM handler) picks ssincos, ssincosz, ssincosi, ssincosqnan or, for
#	any other value, ssincosd. The handler is entered with a0 -> FP_SRC
#	and d0 = FPCR_MODE byte (rnd mode,prec).
#
	global	_fsincosd_
_fsincosd_:
	link	%a6,&-LOCAL_SIZE

	movm.l	&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l	%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x	&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l	&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	fmov.d	0x8(%a6),%fp0		# load dbl input
	fmov.x	%fp0,FP_SRC(%a6)
	lea	FP_SRC(%a6),%a0
	bsr.l	tag			# fetch operand type
	mov.b	%d0,STAG(%a6)
	mov.b	%d0,%d1			# keep tag in d1; d0 is reused below

	andi.l	&0x00ff00ff,USER_FPSR(%a6) # clear bits 31-24 and 15-8 of saved FPSR

	clr.l	%d0
	mov.b	FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	mov.b	%d1,STAG(%a6)		# re-stores the tag already written to STAG above
	tst.b	%d1
	bne.b	_L20_2d
	bsr.l	ssincos			# operand is a NORM
	bra.b	_L20_6d
_L20_2d:
	cmpi.b	%d1,&ZERO		# is operand a ZERO?
	bne.b	_L20_3d			# no
	bsr.l	ssincosz		# yes
	bra.b	_L20_6d
_L20_3d:
	cmpi.b	%d1,&INF		# is operand an INF?
	bne.b	_L20_4d			# no
	bsr.l	ssincosi		# yes
	bra.b	_L20_6d
_L20_4d:
	cmpi.b	%d1,&QNAN		# is operand a QNAN?
	bne.b	_L20_5d			# no
	bsr.l	ssincosqnan		# yes
	bra.b	_L20_6d
_L20_5d:
	bsr.l	ssincosd		# operand is a DENORM
_L20_6d:

#
#	Result is now in FP0
#	(and FP1: the two are exchanged through the stack below, and
#	neither is reloaded from EXC_FP0/EXC_FP1)
#
	movm.l	EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l	USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x	&0x03,-(%sp)		# store off fp0/fp1
	fmovm.x	(%sp)+,&0x40		# fp0 now in fp1
	fmovm.x	(%sp)+,&0x80		# fp1 now in fp0
	unlk	%a6
	rts

#
# _fsincosx_(): extended-precision entry point wrapping ssincos.
#	Copies the three longwords of the argument at 0x8(%a6) into FP_SRC
#	and classifies it with tag(). The tag byte (0 selects the NORM
#	handler) picks ssincos, ssincosz, ssincosi, ssincosqnan or, for any
#	other value, ssincosd. The handler is entered with a0 -> FP_SRC and
#	d0 = FPCR_MODE byte (rnd mode,prec).
#
	global	_fsincosx_
_fsincosx_:
	link	%a6,&-LOCAL_SIZE

	movm.l	&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l	%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x	&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l	&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	lea	FP_SRC(%a6),%a0
	mov.l	0x8+0x0(%a6),0x0(%a0)	# load ext input
	mov.l	0x8+0x4(%a6),0x4(%a0)
	mov.l	0x8+0x8(%a6),0x8(%a0)
	bsr.l	tag			# fetch operand type
	mov.b	%d0,STAG(%a6)
	mov.b	%d0,%d1			# keep tag in d1; d0 is reused below

	andi.l	&0x00ff00ff,USER_FPSR(%a6) # clear bits 31-24 and 15-8 of saved FPSR

	clr.l	%d0
	mov.b	FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	tst.b	%d1
	bne.b	_L20_2x
	bsr.l	ssincos			# operand is a NORM
	bra.b	_L20_6x
_L20_2x:
	cmpi.b	%d1,&ZERO		# is operand a ZERO?
	bne.b	_L20_3x			# no
	bsr.l	ssincosz		# yes
	bra.b	_L20_6x
_L20_3x:
	cmpi.b	%d1,&INF		# is operand an INF?
	bne.b	_L20_4x			# no
	bsr.l	ssincosi		# yes
	bra.b	_L20_6x
_L20_4x:
	cmpi.b	%d1,&QNAN		# is operand a QNAN?
	bne.b	_L20_5x			# no
	bsr.l	ssincosqnan		# yes
	bra.b	_L20_6x
_L20_5x:
	bsr.l	ssincosd		# operand is a DENORM
_L20_6x:

#
#	Result is now in FP0
#	(and FP1: the two are exchanged through the stack below, and
#	neither is reloaded from EXC_FP0/EXC_FP1)
#
	movm.l	EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l	USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x	&0x03,-(%sp)		# store off fp0/fp1
	fmovm.x	(%sp)+,&0x40		# fp0 now in fp1
	fmovm.x	(%sp)+,&0x80		# fp1 now in fp0
	unlk	%a6
	rts


#########################################################################
# DYADIC TEMPLATE							#
#########################################################################
#
# _frems_(): single-precision entry point for the srem_* handlers.
#	dst is the sgl argument at 0x8(%a6), src the one at 0xc(%a6). Both
#	are converted to extended (FP_DST/FP_SRC) and tagged (DTAG/STAG).
#	Dispatch here looks only at the src tag; DTAG is stored but not
#	examined in this wrapper. Handlers get a0 -> src, a1 -> dst and
#	d0 = FPCR_MODE byte (rnd mode,prec).
#
	global	_frems_
_frems_:
	link	%a6,&-LOCAL_SIZE

	movm.l	&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l	%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x	&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l	&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	fmov.s	0x8(%a6),%fp0		# load sgl dst
	fmov.x	%fp0,FP_DST(%a6)
	lea	FP_DST(%a6),%a0
	bsr.l	tag			# fetch operand type
	mov.b	%d0,DTAG(%a6)

	fmov.s	0xc(%a6),%fp0		# load sgl src
	fmov.x	%fp0,FP_SRC(%a6)
	lea	FP_SRC(%a6),%a0
	bsr.l	tag			# fetch operand type
	mov.b	%d0,STAG(%a6)
	mov.l	%d0,%d1			# src tag; only its low byte is tested below

	andi.l	&0x00ff00ff,USER_FPSR(%a6) # clear bits 31-24 and 15-8 of saved FPSR

	clr.l	%d0
	mov.b	FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	lea	FP_SRC(%a6),%a0		# pass ptr to src
	lea	FP_DST(%a6),%a1		# pass ptr to dst

	tst.b	%d1
	bne.b	_L21_2s
	bsr.l	srem_snorm		# operand is a NORM
	bra.b	_L21_6s
_L21_2s:
	cmpi.b	%d1,&ZERO		# is operand a ZERO?
	bne.b	_L21_3s			# no
	bsr.l	srem_szero		# yes
	bra.b	_L21_6s
_L21_3s:
	cmpi.b	%d1,&INF		# is operand an INF?
	bne.b	_L21_4s			# no
	bsr.l	srem_sinf		# yes
	bra.b	_L21_6s
_L21_4s:
	cmpi.b	%d1,&QNAN		# is operand a QNAN?
	bne.b	_L21_5s			# no
	bsr.l	sop_sqnan		# yes
	bra.b	_L21_6s
_L21_5s:
	bsr.l	srem_sdnrm		# operand is a DENORM
_L21_6s:

#
#	Result is now in FP0
#	(fp0 is therefore not reloaded; only fp1 is restored below)
#
	movm.l	EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l	USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x	EXC_FP1(%a6),&0x40	# restore fp1
	unlk	%a6
	rts

#
# _fremd_(): double-precision entry point for the srem_* handlers.
#	dst is the dbl argument at 0x8(%a6), src the one at 0x10(%a6). Both
#	are converted to extended (FP_DST/FP_SRC) and tagged (DTAG/STAG).
#	Dispatch here looks only at the src tag; DTAG is stored but not
#	examined in this wrapper. Handlers get a0 -> src, a1 -> dst and
#	d0 = FPCR_MODE byte (rnd mode,prec).
#
	global	_fremd_
_fremd_:
	link	%a6,&-LOCAL_SIZE

	movm.l	&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l	%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x	&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l	&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	fmov.d	0x8(%a6),%fp0		# load dbl dst
	fmov.x	%fp0,FP_DST(%a6)
	lea	FP_DST(%a6),%a0
	bsr.l	tag			# fetch operand type
	mov.b	%d0,DTAG(%a6)

	fmov.d	0x10(%a6),%fp0		# load dbl src
	fmov.x	%fp0,FP_SRC(%a6)
	lea	FP_SRC(%a6),%a0
	bsr.l	tag			# fetch operand type
	mov.b	%d0,STAG(%a6)
	mov.l	%d0,%d1			# src tag; only its low byte is tested below

	andi.l	&0x00ff00ff,USER_FPSR(%a6) # clear bits 31-24 and 15-8 of saved FPSR

	clr.l	%d0
	mov.b	FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	lea	FP_SRC(%a6),%a0		# pass ptr to src
	lea	FP_DST(%a6),%a1		# pass ptr to dst

	tst.b	%d1
	bne.b	_L21_2d
	bsr.l	srem_snorm		# operand is a NORM
	bra.b	_L21_6d
_L21_2d:
	cmpi.b	%d1,&ZERO		# is operand a ZERO?
	bne.b	_L21_3d			# no
	bsr.l	srem_szero		# yes
	bra.b	_L21_6d
_L21_3d:
	cmpi.b	%d1,&INF		# is operand an INF?
	bne.b	_L21_4d			# no
	bsr.l	srem_sinf		# yes
	bra.b	_L21_6d
_L21_4d:
	cmpi.b	%d1,&QNAN		# is operand a QNAN?
	bne.b	_L21_5d			# no
	bsr.l	sop_sqnan		# yes
	bra.b	_L21_6d
_L21_5d:
	bsr.l	srem_sdnrm		# operand is a DENORM
_L21_6d:

#
#	Result is now in FP0
#	(fp0 is therefore not reloaded; only fp1 is restored below)
#
	movm.l	EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l	USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x	EXC_FP1(%a6),&0x40	# restore fp1
	unlk	%a6
	rts

#
# _fremx_(): extended-precision entry point for the srem_* handlers.
#	dst is the three-longword argument at 0x8(%a6), src the one at
#	0x14(%a6). Both are copied (FP_DST/FP_SRC) and tagged (DTAG/STAG).
#	Dispatch here looks only at the src tag; DTAG is stored but not
#	examined in this wrapper. Handlers get a0 -> src, a1 -> dst and
#	d0 = FPCR_MODE byte (rnd mode,prec).
#
	global	_fremx_
_fremx_:
	link	%a6,&-LOCAL_SIZE

	movm.l	&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l	%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x	&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l	&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	lea	FP_DST(%a6),%a0
	mov.l	0x8+0x0(%a6),0x0(%a0)	# load ext dst
	mov.l	0x8+0x4(%a6),0x4(%a0)
	mov.l	0x8+0x8(%a6),0x8(%a0)
	bsr.l	tag			# fetch operand type
	mov.b	%d0,DTAG(%a6)

	lea	FP_SRC(%a6),%a0
	mov.l	0x14+0x0(%a6),0x0(%a0)	# load ext src
	mov.l	0x14+0x4(%a6),0x4(%a0)
	mov.l	0x14+0x8(%a6),0x8(%a0)
	bsr.l	tag			# fetch operand type
	mov.b	%d0,STAG(%a6)
	mov.l	%d0,%d1			# src tag; only its low byte is tested below

	andi.l	&0x00ff00ff,USER_FPSR(%a6) # clear bits 31-24 and 15-8 of saved FPSR

	clr.l	%d0
	mov.b	FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	lea	FP_SRC(%a6),%a0		# pass ptr to src
	lea	FP_DST(%a6),%a1		# pass ptr to dst

	tst.b	%d1
	bne.b	_L21_2x
	bsr.l	srem_snorm		# operand is a NORM
	bra.b	_L21_6x
_L21_2x:
	cmpi.b	%d1,&ZERO		# is operand a ZERO?
	bne.b	_L21_3x			# no
	bsr.l	srem_szero		# yes
	bra.b	_L21_6x
_L21_3x:
	cmpi.b	%d1,&INF		# is operand an INF?
	bne.b	_L21_4x			# no
	bsr.l	srem_sinf		# yes
	bra.b	_L21_6x
_L21_4x:
	cmpi.b	%d1,&QNAN		# is operand a QNAN?
	bne.b	_L21_5x			# no
	bsr.l	sop_sqnan		# yes
	bra.b	_L21_6x
_L21_5x:
	bsr.l	srem_sdnrm		# operand is a DENORM
_L21_6x:

#
#	Result is now in FP0
#	(fp0 is therefore not reloaded; only fp1 is restored below)
#
	movm.l	EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l	USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x	EXC_FP1(%a6),&0x40	# restore fp1
	unlk	%a6
	rts


#########################################################################
# DYADIC TEMPLATE							#
#########################################################################
#
# _fmods_(): single-precision entry point for the smod_* handlers.
#	dst is the sgl argument at 0x8(%a6), src the one at 0xc(%a6). Both
#	are converted to extended (FP_DST/FP_SRC) and tagged (DTAG/STAG).
#	Dispatch here looks only at the src tag; DTAG is stored but not
#	examined in this wrapper. Handlers get a0 -> src, a1 -> dst and
#	d0 = FPCR_MODE byte (rnd mode,prec).
#
	global	_fmods_
_fmods_:
	link	%a6,&-LOCAL_SIZE

	movm.l	&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l	%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x	&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l	&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	fmov.s	0x8(%a6),%fp0		# load sgl dst
	fmov.x	%fp0,FP_DST(%a6)
	lea	FP_DST(%a6),%a0
	bsr.l	tag			# fetch operand type
	mov.b	%d0,DTAG(%a6)

	fmov.s	0xc(%a6),%fp0		# load sgl src
	fmov.x	%fp0,FP_SRC(%a6)
	lea	FP_SRC(%a6),%a0
	bsr.l	tag			# fetch operand type
	mov.b	%d0,STAG(%a6)
	mov.l	%d0,%d1			# src tag; only its low byte is tested below

	andi.l	&0x00ff00ff,USER_FPSR(%a6) # clear bits 31-24 and 15-8 of saved FPSR

	clr.l	%d0
	mov.b	FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	lea	FP_SRC(%a6),%a0		# pass ptr to src
	lea	FP_DST(%a6),%a1		# pass ptr to dst

	tst.b	%d1
	bne.b	_L22_2s
	bsr.l	smod_snorm		# operand is a NORM
	bra.b	_L22_6s
_L22_2s:
	cmpi.b	%d1,&ZERO		# is operand a ZERO?
	bne.b	_L22_3s			# no
	bsr.l	smod_szero		# yes
	bra.b	_L22_6s
_L22_3s:
	cmpi.b	%d1,&INF		# is operand an INF?
	bne.b	_L22_4s			# no
	bsr.l	smod_sinf		# yes
	bra.b	_L22_6s
_L22_4s:
	cmpi.b	%d1,&QNAN		# is operand a QNAN?
	bne.b	_L22_5s			# no
	bsr.l	sop_sqnan		# yes
	bra.b	_L22_6s
_L22_5s:
	bsr.l	smod_sdnrm		# operand is a DENORM
_L22_6s:

#
#	Result is now in FP0
#	(fp0 is therefore not reloaded; only fp1 is restored below)
#
	movm.l	EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l	USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x	EXC_FP1(%a6),&0x40	# restore fp1
	unlk	%a6
	rts

#
# _fmodd_(): double-precision entry point for the smod_* handlers.
#	dst is the dbl argument at 0x8(%a6), src the one at 0x10(%a6). Both
#	are converted to extended (FP_DST/FP_SRC) and tagged (DTAG/STAG).
#	Dispatch here looks only at the src tag; DTAG is stored but not
#	examined in this wrapper. Handlers get a0 -> src, a1 -> dst and
#	d0 = FPCR_MODE byte (rnd mode,prec).
#
	global	_fmodd_
_fmodd_:
	link	%a6,&-LOCAL_SIZE

	movm.l	&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l	%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x	&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l	&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	fmov.d	0x8(%a6),%fp0		# load dbl dst
	fmov.x	%fp0,FP_DST(%a6)
	lea	FP_DST(%a6),%a0
	bsr.l	tag			# fetch operand type
	mov.b	%d0,DTAG(%a6)

	fmov.d	0x10(%a6),%fp0		# load dbl src
	fmov.x	%fp0,FP_SRC(%a6)
	lea	FP_SRC(%a6),%a0
	bsr.l	tag			# fetch operand type
	mov.b	%d0,STAG(%a6)
	mov.l	%d0,%d1			# src tag; only its low byte is tested below

	andi.l	&0x00ff00ff,USER_FPSR(%a6) # clear bits 31-24 and 15-8 of saved FPSR

	clr.l	%d0
	mov.b	FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	lea	FP_SRC(%a6),%a0		# pass ptr to src
	lea	FP_DST(%a6),%a1		# pass ptr to dst

	tst.b	%d1
	bne.b	_L22_2d
	bsr.l	smod_snorm		# operand is a NORM
	bra.b	_L22_6d
_L22_2d:
	cmpi.b	%d1,&ZERO		# is operand a ZERO?
	bne.b	_L22_3d			# no
	bsr.l	smod_szero		# yes
	bra.b	_L22_6d
_L22_3d:
	cmpi.b	%d1,&INF		# is operand an INF?
	bne.b	_L22_4d			# no
	bsr.l	smod_sinf		# yes
	bra.b	_L22_6d
_L22_4d:
	cmpi.b	%d1,&QNAN		# is operand a QNAN?
	bne.b	_L22_5d			# no
	bsr.l	sop_sqnan		# yes
	bra.b	_L22_6d
_L22_5d:
	bsr.l	smod_sdnrm		# operand is a DENORM
_L22_6d:

#
#	Result is now in FP0
#	(fp0 is therefore not reloaded; only fp1 is restored below)
#
	movm.l	EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l	USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x	EXC_FP1(%a6),&0x40	# restore fp1
	unlk	%a6
	rts

#
# _fmodx_(): extended-precision entry point for the smod_* handlers.
#	dst is the three-longword argument at 0x8(%a6), src the one at
#	0x14(%a6). Both are copied (FP_DST/FP_SRC) and tagged (DTAG/STAG).
#	Dispatch here looks only at the src tag; DTAG is stored but not
#	examined in this wrapper. Handlers get a0 -> src, a1 -> dst and
#	d0 = FPCR_MODE byte (rnd mode,prec).
#
	global	_fmodx_
_fmodx_:
	link	%a6,&-LOCAL_SIZE

	movm.l	&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l	%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x	&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l	&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	lea	FP_DST(%a6),%a0
	mov.l	0x8+0x0(%a6),0x0(%a0)	# load ext dst
	mov.l	0x8+0x4(%a6),0x4(%a0)
	mov.l	0x8+0x8(%a6),0x8(%a0)
	bsr.l	tag			# fetch operand type
	mov.b	%d0,DTAG(%a6)

	lea	FP_SRC(%a6),%a0
	mov.l	0x14+0x0(%a6),0x0(%a0)	# load ext src
	mov.l	0x14+0x4(%a6),0x4(%a0)
	mov.l	0x14+0x8(%a6),0x8(%a0)
	bsr.l	tag			# fetch operand type
	mov.b	%d0,STAG(%a6)
	mov.l	%d0,%d1			# src tag; only its low byte is tested below

	andi.l	&0x00ff00ff,USER_FPSR(%a6) # clear bits 31-24 and 15-8 of saved FPSR

	clr.l	%d0
	mov.b	FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	lea	FP_SRC(%a6),%a0		# pass ptr to src
	lea	FP_DST(%a6),%a1		# pass ptr to dst

	tst.b	%d1
	bne.b	_L22_2x
	bsr.l	smod_snorm		# operand is a NORM
	bra.b	_L22_6x
_L22_2x:
	cmpi.b	%d1,&ZERO		# is operand a ZERO?
	bne.b	_L22_3x			# no
	bsr.l	smod_szero		# yes
	bra.b	_L22_6x
_L22_3x:
	cmpi.b	%d1,&INF		# is operand an INF?
	bne.b	_L22_4x			# no
	bsr.l	smod_sinf		# yes
	bra.b	_L22_6x
_L22_4x:
	cmpi.b	%d1,&QNAN		# is operand a QNAN?
	bne.b	_L22_5x			# no
	bsr.l	sop_sqnan		# yes
	bra.b	_L22_6x
_L22_5x:
	bsr.l	smod_sdnrm		# operand is a DENORM
_L22_6x:

#
#	Result is now in FP0
#	(fp0 is therefore not reloaded; only fp1 is restored below)
#
	movm.l	EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l	USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x	EXC_FP1(%a6),&0x40	# restore fp1
	unlk	%a6
	rts


#########################################################################
# DYADIC TEMPLATE							#
#########################################################################
#
# _fscales_(): single-precision entry point for the sscale_* handlers.
#	dst is the sgl argument at 0x8(%a6), src the one at 0xc(%a6). Both
#	are converted to extended (FP_DST/FP_SRC) and tagged (DTAG/STAG).
#	Dispatch here looks only at the src tag; DTAG is stored but not
#	examined in this wrapper. Handlers get a0 -> src, a1 -> dst and
#	d0 = FPCR_MODE byte (rnd mode,prec).
#
	global	_fscales_
_fscales_:
	link	%a6,&-LOCAL_SIZE

	movm.l	&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l	%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x	&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l	&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	fmov.s	0x8(%a6),%fp0		# load sgl dst
	fmov.x	%fp0,FP_DST(%a6)
	lea	FP_DST(%a6),%a0
	bsr.l	tag			# fetch operand type
	mov.b	%d0,DTAG(%a6)

	fmov.s	0xc(%a6),%fp0		# load sgl src
	fmov.x	%fp0,FP_SRC(%a6)
	lea	FP_SRC(%a6),%a0
	bsr.l	tag			# fetch operand type
	mov.b	%d0,STAG(%a6)
	mov.l	%d0,%d1			# src tag; only its low byte is tested below

	andi.l	&0x00ff00ff,USER_FPSR(%a6) # clear bits 31-24 and 15-8 of saved FPSR

	clr.l	%d0
	mov.b	FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	lea	FP_SRC(%a6),%a0		# pass ptr to src
	lea	FP_DST(%a6),%a1		# pass ptr to dst

	tst.b	%d1
	bne.b	_L23_2s
	bsr.l	sscale_snorm		# operand is a NORM
	bra.b	_L23_6s
_L23_2s:
	cmpi.b	%d1,&ZERO		# is operand a ZERO?
	bne.b	_L23_3s			# no
	bsr.l	sscale_szero		# yes
	bra.b	_L23_6s
_L23_3s:
	cmpi.b	%d1,&INF		# is operand an INF?
	bne.b	_L23_4s			# no
	bsr.l	sscale_sinf		# yes
	bra.b	_L23_6s
_L23_4s:
	cmpi.b	%d1,&QNAN		# is operand a QNAN?
	bne.b	_L23_5s			# no
	bsr.l	sop_sqnan		# yes
	bra.b	_L23_6s
_L23_5s:
	bsr.l	sscale_sdnrm		# operand is a DENORM
_L23_6s:

#
#	Result is now in FP0
#	(fp0 is therefore not reloaded; only fp1 is restored below)
#
	movm.l	EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l	USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x	EXC_FP1(%a6),&0x40	# restore fp1
	unlk	%a6
	rts

#
# _fscaled_(): double-precision entry point for the sscale_* handlers.
#	dst is the dbl argument at 0x8(%a6), src the one at 0x10(%a6). Both
#	are converted to extended (FP_DST/FP_SRC) and tagged (DTAG/STAG).
#	Dispatch here looks only at the src tag; DTAG is stored but not
#	examined in this wrapper. Handlers get a0 -> src, a1 -> dst and
#	d0 = FPCR_MODE byte (rnd mode,prec).
#
	global	_fscaled_
_fscaled_:
	link	%a6,&-LOCAL_SIZE

	movm.l	&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l	%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x	&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l	&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	fmov.d	0x8(%a6),%fp0		# load dbl dst
	fmov.x	%fp0,FP_DST(%a6)
	lea	FP_DST(%a6),%a0
	bsr.l	tag			# fetch operand type
	mov.b	%d0,DTAG(%a6)

	fmov.d	0x10(%a6),%fp0		# load dbl src
	fmov.x	%fp0,FP_SRC(%a6)
	lea	FP_SRC(%a6),%a0
	bsr.l	tag			# fetch operand type
	mov.b	%d0,STAG(%a6)
	mov.l	%d0,%d1			# src tag; only its low byte is tested below

	andi.l	&0x00ff00ff,USER_FPSR(%a6) # clear bits 31-24 and 15-8 of saved FPSR

	clr.l	%d0
	mov.b	FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	lea	FP_SRC(%a6),%a0		# pass ptr to src
	lea	FP_DST(%a6),%a1		# pass ptr to dst

	tst.b	%d1
	bne.b	_L23_2d
	bsr.l	sscale_snorm		# operand is a NORM
	bra.b	_L23_6d
_L23_2d:
	cmpi.b	%d1,&ZERO		# is operand a ZERO?
	bne.b	_L23_3d			# no
	bsr.l	sscale_szero		# yes
	bra.b	_L23_6d
_L23_3d:
	cmpi.b	%d1,&INF		# is operand an INF?
	bne.b	_L23_4d			# no
	bsr.l	sscale_sinf		# yes
	bra.b	_L23_6d
_L23_4d:
	cmpi.b	%d1,&QNAN		# is operand a QNAN?
	bne.b	_L23_5d			# no
	bsr.l	sop_sqnan		# yes
	bra.b	_L23_6d
_L23_5d:
	bsr.l	sscale_sdnrm		# operand is a DENORM
_L23_6d:

#
#	Result is now in FP0
#	(fp0 is therefore not reloaded; only fp1 is restored below)
#
	movm.l	EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l	USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x	EXC_FP1(%a6),&0x40	# restore fp1
	unlk	%a6
	rts

#
# _fscalex_(): extended-precision entry point for the sscale_* handlers.
#	dst is the three-longword argument at 0x8(%a6), src the one at
#	0x14(%a6). Both are copied (FP_DST/FP_SRC) and tagged (DTAG/STAG).
#	Dispatch here looks only at the src tag; DTAG is stored but not
#	examined in this wrapper. Handlers get a0 -> src, a1 -> dst and
#	d0 = FPCR_MODE byte (rnd mode,prec).
#
	global	_fscalex_
_fscalex_:
	link	%a6,&-LOCAL_SIZE

	movm.l	&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l	%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x	&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l	&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	lea	FP_DST(%a6),%a0
	mov.l	0x8+0x0(%a6),0x0(%a0)	# load ext dst
	mov.l	0x8+0x4(%a6),0x4(%a0)
	mov.l	0x8+0x8(%a6),0x8(%a0)
	bsr.l	tag			# fetch operand type
	mov.b	%d0,DTAG(%a6)

	lea	FP_SRC(%a6),%a0
	mov.l	0x14+0x0(%a6),0x0(%a0)	# load ext src
	mov.l	0x14+0x4(%a6),0x4(%a0)
	mov.l	0x14+0x8(%a6),0x8(%a0)
	bsr.l	tag			# fetch operand type
	mov.b	%d0,STAG(%a6)
	mov.l	%d0,%d1			# src tag; only its low byte is tested below

	andi.l	&0x00ff00ff,USER_FPSR(%a6) # clear bits 31-24 and 15-8 of saved FPSR

	clr.l	%d0
	mov.b	FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	lea	FP_SRC(%a6),%a0		# pass ptr to src
	lea	FP_DST(%a6),%a1		# pass ptr to dst

	tst.b	%d1
	bne.b	_L23_2x
	bsr.l	sscale_snorm		# operand is a NORM
	bra.b	_L23_6x
_L23_2x:
	cmpi.b	%d1,&ZERO		# is operand a ZERO?
	bne.b	_L23_3x			# no
	bsr.l	sscale_szero		# yes
	bra.b	_L23_6x
_L23_3x:
	cmpi.b	%d1,&INF		# is operand an INF?
	bne.b	_L23_4x			# no
	bsr.l	sscale_sinf		# yes
	bra.b	_L23_6x
_L23_4x:
	cmpi.b	%d1,&QNAN		# is operand a QNAN?
	bne.b	_L23_5x			# no
	bsr.l	sop_sqnan		# yes
	bra.b	_L23_6x
_L23_5x:
	bsr.l	sscale_sdnrm		# operand is a DENORM
_L23_6x:

#
#	Result is now in FP0
#	(fp0 is therefore not reloaded; only fp1 is restored below)
#
	movm.l	EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l	USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x	EXC_FP1(%a6),&0x40	# restore fp1
	unlk	%a6
	rts


#########################################################################
# ssin(): computes the sine of a normalized input			#
# ssind(): computes the sine of a denormalized input			#
# scos(): computes the cosine of a normalized input			#
# scosd(): computes the cosine of a denormalized input			#
# ssincos(): computes the sine and cosine of a normalized input		#
# ssincosd(): computes the sine and cosine of a denormalized input	#
#									#
# INPUT ***************************************************************	#
#	a0 = pointer to extended precision input			#
#	d0 = round precision,mode					#
#									#
# OUTPUT **************************************************************	#
#	fp0 = sin(X) or cos(X)						#
#									#
#	For ssincos(X):							#
#	fp0 = sin(X)							#
#	fp1 = cos(X)							#
#									#
# ACCURACY and MONOTONICITY *******************************************	#
#	The returned result is within 1 ulp in 64 significant bits,	#
#	i.e. within 0.5001 ulp to 53 bits if the result is subsequently	#
#	rounded to double precision. The result is provably monotonic	#
#	in double precision.						#
#									#
# ALGORITHM ***********************************************************	#
#									#
#	SIN and COS:							#
#	1. If SIN is invoked, set AdjN := 0; otherwise, set AdjN := 1.	#
#									#
#	2. If |X| >= 15Pi or |X| < 2**(-40), go to 7.			#
#									#
#	3. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let	#
#	   k = N mod 4, so in particular, k = 0,1,2,or 3.		#
#	   Overwrite k by k := k + AdjN.				#
#									#
#	4. If k is even, go to 6.					#
#									#
#	5.
#	   (k is odd) Set j := (k-1)/2, sgn := (-1)**j.			#
#	   Return sgn*cos(r) where cos(r) is approximated by an		#
#	   even polynomial in r, 1 + r*r*(B1+s*(B2+ ... + s*B8)),	#
#	   s = r*r.							#
#	   Exit.							#
#									#
#	6. (k is even) Set j := k/2, sgn := (-1)**j. Return sgn*sin(r)	#
#	   where sin(r) is approximated by an odd polynomial in r	#
#	   r + r*s*(A1+s*(A2+ ... + s*A7)), s = r*r.			#
#	   Exit.							#
#									#
#	7. If |X| > 1, go to 9.						#
#									#
#	8. (|X|<2**(-40)) If SIN is invoked, return X;			#
#	   otherwise return 1.						#
#									#
#	9. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi,		#
#	   go back to 3.						#
#									#
#	SINCOS:								#
#	1. If |X| >= 15Pi or |X| < 2**(-40), go to 6.			#
#									#
#	2. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let	#
#	   k = N mod 4, so in particular, k = 0,1,2,or 3.		#
#									#
#	3. If k is even, go to 5.					#
#									#
#	4. (k is odd) Set j1 := (k-1)/2, j2 := j1 (EOR) (k mod 2), i.e.	#
#	   j1 exclusive or with the l.s.b. of k.			#
#	   sgn1 := (-1)**j1, sgn2 := (-1)**j2.				#
#	   SIN(X) = sgn1 * cos(r) and COS(X) = sgn2*sin(r) where	#
#	   sin(r) and cos(r) are computed as odd and even		#
#	   polynomials in r, respectively. Exit.			#
#									#
#	5. (k is even) Set j1 := k/2, sgn1 := (-1)**j1.			#
#	   SIN(X) = sgn1 * sin(r) and COS(X) = sgn1*cos(r) where	#
#	   sin(r) and cos(r) are computed as odd and even		#
#	   polynomials in r, respectively. Exit.			#
#									#
#	6. If |X| > 1, go to 8.						#
#									#
#	7. (|X|<2**(-40)) SIN(X) = X and COS(X) = 1. Exit.		#
#									#
#	8. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi,		#
#	   go back to 2.
#
#									#
#########################################################################

# Polynomial coefficients, read pc-relative by SINPOLY/COSPOLY. Per the
# notes at those routines, A3-A7 and B4-B8 are used as doubles, A1, A2,
# B2 and B3 as extended, and B1 (-1/2) as single.
SINA7:	long	0xBD6AAA77,0xCCC994F5
SINA6:	long	0x3DE61209,0x7AAE8DA1
SINA5:	long	0xBE5AE645,0x2A118AE4
SINA4:	long	0x3EC71DE3,0xA5341531
SINA3:	long	0xBF2A01A0,0x1A018B59,0x00000000,0x00000000
SINA2:	long	0x3FF80000,0x88888888,0x888859AF,0x00000000
SINA1:	long	0xBFFC0000,0xAAAAAAAA,0xAAAAAA99,0x00000000

COSB8:	long	0x3D2AC4D0,0xD6011EE3
COSB7:	long	0xBDA9396F,0x9F45AC19
COSB6:	long	0x3E21EED9,0x0612C972
COSB5:	long	0xBE927E4F,0xB79D9FCF
COSB4:	long	0x3EFA01A0,0x1A01D423,0x00000000,0x00000000
COSB3:	long	0xBFF50000,0xB60B60B6,0x0B61D438,0x00000000
COSB2:	long	0x3FFA0000,0xAAAAAAAA,0xAAAAAB5E
COSB1:	long	0xBF000000

# Scratch-slot names used by the routines below. Several are aliases of
# the same slot: INARG/X/RPRIME -> FP_SCR0, POSNEG1/TWOTO63 -> L_SCR1,
# ENDFLAG/INT -> L_SCR2.
	set	INARG,FP_SCR0

	set	X,FP_SCR0
#	set	XDCARE,X+2
	set	XFRAC,X+4

	set	RPRIME,FP_SCR0
	set	SPRIME,FP_SCR1

	set	POSNEG1,L_SCR1
	set	TWOTO63,L_SCR1

	set	ENDFLAG,L_SCR2
	set	INT,L_SCR2

	set	ADJN,L_SCR3

############################################
# ssin: sine entry. Clears ADJN and branches to the shared code at SINBGN.
	global	ssin
ssin:
	mov.l	&0,ADJN(%a6)		# SIN: SET ADJN TO 0
	bra.b	SINBGN

############################################
# scos: cosine entry. Sets ADJN to 1 and falls through into SINBGN.
	global	scos
scos:
	mov.l	&1,ADJN(%a6)		# COS: SET ADJN TO 1

############################################
SINBGN:
#--CHECK IF |X| IS TOO SMALL OR LARGE
#--NOTE(review): this heading originally also read "SAVE FPCR, FP1", but
#--no save instruction appears between SINBGN and SINMAIN.

	fmov.x	(%a0),%fp0		# LOAD INPUT
	fmov.x	%fp0,X(%a6)		# save input at X

# "COMPACTIFY" X
	mov.l	(%a0),%d1		# put exp in hi word
	mov.w	4(%a0),%d1		# fetch hi(man)
	and.l	&0x7FFFFFFF,%d1		# strip sign

	cmpi.l	%d1,&0x3FD78000		# is |X| >= 2**(-40)?
	bge.b	SOK1			# yes
	bra.w	SINSM			# no; input is very small

SOK1:
	cmp.l	%d1,&0x4004BC7E		# is |X| < 15 PI?
	blt.b	SINMAIN			# yes
	bra.w	SREDUCEX		# no; input is very large

#--THIS IS THE USUAL CASE, |X| <= 15 PI.
#--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
SINMAIN:
	fmov.x	%fp0,%fp1
	fmul.d	TWOBYPI(%pc),%fp1	# X*2/PI

	lea	PITBL+0x200(%pc),%a1	# TABLE OF N*PI/2, N = -32,...,32

	fmov.l	%fp1,INT(%a6)		# CONVERT TO INTEGER

	mov.l	INT(%a6),%d1		# make a copy of N
	asl.l	&4,%d1			# N *= 16
	add.l	%d1,%a1			# tbl_addr = a1 + (N*16)

# A1 IS THE ADDRESS OF N*PIBY2
# ...WHICH IS IN TWO PIECES Y1 & Y2
	fsub.x	(%a1)+,%fp0		# X-Y1
	fsub.s	(%a1),%fp0		# fp0 = R = (X-Y1)-Y2

SINCONT:
#--continuation from REDUCEX

#--GET N+ADJN AND SEE IF SIN(R) OR COS(R) IS NEEDED
	mov.l	INT(%a6),%d1
	add.l	ADJN(%a6),%d1		# SEE IF D1 IS ODD OR EVEN
	ror.l	&1,%d1			# D1 WAS ODD IFF D1 IS NOW NEGATIVE
	cmp.l	%d1,&0
	blt.w	COSPOLY

#--LET J BE THE LEAST SIG. BIT OF D1 (AS ROTATED ABOVE), LET SGN := (-1)**J.
#--THEN WE RETURN SGN*SIN(R). SGN*SIN(R) IS COMPUTED BY
#--R' + R'*S*(A1 + S(A2 + S(A3 + S(A4 + ... + SA7)))), WHERE
#--R' = SGN*R, S=R*R. THIS CAN BE REWRITTEN AS
#--R' + R'*S*( [A1+T(A3+T(A5+TA7))] + [S(A2+T(A4+TA6))])
#--WHERE T=S*S.
#--NOTE THAT A3 THROUGH A7 ARE STORED IN DOUBLE PRECISION
#--WHILE A1 AND A2 ARE IN DOUBLE-EXTENDED FORMAT.
SINPOLY:
	fmovm.x	&0x0c,-(%sp)		# save fp2/fp3

	fmov.x	%fp0,X(%a6)		# X IS R
	fmul.x	%fp0,%fp0		# FP0 IS S

	fmov.d	SINA7(%pc),%fp3
	fmov.d	SINA6(%pc),%fp2

	fmov.x	%fp0,%fp1
	fmul.x	%fp1,%fp1		# FP1 IS T

	ror.l	&1,%d1
	and.l	&0x80000000,%d1
# ...LEAST SIG.
BIT OF D0 IN SIGN POSITION 5106 eor.l %d1,X(%a6) # X IS NOW R'= SGN*R 5107 5108 fmul.x %fp1,%fp3 # TA7 5109 fmul.x %fp1,%fp2 # TA6 5110 5111 fadd.d SINA5(%pc),%fp3 # A5+TA7 5112 fadd.d SINA4(%pc),%fp2 # A4+TA6 5113 5114 fmul.x %fp1,%fp3 # T(A5+TA7) 5115 fmul.x %fp1,%fp2 # T(A4+TA6) 5116 5117 fadd.d SINA3(%pc),%fp3 # A3+T(A5+TA7) 5118 fadd.x SINA2(%pc),%fp2 # A2+T(A4+TA6) 5119 5120 fmul.x %fp3,%fp1 # T(A3+T(A5+TA7)) 5121 5122 fmul.x %fp0,%fp2 # S(A2+T(A4+TA6)) 5123 fadd.x SINA1(%pc),%fp1 # A1+T(A3+T(A5+TA7)) 5124 fmul.x X(%a6),%fp0 # R'*S 5125 5126 fadd.x %fp2,%fp1 # [A1+T(A3+T(A5+TA7))]+[S(A2+T(A4+TA6))] 5127 5128 fmul.x %fp1,%fp0 # SIN(R')-R' 5129 5130 fmovm.x (%sp)+,&0x30 # restore fp2/fp3 5131 5132 fmov.l %d0,%fpcr # restore users round mode,prec 5133 fadd.x X(%a6),%fp0 # last inst - possible exception set 5134 bra t_inx2 5135 5136#--LET J BE THE LEAST SIG. BIT OF D0, LET SGN := (-1)**J. 5137#--THEN WE RETURN SGN*COS(R). SGN*COS(R) IS COMPUTED BY 5138#--SGN + S'*(B1 + S(B2 + S(B3 + S(B4 + ... + SB8)))), WHERE 5139#--S=R*R AND S'=SGN*S. THIS CAN BE REWRITTEN AS 5140#--SGN + S'*([B1+T(B3+T(B5+TB7))] + [S(B2+T(B4+T(B6+TB8)))]) 5141#--WHERE T=S*S. 5142#--NOTE THAT B4 THROUGH B8 ARE STORED IN DOUBLE PRECISION 5143#--WHILE B2 AND B3 ARE IN DOUBLE-EXTENDED FORMAT, B1 IS -1/2 5144#--AND IS THEREFORE STORED AS SINGLE PRECISION. 5145COSPOLY: 5146 fmovm.x &0x0c,-(%sp) # save fp2/fp3 5147 5148 fmul.x %fp0,%fp0 # FP0 IS S 5149 5150 fmov.d COSB8(%pc),%fp2 5151 fmov.d COSB7(%pc),%fp3 5152 5153 fmov.x %fp0,%fp1 5154 fmul.x %fp1,%fp1 # FP1 IS T 5155 5156 fmov.x %fp0,X(%a6) # X IS S 5157 ror.l &1,%d1 5158 and.l &0x80000000,%d1 5159# ...LEAST SIG. 
BIT OF D0 IN SIGN POSITION 5160 5161 fmul.x %fp1,%fp2 # TB8 5162 5163 eor.l %d1,X(%a6) # X IS NOW S'= SGN*S 5164 and.l &0x80000000,%d1 5165 5166 fmul.x %fp1,%fp3 # TB7 5167 5168 or.l &0x3F800000,%d1 # D0 IS SGN IN SINGLE 5169 mov.l %d1,POSNEG1(%a6) 5170 5171 fadd.d COSB6(%pc),%fp2 # B6+TB8 5172 fadd.d COSB5(%pc),%fp3 # B5+TB7 5173 5174 fmul.x %fp1,%fp2 # T(B6+TB8) 5175 fmul.x %fp1,%fp3 # T(B5+TB7) 5176 5177 fadd.d COSB4(%pc),%fp2 # B4+T(B6+TB8) 5178 fadd.x COSB3(%pc),%fp3 # B3+T(B5+TB7) 5179 5180 fmul.x %fp1,%fp2 # T(B4+T(B6+TB8)) 5181 fmul.x %fp3,%fp1 # T(B3+T(B5+TB7)) 5182 5183 fadd.x COSB2(%pc),%fp2 # B2+T(B4+T(B6+TB8)) 5184 fadd.s COSB1(%pc),%fp1 # B1+T(B3+T(B5+TB7)) 5185 5186 fmul.x %fp2,%fp0 # S(B2+T(B4+T(B6+TB8))) 5187 5188 fadd.x %fp1,%fp0 5189 5190 fmul.x X(%a6),%fp0 5191 5192 fmovm.x (%sp)+,&0x30 # restore fp2/fp3 5193 5194 fmov.l %d0,%fpcr # restore users round mode,prec 5195 fadd.s POSNEG1(%a6),%fp0 # last inst - possible exception set 5196 bra t_inx2 5197 5198############################################## 5199 5200# SINe: Big OR Small? 5201#--IF |X| > 15PI, WE USE THE GENERAL ARGUMENT REDUCTION. 5202#--IF |X| < 2**(-40), RETURN X OR 1. 5203SINBORS: 5204 cmp.l %d1,&0x3FFF8000 5205 bgt.l SREDUCEX 5206 5207SINSM: 5208 mov.l ADJN(%a6),%d1 5209 cmp.l %d1,&0 5210 bgt.b COSTINY 5211 5212# here, the operation may underflow iff the precision is sgl or dbl. 5213# extended denorms are handled through another entry point. 
#--SINTINY: tiny-argument sine.  The saved input X is moved to fp0 under
#--the caller's FPCR, and the routine leaves through t_catch with
#--FMOV_OP in d1.
SINTINY:
#	mov.w		&0x0000,XDCARE(%a6)	# JUST IN CASE

	fmov.l		%d0,%fpcr		# restore users round mode,prec
	mov.b		&FMOV_OP,%d1		# last inst is MOVE
	fmov.x		X(%a6),%fp0		# last inst - possible exception set
	bra		t_catch

#--COSTINY: tiny-argument cosine.  Computes 1.0 + (-2**(-126)) under the
#--caller's FPCR; 0x80800000 is the single precision encoding of -2**(-126).
COSTINY:
	fmov.s		&0x3F800000,%fp0	# fp0 = 1.0
	fmov.l		%d0,%fpcr		# restore users round mode,prec
	fadd.s		&0x80800000,%fp0	# last inst - possible exception set
	bra		t_pinx2

################################################
	global		ssind
#--SIN(X) = X FOR DENORMALIZED X
ssind:
	bra		t_extdnrm

############################################
	global		scosd
#--COS(X) = 1 FOR DENORMALIZED X
scosd:
	fmov.s		&0x3F800000,%fp0	# fp0 = 1.0
	bra		t_pinx2

##################################################

#--ssincos: combined sine/cosine entry.  ADJN = 4 is what SRESTORE tests
#--to return to SCCONT instead of SINCONT after the general reduction.
	global		ssincos
ssincos:
#--SET ADJN TO 4
	mov.l		&4,ADJN(%a6)

	fmov.x		(%a0),%fp0		# LOAD INPUT
	fmov.x		%fp0,X(%a6)

	mov.l		(%a0),%d1
	mov.w		4(%a0),%d1
	and.l		&0x7FFFFFFF,%d1		# COMPACTIFY X

	cmp.l		%d1,&0x3FD78000		# |X| >= 2**(-40)?
	bge.b		SCOK1			# yes; go check the upper bound
	bra.w		SCSM			# no; input is very small

SCOK1:
	cmp.l		%d1,&0x4004BC7E		# |X| < 15 PI?
	blt.b		SCMAIN			# yes; use the table reduction
	bra.w		SREDUCEX		# no; input is very large


#--THIS IS THE USUAL CASE, |X| <= 15 PI.
#--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
SCMAIN:
	fmov.x		%fp0,%fp1

	fmul.d		TWOBYPI(%pc),%fp1	# X*2/PI

	lea		PITBL+0x200(%pc),%a1	# TABLE OF N*PI/2, N = -32,...,32

	fmov.l		%fp1,INT(%a6)		# CONVERT TO INTEGER

	mov.l		INT(%a6),%d1
	asl.l		&4,%d1			# 16 bytes per table entry
	add.l		%d1,%a1			# ADDRESS OF N*PIBY2, IN Y1, Y2

	fsub.x		(%a1)+,%fp0		# X-Y1
	fsub.s		(%a1),%fp0		# FP0 IS R = (X-Y1)-Y2

SCCONT:
#--continuation point from REDUCEX

	mov.l		INT(%a6),%d1
	ror.l		&1,%d1
	cmp.l		%d1,&0			# D1 < 0 IFF N IS ODD
	bge.w		NEVEN

SNODD:
#--REGISTERS SAVED SO FAR: D0, A0, FP2.
#--N IS ODD.  fp1 accumulates the sine polynomial of R and is passed to
#--sto_cos as the cosine result; fp0 accumulates the cosine polynomial
#--of R and is returned as the sine result.
#--On entry d1 = N rotated right once (done at SCCONT).
	fmovm.x		&0x04,-(%sp)		# save fp2

	fmov.x		%fp0,RPRIME(%a6)
	fmul.x		%fp0,%fp0		# FP0 IS S = R*R
	fmov.d		SINA7(%pc),%fp1		# A7
	fmov.d		COSB8(%pc),%fp2		# B8
	fmul.x		%fp0,%fp1		# SA7
	fmul.x		%fp0,%fp2		# SB8

# d2 bit 31 := (bit 1 of N) xor (bit 0 of N); it becomes the sign of R'
	mov.l		%d2,-(%sp)
	mov.l		%d1,%d2
	ror.l		&1,%d2
	and.l		&0x80000000,%d2
	eor.l		%d1,%d2
	and.l		&0x80000000,%d2

	fadd.d		SINA6(%pc),%fp1		# A6+SA7
	fadd.d		COSB7(%pc),%fp2		# B7+SB8

	fmul.x		%fp0,%fp1		# S(A6+SA7)
	eor.l		%d2,RPRIME(%a6)		# RPRIME IS NOW R'
	mov.l		(%sp)+,%d2
	fmul.x		%fp0,%fp2		# S(B7+SB8)
# d1 bit 31 := bit 1 of N; it is the sign of POSNEG1 (+-1.0) and of S'
	ror.l		&1,%d1
	and.l		&0x80000000,%d1
	mov.l		&0x3F800000,POSNEG1(%a6)
	eor.l		%d1,POSNEG1(%a6)

	fadd.d		SINA5(%pc),%fp1		# A5+S(A6+SA7)
	fadd.d		COSB6(%pc),%fp2		# B6+S(B7+SB8)

	fmul.x		%fp0,%fp1		# S(A5+S(A6+SA7))
	fmul.x		%fp0,%fp2		# S(B6+S(B7+SB8))
	fmov.x		%fp0,SPRIME(%a6)

	fadd.d		SINA4(%pc),%fp1		# A4+S(A5+S(A6+SA7))
	eor.l		%d1,SPRIME(%a6)		# SPRIME IS NOW S'
	fadd.d		COSB5(%pc),%fp2		# B5+S(B6+S(B7+SB8))

	fmul.x		%fp0,%fp1		# S(A4+...)
	fmul.x		%fp0,%fp2		# S(B5+...)

	fadd.d		SINA3(%pc),%fp1		# A3+S(A4+...)
	fadd.d		COSB4(%pc),%fp2		# B4+S(B5+...)

	fmul.x		%fp0,%fp1		# S(A3+...)
	fmul.x		%fp0,%fp2		# S(B4+...)

	fadd.x		SINA2(%pc),%fp1		# A2+S(A3+...)
	fadd.x		COSB3(%pc),%fp2		# B3+S(B4+...)

	fmul.x		%fp0,%fp1		# S(A2+...)
	fmul.x		%fp0,%fp2		# S(B3+...)

	fadd.x		SINA1(%pc),%fp1		# A1+S(A2+...)
	fadd.x		COSB2(%pc),%fp2		# B2+S(B3+...)

	fmul.x		%fp0,%fp1		# S(A1+...)
	fmul.x		%fp2,%fp0		# S(B2+...)

	fmul.x		RPRIME(%a6),%fp1	# R'S(A1+...)
	fadd.s		COSB1(%pc),%fp0		# B1+S(B2...)
	fmul.x		SPRIME(%a6),%fp0	# S'(B1+S(B2+...))

	fmovm.x		(%sp)+,&0x20		# restore fp2

	fmov.l		%d0,%fpcr		# users round mode,prec for the final adds
	fadd.x		RPRIME(%a6),%fp1	# COS(X)
	bsr		sto_cos			# store cosine result
	fadd.s		POSNEG1(%a6),%fp0	# SIN(X)
	bra		t_inx2

NEVEN:
#--REGISTERS SAVED SO FAR: FP2.
#--N IS EVEN.  fp0 accumulates the sine polynomial of R and is returned
#--as the sine result; fp1 accumulates the cosine polynomial of R and is
#--passed to sto_cos as the cosine result.
#--On entry d1 = N rotated right once (done at SCCONT).
	fmovm.x		&0x04,-(%sp)		# save fp2

	fmov.x		%fp0,RPRIME(%a6)
	fmul.x		%fp0,%fp0		# FP0 IS S = R*R

	fmov.d		COSB8(%pc),%fp1		# B8
	fmov.d		SINA7(%pc),%fp2		# A7

	fmul.x		%fp0,%fp1		# SB8
	fmov.x		%fp0,SPRIME(%a6)
	fmul.x		%fp0,%fp2		# SA7

# d1 bit 31 := bit 1 of N; it is the sign applied to R', S' and POSNEG1
	ror.l		&1,%d1
	and.l		&0x80000000,%d1

	fadd.d		COSB7(%pc),%fp1		# B7+SB8
	fadd.d		SINA6(%pc),%fp2		# A6+SA7

	eor.l		%d1,RPRIME(%a6)		# RPRIME IS NOW R'
	eor.l		%d1,SPRIME(%a6)		# SPRIME IS NOW S'

	fmul.x		%fp0,%fp1		# S(B7+SB8)

	or.l		&0x3F800000,%d1		# D1 IS +-1.0 IN SINGLE
	mov.l		%d1,POSNEG1(%a6)

	fmul.x		%fp0,%fp2		# S(A6+SA7)

	fadd.d		COSB6(%pc),%fp1		# B6+S(B7+SB8)
	fadd.d		SINA5(%pc),%fp2		# A5+S(A6+SA7)

	fmul.x		%fp0,%fp1		# S(B6+S(B7+SB8))
	fmul.x		%fp0,%fp2		# S(A5+S(A6+SA7))

	fadd.d		COSB5(%pc),%fp1		# B5+S(B6+S(B7+SB8))
	fadd.d		SINA4(%pc),%fp2		# A4+S(A5+S(A6+SA7))

	fmul.x		%fp0,%fp1		# S(B5+...)
	fmul.x		%fp0,%fp2		# S(A4+...)

	fadd.d		COSB4(%pc),%fp1		# B4+S(B5+...)
	fadd.d		SINA3(%pc),%fp2		# A3+S(A4+...)

	fmul.x		%fp0,%fp1		# S(B4+...)
	fmul.x		%fp0,%fp2		# S(A3+...)

	fadd.x		COSB3(%pc),%fp1		# B3+S(B4+...)
	fadd.x		SINA2(%pc),%fp2		# A2+S(A3+...)

	fmul.x		%fp0,%fp1		# S(B3+...)
	fmul.x		%fp0,%fp2		# S(A2+...)

	fadd.x		COSB2(%pc),%fp1		# B2+S(B3+...)
	fadd.x		SINA1(%pc),%fp2		# A1+S(A2+...)

	fmul.x		%fp0,%fp1		# S(B2+...)
	fmul.x		%fp2,%fp0		# S(A1+...)


	fadd.s		COSB1(%pc),%fp1		# B1+S(B2...)
	fmul.x		RPRIME(%a6),%fp0	# R'S(A1+...)
	fmul.x		SPRIME(%a6),%fp1	# S'(B1+S(B2+...))

	fmovm.x		(%sp)+,&0x20		# restore fp2

	fmov.l		%d0,%fpcr		# users round mode,prec for the final adds
	fadd.s		POSNEG1(%a6),%fp1	# COS(X)
	bsr		sto_cos			# store cosine result
	fadd.x		RPRIME(%a6),%fp0	# SIN(X)
	bra		t_inx2

################################################

# NOTE(review): no branch to SCBORS appears in the part of the file
# reviewed here; ssincos branches straight to SCSM or SREDUCEX.
SCBORS:
	cmp.l		%d1,&0x3FFF8000
	bgt.w		SREDUCEX

################################################

#--SCSM: tiny-argument sine/cosine.  The cosine is formed as
#--1.0 - 2**(-126) under the caller's FPCR and handed to sto_cos; the
#--sine is the saved input X, returned through t_catch.
SCSM:
#	mov.w		&0x0000,XDCARE(%a6)
	fmov.s		&0x3F800000,%fp1

	fmov.l		%d0,%fpcr
	fsub.s		&0x00800000,%fp1
	bsr		sto_cos			# store cosine result
	fmov.l		%fpcr,%d0		# d0 must have fpcr,too
	mov.b		&FMOV_OP,%d1		# last inst is MOVE
	fmov.x		X(%a6),%fp0
	bra		t_catch

##############################################

	global		ssincosd
#--SIN AND COS OF X FOR DENORMALIZED X
ssincosd:
	mov.l		%d0,-(%sp)		# save d0
	fmov.s		&0x3F800000,%fp1	# cosine result = 1.0
	bsr		sto_cos			# store cosine result
	mov.l		(%sp)+,%d0		# restore d0
	bra		t_extdnrm

############################################

#--WHEN REDUCEX IS USED, THE CODE WILL INEVITABLY BE SLOW.
#--THIS REDUCTION METHOD, HOWEVER, IS MUCH FASTER THAN USING
#--THE REMAINDER INSTRUCTION WHICH IS NOW IN SOFTWARE.
SREDUCEX:
	fmovm.x		&0x3c,-(%sp)		# save {fp2-fp5}
	mov.l		%d2,-(%sp)		# save d2
	fmov.s		&0x00000000,%fp1	# fp1 = 0

#--If compact form of abs(arg) in d1=$7ffeffff, argument is so large that
#--there is a danger of unwanted overflow in first LOOP iteration.  In this
#--case, reduce argument by one remainder step to make subsequent reduction
#--safe.
	cmp.l		%d1,&0x7ffeffff		# is arg dangerously large?
5484 bne.b SLOOP # no 5485 5486# yes; create 2**16383*PI/2 5487 mov.w &0x7ffe,FP_SCR0_EX(%a6) 5488 mov.l &0xc90fdaa2,FP_SCR0_HI(%a6) 5489 clr.l FP_SCR0_LO(%a6) 5490 5491# create low half of 2**16383*PI/2 at FP_SCR1 5492 mov.w &0x7fdc,FP_SCR1_EX(%a6) 5493 mov.l &0x85a308d3,FP_SCR1_HI(%a6) 5494 clr.l FP_SCR1_LO(%a6) 5495 5496 ftest.x %fp0 # test sign of argument 5497 fblt.w sred_neg 5498 5499 or.b &0x80,FP_SCR0_EX(%a6) # positive arg 5500 or.b &0x80,FP_SCR1_EX(%a6) 5501sred_neg: 5502 fadd.x FP_SCR0(%a6),%fp0 # high part of reduction is exact 5503 fmov.x %fp0,%fp1 # save high result in fp1 5504 fadd.x FP_SCR1(%a6),%fp0 # low part of reduction 5505 fsub.x %fp0,%fp1 # determine low component of result 5506 fadd.x FP_SCR1(%a6),%fp1 # fp0/fp1 are reduced argument. 5507 5508#--ON ENTRY, FP0 IS X, ON RETURN, FP0 IS X REM PI/2, |X| <= PI/4. 5509#--integer quotient will be stored in N 5510#--Intermeditate remainder is 66-bit long; (R,r) in (FP0,FP1) 5511SLOOP: 5512 fmov.x %fp0,INARG(%a6) # +-2**K * F, 1 <= F < 2 5513 mov.w INARG(%a6),%d1 5514 mov.l %d1,%a1 # save a copy of D0 5515 and.l &0x00007FFF,%d1 5516 sub.l &0x00003FFF,%d1 # d0 = K 5517 cmp.l %d1,&28 5518 ble.b SLASTLOOP 5519SCONTLOOP: 5520 sub.l &27,%d1 # d0 = L := K-27 5521 mov.b &0,ENDFLAG(%a6) 5522 bra.b SWORK 5523SLASTLOOP: 5524 clr.l %d1 # d0 = L := 0 5525 mov.b &1,ENDFLAG(%a6) 5526 5527SWORK: 5528#--FIND THE REMAINDER OF (R,r) W.R.T. 2**L * (PI/2). L IS SO CHOSEN 5529#--THAT INT( X * (2/PI) / 2**(L) ) < 2**29. 5530 5531#--CREATE 2**(-L) * (2/PI), SIGN(INARG)*2**(63), 5532#--2**L * (PIby2_1), 2**L * (PIby2_2) 5533 5534 mov.l &0x00003FFE,%d2 # BIASED EXP OF 2/PI 5535 sub.l %d1,%d2 # BIASED EXP OF 2**(-L)*(2/PI) 5536 5537 mov.l &0xA2F9836E,FP_SCR0_HI(%a6) 5538 mov.l &0x4E44152A,FP_SCR0_LO(%a6) 5539 mov.w %d2,FP_SCR0_EX(%a6) # FP_SCR0 = 2**(-L)*(2/PI) 5540 5541 fmov.x %fp0,%fp2 5542 fmul.x FP_SCR0(%a6),%fp2 # fp2 = X * 2**(-L)*(2/PI) 5543 5544#--WE MUST NOW FIND INT(FP2). 
SINCE WE NEED THIS VALUE IN 5545#--FLOATING POINT FORMAT, THE TWO FMOVE'S FMOVE.L FP <--> N 5546#--WILL BE TOO INEFFICIENT. THE WAY AROUND IT IS THAT 5547#--(SIGN(INARG)*2**63 + FP2) - SIGN(INARG)*2**63 WILL GIVE 5548#--US THE DESIRED VALUE IN FLOATING POINT. 5549 mov.l %a1,%d2 5550 swap %d2 5551 and.l &0x80000000,%d2 5552 or.l &0x5F000000,%d2 # d2 = SIGN(INARG)*2**63 IN SGL 5553 mov.l %d2,TWOTO63(%a6) 5554 fadd.s TWOTO63(%a6),%fp2 # THE FRACTIONAL PART OF FP1 IS ROUNDED 5555 fsub.s TWOTO63(%a6),%fp2 # fp2 = N 5556# fint.x %fp2 5557 5558#--CREATING 2**(L)*Piby2_1 and 2**(L)*Piby2_2 5559 mov.l %d1,%d2 # d2 = L 5560 5561 add.l &0x00003FFF,%d2 # BIASED EXP OF 2**L * (PI/2) 5562 mov.w %d2,FP_SCR0_EX(%a6) 5563 mov.l &0xC90FDAA2,FP_SCR0_HI(%a6) 5564 clr.l FP_SCR0_LO(%a6) # FP_SCR0 = 2**(L) * Piby2_1 5565 5566 add.l &0x00003FDD,%d1 5567 mov.w %d1,FP_SCR1_EX(%a6) 5568 mov.l &0x85A308D3,FP_SCR1_HI(%a6) 5569 clr.l FP_SCR1_LO(%a6) # FP_SCR1 = 2**(L) * Piby2_2 5570 5571 mov.b ENDFLAG(%a6),%d1 5572 5573#--We are now ready to perform (R+r) - N*P1 - N*P2, P1 = 2**(L) * Piby2_1 and 5574#--P2 = 2**(L) * Piby2_2 5575 fmov.x %fp2,%fp4 # fp4 = N 5576 fmul.x FP_SCR0(%a6),%fp4 # fp4 = W = N*P1 5577 fmov.x %fp2,%fp5 # fp5 = N 5578 fmul.x FP_SCR1(%a6),%fp5 # fp5 = w = N*P2 5579 fmov.x %fp4,%fp3 # fp3 = W = N*P1 5580 5581#--we want P+p = W+w but |p| <= half ulp of P 5582#--Then, we need to compute A := R-P and a := r-p 5583 fadd.x %fp5,%fp3 # fp3 = P 5584 fsub.x %fp3,%fp4 # fp4 = W-P 5585 5586 fsub.x %fp3,%fp0 # fp0 = A := R - P 5587 fadd.x %fp5,%fp4 # fp4 = p = (W-P)+w 5588 5589 fmov.x %fp0,%fp3 # fp3 = A 5590 fsub.x %fp4,%fp1 # fp1 = a := r - p 5591 5592#--Now we need to normalize (A,a) to "new (R,r)" where R+r = A+a but 5593#--|r| <= half ulp of R. 
5594 fadd.x %fp1,%fp0 # fp0 = R := A+a 5595#--No need to calculate r if this is the last loop 5596 cmp.b %d1,&0 5597 bgt.w SRESTORE 5598 5599#--Need to calculate r 5600 fsub.x %fp0,%fp3 # fp3 = A-R 5601 fadd.x %fp3,%fp1 # fp1 = r := (A-R)+a 5602 bra.w SLOOP 5603 5604SRESTORE: 5605 fmov.l %fp2,INT(%a6) 5606 mov.l (%sp)+,%d2 # restore d2 5607 fmovm.x (%sp)+,&0x3c # restore {fp2-fp5} 5608 5609 mov.l ADJN(%a6),%d1 5610 cmp.l %d1,&4 5611 5612 blt.w SINCONT 5613 bra.w SCCONT 5614 5615######################################################################### 5616# stan(): computes the tangent of a normalized input # 5617# stand(): computes the tangent of a denormalized input # 5618# # 5619# INPUT *************************************************************** # 5620# a0 = pointer to extended precision input # 5621# d0 = round precision,mode # 5622# # 5623# OUTPUT ************************************************************** # 5624# fp0 = tan(X) # 5625# # 5626# ACCURACY and MONOTONICITY ******************************************* # 5627# The returned result is within 3 ulp in 64 significant bit, i.e. # 5628# within 0.5001 ulp to 53 bits if the result is subsequently # 5629# rounded to double precision. The result is provably monotonic # 5630# in double precision. # 5631# # 5632# ALGORITHM *********************************************************** # 5633# # 5634# 1. If |X| >= 15Pi or |X| < 2**(-40), go to 6. # 5635# # 5636# 2. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let # 5637# k = N mod 2, so in particular, k = 0 or 1. # 5638# # 5639# 3. If k is odd, go to 5. # 5640# # 5641# 4. (k is even) Tan(X) = tan(r) and tan(r) is approximated by a # 5642# rational function U/V where # 5643# U = r + r*s*(P1 + s*(P2 + s*P3)), and # 5644# V = 1 + s*(Q1 + s*(Q2 + s*(Q3 + s*Q4))), s = r*r. # 5645# Exit. # 5646# # 5647# 4. (k is odd) Tan(X) = -cot(r). 
Since tan(r) is approximated by # 5648# a rational function U/V where # 5649# U = r + r*s*(P1 + s*(P2 + s*P3)), and # 5650# V = 1 + s*(Q1 + s*(Q2 + s*(Q3 + s*Q4))), s = r*r, # 5651# -Cot(r) = -V/U. Exit. # 5652# # 5653# 6. If |X| > 1, go to 8. # 5654# # 5655# 7. (|X|<2**(-40)) Tan(X) = X. Exit. # 5656# # 5657# 8. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, go back # 5658# to 2. # 5659# # 5660######################################################################### 5661 5662TANQ4: 5663 long 0x3EA0B759,0xF50F8688 5664TANP3: 5665 long 0xBEF2BAA5,0xA8924F04 5666 5667TANQ3: 5668 long 0xBF346F59,0xB39BA65F,0x00000000,0x00000000 5669 5670TANP2: 5671 long 0x3FF60000,0xE073D3FC,0x199C4A00,0x00000000 5672 5673TANQ2: 5674 long 0x3FF90000,0xD23CD684,0x15D95FA1,0x00000000 5675 5676TANP1: 5677 long 0xBFFC0000,0x8895A6C5,0xFB423BCA,0x00000000 5678 5679TANQ1: 5680 long 0xBFFD0000,0xEEF57E0D,0xA84BC8CE,0x00000000 5681 5682INVTWOPI: 5683 long 0x3FFC0000,0xA2F9836E,0x4E44152A,0x00000000 5684 5685TWOPI1: 5686 long 0x40010000,0xC90FDAA2,0x00000000,0x00000000 5687TWOPI2: 5688 long 0x3FDF0000,0x85A308D4,0x00000000,0x00000000 5689 5690#--N*PI/2, -32 <= N <= 32, IN A LEADING TERM IN EXT. AND TRAILING 5691#--TERM IN SGL. NOTE THAT PI IS 64-BIT LONG, THUS N*PI/2 IS AT 5692#--MOST 69 BITS LONG. 
#	global		PITBL
#--65 entries of 16 bytes, N = -32 first.  Each entry is three longs of
#--extended precision leading term followed by one long of single
#--precision trailing term.  The all-zero entry for N = 0 is at
#--PITBL+0x200, which is the base address the callers load into a1.
PITBL:
	long		0xC0040000,0xC90FDAA2,0x2168C235,0x21800000
	long		0xC0040000,0xC2C75BCD,0x105D7C23,0xA0D00000
	long		0xC0040000,0xBC7EDCF7,0xFF523611,0xA1E80000
	long		0xC0040000,0xB6365E22,0xEE46F000,0x21480000
	long		0xC0040000,0xAFEDDF4D,0xDD3BA9EE,0xA1200000
	long		0xC0040000,0xA9A56078,0xCC3063DD,0x21FC0000
	long		0xC0040000,0xA35CE1A3,0xBB251DCB,0x21100000
	long		0xC0040000,0x9D1462CE,0xAA19D7B9,0xA1580000
	long		0xC0040000,0x96CBE3F9,0x990E91A8,0x21E00000
	long		0xC0040000,0x90836524,0x88034B96,0x20B00000
	long		0xC0040000,0x8A3AE64F,0x76F80584,0xA1880000
	long		0xC0040000,0x83F2677A,0x65ECBF73,0x21C40000
	long		0xC0030000,0xFB53D14A,0xA9C2F2C2,0x20000000
	long		0xC0030000,0xEEC2D3A0,0x87AC669F,0x21380000
	long		0xC0030000,0xE231D5F6,0x6595DA7B,0xA1300000
	long		0xC0030000,0xD5A0D84C,0x437F4E58,0x9FC00000
	long		0xC0030000,0xC90FDAA2,0x2168C235,0x21000000
	long		0xC0030000,0xBC7EDCF7,0xFF523611,0xA1680000
	long		0xC0030000,0xAFEDDF4D,0xDD3BA9EE,0xA0A00000
	long		0xC0030000,0xA35CE1A3,0xBB251DCB,0x20900000
	long		0xC0030000,0x96CBE3F9,0x990E91A8,0x21600000
	long		0xC0030000,0x8A3AE64F,0x76F80584,0xA1080000
	long		0xC0020000,0xFB53D14A,0xA9C2F2C2,0x1F800000
	long		0xC0020000,0xE231D5F6,0x6595DA7B,0xA0B00000
	long		0xC0020000,0xC90FDAA2,0x2168C235,0x20800000
	long		0xC0020000,0xAFEDDF4D,0xDD3BA9EE,0xA0200000
	long		0xC0020000,0x96CBE3F9,0x990E91A8,0x20E00000
	long		0xC0010000,0xFB53D14A,0xA9C2F2C2,0x1F000000
	long		0xC0010000,0xC90FDAA2,0x2168C235,0x20000000
	long		0xC0010000,0x96CBE3F9,0x990E91A8,0x20600000
	long		0xC0000000,0xC90FDAA2,0x2168C235,0x1F800000
	long		0xBFFF0000,0xC90FDAA2,0x2168C235,0x1F000000
	long		0x00000000,0x00000000,0x00000000,0x00000000
	long		0x3FFF0000,0xC90FDAA2,0x2168C235,0x9F000000
	long		0x40000000,0xC90FDAA2,0x2168C235,0x9F800000
	long		0x40010000,0x96CBE3F9,0x990E91A8,0xA0600000
	long		0x40010000,0xC90FDAA2,0x2168C235,0xA0000000
	long		0x40010000,0xFB53D14A,0xA9C2F2C2,0x9F000000
	long		0x40020000,0x96CBE3F9,0x990E91A8,0xA0E00000
	long		0x40020000,0xAFEDDF4D,0xDD3BA9EE,0x20200000
	long		0x40020000,0xC90FDAA2,0x2168C235,0xA0800000
	long		0x40020000,0xE231D5F6,0x6595DA7B,0x20B00000
	long		0x40020000,0xFB53D14A,0xA9C2F2C2,0x9F800000
	long		0x40030000,0x8A3AE64F,0x76F80584,0x21080000
	long		0x40030000,0x96CBE3F9,0x990E91A8,0xA1600000
	long		0x40030000,0xA35CE1A3,0xBB251DCB,0xA0900000
	long		0x40030000,0xAFEDDF4D,0xDD3BA9EE,0x20A00000
	long		0x40030000,0xBC7EDCF7,0xFF523611,0x21680000
	long		0x40030000,0xC90FDAA2,0x2168C235,0xA1000000
	long		0x40030000,0xD5A0D84C,0x437F4E58,0x1FC00000
	long		0x40030000,0xE231D5F6,0x6595DA7B,0x21300000
	long		0x40030000,0xEEC2D3A0,0x87AC669F,0xA1380000
	long		0x40030000,0xFB53D14A,0xA9C2F2C2,0xA0000000
	long		0x40040000,0x83F2677A,0x65ECBF73,0xA1C40000
	long		0x40040000,0x8A3AE64F,0x76F80584,0x21880000
	long		0x40040000,0x90836524,0x88034B96,0xA0B00000
	long		0x40040000,0x96CBE3F9,0x990E91A8,0xA1E00000
	long		0x40040000,0x9D1462CE,0xAA19D7B9,0x21580000
	long		0x40040000,0xA35CE1A3,0xBB251DCB,0xA1100000
	long		0x40040000,0xA9A56078,0xCC3063DD,0xA1FC0000
	long		0x40040000,0xAFEDDF4D,0xDD3BA9EE,0x21200000
	long		0x40040000,0xB6365E22,0xEE46F000,0xA1480000
	long		0x40040000,0xBC7EDCF7,0xFF523611,0x21E80000
	long		0x40040000,0xC2C75BCD,0x105D7C23,0x20D00000
	long		0x40040000,0xC90FDAA2,0x2168C235,0xA1800000

#
# Scratch-slot names for the tangent code (displacements off %a6).
# TWOTO63 and INT both name L_SCR1.
#
	set		INARG,FP_SCR0

	set		TWOTO63,L_SCR1
	set		INT,L_SCR1
	set		ENDFLAG,L_SCR2

#--stan: tangent entry.  Loads the input through a0, builds the compact
#--form of |X| in d1 and selects the tiny, table-driven or general path.
	global		stan
stan:
	fmov.x		(%a0),%fp0		# LOAD INPUT

	mov.l		(%a0),%d1		# sign/exponent word in the high half
	mov.w		4(%a0),%d1		# upper 16 mantissa bits in the low half
	and.l		&0x7FFFFFFF,%d1		# strip sign

	cmp.l		%d1,&0x3FD78000		# |X| >= 2**(-40)?
	bge.b		TANOK1			# yes; go check the upper bound
	bra.w		TANSM			# no; input is very small
TANOK1:
	cmp.l		%d1,&0x4004BC7E		# |X| < 15 PI?
5780 blt.b TANMAIN 5781 bra.w REDUCEX 5782 5783TANMAIN: 5784#--THIS IS THE USUAL CASE, |X| <= 15 PI. 5785#--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP. 5786 fmov.x %fp0,%fp1 5787 fmul.d TWOBYPI(%pc),%fp1 # X*2/PI 5788 5789 lea.l PITBL+0x200(%pc),%a1 # TABLE OF N*PI/2, N = -32,...,32 5790 5791 fmov.l %fp1,%d1 # CONVERT TO INTEGER 5792 5793 asl.l &4,%d1 5794 add.l %d1,%a1 # ADDRESS N*PIBY2 IN Y1, Y2 5795 5796 fsub.x (%a1)+,%fp0 # X-Y1 5797 5798 fsub.s (%a1),%fp0 # FP0 IS R = (X-Y1)-Y2 5799 5800 ror.l &5,%d1 5801 and.l &0x80000000,%d1 # D0 WAS ODD IFF D0 < 0 5802 5803TANCONT: 5804 fmovm.x &0x0c,-(%sp) # save fp2,fp3 5805 5806 cmp.l %d1,&0 5807 blt.w NODD 5808 5809 fmov.x %fp0,%fp1 5810 fmul.x %fp1,%fp1 # S = R*R 5811 5812 fmov.d TANQ4(%pc),%fp3 5813 fmov.d TANP3(%pc),%fp2 5814 5815 fmul.x %fp1,%fp3 # SQ4 5816 fmul.x %fp1,%fp2 # SP3 5817 5818 fadd.d TANQ3(%pc),%fp3 # Q3+SQ4 5819 fadd.x TANP2(%pc),%fp2 # P2+SP3 5820 5821 fmul.x %fp1,%fp3 # S(Q3+SQ4) 5822 fmul.x %fp1,%fp2 # S(P2+SP3) 5823 5824 fadd.x TANQ2(%pc),%fp3 # Q2+S(Q3+SQ4) 5825 fadd.x TANP1(%pc),%fp2 # P1+S(P2+SP3) 5826 5827 fmul.x %fp1,%fp3 # S(Q2+S(Q3+SQ4)) 5828 fmul.x %fp1,%fp2 # S(P1+S(P2+SP3)) 5829 5830 fadd.x TANQ1(%pc),%fp3 # Q1+S(Q2+S(Q3+SQ4)) 5831 fmul.x %fp0,%fp2 # RS(P1+S(P2+SP3)) 5832 5833 fmul.x %fp3,%fp1 # S(Q1+S(Q2+S(Q3+SQ4))) 5834 5835 fadd.x %fp2,%fp0 # R+RS(P1+S(P2+SP3)) 5836 5837 fadd.s &0x3F800000,%fp1 # 1+S(Q1+...) 
5838 5839 fmovm.x (%sp)+,&0x30 # restore fp2,fp3 5840 5841 fmov.l %d0,%fpcr # restore users round mode,prec 5842 fdiv.x %fp1,%fp0 # last inst - possible exception set 5843 bra t_inx2 5844 5845NODD: 5846 fmov.x %fp0,%fp1 5847 fmul.x %fp0,%fp0 # S = R*R 5848 5849 fmov.d TANQ4(%pc),%fp3 5850 fmov.d TANP3(%pc),%fp2 5851 5852 fmul.x %fp0,%fp3 # SQ4 5853 fmul.x %fp0,%fp2 # SP3 5854 5855 fadd.d TANQ3(%pc),%fp3 # Q3+SQ4 5856 fadd.x TANP2(%pc),%fp2 # P2+SP3 5857 5858 fmul.x %fp0,%fp3 # S(Q3+SQ4) 5859 fmul.x %fp0,%fp2 # S(P2+SP3) 5860 5861 fadd.x TANQ2(%pc),%fp3 # Q2+S(Q3+SQ4) 5862 fadd.x TANP1(%pc),%fp2 # P1+S(P2+SP3) 5863 5864 fmul.x %fp0,%fp3 # S(Q2+S(Q3+SQ4)) 5865 fmul.x %fp0,%fp2 # S(P1+S(P2+SP3)) 5866 5867 fadd.x TANQ1(%pc),%fp3 # Q1+S(Q2+S(Q3+SQ4)) 5868 fmul.x %fp1,%fp2 # RS(P1+S(P2+SP3)) 5869 5870 fmul.x %fp3,%fp0 # S(Q1+S(Q2+S(Q3+SQ4))) 5871 5872 fadd.x %fp2,%fp1 # R+RS(P1+S(P2+SP3)) 5873 fadd.s &0x3F800000,%fp0 # 1+S(Q1+...) 5874 5875 fmovm.x (%sp)+,&0x30 # restore fp2,fp3 5876 5877 fmov.x %fp1,-(%sp) 5878 eor.l &0x80000000,(%sp) 5879 5880 fmov.l %d0,%fpcr # restore users round mode,prec 5881 fdiv.x (%sp)+,%fp0 # last inst - possible exception set 5882 bra t_inx2 5883 5884TANBORS: 5885#--IF |X| > 15PI, WE USE THE GENERAL ARGUMENT REDUCTION. 5886#--IF |X| < 2**(-40), RETURN X OR 1. 5887 cmp.l %d1,&0x3FFF8000 5888 bgt.b REDUCEX 5889 5890TANSM: 5891 fmov.x %fp0,-(%sp) 5892 fmov.l %d0,%fpcr # restore users round mode,prec 5893 mov.b &FMOV_OP,%d1 # last inst is MOVE 5894 fmov.x (%sp)+,%fp0 # last inst - posibble exception set 5895 bra t_catch 5896 5897 global stand 5898#--TAN(X) = X FOR DENORMALIZED X 5899stand: 5900 bra t_extdnrm 5901 5902#--WHEN REDUCEX IS USED, THE CODE WILL INEVITABLY BE SLOW. 5903#--THIS REDUCTION METHOD, HOWEVER, IS MUCH FASTER THAN USING 5904#--THE REMAINDER INSTRUCTION WHICH IS NOW IN SOFTWARE. 
REDUCEX:
	fmovm.x		&0x3c,-(%sp)		# save {fp2-fp5}
	mov.l		%d2,-(%sp)		# save d2
	fmov.s		&0x00000000,%fp1	# fp1 = 0

#--If compact form of abs(arg) in d1=$7ffeffff, argument is so large that
#--there is a danger of unwanted overflow in first LOOP iteration.  In this
#--case, reduce argument by one remainder step to make subsequent reduction
#--safe.
	cmp.l		%d1,&0x7ffeffff		# is arg dangerously large?
	bne.b		LOOP			# no

# yes; create 2**16383*PI/2
	mov.w		&0x7ffe,FP_SCR0_EX(%a6)
	mov.l		&0xc90fdaa2,FP_SCR0_HI(%a6)
	clr.l		FP_SCR0_LO(%a6)

# create low half of 2**16383*PI/2 at FP_SCR1
	mov.w		&0x7fdc,FP_SCR1_EX(%a6)
	mov.l		&0x85a308d3,FP_SCR1_HI(%a6)
	clr.l		FP_SCR1_LO(%a6)

	ftest.x		%fp0			# test sign of argument
	fblt.w		red_neg

# positive arg: set the sign bit of both pieces so the adds below subtract
	or.b		&0x80,FP_SCR0_EX(%a6)	# positive arg
	or.b		&0x80,FP_SCR1_EX(%a6)
red_neg:
	fadd.x		FP_SCR0(%a6),%fp0	# high part of reduction is exact
	fmov.x		%fp0,%fp1		# save high result in fp1
	fadd.x		FP_SCR1(%a6),%fp0	# low part of reduction
	fsub.x		%fp0,%fp1		# determine low component of result
	fadd.x		FP_SCR1(%a6),%fp1	# fp0/fp1 are reduced argument.

#--ON ENTRY, FP0 IS X, ON RETURN, FP0 IS X REM PI/2, |X| <= PI/4.
#--integer quotient will be stored in N
#--Intermediate remainder is 66-bit long; (R,r) in (FP0,FP1)
LOOP:
	fmov.x		%fp0,INARG(%a6)		# +-2**K * F, 1 <= F < 2
	mov.w		INARG(%a6),%d1		# sign/exponent word of R
	mov.l		%d1,%a1			# save a copy of D1
	and.l		&0x00007FFF,%d1
	sub.l		&0x00003FFF,%d1		# d1 = K
	cmp.l		%d1,&28
	ble.b		LASTLOOP
CONTLOOP:
	sub.l		&27,%d1			# d1 = L := K-27
	mov.b		&0,ENDFLAG(%a6)
	bra.b		WORK
LASTLOOP:
	clr.l		%d1			# d1 = L := 0
	mov.b		&1,ENDFLAG(%a6)

WORK:
#--FIND THE REMAINDER OF (R,r) W.R.T.	2**L * (PI/2). L IS SO CHOSEN
#--THAT	INT( X * (2/PI) / 2**(L) ) < 2**29.

#--CREATE 2**(-L) * (2/PI), SIGN(INARG)*2**(63),
#--2**L * (PIby2_1), 2**L * (PIby2_2)

	mov.l		&0x00003FFE,%d2		# BIASED EXP OF 2/PI
	sub.l		%d1,%d2			# BIASED EXP OF 2**(-L)*(2/PI)

	mov.l		&0xA2F9836E,FP_SCR0_HI(%a6)
	mov.l		&0x4E44152A,FP_SCR0_LO(%a6)
	mov.w		%d2,FP_SCR0_EX(%a6)	# FP_SCR0 = 2**(-L)*(2/PI)

	fmov.x		%fp0,%fp2
	fmul.x		FP_SCR0(%a6),%fp2	# fp2 = X * 2**(-L)*(2/PI)

#--WE MUST NOW FIND INT(FP2). SINCE WE NEED THIS VALUE IN
#--FLOATING POINT FORMAT, THE TWO FMOVE'S	FMOVE.L FP <--> N
#--WILL BE TOO INEFFICIENT. THE WAY AROUND IT IS THAT
#--(SIGN(INARG)*2**63	+	FP2) - SIGN(INARG)*2**63 WILL GIVE
#--US THE DESIRED VALUE IN FLOATING POINT.
	mov.l		%a1,%d2
	swap		%d2			# sign of INARG is now bit 31
	and.l		&0x80000000,%d2
	or.l		&0x5F000000,%d2		# d2 = SIGN(INARG)*2**63 IN SGL
	mov.l		%d2,TWOTO63(%a6)
	fadd.s		TWOTO63(%a6),%fp2	# THE FRACTIONAL PART OF FP2 IS ROUNDED
	fsub.s		TWOTO63(%a6),%fp2	# fp2 = N
#	fintrz.x	%fp2,%fp2

#--CREATING 2**(L)*Piby2_1 and 2**(L)*Piby2_2
	mov.l		%d1,%d2			# d2 = L

	add.l		&0x00003FFF,%d2		# BIASED EXP OF 2**L * (PI/2)
	mov.w		%d2,FP_SCR0_EX(%a6)
	mov.l		&0xC90FDAA2,FP_SCR0_HI(%a6)
	clr.l		FP_SCR0_LO(%a6)		# FP_SCR0 = 2**(L) * Piby2_1

	add.l		&0x00003FDD,%d1
	mov.w		%d1,FP_SCR1_EX(%a6)
	mov.l		&0x85A308D3,FP_SCR1_HI(%a6)
	clr.l		FP_SCR1_LO(%a6)		# FP_SCR1 = 2**(L) * Piby2_2

	mov.b		ENDFLAG(%a6),%d1	# d1.b = 1 on the last pass, else 0

#--We are now ready to perform (R+r) - N*P1 - N*P2, P1 = 2**(L) * Piby2_1 and
#--P2 = 2**(L) * Piby2_2
	fmov.x		%fp2,%fp4		# fp4 = N
	fmul.x		FP_SCR0(%a6),%fp4	# fp4 = W = N*P1
	fmov.x		%fp2,%fp5		# fp5 = N
	fmul.x		FP_SCR1(%a6),%fp5	# fp5 = w = N*P2
	fmov.x		%fp4,%fp3		# fp3 = W = N*P1

#--we want P+p = W+w  but  |p| <= half ulp of P
#--Then, we need to compute  A := R-P   and  a := r-p
	fadd.x		%fp5,%fp3		# fp3 = P
	fsub.x		%fp3,%fp4		# fp4 = W-P

	fsub.x		%fp3,%fp0		# fp0 = A := R - P
	fadd.x		%fp5,%fp4		# fp4 = p = (W-P)+w

	fmov.x		%fp0,%fp3		# fp3 = A
	fsub.x		%fp4,%fp1		# fp1 = a := r - p

#--Now we need to normalize (A,a) to  "new (R,r)" where R+r = A+a but
#--|r| <= half ulp of R.
	fadd.x		%fp1,%fp0		# fp0 = R := A+a
#--No need to calculate r if this is the last loop
	cmp.b		%d1,&0			# d1.b holds ENDFLAG
	bgt.w		RESTORE

#--Need to calculate r
	fsub.x		%fp0,%fp3		# fp3 = A-R
	fadd.x		%fp3,%fp1		# fp1 = r := (A-R)+a
	bra.w		LOOP

#--RESTORE: end of the general reduction.  N is stored to INT, the saved
#--registers are popped, and bit 0 of N is rotated into bit 31 of d1,
#--which is the odd/even flag TANCONT tests.
RESTORE:
	fmov.l		%fp2,INT(%a6)
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		(%sp)+,&0x3c		# restore {fp2-fp5}

	mov.l		INT(%a6),%d1
	ror.l		&1,%d1

	bra.w		TANCONT

#########################################################################
# satan():  computes the arctangent of a normalized number		#
# satand(): computes the arctangent of a denormalized number		#
#									#
# INPUT *************************************************************** #
#	a0 = pointer to extended precision input			#
#	d0 = round precision,mode					#
#									#
# OUTPUT ************************************************************** #
#	fp0 = arctan(X)							#
#									#
# ACCURACY and MONOTONICITY ******************************************* #
#	The returned result is within 2 ulps in	64 significant bit,	#
#	i.e. within 0.5001 ulp to 53 bits if the result is subsequently	#
#	rounded to double precision. The result is provably monotonic	#
#	in double precision.						#
#									#
# ALGORITHM *********************************************************** #
#	Step 1. If |X| >= 16 or |X| < 1/16, go to Step 5.		#
#									#
#	Step 2. Let X = sgn * 2**k * 1.xxxxxxxx...x.			#
#		Note that k = -4, -3,..., or 3.				#
#		Define F = sgn * 2**k * 1.xxxx1, i.e. the first 5	#
#		significant bits of X with a bit-1 attached at the 6-th	#
#		bit position. Define u to be u = (X-F) / (1 + X*F).	#
#									#
#	Step 3. Approximate arctan(u) by a polynomial poly.
#
#									#
#	Step 4. Return arctan(F) + poly, arctan(F) is fetched from a	#
#		table of values calculated beforehand. Exit.		#
#									#
#	Step 5. If |X| >= 16, go to Step 7.				#
#									#
#	Step 6. Approximate arctan(X) by an odd polynomial in X. Exit.	#
#									#
#	Step 7. Define X' = -1/X. Approximate arctan(X') by an odd	#
#		polynomial in X'.					#
#		Arctan(X) = sign(X)*Pi/2 + arctan(X'). Exit.		#
#									#
#########################################################################

#--DOUBLE-PRECISION COEFFICIENTS (TWO LONGS EACH) FOR THE ATANMAIN
#--POLYNOMIAL; THEY ARE FETCHED WITH fmov.d/fadd.d/fmul.d.
ATANA3:	long	0xBFF6687E,0x314987D8
ATANA2:	long	0x4002AC69,0x34A26DB3
ATANA1:	long	0xBFC2476F,0x4E1DA28E

#--DOUBLE-PRECISION COEFFICIENTS B1..B6 FOR THE ATANSM POLYNOMIAL.
ATANB6:	long	0x3FB34444,0x7F876989
ATANB5:	long	0xBFB744EE,0x7FAF45DB
ATANB4:	long	0x3FBC71C6,0x46940220
ATANB3:	long	0xBFC24924,0x921872F9
ATANB2:	long	0x3FC99999,0x99998FA9
ATANB1:	long	0xBFD55555,0x55555555

#--DOUBLE-PRECISION COEFFICIENTS C1..C5 FOR THE ATANBIG POLYNOMIAL.
ATANC5:	long	0xBFB70BF3,0x98539E6A
ATANC4:	long	0x3FBC7187,0x962D1D7D
ATANC3:	long	0xBFC24924,0x827107B8
ATANC2:	long	0x3FC99999,0x9996263E
ATANC1:	long	0xBFD55555,0x55555536

#--+-PI/2 IN EXTENDED PRECISION (USED WITH fadd.x/fmov.x BELOW).
PPIBY2:	long	0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000
NPIBY2:	long	0xBFFF0000,0xC90FDAA2,0x2168C235,0x00000000

#--+-TINY IN EXTENDED PRECISION (BIASED EXPONENT 0x0001), ADDED TO +-PI/2
#--IN ATANHUGE.
PTINY:	long	0x00010000,0x80000000,0x00000000,0x00000000
NTINY:	long	0x80010000,0x80000000,0x00000000,0x00000000

#--TABLE OF ATAN(|F|): 128 EXTENDED-PRECISION ENTRIES, EACH PADDED WITH A
#--ZERO LONG TO 16 BYTES. ATANMAIN INDEXES IT WITH A BYTE OFFSET OF
#--16 * ((K+4)*16 + 4 FRACTION BITS OF F).
ATANTBL:
	long	0x3FFB0000,0x83D152C5,0x060B7A51,0x00000000
	long	0x3FFB0000,0x8BC85445,0x65498B8B,0x00000000
	long	0x3FFB0000,0x93BE4060,0x17626B0D,0x00000000
	long	0x3FFB0000,0x9BB3078D,0x35AEC202,0x00000000
	long	0x3FFB0000,0xA3A69A52,0x5DDCE7DE,0x00000000
	long	0x3FFB0000,0xAB98E943,0x62765619,0x00000000
	long	0x3FFB0000,0xB389E502,0xF9C59862,0x00000000
	long	0x3FFB0000,0xBB797E43,0x6B09E6FB,0x00000000
	long	0x3FFB0000,0xC367A5C7,0x39E5F446,0x00000000
	long	0x3FFB0000,0xCB544C61,0xCFF7D5C6,0x00000000
	long	0x3FFB0000,0xD33F62F8,0x2488533E,0x00000000
	long	0x3FFB0000,0xDB28DA81,0x62404C77,0x00000000
	long	0x3FFB0000,0xE310A407,0x8AD34F18,0x00000000
	long	0x3FFB0000,0xEAF6B0A8,0x188EE1EB,0x00000000
	long	0x3FFB0000,0xF2DAF194,0x9DBE79D5,0x00000000
	long	0x3FFB0000,0xFABD5813,0x61D47E3E,0x00000000
	long	0x3FFC0000,0x8346AC21,0x0959ECC4,0x00000000
	long	0x3FFC0000,0x8B232A08,0x304282D8,0x00000000
	long	0x3FFC0000,0x92FB70B8,0xD29AE2F9,0x00000000
	long	0x3FFC0000,0x9ACF476F,0x5CCD1CB4,0x00000000
	long	0x3FFC0000,0xA29E7630,0x4954F23F,0x00000000
	long	0x3FFC0000,0xAA68C5D0,0x8AB85230,0x00000000
	long	0x3FFC0000,0xB22DFFFD,0x9D539F83,0x00000000
	long	0x3FFC0000,0xB9EDEF45,0x3E900EA5,0x00000000
	long	0x3FFC0000,0xC1A85F1C,0xC75E3EA5,0x00000000
	long	0x3FFC0000,0xC95D1BE8,0x28138DE6,0x00000000
	long	0x3FFC0000,0xD10BF300,0x840D2DE4,0x00000000
	long	0x3FFC0000,0xD8B4B2BA,0x6BC05E7A,0x00000000
	long	0x3FFC0000,0xE0572A6B,0xB42335F6,0x00000000
	long	0x3FFC0000,0xE7F32A70,0xEA9CAA8F,0x00000000
	long	0x3FFC0000,0xEF888432,0x64ECEFAA,0x00000000
	long	0x3FFC0000,0xF7170A28,0xECC06666,0x00000000
	long	0x3FFD0000,0x812FD288,0x332DAD32,0x00000000
	long	0x3FFD0000,0x88A8D1B1,0x218E4D64,0x00000000
	long	0x3FFD0000,0x9012AB3F,0x23E4AEE8,0x00000000
	long	0x3FFD0000,0x976CC3D4,0x11E7F1B9,0x00000000
	long	0x3FFD0000,0x9EB68949,0x3889A227,0x00000000
	long	0x3FFD0000,0xA5EF72C3,0x4487361B,0x00000000
	long	0x3FFD0000,0xAD1700BA,0xF07A7227,0x00000000
	long	0x3FFD0000,0xB42CBCFA,0xFD37EFB7,0x00000000
	long	0x3FFD0000,0xBB303A94,0x0BA80F89,0x00000000
	long	0x3FFD0000,0xC22115C6,0xFCAEBBAF,0x00000000
	long	0x3FFD0000,0xC8FEF3E6,0x86331221,0x00000000
	long	0x3FFD0000,0xCFC98330,0xB4000C70,0x00000000
	long	0x3FFD0000,0xD6807AA1,0x102C5BF9,0x00000000
	long	0x3FFD0000,0xDD2399BC,0x31252AA3,0x00000000
	long	0x3FFD0000,0xE3B2A855,0x6B8FC517,0x00000000
	long	0x3FFD0000,0xEA2D764F,0x64315989,0x00000000
	long	0x3FFD0000,0xF3BF5BF8,0xBAD1A21D,0x00000000
	long	0x3FFE0000,0x801CE39E,0x0D205C9A,0x00000000
	long	0x3FFE0000,0x8630A2DA,0xDA1ED066,0x00000000
	long	0x3FFE0000,0x8C1AD445,0xF3E09B8C,0x00000000
	long	0x3FFE0000,0x91DB8F16,0x64F350E2,0x00000000
	long	0x3FFE0000,0x97731420,0x365E538C,0x00000000
	long	0x3FFE0000,0x9CE1C8E6,0xA0B8CDBA,0x00000000
	long	0x3FFE0000,0xA22832DB,0xCADAAE09,0x00000000
	long	0x3FFE0000,0xA746F2DD,0xB7602294,0x00000000
	long	0x3FFE0000,0xAC3EC0FB,0x997DD6A2,0x00000000
	long	0x3FFE0000,0xB110688A,0xEBDC6F6A,0x00000000
	long	0x3FFE0000,0xB5BCC490,0x59ECC4B0,0x00000000
	long	0x3FFE0000,0xBA44BC7D,0xD470782F,0x00000000
	long	0x3FFE0000,0xBEA94144,0xFD049AAC,0x00000000
	long	0x3FFE0000,0xC2EB4ABB,0x661628B6,0x00000000
	long	0x3FFE0000,0xC70BD54C,0xE602EE14,0x00000000
	long	0x3FFE0000,0xCD000549,0xADEC7159,0x00000000
	long	0x3FFE0000,0xD48457D2,0xD8EA4EA3,0x00000000
	long	0x3FFE0000,0xDB948DA7,0x12DECE3B,0x00000000
	long	0x3FFE0000,0xE23855F9,0x69E8096A,0x00000000
	long	0x3FFE0000,0xE8771129,0xC4353259,0x00000000
	long	0x3FFE0000,0xEE57C16E,0x0D379C0D,0x00000000
	long	0x3FFE0000,0xF3E10211,0xA87C3779,0x00000000
	long	0x3FFE0000,0xF919039D,0x758B8D41,0x00000000
	long	0x3FFE0000,0xFE058B8F,0x64935FB3,0x00000000
	long	0x3FFF0000,0x8155FB49,0x7B685D04,0x00000000
	long	0x3FFF0000,0x83889E35,0x49D108E1,0x00000000
	long	0x3FFF0000,0x859CFA76,0x511D724B,0x00000000
	long	0x3FFF0000,0x87952ECF,0xFF8131E7,0x00000000
	long	0x3FFF0000,0x89732FD1,0x9557641B,0x00000000
	long	0x3FFF0000,0x8B38CAD1,0x01932A35,0x00000000
	long	0x3FFF0000,0x8CE7A8D8,0x301EE6B5,0x00000000
	long	0x3FFF0000,0x8F46A39E,0x2EAE5281,0x00000000
	long	0x3FFF0000,0x922DA7D7,0x91888487,0x00000000
	long	0x3FFF0000,0x94D19FCB,0xDEDF5241,0x00000000
	long	0x3FFF0000,0x973AB944,0x19D2A08B,0x00000000
	long	0x3FFF0000,0x996FF00E,0x08E10B96,0x00000000
	long	0x3FFF0000,0x9B773F95,0x12321DA7,0x00000000
	long	0x3FFF0000,0x9D55CC32,0x0F935624,0x00000000
	long	0x3FFF0000,0x9F100575,0x006CC571,0x00000000
	long	0x3FFF0000,0xA0A9C290,0xD97CC06C,0x00000000
	long	0x3FFF0000,0xA22659EB,0xEBC0630A,0x00000000
	long	0x3FFF0000,0xA388B4AF,0xF6EF0EC9,0x00000000
	long	0x3FFF0000,0xA4D35F10,0x61D292C4,0x00000000
	long	0x3FFF0000,0xA60895DC,0xFBE3187E,0x00000000
	long	0x3FFF0000,0xA72A51DC,0x7367BEAC,0x00000000
	long	0x3FFF0000,0xA83A5153,0x0956168F,0x00000000
	long	0x3FFF0000,0xA93A2007,0x7539546E,0x00000000
	long	0x3FFF0000,0xAA9E7245,0x023B2605,0x00000000
	long	0x3FFF0000,0xAC4C84BA,0x6FE4D58F,0x00000000
	long	0x3FFF0000,0xADCE4A4A,0x606B9712,0x00000000
	long	0x3FFF0000,0xAF2A2DCD,0x8D263C9C,0x00000000
	long	0x3FFF0000,0xB0656F81,0xF22265C7,0x00000000
	long	0x3FFF0000,0xB1846515,0x0F71496A,0x00000000
	long	0x3FFF0000,0xB28AAA15,0x6F9ADA35,0x00000000
	long	0x3FFF0000,0xB37B44FF,0x3766B895,0x00000000
	long	0x3FFF0000,0xB458C3DC,0xE9630433,0x00000000
	long	0x3FFF0000,0xB525529D,0x562246BD,0x00000000
	long	0x3FFF0000,0xB5E2CCA9,0x5F9D88CC,0x00000000
	long	0x3FFF0000,0xB692CADA,0x7ACA1ADA,0x00000000
	long	0x3FFF0000,0xB736AEA7,0xA6925838,0x00000000
	long	0x3FFF0000,0xB7CFAB28,0x7E9F7B36,0x00000000
	long	0x3FFF0000,0xB85ECC66,0xCB219835,0x00000000
	long	0x3FFF0000,0xB8E4FD5A,0x20A593DA,0x00000000
	long	0x3FFF0000,0xB99F41F6,0x4AFF9BB5,0x00000000
	long	0x3FFF0000,0xBA7F1E17,0x842BBE7B,0x00000000
	long	0x3FFF0000,0xBB471285,0x7637E17D,0x00000000
	long	0x3FFF0000,0xBBFABE8A,0x4788DF6F,0x00000000
	long	0x3FFF0000,0xBC9D0FAD,0x2B689D79,0x00000000
	long	0x3FFF0000,0xBD306A39,0x471ECD86,0x00000000
	long	0x3FFF0000,0xBDB6C731,0x856AF18A,0x00000000
	long	0x3FFF0000,0xBE31CAC5,0x02E80D70,0x00000000
	long	0x3FFF0000,0xBEA2D55C,0xE33194E2,0x00000000
	long	0x3FFF0000,0xBF0B10B7,0xC03128F0,0x00000000
	long	0x3FFF0000,0xBF6B7A18,0xDACB778D,0x00000000
	long	0x3FFF0000,0xBFC4EA46,0x63FA18F6,0x00000000
	long	0x3FFF0000,0xC0181BDE,0x8B89A454,0x00000000
	long	0x3FFF0000,0xC065B066,0xCFBF6439,0x00000000
	long	0x3FFF0000,0xC0AE345F,0x56340AE6,0x00000000
	long	0x3FFF0000,0xC0F22291,0x9CB9E6A7,0x00000000

#--SCRATCH LOCATIONS (OFFSETS FROM a6). X HOLDS A COPY OF THE INPUT AND IS
#--LATER OVERWRITTEN WITH F (ATANMAIN) OR X' (ATANBIG).
	set	X,FP_SCR0
	set	XDCARE,X+2
	set	XFRAC,X+4
	set	XFRACLO,X+8

	set	ATANF,FP_SCR1
	set	ATANFHI,ATANF+4
	set	ATANFLO,ATANF+8

	global	satan
#--ENTRY POINT FOR ATAN(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
#--ON ENTRY a0 POINTS TO X AND d0 HOLDS THE USER ROUND PRECISION,MODE.
#--d1 IS BUILT INTO THE "COMPACT FORM" OF |X|: SIGN/EXPONENT WORD IN THE
#--UPPER 16 BITS (SIGN THEN CLEARED), TOP 16 MANTISSA BITS IN THE LOWER
#--16 BITS. ALL RANGE CHECKS BELOW ARE INTEGER COMPARES ON d1.
#--d1, a1 AND fp1 ARE USED AS SCRATCH; d2, fp2 AND fp3 ARE SAVED/RESTORED.
satan:
	fmov.x	(%a0),%fp0		# LOAD INPUT

	mov.l	(%a0),%d1		# SIGN/EXPONENT INTO UPPER WORD
	mov.w	4(%a0),%d1		# TOP 16 MANTISSA BITS INTO LOWER WORD
	fmov.x	%fp0,X(%a6)
	and.l	&0x7FFFFFFF,%d1		# DROP THE SIGN: d1 = COMPACT |X|

	cmp.l	%d1,&0x3FFB8000		# |X| >= 1/16?
	bge.b	ATANOK1
	bra.w	ATANSM

ATANOK1:
	cmp.l	%d1,&0x4002FFFF		# |X| < 16 ?
	ble.b	ATANMAIN
	bra.w	ATANBIG

#--THE MOST LIKELY CASE, |X| IN [1/16, 16). WE USE TABLE TECHNIQUE
#--THE IDEA IS ATAN(X) = ATAN(F) + ATAN( [X-F] / [1+XF] ).
#--SO IF F IS CHOSEN TO BE CLOSE TO X AND ATAN(F) IS STORED IN
#--A TABLE, ALL WE NEED IS TO APPROXIMATE ATAN(U) WHERE
#--U = (X-F)/(1+XF) IS SMALL (REMEMBER F IS CLOSE TO X). IT IS
#--TRUE THAT A DIVIDE IS NOW NEEDED, BUT THE APPROXIMATION FOR
#--ATAN(U) IS A VERY SHORT POLYNOMIAL AND THE INDEXING TO
#--FETCH F AND SAVING OF REGISTERS CAN ALL BE HIDDEN UNDER THE
#--DIVIDE. IN THE END THIS METHOD IS MUCH FASTER THAN A TRADITIONAL
#--ONE. NOTE ALSO THAT THE TRADITIONAL SCHEME THAT APPROXIMATES
#--ATAN(X) DIRECTLY WILL NEED TO USE A RATIONAL APPROXIMATION
#--(DIVISION NEEDED) ANYWAY BECAUSE A POLYNOMIAL APPROXIMATION
#--WILL INVOLVE A VERY LONG POLYNOMIAL.

#--NOW WE SEE X AS +-2^K * 1.BBBBBBB....B <- 1. + 63 BITS
#--WE CHOSE F TO BE +-2^K * 1.BBBB1
#--THAT IS IT MATCHES THE EXPONENT AND FIRST 5 BITS OF X, THE
#--SIXTH BIT IS SET TO BE 1. SINCE K = -4, -3, ..., 3, THERE
#--ARE ONLY 8 TIMES 16 = 2^7 = 128 |F|'S. SINCE ATAN(-|F|) IS
#-- -ATAN(|F|), WE NEED TO STORE ONLY ATAN(|F|).

ATANMAIN:

	and.l	&0xF8000000,XFRAC(%a6)	# FIRST 5 BITS
	or.l	&0x04000000,XFRAC(%a6)	# SET 6-TH BIT TO 1
	mov.l	&0x00000000,XFRACLO(%a6) # LOCATION OF X IS NOW F

	fmov.x	%fp0,%fp1		# FP1 IS X
	fmul.x	X(%a6),%fp1		# FP1 IS X*F, NOTE THAT X*F > 0
	fsub.x	X(%a6),%fp0		# FP0 IS X-F
	fadd.s	&0x3F800000,%fp1	# FP1 IS 1 + X*F
	fdiv.x	%fp1,%fp0		# FP0 IS U = (X-F)/(1+X*F)

#--WHILE THE DIVISION IS TAKING ITS TIME, WE FETCH ATAN(|F|)
#--CREATE ATAN(F) AND STORE IT IN ATANF, AND
#--SAVE REGISTERS FP2.

	mov.l	%d2,-(%sp)		# SAVE d2 TEMPORARILY
	mov.l	%d1,%d2			# THE EXP AND 16 BITS OF X
	and.l	&0x00007800,%d1		# 4 VARYING BITS OF F'S FRACTION
	and.l	&0x7FFF0000,%d2		# EXPONENT OF F
	sub.l	&0x3FFB0000,%d2		# K+4
	asr.l	&1,%d2			# K+4 NOW SITS JUST ABOVE THE 4 BITS
	add.l	%d2,%d1			# THE 7 BITS IDENTIFYING F
	asr.l	&7,%d1			# INDEX INTO TBL OF ATAN(|F|)
					# (BYTE OFFSET, 16 BYTES PER ENTRY)
	lea	ATANTBL(%pc),%a1
	add.l	%d1,%a1			# ADDRESS OF ATAN(|F|)
	mov.l	(%a1)+,ATANF(%a6)
	mov.l	(%a1)+,ATANFHI(%a6)
	mov.l	(%a1)+,ATANFLO(%a6)	# ATANF IS NOW ATAN(|F|)
	mov.l	X(%a6),%d1		# LOAD SIGN AND EXPO. AGAIN
	and.l	&0x80000000,%d1		# SIGN(F)
	or.l	%d1,ATANF(%a6)		# ATANF IS NOW SIGN(F)*ATAN(|F|)
	mov.l	(%sp)+,%d2		# RESTORE d2

#--THAT'S ALL I HAVE TO DO FOR NOW,
#--BUT ALAS, THE DIVIDE IS STILL CRANKING!

#--U IN FP0, WE ARE NOW READY TO COMPUTE ATAN(U) AS
#--U + A1*U*V*(A2 + V*(A3 + V)), V = U*U
#--THE POLYNOMIAL MAY LOOK STRANGE, BUT IS NEVERTHELESS CORRECT.
#--THE NATURAL FORM IS U + U*V*(A1 + V*(A2 + V*A3))
#--WHAT WE HAVE HERE IS MERELY A1 = A3, A2 = A1/A3, A3 = A2/A3.
#--THE REASON FOR THIS REARRANGEMENT IS TO MAKE THE INDEPENDENT
#--PARTS A1*U*V AND (A2 + ... STUFF) MORE LOAD-BALANCED

	fmovm.x	&0x04,-(%sp)		# save fp2

	fmov.x	%fp0,%fp1
	fmul.x	%fp1,%fp1		# FP1 IS V = U*U
	fmov.d	ATANA3(%pc),%fp2
	fadd.x	%fp1,%fp2		# A3+V
	fmul.x	%fp1,%fp2		# V*(A3+V)
	fmul.x	%fp0,%fp1		# U*V
	fadd.d	ATANA2(%pc),%fp2	# A2+V*(A3+V)
	fmul.d	ATANA1(%pc),%fp1	# A1*U*V
	fmul.x	%fp2,%fp1		# A1*U*V*(A2+V*(A3+V))
	fadd.x	%fp1,%fp0		# ATAN(U), FP1 RELEASED

	fmovm.x	(%sp)+,&0x20		# restore fp2

	fmov.l	%d0,%fpcr		# restore users rnd mode,prec
	fadd.x	ATANF(%a6),%fp0		# ATAN(X)
	bra	t_inx2

ATANBORS:
#--|X| IS IN d1 IN COMPACT FORM (THE COMPARE BELOW TESTS d1).
#--FP0 IS X AND |X| <= 1/16 OR |X| >= 16.
#--THE ORIGINAL NOTE HERE ALSO SAID "FP1, d0 SAVED"; NO SUCH SAVE IS
#--VISIBLE IN satan -- TODO CONFIRM.
#--NOTE(review): NO BRANCH TO ATANBORS IS VISIBLE IN THIS PART OF THE
#--FILE; satan BRANCHES DIRECTLY TO ATANSM/ATANBIG. CONFIRM BEFORE
#--TREATING THIS ENTRY AS DEAD.
	cmp.l	%d1,&0x3FFF8000
	bgt.w	ATANBIG			# I.E. |X| >= 16

ATANSM:
#--|X| <= 1/16
#--IF |X| < 2^(-40), RETURN X AS ANSWER. OTHERWISE, APPROXIMATE
#--ATAN(X) BY X + X*Y*(B1+Y*(B2+Y*(B3+Y*(B4+Y*(B5+Y*B6)))))
#--WHICH IS X + X*Y*( [B1+Z*(B3+Z*B5)] + [Y*(B2+Z*(B4+Z*B6))] )
#--WHERE Y = X*X, AND Z = Y*Y.

	cmp.l	%d1,&0x3FD78000
	blt.w	ATANTINY

#--COMPUTE POLYNOMIAL
	fmovm.x	&0x0c,-(%sp)		# save fp2/fp3

	fmul.x	%fp0,%fp0		# FP0 IS Y = X*X

	fmov.x	%fp0,%fp1
	fmul.x	%fp1,%fp1		# FP1 IS Z = Y*Y

	fmov.d	ATANB6(%pc),%fp2
	fmov.d	ATANB5(%pc),%fp3

	fmul.x	%fp1,%fp2		# Z*B6
	fmul.x	%fp1,%fp3		# Z*B5

	fadd.d	ATANB4(%pc),%fp2	# B4+Z*B6
	fadd.d	ATANB3(%pc),%fp3	# B3+Z*B5

	fmul.x	%fp1,%fp2		# Z*(B4+Z*B6)
	fmul.x	%fp3,%fp1		# Z*(B3+Z*B5)

	fadd.d	ATANB2(%pc),%fp2	# B2+Z*(B4+Z*B6)
	fadd.d	ATANB1(%pc),%fp1	# B1+Z*(B3+Z*B5)

	fmul.x	%fp0,%fp2		# Y*(B2+Z*(B4+Z*B6))
	fmul.x	X(%a6),%fp0		# X*Y

	fadd.x	%fp2,%fp1		# [B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))]

	fmul.x	%fp1,%fp0		# X*Y*([B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))])

	fmovm.x	(%sp)+,&0x30		# restore fp2/fp3

	fmov.l	%d0,%fpcr		# restore users rnd mode,prec
	fadd.x	X(%a6),%fp0
	bra	t_inx2

ATANTINY:
#--|X| < 2^(-40), ATAN(X) = X

	fmov.l	%d0,%fpcr		# restore users rnd mode,prec
	mov.b	&FMOV_OP,%d1		# last inst is MOVE
	fmov.x	X(%a6),%fp0		# last inst - possible exception set

	bra	t_catch

ATANBIG:
#--IF |X| > 2^(100), RETURN SIGN(X)*(PI/2 - TINY). OTHERWISE,
#--RETURN SIGN(X)*PI/2 + ATAN(-1/X).
	cmp.l	%d1,&0x40638000
	bgt.w	ATANHUGE

#--APPROXIMATE ATAN(-1/X) BY
#--X'+X'*Y*(C1+Y*(C2+Y*(C3+Y*(C4+Y*C5)))), X' = -1/X, Y = X'*X'
#--THIS CAN BE RE-WRITTEN AS
#--X'+X'*Y*( [C1+Z*(C3+Z*C5)] + [Y*(C2+Z*C4)] ), Z = Y*Y.

	fmovm.x	&0x0c,-(%sp)		# save fp2/fp3

	fmov.s	&0xBF800000,%fp1	# LOAD -1
	fdiv.x	%fp0,%fp1		# FP1 IS -1/X

#--DIVIDE IS STILL CRANKING

	fmov.x	%fp1,%fp0		# FP0 IS X'
	fmul.x	%fp0,%fp0		# FP0 IS Y = X'*X'
	fmov.x	%fp1,X(%a6)		# X IS REALLY X'

	fmov.x	%fp0,%fp1
	fmul.x	%fp1,%fp1		# FP1 IS Z = Y*Y

	fmov.d	ATANC5(%pc),%fp3
	fmov.d	ATANC4(%pc),%fp2

	fmul.x	%fp1,%fp3		# Z*C5
	fmul.x	%fp1,%fp2		# Z*C4

	fadd.d	ATANC3(%pc),%fp3	# C3+Z*C5
	fadd.d	ATANC2(%pc),%fp2	# C2+Z*C4

	fmul.x	%fp3,%fp1		# Z*(C3+Z*C5), FP3 RELEASED
	fmul.x	%fp0,%fp2		# Y*(C2+Z*C4)

	fadd.d	ATANC1(%pc),%fp1	# C1+Z*(C3+Z*C5)
	fmul.x	X(%a6),%fp0		# X'*Y

	fadd.x	%fp2,%fp1		# [Y*(C2+Z*C4)]+[C1+Z*(C3+Z*C5)]

	fmul.x	%fp1,%fp0		# X'*Y*([C1+Z*(C3+Z*C5)]
#					...	+[Y*(C2+Z*C4)])
	fadd.x	X(%a6),%fp0

	fmovm.x	(%sp)+,&0x30		# restore fp2/fp3

	fmov.l	%d0,%fpcr		# restore users rnd mode,prec
	tst.b	(%a0)			# SIGN OF THE ORIGINAL X
	bpl.b	pos_big

neg_big:
	fadd.x	NPIBY2(%pc),%fp0
	bra	t_minx2

pos_big:
	fadd.x	PPIBY2(%pc),%fp0
	bra	t_pinx2

ATANHUGE:
#--RETURN SIGN(X)*(PIBY2 - TINY) = SIGN(X)*PIBY2 - SIGN(X)*TINY
	tst.b	(%a0)
	bpl.b	pos_huge

neg_huge:
	fmov.x	NPIBY2(%pc),%fp0
	fmov.l	%d0,%fpcr
	fadd.x	PTINY(%pc),%fp0
	bra	t_minx2

pos_huge:
	fmov.x	PPIBY2(%pc),%fp0
	fmov.l	%d0,%fpcr
	fadd.x	NTINY(%pc),%fp0
	bra	t_pinx2

	global	satand
#--ENTRY POINT FOR ATAN(X) FOR DENORMALIZED ARGUMENT
satand:
	bra	t_extdnrm

#########################################################################
# sasin():  computes the inverse sine of a normalized input		#
# sasind(): computes the inverse sine of a denormalized input		#
#									#
# INPUT *************************************************************** #
#	a0 = pointer to extended precision input			#
#	d0 = 
#		round precision,mode					#
#									#
# OUTPUT ************************************************************** #
#	fp0 = arcsin(X)							#
#									#
# ACCURACY and MONOTONICITY ******************************************* #
#	The returned result is within 3 ulps in 64 significant bits,	#
#	i.e. within 0.5001 ulp to 53 bits if the result is subsequently	#
#	rounded to double precision. The result is provably monotonic	#
#	in double precision.						#
#									#
# ALGORITHM *********************************************************** #
#									#
#	ASIN								#
#	1. If |X| >= 1, go to 3.					#
#									#
#	2. (|X| < 1) Calculate asin(X) by				#
#		z := sqrt( [1-X][1+X] )					#
#		asin(X) = atan( x / z ).				#
#		Exit.							#
#									#
#	3. If |X| > 1, go to 5.						#
#									#
#	4. (|X| = 1) sgn := sign(X), return asin(X) := sgn * Pi/2. Exit.#
#									#
#	5. (|X| > 1) Generate an invalid operation by 0 * infinity.	#
#		Exit.							#
#									#
#########################################################################

	global	sasin
#--d1 IS BUILT INTO THE COMPACT FORM OF |X| (SIGN/EXPONENT WORD IN THE
#--UPPER 16 BITS WITH THE SIGN CLEARED, TOP 16 MANTISSA BITS IN THE LOWER
#--16 BITS) SO THE RANGE CHECKS ARE INTEGER COMPARES.
sasin:
	fmov.x	(%a0),%fp0		# LOAD INPUT

	mov.l	(%a0),%d1
	mov.w	4(%a0),%d1
	and.l	&0x7FFFFFFF,%d1
	cmp.l	%d1,&0x3FFF8000		# |X| >= 1 ?
	bge.b	ASINBIG

# This catch is added here for the '060 QSP. Originally, the call to
# satan() would handle this case by causing the exception which would
# not be caught until gen_except(). Now, with the exceptions being
# detected inside of satan(), the exception would have been handled there
# instead of inside sasin() as expected.
	cmp.l	%d1,&0x3FD78000		# |X| < 2^(-40) ?
	blt.w	ASINTINY

#--THIS IS THE USUAL CASE, |X| < 1
#--ASIN(X) = ATAN( X / SQRT( (1-X)(1+X) ) )
#--d0 IS NOT MODIFIED HERE, SO satan IS CALLED WITH THE CALLER'S
#--ROUND PRECISION,MODE.

ASINMAIN:
	fmov.s	&0x3F800000,%fp1
	fsub.x	%fp0,%fp1		# 1-X
	fmovm.x	&0x4,-(%sp)		# {fp2}
	fmov.s	&0x3F800000,%fp2
	fadd.x	%fp0,%fp2		# 1+X
	fmul.x	%fp2,%fp1		# (1+X)(1-X)
	fmovm.x	(%sp)+,&0x20		# {fp2}
	fsqrt.x	%fp1			# SQRT([1-X][1+X])
	fdiv.x	%fp1,%fp0		# X/SQRT([1-X][1+X])
	fmovm.x	&0x01,-(%sp)		# save X/SQRT(...)
	lea	(%sp),%a0		# pass ptr to X/SQRT(...)
	bsr	satan
	add.l	&0xc,%sp		# clear X/SQRT(...) from stack
	bra	t_inx2

ASINBIG:
	fabs.x	%fp0			# |X|
	fcmp.s	%fp0,&0x3F800000
	fbgt	t_operr			# cause an operr exception

#--|X| = 1, ASIN(X) = +- PI/2.
ASINONE:
	fmov.x	PIBY2(%pc),%fp0
	mov.l	(%a0),%d1
	and.l	&0x80000000,%d1		# SIGN BIT OF X
	or.l	&0x3F800000,%d1		# +-1 IN SGL FORMAT
	mov.l	%d1,-(%sp)		# push SIGN(X) IN SGL-FMT
	fmov.l	%d0,%fpcr
	fmul.s	(%sp)+,%fp0		# SIGN(X)*PI/2, POPS THE TEMPORARY
	bra	t_inx2

#--|X| < 2^(-40), ASIN(X) = X
ASINTINY:
	fmov.l	%d0,%fpcr		# restore users rnd mode,prec
	mov.b	&FMOV_OP,%d1		# last inst is MOVE
	fmov.x	(%a0),%fp0		# last inst - possible exception
	bra	t_catch

	global	sasind
#--ASIN(X) = X FOR DENORMALIZED X
sasind:
	bra	t_extdnrm

#########################################################################
# sacos():  computes the inverse cosine of a normalized input		#
# sacosd(): computes the inverse cosine of a denormalized input		#
#									#
# INPUT *************************************************************** #
#	a0 = pointer to extended precision input			#
#	d0 = round precision,mode					#
#									#
# OUTPUT ************************************************************** #
#	fp0 = arccos(X)							#
#									#
# ACCURACY and MONOTONICITY ******************************************* #
#	The returned result is within 3 ulps in 64 
#		significant bits,					#
#	i.e. within 0.5001 ulp to 53 bits if the result is subsequently	#
#	rounded to double precision. The result is provably monotonic	#
#	in double precision.						#
#									#
# ALGORITHM *********************************************************** #
#									#
#	ACOS								#
#	1. If |X| >= 1, go to 3.					#
#									#
#	2. (|X| < 1) Calculate acos(X) by				#
#		z := (1-X) / (1+X)					#
#		acos(X) = 2 * atan( sqrt(z) ).				#
#		Exit.							#
#									#
#	3. If |X| > 1, go to 5.						#
#									#
#	4. (|X| = 1) If X > 0, return 0. Otherwise, return Pi. Exit.	#
#									#
#	5. (|X| > 1) Generate an invalid operation by 0 * infinity.	#
#		Exit.							#
#									#
#########################################################################

	global	sacos
sacos:
	fmov.x	(%a0),%fp0		# LOAD INPUT

	mov.l	(%a0),%d1		# pack exp w/ upper 16 fraction
	mov.w	4(%a0),%d1
	and.l	&0x7FFFFFFF,%d1		# clear the sign: d1 = compact |X|
	cmp.l	%d1,&0x3FFF8000		# |X| >= 1 ?
	bge.b	ACOSBIG

#--THIS IS THE USUAL CASE, |X| < 1
#--ACOS(X) = 2 * ATAN( SQRT( (1-X)/(1+X) ) )
#--THE USER'S d0 IS PUSHED AND d0 IS CLEARED BEFORE CALLING satan; THE
#--SAVED VALUE IS LOADED INTO FPCR AGAIN BEFORE THE FINAL DOUBLING.

ACOSMAIN:
	fmov.s	&0x3F800000,%fp1
	fadd.x	%fp0,%fp1		# 1+X
	fneg.x	%fp0			# -X
	fadd.s	&0x3F800000,%fp0	# 1-X
	fdiv.x	%fp1,%fp0		# (1-X)/(1+X)
	fsqrt.x	%fp0			# SQRT((1-X)/(1+X))
	mov.l	%d0,-(%sp)		# save original users fpcr
	clr.l	%d0
	fmovm.x	&0x01,-(%sp)		# save SQRT(...) to stack
	lea	(%sp),%a0		# pass ptr to sqrt
	bsr	satan			# ATAN(SQRT([1-X]/[1+X]))
	add.l	&0xc,%sp		# clear SQRT(...) from stack

	fmov.l	(%sp)+,%fpcr		# restore users round prec,mode
	fadd.x	%fp0,%fp0		# 2 * ATAN( STUFF )
	bra	t_pinx2

ACOSBIG:
	fabs.x	%fp0
	fcmp.s	%fp0,&0x3F800000
	fbgt	t_operr			# cause an operr exception

#--|X| = 1, ACOS(X) = 0 OR PI
	tst.b	(%a0)			# is X positive or negative?
	bpl.b	ACOSP1

#--X = -1
#Returns PI and inexact exception
ACOSM1:
	fmov.x	PI(%pc),%fp0		# load PI
	fmov.l	%d0,%fpcr		# load round mode,prec
	fadd.s	&0x00800000,%fp0	# add a small value (2^(-126) in sgl)
	bra	t_pinx2

ACOSP1:
	bra	ld_pzero		# answer is positive zero

	global	sacosd
#--ACOS(X) = PI/2 FOR DENORMALIZED X
sacosd:
	fmov.l	%d0,%fpcr		# load user's rnd mode/prec
	fmov.x	PIBY2(%pc),%fp0
	bra	t_pinx2

#########################################################################
# setox():    computes the exponential for a normalized input		#
# setoxd():   computes the exponential for a denormalized input		#
# setoxm1():  computes the exponential minus 1 for a normalized input	#
# setoxm1d(): computes the exponential minus 1 for a denormalized input	#
#									#
# INPUT *************************************************************** #
#	a0 = pointer to extended precision input			#
#	d0 = round precision,mode					#
#									#
# OUTPUT ************************************************************** #
#	fp0 = exp(X) or exp(X)-1					#
#									#
# ACCURACY and MONOTONICITY ******************************************* #
#	The returned result is within 0.85 ulps in 64 significant bits,	#
#	i.e. within 0.5001 ulp to 53 bits if the result is subsequently	#
#	rounded to double precision. The result is provably monotonic	#
#	in double precision.						#
#									#
# ALGORITHM and IMPLEMENTATION **************************************** #
#									#
#	setoxd								#
#	------								#
#	Step 1.	Set ans := 1.0						#
#									#
#	Step 2.	Return	ans := ans + sign(X)*2^(-126). Exit.		#
#	Notes:	This will always generate one exception -- inexact.	#
#									#
#									#
#	setox								#
#	-----								#
#									#
#	Step 1.	Filter out extreme cases of input argument.		#
#		1.1	If |X| >= 2^(-65), go to Step 1.3.		#
#		1.2	Go to Step 7.					#
#		1.3	If |X| < 16380 log(2), go to Step 2.		#
#		1.4	Go to Step 8.	
#
#	Notes:	The usual case should take the branches 1.1 -> 1.3 -> 2.#
#		To avoid the use of floating-point comparisons, a	#
#		compact representation of |X| is used. This format is a	#
#		32-bit integer, the upper (more significant) 16 bits	#
#		are the sign and biased exponent field of |X|; the	#
#		lower 16 bits are the 16 most significant fraction	#
#		(including the explicit bit) bits of |X|. Consequently,	#
#		the comparisons in Steps 1.1 and 1.3 can be performed	#
#		by integer comparison. Note also that the constant	#
#		16380 log(2) used in Step 1.3 is also in the compact	#
#		form. Thus taking the branch to Step 2 guarantees	#
#		|X| < 16380 log(2). There is no harm in having a small	#
#		number of cases where |X| is less than, but close to,	#
#		16380 log(2) and the branch to Step 8 is taken.		#
#									#
#	Step 2.	Calculate N = round-to-nearest-int( X * 64/log2 ).	#
#		2.1	Set AdjFlag := 0 (indicates the branch 1.3 -> 2	#
#			was taken)					#
#		2.2	N := round-to-nearest-integer( X * 64/log2 ).	#
#		2.3	Calculate J = N mod 64; so J = 0,1,2,...,	#
#			or 63.						#
#		2.4	Calculate M = (N - J)/64; so N = 64M + J.	#
#		2.5	Calculate the address of the stored value of	#
#			2^(J/64).					#
#		2.6	Create the value Scale = 2^M.			#
#	Notes:	The calculation in 2.2 is really performed by		#
#			Z := X * constant				#
#			N := round-to-nearest-integer(Z)		#
#		where							#
#			constant := single-precision( 64/log 2 ).	#
#									#
#		Using a single-precision constant avoids memory		#
#		access. Another effect of using a single-precision	#
#		"constant" is that the calculated value Z is		#
#									#
#			Z = X*(64/log2)*(1+eps), |eps| <= 2^(-24).	#
#									#
#		This error has to be considered later in Steps 3 and 4.	#
#									#
#	Step 3.	Calculate X - N*log2/64.				#
#		3.1	R := X + N*L1,					#
#				where L1 := single-precision(-log2/64).	
# 6769# 3.2 R := R + N*L2, # 6770# L2 := extended-precision(-log2/64 - L1).# 6771# Notes: a) The way L1 and L2 are chosen ensures L1+L2 # 6772# approximate the value -log2/64 to 88 bits of accuracy. # 6773# b) N*L1 is exact because N is no longer than 22 bits # 6774# and L1 is no longer than 24 bits. # 6775# c) The calculation X+N*L1 is also exact due to # 6776# cancellation. Thus, R is practically X+N(L1+L2) to full # 6777# 64 bits. # 6778# d) It is important to estimate how large can |R| be # 6779# after Step 3.2. # 6780# # 6781# N = rnd-to-int( X*64/log2 (1+eps) ), |eps|<=2^(-24) # 6782# X*64/log2 (1+eps) = N + f, |f| <= 0.5 # 6783# X*64/log2 - N = f - eps*X 64/log2 # 6784# X - N*log2/64 = f*log2/64 - eps*X # 6785# # 6786# # 6787# Now |X| <= 16446 log2, thus # 6788# # 6789# |X - N*log2/64| <= (0.5 + 16446/2^(18))*log2/64 # 6790# <= 0.57 log2/64. # 6791# This bound will be used in Step 4. # 6792# # 6793# Step 4. Approximate exp(R)-1 by a polynomial # 6794# p = R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*A5)))) # 6795# Notes: a) In order to reduce memory access, the coefficients # 6796# are made as "short" as possible: A1 (which is 1/2), A4 # 6797# and A5 are single precision; A2 and A3 are double # 6798# precision. # 6799# b) Even with the restrictions above, # 6800# |p - (exp(R)-1)| < 2^(-68.8) for all |R| <= 0.0062. # 6801# Note that 0.0062 is slightly bigger than 0.57 log2/64. # 6802# c) To fully use the pipeline, p is separated into # 6803# two independent pieces of roughly equal complexities # 6804# p = [ R + R*S*(A2 + S*A4) ] + # 6805# [ S*(A1 + S*(A3 + S*A5)) ] # 6806# where S = R*R. # 6807# # 6808# Step 5. Compute 2^(J/64)*exp(R) = 2^(J/64)*(1+p) by # 6809# ans := T + ( T*p + t) # 6810# where T and t are the stored values for 2^(J/64). # 6811# Notes: 2^(J/64) is stored as T and t where T+t approximates # 6812# 2^(J/64) to roughly 85 bits; T is in extended precision # 6813# and t is in single precision. 
#		Note also that T is					#
#		rounded to 62 bits so that the last two bits of T are	#
#		zero. The reason for such a special form is that T-1,	#
#		T-2, and T-8 will all be exact --- a property that will	#
#		give much more accurate computation of the function	#
#		EXPM1.							#
#									#
#	Step 6.	Reconstruction of exp(X)				#
#			exp(X) = 2^M * 2^(J/64) * exp(R).		#
#		6.1	If AdjFlag = 0, go to 6.3			#
#		6.2	ans := ans * AdjScale				#
#		6.3	Restore the user FPCR				#
#		6.4	Return ans := ans * Scale. Exit.		#
#	Notes:	If AdjFlag = 0, we have X = Mlog2 + Jlog2/64 + R,	#
#		|M| <= 16380, and Scale = 2^M. Moreover, exp(X) will	#
#		neither overflow nor underflow. If AdjFlag = 1, that	#
#		means that						#
#			X = (M1+M)log2 + Jlog2/64 + R, |M1+M| >= 16380.	#
#		Hence, exp(X) may overflow or underflow or neither.	#
#		When that is the case, AdjScale = 2^(M1) where M1 is	#
#		approximately M. Thus 6.2 will never cause		#
#		over/underflow. Possible exception in 6.4 is overflow	#
#		or underflow. The inexact exception is not generated in	#
#		6.4. Although one can argue that the inexact flag	#
#		should always be raised, to simulate that exception	#
#		costs more than the flag is worth in practical uses.	#
#									#
#	Step 7.	Return 1 + X.						#
#		7.1	ans := X					#
#		7.2	Restore user FPCR.				#
#		7.3	Return ans := 1 + ans. Exit			#
#	Notes:	For non-zero X, the inexact exception will always be	#
#		raised by 7.3. That is the only exception raised by 7.3.#
#		Note also that we use the FMOVEM instruction to move X	#
#		in Step 7.1 to avoid unnecessary trapping. (Although	#
#		the FMOVEM may not seem relevant since X is normalized,	#
#		the precaution will be useful in the library version of	#
#		this code where the separate entry for denormalized	#
#		inputs will be done away with.)				#
#									#
#	Step 8.	Handle exp(X) where |X| >= 16380log2.			#
#		8.1	If |X| > 16480 log2, go to Step 9.	
# 6855# (mimic 2.2 - 2.6) # 6856# 8.2 N := round-to-integer( X * 64/log2 ) # 6857# 8.3 Calculate J = N mod 64, J = 0,1,...,63 # 6858# 8.4 K := (N-J)/64, M1 := truncate(K/2), M = K-M1, # 6859# AdjFlag := 1. # 6860# 8.5 Calculate the address of the stored value # 6861# 2^(J/64). # 6862# 8.6 Create the values Scale = 2^M, AdjScale = 2^M1. # 6863# 8.7 Go to Step 3. # 6864# Notes: Refer to notes for 2.2 - 2.6. # 6865# # 6866# Step 9. Handle exp(X), |X| > 16480 log2. # 6867# 9.1 If X < 0, go to 9.3 # 6868# 9.2 ans := Huge, go to 9.4 # 6869# 9.3 ans := Tiny. # 6870# 9.4 Restore user FPCR. # 6871# 9.5 Return ans := ans * ans. Exit. # 6872# Notes: Exp(X) will surely overflow or underflow, depending on # 6873# X's sign. "Huge" and "Tiny" are respectively large/tiny # 6874# extended-precision numbers whose square over/underflow # 6875# with an inexact result. Thus, 9.5 always raises the # 6876# inexact together with either overflow or underflow. # 6877# # 6878# setoxm1d # 6879# -------- # 6880# # 6881# Step 1. Set ans := 0 # 6882# # 6883# Step 2. Return ans := X + ans. Exit. # 6884# Notes: This will return X with the appropriate rounding # 6885# precision prescribed by the user FPCR. # 6886# # 6887# setoxm1 # 6888# ------- # 6889# # 6890# Step 1. Check |X| # 6891# 1.1 If |X| >= 1/4, go to Step 1.3. # 6892# 1.2 Go to Step 7. # 6893# 1.3 If |X| < 70 log(2), go to Step 2. # 6894# 1.4 Go to Step 10. # 6895# Notes: The usual case should take the branches 1.1 -> 1.3 -> 2.# 6896# However, it is conceivable |X| can be small very often # 6897# because EXPM1 is intended to evaluate exp(X)-1 # 6898# accurately when |X| is small. For further details on # 6899# the comparisons, see the notes on Step 1 of setox. # 6900# # 6901# Step 2. Calculate N = round-to-nearest-int( X * 64/log2 ). # 6902# 2.1 N := round-to-nearest-integer( X * 64/log2 ). # 6903# 2.2 Calculate J = N mod 64; so J = 0,1,2,..., # 6904# or 63. # 6905# 2.3 Calculate M = (N - J)/64; so N = 64M + J. 
# 6906# 2.4 Calculate the address of the stored value of # 6907# 2^(J/64). # 6908# 2.5 Create the values Sc = 2^M and # 6909# OnebySc := -2^(-M). # 6910# Notes: See the notes on Step 2 of setox. # 6911# # 6912# Step 3. Calculate X - N*log2/64. # 6913# 3.1 R := X + N*L1, # 6914# where L1 := single-precision(-log2/64). # 6915# 3.2 R := R + N*L2, # 6916# L2 := extended-precision(-log2/64 - L1).# 6917# Notes: Applying the analysis of Step 3 of setox in this case # 6918# shows that |R| <= 0.0055 (note that |X| <= 70 log2 in # 6919# this case). # 6920# # 6921# Step 4. Approximate exp(R)-1 by a polynomial # 6922# p = R+R*R*(A1+R*(A2+R*(A3+R*(A4+R*(A5+R*A6))))) # 6923# Notes: a) In order to reduce memory access, the coefficients # 6924# are made as "short" as possible: A1 (which is 1/2), A5 # 6925# and A6 are single precision; A2, A3 and A4 are double # 6926# precision. # 6927# b) Even with the restriction above, # 6928# |p - (exp(R)-1)| < |R| * 2^(-72.7) # 6929# for all |R| <= 0.0055. # 6930# c) To fully use the pipeline, p is separated into # 6931# two independent pieces of roughly equal complexity # 6932# p = [ R*S*(A2 + S*(A4 + S*A6)) ] + # 6933# [ R + S*(A1 + S*(A3 + S*A5)) ] # 6934# where S = R*R. # 6935# # 6936# Step 5. Compute 2^(J/64)*p by # 6937# p := T*p # 6938# where T and t are the stored values for 2^(J/64). # 6939# Notes: 2^(J/64) is stored as T and t where T+t approximates # 6940# 2^(J/64) to roughly 85 bits; T is in extended precision # 6941# and t is in single precision. Note also that T is # 6942# rounded to 62 bits so that the last two bits of T are # 6943# zero. The reason for such a special form is that T-1, # 6944# T-2, and T-8 will all be exact --- a property that will # 6945# be exploited in Step 6 below. The total relative error # 6946# in p is no bigger than 2^(-67.7) compared to the final # 6947# result. # 6948# # 6949# Step 6. Reconstruction of exp(X)-1 # 6950# exp(X)-1 = 2^M * ( 2^(J/64) + p - 2^(-M) ). # 6951# 6.1 If M <= 63, go to Step 6.3. 
# 6952# 6.2 ans := T + (p + (t + OnebySc)). Go to 6.6 # 6953# 6.3 If M >= -3, go to 6.5. # 6954# 6.4 ans := (T + (p + t)) + OnebySc. Go to 6.6 # 6955# 6.5 ans := (T + OnebySc) + (p + t). # 6956# 6.6 Restore user FPCR. # 6957# 6.7 Return ans := Sc * ans. Exit. # 6958# Notes: The various arrangements of the expressions give # 6959# accurate evaluations. # 6960# # 6961# Step 7. exp(X)-1 for |X| < 1/4. # 6962# 7.1 If |X| >= 2^(-65), go to Step 9. # 6963# 7.2 Go to Step 8. # 6964# # 6965# Step 8. Calculate exp(X)-1, |X| < 2^(-65). # 6966# 8.1 If |X| < 2^(-16312), goto 8.3 # 6967# 8.2 Restore FPCR; return ans := X - 2^(-16382). # 6968# Exit. # 6969# 8.3 X := X * 2^(140). # 6970# 8.4 Restore FPCR; ans := ans - 2^(-16382). # 6971# Return ans := ans*2^(140). Exit # 6972# Notes: The idea is to return "X - tiny" under the user # 6973# precision and rounding modes. To avoid unnecessary # 6974# inefficiency, we stay away from denormalized numbers # 6975# the best we can. For |X| >= 2^(-16312), the # 6976# straightforward 8.2 generates the inexact exception as # 6977# the case warrants. # 6978# # 6979# Step 9. Calculate exp(X)-1, |X| < 1/4, by a polynomial # 6980# p = X + X*X*(B1 + X*(B2 + ... + X*B12)) # 6981# Notes: a) In order to reduce memory access, the coefficients # 6982# are made as "short" as possible: B1 (which is 1/2), B9 # 6983# to B12 are single precision; B3 to B8 are double # 6984# precision; and B2 is double extended. # 6985# b) Even with the restriction above, # 6986# |p - (exp(X)-1)| < |X| 2^(-70.6) # 6987# for all |X| <= 0.251. # 6988# Note that 0.251 is slightly bigger than 1/4. # 6989# c) To fully preserve accuracy, the polynomial is # 6990# computed as # 6991# X + ( S*B1 + Q ) where S = X*X and # 6992# Q = X*S*(B2 + X*(B3 + ... + X*B12)) # 6993# d) To fully use the pipeline, Q is separated into # 6994# two independent pieces of roughly equal complexity # 6995# Q = [ X*S*(B2 + S*(B4 + ... + S*B12)) ] + # 6996# [ S*S*(B3 + S*(B5 + ... 
+ S*B11)) ] # 6997# # 6998# Step 10. Calculate exp(X)-1 for |X| >= 70 log 2. # 6999# 10.1 If X >= 70log2 , exp(X) - 1 = exp(X) for all # 7000# practical purposes. Therefore, go to Step 1 of setox. # 7001# 10.2 If X <= -70log2, exp(X) - 1 = -1 for all practical # 7002# purposes. # 7003# ans := -1 # 7004# Restore user FPCR # 7005# Return ans := ans + 2^(-126). Exit. # 7006# Notes: 10.2 will always create an inexact and return -1 + tiny # 7007# in the user rounding precision and mode. # 7008# # 7009######################################################################### 7010 7011L2: long 0x3FDC0000,0x82E30865,0x4361C4C6,0x00000000 7012 7013EEXPA3: long 0x3FA55555,0x55554CC1 7014EEXPA2: long 0x3FC55555,0x55554A54 7015 7016EM1A4: long 0x3F811111,0x11174385 7017EM1A3: long 0x3FA55555,0x55554F5A 7018 7019EM1A2: long 0x3FC55555,0x55555555,0x00000000,0x00000000 7020 7021EM1B8: long 0x3EC71DE3,0xA5774682 7022EM1B7: long 0x3EFA01A0,0x19D7CB68 7023 7024EM1B6: long 0x3F2A01A0,0x1A019DF3 7025EM1B5: long 0x3F56C16C,0x16C170E2 7026 7027EM1B4: long 0x3F811111,0x11111111 7028EM1B3: long 0x3FA55555,0x55555555 7029 7030EM1B2: long 0x3FFC0000,0xAAAAAAAA,0xAAAAAAAB 7031 long 0x00000000 7032 7033TWO140: long 0x48B00000,0x00000000 7034TWON140: 7035 long 0x37300000,0x00000000 7036 7037EEXPTBL: 7038 long 0x3FFF0000,0x80000000,0x00000000,0x00000000 7039 long 0x3FFF0000,0x8164D1F3,0xBC030774,0x9F841A9B 7040 long 0x3FFF0000,0x82CD8698,0xAC2BA1D8,0x9FC1D5B9 7041 long 0x3FFF0000,0x843A28C3,0xACDE4048,0xA0728369 7042 long 0x3FFF0000,0x85AAC367,0xCC487B14,0x1FC5C95C 7043 long 0x3FFF0000,0x871F6196,0x9E8D1010,0x1EE85C9F 7044 long 0x3FFF0000,0x88980E80,0x92DA8528,0x9FA20729 7045 long 0x3FFF0000,0x8A14D575,0x496EFD9C,0xA07BF9AF 7046 long 0x3FFF0000,0x8B95C1E3,0xEA8BD6E8,0xA0020DCF 7047 long 0x3FFF0000,0x8D1ADF5B,0x7E5BA9E4,0x205A63DA 7048 long 0x3FFF0000,0x8EA4398B,0x45CD53C0,0x1EB70051 7049 long 0x3FFF0000,0x9031DC43,0x1466B1DC,0x1F6EB029 7050 long 0x3FFF0000,0x91C3D373,0xAB11C338,0xA0781494 7051 
long 0x3FFF0000,0x935A2B2F,0x13E6E92C,0x9EB319B0 7052 long 0x3FFF0000,0x94F4EFA8,0xFEF70960,0x2017457D 7053 long 0x3FFF0000,0x96942D37,0x20185A00,0x1F11D537 7054 long 0x3FFF0000,0x9837F051,0x8DB8A970,0x9FB952DD 7055 long 0x3FFF0000,0x99E04593,0x20B7FA64,0x1FE43087 7056 long 0x3FFF0000,0x9B8D39B9,0xD54E5538,0x1FA2A818 7057 long 0x3FFF0000,0x9D3ED9A7,0x2CFFB750,0x1FDE494D 7058 long 0x3FFF0000,0x9EF53260,0x91A111AC,0x20504890 7059 long 0x3FFF0000,0xA0B0510F,0xB9714FC4,0xA073691C 7060 long 0x3FFF0000,0xA2704303,0x0C496818,0x1F9B7A05 7061 long 0x3FFF0000,0xA43515AE,0x09E680A0,0xA0797126 7062 long 0x3FFF0000,0xA5FED6A9,0xB15138EC,0xA071A140 7063 long 0x3FFF0000,0xA7CD93B4,0xE9653568,0x204F62DA 7064 long 0x3FFF0000,0xA9A15AB4,0xEA7C0EF8,0x1F283C4A 7065 long 0x3FFF0000,0xAB7A39B5,0xA93ED338,0x9F9A7FDC 7066 long 0x3FFF0000,0xAD583EEA,0x42A14AC8,0xA05B3FAC 7067 long 0x3FFF0000,0xAF3B78AD,0x690A4374,0x1FDF2610 7068 long 0x3FFF0000,0xB123F581,0xD2AC2590,0x9F705F90 7069 long 0x3FFF0000,0xB311C412,0xA9112488,0x201F678A 7070 long 0x3FFF0000,0xB504F333,0xF9DE6484,0x1F32FB13 7071 long 0x3FFF0000,0xB6FD91E3,0x28D17790,0x20038B30 7072 long 0x3FFF0000,0xB8FBAF47,0x62FB9EE8,0x200DC3CC 7073 long 0x3FFF0000,0xBAFF5AB2,0x133E45FC,0x9F8B2AE6 7074 long 0x3FFF0000,0xBD08A39F,0x580C36C0,0xA02BBF70 7075 long 0x3FFF0000,0xBF1799B6,0x7A731084,0xA00BF518 7076 long 0x3FFF0000,0xC12C4CCA,0x66709458,0xA041DD41 7077 long 0x3FFF0000,0xC346CCDA,0x24976408,0x9FDF137B 7078 long 0x3FFF0000,0xC5672A11,0x5506DADC,0x201F1568 7079 long 0x3FFF0000,0xC78D74C8,0xABB9B15C,0x1FC13A2E 7080 long 0x3FFF0000,0xC9B9BD86,0x6E2F27A4,0xA03F8F03 7081 long 0x3FFF0000,0xCBEC14FE,0xF2727C5C,0x1FF4907D 7082 long 0x3FFF0000,0xCE248C15,0x1F8480E4,0x9E6E53E4 7083 long 0x3FFF0000,0xD06333DA,0xEF2B2594,0x1FD6D45C 7084 long 0x3FFF0000,0xD2A81D91,0xF12AE45C,0xA076EDB9 7085 long 0x3FFF0000,0xD4F35AAB,0xCFEDFA20,0x9FA6DE21 7086 long 0x3FFF0000,0xD744FCCA,0xD69D6AF4,0x1EE69A2F 7087 long 0x3FFF0000,0xD99D15C2,0x78AFD7B4,0x207F439F 7088 
long 0x3FFF0000,0xDBFBB797,0xDAF23754,0x201EC207 7089 long 0x3FFF0000,0xDE60F482,0x5E0E9124,0x9E8BE175 7090 long 0x3FFF0000,0xE0CCDEEC,0x2A94E110,0x20032C4B 7091 long 0x3FFF0000,0xE33F8972,0xBE8A5A50,0x2004DFF5 7092 long 0x3FFF0000,0xE5B906E7,0x7C8348A8,0x1E72F47A 7093 long 0x3FFF0000,0xE8396A50,0x3C4BDC68,0x1F722F22 7094 long 0x3FFF0000,0xEAC0C6E7,0xDD243930,0xA017E945 7095 long 0x3FFF0000,0xED4F301E,0xD9942B84,0x1F401A5B 7096 long 0x3FFF0000,0xEFE4B99B,0xDCDAF5CC,0x9FB9A9E3 7097 long 0x3FFF0000,0xF281773C,0x59FFB138,0x20744C05 7098 long 0x3FFF0000,0xF5257D15,0x2486CC2C,0x1F773A19 7099 long 0x3FFF0000,0xF7D0DF73,0x0AD13BB8,0x1FFE90D5 7100 long 0x3FFF0000,0xFA83B2DB,0x722A033C,0xA041ED22 7101 long 0x3FFF0000,0xFD3E0C0C,0xF486C174,0x1F853F3A 7102 7103 set ADJFLAG,L_SCR2 7104 set SCALE,FP_SCR0 7105 set ADJSCALE,FP_SCR1 7106 set SC,FP_SCR0 7107 set ONEBYSC,FP_SCR1 7108 7109 global setox 7110setox: 7111#--entry point for EXP(X), here X is finite, non-zero, and not NaN's; a0 points to X, d0 is the value written to FPCR before the final operation 7112 7113#--Step 1. 7114 mov.l (%a0),%d1 # load part of input X 7115 and.l &0x7FFF0000,%d1 # biased expo. of X 7116 cmp.l %d1,&0x3FBE0000 # 2^(-65) 7117 bge.b EXPC1 # normal case 7118 bra EXPSM # |X| < 2^(-65): return 1+X 7119 7120EXPC1: 7121#--The case |X| >= 2^(-65) 7122 mov.w 4(%a0),%d1 # expo. and partial sig. of |X| 7123 cmp.l %d1,&0x400CB167 # 16380 log2 trunc. 16 bits 7124 blt.b EXPMAIN # normal case 7125 bra EEXPBIG # |X| >= 16380 log2: extra scaling or over/underflow 7126 7127EXPMAIN: 7128#--Step 2. 7129#--This is the normal branch: 2^(-65) <= |X| < 16380 log2. 
7130 fmov.x (%a0),%fp0 # load input from (a0) 7131 7132 fmov.x %fp0,%fp1 7133 fmul.s &0x42B8AA3B,%fp0 # 64/log2 * X 7134 fmovm.x &0xc,-(%sp) # save fp2/fp3 {%fp2/%fp3} 7135 mov.l &0,ADJFLAG(%a6) # no ADJSCALE multiply needed in Step 6 7136 fmov.l %fp0,%d1 # N = int( X * 64/log2 ) 7137 lea EEXPTBL(%pc),%a1 # a1 = base of the 2^(J/64) table 7138 fmov.l %d1,%fp0 # convert to floating-format 7139 7140 mov.l %d1,L_SCR1(%a6) # save N temporarily 7141 and.l &0x3F,%d1 # d1 is J = N mod 64 7142 lsl.l &4,%d1 # J * 16 (each table entry is 4 longs) 7143 add.l %d1,%a1 # address of 2^(J/64) 7144 mov.l L_SCR1(%a6),%d1 7145 asr.l &6,%d1 # d1 is M 7146 add.w &0x3FFF,%d1 # biased expo. of 2^(M) 7147 mov.w L2(%pc),L_SCR1(%a6) # prefetch L2, no need in CB 7148 7149EXPCONT1: 7150#--Step 3. 7151#--fp2,fp3 saved on the stack. fp0 is N, fp1 is X, 7152#--a1 points to 2^(J/64), d1 is biased expo. of 2^(M) 7153 fmov.x %fp0,%fp2 7154 fmul.s &0xBC317218,%fp0 # N * L1, L1 = lead(-log2/64) 7155 fmul.x L2(%pc),%fp2 # N * L2, L1+L2 = -log2/64 7156 fadd.x %fp1,%fp0 # X + N*L1 7157 fadd.x %fp2,%fp0 # fp0 is R, reduced arg. 7158 7159#--Step 4. 7160#--WE NOW COMPUTE EXP(R)-1 BY A POLYNOMIAL 7161#-- R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*A5)))) 7162#--TO FULLY USE THE PIPELINE, WE COMPUTE S = R*R 7163#--[R+R*S*(A2+S*A4)] + [S*(A1+S*(A3+S*A5))] 7164 7165 fmov.x %fp0,%fp1 7166 fmul.x %fp1,%fp1 # fp1 IS S = R*R 7167 7168 fmov.s &0x3AB60B70,%fp2 # fp2 IS A5 7169 7170 fmul.x %fp1,%fp2 # fp2 IS S*A5 7171 fmov.x %fp1,%fp3 7172 fmul.s &0x3C088895,%fp3 # fp3 IS S*A4 7173 7174 fadd.d EEXPA3(%pc),%fp2 # fp2 IS A3+S*A5 7175 fadd.d EEXPA2(%pc),%fp3 # fp3 IS A2+S*A4 7176 7177 fmul.x %fp1,%fp2 # fp2 IS S*(A3+S*A5) 7178 mov.w %d1,SCALE(%a6) # SCALE is 2^(M) in extended 7179 mov.l &0x80000000,SCALE+4(%a6) 7180 clr.l SCALE+8(%a6) 7181 7182 fmul.x %fp1,%fp3 # fp3 IS S*(A2+S*A4) 7183 7184 fadd.s &0x3F000000,%fp2 # fp2 IS A1+S*(A3+S*A5) 7185 fmul.x %fp0,%fp3 # fp3 IS R*S*(A2+S*A4) 7186 7187 fmul.x %fp1,%fp2 # fp2 IS S*(A1+S*(A3+S*A5)) 7188 fadd.x %fp3,%fp0 # fp0 IS R+R*S*(A2+S*A4), 7189 7190 fmov.x (%a1)+,%fp1 # fp1 is lead. pt. 
of 2^(J/64) 7191 fadd.x %fp2,%fp0 # fp0 is EXP(R) - 1 7192 7193#--Step 5 7194#--final reconstruction process 7195#--EXP(X) = 2^M * ( 2^(J/64) + 2^(J/64)*(EXP(R)-1) ) 7196 7197 fmul.x %fp1,%fp0 # 2^(J/64)*(Exp(R)-1) 7198 fmovm.x (%sp)+,&0x30 # fp2/fp3 restored {%fp2/%fp3} 7199 fadd.s (%a1),%fp0 # accurate 2^(J/64) 7200 7201 fadd.x %fp1,%fp0 # 2^(J/64) + 2^(J/64)*... 7202 mov.l ADJFLAG(%a6),%d1 7203 7204#--Step 6 7205 tst.l %d1 7206 beq.b NORMAL 7207ADJUST: 7208 fmul.x ADJSCALE(%a6),%fp0 # extra 2^(M1) factor, set up only on the EEXPBIG path 7209NORMAL: 7210 fmov.l %d0,%fpcr # restore user FPCR 7211 mov.b &FMUL_OP,%d1 # last inst is MUL 7212 fmul.x SCALE(%a6),%fp0 # multiply 2^(M) 7213 bra t_catch 7214 7215EXPSM: 7216#--Step 7 7217 fmovm.x (%a0),&0x80 # load X 7218 fmov.l %d0,%fpcr # restore user FPCR 7219 fadd.s &0x3F800000,%fp0 # 1+X in user mode 7220 bra t_pinx2 7221 7222EEXPBIG: 7223#--Step 8 7224 cmp.l %d1,&0x400CB27C # 16480 log2 7225 bgt.b EXP2BIG 7226#--Steps 8.2 -- 8.6 7227 fmov.x (%a0),%fp0 # load input from (a0) 7228 7229 fmov.x %fp0,%fp1 7230 fmul.s &0x42B8AA3B,%fp0 # 64/log2 * X 7231 fmovm.x &0xc,-(%sp) # save fp2/fp3 {%fp2/%fp3} 7232 mov.l &1,ADJFLAG(%a6) # request the ADJSCALE multiply in Step 6 7233 fmov.l %fp0,%d1 # N = int( X * 64/log2 ) 7234 lea EEXPTBL(%pc),%a1 # a1 = base of the 2^(J/64) table 7235 fmov.l %d1,%fp0 # convert to floating-format 7236 mov.l %d1,L_SCR1(%a6) # save N temporarily 7237 and.l &0x3F,%d1 # d1 is J = N mod 64 7238 lsl.l &4,%d1 # J * 16 (each table entry is 4 longs) 7239 add.l %d1,%a1 # address of 2^(J/64) 7240 mov.l L_SCR1(%a6),%d1 7241 asr.l &6,%d1 # d1 is K 7242 mov.l %d1,L_SCR1(%a6) # save K temporarily 7243 asr.l &1,%d1 # d1 is M1 7244 sub.l %d1,L_SCR1(%a6) # L_SCR1 is M = K - M1 7245 add.w &0x3FFF,%d1 # biased expo. of 2^(M1) 7246 mov.w %d1,ADJSCALE(%a6) # ADJSCALE := 2^(M1) 7247 mov.l &0x80000000,ADJSCALE+4(%a6) 7248 clr.l ADJSCALE+8(%a6) 7249 mov.l L_SCR1(%a6),%d1 # d1 is M 7250 add.w &0x3FFF,%d1 # biased expo. of 2^(M) 7251 bra.w EXPCONT1 # go back to Step 3 7252 7253EXP2BIG: 7254#--Step 9 7255 tst.b (%a0) # is X positive or negative? 
7256 bmi t_unfl2 7257 bra t_ovfl2 7258 7259 global setoxd 7260setoxd: 7261#--entry point for EXP(X), X is denormalized 7262 mov.l (%a0),-(%sp) 7263 andi.l &0x80000000,(%sp) 7264 ori.l &0x00800000,(%sp) # sign(X)*2^(-126) 7265 7266 fmov.s &0x3F800000,%fp0 # fp0 is 1 7267 7268 fmov.l %d0,%fpcr # restore user FPCR 7269 fadd.s (%sp)+,%fp0 # 1 + sign(X)*2^(-126) in user mode 7270 bra t_pinx2 7271 7272 global setoxm1 7273setoxm1: 7274#--entry point for EXPM1(X), here X is finite, non-zero, non-NaN 7275 7276#--Step 1. 7277#--Step 1.1 7278 mov.l (%a0),%d1 # load part of input X 7279 and.l &0x7FFF0000,%d1 # biased expo. of X 7280 cmp.l %d1,&0x3FFD0000 # 1/4 7281 bge.b EM1CON1 # |X| >= 1/4 7282 bra EM1SM # |X| < 1/4 7283 7284EM1CON1: 7285#--Step 1.3 7286#--The case |X| >= 1/4 7287 mov.w 4(%a0),%d1 # expo. and partial sig. of |X| 7288 cmp.l %d1,&0x4004C215 # 70log2 rounded up to 16 bits 7289 ble.b EM1MAIN # 1/4 <= |X| <= 70log2 7290 bra EM1BIG # |X| > 70log2 7291 7292EM1MAIN: 7293#--Step 2. 7294#--This is the case: 1/4 <= |X| <= 70 log2. 7295 fmov.x (%a0),%fp0 # load input from (a0) 7296 7297 fmov.x %fp0,%fp1 7298 fmul.s &0x42B8AA3B,%fp0 # 64/log2 * X 7299 fmovm.x &0xc,-(%sp) # save fp2/fp3 {%fp2/%fp3} 7300 fmov.l %fp0,%d1 # N = int( X * 64/log2 ) 7301 lea EEXPTBL(%pc),%a1 # a1 = base of the 2^(J/64) table 7302 fmov.l %d1,%fp0 # convert to floating-format 7303 7304 mov.l %d1,L_SCR1(%a6) # save N temporarily 7305 and.l &0x3F,%d1 # d1 is J = N mod 64 7306 lsl.l &4,%d1 # J * 16 (each table entry is 4 longs) 7307 add.l %d1,%a1 # address of 2^(J/64) 7308 mov.l L_SCR1(%a6),%d1 7309 asr.l &6,%d1 # d1 is M 7310 mov.l %d1,L_SCR1(%a6) # save a copy of M 7311 7312#--Step 3. 7313#--fp2,fp3 saved on the stack. fp0 is N, fp1 is X, 7314#--a1 points to 2^(J/64), d1 and L_SCR1 both contain M 7315 fmov.x %fp0,%fp2 7316 fmul.s &0xBC317218,%fp0 # N * L1, L1 = lead(-log2/64) 7317 fmul.x L2(%pc),%fp2 # N * L2, L1+L2 = -log2/64 7318 fadd.x %fp1,%fp0 # X + N*L1 7319 fadd.x %fp2,%fp0 # fp0 is R, reduced arg. 7320 add.w &0x3FFF,%d1 # d1 is biased expo. of 2^M 7321 7322#--Step 4. 
7323#--WE NOW COMPUTE EXP(R)-1 BY A POLYNOMIAL 7324#-- R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*(A5 + R*A6))))) 7325#--TO FULLY USE THE PIPELINE, WE COMPUTE S = R*R 7326#--[R*S*(A2+S*(A4+S*A6))] + [R+S*(A1+S*(A3+S*A5))] 7327 7328 fmov.x %fp0,%fp1 7329 fmul.x %fp1,%fp1 # fp1 IS S = R*R 7330 7331 fmov.s &0x3950097B,%fp2 # fp2 IS A6 7332 7333 fmul.x %fp1,%fp2 # fp2 IS S*A6 7334 fmov.x %fp1,%fp3 7335 fmul.s &0x3AB60B6A,%fp3 # fp3 IS S*A5 7336 7337 fadd.d EM1A4(%pc),%fp2 # fp2 IS A4+S*A6 7338 fadd.d EM1A3(%pc),%fp3 # fp3 IS A3+S*A5 7339 mov.w %d1,SC(%a6) # SC is 2^(M) in extended 7340 mov.l &0x80000000,SC+4(%a6) 7341 clr.l SC+8(%a6) 7342 7343 fmul.x %fp1,%fp2 # fp2 IS S*(A4+S*A6) 7344 mov.l L_SCR1(%a6),%d1 # d1 is M 7345 neg.w %d1 # d1 is -M 7346 fmul.x %fp1,%fp3 # fp3 IS S*(A3+S*A5) 7347 add.w &0x3FFF,%d1 # biased expo. of 2^(-M) 7348 fadd.d EM1A2(%pc),%fp2 # fp2 IS A2+S*(A4+S*A6) 7349 fadd.s &0x3F000000,%fp3 # fp3 IS A1+S*(A3+S*A5) 7350 7351 fmul.x %fp1,%fp2 # fp2 IS S*(A2+S*(A4+S*A6)) 7352 or.w &0x8000,%d1 # signed/expo. 
of -2^(-M) 7353 mov.w %d1,ONEBYSC(%a6) # OnebySc is -2^(-M) 7354 mov.l &0x80000000,ONEBYSC+4(%a6) 7355 clr.l ONEBYSC+8(%a6) 7356 fmul.x %fp3,%fp1 # fp1 IS S*(A1+S*(A3+S*A5)) 7357 7358 fmul.x %fp0,%fp2 # fp2 IS R*S*(A2+S*(A4+S*A6)) 7359 fadd.x %fp1,%fp0 # fp0 IS R+S*(A1+S*(A3+S*A5)) 7360 7361 fadd.x %fp2,%fp0 # fp0 IS EXP(R)-1 7362 7363 fmovm.x (%sp)+,&0x30 # fp2/fp3 restored {%fp2/%fp3} 7364 7365#--Step 5 7366#--Compute 2^(J/64)*p 7367 7368 fmul.x (%a1),%fp0 # 2^(J/64)*(Exp(R)-1) 7369 7370#--Step 6 7371#--Step 6.1 7372 mov.l L_SCR1(%a6),%d1 # retrieve M 7373 cmp.l %d1,&63 7374 ble.b MLE63 7375#--Step 6.2 M >= 64 7376 fmov.s 12(%a1),%fp1 # fp1 is t 7377 fadd.x ONEBYSC(%a6),%fp1 # fp1 is t+OnebySc 7378 fadd.x %fp1,%fp0 # p+(t+OnebySc), fp1 released 7379 fadd.x (%a1),%fp0 # T+(p+(t+OnebySc)) 7380 bra EM1SCALE 7381MLE63: 7382#--Step 6.3 M <= 63 7383 cmp.l %d1,&-3 7384 bge.b MGEN3 7385MLTN3: 7386#--Step 6.4 M <= -4 7387 fadd.s 12(%a1),%fp0 # p+t 7388 fadd.x (%a1),%fp0 # T+(p+t) 7389 fadd.x ONEBYSC(%a6),%fp0 # OnebySc + (T+(p+t)) 7390 bra EM1SCALE 7391MGEN3: 7392#--Step 6.5 -3 <= M <= 63 7393 fmov.x (%a1)+,%fp1 # fp1 is T 7394 fadd.s (%a1),%fp0 # fp0 is p+t 7395 fadd.x ONEBYSC(%a6),%fp1 # fp1 is T+OnebySc 7396 fadd.x %fp1,%fp0 # (T+OnebySc)+(p+t) 7397 7398EM1SCALE: 7399#--Step 6.6 7400 fmov.l %d0,%fpcr # restore user FPCR 7401 fmul.x SC(%a6),%fp0 # Step 6.7: ans := Sc * ans 7402 bra t_inx2 7403 7404EM1SM: 7405#--Step 7 |X| < 1/4. 
7406 cmp.l %d1,&0x3FBE0000 # 2^(-65) 7407 bge.b EM1POLY 7408 7409EM1TINY: 7410#--Step 8 |X| < 2^(-65) 7411 cmp.l %d1,&0x00330000 # 2^(-16312) 7412 blt.b EM12TINY 7413#--Step 8.2 7414 mov.l &0x80010000,SC(%a6) # SC is -2^(-16382) 7415 mov.l &0x80000000,SC+4(%a6) 7416 clr.l SC+8(%a6) 7417 fmov.x (%a0),%fp0 # fp0 is X 7418 fmov.l %d0,%fpcr # restore user FPCR 7419 mov.b &FADD_OP,%d1 # last inst is ADD 7420 fadd.x SC(%a6),%fp0 # X - 2^(-16382) 7421 bra t_catch 7422 7423EM12TINY: 7424#--Step 8.3 7425 fmov.x (%a0),%fp0 # fp0 is X 7426 fmul.d TWO140(%pc),%fp0 # X * 2^(140) 7427 mov.l &0x80010000,SC(%a6) # SC is -2^(-16382) 7428 mov.l &0x80000000,SC+4(%a6) 7429 clr.l SC+8(%a6) 7430 fadd.x SC(%a6),%fp0 # scaled X - 2^(-16382) 7431 fmov.l %d0,%fpcr # restore user FPCR 7432 mov.b &FMUL_OP,%d1 # last inst is MUL 7433 fmul.d TWON140(%pc),%fp0 # undo the scaling: * 2^(-140) 7434 bra t_catch 7435 7436EM1POLY: 7437#--Step 9 exp(X)-1 by a simple polynomial 7438 fmov.x (%a0),%fp0 # fp0 is X 7439 fmul.x %fp0,%fp0 # fp0 is S := X*X 7440 fmovm.x &0xc,-(%sp) # save fp2/fp3 {%fp2/%fp3} 7441 fmov.s &0x2F30CAA8,%fp1 # fp1 is B12 7442 fmul.x %fp0,%fp1 # fp1 is S*B12 7443 fmov.s &0x310F8290,%fp2 # fp2 is B11 7444 fadd.s &0x32D73220,%fp1 # fp1 is B10+S*B12 7445 7446 fmul.x %fp0,%fp2 # fp2 is S*B11 7447 fmul.x %fp0,%fp1 # fp1 is S*(B10 + ... 7448 7449 fadd.s &0x3493F281,%fp2 # fp2 is B9+S*... 7450 fadd.d EM1B8(%pc),%fp1 # fp1 is B8+S*... 7451 7452 fmul.x %fp0,%fp2 # fp2 is S*(B9+... 7453 fmul.x %fp0,%fp1 # fp1 is S*(B8+... 7454 7455 fadd.d EM1B7(%pc),%fp2 # fp2 is B7+S*... 7456 fadd.d EM1B6(%pc),%fp1 # fp1 is B6+S*... 7457 7458 fmul.x %fp0,%fp2 # fp2 is S*(B7+... 7459 fmul.x %fp0,%fp1 # fp1 is S*(B6+... 7460 7461 fadd.d EM1B5(%pc),%fp2 # fp2 is B5+S*... 7462 fadd.d EM1B4(%pc),%fp1 # fp1 is B4+S*... 7463 7464 fmul.x %fp0,%fp2 # fp2 is S*(B5+... 7465 fmul.x %fp0,%fp1 # fp1 is S*(B4+... 7466 7467 fadd.d EM1B3(%pc),%fp2 # fp2 is B3+S*... 7468 fadd.x EM1B2(%pc),%fp1 # fp1 is B2+S*... 7469 7470 fmul.x %fp0,%fp2 # fp2 is S*(B3+... 7471 fmul.x %fp0,%fp1 # fp1 is S*(B2+... 7472 7473 fmul.x %fp0,%fp2 # fp2 is S*S*(B3+...) 7474 fmul.x (%a0),%fp1 # fp1 is X*S*(B2... 
7475 7476 fmul.s &0x3F000000,%fp0 # fp0 is S*B1 7477 fadd.x %fp2,%fp1 # fp1 is Q 7478 7479 fmovm.x (%sp)+,&0x30 # fp2/fp3 restored {%fp2/%fp3} 7480 7481 fadd.x %fp1,%fp0 # fp0 is S*B1+Q 7482 7483 fmov.l %d0,%fpcr # restore user FPCR 7484 fadd.x (%a0),%fp0 # X + (S*B1+Q) 7485 bra t_inx2 7486 7487EM1BIG: 7488#--Step 10 |X| > 70 log2 7489 mov.l (%a0),%d1 # sign/expo. long of X 7490 cmp.l %d1,&0 7491 bgt.w EXPC1 # X > 0: exp(X)-1 treated as exp(X), continue in setox 7492#--Step 10.2 7493 fmov.s &0xBF800000,%fp0 # fp0 is -1 7494 fmov.l %d0,%fpcr # restore user FPCR 7495 fadd.s &0x00800000,%fp0 # -1 + 2^(-126) 7496 bra t_minx2 7497 7498 global setoxm1d 7499setoxm1d: 7500#--entry point for EXPM1(X), here X is denormalized 7501#--Step 0. 7502 bra t_extdnrm 7503 7504######################################################################### 7505# sgetexp(): returns the exponent portion of the input argument. # 7506# The exponent bias is removed and the exponent value is # 7507# returned as an extended precision number in fp0. # 7508# sgetexpd(): handles denormalized numbers. # 7509# # 7510# sgetman(): extracts the mantissa of the input argument. The # 7511# mantissa is converted to an extended precision number w/ # 7512# an exponent of $3fff and is returned in fp0. The range of # 7513# the result is [1.0 - 2.0). # 7514# sgetmand(): handles denormalized numbers. 
# 7515# # 7516# INPUT *************************************************************** # 7517# a0 = pointer to extended precision input # 7518# # 7519# OUTPUT ************************************************************** # 7520# fp0 = exponent(X) or mantissa(X) # 7521# # 7522######################################################################### 7523 7524 global sgetexp 7525sgetexp: 7526 mov.w SRC_EX(%a0),%d0 # get the exponent 7527 bclr &0xf,%d0 # clear the sign bit 7528 subi.w &0x3fff,%d0 # subtract off the bias 7529 fmov.w %d0,%fp0 # return exp in fp0 7530 blt.b sgetexpn # unbiased exp is negative (NOTE(review): assumes fmov.w leaves the CCR from subi.w intact) 7531 rts 7532 7533sgetexpn: 7534 mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit 7535 rts 7536 7537 global sgetexpd 7538sgetexpd: 7539 bsr.l norm # normalize 7540 neg.w %d0 # new exp = -(shft amt) 7541 subi.w &0x3fff,%d0 # subtract off the bias 7542 fmov.w %d0,%fp0 # return exp in fp0 7543 mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit 7544 rts 7545 7546 global sgetman 7547sgetman: 7548 mov.w SRC_EX(%a0),%d0 # get the exp 7549 ori.w &0x7fff,%d0 # set all expo. bits, sign bit kept 7550 bclr &0xe,%d0 # make it the new exp +-3fff 7551 7552# here, we build the result in a tmp location so as not to disturb the input 7553 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) # copy to tmp loc 7554 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) # copy to tmp loc 7555 mov.w %d0,FP_SCR0_EX(%a6) # insert new exponent 7556 fmov.x FP_SCR0(%a6),%fp0 # put new value back in fp0 7557 bmi.b sgetmann # it's negative (NOTE(review): N presumably comes from the mov.w of the sign/exp word above) 7558 rts 7559 7560sgetmann: 7561 mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit 7562 rts 7563 7564# 7565# For denormalized numbers, shift the mantissa until the j-bit = 1, 7566# then load the exponent with +/- $3fff. 
7567# 7568 global sgetmand 7569sgetmand: 7570 bsr.l norm # normalize exponent 7571 bra.b sgetman # then build the result as for a normalized input 7572 7573######################################################################### 7574# scosh(): computes the hyperbolic cosine of a normalized input # 7575# scoshd(): computes the hyperbolic cosine of a denormalized input # 7576# # 7577# INPUT *************************************************************** # 7578# a0 = pointer to extended precision input # 7579# d0 = round precision,mode # 7580# # 7581# OUTPUT ************************************************************** # 7582# fp0 = cosh(X) # 7583# # 7584# ACCURACY and MONOTONICITY ******************************************* # 7585# The returned result is within 3 ulps in 64 significant bit, # 7586# i.e. within 0.5001 ulp to 53 bits if the result is subsequently # 7587# rounded to double precision. The result is provably monotonic # 7588# in double precision. # 7589# # 7590# ALGORITHM *********************************************************** # 7591# # 7592# COSH # 7593# 1. If |X| > 16380 log2, go to 3. # 7594# # 7595# 2. (|X| <= 16380 log2) Cosh(X) is obtained by the formulae # 7596# Y = |X|, z = exp(Y), and # 7597# cosh(X) = (1/2)*( z + 1/z ). # 7598# Exit. # 7599# # 7600# 3. (|X| > 16380 log2). If |X| > 16480 log2, go to 5. # 7601# # 7602# 4. (16380 log2 < |X| <= 16480 log2) # 7603# cosh(X) = exp(|X|)/2. # 7604# However, invoking exp(|X|) may cause premature # 7605# overflow. Thus, we calculate cosh(X) as follows: # 7606# Y := |X| # 7607# Fact := 2**(16380) # 7608# Y' := Y - 16381 log2 # 7609# cosh(X) := Fact * exp(Y'). # 7610# Exit. # 7611# # 7612# 5. (|X| > 16480 log2) cosh(X) must overflow. Return # 7613# Huge*Huge to generate overflow and an infinity with # 7614# the appropriate sign. Huge is the largest finite number # 7615# in extended format. Exit. 
# 7616# # 7617######################################################################### 7618 7619TWO16380: 7620 long 0x7FFB0000,0x80000000,0x00000000,0x00000000 7621 7622 global scosh 7623scosh: 7624 fmov.x (%a0),%fp0 # LOAD INPUT 7625 7626 mov.l (%a0),%d1 7627 mov.w 4(%a0),%d1 # d1 = sign/expo. and top 16 sig. bits (compacted X) 7628 and.l &0x7FFFFFFF,%d1 # drop the sign: compacted |X| 7629 cmp.l %d1,&0x400CB167 # same 16380 log2 constant as used in setox 7630 bgt.b COSHBIG 7631 7632#--THIS IS THE USUAL CASE, |X| <= 16380 LOG2 7633#--COSH(X) = (1/2) * ( EXP(X) + 1/EXP(X) ) 7634 7635 fabs.x %fp0 # |X| 7636 7637 mov.l %d0,-(%sp) # save round precision,mode 7638 clr.l %d0 # d0 = 0 for the setox call 7639 fmovm.x &0x01,-(%sp) # save |X| to stack 7640 lea (%sp),%a0 # pass ptr to |X| 7641 bsr setox # FP0 IS EXP(|X|) 7642 add.l &0xc,%sp # erase |X| from stack 7643 fmul.s &0x3F000000,%fp0 # (1/2)EXP(|X|) 7644 mov.l (%sp)+,%d0 # recover round precision,mode 7645 7646 fmov.s &0x3E800000,%fp1 # (1/4) 7647 fdiv.x %fp0,%fp1 # 1/(2 EXP(|X|)) 7648 7649 fmov.l %d0,%fpcr # restore users round prec,mode 7650 mov.b &FADD_OP,%d1 # last inst is ADD 7651 fadd.x %fp1,%fp0 # (1/2)EXP(|X|) + 1/(2 EXP(|X|)) 7652 bra t_catch 7653 7654COSHBIG: 7655 cmp.l %d1,&0x400CB2B3 7656 bgt.b COSHHUGE 7657 7658 fabs.x %fp0 7659 fsub.d T1(%pc),%fp0 # (|X|-16381LOG2_LEAD) 7660 fsub.d T2(%pc),%fp0 # |X| - 16381 LOG2, ACCURATE 7661 7662 mov.l %d0,-(%sp) # save round precision,mode 7663 clr.l %d0 # d0 = 0 for the setox call 7664 fmovm.x &0x01,-(%sp) # save fp0 to stack 7665 lea (%sp),%a0 # pass ptr to fp0 7666 bsr setox 7667 add.l &0xc,%sp # clear fp0 from stack 7668 mov.l (%sp)+,%d0 # recover round precision,mode 7669 7670 fmov.l %d0,%fpcr # restore users round prec,mode 7671 mov.b &FMUL_OP,%d1 # last inst is MUL 7672 fmul.x TWO16380(%pc),%fp0 # Fact * exp(Y'), Fact = 2**(16380) 7673 bra t_catch 7674 7675COSHHUGE: 7676 bra t_ovfl2 7677 7678 global scoshd 7679#--COSH(X) = 1 FOR DENORMALIZED X 7680scoshd: 7681 fmov.s &0x3F800000,%fp0 # fp0 is 1 7682 7683 fmov.l %d0,%fpcr # restore users round prec,mode 7684 fadd.s &0x00800000,%fp0 # 1 + 2^(-126) 7685 bra t_pinx2 7686 7687######################################################################### 7688# ssinh(): computes the hyperbolic sine of a normalized input # 7689# ssinhd(): computes the hyperbolic sine of a denormalized input # 7690# # 7691# INPUT *************************************************************** # 7692# a0 = pointer to 
extended precision input # 7693# d0 = round precision,mode # 7694# # 7695# OUTPUT ************************************************************** # 7696# fp0 = sinh(X) # 7697# # 7698# ACCURACY and MONOTONICITY ******************************************* # 7699# The returned result is within 3 ulps in 64 significant bit, # 7700# i.e. within 0.5001 ulp to 53 bits if the result is subsequently # 7701# rounded to double precision. The result is provably monotonic # 7702# in double precision. # 7703# # 7704# ALGORITHM *********************************************************** # 7705# # 7706# SINH # 7707# 1. If |X| > 16380 log2, go to 3. # 7708# # 7709# 2. (|X| <= 16380 log2) Sinh(X) is obtained by the formula # 7710# y = |X|, sgn = sign(X), and z = expm1(Y), # 7711# sinh(X) = sgn*(1/2)*( z + z/(1+z) ). # 7712# Exit. # 7713# # 7714# 3. If |X| > 16480 log2, go to 5. # 7715# # 7716# 4. (16380 log2 < |X| <= 16480 log2) # 7717# sinh(X) = sign(X) * exp(|X|)/2. # 7718# However, invoking exp(|X|) may cause premature overflow. # 7719# Thus, we calculate sinh(X) as follows: # 7720# Y := |X| # 7721# sgn := sign(X) # 7722# sgnFact := sgn * 2**(16380) # 7723# Y' := Y - 16381 log2 # 7724# sinh(X) := sgnFact * exp(Y'). # 7725# Exit. # 7726# # 7727# 5. (|X| > 16480 log2) sinh(X) must overflow. Return # 7728# sign(X)*Huge*Huge to generate overflow and an infinity with # 7729# the appropriate sign. Huge is the largest finite number in # 7730# extended format. Exit. 
# 7731# # 7732######################################################################### 7733 7734 global ssinh 7735ssinh: 7736 fmov.x (%a0),%fp0 # LOAD INPUT 7737 7738 mov.l (%a0),%d1 7739 mov.w 4(%a0),%d1 # d1 = sign/expo. and top 16 sig. bits 7740 mov.l %d1,%a1 # save (compacted) operand 7741 and.l &0x7FFFFFFF,%d1 # drop the sign: compacted |X| 7742 cmp.l %d1,&0x400CB167 # same 16380 log2 constant as used in setox 7743 bgt.b SINHBIG 7744 7745#--THIS IS THE USUAL CASE, |X| <= 16380 LOG2 7746#--Y = |X|, Z = EXPM1(Y), SINH(X) = SIGN(X)*(1/2)*( Z + Z/(1+Z) ) 7747 7748 fabs.x %fp0 # Y = |X| 7749 7750 movm.l &0x8040,-(%sp) # {a1/d0} 7751 fmovm.x &0x01,-(%sp) # save Y on stack 7752 lea (%sp),%a0 # pass ptr to Y 7753 clr.l %d0 # d0 = 0 for the setoxm1 call 7754 bsr setoxm1 # FP0 IS Z = EXPM1(Y) 7755 add.l &0xc,%sp # clear Y from stack 7756 fmov.l &0,%fpcr # FPCR := 0 for the intermediate operations below 7757 movm.l (%sp)+,&0x0201 # {a1/d0} 7758 7759 fmov.x %fp0,%fp1 7760 fadd.s &0x3F800000,%fp1 # 1+Z 7761 fmov.x %fp0,-(%sp) # keep a copy of Z on the stack 7762 fdiv.x %fp1,%fp0 # Z/(1+Z) 7763 mov.l %a1,%d1 7764 and.l &0x80000000,%d1 # sign bit of X 7765 or.l &0x3F000000,%d1 # d1 = SIGN(X)*(1/2) in sgl fmt 7766 fadd.x (%sp)+,%fp0 # Z + Z/(1+Z) 7767 mov.l %d1,-(%sp) # pass SIGN(X)*(1/2) via the stack 7768 7769 fmov.l %d0,%fpcr # restore users round prec,mode 7770 mov.b &FMUL_OP,%d1 # last inst is MUL 7771 fmul.s (%sp)+,%fp0 # last fp inst - possible exceptions set 7772 bra t_catch 7773 7774SINHBIG: 7775 cmp.l %d1,&0x400CB2B3 7776 bgt t_ovfl 7777 fabs.x %fp0 7778 fsub.d T1(%pc),%fp0 # (|X|-16381LOG2_LEAD) 7779 mov.l &0,-(%sp) 7780 mov.l &0x80000000,-(%sp) 7781 mov.l %a1,%d1 7782 and.l &0x80000000,%d1 # sign bit of X 7783 or.l &0x7FFB0000,%d1 # sign(X) with the exponent word of 2**(16380) 7784 mov.l %d1,-(%sp) # sgnFact = sign(X)*2**(16380), EXTENDED FMT 7785 fsub.d T2(%pc),%fp0 # |X| - 16381 LOG2, ACCURATE 7786 7787 mov.l %d0,-(%sp) # save round precision,mode 7788 clr.l %d0 # d0 = 0 for the setox call 7789 fmovm.x &0x01,-(%sp) # save fp0 on stack 7790 lea (%sp),%a0 # pass ptr to fp0 7791 bsr setox 7792 add.l &0xc,%sp # clear fp0 from stack 7793 7794 mov.l (%sp)+,%d0 # recover round precision,mode 7795 fmov.l %d0,%fpcr # restore users round prec,mode 7796 mov.b &FMUL_OP,%d1 # last inst is MUL 7797 fmul.x (%sp)+,%fp0 # sgnFact * exp(Y'), possible exception 7798 bra t_catch 7799 7800 global ssinhd 7801#--SINH(X) = X FOR DENORMALIZED X 7802ssinhd: 7803 bra t_extdnrm 7804 7805######################################################################### 
7806# stanh(): computes the hyperbolic tangent of a normalized input # 7807# stanhd(): computes the hyperbolic tangent of a denormalized input # 7808# # 7809# INPUT *************************************************************** # 7810# a0 = pointer to extended precision input # 7811# d0 = round precision,mode # 7812# # 7813# OUTPUT ************************************************************** # 7814# fp0 = tanh(X) # 7815# # 7816# ACCURACY and MONOTONICITY ******************************************* # 7817# The returned result is within 3 ulps in 64 significant bit, # 7818# i.e. within 0.5001 ulp to 53 bits if the result is subsequently # 7819# rounded to double precision. The result is provably monotonic # 7820# in double precision. # 7821# # 7822# ALGORITHM *********************************************************** # 7823# # 7824# TANH # 7825# 1. If |X| >= (5/2) log2 or |X| <= 2**(-40), go to 3. # 7826# # 7827# 2. (2**(-40) < |X| < (5/2) log2) Calculate tanh(X) by # 7828# sgn := sign(X), y := 2|X|, z := expm1(Y), and # 7829# tanh(X) = sgn*( z/(2+z) ). # 7830# Exit. # 7831# # 7832# 3. (|X| <= 2**(-40) or |X| >= (5/2) log2). If |X| < 1, # 7833# go to 7. # 7834# # 7835# 4. (|X| >= (5/2) log2) If |X| >= 50 log2, go to 6. # 7836# # 7837# 5. ((5/2) log2 <= |X| < 50 log2) Calculate tanh(X) by # 7838# sgn := sign(X), y := 2|X|, z := exp(Y), # 7839# tanh(X) = sgn - [ sgn*2/(1+z) ]. # 7840# Exit. # 7841# # 7842# 6. (|X| >= 50 log2) Tanh(X) = +-1 (round to nearest). Thus, we # 7843# calculate Tanh(X) by # 7844# sgn := sign(X), Tiny := 2**(-126), # 7845# tanh(X) := sgn - sgn*Tiny. # 7846# Exit. # 7847# # 7848# 7. (|X| < 2**(-40)). Tanh(X) = X. Exit. 
# 7849# # 7850######################################################################### 7851 7852 set X,FP_SCR0 7853 set XFRAC,X+4 7854 7855 set SGN,L_SCR3 7856 7857 set V,FP_SCR0 7858 7859 global stanh 7860stanh: 7861 fmov.x (%a0),%fp0 # LOAD INPUT 7862 7863 fmov.x %fp0,X(%a6) # keep a copy of X in scratch 7864 mov.l (%a0),%d1 7865 mov.w 4(%a0),%d1 # d1 = sign/expo. and top 16 sig. bits 7866 mov.l %d1,X(%a6) # 1st long of X := compacted form 7867 and.l &0x7FFFFFFF,%d1 # drop the sign: compacted |X| 7868 cmp.l %d1, &0x3fd78000 # is |X| < 2^(-40)? 7869 blt.w TANHBORS # yes 7870 cmp.l %d1, &0x3fffddce # is |X| > (5/2)LOG2? 7871 bgt.w TANHBORS # yes 7872 7873#--THIS IS THE USUAL CASE 7874#--Y = 2|X|, Z = EXPM1(Y), TANH(X) = SIGN(X) * Z / (Z+2). 7875 7876 mov.l X(%a6),%d1 7877 mov.l %d1,SGN(%a6) 7878 and.l &0x7FFF0000,%d1 7879 add.l &0x00010000,%d1 # EXPONENT OF 2|X| 7880 mov.l %d1,X(%a6) 7881 and.l &0x80000000,SGN(%a6) # SGN = sign bit of X 7882 fmov.x X(%a6),%fp0 # FP0 IS Y = 2|X| 7883 7884 mov.l %d0,-(%sp) # save round precision,mode 7885 clr.l %d0 # d0 = 0 for the setoxm1 call 7886 fmovm.x &0x1,-(%sp) # save Y on stack 7887 lea (%sp),%a0 # pass ptr to Y 7888 bsr setoxm1 # FP0 IS Z = EXPM1(Y) 7889 add.l &0xc,%sp # clear Y from stack 7890 mov.l (%sp)+,%d0 # recover round precision,mode 7891 7892 fmov.x %fp0,%fp1 7893 fadd.s &0x40000000,%fp1 # Z+2 7894 mov.l SGN(%a6),%d1 7895 fmov.x %fp1,V(%a6) 7896 eor.l %d1,V(%a6) # V = SIGN(X)*(Z+2) 7897 7898 fmov.l %d0,%fpcr # restore users round prec,mode 7899 fdiv.x V(%a6),%fp0 # SIGN(X) * Z / (Z+2) 7900 bra t_inx2 7901 7902TANHBORS: 7903 cmp.l %d1,&0x3FFF8000 # is |X| < 1? 7904 blt.w TANHSM # yes: this is the |X| < 2^(-40) case 7905 7906 cmp.l %d1,&0x40048AA1 # is |X| > 50 LOG2? 7907 bgt.w TANHHUGE # yes 7908 7909#-- (5/2) LOG2 < |X| < 50 LOG2, 7910#--TANH(X) = 1 - (2/[EXP(2X)+1]). LET Y = 2|X|, SGN = SIGN(X), 7911#--TANH(X) = SGN - SGN*2/[EXP(Y)+1]. 
7912 7913 mov.l X(%a6),%d1 7914 mov.l %d1,SGN(%a6) 7915 and.l &0x7FFF0000,%d1 7916 add.l &0x00010000,%d1 # EXPO OF 2|X| 7917 mov.l %d1,X(%a6) # Y = 2|X| 7918 and.l &0x80000000,SGN(%a6) # SGN = sign bit of X 7919 mov.l SGN(%a6),%d1 # NOTE(review): looks redundant, SGN is reloaded after the setox call 7920 fmov.x X(%a6),%fp0 # Y = 2|X| 7921 7922 mov.l %d0,-(%sp) # save round precision,mode 7923 clr.l %d0 # d0 = 0 for the setox call 7924 fmovm.x &0x01,-(%sp) # save Y on stack 7925 lea (%sp),%a0 # pass ptr to Y 7926 bsr setox # FP0 IS EXP(Y) 7927 add.l &0xc,%sp # clear Y from stack 7928 mov.l (%sp)+,%d0 # recover round precision,mode 7929 mov.l SGN(%a6),%d1 7930 fadd.s &0x3F800000,%fp0 # EXP(Y)+1 7931 7932 eor.l &0xC0000000,%d1 # -SIGN(X)*2 7933 fmov.s %d1,%fp1 # -SIGN(X)*2 IN SGL FMT 7934 fdiv.x %fp0,%fp1 # -SIGN(X)*2 / [EXP(Y)+1] 7935 7936 mov.l SGN(%a6),%d1 7937 or.l &0x3F800000,%d1 # SGN 7938 fmov.s %d1,%fp0 # SGN IN SGL FMT 7939 7940 fmov.l %d0,%fpcr # restore users round prec,mode 7941 mov.b &FADD_OP,%d1 # last inst is ADD 7942 fadd.x %fp1,%fp0 # SGN - SGN*2/[EXP(Y)+1] 7943 bra t_inx2 7944 7945TANHSM: 7946 fmov.l %d0,%fpcr # restore users round prec,mode 7947 mov.b &FMOV_OP,%d1 # last inst is MOVE 7948 fmov.x X(%a6),%fp0 # last inst - possible exception set 7949 bra t_catch 7950 7951#---RETURN SGN(X) - SGN(X)EPS 7952TANHHUGE: 7953 mov.l X(%a6),%d1 7954 and.l &0x80000000,%d1 # sign bit of X 7955 or.l &0x3F800000,%d1 # SGN(X)*1 in sgl fmt 7956 fmov.s %d1,%fp0 # fp0 is SGN(X) 7957 and.l &0x80000000,%d1 7958 eor.l &0x80800000,%d1 # -SIGN(X)*EPS 7959 7960 fmov.l %d0,%fpcr # restore users round prec,mode 7961 fadd.s %d1,%fp0 # SGN(X) - SGN(X)*EPS 7962 bra t_inx2 7963 7964 global stanhd 7965#--TANH(X) = X FOR DENORMALIZED X 7966stanhd: 7967 bra t_extdnrm 7968 7969######################################################################### 7970# slogn(): computes the natural logarithm of a normalized input # 7971# slognd(): computes the natural logarithm of a denormalized input # 7972# slognp1(): computes the log(1+X) of a normalized input # 7973# slognp1d(): computes the log(1+X) of a denormalized input # 7974# # 7975# INPUT *************************************************************** # 7976# a0 = pointer to extended precision input # 7977# d0 
= round precision,mode # 7978# # 7979# OUTPUT ************************************************************** # 7980# fp0 = log(X) or log(1+X) # 7981# # 7982# ACCURACY and MONOTONICITY ******************************************* # 7983# The returned result is within 2 ulps in 64 significant bit, # 7984# i.e. within 0.5001 ulp to 53 bits if the result is subsequently # 7985# rounded to double precision. The result is provably monotonic # 7986# in double precision. # 7987# # 7988# ALGORITHM *********************************************************** # 7989# LOGN: # 7990# Step 1. If |X-1| < 1/16, approximate log(X) by an odd # 7991# polynomial in u, where u = 2(X-1)/(X+1). Otherwise, # 7992# move on to Step 2. # 7993# # 7994# Step 2. X = 2**k * Y where 1 <= Y < 2. Define F to be the first # 7995# seven significant bits of Y plus 2**(-7), i.e. # 7996# F = 1.xxxxxx1 in base 2 where the six "x" match those # 7997# of Y. Note that |Y-F| <= 2**(-7). # 7998# # 7999# Step 3. Define u = (Y-F)/F. Approximate log(1+u) by a # 8000# polynomial in u, log(1+u) = poly. # 8001# # 8002# Step 4. Reconstruct # 8003# log(X) = log( 2**k * Y ) = k*log(2) + log(F) + log(1+u) # 8004# by k*log(2) + (log(F) + poly). The values of log(F) are # 8005# calculated beforehand and stored in the program. # 8006# # 8007# lognp1: # 8008# Step 1: If |X| < 1/16, approximate log(1+X) by an odd # 8009# polynomial in u where u = 2X/(2+X). Otherwise, move on # 8010# to Step 2. # 8011# # 8012# Step 2: Let 1+X = 2**k * Y, where 1 <= Y < 2. Define F as done # 8013# in Step 2 of the algorithm for LOGN and compute # 8014# log(1+X) as k*log(2) + log(F) + poly where poly # 8015# approximates log(1+u), u = (Y-F)/F. # 8016# # 8017# Implementation Notes: # 8018# Note 1. There are 64 different possible values for F, thus 64 # 8019# log(F)'s need to be tabulated. Moreover, the values of # 8020# 1/F are also tabulated so that the division in (Y-F)/F # 8021# can be performed by a multiplication. # 8022# # 8023# Note 2. 
#             In Step 2 of lognp1, in order to preserve accuracy,       #
#             the value Y-F has to be calculated carefully when         #
#             1/2 <= X < 3/2.                                           #
#                                                                       #
#     Note 3. To fully exploit the pipeline, polynomials are usually    #
#             separated into two parts evaluated independently before   #
#             being added up.                                           #
#                                                                       #
#########################################################################

# LOGOF2: log(2) in extended precision.  LP1CONT1 multiplies it by K to
# form the K*LOG2 term of the result.
LOGOF2:
	long		0x3FFE0000,0xB17217F7,0xD1CF79AC,0x00000000

# Single-precision bit patterns for +1.0, +0.0, +infinity and -1.0.
# The code reads them with a .s operand size (e.g. fadd.s one(%pc),%fp0).
one:
	long		0x3F800000
zero:
	long		0x00000000
infty:
	long		0x7F800000
negone:
	long		0xBF800000

# LOGA1..LOGA6: double-precision coefficients A1..A6 of the polynomial
# that approximates log(1+U) at LP1CONT1 (read with fmul.d / fadd.d).
LOGA6:
	long		0x3FC2499A,0xB5E4040B
LOGA5:
	long		0xBFC555B5,0x848CB7DB

LOGA4:
	long		0x3FC99999,0x987D8730
LOGA3:
	long		0xBFCFFFFF,0xFF6F7E97

LOGA2:
	long		0x3FD55555,0x555555A4
LOGA1:
	long		0xBFE00000,0x00000008

# LOGB1..LOGB5: double-precision coefficients B1..B5 of the odd
# polynomial in U evaluated at LP1CONT2 (read with fmov.d / fadd.d).
LOGB5:
	long		0x3F175496,0xADD7DAD6
LOGB4:
	long		0x3F3C71C2,0xFE80C7E0

LOGB3:
	long		0x3F624924,0x928BCCFF
LOGB2:
	long		0x3F899999,0x999995EC

LOGB1:
	long		0x3FB55555,0x55555555
# TWO: 2.0.  KISNEG1 reads it with fmov.s, so only the first longword
# (single-precision 0x40000000) is consumed there.
TWO:
	long		0x40000000,0x00000000

# LTHOLD: extended-precision 2^(-102) (exponent 0x3f99, mantissa 1.0).
# slognp1 returns its argument unchanged unless |X| exceeds this value.
LTHOLD:
	long		0x3f990000,0x80000000,0x00000000,0x00000000

# LOGTBL: pairs of 16-byte extended-precision entries, one pair per value
# of F = 1.xxxxxx1 (binary): first 1/F, then log(F) 16 bytes after it.
# The code reaches a pair with a byte displacement of 32 * (the six "x"
# bits of F) and later adds 16 to step from 1/F to log(F).
LOGTBL:
	long		0x3FFE0000,0xFE03F80F,0xE03F80FE,0x00000000
	long		0x3FF70000,0xFF015358,0x833C47E2,0x00000000
	long		0x3FFE0000,0xFA232CF2,0x52138AC0,0x00000000
	long		0x3FF90000,0xBDC8D83E,0xAD88D549,0x00000000
	long		0x3FFE0000,0xF6603D98,0x0F6603DA,0x00000000
	long		0x3FFA0000,0x9CF43DCF,0xF5EAFD48,0x00000000
	long		0x3FFE0000,0xF2B9D648,0x0F2B9D65,0x00000000
	long		0x3FFA0000,0xDA16EB88,0xCB8DF614,0x00000000
	long		0x3FFE0000,0xEF2EB71F,0xC4345238,0x00000000
	long		0x3FFB0000,0x8B29B775,0x1BD70743,0x00000000
	long		0x3FFE0000,0xEBBDB2A5,0xC1619C8C,0x00000000
	long		0x3FFB0000,0xA8D839F8,0x30C1FB49,0x00000000
	long		0x3FFE0000,0xE865AC7B,0x7603A197,0x00000000
	long		0x3FFB0000,0xC61A2EB1,0x8CD907AD,0x00000000
	long
0x3FFE0000,0xE525982A,0xF70C880E,0x00000000 8093 long 0x3FFB0000,0xE2F2A47A,0xDE3A18AF,0x00000000 8094 long 0x3FFE0000,0xE1FC780E,0x1FC780E2,0x00000000 8095 long 0x3FFB0000,0xFF64898E,0xDF55D551,0x00000000 8096 long 0x3FFE0000,0xDEE95C4C,0xA037BA57,0x00000000 8097 long 0x3FFC0000,0x8DB956A9,0x7B3D0148,0x00000000 8098 long 0x3FFE0000,0xDBEB61EE,0xD19C5958,0x00000000 8099 long 0x3FFC0000,0x9B8FE100,0xF47BA1DE,0x00000000 8100 long 0x3FFE0000,0xD901B203,0x6406C80E,0x00000000 8101 long 0x3FFC0000,0xA9372F1D,0x0DA1BD17,0x00000000 8102 long 0x3FFE0000,0xD62B80D6,0x2B80D62C,0x00000000 8103 long 0x3FFC0000,0xB6B07F38,0xCE90E46B,0x00000000 8104 long 0x3FFE0000,0xD3680D36,0x80D3680D,0x00000000 8105 long 0x3FFC0000,0xC3FD0329,0x06488481,0x00000000 8106 long 0x3FFE0000,0xD0B69FCB,0xD2580D0B,0x00000000 8107 long 0x3FFC0000,0xD11DE0FF,0x15AB18CA,0x00000000 8108 long 0x3FFE0000,0xCE168A77,0x25080CE1,0x00000000 8109 long 0x3FFC0000,0xDE1433A1,0x6C66B150,0x00000000 8110 long 0x3FFE0000,0xCB8727C0,0x65C393E0,0x00000000 8111 long 0x3FFC0000,0xEAE10B5A,0x7DDC8ADD,0x00000000 8112 long 0x3FFE0000,0xC907DA4E,0x871146AD,0x00000000 8113 long 0x3FFC0000,0xF7856E5E,0xE2C9B291,0x00000000 8114 long 0x3FFE0000,0xC6980C69,0x80C6980C,0x00000000 8115 long 0x3FFD0000,0x82012CA5,0xA68206D7,0x00000000 8116 long 0x3FFE0000,0xC4372F85,0x5D824CA6,0x00000000 8117 long 0x3FFD0000,0x882C5FCD,0x7256A8C5,0x00000000 8118 long 0x3FFE0000,0xC1E4BBD5,0x95F6E947,0x00000000 8119 long 0x3FFD0000,0x8E44C60B,0x4CCFD7DE,0x00000000 8120 long 0x3FFE0000,0xBFA02FE8,0x0BFA02FF,0x00000000 8121 long 0x3FFD0000,0x944AD09E,0xF4351AF6,0x00000000 8122 long 0x3FFE0000,0xBD691047,0x07661AA3,0x00000000 8123 long 0x3FFD0000,0x9A3EECD4,0xC3EAA6B2,0x00000000 8124 long 0x3FFE0000,0xBB3EE721,0xA54D880C,0x00000000 8125 long 0x3FFD0000,0xA0218434,0x353F1DE8,0x00000000 8126 long 0x3FFE0000,0xB92143FA,0x36F5E02E,0x00000000 8127 long 0x3FFD0000,0xA5F2FCAB,0xBBC506DA,0x00000000 8128 long 0x3FFE0000,0xB70FBB5A,0x19BE3659,0x00000000 8129 long 
0x3FFD0000,0xABB3B8BA,0x2AD362A5,0x00000000 8130 long 0x3FFE0000,0xB509E68A,0x9B94821F,0x00000000 8131 long 0x3FFD0000,0xB1641795,0xCE3CA97B,0x00000000 8132 long 0x3FFE0000,0xB30F6352,0x8917C80B,0x00000000 8133 long 0x3FFD0000,0xB7047551,0x5D0F1C61,0x00000000 8134 long 0x3FFE0000,0xB11FD3B8,0x0B11FD3C,0x00000000 8135 long 0x3FFD0000,0xBC952AFE,0xEA3D13E1,0x00000000 8136 long 0x3FFE0000,0xAF3ADDC6,0x80AF3ADE,0x00000000 8137 long 0x3FFD0000,0xC2168ED0,0xF458BA4A,0x00000000 8138 long 0x3FFE0000,0xAD602B58,0x0AD602B6,0x00000000 8139 long 0x3FFD0000,0xC788F439,0xB3163BF1,0x00000000 8140 long 0x3FFE0000,0xAB8F69E2,0x8359CD11,0x00000000 8141 long 0x3FFD0000,0xCCECAC08,0xBF04565D,0x00000000 8142 long 0x3FFE0000,0xA9C84A47,0xA07F5638,0x00000000 8143 long 0x3FFD0000,0xD2420487,0x2DD85160,0x00000000 8144 long 0x3FFE0000,0xA80A80A8,0x0A80A80B,0x00000000 8145 long 0x3FFD0000,0xD7894992,0x3BC3588A,0x00000000 8146 long 0x3FFE0000,0xA655C439,0x2D7B73A8,0x00000000 8147 long 0x3FFD0000,0xDCC2C4B4,0x9887DACC,0x00000000 8148 long 0x3FFE0000,0xA4A9CF1D,0x96833751,0x00000000 8149 long 0x3FFD0000,0xE1EEBD3E,0x6D6A6B9E,0x00000000 8150 long 0x3FFE0000,0xA3065E3F,0xAE7CD0E0,0x00000000 8151 long 0x3FFD0000,0xE70D785C,0x2F9F5BDC,0x00000000 8152 long 0x3FFE0000,0xA16B312E,0xA8FC377D,0x00000000 8153 long 0x3FFD0000,0xEC1F392C,0x5179F283,0x00000000 8154 long 0x3FFE0000,0x9FD809FD,0x809FD80A,0x00000000 8155 long 0x3FFD0000,0xF12440D3,0xE36130E6,0x00000000 8156 long 0x3FFE0000,0x9E4CAD23,0xDD5F3A20,0x00000000 8157 long 0x3FFD0000,0xF61CCE92,0x346600BB,0x00000000 8158 long 0x3FFE0000,0x9CC8E160,0xC3FB19B9,0x00000000 8159 long 0x3FFD0000,0xFB091FD3,0x8145630A,0x00000000 8160 long 0x3FFE0000,0x9B4C6F9E,0xF03A3CAA,0x00000000 8161 long 0x3FFD0000,0xFFE97042,0xBFA4C2AD,0x00000000 8162 long 0x3FFE0000,0x99D722DA,0xBDE58F06,0x00000000 8163 long 0x3FFE0000,0x825EFCED,0x49369330,0x00000000 8164 long 0x3FFE0000,0x9868C809,0x868C8098,0x00000000 8165 long 0x3FFE0000,0x84C37A7A,0xB9A905C9,0x00000000 8166 long 
0x3FFE0000,0x97012E02,0x5C04B809,0x00000000 8167 long 0x3FFE0000,0x87224C2E,0x8E645FB7,0x00000000 8168 long 0x3FFE0000,0x95A02568,0x095A0257,0x00000000 8169 long 0x3FFE0000,0x897B8CAC,0x9F7DE298,0x00000000 8170 long 0x3FFE0000,0x94458094,0x45809446,0x00000000 8171 long 0x3FFE0000,0x8BCF55DE,0xC4CD05FE,0x00000000 8172 long 0x3FFE0000,0x92F11384,0x0497889C,0x00000000 8173 long 0x3FFE0000,0x8E1DC0FB,0x89E125E5,0x00000000 8174 long 0x3FFE0000,0x91A2B3C4,0xD5E6F809,0x00000000 8175 long 0x3FFE0000,0x9066E68C,0x955B6C9B,0x00000000 8176 long 0x3FFE0000,0x905A3863,0x3E06C43B,0x00000000 8177 long 0x3FFE0000,0x92AADE74,0xC7BE59E0,0x00000000 8178 long 0x3FFE0000,0x8F1779D9,0xFDC3A219,0x00000000 8179 long 0x3FFE0000,0x94E9BFF6,0x15845643,0x00000000 8180 long 0x3FFE0000,0x8DDA5202,0x37694809,0x00000000 8181 long 0x3FFE0000,0x9723A1B7,0x20134203,0x00000000 8182 long 0x3FFE0000,0x8CA29C04,0x6514E023,0x00000000 8183 long 0x3FFE0000,0x995899C8,0x90EB8990,0x00000000 8184 long 0x3FFE0000,0x8B70344A,0x139BC75A,0x00000000 8185 long 0x3FFE0000,0x9B88BDAA,0x3A3DAE2F,0x00000000 8186 long 0x3FFE0000,0x8A42F870,0x5669DB46,0x00000000 8187 long 0x3FFE0000,0x9DB4224F,0xFFE1157C,0x00000000 8188 long 0x3FFE0000,0x891AC73A,0xE9819B50,0x00000000 8189 long 0x3FFE0000,0x9FDADC26,0x8B7A12DA,0x00000000 8190 long 0x3FFE0000,0x87F78087,0xF78087F8,0x00000000 8191 long 0x3FFE0000,0xA1FCFF17,0xCE733BD4,0x00000000 8192 long 0x3FFE0000,0x86D90544,0x7A34ACC6,0x00000000 8193 long 0x3FFE0000,0xA41A9E8F,0x5446FB9F,0x00000000 8194 long 0x3FFE0000,0x85BF3761,0x2CEE3C9B,0x00000000 8195 long 0x3FFE0000,0xA633CD7E,0x6771CD8B,0x00000000 8196 long 0x3FFE0000,0x84A9F9C8,0x084A9F9D,0x00000000 8197 long 0x3FFE0000,0xA8489E60,0x0B435A5E,0x00000000 8198 long 0x3FFE0000,0x83993052,0x3FBE3368,0x00000000 8199 long 0x3FFE0000,0xAA59233C,0xCCA4BD49,0x00000000 8200 long 0x3FFE0000,0x828CBFBE,0xB9A020A3,0x00000000 8201 long 0x3FFE0000,0xAC656DAE,0x6BCC4985,0x00000000 8202 long 0x3FFE0000,0x81848DA8,0xFAF0D277,0x00000000 8203 long 
0x3FFE0000,0xAE6D8EE3,0x60BB2468,0x00000000	# operands of the `long` that ends the previous line
	long		0x3FFE0000,0x80808080,0x80808081,0x00000000
	long		0x3FFE0000,0xB07197A2,0x3C46C654,0x00000000

# Scratch-area names used by the log routines (all are offsets from %a6).
# ADJK holds the exponent adjustment for a denormalized input; slogn and
# LP1REAL store 0 in it, slognd stores -k.
	set		ADJK,L_SCR1

# X is the working copy of the operand and XFRAC its 64-bit mantissa.
# XDCARE is the word after the sign/exponent word; LP1REAL copies the top
# mantissa word into it so that the first longword of X reads as the
# "compact form" of the operand.
	set		X,FP_SCR0
	set		XDCARE,X+2
	set		XFRAC,X+4

# F is the table key 1.xxxxxx1 built from the top mantissa bits of Y.
	set		F,FP_SCR1
	set		FFRAC,F+4

# KLOG2 and SAVEU name the same FP_SCR0 slot as X.
	set		KLOG2,FP_SCR0

	set		SAVEU,FP_SCR0

	global		slogn
#--ENTRY POINT FOR LOG(X) FOR X FINITE, NON-ZERO, NOT NAN'S
# In:  a0 = pointer to the extended-precision operand
#      d0 = caller's rounding precision,mode (loaded into FPCR right
#           before the final add of each path)
# Out: fp0 = log(X)
slogn:
	fmov.x		(%a0),%fp0		# LOAD INPUT
	mov.l		&0x00000000,ADJK(%a6)

LOGBGN:
#--FPCR SAVED AND CLEARED, INPUT IS 2^(ADJK)*FP0, FP0 CONTAINS
#--A FINITE, NON-ZERO, NORMALIZED NUMBER.

# d1 = compact form of X: sign/exponent word in the upper half and the
# top 16 mantissa bits in the lower half, so the range tests below are
# plain signed longword compares.
	mov.l		(%a0),%d1
	mov.w		4(%a0),%d1

# Copy the operand into the scratch slot X.
	mov.l		(%a0),X(%a6)
	mov.l		4(%a0),X+4(%a6)
	mov.l		8(%a0),X+8(%a6)

	cmp.l		%d1,&0			# CHECK IF X IS NEGATIVE
	blt.w		LOGNEG			# LOG OF NEGATIVE ARGUMENT IS INVALID
# X IS POSITIVE, CHECK IF X IS NEAR 1
	cmp.l		%d1,&0x3ffef07d		# IS X < 15/16?
	blt.b		LOGMAIN			# YES
	cmp.l		%d1,&0x3fff8841		# IS X > 17/16?
	ble.w		LOGNEAR1		# NO

LOGMAIN:
#--THIS SHOULD BE THE USUAL CASE, X NOT VERY CLOSE TO 1

#--X = 2^(K) * Y, 1 <= Y < 2. THUS, Y = 1.XXXXXXXX....XX IN BINARY.
#--WE DEFINE F = 1.XXXXXX1, I.E. FIRST 7 BITS OF Y AND ATTACH A 1.
#--THE IDEA IS THAT LOG(X) = K*LOG2 + LOG(Y)
#--			   = K*LOG2 + LOG(F) + LOG(1 + (Y-F)/F).
#--NOTE THAT U = (Y-F)/F IS VERY SMALL AND THUS APPROXIMATING
#--LOG(1+U) CAN BE VERY EFFICIENT.
#--ALSO NOTE THAT THE VALUE 1/F IS STORED IN A TABLE SO THAT NO
#--DIVISION IS NEEDED TO CALCULATE (Y-F)/F.

#--GET K, Y, F, AND ADDRESS OF 1/F.
	asr.l		&8,%d1
	asr.l		&8,%d1			# SHIFTED 16 BITS, BIASED EXPO. OF X
	sub.l		&0x3FFF,%d1		# THIS IS K
	add.l		ADJK(%a6),%d1		# ADJUST K, ORIGINAL INPUT MAY BE DENORM.
#--(LOGMAIN, CONTINUED) D1 HOLDS K AT THIS POINT.
	lea		LOGTBL(%pc),%a0		# BASE ADDRESS OF 1/F AND LOG(F)
	fmov.l		%d1,%fp1		# CONVERT K TO FLOATING-POINT FORMAT

#--WHILE THE CONVERSION IS GOING ON, WE GET F AND ADDRESS OF 1/F
	mov.l		&0x3FFF0000,X(%a6)	# X IS NOW Y, I.E. 2^(-K)*X
	mov.l		XFRAC(%a6),FFRAC(%a6)
	and.l		&0xFE000000,FFRAC(%a6)	# FIRST 7 BITS OF Y
	or.l		&0x01000000,FFRAC(%a6)	# GET F: ATTACH A 1 AT THE EIGHTH BIT
	mov.l		FFRAC(%a6),%d1		# READY TO GET ADDRESS OF 1/F
	and.l		&0x7E000000,%d1		# keep the six bits below the leading 1
	asr.l		&8,%d1
	asr.l		&8,%d1
	asr.l		&4,%d1			# SHIFTED 20, D1 IS THE DISPLACEMENT
						# (32 bytes per 1/F,LOG(F) pair)
	add.l		%d1,%a0			# A0 IS THE ADDRESS FOR 1/F

	fmov.x		X(%a6),%fp0
	mov.l		&0x3fff0000,F(%a6)	# F gets exponent 0x3fff like Y
	clr.l		F+8(%a6)		# low mantissa longword of F is zero
	fsub.x		F(%a6),%fp0		# Y-F
	fmovm.x		&0xc,-(%sp)		# SAVE FP2-3 WHILE FP0 IS NOT READY
#--SUMMARY: FP0 IS Y-F, A0 IS ADDRESS OF 1/F, FP1 IS K
#--REGISTERS SAVED: FPCR, FP1, FP2
# NOTE(review): the list above looks inherited from an older version; the
# only save visible here is the fmovm of fp2/fp3 just above.

LP1CONT1:
#--A RE-ENTRY POINT FOR LOGNP1
# Expects: fp0 = Y-F, fp1 = K, a0 = address of 1/F, fp2/fp3 already
# pushed on the stack (KISNEG1 and KISZERO set this up as well).
	fmul.x		(%a0),%fp0		# FP0 IS U = (Y-F)/F
	fmul.x		LOGOF2(%pc),%fp1	# GET K*LOG2 WHILE FP0 IS NOT READY
	fmov.x		%fp0,%fp2
	fmul.x		%fp2,%fp2		# FP2 IS V=U*U
	fmov.x		%fp1,KLOG2(%a6)		# PUT K*LOG2 IN MEMORY, FREE FP1

#--LOG(1+U) IS APPROXIMATED BY
#--U + V*(A1+U*(A2+U*(A3+U*(A4+U*(A5+U*A6))))) WHICH IS
#--[U + V*(A1+V*(A3+V*A5))] + [U*V*(A2+V*(A4+V*A6))]

# fp1 carries the A2/A4/A6 chain, fp2 the A1/A3/A5 chain, fp3 keeps V.
	fmov.x		%fp2,%fp3
	fmov.x		%fp2,%fp1

	fmul.d		LOGA6(%pc),%fp1		# V*A6
	fmul.d		LOGA5(%pc),%fp2		# V*A5

	fadd.d		LOGA4(%pc),%fp1		# A4+V*A6
	fadd.d		LOGA3(%pc),%fp2		# A3+V*A5

	fmul.x		%fp3,%fp1		# V*(A4+V*A6)
	fmul.x		%fp3,%fp2		# V*(A3+V*A5)

	fadd.d		LOGA2(%pc),%fp1		# A2+V*(A4+V*A6)
	fadd.d		LOGA1(%pc),%fp2		# A1+V*(A3+V*A5)

	fmul.x		%fp3,%fp1		# V*(A2+V*(A4+V*A6))
	add.l		&16,%a0			# ADDRESS OF LOG(F)
	fmul.x		%fp3,%fp2		# V*(A1+V*(A3+V*A5))

	fmul.x		%fp0,%fp1		# U*V*(A2+V*(A4+V*A6))
	fadd.x		%fp2,%fp0		# U+V*(A1+V*(A3+V*A5))

	fadd.x		(%a0),%fp1		# LOG(F)+U*V*(A2+V*(A4+V*A6))
#--(LP1CONT1, CONTINUED) FP0 = U+V*(...), FP1 = LOG(F)+U*V*(...)
	fmovm.x		(%sp)+,&0x30		# RESTORE FP2-3
	fadd.x		%fp1,%fp0		# FP0 IS LOG(F) + LOG(1+U)

# The caller's rounding precision,mode (d0) is loaded only now, so the
# add below is the one operation carried out under the user's FPCR.
	fmov.l		%d0,%fpcr
	fadd.x		KLOG2(%a6),%fp0		# FINAL ADD
	bra		t_inx2			# exit routine defined outside this chunk


LOGNEAR1:
# Reached from LOGBGN when the compact form of X lies between the two
# "near 1" thresholds; fp0 still holds the input.

# if the input is exactly equal to one, then exit through ld_pzero.
# if these 2 lines weren't here, the correct answer would be returned
# but the INEX2 bit would be set.
	fcmp.b		%fp0,&0x1		# is it equal to one?
	fbeq.l		ld_pzero		# yes

#--REGISTERS SAVED: FPCR, FP1. FP0 CONTAINS THE INPUT.
	fmov.x		%fp0,%fp1
	fsub.s		one(%pc),%fp1		# FP1 IS X-1
	fadd.s		one(%pc),%fp0		# FP0 IS X+1
	fadd.x		%fp1,%fp1		# FP1 IS 2(X-1)
#--LOG(X) = LOG(1+U/2)-LOG(1-U/2) WHICH IS AN ODD POLYNOMIAL
#--IN U, U = 2(X-1)/(X+1) = FP1/FP0

LP1CONT2:
#--THIS IS A RE-ENTRY POINT FOR LOGNP1
# Expects: fp1 = numerator and fp0 = denominator of U (LP1ONE16 arrives
# here with fp1 = 2Z and fp0 = 1+X).
	fdiv.x		%fp0,%fp1		# FP1 IS U
	fmovm.x		&0xc,-(%sp)		# SAVE FP2-3
#--REGISTERS SAVED ARE NOW FPCR,FP1,FP2,FP3
#--LET V=U*U, W=V*V, CALCULATE
#--U + U*V*(B1 + V*(B2 + V*(B3 + V*(B4 + V*B5)))) BY
#--U + U*V*( [B1 + W*(B3 + W*B5)] + [V*(B2 + W*B4)] )
	fmov.x		%fp1,%fp0
	fmul.x		%fp0,%fp0		# FP0 IS V
	fmov.x		%fp1,SAVEU(%a6)		# STORE U IN MEMORY, FREE FP1
	fmov.x		%fp0,%fp1
	fmul.x		%fp1,%fp1		# FP1 IS W

# fp3 carries the B5/B3 chain, fp2 the B4/B2 chain.
	fmov.d		LOGB5(%pc),%fp3
	fmov.d		LOGB4(%pc),%fp2

	fmul.x		%fp1,%fp3		# W*B5
	fmul.x		%fp1,%fp2		# W*B4

	fadd.d		LOGB3(%pc),%fp3		# B3+W*B5
	fadd.d		LOGB2(%pc),%fp2		# B2+W*B4

	fmul.x		%fp3,%fp1		# W*(B3+W*B5), FP3 RELEASED

	fmul.x		%fp0,%fp2		# V*(B2+W*B4)

	fadd.d		LOGB1(%pc),%fp1		# B1+W*(B3+W*B5)
	fmul.x		SAVEU(%a6),%fp0		# FP0 IS U*V

	fadd.x		%fp2,%fp1		# B1+W*(B3+W*B5) + V*(B2+W*B4), FP2 RELEASED
	fmovm.x		(%sp)+,&0x30		# FP2-3 RESTORED

	fmul.x		%fp1,%fp0		# U*V*( [B1+W*(B3+W*B5)] + [V*(B2+W*B4)] )

# As in LP1CONT1, the user's rounding precision,mode is loaded just
# before the last add.
	fmov.l		%d0,%fpcr
	fadd.x		SAVEU(%a6),%fp0
	bra		t_inx2

#--REGISTERS SAVED FPCR.
#--LOG(-VE) IS INVALID
LOGNEG:
	bra		t_operr			# exit routine defined outside this chunk

	global		slognd
slognd:
#--ENTRY POINT FOR LOG(X) FOR DENORMALIZED INPUT
# In:  a0 = pointer to the non-zero denormalized extended operand
#      d0 = caller's rounding precision,mode
# The 64-bit mantissa is shifted left by k bits until its top bit is set,
# the result is stored in X(%a6), ADJK is set to -k, and control joins
# LOGBGN with a0 pointing at X(%a6).

	mov.l		&-100,ADJK(%a6)		# INPUT = 2^(ADJK) * FP0
# (this initial -100 is overwritten with -k on both paths below)

#----normalize the input value by left shifting k bits (k to be determined
#----below), adjusting exponent and storing -k to ADJK
#----the value TWOTO100 is no longer needed.
#----Note that this code assumes the denormalized input is NON-ZERO.

	movm.l		&0x3f00,-(%sp)		# save some registers  {d2-d7}
	mov.l		(%a0),%d3		# D3 is exponent of smallest norm. #
	mov.l		4(%a0),%d4
	mov.l		8(%a0),%d5		# (D4,D5) is (Hi_X,Lo_X)
	clr.l		%d2			# D2 used for holding K

	tst.l		%d4
	bne.b		Hi_not0

Hi_0:
# High mantissa longword is zero: promote the low longword (that alone is
# a 32-bit shift), then shift out its leading zeros as counted by bfffo.
	mov.l		%d5,%d4
	clr.l		%d5
	mov.l		&32,%d2
	clr.l		%d6
	bfffo		%d4{&0:&32},%d6		# d6 = offset of the first 1 bit
	lsl.l		%d6,%d4
	add.l		%d6,%d2			# (D3,D4,D5) is normalized

	mov.l		%d3,X(%a6)
	mov.l		%d4,XFRAC(%a6)
	mov.l		%d5,XFRAC+4(%a6)
	neg.l		%d2
	mov.l		%d2,ADJK(%a6)		# ADJK = -k
	fmov.x		X(%a6),%fp0
	movm.l		(%sp)+,&0xfc		# restore registers {d2-d7}
	lea		X(%a6),%a0		# LOGBGN reads its operand through a0
	bra.w		LOGBGN			# begin regular log(X)

Hi_not0:
# High mantissa longword is non-zero: shift the 64-bit pair (d4,d5) left
# by k, carrying the top k bits of d5 into d4 through d7.
	clr.l		%d6
	bfffo		%d4{&0:&32},%d6		# find first 1
	mov.l		%d6,%d2			# get k
	lsl.l		%d6,%d4
	mov.l		%d5,%d7			# a copy of D5
	lsl.l		%d6,%d5
	neg.l		%d6
	add.l		&32,%d6			# d6 = 32 - k
	lsr.l		%d6,%d7			# bits of d5 that cross into d4
	or.l		%d7,%d4			# (D3,D4,D5) normalized

	mov.l		%d3,X(%a6)
	mov.l		%d4,XFRAC(%a6)
	mov.l		%d5,XFRAC+4(%a6)
	neg.l		%d2
	mov.l		%d2,ADJK(%a6)		# ADJK = -k
	fmov.x		X(%a6),%fp0
	movm.l		(%sp)+,&0xfc		# restore registers {d2-d7}
	lea		X(%a6),%a0		# LOGBGN reads its operand through a0
	bra.w		LOGBGN			# begin regular log(X)

	global		slognp1
#--ENTRY POINT FOR LOG(1+X) FOR X FINITE, NON-ZERO, NOT NAN'S
# In:  a0 = pointer to the extended-precision operand
#      d0 = caller's rounding precision,mode
# Out: fp0 = log(1+X)
# When |X| does not exceed LTHOLD the argument itself is returned: it is
# moved into fp0 under the user's FPCR and the routine leaves via t_catch.
slognp1:
	fmov.x		(%a0),%fp0		# LOAD INPUT
	fabs.x		%fp0			# test magnitude
	fcmp.x		%fp0,LTHOLD(%pc)	# compare with min threshold
	fbgt.w		LP1REAL			# if greater, continue
	fmov.l		%d0,%fpcr
	mov.b		&FMOV_OP,%d1		# last inst is MOVE
	fmov.x		(%a0),%fp0		# return signed argument
	bra		t_catch

LP1REAL:
# Z is the input; X := ROUND(1+Z) is stored in the scratch slot and its
# compact form is loaded into d1 for the range tests.
	fmov.x		(%a0),%fp0		# LOAD INPUT
	mov.l		&0x00000000,ADJK(%a6)
	fmov.x		%fp0,%fp1		# FP1 IS INPUT Z
	fadd.s		one(%pc),%fp0		# X := ROUND(1+Z)
	fmov.x		%fp0,X(%a6)
	mov.w		XFRAC(%a6),XDCARE(%a6)	# make the first longword of X its compact form
	mov.l		X(%a6),%d1
	cmp.l		%d1,&0
	ble.w		LP1NEG0			# LOG OF ZERO OR -VE
	cmp.l		%d1,&0x3ffe8000		# IS BOUNDS [1/2,3/2]?
	blt.w		LOGMAIN
	cmp.l		%d1,&0x3fffc000
	bgt.w		LOGMAIN
#--IF 1+Z > 3/2 OR 1+Z < 1/2, THEN X, WHICH IS ROUNDING 1+Z,
#--CONTAINS AT LEAST 63 BITS OF INFORMATION OF Z. IN THAT CASE,
#--SIMPLY INVOKE LOG(X) FOR LOG(1+Z).

LP1NEAR1:
#--NEXT SEE IF EXP(-1/16) < X < EXP(1/16)
	cmp.l		%d1,&0x3ffef07d
	blt.w		LP1CARE
	cmp.l		%d1,&0x3fff8841
	bgt.w		LP1CARE

LP1ONE16:
#--EXP(-1/16) < X < EXP(1/16). LOG(1+Z) = LOG(1+U/2) - LOG(1-U/2)
#--WHERE U = 2Z/(2+Z) = 2Z/(1+X).
	fadd.x		%fp1,%fp1		# FP1 IS 2Z
	fadd.s		one(%pc),%fp0		# FP0 IS 1+X
#--U = FP1/FP0
	bra.w		LP1CONT2

LP1CARE:
#--HERE WE USE THE USUAL TABLE DRIVEN APPROACH. CARE HAS TO BE
#--TAKEN BECAUSE 1+Z CAN HAVE 67 BITS OF INFORMATION AND WE MUST
#--PRESERVE ALL THE INFORMATION. BECAUSE 1+Z IS IN [1/2,3/2],
#--THERE ARE ONLY TWO CASES.
#--CASE 1: 1+Z < 1, THEN K = -1 AND Y-F = (2-F) + 2Z
#--CASE 2: 1+Z > 1, THEN K = 0  AND Y-F = (1-F) + Z
#--ON RETURNING TO LP1CONT1, WE MUST HAVE K IN FP1, ADDRESS OF
#--(1/F) IN A0, Y-F IN FP0, AND FP2 SAVED.
#--(LP1CARE, CONTINUED) BUILD F = 1.XXXXXX1 FROM THE TOP MANTISSA BITS OF
#--X = ROUND(1+Z); D1 STILL HOLDS THE COMPACT FORM OF X, FP1 HOLDS Z.

	mov.l		XFRAC(%a6),FFRAC(%a6)
	and.l		&0xFE000000,FFRAC(%a6)
	or.l		&0x01000000,FFRAC(%a6)	# F OBTAINED
	cmp.l		%d1,&0x3FFF8000		# SEE IF 1+Z > 1
	bge.b		KISZERO

KISNEG1:
# Case 1+Z < 1: K = -1 and Y-F is formed as (2-F) + 2Z.
	fmov.s		TWO(%pc),%fp0
	mov.l		&0x3fff0000,F(%a6)	# F gets exponent 0x3fff
	clr.l		F+8(%a6)		# low mantissa longword of F is zero
	fsub.x		F(%a6),%fp0		# 2-F
	mov.l		FFRAC(%a6),%d1
	and.l		&0x7E000000,%d1		# the six table-index bits of F
	asr.l		&8,%d1
	asr.l		&8,%d1
	asr.l		&4,%d1			# D1 CONTAINS DISPLACEMENT FOR 1/F
	fadd.x		%fp1,%fp1		# GET 2Z
	fmovm.x		&0xc,-(%sp)		# SAVE FP2  {%fp2/%fp3}
	fadd.x		%fp1,%fp0		# FP0 IS Y-F = (2-F)+2Z
	lea		LOGTBL(%pc),%a0		# A0 IS ADDRESS OF 1/F
	add.l		%d1,%a0
	fmov.s		negone(%pc),%fp1	# FP1 IS K = -1
	bra.w		LP1CONT1

KISZERO:
# Case 1+Z >= 1: K = 0 and Y-F is formed as (1-F) + Z.
	fmov.s		one(%pc),%fp0
	mov.l		&0x3fff0000,F(%a6)	# F gets exponent 0x3fff
	clr.l		F+8(%a6)		# low mantissa longword of F is zero
	fsub.x		F(%a6),%fp0		# 1-F
	mov.l		FFRAC(%a6),%d1
	and.l		&0x7E000000,%d1		# the six table-index bits of F
	asr.l		&8,%d1
	asr.l		&8,%d1
	asr.l		&4,%d1			# d1 = displacement of 1/F in LOGTBL
	fadd.x		%fp1,%fp0		# FP0 IS Y-F
	fmovm.x		&0xc,-(%sp)		# FP2 SAVED {%fp2/%fp3}
	lea		LOGTBL(%pc),%a0
	add.l		%d1,%a0			# A0 IS ADDRESS OF 1/F
	fmov.s		zero(%pc),%fp1		# FP1 IS K = 0
	bra.w		LP1CONT1

LP1NEG0:
#--FPCR SAVED. D1 IS X IN COMPACT FORM.
#--(LP1NEG0, CONTINUED) REACHED FROM LP1REAL WHEN THE COMPACT FORM OF
#--X = ROUND(1+Z) IN D1 IS <= 0.
	cmp.l		%d1,&0
	blt.b		LP1NEG			# 1+Z is negative
LP1ZERO:
# 1+Z is zero: load -1.0, restore the user's rounding precision,mode and
# leave through t_dz.
# NOTE(review): t_dz is defined outside this chunk; presumably it raises
# divide-by-zero and takes the sign of the result from fp0 -- confirm.
	fmov.s		negone(%pc),%fp0

	fmov.l		%d0,%fpcr
	bra		t_dz

LP1NEG:
# 1+Z is negative: load 0.0, restore the user's rounding precision,mode
# and leave through t_operr (defined outside this chunk).
	fmov.s		zero(%pc),%fp0

	fmov.l		%d0,%fpcr
	bra		t_operr

	global		slognp1d
#--ENTRY POINT FOR LOG(1+Z) FOR DENORMALIZED INPUT
# Simply return the denorm
slognp1d:
	bra		t_extdnrm

#########################################################################
# satanh(): computes the inverse hyperbolic tangent of a norm input     #
# satanhd(): computes the inverse hyperbolic tangent of a denorm input  #
#                                                                       #
# INPUT *************************************************************** #
#     a0 = pointer to extended precision input                          #
#     d0 = round precision,mode                                         #
#                                                                       #
# OUTPUT ************************************************************** #
#     fp0 = arctanh(X)                                                  #
#                                                                       #
# ACCURACY and MONOTONICITY ******************************************* #
#     The returned result is within 3 ulps in 64 significant bit,       #
#     i.e. within 0.5001 ulp to 53 bits if the result is subsequently   #
#     rounded to double precision. The result is provably monotonic     #
#     in double precision.                                              #
#                                                                       #
# ALGORITHM *********************************************************** #
#                                                                       #
#     ATANH                                                             #
#     1. If |X| >= 1, go to 3.                                          #
#                                                                       #
#     2. (|X| < 1) Calculate atanh(X) by                                #
#             sgn := sign(X)                                            #
#             y := |X|                                                  #
#             z := 2y/(1-y)                                             #
#             atanh(X) := sgn * (1/2) * logp1(z)                        #
#             Exit.                                                     #
#                                                                       #
#     3. If |X| > 1, go to 5.                                           #
#                                                                       #
#     4. (|X| = 1) Generate infinity with an appropriate sign and       #
#        divide-by-zero by                                              #
#             sgn := sign(X)                                            #
#             atanh(X) := sgn / (+0).                                   #
#             Exit.                                                     #
#                                                                       #
#     5. (|X| > 1) Generate an invalid operation by 0 * infinity.       #
#             Exit.
#
#                                                                       #
#########################################################################

	global		satanh
# In:  a0 = pointer to the extended-precision operand
#      d0 = caller's rounding precision,mode
# Out: fp0 = atanh(X), computed as SIGN(X) * (1/2) * LOG1P(2|X|/(1-|X|))
satanh:
# d1 = |X| in compact form (exponent word : top 16 mantissa bits).
	mov.l		(%a0),%d1
	mov.w		4(%a0),%d1
	and.l		&0x7FFFFFFF,%d1
	cmp.l		%d1,&0x3FFF8000		# compact form of 1.0
	bge.b		ATANHBIG		# |X| >= 1

#--THIS IS THE USUAL CASE, |X| < 1
#--Y = |X|, Z = 2Y/(1-Y), ATANH(X) = SIGN(X) * (1/2) * LOG1P(Z).

	fabs.x		(%a0),%fp0		# Y = |X|
	fmov.x		%fp0,%fp1
	fneg.x		%fp1			# -Y
	fadd.x		%fp0,%fp0		# 2Y
	fadd.s		&0x3F800000,%fp1	# 1-Y
	fdiv.x		%fp1,%fp0		# 2Y/(1-Y)
	mov.l		(%a0),%d1
	and.l		&0x80000000,%d1
	or.l		&0x3F000000,%d1		# SIGN(X)*HALF
	mov.l		%d1,-(%sp)		# kept on the stack for the final fmul.s

	mov.l		%d0,-(%sp)		# save rnd prec,mode
	clr.l		%d0			# pass ext prec,RN
	fmovm.x		&0x01,-(%sp)		# save Z on stack
	lea		(%sp),%a0		# pass ptr to Z
	bsr		slognp1			# LOG1P(Z)
	add.l		&0xc,%sp		# clear Z from stack

	mov.l		(%sp)+,%d0		# fetch old prec,mode
	fmov.l		%d0,%fpcr		# load it
	mov.b		&FMUL_OP,%d1		# last inst is MUL
	fmul.s		(%sp)+,%fp0		# multiply by SIGN(X)*HALF, popping it
	bra		t_catch

ATANHBIG:
# |X| >= 1: |X| > 1 leaves through t_operr, |X| = 1 through t_dz (both
# defined outside this chunk).
	fabs.x		(%a0),%fp0		# |X|
	fcmp.s		%fp0,&0x3F800000
	fbgt		t_operr
	bra		t_dz

	global		satanhd
#--ATANH(X) = X FOR DENORMALIZED X
satanhd:
	bra		t_extdnrm

#########################################################################
# slog10(): computes the base-10 logarithm of a normalized input        #
# slog10d(): computes the base-10 logarithm of a denormalized input     #
# slog2(): computes the base-2 logarithm of a normalized input          #
# slog2d(): computes the base-2 logarithm of a denormalized input       #
#                                                                       #
# INPUT *************************************************************** #
#     a0 = pointer to extended precision input                          #
#     d0 = round precision,mode                                         #
#                                                                       #
# OUTPUT ************************************************************** #
#     fp0 = log_10(X) or log_2(X)                                       #
#                                                                       #
# ACCURACY and MONOTONICITY ******************************************* #
#     The
returned result is within 1.7 ulps in 64 significant bits, # 8669# i.e. within 0.5003 ulp to 53 bits if the result is subsequently # 8670# rounded to double precision. The result is provably monotonic # 8671# in double precision. # 8672# # 8673# ALGORITHM *********************************************************** # 8674# # 8675# slog10d: # 8676# # 8677# Step 0. If X < 0, create a NaN and raise the invalid operation # 8678# flag. Otherwise, save FPCR in D1; set FpCR to default. # 8679# Notes: Default means round-to-nearest mode, no floating-point # 8680# traps, and precision control = double extended. # 8681# # 8682# Step 1. Call slognd to obtain Y = log(X), the natural log of X. # 8683# Notes: Even if X is denormalized, log(X) is always normalized. # 8684# # 8685# Step 2. Compute log_10(X) = log(X) * (1/log(10)). # 8686# 2.1 Restore the user FPCR # 8687# 2.2 Return ans := Y * INV_L10. # 8688# # 8689# slog10: # 8690# # 8691# Step 0. If X < 0, create a NaN and raise the invalid operation # 8692# flag. Otherwise, save FPCR in D1; set FpCR to default. # 8693# Notes: Default means round-to-nearest mode, no floating-point # 8694# traps, and precision control = double extended. # 8695# # 8696# Step 1. Call sLogN to obtain Y = log(X), the natural log of X. # 8697# # 8698# Step 2. Compute log_10(X) = log(X) * (1/log(10)). # 8699# 2.1 Restore the user FPCR # 8700# 2.2 Return ans := Y * INV_L10. # 8701# # 8702# sLog2d: # 8703# # 8704# Step 0. If X < 0, create a NaN and raise the invalid operation # 8705# flag. Otherwise, save FPCR in D1; set FpCR to default. # 8706# Notes: Default means round-to-nearest mode, no floating-point # 8707# traps, and precision control = double extended. # 8708# # 8709# Step 1. Call slognd to obtain Y = log(X), the natural log of X. # 8710# Notes: Even if X is denormalized, log(X) is always normalized. # 8711# # 8712# Step 2. Compute log_2(X) = log(X) * (1/log(2)). # 8713# 2.1 Restore the user FPCR # 8714# 2.2 Return ans := Y * INV_L2.
#
#                                                                       #
#     sLog2:                                                            #
#                                                                       #
#     Step 0. If X < 0, create a NaN and raise the invalid operation    #
#             flag. Otherwise, save FPCR in D1; set FpCR to default.    #
#     Notes:  Default means round-to-nearest mode, no floating-point    #
#             traps, and precision control = double extended.           #
#                                                                       #
#     Step 1. If X is not an integer power of two, i.e., X != 2^k,      #
#             go to Step 3.                                             #
#                                                                       #
#     Step 2. Return k.                                                 #
#             2.1 Get integer k, X = 2^k.                               #
#             2.2 Restore the user FPCR.                                #
#             2.3 Return ans := convert-to-double-extended(k).          #
#                                                                       #
#     Step 3. Call sLogN to obtain Y = log(X), the natural log of X.    #
#                                                                       #
#     Step 4. Compute log_2(X) = log(X) * (1/log(2)).                   #
#             4.1 Restore the user FPCR                                 #
#             4.2 Return ans := Y * INV_L2.                             #
#                                                                       #
#########################################################################

# INV_L10 = 1/log(10) and INV_L2 = 1/log(2), both in extended precision.
# The natural log returned by slogn/slognd is multiplied by one of them.
INV_L10:
	long		0x3FFD0000,0xDE5BD8A9,0x37287195,0x00000000

INV_L2:
	long		0x3FFF0000,0xB8AA3B29,0x5C17F0BC,0x00000000

	global		slog10
#--entry point for Log10(X), X is normalized
# In:  a0 = pointer to the extended-precision operand
#      d0 = caller's rounding precision,mode
# Out: fp0 = log_10(X)
slog10:
	fmov.b		&0x1,%fp0
	fcmp.x		%fp0,(%a0)		# if operand == 1,
	fbeq.l		ld_pzero		# return an EXACT zero

	mov.l		(%a0),%d1		# sign/exponent longword sets the N flag
	blt.w		invalid			# negative operand
	mov.l		%d0,-(%sp)		# keep the user's prec,mode on the stack
	clr.l		%d0			# call slogn with d0 = 0
	bsr		slogn			# log(X), X normal.
	fmov.l		(%sp)+,%fpcr		# user's prec,mode governs the multiply
	fmul.x		INV_L10(%pc),%fp0
	bra		t_inx2

	global		slog10d
#--entry point for Log10(X), X is denormalized
# Same interface as slog10; calls slognd and leaves through t_minx2.
slog10d:
	mov.l		(%a0),%d1
	blt.w		invalid			# negative operand
	mov.l		%d0,-(%sp)		# keep the user's prec,mode on the stack
	clr.l		%d0			# call slognd with d0 = 0
	bsr		slognd			# log(X), X denorm.
	fmov.l		(%sp)+,%fpcr		# user's prec,mode governs the multiply
	fmul.x		INV_L10(%pc),%fp0
	bra		t_minx2

	global		slog2
#--entry point for Log2(X), X is normalized
# In:  a0 = pointer to the extended-precision operand
#      d0 = caller's rounding precision,mode
# Out: fp0 = log_2(X); when X = 2^k the integer k is converted directly.
slog2:
	mov.l		(%a0),%d1
	blt.w		invalid			# negative operand

# X = 2^k exactly when the mantissa is 1.0: low longword zero and high
# longword zero apart from its top bit.
	mov.l		8(%a0),%d1
	bne.b		continue		# X is not 2^k

	mov.l		4(%a0),%d1
	and.l		&0x7FFFFFFF,%d1
	bne.b		continue

#--X = 2^k.
#--(slog2, CONTINUED) X = 2^k: RETURN k, THE UNBIASED EXPONENT.
	mov.w		(%a0),%d1
	and.l		&0x00007FFF,%d1		# biased exponent of X
	sub.l		&0x3FFF,%d1		# k
	beq.l		ld_pzero		# k = 0, i.e. X = 1: exit through ld_pzero
	fmov.l		%d0,%fpcr		# user's prec,mode governs the conversion
	fmov.l		%d1,%fp0		# convert integer k
	bra		t_inx2

continue:
# X is not a power of two: log_2(X) = log(X) * (1/log(2)).
	mov.l		%d0,-(%sp)		# keep the user's prec,mode on the stack
	clr.l		%d0			# call slogn with d0 = 0
	bsr		slogn			# log(X), X normal.
	fmov.l		(%sp)+,%fpcr		# user's prec,mode governs the multiply
	fmul.x		INV_L2(%pc),%fp0
	bra		t_inx2

invalid:
# Shared target for a negative operand in slog10, slog10d, slog2, slog2d.
	bra		t_operr

	global		slog2d
#--entry point for Log2(X), X is denormalized
# Same interface as slog2; calls slognd and leaves through t_minx2.
slog2d:
	mov.l		(%a0),%d1
	blt.w		invalid			# negative operand
	mov.l		%d0,-(%sp)		# keep the user's prec,mode on the stack
	clr.l		%d0			# call slognd with d0 = 0
	bsr		slognd			# log(X), X denorm.
	fmov.l		(%sp)+,%fpcr		# user's prec,mode governs the multiply
	fmul.x		INV_L2(%pc),%fp0
	bra		t_minx2

#########################################################################
# stwotox(): computes 2**X for a normalized input                       #
# stwotoxd(): computes 2**X for a denormalized input                    #
# stentox(): computes 10**X for a normalized input                      #
# stentoxd(): computes 10**X for a denormalized input                   #
#                                                                       #
# INPUT *************************************************************** #
#     a0 = pointer to extended precision input                          #
#     d0 = round precision,mode                                         #
#                                                                       #
# OUTPUT ************************************************************** #
#     fp0 = 2**X or 10**X                                               #
#                                                                       #
# ACCURACY and MONOTONICITY ******************************************* #
#     The returned result is within 2 ulps in 64 significant bit,       #
#     i.e. within 0.5001 ulp to 53 bits if the result is subsequently   #
#     rounded to double precision. The result is provably monotonic     #
#     in double precision.                                              #
#                                                                       #
# ALGORITHM *********************************************************** #
#                                                                       #
#     twotox                                                            #
#     1. If |X| > 16480, go to ExpBig.                                  #
#                                                                       #
#     2. If |X| < 2**(-70), go to ExpSm.                                #
#                                                                       #
#     3. Decompose X as X = N/64 + r where |r| <= 1/128. Furthermore    #
#        decompose N as                                                 #
#             N = 64(M + M') + j, j = 0,1,2,...,63.                     #
#                                                                       #
#     4. Overwrite r := r * log2. Then                                  #
#             2**X = 2**(M') * 2**(M) * 2**(j/64) * exp(r).
# 8850# Go to expr to compute that expression. # 8851# # 8852# tentox # 8853# 1. If |X| > 16480*log_10(2) (base 10 log of 2), go to ExpBig. # 8854# # 8855# 2. If |X| < 2**(-70), go to ExpSm. # 8856# # 8857# 3. Set y := X*log_2(10)*64 (base 2 log of 10). Set # 8858# N := round-to-int(y). Decompose N as # 8859# N = 64(M + M') + j, j = 0,1,2,...,63. # 8860# # 8861# 4. Define r as # 8862# r := ((X - N*L1)-N*L2) * L10 # 8863# where L1, L2 are the leading and trailing parts of # 8864# log_10(2)/64 and L10 is the natural log of 10. Then # 8865# 10**X = 2**(M') * 2**(M) * 2**(j/64) * exp(r). # 8866# Go to expr to compute that expression. # 8867# # 8868# expr # 8869# 1. Fetch 2**(j/64) from table as Fact1 and Fact2. # 8870# # 8871# 2. Overwrite Fact1 and Fact2 by # 8872# Fact1 := 2**(M) * Fact1 # 8873# Fact2 := 2**(M) * Fact2 # 8874# Thus Fact1 + Fact2 = 2**(M) * 2**(j/64). # 8875# # 8876# 3. Calculate P where 1 + P approximates exp(r): # 8877# P = r + r*r*(A1+r*(A2+...+r*A5)). # 8878# # 8879# 4. Let AdjFact := 2**(M'). Return # 8880# AdjFact * ( Fact1 + ((Fact1*P) + Fact2) ). # 8881# Exit. # 8882# # 8883# ExpBig # 8884# 1. Generate overflow by Huge * Huge if X > 0; otherwise, # 8885# generate underflow by Tiny * Tiny. # 8886# # 8887# ExpSm # 8888# 1. Return 1 + X. 
#
#                                                                       #
#########################################################################

# Constants for tentox (see step 3/4 of its algorithm description):
# L2TEN64 is 64*log_2(10) in double precision; L10TWO1 (double) and
# L10TWO2 (extended) are the leading and trailing parts of log_10(2)/64.
L2TEN64:
	long		0x406A934F,0x0979A371	# 64LOG10/LOG2
L10TWO1:
	long		0x3F734413,0x509F8000	# LOG2/64LOG10

L10TWO2:
	long		0xBFCD0000,0xC0219DC1,0xDA994FD2,0x00000000

# Natural logs of 10 and of 2 in extended precision; TWOMAIN multiplies
# the reduced argument by LOG2 to obtain R.
LOG10:	long		0x40000000,0x935D8DDD,0xAAA8AC17,0x00000000

LOG2:	long		0x3FFE0000,0xB17217F7,0xD1CF79AC,0x00000000

# EXPA1..EXPA5: coefficients A1..A5 of P = r + r*r*(A1+r*(A2+...+r*A5)),
# the approximation of exp(r)-1 named in the "expr" algorithm step.
# NOTE(review): the expr code that reads them is outside this chunk.
EXPA5:	long		0x3F56C16D,0x6F7BD0B2
EXPA4:	long		0x3F811112,0x302C712C
EXPA3:	long		0x3FA55555,0x55554CC1
EXPA2:	long		0x3FC55555,0x55554A54
EXPA1:	long		0x3FE00000,0x00000000,0x00000000,0x00000000

# TEXPTBL: one 16-byte entry per j = 0..63 for 2^(j/64).  TWOMAIN copies
# the first three longwords to FACT1 and splits the fourth longword into
# two words: the high word goes to the first word of FACT2 and the low
# word to the first word of FACT2HI, with the rest of FACT2 cleared.
TEXPTBL:
	long		0x3FFF0000,0x80000000,0x00000000,0x3F738000
	long		0x3FFF0000,0x8164D1F3,0xBC030773,0x3FBEF7CA
	long		0x3FFF0000,0x82CD8698,0xAC2BA1D7,0x3FBDF8A9
	long		0x3FFF0000,0x843A28C3,0xACDE4046,0x3FBCD7C9
	long		0x3FFF0000,0x85AAC367,0xCC487B15,0xBFBDE8DA
	long		0x3FFF0000,0x871F6196,0x9E8D1010,0x3FBDE85C
	long		0x3FFF0000,0x88980E80,0x92DA8527,0x3FBEBBF1
	long		0x3FFF0000,0x8A14D575,0x496EFD9A,0x3FBB80CA
	long		0x3FFF0000,0x8B95C1E3,0xEA8BD6E7,0xBFBA8373
	long		0x3FFF0000,0x8D1ADF5B,0x7E5BA9E6,0xBFBE9670
	long		0x3FFF0000,0x8EA4398B,0x45CD53C0,0x3FBDB700
	long		0x3FFF0000,0x9031DC43,0x1466B1DC,0x3FBEEEB0
	long		0x3FFF0000,0x91C3D373,0xAB11C336,0x3FBBFD6D
	long		0x3FFF0000,0x935A2B2F,0x13E6E92C,0xBFBDB319
	long		0x3FFF0000,0x94F4EFA8,0xFEF70961,0x3FBDBA2B
	long		0x3FFF0000,0x96942D37,0x20185A00,0x3FBE91D5
	long		0x3FFF0000,0x9837F051,0x8DB8A96F,0x3FBE8D5A
	long		0x3FFF0000,0x99E04593,0x20B7FA65,0xBFBCDE7B
	long		0x3FFF0000,0x9B8D39B9,0xD54E5539,0xBFBEBAAF
	long		0x3FFF0000,0x9D3ED9A7,0x2CFFB751,0xBFBD86DA
	long		0x3FFF0000,0x9EF53260,0x91A111AE,0xBFBEBEDD
	long		0x3FFF0000,0xA0B0510F,0xB9714FC2,0x3FBCC96E
	long		0x3FFF0000,0xA2704303,0x0C496819,0xBFBEC90B
	long		0x3FFF0000,0xA43515AE,0x09E6809E,0x3FBBD1DB
	long		0x3FFF0000,0xA5FED6A9,0xB15138EA,0x3FBCE5EB
8936 long 0x3FFF0000,0xA7CD93B4,0xE965356A,0xBFBEC274 8937 long 0x3FFF0000,0xA9A15AB4,0xEA7C0EF8,0x3FBEA83C 8938 long 0x3FFF0000,0xAB7A39B5,0xA93ED337,0x3FBECB00 8939 long 0x3FFF0000,0xAD583EEA,0x42A14AC6,0x3FBE9301 8940 long 0x3FFF0000,0xAF3B78AD,0x690A4375,0xBFBD8367 8941 long 0x3FFF0000,0xB123F581,0xD2AC2590,0xBFBEF05F 8942 long 0x3FFF0000,0xB311C412,0xA9112489,0x3FBDFB3C 8943 long 0x3FFF0000,0xB504F333,0xF9DE6484,0x3FBEB2FB 8944 long 0x3FFF0000,0xB6FD91E3,0x28D17791,0x3FBAE2CB 8945 long 0x3FFF0000,0xB8FBAF47,0x62FB9EE9,0x3FBCDC3C 8946 long 0x3FFF0000,0xBAFF5AB2,0x133E45FB,0x3FBEE9AA 8947 long 0x3FFF0000,0xBD08A39F,0x580C36BF,0xBFBEAEFD 8948 long 0x3FFF0000,0xBF1799B6,0x7A731083,0xBFBCBF51 8949 long 0x3FFF0000,0xC12C4CCA,0x66709456,0x3FBEF88A 8950 long 0x3FFF0000,0xC346CCDA,0x24976407,0x3FBD83B2 8951 long 0x3FFF0000,0xC5672A11,0x5506DADD,0x3FBDF8AB 8952 long 0x3FFF0000,0xC78D74C8,0xABB9B15D,0xBFBDFB17 8953 long 0x3FFF0000,0xC9B9BD86,0x6E2F27A3,0xBFBEFE3C 8954 long 0x3FFF0000,0xCBEC14FE,0xF2727C5D,0xBFBBB6F8 8955 long 0x3FFF0000,0xCE248C15,0x1F8480E4,0xBFBCEE53 8956 long 0x3FFF0000,0xD06333DA,0xEF2B2595,0xBFBDA4AE 8957 long 0x3FFF0000,0xD2A81D91,0xF12AE45A,0x3FBC9124 8958 long 0x3FFF0000,0xD4F35AAB,0xCFEDFA1F,0x3FBEB243 8959 long 0x3FFF0000,0xD744FCCA,0xD69D6AF4,0x3FBDE69A 8960 long 0x3FFF0000,0xD99D15C2,0x78AFD7B6,0xBFB8BC61 8961 long 0x3FFF0000,0xDBFBB797,0xDAF23755,0x3FBDF610 8962 long 0x3FFF0000,0xDE60F482,0x5E0E9124,0xBFBD8BE1 8963 long 0x3FFF0000,0xE0CCDEEC,0x2A94E111,0x3FBACB12 8964 long 0x3FFF0000,0xE33F8972,0xBE8A5A51,0x3FBB9BFE 8965 long 0x3FFF0000,0xE5B906E7,0x7C8348A8,0x3FBCF2F4 8966 long 0x3FFF0000,0xE8396A50,0x3C4BDC68,0x3FBEF22F 8967 long 0x3FFF0000,0xEAC0C6E7,0xDD24392F,0xBFBDBF4A 8968 long 0x3FFF0000,0xED4F301E,0xD9942B84,0x3FBEC01A 8969 long 0x3FFF0000,0xEFE4B99B,0xDCDAF5CB,0x3FBE8CAC 8970 long 0x3FFF0000,0xF281773C,0x59FFB13A,0xBFBCBB3F 8971 long 0x3FFF0000,0xF5257D15,0x2486CC2C,0x3FBEF73A 8972 long 0x3FFF0000,0xF7D0DF73,0x0AD13BB9,0xBFB8B795 
8973 long 0x3FFF0000,0xFA83B2DB,0x722A033A,0x3FBEF84B 8974 long 0x3FFF0000,0xFD3E0C0C,0xF486C175,0xBFBEF581 8975 8976 set INT,L_SCR1 8977 8978 set X,FP_SCR0 8979 set XDCARE,X+2 8980 set XFRAC,X+4 8981 8982 set ADJFACT,FP_SCR0 8983 8984 set FACT1,FP_SCR0 8985 set FACT1HI,FACT1+4 8986 set FACT1LOW,FACT1+8 8987 8988 set FACT2,FP_SCR1 8989 set FACT2HI,FACT2+4 8990 set FACT2LOW,FACT2+8 8991 8992 global stwotox 8993#--ENTRY POINT FOR 2**(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S 8994stwotox: 8995 fmovm.x (%a0),&0x80 # LOAD INPUT 8996 8997 mov.l (%a0),%d1 8998 mov.w 4(%a0),%d1 8999 fmov.x %fp0,X(%a6) 9000 and.l &0x7FFFFFFF,%d1 9001 9002 cmp.l %d1,&0x3FB98000 # |X| >= 2**(-70)? 9003 bge.b TWOOK1 9004 bra.w EXPBORS 9005 9006TWOOK1: 9007 cmp.l %d1,&0x400D80C0 # |X| > 16480? 9008 ble.b TWOMAIN 9009 bra.w EXPBORS 9010 9011TWOMAIN: 9012#--USUAL CASE, 2^(-70) <= |X| <= 16480 9013 9014 fmov.x %fp0,%fp1 9015 fmul.s &0x42800000,%fp1 # 64 * X 9016 fmov.l %fp1,INT(%a6) # N = ROUND-TO-INT(64 X) 9017 mov.l %d2,-(%sp) 9018 lea TEXPTBL(%pc),%a1 # LOAD ADDRESS OF TABLE OF 2^(J/64) 9019 fmov.l INT(%a6),%fp1 # N --> FLOATING FMT 9020 mov.l INT(%a6),%d1 9021 mov.l %d1,%d2 9022 and.l &0x3F,%d1 # D0 IS J 9023 asl.l &4,%d1 # DISPLACEMENT FOR 2^(J/64) 9024 add.l %d1,%a1 # ADDRESS FOR 2^(J/64) 9025 asr.l &6,%d2 # d2 IS L, N = 64L + J 9026 mov.l %d2,%d1 9027 asr.l &1,%d1 # D0 IS M 9028 sub.l %d1,%d2 # d2 IS M', N = 64(M+M') + J 9029 add.l &0x3FFF,%d2 9030 9031#--SUMMARY: a1 IS ADDRESS FOR THE LEADING PORTION OF 2^(J/64), 9032#--D0 IS M WHERE N = 64(M+M') + J. NOTE THAT |M| <= 16140 BY DESIGN. 9033#--ADJFACT = 2^(M'). 9034#--REGISTERS SAVED SO FAR ARE (IN ORDER) FPCR, D0, FP1, a1, AND FP2. 
9035 9036 fmovm.x &0x0c,-(%sp) # save fp2/fp3 9037 9038 fmul.s &0x3C800000,%fp1 # (1/64)*N 9039 mov.l (%a1)+,FACT1(%a6) 9040 mov.l (%a1)+,FACT1HI(%a6) 9041 mov.l (%a1)+,FACT1LOW(%a6) 9042 mov.w (%a1)+,FACT2(%a6) 9043 9044 fsub.x %fp1,%fp0 # X - (1/64)*INT(64 X) 9045 9046 mov.w (%a1)+,FACT2HI(%a6) 9047 clr.w FACT2HI+2(%a6) 9048 clr.l FACT2LOW(%a6) 9049 add.w %d1,FACT1(%a6) 9050 fmul.x LOG2(%pc),%fp0 # FP0 IS R 9051 add.w %d1,FACT2(%a6) 9052 9053 bra.w expr 9054 9055EXPBORS: 9056#--FPCR, D0 SAVED 9057 cmp.l %d1,&0x3FFF8000 9058 bgt.b TEXPBIG 9059 9060#--|X| IS SMALL, RETURN 1 + X 9061 9062 fmov.l %d0,%fpcr # restore users round prec,mode 9063 fadd.s &0x3F800000,%fp0 # RETURN 1 + X 9064 bra t_pinx2 9065 9066TEXPBIG: 9067#--|X| IS LARGE, GENERATE OVERFLOW IF X > 0; ELSE GENERATE UNDERFLOW 9068#--REGISTERS SAVE SO FAR ARE FPCR AND D0 9069 mov.l X(%a6),%d1 9070 cmp.l %d1,&0 9071 blt.b EXPNEG 9072 9073 bra t_ovfl2 # t_ovfl expects positive value 9074 9075EXPNEG: 9076 bra t_unfl2 # t_unfl expects positive value 9077 9078 global stwotoxd 9079stwotoxd: 9080#--ENTRY POINT FOR 2**(X) FOR DENORMALIZED ARGUMENT 9081 9082 fmov.l %d0,%fpcr # set user's rounding mode/precision 9083 fmov.s &0x3F800000,%fp0 # RETURN 1 + X 9084 mov.l (%a0),%d1 9085 or.l &0x00800001,%d1 9086 fadd.s %d1,%fp0 9087 bra t_pinx2 9088 9089 global stentox 9090#--ENTRY POINT FOR 10**(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S 9091stentox: 9092 fmovm.x (%a0),&0x80 # LOAD INPUT 9093 9094 mov.l (%a0),%d1 9095 mov.w 4(%a0),%d1 9096 fmov.x %fp0,X(%a6) 9097 and.l &0x7FFFFFFF,%d1 9098 9099 cmp.l %d1,&0x3FB98000 # |X| >= 2**(-70)? 9100 bge.b TENOK1 9101 bra.w EXPBORS 9102 9103TENOK1: 9104 cmp.l %d1,&0x400B9B07 # |X| <= 16480*log2/log10 ? 
9105 ble.b TENMAIN 9106 bra.w EXPBORS 9107 9108TENMAIN: 9109#--USUAL CASE, 2^(-70) <= |X| <= 16480 LOG 2 / LOG 10 9110 9111 fmov.x %fp0,%fp1 9112 fmul.d L2TEN64(%pc),%fp1 # X*64*LOG10/LOG2 9113 fmov.l %fp1,INT(%a6) # N=INT(X*64*LOG10/LOG2) 9114 mov.l %d2,-(%sp) 9115 lea TEXPTBL(%pc),%a1 # LOAD ADDRESS OF TABLE OF 2^(J/64) 9116 fmov.l INT(%a6),%fp1 # N --> FLOATING FMT 9117 mov.l INT(%a6),%d1 9118 mov.l %d1,%d2 9119 and.l &0x3F,%d1 # D0 IS J 9120 asl.l &4,%d1 # DISPLACEMENT FOR 2^(J/64) 9121 add.l %d1,%a1 # ADDRESS FOR 2^(J/64) 9122 asr.l &6,%d2 # d2 IS L, N = 64L + J 9123 mov.l %d2,%d1 9124 asr.l &1,%d1 # D0 IS M 9125 sub.l %d1,%d2 # d2 IS M', N = 64(M+M') + J 9126 add.l &0x3FFF,%d2 9127 9128#--SUMMARY: a1 IS ADDRESS FOR THE LEADING PORTION OF 2^(J/64), 9129#--D0 IS M WHERE N = 64(M+M') + J. NOTE THAT |M| <= 16140 BY DESIGN. 9130#--ADJFACT = 2^(M'). 9131#--REGISTERS SAVED SO FAR ARE (IN ORDER) FPCR, D0, FP1, a1, AND FP2. 9132 fmovm.x &0x0c,-(%sp) # save fp2/fp3 9133 9134 fmov.x %fp1,%fp2 9135 9136 fmul.d L10TWO1(%pc),%fp1 # N*(LOG2/64LOG10)_LEAD 9137 mov.l (%a1)+,FACT1(%a6) 9138 9139 fmul.x L10TWO2(%pc),%fp2 # N*(LOG2/64LOG10)_TRAIL 9140 9141 mov.l (%a1)+,FACT1HI(%a6) 9142 mov.l (%a1)+,FACT1LOW(%a6) 9143 fsub.x %fp1,%fp0 # X - N L_LEAD 9144 mov.w (%a1)+,FACT2(%a6) 9145 9146 fsub.x %fp2,%fp0 # X - N L_TRAIL 9147 9148 mov.w (%a1)+,FACT2HI(%a6) 9149 clr.w FACT2HI+2(%a6) 9150 clr.l FACT2LOW(%a6) 9151 9152 fmul.x LOG10(%pc),%fp0 # FP0 IS R 9153 add.w %d1,FACT1(%a6) 9154 add.w %d1,FACT2(%a6) 9155 9156expr: 9157#--FPCR, FP2, FP3 ARE SAVED IN ORDER AS SHOWN. 9158#--ADJFACT CONTAINS 2**(M'), FACT1 + FACT2 = 2**(M) * 2**(J/64). 9159#--FP0 IS R. 
THE FOLLOWING CODE COMPUTES 9160#-- 2**(M'+M) * 2**(J/64) * EXP(R) 9161 9162 fmov.x %fp0,%fp1 9163 fmul.x %fp1,%fp1 # FP1 IS S = R*R 9164 9165 fmov.d EXPA5(%pc),%fp2 # FP2 IS A5 9166 fmov.d EXPA4(%pc),%fp3 # FP3 IS A4 9167 9168 fmul.x %fp1,%fp2 # FP2 IS S*A5 9169 fmul.x %fp1,%fp3 # FP3 IS S*A4 9170 9171 fadd.d EXPA3(%pc),%fp2 # FP2 IS A3+S*A5 9172 fadd.d EXPA2(%pc),%fp3 # FP3 IS A2+S*A4 9173 9174 fmul.x %fp1,%fp2 # FP2 IS S*(A3+S*A5) 9175 fmul.x %fp1,%fp3 # FP3 IS S*(A2+S*A4) 9176 9177 fadd.d EXPA1(%pc),%fp2 # FP2 IS A1+S*(A3+S*A5) 9178 fmul.x %fp0,%fp3 # FP3 IS R*S*(A2+S*A4) 9179 9180 fmul.x %fp1,%fp2 # FP2 IS S*(A1+S*(A3+S*A5)) 9181 fadd.x %fp3,%fp0 # FP0 IS R+R*S*(A2+S*A4) 9182 fadd.x %fp2,%fp0 # FP0 IS EXP(R) - 1 9183 9184 fmovm.x (%sp)+,&0x30 # restore fp2/fp3 9185 9186#--FINAL RECONSTRUCTION PROCESS 9187#--EXP(X) = 2^M*2^(J/64) + 2^M*2^(J/64)*(EXP(R)-1) - (1 OR 0) 9188 9189 fmul.x FACT1(%a6),%fp0 9190 fadd.x FACT2(%a6),%fp0 9191 fadd.x FACT1(%a6),%fp0 9192 9193 fmov.l %d0,%fpcr # restore users round prec,mode 9194 mov.w %d2,ADJFACT(%a6) # INSERT EXPONENT 9195 mov.l (%sp)+,%d2 9196 mov.l &0x80000000,ADJFACT+4(%a6) 9197 clr.l ADJFACT+8(%a6) 9198 mov.b &FMUL_OP,%d1 # last inst is MUL 9199 fmul.x ADJFACT(%a6),%fp0 # FINAL ADJUSTMENT 9200 bra t_catch 9201 9202 global stentoxd 9203stentoxd: 9204#--ENTRY POINT FOR 10**(X) FOR DENORMALIZED ARGUMENT 9205 9206 fmov.l %d0,%fpcr # set user's rounding mode/precision 9207 fmov.s &0x3F800000,%fp0 # RETURN 1 + X 9208 mov.l (%a0),%d1 9209 or.l &0x00800001,%d1 9210 fadd.s %d1,%fp0 9211 bra t_pinx2 9212 9213######################################################################### 9214# sscale(): computes the destination operand scaled by the source # 9215# operand. If the absoulute value of the source operand is # 9216# >= 2^14, an overflow or underflow is returned. 
#									#
# INPUT *************************************************************** #
# a0 = pointer to double-extended source operand X #
# a1 = pointer to double-extended destination operand Y #
# #
# OUTPUT ************************************************************** #
# fp0 = scale(X,Y) #
# #
#########################################################################

# NOTE(review): SIGN (and the dst exponent placed in d1 at entry) are not
# read again anywhere in the sscale code below; confirm whether a routine
# reached from here relies on them.
set	SIGN, L_SCR1

	global	sscale
sscale:
	mov.l	%d0,-(%sp)	# store off ctrl bits for now

	mov.w	DST_EX(%a1),%d1	# get dst exponent
	smi.b	SIGN(%a6)	# use SIGN to hold dst sign
	andi.l	&0x00007fff,%d1	# strip sign from dst exp

	mov.w	SRC_EX(%a0),%d0	# check src bounds
	andi.w	&0x7fff,%d0	# clr src sign bit
	cmpi.w	%d0,&0x3fff	# is src ~ ZERO? (biased exponent below 0x3fff)
	blt.w	src_small	# yes
	cmpi.w	%d0,&0x400c	# no; is src too big? (biased exponent above 0x400c)
	bgt.w	src_out	# yes

#
# Source is within 2^14 range.
#
src_ok:
	fintrz.x	SRC(%a0),%fp0	# calc int of src
	fmov.l	%fp0,%d0	# int src to d0
# don't want any accrued bits from the fintrz showing up later since
# we may need to read the fpsr for the last fp op in t_catch2().
	fmov.l	&0x0,%fpsr

	tst.b	DST_HI(%a1)	# is dst denormalized? (msb of mantissa clear)
	bmi.b	sok_norm	# no; msb is set

# the dst is a DENORM. normalize the DENORM and add the adjustment to
# the src value. then, jump to the norm part of the routine.
sok_dnrm:
	mov.l	%d0,-(%sp)	# save src for now

	mov.w	DST_EX(%a1),FP_SCR0_EX(%a6)	# make a copy
	mov.l	DST_HI(%a1),FP_SCR0_HI(%a6)
	mov.l	DST_LO(%a1),FP_SCR0_LO(%a6)

	lea	FP_SCR0(%a6),%a0	# pass ptr to DENORM
	bsr.l	norm	# normalize the DENORM
# presumably norm() returns the normalization shift count in d0 - TODO confirm
	neg.l	%d0
	add.l	(%sp)+,%d0	# add adjustment to src

	fmovm.x	FP_SCR0(%a6),&0x80	# load normalized DENORM

	cmpi.w	%d0,&-0x3fff	# is the shft amt really low?
	bge.b	sok_norm2	# thank goodness no

# the multiply factor that we're trying to create should be a denorm
# for the multiply to work. therefore, we're going to actually do a
# multiply with a denorm which will cause an unimplemented data type
# exception to be put into the machine which will be caught and corrected
# later. we don't do this with the DENORMs above because this method
# is slower. but, don't fret, I don't see it being used much either.
#
# the 12-byte factor is built on the stack by three longword pushes, so
# the first push is the low mantissa and the last push is the exponent.
	fmov.l	(%sp)+,%fpcr	# restore user fpcr
	mov.l	&0x80000000,%d1	# load normalized mantissa
	subi.l	&-0x3fff,%d0	# how many should we shift?
	neg.l	%d0	# make it positive
	cmpi.b	%d0,&0x20	# is it >= 32?
	bge.b	sok_dnrm_32	# yes
	lsr.l	%d0,%d1	# no; bit stays in upper lw
	clr.l	-(%sp)	# insert zero low mantissa
	mov.l	%d1,-(%sp)	# insert new high mantissa
	clr.l	-(%sp)	# make zero exponent
	bra.b	sok_norm_cont
sok_dnrm_32:
	subi.b	&0x20,%d0	# get shift count
	lsr.l	%d0,%d1	# make low mantissa longword
	mov.l	%d1,-(%sp)	# insert new low mantissa
	clr.l	-(%sp)	# insert zero high mantissa
	clr.l	-(%sp)	# make zero exponent
	bra.b	sok_norm_cont

# the src will force the dst to a DENORM value or worse. so, let's
# create an fp multiply that will create the result.
sok_norm:
	fmovm.x	DST(%a1),&0x80	# load fp0 with the (normalized) dst operand
sok_norm2:
	fmov.l	(%sp)+,%fpcr	# restore user fpcr

# build the factor 2^(src) on the stack: pushed low mantissa first,
# exponent last (same layout as the DENORM factor above).
	addi.w	&0x3fff,%d0	# turn src amt into exp value
	swap	%d0	# put exponent in high word
	clr.l	-(%sp)	# insert zero low mantissa
	mov.l	&0x80000000,-(%sp)	# insert new high mantissa
	mov.l	%d0,-(%sp)	# insert new exponent

sok_norm_cont:
	fmov.l	%fpcr,%d0	# d0 needs fpcr for t_catch2
	mov.b	&FMUL_OP,%d1	# last inst is MUL
	fmul.x	(%sp)+,%fp0	# do the multiply (pops the 12-byte factor)
	bra	t_catch2	# catch any exceptions

#
# Source is outside of 2^14 range. Test the sign and branch
# to the appropriate exception handler.
#
# after the exg, a0 points to the dst operand and a1 to the src operand;
# t_unfl and t_ovfl_sc test the sign through a0.
src_out:
	mov.l	(%sp)+,%d0	# restore ctrl bits
	exg	%a0,%a1	# swap src,dst ptrs
	tst.b	SRC_EX(%a1)	# is src negative?
	bmi	t_unfl	# yes; underflow
	bra	t_ovfl_sc	# no; overflow

#
# The source input is below 1, so we check for denormalized numbers
# and set unfl.
#
src_small:
	tst.b	DST_HI(%a1)	# is dst denormalized?
	bpl.b	ssmall_done	# yes

	mov.l	(%sp)+,%d0
	fmov.l	%d0,%fpcr	# no; load control bits
	mov.b	&FMOV_OP,%d1	# last inst is MOVE
	fmov.x	DST(%a1),%fp0	# simply return dest
	bra	t_catch2
ssmall_done:
	mov.l	(%sp)+,%d0	# load control bits into d0
	mov.l	%a1,%a0	# pass ptr to dst
	bra	t_resdnrm

#########################################################################
# smod(): computes the fp MOD of the input values X,Y. #
# srem(): computes the fp (IEEE) REM of the input values X,Y. #
# #
# INPUT *************************************************************** #
# a0 = pointer to extended precision input X #
# a1 = pointer to extended precision input Y #
# d0 = round precision,mode #
# #
# NOTE(review): the code loads Y (whose sign goes to SignY) through #
# a0/SRC_* and X (SignX) through a1/DST_*, i.e. the opposite of the #
# two lines above - confirm which naming is intended. #
# #
# The input operands X and Y can be either normalized or #
# denormalized. #
# #
# OUTPUT ************************************************************** #
# fp0 = FREM(X,Y) or FMOD(X,Y) #
# #
# ALGORITHM *********************************************************** #
# #
# Step 1. Save and strip signs of X and Y: signX := sign(X), #
#         signY := sign(Y), X := |X|, Y := |Y|, #
#         signQ := signX EOR signY. Record whether MOD or REM #
#         is requested. #
# #
# Step 2. Set L := expo(X)-expo(Y), k := 0, Q := 0. #
#         If (L < 0) then #
#            R := X, go to Step 4. #
#         else #
#            R := 2^(-L)X, j := L. #
#         endif #
# #
# Step 3. Perform MOD(X,Y) #
#      3.1 If R = Y, go to Step 9.
# 9380# 3.2 If R > Y, then { R := R - Y, Q := Q + 1} # 9381# 3.3 If j = 0, go to Step 4. # 9382# 3.4 k := k + 1, j := j - 1, Q := 2Q, R := 2R. Go to # 9383# Step 3.1. # 9384# # 9385# Step 4. At this point, R = X - QY = MOD(X,Y). Set # 9386# Last_Subtract := false (used in Step 7 below). If # 9387# MOD is requested, go to Step 6. # 9388# # 9389# Step 5. R = MOD(X,Y), but REM(X,Y) is requested. # 9390# 5.1 If R < Y/2, then R = MOD(X,Y) = REM(X,Y). Go to # 9391# Step 6. # 9392# 5.2 If R > Y/2, then { set Last_Subtract := true, # 9393# Q := Q + 1, Y := signY*Y }. Go to Step 6. # 9394# 5.3 This is the tricky case of R = Y/2. If Q is odd, # 9395# then { Q := Q + 1, signX := -signX }. # 9396# # 9397# Step 6. R := signX*R. # 9398# # 9399# Step 7. If Last_Subtract = true, R := R - Y. # 9400# # 9401# Step 8. Return signQ, last 7 bits of Q, and R as required. # 9402# # 9403# Step 9. At this point, R = 2^(-j)*X - Q Y = Y. Thus, # 9404# X = 2^(j)*(Q+1)Y. set Q := 2^(j)*(Q+1), # 9405# R := 0. Return signQ, last 7 bits of Q, and R. 
#									#
#########################################################################

# Byte flags and sign words kept in the local scratch longwords.
	set	Mod_Flag,L_SCR3	# 0 = MOD requested, 1 = REM requested
	set	Sc_Flag,L_SCR3+1	# non-zero = result must be multiplied by Scale

	set	SignY,L_SCR2
	set	SignX,L_SCR2+2
	set	SignQ,L_SCR3+2

	set	Y,FP_SCR0
	set	Y_Hi,Y+4
	set	Y_Lo,Y+8

	set	R,FP_SCR1
	set	R_Hi,R+4
	set	R_Lo,R+8

# Extended precision value with exponent word 0x0001 and mantissa
# 0x80000000_00000000; the final multiply by it removes the 0x3FFE
# exponent offset that the working exponents carry (see Do_Scale).
Scale:
	long	0x00010000,0x80000000,0x00000000,0x00000000

	global	smod
smod:
	clr.b	FPSR_QBYTE(%a6)
	mov.l	%d0,-(%sp)	# save ctrl bits
	clr.b	Mod_Flag(%a6)	# MOD requested
	bra.b	Mod_Rem

	global	srem
srem:
	clr.b	FPSR_QBYTE(%a6)
	mov.l	%d0,-(%sp)	# save ctrl bits
	mov.b	&0x1,Mod_Flag(%a6)	# REM requested

Mod_Rem:
#..Save sign of X and Y
#..Y is read through a0 (SRC_*) and X through a1 (DST_*).
	movm.l	&0x3f00,-(%sp)	# save data registers (d2-d7, see Restore)
	mov.w	SRC_EX(%a0),%d3
	mov.w	%d3,SignY(%a6)
	and.l	&0x00007FFF,%d3	# Y := |Y|

#
	mov.l	SRC_HI(%a0),%d4
	mov.l	SRC_LO(%a0),%d5	# (D3,D4,D5) is |Y|

	tst.l	%d3	# zero exponent means Y is denormalized
	bne.b	Y_Normal

	mov.l	&0x00003FFE,%d3	# $3FFD + 1
	tst.l	%d4
	bne.b	HiY_not0

# high mantissa longword is zero: move the low longword up (32-bit
# shift), then shift left by the number of leading zero bits.
HiY_0:
	mov.l	%d5,%d4
	clr.l	%d5
	sub.l	&32,%d3
	clr.l	%d6
	bfffo	%d4{&0:&32},%d6	# d6 = offset of the first set bit
	lsl.l	%d6,%d4
	sub.l	%d6,%d3	# (D3,D4,D5) is normalized
#	...with bias $7FFD
	bra.b	Chk_X

# high mantissa longword is non-zero: 64-bit left shift of (D4,D5) by
# the number of leading zero bits in D4.
HiY_not0:
	clr.l	%d6
	bfffo	%d4{&0:&32},%d6	# d6 = offset of the first set bit
	sub.l	%d6,%d3
	lsl.l	%d6,%d4
	mov.l	%d5,%d7	# a copy of D5
	lsl.l	%d6,%d5
	neg.l	%d6
	add.l	&32,%d6	# d6 = 32 - shift count
	lsr.l	%d6,%d7	# bits of D5 that move into D4
	or.l	%d7,%d4	# (D3,D4,D5) normalized
#	...with bias $7FFD
	bra.b	Chk_X

Y_Normal:
	add.l	&0x00003FFE,%d3	# (D3,D4,D5) normalized
#	...with bias $7FFD

Chk_X:
	mov.w	DST_EX(%a1),%d0
	mov.w	%d0,SignX(%a6)
	mov.w	SignY(%a6),%d1
	eor.l	%d0,%d1
	and.l	&0x00008000,%d1
	mov.w	%d1,SignQ(%a6)	# sign(Q) obtained
	and.l	&0x00007FFF,%d0
	mov.l	DST_HI(%a1),%d1
	mov.l	DST_LO(%a1),%d2	# (D0,D1,D2) is |X|
	tst.l	%d0	# zero exponent means X is denormalized
	bne.b	X_Normal
	mov.l	&0x00003FFE,%d0
	tst.l	%d1
	bne.b	HiX_not0

# same normalization as HiY_0 / HiY_not0, applied to (D0,D1,D2)
HiX_0:
	mov.l	%d2,%d1
	clr.l	%d2
	sub.l	&32,%d0
	clr.l	%d6
	bfffo	%d1{&0:&32},%d6
	lsl.l	%d6,%d1
	sub.l	%d6,%d0	# (D0,D1,D2) is normalized
#	...with bias $7FFD
	bra.b	Init

HiX_not0:
	clr.l	%d6
	bfffo	%d1{&0:&32},%d6
	sub.l	%d6,%d0
	lsl.l	%d6,%d1
	mov.l	%d2,%d7	# a copy of D2
	lsl.l	%d6,%d2
	neg.l	%d6
	add.l	&32,%d6
	lsr.l	%d6,%d7
	or.l	%d7,%d1	# (D0,D1,D2) normalized
#	...with bias $7FFD
	bra.b	Init

X_Normal:
	add.l	&0x00003FFE,%d0	# (D0,D1,D2) normalized
#	...with bias $7FFD

Init:
#
	mov.l	%d3,L_SCR1(%a6)	# save biased exp(Y)
	mov.l	%d0,-(%sp)	# save biased exp(X)
	sub.l	%d3,%d0	# L := expo(X)-expo(Y)

	clr.l	%d6	# D6 := carry <- 0
	clr.l	%d3	# D3 is Q
	mov.l	&0,%a1	# A1 is k; j+k=L, Q=0

#..(Carry,D1,D2) is R
	tst.l	%d0
	bge.b	Mod_Loop_pre

#..expo(X) < expo(Y). Thus X = mod(X,Y)
#
	mov.l	(%sp)+,%d0	# restore d0
	bra.w	Get_Mod

Mod_Loop_pre:
	addq.l	&0x4,%sp	# erase exp(X)
#..At this point R = 2^(-L)X; Q = 0; k = 0; and k+j = L
Mod_Loop:
	tst.l	%d6	# test carry bit (d6 is 0 or 0xFF, see scs below)
	bgt.b	R_GT_Y

#..At this point carry = 0, R = (D1,D2), Y = (D4,D5)
	cmp.l	%d1,%d4	# compare hi(R) and hi(Y)
	bne.b	R_NE_Y
	cmp.l	%d2,%d5	# compare lo(R) and lo(Y)
	bne.b	R_NE_Y

#..At this point, R = Y
	bra.w	Rem_is_0

R_NE_Y:
#..use the borrow of the previous compare
	bcs.b	R_LT_Y	# borrow is set iff R < Y

R_GT_Y:
#..If Carry is set, then Y < (Carry,D1,D2) < 2Y. Otherwise, Carry = 0
#..and Y < (D1,D2) < 2Y. Either way, perform R - Y
	sub.l	%d5,%d2	# lo(R) - lo(Y)
	subx.l	%d4,%d1	# hi(R) - hi(Y)
	clr.l	%d6	# clear carry
	addq.l	&1,%d3	# Q := Q + 1

R_LT_Y:
#..At this point, Carry=0, R < Y. R = 2^(k-L)X - QY; k+j = L; j >= 0.
	tst.l	%d0	# see if j = 0.
	beq.b	PostLoop

	add.l	%d3,%d3	# Q := 2Q
	add.l	%d2,%d2	# lo(R) = 2lo(R)
	roxl.l	&1,%d1	# hi(R) = 2hi(R) + carry
	scs	%d6	# set Carry if 2(R) overflows
	addq.l	&1,%a1	# k := k+1
	subq.l	&1,%d0	# j := j - 1
#..At this point, R=(Carry,D1,D2) = 2^(k-L)X - QY, j+k=L, j >= 0, R < 2Y.

	bra.b	Mod_Loop

PostLoop:
#..k = L, j = 0, Carry = 0, R = (D1,D2) = X - QY, R < Y.

#..normalize R.
	mov.l	L_SCR1(%a6),%d0	# new biased expo of R
	tst.l	%d1
	bne.b	HiR_not0

HiR_0:
	mov.l	%d2,%d1
	clr.l	%d2
	sub.l	&32,%d0
	clr.l	%d6
	bfffo	%d1{&0:&32},%d6
	lsl.l	%d6,%d1
	sub.l	%d6,%d0	# (D0,D1,D2) is normalized
#	...with bias $7FFD
	bra.b	Get_Mod

HiR_not0:
	clr.l	%d6
	bfffo	%d1{&0:&32},%d6
	bmi.b	Get_Mod	# already normalized
	sub.l	%d6,%d0
	lsl.l	%d6,%d1
	mov.l	%d2,%d7	# a copy of D2
	lsl.l	%d6,%d2
	neg.l	%d6
	add.l	&32,%d6
	lsr.l	%d6,%d7
	or.l	%d7,%d1	# (D0,D1,D2) normalized

#
# Build R and Y in memory and load R into fp0. If the working exponent
# of R is at least 0x41FE the 0x3FFE offset is removed from both
# exponents here (No_Scale); otherwise both keep the offset and the
# result is multiplied by Scale at the end (Sc_Flag set).
Get_Mod:
	cmp.l	%d0,&0x000041FE
	bge.b	No_Scale
Do_Scale:
	mov.w	%d0,R(%a6)
	mov.l	%d1,R_Hi(%a6)
	mov.l	%d2,R_Lo(%a6)
	mov.l	L_SCR1(%a6),%d6
	mov.w	%d6,Y(%a6)
	mov.l	%d4,Y_Hi(%a6)
	mov.l	%d5,Y_Lo(%a6)
	fmov.x	R(%a6),%fp0	# no exception
	mov.b	&1,Sc_Flag(%a6)
	bra.b	ModOrRem
No_Scale:
	mov.l	%d1,R_Hi(%a6)
	mov.l	%d2,R_Lo(%a6)
	sub.l	&0x3FFE,%d0
	mov.w	%d0,R(%a6)
	mov.l	L_SCR1(%a6),%d6
	sub.l	&0x3FFE,%d6
	mov.l	%d6,L_SCR1(%a6)
	fmov.x	R(%a6),%fp0
	mov.w	%d6,Y(%a6)
	mov.l	%d4,Y_Hi(%a6)
	mov.l	%d5,Y_Lo(%a6)
	clr.b	Sc_Flag(%a6)

#
# For REM, compare R with Y/2: first by exponent, then (when the
# exponents are equal) by mantissa.
ModOrRem:
	tst.b	Mod_Flag(%a6)
	beq.b	Fix_Sign	# MOD requested: R is the answer

	mov.l	L_SCR1(%a6),%d6	# new biased expo(Y)
	subq.l	&1,%d6	# biased expo(Y/2)
	cmp.l	%d0,%d6
	blt.b	Fix_Sign	# R < Y/2
	bgt.b	Last_Sub	# R > Y/2

	cmp.l	%d1,%d4
	bne.b	Not_EQ
	cmp.l	%d2,%d5
	bne.b	Not_EQ
	bra.w	Tie_Case	# R = Y/2

Not_EQ:
	bcs.b	Fix_Sign	# borrow from the last compare: R < Y/2

Last_Sub:
#
	fsub.x	Y(%a6),%fp0	# no exceptions
	addq.l	&1,%d3	# Q := Q + 1

#
Fix_Sign:
#..Get sign of X
	mov.w	SignX(%a6),%d6
	bge.b	Get_Q	# sign bit clear: keep fp0 as is
	fneg.x	%fp0

#..Get Q
#
# The quotient byte is the sign of Q in bit 7 (SignQ is 0x8000 or 0,
# shifted right by 8) combined with the low 7 bits of Q.
Get_Q:
	clr.l	%d6
	mov.w	SignQ(%a6),%d6	# D6 is sign(Q)
	mov.l	&8,%d7
	lsr.l	%d7,%d6
	and.l	&0x0000007F,%d3	# 7 bits of Q
	or.l	%d6,%d3	# sign and bits of Q
#	swap	%d3
#	fmov.l	%fpsr,%d6
#	and.l	&0xFF00FFFF,%d6
#	or.l	%d3,%d6
#	fmov.l	%d6,%fpsr	# put Q in fpsr
	mov.b	%d3,FPSR_QBYTE(%a6)	# put Q in fpsr

#
Restore:
	movm.l	(%sp)+,&0xfc	# {%d2-%d7}
	mov.l	(%sp)+,%d0	# ctrl bits saved at entry
	fmov.l	%d0,%fpcr
	tst.b	Sc_Flag(%a6)
	beq.b	Finish
	mov.b	&FMUL_OP,%d1	# last inst is MUL
	fmul.x	Scale(%pc),%fp0	# may cause underflow
	bra	t_catch2
# the '040 package did this apparently to see if the dst operand for the
# preceding fmul was a denorm. but, it better not have been since the
# algorithm just got done playing with fp0 and expected no exceptions
# as a result. trust me...
#	bra	t_avoid_unsupp	# check for denorm as a
#				;result of the scaling

Finish:
	mov.b	&FMOV_OP,%d1	# last inst is MOVE
	fmov.x	%fp0,%fp0	# capture exceptions & round
	bra	t_catch2

Rem_is_0:
#..R = 2^(-j)X - Q Y = Y, thus R = 0 and quotient = 2^j (Q+1)
	addq.l	&1,%d3
	cmp.l	%d0,&8	# D0 is j
	bge.b	Q_Big	# j >= 8: the quotient bits kept by Get_Q are all zero

	lsl.l	%d0,%d3
	bra.b	Set_R_0

Q_Big:
	clr.l	%d3

Set_R_0:
	fmov.s	&0x00000000,%fp0
	clr.b	Sc_Flag(%a6)
	bra.w	Fix_Sign

Tie_Case:
#..Check parity of Q
	mov.l	%d3,%d6
	and.l	&0x00000001,%d6
	tst.l	%d6
	beq.w	Fix_Sign	# Q is even

#..Q is odd, Q := Q + 1, signX := -signX
	addq.l	&1,%d3
	mov.w	SignX(%a6),%d6
	eor.l	&0x00008000,%d6
	mov.w	%d6,SignX(%a6)
	bra.w	Fix_Sign

#########################################################################
# XDEF **************************************************************** #
# tag(): return the optype of the input ext fp number #
# #
# This routine is used by the 060FPLSP. #
# #
# XREF **************************************************************** #
# None #
# #
# INPUT *************************************************************** #
# a0 = pointer to extended precision operand #
# #
# OUTPUT ************************************************************** #
# d0 = value of type tag #
# one of: NORM, INF, QNAN, SNAN, DENORM, ZERO #
# #
# ALGORITHM *********************************************************** #
# Simply test the exponent, j-bit, and mantissa values to #
# determine the type of operand. #
# If it's an unnormalized zero, alter the operand and force it #
# to be a normal zero.
#									#
#########################################################################

# Notes on the code below:
#  - the tag is written with mov.b, so only the low byte of d0 holds the
#    result; the upper bytes keep whatever was loaded during the tests.
#  - every NaN is reported as QNAN; no path here returns SNAN.
#  - for a non-zero exponent with the j-bit clear the result is whatever
#    unnorm_fix leaves in d0 (unnorm_fix is defined elsewhere).
#  - is_unnorm_reg_x is not branched to from within tag().
	global	tag
tag:
	mov.w	FTEMP_EX(%a0), %d0	# extract exponent
	andi.w	&0x7fff, %d0	# strip off sign
	cmpi.w	%d0, &0x7fff	# is (EXP == MAX)?
	beq.b	inf_or_nan_x
not_inf_or_nan_x:
	btst	&0x7,FTEMP_HI(%a0)	# is the msb of the mantissa (j-bit) set?
	beq.b	not_norm_x	# no
is_norm_x:
	mov.b	&NORM, %d0
	rts
not_norm_x:
	tst.w	%d0	# is exponent = 0?
	bne.b	is_unnorm_x	# no; j-bit clear with non-zero exponent
not_unnorm_x:
	tst.l	FTEMP_HI(%a0)	# exponent = 0: any mantissa bit set?
	bne.b	is_denorm_x
	tst.l	FTEMP_LO(%a0)
	bne.b	is_denorm_x
is_zero_x:
	mov.b	&ZERO, %d0
	rts
is_denorm_x:
	mov.b	&DENORM, %d0
	rts
is_unnorm_x:
	bsr.l	unnorm_fix	# convert to norm,denorm,or zero
	rts
is_unnorm_reg_x:
	mov.b	&UNNORM, %d0
	rts
inf_or_nan_x:
	tst.l	FTEMP_LO(%a0)	# any low mantissa bit set means NaN
	bne.b	is_nan_x
	mov.l	FTEMP_HI(%a0), %d0
	and.l	&0x7fffffff, %d0	# msb is a don't care!
	bne.b	is_nan_x
is_inf_x:
	mov.b	&INF, %d0
	rts
is_nan_x:
	mov.b	&QNAN, %d0
	rts

#############################################################

# default NaN returned by t_operr when OPERR is disabled
qnan:	long	0x7fff0000, 0xffffffff, 0xffffffff

#########################################################################
# XDEF **************************************************************** #
# t_dz(): Handle 060FPLSP dz exception for "flogn" emulation. #
# t_dz2(): Handle 060FPLSP dz exception for "fatanh" emulation. #
# #
# These routines are used by the 060FPLSP package. #
# #
# XREF **************************************************************** #
# None #
# #
# INPUT *************************************************************** #
# a0 = pointer to extended precision source operand. #
# #
# OUTPUT ************************************************************** #
# fp0 = default DZ result.
#									#
# ALGORITHM *********************************************************** #
# Transcendental emulation for the 060FPLSP has detected that #
# a DZ exception should occur for the instruction. If DZ is disabled, #
# return the default result. #
# If DZ is enabled, the dst operand should be returned unscathed #
# in fp0 while fp1 is used to create a DZ exception so that the #
# operating system can log that such an event occurred. #
# #
#########################################################################

# t_dz selects the sign of the infinity from the sign of the source
# operand; entering at t_dz2 always takes the -INF path.
	global	t_dz
t_dz:
	tst.b	SRC_EX(%a0)	# check sign for neg or pos
	bpl.b	dz_pinf	# branch if pos sign

	global	t_dz2
t_dz2:
	ori.l	&dzinf_mask+neg_mask,USER_FPSR(%a6)	# set N/I/DZ/ADZ

	btst	&dz_bit,FPCR_ENABLE(%a6)
	bne.b	dz_minf_ena

# dz is disabled. return a -INF.
	fmov.s	&0xff800000,%fp0	# return -INF
	rts

# dz is enabled. create a dz exception so the user can record it
# but use fp1 instead. return the dst operand unscathed in fp0.
dz_minf_ena:
	fmovm.x	EXC_FP0(%a6),&0x80	# return fp0 unscathed
	fmov.l	USER_FPCR(%a6),%fpcr
	fmov.s	&0xbf800000,%fp1	# load -1
	fdiv.s	&0x00000000,%fp1	# -1 / 0
	rts

dz_pinf:
	ori.l	&dzinf_mask,USER_FPSR(%a6)	# set I/DZ/ADZ

	btst	&dz_bit,FPCR_ENABLE(%a6)
	bne.b	dz_pinf_ena

# dz is disabled. return a +INF.
	fmov.s	&0x7f800000,%fp0	# return +INF
	rts

# dz is enabled. create a dz exception so the user can record it
# but use fp1 instead. return the dst operand unscathed in fp0.
dz_pinf_ena:
	fmovm.x	EXC_FP0(%a6),&0x80	# return fp0 unscathed
	fmov.l	USER_FPCR(%a6),%fpcr
	fmov.s	&0x3f800000,%fp1	# load +1
	fdiv.s	&0x00000000,%fp1	# +1 / 0
	rts

#########################################################################
# XDEF **************************************************************** #
# t_operr(): Handle 060FPLSP OPERR exception during emulation. #
# #
# This routine is used by the 060FPLSP package. #
# #
# XREF **************************************************************** #
# None. #
# #
# INPUT *************************************************************** #
# fp1 = source operand #
# #
# OUTPUT ************************************************************** #
# fp0 = default result #
# fp1 = unchanged #
# #
# ALGORITHM *********************************************************** #
# An operand error should occur as the result of transcendental #
# emulation in the 060FPLSP. If OPERR is disabled, just return a NAN #
# in fp0. If OPERR is enabled, return the dst operand unscathed in fp0 #
# and the source operand in fp1. Use fp2 to create an OPERR exception #
# so that the operating system can log the event. #
# #
#########################################################################

	global	t_operr
t_operr:
	ori.l	&opnan_mask,USER_FPSR(%a6)	# set NAN/OPERR/AIOP

	btst	&operr_bit,FPCR_ENABLE(%a6)
	bne.b	operr_ena

# operr is disabled. return a QNAN in fp0
	fmovm.x	qnan(%pc),&0x80	# return QNAN
	rts

# operr is enabled. create an operr exception so the user can record it
# but use fp2 instead. return the dst operand unscathed in fp0.
# (the fmovm register mask for fp2 is 0x04 when pushing with -(%sp)
# and 0x20 when popping with (%sp)+.)
operr_ena:
	fmovm.x	EXC_FP0(%a6),&0x80	# return fp0 unscathed
	fmov.l	USER_FPCR(%a6),%fpcr
	fmovm.x	&0x04,-(%sp)	# save fp2
	fmov.s	&0x7f800000,%fp2	# load +INF
	fmul.s	&0x00000000,%fp2	# +INF x 0
	fmovm.x	(%sp)+,&0x20	# restore fp2
	rts

# Extended precision operands multiplied together by the t_ovfl* and
# t_unfl* handlers to produce an overflow or underflow.
pls_huge:
	long	0x7ffe0000,0xffffffff,0xffffffff
mns_huge:
	long	0xfffe0000,0xffffffff,0xffffffff
pls_tiny:
	long	0x00000000,0x80000000,0x00000000
mns_tiny:
	long	0x80000000,0x80000000,0x00000000

#########################################################################
# XDEF **************************************************************** #
# t_unfl(): Handle 060FPLSP underflow exception during emulation. #
# t_unfl2(): Handle 060FPLSP underflow exception during #
# emulation. result always positive. #
# #
# This routine is used by the 060FPLSP package. #
# #
# XREF **************************************************************** #
# None. #
# #
# INPUT *************************************************************** #
# a0 = pointer to extended precision source operand #
# #
# OUTPUT ************************************************************** #
# fp0 = default underflow result #
# #
# ALGORITHM *********************************************************** #
# An underflow should occur as the result of transcendental #
# emulation in the 060FPLSP. Create an underflow by using "fmul" #
# and two very small numbers of appropriate sign so that the operating #
# system can log the event.
#									#
#########################################################################

# t_unfl():  the underflow result takes the sign of the operand that a0
#            points to (tested through SRC_EX(%a0)).
# t_unfl2(): the underflow result is always positive. It is the exit
#            used when 2**X or 10**X underflows for a large negative X,
#            where the true result is a tiny positive number.
#
# Both paths record the underflow/inexact bits, multiply two tiny
# operands under the user's FPCR so that the operating system can log
# the event, and copy the condition code byte of the resulting FPSR to
# FPSR_CC(%a6). d0 is clobbered.
#
# t_unfl2 used to label the negative path (mns_tiny * pls_tiny with the
# N bit set), which contradicted its "always positive" contract; it now
# labels the positive path, mirroring t_ovfl2.
	global	t_unfl
t_unfl:
	tst.b	SRC_EX(%a0)	# is the operand negative?
	bpl.b	unf_pos	# no; produce a positive result

# negative result: (-tiny) * (+tiny)
	ori.l	&unfinx_mask+neg_mask,USER_FPSR(%a6)	# set N/UNFL/INEX2/AUNFL/AINEX

	fmov.l	USER_FPCR(%a6),%fpcr
	fmovm.x	mns_tiny(%pc),&0x80
	fmul.x	pls_tiny(%pc),%fp0

	fmov.l	%fpsr,%d0
	rol.l	&0x8,%d0	# rotate the condition code byte into the low byte
	mov.b	%d0,FPSR_CC(%a6)
	rts

# positive result: (+tiny) * (+tiny)
	global	t_unfl2
t_unfl2:
unf_pos:
	ori.w	&unfinx_mask,FPSR_EXCEPT(%a6)	# set UNFL/INEX2/AUNFL/AINEX

	fmov.l	USER_FPCR(%a6),%fpcr
	fmovm.x	pls_tiny(%pc),&0x80
	fmul.x	%fp0,%fp0

	fmov.l	%fpsr,%d0
	rol.l	&0x8,%d0	# rotate the condition code byte into the low byte
	mov.b	%d0,FPSR_CC(%a6)
	rts

#########################################################################
# XDEF **************************************************************** #
# t_ovfl(): Handle 060FPLSP overflow exception during emulation. #
# (monadic) #
# t_ovfl2(): Handle 060FPLSP overflow exception during #
# emulation. result always positive. (dyadic) #
# t_ovfl_sc(): Handle 060FPLSP overflow exception during #
# emulation for "fscale". #
# #
# This routine is used by the 060FPLSP package. #
# #
# XREF **************************************************************** #
# None. #
# #
# INPUT *************************************************************** #
# a0 = pointer to extended precision source operand #
# #
# OUTPUT ************************************************************** #
# fp0 = default overflow result #
# #
# ALGORITHM *********************************************************** #
# An overflow should occur as the result of transcendental #
# emulation in the 060FPLSP. Create an overflow by using "fmul" #
# and two very large numbers of appropriate sign so that the operating #
# system can log the event.
#									#
# For t_ovfl_sc() we take special care not to lose the INEX2 bit. #
# #
#########################################################################

# t_ovfl_sc: d0 holds the rounding precision/mode byte on entry. OVFL,
# AOVFL and AINEX are always set; INEX2 is added only when the rounding
# precision is single or double and the normalized mantissa has bits
# set below that precision.
	global	t_ovfl_sc
t_ovfl_sc:
	ori.l	&ovfl_inx_mask,USER_FPSR(%a6)	# set OVFL/AOVFL/AINEX

	mov.b	%d0,%d1	# fetch rnd prec,mode
	andi.b	&0xc0,%d1	# extract prec
	beq.w	ovfl_work	# precision field is zero: no INEX2 check

# dst op is a DENORM. we have to normalize the mantissa to see if the
# result would be inexact for the given precision. make a copy of the
# dst so we don't screw up the version passed to us.
	mov.w	LOCAL_EX(%a0),FP_SCR0_EX(%a6)
	mov.l	LOCAL_HI(%a0),FP_SCR0_HI(%a6)
	mov.l	LOCAL_LO(%a0),FP_SCR0_LO(%a6)
	lea	FP_SCR0(%a6),%a0	# pass ptr to FP_SCR0
	movm.l	&0xc080,-(%sp)	# save d0-d1/a0
	bsr.l	norm	# normalize mantissa
	movm.l	(%sp)+,&0x0103	# restore d0-d1/a0

	cmpi.b	%d1,&0x40	# is precision sgl?
	bne.b	ovfl_sc_dbl	# no; dbl
ovfl_sc_sgl:
	tst.l	LOCAL_LO(%a0)	# is lo lw of sgl set?
	bne.b	ovfl_sc_inx	# yes
	tst.b	3+LOCAL_HI(%a0)	# is lo byte of hi lw set?
	bne.b	ovfl_sc_inx	# yes
	bra.w	ovfl_work	# don't set INEX2
ovfl_sc_dbl:
	mov.l	LOCAL_LO(%a0),%d1	# are any of lo 11 bits of
	andi.l	&0x7ff,%d1	# dbl mantissa set?
	beq.w	ovfl_work	# no; don't set INEX2
ovfl_sc_inx:
	ori.l	&inex2_mask,USER_FPSR(%a6)	# set INEX2
	bra.b	ovfl_work	# continue

# t_ovfl: the sign of the overflow result follows the sign of the
# operand that a0 points to. d0 is clobbered.
	global	t_ovfl
t_ovfl:
	ori.w	&ovfinx_mask,FPSR_EXCEPT(%a6)	# set OVFL/INEX2/AOVFL/AINEX
ovfl_work:
	tst.b	SRC_EX(%a0)	# is the operand negative?
	bpl.b	ovfl_p	# no
ovfl_m:
	fmov.l	USER_FPCR(%a6),%fpcr
	fmovm.x	mns_huge(%pc),&0x80
	fmul.x	pls_huge(%pc),%fp0	# (-huge) * (+huge)

	fmov.l	%fpsr,%d0
	rol.l	&0x8,%d0	# rotate the condition code byte into the low byte
# NOTE(review): neg_mask is also OR'ed into the 32-bit USER_FPSR by other
# handlers in this file; confirm it is the intended constant for this
# byte-sized operation.
	ori.b	&neg_mask,%d0
	mov.b	%d0,FPSR_CC(%a6)
	rts
ovfl_p:
	fmov.l	USER_FPCR(%a6),%fpcr
	fmovm.x	pls_huge(%pc),&0x80
	fmul.x	pls_huge(%pc),%fp0	# (+huge) * (+huge)

	fmov.l	%fpsr,%d0
	rol.l	&0x8,%d0	# rotate the condition code byte into the low byte
	mov.b	%d0,FPSR_CC(%a6)
	rts

# t_ovfl2: overflow result is always positive; a0 is not examined.
	global	t_ovfl2
t_ovfl2:
	ori.w	&ovfinx_mask,FPSR_EXCEPT(%a6)	# set OVFL/INEX2/AOVFL/AINEX
	fmov.l	USER_FPCR(%a6),%fpcr
	fmovm.x	pls_huge(%pc),&0x80
	fmul.x	pls_huge(%pc),%fp0

	fmov.l	%fpsr,%d0
	rol.l	&0x8,%d0	# rotate the condition code byte into the low byte
	mov.b	%d0,FPSR_CC(%a6)
	rts

#########################################################################
# XDEF **************************************************************** #
# t_catch(): Handle 060FPLSP OVFL,UNFL,or INEX2 exception during #
# emulation. #
# t_catch2(): Handle 060FPLSP OVFL,UNFL,or INEX2 exception during #
# emulation. #
# #
# These routines are used by the 060FPLSP package. #
# #
# XREF **************************************************************** #
# None.
# 10122# # 10123# INPUT *************************************************************** # 10124# fp0 = default underflow or overflow result # 10125# # 10126# OUTPUT ************************************************************** # 10127# fp0 = default result # 10128# # 10129# ALGORITHM *********************************************************** # 10130# If an overflow or underflow occurred during the last # 10131# instruction of transcendental 060FPLSP emulation, then it has already # 10132# occurred and has been logged. Now we need to see if an inexact # 10133# exception should occur. # 10134# # 10135######################################################################### 10136 10137 global t_catch2 10138t_catch2: 10139 fmov.l %fpsr,%d0 10140 or.l %d0,USER_FPSR(%a6) 10141 bra.b inx2_work 10142 10143 global t_catch 10144t_catch: 10145 fmov.l %fpsr,%d0 10146 or.l %d0,USER_FPSR(%a6) 10147 10148######################################################################### 10149# XDEF **************************************************************** # 10150# t_inx2(): Handle inexact 060FPLSP exception during emulation. # 10151# t_pinx2(): Handle inexact 060FPLSP exception for "+" results. # 10152# t_minx2(): Handle inexact 060FPLSP exception for "-" results. # 10153# # 10154# XREF **************************************************************** # 10155# None. # 10156# # 10157# INPUT *************************************************************** # 10158# fp0 = default result # 10159# # 10160# OUTPUT ************************************************************** # 10161# fp0 = default result # 10162# # 10163# ALGORITHM *********************************************************** # 10164# The last instruction of transcendental emulation for the # 10165# 060FPLSP should be inexact. So, if inexact is enabled, then we create # 10166# the event here by adding a large and very small number together # 10167# so that the operating system can log the event. 
# 10168# Must check, too, if the result was zero, in which case we just # 10169# set the FPSR bits and return. # 10170# # 10171######################################################################### 10172 10173 global t_inx2 10174t_inx2: 10175 fblt.w t_minx2 10176 fbeq.w inx2_zero 10177 10178 global t_pinx2 10179t_pinx2: 10180 ori.w &inx2a_mask,FPSR_EXCEPT(%a6) # set INEX2/AINEX 10181 bra.b inx2_work 10182 10183 global t_minx2 10184t_minx2: 10185 ori.l &inx2a_mask+neg_mask,USER_FPSR(%a6) 10186 10187inx2_work: 10188 btst &inex2_bit,FPCR_ENABLE(%a6) # is inexact enabled? 10189 bne.b inx2_work_ena # yes 10190 rts 10191inx2_work_ena: 10192 fmov.l USER_FPCR(%a6),%fpcr # insert user's exceptions 10193 fmov.s &0x3f800000,%fp1 # load +1 10194 fadd.x pls_tiny(%pc),%fp1 # cause exception 10195 rts 10196 10197inx2_zero: 10198 mov.b &z_bmask,FPSR_CC(%a6) 10199 ori.w &inx2a_mask,2+USER_FPSR(%a6) # set INEX/AINEX 10200 rts 10201 10202######################################################################### 10203# XDEF **************************************************************** # 10204# t_extdnrm(): Handle DENORM inputs in 060FPLSP. # 10205# t_resdnrm(): Handle DENORM inputs in 060FPLSP for "fscale". # 10206# # 10207# This routine is used by the 060FPLSP package. # 10208# # 10209# XREF **************************************************************** # 10210# None. # 10211# # 10212# INPUT *************************************************************** # 10213# a0 = pointer to extended precision input operand # 10214# # 10215# OUTPUT ************************************************************** # 10216# fp0 = default result # 10217# # 10218# ALGORITHM *********************************************************** # 10219# For all functions that have a denormalized input and that # 10220# f(x)=x, this is the entry point. # 10221# DENORM value is moved using "fmove" which triggers an exception # 10222# if enabled so the operating system can log the event. 
# 10223# # 10224######################################################################### 10225 10226 global t_extdnrm 10227t_extdnrm: 10228 fmov.l USER_FPCR(%a6),%fpcr 10229 fmov.x SRC_EX(%a0),%fp0 10230 fmov.l %fpsr,%d0 10231 ori.l &unfinx_mask,%d0 10232 or.l %d0,USER_FPSR(%a6) 10233 rts 10234 10235 global t_resdnrm 10236t_resdnrm: 10237 fmov.l USER_FPCR(%a6),%fpcr 10238 fmov.x SRC_EX(%a0),%fp0 10239 fmov.l %fpsr,%d0 10240 or.l %d0,USER_FPSR(%a6) 10241 rts 10242 10243########################################## 10244 10245# 10246# sto_cos: 10247# This is used by fsincos library emulation. The correct 10248# values are already in fp0 and fp1 so we do nothing here. 10249# 10250 global sto_cos 10251sto_cos: 10252 rts 10253 10254########################################## 10255 10256# 10257# dst_qnan --- force result when destination is a NaN 10258# 10259 global dst_qnan 10260dst_qnan: 10261 fmov.x DST(%a1),%fp0 10262 tst.b DST_EX(%a1) 10263 bmi.b dst_qnan_m 10264dst_qnan_p: 10265 mov.b &nan_bmask,FPSR_CC(%a6) 10266 rts 10267dst_qnan_m: 10268 mov.b &nan_bmask+neg_bmask,FPSR_CC(%a6) 10269 rts 10270 10271# 10272# src_qnan --- force result when source is a NaN 10273# 10274 global src_qnan 10275src_qnan: 10276 fmov.x SRC(%a0),%fp0 10277 tst.b SRC_EX(%a0) 10278 bmi.b src_qnan_m 10279src_qnan_p: 10280 mov.b &nan_bmask,FPSR_CC(%a6) 10281 rts 10282src_qnan_m: 10283 mov.b &nan_bmask+neg_bmask,FPSR_CC(%a6) 10284 rts 10285 10286########################################## 10287 10288# 10289# Native instruction support 10290# 10291# Some systems may need entry points even for 68060 native 10292# instructions. These routines are provided for 10293# convenience. 
#
# Pattern visible in the routines below: operands are read from the
# stack relative to %sp and the result is left in fp0.  The sgl/dbl
# dyadic forms push FPCR, clear it while loading the dst operand, pop
# it back, and only then execute the arithmetic instruction; the stack
# offsets differ before and after the pop because of the pushed FPCR.
#
        global  _fadds_
_fadds_:
        fmov.l  %fpcr,-(%sp)            # save fpcr
        fmov.l  &0x00000000,%fpcr       # clear fpcr for load
        fmov.s  0x8(%sp),%fp0           # load sgl dst
        fmov.l  (%sp)+,%fpcr            # restore fpcr
        fadd.s  0x8(%sp),%fp0           # fadd w/ sgl src
        rts

        global  _faddd_
_faddd_:
        fmov.l  %fpcr,-(%sp)            # save fpcr
        fmov.l  &0x00000000,%fpcr       # clear fpcr for load
        fmov.d  0x8(%sp),%fp0           # load dbl dst
        fmov.l  (%sp)+,%fpcr            # restore fpcr
        fadd.d  0xc(%sp),%fp0           # fadd w/ dbl src
        rts

        global  _faddx_
_faddx_:
        fmovm.x 0x4(%sp),&0x80          # load ext dst
        fadd.x  0x10(%sp),%fp0          # fadd w/ ext src
        rts

        global  _fsubs_
_fsubs_:
        fmov.l  %fpcr,-(%sp)            # save fpcr
        fmov.l  &0x00000000,%fpcr       # clear fpcr for load
        fmov.s  0x8(%sp),%fp0           # load sgl dst
        fmov.l  (%sp)+,%fpcr            # restore fpcr
        fsub.s  0x8(%sp),%fp0           # fsub w/ sgl src
        rts

        global  _fsubd_
_fsubd_:
        fmov.l  %fpcr,-(%sp)            # save fpcr
        fmov.l  &0x00000000,%fpcr       # clear fpcr for load
        fmov.d  0x8(%sp),%fp0           # load dbl dst
        fmov.l  (%sp)+,%fpcr            # restore fpcr
        fsub.d  0xc(%sp),%fp0           # fsub w/ dbl src
        rts

        global  _fsubx_
_fsubx_:
        fmovm.x 0x4(%sp),&0x80          # load ext dst
        fsub.x  0x10(%sp),%fp0          # fsub w/ ext src
        rts

        global  _fmuls_
_fmuls_:
        fmov.l  %fpcr,-(%sp)            # save fpcr
        fmov.l  &0x00000000,%fpcr       # clear fpcr for load
        fmov.s  0x8(%sp),%fp0           # load sgl dst
        fmov.l  (%sp)+,%fpcr            # restore fpcr
        fmul.s  0x8(%sp),%fp0           # fmul w/ sgl src
        rts

        global  _fmuld_
_fmuld_:
        fmov.l  %fpcr,-(%sp)            # save fpcr
        fmov.l  &0x00000000,%fpcr       # clear fpcr for load
        fmov.d  0x8(%sp),%fp0           # load dbl dst
        fmov.l  (%sp)+,%fpcr            # restore fpcr
        fmul.d  0xc(%sp),%fp0           # fmul w/ dbl src
        rts

        global  _fmulx_
_fmulx_:
        fmovm.x 0x4(%sp),&0x80          # load ext dst
        fmul.x  0x10(%sp),%fp0          # fmul w/ ext src
        rts

        global  _fdivs_
_fdivs_:
        fmov.l  %fpcr,-(%sp)            # save fpcr
        fmov.l  &0x00000000,%fpcr       # clear fpcr for load
        fmov.s  0x8(%sp),%fp0           # load sgl dst
        fmov.l  (%sp)+,%fpcr            # restore fpcr
        fdiv.s  0x8(%sp),%fp0           # fdiv w/ sgl src
        rts

        global  _fdivd_
_fdivd_:
        fmov.l  %fpcr,-(%sp)            # save fpcr
        fmov.l  &0x00000000,%fpcr       # clear fpcr for load
        fmov.d  0x8(%sp),%fp0           # load dbl dst
        fmov.l  (%sp)+,%fpcr            # restore fpcr
        fdiv.d  0xc(%sp),%fp0           # fdiv w/ dbl src
        rts

        global  _fdivx_
_fdivx_:
        fmovm.x 0x4(%sp),&0x80          # load ext dst
        fdiv.x  0x10(%sp),%fp0          # fdiv w/ ext src
        rts

        global  _fabss_
_fabss_:
        fabs.s  0x4(%sp),%fp0           # fabs w/ sgl src
        rts

        global  _fabsd_
_fabsd_:
        fabs.d  0x4(%sp),%fp0           # fabs w/ dbl src
        rts

        global  _fabsx_
_fabsx_:
        fabs.x  0x4(%sp),%fp0           # fabs w/ ext src
        rts

        global  _fnegs_
_fnegs_:
        fneg.s  0x4(%sp),%fp0           # fneg w/ sgl src
        rts

        global  _fnegd_
_fnegd_:
        fneg.d  0x4(%sp),%fp0           # fneg w/ dbl src
        rts

        global  _fnegx_
_fnegx_:
        fneg.x  0x4(%sp),%fp0           # fneg w/ ext src
        rts

        global  _fsqrts_
_fsqrts_:
        fsqrt.s 0x4(%sp),%fp0           # fsqrt w/ sgl src
        rts

        global  _fsqrtd_
_fsqrtd_:
        fsqrt.d 0x4(%sp),%fp0           # fsqrt w/ dbl src
        rts

        global  _fsqrtx_
_fsqrtx_:
        fsqrt.x 0x4(%sp),%fp0           # fsqrt w/ ext src
        rts

        global  _fints_
_fints_:
        fint.s  0x4(%sp),%fp0           # fint w/ sgl src
        rts

        global  _fintd_
_fintd_:
        fint.d  0x4(%sp),%fp0           # fint w/ dbl src
        rts

        global  _fintx_
_fintx_:
        fint.x  0x4(%sp),%fp0           # fint w/ ext src
        rts

        global  _fintrzs_
_fintrzs_:
        fintrz.s 0x4(%sp),%fp0          # fintrz w/ sgl src
        rts
# _fintrzd_ / _fintrzx_: "fintrz" entry points for dbl and ext operands;
# the operand is read from 0x4(%sp) and the result is left in fp0.
        global  _fintrzd_
_fintrzd_:
        fintrz.d 0x4(%sp),%fp0          # fintrz w/ dbl src
        rts

        global  _fintrzx_
_fintrzx_:
        fintrz.x 0x4(%sp),%fp0          # fintrz w/ ext src
        rts

########################################################################

#########################################################################
# src_zero(): Return signed zero according to sign of src operand.      #
#########################################################################
        global  src_zero
src_zero:
        tst.b   SRC_EX(%a0)             # get sign of src operand
        bmi.b   ld_mzero                # if neg, load neg zero
# otherwise execution continues into ld_pzero below

#
# ld_pzero(): return a positive zero.
#
        global  ld_pzero
ld_pzero:
        fmov.s  &0x00000000,%fp0        # load +0
        mov.b   &z_bmask,FPSR_CC(%a6)   # set 'Z' ccode bit
        rts

# ld_mzero(): return a negative zero.
        global  ld_mzero
ld_mzero:
        fmov.s  &0x80000000,%fp0        # load -0
        mov.b   &neg_bmask+z_bmask,FPSR_CC(%a6)  # set 'N','Z' ccode bits
        rts

#########################################################################
# dst_zero(): Return signed zero according to sign of dst operand.      #
#########################################################################
        global  dst_zero
dst_zero:
        tst.b   DST_EX(%a1)             # get sign of dst operand
        bmi.b   ld_mzero                # if neg, load neg zero
        bra.b   ld_pzero                # load positive zero

#########################################################################
# src_inf(): Return signed inf according to sign of src operand.        #
#########################################################################
        global  src_inf
src_inf:
        tst.b   SRC_EX(%a0)             # get sign of src operand
        bmi.b   ld_minf                 # if negative branch
# otherwise execution continues into the ld_pinf routine that follows

#
# ld_pinf(): return a positive infinity.
#
        global  ld_pinf
ld_pinf:
        fmov.s  &0x7f800000,%fp0        # load +INF
        mov.b   &inf_bmask,FPSR_CC(%a6) # set 'INF' ccode bit
        rts

#
# ld_minf():return a negative infinity.
#
        global  ld_minf
ld_minf:
        fmov.s  &0xff800000,%fp0        # load -INF
        mov.b   &neg_bmask+inf_bmask,FPSR_CC(%a6)  # set 'N','I' ccode bits
        rts

#########################################################################
# dst_inf(): Return signed inf according to sign of dst operand.        #
#########################################################################
        global  dst_inf
dst_inf:
        tst.b   DST_EX(%a1)             # get sign of dst operand
        bmi.b   ld_minf                 # if negative branch
        bra.b   ld_pinf                 # otherwise load +INF

        global  szr_inf
#################################################################
# szr_inf(): Return +ZERO for a negative src operand or         #
#            +INF for a positive src operand.                   #
#            Routine used for fetox, ftwotox, and ftentox.      #
#################################################################
szr_inf:
        tst.b   SRC_EX(%a0)             # check sign of source
        bmi.b   ld_pzero                # negative: return +0
        bra.b   ld_pinf                 # otherwise: return +INF

#########################################################################
# sopr_inf(): Return +INF for a positive src operand or                 #
#             jump to operand error routine for a negative src operand. #
#             Routine used for flogn, flognp1, flog10, and flog2.       #
#########################################################################
        global  sopr_inf
sopr_inf:
        tst.b   SRC_EX(%a0)             # check sign of source
        bmi.w   t_operr                 # negative: operand error
        bra.b   ld_pinf                 # otherwise: return +INF

#################################################################
# setoxm1i(): Return minus one for a negative src operand or    #
#             positive infinity for a positive src operand.     #
#             Routine used for fetoxm1.                         #
#################################################################
        global  setoxm1i
setoxm1i:
        tst.b   SRC_EX(%a0)             # check sign of source
        bmi.b   ld_mone                 # negative: return -1
        bra.b   ld_pinf                 # otherwise: return +INF

#########################################################################
# src_one(): Return signed one according to sign of src operand.        #
#########################################################################
        global  src_one
src_one:
        tst.b   SRC_EX(%a0)             # check sign of source
        bmi.b   ld_mone                 # negative: return -1
# otherwise execution continues into ld_pone below

#
# ld_pone(): return positive one.
#
        global  ld_pone
ld_pone:
        fmov.s  &0x3f800000,%fp0        # load +1
        clr.b   FPSR_CC(%a6)            # clear all ccode bits
        rts

#
# ld_mone(): return negative one.
#
        global  ld_mone
ld_mone:
        fmov.s  &0xbf800000,%fp0        # load -1
        mov.b   &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
        rts

# +pi/2 and -pi/2 constants (three longwords each), loaded by
# ld_ppi2/ld_mpi2 below with fmov.x.
ppiby2: long    0x3fff0000, 0xc90fdaa2, 0x2168c235
mpiby2: long    0xbfff0000, 0xc90fdaa2, 0x2168c235

#################################################################
# spi_2(): Return signed PI/2 according to sign of src operand. #
#################################################################
        global  spi_2
spi_2:
        tst.b   SRC_EX(%a0)             # check sign of source
        bmi.b   ld_mpi2                 # negative: return -pi/2
# otherwise execution continues into ld_ppi2 below

#
# ld_ppi2(): return positive PI/2.
#
        global  ld_ppi2
ld_ppi2:
        fmov.l  %d0,%fpcr               # FPCR = d0 (presumably caller's rnd prec/mode -- confirm)
        fmov.x  ppiby2(%pc),%fp0        # load +pi/2
        bra.w   t_pinx2                 # set INEX2

#
# ld_mpi2(): return negative PI/2.
#
        global  ld_mpi2
ld_mpi2:
        fmov.l  %d0,%fpcr               # FPCR = d0 (presumably caller's rnd prec/mode -- confirm)
        fmov.x  mpiby2(%pc),%fp0        # load -pi/2
        bra.w   t_minx2                 # set INEX2

####################################################
# The following routines give support for fsincos. #
####################################################

#
# ssincosz(): When the src operand is ZERO, store a one in the
#             cosine register and return a ZERO in fp0 w/ the same sign
#             as the src operand.
#
        global  ssincosz
ssincosz:
        fmov.s  &0x3f800000,%fp1        # fp1 (cosine result) = +1
        tst.b   SRC_EX(%a0)             # test sign
        bpl.b   sincoszp                # not negative: return +0
        fmov.s  &0x80000000,%fp0        # return sin result in fp0
        mov.b   &z_bmask+neg_bmask,FPSR_CC(%a6)  # ccode byte = Z + N
        rts
sincoszp:
        fmov.s  &0x00000000,%fp0        # return sin result in fp0
        mov.b   &z_bmask,FPSR_CC(%a6)   # ccode byte = Z
        rts

#
# ssincosi(): When the src operand is INF, store a QNAN in the cosine
#             register and jump to the operand error routine for negative
#             src operands.
# NOTE(review): the code below branches to t_operr unconditionally; it
# does not test the sign of the src operand.
#
        global  ssincosi
ssincosi:
        fmov.x  qnan(%pc),%fp1          # load NAN
        bra.w   t_operr

#
# ssincosqnan(): When the src operand is a QNAN, store the QNAN in the cosine
#                register and branch to the src QNAN routine.
#
        global  ssincosqnan
ssincosqnan:
        fmov.x  LOCAL_EX(%a0),%fp1      # fp1 = operand at a0
        bra.w   src_qnan                # fp0/ccodes are set by src_qnan

########################################################################

# The smod_*/srem_*/sscale_* entry points below all dispatch on the tag
# byte at DTAG(%a6): a zero tag is handled first (beq straight after the
# mov.b; presumably the NORM tag -- confirm), then ZERO, INF and DENORM
# are compared explicitly, and any remaining tag value goes to dst_qnan.

        global  smod_sdnrm
        global  smod_snorm
smod_sdnrm:
smod_snorm:
        mov.b   DTAG(%a6),%d1           # fetch tag byte
        beq.l   smod                    # tag == 0
        cmpi.b  %d1,&ZERO
        beq.w   smod_zro
        cmpi.b  %d1,&INF
        beq.l   t_operr
        cmpi.b  %d1,&DENORM
        beq.l   smod
        bra.l   dst_qnan

        global  smod_szero
smod_szero:
        mov.b   DTAG(%a6),%d1           # fetch tag byte
        beq.l   t_operr                 # tag == 0
        cmpi.b  %d1,&ZERO
        beq.l   t_operr
        cmpi.b  %d1,&INF
        beq.l   t_operr
        cmpi.b  %d1,&DENORM
        beq.l   t_operr
        bra.l   dst_qnan

        global  smod_sinf
smod_sinf:
        mov.b   DTAG(%a6),%d1           # fetch tag byte
        beq.l   smod_fpn                # tag == 0
        cmpi.b  %d1,&ZERO
        beq.l   smod_zro
        cmpi.b  %d1,&INF
        beq.l   t_operr
        cmpi.b  %d1,&DENORM
        beq.l   smod_fpn
        bra.l   dst_qnan

# smod_zro/srem_zro: store (src sign XOR dst sign) in bit 7 of the
# quotient byte, then return a zero carrying the dst's sign.
smod_zro:
srem_zro:
        mov.b   SRC_EX(%a0),%d1         # get src sign
        mov.b   DST_EX(%a1),%d0         # get dst sign
        eor.b   %d0,%d1                 # get qbyte sign
        andi.b  &0x80,%d1               # keep only the sign bit
        mov.b   %d1,FPSR_QBYTE(%a6)     # store quotient byte
        tst.b   %d0                     # dst sign
        bpl.w   ld_pzero                # not negative: +0
        bra.w   ld_mzero                # negative: -0

# smod_fpn/srem_fpn: set the quotient byte the same way, then return the
# dst operand itself: via t_resdnrm when DTAG is DENORM, otherwise by
# loading DST(%a1) into fp0 with FPCR taken from the d0 value saved on
# entry.
smod_fpn:
srem_fpn:
        clr.b   FPSR_QBYTE(%a6)
        mov.l   %d0,-(%sp)              # save d0; d0 is used as scratch below
        mov.b   SRC_EX(%a0),%d1         # get src sign
        mov.b   DST_EX(%a1),%d0         # get dst sign
        eor.b   %d0,%d1                 # get qbyte sign
        andi.b  &0x80,%d1               # keep only the sign bit
        mov.b   %d1,FPSR_QBYTE(%a6)     # store quotient byte
        cmpi.b  DTAG(%a6),&DENORM       # is tag DENORM?
        bne.b   smod_nrm                # no
        lea     DST(%a1),%a0            # pass ptr to dst in a0
        mov.l   (%sp)+,%d0              # restore saved d0
        bra     t_resdnrm
smod_nrm:
        fmov.l  (%sp)+,%fpcr            # FPCR = the d0 value saved above
        fmov.x  DST(%a1),%fp0           # fp0 = dst operand
        tst.b   DST_EX(%a1)             # test sign byte of dst
        bmi.b   smod_nrm_neg            # negative
        rts

smod_nrm_neg:
        mov.b   &neg_bmask,FPSR_CC(%a6) # set 'N' code
        rts

#########################################################################
        global  srem_snorm
        global  srem_sdnrm
srem_sdnrm:
srem_snorm:
        mov.b   DTAG(%a6),%d1           # fetch tag byte
        beq.l   srem                    # tag == 0
        cmpi.b  %d1,&ZERO
        beq.w   srem_zro
        cmpi.b  %d1,&INF
        beq.l   t_operr
        cmpi.b  %d1,&DENORM
        beq.l   srem
        bra.l   dst_qnan

        global  srem_szero
srem_szero:
        mov.b   DTAG(%a6),%d1           # fetch tag byte
        beq.l   t_operr                 # tag == 0
        cmpi.b  %d1,&ZERO
        beq.l   t_operr
        cmpi.b  %d1,&INF
        beq.l   t_operr
        cmpi.b  %d1,&DENORM
        beq.l   t_operr
        bra.l   dst_qnan

        global  srem_sinf
srem_sinf:
        mov.b   DTAG(%a6),%d1           # fetch tag byte
        beq.w   srem_fpn                # tag == 0
        cmpi.b  %d1,&ZERO
        beq.w   srem_zro
        cmpi.b  %d1,&INF
        beq.l   t_operr
        cmpi.b  %d1,&DENORM
        beq.l   srem_fpn
        bra.l   dst_qnan

#########################################################################

        global  sscale_snorm
        global  sscale_sdnrm
sscale_snorm:
sscale_sdnrm:
        mov.b   DTAG(%a6),%d1           # fetch tag byte
        beq.l   sscale                  # tag == 0
        cmpi.b  %d1,&ZERO
        beq.l   dst_zero
        cmpi.b  %d1,&INF
        beq.l   dst_inf
        cmpi.b  %d1,&DENORM
        beq.l   sscale
        bra.l   dst_qnan

        global  sscale_szero
sscale_szero:
        mov.b   DTAG(%a6),%d1           # fetch tag byte
        beq.l   sscale                  # tag == 0
        cmpi.b  %d1,&ZERO
        beq.l   dst_zero
        cmpi.b  %d1,&INF
        beq.l   dst_inf
        cmpi.b  %d1,&DENORM
        beq.l   sscale
        bra.l   dst_qnan

        global  sscale_sinf
sscale_sinf:
        mov.b   DTAG(%a6),%d1           # fetch tag byte
        beq.l   t_operr                 # tag == 0
        cmpi.b  %d1,&QNAN
        beq.l   dst_qnan
        bra.l   t_operr                 # every other tag: operand error

########################################################################

# sop_sqnan: go to dst_qnan when the DTAG(%a6) tag is QNAN, otherwise to
# src_qnan.
        global  sop_sqnan
sop_sqnan:
        mov.b   DTAG(%a6),%d1           # fetch tag byte
        cmpi.b  %d1,&QNAN
        beq.l   dst_qnan
        bra.l   src_qnan

#########################################################################
# norm(): normalize the mantissa of an extended precision input. the    #
#         input operand should not be normalized already.               #
#                                                                       #
# XDEF **************************************************************** #
#       norm()                                                          #
#                                                                       #
# XREF **************************************************************** #
#       none                                                            #
#                                                                       #
# INPUT *************************************************************** #
#       a0 = pointer fp extended precision operand to normalize         #
#                                                                       #
# OUTPUT ************************************************************** #
#       d0 = number of bit positions the mantissa was shifted           #
#       a0 = the input operand's mantissa is normalized; the exponent   #
#            is unchanged.                                              #
#                                                                       #
#########################################################################
# Note: d2/d3 are saved and restored here; d0 and d1 are overwritten.
        global  norm
norm:
        mov.l   %d2, -(%sp)             # create some temp regs
        mov.l   %d3, -(%sp)

        mov.l   FTEMP_HI(%a0), %d0      # load hi(mantissa)
        mov.l   FTEMP_LO(%a0), %d1      # load lo(mantissa)

        bfffo   %d0{&0:&32}, %d2        # how many places to shift?
        beq.b   norm_lo                 # hi(man) is all zeroes!

norm_hi:
        lsl.l   %d2, %d0                # left shift hi(man)
        bfextu  %d1{&0:%d2}, %d3        # extract lo bits

        or.l    %d3, %d0                # create hi(man)
        lsl.l   %d2, %d1                # create lo(man)

        mov.l   %d0, FTEMP_HI(%a0)      # store new hi(man)
        mov.l   %d1, FTEMP_LO(%a0)      # store new lo(man)

        mov.l   %d2, %d0                # return shift amount

        mov.l   (%sp)+, %d3             # restore temp regs
        mov.l   (%sp)+, %d2

        rts

norm_lo:
        bfffo   %d1{&0:&32}, %d2        # how many places to shift?
        lsl.l   %d2, %d1                # shift lo(man)
        add.l   &32, %d2                # add 32 to shft amount

        mov.l   %d1, FTEMP_HI(%a0)      # store hi(man)
        clr.l   FTEMP_LO(%a0)           # lo(man) is now zero

        mov.l   %d2, %d0                # return shift amount

        mov.l   (%sp)+, %d3             # restore temp regs
        mov.l   (%sp)+, %d2

        rts

#########################################################################
# unnorm_fix(): - changes an UNNORM to one of NORM, DENORM, or ZERO     #
#               - returns corresponding optype tag                      #
#                                                                       #
# XDEF **************************************************************** #
#       unnorm_fix()                                                    #
#                                                                       #
# XREF **************************************************************** #
#       norm() - normalize the mantissa                                 #
#                                                                       #
# INPUT *************************************************************** #
#       a0 = pointer to unnormalized extended precision number          #
#                                                                       #
# OUTPUT ************************************************************** #
#       d0 = optype tag - is corrected to one of NORM, DENORM, or ZERO  #
#       a0 = input operand has been converted to a norm, denorm, or     #
#            zero; both the exponent and mantissa are changed.          #
#                                                                       #
#########################################################################
# Note: d1 is used as scratch and is not preserved.  The cmp/bgt pairs
# keep the operand order of the original source; their own comments
# state the condition being tested.

        global  unnorm_fix
unnorm_fix:
        bfffo   FTEMP_HI(%a0){&0:&32}, %d0  # how many shifts are needed?
        bne.b   unnorm_shift            # hi(man) is not all zeroes

#
# hi(man) is all zeroes so see if any bits in lo(man) are set
#
unnorm_chk_lo:
        bfffo   FTEMP_LO(%a0){&0:&32}, %d0  # is operand really a zero?
        beq.w   unnorm_zero             # yes

        add.w   &32, %d0                # no; fix shift distance

#
# d0 = # shifts needed for complete normalization
#
unnorm_shift:
        clr.l   %d1                     # clear top word
        mov.w   FTEMP_EX(%a0), %d1      # extract exponent
        and.w   &0x7fff, %d1            # strip off sgn

        cmp.w   %d0, %d1                # will denorm push exp < 0?
        bgt.b   unnorm_nrm_zero         # yes; denorm only until exp = 0

#
# exponent would not go < 0. therefore, number stays normalized
#
        sub.w   %d0, %d1                # shift exponent value
        mov.w   FTEMP_EX(%a0), %d0      # load old exponent
        and.w   &0x8000, %d0            # save old sign
        or.w    %d0, %d1                # {sgn,new exp}
        mov.w   %d1, FTEMP_EX(%a0)      # insert new exponent

        bsr.l   norm                    # normalize UNNORM

        mov.b   &NORM, %d0              # return new optype tag
        rts

#
# exponent would go < 0, so only denormalize until exp = 0
#
unnorm_nrm_zero:
        cmp.b   %d1, &32                # is exp <= 32?
        bgt.b   unnorm_nrm_zero_lrg     # no; go handle large exponent

        bfextu  FTEMP_HI(%a0){%d1:&32}, %d0  # extract new hi(man)
        mov.l   %d0, FTEMP_HI(%a0)      # save new hi(man)

        mov.l   FTEMP_LO(%a0), %d0      # fetch old lo(man)
        lsl.l   %d1, %d0                # extract new lo(man)
        mov.l   %d0, FTEMP_LO(%a0)      # save new lo(man)

        and.w   &0x8000, FTEMP_EX(%a0)  # set exp = 0

        mov.b   &DENORM, %d0            # return new optype tag
        rts

#
# only mantissa bits set are in lo(man)
#
unnorm_nrm_zero_lrg:
        sub.w   &32, %d1                # adjust shft amt by 32

        mov.l   FTEMP_LO(%a0), %d0      # fetch old lo(man)
        lsl.l   %d1, %d0                # left shift lo(man)

        mov.l   %d0, FTEMP_HI(%a0)      # store new hi(man)
        clr.l   FTEMP_LO(%a0)           # lo(man) = 0

        and.w   &0x8000, FTEMP_EX(%a0)  # set exp = 0

        mov.b   &DENORM, %d0            # return new optype tag
        rts

#
# whole mantissa is zero so this UNNORM is actually a zero
#
unnorm_zero:
        and.w   &0x8000, FTEMP_EX(%a0)  # force exponent to zero

        mov.b   &ZERO, %d0              # fix optype tag
        rts