1# 2# $NetBSD: fpsp.s,v 1.7 2023/06/24 05:31:04 msaitoh Exp $ 3# 4 5#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 6# MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP 7# M68000 Hi-Performance Microprocessor Division 8# M68060 Software Package Production Release 9# 10# M68060 Software Package Copyright (C) 1993, 1994, 1995, 1996 Motorola Inc. 11# All rights reserved. 12# 13# THE SOFTWARE is provided on an "AS IS" basis and without warranty. 14# To the maximum extent permitted by applicable law, 15# MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED, 16# INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS 17# FOR A PARTICULAR PURPOSE and any warranty against infringement with 18# regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF) 19# and any accompanying written materials. 20# 21# To the maximum extent permitted by applicable law, 22# IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER 23# (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS, 24# BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR OTHER PECUNIARY LOSS) 25# ARISING OF THE USE OR INABILITY TO USE THE SOFTWARE. 26# 27# Motorola assumes no responsibility for the maintenance and support 28# of the SOFTWARE. 29# 30# You are hereby granted a copyright license to use, modify, and distribute the 31# SOFTWARE so long as this entire notice is retained without alteration 32# in any modified and/or redistributed versions, and that such modified 33# versions are clearly identified as such. 34# No licenses are granted by implication, estoppel or otherwise under any 35# patents or trademarks of Motorola, Inc. 36#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 37 38# 39# freal.s: 40# This file is appended to the top of the 060FPSP package 41# and contains the entry points into the package. The user, in 42# effect, branches to one of the branch table entries located 43# after _060FPSP_TABLE. 
#	Also, subroutine stubs exist in this file (_fpsp_done for
#	example) that are referenced by the FPSP package itself in order
#	to call a given routine. The stub routine actually performs the
#	callout. The FPSP code does a "bsr" to the stub routine. This
#	extra layer of hierarchy adds a slight performance penalty but
#	it makes the FPSP code easier to read and more maintainable.
#

# Byte offsets of the individual callout slots. Each stub in this file
# reads one longword at (_060FPSP_TABLE-0x80+_off_xxx) and uses it as a
# displacement from (_060FPSP_TABLE-0x80), so these values index a table
# that sits 0x80 bytes in front of _060FPSP_TABLE.
# NOTE(review): that table is not defined in this part of the file;
# presumably it is supplied by the operating-system glue -- confirm.

# exception/exit callouts
set	_off_bsun,	0x00
set	_off_snan,	0x04
set	_off_operr,	0x08
set	_off_ovfl,	0x0c
set	_off_unfl,	0x10
set	_off_dz,	0x14
set	_off_inex,	0x18
set	_off_fline,	0x1c
set	_off_fpu_dis,	0x20
set	_off_trap,	0x24
set	_off_trace,	0x28
set	_off_access,	0x2c
set	_off_done,	0x30

# instruction/data memory access callouts
set	_off_imr,	0x40
set	_off_dmr,	0x44
set	_off_dmw,	0x48
set	_off_irw,	0x4c
set	_off_irl,	0x50
set	_off_drb,	0x54
set	_off_drw,	0x58
set	_off_drl,	0x5c
set	_off_dwb,	0x60
set	_off_dww,	0x64
set	_off_dwl,	0x68

_060FPSP_TABLE:

###############################################################

# Here's the table of ENTRY POINTS for those linking the package.
# Every entry point is a "bra.l" (6 bytes with its 32-bit displacement)
# padded by one zero word, i.e. 8 bytes per entry. Nine entries plus the
# 56 bytes of padding below make the table 9*8 + 56 = 128 (0x80) bytes long.
	bra.l		_fpsp_snan
	short		0x0000
	bra.l		_fpsp_operr
	short		0x0000
	bra.l		_fpsp_ovfl
	short		0x0000
	bra.l		_fpsp_unfl
	short		0x0000
	bra.l		_fpsp_dz
	short		0x0000
	bra.l		_fpsp_inex
	short		0x0000
	bra.l		_fpsp_fline
	short		0x0000
	bra.l		_fpsp_unsupp
	short		0x0000
	bra.l		_fpsp_effadd
	short		0x0000

	space		56

###############################################################

# CALLOUT STUBS
#
# All stubs below share one five-instruction pattern and differ only in
# the _off_xxx slot they read:
#	1. push d0 so it can be used as scratch
#	2. d0 = longword stored at (_060FPSP_TABLE-0x80+_off_xxx)
#	3. push the address (_060FPSP_TABLE-0x80)+d0, i.e. the callout target
#	4. reload the caller's d0 from the slot pushed in step 1
#	5. "rtd &0x4" pops the target address into the pc and then drops the
#	   4-byte slot holding the saved d0
# The target is therefore entered with d0 and the stack pointer exactly as
# they were when the stub itself was entered.

	global		_fpsp_done
_fpsp_done:
	mov.l		%d0,-(%sp)				# save d0 (scratch)
	mov.l		(_060FPSP_TABLE-0x80+_off_done,%pc),%d0	# d0 = callout offset
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)		# push target address
	mov.l		0x4(%sp),%d0				# restore caller's d0
	rtd		&0x4					# jump to target; drop saved d0

	global		_real_ovfl
_real_ovfl:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_ovfl,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_real_unfl
_real_unfl:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_unfl,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_real_inex
_real_inex:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_inex,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_real_bsun
_real_bsun:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_bsun,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_real_operr
_real_operr:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_operr,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_real_snan
_real_snan:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_snan,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_real_dz
_real_dz:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_dz,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_real_fline
_real_fline:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_fline,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_real_fpu_disabled
_real_fpu_disabled:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_fpu_dis,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_real_trap
_real_trap:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_trap,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_real_trace
_real_trace:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_trace,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_real_access
_real_access:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_access,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

#######################################

# Memory-access callouts: same trampoline pattern, using the _off_imr ..
# _off_dwl slots.

	global		_imem_read
_imem_read:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_imr,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_dmem_read
_dmem_read:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_dmr,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_dmem_write
_dmem_write:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_dmw,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_imem_read_word
_imem_read_word:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_irw,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_imem_read_long
_imem_read_long:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_irl,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_dmem_read_byte
_dmem_read_byte:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_drb,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_dmem_read_word
_dmem_read_word:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_drw,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_dmem_read_long
_dmem_read_long:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_drl,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_dmem_write_byte
_dmem_write_byte:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_dwb,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_dmem_write_word
_dmem_write_word:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_dww,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_dmem_write_long
_dmem_write_long:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_dwl,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

#
# This file contains a set of define statements for constants
# in order to promote readability within the corecode itself.
#

# Offsets in this first group are relative to the frame pointer %a6 as
# set up by "link.w %a6,&-LOCAL_SIZE" in the exception handlers: positive
# offsets reach into the exception stack frame, negative offsets into the
# local work area.

set	LOCAL_SIZE,	192			# stack frame size(bytes)
set	LV,		-LOCAL_SIZE		# stack offset

set	EXC_SR,		0x4			# stack status register
set	EXC_PC,		0x6			# stack pc
set	EXC_VOFF,	0xa			# stacked vector offset
set	EXC_EA,		0xc			# stacked <ea>

set	EXC_FP,		0x0			# frame pointer

set	EXC_AREGS,	-68			# offset of all address regs
set	EXC_DREGS,	-100			# offset of all data regs
set	EXC_FPREGS,	-36			# offset of all fp regs

set	EXC_A7,		EXC_AREGS+(7*4)		# offset of saved a7
# NOTE(review): OLD_A7 evaluates to the same offset as EXC_A6 below
# (6*4, not 7*4) -- confirm the shared slot is intentional.
set	OLD_A7,		EXC_AREGS+(6*4)		# extra copy of saved a7
set	EXC_A6,		EXC_AREGS+(6*4)		# offset of saved a6
set	EXC_A5,		EXC_AREGS+(5*4)
set	EXC_A4,		EXC_AREGS+(4*4)
set	EXC_A3,		EXC_AREGS+(3*4)
set	EXC_A2,		EXC_AREGS+(2*4)
set	EXC_A1,		EXC_AREGS+(1*4)
set	EXC_A0,		EXC_AREGS+(0*4)
set	EXC_D7,		EXC_DREGS+(7*4)
set	EXC_D6,		EXC_DREGS+(6*4)
set	EXC_D5,		EXC_DREGS+(5*4)
set	EXC_D4,		EXC_DREGS+(4*4)
set	EXC_D3,		EXC_DREGS+(3*4)
set	EXC_D2,		EXC_DREGS+(2*4)
set	EXC_D1,		EXC_DREGS+(1*4)
set	EXC_D0,		EXC_DREGS+(0*4)

set	EXC_FP0,	EXC_FPREGS+(0*12)	# offset of saved fp0
set	EXC_FP1,	EXC_FPREGS+(1*12)	# offset of saved fp1
set	EXC_FP2,	EXC_FPREGS+(2*12)	# offset of saved fp2 (not used)

set	FP_SCR1,	LV+80			# fp scratch 1
set	FP_SCR1_EX,	FP_SCR1+0
set	FP_SCR1_SGN,	FP_SCR1+2
set	FP_SCR1_HI,	FP_SCR1+4
set	FP_SCR1_LO,	FP_SCR1+8

set	FP_SCR0,	LV+68			# fp scratch 0
set	FP_SCR0_EX,	FP_SCR0+0
set	FP_SCR0_SGN,	FP_SCR0+2
set	FP_SCR0_HI,	FP_SCR0+4
set	FP_SCR0_LO,	FP_SCR0+8

set	FP_DST,		LV+56			# fp destination operand
set	FP_DST_EX,	FP_DST+0
set	FP_DST_SGN,	FP_DST+2
set	FP_DST_HI,	FP_DST+4
set	FP_DST_LO,	FP_DST+8

set	FP_SRC,		LV+44			# fp source operand
set	FP_SRC_EX,	FP_SRC+0
set	FP_SRC_SGN,	FP_SRC+2
set	FP_SRC_HI,	FP_SRC+4
set	FP_SRC_LO,	FP_SRC+8

set	USER_FPIAR,	LV+40			# FP instr address register

set	USER_FPSR,	LV+36			# FP status register
set	FPSR_CC,	USER_FPSR+0		# FPSR condition codes
set	FPSR_QBYTE,	USER_FPSR+1		# FPSR quotient byte
set	FPSR_EXCEPT,	USER_FPSR+2		# FPSR exception status byte
set	FPSR_AEXCEPT,	USER_FPSR+3		# FPSR accrued exception byte

set	USER_FPCR,	LV+32			# FP control register
set	FPCR_ENABLE,	USER_FPCR+2		# FPCR exception enable
set	FPCR_MODE,	USER_FPCR+3		# FPCR rounding mode control

set	L_SCR3,		LV+28			# integer scratch 3
set	L_SCR2,		LV+24			# integer scratch 2
set	L_SCR1,		LV+20			# integer scratch 1

set	STORE_FLG,	LV+19			# flag: operand store (ie. not fcmp/ftst)

# NOTE(review): EXC_TEMP2 is the same offset as L_SCR2 (LV+24).
set	EXC_TEMP2,	LV+24			# temporary space
set	EXC_TEMP,	LV+16			# temporary space

set	DTAG,		LV+15			# destination operand type
set	STAG,		LV+14			# source operand type

set	SPCOND_FLG,	LV+10			# flag: special case (see below)

set	EXC_CC,		LV+8			# saved condition codes
set	EXC_EXTWPTR,	LV+4			# saved current PC (active)
# EXC_EXTWORD and EXC_CMDREG are two names for the same word at LV+2.
set	EXC_EXTWORD,	LV+2			# saved extension word
set	EXC_CMDREG,	LV+2			# saved extension word
set	EXC_OPWORD,	LV+0			# saved operation word

################################

# Helpful macros

set	FTEMP,		0			# offsets within an
set	FTEMP_EX,	0			# extended precision
set	FTEMP_SGN,	2			# value saved in memory.
set	FTEMP_HI,	4
set	FTEMP_LO,	8
set	FTEMP_GRS,	12

set	LOCAL,		0			# offsets within an
set	LOCAL_EX,	0			# extended precision
set	LOCAL_SGN,	2			# value saved in memory.
set	LOCAL_HI,	4
set	LOCAL_LO,	8
set	LOCAL_GRS,	12

set	DST,		0			# offsets within an
set	DST_EX,		0			# extended precision
set	DST_HI,		4			# value saved in memory.
set	DST_LO,		8

set	SRC,		0			# offsets within an
set	SRC_EX,		0			# extended precision
set	SRC_HI,		4			# value saved in memory.
set	SRC_LO,		8

set	SGL_LO,		0x3f81			# min sgl prec exponent
set	SGL_HI,		0x407e			# max sgl prec exponent
set	DBL_LO,		0x3c01			# min dbl prec exponent
set	DBL_HI,		0x43fe			# max dbl prec exponent
set	EXT_LO,		0x0			# min ext prec exponent
set	EXT_HI,		0x7ffe			# max ext prec exponent

set	EXT_BIAS,	0x3fff			# extended precision bias
set	SGL_BIAS,	0x007f			# single precision bias
set	DBL_BIAS,	0x03ff			# double precision bias

set	NORM,		0x00			# operand type for STAG/DTAG
set	ZERO,		0x01			# operand type for STAG/DTAG
set	INF,		0x02			# operand type for STAG/DTAG
set	QNAN,		0x03			# operand type for STAG/DTAG
set	DENORM,		0x04			# operand type for STAG/DTAG
set	SNAN,		0x05			# operand type for STAG/DTAG
set	UNNORM,		0x06			# operand type for STAG/DTAG

##################
# FPSR/FPCR bits #
##################
# The *_bit values are bit numbers within a single byte (used with btst on
# the FPSR_CC / FPSR_EXCEPT / FPSR_AEXCEPT / FPCR_ENABLE bytes above).
set	neg_bit,	0x3			# negative result
set	z_bit,		0x2			# zero result
set	inf_bit,	0x1			# infinite result
set	nan_bit,	0x0			# NAN result

set	q_sn_bit,	0x7			# sign bit of quotient byte

set	bsun_bit,	7			# branch on unordered
set	snan_bit,	6			# signalling NAN
set	operr_bit,	5			# operand error
set	ovfl_bit,	4			# overflow
set	unfl_bit,	3			# underflow
set	dz_bit,		2			# divide by zero
set	inex2_bit,	1			# inexact result 2
set	inex1_bit,	0			# inexact result 1

set	aiop_bit,	7			# accrued illegal operation bit
set	aovfl_bit,	6			# accrued overflow bit
set	aunfl_bit,	5			# accrued underflow bit
set	adz_bit,	4			# accrued dz bit
set	ainex_bit,	3			# accrued inexact bit

#############################
# FPSR individual bit masks #
#############################
set	neg_mask,	0x08000000		# negative bit mask (lw)
set	inf_mask,	0x02000000		# infinity bit mask (lw)
set	z_mask,		0x04000000		# zero bit mask (lw)
set	nan_mask,	0x01000000		# nan bit mask (lw)

set	neg_bmask,	0x08			# negative bit mask (byte)
set	inf_bmask,	0x02			# infinity bit mask (byte)
set	z_bmask,	0x04			# zero bit mask (byte)
set	nan_bmask,	0x01			# nan bit mask (byte)

set	bsun_mask,	0x00008000		# bsun exception mask
set	snan_mask,	0x00004000		# snan exception mask
set	operr_mask,	0x00002000		# operr exception mask
set	ovfl_mask,	0x00001000		# overflow exception mask
set	unfl_mask,	0x00000800		# underflow exception mask
set	dz_mask,	0x00000400		# dz exception mask
set	inex2_mask,	0x00000200		# inex2 exception mask
set	inex1_mask,	0x00000100		# inex1 exception mask

set	aiop_mask,	0x00000080		# accrued illegal operation
set	aovfl_mask,	0x00000040		# accrued overflow
set	aunfl_mask,	0x00000020		# accrued underflow
set	adz_mask,	0x00000010		# accrued divide by zero
set	ainex_mask,	0x00000008		# accrued inexact

######################################
# FPSR combinations used in the FPSP #
######################################
set	dzinf_mask,	inf_mask+dz_mask+adz_mask
set	opnan_mask,	nan_mask+operr_mask+aiop_mask
set	nzi_mask,	0x01ffffff		#clears N, Z, and I
set	unfinx_mask,	unfl_mask+inex2_mask+aunfl_mask+ainex_mask
set	unf2inx_mask,	unfl_mask+inex2_mask+ainex_mask
set	ovfinx_mask,	ovfl_mask+inex2_mask+aovfl_mask+ainex_mask
set	inx1a_mask,	inex1_mask+ainex_mask
set	inx2a_mask,	inex2_mask+ainex_mask
set	snaniop_mask,	nan_mask+snan_mask+aiop_mask
set	snaniop2_mask,	snan_mask+aiop_mask
set	naniop_mask,	nan_mask+aiop_mask
set	neginf_mask,	neg_mask+inf_mask
set	infaiop_mask,	inf_mask+aiop_mask
set	negz_mask,	neg_mask+z_mask
set	opaop_mask,	operr_mask+aiop_mask
set	unfl_inx_mask,	unfl_mask+aunfl_mask+ainex_mask
set	ovfl_inx_mask,	ovfl_mask+aovfl_mask+ainex_mask

#########
# misc. #
#########
set	rnd_stky_bit,	29			# stky bit pos in longword

set	sign_bit,	0x7			# sign bit
set	signan_bit,	0x6			# signalling nan bit

set	sgl_thresh,	0x3f81			# minimum sgl exponent
set	dbl_thresh,	0x3c01			# minimum dbl exponent

set	x_mode,		0x0			# extended precision
set	s_mode,		0x4			# single precision
set	d_mode,		0x8			# double precision

set	rn_mode,	0x0			# round-to-nearest
set	rz_mode,	0x1			# round-to-zero
set	rm_mode,	0x2			# round-to-minus-infinity
set	rp_mode,	0x3			# round-to-plus-infinity

set	mantissalen,	64			# length of mantissa in bits

set	BYTE,		1			# len(byte) == 1 byte
set	WORD,		2			# len(word) == 2 bytes
set	LONG,		4			# len(longword) == 4 bytes

set	BSUN_VEC,	0xc0			# bsun    vector offset
set	INEX_VEC,	0xc4			# inexact vector offset
set	DZ_VEC,		0xc8			# dz      vector offset
set	UNFL_VEC,	0xcc			# unfl    vector offset
set	OPERR_VEC,	0xd0			# operr   vector offset
set	OVFL_VEC,	0xd4			# ovfl    vector offset
set	SNAN_VEC,	0xd8			# snan    vector offset

###########################
# SPecial CONDition FLaGs #
###########################
# Each *_flg value is the byte mask (1 << n) for the matching *_bit number
# defined after them.
# NOTE(review): fmovm_flg (0x40) has no matching *_bit definition here.
set	ftrapcc_flg,	0x01			# flag bit: ftrapcc exception
set	fbsun_flg,	0x02			# flag bit: bsun exception
set	mia7_flg,	0x04			# flag bit: (a7)+ <ea>
set	mda7_flg,	0x08			# flag bit: -(a7) <ea>
set	fmovm_flg,	0x40			# flag bit: fmovm instruction
set	immed_flg,	0x80			# flag bit: &<data> <ea>

set	ftrapcc_bit,	0x0
set	fbsun_bit,	0x1
set	mia7_bit,	0x2
set	mda7_bit,	0x3
set	immed_bit,	0x7

##################################
# TRANSCENDENTAL "LAST-OP" FLAGS #
##################################
set	FMUL_OP,	0x0			# fmul instr performed last
set	FDIV_OP,	0x1			# fdiv performed last
set	FADD_OP,	0x2			# fadd performed last
set	FMOV_OP,	0x3			# fmov performed last

#############
# CONSTANTS #
#############
# T1/T2 and TWOBYPI are two longwords each; PI and PIBY2 are four longwords
# each.
T1:	long		0x40C62D38,0xD3D64634	# 16381 LOG2 LEAD
T2:	long		0x3D6F90AE,0xB1E75CC7	# 16381 LOG2 TRAIL

PI:	long		0x40000000,0xC90FDAA2,0x2168C235,0x00000000
PIBY2:	long		0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000

TWOBYPI:
	long		0x3FE45F30,0x6DC9C883

#########################################################################
# XDEF ****************************************************************	#
#	_fpsp_ovfl(): 060FPSP entry point for FP Overflow exception.	#
#									#
#	This handler should be the first code executed upon taking the	#
#	FP Overflow exception in an operating system.			#
#									#
# XREF ****************************************************************	#
#	_imem_read_long() - read instruction longword			#
#	fix_skewed_ops() - adjust src operand in fsave frame		#
#	set_tag_x() - determine optype of src/dst operands		#
#	store_fpreg() - store opclass 0 or 2 result to FP regfile	#
#	unnorm_fix() - change UNNORM operands to NORM or ZERO		#
#	load_fpn2() - load dst operand from FP regfile			#
#	fout() - emulate an opclass 3 instruction			#
#	tbl_unsupp - addr of table of emulation routines for opclass 0,2	#
#	_fpsp_done() - "callout" for 060FPSP exit (all work done!)	#
#	_real_ovfl() - "callout" for Overflow exception enabled code	#
#	_real_inex() - "callout" for Inexact exception enabled code	#
#	_real_trace() - "callout" for Trace exception code		#
#									#
# INPUT ***************************************************************	#
#	- The system stack contains the FP Ovfl exception stack frame	#
#	- The fsave frame contains the source operand			#
#									#
# OUTPUT **************************************************************	#
#	Overflow Exception enabled:					#
#	- The system stack is unchanged					#
#	- The fsave frame contains the adjusted src op for opclass 0,2	#
#	Overflow Exception disabled:					#
#	- The system stack is unchanged					#
#	- The "exception present" flag in the fsave frame is cleared	#
#									#
# ALGORITHM ***********************************************************	#
#	On the 060, if an FP overflow is present as the result of any	#
# instruction, the 060 will take an overflow exception whether the	#
# exception is enabled or disabled in the FPCR. For the disabled case,	#
# This handler emulates the instruction to determine what the correct	#
# default result should be for the operation. This default result is	#
# then stored in either the FP regfile, data regfile, or memory.	#
# Finally, the handler exits through the "callout" _fpsp_done()	#
# denoting that no exceptional conditions exist within the machine.	#
#	If the exception is enabled, then this handler must create the	#
# exceptional operand and place it in the fsave state frame, and store	#
# the default result (only if the instruction is opclass 3). For	#
# exceptions enabled, this handler must exit through the "callout"	#
# _real_ovfl() so that the operating system enabled overflow handler	#
# can handle this case.							#
#	Two other conditions exist. First, if overflow was disabled	#
# but the inexact exception was enabled, this handler must exit	#
# through the "callout" _real_inex() regardless of whether the result	#
# was inexact.								#
#	Also, in the case of an opclass three instruction where	#
# overflow was disabled and the trace exception was enabled, this	#
# handler must exit through the "callout" _real_trace().		#
#									#
#########################################################################

# Exit summary (all exits restore fp0-fp1, the FP control registers and
# d0-d1/a0-a1 from the local frame, then "unlk" before leaving):
#	ovfl enabled in FPCR_ENABLE			-> _real_ovfl
#	ovfl disabled, inex2 enabled			-> _real_inex
#	both disabled					-> _fpsp_done
#	both disabled, fmove out, trace bit set in SR	-> _real_trace

	global		_fpsp_ovfl
_fpsp_ovfl:

#$#	sub.l		&24,%sp			# make room for src/dst

	link.w		%a6,&-LOCAL_SIZE	# init stack frame

	fsave		FP_SRC(%a6)		# grab the "busy" frame

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack

# the FPIAR holds the "current PC" of the faulting instruction
	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch the instruction words
# the longword stored at EXC_OPWORD (LV+0) also fills EXC_CMDREG (LV+2)
	mov.l		%d0,EXC_OPWORD(%a6)

##############################################################################

	btst		&0x5,EXC_CMDREG(%a6)	# is instr an fmove out?
	bne.w		fovfl_out


	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
	bsr.l		fix_skewed_ops		# fix src op

# since, I believe, only NORMs and DENORMs can come through here,
# maybe we can avoid the subroutine call.
	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
	bsr.l		set_tag_x		# tag the operand type
	mov.b		%d0,STAG(%a6)		# maybe NORM,DENORM

# bit five of the fp extension word separates the monadic and dyadic operations
# that can pass through fpsp_ovfl(). remember that fcmp, ftst, and fsincos
# will never take this exception.
	btst		&0x5,1+EXC_CMDREG(%a6)	# is operation monadic or dyadic?
	beq.b		fovfl_extract		# monadic

	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
	bsr.l		load_fpn2		# load dst into FP_DST

	lea		FP_DST(%a6),%a0		# pass: ptr to dst op
	bsr.l		set_tag_x		# tag the operand type
	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
	bne.b		fovfl_op2_done		# no
	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
fovfl_op2_done:
	mov.b		%d0,DTAG(%a6)		# save dst optype tag

fovfl_extract:

#$#	mov.l		FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
#$#	mov.l		FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
#$#	mov.l		FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
#$#	mov.l		FP_DST_EX(%a6),TRAP_DSTOP_EX(%a6)
#$#	mov.l		FP_DST_HI(%a6),TRAP_DSTOP_HI(%a6)
#$#	mov.l		FP_DST_LO(%a6),TRAP_DSTOP_LO(%a6)

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec/mode

	mov.b		1+EXC_CMDREG(%a6),%d1
	andi.w		&0x007f,%d1		# extract extension

# mask 0x00ff01ff clears the condition-code byte and exception bits 15-9;
# it keeps the quotient byte, inex1 (0x100) and the accrued byte.
	andi.l		&0x00ff01ff,USER_FPSR(%a6) # zero all but accrued field

	fmov.l		&0x0,%fpcr		# zero current control regs
	fmov.l		&0x0,%fpsr

	lea		FP_SRC(%a6),%a0
	lea		FP_DST(%a6),%a1

# maybe we can make these entry points ONLY the OVFL entry points of each routine.
# tbl_unsupp holds one longword per extension value; the value fetched is
# used as a displacement from tbl_unsupp itself for the jsr.
	mov.l		(tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
	jsr		(tbl_unsupp.l,%pc,%d1.l*1)

# the operation has been emulated. the result is in fp0.
# the EXOP, if an exception occurred, is in fp1.
# we must save the default result regardless of whether
# traps are enabled or disabled.
	bfextu		EXC_CMDREG(%a6){&6:&3},%d0
	bsr.l		store_fpreg

# the exceptional possibilities we have left ourselves with are ONLY overflow
# and inexact. and, the inexact is such that overflow occurred and was disabled
# but inexact was enabled.
	btst		&ovfl_bit,FPCR_ENABLE(%a6)
	bne.b		fovfl_ovfl_on

	btst		&inex2_bit,FPCR_ENABLE(%a6)
	bne.b		fovfl_inex_on

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	unlk		%a6
#$#	add.l		&24,%sp
	bra.l		_fpsp_done

# overflow is enabled AND overflow, of course, occurred. so, we have the EXOP
# in fp1. now, simply jump to _real_ovfl()!
fovfl_ovfl_on:
	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP (fp1) to stack

# the word at FP_SRC+2 is overwritten after the EXOP store above
	mov.w		&0xe005,2+FP_SRC(%a6)	# save exc status

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	frestore	FP_SRC(%a6)		# do this after fmovm,other f<op>s!

	unlk		%a6

	bra.l		_real_ovfl

# overflow occurred but is disabled. meanwhile, inexact is enabled. therefore,
# we must jump to real_inex().
fovfl_inex_on:

	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP (fp1) to stack

# 0xc4 is the same value as INEX_VEC; only the low byte of the stacked
# vector offset word is rewritten.
	mov.b		&0xc4,1+EXC_VOFF(%a6)	# vector offset = 0xc4
	mov.w		&0xe001,2+FP_SRC(%a6)	# save exc status

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	frestore	FP_SRC(%a6)		# do this after fmovm,other f<op>s!

	unlk		%a6

	bra.l		_real_inex

########################################################################
fovfl_out:


#$#	mov.l		FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
#$#	mov.l		FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
#$#	mov.l		FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)

# the src operand is definitely a NORM(!), so tag it as such
	mov.b		&NORM,STAG(%a6)		# set src optype tag

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec/mode

# mask 0xffff00ff clears only the exception status byte (bits 15-8); the
# condition codes, quotient byte and accrued byte are all kept. this is a
# different mask from the one used on the opclass 0,2 path above.
	and.l		&0xffff00ff,USER_FPSR(%a6) # zero all but accrued field

	fmov.l		&0x0,%fpcr		# zero current control regs
	fmov.l		&0x0,%fpsr

	lea		FP_SRC(%a6),%a0		# pass ptr to src operand

	bsr.l		fout

	btst		&ovfl_bit,FPCR_ENABLE(%a6)
	bne.w		fovfl_ovfl_on

	btst		&inex2_bit,FPCR_ENABLE(%a6)
	bne.w		fovfl_inex_on

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	unlk		%a6
#$#	add.l		&24,%sp

# after the unlk, %sp is 4 bytes above where %a6 pointed, so (%sp), 0x6(%sp)
# and 0x8(%sp) are the locations named EXC_SR, EXC_VOFF and EXC_EA above.
	btst		&0x7,(%sp)		# is trace on?
	beq.l		_fpsp_done		# no

	fmov.l		%fpiar,0x8(%sp)		# "Current PC" is in FPIAR
	mov.w		&0x2024,0x6(%sp)	# stk fmt = 0x2; voff = 0x024
	bra.l		_real_trace

#########################################################################
# XDEF ****************************************************************	#
#	_fpsp_unfl(): 060FPSP entry point for FP Underflow exception.	#
#									#
#	This handler should be the first code executed upon taking the	#
#	FP Underflow exception in an operating system.			#
# 833# # 834# XREF **************************************************************** # 835# _imem_read_long() - read instruction longword # 836# fix_skewed_ops() - adjust src operand in fsave frame # 837# set_tag_x() - determine optype of src/dst operands # 838# store_fpreg() - store opclass 0 or 2 result to FP regfile # 839# unnorm_fix() - change UNNORM operands to NORM or ZERO # 840# load_fpn2() - load dst operand from FP regfile # 841# fout() - emulate an opclass 3 instruction # 842# tbl_unsupp - add of table of emulation routines for opclass 0,2 # 843# _fpsp_done() - "callout" for 060FPSP exit (all work done!) # 844# _real_ovfl() - "callout" for Overflow exception enabled code # 845# _real_inex() - "callout" for Inexact exception enabled code # 846# _real_trace() - "callout" for Trace exception code # 847# # 848# INPUT *************************************************************** # 849# - The system stack contains the FP Unfl exception stack frame # 850# - The fsave frame contains the source operand # 851# # 852# OUTPUT ************************************************************** # 853# Underflow Exception enabled: # 854# - The system stack is unchanged # 855# - The fsave frame contains the adjusted src op for opclass 0,2 # 856# Underflow Exception disabled: # 857# - The system stack is unchanged # 858# - The "exception present" flag in the fsave frame is cleared # 859# # 860# ALGORITHM *********************************************************** # 861# On the 060, if an FP underflow is present as the result of any # 862# instruction, the 060 will take an underflow exception whether the # 863# exception is enabled or disabled in the FPCR. For the disabled case, # 864# This handler emulates the instruction to determine what the correct # 865# default result should be for the operation. This default result is # 866# then stored in either the FP regfile, data regfile, or memory. 
# 867# Finally, the handler exits through the "callout" _fpsp_done() # 868# denoting that no exceptional conditions exist within the machine. # 869# If the exception is enabled, then this handler must create the # 870# exceptional operand and place it in the fsave state frame, and store # 871# the default result (only if the instruction is opclass 3). For # 872# exceptions enabled, this handler must exit through the "callout" # 873# _real_unfl() so that the operating system enabled underflow handler # 874# can handle this case. # 875# Two other conditions exist. First, if underflow was disabled # 876# but the inexact exception was enabled and the result was inexact, # 877# this handler must exit through the "callout" _real_inex(). # 878# # 879# Also, in the case of an opclass three instruction where # 880# underflow was disabled and the trace exception was enabled, this # 881# handler must exit through the "callout" _real_trace(). # 882# # 883######################################################################### 884 885 global _fpsp_unfl 886_fpsp_unfl: 887 888#$# sub.l &24,%sp # make room for src/dst 889 890 link.w %a6,&-LOCAL_SIZE # init stack frame 891 892 fsave FP_SRC(%a6) # grab the "busy" frame 893 894 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 895 fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs 896 fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack 897 898# the FPIAR holds the "current PC" of the faulting instruction 899 mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6) 900 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr 901 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr 902 bsr.l _imem_read_long # fetch the instruction words 903 mov.l %d0,EXC_OPWORD(%a6) # store OPWORD and EXTWORD 904 905############################################################################## 906 907 btst &0x5,EXC_CMDREG(%a6) # is instr an fmove out?
908 bne.w funfl_out # yes; handled separately at funfl_out 909 910 911 lea FP_SRC(%a6),%a0 # pass: ptr to src op 912 bsr.l fix_skewed_ops # fix src op 913 914 lea FP_SRC(%a6),%a0 # pass: ptr to src op 915 bsr.l set_tag_x # tag the operand type 916 mov.b %d0,STAG(%a6) # maybe NORM,DENORM 917 918# bit five of the fp ext word separates the monadic and dyadic operations 919# that can pass through _fpsp_unfl(). remember that fcmp and ftst 920# will never take this exception. 921 btst &0x5,1+EXC_CMDREG(%a6) # is op monadic or dyadic? 922 beq.b funfl_extract # monadic 923 924# now, what's left that's not dyadic is fsincos. we can distinguish it 925# from all dyadics by the '0110xxx' pattern 926 btst &0x4,1+EXC_CMDREG(%a6) # is op an fsincos? 927 bne.b funfl_extract # yes 928 929 bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg 930 bsr.l load_fpn2 # load dst into FP_DST 931 932 lea FP_DST(%a6),%a0 # pass: ptr to dst op 933 bsr.l set_tag_x # tag the operand type 934 cmpi.b %d0,&UNNORM # is operand an UNNORM? 935 bne.b funfl_op2_done # no 936 bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO 937funfl_op2_done: 938 mov.b %d0,DTAG(%a6) # save dst optype tag 939 940funfl_extract: 941 942#$# mov.l FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6) 943#$# mov.l FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6) 944#$# mov.l FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6) 945#$# mov.l FP_DST_EX(%a6),TRAP_DSTOP_EX(%a6) 946#$# mov.l FP_DST_HI(%a6),TRAP_DSTOP_HI(%a6) 947#$# mov.l FP_DST_LO(%a6),TRAP_DSTOP_LO(%a6) 948 949 clr.l %d0 950 mov.b FPCR_MODE(%a6),%d0 # pass rnd prec/mode 951 952 mov.b 1+EXC_CMDREG(%a6),%d1 953 andi.w &0x007f,%d1 # extract extension 954 955 andi.l &0x00ff01ff,USER_FPSR(%a6) # clear bits 31-24 and 15-9 of the saved FPSR 956 957 fmov.l &0x0,%fpcr # zero current control regs 958 fmov.l &0x0,%fpsr 959 960 lea FP_SRC(%a6),%a0 # pass: ptr to src op 961 lea FP_DST(%a6),%a1 # pass: ptr to dst op 962 963# maybe we can make these entry points ONLY the UNFL entry points of each routine.
964 mov.l (tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr 965 jsr (tbl_unsupp.l,%pc,%d1.l*1) # call the emulation routine 966 967 bfextu EXC_CMDREG(%a6){&6:&3},%d0 # fetch dst reg number 968 bsr.l store_fpreg # store the result 969 970# The `060 FPU multiplier hardware is such that if the result of a 971# multiply operation is the smallest possible normalized number 972# (0x00000000_80000000_00000000), then the machine will take an 973# underflow exception. Since this is incorrect, we need to check 974# if our emulation, after re-doing the operation, decided that 975# no underflow was called for. We do these checks only in 976# funfl_{unfl,inex}_on() because w/ both exceptions disabled, this 977# special case will simply exit gracefully with the correct result. 978 979# the exceptional possibilities we have left ourselves with are ONLY underflow 980# and inexact. and, the inexact is such that underflow occurred and was disabled 981# but inexact was enabled. 982 btst &unfl_bit,FPCR_ENABLE(%a6) # is underflow enabled? 983 bne.b funfl_unfl_on # yes 984 985funfl_chkinex: 986 btst &inex2_bit,FPCR_ENABLE(%a6) # is inexact enabled? 987 bne.b funfl_inex_on # yes 988 989funfl_exit: 990 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1 991 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs 992 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 993 994 unlk %a6 995#$# add.l &24,%sp 996 bra.l _fpsp_done 997 998# underflow is enabled AND underflow, of course, occurred. so, we have the EXOP 999# in fp1 (don't forget to save fp0). what to do now? 1000# well, we simply have to go to _real_unfl()! 1001funfl_unfl_on: 1002 1003# The `060 FPU multiplier hardware is such that if the result of a 1004# multiply operation is the smallest possible normalized number 1005# (0x00000000_80000000_00000000), then the machine will take an 1006# underflow exception. Since this is incorrect, we check here to see 1007# if our emulation, after re-doing the operation, decided that 1008# no underflow was called for.
1009 btst &unfl_bit,FPSR_EXCEPT(%a6) # did the emulation set underflow? 1010 beq.w funfl_chkinex # no; go check for enabled inexact instead 1011 1012funfl_unfl_on2: 1013 fmovm.x &0x40,FP_SRC(%a6) # save EXOP (fp1) to stack 1014 1015 mov.w &0xe003,2+FP_SRC(%a6) # save exc status 1016 1017 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1 1018 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs 1019 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 1020 1021 frestore FP_SRC(%a6) # do this after fmovm,other f<op>s! 1022 1023 unlk %a6 1024 1025 bra.l _real_unfl 1026 1027# underflow occurred but is disabled. meanwhile, inexact is enabled. therefore, 1028# we must jump to _real_inex(). 1029funfl_inex_on: 1030 1031# The `060 FPU multiplier hardware is such that if the result of a 1032# multiply operation is the smallest possible normalized number 1033# (0x00000000_80000000_00000000), then the machine will take an 1034# underflow exception. 1035# But, whether bogus or not, if inexact is enabled AND it occurred, 1036# then we have to branch to _real_inex(). 1037 1038 btst &inex2_bit,FPSR_EXCEPT(%a6) # did inexact occur? 1039 beq.w funfl_exit # no; exit through _fpsp_done() 1040 1041funfl_inex_on2: 1042 1043 fmovm.x &0x40,FP_SRC(%a6) # save EXOP to stack 1044 1045 mov.b &0xc4,1+EXC_VOFF(%a6) # vector offset = 0xc4 1046 mov.w &0xe001,2+FP_SRC(%a6) # save exc status 1047 1048 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1 1049 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs 1050 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 1051 1052 frestore FP_SRC(%a6) # do this after fmovm,other f<op>s!
1053 1054 unlk %a6 1055 1056 bra.l _real_inex 1057 1058####################################################################### 1059funfl_out: 1060# fmove out (opclass three) took the underflow exception; fout() emulates it. 1061 1062#$# mov.l FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6) 1063#$# mov.l FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6) 1064#$# mov.l FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6) 1065 1066# the src operand is definitely a NORM(!), so tag it as such 1067 mov.b &NORM,STAG(%a6) # set src optype tag 1068 1069 clr.l %d0 1070 mov.b FPCR_MODE(%a6),%d0 # pass rnd prec/mode 1071 1072 and.l &0xffff00ff,USER_FPSR(%a6) # zero exception field 1073 1074 fmov.l &0x0,%fpcr # zero current control regs 1075 fmov.l &0x0,%fpsr 1076 1077 lea FP_SRC(%a6),%a0 # pass ptr to src operand 1078 1079 bsr.l fout # call fmove out routine 1080 1081 btst &unfl_bit,FPCR_ENABLE(%a6) # is underflow enabled? 1082 bne.w funfl_unfl_on2 # yes; exit through _real_unfl() 1083 1084 btst &inex2_bit,FPCR_ENABLE(%a6) # is inexact enabled? 1085 bne.w funfl_inex_on2 # yes; exit through _real_inex() 1086 1087 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1 1088 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs 1089 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 1090 1091 unlk %a6 1092#$# add.l &24,%sp 1093 1094 btst &0x7,(%sp) # is trace on? 1095 beq.l _fpsp_done # no 1096 1097 fmov.l %fpiar,0x8(%sp) # "Current PC" is in FPIAR 1098 mov.w &0x2024,0x6(%sp) # stk fmt = 0x2; voff = 0x024 1099 bra.l _real_trace # trace is on; exit through _real_trace() 1100 1101######################################################################### 1102# XDEF **************************************************************** # 1103# _fpsp_unsupp(): 060FPSP entry point for FP "Unimplemented # 1104# Data Type" exception. # 1105# # 1106# This handler should be the first code executed upon taking the # 1107# FP Unimplemented Data Type exception in an operating system.
# 1108# # 1109# XREF **************************************************************** # 1110# _imem_read_{word,long}() - read instruction word/longword # 1111# fix_skewed_ops() - adjust src operand in fsave frame # 1112# set_tag_x() - determine optype of src/dst operands # 1113# store_fpreg() - store opclass 0 or 2 result to FP regfile # 1114# unnorm_fix() - change UNNORM operands to NORM, DENORM, or ZERO # 1115# load_fpn2() - load dst operand from FP regfile # 1116# load_fpn1() - load src operand from FP regfile # 1117# fout() - emulate an opclass 3 instruction # 1118# tbl_unsupp - addr of table of emulation routines for opclass 0,2 # 1119# _real_inex() - "callout" to operating system inexact handler # 1120# _fpsp_done() - "callout" for exit; work all done # 1121# _real_trace() - "callout" for Trace enabled exception # 1122# funimp_skew() - adjust fsave src ops to "incorrect" value # 1123# _real_snan() - "callout" for SNAN exception # 1124# _real_operr() - "callout" for OPERR exception # 1125# _real_ovfl() - "callout" for OVFL exception # 1126# _real_unfl() - "callout" for UNFL exception # 1127# get_packed() - fetch packed operand from memory # 1128# # 1129# INPUT *************************************************************** # 1130# - The system stack contains the "Unimp Data Type" stk frame # 1131# - The fsave frame contains the src op (for UNNORM/DENORM) # 1132# # 1133# OUTPUT ************************************************************** # 1134# If Inexact exception (opclass 3): # 1135# - The system stack is changed to an Inexact exception stk frame # 1136# If SNAN exception (opclass 3): # 1137# - The system stack is changed to an SNAN exception stk frame # 1138# If OPERR exception (opclass 3): # 1139# - The system stack is changed to an OPERR exception stk frame # 1140# If OVFL exception (opclass 3): # 1141# - The system stack is changed to an OVFL exception stk frame # 1142# If UNFL exception (opclass 3): # 1143# - The system stack is changed to an UNFL exception
stack frame # 1144# If Trace exception enabled: # 1145# - The system stack is changed to a Trace exception stack frame # 1146# Else: (normal case) # 1147# - Correct result has been stored as appropriate # 1148# # 1149# ALGORITHM *********************************************************** # 1150# Two main instruction types can enter here: (1) DENORM or UNNORM # 1151# unimplemented data types. These can be either opclass 0,2 or 3 # 1152# instructions, and (2) PACKED unimplemented data format instructions # 1153# also of opclasses 0,2, or 3. # 1154# For UNNORM/DENORM opclass 0 and 2, the handler fetches the src # 1155# operand from the fsave state frame and the dst operand (if dyadic) # 1156# from the FP register file. The instruction is then emulated by # 1157# choosing an emulation routine from a table of routines indexed by # 1158# instruction type. Once the instruction has been emulated and result # 1159# saved, then we check to see if any enabled exceptions resulted from # 1160# instruction emulation. If none, then we exit through the "callout" # 1161# _fpsp_done(). If there is an enabled FP exception, then we insert # 1162# this exception into the FPU in the fsave state frame and then exit # 1163# through _fpsp_done(). # 1164# PACKED opclass 0 and 2 is similar in how the instruction is # 1165# emulated and exceptions handled. The differences occur in how the # 1166# handler loads the packed op (by calling get_packed() routine) and # 1167# by the fact that a Trace exception could be pending for PACKED ops. # 1168# If a Trace exception is pending, then the current exception stack # 1169# frame is changed to a Trace exception stack frame and an exit is # 1170# made through _real_trace(). # 1171# For UNNORM/DENORM opclass 3, the actual move out to memory is # 1172# performed by calling the routine fout(). 
If no exception should occur # 1173# as the result of emulation, then an exit either occurs through # 1174# _fpsp_done() or through _real_trace() if a Trace exception is pending # 1175# (a Trace stack frame must be created here, too). If an FP exception # 1176# should occur, then we must create an exception stack frame of that # 1177# type and jump to either _real_snan(), _real_operr(), _real_inex(), # 1178# _real_unfl(), or _real_ovfl() as appropriate. PACKED opclass 3 # 1179# emulation is performed in a similar manner. # 1180# # 1181######################################################################### 1182 1183# 1184# (1) DENORM and UNNORM (unimplemented) data types: 1185# 1186# post-instruction 1187# ***************** 1188# * EA * 1189# pre-instruction * * 1190# ***************** ***************** 1191# * 0x0 * 0x0dc * * 0x3 * 0x0dc * 1192# ***************** ***************** 1193# * Next * * Next * 1194# * PC * * PC * 1195# ***************** ***************** 1196# * SR * * SR * 1197# ***************** ***************** 1198# 1199# (2) PACKED format (unsupported) opclasses two and three: 1200# ***************** 1201# * EA * 1202# * * 1203# ***************** 1204# * 0x2 * 0x0dc * 1205# ***************** 1206# * Next * 1207# * PC * 1208# ***************** 1209# * SR * 1210# ***************** 1211# 1212 global _fpsp_unsupp 1213_fpsp_unsupp: 1214 1215 link.w %a6,&-LOCAL_SIZE # init stack frame 1216 1217 fsave FP_SRC(%a6) # save fp state 1218 1219 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 1220 fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs 1221 fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack 1222 1223 btst &0x5,EXC_SR(%a6) # user or supervisor mode? 
1224 bne.b fu_s 1225fu_u: 1226 mov.l %usp,%a0 # fetch user stack pointer 1227 mov.l %a0,EXC_A7(%a6) # save on stack 1228 bra.b fu_cont 1229# if the exception is an opclass zero or two unimplemented data type 1230# exception, then the a7' calculated here is wrong since it doesn't 1231# stack an ea. however, we don't need an a7' for this case anyways. 1232fu_s: 1233 lea 0x4+EXC_EA(%a6),%a0 # load old a7' 1234 mov.l %a0,EXC_A7(%a6) # save on stack 1235 1236fu_cont: 1237 1238# the FPIAR holds the "current PC" of the faulting instruction 1239# the FPIAR should be set correctly for ALL exceptions passing through 1240# this point. 1241 mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6) 1242 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr 1243 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr 1244 bsr.l _imem_read_long # fetch the instruction words 1245 mov.l %d0,EXC_OPWORD(%a6) # store OPWORD and EXTWORD 1246 1247############################ 1248 1249 clr.b SPCOND_FLG(%a6) # clear special condition flag 1250 1251# Separate opclass three (fpn-to-mem) ops since they have a different 1252# stack frame and protocol. 1253 btst &0x5,EXC_CMDREG(%a6) # is it an fmove out? 1254 bne.w fu_out # yes 1255 1256# Separate packed opclass two instructions. 1257 bfextu EXC_CMDREG(%a6){&0:&6},%d0 1258 cmpi.b %d0,&0x13 1259 beq.w fu_in_pack 1260 1261 1262# I'm not sure at this point what FPSR bits are valid for this instruction. 
1263# so, since the emulation routines re-create them anyways, zero exception field 1264 andi.l &0x00ff00ff,USER_FPSR(%a6) # zero exception field 1265 1266 fmov.l &0x0,%fpcr # zero current control regs 1267 fmov.l &0x0,%fpsr 1268 1269# Opclass two w/ memory-to-fpn operation will have an incorrect extended 1270# precision format if the src format was single or double and the 1271# source data type was an INF, NAN, DENORM, or UNNORM 1272 lea FP_SRC(%a6),%a0 # pass ptr to input 1273 bsr.l fix_skewed_ops 1274 1275# we don't know whether the src operand or the dst operand (or both) is the 1276# UNNORM or DENORM. call the function that tags the operand type. if the 1277# input is an UNNORM, then convert it to a NORM, DENORM, or ZERO. 1278 lea FP_SRC(%a6),%a0 # pass: ptr to src op 1279 bsr.l set_tag_x # tag the operand type 1280 cmpi.b %d0,&UNNORM # is operand an UNNORM? 1281 bne.b fu_op2 # no 1282 bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO 1283 1284fu_op2: 1285 mov.b %d0,STAG(%a6) # save src optype tag 1286 1287 bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg 1288 1289# bit five of the fp extension word separates the monadic and dyadic operations 1290# at this point 1291 btst &0x5,1+EXC_CMDREG(%a6) # is operation monadic or dyadic? 1292 beq.b fu_extract # monadic 1293 cmpi.b 1+EXC_CMDREG(%a6),&0x3a # is operation an ftst? 1294 beq.b fu_extract # yes, so it's monadic, too 1295 1296 bsr.l load_fpn2 # load dst into FP_DST 1297 1298 lea FP_DST(%a6),%a0 # pass: ptr to dst op 1299 bsr.l set_tag_x # tag the operand type 1300 cmpi.b %d0,&UNNORM # is operand an UNNORM? 
1301 bne.b fu_op2_done # no 1302 bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO 1303fu_op2_done: 1304 mov.b %d0,DTAG(%a6) # save dst optype tag 1305 1306fu_extract: 1307 clr.l %d0 1308 mov.b FPCR_MODE(%a6),%d0 # fetch rnd mode/prec 1309 1310 bfextu 1+EXC_CMDREG(%a6){&1:&7},%d1 # extract extension 1311 1312 lea FP_SRC(%a6),%a0 1313 lea FP_DST(%a6),%a1 1314 1315 mov.l (tbl_unsupp.l,%pc,%d1.l*4),%d1 # fetch routine addr 1316 jsr (tbl_unsupp.l,%pc,%d1.l*1) 1317 1318# 1319# Exceptions in order of precedence: 1320# BSUN : none 1321# SNAN : all dyadic ops 1322# OPERR : fsqrt(-NORM) 1323# OVFL : all except ftst,fcmp 1324# UNFL : all except ftst,fcmp 1325# DZ : fdiv 1326# INEX2 : all except ftst,fcmp 1327# INEX1 : none (packed doesn't go through here) 1328# 1329 1330# we determine the highest priority exception(if any) set by the 1331# emulation routine that has also been enabled by the user. 1332 mov.b FPCR_ENABLE(%a6),%d0 # fetch exceptions set 1333 bne.b fu_in_ena # some are enabled 1334 1335fu_in_cont: 1336# fcmp and ftst do not store any result. 1337 mov.b 1+EXC_CMDREG(%a6),%d0 # fetch extension 1338 andi.b &0x38,%d0 # extract bits 3-5 1339 cmpi.b %d0,&0x38 # is instr fcmp or ftst? 1340 beq.b fu_in_exit # yes 1341 1342 bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg 1343 bsr.l store_fpreg # store the result 1344 1345fu_in_exit: 1346 1347 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1 1348 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs 1349 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 1350 1351 unlk %a6 1352 1353 bra.l _fpsp_done 1354 1355fu_in_ena: 1356 and.b FPSR_EXCEPT(%a6),%d0 # keep only ones enabled 1357 bfffo %d0{&24:&8},%d0 # find highest priority exception 1358 bne.b fu_in_exc # there is at least one set 1359 1360# 1361# No exceptions occurred that were also enabled. 
Now: 1362# 1363# if (OVFL && ovfl_disabled && inexact_enabled) { 1364# branch to _real_inex() (even if the result was exact!); 1365# } else { 1366# save the result in the proper fp reg (unless the op is fcmp or ftst); 1367# return; 1368# } 1369# 1370 btst &ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set? 1371 beq.b fu_in_cont # no 1372 1373fu_in_ovflchk: 1374 btst &inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled? 1375 beq.b fu_in_cont # no 1376 bra.w fu_in_exc_ovfl # go insert overflow frame 1377 1378# 1379# An exception occurred and that exception was enabled: 1380# 1381# shift enabled exception field into lo byte of d0; 1382# if (((INEX2 || INEX1) && inex_enabled && OVFL && ovfl_disabled) || 1383# ((INEX2 || INEX1) && inex_enabled && UNFL && unfl_disabled)) { 1384# /* 1385# * this is the case where we must call _real_inex() now or else 1386# * there will be no other way to pass it the exceptional operand 1387# */ 1388# call _real_inex(); 1389# } else { 1390# restore exc state (SNAN||OPERR||OVFL||UNFL||DZ||INEX) into the FPU; 1391# } 1392# 1393fu_in_exc: 1394 subi.l &24,%d0 # fix offset to be 0-8 1395 cmpi.b %d0,&0x6 # is exception INEX? (6) 1396 bne.b fu_in_exc_exit # no 1397 1398# the enabled exception was inexact 1399 btst &unfl_bit,FPSR_EXCEPT(%a6) # did disabled underflow occur? 1400 bne.w fu_in_exc_unfl # yes 1401 btst &ovfl_bit,FPSR_EXCEPT(%a6) # did disabled overflow occur? 1402 bne.w fu_in_exc_ovfl # yes 1403 1404# here, we insert the correct fsave status value into the fsave frame for the 1405# corresponding exception. the operand in the fsave frame should be the original 1406# src operand. 
1407fu_in_exc_exit: 1408 mov.l %d0,-(%sp) # save d0 1409 bsr.l funimp_skew # skew sgl or dbl inputs 1410 mov.l (%sp)+,%d0 # restore d0 1411 1412 mov.w (tbl_except.b,%pc,%d0.w*2),2+FP_SRC(%a6) # create exc status 1413 1414 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1 1415 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs 1416 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 1417 1418 frestore FP_SRC(%a6) # restore src op 1419 1420 unlk %a6 1421 1422 bra.l _fpsp_done 1423 1424tbl_except: 1425 short 0xe000,0xe006,0xe004,0xe005 # fsave status words, indexed by d0 (0-3) 1426 short 0xe003,0xe002,0xe001,0xe001 # fsave status words, indexed by d0 (4-7) 1427 1428fu_in_exc_unfl: 1429 mov.w &0x4,%d0 # select tbl_except entry 4 (0xe003) 1430 bra.b fu_in_exc_exit 1431fu_in_exc_ovfl: 1432 mov.w &0x03,%d0 # select tbl_except entry 3 (0xe005) 1433 bra.b fu_in_exc_exit 1434 1435# If the input operand to this operation was opclass two and a single 1436# or double precision denorm, zero, inf, or nan, the operand needs to be 1437# "corrected" in order to have the proper equivalent extended precision 1438# number. In: a0 = ptr to the operand; EXC_CMDREG(a6) selects the case. d0 is used as scratch. Any other opclass/format returns with the operand untouched. 1439 global fix_skewed_ops 1440fix_skewed_ops: 1441 bfextu EXC_CMDREG(%a6){&0:&6},%d0 # extract opclass,src fmt 1442 cmpi.b %d0,&0x11 # is class = 2 & fmt = sgl? 1443 beq.b fso_sgl # yes 1444 cmpi.b %d0,&0x15 # is class = 2 & fmt = dbl? 1445 beq.b fso_dbl # yes 1446 rts # no 1447 1448fso_sgl: 1449 mov.w LOCAL_EX(%a0),%d0 # fetch src exponent 1450 andi.w &0x7fff,%d0 # strip sign 1451 cmpi.w %d0,&0x3f80 # is |exp| == $3f80? 1452 beq.b fso_sgl_dnrm_zero # yes 1453 cmpi.w %d0,&0x407f # no; is |exp| == $407f? 1454 beq.b fso_infnan # yes 1455 rts # no 1456 1457fso_sgl_dnrm_zero: 1458 andi.l &0x7fffffff,LOCAL_HI(%a0) # clear j-bit 1459 beq.b fso_zero # it's a skewed zero 1460fso_sgl_dnrm: 1461# here, we count on norm not to alter a0...
1462 bsr.l norm # normalize mantissa 1463 neg.w %d0 # -shft amt 1464 addi.w &0x3f81,%d0 # adjust new exponent 1465 andi.w &0x8000,LOCAL_EX(%a0) # clear old exponent 1466 or.w %d0,LOCAL_EX(%a0) # insert new exponent 1467 rts 1468 1469fso_zero: 1470 andi.w &0x8000,LOCAL_EX(%a0) # clear bogus exponent 1471 rts 1472 1473fso_infnan: 1474 andi.b &0x7f,LOCAL_HI(%a0) # clear j-bit 1475 ori.w &0x7fff,LOCAL_EX(%a0) # make exponent = $7fff 1476 rts 1477 1478fso_dbl: 1479 mov.w LOCAL_EX(%a0),%d0 # fetch src exponent 1480 andi.w &0x7fff,%d0 # strip sign 1481 cmpi.w %d0,&0x3c00 # is |exp| == $3c00? 1482 beq.b fso_dbl_dnrm_zero # yes 1483 cmpi.w %d0,&0x43ff # no; is |exp| == $43ff? 1484 beq.b fso_infnan # yes 1485 rts # no 1486 1487fso_dbl_dnrm_zero: 1488 andi.l &0x7fffffff,LOCAL_HI(%a0) # clear j-bit 1489 bne.b fso_dbl_dnrm # it's a skewed denorm 1490 tst.l LOCAL_LO(%a0) # is it a zero? 1491 beq.b fso_zero # yes 1492fso_dbl_dnrm: 1493# here, we count on norm not to alter a0... 1494 bsr.l norm # normalize mantissa 1495 neg.w %d0 # -shft amt 1496 addi.w &0x3c01,%d0 # adjust new exponent 1497 andi.w &0x8000,LOCAL_EX(%a0) # clear old exponent 1498 or.w %d0,LOCAL_EX(%a0) # insert new exponent 1499 rts 1500 1501################################################################# 1502 1503# fmove out took an unimplemented data type exception. 1504# the src operand is in FP_SRC. Call fout() to write out the result and 1505# to determine which exceptions, if any, to take. 1506fu_out: 1507 1508# Separate packed move outs from the UNNORM and DENORM move outs. 1509 bfextu EXC_CMDREG(%a6){&3:&3},%d0 1510 cmpi.b %d0,&0x3 1511 beq.w fu_out_pack 1512 cmpi.b %d0,&0x7 1513 beq.w fu_out_pack 1514 1515 1516# I'm not sure at this point what FPSR bits are valid for this instruction. 1517# so, since the emulation routines re-create them anyways, zero exception field. 1518# fmove out doesn't affect ccodes.
1519 and.l &0xffff00ff,USER_FPSR(%a6) # zero exception field 1520 1521 fmov.l &0x0,%fpcr # zero current control regs 1522 fmov.l &0x0,%fpsr 1523 1524# the src can ONLY be a DENORM or an UNNORM! so, don't make any big subroutine 1525# call here. just figure out what it is... 1526 mov.w FP_SRC_EX(%a6),%d0 # get exponent 1527 andi.w &0x7fff,%d0 # strip sign 1528 beq.b fu_out_denorm # it's a DENORM 1529 1530 lea FP_SRC(%a6),%a0 1531 bsr.l unnorm_fix # yes; fix it 1532 1533 mov.b %d0,STAG(%a6) 1534 1535 bra.b fu_out_cont 1536fu_out_denorm: 1537 mov.b &DENORM,STAG(%a6) 1538fu_out_cont: 1539 1540 clr.l %d0 1541 mov.b FPCR_MODE(%a6),%d0 # fetch rnd mode/prec 1542 1543 lea FP_SRC(%a6),%a0 # pass ptr to src operand 1544 1545 mov.l (%a6),EXC_A6(%a6) # in case a6 changes 1546 bsr.l fout # call fmove out routine 1547 1548# Exceptions in order of precedence: 1549# BSUN : none 1550# SNAN : none 1551# OPERR : fmove.{b,w,l} out of large UNNORM 1552# OVFL : fmove.{s,d} 1553# UNFL : fmove.{s,d,x} 1554# DZ : none 1555# INEX2 : all 1556# INEX1 : none (packed doesn't travel through here) 1557 1558# determine the highest priority exception(if any) set by the 1559# emulation routine that has also been enabled by the user. 1560 mov.b FPCR_ENABLE(%a6),%d0 # fetch exceptions enabled 1561 bne.w fu_out_ena # some are enabled 1562 1563fu_out_done: 1564 1565 mov.l EXC_A6(%a6),(%a6) # in case a6 changed 1566 1567# on extended precision opclass three instructions using pre-decrement or 1568# post-increment addressing mode, the address register is not updated. is the 1569# address register was the stack pointer used from user mode, then let's update 1570# it here. if it was used from supervisor mode, then we have to handle this 1571# as a special case. 
1572 btst &0x5,EXC_SR(%a6) 1573 bne.b fu_out_done_s 1574 1575 mov.l EXC_A7(%a6),%a0 # restore a7 1576 mov.l %a0,%usp 1577 1578fu_out_done_cont: 1579 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1 1580 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs 1581 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 1582 1583 unlk %a6 1584 1585 btst &0x7,(%sp) # is trace on? 1586 bne.b fu_out_trace # yes 1587 1588 bra.l _fpsp_done 1589 1590# is the ea mode pre-decrement of the stack pointer from supervisor mode? 1591# ("fmov.x fpm,-(a7)") if so, 1592fu_out_done_s: 1593 cmpi.b SPCOND_FLG(%a6),&mda7_flg 1594 bne.b fu_out_done_cont 1595 1596# the extended precision result is still in fp0. but, we need to save it 1597# somewhere on the stack until we can copy it to its final resting place. 1598# here, we're counting on the top of the stack to be the old place-holders 1599# for fp0/fp1 which have already been restored. that way, we can write 1600# over those destinations with the shifted stack frame. 
1601 fmovm.x &0x80,FP_SRC(%a6) # put answer on stack 1602 1603 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1 1604 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs 1605 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 1606 1607 mov.l (%a6),%a6 # restore frame pointer 1608 1609 mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp) 1610 mov.l LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp) 1611 1612# now, copy the result to the proper place on the stack 1613 mov.l LOCAL_SIZE+FP_SRC_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp) 1614 mov.l LOCAL_SIZE+FP_SRC_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp) 1615 mov.l LOCAL_SIZE+FP_SRC_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp) 1616 1617 add.l &LOCAL_SIZE-0x8,%sp 1618 1619 btst &0x7,(%sp) 1620 bne.b fu_out_trace 1621 1622 bra.l _fpsp_done 1623 1624fu_out_ena: 1625 and.b FPSR_EXCEPT(%a6),%d0 # keep only ones enabled 1626 bfffo %d0{&24:&8},%d0 # find highest priority exception 1627 bne.b fu_out_exc # there is at least one set 1628 1629# no exceptions were set. 1630# if a disabled overflow occurred and inexact was enabled but the result 1631# was exact, then a branch to _real_inex() is made. 1632 btst &ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set? 1633 beq.w fu_out_done # no 1634 1635fu_out_ovflchk: 1636 btst &inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled? 1637 beq.w fu_out_done # no 1638 bra.w fu_inex # yes 1639 1640# 1641# The fp move out that took the "Unimplemented Data Type" exception was 1642# being traced. Since the stack frames are similar, get the "current" PC 1643# from FPIAR and put it in the trace stack frame then jump to _real_trace(). 
1644# 1645# UNSUPP FRAME TRACE FRAME 1646# ***************** ***************** 1647# * EA * * Current * 1648# * * * PC * 1649# ***************** ***************** 1650# * 0x3 * 0x0dc * * 0x2 * 0x024 * 1651# ***************** ***************** 1652# * Next * * Next * 1653# * PC * * PC * 1654# ***************** ***************** 1655# * SR * * SR * 1656# ***************** ***************** 1657# 1658fu_out_trace: 1659 mov.w &0x2024,0x6(%sp) 1660 fmov.l %fpiar,0x8(%sp) 1661 bra.l _real_trace 1662 1663# an exception occurred and that exception was enabled. 1664fu_out_exc: 1665 subi.l &24,%d0 # fix offset to be 0-8 1666 1667# we don't mess with the existing fsave frame. just re-insert it and 1668# jump to the "_real_{}()" handler... 1669 mov.w (tbl_fu_out.b,%pc,%d0.w*2),%d0 1670 jmp (tbl_fu_out.b,%pc,%d0.w*1) 1671 1672 swbeg &0x8 1673tbl_fu_out: 1674 short tbl_fu_out - tbl_fu_out # BSUN can't happen 1675 short tbl_fu_out - tbl_fu_out # SNAN can't happen 1676 short fu_operr - tbl_fu_out # OPERR 1677 short fu_ovfl - tbl_fu_out # OVFL 1678 short fu_unfl - tbl_fu_out # UNFL 1679 short tbl_fu_out - tbl_fu_out # DZ can't happen 1680 short fu_inex - tbl_fu_out # INEX2 1681 short tbl_fu_out - tbl_fu_out # INEX1 won't make it here 1682 1683# for snan,operr,ovfl,unfl, src op is still in FP_SRC so just 1684# frestore it. 
1685fu_snan: 1686 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1 1687 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs 1688 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 1689 1690 mov.w &0x30d8,EXC_VOFF(%a6) # vector offset = 0xd8 1691 mov.w &0xe006,2+FP_SRC(%a6) 1692 1693 frestore FP_SRC(%a6) 1694 1695 unlk %a6 1696 1697 1698 bra.l _real_snan 1699 1700fu_operr: 1701 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1 1702 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs 1703 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 1704 1705 mov.w &0x30d0,EXC_VOFF(%a6) # vector offset = 0xd0 1706 mov.w &0xe004,2+FP_SRC(%a6) 1707 1708 frestore FP_SRC(%a6) 1709 1710 unlk %a6 1711 1712 1713 bra.l _real_operr 1714 1715fu_ovfl: 1716 fmovm.x &0x40,FP_SRC(%a6) # save EXOP to the stack 1717 1718 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1 1719 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs 1720 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 1721 1722 mov.w &0x30d4,EXC_VOFF(%a6) # vector offset = 0xd4 1723 mov.w &0xe005,2+FP_SRC(%a6) 1724 1725 frestore FP_SRC(%a6) # restore EXOP 1726 1727 unlk %a6 1728 1729 bra.l _real_ovfl 1730 1731# underflow can happen for extended precision. extended precision opclass 1732# three instruction exceptions don't update the stack pointer. so, if the 1733# exception occurred from user mode, then simply update a7 and exit normally. 1734# if the exception occurred from supervisor mode, check if 1735fu_unfl: 1736 mov.l EXC_A6(%a6),(%a6) # restore a6 1737 1738 btst &0x5,EXC_SR(%a6) 1739 bne.w fu_unfl_s 1740 1741 mov.l EXC_A7(%a6),%a0 # restore a7 whether we need 1742 mov.l %a0,%usp # to or not... 
1743 1744fu_unfl_cont: 1745 fmovm.x &0x40,FP_SRC(%a6) # save EXOP to the stack 1746 1747 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1 1748 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs 1749 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 1750 1751 mov.w &0x30cc,EXC_VOFF(%a6) # vector offset = 0xcc 1752 mov.w &0xe003,2+FP_SRC(%a6) 1753 1754 frestore FP_SRC(%a6) # restore EXOP 1755 1756 unlk %a6 1757 1758 bra.l _real_unfl 1759 1760fu_unfl_s: 1761 cmpi.b SPCOND_FLG(%a6),&mda7_flg # was the <ea> mode -(sp)? 1762 bne.b fu_unfl_cont 1763 1764# the extended precision result is still in fp0. but, we need to save it 1765# somewhere on the stack until we can copy it to its final resting place 1766# (where the exc frame is currently). make sure it's not at the top of the 1767# frame or it will get overwritten when the exc stack frame is shifted "down". 1768 fmovm.x &0x80,FP_SRC(%a6) # put answer on stack 1769 fmovm.x &0x40,FP_DST(%a6) # put EXOP on stack 1770 1771 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1 1772 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs 1773 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 1774 1775 mov.w &0x30cc,EXC_VOFF(%a6) # vector offset = 0xcc 1776 mov.w &0xe003,2+FP_DST(%a6) 1777 1778 frestore FP_DST(%a6) # restore EXOP 1779 1780 mov.l (%a6),%a6 # restore frame pointer 1781 1782 mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp) 1783 mov.l LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp) 1784 mov.l LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp) 1785 1786# now, copy the result to the proper place on the stack 1787 mov.l LOCAL_SIZE+FP_SRC_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp) 1788 mov.l LOCAL_SIZE+FP_SRC_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp) 1789 mov.l LOCAL_SIZE+FP_SRC_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp) 1790 1791 add.l &LOCAL_SIZE-0x8,%sp 1792 1793 bra.l _real_unfl 1794 1795# fmove in and out enter here. 
#
# fu_inex: exit path that reports an inexact exception.
# Saves the EXOP (fmovm mask 0x40) into FP_SRC, restores the saved
# fp0/fp1, control registers and d0-d1/a0-a1, rewrites the stacked
# format/vector word to 0x30c4 and the fsave status word to 0xe001,
# frestores from FP_SRC and exits through _real_inex().
#
fu_inex:
	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP to the stack

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	mov.w		&0x30c4,EXC_VOFF(%a6)	# vector offset = 0xc4
	mov.w		&0xe001,2+FP_SRC(%a6)	# set fsave status

	frestore	FP_SRC(%a6)		# restore EXOP

	unlk		%a6

	bra.l		_real_inex

#########################################################################
#########################################################################
#
# fu_in_pack: emulate an instruction whose source operand is packed.
# Flow: clear the saved FPSR exception field, fetch the operand with
# get_packed(), tag the src (and the dst for dyadic ops), dispatch
# through tbl_unsupp, then either store the result and exit or put the
# highest-priority enabled exception back into the FPU with frestore.
#
fu_in_pack:

# I'm not sure at this point what FPSR bits are valid for this instruction.
# so, since the emulation routines re-create them anyways, zero exception field
# (the mask value 0x00ff00ff clears bits 31-24 as well as bits 15-8).
	andi.l		&0x0ff00ff,USER_FPSR(%a6) # zero exception field

	fmov.l		&0x0,%fpcr		# zero current control regs
	fmov.l		&0x0,%fpsr

	bsr.l		get_packed		# fetch packed src operand

	lea		FP_SRC(%a6),%a0		# pass ptr to src
	bsr.l		set_tag_x		# set src optype tag

	mov.b		%d0,STAG(%a6)		# save src optype tag

	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg

# bit five of the fp extension word separates the monadic and dyadic operations
# at this point
	btst		&0x5,1+EXC_CMDREG(%a6)	# is operation monadic or dyadic?
	beq.b		fu_extract_p		# monadic
	cmpi.b		1+EXC_CMDREG(%a6),&0x3a	# is operation an ftst?
	beq.b		fu_extract_p		# yes, so it's monadic, too

	bsr.l		load_fpn2		# load dst into FP_DST

	lea		FP_DST(%a6),%a0		# pass: ptr to dst op
	bsr.l		set_tag_x		# tag the operand type
	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
	bne.b		fu_op2_done_p		# no
	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
fu_op2_done_p:
	mov.b		%d0,DTAG(%a6)		# save dst optype tag

fu_extract_p:
	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# fetch rnd mode/prec

	bfextu		1+EXC_CMDREG(%a6){&1:&7},%d1 # extract extension

	lea		FP_SRC(%a6),%a0
	lea		FP_DST(%a6),%a1

# tbl_unsupp holds one longword per extension value; the fetched longword
# is then used as an offset from tbl_unsupp for the jsr.
	mov.l		(tbl_unsupp.l,%pc,%d1.l*4),%d1 # fetch routine addr
	jsr		(tbl_unsupp.l,%pc,%d1.l*1)

#
# Exceptions in order of precedence:
#	BSUN	: none
#	SNAN	: all dyadic ops
#	OPERR	: fsqrt(-NORM)
#	OVFL	: all except ftst,fcmp
#	UNFL	: all except ftst,fcmp
#	DZ	: fdiv
#	INEX2	: all except ftst,fcmp
#	INEX1	: all
#

# we determine the highest priority exception(if any) set by the
# emulation routine that has also been enabled by the user.
	mov.b		FPCR_ENABLE(%a6),%d0	# fetch exceptions enabled
	bne.w		fu_in_ena_p		# some are enabled

fu_in_cont_p:
# fcmp and ftst do not store any result.
	mov.b		1+EXC_CMDREG(%a6),%d0	# fetch extension
	andi.b		&0x38,%d0		# extract bits 3-5
	cmpi.b		%d0,&0x38		# is instr fcmp or ftst?
	beq.b		fu_in_exit_p		# yes

	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
	bsr.l		store_fpreg		# store the result

fu_in_exit_p:

	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
	bne.w		fu_in_exit_s_p		# supervisor

	mov.l		EXC_A7(%a6),%a0		# update user a7
	mov.l		%a0,%usp

fu_in_exit_cont_p:
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	unlk		%a6			# unravel stack frame

	btst		&0x7,(%sp)		# is trace on?
	bne.w		fu_trace_p		# yes

	bra.l		_fpsp_done		# exit to os

# the exception occurred in supervisor mode. check to see if the
# addressing mode was (a7)+. if so, we'll need to shift the
# stack frame "up".
fu_in_exit_s_p:
	btst		&mia7_bit,SPCOND_FLG(%a6) # was ea mode (a7)+
	beq.b		fu_in_exit_cont_p	# no

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	unlk		%a6			# unravel stack frame

# shift the stack frame "up". we don't really care about the <ea> field.
# the first two longwords of the frame are copied 0xc bytes higher and
# sp is then advanced by 0xc.
	mov.l		0x4(%sp),0x10(%sp)
	mov.l		0x0(%sp),0xc(%sp)
	add.l		&0xc,%sp

	btst		&0x7,(%sp)		# is trace on?
	bne.w		fu_trace_p		# yes

	bra.l		_fpsp_done		# exit to os

fu_in_ena_p:
	and.b		FPSR_EXCEPT(%a6),%d0	# keep only ones enabled & set
	bfffo		%d0{&24:&8},%d0		# find highest priority exception
	bne.b		fu_in_exc_p		# at least one was set

#
# No exceptions occurred that were also enabled. Now:
#
#	if (OVFL && ovfl_disabled && inexact_enabled) {
#	    branch to _real_inex() (even if the result was exact!);
#	} else {
#	    save the result in the proper fp reg (unless the op is fcmp or ftst);
#	    return;
#	}
#
	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set?
	beq.w		fu_in_cont_p		# no

fu_in_ovflchk_p:
	btst		&inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled?
	beq.w		fu_in_cont_p		# no
	bra.w		fu_in_exc_ovfl_p	# do _real_inex() now

#
# An exception occurred and that exception was enabled:
#
#	shift enabled exception field into lo byte of d0;
#	if (((INEX2 || INEX1) && inex_enabled && OVFL && ovfl_disabled) ||
#	    ((INEX2 || INEX1) && inex_enabled && UNFL && unfl_disabled)) {
#		/*
#		 * this is the case where we must call _real_inex() now or else
#		 * there will be no other way to pass it the exceptional operand
#		 */
#		call _real_inex();
#	} else {
#		restore exc state (SNAN||OPERR||OVFL||UNFL||DZ||INEX) into the FPU;
#	}
#
fu_in_exc_p:
	subi.l		&24,%d0			# fix offset to be 0-8
	cmpi.b		%d0,&0x6		# is exception INEX? (6 or 7)
	blt.b		fu_in_exc_exit_p	# no

# the enabled exception was inexact
	btst		&unfl_bit,FPSR_EXCEPT(%a6) # did disabled underflow occur?
	bne.w		fu_in_exc_unfl_p	# yes
	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # did disabled overflow occur?
	bne.w		fu_in_exc_ovfl_p	# yes

# here, we insert the correct fsave status value into the fsave frame for the
# corresponding exception. the operand in the fsave frame should be the original
# src operand.
# as a reminder for future predicted pain and agony, we are passing in fsave the
# "non-skewed" operand for cases of sgl and dbl src INFs,NANs, and DENORMs.
# this is INCORRECT for enabled SNAN which would give to the user the skewed SNAN!!!
fu_in_exc_exit_p:
	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
	bne.w		fu_in_exc_exit_s_p	# supervisor

	mov.l		EXC_A7(%a6),%a0		# update user a7
	mov.l		%a0,%usp

fu_in_exc_exit_cont_p:
	mov.w		(tbl_except_p.b,%pc,%d0.w*2),2+FP_SRC(%a6)

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	frestore	FP_SRC(%a6)		# restore src op

	unlk		%a6

	btst		&0x7,(%sp)		# is trace enabled?
	bne.w		fu_trace_p		# yes

	bra.l		_fpsp_done

# fsave status words, indexed by the exception offset left in d0
# (0-7, in the precedence order listed above: BSUN, SNAN, OPERR, OVFL,
# UNFL, DZ, INEX2, INEX1).
tbl_except_p:
	short		0xe000,0xe006,0xe004,0xe005
	short		0xe003,0xe002,0xe001,0xe001

# enabled inexact together with a disabled overflow: use table entry 3.
fu_in_exc_ovfl_p:
	mov.w		&0x3,%d0
	bra.w		fu_in_exc_exit_p

# enabled inexact together with a disabled underflow: use table entry 4.
fu_in_exc_unfl_p:
	mov.w		&0x4,%d0
	bra.w		fu_in_exc_exit_p

# supervisor-mode variant of the exception exit: only the (a7)+ case needs
# the frame shifted; everything else rejoins fu_in_exc_exit_cont_p.
fu_in_exc_exit_s_p:
	btst		&mia7_bit,SPCOND_FLG(%a6)
	beq.b		fu_in_exc_exit_cont_p

	mov.w		(tbl_except_p.b,%pc,%d0.w*2),2+FP_SRC(%a6)

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	frestore	FP_SRC(%a6)		# restore src op

	unlk		%a6			# unravel stack frame

# shift stack frame "up". who cares about <ea> field.
	mov.l		0x4(%sp),0x10(%sp)
	mov.l		0x0(%sp),0xc(%sp)
	add.l		&0xc,%sp

	btst		&0x7,(%sp)		# is trace on?
	bne.b		fu_trace_p		# yes

	bra.l		_fpsp_done		# exit to os

#
# The opclass two PACKED instruction that took an "Unimplemented Data Type"
# exception was being traced. Make the "current" PC the FPIAR and put it in the
# trace stack frame then jump to _real_trace().
#
#		  UNSUPP FRAME		   TRACE FRAME
#		*****************	*****************
#		*      EA	*	*    Current	*
#		*		*	*      PC	*
#		*****************	*****************
#		* 0x2 *	0x0dc	*	* 0x2 *  0x024	*
#		*****************	*****************
#		*     Next	*	*     Next	*
#		*      PC	*	*      PC	*
#		*****************	*****************
#		*      SR	*	*      SR	*
#		*****************	*****************
#
# fu_trace_p: rewrite the frame in place (format/vector word at 0x6(sp),
# FPIAR at 0x8(sp)) and exit through _real_trace().
fu_trace_p:
	mov.w		&0x2024,0x6(%sp)	# stk fmt = 0x2; voff = 0x024
	fmov.l		%fpiar,0x8(%sp)		# "Current PC" is in FPIAR

	bra.l		_real_trace

#########################################################
#########################################################
#
# fu_out_pack: emulate a packed "fmove out".
# Loads the source register with load_fpn1(), tags it, calls fout() and
# then either exits normally or, when an enabled exception was raised,
# continues at fu_out_ena_p.
#
fu_out_pack:

# I'm not sure at this point what FPSR bits are valid for this instruction.
# so, since the emulation routines re-create them anyways, zero exception field.
# fmove out doesn't affect ccodes.
	and.l		&0xffff00ff,USER_FPSR(%a6) # zero exception field

	fmov.l		&0x0,%fpcr		# zero current control regs
	fmov.l		&0x0,%fpsr

	bfextu		EXC_CMDREG(%a6){&6:&3},%d0
	bsr.l		load_fpn1

# unlike other opclass 3, unimplemented data type exceptions, packed must be
# able to detect all operand types.
	lea		FP_SRC(%a6),%a0
	bsr.l		set_tag_x		# tag the operand type
	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
	bne.b		fu_op2_p		# no
	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO

fu_op2_p:
	mov.b		%d0,STAG(%a6)		# save src optype tag

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# fetch rnd mode/prec

	lea		FP_SRC(%a6),%a0		# pass ptr to src operand

# the saved a6 at (a6) is copied to EXC_A6 before the call and copied back
# afterwards (see fu_out_exit_p / fu_out_ena_p).
	mov.l		(%a6),EXC_A6(%a6)	# in case a6 changes
	bsr.l		fout			# call fmove out routine

# Exceptions in order of precedence:
#	BSUN	: no
#	SNAN	: yes
#	OPERR	: if ((k_factor > +17) || (dec. exp exceeds 3 digits))
#	OVFL	: no
#	UNFL	: no
#	DZ	: no
#	INEX2	: yes
#	INEX1	: no

# determine the highest priority exception(if any) set by the
# emulation routine that has also been enabled by the user.
	mov.b		FPCR_ENABLE(%a6),%d0	# fetch exceptions enabled
	bne.w		fu_out_ena_p		# some are enabled

fu_out_exit_p:
	mov.l		EXC_A6(%a6),(%a6)	# restore a6

	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
	bne.b		fu_out_exit_s_p		# supervisor

	mov.l		EXC_A7(%a6),%a0		# update user a7
	mov.l		%a0,%usp

fu_out_exit_cont_p:
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	unlk		%a6			# unravel stack frame

	btst		&0x7,(%sp)		# is trace on?
	bne.w		fu_trace_p		# yes

	bra.l		_fpsp_done		# exit to os

# the exception occurred in supervisor mode. check to see if the
# addressing mode was -(a7). if so, we'll need to shift the
# stack frame "down".
fu_out_exit_s_p:
	btst		&mda7_bit,SPCOND_FLG(%a6) # was ea mode -(a7)
	beq.b		fu_out_exit_cont_p	# no

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

# a6 is reloaded without unlk, so sp still points at the bottom of the
# local frame; every access below is therefore biased by LOCAL_SIZE.
	mov.l		(%a6),%a6		# restore frame pointer

	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
	mov.l		LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)

# now, copy the result to the proper place on the stack
	mov.l		LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp)
	mov.l		LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp)
	mov.l		LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp)

	add.l		&LOCAL_SIZE-0x8,%sp

	btst		&0x7,(%sp)
	bne.w		fu_trace_p

	bra.l		_fpsp_done

fu_out_ena_p:
	and.b		FPSR_EXCEPT(%a6),%d0	# keep only ones enabled
	bfffo		%d0{&24:&8},%d0		# find highest priority exception
	beq.w		fu_out_exit_p

	mov.l		EXC_A6(%a6),(%a6)	# restore a6

# an exception occurred and that exception was enabled.
# the only exception possible on packed move out are INEX, OPERR, and SNAN.
# d0 still holds the raw bfffo offset: equal to 0x1a selects OPERR, greater
# selects INEX, and anything lower falls through to SNAN.
fu_out_exc_p:
	cmpi.b		%d0,&0x1a
	bgt.w		fu_inex_p2
	beq.w		fu_operr_p

fu_snan_p:
	btst		&0x5,EXC_SR(%a6)
	bne.b		fu_snan_s_p

	mov.l		EXC_A7(%a6),%a0
	mov.l		%a0,%usp
	bra.w		fu_snan

fu_snan_s_p:
	cmpi.b		SPCOND_FLG(%a6),&mda7_flg
	bne.w		fu_snan

# the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
# the strategy is to move the exception frame "down" 12 bytes. then, we
# can store the default result where the exception frame was.
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	mov.w		&0x30d8,EXC_VOFF(%a6)	# vector offset = 0xd8
	mov.w		&0xe006,2+FP_SRC(%a6)	# set fsave status

	frestore	FP_SRC(%a6)		# restore src operand

	mov.l		(%a6),%a6		# restore frame pointer

	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
	mov.l		LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
	mov.l		LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)

# now, we copy the default result to its proper location
	mov.l		LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp)
	mov.l		LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp)
	mov.l		LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp)

	add.l		&LOCAL_SIZE-0x8,%sp

	bra.l		_real_snan

fu_operr_p:
	btst		&0x5,EXC_SR(%a6)
	bne.w		fu_operr_p_s

	mov.l		EXC_A7(%a6),%a0
	mov.l		%a0,%usp
	bra.w		fu_operr

fu_operr_p_s:
	cmpi.b		SPCOND_FLG(%a6),&mda7_flg
	bne.w		fu_operr

# the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
# the strategy is to move the exception frame "down" 12 bytes. then, we
# can store the default result where the exception frame was.
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	mov.w		&0x30d0,EXC_VOFF(%a6)	# vector offset = 0xd0
	mov.w		&0xe004,2+FP_SRC(%a6)	# set fsave status

	frestore	FP_SRC(%a6)		# restore src operand

	mov.l		(%a6),%a6		# restore frame pointer

	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
	mov.l		LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
	mov.l		LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)

# now, we copy the default result to its proper location
	mov.l		LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp)
	mov.l		LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp)
	mov.l		LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp)

	add.l		&LOCAL_SIZE-0x8,%sp

	bra.l		_real_operr

fu_inex_p2:
	btst		&0x5,EXC_SR(%a6)
	bne.w		fu_inex_s_p2

	mov.l		EXC_A7(%a6),%a0
	mov.l		%a0,%usp
	bra.w		fu_inex

fu_inex_s_p2:
	cmpi.b		SPCOND_FLG(%a6),&mda7_flg
	bne.w		fu_inex

# the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
# the strategy is to move the exception frame "down" 12 bytes. then, we
# can store the default result where the exception frame was.
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	mov.w		&0x30c4,EXC_VOFF(%a6)	# vector offset = 0xc4
	mov.w		&0xe001,2+FP_SRC(%a6)	# set fsave status

	frestore	FP_SRC(%a6)		# restore src operand

	mov.l		(%a6),%a6		# restore frame pointer

	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
	mov.l		LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
	mov.l		LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)

# now, we copy the default result to its proper location
	mov.l		LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp)
	mov.l		LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp)
	mov.l		LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp)

	add.l		&LOCAL_SIZE-0x8,%sp

	bra.l		_real_inex

#########################################################################

#
# if we're stuffing a source operand back into an fsave frame then we
# have to make sure that for single or double source operands that the
# format stuffed is as weird as the hardware usually makes it.
#
# funimp_skew: dispatch on the 3-bit source specifier from EXC_EXTWORD;
# value 1 goes to the sgl path, value 5 to the dbl path, anything else
# returns with FP_SRC untouched.
#
	global		funimp_skew
funimp_skew:
	bfextu		EXC_EXTWORD(%a6){&3:&3},%d0 # extract src specifier
	cmpi.b		%d0,&0x1		# was src sgl?
	beq.b		funimp_skew_sgl		# yes
	cmpi.b		%d0,&0x5		# was src dbl?
	beq.b		funimp_skew_dbl		# yes
	rts

# sgl: only a non-zero exponent that is not above 0x3f80 is rewritten.
funimp_skew_sgl:
	mov.w		FP_SRC_EX(%a6),%d0	# fetch DENORM exponent
	andi.w		&0x7fff,%d0		# strip sign
	beq.b		funimp_skew_sgl_not
	cmpi.w		%d0,&0x3f80
	bgt.b		funimp_skew_sgl_not
	neg.w		%d0			# make exponent negative
	addi.w		&0x3f81,%d0		# find amt to shift
	mov.l		FP_SRC_HI(%a6),%d1	# fetch DENORM hi(man)
	lsr.l		%d0,%d1			# shift it
	bset		&31,%d1			# set j-bit
	mov.l		%d1,FP_SRC_HI(%a6)	# insert new hi(man)
	andi.w		&0x8000,FP_SRC_EX(%a6)	# clear old exponent
	ori.w		&0x3f80,FP_SRC_EX(%a6)	# insert new "skewed" exponent
funimp_skew_sgl_not:
	rts

# dbl: only a non-zero exponent that is not above 0x3c00 is rewritten;
# the mantissa is denormalized by dnrm_lp() with threshold 0x3c01.
funimp_skew_dbl:
	mov.w		FP_SRC_EX(%a6),%d0	# fetch DENORM exponent
	andi.w		&0x7fff,%d0		# strip sign
	beq.b		funimp_skew_dbl_not
	cmpi.w		%d0,&0x3c00
	bgt.b		funimp_skew_dbl_not

	tst.b		FP_SRC_EX(%a6)		# make "internal format"
	smi.b		0x2+FP_SRC(%a6)		# byte at FP_SRC+2 remembers the sign
	mov.w		%d0,FP_SRC_EX(%a6)	# insert exponent with cleared sign
	clr.l		%d0			# clear g,r,s
	lea		FP_SRC(%a6),%a0		# pass ptr to src op
	mov.w		&0x3c01,%d1		# pass denorm threshold
	bsr.l		dnrm_lp			# denorm it
	mov.w		&0x3c00,%d0		# new exponent
	tst.b		0x2+FP_SRC(%a6)		# is sign set?
	beq.b		fss_dbl_denorm_done	# no
	bset		&15,%d0			# set sign
fss_dbl_denorm_done:
	bset		&0x7,FP_SRC_HI(%a6)	# set j-bit
	mov.w		%d0,FP_SRC_EX(%a6)	# insert new exponent
funimp_skew_dbl_not:
	rts

#########################################################################
#
# _mem_write2: when SR bit 5 is clear (user mode) branch to _dmem_write();
# otherwise copy the three longwords at (a0) into FP_DST_EX/HI/LO and
# return with d1 cleared.
#
	global		_mem_write2
_mem_write2:
	btst		&0x5,EXC_SR(%a6)
	beq.l		_dmem_write
	mov.l		0x0(%a0),FP_DST_EX(%a6)
	mov.l		0x4(%a0),FP_DST_HI(%a6)
	mov.l		0x8(%a0),FP_DST_LO(%a6)
	clr.l		%d1
	rts

#########################################################################
# XDEF ****************************************************************	#
#	_fpsp_effadd(): 060FPSP entry point for FP "Unimplemented	#
#			effective address" exception.			#
#									#
#	This handler should be the first code executed upon taking the	#
#	FP Unimplemented Effective Address exception in an operating	#
#	system.								#
#									#
# XREF ****************************************************************	#
#	_imem_read_long() - read instruction longword			#
#	fix_skewed_ops() - adjust src operand in fsave frame		#
#	set_tag_x() - determine optype of src/dst operands		#
#	store_fpreg() - store opclass 0 or 2 result to FP regfile	#
#	unnorm_fix() - change UNNORM operands to NORM or ZERO		#
#	load_fpn2() - load dst operand from FP regfile			#
#	tbl_unsupp - addr of table of emulation routines for opclass 0,2	#
#	decbin() - convert packed data to FP binary data		#
#	_real_fpu_disabled() - "callout" for "FPU disabled" exception	#
#	_real_access() - "callout" for access error exception		#
#	_mem_read() - read extended immediate operand from memory	#
#	_fpsp_done() - "callout" for exit; work all done		#
#	_real_trace() - "callout" for Trace enabled exception		#
#	fmovm_dynamic() - emulate dynamic fmovm instruction		#
#	fmovm_ctrl() - emulate fmovm control instruction		#
#									#
# INPUT ***************************************************************	#
#	- The system stack contains the "Unimplemented <ea>" stk frame	#
#									#
# OUTPUT **************************************************************	#
#	If access error:						#
#	- The system stack is changed to an access error stack frame	#
#	If FPU disabled:						#
#	- The system stack is changed to an FPU disabled stack frame	#
#	If Trace exception enabled:					#
#	- The system stack is changed to a Trace exception stack frame	#
#	Else: (normal case)						#
#	- None (correct result has been stored as appropriate)		#
#									#
# ALGORITHM ***********************************************************	#
#	This exception handles 3 types of operations:			#
# (1) FP Instructions using extended precision or packed immediate	#
#     addressing mode.							#
# (2) The "fmovm.x" instruction w/ dynamic register specification.	#
# (3) The "fmovm.l" instruction w/ 2 or 3 control registers.		#
#									#
#	For immediate data operations, the data is read in w/ a		#
# _mem_read() "callout", converted to FP binary (if packed), and used	#
# as the source operand to the instruction specified by the instruction	#
# word. If no FP exception should be reported as a result of the	#
# emulation, then the result is stored to the destination register and	#
# the handler exits through _fpsp_done(). If an enabled exc has been	#
# signalled as a result of emulation, then an fsave state frame		#
# corresponding to the FP exception type must be entered into the 060	#
# FPU before exiting. In either the enabled or disabled cases, we	#
# must also check if a Trace exception is pending, in which case, we	#
# must create a Trace exception stack frame from the current exception	#
# stack frame. If no Trace is pending, we simply exit through		#
# _fpsp_done().								#
#	For "fmovm.x", call the routine fmovm_dynamic() which will	#
# decode and emulate the instruction. No FP exceptions can be pending	#
# as a result of this operation emulation. A Trace exception can be	#
# pending, though, which means the current stack frame must be changed	#
# to a Trace stack frame and an exit made through _real_trace().	#
# For the case of "fmovm.x Dn,-(a7)", where the offending instruction	#
# was executed from supervisor mode, this handler must store the FP	#
# register file values to the system stack by itself since		#
# fmovm_dynamic() can't handle this. A normal exit is made through	#
# _fpsp_done().								#
#	For "fmovm.l", fmovm_ctrl() is used to emulate the instruction.	#
# Again, a Trace exception may be pending and an exit made through	#
# _real_trace(). Else, a normal exit is made through _fpsp_done().	#
#									#
#	Before any of the above is attempted, it must be checked to	#
# see if the FPU is disabled. Since the "Unimp <ea>" exception is taken	#
# before the "FPU disabled" exception, but the "FPU disabled" exception	#
# has higher priority, we check the disabled bit in the PCR. If set,	#
# then we must create an 8 word "FPU disabled" exception stack frame	#
# from the current 4 word exception stack frame. This includes		#
# reproducing the effective address of the instruction to put on the	#
# new stack frame.							#
#									#
#	In the process of all emulation work, if a _mem_read()		#
# "callout" returns a failing result indicating an access error, then	#
# we must create an access error stack frame from the current stack	#
# frame. This information includes a faulting address and a fault-	#
# status-longword. These are created within this handler.		#
#									#
#########################################################################

	global		_fpsp_effadd
_fpsp_effadd:

# This exception type takes priority over the "Line F Emulator"
# exception. Therefore, the FPU could be disabled when entering here.
# So, we must check to see if it's disabled and handle that case separately.
# Note that d0 is still pushed on the stack when iea_disabled is entered.
	mov.l		%d0,-(%sp)		# save d0
	movc		%pcr,%d0		# load proc cr
	btst		&0x1,%d0		# is FPU disabled?
	bne.w		iea_disabled		# yes
	mov.l		(%sp)+,%d0		# restore d0

	link		%a6,&-LOCAL_SIZE	# init stack frame

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack

# PC of instruction that took the exception is the PC in the frame
	mov.l		EXC_PC(%a6),EXC_EXTWPTR(%a6)

	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch the instruction words
	mov.l		%d0,EXC_OPWORD(%a6)	# store OPWORD and EXTWORD

#########################################################################

	tst.w		%d0			# is operation fmovem?
	bmi.w		iea_fmovm		# yes

#
# here, we will have:
#	fabs	fdabs	fsabs		facos		fmod
#	fadd	fdadd	fsadd		fasin		frem
#	fcmp				fatan		fscale
#	fdiv	fddiv	fsdiv		fatanh		fsin
#	fint				fcos		fsincos
#	fintrz				fcosh		fsinh
#	fmove	fdmove	fsmove		fetox		ftan
#	fmul	fdmul	fsmul		fetoxm1		ftanh
#	fneg	fdneg	fsneg		fgetexp		ftentox
#	fsgldiv				fgetman		ftwotox
#	fsglmul				flog10
#	fsqrt				flog2
#	fsub	fdsub	fssub		flogn
#	ftst				flognp1
# which can all use f<op>.{x,p}
# so, now it's immediate data extended precision AND PACKED FORMAT!
#
iea_op:
	andi.l		&0x00ff00ff,USER_FPSR(%a6)

	btst		&0xa,%d0		# is src fmt x or p?
	bne.b		iea_op_pack		# packed

	mov.l		EXC_EXTWPTR(%a6),%a0	# pass: ptr to #<data>
	lea		FP_SRC(%a6),%a1		# pass: ptr to super addr
	mov.l		&0xc,%d0		# pass: 12 bytes
	bsr.l		_imem_read		# read extended immediate

	tst.l		%d1			# did ifetch fail?
	bne.w		iea_iacc		# yes

	bra.b		iea_op_setsrc

iea_op_pack:

	mov.l		EXC_EXTWPTR(%a6),%a0	# pass: ptr to #<data>
	lea		FP_SRC(%a6),%a1		# pass: ptr to super dst
	mov.l		&0xc,%d0		# pass: 12 bytes
	bsr.l		_imem_read		# read packed operand

	tst.l		%d1			# did ifetch fail?
	bne.w		iea_iacc		# yes

# The packed operand is an INF or a NAN if the exponent field is all ones.
	bfextu		FP_SRC(%a6){&1:&15},%d0	# get exp
	cmpi.w		%d0,&0x7fff		# INF or NAN?
	beq.b		iea_op_setsrc		# operand is an INF or NAN

# The packed operand is a zero if the mantissa is all zero, else it's
# a normal packed op.
	mov.b		3+FP_SRC(%a6),%d0	# get byte 4
	andi.b		&0x0f,%d0		# clear all but last nybble
	bne.b		iea_op_gp_not_spec	# not a zero
	tst.l		FP_SRC_HI(%a6)		# is lw 2 zero?
	bne.b		iea_op_gp_not_spec	# not a zero
	tst.l		FP_SRC_LO(%a6)		# is lw 3 zero?
	beq.b		iea_op_setsrc		# operand is a ZERO
iea_op_gp_not_spec:
	lea		FP_SRC(%a6),%a0		# pass: ptr to packed op
	bsr.l		decbin			# convert to extended
	fmovm.x		&0x80,FP_SRC(%a6)	# make this the srcop

iea_op_setsrc:
	addi.l		&0xc,EXC_EXTWPTR(%a6)	# update extension word pointer

# FP_SRC now holds the src operand.
	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
	bsr.l		set_tag_x		# tag the operand type
	mov.b		%d0,STAG(%a6)		# could be ANYTHING!!!
	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
	bne.b		iea_op_getdst		# no
	bsr.l		unnorm_fix		# yes; convert to NORM/DENORM/ZERO
	mov.b		%d0,STAG(%a6)		# set new optype tag
iea_op_getdst:
# STORE_FLG is tested at iea_op_save: zero means "store the result",
# non-zero (set with st below) means "skip the store".
	clr.b		STORE_FLG(%a6)		# clear "store result" boolean

	btst		&0x5,1+EXC_CMDREG(%a6)	# is operation monadic or dyadic?
	beq.b		iea_op_extract		# monadic
	btst		&0x4,1+EXC_CMDREG(%a6)	# is operation fsincos,ftst,fcmp?
	bne.b		iea_op_spec		# yes

iea_op_loaddst:
	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # fetch dst regno
	bsr.l		load_fpn2		# load dst operand

	lea		FP_DST(%a6),%a0		# pass: ptr to dst op
	bsr.l		set_tag_x		# tag the operand type
	mov.b		%d0,DTAG(%a6)		# could be ANYTHING!!!
	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
	bne.b		iea_op_extract		# no
	bsr.l		unnorm_fix		# yes; convert to NORM/DENORM/ZERO
	mov.b		%d0,DTAG(%a6)		# set new optype tag
	bra.b		iea_op_extract

# the operation is fsincos, ftst, or fcmp. only fcmp is dyadic
iea_op_spec:
	btst		&0x3,1+EXC_CMDREG(%a6)	# is operation fsincos?
	beq.b		iea_op_extract		# yes
# now, we're left with ftst and fcmp. so, first let's tag them so that they don't
# store a result. then, only fcmp will branch back and pick up a dst operand.
	st		STORE_FLG(%a6)		# don't store a final result
	btst		&0x1,1+EXC_CMDREG(%a6)	# is operation fcmp?
	beq.b		iea_op_loaddst		# yes

iea_op_extract:
	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass: rnd mode,prec

	mov.b		1+EXC_CMDREG(%a6),%d1
	andi.w		&0x007f,%d1		# extract extension

	fmov.l		&0x0,%fpcr
	fmov.l		&0x0,%fpsr

	lea		FP_SRC(%a6),%a0
	lea		FP_DST(%a6),%a1

	mov.l		(tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
	jsr		(tbl_unsupp.l,%pc,%d1.l*1)

#
# Exceptions in order of precedence:
#	BSUN	: none
#	SNAN	: all operations
#	OPERR	: all reg-reg or mem-reg operations that can normally operr
#	OVFL	: same as OPERR
#	UNFL	: same as OPERR
#	DZ	: same as OPERR
#	INEX2	: same as OPERR
#	INEX1	: all packed immediate operations
#

# we determine the highest priority exception(if any) set by the
# emulation routine that has also been enabled by the user.
	mov.b		FPCR_ENABLE(%a6),%d0	# fetch exceptions enabled
	bne.b		iea_op_ena		# some are enabled

# now, we save the result, unless, of course, the operation was ftst or fcmp.
# these don't save results.
iea_op_save:
	tst.b		STORE_FLG(%a6)		# does this op store a result?
	bne.b		iea_op_exit1		# exit with no frestore

iea_op_store:
	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # fetch dst regno
	bsr.l		store_fpreg		# store the result

iea_op_exit1:
	mov.l		EXC_PC(%a6),USER_FPIAR(%a6) # set FPIAR to "Current PC"
	mov.l		EXC_EXTWPTR(%a6),EXC_PC(%a6) # set "Next PC" in exc frame

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	unlk		%a6			# unravel the frame

	btst		&0x7,(%sp)		# is trace on?
	bne.w		iea_op_trace		# yes

	bra.l		_fpsp_done		# exit to os

iea_op_ena:
	and.b		FPSR_EXCEPT(%a6),%d0	# keep only ones enable and set
	bfffo		%d0{&24:&8},%d0		# find highest priority exception
	bne.b		iea_op_exc		# at least one was set

# no exception occurred. now, did a disabled, exact overflow occur with inexact
# enabled? if so, then we have to stuff an overflow frame into the FPU.
	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur?
	beq.b		iea_op_save

iea_op_ovfl:
	btst		&inex2_bit,FPCR_ENABLE(%a6) # is inexact enabled?
	beq.b		iea_op_store		# no
	bra.b		iea_op_exc_ovfl		# yes

# an enabled exception occurred. we have to insert the exception type back into
# the machine.
# NOTE(review): only offset 6 takes the disabled-ovfl/unfl check below;
# offset 7 goes straight to iea_op_exc_force. fu_in_exc_p uses "blt" so that
# both 6 and 7 are checked -- confirm the difference is intended.
iea_op_exc:
	subi.l		&24,%d0			# fix offset to be 0-8
	cmpi.b		%d0,&0x6		# is exception INEX?
	bne.b		iea_op_exc_force	# no

# the enabled exception was inexact. so, if it occurs with an overflow
# or underflow that was disabled, then we have to force an overflow or
# underflow frame.
	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur?
	bne.b		iea_op_exc_ovfl		# yes
	btst		&unfl_bit,FPSR_EXCEPT(%a6) # did underflow occur?
	bne.b		iea_op_exc_unfl		# yes

iea_op_exc_force:
	mov.w		(tbl_iea_except.b,%pc,%d0.w*2),2+FP_SRC(%a6)
	bra.b		iea_op_exit2		# exit with frestore

# fsave status words, indexed by the exception offset left in d0 (0-7).
tbl_iea_except:
	short		0xe002, 0xe006, 0xe004, 0xe005
	short		0xe003, 0xe002, 0xe001, 0xe001

iea_op_exc_ovfl:
	mov.w		&0xe005,2+FP_SRC(%a6)
	bra.b		iea_op_exit2

iea_op_exc_unfl:
	mov.w		&0xe003,2+FP_SRC(%a6)

iea_op_exit2:
	mov.l		EXC_PC(%a6),USER_FPIAR(%a6) # set FPIAR to "Current PC"
	mov.l		EXC_EXTWPTR(%a6),EXC_PC(%a6) # set "Next PC" in exc frame

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	frestore	FP_SRC(%a6)		# restore exceptional state

	unlk		%a6			# unravel the frame

	btst		&0x7,(%sp)		# is trace on?
	bne.b		iea_op_trace		# yes

	bra.l		_fpsp_done		# exit to os

#
# The opclass two instruction that took an "Unimplemented Effective Address"
# exception was being traced. Make the "current" PC the FPIAR and put it in
# the trace stack frame then jump to _real_trace().
#
#		 UNIMP EA FRAME		   TRACE FRAME
#		*****************	*****************
#		* 0x0 *  0x0f0	*	*    Current	*
#		*****************	*      PC	*
#		*    Current	*	*****************
#		*      PC	*	* 0x2 *  0x024	*
#		*****************	*****************
#		*      SR	*	*     Next	*
#		*****************	*      PC	*
#					*****************
#					*      SR	*
#					*****************
#
# iea_op_trace: grow the 4-word frame into a trace frame by pushing one
# extra longword, then fill in the format/vector word and the FPIAR.
iea_op_trace:
	mov.l		(%sp),-(%sp)		# shift stack frame "down"
	mov.w		0x8(%sp),0x4(%sp)
	mov.w		&0x2024,0x6(%sp)	# stk fmt = 0x2; voff = 0x024
	fmov.l		%fpiar,0x8(%sp)		# "Current PC" is in FPIAR

	bra.l		_real_trace

#########################################################################
#
# iea_fmovm: d0 still holds the fetched opword/extension word; bit 14
# selects between the data-register and control-register forms.
#
iea_fmovm:
	btst		&14,%d0			# ctrl or data reg
	beq.w		iea_fmovm_ctrl

iea_fmovm_data:

	btst		&0x5,EXC_SR(%a6)	# user or supervisor mode
	bne.b		iea_fmovm_data_s

iea_fmovm_data_u:
	mov.l		%usp,%a0
	mov.l		%a0,EXC_A7(%a6)		# store current a7
	bsr.l		fmovm_dynamic		# do dynamic fmovm
	mov.l		EXC_A7(%a6),%a0		# load possibly new a7
	mov.l		%a0,%usp		# update usp
	bra.w		iea_fmovm_exit

# supervisor mode: EXC_A7 is seeded with the address just past the
# exception frame's format/vector word, and SPCOND_FLG is cleared so the
# value tested after the call was set during fmovm_dynamic().
iea_fmovm_data_s:
	clr.b		SPCOND_FLG(%a6)
	lea		0x2+EXC_VOFF(%a6),%a0
	mov.l		%a0,EXC_A7(%a6)
	bsr.l		fmovm_dynamic		# do dynamic fmovm

	cmpi.b		SPCOND_FLG(%a6),&mda7_flg
	beq.w		iea_fmovm_data_predec
	cmpi.b		SPCOND_FLG(%a6),&mia7_flg
	bne.w		iea_fmovm_exit

# right now, d0 = the size.
# the data has been fetched from the supervisor stack, but we have not
# incremented the stack pointer by the appropriate number of bytes.
# do it here.
# d0 = number of bytes consumed by the (a7)+ access. the exception frame
# is rebuilt d0 bytes higher and the address of the rebuilt frame is left
# in the EXC_SR slot, from where it is popped into sp after the unlk.
# NOTE(review): this relies on EXC_SR being the first slot above the
# saved a6 -- confirm against the EXC_* definitions.
iea_fmovm_data_postinc:
	btst		&0x7,EXC_SR(%a6)	# is trace enabled?
	bne.b		iea_fmovm_data_pi_trace	# yes

	mov.w		EXC_SR(%a6),(EXC_SR,%a6,%d0)
	mov.l		EXC_EXTWPTR(%a6),(EXC_PC,%a6,%d0)
	mov.w		&0x00f0,(EXC_VOFF,%a6,%d0)

	lea		(EXC_SR,%a6,%d0),%a0
	mov.l		%a0,EXC_SR(%a6)		# stash new sp for the pop below

	fmovm.x		EXC_FP0(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	unlk		%a6
	mov.l		(%sp)+,%sp		# switch to the rebuilt frame
	bra.l		_fpsp_done

# same as above, but a 6-word trace frame (fmt 0x2, voff 0x024) is built
# instead: it starts 4 bytes lower and carries the old stacked PC in the
# extra longword.
iea_fmovm_data_pi_trace:
	mov.w		EXC_SR(%a6),(EXC_SR-0x4,%a6,%d0)
	mov.l		EXC_EXTWPTR(%a6),(EXC_PC-0x4,%a6,%d0)
	mov.w		&0x2024,(EXC_VOFF-0x4,%a6,%d0)
	mov.l		EXC_PC(%a6),(EXC_VOFF+0x2-0x4,%a6,%d0)

	lea		(EXC_SR-0x4,%a6,%d0),%a0
	mov.l		%a0,EXC_SR(%a6)		# stash new sp for the pop below

	fmovm.x		EXC_FP0(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	unlk		%a6
	mov.l		(%sp)+,%sp		# switch to the rebuilt frame
	bra.l		_real_trace

# right now, d0 = the size and d1 = the strg.
# both are parked in the two bytes of EXC_VOFF because d0-d1 are about to
# be reloaded with the user's values.
iea_fmovm_data_predec:
	mov.b		%d1,EXC_VOFF(%a6)	# store strg
	mov.b		%d0,0x1+EXC_VOFF(%a6)	# store size

	fmovm.x		EXC_FP0(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	mov.l		(%a6),-(%sp)		# make a copy of a6
	mov.l		%d0,-(%sp)		# save d0
	mov.l		%d1,-(%sp)		# save d1
	mov.l		EXC_EXTWPTR(%a6),-(%sp)	# make a copy of Next PC

	clr.l		%d0
	mov.b		0x1+EXC_VOFF(%a6),%d0	# fetch size
	neg.l		%d0			# get negative of size

	btst		&0x7,EXC_SR(%a6)	# is trace enabled?
	beq.b		iea_fmovm_data_p2

# trace enabled: build a 6-word trace frame "size" bytes lower.
	mov.w		EXC_SR(%a6),(EXC_SR-0x4,%a6,%d0)
	mov.l		EXC_PC(%a6),(EXC_VOFF-0x2,%a6,%d0)
	mov.l		(%sp)+,(EXC_PC-0x4,%a6,%d0)
	mov.w		&0x2024,(EXC_VOFF-0x4,%a6,%d0)

	pea		(%a6,%d0)		# create final sp
	bra.b		iea_fmovm_data_p3

# trace disabled: build a 4-word frame "size" bytes lower.
iea_fmovm_data_p2:
	mov.w		EXC_SR(%a6),(EXC_SR,%a6,%d0)
	mov.l		(%sp)+,(EXC_PC,%a6,%d0)
	mov.w		&0x00f0,(EXC_VOFF,%a6,%d0)

	pea		(0x4,%a6,%d0)		# create final sp

# walk the strg byte from bit 7 down to bit 0. for every set bit, one FP
# register is stored (fmovm masks 0x80, 0x40, ... 0x01 in that order) at
# the next 12-byte slot above the relocated frame, and d0 is advanced.
iea_fmovm_data_p3:
	clr.l		%d1
	mov.b		EXC_VOFF(%a6),%d1	# fetch strg

	tst.b		%d1
	bpl.b		fm_1
	fmovm.x		&0x80,(0x4+0x8,%a6,%d0)
	addi.l		&0xc,%d0
fm_1:
	lsl.b		&0x1,%d1
	bpl.b		fm_2
	fmovm.x		&0x40,(0x4+0x8,%a6,%d0)
	addi.l		&0xc,%d0
fm_2:
	lsl.b		&0x1,%d1
	bpl.b		fm_3
	fmovm.x		&0x20,(0x4+0x8,%a6,%d0)
	addi.l		&0xc,%d0
fm_3:
	lsl.b		&0x1,%d1
	bpl.b		fm_4
	fmovm.x		&0x10,(0x4+0x8,%a6,%d0)
	addi.l		&0xc,%d0
fm_4:
	lsl.b		&0x1,%d1
	bpl.b		fm_5
	fmovm.x		&0x08,(0x4+0x8,%a6,%d0)
	addi.l		&0xc,%d0
fm_5:
	lsl.b		&0x1,%d1
	bpl.b		fm_6
	fmovm.x		&0x04,(0x4+0x8,%a6,%d0)
	addi.l		&0xc,%d0
fm_6:
	lsl.b		&0x1,%d1
	bpl.b		fm_7
	fmovm.x		&0x02,(0x4+0x8,%a6,%d0)
	addi.l		&0xc,%d0
fm_7:
	lsl.b		&0x1,%d1
	bpl.b		fm_end
	fmovm.x		&0x01,(0x4+0x8,%a6,%d0)
# the stack now holds, from (%sp) up: final sp, saved d1, saved d0 and
# the copy of a6 pushed above.
fm_end:
	mov.l		0x4(%sp),%d1		# restore d1
	mov.l		0x8(%sp),%d0		# restore d0
	mov.l		0xc(%sp),%a6		# restore a6
	mov.l		(%sp)+,%sp		# switch to the relocated frame

	btst		&0x7,(%sp)		# is trace enabled?
	beq.l		_fpsp_done
	bra.l		_real_trace

#########################################################################
iea_fmovm_ctrl:

	bsr.l		fmovm_ctrl		# load ctrl regs

# common exit for fmovm emulations that did not have to move the
# exception frame.
iea_fmovm_exit:
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	btst		&0x7,EXC_SR(%a6)	# is trace on?
	bne.b		iea_fmovm_trace		# yes

	mov.l		EXC_EXTWPTR(%a6),EXC_PC(%a6) # set Next PC

	unlk		%a6			# unravel the frame

	bra.l		_fpsp_done		# exit to os

#
# The fmovm instruction (control reg form, or data reg form that needed
# no a7 fix-up) that took an "Unimplemented Effective Address"
# exception was being traced. The "Current PC" for the trace frame is the
# PC stacked for Unimp EA. The "Next PC" is in EXC_EXTWPTR.
# After fixing the stack frame, jump to _real_trace().
#
#        UNIMP EA FRAME            TRACE FRAME
#       *****************       *****************
#       * 0x0 *  0x0f0  *       *    Current    *
#       *****************       *      PC       *
#       *    Current    *       *****************
#       *      PC       *       * 0x2 *  0x024  *
#       *****************       *****************
#       *      SR       *       *     Next      *
#       *****************       *      PC       *
#                               *****************
#                               *      SR       *
#                               *****************
# this ain't a pretty solution, but it works:
# -restore a6 (not with unlk)
# -shift stack frame down over where old a6 used to be
# -add LOCAL_SIZE to stack pointer
iea_fmovm_trace:
	mov.l		(%a6),%a6		# restore frame pointer
	mov.w		EXC_SR+LOCAL_SIZE(%sp),0x0+LOCAL_SIZE(%sp)
	mov.l		EXC_PC+LOCAL_SIZE(%sp),0x8+LOCAL_SIZE(%sp)
	mov.l		EXC_EXTWPTR+LOCAL_SIZE(%sp),0x2+LOCAL_SIZE(%sp)
	mov.w		&0x2024,0x6+LOCAL_SIZE(%sp) # stk fmt = 0x2; voff = 0x024
	add.l		&LOCAL_SIZE,%sp		# clear stack frame

	bra.l		_real_trace

#########################################################################
# The FPU is disabled and so we should really have taken the "Line
# F Emulator" exception. So, here we create an 8-word stack frame
# from our 4-word stack frame. This means we must calculate the length
# of the faulting instruction to get the "next PC". This is trivial for
# immediate operands but requires some extra work for fmovm dynamic
# which can use most addressing modes.
iea_disabled:
	mov.l		(%sp)+,%d0		# restore d0

	link		%a6,&-LOCAL_SIZE	# init stack frame

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1

# PC of instruction that took the exception is the PC in the frame
	mov.l		EXC_PC(%a6),EXC_EXTWPTR(%a6)
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch the instruction words
	mov.l		%d0,EXC_OPWORD(%a6)	# store OPWORD and EXTWORD

	tst.w		%d0			# is instr fmovm?
	bmi.b		iea_dis_fmovm		# yes
# instruction is using an extended precision immediate operand. therefore,
# the total instruction length is 16 bytes.
iea_dis_immed:
	mov.l		&0x10,%d0		# 16 bytes of instruction
	bra.b		iea_dis_cont
iea_dis_fmovm:
	btst		&0xe,%d0		# is instr fmovm ctrl
	bne.b		iea_dis_fmovm_data	# no
# the instruction is a fmovm.l with 2 or 3 registers.
# length is 12 bytes for 2 registers and 16 bytes for all 3.
	bfextu		%d0{&19:&3},%d1
	mov.l		&0xc,%d0
	cmpi.b		%d1,&0x7		# move all regs?
	bne.b		iea_dis_cont
	addq.l		&0x4,%d0
	bra.b		iea_dis_cont
# the instruction is an fmovm.x dynamic which can use many addressing
# modes and thus can have several different total instruction lengths.
# call fmovm_calc_ea which will go through the ea calc process and,
# as a by-product, will tell us how long the instruction is.
iea_dis_fmovm_data:
	clr.l		%d0
	bsr.l		fmovm_calc_ea
	mov.l		EXC_EXTWPTR(%a6),%d0
	sub.l		EXC_PC(%a6),%d0		# length = end ptr - start PC
# d0 = instruction length. it is parked in the voff slot of the 4-word
# frame; the frame rebuild below reads it back from there to make the
# "next PC".
iea_dis_cont:
	mov.w		%d0,EXC_VOFF(%a6)	# store instruction length

	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	unlk		%a6

# here, we actually create the 8-word frame from the 4-word frame,
# with the "next PC" as additional info.
# the <ea> field is left as undefined.
	subq.l		&0x8,%sp		# make room for new stack
	mov.l		%d0,-(%sp)		# save d0
	mov.w		0xc(%sp),0x4(%sp)	# move SR
	mov.l		0xe(%sp),0x6(%sp)	# move Current PC
	clr.l		%d0
	mov.w		0x12(%sp),%d0		# fetch instruction length
	mov.l		0x6(%sp),0x10(%sp)	# copy Current PC to last longword
	add.l		%d0,0x6(%sp)		# make Next PC
	mov.w		&0x402c,0xa(%sp)	# insert offset,frame format
	mov.l		(%sp)+,%d0		# restore d0

	bra.l		_real_fpu_disabled

##########

# iea_iacc: turn the 4-word frame into an 8-word access error frame
# (format/voff word 0x4008) and exit through _real_access().
# NOTE(review): PCR bit 1 is presumably the "FPU disabled" bit; the FP
# registers are only reloaded when it is clear -- confirm.
iea_iacc:
	movc		%pcr,%d0
	btst		&0x1,%d0
	bne.b		iea_iacc_cont
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1 from stack
iea_iacc_cont:
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	unlk		%a6

	subq.w		&0x8,%sp		# make stack frame bigger
	mov.l		0x8(%sp),(%sp)		# store SR,hi(PC)
	mov.w		0xc(%sp),0x4(%sp)	# store lo(PC)
	mov.w		&0x4008,0x6(%sp)	# store voff
	mov.l		0x2(%sp),0x8(%sp)	# store ea
	mov.l		&0x09428001,0xc(%sp)	# store fslw

# shared tail of iea_iacc/iea_dacc: sp points at the new access error
# frame, whose fslw longword sits at 0xc(%sp).
iea_acc_done:
	btst		&0x5,(%sp)		# user or supervisor mode?
	beq.b		iea_acc_done2		# user
	bset		&0x2,0xd(%sp)		# set supervisor TM bit

iea_acc_done2:
	bra.l		_real_access

# iea_dacc: same frame conversion as iea_iacc, but done underneath the
# still allocated local frame. a0 goes into the ea field and d0 into the
# upper word of the fslw field (same offsets as in iea_iacc).
iea_dacc:
	lea		-LOCAL_SIZE(%a6),%sp

	movc		%pcr,%d1
	btst		&0x1,%d1
	bne.b		iea_dacc_cont
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1 from stack
	fmovm.l		LOCAL_SIZE+USER_FPCR(%sp),%fpcr,%fpsr,%fpiar # restore ctrl regs
iea_dacc_cont:
	mov.l		(%a6),%a6		# restore frame pointer

	mov.l		0x4+LOCAL_SIZE(%sp),-0x8+0x4+LOCAL_SIZE(%sp)
	mov.w		0x8+LOCAL_SIZE(%sp),-0x8+0x8+LOCAL_SIZE(%sp)
	mov.w		&0x4008,-0x8+0xa+LOCAL_SIZE(%sp)
	mov.l		%a0,-0x8+0xc+LOCAL_SIZE(%sp)
	mov.w		%d0,-0x8+0x10+LOCAL_SIZE(%sp)
	mov.w		&0x0001,-0x8+0x12+LOCAL_SIZE(%sp)

	movm.l		LOCAL_SIZE+EXC_DREGS(%sp),&0x0303 # restore d0-d1/a0-a1
	add.w		&LOCAL_SIZE-0x4,%sp	# sp = start of new frame

	bra.b		iea_acc_done

#########################################################################
# XDEF ****************************************************************	#
#	_fpsp_operr(): 060FPSP entry point for FP Operr exception.	#
#									#
#	This handler should be the first code executed upon taking the	#
#	FP Operand Error exception in an operating system.		#
#									#
# XREF ****************************************************************	#
#	_imem_read_long() - read instruction longword			#
#	fix_skewed_ops() - adjust src operand in fsave frame		#
#	_real_operr() - "callout" to operating system operr handler	#
#	_dmem_write_{byte,word,long}() - store data to mem (opclass 3)	#
#	store_dreg_{b,w,l}() - store data to data regfile (opclass 3)	#
#	facc_out_{b,w,l}() - store to memory took access error (opcl 3)	#
#									#
# INPUT ***************************************************************	#
#	- The system stack contains the FP Operr exception frame	#
#	- The fsave frame contains the source operand			#
#									#
# OUTPUT **************************************************************	#
#	No access error:						#
#	- The system stack is unchanged					#
#	- The fsave frame contains the adjusted src op for opclass 0,2	#
#									#
# ALGORITHM ***********************************************************	#
#	In a system where the FP Operr exception is enabled, the goal	#
# is to get to the handler specified at _real_operr(). But, on the 060,	#
# for opclass zero and two instructions taking this exception, the	#
# input operand in the fsave frame may be incorrect for some cases	#
# and needs to be corrected. This handler calls fix_skewed_ops() to	#
# do just this and then exits through _real_operr().			#
#	For opclass 3 instructions, the 060 doesn't store the default	#
# operr result out to memory or data register file as it should.	#
# This code must emulate the move out before finally exiting through	#
# _real_operr(). The move out, if to memory, is performed using		#
# _mem_write() "callout" routines that may return a failing result.	#
# In this special case, the handler must exit through facc_out()	#
# which creates an access error stack frame from the current operr	#
# stack frame.								#
#									#
#########################################################################

	global		_fpsp_operr
_fpsp_operr:

	link.w		%a6,&-LOCAL_SIZE	# init stack frame

	fsave		FP_SRC(%a6)		# grab the "busy" frame

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack

# the FPIAR holds the "current PC" of the faulting instruction
	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)

	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch the instruction words
	mov.l		%d0,EXC_OPWORD(%a6)

##############################################################################

	btst		&13,%d0			# is instr an fmove out?
	bne.b		foperr_out		# fmove out

# here, we simply see if the operand in the fsave frame needs to be "unskewed".
# this would be the case for opclass two operations with a source infinity or
# denorm operand in the sgl or dbl format. NANs also become skewed, but can't
# cause an operr so we don't need to check for them here.
	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
	bsr.l		fix_skewed_ops		# fix src op

foperr_exit:
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	frestore	FP_SRC(%a6)

	unlk		%a6
	bra.l		_real_operr

########################################################################

#
# the hardware does not save the default result to memory on enabled
# operand error exceptions. we do this here before passing control to
# the user operand error handler.
#
# byte, word, and long destination format operations can pass
# through here. we simply need to test the sign of the src
# operand and save the appropriate minimum or maximum integer value
# to the effective address as pointed to by the stacked effective address.
# the one exception is a QNAN source: there the upper longword of the
# source mantissa is used as the default result instead.
#
# although packed opclass three operations can take operand error
# exceptions, they won't pass through here since they are caught
# first by the unsupported data format exception handler. that handler
# sends them directly to _real_operr() if necessary.
#
# the default result is built as a longword in L_SCR1; the byte and word
# cases below then load only its upper byte/word.
#
foperr_out:

	mov.w		FP_SRC_EX(%a6),%d1	# fetch exponent
	andi.w		&0x7fff,%d1
	cmpi.w		%d1,&0x7fff
	bne.b		foperr_out_not_qnan
# the operand is either an infinity or a QNAN.
	tst.l		FP_SRC_LO(%a6)
	bne.b		foperr_out_qnan
	mov.l		FP_SRC_HI(%a6),%d1
	andi.l		&0x7fffffff,%d1
	beq.b		foperr_out_not_qnan
foperr_out_qnan:
	mov.l		FP_SRC_HI(%a6),L_SCR1(%a6)
	bra.b		foperr_out_jmp

foperr_out_not_qnan:
	mov.l		&0x7fffffff,%d1		# assume positive: max integer
	tst.b		FP_SRC_EX(%a6)		# is src negative?
	bpl.b		foperr_out_not_qnan2	# no
	addq.l		&0x1,%d1		# yes: 0x80000000, min integer
foperr_out_not_qnan2:
	mov.l		%d1,L_SCR1(%a6)

foperr_out_jmp:
	bfextu		%d0{&19:&3},%d0		# extract dst format field
	mov.b		1+EXC_OPWORD(%a6),%d1	# extract <ea> mode,reg
	mov.w		(tbl_operr.b,%pc,%d0.w*2),%a0
	jmp		(tbl_operr.b,%pc,%a0)

# indexed by the 3-bit destination format field. both packed formats
# take the plain exit; the remaining "shouldn't happen" slots are left
# as placeholders.
tbl_operr:
	short		foperr_out_l - tbl_operr # long word integer
	short		tbl_operr    - tbl_operr # sgl prec shouldn't happen
	short		tbl_operr    - tbl_operr # ext prec shouldn't happen
	short		foperr_exit  - tbl_operr # packed won't enter here
	short		foperr_out_w - tbl_operr # word integer
	short		tbl_operr    - tbl_operr # dbl prec shouldn't happen
	short		foperr_out_b - tbl_operr # byte integer
	short		foperr_exit  - tbl_operr # packed won't enter here

foperr_out_b:
	mov.b		L_SCR1(%a6),%d0		# load upper byte of default result
	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
	ble.b		foperr_out_b_save_dn	# yes
	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
	bsr.l		_dmem_write_byte	# write the default result

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_b		# yes

	bra.w		foperr_exit
foperr_out_b_save_dn:
	andi.w		&0x0007,%d1		# keep register number only
	bsr.l		store_dreg_b		# store result to regfile
	bra.w		foperr_exit

foperr_out_w:
	mov.w		L_SCR1(%a6),%d0		# load upper word of default result
	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
	ble.b		foperr_out_w_save_dn	# yes
	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
	bsr.l		_dmem_write_word	# write the default result

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_w		# yes

	bra.w		foperr_exit
foperr_out_w_save_dn:
	andi.w		&0x0007,%d1		# keep register number only
	bsr.l		store_dreg_w		# store result to regfile
	bra.w		foperr_exit

foperr_out_l:
	mov.l		L_SCR1(%a6),%d0		# load default result
	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
	ble.b		foperr_out_l_save_dn	# yes
	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
	bsr.l		_dmem_write_long	# write the default result

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_l		# yes

	bra.w		foperr_exit
foperr_out_l_save_dn:
	andi.w		&0x0007,%d1		# keep register number only
	bsr.l		store_dreg_l		# store result to regfile
	bra.w		foperr_exit

#########################################################################
# XDEF ****************************************************************	#
#	_fpsp_snan(): 060FPSP entry point for FP SNAN exception.	#
#									#
#	This handler should be the first code executed upon taking the	#
#	FP Signalling NAN exception in an operating system.		#
#									#
# XREF ****************************************************************	#
#	_imem_read_long() - read instruction longword			#
#	fix_skewed_ops() - adjust src operand in fsave frame		#
#	_real_snan() - "callout" to operating system SNAN handler	#
#	_dmem_write_{byte,word,long}() - store data to mem (opclass 3)	#
#	store_dreg_{b,w,l}() - store data to data regfile (opclass 3)	#
#	facc_out_{b,w,l,d,x}() - store to mem took acc error (opcl 3)	#
#	_calc_ea_fout() - fix An if <ea> is -() or ()+; also get <ea>	#
#									#
# INPUT ***************************************************************	#
#	- The system stack contains the FP SNAN exception frame		#
#	- The fsave frame contains the source operand			#
#									#
# OUTPUT **************************************************************	#
#	No access error:						#
#	- The system stack is unchanged					#
#	- The fsave frame contains the adjusted src op for opclass 0,2	#
#									#
# ALGORITHM ***********************************************************	#
#	In a system where the FP SNAN exception is enabled, the goal	#
# is to get to the handler specified at _real_snan(). But, on the 060,	#
# for opclass zero and two instructions taking this exception, the	#
# input operand in the fsave frame may be incorrect for some cases	#
# and needs to be corrected. This handler calls fix_skewed_ops() to	#
# do just this and then exits through _real_snan().			#
#	For opclass 3 instructions, the 060 doesn't store the default	#
# SNAN result out to memory or data register file as it should.		#
# This code must emulate the move out before finally exiting through	#
# _real_snan(). The move out, if to memory, is performed using		#
# _mem_write() "callout" routines that may return a failing result.	#
# In this special case, the handler must exit through facc_out()	#
# which creates an access error stack frame from the current SNAN	#
# stack frame.								#
#	For the case of an extended precision opclass 3 instruction,	#
# if the effective addressing mode was -() or ()+, then the address	#
# register must get updated by calling _calc_ea_fout(). If the <ea>	#
# was -(a7) from supervisor mode, then the exception frame currently	#
# on the system stack must be carefully moved "down" to make room	#
# for the operand being moved.						#
#									#
#########################################################################

	global		_fpsp_snan
_fpsp_snan:

	link.w		%a6,&-LOCAL_SIZE	# init stack frame

	fsave		FP_SRC(%a6)		# grab the "busy" frame

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack

# the FPIAR holds the "current PC" of the faulting instruction
	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)

	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch the instruction words
	mov.l		%d0,EXC_OPWORD(%a6)

##############################################################################

	btst		&13,%d0			# is instr an fmove out?
	bne.w		fsnan_out		# fmove out

# here, we simply see if the operand in the fsave frame needs to be "unskewed".
# this would be the case for opclass two operations with a source infinity or
# denorm operand in the sgl or dbl format. NANs also become skewed and must be
# fixed here.
	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
	bsr.l		fix_skewed_ops		# fix src op

fsnan_exit:
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	frestore	FP_SRC(%a6)

	unlk		%a6
	bra.l		_real_snan

########################################################################

#
# the hardware does not save the default result to memory on enabled
# snan exceptions. we do this here before passing control to
# the user snan handler.
#
# every destination format can pass through here. since packed format
# operations already were handled by fpsp_unsupp(), we need to do
# nothing else for them here and simply exit.
# for byte, word, and long, the upper byte/word/longword of the source
# mantissa is stored with the SNAN bit set. for single, double and
# extended precision, a NAN of that precision is built from the source
# sign and mantissa, again with the SNAN bit set.
# the result goes to the stacked effective address or, where the <ea>
# mode is a data register, to the data register file.
#
fsnan_out:

	bfextu		%d0{&19:&3},%d0		# extract dst format field
	mov.b		1+EXC_OPWORD(%a6),%d1	# extract <ea> mode,reg
	mov.w		(tbl_snan.b,%pc,%d0.w*2),%a0
	jmp		(tbl_snan.b,%pc,%a0)

# indexed by the 3-bit destination format field. the packed slots go to
# fsnan_exit so that a packed format arriving here is a no-op, as
# described above, rather than a jump into this table.
tbl_snan:
	short		fsnan_out_l - tbl_snan	# long word integer
	short		fsnan_out_s - tbl_snan	# sgl prec
	short		fsnan_out_x - tbl_snan	# ext prec
	short		fsnan_exit  - tbl_snan	# packed needs no help
	short		fsnan_out_w - tbl_snan	# word integer
	short		fsnan_out_d - tbl_snan	# dbl prec
	short		fsnan_out_b - tbl_snan	# byte integer
	short		fsnan_exit  - tbl_snan	# packed needs no help

fsnan_out_b:
	mov.b		FP_SRC_HI(%a6),%d0	# load upper byte of SNAN
	bset		&6,%d0			# set SNAN bit
	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
	ble.b		fsnan_out_b_dn		# yes
	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
	bsr.l		_dmem_write_byte	# write the default result

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_b		# yes

	bra.w		fsnan_exit
fsnan_out_b_dn:
	andi.w		&0x0007,%d1		# keep register number only
	bsr.l		store_dreg_b		# store result to regfile
	bra.w		fsnan_exit

fsnan_out_w:
	mov.w		FP_SRC_HI(%a6),%d0	# load upper word of SNAN
	bset		&14,%d0			# set SNAN bit
	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
	ble.b		fsnan_out_w_dn		# yes
	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
	bsr.l		_dmem_write_word	# write the default result

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_w		# yes

	bra.w		fsnan_exit
fsnan_out_w_dn:
	andi.w		&0x0007,%d1		# keep register number only
	bsr.l		store_dreg_w		# store result to regfile
	bra.w		fsnan_exit

fsnan_out_l:
	mov.l		FP_SRC_HI(%a6),%d0	# load upper longword of SNAN
	bset		&30,%d0			# set SNAN bit
	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
	ble.b		fsnan_out_l_dn		# yes
	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
	bsr.l		_dmem_write_long	# write the default result

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_l		# yes

	bra.w		fsnan_exit
fsnan_out_l_dn:
	andi.w		&0x0007,%d1		# keep register number only
	bsr.l		store_dreg_l		# store result to regfile
	bra.w		fsnan_exit

fsnan_out_s:
	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
	ble.b		fsnan_out_d_dn		# yes
	mov.l		FP_SRC_EX(%a6),%d0	# fetch SNAN sign
	andi.l		&0x80000000,%d0		# keep sign
	ori.l		&0x7fc00000,%d0		# insert new exponent,SNAN bit
	mov.l		FP_SRC_HI(%a6),%d1	# load mantissa
	lsr.l		&0x8,%d1		# shift mantissa for sgl
	or.l		%d1,%d0			# create sgl SNAN
	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
	bsr.l		_dmem_write_long	# write the default result

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_l		# yes

	bra.w		fsnan_exit
# NOTE: despite its name, this is the single precision store to a data
# register; in the code visible here it is reached from fsnan_out_s only.
# d1 (the <ea> byte) is parked on the stack while it is used as scratch.
fsnan_out_d_dn:
	mov.l		FP_SRC_EX(%a6),%d0	# fetch SNAN sign
	andi.l		&0x80000000,%d0		# keep sign
	ori.l		&0x7fc00000,%d0		# insert new exponent,SNAN bit
	mov.l		%d1,-(%sp)		# save <ea> mode,reg
	mov.l		FP_SRC_HI(%a6),%d1	# load mantissa
	lsr.l		&0x8,%d1		# shift mantissa for sgl
	or.l		%d1,%d0			# create sgl SNAN
	mov.l		(%sp)+,%d1		# restore <ea> mode,reg
	andi.w		&0x0007,%d1		# keep register number only
	bsr.l		store_dreg_l		# store result to regfile
	bra.w		fsnan_exit

# the double precision result is assembled in FP_SCR0_EX (upper longword)
# and FP_SCR0_HI (lower longword) and written out as 8 bytes.
fsnan_out_d:
	mov.l		FP_SRC_EX(%a6),%d0	# fetch SNAN sign
	andi.l		&0x80000000,%d0		# keep sign
	ori.l		&0x7ff80000,%d0		# insert new exponent,SNAN bit
	mov.l		FP_SRC_HI(%a6),%d1	# load hi mantissa
	mov.l		%d0,FP_SCR0_EX(%a6)	# store to temp space
	mov.l		&11,%d0			# load shift amt
	lsr.l		%d0,%d1
	or.l		%d1,FP_SCR0_EX(%a6)	# create dbl hi
	mov.l		FP_SRC_HI(%a6),%d1	# load hi mantissa
	andi.l		&0x000007ff,%d1		# keep the 11 bits shifted out above
	ror.l		%d0,%d1			# move them to the top of the longword
	mov.l		%d1,FP_SCR0_HI(%a6)	# store to temp space
	mov.l		FP_SRC_LO(%a6),%d1	# load lo mantissa
	lsr.l		%d0,%d1
	or.l		%d1,FP_SCR0_HI(%a6)	# create dbl lo
	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
	mov.l		EXC_EA(%a6),%a1		# pass: dst addr
	movq.l		&0x8,%d0		# pass: size of 8 bytes
	bsr.l		_dmem_write		# write the default result

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_d		# yes

	bra.w		fsnan_exit

# for extended precision, if the addressing mode is pre-decrement or
# post-increment, then the address register did not get updated.
# in addition, for pre-decrement, the stacked <ea> is incorrect.
fsnan_out_x:
	clr.b		SPCOND_FLG(%a6)		# clear special case flag

# build the extended precision result in FP_SCR0.
	mov.w		FP_SRC_EX(%a6),FP_SCR0_EX(%a6)
	clr.w		2+FP_SCR0(%a6)
	mov.l		FP_SRC_HI(%a6),%d0
	bset		&30,%d0			# set SNAN bit
	mov.l		%d0,FP_SCR0_HI(%a6)
	mov.l		FP_SRC_LO(%a6),FP_SCR0_LO(%a6)

	btst		&0x5,EXC_SR(%a6)	# supervisor mode exception?
	bne.b		fsnan_out_x_s		# yes

	mov.l		%usp,%a0		# fetch user stack pointer
	mov.l		%a0,EXC_A7(%a6)		# save on stack for calc_ea()
	mov.l		(%a6),EXC_A6(%a6)	# ditto for the user's a6

	bsr.l		_calc_ea_fout		# find the correct ea,update An
	mov.l		%a0,%a1
	mov.l		%a0,EXC_EA(%a6)		# stack correct <ea>

	mov.l		EXC_A7(%a6),%a0
	mov.l		%a0,%usp		# restore user stack pointer
	mov.l		EXC_A6(%a6),(%a6)	# write back possibly new a6

fsnan_out_x_save:
	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
	movq.l		&0xc,%d0		# pass: size of extended
	bsr.l		_dmem_write		# write the default result

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_x		# yes

	bra.w		fsnan_exit

fsnan_out_x_s:
	mov.l		(%a6),EXC_A6(%a6)

	bsr.l		_calc_ea_fout		# find the correct ea,update An
	mov.l		%a0,%a1
	mov.l		%a0,EXC_EA(%a6)		# stack correct <ea>

	mov.l		EXC_A6(%a6),(%a6)

	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # is <ea> mode -(a7)?
	bne.b		fsnan_out_x_save	# no

# the operation was "fmove.x SNAN,-(a7)" from supervisor mode.
# the three longwords starting at EXC_SR are first copied 0xc bytes
# lower and then overwritten with the 12-byte result from FP_SCR0.
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	frestore	FP_SRC(%a6)

	mov.l		EXC_A6(%a6),%a6		# restore frame pointer

	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
	mov.l		LOCAL_SIZE+EXC_PC+0x2(%sp),LOCAL_SIZE+EXC_PC+0x2-0xc(%sp)
	mov.l		LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)

	mov.l		LOCAL_SIZE+FP_SCR0_EX(%sp),LOCAL_SIZE+EXC_SR(%sp)
	mov.l		LOCAL_SIZE+FP_SCR0_HI(%sp),LOCAL_SIZE+EXC_PC+0x2(%sp)
	mov.l		LOCAL_SIZE+FP_SCR0_LO(%sp),LOCAL_SIZE+EXC_EA(%sp)

	add.l		&LOCAL_SIZE-0x8,%sp

	bra.l		_real_snan

#########################################################################
# XDEF ****************************************************************	#
#	_fpsp_inex(): 060FPSP entry point for FP Inexact exception.	#
#									#
#	This handler should be the first code executed upon taking the	#
#	FP Inexact exception in an operating system.			#
#									#
# XREF ****************************************************************	#
#	_imem_read_long() - read instruction longword			#
#	fix_skewed_ops() - adjust src operand in fsave frame		#
#	set_tag_x() - determine optype of src/dst operands		#
#	store_fpreg() - store opclass 0 or 2 result to FP regfile	#
#	unnorm_fix() - change UNNORM operands to NORM or ZERO		#
#	load_fpn2() - load dst operand from FP regfile			#
#	smovcr() - emulate an "fmovcr" instruction			#
#	fout() - emulate an opclass 3 instruction			#
#	tbl_unsupp - addr of emulation routine table for opclass 0,2	#
#	_real_inex() - "callout" to operating system inexact handler	#
#									#
# INPUT ***************************************************************	#
#	- The system stack contains the FP Inexact exception frame	#
#	- The fsave frame contains the source operand			#
#									#
# OUTPUT **************************************************************	#
#	- The system stack is unchanged					#
#	- The fsave frame contains the adjusted src op for opclass 0,2	#
#									#
# ALGORITHM ***********************************************************	#
#	In a system where the FP Inexact exception is enabled, the goal	#
# is to get to the handler specified at _real_inex(). But, on the 060,	#
# for opclass zero and two instructions taking this exception, the	#
# hardware doesn't store the correct result to the destination FP	#
# register as did the '040 and '881/2. This handler must emulate the	#
# instruction in order to get this value and then store it to the	#
# correct register before calling _real_inex().				#
#	For opclass 3 instructions, the 060 doesn't store the default	#
# inexact result out to memory or data register file as it should.	#
# This code must emulate the move out by calling fout() before finally	#
# exiting through _real_inex().						#
#									#
#########################################################################

	global		_fpsp_inex
_fpsp_inex:

	link.w		%a6,&-LOCAL_SIZE	# init stack frame

	fsave		FP_SRC(%a6)		# grab the "busy" frame

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack

# the FPIAR holds the "current PC" of the faulting instruction
	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)

	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch the instruction words
	mov.l		%d0,EXC_OPWORD(%a6)

##############################################################################

	btst		&13,%d0			# is instr an fmove out?
	bne.w		finex_out		# fmove out

# the hardware, for "fabs" and "fneg" w/ a long source format, puts the
# longword integer directly into the upper longword of the mantissa along
# w/ an exponent value of 0x401e. we convert this to extended precision here.
	bfextu		%d0{&19:&3},%d0		# fetch instr size
	bne.b		finex_cont		# instr size is not long
	cmpi.w		FP_SRC_EX(%a6),&0x401e	# is exponent 0x401e?
	bne.b		finex_cont		# no
	fmov.l		&0x0,%fpcr
	fmov.l		FP_SRC_HI(%a6),%fp0	# load integer src
	fmov.x		%fp0,FP_SRC(%a6)	# store integer as extended precision
	mov.w		&0xe001,0x2+FP_SRC(%a6)

finex_cont:
	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
	bsr.l		fix_skewed_ops		# fix src op

# Here, we zero the ccode and exception byte field since we're going to
# emulate the whole instruction. Notice, though, that we don't kill the
# INEX1 bit. This is because a packed op has long since been converted
# to extended before arriving here. Therefore, we need to retain the
# INEX1 bit from when the operand was first converted.
	andi.l		&0x00ff01ff,USER_FPSR(%a6) # zero ccode, exc (except INEX1)

	fmov.l		&0x0,%fpcr		# zero current control regs
	fmov.l		&0x0,%fpsr

	bfextu		EXC_EXTWORD(%a6){&0:&6},%d1 # extract upper 6 of cmdreg
	cmpi.b		%d1,&0x17		# is op an fmovecr?
	beq.w		finex_fmovcr		# yes

	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
	bsr.l		set_tag_x		# tag the operand type
	mov.b		%d0,STAG(%a6)		# maybe NORM,DENORM

# bits four and five of the fp extension word separate the monadic and dyadic
# operations that can pass through fpsp_inex(). remember that fcmp and ftst
# will never take this exception, but fsincos will.
	btst		&0x5,1+EXC_CMDREG(%a6)	# is operation monadic or dyadic?
	beq.b		finex_extract		# monadic

	btst		&0x4,1+EXC_CMDREG(%a6)	# is operation an fsincos?
	bne.b		finex_extract		# yes

	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
	bsr.l		load_fpn2		# load dst into FP_DST

	lea		FP_DST(%a6),%a0		# pass: ptr to dst op
	bsr.l		set_tag_x		# tag the operand type
	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
	bne.b		finex_op2_done		# no
	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
finex_op2_done:
	mov.b		%d0,DTAG(%a6)		# save dst optype tag

# call the emulation routine selected by the low 7 bits of the command
# word: d0 = rnd prec/mode, a0 = ptr to src op, a1 = ptr to dst op.
finex_extract:
	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec/mode

	mov.b		1+EXC_CMDREG(%a6),%d1
	andi.w		&0x007f,%d1		# extract extension

	lea		FP_SRC(%a6),%a0
	lea		FP_DST(%a6),%a1

	mov.l		(tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
	jsr		(tbl_unsupp.l,%pc,%d1.l*1)

# the operation has been emulated. the result is in fp0.
finex_save:
	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # fetch dst reg number
	bsr.l		store_fpreg

finex_exit:
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	frestore	FP_SRC(%a6)

	unlk		%a6
	bra.l		_real_inex

finex_fmovcr:
	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec,mode
	mov.b		1+EXC_CMDREG(%a6),%d1
	andi.l		&0x0000007f,%d1		# pass rom offset
	bsr.l		smovcr
	bra.b		finex_save

########################################################################

#
# the hardware does not save the default result to memory on enabled
# inexact exceptions. we do this here before passing control to
# the user inexact handler.
#
# byte, word, and long destination format operations can pass
# through here. so can double and single precision.
# although packed opclass three operations can take inexact
# exceptions, they won't pass through here since they are caught
# first by the unsupported data format exception handler. that handler
# sends them directly to _real_inex() if necessary.
#
finex_out:

	mov.b		&NORM,STAG(%a6)		# src is a NORM

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec,mode

	andi.l		&0xffff00ff,USER_FPSR(%a6) # zero exception field

	lea		FP_SRC(%a6),%a0		# pass ptr to src operand

	bsr.l		fout			# store the default result

	bra.b		finex_exit

#########################################################################
# XDEF ****************************************************************	#
#	_fpsp_dz(): 060FPSP entry point for FP DZ exception.		#
#									#
#	This handler should be the first code executed upon taking	#
#	the FP DZ exception in an operating system.			#
#
#									#
# XREF ****************************************************************	#
#	_imem_read_long() - read instruction longword from memory	#
#	fix_skewed_ops() - adjust fsave operand				#
#	_real_dz() - "callout" exit point from FP DZ handler		#
#									#
# INPUT ***************************************************************	#
#	- The system stack contains the FP DZ exception stack.		#
#	- The fsave frame contains the source operand.			#
#									#
# OUTPUT **************************************************************	#
#	- The system stack contains the FP DZ exception stack.		#
#	- The fsave frame contains the adjusted source operand.		#
#									#
# ALGORITHM ***********************************************************	#
#	In a system where the DZ exception is enabled, the goal is to	#
# get to the handler specified at _real_dz(). But, on the 060, when the	#
# exception is taken, the input operand in the fsave state frame may	#
# be incorrect for some cases and need to be adjusted. So, this package	#
# adjusts the operand using fix_skewed_ops() and then branches to	#
# _real_dz().								#
#									#
#########################################################################

	global		_fpsp_dz
_fpsp_dz:

	link.w		%a6,&-LOCAL_SIZE	# init stack frame

	fsave		FP_SRC(%a6)		# grab the "busy" frame

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack

# the FPIAR holds the "current PC" of the faulting instruction
	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)

	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch the instruction words
	mov.l		%d0,EXC_OPWORD(%a6)

##############################################################################


# here, we simply see if the operand in the fsave frame needs to be "unskewed".
# this would be the case for opclass two operations with a source zero
# in the sgl or dbl format.
	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
	bsr.l		fix_skewed_ops		# fix src op

# undo everything saved at entry, re-insert the (possibly adjusted) fsave
# frame, and hand control to the _real_dz() callout.
fdz_exit:
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	frestore	FP_SRC(%a6)

	unlk		%a6
	bra.l		_real_dz

#########################################################################
# XDEF ****************************************************************	#
#	_fpsp_fline(): 060FPSP entry point for "Line F emulator" exc.	#
#									#
#	This handler should be the first code executed upon taking the	#
#	"Line F Emulator" exception in an operating system.
#
#									#
# XREF ****************************************************************	#
#	_fpsp_unimp() - handle "FP Unimplemented" exceptions		#
#	_real_fpu_disabled() - handle "FPU disabled" exceptions		#
#	_real_fline() - handle "FLINE" exceptions			#
#	_imem_read_long() - read instruction longword			#
#									#
# INPUT ***************************************************************	#
#	- The system stack contains a "Line F Emulator" exception	#
#	  stack frame.							#
#									#
# OUTPUT **************************************************************	#
#	- The system stack is unchanged					#
#									#
# ALGORITHM ***********************************************************	#
#	When a "Line F Emulator" exception occurs, there are 3 possible	#
# exception types, denoted by the exception stack frame format number:	#
#	(1) FPU unimplemented instruction (6 word stack frame)		#
#	(2) FPU disabled (8 word stack frame)				#
#	(3) Line F (4 word stack frame)					#
#									#
#	This module determines which and forks the flow off to the	#
# appropriate "callout" (for "disabled" and "Line F") or to the		#
# correct emulation code (for "FPU unimplemented").			#
#	This code also must check for "fmovecr" instructions w/ a	#
# non-zero <ea> field. These may get flagged as "Line F" but should	#
# really be flagged as "FPU Unimplemented". (This is a "feature" on	#
# the '060.)								#
#									#
#########################################################################

	global		_fpsp_fline
_fpsp_fline:

# the word at 0x6(%sp) is the frame format/vector offset word of the
# exception stack frame.

# check to see if this exception is a "FP Unimplemented Instruction"
# exception. if so, branch directly to that handler's entry point.
	cmpi.w		0x6(%sp),&0x202c	# fmt = 0x2; voff = 0x02c ?
	beq.l		_fpsp_unimp

# check to see if the FPU is disabled. if so, jump to the OS entry
# point for that condition.
	cmpi.w		0x6(%sp),&0x402c	# fmt = 0x4; voff = 0x02c ?
	beq.l		_real_fpu_disabled

# the exception was an "F-Line Illegal" exception. we check to see
# if the F-Line instruction is an "fmovecr" w/ a non-zero <ea>. if
# so, convert the F-Line exception stack frame to an FP Unimplemented
# Instruction exception stack frame else branch to the OS entry
# point for the F-Line exception handler.
	link.w		%a6,&-LOCAL_SIZE	# init stack frame

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1

	mov.l		EXC_PC(%a6),EXC_EXTWPTR(%a6)
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch instruction words

	bfextu		%d0{&0:&10},%d1		# upper 10 bits of opword
	cmpi.w		%d1,&0x03c8		# is it an fmovecr?
	bne.b		fline_fline		# no

	bfextu		%d0{&16:&6},%d1		# upper 6 bits of cmdreg
	cmpi.b		%d1,&0x17		# is it an fmovecr?
	bne.b		fline_fline		# no

# it's an fmovecr w/ a non-zero <ea> that has entered through
# the F-Line Illegal exception.
# so, we need to convert the F-Line exception stack frame into an
# FP Unimplemented Instruction stack frame and jump to that entry
# point.
#
# but, if the FPU is disabled, then we need to jump to the FPU disabled
# entry point.
	movc		%pcr,%d0
	btst		&0x1,%d0		# bit 1 set => take "disabled" path
	beq.b		fline_fmovcr

	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	unlk		%a6

# grow the 4 word F-Line frame by 8 bytes and rewrite it with frame
# format/vector offset 0x402c before calling out.
	sub.l		&0x8,%sp		# make room for "Next PC", <ea>
	mov.w		0x8(%sp),(%sp)		# move SR
	mov.l		0xa(%sp),0x2(%sp)	# move "Current PC"
	mov.w		&0x402c,0x6(%sp)	# fmt = 0x4; voff = 0x02c
	mov.l		0x2(%sp),0xc(%sp)
	addq.l		&0x4,0x2(%sp)		# set "Next PC"

	bra.l		_real_fpu_disabled

fline_fmovcr:
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	unlk		%a6

	fmov.l		0x2(%sp),%fpiar		# set current PC
	addq.l		&0x4,0x2(%sp)		# set Next PC

# grow the 4 word frame by one longword and change the frame format to 0x2.
	mov.l		(%sp),-(%sp)		# copy SR,hi(PC) "down"
	mov.l		0x8(%sp),0x4(%sp)	# copy lo(PC),voff "down"
	mov.b		&0x20,0x6(%sp)		# frame format = 0x2

	bra.l		_fpsp_unimp

fline_fline:
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	unlk		%a6

	bra.l		_real_fline

#########################################################################
# XDEF ****************************************************************	#
#	_fpsp_unimp(): 060FPSP entry point for FP "Unimplemented	#
#		       Instruction" exception.				#
#									#
#	This handler should be the first code executed upon taking the	#
#	FP Unimplemented Instruction exception in an operating system.
#
#									#
# XREF ****************************************************************	#
#	_imem_read_{word,long}() - read instruction word/longword	#
#	load_fop() - load src/dst ops from memory and/or FP regfile	#
#	store_fpreg() - store opclass 0 or 2 result to FP regfile	#
#	tbl_trans - addr of table of emulation routines for trnscndls	#
#	_real_access() - "callout" for access error exception		#
#	_fpsp_done() - "callout" for exit; work all done		#
#	_real_trace() - "callout" for Trace enabled exception		#
#	smovcr() - emulate "fmovecr" instruction			#
#	funimp_skew() - adjust fsave src ops to "incorrect" value	#
#	_ftrapcc() - emulate an "ftrapcc" instruction			#
#	_fdbcc() - emulate an "fdbcc" instruction			#
#	_fscc() - emulate an "fscc" instruction				#
#	_real_trap() - "callout" for Trap exception			#
#	_real_bsun() - "callout" for enabled Bsun exception		#
#									#
# INPUT ***************************************************************	#
#	- The system stack contains the "Unimplemented Instr" stk frame	#
#									#
# OUTPUT **************************************************************	#
#	If access error:						#
#	- The system stack is changed to an access error stack frame	#
#	If Trace exception enabled:					#
#	- The system stack is changed to a Trace exception stack frame	#
#	Else: (normal case)						#
#	- Correct result has been stored as appropriate			#
#									#
# ALGORITHM ***********************************************************	#
#	There are two main cases of instructions that may enter here to	#
# be emulated: (1) the FPgen instructions, most of which were also	#
# unimplemented on the 040, and (2) "ftrapcc", "fscc", and "fdbcc".	#
#	For the first set, this handler calls the routine load_fop()	#
# to load the source and destination (for dyadic) operands to be used	#
# for instruction emulation. The correct emulation routine is then	#
# chosen by decoding the instruction type and indexing into an		#
# emulation subroutine index table. After emulation returns, this	#
# handler checks to see if an exception should occur as a result of the	#
# FP instruction emulation. If so, then an FP exception of the correct	#
# type is inserted into the FPU state frame using the "frestore"	#
# instruction before exiting through _fpsp_done(). In either the	#
# exceptional or non-exceptional cases, we must check to see if the	#
# Trace exception is enabled. If so, then we must create a Trace	#
# exception frame from the current exception frame and exit through	#
# _real_trace().							#
#	For "fdbcc", "ftrapcc", and "fscc", the emulation subroutines	#
# _fdbcc(), _ftrapcc(), and _fscc() respectively are used. All three	#
# may flag that a BSUN exception should be taken. If so, then the	#
# current exception stack frame is converted into a BSUN exception	#
# stack frame and an exit is made through _real_bsun(). If the		#
# instruction was "ftrapcc" and a Trap exception should result, a Trap	#
# exception stack frame is created from the current frame and an exit	#
# is made through _real_trap(). If a Trace exception is pending, then	#
# a Trace exception frame is created from the current frame and a jump	#
# is made to _real_trace(). Finally, if none of these conditions exist,	#
# then the handler exits through the callout _fpsp_done().		#
#									#
#	In any of the above scenarios, if a _mem_read() or _mem_write()	#
# "callout" returns a failing value, then an access error stack frame	#
# is created from the current stack frame and an exit is made through	#
# _real_access().							#
#									#
#########################################################################

#
# FP UNIMPLEMENTED INSTRUCTION STACK FRAME:
#
#	*****************
#	*		* => <ea> of fp unimp instr.
#	-      EA	-
#	*		*
#	*****************
#	* 0x2 *  0x02c	* => frame format and vector offset(vector #11)
#	*****************
#	*		*
#	-    Next PC	- => PC of instr to execute after exc handling
#	*		*
#	*****************
#	*      SR	* => SR at the time the exception was taken
#	*****************
#
# Note: the !NULL bit does not get set in the fsave frame when the
# machine encounters an fp unimp exception. Therefore, it must be set
# before leaving this handler.
#
	global		_fpsp_unimp
_fpsp_unimp:

	link.w		%a6,&-LOCAL_SIZE	# init stack frame

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
# NOTE(review): fp0-fp1 are saved through EXC_FPREGS here but restored
# through EXC_FP0 on every exit path below; presumably both symbols name
# the same frame slot -- confirm against their definitions.
	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1

	btst		&0x5,EXC_SR(%a6)	# user mode exception?
	bne.b		funimp_s		# no; supervisor mode

# save the value of the user stack pointer onto the stack frame
funimp_u:
	mov.l		%usp,%a0		# fetch user stack pointer
	mov.l		%a0,EXC_A7(%a6)		# store in stack frame
	bra.b		funimp_cont

# store the value of the supervisor stack pointer BEFORE the exc occurred.
# old_sp is address just above stacked effective address.
funimp_s:
	lea		4+EXC_EA(%a6),%a0	# load old a7'
	mov.l		%a0,EXC_A7(%a6)		# store a7'
	mov.l		%a0,OLD_A7(%a6)		# make a copy

funimp_cont:

# the FPIAR holds the "current PC" of the faulting instruction.
	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)

	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch the instruction words
	mov.l		%d0,EXC_OPWORD(%a6)

############################################################################

	fmov.l		&0x0,%fpcr		# clear FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	clr.b		SPCOND_FLG(%a6)		# clear "special case" flag

# Divide the fp instructions into 8 types based on the TYPE field in
# bits 6-8 of the opword(classes 6,7 are undefined).
# (for the '060, only two types can take this exception)
#	bftst		%d0{&7:&3}		# test TYPE
	btst		&22,%d0			# type 0 or 1 ?
	bne.w		funimp_misc		# type 1

#########################################
# TYPE == 0: General instructions	#
#########################################
funimp_gen:

	clr.b		STORE_FLG(%a6)		# clear "store result" flag

# clear the ccode byte and exception status byte
	andi.l		&0x00ff00ff,USER_FPSR(%a6)

	bfextu		%d0{&16:&6},%d1		# extract upper 6 of cmdreg
	cmpi.b		%d1,&0x17		# is op an fmovecr?
	beq.w		funimp_fmovcr		# yes

funimp_gen_op:
	bsr.l		_load_fop		# load

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# fetch rnd mode

# build the tbl_trans index: (extension bits << 3) | source operand tag.
	mov.b		1+EXC_CMDREG(%a6),%d1
	andi.w		&0x003f,%d1		# extract extension bits
	lsl.w		&0x3,%d1		# shift left 3 bits
	or.b		STAG(%a6),%d1		# insert src optag bits

	lea		FP_DST(%a6),%a1		# pass dst ptr in a1
	lea		FP_SRC(%a6),%a0		# pass src ptr in a0

# tbl_trans holds word offsets relative to tbl_trans itself.
	mov.w		(tbl_trans.w,%pc,%d1.w*2),%d1
	jsr		(tbl_trans.w,%pc,%d1.w*1) # emulate

funimp_fsave:
	mov.b		FPCR_ENABLE(%a6),%d0	# fetch exceptions enabled
	bne.w		funimp_ena		# some are enabled

funimp_store:
	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # fetch Dn
	bsr.l		store_fpreg		# store result to fp regfile

funimp_gen_exit:
	fmovm.x		EXC_FP0(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

funimp_gen_exit_cmp:
	cmpi.b		SPCOND_FLG(%a6),&mia7_flg # was the ea mode (sp)+ ?
	beq.b		funimp_gen_exit_a7	# yes

	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # was the ea mode -(sp) ?
	beq.b		funimp_gen_exit_a7	# yes

funimp_gen_exit_cont:
	unlk		%a6

funimp_gen_exit_cont2:
	btst		&0x7,(%sp)		# is trace on?
	beq.l		_fpsp_done		# no

# this catches a problem with the case where an exception will be re-inserted
# into the machine. the frestore has already been executed...so, the fmov.l
# alone of the control register would trigger an unwanted exception.
# until I feel like fixing this, we'll sidestep the exception.
	fsave		-(%sp)
	fmov.l		%fpiar,0x14(%sp)	# "Current PC" is in FPIAR
	frestore	(%sp)+
	mov.w		&0x2024,0x6(%sp)	# stk fmt = 0x2; voff = 0x24
	bra.l		_real_trace

funimp_gen_exit_a7:
	btst		&0x5,EXC_SR(%a6)	# supervisor or user mode?
	bne.b		funimp_gen_exit_a7_s	# supervisor

# user mode: write the updated a7 value back to the usp. a0 has already
# been restored, so it is preserved around the move.
	mov.l		%a0,-(%sp)
	mov.l		EXC_A7(%a6),%a0
	mov.l		%a0,%usp
	mov.l		(%sp)+,%a0
	bra.b		funimp_gen_exit_cont

# if the instruction was executed from supervisor mode and the addressing
# mode was (a7)+, then the stack frame for the rte must be shifted "up"
# "n" bytes where "n" is the size of the src operand type.
# f<op>.{b,w,l,s,d,x,p}
funimp_gen_exit_a7_s:
	mov.l		%d0,-(%sp)		# save d0
	mov.l		EXC_A7(%a6),%d0		# load new a7'
	sub.l		OLD_A7(%a6),%d0		# subtract old a7'
	mov.l		0x2+EXC_PC(%a6),(0x2+EXC_PC,%a6,%d0) # shift stack frame
	mov.l		EXC_SR(%a6),(EXC_SR,%a6,%d0) # shift stack frame
	mov.w		%d0,EXC_SR(%a6)		# store incr number
	mov.l		(%sp)+,%d0		# restore d0

	unlk		%a6

# (%sp) now holds the increment stored above; adding it moves sp onto the
# shifted frame.
	add.w		(%sp),%sp		# stack frame shifted
	bra.b		funimp_gen_exit_cont2

######################
# fmovecr.x #ccc,fpn #
######################
funimp_fmovcr:
	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0
	mov.b		1+EXC_CMDREG(%a6),%d1
	andi.l		&0x0000007f,%d1		# pass rom offset in d1
	bsr.l		smovcr
	bra.w		funimp_fsave

#########################################################################

#
# the user has enabled some exceptions. we figure not to see this too
# often so that's why it gets lower priority.
#
funimp_ena:

# was an exception set that was also enabled?
	and.b		FPSR_EXCEPT(%a6),%d0	# keep only ones enabled and set
	bfffo		%d0{&24:&8},%d0		# find highest priority exception
	bne.b		funimp_exc		# at least one was set

# no exception that was enabled was set BUT if we got an exact overflow
# and overflow wasn't enabled but inexact was (yech!) then this is
# an inexact exception; otherwise, return to normal non-exception flow.
	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur?
	beq.w		funimp_store		# no; return to normal flow

# the overflow w/ exact result happened but was inexact set in the FPCR?
funimp_ovfl:
	btst		&inex2_bit,FPCR_ENABLE(%a6) # is inexact enabled?
	beq.w		funimp_store		# no; return to normal flow
	bra.b		funimp_exc_ovfl		# yes

# some exception happened that was actually enabled.
# we'll insert this new exception into the FPU and then return.
funimp_exc:
	subi.l		&24,%d0			# fix offset to be 0-7
	cmpi.b		%d0,&0x6		# is exception INEX?
	bne.b		funimp_exc_force	# no

# the enabled exception was inexact. so, if it occurs with an overflow
# or underflow that was disabled, then we have to force an overflow or
# underflow frame. the eventual overflow or underflow handler will see that
# it's actually an inexact and act appropriately. this is the only easy
# way to have the EXOP available for the enabled inexact handler when
# a disabled overflow or underflow has also happened.
	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur?
	bne.b		funimp_exc_ovfl		# yes
	btst		&unfl_bit,FPSR_EXCEPT(%a6) # did underflow occur?
	bne.b		funimp_exc_unfl		# yes

# force the fsave exception status bits to signal an exception of the
# appropriate type. don't forget to "skew" the source operand in case we
# "unskewed" the one the hardware initially gave us.
funimp_exc_force:
	mov.l		%d0,-(%sp)		# save d0
	bsr.l		funimp_skew		# check for special case
	mov.l		(%sp)+,%d0		# restore d0
	mov.w		(tbl_funimp_except.b,%pc,%d0.w*2),2+FP_SRC(%a6)
	bra.b		funimp_gen_exit2	# exit with frestore

# fsave status words, indexed by the 0-7 offset computed in funimp_exc.
tbl_funimp_except:
	short		0xe002, 0xe006, 0xe004, 0xe005
	short		0xe003, 0xe002, 0xe001, 0xe001

# insert an overflow frame
funimp_exc_ovfl:
	bsr.l		funimp_skew		# check for special case
	mov.w		&0xe005,2+FP_SRC(%a6)
	bra.b		funimp_gen_exit2

# insert an underflow frame
funimp_exc_unfl:
	bsr.l		funimp_skew		# check for special case
	mov.w		&0xe003,2+FP_SRC(%a6)

# this is the general exit point for an enabled exception that will be
# restored into the machine for the instruction just emulated.
funimp_gen_exit2:
	fmovm.x		EXC_FP0(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	frestore	FP_SRC(%a6)		# insert exceptional status

	bra.w		funimp_gen_exit_cmp

############################################################################

#
# TYPE == 1: FDB<cc>, FS<cc>, FTRAP<cc>
#
# These instructions were implemented on the '881/2 and '040 in hardware but
# are emulated in software on the '060.
#
funimp_misc:
	bfextu		%d0{&10:&3},%d1		# extract mode field
	cmpi.b		%d1,&0x1		# is it an fdb<cc>?
	beq.w		funimp_fdbcc		# yes
	cmpi.b		%d1,&0x7		# is it an fs<cc>?
	bne.w		funimp_fscc		# yes
	bfextu		%d0{&13:&3},%d1
	cmpi.b		%d1,&0x2		# is it an fs<cc>?
	blt.w		funimp_fscc		# yes

#########################
# ftrap<cc>		#
# ftrap<cc>.w #<data>	#
# ftrap<cc>.l #<data>	#
#########################
funimp_ftrapcc:

	bsr.l		_ftrapcc		# FTRAP<cc>()

	cmpi.b		SPCOND_FLG(%a6),&fbsun_flg # is enabled bsun occurring?
	beq.w		funimp_bsun		# yes

	cmpi.b		SPCOND_FLG(%a6),&ftrapcc_flg # should a trap occur?
	bne.w		funimp_done		# no

#	 FP UNIMP FRAME		   TRAP  FRAME
#	*****************	*****************
#	**    <EA>     **	**  Current PC **
#	*****************	*****************
#	* 0x2 *  0x02c	*	* 0x2 *  0x01c	*
#	*****************	*****************
#	**   Next PC   **	**    Next PC  **
#	*****************	*****************
#	*      SR	*	*      SR	*
#	*****************	*****************
#	    (6 words)		    (6 words)
#
# the ftrapcc instruction should take a trap. so, here we must create a
# trap stack frame from an unimplemented fp instruction stack frame and
# jump to the user supplied entry point for the trap exception
funimp_ftrapcc_tp:
	mov.l		USER_FPIAR(%a6),EXC_EA(%a6) # Address = Current PC
	mov.w		&0x201c,EXC_VOFF(%a6)	# Vector Offset = 0x01c

	fmovm.x		EXC_FP0(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	unlk		%a6
	bra.l		_real_trap

#########################
# fdb<cc> Dn,<label>	#
#########################
funimp_fdbcc:

	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word		# read displacement

	tst.l		%d1			# did ifetch fail?
	bne.w		funimp_iacc		# yes

	ext.l		%d0			# sign extend displacement

	bsr.l		_fdbcc			# FDB<cc>()

	cmpi.b		SPCOND_FLG(%a6),&fbsun_flg # is enabled bsun occurring?
	beq.w		funimp_bsun

	bra.w		funimp_done		# branch to finish

#################
# fs<cc>.b <ea>	#
#################
funimp_fscc:

	bsr.l		_fscc			# FS<cc>()

# I am assuming here that an "fs<cc>.b -(An)" or "fs<cc>.b (An)+" instruction
# does not need to update "An" before taking a bsun exception.
	cmpi.b		SPCOND_FLG(%a6),&fbsun_flg # is enabled bsun occurring?
	beq.w		funimp_bsun

	btst		&0x5,EXC_SR(%a6)	# no bsun; is it a user mode exception?
	bne.b		funimp_fscc_s		# no

funimp_fscc_u:
	mov.l		EXC_A7(%a6),%a0		# yes; set new USP
	mov.l		%a0,%usp
	bra.w		funimp_done		# branch to finish

# remember, I'm assuming that post-increment is bogus...(it IS!!!)
# so, the least significant WORD of the stacked effective address got
# overwritten by the "fs<cc> -(An)". We must shift the stack frame "down"
# so that the rte will work correctly without destroying the result.
# even though the operation size is byte, the stack ptr is decr by 2.
#
# remember, also, this instruction may be traced.
funimp_fscc_s:
	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # was a7 modified?
	bne.w		funimp_done		# no

	fmovm.x		EXC_FP0(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	unlk		%a6

	btst		&0x7,(%sp)		# is trace enabled?
	bne.b		funimp_fscc_s_trace	# yes

	subq.l		&0x2,%sp
	mov.l		0x2(%sp),(%sp)		# shift SR,hi(PC) "down"
	mov.l		0x6(%sp),0x4(%sp)	# shift lo(PC),voff "down"
	bra.l		_fpsp_done

funimp_fscc_s_trace:
	subq.l		&0x2,%sp
	mov.l		0x2(%sp),(%sp)		# shift SR,hi(PC) "down"
	mov.w		0x6(%sp),0x4(%sp)	# shift lo(PC)
	mov.w		&0x2024,0x6(%sp)	# fmt/voff = $2024
	fmov.l		%fpiar,0x8(%sp)		# insert "current PC"

	bra.l		_real_trace

#
# The ftrap<cc>, fs<cc>, or fdb<cc> is to take an enabled bsun. we must convert
# the fp unimplemented instruction exception stack frame into a bsun stack frame,
# restore a bsun exception into the machine, and branch to the user
# supplied bsun hook.
#
#	 FP UNIMP FRAME		   BSUN FRAME
#	*****************	*****************
#	**    <EA>     **	* 0x0 * 0x0c0	*
#	*****************	*****************
#	* 0x2 *  0x02c	*	** Current PC  **
#	*****************	*****************
#	**   Next PC   **	*      SR	*
#	*****************	*****************
#	*      SR	*	    (4 words)
#	*****************
#	    (6 words)
#
funimp_bsun:
	mov.w		&0x00c0,2+EXC_EA(%a6)	# Fmt = 0x0; Vector Offset = 0x0c0
	mov.l		USER_FPIAR(%a6),EXC_VOFF(%a6) # PC = Current PC
	mov.w		EXC_SR(%a6),2+EXC_PC(%a6) # shift SR "up"

	mov.w		&0xe000,2+FP_SRC(%a6)	# bsun exception enabled

	fmovm.x		EXC_FP0(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	frestore	FP_SRC(%a6)		# restore bsun exception

	unlk		%a6

	addq.l		&0x4,%sp		# erase sludge

	bra.l		_real_bsun		# branch to user bsun hook

#
# all ftrapcc/fscc/fdbcc processing has been completed. unwind the stack frame
# and return.
#
# as usual, we have to check for trace mode being on here. since instructions
# modifying the supervisor stack frame don't pass through here, this is a
# relatively easy task.
#
funimp_done:
	fmovm.x		EXC_FP0(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	unlk		%a6

	btst		&0x7,(%sp)		# is trace enabled?
	bne.b		funimp_trace		# yes

	bra.l		_fpsp_done

#	 FP UNIMP FRAME		   TRACE  FRAME
#	*****************	*****************
#	**    <EA>     **	**  Current PC **
#	*****************	*****************
#	* 0x2 *  0x02c	*	* 0x2 *  0x024	*
#	*****************	*****************
#	**   Next PC   **	**    Next PC  **
#	*****************	*****************
#	*      SR	*	*      SR	*
#	*****************	*****************
#	    (6 words)		    (6 words)
#
# the fscc instruction should take a trace trap. so, here we must create a
# trace stack frame from an unimplemented fp instruction stack frame and
# jump to the user supplied entry point for the trace exception
funimp_trace:
	fmov.l		%fpiar,0x8(%sp)		# current PC is in fpiar
	mov.b		&0x24,0x7(%sp)		# vector offset = 0x024

	bra.l		_real_trace

################################################################

	global		tbl_trans
	swbeg		&0x1c0
tbl_trans:
	short		tbl_trans	- tbl_trans	# $00-0 fmovecr all
	short		tbl_trans	- tbl_trans	# $00-1 fmovecr all
	short		tbl_trans	- tbl_trans	# $00-2 fmovecr all
	short		tbl_trans	- tbl_trans	# $00-3 fmovecr all
	short		tbl_trans	- tbl_trans	# $00-4 fmovecr all
	short		tbl_trans	- tbl_trans	# $00-5 fmovecr all
	short		tbl_trans	- tbl_trans	# $00-6 fmovecr all
	short		tbl_trans	- tbl_trans	# $00-7 fmovecr all

	short		tbl_trans	- tbl_trans	# $01-0 fint norm
	short		tbl_trans	- tbl_trans	# $01-1 fint zero
	short		tbl_trans	- tbl_trans	# $01-2 fint inf
	short		tbl_trans	- tbl_trans	# $01-3 fint qnan
	short		tbl_trans	- tbl_trans	# $01-5 fint denorm
	short		tbl_trans	- tbl_trans	# $01-4 fint snan
	short		tbl_trans	- tbl_trans	# $01-6 fint unnorm
	short		tbl_trans	- tbl_trans	# $01-7 ERROR

	short		ssinh		- tbl_trans	# $02-0 fsinh norm
	short		src_zero	- tbl_trans	# $02-1 fsinh zero
	short		src_inf		- tbl_trans	# $02-2 fsinh inf
	short		src_qnan	- tbl_trans	# $02-3
fsinh qnan 4499 short ssinhd - tbl_trans # $02-5 fsinh denorm 4500 short src_snan - tbl_trans # $02-4 fsinh snan 4501 short tbl_trans - tbl_trans # $02-6 fsinh unnorm 4502 short tbl_trans - tbl_trans # $02-7 ERROR 4503 4504 short tbl_trans - tbl_trans # $03-0 fintrz norm 4505 short tbl_trans - tbl_trans # $03-1 fintrz zero 4506 short tbl_trans - tbl_trans # $03-2 fintrz inf 4507 short tbl_trans - tbl_trans # $03-3 fintrz qnan 4508 short tbl_trans - tbl_trans # $03-5 fintrz denorm 4509 short tbl_trans - tbl_trans # $03-4 fintrz snan 4510 short tbl_trans - tbl_trans # $03-6 fintrz unnorm 4511 short tbl_trans - tbl_trans # $03-7 ERROR 4512 4513 short tbl_trans - tbl_trans # $04-0 fsqrt norm 4514 short tbl_trans - tbl_trans # $04-1 fsqrt zero 4515 short tbl_trans - tbl_trans # $04-2 fsqrt inf 4516 short tbl_trans - tbl_trans # $04-3 fsqrt qnan 4517 short tbl_trans - tbl_trans # $04-5 fsqrt denorm 4518 short tbl_trans - tbl_trans # $04-4 fsqrt snan 4519 short tbl_trans - tbl_trans # $04-6 fsqrt unnorm 4520 short tbl_trans - tbl_trans # $04-7 ERROR 4521 4522 short tbl_trans - tbl_trans # $05-0 ERROR 4523 short tbl_trans - tbl_trans # $05-1 ERROR 4524 short tbl_trans - tbl_trans # $05-2 ERROR 4525 short tbl_trans - tbl_trans # $05-3 ERROR 4526 short tbl_trans - tbl_trans # $05-4 ERROR 4527 short tbl_trans - tbl_trans # $05-5 ERROR 4528 short tbl_trans - tbl_trans # $05-6 ERROR 4529 short tbl_trans - tbl_trans # $05-7 ERROR 4530 4531 short slognp1 - tbl_trans # $06-0 flognp1 norm 4532 short src_zero - tbl_trans # $06-1 flognp1 zero 4533 short sopr_inf - tbl_trans # $06-2 flognp1 inf 4534 short src_qnan - tbl_trans # $06-3 flognp1 qnan 4535 short slognp1d - tbl_trans # $06-5 flognp1 denorm 4536 short src_snan - tbl_trans # $06-4 flognp1 snan 4537 short tbl_trans - tbl_trans # $06-6 flognp1 unnorm 4538 short tbl_trans - tbl_trans # $06-7 ERROR 4539 4540 short tbl_trans - tbl_trans # $07-0 ERROR 4541 short tbl_trans - tbl_trans # $07-1 ERROR 4542 short tbl_trans - tbl_trans # 
$07-2 ERROR 4543 short tbl_trans - tbl_trans # $07-3 ERROR 4544 short tbl_trans - tbl_trans # $07-4 ERROR 4545 short tbl_trans - tbl_trans # $07-5 ERROR 4546 short tbl_trans - tbl_trans # $07-6 ERROR 4547 short tbl_trans - tbl_trans # $07-7 ERROR 4548 4549 short setoxm1 - tbl_trans # $08-0 fetoxm1 norm 4550 short src_zero - tbl_trans # $08-1 fetoxm1 zero 4551 short setoxm1i - tbl_trans # $08-2 fetoxm1 inf 4552 short src_qnan - tbl_trans # $08-3 fetoxm1 qnan 4553 short setoxm1d - tbl_trans # $08-5 fetoxm1 denorm 4554 short src_snan - tbl_trans # $08-4 fetoxm1 snan 4555 short tbl_trans - tbl_trans # $08-6 fetoxm1 unnorm 4556 short tbl_trans - tbl_trans # $08-7 ERROR 4557 4558 short stanh - tbl_trans # $09-0 ftanh norm 4559 short src_zero - tbl_trans # $09-1 ftanh zero 4560 short src_one - tbl_trans # $09-2 ftanh inf 4561 short src_qnan - tbl_trans # $09-3 ftanh qnan 4562 short stanhd - tbl_trans # $09-5 ftanh denorm 4563 short src_snan - tbl_trans # $09-4 ftanh snan 4564 short tbl_trans - tbl_trans # $09-6 ftanh unnorm 4565 short tbl_trans - tbl_trans # $09-7 ERROR 4566 4567 short satan - tbl_trans # $0a-0 fatan norm 4568 short src_zero - tbl_trans # $0a-1 fatan zero 4569 short spi_2 - tbl_trans # $0a-2 fatan inf 4570 short src_qnan - tbl_trans # $0a-3 fatan qnan 4571 short satand - tbl_trans # $0a-5 fatan denorm 4572 short src_snan - tbl_trans # $0a-4 fatan snan 4573 short tbl_trans - tbl_trans # $0a-6 fatan unnorm 4574 short tbl_trans - tbl_trans # $0a-7 ERROR 4575 4576 short tbl_trans - tbl_trans # $0b-0 ERROR 4577 short tbl_trans - tbl_trans # $0b-1 ERROR 4578 short tbl_trans - tbl_trans # $0b-2 ERROR 4579 short tbl_trans - tbl_trans # $0b-3 ERROR 4580 short tbl_trans - tbl_trans # $0b-4 ERROR 4581 short tbl_trans - tbl_trans # $0b-5 ERROR 4582 short tbl_trans - tbl_trans # $0b-6 ERROR 4583 short tbl_trans - tbl_trans # $0b-7 ERROR 4584 4585 short sasin - tbl_trans # $0c-0 fasin norm 4586 short src_zero - tbl_trans # $0c-1 fasin zero 4587 short t_operr - 
tbl_trans # $0c-2 fasin inf 4588 short src_qnan - tbl_trans # $0c-3 fasin qnan 4589 short sasind - tbl_trans # $0c-5 fasin denorm 4590 short src_snan - tbl_trans # $0c-4 fasin snan 4591 short tbl_trans - tbl_trans # $0c-6 fasin unnorm 4592 short tbl_trans - tbl_trans # $0c-7 ERROR 4593 4594 short satanh - tbl_trans # $0d-0 fatanh norm 4595 short src_zero - tbl_trans # $0d-1 fatanh zero 4596 short t_operr - tbl_trans # $0d-2 fatanh inf 4597 short src_qnan - tbl_trans # $0d-3 fatanh qnan 4598 short satanhd - tbl_trans # $0d-5 fatanh denorm 4599 short src_snan - tbl_trans # $0d-4 fatanh snan 4600 short tbl_trans - tbl_trans # $0d-6 fatanh unnorm 4601 short tbl_trans - tbl_trans # $0d-7 ERROR 4602 4603 short ssin - tbl_trans # $0e-0 fsin norm 4604 short src_zero - tbl_trans # $0e-1 fsin zero 4605 short t_operr - tbl_trans # $0e-2 fsin inf 4606 short src_qnan - tbl_trans # $0e-3 fsin qnan 4607 short ssind - tbl_trans # $0e-5 fsin denorm 4608 short src_snan - tbl_trans # $0e-4 fsin snan 4609 short tbl_trans - tbl_trans # $0e-6 fsin unnorm 4610 short tbl_trans - tbl_trans # $0e-7 ERROR 4611 4612 short stan - tbl_trans # $0f-0 ftan norm 4613 short src_zero - tbl_trans # $0f-1 ftan zero 4614 short t_operr - tbl_trans # $0f-2 ftan inf 4615 short src_qnan - tbl_trans # $0f-3 ftan qnan 4616 short stand - tbl_trans # $0f-5 ftan denorm 4617 short src_snan - tbl_trans # $0f-4 ftan snan 4618 short tbl_trans - tbl_trans # $0f-6 ftan unnorm 4619 short tbl_trans - tbl_trans # $0f-7 ERROR 4620 4621 short setox - tbl_trans # $10-0 fetox norm 4622 short ld_pone - tbl_trans # $10-1 fetox zero 4623 short szr_inf - tbl_trans # $10-2 fetox inf 4624 short src_qnan - tbl_trans # $10-3 fetox qnan 4625 short setoxd - tbl_trans # $10-5 fetox denorm 4626 short src_snan - tbl_trans # $10-4 fetox snan 4627 short tbl_trans - tbl_trans # $10-6 fetox unnorm 4628 short tbl_trans - tbl_trans # $10-7 ERROR 4629 4630 short stwotox - tbl_trans # $11-0 ftwotox norm 4631 short ld_pone - tbl_trans # $11-1 
ftwotox zero 4632 short szr_inf - tbl_trans # $11-2 ftwotox inf 4633 short src_qnan - tbl_trans # $11-3 ftwotox qnan 4634 short stwotoxd - tbl_trans # $11-5 ftwotox denorm 4635 short src_snan - tbl_trans # $11-4 ftwotox snan 4636 short tbl_trans - tbl_trans # $11-6 ftwotox unnorm 4637 short tbl_trans - tbl_trans # $11-7 ERROR 4638 4639 short stentox - tbl_trans # $12-0 ftentox norm 4640 short ld_pone - tbl_trans # $12-1 ftentox zero 4641 short szr_inf - tbl_trans # $12-2 ftentox inf 4642 short src_qnan - tbl_trans # $12-3 ftentox qnan 4643 short stentoxd - tbl_trans # $12-5 ftentox denorm 4644 short src_snan - tbl_trans # $12-4 ftentox snan 4645 short tbl_trans - tbl_trans # $12-6 ftentox unnorm 4646 short tbl_trans - tbl_trans # $12-7 ERROR 4647 4648 short tbl_trans - tbl_trans # $13-0 ERROR 4649 short tbl_trans - tbl_trans # $13-1 ERROR 4650 short tbl_trans - tbl_trans # $13-2 ERROR 4651 short tbl_trans - tbl_trans # $13-3 ERROR 4652 short tbl_trans - tbl_trans # $13-4 ERROR 4653 short tbl_trans - tbl_trans # $13-5 ERROR 4654 short tbl_trans - tbl_trans # $13-6 ERROR 4655 short tbl_trans - tbl_trans # $13-7 ERROR 4656 4657 short slogn - tbl_trans # $14-0 flogn norm 4658 short t_dz2 - tbl_trans # $14-1 flogn zero 4659 short sopr_inf - tbl_trans # $14-2 flogn inf 4660 short src_qnan - tbl_trans # $14-3 flogn qnan 4661 short slognd - tbl_trans # $14-5 flogn denorm 4662 short src_snan - tbl_trans # $14-4 flogn snan 4663 short tbl_trans - tbl_trans # $14-6 flogn unnorm 4664 short tbl_trans - tbl_trans # $14-7 ERROR 4665 4666 short slog10 - tbl_trans # $15-0 flog10 norm 4667 short t_dz2 - tbl_trans # $15-1 flog10 zero 4668 short sopr_inf - tbl_trans # $15-2 flog10 inf 4669 short src_qnan - tbl_trans # $15-3 flog10 qnan 4670 short slog10d - tbl_trans # $15-5 flog10 denorm 4671 short src_snan - tbl_trans # $15-4 flog10 snan 4672 short tbl_trans - tbl_trans # $15-6 flog10 unnorm 4673 short tbl_trans - tbl_trans # $15-7 ERROR 4674 4675 short slog2 - tbl_trans # $16-0 flog2 
norm 4676 short t_dz2 - tbl_trans # $16-1 flog2 zero 4677 short sopr_inf - tbl_trans # $16-2 flog2 inf 4678 short src_qnan - tbl_trans # $16-3 flog2 qnan 4679 short slog2d - tbl_trans # $16-5 flog2 denorm 4680 short src_snan - tbl_trans # $16-4 flog2 snan 4681 short tbl_trans - tbl_trans # $16-6 flog2 unnorm 4682 short tbl_trans - tbl_trans # $16-7 ERROR 4683 4684 short tbl_trans - tbl_trans # $17-0 ERROR 4685 short tbl_trans - tbl_trans # $17-1 ERROR 4686 short tbl_trans - tbl_trans # $17-2 ERROR 4687 short tbl_trans - tbl_trans # $17-3 ERROR 4688 short tbl_trans - tbl_trans # $17-4 ERROR 4689 short tbl_trans - tbl_trans # $17-5 ERROR 4690 short tbl_trans - tbl_trans # $17-6 ERROR 4691 short tbl_trans - tbl_trans # $17-7 ERROR 4692 4693 short tbl_trans - tbl_trans # $18-0 fabs norm 4694 short tbl_trans - tbl_trans # $18-1 fabs zero 4695 short tbl_trans - tbl_trans # $18-2 fabs inf 4696 short tbl_trans - tbl_trans # $18-3 fabs qnan 4697 short tbl_trans - tbl_trans # $18-5 fabs denorm 4698 short tbl_trans - tbl_trans # $18-4 fabs snan 4699 short tbl_trans - tbl_trans # $18-6 fabs unnorm 4700 short tbl_trans - tbl_trans # $18-7 ERROR 4701 4702 short scosh - tbl_trans # $19-0 fcosh norm 4703 short ld_pone - tbl_trans # $19-1 fcosh zero 4704 short ld_pinf - tbl_trans # $19-2 fcosh inf 4705 short src_qnan - tbl_trans # $19-3 fcosh qnan 4706 short scoshd - tbl_trans # $19-5 fcosh denorm 4707 short src_snan - tbl_trans # $19-4 fcosh snan 4708 short tbl_trans - tbl_trans # $19-6 fcosh unnorm 4709 short tbl_trans - tbl_trans # $19-7 ERROR 4710 4711 short tbl_trans - tbl_trans # $1a-0 fneg norm 4712 short tbl_trans - tbl_trans # $1a-1 fneg zero 4713 short tbl_trans - tbl_trans # $1a-2 fneg inf 4714 short tbl_trans - tbl_trans # $1a-3 fneg qnan 4715 short tbl_trans - tbl_trans # $1a-5 fneg denorm 4716 short tbl_trans - tbl_trans # $1a-4 fneg snan 4717 short tbl_trans - tbl_trans # $1a-6 fneg unnorm 4718 short tbl_trans - tbl_trans # $1a-7 ERROR 4719 4720 short tbl_trans - 
tbl_trans # $1b-0 ERROR 4721 short tbl_trans - tbl_trans # $1b-1 ERROR 4722 short tbl_trans - tbl_trans # $1b-2 ERROR 4723 short tbl_trans - tbl_trans # $1b-3 ERROR 4724 short tbl_trans - tbl_trans # $1b-4 ERROR 4725 short tbl_trans - tbl_trans # $1b-5 ERROR 4726 short tbl_trans - tbl_trans # $1b-6 ERROR 4727 short tbl_trans - tbl_trans # $1b-7 ERROR 4728 4729 short sacos - tbl_trans # $1c-0 facos norm 4730 short ld_ppi2 - tbl_trans # $1c-1 facos zero 4731 short t_operr - tbl_trans # $1c-2 facos inf 4732 short src_qnan - tbl_trans # $1c-3 facos qnan 4733 short sacosd - tbl_trans # $1c-5 facos denorm 4734 short src_snan - tbl_trans # $1c-4 facos snan 4735 short tbl_trans - tbl_trans # $1c-6 facos unnorm 4736 short tbl_trans - tbl_trans # $1c-7 ERROR 4737 4738 short scos - tbl_trans # $1d-0 fcos norm 4739 short ld_pone - tbl_trans # $1d-1 fcos zero 4740 short t_operr - tbl_trans # $1d-2 fcos inf 4741 short src_qnan - tbl_trans # $1d-3 fcos qnan 4742 short scosd - tbl_trans # $1d-5 fcos denorm 4743 short src_snan - tbl_trans # $1d-4 fcos snan 4744 short tbl_trans - tbl_trans # $1d-6 fcos unnorm 4745 short tbl_trans - tbl_trans # $1d-7 ERROR 4746 4747 short sgetexp - tbl_trans # $1e-0 fgetexp norm 4748 short src_zero - tbl_trans # $1e-1 fgetexp zero 4749 short t_operr - tbl_trans # $1e-2 fgetexp inf 4750 short src_qnan - tbl_trans # $1e-3 fgetexp qnan 4751 short sgetexpd - tbl_trans # $1e-5 fgetexp denorm 4752 short src_snan - tbl_trans # $1e-4 fgetexp snan 4753 short tbl_trans - tbl_trans # $1e-6 fgetexp unnorm 4754 short tbl_trans - tbl_trans # $1e-7 ERROR 4755 4756 short sgetman - tbl_trans # $1f-0 fgetman norm 4757 short src_zero - tbl_trans # $1f-1 fgetman zero 4758 short t_operr - tbl_trans # $1f-2 fgetman inf 4759 short src_qnan - tbl_trans # $1f-3 fgetman qnan 4760 short sgetmand - tbl_trans # $1f-5 fgetman denorm 4761 short src_snan - tbl_trans # $1f-4 fgetman snan 4762 short tbl_trans - tbl_trans # $1f-6 fgetman unnorm 4763 short tbl_trans - tbl_trans # $1f-7 
ERROR 4764 4765 short tbl_trans - tbl_trans # $20-0 fdiv norm 4766 short tbl_trans - tbl_trans # $20-1 fdiv zero 4767 short tbl_trans - tbl_trans # $20-2 fdiv inf 4768 short tbl_trans - tbl_trans # $20-3 fdiv qnan 4769 short tbl_trans - tbl_trans # $20-5 fdiv denorm 4770 short tbl_trans - tbl_trans # $20-4 fdiv snan 4771 short tbl_trans - tbl_trans # $20-6 fdiv unnorm 4772 short tbl_trans - tbl_trans # $20-7 ERROR 4773 4774 short smod_snorm - tbl_trans # $21-0 fmod norm 4775 short smod_szero - tbl_trans # $21-1 fmod zero 4776 short smod_sinf - tbl_trans # $21-2 fmod inf 4777 short sop_sqnan - tbl_trans # $21-3 fmod qnan 4778 short smod_sdnrm - tbl_trans # $21-5 fmod denorm 4779 short sop_ssnan - tbl_trans # $21-4 fmod snan 4780 short tbl_trans - tbl_trans # $21-6 fmod unnorm 4781 short tbl_trans - tbl_trans # $21-7 ERROR 4782 4783 short tbl_trans - tbl_trans # $22-0 fadd norm 4784 short tbl_trans - tbl_trans # $22-1 fadd zero 4785 short tbl_trans - tbl_trans # $22-2 fadd inf 4786 short tbl_trans - tbl_trans # $22-3 fadd qnan 4787 short tbl_trans - tbl_trans # $22-5 fadd denorm 4788 short tbl_trans - tbl_trans # $22-4 fadd snan 4789 short tbl_trans - tbl_trans # $22-6 fadd unnorm 4790 short tbl_trans - tbl_trans # $22-7 ERROR 4791 4792 short tbl_trans - tbl_trans # $23-0 fmul norm 4793 short tbl_trans - tbl_trans # $23-1 fmul zero 4794 short tbl_trans - tbl_trans # $23-2 fmul inf 4795 short tbl_trans - tbl_trans # $23-3 fmul qnan 4796 short tbl_trans - tbl_trans # $23-5 fmul denorm 4797 short tbl_trans - tbl_trans # $23-4 fmul snan 4798 short tbl_trans - tbl_trans # $23-6 fmul unnorm 4799 short tbl_trans - tbl_trans # $23-7 ERROR 4800 4801 short tbl_trans - tbl_trans # $24-0 fsgldiv norm 4802 short tbl_trans - tbl_trans # $24-1 fsgldiv zero 4803 short tbl_trans - tbl_trans # $24-2 fsgldiv inf 4804 short tbl_trans - tbl_trans # $24-3 fsgldiv qnan 4805 short tbl_trans - tbl_trans # $24-5 fsgldiv denorm 4806 short tbl_trans - tbl_trans # $24-4 fsgldiv snan 4807 short 
tbl_trans - tbl_trans # $24-6 fsgldiv unnorm 4808 short tbl_trans - tbl_trans # $24-7 ERROR 4809 4810 short srem_snorm - tbl_trans # $25-0 frem norm 4811 short srem_szero - tbl_trans # $25-1 frem zero 4812 short srem_sinf - tbl_trans # $25-2 frem inf 4813 short sop_sqnan - tbl_trans # $25-3 frem qnan 4814 short srem_sdnrm - tbl_trans # $25-5 frem denorm 4815 short sop_ssnan - tbl_trans # $25-4 frem snan 4816 short tbl_trans - tbl_trans # $25-6 frem unnorm 4817 short tbl_trans - tbl_trans # $25-7 ERROR 4818 4819 short sscale_snorm - tbl_trans # $26-0 fscale norm 4820 short sscale_szero - tbl_trans # $26-1 fscale zero 4821 short sscale_sinf - tbl_trans # $26-2 fscale inf 4822 short sop_sqnan - tbl_trans # $26-3 fscale qnan 4823 short sscale_sdnrm - tbl_trans # $26-5 fscale denorm 4824 short sop_ssnan - tbl_trans # $26-4 fscale snan 4825 short tbl_trans - tbl_trans # $26-6 fscale unnorm 4826 short tbl_trans - tbl_trans # $26-7 ERROR 4827 4828 short tbl_trans - tbl_trans # $27-0 fsglmul norm 4829 short tbl_trans - tbl_trans # $27-1 fsglmul zero 4830 short tbl_trans - tbl_trans # $27-2 fsglmul inf 4831 short tbl_trans - tbl_trans # $27-3 fsglmul qnan 4832 short tbl_trans - tbl_trans # $27-5 fsglmul denorm 4833 short tbl_trans - tbl_trans # $27-4 fsglmul snan 4834 short tbl_trans - tbl_trans # $27-6 fsglmul unnorm 4835 short tbl_trans - tbl_trans # $27-7 ERROR 4836 4837 short tbl_trans - tbl_trans # $28-0 fsub norm 4838 short tbl_trans - tbl_trans # $28-1 fsub zero 4839 short tbl_trans - tbl_trans # $28-2 fsub inf 4840 short tbl_trans - tbl_trans # $28-3 fsub qnan 4841 short tbl_trans - tbl_trans # $28-5 fsub denorm 4842 short tbl_trans - tbl_trans # $28-4 fsub snan 4843 short tbl_trans - tbl_trans # $28-6 fsub unnorm 4844 short tbl_trans - tbl_trans # $28-7 ERROR 4845 4846 short tbl_trans - tbl_trans # $29-0 ERROR 4847 short tbl_trans - tbl_trans # $29-1 ERROR 4848 short tbl_trans - tbl_trans # $29-2 ERROR 4849 short tbl_trans - tbl_trans # $29-3 ERROR 4850 short 
tbl_trans - tbl_trans # $29-4 ERROR 4851 short tbl_trans - tbl_trans # $29-5 ERROR 4852 short tbl_trans - tbl_trans # $29-6 ERROR 4853 short tbl_trans - tbl_trans # $29-7 ERROR 4854 4855 short tbl_trans - tbl_trans # $2a-0 ERROR 4856 short tbl_trans - tbl_trans # $2a-1 ERROR 4857 short tbl_trans - tbl_trans # $2a-2 ERROR 4858 short tbl_trans - tbl_trans # $2a-3 ERROR 4859 short tbl_trans - tbl_trans # $2a-4 ERROR 4860 short tbl_trans - tbl_trans # $2a-5 ERROR 4861 short tbl_trans - tbl_trans # $2a-6 ERROR 4862 short tbl_trans - tbl_trans # $2a-7 ERROR 4863 4864 short tbl_trans - tbl_trans # $2b-0 ERROR 4865 short tbl_trans - tbl_trans # $2b-1 ERROR 4866 short tbl_trans - tbl_trans # $2b-2 ERROR 4867 short tbl_trans - tbl_trans # $2b-3 ERROR 4868 short tbl_trans - tbl_trans # $2b-4 ERROR 4869 short tbl_trans - tbl_trans # $2b-5 ERROR 4870 short tbl_trans - tbl_trans # $2b-6 ERROR 4871 short tbl_trans - tbl_trans # $2b-7 ERROR 4872 4873 short tbl_trans - tbl_trans # $2c-0 ERROR 4874 short tbl_trans - tbl_trans # $2c-1 ERROR 4875 short tbl_trans - tbl_trans # $2c-2 ERROR 4876 short tbl_trans - tbl_trans # $2c-3 ERROR 4877 short tbl_trans - tbl_trans # $2c-4 ERROR 4878 short tbl_trans - tbl_trans # $2c-5 ERROR 4879 short tbl_trans - tbl_trans # $2c-6 ERROR 4880 short tbl_trans - tbl_trans # $2c-7 ERROR 4881 4882 short tbl_trans - tbl_trans # $2d-0 ERROR 4883 short tbl_trans - tbl_trans # $2d-1 ERROR 4884 short tbl_trans - tbl_trans # $2d-2 ERROR 4885 short tbl_trans - tbl_trans # $2d-3 ERROR 4886 short tbl_trans - tbl_trans # $2d-4 ERROR 4887 short tbl_trans - tbl_trans # $2d-5 ERROR 4888 short tbl_trans - tbl_trans # $2d-6 ERROR 4889 short tbl_trans - tbl_trans # $2d-7 ERROR 4890 4891 short tbl_trans - tbl_trans # $2e-0 ERROR 4892 short tbl_trans - tbl_trans # $2e-1 ERROR 4893 short tbl_trans - tbl_trans # $2e-2 ERROR 4894 short tbl_trans - tbl_trans # $2e-3 ERROR 4895 short tbl_trans - tbl_trans # $2e-4 ERROR 4896 short tbl_trans - tbl_trans # $2e-5 ERROR 4897 short 
tbl_trans - tbl_trans # $2e-6 ERROR 4898 short tbl_trans - tbl_trans # $2e-7 ERROR 4899 4900 short tbl_trans - tbl_trans # $2f-0 ERROR 4901 short tbl_trans - tbl_trans # $2f-1 ERROR 4902 short tbl_trans - tbl_trans # $2f-2 ERROR 4903 short tbl_trans - tbl_trans # $2f-3 ERROR 4904 short tbl_trans - tbl_trans # $2f-4 ERROR 4905 short tbl_trans - tbl_trans # $2f-5 ERROR 4906 short tbl_trans - tbl_trans # $2f-6 ERROR 4907 short tbl_trans - tbl_trans # $2f-7 ERROR 4908 4909 short ssincos - tbl_trans # $30-0 fsincos norm 4910 short ssincosz - tbl_trans # $30-1 fsincos zero 4911 short ssincosi - tbl_trans # $30-2 fsincos inf 4912 short ssincosqnan - tbl_trans # $30-3 fsincos qnan 4913 short ssincosd - tbl_trans # $30-5 fsincos denorm 4914 short ssincossnan - tbl_trans # $30-4 fsincos snan 4915 short tbl_trans - tbl_trans # $30-6 fsincos unnorm 4916 short tbl_trans - tbl_trans # $30-7 ERROR 4917 4918 short ssincos - tbl_trans # $31-0 fsincos norm 4919 short ssincosz - tbl_trans # $31-1 fsincos zero 4920 short ssincosi - tbl_trans # $31-2 fsincos inf 4921 short ssincosqnan - tbl_trans # $31-3 fsincos qnan 4922 short ssincosd - tbl_trans # $31-5 fsincos denorm 4923 short ssincossnan - tbl_trans # $31-4 fsincos snan 4924 short tbl_trans - tbl_trans # $31-6 fsincos unnorm 4925 short tbl_trans - tbl_trans # $31-7 ERROR 4926 4927 short ssincos - tbl_trans # $32-0 fsincos norm 4928 short ssincosz - tbl_trans # $32-1 fsincos zero 4929 short ssincosi - tbl_trans # $32-2 fsincos inf 4930 short ssincosqnan - tbl_trans # $32-3 fsincos qnan 4931 short ssincosd - tbl_trans # $32-5 fsincos denorm 4932 short ssincossnan - tbl_trans # $32-4 fsincos snan 4933 short tbl_trans - tbl_trans # $32-6 fsincos unnorm 4934 short tbl_trans - tbl_trans # $32-7 ERROR 4935 4936 short ssincos - tbl_trans # $33-0 fsincos norm 4937 short ssincosz - tbl_trans # $33-1 fsincos zero 4938 short ssincosi - tbl_trans # $33-2 fsincos inf 4939 short ssincosqnan - tbl_trans # $33-3 fsincos qnan 4940 short ssincosd - 
tbl_trans # $33-5 fsincos denorm 4941 short ssincossnan - tbl_trans # $33-4 fsincos snan 4942 short tbl_trans - tbl_trans # $33-6 fsincos unnorm 4943 short tbl_trans - tbl_trans # $33-7 ERROR 4944 4945 short ssincos - tbl_trans # $34-0 fsincos norm 4946 short ssincosz - tbl_trans # $34-1 fsincos zero 4947 short ssincosi - tbl_trans # $34-2 fsincos inf 4948 short ssincosqnan - tbl_trans # $34-3 fsincos qnan 4949 short ssincosd - tbl_trans # $34-5 fsincos denorm 4950 short ssincossnan - tbl_trans # $34-4 fsincos snan 4951 short tbl_trans - tbl_trans # $34-6 fsincos unnorm 4952 short tbl_trans - tbl_trans # $34-7 ERROR 4953 4954 short ssincos - tbl_trans # $35-0 fsincos norm 4955 short ssincosz - tbl_trans # $35-1 fsincos zero 4956 short ssincosi - tbl_trans # $35-2 fsincos inf 4957 short ssincosqnan - tbl_trans # $35-3 fsincos qnan 4958 short ssincosd - tbl_trans # $35-5 fsincos denorm 4959 short ssincossnan - tbl_trans # $35-4 fsincos snan 4960 short tbl_trans - tbl_trans # $35-6 fsincos unnorm 4961 short tbl_trans - tbl_trans # $35-7 ERROR 4962 4963 short ssincos - tbl_trans # $36-0 fsincos norm 4964 short ssincosz - tbl_trans # $36-1 fsincos zero 4965 short ssincosi - tbl_trans # $36-2 fsincos inf 4966 short ssincosqnan - tbl_trans # $36-3 fsincos qnan 4967 short ssincosd - tbl_trans # $36-5 fsincos denorm 4968 short ssincossnan - tbl_trans # $36-4 fsincos snan 4969 short tbl_trans - tbl_trans # $36-6 fsincos unnorm 4970 short tbl_trans - tbl_trans # $36-7 ERROR 4971 4972 short ssincos - tbl_trans # $37-0 fsincos norm 4973 short ssincosz - tbl_trans # $37-1 fsincos zero 4974 short ssincosi - tbl_trans # $37-2 fsincos inf 4975 short ssincosqnan - tbl_trans # $37-3 fsincos qnan 4976 short ssincosd - tbl_trans # $37-5 fsincos denorm 4977 short ssincossnan - tbl_trans # $37-4 fsincos snan 4978 short tbl_trans - tbl_trans # $37-6 fsincos unnorm 4979 short tbl_trans - tbl_trans # $37-7 ERROR 4980 4981########## 4982 4983# the instruction fetch access for the 
# displacement word for the
# fdbcc emulation failed. here, we create an access error frame
# from the current frame and branch to _real_access().
#
# funimp_iacc:
#   Entered with the %a6-linked exception frame still in place.
#   Steps performed:
#     1. reload d0-d1/a0-a1, fpcr/fpsr/fpiar and fp0-fp1 from the save
#        areas addressed through %a6.
#     2. copy USER_FPIAR into EXC_PC, then unlink %a6.
#     3. push a copy of the first longword on the stack (SR, hi(PC)),
#        which grows the frame by four bytes, then fill in lo(PC), the
#        format/vector-offset word 0x4008, an effective address equal
#        to that PC, and the fault status longword (FSLW) 0x09428001.
#     4. if bit 5 of the first stacked byte (the S bit of SR) is set,
#        also set bit 2 of the byte at 0xd(%sp), i.e. bit 18 of the
#        FSLW longword.
#   Does not return; control leaves through _real_access.
funimp_iacc:
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1

	mov.l		USER_FPIAR(%a6),EXC_PC(%a6) # store current PC

	unlk		%a6

# the frame is rebuilt in place; offsets below are relative to the new %sp
	mov.l		(%sp),-(%sp)		# store SR,hi(PC)
	mov.w		0x8(%sp),0x4(%sp)	# store lo(PC)
	mov.w		&0x4008,0x6(%sp)	# store voff
	mov.l		0x2(%sp),0x8(%sp)	# store EA
	mov.l		&0x09428001,0xc(%sp)	# store FSLW

	btst		&0x5,(%sp)		# user or supervisor mode?
	beq.b		funimp_iacc_end		# user
	bset		&0x2,0xd(%sp)		# set supervisor TM bit

funimp_iacc_end:
	bra.l		_real_access

#########################################################################
# ssin():     computes the sine of a normalized input			#
# ssind():    computes the sine of a denormalized input			#
# scos():     computes the cosine of a normalized input			#
# scosd():    computes the cosine of a denormalized input		#
# ssincos():  computes the sine and cosine of a normalized input	#
# ssincosd(): computes the sine and cosine of a denormalized input	#
#									#
# INPUT *************************************************************** #
#	a0 = pointer to extended precision input			#
#	d0 = round precision,mode					#
#									#
# OUTPUT ************************************************************** #
#	fp0 = sin(X) or cos(X)						#
#									#
#	For ssincos(X):							#
#	fp0 = sin(X)							#
#	fp1 = cos(X)							#
#									#
# ACCURACY and MONOTONICITY ******************************************* #
#	The returned result is within 1 ulp in 64 significant bits,	#
#	i.e. within 0.5001 ulp to 53 bits if the result is subsequently	#
#	rounded to double precision. The result is provably monotonic	#
#	in double precision.						#
#									#
# ALGORITHM *********************************************************** #
#									#
#	SIN and COS:							#
#	1. If SIN is invoked, set AdjN := 0; otherwise, set AdjN := 1.	#
#									#
#	2. If |X| >= 15Pi or |X| < 2**(-40), go to 7.			#
#									#
#	3. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let	#
#	   k = N mod 4, so in particular, k = 0,1,2,or 3.		#
#	   Overwrite k by k := k + AdjN.				#
#									#
#	4. If k is even, go to 6.					#
#									#
#	5. (k is odd) Set j := (k-1)/2, sgn := (-1)**j.			#
#	   Return sgn*cos(r) where cos(r) is approximated by an		#
#	   even polynomial in r, 1 + r*r*(B1+s*(B2+ ... + s*B8)),	#
#	   s = r*r.							#
#	   Exit.							#
#									#
#	6. (k is even) Set j := k/2, sgn := (-1)**j. Return sgn*sin(r)	#
#	   where sin(r) is approximated by an odd polynomial in r	#
#	   r + r*s*(A1+s*(A2+ ... + s*A7)), s = r*r.			#
#	   Exit.							#
#									#
#	7. If |X| > 1, go to 9.						#
#									#
#	8. (|X|<2**(-40)) If SIN is invoked, return X;			#
#	   otherwise return 1.						#
#									#
#	9. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi,		#
#	   go back to 3.						#
#									#
#	SINCOS:								#
#	1. If |X| >= 15Pi or |X| < 2**(-40), go to 6.			#
#									#
#	2. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let	#
#	   k = N mod 4, so in particular, k = 0,1,2,or 3.		#
#									#
#	3. If k is even, go to 5.					#
#									#
#	4. (k is odd) Set j1 := (k-1)/2, j2 := j1 (EOR) (k mod 2), ie.	#
#	   j1 exclusive or with the l.s.b. of k.			#
#	   sgn1 := (-1)**j1, sgn2 := (-1)**j2.				#
#	   SIN(X) = sgn1 * cos(r) and COS(X) = sgn2*sin(r) where	#
#	   sin(r) and cos(r) are computed as odd and even		#
#	   polynomials in r, respectively. Exit				#
#									#
#	5. (k is even) Set j1 := k/2, sgn1 := (-1)**j1.			#
#	   SIN(X) = sgn1 * sin(r) and COS(X) = sgn1*cos(r) where	#
#	   sin(r) and cos(r) are computed as odd and even		#
#	   polynomials in r, respectively. Exit				#
#									#
#	6. If |X| > 1, go to 8.						#
#									#
#	7. (|X|<2**(-40)) SIN(X) = X and COS(X) = 1. Exit.		#
#									#
#	8. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi,		#
#	   go back to 2.						#
#									#
#########################################################################

# Polynomial coefficients. The code below reads SINA7-SINA3 with .d
# operations and SINA2/SINA1 with .x operations; the trailing zero
# longwords only pad the entries.
SINA7:	long		0xBD6AAA77,0xCCC994F5
SINA6:	long		0x3DE61209,0x7AAE8DA1
SINA5:	long		0xBE5AE645,0x2A118AE4
SINA4:	long		0x3EC71DE3,0xA5341531
SINA3:	long		0xBF2A01A0,0x1A018B59,0x00000000,0x00000000
SINA2:	long		0x3FF80000,0x88888888,0x888859AF,0x00000000
SINA1:	long		0xBFFC0000,0xAAAAAAAA,0xAAAAAA99,0x00000000

# COSB8-COSB4 are read with .d operations, COSB3/COSB2 with .x
# operations and COSB1 with a .s operation.
COSB8:	long		0x3D2AC4D0,0xD6011EE3
COSB7:	long		0xBDA9396F,0x9F45AC19
COSB6:	long		0x3E21EED9,0x0612C972
COSB5:	long		0xBE927E4F,0xB79D9FCF
COSB4:	long		0x3EFA01A0,0x1A01D423,0x00000000,0x00000000
COSB3:	long		0xBFF50000,0xB60B60B6,0x0B61D438,0x00000000
COSB2:	long		0x3FFA0000,0xAAAAAAAA,0xAAAAAB5E
COSB1:	long		0xBF000000

# Scratch-area aliases. Note that several names share one slot:
# INARG, X and RPRIME are all FP_SCR0; POSNEG1 and TWOTO63 are both
# L_SCR1; ENDFLAG and INT are both L_SCR2.
	set		INARG,FP_SCR0

	set		X,FP_SCR0
#	set		XDCARE,X+2
	set		XFRAC,X+4

	set		RPRIME,FP_SCR0
	set		SPRIME,FP_SCR1

	set		POSNEG1,L_SCR1
	set		TWOTO63,L_SCR1

	set		ENDFLAG,L_SCR2
	set		INT,L_SCR2

	set		ADJN,L_SCR3

############################################
	global		ssin
ssin:
	mov.l		&0,ADJN(%a6)		# sine entry: SET ADJN TO 0
	bra.b		SINBGN

############################################
	global		scos
scos:
	mov.l		&1,ADJN(%a6)		# cosine entry: SET ADJN TO 1

############################################
SINBGN:
#--LOAD THE INPUT AND CHECK IF |X| IS TOO SMALL OR LARGE

	fmov.x		(%a0),%fp0		# LOAD INPUT
	fmov.x		%fp0,X(%a6)		# save input at X

# "COMPACTIFY" X
	mov.l		(%a0),%d1		# put exp in hi word
	mov.w		4(%a0),%d1		# fetch hi(man)
	and.l		&0x7FFFFFFF,%d1		# strip sign

	cmpi.l		%d1,&0x3FD78000		# is |X| >= 2**(-40)?
	bge.b		SOK1			# yes; go check the upper bound
	bra.w		SINSM			# no; input is very small

SOK1:
	cmp.l		%d1,&0x4004BC7E		# is |X| < 15 PI?
	blt.b		SINMAIN			# yes; usual case
	bra.w		SREDUCEX		# no; input is very large

#--THIS IS THE USUAL CASE, |X| <= 15 PI.
#--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
SINMAIN:
	fmov.x		%fp0,%fp1
	fmul.d		TWOBYPI(%pc),%fp1	# X*2/PI

	lea		PITBL+0x200(%pc),%a1	# TABLE OF N*PI/2, N = -32,...,32

	fmov.l		%fp1,INT(%a6)		# CONVERT TO INTEGER

	mov.l		INT(%a6),%d1		# make a copy of N
	asl.l		&4,%d1			# N *= 16
	add.l		%d1,%a1			# tbl_addr = a1 + (N*16)

# A1 IS THE ADDRESS OF N*PIBY2
# ...WHICH IS IN TWO PIECES Y1 & Y2
	fsub.x		(%a1)+,%fp0		# X-Y1
	fsub.s		(%a1),%fp0		# fp0 = R = (X-Y1)-Y2

SINCONT:
#--continuation from REDUCEX

#--GET N+ADJN AND SEE IF SIN(R) OR COS(R) IS NEEDED
	mov.l		INT(%a6),%d1
	add.l		ADJN(%a6),%d1		# SEE IF D1 IS ODD OR EVEN
	ror.l		&1,%d1			# D1 WAS ODD IFF D1 IS NOW NEGATIVE
	cmp.l		%d1,&0
	blt.w		COSPOLY

#--LET J BE THE LEAST SIG. BIT OF D1, LET SGN := (-1)**J.
#--THEN WE RETURN	SGN*SIN(R). SGN*SIN(R) IS COMPUTED BY
#--R' + R'*S*(A1 + S(A2 + S(A3 + S(A4 + ... + SA7)))), WHERE
#--R' = SGN*R, S=R*R. THIS CAN BE REWRITTEN AS
#--R' + R'*S*( [A1+T(A3+T(A5+TA7))] + [S(A2+T(A4+TA6))])
#--WHERE T=S*S.
#--NOTE THAT A3 THROUGH A7 ARE STORED IN DOUBLE PRECISION
#--WHILE A1 AND A2 ARE IN DOUBLE-EXTENDED FORMAT.
SINPOLY:
	fmovm.x		&0x0c,-(%sp)		# save fp2/fp3

	fmov.x		%fp0,X(%a6)		# X IS R
	fmul.x		%fp0,%fp0		# FP0 IS S

	fmov.d		SINA7(%pc),%fp3
	fmov.d		SINA6(%pc),%fp2

	fmov.x		%fp0,%fp1
	fmul.x		%fp1,%fp1		# FP1 IS T

	ror.l		&1,%d1
	and.l		&0x80000000,%d1
# ...LEAST SIG. BIT OF D1 IN SIGN POSITION
	eor.l		%d1,X(%a6)		# X IS NOW R'= SGN*R

	fmul.x		%fp1,%fp3		# TA7
	fmul.x		%fp1,%fp2		# TA6

	fadd.d		SINA5(%pc),%fp3		# A5+TA7
	fadd.d		SINA4(%pc),%fp2		# A4+TA6

	fmul.x		%fp1,%fp3		# T(A5+TA7)
	fmul.x		%fp1,%fp2		# T(A4+TA6)

	fadd.d		SINA3(%pc),%fp3		# A3+T(A5+TA7)
	fadd.x		SINA2(%pc),%fp2		# A2+T(A4+TA6)

	fmul.x		%fp3,%fp1		# T(A3+T(A5+TA7))

	fmul.x		%fp0,%fp2		# S(A2+T(A4+TA6))
	fadd.x		SINA1(%pc),%fp1		# A1+T(A3+T(A5+TA7))
	fmul.x		X(%a6),%fp0		# R'*S

	fadd.x		%fp2,%fp1		# [A1+T(A3+T(A5+TA7))]+[S(A2+T(A4+TA6))]

	fmul.x		%fp1,%fp0		# SIN(R')-R'

	fmovm.x		(%sp)+,&0x30		# restore fp2/fp3

	fmov.l		%d0,%fpcr		# restore users round mode,prec
	fadd.x		X(%a6),%fp0		# last inst - possible exception set
	bra		t_inx2

#--LET J BE THE LEAST SIG. BIT OF D1, LET SGN := (-1)**J.
#--THEN WE RETURN	SGN*COS(R). SGN*COS(R) IS COMPUTED BY
#--SGN + S'*(B1 + S(B2 + S(B3 + S(B4 + ... + SB8)))), WHERE
#--S=R*R AND S'=SGN*S. THIS CAN BE REWRITTEN AS
#--SGN + S'*([B1+T(B3+T(B5+TB7))] + [S(B2+T(B4+T(B6+TB8)))])
#--WHERE T=S*S.
#--NOTE THAT B4 THROUGH B8 ARE STORED IN DOUBLE PRECISION
#--WHILE B2 AND B3 ARE IN DOUBLE-EXTENDED FORMAT, B1 IS -1/2
#--AND IS THEREFORE STORED AS SINGLE PRECISION.
COSPOLY:
	fmovm.x		&0x0c,-(%sp)		# save fp2/fp3

	fmul.x		%fp0,%fp0		# FP0 IS S

	fmov.d		COSB8(%pc),%fp2
	fmov.d		COSB7(%pc),%fp3

	fmov.x		%fp0,%fp1
	fmul.x		%fp1,%fp1		# FP1 IS T

	fmov.x		%fp0,X(%a6)		# X IS S
	ror.l		&1,%d1
	and.l		&0x80000000,%d1
# ...LEAST SIG. BIT OF D1 IN SIGN POSITION

	fmul.x		%fp1,%fp2		# TB8

	eor.l		%d1,X(%a6)		# X IS NOW S'= SGN*S
	and.l		&0x80000000,%d1

	fmul.x		%fp1,%fp3		# TB7

	or.l		&0x3F800000,%d1		# D1 IS SGN IN SINGLE
	mov.l		%d1,POSNEG1(%a6)

	fadd.d		COSB6(%pc),%fp2		# B6+TB8
	fadd.d		COSB5(%pc),%fp3		# B5+TB7

	fmul.x		%fp1,%fp2		# T(B6+TB8)
	fmul.x		%fp1,%fp3		# T(B5+TB7)

	fadd.d		COSB4(%pc),%fp2		# B4+T(B6+TB8)
	fadd.x		COSB3(%pc),%fp3		# B3+T(B5+TB7)

	fmul.x		%fp1,%fp2		# T(B4+T(B6+TB8))
	fmul.x		%fp3,%fp1		# T(B3+T(B5+TB7))

	fadd.x		COSB2(%pc),%fp2		# B2+T(B4+T(B6+TB8))
	fadd.s		COSB1(%pc),%fp1		# B1+T(B3+T(B5+TB7))

	fmul.x		%fp2,%fp0		# S(B2+T(B4+T(B6+TB8)))

	fadd.x		%fp1,%fp0

	fmul.x		X(%a6),%fp0

	fmovm.x		(%sp)+,&0x30		# restore fp2/fp3

	fmov.l		%d0,%fpcr		# restore users round mode,prec
	fadd.s		POSNEG1(%a6),%fp0	# last inst - possible exception set
	bra		t_inx2

##############################################

# SINe: Big OR Small?
#--IF |X| > 15PI, WE USE THE GENERAL ARGUMENT REDUCTION.
#--IF |X| < 2**(-40), RETURN X OR 1.
# NOTE(review): no branch to SINBORS is visible in this part of the
# file; SINBGN/SOK1 above go straight to SINSM or SREDUCEX. Confirm
# whether this label is still referenced elsewhere.
SINBORS:
	cmp.l		%d1,&0x3FFF8000
	bgt.l		SREDUCEX

SINSM:
	mov.l		ADJN(%a6),%d1
	cmp.l		%d1,&0
	bgt.b		COSTINY			# ADJN > 0: cosine was requested

# here, the operation may underflow iff the precision is sgl or dbl.
# extended denorms are handled through another entry point.
5320SINTINY: 5321# mov.w &0x0000,XDCARE(%a6) # JUST IN CASE 5322 5323 fmov.l %d0,%fpcr # restore users round mode,prec 5324 mov.b &FMOV_OP,%d1 # last inst is MOVE 5325 fmov.x X(%a6),%fp0 # last inst - possible exception set 5326 bra t_catch 5327 5328COSTINY: 5329 fmov.s &0x3F800000,%fp0 # fp0 = 1.0 5330 fmov.l %d0,%fpcr # restore users round mode,prec 5331 fadd.s &0x80800000,%fp0 # last inst - possible exception set 5332 bra t_pinx2 5333 5334################################################ 5335 global ssind 5336#--SIN(X) = X FOR DENORMALIZED X 5337ssind: 5338 bra t_extdnrm 5339 5340############################################ 5341 global scosd 5342#--COS(X) = 1 FOR DENORMALIZED X 5343scosd: 5344 fmov.s &0x3F800000,%fp0 # fp0 = 1.0 5345 bra t_pinx2 5346 5347################################################## 5348 5349 global ssincos 5350ssincos: 5351#--SET ADJN TO 4 5352 mov.l &4,ADJN(%a6) 5353 5354 fmov.x (%a0),%fp0 # LOAD INPUT 5355 fmov.x %fp0,X(%a6) 5356 5357 mov.l (%a0),%d1 5358 mov.w 4(%a0),%d1 5359 and.l &0x7FFFFFFF,%d1 # COMPACTIFY X 5360 5361 cmp.l %d1,&0x3FD78000 # |X| >= 2**(-40)? 5362 bge.b SCOK1 5363 bra.w SCSM 5364 5365SCOK1: 5366 cmp.l %d1,&0x4004BC7E # |X| < 15 PI? 5367 blt.b SCMAIN 5368 bra.w SREDUCEX 5369 5370 5371#--THIS IS THE USUAL CASE, |X| <= 15 PI. 5372#--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP. 5373SCMAIN: 5374 fmov.x %fp0,%fp1 5375 5376 fmul.d TWOBYPI(%pc),%fp1 # X*2/PI 5377 5378 lea PITBL+0x200(%pc),%a1 # TABLE OF N*PI/2, N = -32,...,32 5379 5380 fmov.l %fp1,INT(%a6) # CONVERT TO INTEGER 5381 5382 mov.l INT(%a6),%d1 5383 asl.l &4,%d1 5384 add.l %d1,%a1 # ADDRESS OF N*PIBY2, IN Y1, Y2 5385 5386 fsub.x (%a1)+,%fp0 # X-Y1 5387 fsub.s (%a1),%fp0 # FP0 IS R = (X-Y1)-Y2 5388 5389SCCONT: 5390#--continuation point from REDUCEX 5391 5392 mov.l INT(%a6),%d1 5393 ror.l &1,%d1 5394 cmp.l %d1,&0 # D0 < 0 IFF N IS ODD 5395 bge.w NEVEN 5396 5397SNODD: 5398#--REGISTERS SAVED SO FAR: D0, A0, FP2. 
#--SNODD body (N odd).  The sine-coefficient polynomial (SINAn) is evaluated
#--in fp1 and becomes COS(X); the cosine-coefficient polynomial (COSBn) is
#--evaluated in fp2/fp0 and becomes SIN(X).
#--On entry fp0 = R and d1 = N rotated right by one bit (done at SCCONT), so
#--bit 31 of d1 is bit 0 of N and bit 0 of d1 is bit 1 of N.
	fmovm.x		&0x04,-(%sp)		# save fp2

	fmov.x		%fp0,RPRIME(%a6)
	fmul.x		%fp0,%fp0		# FP0 IS S = R*R
	fmov.d		SINA7(%pc),%fp1		# A7
	fmov.d		COSB8(%pc),%fp2		# B8
	fmul.x		%fp0,%fp1		# SA7
	fmul.x		%fp0,%fp2		# SB8

	mov.l		%d2,-(%sp)		# d2 is used as scratch below
	mov.l		%d1,%d2
	ror.l		&1,%d2			# bit 31 of d2 = bit 1 of N
	and.l		&0x80000000,%d2
	eor.l		%d1,%d2			# bit 31 = (bit 1 of N) XOR (bit 0 of N)
	and.l		&0x80000000,%d2		# keep only that bit

	fadd.d		SINA6(%pc),%fp1		# A6+SA7
	fadd.d		COSB7(%pc),%fp2		# B7+SB8

	fmul.x		%fp0,%fp1		# S(A6+SA7)
	eor.l		%d2,RPRIME(%a6)		# flip sign of R' according to d2
	mov.l		(%sp)+,%d2		# scratch d2 restored
	fmul.x		%fp0,%fp2		# S(B7+SB8)
	ror.l		&1,%d1			# bit 31 of d1 = bit 1 of N
	and.l		&0x80000000,%d1
	mov.l		&0x3F800000,POSNEG1(%a6)	# 1.0 in single precision
	eor.l		%d1,POSNEG1(%a6)	# POSNEG1 = +-1.0, sign from d1

	fadd.d		SINA5(%pc),%fp1		# A5+S(A6+SA7)
	fadd.d		COSB6(%pc),%fp2		# B6+S(B7+SB8)

	fmul.x		%fp0,%fp1		# S(A5+S(A6+SA7))
	fmul.x		%fp0,%fp2		# S(B6+S(B7+SB8))
	fmov.x		%fp0,SPRIME(%a6)

	fadd.d		SINA4(%pc),%fp1		# A4+S(A5+S(A6+SA7))
	eor.l		%d1,SPRIME(%a6)		# S' = S with sign from d1
	fadd.d		COSB5(%pc),%fp2		# B5+S(B6+S(B7+SB8))

	fmul.x		%fp0,%fp1		# S(A4+...)
	fmul.x		%fp0,%fp2		# S(B5+...)

	fadd.d		SINA3(%pc),%fp1		# A3+S(A4+...)
	fadd.d		COSB4(%pc),%fp2		# B4+S(B5+...)

	fmul.x		%fp0,%fp1		# S(A3+...)
	fmul.x		%fp0,%fp2		# S(B4+...)

	fadd.x		SINA2(%pc),%fp1		# A2+S(A3+...)
	fadd.x		COSB3(%pc),%fp2		# B3+S(B4+...)

	fmul.x		%fp0,%fp1		# S(A2+...)
	fmul.x		%fp0,%fp2		# S(B3+...)

	fadd.x		SINA1(%pc),%fp1		# A1+S(A2+...)
	fadd.x		COSB2(%pc),%fp2		# B2+S(B3+...)

	fmul.x		%fp0,%fp1		# S(A1+...)
	fmul.x		%fp2,%fp0		# S(B2+...)

	fmul.x		RPRIME(%a6),%fp1	# R'S(A1+...)
	fadd.s		COSB1(%pc),%fp0		# B1+S(B2...)
	fmul.x		SPRIME(%a6),%fp0	# S'(B1+S(B2+...))

	fmovm.x		(%sp)+,&0x20		# restore fp2

	fmov.l		%d0,%fpcr
	fadd.x		RPRIME(%a6),%fp1	# COS(X)
	bsr		sto_cos			# store cosine result
	fadd.s		POSNEG1(%a6),%fp0	# SIN(X)
	bra		t_inx2

NEVEN:
#--REGISTERS SAVED SO FAR: FP2.
#--NEVEN body (N even).  The cosine-coefficient polynomial (COSBn) is
#--evaluated in fp1 and becomes COS(X); the sine-coefficient polynomial
#--(SINAn) is evaluated in fp2/fp0 and becomes SIN(X).
#--On entry fp0 = R and d1 = N rotated right by one bit (done at SCCONT);
#--one more rotate below puts bit 1 of N into the sign position.
	fmovm.x		&0x04,-(%sp)		# save fp2

	fmov.x		%fp0,RPRIME(%a6)
	fmul.x		%fp0,%fp0		# FP0 IS S = R*R

	fmov.d		COSB8(%pc),%fp1		# B8
	fmov.d		SINA7(%pc),%fp2		# A7

	fmul.x		%fp0,%fp1		# SB8
	fmov.x		%fp0,SPRIME(%a6)
	fmul.x		%fp0,%fp2		# SA7

	ror.l		&1,%d1			# bit 31 of d1 = bit 1 of N
	and.l		&0x80000000,%d1

	fadd.d		COSB7(%pc),%fp1		# B7+SB8
	fadd.d		SINA6(%pc),%fp2		# A6+SA7

	eor.l		%d1,RPRIME(%a6)		# R' = R with sign from d1
	eor.l		%d1,SPRIME(%a6)		# S' = S with sign from d1

	fmul.x		%fp0,%fp1		# S(B7+SB8)

	or.l		&0x3F800000,%d1		# +-1.0 in single precision
	mov.l		%d1,POSNEG1(%a6)

	fmul.x		%fp0,%fp2		# S(A6+SA7)

	fadd.d		COSB6(%pc),%fp1		# B6+S(B7+SB8)
	fadd.d		SINA5(%pc),%fp2		# A5+S(A6+SA7)

	fmul.x		%fp0,%fp1		# S(B6+S(B7+SB8))
	fmul.x		%fp0,%fp2		# S(A5+S(A6+SA7))

	fadd.d		COSB5(%pc),%fp1		# B5+S(B6+S(B7+SB8))
	fadd.d		SINA4(%pc),%fp2		# A4+S(A5+S(A6+SA7))

	fmul.x		%fp0,%fp1		# S(B5+...)
	fmul.x		%fp0,%fp2		# S(A4+...)

	fadd.d		COSB4(%pc),%fp1		# B4+S(B5+...)
	fadd.d		SINA3(%pc),%fp2		# A3+S(A4+...)

	fmul.x		%fp0,%fp1		# S(B4+...)
	fmul.x		%fp0,%fp2		# S(A3+...)

	fadd.x		COSB3(%pc),%fp1		# B3+S(B4+...)
	fadd.x		SINA2(%pc),%fp2		# A2+S(A3+...)

	fmul.x		%fp0,%fp1		# S(B3+...)
	fmul.x		%fp0,%fp2		# S(A2+...)

	fadd.x		COSB2(%pc),%fp1		# B2+S(B3+...)
	fadd.x		SINA1(%pc),%fp2		# A1+S(A2+...)

	fmul.x		%fp0,%fp1		# S(B2+...)
	fmul.x		%fp2,%fp0		# S(A1+...)


	fadd.s		COSB1(%pc),%fp1		# B1+S(B2...)
	fmul.x		RPRIME(%a6),%fp0	# R'S(A1+...)
#--NEVEN, final steps: finish the two products, restore fp2, switch to the
#--caller's FPCR, then produce COS(X) in fp1 (passed to sto_cos) and SIN(X)
#--in fp0.
	fmul.x		SPRIME(%a6),%fp1	# S'(B1+S(B2+...))

	fmovm.x		(%sp)+,&0x20		# restore fp2

	fmov.l		%d0,%fpcr
	fadd.s		POSNEG1(%a6),%fp1	# COS(X)
	bsr		sto_cos			# store cosine result
	fadd.x		RPRIME(%a6),%fp0	# SIN(X)
	bra		t_inx2

################################################

#--Big or small?  0x3FFF8000 is the compact encoding of 1.0; anything above
#--it goes to the general reduction, otherwise fall through to SCSM.
SCBORS:
	cmp.l		%d1,&0x3FFF8000
	bgt.w		SREDUCEX

################################################

#--Small argument: the cosine is 1.0 minus the single-precision constant
#--0x00800000 (2**(-126)), computed under the caller's FPCR and passed to
#--sto_cos; the sine is X itself, reloaded as the last operation.
SCSM:
#	mov.w		&0x0000,XDCARE(%a6)
	fmov.s		&0x3F800000,%fp1

	fmov.l		%d0,%fpcr
	fsub.s		&0x00800000,%fp1
	bsr		sto_cos			# store cosine result
	fmov.l		%fpcr,%d0		# d0 must have fpcr,too
	mov.b		&FMOV_OP,%d1		# last inst is MOVE
	fmov.x		X(%a6),%fp0
	bra		t_catch

##############################################

	global		ssincosd
#--SIN AND COS OF X FOR DENORMALIZED X
#--The cosine handed to sto_cos is exactly 1.0; d0 is preserved across the
#--call before continuing at t_extdnrm.
ssincosd:
	mov.l		%d0,-(%sp)		# save d0
	fmov.s		&0x3F800000,%fp1
	bsr		sto_cos			# store cosine result
	mov.l		(%sp)+,%d0		# restore d0
	bra		t_extdnrm

############################################

#--WHEN REDUCEX IS USED, THE CODE WILL INEVITABLY BE SLOW.
#--THIS REDUCTION METHOD, HOWEVER, IS MUCH FASTER THAN USING
#--THE REMAINDER INSTRUCTION WHICH IS NOW IN SOFTWARE.
SREDUCEX:
	fmovm.x		&0x3c,-(%sp)		# save {fp2-fp5}
	mov.l		%d2,-(%sp)		# save d2
	fmov.s		&0x00000000,%fp1	# fp1 = 0

#--If compact form of abs(arg) in d1=$7ffeffff, argument is so large that
#--there is a danger of unwanted overflow in first LOOP iteration.  In this
#--case, reduce argument by one remainder step to make subsequent reduction
#--safe.
	cmp.l		%d1,&0x7ffeffff		# is arg dangerously large?
#--SREDUCEX, continued.  The branch below consumes the condition codes of
#--the compare of d1 against 0x7ffeffff on the preceding line.
	bne.b		SLOOP			# no

# yes; create 2**16383*PI/2
	mov.w		&0x7ffe,FP_SCR0_EX(%a6)
	mov.l		&0xc90fdaa2,FP_SCR0_HI(%a6)
	clr.l		FP_SCR0_LO(%a6)

# create low half of 2**16383*PI/2 at FP_SCR1
	mov.w		&0x7fdc,FP_SCR1_EX(%a6)
	mov.l		&0x85a308d3,FP_SCR1_HI(%a6)
	clr.l		FP_SCR1_LO(%a6)

	ftest.x		%fp0			# test sign of argument
	fblt.w		sred_neg

#--Positive argument: set the sign bit of both constants so that the adds
#--below subtract them.
	or.b		&0x80,FP_SCR0_EX(%a6)	# positive arg
	or.b		&0x80,FP_SCR1_EX(%a6)
sred_neg:
	fadd.x		FP_SCR0(%a6),%fp0	# high part of reduction is exact
	fmov.x		%fp0,%fp1		# save high result in fp1
	fadd.x		FP_SCR1(%a6),%fp0	# low part of reduction
	fsub.x		%fp0,%fp1		# determine low component of result
	fadd.x		FP_SCR1(%a6),%fp1	# fp0/fp1 are reduced argument.

#--ON ENTRY, FP0 IS X, ON RETURN, FP0 IS X REM PI/2, |X| <= PI/4.
#--integer quotient will be stored in N
#--Intermediate remainder is 66-bit long; (R,r) in (FP0,FP1)
SLOOP:
	fmov.x		%fp0,INARG(%a6)		# +-2**K * F, 1 <= F < 2
	mov.w		INARG(%a6),%d1		# sign/exponent word of the argument
	mov.l		%d1,%a1			# save a copy of d1
	and.l		&0x00007FFF,%d1
	sub.l		&0x00003FFF,%d1		# d1 = K
	cmp.l		%d1,&28
	ble.b		SLASTLOOP
SCONTLOOP:
	sub.l		&27,%d1			# d1 = L := K-27
	mov.b		&0,ENDFLAG(%a6)
	bra.b		SWORK
SLASTLOOP:
	clr.l		%d1			# d1 = L := 0
	mov.b		&1,ENDFLAG(%a6)

SWORK:
#--FIND THE REMAINDER OF (R,r) W.R.T.	2**L * (PI/2). L IS SO CHOSEN
#--THAT	INT( X * (2/PI) / 2**(L) ) < 2**29.

#--CREATE 2**(-L) * (2/PI), SIGN(INARG)*2**(63),
#--2**L * (PIby2_1), 2**L * (PIby2_2)

	mov.l		&0x00003FFE,%d2		# BIASED EXP OF 2/PI
	sub.l		%d1,%d2			# BIASED EXP OF 2**(-L)*(2/PI)

	mov.l		&0xA2F9836E,FP_SCR0_HI(%a6)
	mov.l		&0x4E44152A,FP_SCR0_LO(%a6)
	mov.w		%d2,FP_SCR0_EX(%a6)	# FP_SCR0 = 2**(-L)*(2/PI)

	fmov.x		%fp0,%fp2
	fmul.x		FP_SCR0(%a6),%fp2	# fp2 = X * 2**(-L)*(2/PI)

#--WE MUST NOW FIND INT(FP2). SINCE WE NEED THIS VALUE IN
#--FLOATING POINT FORMAT, THE TWO FMOVE'S	FMOVE.L FP <--> N
#--WILL BE TOO INEFFICIENT. THE WAY AROUND IT IS THAT
#--(SIGN(INARG)*2**63	+	FP2) - SIGN(INARG)*2**63 WILL GIVE
#--US THE DESIRED VALUE IN FLOATING POINT.
	mov.l		%a1,%d2			# saved sign/exponent word
	swap		%d2			# move it to the upper half
	and.l		&0x80000000,%d2		# keep the sign bit only
	or.l		&0x5F000000,%d2		# d2 = SIGN(INARG)*2**63 IN SGL
	mov.l		%d2,TWOTO63(%a6)
	fadd.s		TWOTO63(%a6),%fp2	# THE FRACTIONAL PART OF FP2 IS ROUNDED
	fsub.s		TWOTO63(%a6),%fp2	# fp2 = N
#	fint.x		%fp2

#--CREATING 2**(L)*Piby2_1 and 2**(L)*Piby2_2
	mov.l		%d1,%d2			# d2 = L

	add.l		&0x00003FFF,%d2		# BIASED EXP OF 2**L * (PI/2)
	mov.w		%d2,FP_SCR0_EX(%a6)
	mov.l		&0xC90FDAA2,FP_SCR0_HI(%a6)
	clr.l		FP_SCR0_LO(%a6)		# FP_SCR0 = 2**(L) * Piby2_1

	add.l		&0x00003FDD,%d1
	mov.w		%d1,FP_SCR1_EX(%a6)
	mov.l		&0x85A308D3,FP_SCR1_HI(%a6)
	clr.l		FP_SCR1_LO(%a6)		# FP_SCR1 = 2**(L) * Piby2_2

	mov.b		ENDFLAG(%a6),%d1	# d1 now carries the last-pass flag

#--We are now ready to perform (R+r) - N*P1 - N*P2, P1 = 2**(L) * Piby2_1 and
#--P2 = 2**(L) * Piby2_2
	fmov.x		%fp2,%fp4		# fp4 = N
	fmul.x		FP_SCR0(%a6),%fp4	# fp4 = W = N*P1
	fmov.x		%fp2,%fp5		# fp5 = N
	fmul.x		FP_SCR1(%a6),%fp5	# fp5 = w = N*P2
	fmov.x		%fp4,%fp3		# fp3 = W = N*P1

#--we want P+p = W+w  but  |p| <= half ulp of P
#--Then, we need to compute  A := R-P   and  a := r-p
	fadd.x		%fp5,%fp3		# fp3 = P
	fsub.x		%fp3,%fp4		# fp4 = W-P

	fsub.x		%fp3,%fp0		# fp0 = A := R - P
	fadd.x		%fp5,%fp4		# fp4 = p = (W-P)+w

	fmov.x		%fp0,%fp3		# fp3 = A
	fsub.x		%fp4,%fp1		# fp1 = a := r - p

#--Now we need to normalize (A,a) to  "new (R,r)" where R+r = A+a but
#--|r| <= half ulp of R.
#--SWORK, final step: fold the low part back in, then either finish (last
#--pass, flag in d1) or recompute r and iterate from SLOOP.
	fadd.x		%fp1,%fp0		# fp0 = R := A+a
#--No need to calculate r if this is the last loop
	cmp.b		%d1,&0
	bgt.w		SRESTORE

#--Need to calculate r
	fsub.x		%fp0,%fp3		# fp3 = A-R
	fadd.x		%fp3,%fp1		# fp1 = r := (A-R)+a
	bra.w		SLOOP

#--Reduction finished: store N as an integer in INT, restore the registers
#--saved at SREDUCEX, and resume at SINCONT (ADJN < 4) or SCCONT otherwise.
SRESTORE:
	fmov.l		%fp2,INT(%a6)
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		(%sp)+,&0x3c		# restore {fp2-fp5}

	mov.l		ADJN(%a6),%d1
	cmp.l		%d1,&4

	blt.w		SINCONT
	bra.w		SCCONT

#########################################################################
# stan():  computes the tangent of a normalized input			#
# stand(): computes the tangent of a denormalized input			#
#									#
# INPUT *************************************************************** #
#	a0 = pointer to extended precision input			#
#	d0 = round precision,mode					#
#									#
# OUTPUT ************************************************************** #
#	fp0 = tan(X)							#
#									#
# ACCURACY and MONOTONICITY ******************************************* #
#	The returned result is within 3 ulp in 64 significant bit, i.e.	#
#	within 0.5001 ulp to 53 bits if the result is subsequently	#
#	rounded to double precision. The result is provably monotonic	#
#	in double precision.						#
#									#
# ALGORITHM *********************************************************** #
#									#
#	1. If |X| >= 15Pi or |X| < 2**(-40), go to 6.			#
#									#
#	2. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let	#
#	   k = N mod 2, so in particular, k = 0 or 1.			#
#									#
#	3. If k is odd, go to 5.					#
#									#
#	4. (k is even) Tan(X) = tan(r) and tan(r) is approximated by a	#
#	   rational function U/V where					#
#		U = r + r*s*(P1 + s*(P2 + s*P3)), and			#
#		V = 1 + s*(Q1 + s*(Q2 + s*(Q3 + s*Q4))), s = r*r.	#
#	   Exit.							#
#									#
#	5. (k is odd) Tan(X) = -cot(r). Since tan(r) is approximated by	#
#	   a rational function U/V where				#
#		U = r + r*s*(P1 + s*(P2 + s*P3)), and			#
#		V = 1 + s*(Q1 + s*(Q2 + s*(Q3 + s*Q4))), s = r*r,	#
#	   -Cot(r) = -V/U. Exit.					#
#									#
#	6. If |X| > 1, go to 8.						#
#									#
#	7. (|X|<2**(-40)) Tan(X) = X. Exit.				#
#									#
#	8. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, go back	#
#	   to 2.							#
#									#
#########################################################################

#--Coefficients of the rational approximation U/V described above.  TANQ4,
#--TANP3 and TANQ3 are read as doubles (fmov.d/fadd.d) by the code below;
#--the remaining TANPn/TANQn are read as extended (fadd.x).
TANQ4:
	long		0x3EA0B759,0xF50F8688
TANP3:
	long		0xBEF2BAA5,0xA8924F04

TANQ3:
	long		0xBF346F59,0xB39BA65F,0x00000000,0x00000000

TANP2:
	long		0x3FF60000,0xE073D3FC,0x199C4A00,0x00000000

TANQ2:
	long		0x3FF90000,0xD23CD684,0x15D95FA1,0x00000000

TANP1:
	long		0xBFFC0000,0x8895A6C5,0xFB423BCA,0x00000000

TANQ1:
	long		0xBFFD0000,0xEEF57E0D,0xA84BC8CE,0x00000000

INVTWOPI:
	long		0x3FFC0000,0xA2F9836E,0x4E44152A,0x00000000

TWOPI1:
	long		0x40010000,0xC90FDAA2,0x00000000,0x00000000
TWOPI2:
	long		0x3FDF0000,0x85A308D4,0x00000000,0x00000000

#--N*PI/2, -32 <= N <= 32, IN A LEADING TERM IN EXT. AND TRAILING
#--TERM IN SGL. NOTE THAT PI IS 64-BIT LONG, THUS N*PI/2 IS AT
#--MOST 69 BITS LONG.
#	global		PITBL
#--65 entries of 16 bytes, ordered N = -32 ... 32; the N = 0 entry sits at
#--PITBL+0x200.  Each entry is three longs (extended leading term) followed
#--by one long (single-precision trailing term), which is why users subtract
#--with fsub.x (%a1)+ and then fsub.s (%a1).
PITBL:
	long		0xC0040000,0xC90FDAA2,0x2168C235,0x21800000
	long		0xC0040000,0xC2C75BCD,0x105D7C23,0xA0D00000
	long		0xC0040000,0xBC7EDCF7,0xFF523611,0xA1E80000
	long		0xC0040000,0xB6365E22,0xEE46F000,0x21480000
	long		0xC0040000,0xAFEDDF4D,0xDD3BA9EE,0xA1200000
	long		0xC0040000,0xA9A56078,0xCC3063DD,0x21FC0000
	long		0xC0040000,0xA35CE1A3,0xBB251DCB,0x21100000
	long		0xC0040000,0x9D1462CE,0xAA19D7B9,0xA1580000
	long		0xC0040000,0x96CBE3F9,0x990E91A8,0x21E00000
	long		0xC0040000,0x90836524,0x88034B96,0x20B00000
	long		0xC0040000,0x8A3AE64F,0x76F80584,0xA1880000
	long		0xC0040000,0x83F2677A,0x65ECBF73,0x21C40000
	long		0xC0030000,0xFB53D14A,0xA9C2F2C2,0x20000000
	long		0xC0030000,0xEEC2D3A0,0x87AC669F,0x21380000
	long		0xC0030000,0xE231D5F6,0x6595DA7B,0xA1300000
	long		0xC0030000,0xD5A0D84C,0x437F4E58,0x9FC00000
	long		0xC0030000,0xC90FDAA2,0x2168C235,0x21000000
	long		0xC0030000,0xBC7EDCF7,0xFF523611,0xA1680000
	long		0xC0030000,0xAFEDDF4D,0xDD3BA9EE,0xA0A00000
	long		0xC0030000,0xA35CE1A3,0xBB251DCB,0x20900000
	long		0xC0030000,0x96CBE3F9,0x990E91A8,0x21600000
	long		0xC0030000,0x8A3AE64F,0x76F80584,0xA1080000
	long		0xC0020000,0xFB53D14A,0xA9C2F2C2,0x1F800000
	long		0xC0020000,0xE231D5F6,0x6595DA7B,0xA0B00000
	long		0xC0020000,0xC90FDAA2,0x2168C235,0x20800000
	long		0xC0020000,0xAFEDDF4D,0xDD3BA9EE,0xA0200000
	long		0xC0020000,0x96CBE3F9,0x990E91A8,0x20E00000
	long		0xC0010000,0xFB53D14A,0xA9C2F2C2,0x1F000000
	long		0xC0010000,0xC90FDAA2,0x2168C235,0x20000000
	long		0xC0010000,0x96CBE3F9,0x990E91A8,0x20600000
	long		0xC0000000,0xC90FDAA2,0x2168C235,0x1F800000
	long		0xBFFF0000,0xC90FDAA2,0x2168C235,0x1F000000
	long		0x00000000,0x00000000,0x00000000,0x00000000
	long		0x3FFF0000,0xC90FDAA2,0x2168C235,0x9F000000
	long		0x40000000,0xC90FDAA2,0x2168C235,0x9F800000
	long		0x40010000,0x96CBE3F9,0x990E91A8,0xA0600000
	long		0x40010000,0xC90FDAA2,0x2168C235,0xA0000000
	long		0x40010000,0xFB53D14A,0xA9C2F2C2,0x9F000000
	long		0x40020000,0x96CBE3F9,0x990E91A8,0xA0E00000
	long		0x40020000,0xAFEDDF4D,0xDD3BA9EE,0x20200000
	long		0x40020000,0xC90FDAA2,0x2168C235,0xA0800000
	long		0x40020000,0xE231D5F6,0x6595DA7B,0x20B00000
	long		0x40020000,0xFB53D14A,0xA9C2F2C2,0x9F800000
	long		0x40030000,0x8A3AE64F,0x76F80584,0x21080000
	long		0x40030000,0x96CBE3F9,0x990E91A8,0xA1600000
	long		0x40030000,0xA35CE1A3,0xBB251DCB,0xA0900000
	long		0x40030000,0xAFEDDF4D,0xDD3BA9EE,0x20A00000
	long		0x40030000,0xBC7EDCF7,0xFF523611,0x21680000
	long		0x40030000,0xC90FDAA2,0x2168C235,0xA1000000
	long		0x40030000,0xD5A0D84C,0x437F4E58,0x1FC00000
	long		0x40030000,0xE231D5F6,0x6595DA7B,0x21300000
	long		0x40030000,0xEEC2D3A0,0x87AC669F,0xA1380000
	long		0x40030000,0xFB53D14A,0xA9C2F2C2,0xA0000000
	long		0x40040000,0x83F2677A,0x65ECBF73,0xA1C40000
	long		0x40040000,0x8A3AE64F,0x76F80584,0x21880000
	long		0x40040000,0x90836524,0x88034B96,0xA0B00000
	long		0x40040000,0x96CBE3F9,0x990E91A8,0xA1E00000
	long		0x40040000,0x9D1462CE,0xAA19D7B9,0x21580000
	long		0x40040000,0xA35CE1A3,0xBB251DCB,0xA1100000
	long		0x40040000,0xA9A56078,0xCC3063DD,0xA1FC0000
	long		0x40040000,0xAFEDDF4D,0xDD3BA9EE,0x21200000
	long		0x40040000,0xB6365E22,0xEE46F000,0xA1480000
	long		0x40040000,0xBC7EDCF7,0xFF523611,0x21E80000
	long		0x40040000,0xC2C75BCD,0x105D7C23,0x20D00000
	long		0x40040000,0xC90FDAA2,0x2168C235,0xA1800000

#--Scratch aliases used by the argument-reduction loops.  TWOTO63 and INT
#--name the same location (L_SCR1).
	set		INARG,FP_SCR0

	set		TWOTO63,L_SCR1
	set		INT,L_SCR1
	set		ENDFLAG,L_SCR2

#--stan: tangent of the extended-precision operand at (a0); d0 is moved
#--into FPCR just before the final operation and the result is left in fp0.
	global		stan
stan:
	fmov.x		(%a0),%fp0		# LOAD INPUT

#--Compact form of |X|: exponent word in the upper half of d1, the top 16
#--mantissa bits in the lower half, sign bit cleared.
	mov.l		(%a0),%d1
	mov.w		4(%a0),%d1
	and.l		&0x7FFFFFFF,%d1

	cmp.l		%d1,&0x3FD78000		# |X| >= 2**(-40)?
	bge.b		TANOK1
	bra.w		TANSM
TANOK1:
	cmp.l		%d1,&0x4004BC7E		# |X| < 15 PI?
#--stan, continued.  The branch below consumes the condition codes of the
#--compare of compact |X| against 0x4004BC7E (15*PI) on the preceding line.
	blt.b		TANMAIN
	bra.w		REDUCEX

TANMAIN:
#--THIS IS THE USUAL CASE, |X| <= 15 PI.
#--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
	fmov.x		%fp0,%fp1
	fmul.d		TWOBYPI(%pc),%fp1	# X*2/PI

	lea.l		PITBL+0x200(%pc),%a1	# TABLE OF N*PI/2, N = -32,...,32

	fmov.l		%fp1,%d1		# CONVERT TO INTEGER

	asl.l		&4,%d1			# N * 16 = byte offset of entry N
	add.l		%d1,%a1			# ADDRESS N*PIBY2 IN Y1, Y2

	fsub.x		(%a1)+,%fp0		# X-Y1

	fsub.s		(%a1),%fp0		# FP0 IS R = (X-Y1)-Y2

#--d1 holds N*16 here, so rotating right by 5 puts bit 0 of N into bit 31.
	ror.l		&5,%d1
	and.l		&0x80000000,%d1		# N WAS ODD IFF D1 < 0

#--TANCONT: entered with fp0 = R and the parity of N in the sign of d1
#--(also the continuation point from RESTORE).
TANCONT:
	fmovm.x		&0x0c,-(%sp)		# save fp2,fp3

	cmp.l		%d1,&0
	blt.w		NODD

#--N even: build U in fp0 and V in fp1.
	fmov.x		%fp0,%fp1
	fmul.x		%fp1,%fp1		# S = R*R

	fmov.d		TANQ4(%pc),%fp3
	fmov.d		TANP3(%pc),%fp2

	fmul.x		%fp1,%fp3		# SQ4
	fmul.x		%fp1,%fp2		# SP3

	fadd.d		TANQ3(%pc),%fp3		# Q3+SQ4
	fadd.x		TANP2(%pc),%fp2		# P2+SP3

	fmul.x		%fp1,%fp3		# S(Q3+SQ4)
	fmul.x		%fp1,%fp2		# S(P2+SP3)

	fadd.x		TANQ2(%pc),%fp3		# Q2+S(Q3+SQ4)
	fadd.x		TANP1(%pc),%fp2		# P1+S(P2+SP3)

	fmul.x		%fp1,%fp3		# S(Q2+S(Q3+SQ4))
	fmul.x		%fp1,%fp2		# S(P1+S(P2+SP3))

	fadd.x		TANQ1(%pc),%fp3		# Q1+S(Q2+S(Q3+SQ4))
	fmul.x		%fp0,%fp2		# RS(P1+S(P2+SP3))

	fmul.x		%fp3,%fp1		# S(Q1+S(Q2+S(Q3+SQ4)))

	fadd.x		%fp2,%fp0		# R+RS(P1+S(P2+SP3))

	fadd.s		&0x3F800000,%fp1	# 1+S(Q1+...)
#--TANCONT (N even), final steps: fp0 = U and fp1 = V at this point; the
#--divide U/V runs under the caller's FPCR.

	fmovm.x		(%sp)+,&0x30		# restore fp2,fp3

	fmov.l		%d0,%fpcr		# restore users round mode,prec
	fdiv.x		%fp1,%fp0		# last inst - possible exception set
	bra		t_inx2

#--N odd: build U in fp1 and V in fp0, then return V/(-U).
NODD:
	fmov.x		%fp0,%fp1
	fmul.x		%fp0,%fp0		# S = R*R

	fmov.d		TANQ4(%pc),%fp3
	fmov.d		TANP3(%pc),%fp2

	fmul.x		%fp0,%fp3		# SQ4
	fmul.x		%fp0,%fp2		# SP3

	fadd.d		TANQ3(%pc),%fp3		# Q3+SQ4
	fadd.x		TANP2(%pc),%fp2		# P2+SP3

	fmul.x		%fp0,%fp3		# S(Q3+SQ4)
	fmul.x		%fp0,%fp2		# S(P2+SP3)

	fadd.x		TANQ2(%pc),%fp3		# Q2+S(Q3+SQ4)
	fadd.x		TANP1(%pc),%fp2		# P1+S(P2+SP3)

	fmul.x		%fp0,%fp3		# S(Q2+S(Q3+SQ4))
	fmul.x		%fp0,%fp2		# S(P1+S(P2+SP3))

	fadd.x		TANQ1(%pc),%fp3		# Q1+S(Q2+S(Q3+SQ4))
	fmul.x		%fp1,%fp2		# RS(P1+S(P2+SP3))

	fmul.x		%fp3,%fp0		# S(Q1+S(Q2+S(Q3+SQ4)))

	fadd.x		%fp2,%fp1		# R+RS(P1+S(P2+SP3))
	fadd.s		&0x3F800000,%fp0	# 1+S(Q1+...)

	fmovm.x		(%sp)+,&0x30		# restore fp2,fp3

	fmov.x		%fp1,-(%sp)		# push U ...
	eor.l		&0x80000000,(%sp)	# ... and flip its sign in memory

	fmov.l		%d0,%fpcr		# restore users round mode,prec
	fdiv.x		(%sp)+,%fp0		# last inst - possible exception set
	bra		t_inx2

TANBORS:
#--IF |X| > 15PI, WE USE THE GENERAL ARGUMENT REDUCTION.
#--IF |X| < 2**(-40), RETURN X OR 1.
	cmp.l		%d1,&0x3FFF8000		# compact |X| above 1.0?
	bgt.b		REDUCEX

#--Small argument: the result is X itself, pushed and reloaded so that the
#--reload is the last operation under the caller's FPCR.
TANSM:
	fmov.x		%fp0,-(%sp)
	fmov.l		%d0,%fpcr		# restore users round mode,prec
	mov.b		&FMOV_OP,%d1		# last inst is MOVE
	fmov.x		(%sp)+,%fp0		# last inst - possible exception set
	bra		t_catch

	global		stand
#--TAN(X) = X FOR DENORMALIZED X
stand:
	bra		t_extdnrm

#--WHEN REDUCEX IS USED, THE CODE WILL INEVITABLY BE SLOW.
#--THIS REDUCTION METHOD, HOWEVER, IS MUCH FASTER THAN USING
#--THE REMAINDER INSTRUCTION WHICH IS NOW IN SOFTWARE.
REDUCEX:
#--General argument reduction for stan.  On entry fp0 = X and d1 = compact
#--|X|; fp2-fp5 and d2 are saved here and restored at RESTORE.
	fmovm.x		&0x3c,-(%sp)		# save {fp2-fp5}
	mov.l		%d2,-(%sp)		# save d2
	fmov.s		&0x00000000,%fp1	# fp1 = 0

#--If compact form of abs(arg) in d1=$7ffeffff, argument is so large that
#--there is a danger of unwanted overflow in first LOOP iteration.  In this
#--case, reduce argument by one remainder step to make subsequent reduction
#--safe.
	cmp.l		%d1,&0x7ffeffff		# is arg dangerously large?
	bne.b		LOOP			# no

# yes; create 2**16383*PI/2
	mov.w		&0x7ffe,FP_SCR0_EX(%a6)
	mov.l		&0xc90fdaa2,FP_SCR0_HI(%a6)
	clr.l		FP_SCR0_LO(%a6)

# create low half of 2**16383*PI/2 at FP_SCR1
	mov.w		&0x7fdc,FP_SCR1_EX(%a6)
	mov.l		&0x85a308d3,FP_SCR1_HI(%a6)
	clr.l		FP_SCR1_LO(%a6)

	ftest.x		%fp0			# test sign of argument
	fblt.w		red_neg

#--Positive argument: set the sign bit of both constants so that the adds
#--below subtract them.
	or.b		&0x80,FP_SCR0_EX(%a6)	# positive arg
	or.b		&0x80,FP_SCR1_EX(%a6)
red_neg:
	fadd.x		FP_SCR0(%a6),%fp0	# high part of reduction is exact
	fmov.x		%fp0,%fp1		# save high result in fp1
	fadd.x		FP_SCR1(%a6),%fp0	# low part of reduction
	fsub.x		%fp0,%fp1		# determine low component of result
	fadd.x		FP_SCR1(%a6),%fp1	# fp0/fp1 are reduced argument.

#--ON ENTRY, FP0 IS X, ON RETURN, FP0 IS X REM PI/2, |X| <= PI/4.
#--integer quotient will be stored in N
#--Intermediate remainder is 66-bit long; (R,r) in (FP0,FP1)
LOOP:
	fmov.x		%fp0,INARG(%a6)		# +-2**K * F, 1 <= F < 2
	mov.w		INARG(%a6),%d1		# sign/exponent word of the argument
	mov.l		%d1,%a1			# save a copy of d1
	and.l		&0x00007FFF,%d1
	sub.l		&0x00003FFF,%d1		# d1 = K
	cmp.l		%d1,&28
	ble.b		LASTLOOP
CONTLOOP:
	sub.l		&27,%d1			# d1 = L := K-27
	mov.b		&0,ENDFLAG(%a6)
	bra.b		WORK
LASTLOOP:
	clr.l		%d1			# d1 = L := 0
	mov.b		&1,ENDFLAG(%a6)

WORK:
#--FIND THE REMAINDER OF (R,r) W.R.T.	2**L * (PI/2). L IS SO CHOSEN
#--THAT	INT( X * (2/PI) / 2**(L) ) < 2**29.
#--WORK body: one remainder step.  On entry d1 = L, a1 holds the saved
#--sign/exponent word of the argument, and (fp0,fp1) = (R,r).

#--CREATE 2**(-L) * (2/PI), SIGN(INARG)*2**(63),
#--2**L * (PIby2_1), 2**L * (PIby2_2)

	mov.l		&0x00003FFE,%d2		# BIASED EXP OF 2/PI
	sub.l		%d1,%d2			# BIASED EXP OF 2**(-L)*(2/PI)

	mov.l		&0xA2F9836E,FP_SCR0_HI(%a6)
	mov.l		&0x4E44152A,FP_SCR0_LO(%a6)
	mov.w		%d2,FP_SCR0_EX(%a6)	# FP_SCR0 = 2**(-L)*(2/PI)

	fmov.x		%fp0,%fp2
	fmul.x		FP_SCR0(%a6),%fp2	# fp2 = X * 2**(-L)*(2/PI)

#--WE MUST NOW FIND INT(FP2). SINCE WE NEED THIS VALUE IN
#--FLOATING POINT FORMAT, THE TWO FMOVE'S	FMOVE.L FP <--> N
#--WILL BE TOO INEFFICIENT. THE WAY AROUND IT IS THAT
#--(SIGN(INARG)*2**63	+	FP2) - SIGN(INARG)*2**63 WILL GIVE
#--US THE DESIRED VALUE IN FLOATING POINT.
	mov.l		%a1,%d2			# saved sign/exponent word
	swap		%d2			# move it to the upper half
	and.l		&0x80000000,%d2		# keep the sign bit only
	or.l		&0x5F000000,%d2		# d2 = SIGN(INARG)*2**63 IN SGL
	mov.l		%d2,TWOTO63(%a6)
	fadd.s		TWOTO63(%a6),%fp2	# THE FRACTIONAL PART OF FP2 IS ROUNDED
	fsub.s		TWOTO63(%a6),%fp2	# fp2 = N
#	fintrz.x	%fp2,%fp2

#--CREATING 2**(L)*Piby2_1 and 2**(L)*Piby2_2
	mov.l		%d1,%d2			# d2 = L

	add.l		&0x00003FFF,%d2		# BIASED EXP OF 2**L * (PI/2)
	mov.w		%d2,FP_SCR0_EX(%a6)
	mov.l		&0xC90FDAA2,FP_SCR0_HI(%a6)
	clr.l		FP_SCR0_LO(%a6)		# FP_SCR0 = 2**(L) * Piby2_1

	add.l		&0x00003FDD,%d1
	mov.w		%d1,FP_SCR1_EX(%a6)
	mov.l		&0x85A308D3,FP_SCR1_HI(%a6)
	clr.l		FP_SCR1_LO(%a6)		# FP_SCR1 = 2**(L) * Piby2_2

	mov.b		ENDFLAG(%a6),%d1	# d1 now carries the last-pass flag

#--We are now ready to perform (R+r) - N*P1 - N*P2, P1 = 2**(L) * Piby2_1 and
#--P2 = 2**(L) * Piby2_2
	fmov.x		%fp2,%fp4		# fp4 = N
	fmul.x		FP_SCR0(%a6),%fp4	# fp4 = W = N*P1
	fmov.x		%fp2,%fp5		# fp5 = N
	fmul.x		FP_SCR1(%a6),%fp5	# fp5 = w = N*P2
	fmov.x		%fp4,%fp3		# fp3 = W = N*P1

#--we want P+p = W+w  but  |p| <= half ulp of P
#--Then, we need to compute  A := R-P   and  a := r-p
	fadd.x		%fp5,%fp3		# fp3 = P
	fsub.x		%fp3,%fp4		# fp4 = W-P

	fsub.x		%fp3,%fp0		# fp0 = A := R - P
	fadd.x		%fp5,%fp4		# fp4 = p = (W-P)+w

	fmov.x		%fp0,%fp3		# fp3 = A
	fsub.x		%fp4,%fp1		# fp1 = a := r - p

#--Now we need to normalize (A,a) to  "new (R,r)" where R+r = A+a but
#--|r| <= half ulp of R.
	fadd.x		%fp1,%fp0		# fp0 = R := A+a
#--No need to calculate r if this is the last loop
	cmp.b		%d1,&0
	bgt.w		RESTORE

#--Need to calculate r
	fsub.x		%fp0,%fp3		# fp3 = A-R
	fadd.x		%fp3,%fp1		# fp1 = r := (A-R)+a
	bra.w		LOOP

#--Reduction finished: store N as an integer in INT, restore the registers
#--saved at REDUCEX, rotate bit 0 of N into the sign of d1 and resume at
#--TANCONT.
RESTORE:
	fmov.l		%fp2,INT(%a6)
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		(%sp)+,&0x3c		# restore {fp2-fp5}

	mov.l		INT(%a6),%d1
	ror.l		&1,%d1

	bra.w		TANCONT

#########################################################################
# satan():  computes the arctangent of a normalized number		#
# satand(): computes the arctangent of a denormalized number		#
#									#
# INPUT *************************************************************** #
#	a0 = pointer to extended precision input			#
#	d0 = round precision,mode					#
#									#
# OUTPUT ************************************************************** #
#	fp0 = arctan(X)							#
#									#
# ACCURACY and MONOTONICITY ******************************************* #
#	The returned result is within 2 ulps in 64 significant bit,	#
#	i.e. within 0.5001 ulp to 53 bits if the result is subsequently	#
#	rounded to double precision. The result is provably monotonic	#
#	in double precision.						#
#									#
# ALGORITHM *********************************************************** #
#	Step 1. If |X| >= 16 or |X| < 1/16, go to Step 5.		#
#									#
#	Step 2. Let X = sgn * 2**k * 1.xxxxxxxx...x.			#
#		Note that k = -4, -3,..., or 3.				#
#		Define F = sgn * 2**k * 1.xxxx1, i.e. the first 5	#
#		significant bits of X with a bit-1 attached at the 6-th	#
#		bit position. Define u to be u = (X-F) / (1 + X*F).	#
#									#
#	Step 3. Approximate arctan(u) by a polynomial poly.		#
# 6178# # 6179# Step 4. Return arctan(F) + poly, arctan(F) is fetched from a # 6180# table of values calculated beforehand. Exit. # 6181# # 6182# Step 5. If |X| >= 16, go to Step 7. # 6183# # 6184# Step 6. Approximate arctan(X) by an odd polynomial in X. Exit. # 6185# # 6186# Step 7. Define X' = -1/X. Approximate arctan(X') by an odd # 6187# polynomial in X'. # 6188# Arctan(X) = sign(X)*Pi/2 + arctan(X'). Exit. # 6189# # 6190######################################################################### 6191 6192ATANA3: long 0xBFF6687E,0x314987D8 6193ATANA2: long 0x4002AC69,0x34A26DB3 6194ATANA1: long 0xBFC2476F,0x4E1DA28E 6195 6196ATANB6: long 0x3FB34444,0x7F876989 6197ATANB5: long 0xBFB744EE,0x7FAF45DB 6198ATANB4: long 0x3FBC71C6,0x46940220 6199ATANB3: long 0xBFC24924,0x921872F9 6200ATANB2: long 0x3FC99999,0x99998FA9 6201ATANB1: long 0xBFD55555,0x55555555 6202 6203ATANC5: long 0xBFB70BF3,0x98539E6A 6204ATANC4: long 0x3FBC7187,0x962D1D7D 6205ATANC3: long 0xBFC24924,0x827107B8 6206ATANC2: long 0x3FC99999,0x9996263E 6207ATANC1: long 0xBFD55555,0x55555536 6208 6209PPIBY2: long 0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000 6210NPIBY2: long 0xBFFF0000,0xC90FDAA2,0x2168C235,0x00000000 6211 6212PTINY: long 0x00010000,0x80000000,0x00000000,0x00000000 6213NTINY: long 0x80010000,0x80000000,0x00000000,0x00000000 6214 6215ATANTBL: 6216 long 0x3FFB0000,0x83D152C5,0x060B7A51,0x00000000 6217 long 0x3FFB0000,0x8BC85445,0x65498B8B,0x00000000 6218 long 0x3FFB0000,0x93BE4060,0x17626B0D,0x00000000 6219 long 0x3FFB0000,0x9BB3078D,0x35AEC202,0x00000000 6220 long 0x3FFB0000,0xA3A69A52,0x5DDCE7DE,0x00000000 6221 long 0x3FFB0000,0xAB98E943,0x62765619,0x00000000 6222 long 0x3FFB0000,0xB389E502,0xF9C59862,0x00000000 6223 long 0x3FFB0000,0xBB797E43,0x6B09E6FB,0x00000000 6224 long 0x3FFB0000,0xC367A5C7,0x39E5F446,0x00000000 6225 long 0x3FFB0000,0xCB544C61,0xCFF7D5C6,0x00000000 6226 long 0x3FFB0000,0xD33F62F8,0x2488533E,0x00000000 6227 long 0x3FFB0000,0xDB28DA81,0x62404C77,0x00000000 6228 long 
0x3FFB0000,0xE310A407,0x8AD34F18,0x00000000 6229 long 0x3FFB0000,0xEAF6B0A8,0x188EE1EB,0x00000000 6230 long 0x3FFB0000,0xF2DAF194,0x9DBE79D5,0x00000000 6231 long 0x3FFB0000,0xFABD5813,0x61D47E3E,0x00000000 6232 long 0x3FFC0000,0x8346AC21,0x0959ECC4,0x00000000 6233 long 0x3FFC0000,0x8B232A08,0x304282D8,0x00000000 6234 long 0x3FFC0000,0x92FB70B8,0xD29AE2F9,0x00000000 6235 long 0x3FFC0000,0x9ACF476F,0x5CCD1CB4,0x00000000 6236 long 0x3FFC0000,0xA29E7630,0x4954F23F,0x00000000 6237 long 0x3FFC0000,0xAA68C5D0,0x8AB85230,0x00000000 6238 long 0x3FFC0000,0xB22DFFFD,0x9D539F83,0x00000000 6239 long 0x3FFC0000,0xB9EDEF45,0x3E900EA5,0x00000000 6240 long 0x3FFC0000,0xC1A85F1C,0xC75E3EA5,0x00000000 6241 long 0x3FFC0000,0xC95D1BE8,0x28138DE6,0x00000000 6242 long 0x3FFC0000,0xD10BF300,0x840D2DE4,0x00000000 6243 long 0x3FFC0000,0xD8B4B2BA,0x6BC05E7A,0x00000000 6244 long 0x3FFC0000,0xE0572A6B,0xB42335F6,0x00000000 6245 long 0x3FFC0000,0xE7F32A70,0xEA9CAA8F,0x00000000 6246 long 0x3FFC0000,0xEF888432,0x64ECEFAA,0x00000000 6247 long 0x3FFC0000,0xF7170A28,0xECC06666,0x00000000 6248 long 0x3FFD0000,0x812FD288,0x332DAD32,0x00000000 6249 long 0x3FFD0000,0x88A8D1B1,0x218E4D64,0x00000000 6250 long 0x3FFD0000,0x9012AB3F,0x23E4AEE8,0x00000000 6251 long 0x3FFD0000,0x976CC3D4,0x11E7F1B9,0x00000000 6252 long 0x3FFD0000,0x9EB68949,0x3889A227,0x00000000 6253 long 0x3FFD0000,0xA5EF72C3,0x4487361B,0x00000000 6254 long 0x3FFD0000,0xAD1700BA,0xF07A7227,0x00000000 6255 long 0x3FFD0000,0xB42CBCFA,0xFD37EFB7,0x00000000 6256 long 0x3FFD0000,0xBB303A94,0x0BA80F89,0x00000000 6257 long 0x3FFD0000,0xC22115C6,0xFCAEBBAF,0x00000000 6258 long 0x3FFD0000,0xC8FEF3E6,0x86331221,0x00000000 6259 long 0x3FFD0000,0xCFC98330,0xB4000C70,0x00000000 6260 long 0x3FFD0000,0xD6807AA1,0x102C5BF9,0x00000000 6261 long 0x3FFD0000,0xDD2399BC,0x31252AA3,0x00000000 6262 long 0x3FFD0000,0xE3B2A855,0x6B8FC517,0x00000000 6263 long 0x3FFD0000,0xEA2D764F,0x64315989,0x00000000 6264 long 0x3FFD0000,0xF3BF5BF8,0xBAD1A21D,0x00000000 6265 long 
0x3FFE0000,0x801CE39E,0x0D205C9A,0x00000000 6266 long 0x3FFE0000,0x8630A2DA,0xDA1ED066,0x00000000 6267 long 0x3FFE0000,0x8C1AD445,0xF3E09B8C,0x00000000 6268 long 0x3FFE0000,0x91DB8F16,0x64F350E2,0x00000000 6269 long 0x3FFE0000,0x97731420,0x365E538C,0x00000000 6270 long 0x3FFE0000,0x9CE1C8E6,0xA0B8CDBA,0x00000000 6271 long 0x3FFE0000,0xA22832DB,0xCADAAE09,0x00000000 6272 long 0x3FFE0000,0xA746F2DD,0xB7602294,0x00000000 6273 long 0x3FFE0000,0xAC3EC0FB,0x997DD6A2,0x00000000 6274 long 0x3FFE0000,0xB110688A,0xEBDC6F6A,0x00000000 6275 long 0x3FFE0000,0xB5BCC490,0x59ECC4B0,0x00000000 6276 long 0x3FFE0000,0xBA44BC7D,0xD470782F,0x00000000 6277 long 0x3FFE0000,0xBEA94144,0xFD049AAC,0x00000000 6278 long 0x3FFE0000,0xC2EB4ABB,0x661628B6,0x00000000 6279 long 0x3FFE0000,0xC70BD54C,0xE602EE14,0x00000000 6280 long 0x3FFE0000,0xCD000549,0xADEC7159,0x00000000 6281 long 0x3FFE0000,0xD48457D2,0xD8EA4EA3,0x00000000 6282 long 0x3FFE0000,0xDB948DA7,0x12DECE3B,0x00000000 6283 long 0x3FFE0000,0xE23855F9,0x69E8096A,0x00000000 6284 long 0x3FFE0000,0xE8771129,0xC4353259,0x00000000 6285 long 0x3FFE0000,0xEE57C16E,0x0D379C0D,0x00000000 6286 long 0x3FFE0000,0xF3E10211,0xA87C3779,0x00000000 6287 long 0x3FFE0000,0xF919039D,0x758B8D41,0x00000000 6288 long 0x3FFE0000,0xFE058B8F,0x64935FB3,0x00000000 6289 long 0x3FFF0000,0x8155FB49,0x7B685D04,0x00000000 6290 long 0x3FFF0000,0x83889E35,0x49D108E1,0x00000000 6291 long 0x3FFF0000,0x859CFA76,0x511D724B,0x00000000 6292 long 0x3FFF0000,0x87952ECF,0xFF8131E7,0x00000000 6293 long 0x3FFF0000,0x89732FD1,0x9557641B,0x00000000 6294 long 0x3FFF0000,0x8B38CAD1,0x01932A35,0x00000000 6295 long 0x3FFF0000,0x8CE7A8D8,0x301EE6B5,0x00000000 6296 long 0x3FFF0000,0x8F46A39E,0x2EAE5281,0x00000000 6297 long 0x3FFF0000,0x922DA7D7,0x91888487,0x00000000 6298 long 0x3FFF0000,0x94D19FCB,0xDEDF5241,0x00000000 6299 long 0x3FFF0000,0x973AB944,0x19D2A08B,0x00000000 6300 long 0x3FFF0000,0x996FF00E,0x08E10B96,0x00000000 6301 long 0x3FFF0000,0x9B773F95,0x12321DA7,0x00000000 6302 long 
0x3FFF0000,0x9D55CC32,0x0F935624,0x00000000 6303 long 0x3FFF0000,0x9F100575,0x006CC571,0x00000000 6304 long 0x3FFF0000,0xA0A9C290,0xD97CC06C,0x00000000 6305 long 0x3FFF0000,0xA22659EB,0xEBC0630A,0x00000000 6306 long 0x3FFF0000,0xA388B4AF,0xF6EF0EC9,0x00000000 6307 long 0x3FFF0000,0xA4D35F10,0x61D292C4,0x00000000 6308 long 0x3FFF0000,0xA60895DC,0xFBE3187E,0x00000000 6309 long 0x3FFF0000,0xA72A51DC,0x7367BEAC,0x00000000 6310 long 0x3FFF0000,0xA83A5153,0x0956168F,0x00000000 6311 long 0x3FFF0000,0xA93A2007,0x7539546E,0x00000000 6312 long 0x3FFF0000,0xAA9E7245,0x023B2605,0x00000000 6313 long 0x3FFF0000,0xAC4C84BA,0x6FE4D58F,0x00000000 6314 long 0x3FFF0000,0xADCE4A4A,0x606B9712,0x00000000 6315 long 0x3FFF0000,0xAF2A2DCD,0x8D263C9C,0x00000000 6316 long 0x3FFF0000,0xB0656F81,0xF22265C7,0x00000000 6317 long 0x3FFF0000,0xB1846515,0x0F71496A,0x00000000 6318 long 0x3FFF0000,0xB28AAA15,0x6F9ADA35,0x00000000 6319 long 0x3FFF0000,0xB37B44FF,0x3766B895,0x00000000 6320 long 0x3FFF0000,0xB458C3DC,0xE9630433,0x00000000 6321 long 0x3FFF0000,0xB525529D,0x562246BD,0x00000000 6322 long 0x3FFF0000,0xB5E2CCA9,0x5F9D88CC,0x00000000 6323 long 0x3FFF0000,0xB692CADA,0x7ACA1ADA,0x00000000 6324 long 0x3FFF0000,0xB736AEA7,0xA6925838,0x00000000 6325 long 0x3FFF0000,0xB7CFAB28,0x7E9F7B36,0x00000000 6326 long 0x3FFF0000,0xB85ECC66,0xCB219835,0x00000000 6327 long 0x3FFF0000,0xB8E4FD5A,0x20A593DA,0x00000000 6328 long 0x3FFF0000,0xB99F41F6,0x4AFF9BB5,0x00000000 6329 long 0x3FFF0000,0xBA7F1E17,0x842BBE7B,0x00000000 6330 long 0x3FFF0000,0xBB471285,0x7637E17D,0x00000000 6331 long 0x3FFF0000,0xBBFABE8A,0x4788DF6F,0x00000000 6332 long 0x3FFF0000,0xBC9D0FAD,0x2B689D79,0x00000000 6333 long 0x3FFF0000,0xBD306A39,0x471ECD86,0x00000000 6334 long 0x3FFF0000,0xBDB6C731,0x856AF18A,0x00000000 6335 long 0x3FFF0000,0xBE31CAC5,0x02E80D70,0x00000000 6336 long 0x3FFF0000,0xBEA2D55C,0xE33194E2,0x00000000 6337 long 0x3FFF0000,0xBF0B10B7,0xC03128F0,0x00000000 6338 long 0x3FFF0000,0xBF6B7A18,0xDACB778D,0x00000000 6339 long 
0x3FFF0000,0xBFC4EA46,0x63FA18F6,0x00000000	# operands of the "long" directive that ends the previous line
	long		0x3FFF0000,0xC0181BDE,0x8B89A454,0x00000000
	long		0x3FFF0000,0xC065B066,0xCFBF6439,0x00000000
	long		0x3FFF0000,0xC0AE345F,0x56340AE6,0x00000000
	long		0x3FFF0000,0xC0F22291,0x9CB9E6A7,0x00000000

# Scratch-frame aliases (offsets from %a6) used by satan.
# X holds a copy of the input; ATANMAIN rewrites it in place into F and
# ATANBIG overwrites it with X' = -1/X.
	set		X,FP_SCR0
	set		XDCARE,X+2
	set		XFRAC,X+4		# upper 32 bits of the fraction
	set		XFRACLO,X+8		# lower 32 bits of the fraction

# ATANF receives the 12-byte table value ATAN(|F|), then the sign of F.
	set		ATANF,FP_SCR1
	set		ATANFHI,ATANF+4
	set		ATANFLO,ATANF+8

	global		satan
#--ENTRY POINT FOR ATAN(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
#
# In:	a0 = pointer to the extended-precision input X
#	d0 = value moved into FPCR just before the final operation
#	     (the user's rounding precision/mode)
# Out:	fp0 = atan(X); every path leaves through t_inx2, t_catch,
#	t_minx2 or t_pinx2, which are defined elsewhere in the file.
# Uses:	d1, a1 and fp1 as scratch; d2, fp2 and fp3 are saved and restored;
#	the X and ATANF slots at (a6) are overwritten.
#
# d1 is built as the "compact form" of |X|: biased exponent in the upper
# word, the 16 most significant fraction bits in the lower word, sign
# cleared.  The range dispatch is done with integer compares on it:
#	d1 <  0x3FFB8000		-> ATANSM
#	0x3FFB8000 <= d1 <= 0x4002FFFF	-> ATANMAIN
#	otherwise			-> ATANBIG
satan:
	fmov.x		(%a0),%fp0		# LOAD INPUT

	mov.l		(%a0),%d1		# sign/exponent into the upper word
	mov.w		4(%a0),%d1		# top 16 fraction bits into the lower word
	fmov.x		%fp0,X(%a6)		# keep a copy of X in the scratch frame
	and.l		&0x7FFFFFFF,%d1		# drop the sign: d1 = compact |X|

	cmp.l		%d1,&0x3FFB8000		# |X| >= 1/16?
	bge.b		ATANOK1
	bra.w		ATANSM

ATANOK1:
	cmp.l		%d1,&0x4002FFFF		# |X| < 16 ?
	ble.b		ATANMAIN
	bra.w		ATANBIG

#--THE MOST LIKELY CASE, |X| IN [1/16, 16). WE USE TABLE TECHNIQUE
#--THE IDEA IS ATAN(X) = ATAN(F) + ATAN( [X-F] / [1+XF] ).
#--SO IF F IS CHOSEN TO BE CLOSE TO X AND ATAN(F) IS STORED IN
#--A TABLE, ALL WE NEED IS TO APPROXIMATE ATAN(U) WHERE
#--U = (X-F)/(1+XF) IS SMALL (REMEMBER F IS CLOSE TO X). IT IS
#--TRUE THAT A DIVIDE IS NOW NEEDED, BUT THE APPROXIMATION FOR
#--ATAN(U) IS A VERY SHORT POLYNOMIAL AND THE INDEXING TO
#--FETCH F AND SAVING OF REGISTERS CAN ALL BE HIDDEN UNDER THE
#--DIVIDE. IN THE END THIS METHOD IS MUCH FASTER THAN A TRADITIONAL
#--ONE. NOTE ALSO THAT THE TRADITIONAL SCHEME THAT APPROXIMATES
#--ATAN(X) DIRECTLY WILL NEED TO USE A RATIONAL APPROXIMATION
#--(DIVISION NEEDED) ANYWAY BECAUSE A POLYNOMIAL APPROXIMATION
#--WILL INVOLVE A VERY LONG POLYNOMIAL.

#--NOW WE SEE X AS +-2^K * 1.BBBBBBB....B <- 1. + 63 BITS
#--WE CHOOSE F TO BE +-2^K * 1.BBBB1
#--THAT IS IT MATCHES THE EXPONENT AND FIRST 5 BITS OF X, THE
#--SIXTH BIT IS SET TO BE 1. SINCE K = -4, -3, ..., 3, THERE
#--ARE ONLY 8 TIMES 16 = 2^7 = 128 |F|'S. SINCE ATAN(-|F|) IS
#-- -ATAN(|F|), WE NEED TO STORE ONLY ATAN(|F|).

ATANMAIN:

	and.l		&0xF8000000,XFRAC(%a6)	# FIRST 5 BITS
	or.l		&0x04000000,XFRAC(%a6)	# SET 6-TH BIT TO 1
	mov.l		&0x00000000,XFRACLO(%a6) # LOCATION OF X IS NOW F

	fmov.x		%fp0,%fp1		# FP1 IS X
	fmul.x		X(%a6),%fp1		# FP1 IS X*F, NOTE THAT X*F > 0
	fsub.x		X(%a6),%fp0		# FP0 IS X-F
	fadd.s		&0x3F800000,%fp1	# FP1 IS 1 + X*F
	fdiv.x		%fp1,%fp0		# FP0 IS U = (X-F)/(1+X*F)

#--WHILE THE DIVISION IS TAKING ITS TIME, WE FETCH ATAN(|F|)
#--CREATE ATAN(F) AND STORE IT IN ATANF, AND
#--SAVE REGISTERS FP2.
#
#--THE TABLE OFFSET IS BUILT FROM THE COMPACT FORM IN d1: THE 3-BIT
#--VALUE K+4 IS PLACED JUST ABOVE THE 4 FRACTION BITS, AND THE FINAL
#--SHIFT LEAVES (7-BIT INDEX)*16, I.E. A BYTE OFFSET INTO A TABLE
#--OF 16-BYTE (FOUR LONGWORD) ENTRIES.

	mov.l		%d2,-(%sp)		# SAVE d2 TEMPORARILY
	mov.l		%d1,%d2			# THE EXP AND 16 BITS OF X
	and.l		&0x00007800,%d1		# 4 VARYING BITS OF F'S FRACTION
	and.l		&0x7FFF0000,%d2		# EXPONENT OF F
	sub.l		&0x3FFB0000,%d2		# K+4
	asr.l		&1,%d2			# K+4 now sits directly above the 4 bits
	add.l		%d2,%d1			# THE 7 BITS IDENTIFYING F
	asr.l		&7,%d1			# INDEX INTO TBL OF ATAN(|F|)
	lea		ATANTBL(%pc),%a1
	add.l		%d1,%a1			# ADDRESS OF ATAN(|F|)
	mov.l		(%a1)+,ATANF(%a6)
	mov.l		(%a1)+,ATANFHI(%a6)
	mov.l		(%a1)+,ATANFLO(%a6)	# ATANF IS NOW ATAN(|F|)
	mov.l		X(%a6),%d1		# LOAD SIGN AND EXPO. AGAIN
	and.l		&0x80000000,%d1		# SIGN(F)
	or.l		%d1,ATANF(%a6)		# ATANF IS NOW SIGN(F)*ATAN(|F|)
	mov.l		(%sp)+,%d2		# RESTORE d2

#--THAT'S ALL I HAVE TO DO FOR NOW,
#--BUT ALAS, THE DIVIDE IS STILL CRANKING!

#--U IN FP0, WE ARE NOW READY TO COMPUTE ATAN(U) AS
#--U + A1*U*V*(A2 + V*(A3 + V)), V = U*U
#--THE POLYNOMIAL MAY LOOK STRANGE, BUT IS NEVERTHELESS CORRECT.
#--THE NATURAL FORM IS U + U*V*(A1 + V*(A2 + V*A3))
#--WHAT WE HAVE HERE IS MERELY A1 = A3, A2 = A1/A3, A3 = A2/A3.
#--THE REASON FOR THIS REARRANGEMENT IS TO MAKE THE INDEPENDENT
#--PARTS A1*U*V AND (A2 + ... STUFF) MORE LOAD-BALANCED

	fmovm.x		&0x04,-(%sp)		# save fp2

	fmov.x		%fp0,%fp1
	fmul.x		%fp1,%fp1		# FP1 IS V = U*U
	fmov.d		ATANA3(%pc),%fp2
	fadd.x		%fp1,%fp2		# A3+V
	fmul.x		%fp1,%fp2		# V*(A3+V)
	fmul.x		%fp0,%fp1		# U*V
	fadd.d		ATANA2(%pc),%fp2	# A2+V*(A3+V)
	fmul.d		ATANA1(%pc),%fp1	# A1*U*V
	fmul.x		%fp2,%fp1		# A1*U*V*(A2+V*(A3+V))
	fadd.x		%fp1,%fp0		# ATAN(U), FP1 RELEASED

	fmovm.x		(%sp)+,&0x20		# restore fp2

	fmov.l		%d0,%fpcr		# restore user's rnd mode,prec
	fadd.x		ATANF(%a6),%fp0		# ATAN(X)
	bra		t_inx2

# NOTE(review): no branch to ATANBORS is visible in this routine; it may
# be unreferenced.  Confirm against the rest of the file before removing.
ATANBORS:
#--|X| IS IN d1 IN COMPACT FORM (THE COMPARE BELOW READS d1). FP1, d0 SAVED.
#--FP0 IS X AND |X| <= 1/16 OR |X| >= 16.
	cmp.l		%d1,&0x3FFF8000
	bgt.w		ATANBIG			# I.E. |X| >= 16

ATANSM:
#--|X| < 1/16
#--IF |X| < 2^(-40), RETURN X AS ANSWER. OTHERWISE, APPROXIMATE
#--ATAN(X) BY X + X*Y*(B1+Y*(B2+Y*(B3+Y*(B4+Y*(B5+Y*B6)))))
#--WHICH IS X + X*Y*( [B1+Z*(B3+Z*B5)] + [Y*(B2+Z*(B4+Z*B6)] )
#--WHERE Y = X*X, AND Z = Y*Y.

	cmp.l		%d1,&0x3FD78000		# |X| < 2^(-40)?
	blt.w		ATANTINY

#--COMPUTE POLYNOMIAL
	fmovm.x		&0x0c,-(%sp)		# save fp2/fp3

	fmul.x		%fp0,%fp0		# FP0 IS Y = X*X

	fmov.x		%fp0,%fp1
	fmul.x		%fp1,%fp1		# FP1 IS Z = Y*Y

	fmov.d		ATANB6(%pc),%fp2
	fmov.d		ATANB5(%pc),%fp3

	fmul.x		%fp1,%fp2		# Z*B6
	fmul.x		%fp1,%fp3		# Z*B5

	fadd.d		ATANB4(%pc),%fp2	# B4+Z*B6
	fadd.d		ATANB3(%pc),%fp3	# B3+Z*B5

	fmul.x		%fp1,%fp2		# Z*(B4+Z*B6)
	fmul.x		%fp3,%fp1		# Z*(B3+Z*B5)

	fadd.d		ATANB2(%pc),%fp2	# B2+Z*(B4+Z*B6)
	fadd.d		ATANB1(%pc),%fp1	# B1+Z*(B3+Z*B5)

	fmul.x		%fp0,%fp2		# Y*(B2+Z*(B4+Z*B6))
	fmul.x		X(%a6),%fp0		# X*Y

	fadd.x		%fp2,%fp1		# [B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))]

	fmul.x		%fp1,%fp0		# X*Y*([B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))])

	fmovm.x		(%sp)+,&0x30		# restore fp2/fp3

	fmov.l		%d0,%fpcr		# restore user's rnd mode,prec
	fadd.x		X(%a6),%fp0		# X + X*Y*(...)
	bra		t_inx2

ATANTINY:
#--|X| < 2^(-40), ATAN(X) = X

	fmov.l		%d0,%fpcr		# restore user's rnd mode,prec
	mov.b		&FMOV_OP,%d1		# last inst is MOVE
	fmov.x		X(%a6),%fp0		# last inst - possible exception set

	bra		t_catch

ATANBIG:
#--IF |X| > 2^(100), RETURN SIGN(X)*(PI/2 - TINY). OTHERWISE,
#--RETURN SIGN(X)*PI/2 + ATAN(-1/X).
	cmp.l		%d1,&0x40638000
	bgt.w		ATANHUGE

#--APPROXIMATE ATAN(-1/X) BY
#--X'+X'*Y*(C1+Y*(C2+Y*(C3+Y*(C4+Y*C5)))), X' = -1/X, Y = X'*X'
#--THIS CAN BE RE-WRITTEN AS
#--X'+X'*Y*( [C1+Z*(C3+Z*C5)] + [Y*(C2+Z*C4)] ), Z = Y*Y.

	fmovm.x		&0x0c,-(%sp)		# save fp2/fp3

	fmov.s		&0xBF800000,%fp1	# LOAD -1
	fdiv.x		%fp0,%fp1		# FP1 IS -1/X

#--DIVIDE IS STILL CRANKING

	fmov.x		%fp1,%fp0		# FP0 IS X'
	fmul.x		%fp0,%fp0		# FP0 IS Y = X'*X'
	fmov.x		%fp1,X(%a6)		# X IS REALLY X'

	fmov.x		%fp0,%fp1
	fmul.x		%fp1,%fp1		# FP1 IS Z = Y*Y

	fmov.d		ATANC5(%pc),%fp3
	fmov.d		ATANC4(%pc),%fp2

	fmul.x		%fp1,%fp3		# Z*C5
	fmul.x		%fp1,%fp2		# Z*C4

	fadd.d		ATANC3(%pc),%fp3	# C3+Z*C5
	fadd.d		ATANC2(%pc),%fp2	# C2+Z*C4

	fmul.x		%fp3,%fp1		# Z*(C3+Z*C5), FP3 RELEASED
	fmul.x		%fp0,%fp2		# Y*(C2+Z*C4)

	fadd.d		ATANC1(%pc),%fp1	# C1+Z*(C3+Z*C5)
	fmul.x		X(%a6),%fp0		# X'*Y

	fadd.x		%fp2,%fp1		# [Y*(C2+Z*C4)]+[C1+Z*(C3+Z*C5)]

	fmul.x		%fp1,%fp0		# X'*Y*([C1+Z*(C3+Z*C5)]
#					...	+[Y*(C2+Z*C4)])
	fadd.x		X(%a6),%fp0		# X' + X'*Y*(...) = ATAN(-1/X)

	fmovm.x		(%sp)+,&0x30		# restore fp2/fp3

	fmov.l		%d0,%fpcr		# restore user's rnd mode,prec
	tst.b		(%a0)			# sign of the original input
	bpl.b		pos_big

neg_big:
	fadd.x		NPIBY2(%pc),%fp0
	bra		t_minx2

pos_big:
	fadd.x		PPIBY2(%pc),%fp0
	bra		t_pinx2

ATANHUGE:
#--RETURN SIGN(X)*(PIBY2 - TINY) = SIGN(X)*PIBY2 - SIGN(X)*TINY
	tst.b		(%a0)			# sign of the original input
	bpl.b		pos_huge

neg_huge:
	fmov.x		NPIBY2(%pc),%fp0
	fmov.l		%d0,%fpcr		# user's mode applies to the final add only
	fadd.x		PTINY(%pc),%fp0
	bra		t_minx2

pos_huge:
	fmov.x		PPIBY2(%pc),%fp0
	fmov.l		%d0,%fpcr		# user's mode applies to the final add only
	fadd.x		NTINY(%pc),%fp0
	bra		t_pinx2

	global		satand
#--ENTRY POINT FOR ATAN(X) FOR DENORMALIZED ARGUMENT
satand:
	bra		t_extdnrm

#########################################################################
# sasin():  computes the inverse sine of a normalized input		#
# sasind(): computes the inverse sine of a denormalized input		#
#									#
# INPUT ***************************************************************	#
#	a0 = pointer to extended precision input			#
#	d0 = 
#	     round precision,mode					#
#									#
# OUTPUT **************************************************************	#
#	fp0 = arcsin(X)							#
#									#
# ACCURACY and MONOTONICITY *******************************************	#
#	The returned result is within 3 ulps in 64 significant bits,	#
#	i.e. within 0.5001 ulp to 53 bits if the result is subsequently	#
#	rounded to double precision. The result is provably monotonic	#
#	in double precision.						#
#									#
# ALGORITHM ***********************************************************	#
#									#
#	ASIN								#
#	1. If |X| >= 1, go to 3.					#
#									#
#	2. (|X| < 1) Calculate asin(X) by				#
#		z := sqrt( [1-X][1+X] )					#
#		asin(X) = atan( x / z ).				#
#		Exit.							#
#									#
#	3. If |X| > 1, go to 5.						#
#									#
#	4. (|X| = 1) sgn := sign(X), return asin(X) := sgn * Pi/2. Exit.#
#									#
#	5. (|X| > 1) Generate an invalid operation by 0 * infinity.	#
#		Exit.							#
#									#
#########################################################################

	global		sasin
# sasin: d1 is scratch (compact form of |X|: exponent in the upper word,
# top 16 fraction bits in the lower word, sign cleared).  For the usual
# case the quotient X/sqrt((1-X)(1+X)) is pushed on the stack and satan
# is called with a0 pointing at it and d0 unchanged.
sasin:
	fmov.x		(%a0),%fp0		# LOAD INPUT

	mov.l		(%a0),%d1
	mov.w		4(%a0),%d1
	and.l		&0x7FFFFFFF,%d1		# d1 = compact |X|
	cmp.l		%d1,&0x3FFF8000		# |X| >= 1?
	bge.b		ASINBIG

# This catch is added here for the '060 QSP. Originally, the call to
# satan() would handle this case by causing the exception which would
# not be caught until gen_except(). Now, with the exceptions being
# detected inside of satan(), the exception would have been handled there
# instead of inside sasin() as expected.
	cmp.l		%d1,&0x3FD78000		# |X| < 2^(-40)?
	blt.w		ASINTINY

#--THIS IS THE USUAL CASE, |X| < 1
#--ASIN(X) = ATAN( X / SQRT( (1-X)(1+X) ) )

ASINMAIN:
	fmov.s		&0x3F800000,%fp1
	fsub.x		%fp0,%fp1		# 1-X
	fmovm.x		&0x4,-(%sp)		# {fp2}
	fmov.s		&0x3F800000,%fp2
	fadd.x		%fp0,%fp2		# 1+X
	fmul.x		%fp2,%fp1		# (1+X)(1-X)
	fmovm.x		(%sp)+,&0x20		# {fp2}
	fsqrt.x		%fp1			# SQRT([1-X][1+X])
	fdiv.x		%fp1,%fp0		# X/SQRT([1-X][1+X])
	fmovm.x		&0x01,-(%sp)		# save X/SQRT(...)
	lea		(%sp),%a0		# pass ptr to X/SQRT(...)
	bsr		satan
	add.l		&0xc,%sp		# clear X/SQRT(...) from stack
	bra		t_inx2

ASINBIG:
	fabs.x		%fp0			# |X|
	fcmp.s		%fp0,&0x3F800000
	fbgt		t_operr			# cause an operr exception

#--|X| = 1, ASIN(X) = +- PI/2.
ASINONE:
	fmov.x		PIBY2(%pc),%fp0
	mov.l		(%a0),%d1
	and.l		&0x80000000,%d1		# SIGN BIT OF X
	or.l		&0x3F800000,%d1		# +-1 IN SGL FORMAT
	mov.l		%d1,-(%sp)		# push SIGN(X) IN SGL-FMT
	fmov.l		%d0,%fpcr		# user's mode applies to the multiply
	fmul.s		(%sp)+,%fp0		# +-1 * PI/2, pops the pushed longword
	bra		t_inx2

#--|X| < 2^(-40), ASIN(X) = X
ASINTINY:
	fmov.l		%d0,%fpcr		# restore user's rnd mode,prec
	mov.b		&FMOV_OP,%d1		# last inst is MOVE
	fmov.x		(%a0),%fp0		# last inst - possible exception
	bra		t_catch

	global		sasind
#--ASIN(X) = X FOR DENORMALIZED X
sasind:
	bra		t_extdnrm

#########################################################################
# sacos():  computes the inverse cosine of a normalized input		#
# sacosd(): computes the inverse cosine of a denormalized input		#
#									#
# INPUT ***************************************************************	#
#	a0 = pointer to extended precision input			#
#	d0 = round precision,mode					#
#									#
# OUTPUT **************************************************************	#
#	fp0 = arccos(X)							#
#									#
# ACCURACY and MONOTONICITY *******************************************	#
#	The returned result is within 3 ulps in 64 
#	significant bits,						#
#	i.e. within 0.5001 ulp to 53 bits if the result is subsequently	#
#	rounded to double precision. The result is provably monotonic	#
#	in double precision.						#
#									#
# ALGORITHM ***********************************************************	#
#									#
#	ACOS								#
#	1. If |X| >= 1, go to 3.					#
#									#
#	2. (|X| < 1) Calculate acos(X) by				#
#		z := (1-X) / (1+X)					#
#		acos(X) = 2 * atan( sqrt(z) ).				#
#		Exit.							#
#									#
#	3. If |X| > 1, go to 5.						#
#									#
#	4. (|X| = 1) If X > 0, return 0. Otherwise, return Pi. Exit.	#
#									#
#	5. (|X| > 1) Generate an invalid operation by 0 * infinity.	#
#		Exit.							#
#									#
#########################################################################

	global		sacos
# sacos: d1 is scratch (compact form of |X|).  In the usual case the
# caller's d0 is parked on the stack and d0 is cleared for the nested
# satan call, so the user's rounding precision/mode is applied only to
# the final doubling.
sacos:
	fmov.x		(%a0),%fp0		# LOAD INPUT

	mov.l		(%a0),%d1		# pack exp w/ upper 16 fraction
	mov.w		4(%a0),%d1
	and.l		&0x7FFFFFFF,%d1		# d1 = compact |X|
	cmp.l		%d1,&0x3FFF8000		# |X| >= 1?
	bge.b		ACOSBIG

#--THIS IS THE USUAL CASE, |X| < 1
#--ACOS(X) = 2 * ATAN( SQRT( (1-X)/(1+X) ) )

ACOSMAIN:
	fmov.s		&0x3F800000,%fp1
	fadd.x		%fp0,%fp1		# 1+X
	fneg.x		%fp0			# -X
	fadd.s		&0x3F800000,%fp0	# 1-X
	fdiv.x		%fp1,%fp0		# (1-X)/(1+X)
	fsqrt.x		%fp0			# SQRT((1-X)/(1+X))
	mov.l		%d0,-(%sp)		# save original user's fpcr
	clr.l		%d0			# satan loads d0 into fpcr; 0 is presumably
#						# the default ext/RN setting -- TODO confirm
	fmovm.x		&0x01,-(%sp)		# save SQRT(...) to stack
	lea		(%sp),%a0		# pass ptr to sqrt
	bsr		satan			# ATAN(SQRT([1-X]/[1+X]))
	add.l		&0xc,%sp		# clear SQRT(...) from stack

	fmov.l		(%sp)+,%fpcr		# restore user's round prec,mode
	fadd.x		%fp0,%fp0		# 2 * ATAN( STUFF )
	bra		t_pinx2

ACOSBIG:
	fabs.x		%fp0
	fcmp.s		%fp0,&0x3F800000
	fbgt		t_operr			# cause an operr exception

#--|X| = 1, ACOS(X) = 0 OR PI
	tst.b		(%a0)			# is X positive or negative?
	bpl.b		ACOSP1

#--X = -1
#Returns PI and inexact exception
ACOSM1:
	fmov.x		PI(%pc),%fp0		# load PI
	fmov.l		%d0,%fpcr		# load round mode,prec
	fadd.s		&0x00800000,%fp0	# add a small value (2^(-126) in sgl format)
	bra		t_pinx2

ACOSP1:
	bra		ld_pzero		# answer is positive zero

	global		sacosd
#--ACOS(X) = PI/2 FOR DENORMALIZED X
sacosd:
	fmov.l		%d0,%fpcr		# load user's rnd mode/prec
	fmov.x		PIBY2(%pc),%fp0
	bra		t_pinx2

#########################################################################
# setox():    computes the exponential for a normalized input		#
# setoxd():   computes the exponential for a denormalized input		#
# setoxm1():  computes the exponential minus 1 for a normalized input	#
# setoxm1d(): computes the exponential minus 1 for a denormalized input	#
#									#
# INPUT ***************************************************************	#
#	a0 = pointer to extended precision input			#
#	d0 = round precision,mode					#
#									#
# OUTPUT **************************************************************	#
#	fp0 = exp(X) or exp(X)-1					#
#									#
# ACCURACY and MONOTONICITY *******************************************	#
#	The returned result is within 0.85 ulps in 64 significant bits,	#
#	i.e. within 0.5001 ulp to 53 bits if the result is subsequently	#
#	rounded to double precision. The result is provably monotonic	#
#	in double precision.						#
#									#
# ALGORITHM and IMPLEMENTATION ****************************************	#
#									#
#	setoxd								#
#	------								#
#	Step 1.	Set ans := 1.0						#
#									#
#	Step 2.	Return	ans := ans + sign(X)*2^(-126). Exit.		#
#	Notes:	This will always generate one exception -- inexact.	#
#									#
#									#
#	setox								#
#	-----								#
#									#
#	Step 1.	Filter out extreme cases of input argument.		#
#		1.1	If |X| >= 2^(-65), go to Step 1.3.		#
#		1.2	Go to Step 7.					#
#		1.3	If |X| < 16380 log(2), go to Step 2.		#
#		1.4	Go to Step 8.					#
#	Notes:	The usual case should take the branches 1.1 -> 1.3 -> 2.#
#		To avoid the use of floating-point comparisons, a	#
#		compact representation of |X| is used. This format is a	#
#		32-bit integer: the upper (more significant) 16 bits	#
#		are the sign and biased exponent field of |X|; the	#
#		lower 16 bits are the 16 most significant fraction	#
#		bits of |X| (including the explicit bit). Consequently,	#
#		the comparisons in Steps 1.1 and 1.3 can be performed	#
#		by integer comparison. Note also that the constant	#
#		16380 log(2) used in Step 1.3 is also in the compact	#
#		form. Thus taking the branch to Step 2 guarantees	#
#		|X| < 16380 log(2). There is no harm in having a small	#
#		number of cases where |X| is less than, but close to,	#
#		16380 log(2) and the branch to Step 8 is taken.		#
#									#
#	Step 2.	Calculate N = round-to-nearest-int( X * 64/log2 ).	#
#		2.1	Set AdjFlag := 0 (indicates the branch 1.3 -> 2 #
#			was taken)					#
#		2.2	N := round-to-nearest-integer( X * 64/log2 ).	#
#		2.3	Calculate	J = N mod 64; so J = 0,1,2,..., #
#			or 63.						#
#		2.4	Calculate	M = (N - J)/64; so N = 64M + J.	#
#		2.5	Calculate the address of the stored value of	#
#			2^(J/64).					#
#		2.6	Create the value Scale = 2^M.			#
#	Notes:	The calculation in 2.2 is really performed by		#
#			Z := X * constant				#
#			N := round-to-nearest-integer(Z)		#
#		where							#
#			constant := single-precision( 64/log 2 ).	#
#									#
#		Using a single-precision constant avoids memory		#
#		access. Another effect of using a single-precision	#
#		"constant" is that the calculated value Z is		#
#									#
#			Z = X*(64/log2)*(1+eps), |eps| <= 2^(-24).	#
#									#
#		This error has to be considered later in Steps 3 and 4.	#
#									#
#	Step 3.	Calculate X - N*log2/64.				#
#		3.1	R := X + N*L1,					#
#				where L1 := single-precision(-log2/64).	#
# 6875# 3.2 R := R + N*L2, # 6876# L2 := extended-precision(-log2/64 - L1).# 6877# Notes: a) The way L1 and L2 are chosen ensures L1+L2 # 6878# approximate the value -log2/64 to 88 bits of accuracy. # 6879# b) N*L1 is exact because N is no longer than 22 bits # 6880# and L1 is no longer than 24 bits. # 6881# c) The calculation X+N*L1 is also exact due to # 6882# cancellation. Thus, R is practically X+N(L1+L2) to full # 6883# 64 bits. # 6884# d) It is important to estimate how large can |R| be # 6885# after Step 3.2. # 6886# # 6887# N = rnd-to-int( X*64/log2 (1+eps) ), |eps|<=2^(-24) # 6888# X*64/log2 (1+eps) = N + f, |f| <= 0.5 # 6889# X*64/log2 - N = f - eps*X 64/log2 # 6890# X - N*log2/64 = f*log2/64 - eps*X # 6891# # 6892# # 6893# Now |X| <= 16446 log2, thus # 6894# # 6895# |X - N*log2/64| <= (0.5 + 16446/2^(18))*log2/64 # 6896# <= 0.57 log2/64. # 6897# This bound will be used in Step 4. # 6898# # 6899# Step 4. Approximate exp(R)-1 by a polynomial # 6900# p = R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*A5)))) # 6901# Notes: a) In order to reduce memory access, the coefficients # 6902# are made as "short" as possible: A1 (which is 1/2), A4 # 6903# and A5 are single precision; A2 and A3 are double # 6904# precision. # 6905# b) Even with the restrictions above, # 6906# |p - (exp(R)-1)| < 2^(-68.8) for all |R| <= 0.0062. # 6907# Note that 0.0062 is slightly bigger than 0.57 log2/64. # 6908# c) To fully use the pipeline, p is separated into # 6909# two independent pieces of roughly equal complexities # 6910# p = [ R + R*S*(A2 + S*A4) ] + # 6911# [ S*(A1 + S*(A3 + S*A5)) ] # 6912# where S = R*R. # 6913# # 6914# Step 5. Compute 2^(J/64)*exp(R) = 2^(J/64)*(1+p) by # 6915# ans := T + ( T*p + t) # 6916# where T and t are the stored values for 2^(J/64). # 6917# Notes: 2^(J/64) is stored as T and t where T+t approximates # 6918# 2^(J/64) to roughly 85 bits; T is in extended precision # 6919# and t is in single precision. 
#		Note also that T is					#
#		rounded to 62 bits so that the last two bits of T are	#
#		zero. The reason for such a special form is that T-1,	#
#		T-2, and T-8 will all be exact --- a property that will	#
#		give much more accurate computation of the function	#
#		EXPM1.							#
#									#
#	Step 6.	Reconstruction of exp(X)				#
#			exp(X) = 2^M * 2^(J/64) * exp(R).		#
#		6.1	If AdjFlag = 0, go to 6.3			#
#		6.2	ans := ans * AdjScale				#
#		6.3	Restore the user FPCR				#
#		6.4	Return ans := ans * Scale. Exit.		#
#	Notes:	If AdjFlag = 0, we have X = Mlog2 + Jlog2/64 + R,	#
#		|M| <= 16380, and Scale = 2^M. Moreover, exp(X) will	#
#		neither overflow nor underflow. If AdjFlag = 1, that	#
#		means that						#
#			X = (M1+M)log2 + Jlog2/64 + R, |M1+M| >= 16380.	#
#		Hence, exp(X) may overflow or underflow or neither.	#
#		When that is the case, AdjScale = 2^(M1) where M1 is	#
#		approximately M. Thus 6.2 will never cause		#
#		over/underflow. Possible exception in 6.4 is overflow	#
#		or underflow. The inexact exception is not generated in	#
#		6.4. Although one can argue that the inexact flag	#
#		should always be raised, simulating that exception	#
#		costs more than the flag is worth in practical uses.	#
#									#
#	Step 7.	Return 1 + X.						#
#		7.1	ans := X					#
#		7.2	Restore user FPCR.				#
#		7.3	Return ans := 1 + ans. Exit			#
#	Notes:	For non-zero X, the inexact exception will always be	#
#		raised by 7.3. That is the only exception raised by 7.3.#
#		Note also that we use the FMOVEM instruction to move X	#
#		in Step 7.1 to avoid unnecessary trapping. (Although	#
#		the FMOVEM may not seem relevant since X is normalized,	#
#		the precaution will be useful in the library version of	#
#		this code where the separate entry for denormalized	#
#		inputs will be done away with.)				#
#									#
#	Step 8.	Handle exp(X) where |X| >= 16380log2.			#
#		8.1	If |X| > 16480 log2, go to Step 9.		#
# 6961# (mimic 2.2 - 2.6) # 6962# 8.2 N := round-to-integer( X * 64/log2 ) # 6963# 8.3 Calculate J = N mod 64, J = 0,1,...,63 # 6964# 8.4 K := (N-J)/64, M1 := truncate(K/2), M = K-M1, # 6965# AdjFlag := 1. # 6966# 8.5 Calculate the address of the stored value # 6967# 2^(J/64). # 6968# 8.6 Create the values Scale = 2^M, AdjScale = 2^M1. # 6969# 8.7 Go to Step 3. # 6970# Notes: Refer to notes for 2.2 - 2.6. # 6971# # 6972# Step 9. Handle exp(X), |X| > 16480 log2. # 6973# 9.1 If X < 0, go to 9.3 # 6974# 9.2 ans := Huge, go to 9.4 # 6975# 9.3 ans := Tiny. # 6976# 9.4 Restore user FPCR. # 6977# 9.5 Return ans := ans * ans. Exit. # 6978# Notes: Exp(X) will surely overflow or underflow, depending on # 6979# X's sign. "Huge" and "Tiny" are respectively large/tiny # 6980# extended-precision numbers whose square over/underflow # 6981# with an inexact result. Thus, 9.5 always raises the # 6982# inexact together with either overflow or underflow. # 6983# # 6984# setoxm1d # 6985# -------- # 6986# # 6987# Step 1. Set ans := 0 # 6988# # 6989# Step 2. Return ans := X + ans. Exit. # 6990# Notes: This will return X with the appropriate rounding # 6991# precision prescribed by the user FPCR. # 6992# # 6993# setoxm1 # 6994# ------- # 6995# # 6996# Step 1. Check |X| # 6997# 1.1 If |X| >= 1/4, go to Step 1.3. # 6998# 1.2 Go to Step 7. # 6999# 1.3 If |X| < 70 log(2), go to Step 2. # 7000# 1.4 Go to Step 10. # 7001# Notes: The usual case should take the branches 1.1 -> 1.3 -> 2.# 7002# However, it is conceivable |X| can be small very often # 7003# because EXPM1 is intended to evaluate exp(X)-1 # 7004# accurately when |X| is small. For further details on # 7005# the comparisons, see the notes on Step 1 of setox. # 7006# # 7007# Step 2. Calculate N = round-to-nearest-int( X * 64/log2 ). # 7008# 2.1 N := round-to-nearest-integer( X * 64/log2 ). # 7009# 2.2 Calculate J = N mod 64; so J = 0,1,2,..., # 7010# or 63. # 7011# 2.3 Calculate M = (N - J)/64; so N = 64M + J. 
# 7012# 2.4 Calculate the address of the stored value of # 7013# 2^(J/64). # 7014# 2.5 Create the values Sc = 2^M and # 7015# OnebySc := -2^(-M). # 7016# Notes: See the notes on Step 2 of setox. # 7017# # 7018# Step 3. Calculate X - N*log2/64. # 7019# 3.1 R := X + N*L1, # 7020# where L1 := single-precision(-log2/64). # 7021# 3.2 R := R + N*L2, # 7022# L2 := extended-precision(-log2/64 - L1).# 7023# Notes: Applying the analysis of Step 3 of setox in this case # 7024# shows that |R| <= 0.0055 (note that |X| <= 70 log2 in # 7025# this case). # 7026# # 7027# Step 4. Approximate exp(R)-1 by a polynomial # 7028# p = R+R*R*(A1+R*(A2+R*(A3+R*(A4+R*(A5+R*A6))))) # 7029# Notes: a) In order to reduce memory access, the coefficients # 7030# are made as "short" as possible: A1 (which is 1/2), A5 # 7031# and A6 are single precision; A2, A3 and A4 are double # 7032# precision. # 7033# b) Even with the restriction above, # 7034# |p - (exp(R)-1)| < |R| * 2^(-72.7) # 7035# for all |R| <= 0.0055. # 7036# c) To fully use the pipeline, p is separated into # 7037# two independent pieces of roughly equal complexity # 7038# p = [ R*S*(A2 + S*(A4 + S*A6)) ] + # 7039# [ R + S*(A1 + S*(A3 + S*A5)) ] # 7040# where S = R*R. # 7041# # 7042# Step 5. Compute 2^(J/64)*p by # 7043# p := T*p # 7044# where T and t are the stored values for 2^(J/64). # 7045# Notes: 2^(J/64) is stored as T and t where T+t approximates # 7046# 2^(J/64) to roughly 85 bits; T is in extended precision # 7047# and t is in single precision. Note also that T is # 7048# rounded to 62 bits so that the last two bits of T are # 7049# zero. The reason for such a special form is that T-1, # 7050# T-2, and T-8 will all be exact --- a property that will # 7051# be exploited in Step 6 below. The total relative error # 7052# in p is no bigger than 2^(-67.7) compared to the final # 7053# result. # 7054# # 7055# Step 6. Reconstruction of exp(X)-1 # 7056# exp(X)-1 = 2^M * ( 2^(J/64) + p - 2^(-M) ). # 7057# 6.1 If M <= 63, go to Step 6.3. 
# 7058# 6.2 ans := T + (p + (t + OnebySc)). Go to 6.6 # 7059# 6.3 If M >= -3, go to 6.5. # 7060# 6.4 ans := (T + (p + t)) + OnebySc. Go to 6.6 # 7061# 6.5 ans := (T + OnebySc) + (p + t). # 7062# 6.6 Restore user FPCR. # 7063# 6.7 Return ans := Sc * ans. Exit. # 7064# Notes: The various arrangements of the expressions give # 7065# accurate evaluations. # 7066# # 7067# Step 7. exp(X)-1 for |X| < 1/4. # 7068# 7.1 If |X| >= 2^(-65), go to Step 9. # 7069# 7.2 Go to Step 8. # 7070# # 7071# Step 8. Calculate exp(X)-1, |X| < 2^(-65). # 7072# 8.1 If |X| < 2^(-16312), goto 8.3 # 7073# 8.2 Restore FPCR; return ans := X - 2^(-16382). # 7074# Exit. # 7075# 8.3 X := X * 2^(140). # 7076# 8.4 Restore FPCR; ans := ans - 2^(-16382). # 7077# Return ans := ans*2^(140). Exit # 7078# Notes: The idea is to return "X - tiny" under the user # 7079# precision and rounding modes. To avoid unnecessary # 7080# inefficiency, we stay away from denormalized numbers # 7081# the best we can. For |X| >= 2^(-16312), the # 7082# straightforward 8.2 generates the inexact exception as # 7083# the case warrants. # 7084# # 7085# Step 9. Calculate exp(X)-1, |X| < 1/4, by a polynomial # 7086# p = X + X*X*(B1 + X*(B2 + ... + X*B12)) # 7087# Notes: a) In order to reduce memory access, the coefficients # 7088# are made as "short" as possible: B1 (which is 1/2), B9 # 7089# to B12 are single precision; B3 to B8 are double # 7090# precision; and B2 is double extended. # 7091# b) Even with the restriction above, # 7092# |p - (exp(X)-1)| < |X| 2^(-70.6) # 7093# for all |X| <= 0.251. # 7094# Note that 0.251 is slightly bigger than 1/4. # 7095# c) To fully preserve accuracy, the polynomial is # 7096# computed as # 7097# X + ( S*B1 + Q ) where S = X*X and # 7098# Q = X*S*(B2 + X*(B3 + ... + X*B12)) # 7099# d) To fully use the pipeline, Q is separated into # 7100# two independent pieces of roughly equal complexity # 7101# Q = [ X*S*(B2 + S*(B4 + ... + S*B12)) ] + # 7102# [ S*S*(B3 + S*(B5 + ... 
+ S*B11)) ] # 7103# # 7104# Step 10. Calculate exp(X)-1 for |X| >= 70 log 2. # 7105# 10.1 If X >= 70log2 , exp(X) - 1 = exp(X) for all # 7106# practical purposes. Therefore, go to Step 1 of setox. # 7107# 10.2 If X <= -70log2, exp(X) - 1 = -1 for all practical # 7108# purposes. # 7109# ans := -1 # 7110# Restore user FPCR # 7111# Return ans := ans + 2^(-126). Exit. # 7112# Notes: 10.2 will always create an inexact and return -1 + tiny # 7113# in the user rounding precision and mode. # 7114# # 7115######################################################################### 7116 7117L2: long 0x3FDC0000,0x82E30865,0x4361C4C6,0x00000000 7118 7119EEXPA3: long 0x3FA55555,0x55554CC1 7120EEXPA2: long 0x3FC55555,0x55554A54 7121 7122EM1A4: long 0x3F811111,0x11174385 7123EM1A3: long 0x3FA55555,0x55554F5A 7124 7125EM1A2: long 0x3FC55555,0x55555555,0x00000000,0x00000000 7126 7127EM1B8: long 0x3EC71DE3,0xA5774682 7128EM1B7: long 0x3EFA01A0,0x19D7CB68 7129 7130EM1B6: long 0x3F2A01A0,0x1A019DF3 7131EM1B5: long 0x3F56C16C,0x16C170E2 7132 7133EM1B4: long 0x3F811111,0x11111111 7134EM1B3: long 0x3FA55555,0x55555555 7135 7136EM1B2: long 0x3FFC0000,0xAAAAAAAA,0xAAAAAAAB 7137 long 0x00000000 7138 7139TWO140: long 0x48B00000,0x00000000 7140TWON140: 7141 long 0x37300000,0x00000000 7142 7143EEXPTBL: 7144 long 0x3FFF0000,0x80000000,0x00000000,0x00000000 7145 long 0x3FFF0000,0x8164D1F3,0xBC030774,0x9F841A9B 7146 long 0x3FFF0000,0x82CD8698,0xAC2BA1D8,0x9FC1D5B9 7147 long 0x3FFF0000,0x843A28C3,0xACDE4048,0xA0728369 7148 long 0x3FFF0000,0x85AAC367,0xCC487B14,0x1FC5C95C 7149 long 0x3FFF0000,0x871F6196,0x9E8D1010,0x1EE85C9F 7150 long 0x3FFF0000,0x88980E80,0x92DA8528,0x9FA20729 7151 long 0x3FFF0000,0x8A14D575,0x496EFD9C,0xA07BF9AF 7152 long 0x3FFF0000,0x8B95C1E3,0xEA8BD6E8,0xA0020DCF 7153 long 0x3FFF0000,0x8D1ADF5B,0x7E5BA9E4,0x205A63DA 7154 long 0x3FFF0000,0x8EA4398B,0x45CD53C0,0x1EB70051 7155 long 0x3FFF0000,0x9031DC43,0x1466B1DC,0x1F6EB029 7156 long 0x3FFF0000,0x91C3D373,0xAB11C338,0xA0781494 7157 
long 0x3FFF0000,0x935A2B2F,0x13E6E92C,0x9EB319B0 7158 long 0x3FFF0000,0x94F4EFA8,0xFEF70960,0x2017457D 7159 long 0x3FFF0000,0x96942D37,0x20185A00,0x1F11D537 7160 long 0x3FFF0000,0x9837F051,0x8DB8A970,0x9FB952DD 7161 long 0x3FFF0000,0x99E04593,0x20B7FA64,0x1FE43087 7162 long 0x3FFF0000,0x9B8D39B9,0xD54E5538,0x1FA2A818 7163 long 0x3FFF0000,0x9D3ED9A7,0x2CFFB750,0x1FDE494D 7164 long 0x3FFF0000,0x9EF53260,0x91A111AC,0x20504890 7165 long 0x3FFF0000,0xA0B0510F,0xB9714FC4,0xA073691C 7166 long 0x3FFF0000,0xA2704303,0x0C496818,0x1F9B7A05 7167 long 0x3FFF0000,0xA43515AE,0x09E680A0,0xA0797126 7168 long 0x3FFF0000,0xA5FED6A9,0xB15138EC,0xA071A140 7169 long 0x3FFF0000,0xA7CD93B4,0xE9653568,0x204F62DA 7170 long 0x3FFF0000,0xA9A15AB4,0xEA7C0EF8,0x1F283C4A 7171 long 0x3FFF0000,0xAB7A39B5,0xA93ED338,0x9F9A7FDC 7172 long 0x3FFF0000,0xAD583EEA,0x42A14AC8,0xA05B3FAC 7173 long 0x3FFF0000,0xAF3B78AD,0x690A4374,0x1FDF2610 7174 long 0x3FFF0000,0xB123F581,0xD2AC2590,0x9F705F90 7175 long 0x3FFF0000,0xB311C412,0xA9112488,0x201F678A 7176 long 0x3FFF0000,0xB504F333,0xF9DE6484,0x1F32FB13 7177 long 0x3FFF0000,0xB6FD91E3,0x28D17790,0x20038B30 7178 long 0x3FFF0000,0xB8FBAF47,0x62FB9EE8,0x200DC3CC 7179 long 0x3FFF0000,0xBAFF5AB2,0x133E45FC,0x9F8B2AE6 7180 long 0x3FFF0000,0xBD08A39F,0x580C36C0,0xA02BBF70 7181 long 0x3FFF0000,0xBF1799B6,0x7A731084,0xA00BF518 7182 long 0x3FFF0000,0xC12C4CCA,0x66709458,0xA041DD41 7183 long 0x3FFF0000,0xC346CCDA,0x24976408,0x9FDF137B 7184 long 0x3FFF0000,0xC5672A11,0x5506DADC,0x201F1568 7185 long 0x3FFF0000,0xC78D74C8,0xABB9B15C,0x1FC13A2E 7186 long 0x3FFF0000,0xC9B9BD86,0x6E2F27A4,0xA03F8F03 7187 long 0x3FFF0000,0xCBEC14FE,0xF2727C5C,0x1FF4907D 7188 long 0x3FFF0000,0xCE248C15,0x1F8480E4,0x9E6E53E4 7189 long 0x3FFF0000,0xD06333DA,0xEF2B2594,0x1FD6D45C 7190 long 0x3FFF0000,0xD2A81D91,0xF12AE45C,0xA076EDB9 7191 long 0x3FFF0000,0xD4F35AAB,0xCFEDFA20,0x9FA6DE21 7192 long 0x3FFF0000,0xD744FCCA,0xD69D6AF4,0x1EE69A2F 7193 long 0x3FFF0000,0xD99D15C2,0x78AFD7B4,0x207F439F 7194 
#--remaining rows of the constant table that precedes setox (values unchanged)
	long		0x3FFF0000,0xDBFBB797,0xDAF23754,0x201EC207
	long		0x3FFF0000,0xDE60F482,0x5E0E9124,0x9E8BE175
	long		0x3FFF0000,0xE0CCDEEC,0x2A94E110,0x20032C4B
	long		0x3FFF0000,0xE33F8972,0xBE8A5A50,0x2004DFF5
	long		0x3FFF0000,0xE5B906E7,0x7C8348A8,0x1E72F47A
	long		0x3FFF0000,0xE8396A50,0x3C4BDC68,0x1F722F22
	long		0x3FFF0000,0xEAC0C6E7,0xDD243930,0xA017E945
	long		0x3FFF0000,0xED4F301E,0xD9942B84,0x1F401A5B
	long		0x3FFF0000,0xEFE4B99B,0xDCDAF5CC,0x9FB9A9E3
	long		0x3FFF0000,0xF281773C,0x59FFB138,0x20744C05
	long		0x3FFF0000,0xF5257D15,0x2486CC2C,0x1F773A19
	long		0x3FFF0000,0xF7D0DF73,0x0AD13BB8,0x1FFE90D5
	long		0x3FFF0000,0xFA83B2DB,0x722A033C,0xA041ED22
	long		0x3FFF0000,0xFD3E0C0C,0xF486C174,0x1F853F3A

#--Scratch aliases used by setox/setoxm1. SCALE/SC share FP_SCR0 and
#--ADJSCALE/ONEBYSC share FP_SCR1; setox uses the first name of each
#--pair, setoxm1 the second.
	set		ADJFLAG,L_SCR2
	set		SCALE,FP_SCR0
	set		ADJSCALE,FP_SCR1
	set		SC,FP_SCR0
	set		ONEBYSC,FP_SCR1

	global		setox
setox:
#--entry point for EXP(X), here X is finite, non-zero, and not NaN's
#--a0 = pointer to the extended precision input X
#--d0 = value written to FPCR just before the final operation
#--d1, a1, fp1 and the L_SCR1/L_SCR2/FP_SCR0/FP_SCR1 scratch areas are
#--overwritten; fp2/fp3 are saved and restored around the polynomial.

#--Step 1.
	mov.l		(%a0),%d1		# load part of input X
	and.l		&0x7FFF0000,%d1		# biased expo. of X
	cmp.l		%d1,&0x3FBE0000		# 2^(-65)
	bge.b		EXPC1			# normal case
	bra		EXPSM

EXPC1:
#--The case |X| >= 2^(-65)
#--(also entered from EM1BIG with d1 = first longword of a positive X)
	mov.w		4(%a0),%d1		# expo. and partial sig. of |X|
	cmp.l		%d1,&0x400CB167		# 16380 log2 trunc. 16 bits
	blt.b		EXPMAIN			# normal case
	bra		EEXPBIG

EXPMAIN:
#--Step 2.
#--This is the normal branch:	2^(-65) <= |X| < 16380 log2.
	fmov.x		(%a0),%fp0		# load input from (a0)

	fmov.x		%fp0,%fp1
	fmul.s		&0x42B8AA3B,%fp0	# 64/log2 * X
	fmovm.x		&0xc,-(%sp)		# save {%fp2/%fp3}
	mov.l		&0,ADJFLAG(%a6)		# no extra scaling in Step 6
	fmov.l		%fp0,%d1		# N = int( X * 64/log2 )
	lea		EEXPTBL(%pc),%a1
	fmov.l		%d1,%fp0		# convert to floating-format

	mov.l		%d1,L_SCR1(%a6)		# save N temporarily
	and.l		&0x3F,%d1		# d1 is J = N mod 64
	lsl.l		&4,%d1			# 16 bytes per table entry
	add.l		%d1,%a1			# address of 2^(J/64)
	mov.l		L_SCR1(%a6),%d1
	asr.l		&6,%d1			# d1 is M
	add.w		&0x3FFF,%d1		# biased expo. of 2^(M)
	mov.w		L2(%pc),L_SCR1(%a6)	# prefetch L2, no need in CB

EXPCONT1:
#--Step 3.
#--fp2,fp3 saved on the stack. fp0 is N, fp1 is X,
#--a1 points to 2^(J/64), d1 is biased expo. of 2^(M)
	fmov.x		%fp0,%fp2
	fmul.s		&0xBC317218,%fp0	# N * L1, L1 = lead(-log2/64)
	fmul.x		L2(%pc),%fp2		# N * L2, L1+L2 = -log2/64
	fadd.x		%fp1,%fp0		# X + N*L1
	fadd.x		%fp2,%fp0		# fp0 is R, reduced arg.

#--Step 4.
#--WE NOW COMPUTE EXP(R)-1 BY A POLYNOMIAL
#-- R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*A5))))
#--TO FULLY USE THE PIPELINE, WE COMPUTE S = R*R
#--[R+R*S*(A2+S*A4)]	+	[S*(A1+S*(A3+S*A5))]

	fmov.x		%fp0,%fp1
	fmul.x		%fp1,%fp1		# fp1 IS S = R*R

	fmov.s		&0x3AB60B70,%fp2	# fp2 IS A5

	fmul.x		%fp1,%fp2		# fp2 IS S*A5
	fmov.x		%fp1,%fp3
	fmul.s		&0x3C088895,%fp3	# fp3 IS S*A4

	fadd.d		EEXPA3(%pc),%fp2	# fp2 IS A3+S*A5
	fadd.d		EEXPA2(%pc),%fp3	# fp3 IS A2+S*A4

	fmul.x		%fp1,%fp2		# fp2 IS S*(A3+S*A5)
	mov.w		%d1,SCALE(%a6)		# SCALE is 2^(M) in extended
	mov.l		&0x80000000,SCALE+4(%a6)
	clr.l		SCALE+8(%a6)

	fmul.x		%fp1,%fp3		# fp3 IS S*(A2+S*A4)

	fadd.s		&0x3F000000,%fp2	# fp2 IS A1+S*(A3+S*A5)
	fmul.x		%fp0,%fp3		# fp3 IS R*S*(A2+S*A4)

	fmul.x		%fp1,%fp2		# fp2 IS S*(A1+S*(A3+S*A5))
	fadd.x		%fp3,%fp0		# fp0 IS R+R*S*(A2+S*A4),

	fmov.x		(%a1)+,%fp1		# fp1 is lead. pt. of 2^(J/64)
	fadd.x		%fp2,%fp0		# fp0 is EXP(R) - 1

#--Step 5
#--final reconstruction process
#--EXP(X) = 2^M * ( 2^(J/64) + 2^(J/64)*(EXP(R)-1) )

	fmul.x		%fp1,%fp0		# 2^(J/64)*(Exp(R)-1)
	fmovm.x		(%sp)+,&0x30		# restore {%fp2/%fp3}
	fadd.s		(%a1),%fp0		# accurate 2^(J/64)

	fadd.x		%fp1,%fp0		# 2^(J/64) + 2^(J/64)*...
	mov.l		ADJFLAG(%a6),%d1

#--Step 6
	tst.l		%d1			# ADJFLAG set (came via EEXPBIG)?
	beq.b		NORMAL
ADJUST:
	fmul.x		ADJSCALE(%a6),%fp0	# apply the 2^(M1) part first
NORMAL:
	fmov.l		%d0,%fpcr		# restore user FPCR
	mov.b		&FMUL_OP,%d1		# last inst is MUL
	fmul.x		SCALE(%a6),%fp0		# multiply 2^(M)
	bra		t_catch

EXPSM:
#--Step 7
	fmovm.x		(%a0),&0x80		# load X
	fmov.l		%d0,%fpcr
	fadd.s		&0x3F800000,%fp0	# 1+X in user mode
	bra		t_pinx2

EEXPBIG:
#--Step 8
	cmp.l		%d1,&0x400CB27C		# 16480 log2
	bgt.b		EXP2BIG
#--Steps 8.2 -- 8.6
#--Same reduction as Step 2, but the scale 2^K is split into
#--2^(M1) * 2^(M) (M1 = K asr 1, M = K - M1) and ADJFLAG is set so that
#--Step 6 applies both factors.
	fmov.x		(%a0),%fp0		# load input from (a0)

	fmov.x		%fp0,%fp1
	fmul.s		&0x42B8AA3B,%fp0	# 64/log2 * X
	fmovm.x		&0xc,-(%sp)		# save {%fp2/%fp3}
	mov.l		&1,ADJFLAG(%a6)
	fmov.l		%fp0,%d1		# N = int( X * 64/log2 )
	lea		EEXPTBL(%pc),%a1
	fmov.l		%d1,%fp0		# convert to floating-format
	mov.l		%d1,L_SCR1(%a6)		# save N temporarily
	and.l		&0x3F,%d1		# d1 is J = N mod 64
	lsl.l		&4,%d1			# 16 bytes per table entry
	add.l		%d1,%a1			# address of 2^(J/64)
	mov.l		L_SCR1(%a6),%d1
	asr.l		&6,%d1			# d1 is K
	mov.l		%d1,L_SCR1(%a6)		# save K temporarily
	asr.l		&1,%d1			# d1 is M1
	sub.l		%d1,L_SCR1(%a6)		# L_SCR1 is M = K - M1
	add.w		&0x3FFF,%d1		# biased expo. of 2^(M1)
	mov.w		%d1,ADJSCALE(%a6)	# ADJSCALE := 2^(M1)
	mov.l		&0x80000000,ADJSCALE+4(%a6)
	clr.l		ADJSCALE+8(%a6)
	mov.l		L_SCR1(%a6),%d1		# d1 is M
	add.w		&0x3FFF,%d1		# biased expo. of 2^(M)
	bra.w		EXPCONT1		# go back to Step 3

EXP2BIG:
#--Step 9
	tst.b		(%a0)			# is X positive or negative?
	bmi		t_unfl2
	bra		t_ovfl2

	global		setoxd
setoxd:
#--entry point for EXP(X), X is denormalized
#--computes 1 + sign(X)*2^(-126) under the user FPCR (d0)
	mov.l		(%a0),-(%sp)
	andi.l		&0x80000000,(%sp)
	ori.l		&0x00800000,(%sp)	# sign(X)*2^(-126)

	fmov.s		&0x3F800000,%fp0

	fmov.l		%d0,%fpcr
	fadd.s		(%sp)+,%fp0
	bra		t_pinx2

	global		setoxm1
setoxm1:
#--entry point for EXPM1(X), here X is finite, non-zero, non-NaN
#--a0 = pointer to the extended precision input X
#--d0 = value written to FPCR just before the final operation

#--Step 1.
#--Step 1.1
	mov.l		(%a0),%d1		# load part of input X
	and.l		&0x7FFF0000,%d1		# biased expo. of X
	cmp.l		%d1,&0x3FFD0000		# 1/4
	bge.b		EM1CON1			# |X| >= 1/4
	bra		EM1SM

EM1CON1:
#--Step 1.3
#--The case |X| >= 1/4
	mov.w		4(%a0),%d1		# expo. and partial sig. of |X|
	cmp.l		%d1,&0x4004C215		# 70log2 rounded up to 16 bits
	ble.b		EM1MAIN			# 1/4 <= |X| <= 70log2
	bra		EM1BIG

EM1MAIN:
#--Step 2.
#--This is the case:	1/4 <= |X| <= 70 log2.
	fmov.x		(%a0),%fp0		# load input from (a0)

	fmov.x		%fp0,%fp1
	fmul.s		&0x42B8AA3B,%fp0	# 64/log2 * X
	fmovm.x		&0xc,-(%sp)		# save {%fp2/%fp3}
	fmov.l		%fp0,%d1		# N = int( X * 64/log2 )
	lea		EEXPTBL(%pc),%a1
	fmov.l		%d1,%fp0		# convert to floating-format

	mov.l		%d1,L_SCR1(%a6)		# save N temporarily
	and.l		&0x3F,%d1		# d1 is J = N mod 64
	lsl.l		&4,%d1			# 16 bytes per table entry
	add.l		%d1,%a1			# address of 2^(J/64)
	mov.l		L_SCR1(%a6),%d1
	asr.l		&6,%d1			# d1 is M
	mov.l		%d1,L_SCR1(%a6)		# save a copy of M

#--Step 3.
#--fp2,fp3 saved on the stack. fp0 is N, fp1 is X,
#--a1 points to 2^(J/64), d1 and L_SCR1 both contain M
	fmov.x		%fp0,%fp2
	fmul.s		&0xBC317218,%fp0	# N * L1, L1 = lead(-log2/64)
	fmul.x		L2(%pc),%fp2		# N * L2, L1+L2 = -log2/64
	fadd.x		%fp1,%fp0		# X + N*L1
	fadd.x		%fp2,%fp0		# fp0 is R, reduced arg.
	add.w		&0x3FFF,%d1		# d1 is biased expo. of 2^M

#--Step 4.
#--WE NOW COMPUTE EXP(R)-1 BY A POLYNOMIAL
#-- R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*(A5 + R*A6)))))
#--TO FULLY USE THE PIPELINE, WE COMPUTE S = R*R
#--[R*S*(A2+S*(A4+S*A6))] + [R+S*(A1+S*(A3+S*A5))]

	fmov.x		%fp0,%fp1
	fmul.x		%fp1,%fp1		# fp1 IS S = R*R

	fmov.s		&0x3950097B,%fp2	# fp2 IS A6

	fmul.x		%fp1,%fp2		# fp2 IS S*A6
	fmov.x		%fp1,%fp3
	fmul.s		&0x3AB60B6A,%fp3	# fp3 IS S*A5

	fadd.d		EM1A4(%pc),%fp2		# fp2 IS A4+S*A6
	fadd.d		EM1A3(%pc),%fp3		# fp3 IS A3+S*A5
	mov.w		%d1,SC(%a6)		# SC is 2^(M) in extended
	mov.l		&0x80000000,SC+4(%a6)
	clr.l		SC+8(%a6)

	fmul.x		%fp1,%fp2		# fp2 IS S*(A4+S*A6)
	mov.l		L_SCR1(%a6),%d1		# d1 is M
	neg.w		%d1			# d1 is -M
	fmul.x		%fp1,%fp3		# fp3 IS S*(A3+S*A5)
	add.w		&0x3FFF,%d1		# biased expo. of 2^(-M)
	fadd.d		EM1A2(%pc),%fp2		# fp2 IS A2+S*(A4+S*A6)
	fadd.s		&0x3F000000,%fp3	# fp3 IS A1+S*(A3+S*A5)

	fmul.x		%fp1,%fp2		# fp2 IS S*(A2+S*(A4+S*A6))
	or.w		&0x8000,%d1		# signed/expo. of -2^(-M)
	mov.w		%d1,ONEBYSC(%a6)	# OnebySc is -2^(-M)
	mov.l		&0x80000000,ONEBYSC+4(%a6)
	clr.l		ONEBYSC+8(%a6)
	fmul.x		%fp3,%fp1		# fp1 IS S*(A1+S*(A3+S*A5))

	fmul.x		%fp0,%fp2		# fp2 IS R*S*(A2+S*(A4+S*A6))
	fadd.x		%fp1,%fp0		# fp0 IS R+S*(A1+S*(A3+S*A5))

	fadd.x		%fp2,%fp0		# fp0 IS EXP(R)-1

	fmovm.x		(%sp)+,&0x30		# restore {%fp2/%fp3}

#--Step 5
#--Compute 2^(J/64)*p

	fmul.x		(%a1),%fp0		# 2^(J/64)*(Exp(R)-1)

#--Step 6
#--The three cases below add the same terms (p, t, T, OnebySc) in a
#--different order depending on M.
#--Step 6.1
	mov.l		L_SCR1(%a6),%d1		# retrieve M
	cmp.l		%d1,&63
	ble.b		MLE63
#--Step 6.2	M >= 64
	fmov.s		12(%a1),%fp1		# fp1 is t
	fadd.x		ONEBYSC(%a6),%fp1	# fp1 is t+OnebySc
	fadd.x		%fp1,%fp0		# p+(t+OnebySc), fp1 released
	fadd.x		(%a1),%fp0		# T+(p+(t+OnebySc))
	bra		EM1SCALE
MLE63:
#--Step 6.3	M <= 63
	cmp.l		%d1,&-3
	bge.b		MGEN3
MLTN3:
#--Step 6.4	M <= -4
	fadd.s		12(%a1),%fp0		# p+t
	fadd.x		(%a1),%fp0		# T+(p+t)
	fadd.x		ONEBYSC(%a6),%fp0	# OnebySc + (T+(p+t))
	bra		EM1SCALE
MGEN3:
#--Step 6.5	-3 <= M <= 63
	fmov.x		(%a1)+,%fp1		# fp1 is T
	fadd.s		(%a1),%fp0		# fp0 is p+t
	fadd.x		ONEBYSC(%a6),%fp1	# fp1 is T+OnebySc
	fadd.x		%fp1,%fp0		# (T+OnebySc)+(p+t)

EM1SCALE:
#--Step 6.6
	fmov.l		%d0,%fpcr
	fmul.x		SC(%a6),%fp0
	bra		t_inx2

EM1SM:
#--Step 7	|X| < 1/4.
#--d1 still holds the biased exponent of X (upper word) from Step 1.1
	cmp.l		%d1,&0x3FBE0000		# 2^(-65)
	bge.b		EM1POLY

EM1TINY:
#--Step 8	|X| < 2^(-65)
#--NOTE(review): 0x0033 is biased exponent 51, i.e. 2^(51-0x3FFF) =
#--2^(-16332), while the comment below says 2^(-16312) -- confirm which
#--is intended before touching either.
	cmp.l		%d1,&0x00330000		# 2^(-16312)
	blt.b		EM12TINY
#--Step 8.2
	mov.l		&0x80010000,SC(%a6)	# SC is -2^(-16382)
	mov.l		&0x80000000,SC+4(%a6)
	clr.l		SC+8(%a6)
	fmov.x		(%a0),%fp0
	fmov.l		%d0,%fpcr
	mov.b		&FADD_OP,%d1		# last inst is ADD
	fadd.x		SC(%a6),%fp0
	bra		t_catch

EM12TINY:
#--Step 8.3
#--scale X by TWO140, add SC, then scale back by TWON140 under user FPCR
	fmov.x		(%a0),%fp0
	fmul.d		TWO140(%pc),%fp0
	mov.l		&0x80010000,SC(%a6)
	mov.l		&0x80000000,SC+4(%a6)
	clr.l		SC+8(%a6)
	fadd.x		SC(%a6),%fp0
	fmov.l		%d0,%fpcr
	mov.b		&FMUL_OP,%d1		# last inst is MUL
	fmul.d		TWON140(%pc),%fp0
	bra		t_catch

EM1POLY:
#--Step 9	exp(X)-1 by a simple polynomial
	fmov.x		(%a0),%fp0		# fp0 is X
	fmul.x		%fp0,%fp0		# fp0 is S := X*X
	fmovm.x		&0xc,-(%sp)		# save {%fp2/%fp3}
	fmov.s		&0x2F30CAA8,%fp1	# fp1 is B12
	fmul.x		%fp0,%fp1		# fp1 is S*B12
	fmov.s		&0x310F8290,%fp2	# fp2 is B11
	fadd.s		&0x32D73220,%fp1	# fp1 is B10+S*B12

	fmul.x		%fp0,%fp2		# fp2 is S*B11
	fmul.x		%fp0,%fp1		# fp1 is S*(B10 + ...

	fadd.s		&0x3493F281,%fp2	# fp2 is B9+S*...
	fadd.d		EM1B8(%pc),%fp1		# fp1 is B8+S*...

	fmul.x		%fp0,%fp2		# fp2 is S*(B9+...
	fmul.x		%fp0,%fp1		# fp1 is S*(B8+...

	fadd.d		EM1B7(%pc),%fp2		# fp2 is B7+S*...
	fadd.d		EM1B6(%pc),%fp1		# fp1 is B6+S*...

	fmul.x		%fp0,%fp2		# fp2 is S*(B7+...
	fmul.x		%fp0,%fp1		# fp1 is S*(B6+...

	fadd.d		EM1B5(%pc),%fp2		# fp2 is B5+S*...
	fadd.d		EM1B4(%pc),%fp1		# fp1 is B4+S*...

	fmul.x		%fp0,%fp2		# fp2 is S*(B5+...
	fmul.x		%fp0,%fp1		# fp1 is S*(B4+...

	fadd.d		EM1B3(%pc),%fp2		# fp2 is B3+S*...
	fadd.x		EM1B2(%pc),%fp1		# fp1 is B2+S*...

	fmul.x		%fp0,%fp2		# fp2 is S*(B3+...
	fmul.x		%fp0,%fp1		# fp1 is S*(B2+...

	fmul.x		%fp0,%fp2		# fp2 is S*S*(B3+...)
	fmul.x		(%a0),%fp1		# fp1 is X*S*(B2...

	fmul.s		&0x3F000000,%fp0	# fp0 is S*B1
	fadd.x		%fp2,%fp1		# fp1 is Q

	fmovm.x		(%sp)+,&0x30		# restore {%fp2/%fp3}

	fadd.x		%fp1,%fp0		# fp0 is S*B1+Q

	fmov.l		%d0,%fpcr
	fadd.x		(%a0),%fp0
	bra		t_inx2

EM1BIG:
#--Step 10	|X| > 70 log2
#--Positive X is handed to the EXP(X) path at EXPC1; otherwise the
#--result is -1 + 2^(-126) under the user FPCR.
	mov.l		(%a0),%d1
	tst.l		%d1			# sign/expo. longword > 0 ?
	bgt.w		EXPC1
#--Step 10.2
	fmov.s		&0xBF800000,%fp0	# fp0 is -1
	fmov.l		%d0,%fpcr
	fadd.s		&0x00800000,%fp0	# -1 + 2^(-126)
	bra		t_minx2

	global		setoxm1d
setoxm1d:
#--entry point for EXPM1(X), here X is denormalized
#--Step 0.
	bra		t_extdnrm

#########################################################################
# sgetexp():	returns the exponent portion of the input argument.	#
#		The exponent bias is removed and the exponent value is	#
#		returned as an extended precision number in fp0.	#
# sgetexpd():	handles denormalized numbers.				#
#									#
# sgetman():	extracts the mantissa of the input argument. The	#
#		mantissa is converted to an extended precision number w/ #
#		an exponent of $3fff and is returned in fp0. The range of #
#		the result is [1.0 - 2.0).				#
# sgetmand():	handles denormalized numbers.
#
#									#
# INPUT ***************************************************************	#
#	a0  = pointer to extended precision input			#
#									#
# OUTPUT **************************************************************	#
#	fp0 = exponent(X) or mantissa(X)				#
#									#
#########################################################################

	global		sgetexp
sgetexp:
	mov.w		SRC_EX(%a0),%d0		# get the exponent
	bclr		&0xf,%d0		# clear the sign bit
	subi.w		&0x3fff,%d0		# subtract off the bias
	fmov.w		%d0,%fp0		# return exp in fp0
# the branch below presumably still sees the integer condition codes
# left by subi.w (the fmov.w in between targets the FPU).
	blt.b		sgetexpn		# it's negative
	rts

sgetexpn:
	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
	rts

	global		sgetexpd
sgetexpd:
# NOTE(review): relies on norm leaving the shift amount in d0, as the
# comment on neg.w states -- norm is defined outside this section.
	bsr.l		norm			# normalize
	neg.w		%d0			# new exp = -(shft amt)
	subi.w		&0x3fff,%d0		# subtract off the bias
	fmov.w		%d0,%fp0		# return exp in fp0
	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
	rts

	global		sgetman
sgetman:
	mov.w		SRC_EX(%a0),%d0		# get the sign/exp word
	ori.w		&0x7fff,%d0		# set all exp bits, keep the sign
	bclr		&0xe,%d0		# clear bit 14: exp = $3fff, sign kept

# here, we build the result in a tmp location so as not to disturb the input
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6) # copy to tmp loc
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6) # copy to tmp loc
	mov.w		%d0,FP_SCR0_EX(%a6)	# insert new exponent
	fmov.x		FP_SCR0(%a6),%fp0	# put new value back in fp0
	bmi.b		sgetmann		# it's negative
	rts

sgetmann:
	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
	rts

#
# For denormalized numbers, shift the mantissa until the j-bit = 1,
# then load the exponent with +/- $3fff.
#
	global		sgetmand
sgetmand:
	bsr.l		norm			# normalize exponent
	bra.b		sgetman

#########################################################################
# scosh():  computes the hyperbolic cosine of a normalized input	#
# scoshd(): computes the hyperbolic cosine of a denormalized input	#
#									#
# INPUT ***************************************************************	#
#	a0 = pointer to extended precision input			#
#	d0 = round precision,mode					#
#									#
# OUTPUT **************************************************************	#
#	fp0 = cosh(X)							#
#									#
# ACCURACY and MONOTONICITY *******************************************	#
#	The returned result is within 3 ulps in 64 significant bits,	#
#	i.e. within 0.5001 ulp to 53 bits if the result is subsequently	#
#	rounded to double precision. The result is provably monotonic	#
#	in double precision.						#
#									#
# ALGORITHM ***********************************************************	#
#									#
#	COSH								#
#	1. If |X| > 16380 log2, go to 3.				#
#									#
#	2. (|X| <= 16380 log2) Cosh(X) is obtained by the formulae	#
#		y = |X|, z = exp(Y), and				#
#		cosh(X) = (1/2)*( z + 1/z ).				#
#		Exit.							#
#									#
#	3. (|X| > 16380 log2). If |X| > 16480 log2, go to 5.		#
#									#
#	4. (16380 log2 < |X| <= 16480 log2)				#
#		cosh(X) = exp(|X|)/2.					#
#		However, invoking exp(|X|) may cause premature		#
#		overflow. Thus, we calculate cosh(X) as follows:	#
#		Y	:= |X|						#
#		Fact	:=	2**(16380)				#
#		Y'	:= Y - 16381 log2				#
#		cosh(X) := Fact * exp(Y').				#
#		Exit.							#
#									#
#	5. (|X| > 16480 log2) cosh(X) must overflow. Return		#
#		Huge*Huge to generate overflow and an infinity with	#
#		the appropriate sign. Huge is the largest finite number	#
#		in extended format. Exit.
#
#									#
#########################################################################

# 2^16380 in extended precision (biased exponent 0x7FFB, j-bit set)
TWO16380:
	long		0x7FFB0000,0x80000000,0x00000000,0x00000000

	global		scosh
scosh:
#--a0 = pointer to extended precision X, d0 = user round precision,mode.
#--d1 is built as the "compacted" operand: sign/exponent in the upper
#--word, top 16 mantissa bits in the lower word.
	fmov.x		(%a0),%fp0		# LOAD INPUT

	mov.l		(%a0),%d1
	mov.w		4(%a0),%d1
	and.l		&0x7FFFFFFF,%d1		# compacted |X|
	cmp.l		%d1,&0x400CB167		# same bound setox labels 16380 log2
	bgt.b		COSHBIG

#--THIS IS THE USUAL CASE, |X| < 16380 LOG2
#--COSH(X) = (1/2) * ( EXP(X) + 1/EXP(X) )

	fabs.x		%fp0			# |X|

#--call setox with d0 cleared; the user's d0 is kept on the stack
	mov.l		%d0,-(%sp)
	clr.l		%d0
	fmovm.x		&0x01,-(%sp)		# save |X| to stack
	lea		(%sp),%a0		# pass ptr to |X|
	bsr		setox			# FP0 IS EXP(|X|)
	add.l		&0xc,%sp		# erase |X| from stack
	fmul.s		&0x3F000000,%fp0	# (1/2)EXP(|X|)
	mov.l		(%sp)+,%d0

	fmov.s		&0x3E800000,%fp1	# (1/4)
	fdiv.x		%fp0,%fp1		# 1/(2 EXP(|X|))

	fmov.l		%d0,%fpcr
	mov.b		&FADD_OP,%d1		# last inst is ADD
	fadd.x		%fp1,%fp0
	bra		t_catch

COSHBIG:
	cmp.l		%d1,&0x400CB2B3
	bgt.b		COSHHUGE

#--COSH(X) = 2^16380 * EXP(|X| - 16381 LOG2); T1+T2 together form the
#--16381 log2 constant (lead part, then correction).
	fabs.x		%fp0
	fsub.d		T1(%pc),%fp0		# (|X|-16381LOG2_LEAD)
	fsub.d		T2(%pc),%fp0		# |X| - 16381 LOG2, ACCURATE

	mov.l		%d0,-(%sp)
	clr.l		%d0
	fmovm.x		&0x01,-(%sp)		# save fp0 to stack
	lea		(%sp),%a0		# pass ptr to fp0
	bsr		setox
	add.l		&0xc,%sp		# clear fp0 from stack
	mov.l		(%sp)+,%d0

	fmov.l		%d0,%fpcr
	mov.b		&FMUL_OP,%d1		# last inst is MUL
	fmul.x		TWO16380(%pc),%fp0
	bra		t_catch

COSHHUGE:
	bra		t_ovfl2

	global		scoshd
#--COSH(X) = 1 FOR DENORMALIZED X
#--computed as 1 + 2^(-126) under the user FPCR
scoshd:
	fmov.s		&0x3F800000,%fp0

	fmov.l		%d0,%fpcr
	fadd.s		&0x00800000,%fp0
	bra		t_pinx2

#########################################################################
# ssinh():  computes the hyperbolic sine of a normalized input		#
# ssinhd(): computes the hyperbolic sine of a denormalized input	#
#									#
# INPUT ***************************************************************	#
#	a0 = pointer to
extended precision input # 7799# d0 = round precision,mode # 7800# # 7801# OUTPUT ************************************************************** # 7802# fp0 = sinh(X) # 7803# # 7804# ACCURACY and MONOTONICITY ******************************************* # 7805# The returned result is within 3 ulps in 64 significant bit, # 7806# i.e. within 0.5001 ulp to 53 bits if the result is subsequently # 7807# rounded to double precision. The result is provably monotonic # 7808# in double precision. # 7809# # 7810# ALGORITHM *********************************************************** # 7811# # 7812# SINH # 7813# 1. If |X| > 16380 log2, go to 3. # 7814# # 7815# 2. (|X| <= 16380 log2) Sinh(X) is obtained by the formula # 7816# y = |X|, sgn = sign(X), and z = expm1(Y), # 7817# sinh(X) = sgn*(1/2)*( z + z/(1+z) ). # 7818# Exit. # 7819# # 7820# 3. If |X| > 16480 log2, go to 5. # 7821# # 7822# 4. (16380 log2 < |X| <= 16480 log2) # 7823# sinh(X) = sign(X) * exp(|X|)/2. # 7824# However, invoking exp(|X|) may cause premature overflow. # 7825# Thus, we calculate sinh(X) as follows: # 7826# Y := |X| # 7827# sgn := sign(X) # 7828# sgnFact := sgn * 2**(16380) # 7829# Y' := Y - 16381 log2 # 7830# sinh(X) := sgnFact * exp(Y'). # 7831# Exit. # 7832# # 7833# 5. (|X| > 16480 log2) sinh(X) must overflow. Return # 7834# sign(X)*Huge*Huge to generate overflow and an infinity with # 7835# the appropriate sign. Huge is the largest finite number in # 7836# extended format. Exit. 
#
#									#
#########################################################################

	global		ssinh
ssinh:
#--a0 = pointer to extended precision X, d0 = user round precision,mode.
#--a1 keeps the compacted operand (sign/exponent : top 16 mantissa bits)
#--so that sign(X) can be recovered after the exp/expm1 call.
	fmov.x		(%a0),%fp0		# LOAD INPUT

	mov.l		(%a0),%d1
	mov.w		4(%a0),%d1
	mov.l		%d1,%a1			# save (compacted) operand
	and.l		&0x7FFFFFFF,%d1
	cmp.l		%d1,&0x400CB167
	bgt.b		SINHBIG

#--THIS IS THE USUAL CASE, |X| < 16380 LOG2
#--Y = |X|, Z = EXPM1(Y), SINH(X) = SIGN(X)*(1/2)*( Z + Z/(1+Z) )

	fabs.x		%fp0			# Y = |X|

	movm.l		&0x8040,-(%sp)		# {a1/d0}
	fmovm.x		&0x01,-(%sp)		# save Y on stack
	lea		(%sp),%a0		# pass ptr to Y
	clr.l		%d0
	bsr		setoxm1			# FP0 IS Z = EXPM1(Y)
	add.l		&0xc,%sp		# clear Y from stack
	fmov.l		&0,%fpcr
	movm.l		(%sp)+,&0x0201		# {a1/d0}

	fmov.x		%fp0,%fp1
	fadd.s		&0x3F800000,%fp1	# 1+Z
	fmov.x		%fp0,-(%sp)		# park Z on the stack
	fdiv.x		%fp1,%fp0		# Z/(1+Z)
	mov.l		%a1,%d1
	and.l		&0x80000000,%d1		# sign(X)
	or.l		&0x3F000000,%d1		# sign(X)*(1/2) in sgl fmt
	fadd.x		(%sp)+,%fp0		# Z + Z/(1+Z)
	mov.l		%d1,-(%sp)

	fmov.l		%d0,%fpcr
	mov.b		&FMUL_OP,%d1		# last inst is MUL
	fmul.s		(%sp)+,%fp0		# last fp inst - possible exceptions set
	bra		t_catch

SINHBIG:
	cmp.l		%d1,&0x400CB2B3
	bgt		t_ovfl
	fabs.x		%fp0
	fsub.d		T1(%pc),%fp0		# (|X|-16381LOG2_LEAD)
#--build sign(X)*2^16380 on the stack as a 12-byte extended operand
#--(pushed low longword first)
	mov.l		&0,-(%sp)
	mov.l		&0x80000000,-(%sp)
	mov.l		%a1,%d1
	and.l		&0x80000000,%d1
	or.l		&0x7FFB0000,%d1
	mov.l		%d1,-(%sp)		# EXTENDED FMT
	fsub.d		T2(%pc),%fp0		# |X| - 16381 LOG2, ACCURATE

	mov.l		%d0,-(%sp)
	clr.l		%d0
	fmovm.x		&0x01,-(%sp)		# save fp0 on stack
	lea		(%sp),%a0		# pass ptr to fp0
	bsr		setox
	add.l		&0xc,%sp		# clear fp0 from stack

	mov.l		(%sp)+,%d0
	fmov.l		%d0,%fpcr
	mov.b		&FMUL_OP,%d1		# last inst is MUL
	fmul.x		(%sp)+,%fp0		# possible exception
	bra		t_catch

	global		ssinhd
#--SINH(X) = X FOR DENORMALIZED X
ssinhd:
	bra		t_extdnrm

#########################################################################
7912# stanh(): computes the hyperbolic tangent of a normalized input # 7913# stanhd(): computes the hyperbolic tangent of a denormalized input # 7914# # 7915# INPUT *************************************************************** # 7916# a0 = pointer to extended precision input # 7917# d0 = round precision,mode # 7918# # 7919# OUTPUT ************************************************************** # 7920# fp0 = tanh(X) # 7921# # 7922# ACCURACY and MONOTONICITY ******************************************* # 7923# The returned result is within 3 ulps in 64 significant bit, # 7924# i.e. within 0.5001 ulp to 53 bits if the result is subsequently # 7925# rounded to double precision. The result is provably monotonic # 7926# in double precision. # 7927# # 7928# ALGORITHM *********************************************************** # 7929# # 7930# TANH # 7931# 1. If |X| >= (5/2) log2 or |X| <= 2**(-40), go to 3. # 7932# # 7933# 2. (2**(-40) < |X| < (5/2) log2) Calculate tanh(X) by # 7934# sgn := sign(X), y := 2|X|, z := expm1(Y), and # 7935# tanh(X) = sgn*( z/(2+z) ). # 7936# Exit. # 7937# # 7938# 3. (|X| <= 2**(-40) or |X| >= (5/2) log2). If |X| < 1, # 7939# go to 7. # 7940# # 7941# 4. (|X| >= (5/2) log2) If |X| >= 50 log2, go to 6. # 7942# # 7943# 5. ((5/2) log2 <= |X| < 50 log2) Calculate tanh(X) by # 7944# sgn := sign(X), y := 2|X|, z := exp(Y), # 7945# tanh(X) = sgn - [ sgn*2/(1+z) ]. # 7946# Exit. # 7947# # 7948# 6. (|X| >= 50 log2) Tanh(X) = +-1 (round to nearest). Thus, we # 7949# calculate Tanh(X) by # 7950# sgn := sign(X), Tiny := 2**(-126), # 7951# tanh(X) := sgn - sgn*Tiny. # 7952# Exit. # 7953# # 7954# 7. (|X| < 2**(-40)). Tanh(X) = X. Exit. 
#
#									#
#########################################################################

	set		X,FP_SCR0
	set		XFRAC,X+4

	set		SGN,L_SCR3

	set		V,FP_SCR0

	global		stanh
stanh:
#--a0 = pointer to extended precision X, d0 = user round precision,mode.
#--The first longword of X(a6) is overwritten with the compacted operand
#--(sign/exponent : top 16 mantissa bits); d1 holds its absolute value.
	fmov.x		(%a0),%fp0		# LOAD INPUT

	fmov.x		%fp0,X(%a6)
	mov.l		(%a0),%d1
	mov.w		4(%a0),%d1
	mov.l		%d1,X(%a6)
	and.l		&0x7FFFFFFF,%d1
	cmp.l		%d1, &0x3fd78000	# is |X| < 2^(-40)?
	blt.w		TANHBORS		# yes
	cmp.l		%d1, &0x3fffddce	# is |X| > (5/2)LOG2?
	bgt.w		TANHBORS		# yes

#--THIS IS THE USUAL CASE
#--Y = 2|X|, Z = EXPM1(Y), TANH(X) = SIGN(X) * Z / (Z+2).

	mov.l		X(%a6),%d1
	mov.l		%d1,SGN(%a6)
	and.l		&0x7FFF0000,%d1		# drop sign and mantissa bits
	add.l		&0x00010000,%d1		# EXPONENT OF 2|X|
	mov.l		%d1,X(%a6)
	and.l		&0x80000000,SGN(%a6)	# SGN keeps only sign(X)
	fmov.x		X(%a6),%fp0		# FP0 IS Y = 2|X|

	mov.l		%d0,-(%sp)
	clr.l		%d0
	fmovm.x		&0x1,-(%sp)		# save Y on stack
	lea		(%sp),%a0		# pass ptr to Y
	bsr		setoxm1			# FP0 IS Z = EXPM1(Y)
	add.l		&0xc,%sp		# clear Y from stack
	mov.l		(%sp)+,%d0

	fmov.x		%fp0,%fp1
	fadd.s		&0x40000000,%fp1	# Z+2
	mov.l		SGN(%a6),%d1
	fmov.x		%fp1,V(%a6)
	eor.l		%d1,V(%a6)		# V = SIGN(X)*(Z+2)

	fmov.l		%d0,%fpcr		# restore users round prec,mode
	fdiv.x		V(%a6),%fp0
	bra		t_inx2

TANHBORS:
	cmp.l		%d1,&0x3FFF8000
	blt.w		TANHSM

	cmp.l		%d1,&0x40048AA1
	bgt.w		TANHHUGE

#-- (5/2) LOG2 < |X| < 50 LOG2,
#--TANH(X) = 1 - (2/[EXP(2X)+1]). LET Y = 2|X|, SGN = SIGN(X),
#--TANH(X) = SGN -	SGN*2/[EXP(Y)+1].

	mov.l		X(%a6),%d1
	mov.l		%d1,SGN(%a6)
	and.l		&0x7FFF0000,%d1
	add.l		&0x00010000,%d1		# EXPO OF 2|X|
	mov.l		%d1,X(%a6)		# Y = 2|X|
	and.l		&0x80000000,SGN(%a6)
#--SGN is not loaded into d1 here: setox overwrites d1 on entry and the
#--value is fetched after the call.
	fmov.x		X(%a6),%fp0		# Y = 2|X|

	mov.l		%d0,-(%sp)
	clr.l		%d0
	fmovm.x		&0x01,-(%sp)		# save Y on stack
	lea		(%sp),%a0		# pass ptr to Y
	bsr		setox			# FP0 IS EXP(Y)
	add.l		&0xc,%sp		# clear Y from stack
	mov.l		(%sp)+,%d0
	mov.l		SGN(%a6),%d1
	fadd.s		&0x3F800000,%fp0	# EXP(Y)+1

	eor.l		&0xC0000000,%d1		# -SIGN(X)*2
	fmov.s		%d1,%fp1		# -SIGN(X)*2 IN SGL FMT
	fdiv.x		%fp0,%fp1		# -SIGN(X)2 / [EXP(Y)+1 ]

	mov.l		SGN(%a6),%d1
	or.l		&0x3F800000,%d1		# SGN
	fmov.s		%d1,%fp0		# SGN IN SGL FMT

	fmov.l		%d0,%fpcr		# restore users round prec,mode
	mov.b		&FADD_OP,%d1		# last inst is ADD
	fadd.x		%fp1,%fp0
	bra		t_inx2

TANHSM:
#--TANH(X) = X: reload the saved operand under the user FPCR
	fmov.l		%d0,%fpcr		# restore users round prec,mode
	mov.b		&FMOV_OP,%d1		# last inst is MOVE
	fmov.x		X(%a6),%fp0		# last inst - possible exception set
	bra		t_catch

#---RETURN SGN(X) - SGN(X)EPS
TANHHUGE:
	mov.l		X(%a6),%d1
	and.l		&0x80000000,%d1
	or.l		&0x3F800000,%d1		# SIGN(X)*1 in sgl fmt
	fmov.s		%d1,%fp0
	and.l		&0x80000000,%d1
	eor.l		&0x80800000,%d1		# -SIGN(X)*EPS

	fmov.l		%d0,%fpcr		# restore users round prec,mode
	fadd.s		%d1,%fp0
	bra		t_inx2

	global		stanhd
#--TANH(X) = X FOR DENORMALIZED X
stanhd:
	bra		t_extdnrm

#########################################################################
# slogn():    computes the natural logarithm of a normalized input	#
# slognd():   computes the natural logarithm of a denormalized input	#
# slognp1():  computes the log(1+X) of a normalized input		#
# slognp1d(): computes the log(1+X) of a denormalized input		#
#									#
# INPUT ***************************************************************	#
#	a0 = pointer to extended precision input			#
#	d0
= round precision,mode # 8084# # 8085# OUTPUT ************************************************************** # 8086# fp0 = log(X) or log(1+X) # 8087# # 8088# ACCURACY and MONOTONICITY ******************************************* # 8089# The returned result is within 2 ulps in 64 significant bit, # 8090# i.e. within 0.5001 ulp to 53 bits if the result is subsequently # 8091# rounded to double precision. The result is provably monotonic # 8092# in double precision. # 8093# # 8094# ALGORITHM *********************************************************** # 8095# LOGN: # 8096# Step 1. If |X-1| < 1/16, approximate log(X) by an odd # 8097# polynomial in u, where u = 2(X-1)/(X+1). Otherwise, # 8098# move on to Step 2. # 8099# # 8100# Step 2. X = 2**k * Y where 1 <= Y < 2. Define F to be the first # 8101# seven significant bits of Y plus 2**(-7), i.e. # 8102# F = 1.xxxxxx1 in base 2 where the six "x" match those # 8103# of Y. Note that |Y-F| <= 2**(-7). # 8104# # 8105# Step 3. Define u = (Y-F)/F. Approximate log(1+u) by a # 8106# polynomial in u, log(1+u) = poly. # 8107# # 8108# Step 4. Reconstruct # 8109# log(X) = log( 2**k * Y ) = k*log(2) + log(F) + log(1+u) # 8110# by k*log(2) + (log(F) + poly). The values of log(F) are # 8111# calculated beforehand and stored in the program. # 8112# # 8113# lognp1: # 8114# Step 1: If |X| < 1/16, approximate log(1+X) by an odd # 8115# polynomial in u where u = 2X/(2+X). Otherwise, move on # 8116# to Step 2. # 8117# # 8118# Step 2: Let 1+X = 2**k * Y, where 1 <= Y < 2. Define F as done # 8119# in Step 2 of the algorithm for LOGN and compute # 8120# log(1+X) as k*log(2) + log(F) + poly where poly # 8121# approximates log(1+u), u = (Y-F)/F. # 8122# # 8123# Implementation Notes: # 8124# Note 1. There are 64 different possible values for F, thus 64 # 8125# log(F)'s need to be tabulated. Moreover, the values of # 8126# 1/F are also tabulated so that the division in (Y-F)/F # 8127# can be performed by a multiplication. # 8128# # 8129# Note 2. 
In Step 2 of lognp1, in order to preserved accuracy, # 8130# the value Y-F has to be calculated carefully when # 8131# 1/2 <= X < 3/2. # 8132# # 8133# Note 3. To fully exploit the pipeline, polynomials are usually # 8134# separated into two parts evaluated independently before # 8135# being added up. # 8136# # 8137######################################################################### 8138LOGOF2: 8139 long 0x3FFE0000,0xB17217F7,0xD1CF79AC,0x00000000 8140 8141one: 8142 long 0x3F800000 8143zero: 8144 long 0x00000000 8145infty: 8146 long 0x7F800000 8147negone: 8148 long 0xBF800000 8149 8150LOGA6: 8151 long 0x3FC2499A,0xB5E4040B 8152LOGA5: 8153 long 0xBFC555B5,0x848CB7DB 8154 8155LOGA4: 8156 long 0x3FC99999,0x987D8730 8157LOGA3: 8158 long 0xBFCFFFFF,0xFF6F7E97 8159 8160LOGA2: 8161 long 0x3FD55555,0x555555A4 8162LOGA1: 8163 long 0xBFE00000,0x00000008 8164 8165LOGB5: 8166 long 0x3F175496,0xADD7DAD6 8167LOGB4: 8168 long 0x3F3C71C2,0xFE80C7E0 8169 8170LOGB3: 8171 long 0x3F624924,0x928BCCFF 8172LOGB2: 8173 long 0x3F899999,0x999995EC 8174 8175LOGB1: 8176 long 0x3FB55555,0x55555555 8177TWO: 8178 long 0x40000000,0x00000000 8179 8180LTHOLD: 8181 long 0x3f990000,0x80000000,0x00000000,0x00000000 8182 8183LOGTBL: 8184 long 0x3FFE0000,0xFE03F80F,0xE03F80FE,0x00000000 8185 long 0x3FF70000,0xFF015358,0x833C47E2,0x00000000 8186 long 0x3FFE0000,0xFA232CF2,0x52138AC0,0x00000000 8187 long 0x3FF90000,0xBDC8D83E,0xAD88D549,0x00000000 8188 long 0x3FFE0000,0xF6603D98,0x0F6603DA,0x00000000 8189 long 0x3FFA0000,0x9CF43DCF,0xF5EAFD48,0x00000000 8190 long 0x3FFE0000,0xF2B9D648,0x0F2B9D65,0x00000000 8191 long 0x3FFA0000,0xDA16EB88,0xCB8DF614,0x00000000 8192 long 0x3FFE0000,0xEF2EB71F,0xC4345238,0x00000000 8193 long 0x3FFB0000,0x8B29B775,0x1BD70743,0x00000000 8194 long 0x3FFE0000,0xEBBDB2A5,0xC1619C8C,0x00000000 8195 long 0x3FFB0000,0xA8D839F8,0x30C1FB49,0x00000000 8196 long 0x3FFE0000,0xE865AC7B,0x7603A197,0x00000000 8197 long 0x3FFB0000,0xC61A2EB1,0x8CD907AD,0x00000000 8198 long 
0x3FFE0000,0xE525982A,0xF70C880E,0x00000000 8199 long 0x3FFB0000,0xE2F2A47A,0xDE3A18AF,0x00000000 8200 long 0x3FFE0000,0xE1FC780E,0x1FC780E2,0x00000000 8201 long 0x3FFB0000,0xFF64898E,0xDF55D551,0x00000000 8202 long 0x3FFE0000,0xDEE95C4C,0xA037BA57,0x00000000 8203 long 0x3FFC0000,0x8DB956A9,0x7B3D0148,0x00000000 8204 long 0x3FFE0000,0xDBEB61EE,0xD19C5958,0x00000000 8205 long 0x3FFC0000,0x9B8FE100,0xF47BA1DE,0x00000000 8206 long 0x3FFE0000,0xD901B203,0x6406C80E,0x00000000 8207 long 0x3FFC0000,0xA9372F1D,0x0DA1BD17,0x00000000 8208 long 0x3FFE0000,0xD62B80D6,0x2B80D62C,0x00000000 8209 long 0x3FFC0000,0xB6B07F38,0xCE90E46B,0x00000000 8210 long 0x3FFE0000,0xD3680D36,0x80D3680D,0x00000000 8211 long 0x3FFC0000,0xC3FD0329,0x06488481,0x00000000 8212 long 0x3FFE0000,0xD0B69FCB,0xD2580D0B,0x00000000 8213 long 0x3FFC0000,0xD11DE0FF,0x15AB18CA,0x00000000 8214 long 0x3FFE0000,0xCE168A77,0x25080CE1,0x00000000 8215 long 0x3FFC0000,0xDE1433A1,0x6C66B150,0x00000000 8216 long 0x3FFE0000,0xCB8727C0,0x65C393E0,0x00000000 8217 long 0x3FFC0000,0xEAE10B5A,0x7DDC8ADD,0x00000000 8218 long 0x3FFE0000,0xC907DA4E,0x871146AD,0x00000000 8219 long 0x3FFC0000,0xF7856E5E,0xE2C9B291,0x00000000 8220 long 0x3FFE0000,0xC6980C69,0x80C6980C,0x00000000 8221 long 0x3FFD0000,0x82012CA5,0xA68206D7,0x00000000 8222 long 0x3FFE0000,0xC4372F85,0x5D824CA6,0x00000000 8223 long 0x3FFD0000,0x882C5FCD,0x7256A8C5,0x00000000 8224 long 0x3FFE0000,0xC1E4BBD5,0x95F6E947,0x00000000 8225 long 0x3FFD0000,0x8E44C60B,0x4CCFD7DE,0x00000000 8226 long 0x3FFE0000,0xBFA02FE8,0x0BFA02FF,0x00000000 8227 long 0x3FFD0000,0x944AD09E,0xF4351AF6,0x00000000 8228 long 0x3FFE0000,0xBD691047,0x07661AA3,0x00000000 8229 long 0x3FFD0000,0x9A3EECD4,0xC3EAA6B2,0x00000000 8230 long 0x3FFE0000,0xBB3EE721,0xA54D880C,0x00000000 8231 long 0x3FFD0000,0xA0218434,0x353F1DE8,0x00000000 8232 long 0x3FFE0000,0xB92143FA,0x36F5E02E,0x00000000 8233 long 0x3FFD0000,0xA5F2FCAB,0xBBC506DA,0x00000000 8234 long 0x3FFE0000,0xB70FBB5A,0x19BE3659,0x00000000 8235 long 
0x3FFD0000,0xABB3B8BA,0x2AD362A5,0x00000000 8236 long 0x3FFE0000,0xB509E68A,0x9B94821F,0x00000000 8237 long 0x3FFD0000,0xB1641795,0xCE3CA97B,0x00000000 8238 long 0x3FFE0000,0xB30F6352,0x8917C80B,0x00000000 8239 long 0x3FFD0000,0xB7047551,0x5D0F1C61,0x00000000 8240 long 0x3FFE0000,0xB11FD3B8,0x0B11FD3C,0x00000000 8241 long 0x3FFD0000,0xBC952AFE,0xEA3D13E1,0x00000000 8242 long 0x3FFE0000,0xAF3ADDC6,0x80AF3ADE,0x00000000 8243 long 0x3FFD0000,0xC2168ED0,0xF458BA4A,0x00000000 8244 long 0x3FFE0000,0xAD602B58,0x0AD602B6,0x00000000 8245 long 0x3FFD0000,0xC788F439,0xB3163BF1,0x00000000 8246 long 0x3FFE0000,0xAB8F69E2,0x8359CD11,0x00000000 8247 long 0x3FFD0000,0xCCECAC08,0xBF04565D,0x00000000 8248 long 0x3FFE0000,0xA9C84A47,0xA07F5638,0x00000000 8249 long 0x3FFD0000,0xD2420487,0x2DD85160,0x00000000 8250 long 0x3FFE0000,0xA80A80A8,0x0A80A80B,0x00000000 8251 long 0x3FFD0000,0xD7894992,0x3BC3588A,0x00000000 8252 long 0x3FFE0000,0xA655C439,0x2D7B73A8,0x00000000 8253 long 0x3FFD0000,0xDCC2C4B4,0x9887DACC,0x00000000 8254 long 0x3FFE0000,0xA4A9CF1D,0x96833751,0x00000000 8255 long 0x3FFD0000,0xE1EEBD3E,0x6D6A6B9E,0x00000000 8256 long 0x3FFE0000,0xA3065E3F,0xAE7CD0E0,0x00000000 8257 long 0x3FFD0000,0xE70D785C,0x2F9F5BDC,0x00000000 8258 long 0x3FFE0000,0xA16B312E,0xA8FC377D,0x00000000 8259 long 0x3FFD0000,0xEC1F392C,0x5179F283,0x00000000 8260 long 0x3FFE0000,0x9FD809FD,0x809FD80A,0x00000000 8261 long 0x3FFD0000,0xF12440D3,0xE36130E6,0x00000000 8262 long 0x3FFE0000,0x9E4CAD23,0xDD5F3A20,0x00000000 8263 long 0x3FFD0000,0xF61CCE92,0x346600BB,0x00000000 8264 long 0x3FFE0000,0x9CC8E160,0xC3FB19B9,0x00000000 8265 long 0x3FFD0000,0xFB091FD3,0x8145630A,0x00000000 8266 long 0x3FFE0000,0x9B4C6F9E,0xF03A3CAA,0x00000000 8267 long 0x3FFD0000,0xFFE97042,0xBFA4C2AD,0x00000000 8268 long 0x3FFE0000,0x99D722DA,0xBDE58F06,0x00000000 8269 long 0x3FFE0000,0x825EFCED,0x49369330,0x00000000 8270 long 0x3FFE0000,0x9868C809,0x868C8098,0x00000000 8271 long 0x3FFE0000,0x84C37A7A,0xB9A905C9,0x00000000 8272 long 
0x3FFE0000,0x97012E02,0x5C04B809,0x00000000 8273 long 0x3FFE0000,0x87224C2E,0x8E645FB7,0x00000000 8274 long 0x3FFE0000,0x95A02568,0x095A0257,0x00000000 8275 long 0x3FFE0000,0x897B8CAC,0x9F7DE298,0x00000000 8276 long 0x3FFE0000,0x94458094,0x45809446,0x00000000 8277 long 0x3FFE0000,0x8BCF55DE,0xC4CD05FE,0x00000000 8278 long 0x3FFE0000,0x92F11384,0x0497889C,0x00000000 8279 long 0x3FFE0000,0x8E1DC0FB,0x89E125E5,0x00000000 8280 long 0x3FFE0000,0x91A2B3C4,0xD5E6F809,0x00000000 8281 long 0x3FFE0000,0x9066E68C,0x955B6C9B,0x00000000 8282 long 0x3FFE0000,0x905A3863,0x3E06C43B,0x00000000 8283 long 0x3FFE0000,0x92AADE74,0xC7BE59E0,0x00000000 8284 long 0x3FFE0000,0x8F1779D9,0xFDC3A219,0x00000000 8285 long 0x3FFE0000,0x94E9BFF6,0x15845643,0x00000000 8286 long 0x3FFE0000,0x8DDA5202,0x37694809,0x00000000 8287 long 0x3FFE0000,0x9723A1B7,0x20134203,0x00000000 8288 long 0x3FFE0000,0x8CA29C04,0x6514E023,0x00000000 8289 long 0x3FFE0000,0x995899C8,0x90EB8990,0x00000000 8290 long 0x3FFE0000,0x8B70344A,0x139BC75A,0x00000000 8291 long 0x3FFE0000,0x9B88BDAA,0x3A3DAE2F,0x00000000 8292 long 0x3FFE0000,0x8A42F870,0x5669DB46,0x00000000 8293 long 0x3FFE0000,0x9DB4224F,0xFFE1157C,0x00000000 8294 long 0x3FFE0000,0x891AC73A,0xE9819B50,0x00000000 8295 long 0x3FFE0000,0x9FDADC26,0x8B7A12DA,0x00000000 8296 long 0x3FFE0000,0x87F78087,0xF78087F8,0x00000000 8297 long 0x3FFE0000,0xA1FCFF17,0xCE733BD4,0x00000000 8298 long 0x3FFE0000,0x86D90544,0x7A34ACC6,0x00000000 8299 long 0x3FFE0000,0xA41A9E8F,0x5446FB9F,0x00000000 8300 long 0x3FFE0000,0x85BF3761,0x2CEE3C9B,0x00000000 8301 long 0x3FFE0000,0xA633CD7E,0x6771CD8B,0x00000000 8302 long 0x3FFE0000,0x84A9F9C8,0x084A9F9D,0x00000000 8303 long 0x3FFE0000,0xA8489E60,0x0B435A5E,0x00000000 8304 long 0x3FFE0000,0x83993052,0x3FBE3368,0x00000000 8305 long 0x3FFE0000,0xAA59233C,0xCCA4BD49,0x00000000 8306 long 0x3FFE0000,0x828CBFBE,0xB9A020A3,0x00000000 8307 long 0x3FFE0000,0xAC656DAE,0x6BCC4985,0x00000000 8308 long 0x3FFE0000,0x81848DA8,0xFAF0D277,0x00000000 8309 long 
0x3FFE0000,0xAE6D8EE3,0x60BB2468,0x00000000
	long		0x3FFE0000,0x80808080,0x80808081,0x00000000
	long		0x3FFE0000,0xB07197A2,0x3C46C654,0x00000000
# (The three rows above are the tail of LOGTBL. The code below indexes the
#  table in 32-byte steps and reads 1/F at the entry and LOG(F) 16 bytes
#  later, so rows alternate 1/F, LOG(F).)

# Scratch-area aliases used by the log routines.
# NOTE: X, KLOG2 and SAVEU all name FP_SCR0; they are used at different
# times (X while F is being built, KLOG2/SAVEU during the polynomial).
	set		ADJK,L_SCR1		# exponent adjustment added to K

	set		X,FP_SCR0		# working copy of the input
	set		XDCARE,X+2
	set		XFRAC,X+4		# high longword of X's mantissa

	set		F,FP_SCR1		# F = leading bits of Y with a 1 attached
	set		FFRAC,F+4

	set		KLOG2,FP_SCR0		# K*LOG2 is parked here

	set		SAVEU,FP_SCR0		# U is parked here (LOGNEAR1 path)

	global		slogn
#--ENTRY POINT FOR LOG(X) FOR X FINITE, NON-ZERO, NOT NAN'S
#--IN:  A0 = POINTER TO THE EXTENDED-PRECISION INPUT
#--     D0 IS LOADED INTO FPCR JUST BEFORE THE FINAL FP OPERATION
#--OUT: FP0 = RESULT; EXITS THROUGH t_inx2, ld_pzero OR (VIA LOGNEG) t_operr
slogn:
	fmov.x		(%a0),%fp0		# LOAD INPUT
	mov.l		&0x00000000,ADJK(%a6)	# NORMALIZED INPUT: NO EXPONENT ADJUSTMENT

LOGBGN:
#--FPCR SAVED AND CLEARED, INPUT IS 2^(ADJK)*FP0, FP0 CONTAINS
#--A FINITE, NON-ZERO, NORMALIZED NUMBER.
#--(ALSO ENTERED FROM THE DENORMALIZED-INPUT PATH WITH A0 -> X(%a6).)

#--D1 := COMPACT FORM OF X: SIGN/EXPONENT WORD IN THE UPPER HALF,
#--TOP 16 MANTISSA BITS IN THE LOWER HALF.
	mov.l		(%a0),%d1
	mov.w		4(%a0),%d1

	mov.l		(%a0),X(%a6)		# COPY THE 12-BYTE INPUT TO X
	mov.l		4(%a0),X+4(%a6)
	mov.l		8(%a0),X+8(%a6)

	cmp.l		%d1,&0			# CHECK IF X IS NEGATIVE
	blt.w		LOGNEG			# LOG OF NEGATIVE ARGUMENT IS INVALID
# X IS POSITIVE, CHECK IF X IS NEAR 1
# (THE SAME TWO CONSTANTS ARE DESCRIBED AS EXP(-1/16) AND EXP(1/16) IN THE
#  LOGNP1 CODE; "15/16" AND "17/16" BELOW ARE APPROXIMATE DESCRIPTIONS.)
	cmp.l		%d1,&0x3ffef07d		# IS X < 15/16?
	blt.b		LOGMAIN			# YES
	cmp.l		%d1,&0x3fff8841		# IS X > 17/16?
	ble.w		LOGNEAR1		# NO

LOGMAIN:
#--THIS SHOULD BE THE USUAL CASE, X NOT VERY CLOSE TO 1

#--X = 2^(K) * Y, 1 <= Y < 2. THUS, Y = 1.XXXXXXXX....XX IN BINARY.
#--WE DEFINE F = 1.XXXXXX1, I.E. FIRST 7 BITS OF Y AND ATTACH A 1.
#--THE IDEA IS THAT LOG(X) = K*LOG2 + LOG(Y)
#--			   = K*LOG2 + LOG(F) + LOG(1 + (Y-F)/F).
#--NOTE THAT U = (Y-F)/F IS VERY SMALL AND THUS APPROXIMATING
#--LOG(1+U) CAN BE VERY EFFICIENT.
#--ALSO NOTE THAT THE VALUE 1/F IS STORED IN A TABLE SO THAT NO
#--DIVISION IS NEEDED TO CALCULATE (Y-F)/F.

#--GET K, Y, F, AND ADDRESS OF 1/F.
	asr.l		&8,%d1
	asr.l		&8,%d1			# SHIFTED 16 BITS, BIASED EXPO. OF X
	sub.l		&0x3FFF,%d1		# THIS IS K
	add.l		ADJK(%a6),%d1		# ADJUST K, ORIGINAL INPUT MAY BE DENORM.
	lea		LOGTBL(%pc),%a0		# BASE ADDRESS OF 1/F AND LOG(F)
	fmov.l		%d1,%fp1		# CONVERT K TO FLOATING-POINT FORMAT

#--WHILE THE CONVERSION IS GOING ON, WE GET F AND ADDRESS OF 1/F
	mov.l		&0x3FFF0000,X(%a6)	# X IS NOW Y, I.E. 2^(-K)*X
	mov.l		XFRAC(%a6),FFRAC(%a6)
	and.l		&0xFE000000,FFRAC(%a6)	# FIRST 7 BITS OF Y
	or.l		&0x01000000,FFRAC(%a6)	# GET F: ATTACH A 1 AT THE EIGHTH BIT
	mov.l		FFRAC(%a6),%d1		# READY TO GET ADDRESS OF 1/F
	and.l		&0x7E000000,%d1		# KEEP THE 6 INDEX BITS BELOW THE LEADING 1
	asr.l		&8,%d1
	asr.l		&8,%d1
	asr.l		&4,%d1			# SHIFTED 20, D1 IS THE DISPLACEMENT (INDEX*32)
	add.l		%d1,%a0			# A0 IS THE ADDRESS FOR 1/F

	fmov.x		X(%a6),%fp0
	mov.l		&0x3fff0000,F(%a6)	# F GETS EXPONENT OF 1.0 ...
	clr.l		F+8(%a6)		# ... AND A ZERO LOW MANTISSA LONGWORD
	fsub.x		F(%a6),%fp0		# Y-F
	fmovm.x		&0xc,-(%sp)		# SAVE FP2-3 WHILE FP0 IS NOT READY
#--SUMMARY: FP0 IS Y-F, A0 IS ADDRESS OF 1/F, FP1 IS K
#--FP2/FP3 ARE NOW SAVED ON THE STACK (RESTORED BEFORE THE FINAL ADD).

LP1CONT1:
#--A RE-ENTRY POINT FOR LOGNP1
#--ON ENTRY: FP0 = Y-F, FP1 = K, A0 -> 1/F, FP2/FP3 SAVED ON THE STACK.
	fmul.x		(%a0),%fp0		# FP0 IS U = (Y-F)/F
	fmul.x		LOGOF2(%pc),%fp1	# GET K*LOG2 WHILE FP0 IS NOT READY
	fmov.x		%fp0,%fp2
	fmul.x		%fp2,%fp2		# FP2 IS V=U*U
	fmov.x		%fp1,KLOG2(%a6)		# PUT K*LOG2 IN MEMORY, FREE FP1

#--LOG(1+U) IS APPROXIMATED BY
#--U + V*(A1+U*(A2+U*(A3+U*(A4+U*(A5+U*A6))))) WHICH IS
#--[U + V*(A1+V*(A3+V*A5))]  +  [U*V*(A2+V*(A4+V*A6))]
#--THE TWO BRACKETS ARE EVALUATED SIDE BY SIDE IN FP2 (ODD A'S) AND
#--FP1 (EVEN A'S); FP3 HOLDS V.

	fmov.x		%fp2,%fp3
	fmov.x		%fp2,%fp1

	fmul.d		LOGA6(%pc),%fp1		# V*A6
	fmul.d		LOGA5(%pc),%fp2		# V*A5

	fadd.d		LOGA4(%pc),%fp1		# A4+V*A6
	fadd.d		LOGA3(%pc),%fp2		# A3+V*A5

	fmul.x		%fp3,%fp1		# V*(A4+V*A6)
	fmul.x		%fp3,%fp2		# V*(A3+V*A5)

	fadd.d		LOGA2(%pc),%fp1		# A2+V*(A4+V*A6)
	fadd.d		LOGA1(%pc),%fp2		# A1+V*(A3+V*A5)

	fmul.x		%fp3,%fp1		# V*(A2+V*(A4+V*A6))
	add.l		&16,%a0			# ADDRESS OF LOG(F)
	fmul.x		%fp3,%fp2		# V*(A1+V*(A3+V*A5))

	fmul.x		%fp0,%fp1		# U*V*(A2+V*(A4+V*A6))
	fadd.x		%fp2,%fp0		# U+V*(A1+V*(A3+V*A5))

	fadd.x		(%a0),%fp1		# LOG(F)+U*V*(A2+V*(A4+V*A6))
	fmovm.x		(%sp)+,&0x30		# RESTORE FP2-3
	fadd.x		%fp1,%fp0		# FP0 IS LOG(F) + LOG(1+U)

	fmov.l		%d0,%fpcr		# LOAD D0 INTO FPCR SO THE LAST ADD USES IT
	fadd.x		KLOG2(%a6),%fp0		# FINAL ADD
	bra		t_inx2


LOGNEAR1:

# if the input is exactly equal to one, then exit through ld_pzero.
# if these 2 lines weren't here, the correct answer would be returned
# but the INEX2 bit would be set.
	fcmp.b		%fp0,&0x1		# is it equal to one?
	fbeq.l		ld_pzero		# yes

#--REGISTERS SAVED: FPCR, FP1. FP0 CONTAINS THE INPUT.
	fmov.x		%fp0,%fp1
	fsub.s		one(%pc),%fp1		# FP1 IS X-1
	fadd.s		one(%pc),%fp0		# FP0 IS X+1
	fadd.x		%fp1,%fp1		# FP1 IS 2(X-1)
#--LOG(X) = LOG(1+U/2)-LOG(1-U/2) WHICH IS AN ODD POLYNOMIAL
#--IN U, U = 2(X-1)/(X+1) = FP1/FP0

LP1CONT2:
#--THIS IS A RE-ENTRY POINT FOR LOGNP1
#--ON ENTRY: FP1 = NUMERATOR, FP0 = DENOMINATOR OF U.
	fdiv.x		%fp0,%fp1		# FP1 IS U
	fmovm.x		&0xc,-(%sp)		# SAVE FP2-3
#--REGISTERS SAVED ARE NOW FPCR,FP1,FP2,FP3
#--LET V=U*U, W=V*V, CALCULATE
#--U + U*V*(B1 + V*(B2 + V*(B3 + V*(B4 + V*B5)))) BY
#--U + U*V*(  [B1 + W*(B3 + W*B5)]  +  [V*(B2 + W*B4)]  )
	fmov.x		%fp1,%fp0
	fmul.x		%fp0,%fp0		# FP0 IS V
	fmov.x		%fp1,SAVEU(%a6)		# STORE U IN MEMORY, FREE FP1
	fmov.x		%fp0,%fp1
	fmul.x		%fp1,%fp1		# FP1 IS W

	fmov.d		LOGB5(%pc),%fp3
	fmov.d		LOGB4(%pc),%fp2

	fmul.x		%fp1,%fp3		# W*B5
	fmul.x		%fp1,%fp2		# W*B4

	fadd.d		LOGB3(%pc),%fp3		# B3+W*B5
	fadd.d		LOGB2(%pc),%fp2		# B2+W*B4

	fmul.x		%fp3,%fp1		# W*(B3+W*B5), FP3 RELEASED

	fmul.x		%fp0,%fp2		# V*(B2+W*B4)

	fadd.d		LOGB1(%pc),%fp1		# B1+W*(B3+W*B5)
	fmul.x		SAVEU(%a6),%fp0		# FP0 IS U*V

	fadd.x		%fp2,%fp1		# B1+W*(B3+W*B5) + V*(B2+W*B4), FP2 RELEASED
	fmovm.x		(%sp)+,&0x30		# FP2-3 RESTORED

	fmul.x		%fp1,%fp0		# U*V*( [B1+W*(B3+W*B5)] + [V*(B2+W*B4)] )

	fmov.l		%d0,%fpcr		# LOAD D0 INTO FPCR SO THE LAST ADD USES IT
	fadd.x		SAVEU(%a6),%fp0		# U + U*V*(...)
	bra		t_inx2

#--REGISTERS SAVED FPCR.
#--LOG(-VE) IS INVALID
LOGNEG:
	bra		t_operr

	global		slognd
slognd:
#--ENTRY POINT FOR LOG(X) FOR DENORMALIZED INPUT
#--IN:  A0 = POINTER TO THE EXTENDED-PRECISION (DENORMALIZED) INPUT
#--OUT: CONTINUES AT LOGBGN WITH A0 -> X(%a6) HOLDING THE NORMALIZED
#--     COPY, FP0 = THAT COPY, AND ADJK = -K, SO THAT THE ORIGINAL
#--     INPUT = 2^(ADJK) * FP0.  D2-D7 ARE PRESERVED.

#----normalize the input value by left shifting k bits (k to be determined
#----below), adjusting exponent and storing -k to  ADJK
#----Note that this code assumes the denormalized input is NON-ZERO.
#----(Modified from the original Motorola sequence: the entry-time store of
#-----100 into ADJK was dead, because ADJK is always rewritten with -k before
#----LOGBGN reads it, and has been dropped; the two identical
#----store/restore tails have been merged at Hi_norm_done.)

	movm.l		&0x3f00,-(%sp)		# save some registers  {d2-d7}
	mov.l		(%a0),%d3		# D3 is the sign/exponent longword of X
	mov.l		4(%a0),%d4
	mov.l		8(%a0),%d5		# (D4,D5) is (Hi_X,Lo_X)
	clr.l		%d2			# D2 used for holding K

	tst.l		%d4
	bne.b		Hi_not0

Hi_0:
#----upper mantissa longword is zero: the shift is 32 plus the position
#----of the first 1 in the lower longword.
	mov.l		%d5,%d4
	clr.l		%d5
	mov.l		&32,%d2
	clr.l		%d6
	bfffo		%d4{&0:&32},%d6		# find first 1
	lsl.l		%d6,%d4
	add.l		%d6,%d2			# (D3,D4,D5) is normalized
	bra.b		Hi_norm_done

Hi_not0:
#----upper mantissa longword is non-zero: shift the 64-bit (D4,D5) pair
#----left by k, carrying the top k bits of D5 into D4.
	clr.l		%d6
	bfffo		%d4{&0:&32},%d6		# find first 1
	mov.l		%d6,%d2			# get k
	lsl.l		%d6,%d4
	mov.l		%d5,%d7			# a copy of D5
	lsl.l		%d6,%d5
	neg.l		%d6
	add.l		&32,%d6			# D6 = 32 - k
	lsr.l		%d6,%d7			# bits of D5 that move into D4
	or.l		%d7,%d4			# (D3,D4,D5) normalized

Hi_norm_done:
#----common tail: store the normalized value in X, record -k in ADJK,
#----restore the data registers and continue with the regular log code.
	mov.l		%d3,X(%a6)
	mov.l		%d4,XFRAC(%a6)
	mov.l		%d5,XFRAC+4(%a6)
	neg.l		%d2
	mov.l		%d2,ADJK(%a6)		# INPUT = 2^(ADJK) * FP0
	fmov.x		X(%a6),%fp0
	movm.l		(%sp)+,&0xfc		# restore registers {d2-d7}
	lea		X(%a6),%a0
	bra.w		LOGBGN			# begin regular log(X)

	global		slognp1
#--ENTRY POINT FOR LOG(1+X) FOR X FINITE, NON-ZERO, NOT NAN'S
#--IN:  A0 = POINTER TO THE EXTENDED-PRECISION INPUT Z
#--     D0 IS LOADED INTO FPCR BEFORE THE FINAL FP OPERATION
#--OUT: FP0 = RESULT
slognp1:
	fmov.x		(%a0),%fp0		# LOAD INPUT
	fabs.x		%fp0			# test magnitude
	fcmp.x		%fp0,LTHOLD(%pc)	# compare with min threshold
	fbgt.w		LP1REAL			# if greater, continue
#--|Z| IS AT OR BELOW THE THRESHOLD: RETURN Z ITSELF
	fmov.l		%d0,%fpcr
	mov.b		&FMOV_OP,%d1		# last inst is MOVE
	fmov.x		(%a0),%fp0		# return signed argument
	bra		t_catch

LP1REAL:
	fmov.x		(%a0),%fp0		# LOAD INPUT
	mov.l		&0x00000000,ADJK(%a6)
	fmov.x		%fp0,%fp1		# FP1 IS INPUT Z
	fadd.s		one(%pc),%fp0		# X := ROUND(1+Z)
	fmov.x		%fp0,X(%a6)
	mov.w		XFRAC(%a6),XDCARE(%a6)	# PACK TOP MANTISSA WORD NEXT TO THE EXPONENT
	mov.l		X(%a6),%d1		# D1 IS X IN COMPACT FORM
	cmp.l		%d1,&0
	ble.w		LP1NEG0			# LOG OF ZERO OR -VE
#--IF 1+Z > 3/2 OR 1+Z < 1/2, THEN X, WHICH IS ROUNDING 1+Z,
#--CONTAINS AT LEAST 63 BITS OF INFORMATION OF Z. IN THAT CASE,
#--SIMPLY INVOKE LOG(X) FOR LOG(1+Z).
	cmp.l		%d1,&0x3ffe8000		# IS X WITHIN BOUNDS [1/2,3/2]?
	blt.w		LOGMAIN
	cmp.l		%d1,&0x3fffc000
	bgt.w		LOGMAIN

LP1NEAR1:
#--NEXT SEE IF EXP(-1/16) < X < EXP(1/16)
	cmp.l		%d1,&0x3ffef07d
	blt.w		LP1CARE
	cmp.l		%d1,&0x3fff8841
	bgt.w		LP1CARE

LP1ONE16:
#--EXP(-1/16) < X < EXP(1/16). LOG(1+Z) = LOG(1+U/2) - LOG(1-U/2)
#--WHERE U = 2Z/(2+Z) = 2Z/(1+X).
	fadd.x		%fp1,%fp1		# FP1 IS 2Z
	fadd.s		one(%pc),%fp0		# FP0 IS 1+X
#--U = FP1/FP0
	bra.w		LP1CONT2

LP1CARE:
#--HERE WE USE THE USUAL TABLE DRIVEN APPROACH. CARE HAS TO BE
#--TAKEN BECAUSE 1+Z CAN HAVE 67 BITS OF INFORMATION AND WE MUST
#--PRESERVE ALL THE INFORMATION. BECAUSE 1+Z IS IN [1/2,3/2],
#--THERE ARE ONLY TWO CASES.
#--CASE 1: 1+Z < 1, THEN K = -1 AND Y-F = (2-F) + 2Z
#--CASE 2: 1+Z > 1, THEN K = 0  AND Y-F = (1-F) + Z
#--ON RETURNING TO LP1CONT1, WE MUST HAVE K IN FP1, ADDRESS OF
#--(1/F) IN A0, Y-F IN FP0, AND FP2 SAVED.
#--(LP1CARE BODY) BUILD F FROM THE LEADING MANTISSA BITS HELD IN XFRAC

	mov.l		XFRAC(%a6),FFRAC(%a6)
	and.l		&0xFE000000,FFRAC(%a6)	# KEEP THE FIRST 7 BITS
	or.l		&0x01000000,FFRAC(%a6)	# F OBTAINED
	cmp.l		%d1,&0x3FFF8000		# SEE IF 1+Z > 1
	bge.b		KISZERO

KISNEG1:
#--CASE 1: K = -1, Y-F = (2-F) + 2Z
	fmov.s		TWO(%pc),%fp0
	mov.l		&0x3fff0000,F(%a6)
	clr.l		F+8(%a6)
	fsub.x		F(%a6),%fp0		# 2-F
	mov.l		FFRAC(%a6),%d1
	and.l		&0x7E000000,%d1
	asr.l		&8,%d1
	asr.l		&8,%d1
	asr.l		&4,%d1			# D1 CONTAINS DISPLACEMENT FOR 1/F
	fadd.x		%fp1,%fp1		# GET 2Z
	fmovm.x		&0xc,-(%sp)		# SAVE FP2-3 {%fp2/%fp3}
	fadd.x		%fp1,%fp0		# FP0 IS Y-F = (2-F)+2Z
	lea		LOGTBL(%pc),%a0		# A0 IS ADDRESS OF 1/F
	add.l		%d1,%a0
	fmov.s		negone(%pc),%fp1	# FP1 IS K = -1
	bra.w		LP1CONT1

KISZERO:
#--CASE 2: K = 0, Y-F = (1-F) + Z
	fmov.s		one(%pc),%fp0
	mov.l		&0x3fff0000,F(%a6)
	clr.l		F+8(%a6)
	fsub.x		F(%a6),%fp0		# 1-F
	mov.l		FFRAC(%a6),%d1
	and.l		&0x7E000000,%d1
	asr.l		&8,%d1
	asr.l		&8,%d1
	asr.l		&4,%d1			# D1 CONTAINS DISPLACEMENT FOR 1/F
	fadd.x		%fp1,%fp0		# FP0 IS Y-F
	fmovm.x		&0xc,-(%sp)		# FP2-3 SAVED {%fp2/%fp3}
	lea		LOGTBL(%pc),%a0
	add.l		%d1,%a0			# A0 IS ADDRESS OF 1/F
	fmov.s		zero(%pc),%fp1		# FP1 IS K = 0
	bra.w		LP1CONT1

LP1NEG0:
#--FPCR SAVED. D1 IS X IN COMPACT FORM.
#--REACHED WHEN X = ROUND(1+Z) IS ZERO OR NEGATIVE.
	cmp.l		%d1,&0
	blt.b		LP1NEG
LP1ZERO:
#--1+Z IS ZERO: LOAD -1 AND EXIT THROUGH THE DIVIDE-BY-ZERO HANDLER
	fmov.s		negone(%pc),%fp0

	fmov.l		%d0,%fpcr
	bra		t_dz

LP1NEG:
#--1+Z IS NEGATIVE: LOAD 0 AND EXIT THROUGH THE OPERAND-ERROR HANDLER
	fmov.s		zero(%pc),%fp0

	fmov.l		%d0,%fpcr
	bra		t_operr

	global		slognp1d
#--ENTRY POINT FOR LOG(1+Z) FOR DENORMALIZED INPUT
# Simply return the denorm
slognp1d:
	bra		t_extdnrm

#########################################################################
# satanh():  computes the inverse hyperbolic tangent of a norm input	#
# satanhd(): computes the inverse hyperbolic tangent of a denorm input	#
#									#
# INPUT ***************************************************************	#
#	a0 = pointer to extended precision input			#
#	d0 = round precision,mode					#
#									#
# OUTPUT **************************************************************	#
#	fp0 = arctanh(X)						#
#									#
# ACCURACY and MONOTONICITY *******************************************	#
#	The returned result is within 3 ulps in 64 significant bits,	#
#	i.e. within 0.5001 ulp to 53 bits if the result is subsequently	#
#	rounded to double precision. The result is provably monotonic	#
#	in double precision.						#
#									#
# ALGORITHM ***********************************************************	#
#									#
#	ATANH								#
#	1. If |X| >= 1, go to 3.					#
#									#
#	2. (|X| < 1) Calculate atanh(X) by				#
#		sgn := sign(X)						#
#		y := |X|						#
#		z := 2y/(1-y)						#
#		atanh(X) := sgn * (1/2) * logp1(z)			#
#		Exit.							#
#									#
#	3. If |X| > 1, go to 5.						#
#									#
#	4. (|X| = 1) Generate infinity with an appropriate sign and	#
#		divide-by-zero by					#
#		sgn := sign(X)						#
#		atanh(X) := sgn / (+0).					#
#		Exit.							#
#									#
#	5. (|X| > 1) Generate an invalid operation by 0 * infinity.	#
#		Exit.							#
#									#
#########################################################################

	global		satanh
satanh:
#--D1 := |X| IN COMPACT FORM (EXPONENT WORD : TOP 16 MANTISSA BITS)
	mov.l		(%a0),%d1
	mov.w		4(%a0),%d1
	and.l		&0x7FFFFFFF,%d1
	cmp.l		%d1,&0x3FFF8000		# |X| >= 1 ?
	bge.b		ATANHBIG

#--THIS IS THE USUAL CASE, |X| < 1
#--Y = |X|, Z = 2Y/(1-Y), ATANH(X) = SIGN(X) * (1/2) * LOG1P(Z).

	fabs.x		(%a0),%fp0		# Y = |X|
	fmov.x		%fp0,%fp1
	fneg.x		%fp1			# -Y
	fadd.x		%fp0,%fp0		# 2Y
	fadd.s		&0x3F800000,%fp1	# 1-Y
	fdiv.x		%fp1,%fp0		# 2Y/(1-Y)
	mov.l		(%a0),%d1
	and.l		&0x80000000,%d1		# KEEP ONLY THE SIGN BIT OF X
	or.l		&0x3F000000,%d1		# SIGN(X)*HALF (SINGLE PRECISION)
	mov.l		%d1,-(%sp)		# PARK IT ON THE STACK FOR THE FINAL MULTIPLY

	mov.l		%d0,-(%sp)		# save rnd prec,mode
	clr.l		%d0			# pass ext prec,RN
	fmovm.x		&0x01,-(%sp)		# save Z on stack
	lea		(%sp),%a0		# pass ptr to Z
	bsr		slognp1			# LOG1P(Z)
	add.l		&0xc,%sp		# clear Z from stack

	mov.l		(%sp)+,%d0		# fetch old prec,mode
	fmov.l		%d0,%fpcr		# load it
	mov.b		&FMUL_OP,%d1		# last inst is MUL
	fmul.s		(%sp)+,%fp0		# TIMES SIGN(X)*HALF; POPS THE LAST STACK ITEM
	bra		t_catch

ATANHBIG:
#--|X| >= 1: |X| > 1 IS AN OPERAND ERROR, |X| = 1 IS A DIVIDE-BY-ZERO
	fabs.x		(%a0),%fp0		# |X|
	fcmp.s		%fp0,&0x3F800000
	fbgt		t_operr
	bra		t_dz

	global		satanhd
#--ATANH(X) = X FOR DENORMALIZED X
satanhd:
	bra		t_extdnrm

#########################################################################
# slog10():  computes the base-10 logarithm of a normalized input	#
# slog10d(): computes the base-10 logarithm of a denormalized input	#
# slog2():   computes the base-2 logarithm of a normalized input	#
# slog2d():  computes the base-2 logarithm of a denormalized input	#
#									#
# INPUT ***************************************************************	#
#	a0 = pointer to extended precision input			#
#	d0 = round precision,mode					#
#									#
# OUTPUT **************************************************************	#
#	fp0 = log_10(X) or log_2(X)					#
#									#
# ACCURACY and MONOTONICITY *******************************************	#
#	The returned result is within 1.7 ulps in 64 significant bits,	#
#	i.e. within 0.5003 ulp to 53 bits if the result is subsequently	#
#	rounded to double precision. The result is provably monotonic	#
#	in double precision.						#
#									#
# ALGORITHM ***********************************************************	#
#									#
#	NOTE: in the code for these routines the user's round		#
#	precision,mode arrives in d0; it is pushed on the stack and d0	#
#	is cleared before slogn/slognd is called, then popped straight	#
#	into FPCR. "save FPCR in D1" in the steps below describes that	#
#	save/restore.							#
#									#
#       slog10d:							#
#									#
#       Step 0.	If X < 0, create a NaN and raise the invalid operation	#
#               flag. Otherwise, save FPCR in D1; set FpCR to default.	#
#       Notes:  Default means round-to-nearest mode, no floating-point	#
#               traps, and precision control = double extended.		#
#									#
#       Step 1. Call slognd to obtain Y = log(X), the natural log of X.	#
#       Notes:  Even if X is denormalized, log(X) is always normalized.	#
#									#
#       Step 2.   Compute log_10(X) = log(X) * (1/log(10)).		#
#            2.1  Restore the user FPCR					#
#            2.2  Return ans := Y * INV_L10.				#
#									#
#       slog10:								#
#									#
#       Step 0. If X < 0, create a NaN and raise the invalid operation	#
#               flag. Otherwise, save FPCR in D1; set FpCR to default.	#
#       Notes:  Default means round-to-nearest mode, no floating-point	#
#               traps, and precision control = double extended.		#
#									#
#       Step 1. Call sLogN to obtain Y = log(X), the natural log of X.	#
#									#
#       Step 2.   Compute log_10(X) = log(X) * (1/log(10)).		#
#            2.1  Restore the user FPCR					#
#            2.2  Return ans := Y * INV_L10.				#
#									#
#       sLog2d:								#
#									#
#       Step 0. If X < 0, create a NaN and raise the invalid operation	#
#               flag. Otherwise, save FPCR in D1; set FpCR to default.	#
#       Notes:  Default means round-to-nearest mode, no floating-point	#
#               traps, and precision control = double extended.		#
#									#
#       Step 1. Call slognd to obtain Y = log(X), the natural log of X.	#
#       Notes:  Even if X is denormalized, log(X) is always normalized.	#
#									#
#       Step 2.   Compute log_2(X) = log(X) * (1/log(2)).		#
#            2.1  Restore the user FPCR					#
#            2.2  Return ans := Y * INV_L2.				#
#									#
#       sLog2:								#
#									#
#       Step 0. If X < 0, create a NaN and raise the invalid operation	#
#               flag. Otherwise, save FPCR in D1; set FpCR to default.	#
#       Notes:  Default means round-to-nearest mode, no floating-point	#
#               traps, and precision control = double extended.		#
#									#
#       Step 1. If X is not an integer power of two, i.e., X != 2^k,	#
#               go to Step 3.						#
#									#
#       Step 2.   Return k.						#
#            2.1  Get integer k, X = 2^k.				#
#            2.2  Restore the user FPCR.				#
#            2.3  Return ans := convert-to-double-extended(k).		#
#									#
#       Step 3. Call sLogN to obtain Y = log(X), the natural log of X.	#
#									#
#       Step 4.   Compute log_2(X) = log(X) * (1/log(2)).		#
#            4.1  Restore the user FPCR					#
#            4.2  Return ans := Y * INV_L2.				#
#									#
#########################################################################

# 1/log(10) in extended precision (multiplier used by slog10/slog10d)
INV_L10:
	long		0x3FFD0000,0xDE5BD8A9,0x37287195,0x00000000

# 1/log(2) in extended precision (multiplier used by slog2/slog2d)
INV_L2:
	long		0x3FFF0000,0xB8AA3B29,0x5C17F0BC,0x00000000

	global		slog10
#--entry point for Log10(X), X is normalized
slog10:
	fmov.b		&0x1,%fp0
	fcmp.x		%fp0,(%a0)		# if operand == 1,
	fbeq.l		ld_pzero		# return an EXACT zero

	mov.l		(%a0),%d1		# sign/exponent longword sets the N flag
	blt.w		invalid			# X < 0
	mov.l		%d0,-(%sp)		# save user rnd prec,mode
	clr.l		%d0			# run slogn with d0 = 0
	bsr		slogn			# log(X), X normal.
	fmov.l		(%sp)+,%fpcr		# restore user rnd prec,mode into FPCR
	fmul.x		INV_L10(%pc),%fp0	# log10(X) = log(X) * (1/log(10))
	bra		t_inx2

	global		slog10d
#--entry point for Log10(X), X is denormalized
slog10d:
	mov.l		(%a0),%d1
	blt.w		invalid			# X < 0
	mov.l		%d0,-(%sp)		# save user rnd prec,mode
	clr.l		%d0			# run slognd with d0 = 0
	bsr		slognd			# log(X), X denorm.
	fmov.l		(%sp)+,%fpcr		# restore user rnd prec,mode into FPCR
	fmul.x		INV_L10(%pc),%fp0	# log10(X) = log(X) * (1/log(10))
	bra		t_minx2

	global		slog2
#--entry point for Log2(X), X is normalized
slog2:
	mov.l		(%a0),%d1
	blt.w		invalid			# X < 0

#--X is a power of two only if every mantissa bit below the leading
#--(integer) bit is zero.
	mov.l		8(%a0),%d1
	bne.b		continue		# X is not 2^k

	mov.l		4(%a0),%d1
	and.l		&0x7FFFFFFF,%d1		# ignore the integer bit
	bne.b		continue

#--X = 2^k.
#--(d1 is zero here, so the word move below leaves a clean exponent.)
	mov.w		(%a0),%d1
	and.l		&0x00007FFF,%d1		# biased exponent
	sub.l		&0x3FFF,%d1		# k
	beq.l		ld_pzero		# k == 0 (X == 1): return +0
	fmov.l		%d0,%fpcr
	fmov.l		%d1,%fp0		# ans := k converted to floating point
	bra		t_inx2

continue:
#--general case: log2(X) = log(X) * (1/log(2))
	mov.l		%d0,-(%sp)		# save user rnd prec,mode
	clr.l		%d0			# run slogn with d0 = 0
	bsr		slogn			# log(X), X normal.
	fmov.l		(%sp)+,%fpcr		# restore user rnd prec,mode into FPCR
	fmul.x		INV_L2(%pc),%fp0
	bra		t_inx2

#--shared exit for negative operands of slog10/slog10d/slog2/slog2d
invalid:
	bra		t_operr

	global		slog2d
#--entry point for Log2(X), X is denormalized
slog2d:
	mov.l		(%a0),%d1
	blt.w		invalid			# X < 0
	mov.l		%d0,-(%sp)		# save user rnd prec,mode
	clr.l		%d0			# run slognd with d0 = 0
	bsr		slognd			# log(X), X denorm.
	fmov.l		(%sp)+,%fpcr		# restore user rnd prec,mode into FPCR
	fmul.x		INV_L2(%pc),%fp0	# log2(X) = log(X) * (1/log(2))
	bra		t_minx2

#########################################################################
# stwotox():  computes 2**X for a normalized input			#
# stwotoxd(): computes 2**X for a denormalized input			#
# stentox():  computes 10**X for a normalized input			#
# stentoxd(): computes 10**X for a denormalized input			#
#									#
# INPUT ***************************************************************	#
#	a0 = pointer to extended precision input			#
#	d0 = round precision,mode					#
#									#
# OUTPUT **************************************************************	#
#	fp0 = 2**X or 10**X						#
#									#
# ACCURACY and MONOTONICITY *******************************************	#
#	The returned result is within 2 ulps in 64 significant bits,	#
#	i.e. within 0.5001 ulp to 53 bits if the result is subsequently	#
#	rounded to double precision. The result is provably monotonic	#
#	in double precision.						#
#									#
# ALGORITHM ***********************************************************	#
#									#
#	twotox								#
#	1. If |X| > 16480, go to ExpBig.				#
#									#
#	2. If |X| < 2**(-70), go to ExpSm.				#
#									#
#	3. Decompose X as X = N/64 + r where |r| <= 1/128. Furthermore	#
#		decompose N as						#
#		 N = 64(M + M') + j,  j = 0,1,2,...,63.			#
#									#
#	4. Overwrite r := r * log2. Then				#
#		2**X = 2**(M') * 2**(M) * 2**(j/64) * exp(r).		#
# 8956# Go to expr to compute that expression. # 8957# # 8958# tentox # 8959# 1. If |X| > 16480*log_10(2) (base 10 log of 2), go to ExpBig. # 8960# # 8961# 2. If |X| < 2**(-70), go to ExpSm. # 8962# # 8963# 3. Set y := X*log_2(10)*64 (base 2 log of 10). Set # 8964# N := round-to-int(y). Decompose N as # 8965# N = 64(M + M') + j, j = 0,1,2,...,63. # 8966# # 8967# 4. Define r as # 8968# r := ((X - N*L1)-N*L2) * L10 # 8969# where L1, L2 are the leading and trailing parts of # 8970# log_10(2)/64 and L10 is the natural log of 10. Then # 8971# 10**X = 2**(M') * 2**(M) * 2**(j/64) * exp(r). # 8972# Go to expr to compute that expression. # 8973# # 8974# expr # 8975# 1. Fetch 2**(j/64) from table as Fact1 and Fact2. # 8976# # 8977# 2. Overwrite Fact1 and Fact2 by # 8978# Fact1 := 2**(M) * Fact1 # 8979# Fact2 := 2**(M) * Fact2 # 8980# Thus Fact1 + Fact2 = 2**(M) * 2**(j/64). # 8981# # 8982# 3. Calculate P where 1 + P approximates exp(r): # 8983# P = r + r*r*(A1+r*(A2+...+r*A5)). # 8984# # 8985# 4. Let AdjFact := 2**(M'). Return # 8986# AdjFact * ( Fact1 + ((Fact1*P) + Fact2) ). # 8987# Exit. # 8988# # 8989# ExpBig # 8990# 1. Generate overflow by Huge * Huge if X > 0; otherwise, # 8991# generate underflow by Tiny * Tiny. # 8992# # 8993# ExpSm # 8994# 1. Return 1 + X. 
#									#
#########################################################################

# Constants for the 2**X / 10**X routines.
L2TEN64:
	long		0x406A934F,0x0979A371	# 64LOG10/LOG2 (double)
L10TWO1:
	long		0x3F734413,0x509F8000	# LOG2/64LOG10, leading part (double)

# trailing part of LOG2/64LOG10 (extended); used with L10TWO1 in TENMAIN
L10TWO2:
	long		0xBFCD0000,0xC0219DC1,0xDA994FD2,0x00000000

# natural log of 10 (extended)
LOG10:	long		0x40000000,0x935D8DDD,0xAAA8AC17,0x00000000

# natural log of 2 (extended)
LOG2:	long		0x3FFE0000,0xB17217F7,0xD1CF79AC,0x00000000

# Polynomial coefficients A5..A1 used by expr; each is read as a double
# (fmov.d / fadd.d), EXPA1 is followed by 8 bytes of zero padding.
EXPA5:	long		0x3F56C16D,0x6F7BD0B2
EXPA4:	long		0x3F811112,0x302C712C
EXPA3:	long		0x3FA55555,0x55554CC1
EXPA2:	long		0x3FC55555,0x55554A54
EXPA1:	long		0x3FE00000,0x00000000,0x00000000,0x00000000

# Table of 2^(j/64), j = 0..63, 16 bytes per entry (indexed as j*16).
# As read by TWOMAIN/TENMAIN: the first three longwords are copied to FACT1
# (an extended-precision leading part); the upper word of the fourth
# longword is stored at FACT2 and the lower word at FACT2HI, and the rest of
# FACT2 is cleared, giving the trailing part.
TEXPTBL:
	long		0x3FFF0000,0x80000000,0x00000000,0x3F738000
	long		0x3FFF0000,0x8164D1F3,0xBC030773,0x3FBEF7CA
	long		0x3FFF0000,0x82CD8698,0xAC2BA1D7,0x3FBDF8A9
	long		0x3FFF0000,0x843A28C3,0xACDE4046,0x3FBCD7C9
	long		0x3FFF0000,0x85AAC367,0xCC487B15,0xBFBDE8DA
	long		0x3FFF0000,0x871F6196,0x9E8D1010,0x3FBDE85C
	long		0x3FFF0000,0x88980E80,0x92DA8527,0x3FBEBBF1
	long		0x3FFF0000,0x8A14D575,0x496EFD9A,0x3FBB80CA
	long		0x3FFF0000,0x8B95C1E3,0xEA8BD6E7,0xBFBA8373
	long		0x3FFF0000,0x8D1ADF5B,0x7E5BA9E6,0xBFBE9670
	long		0x3FFF0000,0x8EA4398B,0x45CD53C0,0x3FBDB700
	long		0x3FFF0000,0x9031DC43,0x1466B1DC,0x3FBEEEB0
	long		0x3FFF0000,0x91C3D373,0xAB11C336,0x3FBBFD6D
	long		0x3FFF0000,0x935A2B2F,0x13E6E92C,0xBFBDB319
	long		0x3FFF0000,0x94F4EFA8,0xFEF70961,0x3FBDBA2B
	long		0x3FFF0000,0x96942D37,0x20185A00,0x3FBE91D5
	long		0x3FFF0000,0x9837F051,0x8DB8A96F,0x3FBE8D5A
	long		0x3FFF0000,0x99E04593,0x20B7FA65,0xBFBCDE7B
	long		0x3FFF0000,0x9B8D39B9,0xD54E5539,0xBFBEBAAF
	long		0x3FFF0000,0x9D3ED9A7,0x2CFFB751,0xBFBD86DA
	long		0x3FFF0000,0x9EF53260,0x91A111AE,0xBFBEBEDD
	long		0x3FFF0000,0xA0B0510F,0xB9714FC2,0x3FBCC96E
	long		0x3FFF0000,0xA2704303,0x0C496819,0xBFBEC90B
	long		0x3FFF0000,0xA43515AE,0x09E6809E,0x3FBBD1DB
	long		0x3FFF0000,0xA5FED6A9,0xB15138EA,0x3FBCE5EB
	long		0x3FFF0000,0xA7CD93B4,0xE965356A,0xBFBEC274
	long		0x3FFF0000,0xA9A15AB4,0xEA7C0EF8,0x3FBEA83C
	long		0x3FFF0000,0xAB7A39B5,0xA93ED337,0x3FBECB00
	long		0x3FFF0000,0xAD583EEA,0x42A14AC6,0x3FBE9301
	long		0x3FFF0000,0xAF3B78AD,0x690A4375,0xBFBD8367
	long		0x3FFF0000,0xB123F581,0xD2AC2590,0xBFBEF05F
	long		0x3FFF0000,0xB311C412,0xA9112489,0x3FBDFB3C
	long		0x3FFF0000,0xB504F333,0xF9DE6484,0x3FBEB2FB
	long		0x3FFF0000,0xB6FD91E3,0x28D17791,0x3FBAE2CB
	long		0x3FFF0000,0xB8FBAF47,0x62FB9EE9,0x3FBCDC3C
	long		0x3FFF0000,0xBAFF5AB2,0x133E45FB,0x3FBEE9AA
	long		0x3FFF0000,0xBD08A39F,0x580C36BF,0xBFBEAEFD
	long		0x3FFF0000,0xBF1799B6,0x7A731083,0xBFBCBF51
	long		0x3FFF0000,0xC12C4CCA,0x66709456,0x3FBEF88A
	long		0x3FFF0000,0xC346CCDA,0x24976407,0x3FBD83B2
	long		0x3FFF0000,0xC5672A11,0x5506DADD,0x3FBDF8AB
	long		0x3FFF0000,0xC78D74C8,0xABB9B15D,0xBFBDFB17
	long		0x3FFF0000,0xC9B9BD86,0x6E2F27A3,0xBFBEFE3C
	long		0x3FFF0000,0xCBEC14FE,0xF2727C5D,0xBFBBB6F8
	long		0x3FFF0000,0xCE248C15,0x1F8480E4,0xBFBCEE53
	long		0x3FFF0000,0xD06333DA,0xEF2B2595,0xBFBDA4AE
	long		0x3FFF0000,0xD2A81D91,0xF12AE45A,0x3FBC9124
	long		0x3FFF0000,0xD4F35AAB,0xCFEDFA1F,0x3FBEB243
	long		0x3FFF0000,0xD744FCCA,0xD69D6AF4,0x3FBDE69A
	long		0x3FFF0000,0xD99D15C2,0x78AFD7B6,0xBFB8BC61
	long		0x3FFF0000,0xDBFBB797,0xDAF23755,0x3FBDF610
	long		0x3FFF0000,0xDE60F482,0x5E0E9124,0xBFBD8BE1
	long		0x3FFF0000,0xE0CCDEEC,0x2A94E111,0x3FBACB12
	long		0x3FFF0000,0xE33F8972,0xBE8A5A51,0x3FBB9BFE
	long		0x3FFF0000,0xE5B906E7,0x7C8348A8,0x3FBCF2F4
	long		0x3FFF0000,0xE8396A50,0x3C4BDC68,0x3FBEF22F
	long		0x3FFF0000,0xEAC0C6E7,0xDD24392F,0xBFBDBF4A
	long		0x3FFF0000,0xED4F301E,0xD9942B84,0x3FBEC01A
	long		0x3FFF0000,0xEFE4B99B,0xDCDAF5CB,0x3FBE8CAC
	long		0x3FFF0000,0xF281773C,0x59FFB13A,0xBFBCBB3F
	long		0x3FFF0000,0xF5257D15,0x2486CC2C,0x3FBEF73A
	long		0x3FFF0000,0xF7D0DF73,0x0AD13BB9,0xBFB8B795
	long		0x3FFF0000,0xFA83B2DB,0x722A033A,0x3FBEF84B
	long		0x3FFF0000,0xFD3E0C0C,0xF486C175,0xBFBEF581

# Scratch-area aliases used by the 2**X / 10**X routines.
# NOTE: X, ADJFACT and FACT1 all name FP_SCR0; INT shares L_SCR1.
	set		INT,L_SCR1		# N as a 32-bit integer

	set		X,FP_SCR0		# copy of the input
	set		XDCARE,X+2
	set		XFRAC,X+4

	set		ADJFACT,FP_SCR0		# 2^(M'), built after FACT1 is consumed

	set		FACT1,FP_SCR0		# leading part of 2^M * 2^(J/64)
	set		FACT1HI,FACT1+4
	set		FACT1LOW,FACT1+8

	set		FACT2,FP_SCR1		# trailing part of 2^M * 2^(J/64)
	set		FACT2HI,FACT2+4
	set		FACT2LOW,FACT2+8

	global		stwotox
#--ENTRY POINT FOR 2**(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
stwotox:
	fmovm.x		(%a0),&0x80		# LOAD INPUT

#--D1 := |X| IN COMPACT FORM (EXPONENT WORD : TOP 16 MANTISSA BITS)
	mov.l		(%a0),%d1
	mov.w		4(%a0),%d1
	fmov.x		%fp0,X(%a6)
	and.l		&0x7FFFFFFF,%d1

	cmp.l		%d1,&0x3FB98000		# |X| >= 2**(-70)?
	bge.b		TWOOK1
	bra.w		EXPBORS

TWOOK1:
	cmp.l		%d1,&0x400D80C0		# |X| > 16480?
	ble.b		TWOMAIN
	bra.w		EXPBORS

TWOMAIN:
#--USUAL CASE, 2^(-70) <= |X| <= 16480

	fmov.x		%fp0,%fp1
	fmul.s		&0x42800000,%fp1	# 64 * X
	fmov.l		%fp1,INT(%a6)		# N = ROUND-TO-INT(64 X)
	mov.l		%d2,-(%sp)		# SAVE D2 (RESTORED IN expr)
	lea		TEXPTBL(%pc),%a1	# LOAD ADDRESS OF TABLE OF 2^(J/64)
	fmov.l		INT(%a6),%fp1		# N --> FLOATING FMT
	mov.l		INT(%a6),%d1
	mov.l		%d1,%d2
	and.l		&0x3F,%d1		# D1 IS J
	asl.l		&4,%d1			# DISPLACEMENT FOR 2^(J/64)
	add.l		%d1,%a1			# ADDRESS FOR 2^(J/64)
	asr.l		&6,%d2			# d2 IS L, N = 64L + J
	mov.l		%d2,%d1
	asr.l		&1,%d1			# D1 IS M
	sub.l		%d1,%d2			# d2 IS M', N = 64(M+M') + J
	add.l		&0x3FFF,%d2		# BIAS M' SO D2 CAN BE USED AS AN EXPONENT WORD

#--SUMMARY: a1 IS ADDRESS FOR THE LEADING PORTION OF 2^(J/64),
#--D1 IS M WHERE N = 64(M+M') + J. NOTE THAT |M| <= 16140 BY DESIGN.
#--ADJFACT = 2^(M').
#--REGISTERS SAVED SO FAR ARE (IN ORDER) FPCR, D0, FP1, a1, AND FP2.
#--NOTE(review): THE ONLY PUSH VISIBLE IN THIS ROUTINE UP TO HERE IS D2;
#--THE REGISTER LIST ABOVE LOOKS INHERITED FROM AN OLDER VERSION - CONFIRM.
#--(TWOMAIN, CONTINUED) FETCH FACT1/FACT2 AND FORM R = (X - N/64)*LOG2

	fmovm.x		&0x0c,-(%sp)		# save fp2/fp3

	fmul.s		&0x3C800000,%fp1	# (1/64)*N
	mov.l		(%a1)+,FACT1(%a6)
	mov.l		(%a1)+,FACT1HI(%a6)
	mov.l		(%a1)+,FACT1LOW(%a6)
	mov.w		(%a1)+,FACT2(%a6)

	fsub.x		%fp1,%fp0		# X - (1/64)*INT(64 X)

	mov.w		(%a1)+,FACT2HI(%a6)
	clr.w		FACT2HI+2(%a6)
	clr.l		FACT2LOW(%a6)
	add.w		%d1,FACT1(%a6)		# ADD M TO THE EXPONENT WORD OF FACT1
	fmul.x		LOG2(%pc),%fp0		# FP0 IS R
	add.w		%d1,FACT2(%a6)		# ADD M TO THE EXPONENT WORD OF FACT2

	bra.w		expr

EXPBORS:
#--FPCR, D0 SAVED
#--SHARED BY stwotox AND stentox: D1 IS |X| IN COMPACT FORM AND IS
#--EITHER BELOW 2**(-70) OR ABOVE THE LARGE-ARGUMENT LIMIT.
	cmp.l		%d1,&0x3FFF8000		# |X| > 1 ?
	bgt.b		TEXPBIG

#--|X| IS SMALL, RETURN 1 + X

	fmov.l		%d0,%fpcr		# restore users round prec,mode
	fadd.s		&0x3F800000,%fp0	# RETURN 1 + X
	bra		t_pinx2

TEXPBIG:
#--|X| IS LARGE, GENERATE OVERFLOW IF X > 0; ELSE GENERATE UNDERFLOW
#--REGISTERS SAVED SO FAR ARE FPCR AND D0
	mov.l		X(%a6),%d1		# SIGN/EXPONENT LONGWORD OF THE SAVED INPUT
	cmp.l		%d1,&0
	blt.b		EXPNEG

	bra		t_ovfl2			# t_ovfl expects positive value

EXPNEG:
	bra		t_unfl2			# t_unfl expects positive value

	global		stwotoxd
stwotoxd:
#--ENTRY POINT FOR 2**(X) FOR DENORMALIZED ARGUMENT
#--THE RESULT IS 1 PLUS A SMALL SINGLE-PRECISION VALUE BUILT FROM THE FIRST
#--LONGWORD OF X (WHICH CARRIES X'S SIGN BIT) OR'ED WITH 0x00800001.

	fmov.l		%d0,%fpcr		# set user's rounding mode/precision
	fmov.s		&0x3F800000,%fp0	# RETURN 1 + X
	mov.l		(%a0),%d1
	or.l		&0x00800001,%d1
	fadd.s		%d1,%fp0
	bra		t_pinx2

	global		stentox
#--ENTRY POINT FOR 10**(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
stentox:
	fmovm.x		(%a0),&0x80		# LOAD INPUT

#--D1 := |X| IN COMPACT FORM (EXPONENT WORD : TOP 16 MANTISSA BITS)
	mov.l		(%a0),%d1
	mov.w		4(%a0),%d1
	fmov.x		%fp0,X(%a6)
	and.l		&0x7FFFFFFF,%d1

	cmp.l		%d1,&0x3FB98000		# |X| >= 2**(-70)?
	bge.b		TENOK1
	bra.w		EXPBORS

TENOK1:
	cmp.l		%d1,&0x400B9B07		# |X| <= 16480*log2/log10 ?
	ble.b		TENMAIN
	bra.w		EXPBORS

TENMAIN:
#--USUAL CASE, 2^(-70) <= |X| <= 16480 LOG 2 / LOG 10

	fmov.x		%fp0,%fp1
	fmul.d		L2TEN64(%pc),%fp1	# X*64*LOG10/LOG2
	fmov.l		%fp1,INT(%a6)		# N=INT(X*64*LOG10/LOG2)
	mov.l		%d2,-(%sp)		# SAVE D2 (RESTORED IN expr)
	lea		TEXPTBL(%pc),%a1	# LOAD ADDRESS OF TABLE OF 2^(J/64)
	fmov.l		INT(%a6),%fp1		# N --> FLOATING FMT
	mov.l		INT(%a6),%d1
	mov.l		%d1,%d2
	and.l		&0x3F,%d1		# D1 IS J
	asl.l		&4,%d1			# DISPLACEMENT FOR 2^(J/64)
	add.l		%d1,%a1			# ADDRESS FOR 2^(J/64)
	asr.l		&6,%d2			# d2 IS L, N = 64L + J
	mov.l		%d2,%d1
	asr.l		&1,%d1			# D1 IS M
	sub.l		%d1,%d2			# d2 IS M', N = 64(M+M') + J
	add.l		&0x3FFF,%d2		# BIAS M' SO D2 CAN BE USED AS AN EXPONENT WORD

#--SUMMARY: a1 IS ADDRESS FOR THE LEADING PORTION OF 2^(J/64),
#--D1 IS M WHERE N = 64(M+M') + J. NOTE THAT |M| <= 16140 BY DESIGN.
#--ADJFACT = 2^(M').
#--REGISTERS SAVED SO FAR ARE (IN ORDER) FPCR, D0, FP1, a1, AND FP2.
#--NOTE(review): THE ONLY PUSH VISIBLE IN THIS ROUTINE UP TO HERE IS D2;
#--THE REGISTER LIST ABOVE LOOKS INHERITED FROM AN OLDER VERSION - CONFIRM.
	fmovm.x		&0x0c,-(%sp)		# save fp2/fp3

	fmov.x		%fp1,%fp2

	fmul.d		L10TWO1(%pc),%fp1	# N*(LOG2/64LOG10)_LEAD
	mov.l		(%a1)+,FACT1(%a6)

	fmul.x		L10TWO2(%pc),%fp2	# N*(LOG2/64LOG10)_TRAIL

	mov.l		(%a1)+,FACT1HI(%a6)
	mov.l		(%a1)+,FACT1LOW(%a6)
	fsub.x		%fp1,%fp0		# X - N L_LEAD
	mov.w		(%a1)+,FACT2(%a6)

	fsub.x		%fp2,%fp0		# X - N L_TRAIL

	mov.w		(%a1)+,FACT2HI(%a6)
	clr.w		FACT2HI+2(%a6)
	clr.l		FACT2LOW(%a6)

	fmul.x		LOG10(%pc),%fp0		# FP0 IS R
	add.w		%d1,FACT1(%a6)		# ADD M TO THE EXPONENT WORD OF FACT1
	add.w		%d1,FACT2(%a6)		# ADD M TO THE EXPONENT WORD OF FACT2

#--stentox FALLS THROUGH INTO expr; stwotox BRANCHES HERE.
expr:
#--FPCR, FP2, FP3 ARE SAVED IN ORDER AS SHOWN.
#--ADJFACT CONTAINS 2**(M'), FACT1 + FACT2 = 2**(M) * 2**(J/64).
#--FP0 IS R.
THE FOLLOWING CODE COMPUTES 9266#-- 2**(M'+M) * 2**(J/64) * EXP(R) 9267 9268 fmov.x %fp0,%fp1 9269 fmul.x %fp1,%fp1 # FP1 IS S = R*R 9270 9271 fmov.d EXPA5(%pc),%fp2 # FP2 IS A5 9272 fmov.d EXPA4(%pc),%fp3 # FP3 IS A4 9273 9274 fmul.x %fp1,%fp2 # FP2 IS S*A5 9275 fmul.x %fp1,%fp3 # FP3 IS S*A4 9276 9277 fadd.d EXPA3(%pc),%fp2 # FP2 IS A3+S*A5 9278 fadd.d EXPA2(%pc),%fp3 # FP3 IS A2+S*A4 9279 9280 fmul.x %fp1,%fp2 # FP2 IS S*(A3+S*A5) 9281 fmul.x %fp1,%fp3 # FP3 IS S*(A2+S*A4) 9282 9283 fadd.d EXPA1(%pc),%fp2 # FP2 IS A1+S*(A3+S*A5) 9284 fmul.x %fp0,%fp3 # FP3 IS R*S*(A2+S*A4) 9285 9286 fmul.x %fp1,%fp2 # FP2 IS S*(A1+S*(A3+S*A5)) 9287 fadd.x %fp3,%fp0 # FP0 IS R+R*S*(A2+S*A4) 9288 fadd.x %fp2,%fp0 # FP0 IS EXP(R) - 1 9289 9290 fmovm.x (%sp)+,&0x30 # restore fp2/fp3 9291 9292#--FINAL RECONSTRUCTION PROCESS 9293#--EXP(X) = 2^M*2^(J/64) + 2^M*2^(J/64)*(EXP(R)-1) - (1 OR 0) 9294 9295 fmul.x FACT1(%a6),%fp0 9296 fadd.x FACT2(%a6),%fp0 9297 fadd.x FACT1(%a6),%fp0 9298 9299 fmov.l %d0,%fpcr # restore users round prec,mode 9300 mov.w %d2,ADJFACT(%a6) # INSERT EXPONENT 9301 mov.l (%sp)+,%d2 9302 mov.l &0x80000000,ADJFACT+4(%a6) 9303 clr.l ADJFACT+8(%a6) 9304 mov.b &FMUL_OP,%d1 # last inst is MUL 9305 fmul.x ADJFACT(%a6),%fp0 # FINAL ADJUSTMENT 9306 bra t_catch 9307 9308 global stentoxd 9309stentoxd: 9310#--ENTRY POINT FOR 10**(X) FOR DENORMALIZED ARGUMENT 9311 9312 fmov.l %d0,%fpcr # set user's rounding mode/precision 9313 fmov.s &0x3F800000,%fp0 # RETURN 1 + X 9314 mov.l (%a0),%d1 9315 or.l &0x00800001,%d1 9316 fadd.s %d1,%fp0 9317 bra t_pinx2 9318 9319######################################################################### 9320# smovcr(): returns the ROM constant at the offset specified in d1 # 9321# rounded to the mode and precision specified in d0. 
#									#
# INPUT ***************************************************************	#
#	d0 = rnd prec,mode						#
#	d1 = ROM offset							#
#									#
# OUTPUT **************************************************************	#
#	fp0 = the ROM constant rounded to the user's rounding mode,prec	#
#									#
#########################################################################

	global		smovcr
smovcr:
	mov.l		%d1,-(%sp)		# save rom offset for a sec

	lsr.b		&0x4,%d0		# shift ctrl bits to lo
	mov.l		%d0,%d1			# make a copy
	andi.w		&0x3,%d1		# extract rnd mode
	andi.w		&0xc,%d0		# extract rnd prec
	swap		%d0			# put rnd prec in hi
	mov.w		%d1,%d0			# put rnd mode in lo

	mov.l		(%sp)+,%d1		# get rom offset

#
# check range of offset
#
	tst.b		%d1			# if zero, offset is to pi
	beq.b		pi_tbl			# it is pi
	cmpi.b		%d1,&0x0a		# check range $01 - $0a
	ble.b		z_val			# if in this range, return zero
	cmpi.b		%d1,&0x0e		# check range $0b - $0e
	ble.b		sm_tbl			# valid constants in this range
	cmpi.b		%d1,&0x2f		# check range $0f - $2f
	ble.b		z_val			# if in this range, return zero
	cmpi.b		%d1,&0x3f		# check range $30 - $3f
	ble.b		bg_tbl			# valid constants in this range

# anything else falls through and also returns zero
z_val:
	bra.l		ld_pzero		# return a zero

#
# the answer is PI rounded to the proper precision.
#
# fetch a pointer to the answer table relating to the proper rounding
# mode.
#
pi_tbl:
	tst.b		%d0			# is rmode RN?
	bne.b		pi_not_rn		# no
pi_rn:
	lea.l		PIRN(%pc),%a0		# yes; load PI RN table addr
	bra.w		set_finx
pi_not_rn:
	cmpi.b		%d0,&rp_mode		# is rmode RP?
	beq.b		pi_rp			# yes
pi_rzrm:
	lea.l		PIRZRM(%pc),%a0		# no; load PI RZ,RM table addr
	bra.b		set_finx
pi_rp:
	lea.l		PIRP(%pc),%a0		# load PI RP table addr
	bra.b		set_finx

#
# the answer is one of:
#	$0B	log10(2)	(inexact)
#	$0C	e		(inexact)
#	$0D	log2(e)		(inexact)
#	$0E	log10(e)	(exact)
#
# fetch a pointer to the answer table relating to the proper rounding
# mode.
#
sm_tbl:
	subi.b		&0xb,%d1		# make offset in 0-3 range
	tst.b		%d0			# is rmode RN?
	bne.b		sm_not_rn		# no
sm_rn:
	lea.l		SMALRN(%pc),%a0		# yes; load RN table addr
sm_tbl_cont:
	cmpi.b		%d1,&0x2		# is result log10(e)?
	ble.b		set_finx		# no; answer is inexact
	bra.b		no_finx			# yes; answer is exact
sm_not_rn:
	cmpi.b		%d0,&rp_mode		# is rmode RP?
	beq.b		sm_rp			# yes
sm_rzrm:
	lea.l		SMALRZRM(%pc),%a0	# no; load RZ,RM table addr
	bra.b		sm_tbl_cont
sm_rp:
	lea.l		SMALRP(%pc),%a0		# load RP table addr
	bra.b		sm_tbl_cont

#
# the answer is one of:
#	$30	ln(2)		(inexact)
#	$31	ln(10)		(inexact)
#	$32	10^0		(exact)
#	$33	10^1		(exact)
#	$34	10^2		(exact)
#	$35	10^4		(exact)
#	$36	10^8		(exact)
#	$37	10^16		(exact)
#	$38	10^32		(inexact)
#	$39	10^64		(inexact)
#	$3A	10^128		(inexact)
#	$3B	10^256		(inexact)
#	$3C	10^512		(inexact)
#	$3D	10^1024		(inexact)
#	$3E	10^2048		(inexact)
#	$3F	10^4096		(inexact)
#
# fetch a pointer to the answer table relating to the proper rounding
# mode.
#
bg_tbl:
	subi.b		&0x30,%d1		# make offset in 0-f range
	tst.b		%d0			# is rmode RN?
	bne.b		bg_not_rn		# no
bg_rn:
	lea.l		BIGRN(%pc),%a0		# yes; load RN table addr
bg_tbl_cont:
	cmpi.b		%d1,&0x1		# is offset <= $31?
	ble.b		set_finx		# yes; answer is inexact
	cmpi.b		%d1,&0x7		# is $32 <= offset <= $37?
	ble.b		no_finx			# yes; answer is exact
	bra.b		set_finx		# no; answer is inexact
bg_not_rn:
	cmpi.b		%d0,&rp_mode		# is rmode RP?
	beq.b		bg_rp			# yes
bg_rzrm:
	lea.l		BIGRZRM(%pc),%a0	# no; load RZ,RM table addr
	bra.b		bg_tbl_cont
bg_rp:
	lea.l		BIGRP(%pc),%a0		# load RP table addr
	bra.b		bg_tbl_cont

# answer is inexact, so set INEX2 and AINEX in the user's FPSR.
set_finx:
	ori.l		&inx2a_mask,USER_FPSR(%a6) # set INEX2/AINEX
no_finx:
	mulu.w		&0xc,%d1		# each table entry is 12 bytes
	swap		%d0			# put rnd prec in lo word
	tst.b		%d0			# is precision extended?

	bne.b		not_ext			# no; result must be rounded

# Precision is extended
	fmovm.x		(%a0,%d1.w),&0x80	# return result in fp0
	rts

# Precision is single or double
not_ext:
	swap		%d0			# rnd prec in upper word

# call round() to round the answer to the proper precision.
# exponents out of range for single or double DO NOT cause underflow
# or overflow.
	mov.w		0x0(%a0,%d1.w),FP_SCR1_EX(%a6)	# load first word
	mov.l		0x4(%a0,%d1.w),FP_SCR1_HI(%a6)	# load second word
	mov.l		0x8(%a0,%d1.w),FP_SCR1_LO(%a6)	# load third word
	mov.l		%d0,%d1			# d1 = rnd prec (hi), mode (lo)
	clr.l		%d0			# clear g,r,s
	lea		FP_SCR1(%a6),%a0	# pass ptr to answer
	clr.w		LOCAL_SGN(%a0)		# sign always positive
	bsr.l		_round			# round the mantissa

	fmovm.x		(%a0),&0x80		# return rounded result in fp0
	rts

	align		0x4

# Constant tables: one 12-byte extended-precision entry per constant,
# one table per rounding-mode group (RN, RZ/RM, RP).

PIRN:	long		0x40000000,0xc90fdaa2,0x2168c235	# pi
PIRZRM:	long		0x40000000,0xc90fdaa2,0x2168c234	# pi
PIRP:	long		0x40000000,0xc90fdaa2,0x2168c235	# pi

SMALRN:	long		0x3ffd0000,0x9a209a84,0xfbcff798	# log10(2)
	long		0x40000000,0xadf85458,0xa2bb4a9a	# e
	long		0x3fff0000,0xb8aa3b29,0x5c17f0bc	# log2(e)
	long		0x3ffd0000,0xde5bd8a9,0x37287195	# log10(e)
	long		0x00000000,0x00000000,0x00000000	# 0.0

SMALRZRM:
	long		0x3ffd0000,0x9a209a84,0xfbcff798	# log10(2)
	long		0x40000000,0xadf85458,0xa2bb4a9a	# e
	long		0x3fff0000,0xb8aa3b29,0x5c17f0bb	# log2(e)
	long		0x3ffd0000,0xde5bd8a9,0x37287195	# log10(e)
	long		0x00000000,0x00000000,0x00000000	# 0.0

SMALRP:	long		0x3ffd0000,0x9a209a84,0xfbcff799	# log10(2)
	long		0x40000000,0xadf85458,0xa2bb4a9b	# e
	long		0x3fff0000,0xb8aa3b29,0x5c17f0bc	# log2(e)
	long		0x3ffd0000,0xde5bd8a9,0x37287195	# log10(e)
	long		0x00000000,0x00000000,0x00000000	# 0.0

BIGRN:	long		0x3ffe0000,0xb17217f7,0xd1cf79ac	# ln(2)
	long		0x40000000,0x935d8ddd,0xaaa8ac17	# ln(10)

	long		0x3fff0000,0x80000000,0x00000000	# 10 ^ 0
	long		0x40020000,0xA0000000,0x00000000	# 10 ^ 1
	long		0x40050000,0xC8000000,0x00000000	# 10 ^ 2
	long		0x400C0000,0x9C400000,0x00000000	# 10 ^ 4
	long		0x40190000,0xBEBC2000,0x00000000	# 10 ^ 8
	long		0x40340000,0x8E1BC9BF,0x04000000	# 10 ^ 16
	long		0x40690000,0x9DC5ADA8,0x2B70B59E	# 10 ^ 32
	long		0x40D30000,0xC2781F49,0xFFCFA6D5	# 10 ^ 64
	long		0x41A80000,0x93BA47C9,0x80E98CE0	# 10 ^ 128
	long		0x43510000,0xAA7EEBFB,0x9DF9DE8E	# 10 ^ 256
	long		0x46A30000,0xE319A0AE,0xA60E91C7	# 10 ^ 512
	long		0x4D480000,0xC9767586,0x81750C17	# 10 ^ 1024
	long		0x5A920000,0x9E8B3B5D,0xC53D5DE5	# 10 ^ 2048
	long		0x75250000,0xC4605202,0x8A20979B	# 10 ^ 4096

BIGRZRM:
	long		0x3ffe0000,0xb17217f7,0xd1cf79ab	# ln(2)
	long		0x40000000,0x935d8ddd,0xaaa8ac16	# ln(10)

	long		0x3fff0000,0x80000000,0x00000000	# 10 ^ 0
	long		0x40020000,0xA0000000,0x00000000	# 10 ^ 1
	long		0x40050000,0xC8000000,0x00000000	# 10 ^ 2
	long		0x400C0000,0x9C400000,0x00000000	# 10 ^ 4
	long		0x40190000,0xBEBC2000,0x00000000	# 10 ^ 8
	long		0x40340000,0x8E1BC9BF,0x04000000	# 10 ^ 16
	long		0x40690000,0x9DC5ADA8,0x2B70B59D	# 10 ^ 32
	long		0x40D30000,0xC2781F49,0xFFCFA6D5	# 10 ^ 64
	long		0x41A80000,0x93BA47C9,0x80E98CDF	# 10 ^ 128
	long		0x43510000,0xAA7EEBFB,0x9DF9DE8D	# 10 ^ 256
	long		0x46A30000,0xE319A0AE,0xA60E91C6	# 10 ^ 512
	long		0x4D480000,0xC9767586,0x81750C17	# 10 ^ 1024
	long		0x5A920000,0x9E8B3B5D,0xC53D5DE4	# 10 ^ 2048
	long		0x75250000,0xC4605202,0x8A20979A	# 10 ^ 4096

BIGRP:
	long		0x3ffe0000,0xb17217f7,0xd1cf79ac	# ln(2)
	long		0x40000000,0x935d8ddd,0xaaa8ac17	# ln(10)

	long		0x3fff0000,0x80000000,0x00000000	# 10 ^ 0
	long		0x40020000,0xA0000000,0x00000000	# 10 ^ 1
	long		0x40050000,0xC8000000,0x00000000	# 10 ^ 2
	long		0x400C0000,0x9C400000,0x00000000	# 10 ^ 4
	long		0x40190000,0xBEBC2000,0x00000000	# 10 ^ 8
	long		0x40340000,0x8E1BC9BF,0x04000000	# 10 ^ 16
	long		0x40690000,0x9DC5ADA8,0x2B70B59E	# 10 ^ 32
	long		0x40D30000,0xC2781F49,0xFFCFA6D6	# 10 ^ 64
	long		0x41A80000,0x93BA47C9,0x80E98CE0	# 10 ^ 128
	long		0x43510000,0xAA7EEBFB,0x9DF9DE8E	# 10 ^ 256
	long		0x46A30000,0xE319A0AE,0xA60E91C7	# 10 ^ 512
	long		0x4D480000,0xC9767586,0x81750C18	# 10 ^ 1024
	long		0x5A920000,0x9E8B3B5D,0xC53D5DE5	# 10 ^ 2048
	long		0x75250000,0xC4605202,0x8A20979B	# 10 ^ 4096

#########################################################################
# sscale(): computes the destination operand scaled by the source	#
#	    operand. If the absolute value of the source operand is	#
#	    >= 2^14, an overflow or underflow is returned.		#
#									#
# INPUT ***************************************************************	#
#	a0  = pointer to double-extended source operand X		#
#	a1  = pointer to double-extended destination operand Y		#
#	d0  = rnd prec,mode (saved on the stack, reloaded into fpcr)	#
#									#
# OUTPUT **************************************************************	#
#	fp0 =  scale(X,Y)						#
#									#
#########################################################################

set	SIGN,		L_SCR1

	global		sscale
sscale:
	mov.l		%d0,-(%sp)		# store off ctrl bits for now

	mov.w		DST_EX(%a1),%d1		# get dst exponent
	smi.b		SIGN(%a6)		# use SIGN to hold dst sign
	andi.l		&0x00007fff,%d1		# strip sign from dst exp

	mov.w		SRC_EX(%a0),%d0		# check src bounds
	andi.w		&0x7fff,%d0		# clr src sign bit
	cmpi.w		%d0,&0x3fff		# is |src| < 1 ?
	blt.w		src_small		# yes
	cmpi.w		%d0,&0x400c		# no; is src too big?
	bgt.w		src_out			# yes

#
# Source is within 2^14 range.
#
src_ok:
	fintrz.x	SRC(%a0),%fp0		# calc int of src
	fmov.l		%fp0,%d0		# int src to d0
# don't want any accrued bits from the fintrz showing up later since
# we may need to read the fpsr for the last fp op in t_catch2().
	fmov.l		&0x0,%fpsr

	tst.b		DST_HI(%a1)		# is dst denormalized?
	bmi.b		sok_norm		# no; msb of mantissa is set

# the dst is a DENORM. normalize the DENORM and add the adjustment to
# the src value. then, jump to the norm part of the routine.
sok_dnrm:
	mov.l		%d0,-(%sp)		# save src for now

	mov.w		DST_EX(%a1),FP_SCR0_EX(%a6) # make a copy
	mov.l		DST_HI(%a1),FP_SCR0_HI(%a6)
	mov.l		DST_LO(%a1),FP_SCR0_LO(%a6)

	lea		FP_SCR0(%a6),%a0	# pass ptr to DENORM
	bsr.l		norm			# normalize the DENORM
	neg.l		%d0			# negate the value norm left in d0
	add.l		(%sp)+,%d0		# add adjustment to src

	fmovm.x		FP_SCR0(%a6),&0x80	# load normalized DENORM

	cmpi.w		%d0,&-0x3fff		# is the shft amt really low?
	bge.b		sok_norm2		# thank goodness no

# the src will force the dst to a DENORM value or worse.
# the multiply factor that we're trying to create should be a denorm
# for the multiply to work. therefore, we're going to actually do a
# multiply with a denorm which will cause an unimplemented data type
# exception to be put into the machine which will be caught and corrected
# later. we don't do this with the DENORMs above because this method
# is slower. but, don't fret, I don't see it being used much either.
	fmov.l		(%sp)+,%fpcr		# restore user fpcr
	mov.l		&0x80000000,%d1		# load normalized mantissa
	subi.l		&-0x3fff,%d0		# how many should we shift?
	neg.l		%d0			# make it positive
	cmpi.b		%d0,&0x20		# is it >= 32?
	bge.b		sok_dnrm_32		# yes
	lsr.l		%d0,%d1			# no; bit stays in upper lw
	clr.l		-(%sp)			# insert zero low mantissa
	mov.l		%d1,-(%sp)		# insert new high mantissa
	clr.l		-(%sp)			# make zero exponent
	bra.b		sok_norm_cont
sok_dnrm_32:
	subi.b		&0x20,%d0		# get shift count
	lsr.l		%d0,%d1			# make low mantissa longword
	mov.l		%d1,-(%sp)		# insert new low mantissa
	clr.l		-(%sp)			# insert zero high mantissa
	clr.l		-(%sp)			# make zero exponent
	bra.b		sok_norm_cont

# dst is normalized (or was just normalized above). so, let's
# create an fp multiply that will create the result: build the factor
# 2^(int src) on the stack and multiply fp0 by it.
sok_norm:
	fmovm.x		DST(%a1),&0x80		# load fp0 with normalized dst
sok_norm2:
	fmov.l		(%sp)+,%fpcr		# restore user fpcr

	addi.w		&0x3fff,%d0		# turn src amt into exp value
	swap		%d0			# put exponent in high word
	clr.l		-(%sp)			# insert zero lo mantissa
	mov.l		&0x80000000,-(%sp)	# insert new high mantissa
	mov.l		%d0,-(%sp)		# insert new exponent

sok_norm_cont:
	fmov.l		%fpcr,%d0		# d0 needs fpcr for t_catch2
	mov.b		&FMUL_OP,%d1		# last inst is MUL
	fmul.x		(%sp)+,%fp0		# do the multiply
	bra		t_catch2		# catch any exceptions

#
# Source is outside of 2^14 range.  Test the sign and branch
# to the appropriate exception handler.
#
src_out:
	mov.l		(%sp)+,%d0		# restore ctrl bits
	exg		%a0,%a1			# swap src,dst ptrs
	tst.b		SRC_EX(%a1)		# is src negative?
	bmi		t_unfl			# yes; underflow
	bra		t_ovfl_sc		# no; overflow

#
# The source input is below 1, so we check for denormalized numbers
# and set unfl.
#
src_small:
	tst.b		DST_HI(%a1)		# is dst denormalized?
	bpl.b		ssmall_done		# yes

	mov.l		(%sp)+,%d0
	fmov.l		%d0,%fpcr		# no; load control bits
	mov.b		&FMOV_OP,%d1		# last inst is MOVE
	fmov.x		DST(%a1),%fp0		# simply return dest
	bra		t_catch2
ssmall_done:
	mov.l		(%sp)+,%d0		# load control bits into d0
	mov.l		%a1,%a0			# pass ptr to dst
	bra		t_resdnrm

#########################################################################
# smod(): computes the fp MOD of the input values X,Y.			#
# srem(): computes the fp (IEEE) REM of the input values X,Y.		#
#									#
# INPUT ***************************************************************	#
#	a0 = pointer to extended precision input Y (source operand)	#
#	a1 = pointer to extended precision input X (destination operand)#
#	d0 = round precision,mode					#
#									#
#	The input operands X and Y can be either normalized or		#
#	denormalized.							#
#									#
# OUTPUT **************************************************************	#
#      fp0 = FREM(X,Y) or FMOD(X,Y)					#
#									#
# ALGORITHM ***********************************************************	#
#									#
#       Step 1.  Save and strip signs of X and Y: signX := sign(X),	#
#                signY := sign(Y), X := |X|, Y := |Y|,			#
#                signQ := signX EOR signY. Record whether MOD or REM	#
#                is requested.						#
#									#
#       Step 2.  Set L := expo(X)-expo(Y), k := 0, Q := 0.		#
#                If (L < 0) then					#
#                   R := X, go to Step 4.				#
#                else							#
#                   R := 2^(-L)X, j := L.				#
#                endif							#
#									#
#       Step 3.  Perform MOD(X,Y)					#
#            3.1 If R = Y, go to Step 9.				#
#            3.2 If R > Y, then { R := R - Y, Q := Q + 1}		#
#            3.3 If j = 0, go to Step 4.				#
#            3.4 k := k + 1, j := j - 1, Q := 2Q, R := 2R. Go to	#
#                Step 3.1.						#
#									#
#       Step 4.  At this point, R = X - QY = MOD(X,Y). Set		#
#                Last_Subtract := false (used in Step 7 below). If	#
#                MOD is requested, go to Step 6.			#
#									#
#       Step 5.  R = MOD(X,Y), but REM(X,Y) is requested.		#
#            5.1 If R < Y/2, then R = MOD(X,Y) = REM(X,Y). Go to	#
#                Step 6.						#
#            5.2 If R > Y/2, then { set Last_Subtract := true,		#
#                Q := Q + 1, Y := signY*Y }. Go to Step 6.		#
#            5.3 This is the tricky case of R = Y/2. If Q is odd,	#
#                then { Q := Q + 1, signX := -signX }.			#
#									#
#       Step 6.  R := signX*R.						#
#									#
#       Step 7.  If Last_Subtract = true, R := R - Y.			#
#									#
#       Step 8.  Return signQ, last 7 bits of Q, and R as required.	#
#									#
#       Step 9.  At this point, R = 2^(-j)*X - Q Y = Y. Thus,		#
#                X = 2^(j)*(Q+1)Y. set Q := 2^(j)*(Q+1),		#
#                R := 0. Return signQ, last 7 bits of Q, and R.		#
#									#
#########################################################################

# Mod_Flag: 0 = MOD requested (smod), 1 = REM requested (srem).
# Sc_Flag:  non-zero when the result in fp0 still has to be multiplied
#	    by Scale before returning (see Do_Scale / Restore).
	set		Mod_Flag,L_SCR3
	set		Sc_Flag,L_SCR3+1

	set		SignY,L_SCR2
	set		SignX,L_SCR2+2
	set		SignQ,L_SCR3+2

	set		Y,FP_SCR0
	set		Y_Hi,Y+4
	set		Y_Lo,Y+8

	set		R,FP_SCR1
	set		R_Hi,R+4
	set		R_Lo,R+8

# Scale = 2^(-16382) in extended precision (exponent field $0001). It
# removes the extra $3FFE exponent bias that Do_Scale leaves on R.
Scale:
	long		0x00010000,0x80000000,0x00000000,0x00000000

	global		smod
smod:
	clr.b		FPSR_QBYTE(%a6)
	mov.l		%d0,-(%sp)		# save ctrl bits
	clr.b		Mod_Flag(%a6)		# MOD requested
	bra.b		Mod_Rem

	global		srem
srem:
	clr.b		FPSR_QBYTE(%a6)
	mov.l		%d0,-(%sp)		# save ctrl bits
	mov.b		&0x1,Mod_Flag(%a6)	# REM requested

Mod_Rem:
#..Save sign of X and Y
	movm.l		&0x3f00,-(%sp)		# save data registers d2-d7
	mov.w		SRC_EX(%a0),%d3
	mov.w		%d3,SignY(%a6)
	and.l		&0x00007FFF,%d3		# Y := |Y|

#
	mov.l		SRC_HI(%a0),%d4
	mov.l		SRC_LO(%a0),%d5		# (D3,D4,D5) is |Y|

	tst.l		%d3			# zero exponent: Y is denormalized
	bne.b		Y_Normal

	mov.l		&0x00003FFE,%d3		# $3FFD + 1
	tst.l		%d4
	bne.b		HiY_not0

HiY_0:
	mov.l		%d5,%d4
	clr.l		%d5
	sub.l		&32,%d3
	clr.l		%d6
	bfffo		%d4{&0:&32},%d6
	lsl.l		%d6,%d4
	sub.l		%d6,%d3			# (D3,D4,D5) is normalized
#						# ...with bias $7FFD
	bra.b		Chk_X

HiY_not0:
	clr.l		%d6
	bfffo		%d4{&0:&32},%d6
	sub.l		%d6,%d3
	lsl.l		%d6,%d4
	mov.l		%d5,%d7			# a copy of D5
	lsl.l		%d6,%d5
	neg.l		%d6
	add.l		&32,%d6
	lsr.l		%d6,%d7
	or.l		%d7,%d4			# (D3,D4,D5) normalized
#						# ...with bias $7FFD
	bra.b		Chk_X

Y_Normal:
	add.l		&0x00003FFE,%d3		# (D3,D4,D5) normalized
#						# ...with bias $7FFD

Chk_X:
	mov.w		DST_EX(%a1),%d0
	mov.w		%d0,SignX(%a6)
	mov.w		SignY(%a6),%d1
	eor.l		%d0,%d1
	and.l		&0x00008000,%d1
	mov.w		%d1,SignQ(%a6)		# sign(Q) obtained
	and.l		&0x00007FFF,%d0
	mov.l		DST_HI(%a1),%d1
	mov.l		DST_LO(%a1),%d2		# (D0,D1,D2) is |X|
	tst.l		%d0			# zero exponent: X is denormalized
	bne.b		X_Normal
	mov.l		&0x00003FFE,%d0
	tst.l		%d1
	bne.b		HiX_not0

HiX_0:
	mov.l		%d2,%d1
	clr.l		%d2
	sub.l		&32,%d0
	clr.l		%d6
	bfffo		%d1{&0:&32},%d6
	lsl.l		%d6,%d1
	sub.l		%d6,%d0			# (D0,D1,D2) is normalized
#						# ...with bias $7FFD
	bra.b		Init

HiX_not0:
	clr.l		%d6
	bfffo		%d1{&0:&32},%d6
	sub.l		%d6,%d0
	lsl.l		%d6,%d1
	mov.l		%d2,%d7			# a copy of D2
	lsl.l		%d6,%d2
	neg.l		%d6
	add.l		&32,%d6
	lsr.l		%d6,%d7
	or.l		%d7,%d1			# (D0,D1,D2) normalized
#						# ...with bias $7FFD
	bra.b		Init

X_Normal:
	add.l		&0x00003FFE,%d0		# (D0,D1,D2) normalized
#						# ...with bias $7FFD

Init:
#
	mov.l		%d3,L_SCR1(%a6)		# save biased exp(Y)
	mov.l		%d0,-(%sp)		# save biased exp(X)
	sub.l		%d3,%d0			# L := expo(X)-expo(Y)

	clr.l		%d6			# D6 := carry <- 0
	clr.l		%d3			# D3 is Q
	mov.l		&0,%a1			# A1 is k; j+k=L, Q=0

#..(Carry,D1,D2) is R
	tst.l		%d0
	bge.b		Mod_Loop_pre

#..expo(X) < expo(Y). Thus X = mod(X,Y)
#
	mov.l		(%sp)+,%d0		# restore d0
	bra.w		Get_Mod

Mod_Loop_pre:
	addq.l		&0x4,%sp		# erase exp(X)
#..At this point  R = 2^(-L)X; Q = 0; k = 0; and  k+j = L
Mod_Loop:
	tst.l		%d6			# test carry bit
	bgt.b		R_GT_Y

#..At this point carry = 0, R = (D1,D2), Y = (D4,D5)
	cmp.l		%d1,%d4			# compare hi(R) and hi(Y)
	bne.b		R_NE_Y
	cmp.l		%d2,%d5			# compare lo(R) and lo(Y)
	bne.b		R_NE_Y

#..At this point, R = Y
	bra.w		Rem_is_0

R_NE_Y:
#..use the borrow of the previous compare
	bcs.b		R_LT_Y			# borrow is set iff R < Y

R_GT_Y:
#..If Carry is set, then Y < (Carry,D1,D2) < 2Y. Otherwise, Carry = 0
#..and Y < (D1,D2) < 2Y. Either way, perform R - Y
	sub.l		%d5,%d2			# lo(R) - lo(Y)
	subx.l		%d4,%d1			# hi(R) - hi(Y)
	clr.l		%d6			# clear carry
	addq.l		&1,%d3			# Q := Q + 1

R_LT_Y:
#..At this point, Carry=0, R < Y. R = 2^(k-L)X - QY; k+j = L; j >= 0.
	tst.l		%d0			# see if j = 0.
	beq.b		PostLoop

	add.l		%d3,%d3			# Q := 2Q
	add.l		%d2,%d2			# lo(R) = 2lo(R)
	roxl.l		&1,%d1			# hi(R) = 2hi(R) + carry
	scs		%d6			# set Carry if 2(R) overflows
	addq.l		&1,%a1			# k := k+1
	subq.l		&1,%d0			# j := j - 1
#..At this point, R=(Carry,D1,D2) = 2^(k-L)X - QY, j+k=L, j >= 0, R < 2Y.

	bra.b		Mod_Loop

PostLoop:
#..k = L, j = 0, Carry = 0, R = (D1,D2) = X - QY, R < Y.

#..normalize R.
	mov.l		L_SCR1(%a6),%d0		# new biased expo of R
	tst.l		%d1
	bne.b		HiR_not0

HiR_0:
	mov.l		%d2,%d1
	clr.l		%d2
	sub.l		&32,%d0
	clr.l		%d6
	bfffo		%d1{&0:&32},%d6
	lsl.l		%d6,%d1
	sub.l		%d6,%d0			# (D0,D1,D2) is normalized
#						# ...with bias $7FFD
	bra.b		Get_Mod

HiR_not0:
	clr.l		%d6
	bfffo		%d1{&0:&32},%d6
	bmi.b		Get_Mod			# already normalized
	sub.l		%d6,%d0
	lsl.l		%d6,%d1
	mov.l		%d2,%d7			# a copy of D2
	lsl.l		%d6,%d2
	neg.l		%d6
	add.l		&32,%d6
	lsr.l		%d6,%d7
	or.l		%d7,%d1			# (D0,D1,D2) normalized

#
# Store R and Y as extended operands. If expo(R) (bias $7FFD) is below
# $41FE, keep the extra bias and let Restore multiply by Scale; otherwise
# remove the extra $3FFE from both exponents right here.
Get_Mod:
	cmp.l		%d0,&0x000041FE
	bge.b		No_Scale
Do_Scale:
	mov.w		%d0,R(%a6)
	mov.l		%d1,R_Hi(%a6)
	mov.l		%d2,R_Lo(%a6)
	mov.l		L_SCR1(%a6),%d6
	mov.w		%d6,Y(%a6)
	mov.l		%d4,Y_Hi(%a6)
	mov.l		%d5,Y_Lo(%a6)
	fmov.x		R(%a6),%fp0		# no exception
	mov.b		&1,Sc_Flag(%a6)
	bra.b		ModOrRem
No_Scale:
	mov.l		%d1,R_Hi(%a6)
	mov.l		%d2,R_Lo(%a6)
	sub.l		&0x3FFE,%d0
	mov.w		%d0,R(%a6)
	mov.l		L_SCR1(%a6),%d6
	sub.l		&0x3FFE,%d6
	mov.l		%d6,L_SCR1(%a6)
	fmov.x		R(%a6),%fp0
	mov.w		%d6,Y(%a6)
	mov.l		%d4,Y_Hi(%a6)
	mov.l		%d5,Y_Lo(%a6)
	clr.b		Sc_Flag(%a6)

#
ModOrRem:
	tst.b		Mod_Flag(%a6)
	beq.b		Fix_Sign		# MOD requested; R is final

#..REM requested: compare R with Y/2
	mov.l		L_SCR1(%a6),%d6		# new biased expo(Y)
	subq.l		&1,%d6			# biased expo(Y/2)
	cmp.l		%d0,%d6
	blt.b		Fix_Sign		# R < Y/2
	bgt.b		Last_Sub		# R > Y/2

	cmp.l		%d1,%d4			# same exponent; compare mantissas
	bne.b		Not_EQ
	cmp.l		%d2,%d5
	bne.b		Not_EQ
	bra.w		Tie_Case		# R = Y/2

Not_EQ:
	bcs.b		Fix_Sign		# borrow set: R < Y/2

Last_Sub:
#
	fsub.x		Y(%a6),%fp0		# no exceptions
	addq.l		&1,%d3			# Q := Q + 1

#
Fix_Sign:
#..Get sign of X
	mov.w		SignX(%a6),%d6
	bge.b		Get_Q
	fneg.x		%fp0

#..Get Q
#
Get_Q:
	clr.l		%d6
	mov.w		SignQ(%a6),%d6		# D6 is sign(Q)
	mov.l		&8,%d7
	lsr.l		%d7,%d6			# move sign bit down to bit 7
	and.l		&0x0000007F,%d3		# 7 bits of Q
	or.l		%d6,%d3			# sign and bits of Q
#	swap		%d3
#	fmov.l		%fpsr,%d6
#	and.l		&0xFF00FFFF,%d6
#	or.l		%d3,%d6
#	fmov.l		%d6,%fpsr		# put Q in fpsr
	mov.b		%d3,FPSR_QBYTE(%a6)	# put Q in fpsr

#
Restore:
	movm.l		(%sp)+,&0xfc		#  {%d2-%d7}
	mov.l		(%sp)+,%d0
	fmov.l		%d0,%fpcr
	tst.b		Sc_Flag(%a6)
	beq.b		Finish
	mov.b		&FMUL_OP,%d1		# last inst is MUL
	fmul.x		Scale(%pc),%fp0		# may cause underflow
	bra		t_catch2
# the '040 package did this apparently to see if the dst operand for the
# preceding fmul was a denorm. but, it better not have been since the
# algorithm just got done playing with fp0 and expected no exceptions
# as a result. trust me...
#	bra		t_avoid_unsupp		# check for denorm as a
#						# ;result of the scaling

Finish:
	mov.b		&FMOV_OP,%d1		# last inst is MOVE
	fmov.x		%fp0,%fp0		# capture exceptions & round
	bra		t_catch2

Rem_is_0:
#..R = 2^(-j)X - Q Y = Y, thus R = 0 and quotient = 2^j (Q+1)
	addq.l		&1,%d3
	cmp.l		%d0,&8			# D0 is j
	bge.b		Q_Big

	lsl.l		%d0,%d3
	bra.b		Set_R_0

Q_Big:
	clr.l		%d3

Set_R_0:
	fmov.s		&0x00000000,%fp0
	clr.b		Sc_Flag(%a6)
	bra.w		Fix_Sign

Tie_Case:
#..Check parity of Q
	mov.l		%d3,%d6
	and.l		&0x00000001,%d6
	tst.l		%d6
	beq.w		Fix_Sign		# Q is even

#..Q is odd, Q := Q + 1, signX := -signX
	addq.l		&1,%d3
	mov.w		SignX(%a6),%d6
	eor.l		&0x00008000,%d6
	mov.w		%d6,SignX(%a6)
	bra.w		Fix_Sign

# default NAN returned by t_operr
qnan:	long		0x7fff0000, 0xffffffff, 0xffffffff

#########################################################################
# XDEF ****************************************************************	#
#	t_dz(): Handle DZ exception during transcendental emulation.	#
#	        Sets N bit according to sign of source operand.		#
#	t_dz2(): Handle DZ exception during transcendental emulation.	#
#		 Sets N bit always.					#
#									#
# XREF ****************************************************************	#
#	None								#
#									#
# INPUT ***************************************************************	#
#	a0 = pointer to source operand					#
#									#
# OUTPUT **************************************************************	#
#	fp0 = default result						#
#									#
# ALGORITHM ***********************************************************	#
#	- Store properly signed INF into fp0.				#
#	- Set FPSR exception status dz bit, ccode inf bit, and		#
#	  accrued dz bit.						#
#									#
#########################################################################

	global		t_dz
t_dz:
	tst.b		SRC_EX(%a0)		# is src negative?
	bmi.b		t_dz2			# yes

dz_pinf:
	fmov.s		&0x7f800000,%fp0	# return +INF in fp0
	ori.l		&dzinf_mask,USER_FPSR(%a6) # set I/DZ/ADZ
	rts

	global		t_dz2
t_dz2:
	fmov.s		&0xff800000,%fp0	# return -INF in fp0
	ori.l		&dzinf_mask+neg_mask,USER_FPSR(%a6) # set N/I/DZ/ADZ
	rts

#################################################################
# OPERR exception:						#
#	- set FPSR exception status operr bit, condition code	#
#	  nan bit; Store default NAN into fp0			#
#################################################################
	global		t_operr
t_operr:
	ori.l		&opnan_mask,USER_FPSR(%a6) # set NaN/OPERR/AIOP
	fmovm.x		qnan(%pc),&0x80		# return default NAN in fp0
	rts

#################################################################
# Extended DENORM:						#
#	- For all functions that have a denormalized input and	#
#	  that f(x)=x, this is the entry point.			#
#	- we only return the EXOP here if either underflow or	#
#	  inexact is enabled.					#
#	- a0 = pointer to the denorm, d0 = rnd prec,mode.	#
#################################################################

# Entry point for scale w/ extended denorm. The function does
# NOT set INEX2/AUNFL/AINEX.
	global		t_resdnrm
t_resdnrm:
	ori.l		&unfl_mask,USER_FPSR(%a6) # set UNFL
	bra.b		xdnrm_con

	global		t_extdnrm
t_extdnrm:
	ori.l		&unfinx_mask,USER_FPSR(%a6) # set UNFL/INEX2/AUNFL/AINEX

xdnrm_con:
	mov.l		%a0,%a1			# make copy of src ptr
	mov.l		%d0,%d1			# make copy of rnd prec,mode
	andi.b		&0xc0,%d1		# extended precision?
	bne.b		xdnrm_sd		# no

# result precision is extended.
	tst.b		LOCAL_EX(%a0)		# is denorm negative?
	bpl.b		xdnrm_exit		# no

	bset		&neg_bit,FPSR_CC(%a6)	# yes; set 'N' ccode bit
	bra.b		xdnrm_exit

# result precision is single or double
xdnrm_sd:
	mov.l		%a1,-(%sp)		# keep src ptr across unf_sub
	tst.b		LOCAL_EX(%a0)		# is denorm pos or neg?
	smi.b		%d1			# set d1 accordingly
	bsr.l		unf_sub
	mov.l		(%sp)+,%a1
xdnrm_exit:
	fmovm.x		(%a0),&0x80		# return default result in fp0

	mov.b		FPCR_ENABLE(%a6),%d0
	andi.b		&0x0a,%d0		# is UNFL or INEX enabled?
	bne.b		xdnrm_ena		# yes
	rts

################
# unfl enabled #
################
# we have a DENORM that needs to be converted into an EXOP.
# so, normalize the mantissa, add 0x6000 to the new exponent,
# and return the result in fp1.
# NOTE(review): sscale negates the d0 left by norm() before using it as an
# exponent adjustment; here it is added un-negated. confirm against
# norm()'s return convention.
xdnrm_ena:
	mov.w		LOCAL_EX(%a1),FP_SCR0_EX(%a6)
	mov.l		LOCAL_HI(%a1),FP_SCR0_HI(%a6)
	mov.l		LOCAL_LO(%a1),FP_SCR0_LO(%a6)

	lea		FP_SCR0(%a6),%a0
	bsr.l		norm			# normalize mantissa
	addi.l		&0x6000,%d0		# add extra bias
	andi.w		&0x8000,FP_SCR0_EX(%a6)	# keep old sign
	or.w		%d0,FP_SCR0_EX(%a6)	# insert new exponent

	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	rts

#################################################################
# UNFL exception:						#
#	- This routine is for cases where even an EXOP isn't	#
#	  large enough to hold the range of this result.	#
#	  In such a case, the EXOP equals zero.			#
#	- Return the default result to the proper precision	#
#	  with the sign of this result being the same as that	#
#	  of the src operand.					#
#	- t_unfl2() is provided to force the result sign to	#
#	  positive which is the desired result for fetox().	#
#################################################################
	global		t_unfl
t_unfl:
	ori.l		&unfinx_mask,USER_FPSR(%a6) # set UNFL/INEX2/AUNFL/AINEX

	tst.b		(%a0)			# is result pos or neg?
	smi.b		%d1			# set d1 accordingly
	bsr.l		unf_sub			# calc default unfl result
	fmovm.x		(%a0),&0x80		# return default result in fp0

	fmov.s		&0x00000000,%fp1	# return EXOP in fp1
	rts

# t_unfl2 ALWAYS tells unf_sub to create a positive result
	global		t_unfl2
t_unfl2:
	ori.l		&unfinx_mask,USER_FPSR(%a6) # set UNFL/INEX2/AUNFL/AINEX

	sf.b		%d1			# set d1 to represent positive
	bsr.l		unf_sub			# calc default unfl result
	fmovm.x		(%a0),&0x80		# return default result in fp0

	fmov.s		&0x00000000,%fp1	# return EXOP in fp1
	rts

#################################################################
# OVFL exception:						#
#	- This routine is for cases where even an EXOP isn't	#
#	  large enough to hold the range of this result.	#
#	- Return the default result to the proper precision	#
#	  with the sign of this result being the same as that	#
#	  of the src operand.					#
#	- t_ovfl2() is provided to force the result sign to	#
#	  positive which is the desired result for fcosh().	#
#	- t_ovfl_sc() is provided for scale() which only sets	#
#	  the inexact bits if the number is inexact for the	#
#	  precision indicated.					#
#################################################################

	global		t_ovfl_sc
t_ovfl_sc:
	ori.l		&ovfl_inx_mask,USER_FPSR(%a6) # set OVFL/AOVFL/AINEX

	mov.b		%d0,%d1			# fetch rnd mode/prec
	andi.b		&0xc0,%d1		# extract rnd prec
	beq.b		ovfl_work		# prec is extended

	tst.b		LOCAL_HI(%a0)		# is dst a DENORM?
	bmi.b		ovfl_sc_norm		# no

# dst op is a DENORM. we have to normalize the mantissa to see if the
# result would be inexact for the given precision. make a copy of the
# dst so we don't screw up the version passed to us.
	mov.w		LOCAL_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		LOCAL_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		LOCAL_LO(%a0),FP_SCR0_LO(%a6)
	lea		FP_SCR0(%a6),%a0	# pass ptr to FP_SCR0
	movm.l		&0xc080,-(%sp)		# save d0-d1/a0
	bsr.l		norm			# normalize mantissa
	movm.l		(%sp)+,&0x0103		# restore d0-d1/a0

ovfl_sc_norm:
	cmpi.b		%d1,&0x40		# is prec sgl?
	bne.b		ovfl_sc_dbl		# no; dbl
ovfl_sc_sgl:
	tst.l		LOCAL_LO(%a0)		# is lo lw of sgl set?
	bne.b		ovfl_sc_inx		# yes
	tst.b		3+LOCAL_HI(%a0)		# is lo byte of hi lw set?
	bne.b		ovfl_sc_inx		# yes
	bra.b		ovfl_work		# don't set INEX2
ovfl_sc_dbl:
	mov.l		LOCAL_LO(%a0),%d1	# are any of lo 11 bits of
	andi.l		&0x7ff,%d1		# dbl mantissa set?
	beq.b		ovfl_work		# no; don't set INEX2
ovfl_sc_inx:
	ori.l		&inex2_mask,USER_FPSR(%a6) # set INEX2
	bra.b		ovfl_work		# continue

	global		t_ovfl
t_ovfl:
	ori.l		&ovfinx_mask,USER_FPSR(%a6) # set OVFL/INEX2/AOVFL/AINEX

ovfl_work:
	tst.b		LOCAL_EX(%a0)		# what is the sign?
	smi.b		%d1			# set d1 accordingly
	bsr.l		ovf_res			# calc default ovfl result
	mov.b		%d0,FPSR_CC(%a6)	# insert new ccodes
	fmovm.x		(%a0),&0x80		# return default result in fp0

	fmov.s		&0x00000000,%fp1	# return EXOP in fp1
	rts

# t_ovfl2 ALWAYS tells ovf_res to create a positive result
	global		t_ovfl2
t_ovfl2:
	ori.l		&ovfinx_mask,USER_FPSR(%a6) # set OVFL/INEX2/AOVFL/AINEX

	sf.b		%d1			# clear sign flag for positive
	bsr.l		ovf_res			# calc default ovfl result
	mov.b		%d0,FPSR_CC(%a6)	# insert new ccodes
	fmovm.x		(%a0),&0x80		# return default result in fp0

	fmov.s		&0x00000000,%fp1	# return EXOP in fp1
	rts

#################################################################
# t_catch():							#
#	- the last operation of a transcendental emulation	#
#	  routine may have caused an underflow or overflow.	#
#	  we find out if this occurred by doing an fsave and	#
#	  checking the exception bit. if one did occur, then we	#
#	  jump to fgen_except() which creates the default	#
#	  result and EXOP for us.				#
#	- otherwise the frame is discarded and execution falls	#
#	  through into t_inx2().				#
#################################################################
	global		t_catch
t_catch:

	fsave		-(%sp)
	tst.b		0x2(%sp)		# exception bit set in the frame?
	bmi.b		catch			# yes
	add.l		&0xc,%sp		# no; discard the 12-byte frame

#################################################################
# INEX2 exception:						#
#	- The inex2 and ainex bits are set.			#
#	- The ccode bits follow the FPU condition codes:	#
#	  negative -> t_minx2, zero -> inx2_zero, else t_pinx2.	#
#################################################################
	global		t_inx2
t_inx2:
	fblt.w		t_minx2
	fbeq.w		inx2_zero

	global		t_pinx2
t_pinx2:
	ori.w		&inx2a_mask,2+USER_FPSR(%a6) # set INEX2/AINEX
	rts

	global		t_minx2
t_minx2:
	ori.l		&inx2a_mask+neg_mask,USER_FPSR(%a6) # set N/INEX2/AINEX
	rts

inx2_zero:
	mov.b		&z_bmask,FPSR_CC(%a6)
	ori.w		&inx2a_mask,2+USER_FPSR(%a6) # set INEX2/AINEX
	rts

# an underflow or overflow exception occurred.
# we must set INEX/AINEX since the fmul/fdiv/fmov emulation may not!
# catch: entered from t_catch with the fsave frame still on the stack.
# catch2: same, but without forcing INEX2/AINEX (entered from t_catch2).
# Both call fgen_except and then discard the 12-byte fsave frame.
catch:
	ori.w		&inx2a_mask,FPSR_EXCEPT(%a6)
catch2:
	bsr.l		fgen_except
	add.l		&0xc,%sp		# pop the fsave frame
	rts

# t_catch2: like t_catch, but when no exception is pending the live FPSR
# is OR-ed into USER_FPSR instead of falling into the INEX2 code.
	global		t_catch2
t_catch2:

	fsave		-(%sp)

	tst.b		0x2(%sp)		# bit 7 of frame byte 2 set?
	bmi.b		catch2			# yes; an exception is pending
	add.l		&0xc,%sp		# no; drop the 12-byte frame

	fmov.l		%fpsr,%d0
	or.l		%d0,USER_FPSR(%a6)

	rts

#########################################################################

#########################################################################
# unf_sub(): underflow default result calculation for transcendentals	#
#									#
# INPUT:								#
#	d0   : rnd mode,precision					#
#	d1.b : sign bit of result ('11111111 = (-) ; '00000000 = (+))	#
# OUTPUT:								#
#	a0   : points to result (in instruction memory)			#
#	FPSR_CC(%a6) : condition codes taken from tbl_unf_cc		#
#########################################################################
# The table index is built as: bit 4 = sign, bits 3-0 = bits 7-4 of d0.b.
# tbl_unf_cc has one byte per index; tbl_unf_result has 16 bytes per index
# (the index is doubled and then scaled by 8 in the lea).
unf_sub:
	ori.l		&unfinx_mask,USER_FPSR(%a6)

	andi.w		&0x10,%d1		# keep sign bit in 4th spot

	lsr.b		&0x4,%d0		# shift rnd prec,mode to lo bits
	andi.b		&0xf,%d0		# strip hi rnd mode bit
	or.b		%d1,%d0			# concat {sgn,prec,mode}

	mov.l		%d0,%d1			# make a copy
	lsl.b		&0x1,%d1		# mult index 2 by 2

	mov.b		(tbl_unf_cc.b,%pc,%d0.w*1),FPSR_CC(%a6) # insert ccode bits
	lea		(tbl_unf_result.b,%pc,%d1.w*8),%a0 # grab result ptr
	rts

# condition-code byte per index (4 bytes per precision group)
tbl_unf_cc:
	byte		0x4, 0x4, 0x4, 0x0
	byte		0x4, 0x4, 0x4, 0x0
	byte		0x4, 0x4, 0x4, 0x0
	byte		0x0, 0x0, 0x0, 0x0
	byte		0x8+0x4, 0x8+0x4, 0x8, 0x8+0x4
	byte		0x8+0x4, 0x8+0x4, 0x8, 0x8+0x4
	byte		0x8+0x4, 0x8+0x4, 0x8, 0x8+0x4

# default results, 16 bytes each; positive entries first, then negative
tbl_unf_result:
	long		0x00000000, 0x00000000, 0x00000000, 0x0 # ZERO;ext
	long		0x00000000, 0x00000000, 0x00000000, 0x0 # ZERO;ext
	long		0x00000000, 0x00000000, 0x00000000, 0x0 # ZERO;ext
	long		0x00000000, 0x00000000, 0x00000001, 0x0 # MIN; ext

	long		0x3f810000, 0x00000000, 0x00000000, 0x0 # ZERO;sgl
	long		0x3f810000, 0x00000000, 0x00000000, 0x0 # ZERO;sgl
	long		0x3f810000, 0x00000000, 0x00000000, 0x0 # ZERO;sgl
	long		0x3f810000, 0x00000100, 0x00000000, 0x0 # MIN; sgl

	long		0x3c010000, 0x00000000, 0x00000000, 0x0 # ZERO;dbl
	long		0x3c010000, 0x00000000, 0x00000000, 0x0 # ZERO;dbl
	long		0x3c010000, 0x00000000, 0x00000000, 0x0 # ZERO;dbl
	long		0x3c010000, 0x00000000, 0x00000800, 0x0 # MIN; dbl

	long		0x0,0x0,0x0,0x0
	long		0x0,0x0,0x0,0x0
	long		0x0,0x0,0x0,0x0
	long		0x0,0x0,0x0,0x0

	long		0x80000000, 0x00000000, 0x00000000, 0x0 # ZERO;ext
	long		0x80000000, 0x00000000, 0x00000000, 0x0 # ZERO;ext
	long		0x80000000, 0x00000000, 0x00000001, 0x0 # MIN; ext
	long		0x80000000, 0x00000000, 0x00000000, 0x0 # ZERO;ext

	long		0xbf810000, 0x00000000, 0x00000000, 0x0 # ZERO;sgl
	long		0xbf810000, 0x00000000, 0x00000000, 0x0 # ZERO;sgl
	long		0xbf810000, 0x00000100, 0x00000000, 0x0 # MIN; sgl
	long		0xbf810000, 0x00000000, 0x00000000, 0x0 # ZERO;sgl

	long		0xbc010000, 0x00000000, 0x00000000, 0x0 # ZERO;dbl
	long		0xbc010000, 0x00000000, 0x00000000, 0x0 # ZERO;dbl
	long		0xbc010000, 0x00000000, 0x00000800, 0x0 # MIN; dbl
	long		0xbc010000, 0x00000000, 0x00000000, 0x0 # ZERO;dbl

############################################################

#########################################################################
# src_zero(): Return signed zero according to sign of src operand.	#
#########################################################################
	global		src_zero
src_zero:
	tst.b		SRC_EX(%a0)		# get sign of src operand
	bmi.b		ld_mzero		# if neg, load neg zero
						# else fall through to ld_pzero

#
# ld_pzero(): return a positive zero.
#
	global		ld_pzero
ld_pzero:
	fmov.s		&0x00000000,%fp0	# load +0
	mov.b		&z_bmask,FPSR_CC(%a6)	# set 'Z' ccode bit
	rts

# ld_mzero(): return a negative zero.
# Loads -0 into fp0 and sets the 'N' and 'Z' ccode bits.
	global		ld_mzero
ld_mzero:
	fmov.s		&0x80000000,%fp0	# load -0
	mov.b		&neg_bmask+z_bmask,FPSR_CC(%a6)	# set 'N','Z' ccode bits
	rts

#########################################################################
# dst_zero(): Return signed zero according to sign of dst operand.	#
#########################################################################
	global		dst_zero
dst_zero:
	tst.b		DST_EX(%a1)		# get sign of dst operand
	bmi.b		ld_mzero		# if neg, load neg zero
	bra.b		ld_pzero		# load positive zero

#########################################################################
# src_inf(): Return signed inf according to sign of src operand.	#
#########################################################################
	global		src_inf
src_inf:
	tst.b		SRC_EX(%a0)		# get sign of src operand
	bmi.b		ld_minf			# if negative branch
						# else fall through to ld_pinf

#
# ld_pinf(): return a positive infinity.
#
	global		ld_pinf
ld_pinf:
	fmov.s		&0x7f800000,%fp0	# load +INF
	mov.b		&inf_bmask,FPSR_CC(%a6)	# set 'INF' ccode bit
	rts

#
# ld_minf(): return a negative infinity.
#
	global		ld_minf
ld_minf:
	fmov.s		&0xff800000,%fp0	# load -INF
	mov.b		&neg_bmask+inf_bmask,FPSR_CC(%a6) # set 'N','I' ccode bits
	rts

#########################################################################
# dst_inf(): Return signed inf according to sign of dst operand.	#
#########################################################################
	global		dst_inf
dst_inf:
	tst.b		DST_EX(%a1)		# get sign of dst operand
	bmi.b		ld_minf			# if negative branch
	bra.b		ld_pinf

	global		szr_inf
#################################################################
# szr_inf(): Return +ZERO for a negative src operand or		#
#	     +INF for a positive src operand.			#
#	     Routine used for fetox, ftwotox, and ftentox.	#
#################################################################
szr_inf:
	tst.b		SRC_EX(%a0)		# check sign of source
	bmi.b		ld_pzero		# negative: return +0
	bra.b		ld_pinf			# positive: return +INF

#########################################################################
# sopr_inf(): Return +INF for a positive src operand or			#
#	      jump to operand error routine for a negative src operand.	#
#	      Routine used for flogn, flognp1, flog10, and flog2.	#
#########################################################################
	global		sopr_inf
sopr_inf:
	tst.b		SRC_EX(%a0)		# check sign of source
	bmi.w		t_operr			# negative: operand error
	bra.b		ld_pinf			# positive: return +INF

#################################################################
# setoxm1i(): Return minus one for a negative src operand or	#
#	      positive infinity for a positive src operand.	#
#	      Routine used for fetoxm1.				#
#################################################################
	global		setoxm1i
setoxm1i:
	tst.b		SRC_EX(%a0)		# check sign of source
	bmi.b		ld_mone			# negative: return -1
	bra.b		ld_pinf			# positive: return +INF

#########################################################################
# src_one(): Return signed one according to sign of src operand.	#
#########################################################################
	global		src_one
src_one:
	tst.b		SRC_EX(%a0)		# check sign of source
	bmi.b		ld_mone			# negative: return -1
						# else fall through to ld_pone

#
# ld_pone(): return positive one.
#
	global		ld_pone
ld_pone:
	fmov.s		&0x3f800000,%fp0	# load +1
	clr.b		FPSR_CC(%a6)		# no ccode bits set
	rts

#
# ld_mone(): return negative one.
#
	global		ld_mone
ld_mone:
	fmov.s		&0xbf800000,%fp0	# load -1
	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
	rts

# three-longword constants loaded with fmov.x by ld_ppi2/ld_mpi2 below
ppiby2:	long		0x3fff0000, 0xc90fdaa2, 0x2168c235
mpiby2:	long		0xbfff0000, 0xc90fdaa2, 0x2168c235

#################################################################
# spi_2(): Return signed PI/2 according to sign of src operand.	#
#################################################################
	global		spi_2
spi_2:
	tst.b		SRC_EX(%a0)		# check sign of source
	bmi.b		ld_mpi2			# negative: return -pi/2
						# else fall through to ld_ppi2

#
# ld_ppi2(): return positive PI/2.
# d0 is written to the FPCR before the constant is loaded.
#
	global		ld_ppi2
ld_ppi2:
	fmov.l		%d0,%fpcr
	fmov.x		ppiby2(%pc),%fp0	# load +pi/2
	bra.w		t_pinx2			# set INEX2

#
# ld_mpi2(): return negative PI/2.
# d0 is written to the FPCR before the constant is loaded.
#
	global		ld_mpi2
ld_mpi2:
	fmov.l		%d0,%fpcr
	fmov.x		mpiby2(%pc),%fp0	# load -pi/2
	bra.w		t_minx2			# set INEX2

####################################################
# The following routines give support for fsincos. #
####################################################

#
# ssincosz(): When the src operand is ZERO, store a one in the
#	      cosine register and return a ZERO in fp0 w/ the same sign
#	      as the src operand.
#
	global		ssincosz
ssincosz:
	fmov.s		&0x3f800000,%fp1	# cosine result = +1
	tst.b		SRC_EX(%a0)		# test sign
	bpl.b		sincoszp
	fmov.s		&0x80000000,%fp0	# return sin result in fp0
	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6)
	bra.b		sto_cos			# store cosine result
sincoszp:
	fmov.s		&0x00000000,%fp0	# return sin result in fp0
	mov.b		&z_bmask,FPSR_CC(%a6)
	bra.b		sto_cos			# store cosine result

#
# ssincosi(): When the src operand is INF, store a QNAN in the cosine
#	      register and jump to the operand error routine (the code
#	      does this regardless of the sign of the src operand).
#
	global		ssincosi
ssincosi:
	fmov.x		qnan(%pc),%fp1		# load NAN
	bsr.l		sto_cos			# store cosine result
	bra.w		t_operr

#
# ssincosqnan(): When the src operand is a QNAN, store the QNAN in the cosine
#		 register and branch to the src QNAN routine.
#
	global		ssincosqnan
ssincosqnan:
	fmov.x		LOCAL_EX(%a0),%fp1
	bsr.l		sto_cos
	bra.w		src_qnan

#
# ssincossnan(): When the src operand is an SNAN, store the SNAN w/ the SNAN bit set
#		 in the cosine register and branch to the src SNAN routine.
#
	global		ssincossnan
ssincossnan:
	fmov.x		LOCAL_EX(%a0),%fp1
	bsr.l		sto_cos
	bra.w		src_snan

########################################################################

#########################################################################
# sto_cos(): store fp1 to the fpreg designated by the CMDREG dst field.	#
#	     fp1 holds the result of the cosine portion of ssincos().	#
#	     the value in fp1 will not take any exceptions when moved.	#
# INPUT:								#
#	fp1 : fp value to store						#
# MODIFIED:								#
#	d0								#
#########################################################################
# The low 3 bits of the byte at 1+EXC_CMDREG select one of eight stubs via
# a table of 16-bit offsets relative to tbl_sto_cos. Registers 0 and 1 are
# written to the EXC_FP0/EXC_FP1 slots off %a6; 2-7 are written directly.
	global		sto_cos
sto_cos:
	mov.b		1+EXC_CMDREG(%a6),%d0
	andi.w		&0x7,%d0
	mov.w		(tbl_sto_cos.b,%pc,%d0.w*2),%d0
	jmp		(tbl_sto_cos.b,%pc,%d0.w*1)

tbl_sto_cos:
	short		sto_cos_0 - tbl_sto_cos
	short		sto_cos_1 - tbl_sto_cos
	short		sto_cos_2 - tbl_sto_cos
	short		sto_cos_3 - tbl_sto_cos
	short		sto_cos_4 - tbl_sto_cos
	short		sto_cos_5 - tbl_sto_cos
	short		sto_cos_6 - tbl_sto_cos
	short		sto_cos_7 - tbl_sto_cos

sto_cos_0:
	fmovm.x		&0x40,EXC_FP0(%a6)
	rts
sto_cos_1:
	fmovm.x		&0x40,EXC_FP1(%a6)
	rts
sto_cos_2:
	fmov.x		%fp1,%fp2
	rts
sto_cos_3:
	fmov.x		%fp1,%fp3
	rts
sto_cos_4:
	fmov.x		%fp1,%fp4
	rts
sto_cos_5:
	fmov.x		%fp1,%fp5
	rts
sto_cos_6:
	fmov.x		%fp1,%fp6
	rts
sto_cos_7:
	fmov.x		%fp1,%fp7
	rts

##################################################################
# fmod second-level dispatch. The routine name encodes the src operand
# type; each routine then branches on the dst operand tag in DTAG.
# A zero tag takes the first beq (presumably NORM - confirm against the
# tag definitions).
	global		smod_sdnrm
	global		smod_snorm
smod_sdnrm:
smod_snorm:
	mov.b		DTAG(%a6),%d1
	beq.l		smod
	cmpi.b		%d1,&ZERO
	beq.w		smod_zro
	cmpi.b		%d1,&INF
	beq.l		t_operr
	cmpi.b		%d1,&DENORM
	beq.l		smod
	cmpi.b		%d1,&SNAN
	beq.l		dst_snan
	bra.l		dst_qnan

	global		smod_szero
smod_szero:
	mov.b		DTAG(%a6),%d1
	beq.l		t_operr
	cmpi.b		%d1,&ZERO
	beq.l		t_operr
	cmpi.b		%d1,&INF
	beq.l		t_operr
	cmpi.b		%d1,&DENORM
	beq.l		t_operr
	cmpi.b		%d1,&QNAN
	beq.l		dst_qnan
	bra.l		dst_snan

	global		smod_sinf
smod_sinf:
	mov.b		DTAG(%a6),%d1
	beq.l		smod_fpn
	cmpi.b		%d1,&ZERO
	beq.l		smod_zro
	cmpi.b		%d1,&INF
	beq.l		t_operr
	cmpi.b		%d1,&DENORM
	beq.l		smod_fpn
	cmpi.b		%d1,&QNAN
	beq.l		dst_qnan
	bra.l		dst_snan

# dst is ZERO: store sign(src) XOR sign(dst) as the quotient-byte sign and
# return a zero carrying the sign of the dst operand.
smod_zro:
srem_zro:
	mov.b		SRC_EX(%a0),%d1		# get src sign
	mov.b		DST_EX(%a1),%d0		# get dst sign
	eor.b		%d0,%d1			# get qbyte sign
	andi.b		&0x80,%d1
	mov.b		%d1,FPSR_QBYTE(%a6)
	tst.b		%d0
	bpl.w		ld_pzero
	bra.w		ld_mzero

# reached from smod_sinf/srem_sinf for a zero-tag or DENORM dst: the dst
# operand itself is returned (through t_resdnrm when it is a DENORM) and
# only the quotient-byte sign is set. d0 is saved on entry and later
# consumed either as d0 again (DENORM path) or as the FPCR value.
smod_fpn:
srem_fpn:
	clr.b		FPSR_QBYTE(%a6)
	mov.l		%d0,-(%sp)
	mov.b		SRC_EX(%a0),%d1		# get src sign
	mov.b		DST_EX(%a1),%d0		# get dst sign
	eor.b		%d0,%d1			# get qbyte sign
	andi.b		&0x80,%d1
	mov.b		%d1,FPSR_QBYTE(%a6)
	cmpi.b		DTAG(%a6),&DENORM
	bne.b		smod_nrm
	lea		DST(%a1),%a0
	mov.l		(%sp)+,%d0
	bra		t_resdnrm
smod_nrm:
	fmov.l		(%sp)+,%fpcr
	fmov.x		DST(%a1),%fp0
	tst.b		DST_EX(%a1)
	bmi.b		smod_nrm_neg
	rts

smod_nrm_neg:
	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode
	rts

#########################################################################
# frem second-level dispatch (same scheme as the fmod routines above).
	global		srem_snorm
	global		srem_sdnrm
srem_sdnrm:
srem_snorm:
	mov.b		DTAG(%a6),%d1
	beq.l		srem
	cmpi.b		%d1,&ZERO
	beq.w		srem_zro
	cmpi.b		%d1,&INF
	beq.l		t_operr
	cmpi.b		%d1,&DENORM
	beq.l		srem
	cmpi.b		%d1,&QNAN
	beq.l		dst_qnan
	bra.l		dst_snan

	global		srem_szero
srem_szero:
	mov.b		DTAG(%a6),%d1
	beq.l		t_operr
	cmpi.b		%d1,&ZERO
	beq.l		t_operr
	cmpi.b		%d1,&INF
	beq.l		t_operr
	cmpi.b		%d1,&DENORM
	beq.l		t_operr
	cmpi.b		%d1,&QNAN
	beq.l		dst_qnan
	bra.l		dst_snan

	global		srem_sinf
srem_sinf:
	mov.b		DTAG(%a6),%d1
	beq.w		srem_fpn
	cmpi.b		%d1,&ZERO
	beq.w		srem_zro
	cmpi.b		%d1,&INF
	beq.l		t_operr
	cmpi.b		%d1,&DENORM
	beq.l		srem_fpn
	cmpi.b		%d1,&QNAN
	beq.l		dst_qnan
	bra.l		dst_snan

#########################################################################
# fscale second-level dispatch (same scheme as the fmod routines above).
	global		sscale_snorm
	global		sscale_sdnrm
sscale_snorm:
sscale_sdnrm:
	mov.b		DTAG(%a6),%d1
	beq.l		sscale
	cmpi.b		%d1,&ZERO
	beq.l		dst_zero
	cmpi.b		%d1,&INF
	beq.l		dst_inf
	cmpi.b		%d1,&DENORM
	beq.l		sscale
	cmpi.b		%d1,&QNAN
	beq.l		dst_qnan
	bra.l		dst_snan

	global		sscale_szero
sscale_szero:
	mov.b		DTAG(%a6),%d1
	beq.l		sscale
	cmpi.b		%d1,&ZERO
	beq.l		dst_zero
	cmpi.b		%d1,&INF
	beq.l		dst_inf
	cmpi.b		%d1,&DENORM
	beq.l		sscale
	cmpi.b		%d1,&QNAN
	beq.l		dst_qnan
	bra.l		dst_snan

	global		sscale_sinf
sscale_sinf:
	mov.b		DTAG(%a6),%d1
	beq.l		t_operr
	cmpi.b		%d1,&QNAN
	beq.l		dst_qnan
	cmpi.b		%d1,&SNAN
	beq.l		dst_snan
	bra.l		t_operr

########################################################################

#
# sop_sqnan(): The src op for frem/fmod/fscale was a QNAN.
#	       A NAN in the dst operand takes precedence over the src QNAN.
#
	global		sop_sqnan
sop_sqnan:
	mov.b		DTAG(%a6),%d1
	cmpi.b		%d1,&QNAN
	beq.b		dst_qnan
	cmpi.b		%d1,&SNAN
	beq.b		dst_snan
	bra.b		src_qnan

#
# sop_ssnan(): The src op for frem/fmod/fscale was an SNAN.
#	       A dst QNAN is still returned, but the SNAN status bits are
#	       set first (dst_qnan_src_snan).
#
	global		sop_ssnan
sop_ssnan:
	mov.b		DTAG(%a6),%d1
	cmpi.b		%d1,&QNAN
	beq.b		dst_qnan_src_snan
	cmpi.b		%d1,&SNAN
	beq.b		dst_snan
	bra.b		src_snan

dst_qnan_src_snan:
	ori.l		&snaniop_mask,USER_FPSR(%a6) # set NAN/SNAN/AIOP
	bra.b		dst_qnan

#
# dst_snan(): Return the dst SNAN w/ the SNAN bit set.
#
	global		dst_snan
dst_snan:
	fmov.x		DST(%a1),%fp0		# the fmove sets the SNAN bit
	fmov.l		%fpsr,%d0		# catch resulting status
	or.l		%d0,USER_FPSR(%a6)	# store status
	rts

#
# dst_qnan(): Return the dst QNAN.
#
	global		dst_qnan
dst_qnan:
	fmov.x		DST(%a1),%fp0		# return the non-signalling nan
	tst.b		DST_EX(%a1)		# set ccodes according to QNAN sign
	bmi.b		dst_qnan_m
dst_qnan_p:
	mov.b		&nan_bmask,FPSR_CC(%a6)	# set 'NAN' ccode bit
	rts
dst_qnan_m:
	mov.b		&neg_bmask+nan_bmask,FPSR_CC(%a6) # set 'N','NAN' ccode bits
	rts

#
# src_snan(): Return the src SNAN w/ the SNAN bit set.
#
	global		src_snan
src_snan:
	fmov.x		SRC(%a0),%fp0		# the fmove sets the SNAN bit
	fmov.l		%fpsr,%d0		# catch resulting status
	or.l		%d0,USER_FPSR(%a6)	# store status
	rts

#
# src_qnan(): Return the src QNAN.
# The negative case branches to this routine's own src_qnan_m label.
# (It used to branch to dst_qnan_m, which executes the identical two
# instructions, leaving src_qnan_m unreachable.)
#
	global		src_qnan
src_qnan:
	fmov.x		SRC(%a0),%fp0		# return the non-signalling nan
	tst.b		SRC_EX(%a0)		# set ccodes according to QNAN sign
	bmi.b		src_qnan_m
src_qnan_p:
	mov.b		&nan_bmask,FPSR_CC(%a6)	# set 'NAN' ccode bit
	rts
src_qnan_m:
	mov.b		&neg_bmask+nan_bmask,FPSR_CC(%a6) # set 'N','NAN' ccode bits
	rts

#
# fkern2.s:
#	These entry points are used by the exception handler
# routines where an instruction is selected by an index into
# a large jump table corresponding to a given instruction which
# has been decoded. Flow continues here where we now decode
# further according to the source operand type.
#
# Every routine below has the same shape: load the src operand tag from
# STAG into d1 and branch on it. A zero tag takes the first beq (presumably
# NORM - confirm against the tag definitions), then ZERO, INF, DENORM and
# QNAN are compared in turn; anything left over is treated as an SNAN.
#

	global		fsinh
fsinh:
	mov.b		STAG(%a6),%d1
	beq.l		ssinh
	cmpi.b		%d1,&ZERO
	beq.l		src_zero
	cmpi.b		%d1,&INF
	beq.l		src_inf
	cmpi.b		%d1,&DENORM
	beq.l		ssinhd
	cmpi.b		%d1,&QNAN
	beq.l		src_qnan
	bra.l		src_snan

	global		flognp1
flognp1:
	mov.b		STAG(%a6),%d1
	beq.l		slognp1
	cmpi.b		%d1,&ZERO
	beq.l		src_zero
	cmpi.b		%d1,&INF
	beq.l		sopr_inf
	cmpi.b		%d1,&DENORM
	beq.l		slognp1d
	cmpi.b		%d1,&QNAN
	beq.l		src_qnan
	bra.l		src_snan

	global		fetoxm1
fetoxm1:
	mov.b		STAG(%a6),%d1
	beq.l		setoxm1
	cmpi.b		%d1,&ZERO
	beq.l		src_zero
	cmpi.b		%d1,&INF
	beq.l		setoxm1i
	cmpi.b		%d1,&DENORM
	beq.l		setoxm1d
	cmpi.b		%d1,&QNAN
	beq.l		src_qnan
	bra.l		src_snan

	global		ftanh
ftanh:
	mov.b		STAG(%a6),%d1
	beq.l		stanh
	cmpi.b		%d1,&ZERO
	beq.l		src_zero
	cmpi.b		%d1,&INF
	beq.l		src_one
	cmpi.b		%d1,&DENORM
	beq.l		stanhd
	cmpi.b		%d1,&QNAN
	beq.l		src_qnan
	bra.l		src_snan

	global		fatan
fatan:
	mov.b		STAG(%a6),%d1
	beq.l		satan
	cmpi.b		%d1,&ZERO
	beq.l		src_zero
	cmpi.b		%d1,&INF
	beq.l		spi_2
	cmpi.b		%d1,&DENORM
	beq.l		satand
	cmpi.b		%d1,&QNAN
	beq.l		src_qnan
	bra.l		src_snan

	global		fasin
fasin:
	mov.b		STAG(%a6),%d1
	beq.l		sasin
	cmpi.b		%d1,&ZERO
	beq.l		src_zero
	cmpi.b		%d1,&INF
	beq.l		t_operr
	cmpi.b		%d1,&DENORM
	beq.l		sasind
	cmpi.b		%d1,&QNAN
	beq.l		src_qnan
	bra.l		src_snan

	global		fatanh
fatanh:
	mov.b		STAG(%a6),%d1
	beq.l		satanh
	cmpi.b		%d1,&ZERO
	beq.l		src_zero
	cmpi.b		%d1,&INF
	beq.l		t_operr
	cmpi.b		%d1,&DENORM
	beq.l		satanhd
	cmpi.b		%d1,&QNAN
	beq.l		src_qnan
	bra.l		src_snan

	global		fsine
fsine:
	mov.b		STAG(%a6),%d1
	beq.l		ssin
	cmpi.b		%d1,&ZERO
	beq.l		src_zero
	cmpi.b		%d1,&INF
	beq.l		t_operr
	cmpi.b		%d1,&DENORM
	beq.l		ssind
	cmpi.b		%d1,&QNAN
	beq.l		src_qnan
	bra.l		src_snan

	global		ftan
ftan:
	mov.b		STAG(%a6),%d1
	beq.l		stan
	cmpi.b		%d1,&ZERO
	beq.l		src_zero
	cmpi.b		%d1,&INF
	beq.l		t_operr
	cmpi.b		%d1,&DENORM
	beq.l		stand
	cmpi.b		%d1,&QNAN
	beq.l		src_qnan
	bra.l		src_snan

	global		fetox
fetox:
	mov.b		STAG(%a6),%d1
	beq.l		setox
	cmpi.b		%d1,&ZERO
	beq.l		ld_pone
	cmpi.b		%d1,&INF
	beq.l		szr_inf
	cmpi.b		%d1,&DENORM
	beq.l		setoxd
	cmpi.b		%d1,&QNAN
	beq.l		src_qnan
	bra.l		src_snan

	global		ftwotox
ftwotox:
	mov.b		STAG(%a6),%d1
	beq.l		stwotox
	cmpi.b		%d1,&ZERO
	beq.l		ld_pone
	cmpi.b		%d1,&INF
	beq.l		szr_inf
	cmpi.b		%d1,&DENORM
	beq.l		stwotoxd
	cmpi.b		%d1,&QNAN
	beq.l		src_qnan
	bra.l		src_snan

	global		ftentox
ftentox:
	mov.b		STAG(%a6),%d1
	beq.l		stentox
	cmpi.b		%d1,&ZERO
	beq.l		ld_pone
	cmpi.b		%d1,&INF
	beq.l		szr_inf
	cmpi.b		%d1,&DENORM
	beq.l		stentoxd
	cmpi.b		%d1,&QNAN
	beq.l		src_qnan
	bra.l		src_snan

	global		flogn
flogn:
	mov.b		STAG(%a6),%d1
	beq.l		slogn
	cmpi.b		%d1,&ZERO
	beq.l		t_dz2
	cmpi.b		%d1,&INF
	beq.l		sopr_inf
	cmpi.b		%d1,&DENORM
	beq.l		slognd
	cmpi.b		%d1,&QNAN
	beq.l		src_qnan
	bra.l		src_snan

	global		flog10
flog10:
	mov.b		STAG(%a6),%d1
	beq.l		slog10
	cmpi.b		%d1,&ZERO
	beq.l		t_dz2
	cmpi.b		%d1,&INF
	beq.l		sopr_inf
	cmpi.b		%d1,&DENORM
	beq.l		slog10d
	cmpi.b		%d1,&QNAN
	beq.l		src_qnan
	bra.l		src_snan

	global		flog2
flog2:
	mov.b		STAG(%a6),%d1
	beq.l		slog2
	cmpi.b		%d1,&ZERO
	beq.l		t_dz2
	cmpi.b		%d1,&INF
	beq.l		sopr_inf
	cmpi.b		%d1,&DENORM
	beq.l		slog2d
	cmpi.b		%d1,&QNAN
	beq.l		src_qnan
	bra.l		src_snan

	global		fcosh
fcosh:
	mov.b		STAG(%a6),%d1
	beq.l		scosh
	cmpi.b		%d1,&ZERO
	beq.l		ld_pone
	cmpi.b		%d1,&INF
	beq.l		ld_pinf
	cmpi.b		%d1,&DENORM
	beq.l		scoshd
	cmpi.b		%d1,&QNAN
	beq.l		src_qnan
	bra.l		src_snan

	global		facos
facos:
	mov.b		STAG(%a6),%d1
	beq.l		sacos
	cmpi.b		%d1,&ZERO
	beq.l		ld_ppi2
	cmpi.b		%d1,&INF
	beq.l		t_operr
	cmpi.b		%d1,&DENORM
	beq.l		sacosd
	cmpi.b		%d1,&QNAN
	beq.l		src_qnan
	bra.l		src_snan

	global		fcos
fcos:
	mov.b		STAG(%a6),%d1
	beq.l		scos
	cmpi.b		%d1,&ZERO
	beq.l		ld_pone
	cmpi.b		%d1,&INF
	beq.l		t_operr
	cmpi.b		%d1,&DENORM
	beq.l		scosd
	cmpi.b		%d1,&QNAN
	beq.l		src_qnan
	bra.l		src_snan

	global		fgetexp
fgetexp:
	mov.b		STAG(%a6),%d1
	beq.l		sgetexp
	cmpi.b		%d1,&ZERO
	beq.l		src_zero
	cmpi.b		%d1,&INF
	beq.l		t_operr
	cmpi.b		%d1,&DENORM
	beq.l		sgetexpd
	cmpi.b		%d1,&QNAN
	beq.l		src_qnan
	bra.l		src_snan

	global		fgetman
fgetman:
	mov.b		STAG(%a6),%d1
	beq.l		sgetman
	cmpi.b		%d1,&ZERO
	beq.l		src_zero
	cmpi.b		%d1,&INF
	beq.l		t_operr
	cmpi.b		%d1,&DENORM
	beq.l		sgetmand
	cmpi.b		%d1,&QNAN
	beq.l		src_qnan
	bra.l		src_snan

	global		fsincos
fsincos:
	mov.b		STAG(%a6),%d1
	beq.l		ssincos
	cmpi.b		%d1,&ZERO
	beq.l		ssincosz
	cmpi.b		%d1,&INF
	beq.l		ssincosi
	cmpi.b		%d1,&DENORM
	beq.l		ssincosd
	cmpi.b		%d1,&QNAN
	beq.l		ssincosqnan
	bra.l		ssincossnan

# The three dyadic entry points below only classify the src operand here;
# their targets go on to examine the dst operand tag.
	global		fmod
fmod:
	mov.b		STAG(%a6),%d1
	beq.l		smod_snorm
	cmpi.b		%d1,&ZERO
	beq.l		smod_szero
	cmpi.b		%d1,&INF
	beq.l		smod_sinf
	cmpi.b		%d1,&DENORM
	beq.l		smod_sdnrm
	cmpi.b		%d1,&QNAN
	beq.l		sop_sqnan
	bra.l		sop_ssnan

	global		frem
frem:
	mov.b		STAG(%a6),%d1
	beq.l		srem_snorm
	cmpi.b		%d1,&ZERO
	beq.l		srem_szero
	cmpi.b		%d1,&INF
	beq.l		srem_sinf
	cmpi.b		%d1,&DENORM
	beq.l		srem_sdnrm
	cmpi.b		%d1,&QNAN
	beq.l		sop_sqnan
	bra.l		sop_ssnan

	global		fscale
fscale:
	mov.b		STAG(%a6),%d1
	beq.l		sscale_snorm
	cmpi.b		%d1,&ZERO
	beq.l		sscale_szero
	cmpi.b		%d1,&INF
	beq.l		sscale_sinf
	cmpi.b		%d1,&DENORM
	beq.l		sscale_sdnrm
	cmpi.b		%d1,&QNAN
	beq.l		sop_sqnan
	bra.l		sop_ssnan

#########################################################################
# XDEF ****************************************************************	#
#	fgen_except(): catch an exception during transcendental		#
#		       emulation					#
#									#
# XREF ****************************************************************	#
#	fmul() - emulate a multiply instruction				#
#	fadd() - emulate an add instruction				#
#	fin() - emulate an fmove instruction				#
#									#
# INPUT ***************************************************************	#
#	fp0 = destination operand					#
#	d1  = type of instruction that took exception			#
#	      (the code compares d1 against FMOV_OP and FADD_OP)	#
#	fsave frame = source operand					#
#									#
# OUTPUT **************************************************************	#
#	fp0 = result							#
#	fp1 = EXOP							#
#									#
# ALGORITHM ***********************************************************	#
#	An exception occurred on the last instruction of the		#
# transcendental emulation. hopefully, this won't be happening much	#
# because it will be VERY slow.						#
#	The only exceptions capable of passing through here are		#
# Overflow, Underflow, and Unsupported Data Type.			#
#									#
#########################################################################

# fgen_except is entered with "bsr.l" (see catch/catch2), so on entry the
# 4-byte return address is at 0(%sp) and the fsave frame begins at 4(%sp);
# that is why the src operand pointer below is taken as 0x4(%sp).
# The frame's exception status byte is frame byte 3 (frame byte 2 is the
# one t_catch tests before the call), i.e. 0x7(%sp) here. The test used to
# read 0x3(%sp), which is the low byte of the return address and therefore
# never matched.
#
# d1 selects the emulation routine: FMOV_OP -> fin, FADD_OP -> fadd,
# anything else -> fmul.

	global		fgen_except
fgen_except:
	cmpi.b		0x7(%sp),&0x7		# is exception UNSUPP?
	beq.b		fge_unsupp		# yes

	mov.b		&NORM,STAG(%a6)

fge_cont:
	mov.b		&NORM,DTAG(%a6)

# ok, I have a problem with putting the dst op at FP_DST. the emulation
# routines aren't supposed to alter the operands but we've just squashed
# FP_DST here...

# 8/17/93 - this turns out to be more of a "cleanliness" standpoint
# than a potential bug. to begin with, only the dyadic functions
# frem,fmod, and fscale would get the dst trashed here. But, for
# the 060SP, the FP_DST is never used again anyways.
	fmovm.x		&0x80,FP_DST(%a6)	# dst op is in fp0

	lea		0x4(%sp),%a0		# pass: ptr to src op
	lea		FP_DST(%a6),%a1		# pass: ptr to dst op

	cmpi.b		%d1,&FMOV_OP
	beq.b		fge_fin			# it was an "fmov"
	cmpi.b		%d1,&FADD_OP
	beq.b		fge_fadd		# it was an "fadd"
fge_fmul:
	bsr.l		fmul
	rts
fge_fadd:
	bsr.l		fadd
	rts
fge_fin:
	bsr.l		fin
	rts

# Unsupported Data Type: tag the src operand as a DENORM instead of a NORM
fge_unsupp:
	mov.b		&DENORM,STAG(%a6)
	bra.b		fge_cont

#
# This table holds the offsets of the emulation routines for each individual
# math operation relative to the address of this table. Included are
# routines like fadd/fmul/fabs as well as the transcendentals.
# The location within the table is determined by the extension bits of the
# operation longword.
#
# Entries written as "tbl_unsupp - tbl_unsupp" are zero offsets: slots for
# which no emulation routine is listed. Their slot number is given so the
# table can be checked against the entry count passed to swbeg (109 = 0x6d).
#

	swbeg		&109
tbl_unsupp:
	long		fin		- tbl_unsupp	# 00: fmove
	long		fint		- tbl_unsupp	# 01: fint
	long		fsinh		- tbl_unsupp	# 02: fsinh
	long		fintrz		- tbl_unsupp	# 03: fintrz
	long		fsqrt		- tbl_unsupp	# 04: fsqrt
	long		tbl_unsupp	- tbl_unsupp	# 05
	long		flognp1		- tbl_unsupp	# 06: flognp1
	long		tbl_unsupp	- tbl_unsupp	# 07
	long		fetoxm1		- tbl_unsupp	# 08: fetoxm1
	long		ftanh		- tbl_unsupp	# 09: ftanh
	long		fatan		- tbl_unsupp	# 0a: fatan
	long		tbl_unsupp	- tbl_unsupp	# 0b
	long		fasin		- tbl_unsupp	# 0c: fasin
	long		fatanh		- tbl_unsupp	# 0d: fatanh
	long		fsine		- tbl_unsupp	# 0e: fsin
	long		ftan		- tbl_unsupp	# 0f: ftan
	long		fetox		- tbl_unsupp	# 10: fetox
	long		ftwotox		- tbl_unsupp	# 11: ftwotox
	long		ftentox		- tbl_unsupp	# 12: ftentox
	long		tbl_unsupp	- tbl_unsupp	# 13
	long		flogn		- tbl_unsupp	# 14: flogn
	long		flog10		- tbl_unsupp	# 15: flog10
	long		flog2		- tbl_unsupp	# 16: flog2
	long		tbl_unsupp	- tbl_unsupp	# 17
	long		fabs		- tbl_unsupp	# 18: fabs
	long		fcosh		- tbl_unsupp	# 19: fcosh
	long		fneg		- tbl_unsupp	# 1a: fneg
	long		tbl_unsupp	- tbl_unsupp	# 1b
	long		facos		- tbl_unsupp	# 1c: facos
	long		fcos		- tbl_unsupp	# 1d: fcos
	long		fgetexp		- tbl_unsupp	# 1e: fgetexp
	long		fgetman		- tbl_unsupp	# 1f: fgetman
	long		fdiv		- tbl_unsupp	# 20: fdiv
	long		fmod		- tbl_unsupp	# 21: fmod
	long		fadd		- tbl_unsupp	# 22: fadd
	long		fmul		- tbl_unsupp	# 23: fmul
	long		fsgldiv		- tbl_unsupp	# 24: fsgldiv
	long		frem		- tbl_unsupp	# 25: frem
	long		fscale		- tbl_unsupp	# 26: fscale
	long		fsglmul		- tbl_unsupp	# 27: fsglmul
	long		fsub		- tbl_unsupp	# 28: fsub
	long		tbl_unsupp	- tbl_unsupp	# 29
	long		tbl_unsupp	- tbl_unsupp	# 2a
	long		tbl_unsupp	- tbl_unsupp	# 2b
	long		tbl_unsupp	- tbl_unsupp	# 2c
	long		tbl_unsupp	- tbl_unsupp	# 2d
	long		tbl_unsupp	- tbl_unsupp	# 2e
	long		tbl_unsupp	- tbl_unsupp	# 2f
	long		fsincos		- tbl_unsupp	# 30: fsincos
	long		fsincos		- tbl_unsupp	# 31: fsincos
	long		fsincos		- tbl_unsupp	# 32: fsincos
	long		fsincos		- tbl_unsupp	# 33: fsincos
	long		fsincos		- tbl_unsupp	# 34: fsincos
	long		fsincos		- tbl_unsupp	# 35: fsincos
	long		fsincos		- tbl_unsupp	# 36: fsincos
	long		fsincos		- tbl_unsupp	# 37: fsincos
	long		fcmp		- tbl_unsupp	# 38: fcmp
	long		tbl_unsupp	- tbl_unsupp	# 39
	long		ftst		- tbl_unsupp	# 3a: ftst
	long		tbl_unsupp	- tbl_unsupp	# 3b
	long		tbl_unsupp	- tbl_unsupp	# 3c
	long		tbl_unsupp	- tbl_unsupp	# 3d
	long		tbl_unsupp	- tbl_unsupp	# 3e
	long		tbl_unsupp	- tbl_unsupp	# 3f
	long		fsin		- tbl_unsupp	# 40: fsmove
	long		fssqrt		- tbl_unsupp	# 41: fssqrt
	long		tbl_unsupp	- tbl_unsupp	# 42
	long		tbl_unsupp	- tbl_unsupp	# 43
	long		fdin		- tbl_unsupp	# 44: fdmove
	long		fdsqrt		- tbl_unsupp	# 45: fdsqrt
	long		tbl_unsupp	- tbl_unsupp	# 46
	long		tbl_unsupp	- tbl_unsupp	# 47
	long		tbl_unsupp	- tbl_unsupp	# 48
	long		tbl_unsupp	- tbl_unsupp	# 49
	long		tbl_unsupp	- tbl_unsupp	# 4a
	long		tbl_unsupp	- tbl_unsupp	# 4b
	long		tbl_unsupp	- tbl_unsupp	# 4c
	long		tbl_unsupp	- tbl_unsupp	# 4d
	long		tbl_unsupp	- tbl_unsupp	# 4e
	long		tbl_unsupp	- tbl_unsupp	# 4f
	long		tbl_unsupp	- tbl_unsupp	# 50
	long		tbl_unsupp	- tbl_unsupp	# 51
	long		tbl_unsupp	- tbl_unsupp	# 52
	long		tbl_unsupp	- tbl_unsupp	# 53
	long		tbl_unsupp	- tbl_unsupp	# 54
	long		tbl_unsupp	- tbl_unsupp	# 55
	long		tbl_unsupp	- tbl_unsupp	# 56
	long		tbl_unsupp	- tbl_unsupp	# 57
	long		fsabs		- tbl_unsupp	# 58: fsabs
	long		tbl_unsupp	- tbl_unsupp	# 59
	long		fsneg		- tbl_unsupp	# 5a: fsneg
	long		tbl_unsupp	- tbl_unsupp	# 5b
	long		fdabs		- tbl_unsupp	# 5c: fdabs
	long		tbl_unsupp	- tbl_unsupp	# 5d
	long		fdneg		- tbl_unsupp	# 5e: fdneg
	long		tbl_unsupp	- tbl_unsupp	# 5f
	long		fsdiv		- tbl_unsupp	# 60: fsdiv
	long		tbl_unsupp	- tbl_unsupp	# 61
	long		fsadd		- tbl_unsupp	# 62: fsadd
	long		fsmul		- tbl_unsupp	# 63: fsmul
	long		fddiv		- tbl_unsupp	# 64: fddiv
	long		tbl_unsupp	- tbl_unsupp	# 65
	long		fdadd		- tbl_unsupp	# 66: fdadd
	long		fdmul		- tbl_unsupp	# 67: fdmul
	long		fssub		- tbl_unsupp	# 68: fssub
	long		tbl_unsupp	- tbl_unsupp	# 69
	long		tbl_unsupp	- tbl_unsupp	# 6a
	long		tbl_unsupp	- tbl_unsupp	# 6b
	long		fdsub		- tbl_unsupp	# 6c: fdsub

#########################################################################
# XDEF ****************************************************************	#
#	fmul(): emulates the fmul instruction				#
#	fsmul(): emulates the fsmul instruction				#
#	fdmul(): emulates the fdmul instruction				#
#									#
# XREF ****************************************************************	#
#	scale_to_zero_src() - scale src exponent to zero		#
#	scale_to_zero_dst() - scale dst exponent to zero		#
#	unf_res() - return default underflow result			#
#	ovf_res() - return default overflow result			#
#	res_qnan() - return QNAN result					#
#	res_snan() - return SNAN result					#
#									#
# INPUT ***************************************************************	#
#	a0 = pointer to extended precision source operand		#
#	a1 = pointer to extended precision destination operand		#
#	d0  rnd prec,mode						#
#									#
# OUTPUT **************************************************************	#
#	fp0 = result							#
#	fp1 = EXOP (if exception occurred)				#
#									#
# ALGORITHM ***********************************************************	#
#	Handle NANs, infinities, and zeroes as special cases. Divide	#
#	norms/denorms into ext/sgl/dbl precision.			#
#	For norms/denorms, scale the exponents such that a multiply	#
#	instruction won't cause an exception. Use the regular fmul to	#
#	compute a result. Check if the regular operands would have taken	#
#	an exception. If so, return the default overflow/underflow result	#
#	and return the EXOP if exceptions are enabled.
#	Else, scale the							#
#	result operand to the proper exponent.				#
#									#
#########################################################################

# Scale-factor thresholds, indexed by rounding precision (ext, sgl, dbl).
	align		0x10
tbl_fmul_ovfl:
	long		0x3fff - 0x7ffe		# ext_max
	long		0x3fff - 0x407e		# sgl_max
	long		0x3fff - 0x43fe		# dbl_max
tbl_fmul_unfl:
	long		0x3fff + 0x0001		# ext_unfl
	long		0x3fff - 0x3f80		# sgl_unfl
	long		0x3fff - 0x3c00		# dbl_unfl

	global		fsmul
fsmul:
	andi.b		&0x30,%d0		# clear rnd prec
	ori.b		&s_mode*0x10,%d0	# insert sgl prec
	bra.b		fmul

	global		fdmul
fdmul:
	andi.b		&0x30,%d0		# clear rnd prec
	ori.b		&d_mode*0x10,%d0	# insert dbl prec
						# (falls through into fmul)

	global		fmul
fmul:
	mov.l		%d0,L_SCR3(%a6)		# store rnd info

	clr.w		%d1
	mov.b		DTAG(%a6),%d1
	lsl.b		&0x3,%d1
	or.b		STAG(%a6),%d1		# combine dst,src tags
	bne.w		fmul_not_norm		# optimize on non-norm input

fmul_norm:
	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)
	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)

	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)

	bsr.l		scale_to_zero_src	# scale src exponent
	mov.l		%d0,-(%sp)		# save scale factor 1

	bsr.l		scale_to_zero_dst	# scale dst exponent

	add.l		%d0,(%sp)		# SCALE_FACTOR = scale1 + scale2

	mov.w		2+L_SCR3(%a6),%d1	# fetch precision
	lsr.b		&0x6,%d1		# shift to lo bits
	mov.l		(%sp)+,%d0		# load S.F.
	cmp.l		%d0,(tbl_fmul_ovfl.w,%pc,%d1.w*4) # would result ovfl?
	beq.w		fmul_may_ovfl		# result may rnd to overflow
	blt.w		fmul_ovfl		# result will overflow

	cmp.l		%d0,(tbl_fmul_unfl.w,%pc,%d1.w*4) # would result unfl?
	beq.w		fmul_may_unfl		# result may rnd to no unfl
	bgt.w		fmul_unfl		# result will underflow

#
# NORMAL:
# - the result of the multiply operation will neither overflow nor underflow.
# - do the multiply to the proper precision and rounding mode.
# - scale the result exponent using the scale factor. if both operands were
# normalized then we really don't need to go through this scaling. but for now,
# this will do.
#
fmul_normal:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst operand

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fmul.x		FP_SCR0(%a6),%fp0	# execute multiply

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

# d0 still holds the scale factor here; it is subtracted from the result
# exponent to undo the operand scaling.
fmul_normal_exit:
	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
	mov.l		%d2,-(%sp)		# save d2
	mov.w		FP_SCR0_EX(%a6),%d1	# load {sgn,exp}
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# add scale factor
	or.w		%d2,%d1			# concat old sign,new exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
	rts

#
# OVERFLOW:
# - the result of the multiply operation is an overflow.
# - do the multiply to the proper precision and rounding mode in order to
# set the inexact bits.
# - calculate the default result and return it in fp0.
# - if overflow or inexact is enabled, we need a multiply result rounded to
# extended precision. if the original operation was extended, then we have this
# result. if the original operation was single or double, we have to do another
# multiply using extended precision and the correct rounding mode. the result
# of this operation then has its exponent scaled by -0x6000 to create the
# exceptional operand.
#
fmul_ovfl:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst operand

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fmul.x		FP_SCR0(%a6),%fp0	# execute multiply

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

# save setting this until now because this is where fmul_may_ovfl may jump in
fmul_ovfl_tst:
	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
	bne.b		fmul_ovfl_ena		# yes

# calculate the default result
fmul_ovfl_dis:
	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
	sne		%d1			# set sign param accordingly
	mov.l		L_SCR3(%a6),%d0		# pass rnd prec,mode
	bsr.l		ovf_res			# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
	fmovm.x		(%a0),&0x80		# return default result in fp0
	rts

#
# OVFL is enabled; Create EXOP:
# - if precision is extended, then we have the EXOP. simply bias the exponent
# with an extra -0x6000. if the precision is single or double, we need to
# calculate a result rounded to extended precision.
#
fmul_ovfl_ena:
	mov.l		L_SCR3(%a6),%d1
	andi.b		&0xc0,%d1		# test the rnd prec
	bne.b		fmul_ovfl_ena_sd	# it's sgl or dbl

fmul_ovfl_ena_cont:
	fmovm.x		&0x80,FP_SCR0(%a6)	# move result to stack

	mov.l		%d2,-(%sp)		# save d2
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
	mov.w		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	sub.l		%d0,%d1			# add scale factor
	subi.l		&0x6000,%d1		# subtract bias
	andi.w		&0x7fff,%d1		# clear sign bit
	andi.w		&0x8000,%d2		# keep old sign
	or.w		%d2,%d1			# concat old sign,new exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	bra.b		fmul_ovfl_dis

fmul_ovfl_ena_sd:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst operand

	mov.l		L_SCR3(%a6),%d1
	andi.b		&0x30,%d1		# keep rnd mode only
	fmov.l		%d1,%fpcr		# set FPCR

	fmul.x		FP_SCR0(%a6),%fp0	# execute multiply

	fmov.l		&0x0,%fpcr		# clear FPCR
	bra.b		fmul_ovfl_ena_cont

#
# may OVERFLOW:
# - the result of the multiply operation MAY overflow.
# - do the multiply to the proper precision and rounding mode in order to
# set the inexact bits.
# - calculate the default result and return it in fp0.
#
fmul_may_ovfl:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fmul.x		FP_SCR0(%a6),%fp0	# execute multiply

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

	fabs.x		%fp0,%fp1		# make a copy of result
	fcmp.b		%fp1,&0x2		# is |result| >= 2.b?
	fbge.w		fmul_ovfl_tst		# yes; overflow has occurred

# no, it didn't overflow; we have correct result
	bra.w		fmul_normal_exit

#
# UNDERFLOW:
# - the result of the multiply operation is an underflow.
# - do the multiply to the proper precision and rounding mode in order to
# set the inexact bits.
# - calculate the default result and return it in fp0.
# - if underflow or inexact is enabled, we need a multiply result rounded to
# extended precision. if the original operation was extended, then we have this
# result. if the original operation was single or double, we have to do another
# multiply using extended precision and the correct rounding mode. the result
# of this operation then has its exponent scaled by +0x6000 to create the
# exceptional operand.
#
fmul_unfl:
	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit

# for fun, let's use only extended precision, round to zero. then, let
# the unf_res() routine figure out all the rest.
# will we get the correct answer?
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst operand

	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fmul.x		FP_SCR0(%a6),%fp0	# execute multiply

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
	bne.b		fmul_unfl_ena		# yes

fmul_unfl_dis:
	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result

	lea		FP_SCR0(%a6),%a0	# pass: result addr
	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
	bsr.l		unf_res			# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# unf_res may have set 'Z'
	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
	rts

#
# UNFL is enabled.
#
fmul_unfl_ena:
	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op

	mov.l		L_SCR3(%a6),%d1
	andi.b		&0xc0,%d1		# is precision extended?
	bne.b		fmul_unfl_ena_sd	# no, sgl or dbl

# if the rnd mode is anything but RZ, then we have to re-do the above
# multiplication because we used RZ for all.
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

fmul_unfl_ena_cont:
	fmov.l		&0x0,%fpsr		# clear FPSR

	fmul.x		FP_SCR0(%a6),%fp1	# execute multiply

	fmov.l		&0x0,%fpcr		# clear FPCR

	fmovm.x		&0x40,FP_SCR0(%a6)	# save result to stack
	mov.l		%d2,-(%sp)		# save d2
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# add scale factor
	addi.l		&0x6000,%d1		# add bias
	andi.w		&0x7fff,%d1		# clear sign bit
	or.w		%d2,%d1			# concat old sign,new exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	bra.w		fmul_unfl_dis

fmul_unfl_ena_sd:
	mov.l		L_SCR3(%a6),%d1
	andi.b		&0x30,%d1		# use only rnd mode
	fmov.l		%d1,%fpcr		# set FPCR

	bra.b		fmul_unfl_ena_cont

# MAY UNDERFLOW:
# -use the correct rounding mode and precision. this code favors operations
# that do not underflow.
fmul_may_unfl:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst operand

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fmul.x		FP_SCR0(%a6),%fp0	# execute multiply

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

	fabs.x		%fp0,%fp1		# make a copy of result
	fcmp.b		%fp1,&0x2		# compare |result| with 2.b
	fbgt.w		fmul_normal_exit	# |result| > 2; no underflow occurred
	fblt.w		fmul_unfl		# |result| < 2; underflow occurred

#
# we still don't know if underflow occurred. result is ~ equal to 2. but,
# we don't know if the result was an underflow that rounded up to a 2 or
# a normalized number that rounded down to a 2. so, redo the entire operation
# using RZ as the rounding mode to see what the pre-rounded result is.
# this case should be relatively rare.
#
	fmovm.x		FP_SCR1(%a6),&0x40	# load dst operand

	mov.l		L_SCR3(%a6),%d1
	andi.b		&0xc0,%d1		# keep rnd prec
	ori.b		&rz_mode*0x10,%d1	# insert RZ

	fmov.l		%d1,%fpcr		# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fmul.x		FP_SCR0(%a6),%fp1	# execute multiply

	fmov.l		&0x0,%fpcr		# clear FPCR
	fabs.x		%fp1			# make absolute value
	fcmp.b		%fp1,&0x2		# is |result| < 2.b?
	fbge.w		fmul_normal_exit	# no; no underflow occurred
	bra.w		fmul_unfl		# yes, underflow occurred

################################################################################

#
# Multiply: inputs are not both normalized; what are they?
#
# d1 = (DTAG << 3) | STAG on entry, so each group of eight entries below is
# one destination tag and the position within the group is the source tag.
# Comments read "DST x SRC".
#
fmul_not_norm:
	mov.w		(tbl_fmul_op.b,%pc,%d1.w*2),%d1
	jmp		(tbl_fmul_op.b,%pc,%d1.w)

	swbeg		&48
tbl_fmul_op:
	short		fmul_norm	- tbl_fmul_op # NORM x NORM
	short		fmul_zero	- tbl_fmul_op # NORM x ZERO
	short		fmul_inf_src	- tbl_fmul_op # NORM x INF
	short		fmul_res_qnan	- tbl_fmul_op # NORM x QNAN
	short		fmul_norm	- tbl_fmul_op # NORM x DENORM
	short		fmul_res_snan	- tbl_fmul_op # NORM x SNAN
	short		tbl_fmul_op	- tbl_fmul_op #
	short		tbl_fmul_op	- tbl_fmul_op #

	short		fmul_zero	- tbl_fmul_op # ZERO x NORM
	short		fmul_zero	- tbl_fmul_op # ZERO x ZERO
	short		fmul_res_operr	- tbl_fmul_op # ZERO x INF
	short		fmul_res_qnan	- tbl_fmul_op # ZERO x QNAN
	short		fmul_zero	- tbl_fmul_op # ZERO x DENORM
	short		fmul_res_snan	- tbl_fmul_op # ZERO x SNAN
	short		tbl_fmul_op	- tbl_fmul_op #
	short		tbl_fmul_op	- tbl_fmul_op #

	short		fmul_inf_dst	- tbl_fmul_op # INF x NORM
	short		fmul_res_operr	- tbl_fmul_op # INF x ZERO
	short		fmul_inf_dst	- tbl_fmul_op # INF x INF
	short		fmul_res_qnan	- tbl_fmul_op # INF x QNAN
	short		fmul_inf_dst	- tbl_fmul_op # INF x DENORM
	short		fmul_res_snan	- tbl_fmul_op # INF x SNAN
	short		tbl_fmul_op	- tbl_fmul_op #
	short		tbl_fmul_op	- tbl_fmul_op #

	short		fmul_res_qnan	- tbl_fmul_op # QNAN x NORM
	short		fmul_res_qnan	- tbl_fmul_op # QNAN x ZERO
	short		fmul_res_qnan	- tbl_fmul_op # QNAN x INF
	short		fmul_res_qnan	- tbl_fmul_op # QNAN x QNAN
	short		fmul_res_qnan	- tbl_fmul_op # QNAN x DENORM
	short		fmul_res_snan	- tbl_fmul_op # QNAN x SNAN
	short		tbl_fmul_op	- tbl_fmul_op #
	short		tbl_fmul_op	- tbl_fmul_op #

	short		fmul_norm	- tbl_fmul_op # DENORM x NORM
	short		fmul_zero	- tbl_fmul_op # DENORM x ZERO
	short		fmul_inf_src	- tbl_fmul_op # DENORM x INF
	short		fmul_res_qnan	- tbl_fmul_op # DENORM x QNAN
	short		fmul_norm	- tbl_fmul_op # DENORM x DENORM
	short		fmul_res_snan	- tbl_fmul_op # DENORM x SNAN
	short		tbl_fmul_op	- tbl_fmul_op #
	short		tbl_fmul_op	- tbl_fmul_op #

	short		fmul_res_snan	- tbl_fmul_op # SNAN x NORM
	short		fmul_res_snan	- tbl_fmul_op # SNAN x ZERO
	short		fmul_res_snan	- tbl_fmul_op # SNAN x INF
	short		fmul_res_snan	- tbl_fmul_op # SNAN x QNAN
	short		fmul_res_snan	- tbl_fmul_op # SNAN x DENORM
	short		fmul_res_snan	- tbl_fmul_op # SNAN x SNAN
	short		tbl_fmul_op	- tbl_fmul_op #
	short		tbl_fmul_op	- tbl_fmul_op #

# Long-branch trampolines so the short table offsets above can reach the
# shared result routines.
fmul_res_operr:
	bra.l		res_operr
fmul_res_snan:
	bra.l		res_snan
fmul_res_qnan:
	bra.l		res_qnan

#
# Multiply: (Zero x Zero) || (Zero x norm) || (Zero x denorm)
#
	global		fmul_zero		# global for fsglmul
fmul_zero:
	mov.b		SRC_EX(%a0),%d0		# exclusive or the signs
	mov.b		DST_EX(%a1),%d1
	eor.b		%d0,%d1
	bpl.b		fmul_zero_p		# result ZERO is pos.
fmul_zero_n:
	fmov.s		&0x80000000,%fp0	# load -ZERO
	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6) # set Z/N
	rts
fmul_zero_p:
	fmov.s		&0x00000000,%fp0	# load +ZERO
	mov.b		&z_bmask,FPSR_CC(%a6)	# set Z
	rts

#
# Multiply: (inf x inf) || (inf x norm) || (inf x denorm)
#
# Note: The j-bit for an infinity is a don't-care. However, to be
# strictly compatible w/ the 68881/882, we make sure to return an
# INF w/ the j-bit set if the input INF j-bit was set. Destination
# INFs take priority.
#
	global		fmul_inf_dst		# global for fsglmul
fmul_inf_dst:
	fmovm.x		DST(%a1),&0x80		# return INF result in fp0
	mov.b		SRC_EX(%a0),%d0		# exclusive or the signs
	mov.b		DST_EX(%a1),%d1
	eor.b		%d0,%d1
	bpl.b		fmul_inf_dst_p		# result INF is pos.
fmul_inf_dst_n:
	fabs.x		%fp0			# clear result sign
	fneg.x		%fp0			# set result sign
	mov.b		&inf_bmask+neg_bmask,FPSR_CC(%a6) # set INF/N
	rts
fmul_inf_dst_p:
	fabs.x		%fp0			# clear result sign
	mov.b		&inf_bmask,FPSR_CC(%a6)	# set INF
	rts

	global		fmul_inf_src		# global for fsglmul
fmul_inf_src:
	fmovm.x		SRC(%a0),&0x80		# return INF result in fp0
	mov.b		SRC_EX(%a0),%d0		# exclusive or the signs
	mov.b		DST_EX(%a1),%d1
	eor.b		%d0,%d1
	bpl.b		fmul_inf_dst_p		# result INF is pos.
	bra.b		fmul_inf_dst_n

#########################################################################
# XDEF ****************************************************************	#
#	fin(): emulates the fmove instruction				#
#	fsin(): emulates the fsmove instruction				#
#	fdin(): emulates the fdmove instruction				#
#									#
# XREF ****************************************************************	#
#	norm() - normalize mantissa for EXOP on denorm			#
#	scale_to_zero_src() - scale src exponent to zero		#
#	ovf_res() - return default overflow result			#
#	unf_res() - return default underflow result			#
#	res_qnan_1op() - return QNAN result				#
#	res_snan_1op() - return SNAN result				#
#									#
# INPUT ***************************************************************	#
#	a0 = pointer to extended precision source operand		#
#	d0 = round prec/mode						#
#									#
# OUTPUT **************************************************************	#
#	fp0 = result							#
#	fp1 = EXOP (if exception occurred)				#
#									#
# ALGORITHM ***********************************************************	#
#	Handle NANs, infinities, and zeroes as special cases. Divide	#
#	norms into extended, single, and double precision.		#
#	Norms can be emulated w/ a regular fmove instruction. For	#
#	sgl/dbl, must scale exponent and perform an "fmove".
#	Check to see							#
#	if the result would have overflowed/underflowed. If so, use unf_res()	#
#	or ovf_res() to return the default result. Also return EXOP if	#
#	exception is enabled. If no exception, return the default result.	#
#	Unnorms don't pass through here.				#
#									#
#########################################################################

	global		fsin
fsin:
	andi.b		&0x30,%d0		# clear rnd prec
	ori.b		&s_mode*0x10,%d0	# insert sgl precision
	bra.b		fin

	global		fdin
fdin:
	andi.b		&0x30,%d0		# clear rnd prec
	ori.b		&d_mode*0x10,%d0	# insert dbl precision
						# (falls through into fin)

	global		fin
fin:
	mov.l		%d0,L_SCR3(%a6)		# store rnd info

	mov.b		STAG(%a6),%d1		# fetch src optype tag
	bne.w		fin_not_norm		# optimize on non-norm input

#
# FP MOVE IN: NORMs and DENORMs ONLY!
#
fin_norm:
	andi.b		&0xc0,%d0		# is precision extended?
	bne.w		fin_not_ext		# no, so go handle dbl or sgl

#
# precision selected is extended. so...we cannot get an underflow
# or overflow because of rounding to the correct precision. so...
# skip the scaling and unscaling...
#
	tst.b		SRC_EX(%a0)		# is the operand negative?
	bpl.b		fin_norm_done		# no
	bset		&neg_bit,FPSR_CC(%a6)	# yes, so set 'N' ccode bit
fin_norm_done:
	fmovm.x		SRC(%a0),&0x80		# return result in fp0
	rts

#
# for an extended precision DENORM, the UNFL exception bit is set
# the accrued bit is NOT set in this instance(no inexactness!)
#
fin_denorm:
	andi.b		&0xc0,%d0		# is precision extended?
	bne.w		fin_not_ext		# no, so go handle dbl or sgl

	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
	tst.b		SRC_EX(%a0)		# is the operand negative?
	bpl.b		fin_denorm_done		# no
	bset		&neg_bit,FPSR_CC(%a6)	# yes, so set 'N' ccode bit
fin_denorm_done:
	fmovm.x		SRC(%a0),&0x80		# return result in fp0
	btst		&unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled?
	bne.b		fin_denorm_unfl_ena	# yes
	rts

#
# the input is an extended DENORM and underflow is enabled in the FPCR.
# normalize the mantissa and add the bias of 0x6000 to the resulting negative
# exponent and insert back into the operand.
#
fin_denorm_unfl_ena:
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
	bsr.l		norm			# normalize result
	neg.w		%d0			# new exponent = -(shft val)
	addi.w		&0x6000,%d0		# add new bias to exponent
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch old sign,exp
	andi.w		&0x8000,%d1		# keep old sign
	andi.w		&0x7fff,%d0		# clear sign position
	or.w		%d1,%d0			# concat new exp,old sign
	mov.w		%d0,FP_SCR0_EX(%a6)	# insert new exponent
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	rts

#
# operand is to be rounded to single or double precision
#
fin_not_ext:
	cmpi.b		%d0,&s_mode*0x10	# separate sgl/dbl prec
	bne.b		fin_dbl

#
# operand is to be rounded to single precision
#
fin_sgl:
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	bsr.l		scale_to_zero_src	# calculate scale factor

	cmpi.l		%d0,&0x3fff-0x3f80	# will move in underflow?
	bge.w		fin_sd_unfl		# yes; go handle underflow
	cmpi.l		%d0,&0x3fff-0x407e	# will move in overflow?
	beq.w		fin_sd_may_ovfl		# maybe; go check
	blt.w		fin_sd_ovfl		# yes; go handle overflow

#
# operand will NOT overflow or underflow when moved into the fp reg file
#
fin_sd_normal:
	fmov.l		&0x0,%fpsr		# clear FPSR
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

	fmov.x		FP_SCR0(%a6),%fp0	# perform move

	fmov.l		%fpsr,%d1		# save FPSR
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

# d0 still holds the scale factor from scale_to_zero_src here.
fin_sd_normal_exit:
	mov.l		%d2,-(%sp)		# save d2
	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
	mov.w		FP_SCR0_EX(%a6),%d1	# load {sgn,exp}
	mov.w		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	sub.l		%d0,%d1			# add scale factor
	andi.w		&0x8000,%d2		# keep old sign
	or.w		%d1,%d2			# concat old sign,new exponent
	mov.w		%d2,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
	rts

#
# operand is to be rounded to double precision
#
fin_dbl:
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	bsr.l		scale_to_zero_src	# calculate scale factor

	cmpi.l		%d0,&0x3fff-0x3c00	# will move in underflow?
	bge.w		fin_sd_unfl		# yes; go handle underflow
	cmpi.l		%d0,&0x3fff-0x43fe	# will move in overflow?
	beq.w		fin_sd_may_ovfl		# maybe; go check
	blt.w		fin_sd_ovfl		# yes; go handle overflow
	bra.w		fin_sd_normal		# no; go handle normalized op

#
# operand WILL underflow when moved in to the fp register file
#
fin_sd_unfl:
	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit

	tst.b		FP_SCR0_EX(%a6)		# is operand negative?
	bpl.b		fin_sd_unfl_tst
	bset		&neg_bit,FPSR_CC(%a6)	# set 'N' ccode bit

# if underflow or inexact is enabled, then go calculate the EXOP first.
fin_sd_unfl_tst:
	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
	bne.b		fin_sd_unfl_ena		# yes

fin_sd_unfl_dis:
	lea		FP_SCR0(%a6),%a0	# pass: result addr
	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
	bsr.l		unf_res			# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# unf_res may have set 'Z'
	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
	rts

#
# operand will underflow AND underflow or inexact is enabled.
# therefore, we must return the result rounded to extended precision.
#
fin_sd_unfl_ena:
	mov.l		FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
	mov.l		FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
	mov.w		FP_SCR0_EX(%a6),%d1	# load current exponent

	mov.l		%d2,-(%sp)		# save d2
	mov.w		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	sub.l		%d0,%d1			# subtract scale factor
	andi.w		&0x8000,%d2		# extract old sign
	addi.l		&0x6000,%d1		# add new bias
	andi.w		&0x7fff,%d1		# clear sign bit
	or.w		%d1,%d2			# concat old sign,new exp
	mov.w		%d2,FP_SCR1_EX(%a6)	# insert new exponent
	fmovm.x		FP_SCR1(%a6),&0x40	# return EXOP in fp1
	mov.l		(%sp)+,%d2		# restore d2
	bra.b		fin_sd_unfl_dis

#
# operand WILL overflow.
#
fin_sd_ovfl:
	fmov.l		&0x0,%fpsr		# clear FPSR
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

	fmov.x		FP_SCR0(%a6),%fp0	# perform move

	fmov.l		&0x0,%fpcr		# clear FPCR
	fmov.l		%fpsr,%d1		# save FPSR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

fin_sd_ovfl_tst:
	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
	bne.b		fin_sd_ovfl_ena		# yes

#
# OVFL is not enabled; therefore, we must create the default result by
# calling ovf_res().
#
fin_sd_ovfl_dis:
	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
	sne		%d1			# set sign param accordingly
	mov.l		L_SCR3(%a6),%d0		# pass: prec,mode
	bsr.l		ovf_res			# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
	fmovm.x		(%a0),&0x80		# return default result in fp0
	rts

#
# OVFL is enabled.
# the INEX2 bit has already been updated by the round to the correct precision.
# now, round to extended(and don't alter the FPSR).
#
fin_sd_ovfl_ena:
	mov.l		%d2,-(%sp)		# save d2
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# add scale factor
	sub.l		&0x6000,%d1		# subtract bias
	andi.w		&0x7fff,%d1		# clear sign bit
	or.w		%d2,%d1			# concat old sign,new exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	bra.b		fin_sd_ovfl_dis

#
# the move in MAY overflow. so...
#
fin_sd_may_ovfl:
	fmov.l		&0x0,%fpsr		# clear FPSR
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

	fmov.x		FP_SCR0(%a6),%fp0	# perform the move

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

	fabs.x		%fp0,%fp1		# make a copy of result
	fcmp.b		%fp1,&0x2		# is |result| >= 2.b?
	fbge.w		fin_sd_ovfl_tst		# yes; overflow has occurred

# no, it didn't overflow; we have correct result
	bra.w		fin_sd_normal_exit

##########################################################################

#
# operand is not a NORM: check its optype and branch accordingly
#
fin_not_norm:
	cmpi.b		%d1,&DENORM		# weed out DENORM
	beq.w		fin_denorm
	cmpi.b		%d1,&SNAN		# weed out SNANs
	beq.l		res_snan_1op
	cmpi.b		%d1,&QNAN		# weed out QNANs
	beq.l		res_qnan_1op

#
# do the fmove in; at this point, only possible ops are ZERO and INF.
# use fmov to determine ccodes.
# prec:mode should be zero at this point but it won't affect answer anyways.
#
	fmov.x		SRC(%a0),%fp0		# do fmove in
	fmov.l		%fpsr,%d0		# no exceptions possible
	rol.l		&0x8,%d0		# put ccodes in lo byte
	mov.b		%d0,FPSR_CC(%a6)	# insert correct ccodes
	rts

#########################################################################
# XDEF ****************************************************************	#
#	fdiv(): emulates the fdiv instruction				#
#	fsdiv(): emulates the fsdiv instruction				#
#	fddiv(): emulates the fddiv instruction				#
#									#
# XREF ****************************************************************	#
#	scale_to_zero_src() - scale src exponent to zero		#
#	scale_to_zero_dst() - scale dst exponent to zero		#
#	unf_res() - return default underflow result			#
#	ovf_res() - return default overflow result			#
#	res_qnan() - return QNAN result					#
#	res_snan() - return SNAN result					#
#									#
# INPUT ***************************************************************	#
#	a0 = pointer to extended precision source operand		#
#	a1 = pointer to extended precision destination operand		#
#	d0  rnd prec,mode						#
#									#
# OUTPUT **************************************************************	#
#	fp0 = result							#
12368# fp1 = EXOP (if exception occurred) # 12369# # 12370# ALGORITHM *********************************************************** # 12371# Handle NANs, infinities, and zeroes as special cases. Divide # 12372# norms/denorms into ext/sgl/dbl precision. # 12373# For norms/denorms, scale the exponents such that a divide # 12374# instruction won't cause an exception. Use the regular fdiv to # 12375# compute a result. Check if the regular operands would have taken # 12376# an exception. If so, return the default overflow/underflow result # 12377# and return the EXOP if exceptions are enabled. Else, scale the # 12378# result operand to the proper exponent. # 12379# # 12380######################################################################### 12381 12382 align 0x10 12383tbl_fdiv_unfl: 12384 long 0x3fff - 0x0000 # ext_unfl 12385 long 0x3fff - 0x3f81 # sgl_unfl 12386 long 0x3fff - 0x3c01 # dbl_unfl 12387 12388tbl_fdiv_ovfl: 12389 long 0x3fff - 0x7ffe # ext overflow exponent 12390 long 0x3fff - 0x407e # sgl overflow exponent 12391 long 0x3fff - 0x43fe # dbl overflow exponent 12392 12393 global fsdiv 12394fsdiv: 12395 andi.b &0x30,%d0 # clear rnd prec 12396 ori.b &s_mode*0x10,%d0 # insert sgl prec 12397 bra.b fdiv 12398 12399 global fddiv 12400fddiv: 12401 andi.b &0x30,%d0 # clear rnd prec 12402 ori.b &d_mode*0x10,%d0 # insert dbl prec 12403 12404 global fdiv 12405fdiv: 12406 mov.l %d0,L_SCR3(%a6) # store rnd info 12407 12408 clr.w %d1 12409 mov.b DTAG(%a6),%d1 12410 lsl.b &0x3,%d1 12411 or.b STAG(%a6),%d1 # combine src tags 12412 12413 bne.w fdiv_not_norm # optimize on non-norm input 12414 12415# 12416# DIVIDE: NORMs and DENORMs ONLY! 
12417# 12418fdiv_norm: 12419 mov.w DST_EX(%a1),FP_SCR1_EX(%a6) 12420 mov.l DST_HI(%a1),FP_SCR1_HI(%a6) 12421 mov.l DST_LO(%a1),FP_SCR1_LO(%a6) 12422 12423 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) 12424 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) 12425 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) 12426 12427 bsr.l scale_to_zero_src # scale src exponent 12428 mov.l %d0,-(%sp) # save scale factor 1 12429 12430 bsr.l scale_to_zero_dst # scale dst exponent 12431 12432 neg.l (%sp) # SCALE FACTOR = scale1 - scale2 12433 add.l %d0,(%sp) 12434 12435 mov.w 2+L_SCR3(%a6),%d1 # fetch precision 12436 lsr.b &0x6,%d1 # shift to lo bits 12437 mov.l (%sp)+,%d0 # load S.F. 12438 cmp.l %d0,(tbl_fdiv_ovfl.b,%pc,%d1.w*4) # will result overflow? 12439 ble.w fdiv_may_ovfl # result will overflow 12440 12441 cmp.l %d0,(tbl_fdiv_unfl.w,%pc,%d1.w*4) # will result underflow? 12442 beq.w fdiv_may_unfl # maybe 12443 bgt.w fdiv_unfl # yes; go handle underflow 12444 12445fdiv_normal: 12446 fmovm.x FP_SCR1(%a6),&0x80 # load dst op 12447 12448 fmov.l L_SCR3(%a6),%fpcr # save FPCR 12449 fmov.l &0x0,%fpsr # clear FPSR 12450 12451 fdiv.x FP_SCR0(%a6),%fp0 # perform divide 12452 12453 fmov.l %fpsr,%d1 # save FPSR 12454 fmov.l &0x0,%fpcr # clear FPCR 12455 12456 or.l %d1,USER_FPSR(%a6) # save INEX2,N 12457 12458fdiv_normal_exit: 12459 fmovm.x &0x80,FP_SCR0(%a6) # store result on stack 12460 mov.l %d2,-(%sp) # store d2 12461 mov.w FP_SCR0_EX(%a6),%d1 # load {sgn,exp} 12462 mov.l %d1,%d2 # make a copy 12463 andi.l &0x7fff,%d1 # strip sign 12464 andi.w &0x8000,%d2 # keep old sign 12465 sub.l %d0,%d1 # add scale factor 12466 or.w %d2,%d1 # concat old sign,new exp 12467 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent 12468 mov.l (%sp)+,%d2 # restore d2 12469 fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0 12470 rts 12471 12472tbl_fdiv_ovfl2: 12473 long 0x7fff 12474 long 0x407f 12475 long 0x43ff 12476 12477fdiv_no_ovfl: 12478 mov.l (%sp)+,%d0 # restore scale factor 12479 bra.b fdiv_normal_exit 12480 12481fdiv_may_ovfl: 12482 mov.l 
%d0,-(%sp) # save scale factor 12483 12484 fmovm.x FP_SCR1(%a6),&0x80 # load dst op 12485 12486 fmov.l L_SCR3(%a6),%fpcr # set FPCR 12487 fmov.l &0x0,%fpsr # set FPSR 12488 12489 fdiv.x FP_SCR0(%a6),%fp0 # execute divide 12490 12491 fmov.l %fpsr,%d0 12492 fmov.l &0x0,%fpcr 12493 12494 or.l %d0,USER_FPSR(%a6) # save INEX,N 12495 12496 fmovm.x &0x01,-(%sp) # save result to stack 12497 mov.w (%sp),%d0 # fetch new exponent 12498 add.l &0xc,%sp # clear result from stack 12499 andi.l &0x7fff,%d0 # strip sign 12500 sub.l (%sp),%d0 # add scale factor 12501 cmp.l %d0,(tbl_fdiv_ovfl2.b,%pc,%d1.w*4) 12502 blt.b fdiv_no_ovfl 12503 mov.l (%sp)+,%d0 12504 12505fdiv_ovfl_tst: 12506 or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex 12507 12508 mov.b FPCR_ENABLE(%a6),%d1 12509 andi.b &0x13,%d1 # is OVFL or INEX enabled? 12510 bne.b fdiv_ovfl_ena # yes 12511 12512fdiv_ovfl_dis: 12513 btst &neg_bit,FPSR_CC(%a6) # is result negative? 12514 sne %d1 # set sign param accordingly 12515 mov.l L_SCR3(%a6),%d0 # pass prec:rnd 12516 bsr.l ovf_res # calculate default result 12517 or.b %d0,FPSR_CC(%a6) # set INF if applicable 12518 fmovm.x (%a0),&0x80 # return default result in fp0 12519 rts 12520 12521fdiv_ovfl_ena: 12522 mov.l L_SCR3(%a6),%d1 12523 andi.b &0xc0,%d1 # is precision extended? 
12524 bne.b fdiv_ovfl_ena_sd # no, do sgl or dbl 12525 12526fdiv_ovfl_ena_cont: 12527 fmovm.x &0x80,FP_SCR0(%a6) # move result to stack 12528 12529 mov.l %d2,-(%sp) # save d2 12530 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp} 12531 mov.w %d1,%d2 # make a copy 12532 andi.l &0x7fff,%d1 # strip sign 12533 sub.l %d0,%d1 # add scale factor 12534 subi.l &0x6000,%d1 # subtract bias 12535 andi.w &0x7fff,%d1 # clear sign bit 12536 andi.w &0x8000,%d2 # keep old sign 12537 or.w %d2,%d1 # concat old sign,new exp 12538 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent 12539 mov.l (%sp)+,%d2 # restore d2 12540 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1 12541 bra.b fdiv_ovfl_dis 12542 12543fdiv_ovfl_ena_sd: 12544 fmovm.x FP_SCR1(%a6),&0x80 # load dst operand 12545 12546 mov.l L_SCR3(%a6),%d1 12547 andi.b &0x30,%d1 # keep rnd mode 12548 fmov.l %d1,%fpcr # set FPCR 12549 12550 fdiv.x FP_SCR0(%a6),%fp0 # execute divide 12551 12552 fmov.l &0x0,%fpcr # clear FPCR 12553 bra.b fdiv_ovfl_ena_cont 12554 12555fdiv_unfl: 12556 bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit 12557 12558 fmovm.x FP_SCR1(%a6),&0x80 # load dst op 12559 12560 fmov.l &rz_mode*0x10,%fpcr # set FPCR 12561 fmov.l &0x0,%fpsr # clear FPSR 12562 12563 fdiv.x FP_SCR0(%a6),%fp0 # execute divide 12564 12565 fmov.l %fpsr,%d1 # save status 12566 fmov.l &0x0,%fpcr # clear FPCR 12567 12568 or.l %d1,USER_FPSR(%a6) # save INEX2,N 12569 12570 mov.b FPCR_ENABLE(%a6),%d1 12571 andi.b &0x0b,%d1 # is UNFL or INEX enabled? 12572 bne.b fdiv_unfl_ena # yes 12573 12574fdiv_unfl_dis: 12575 fmovm.x &0x80,FP_SCR0(%a6) # store out result 12576 12577 lea FP_SCR0(%a6),%a0 # pass: result addr 12578 mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode 12579 bsr.l unf_res # calculate default result 12580 or.b %d0,FPSR_CC(%a6) # 'Z' may have been set 12581 fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0 12582 rts 12583 12584# 12585# UNFL is enabled. 
12586# 12587fdiv_unfl_ena: 12588 fmovm.x FP_SCR1(%a6),&0x40 # load dst op 12589 12590 mov.l L_SCR3(%a6),%d1 12591 andi.b &0xc0,%d1 # is precision extended? 12592 bne.b fdiv_unfl_ena_sd # no, sgl or dbl 12593 12594 fmov.l L_SCR3(%a6),%fpcr # set FPCR 12595 12596fdiv_unfl_ena_cont: 12597 fmov.l &0x0,%fpsr # clear FPSR 12598 12599 fdiv.x FP_SCR0(%a6),%fp1 # execute divide 12600 12601 fmov.l &0x0,%fpcr # clear FPCR 12602 12603 fmovm.x &0x40,FP_SCR0(%a6) # save result to stack 12604 mov.l %d2,-(%sp) # save d2 12605 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp} 12606 mov.l %d1,%d2 # make a copy 12607 andi.l &0x7fff,%d1 # strip sign 12608 andi.w &0x8000,%d2 # keep old sign 12609 sub.l %d0,%d1 # add scale factoer 12610 addi.l &0x6000,%d1 # add bias 12611 andi.w &0x7fff,%d1 12612 or.w %d2,%d1 # concat old sign,new exp 12613 mov.w %d1,FP_SCR0_EX(%a6) # insert new exp 12614 mov.l (%sp)+,%d2 # restore d2 12615 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1 12616 bra.w fdiv_unfl_dis 12617 12618fdiv_unfl_ena_sd: 12619 mov.l L_SCR3(%a6),%d1 12620 andi.b &0x30,%d1 # use only rnd mode 12621 fmov.l %d1,%fpcr # set FPCR 12622 12623 bra.b fdiv_unfl_ena_cont 12624 12625# 12626# the divide operation MAY underflow: 12627# 12628fdiv_may_unfl: 12629 fmovm.x FP_SCR1(%a6),&0x80 # load dst op 12630 12631 fmov.l L_SCR3(%a6),%fpcr # set FPCR 12632 fmov.l &0x0,%fpsr # clear FPSR 12633 12634 fdiv.x FP_SCR0(%a6),%fp0 # execute divide 12635 12636 fmov.l %fpsr,%d1 # save status 12637 fmov.l &0x0,%fpcr # clear FPCR 12638 12639 or.l %d1,USER_FPSR(%a6) # save INEX2,N 12640 12641 fabs.x %fp0,%fp1 # make a copy of result 12642 fcmp.b %fp1,&0x1 # is |result| > 1.b? 12643 fbgt.w fdiv_normal_exit # no; no underflow occurred 12644 fblt.w fdiv_unfl # yes; underflow occurred 12645 12646# 12647# we still don't know if underflow occurred. result is ~ equal to 1. but, 12648# we don't know if the result was an underflow that rounded up to a 1 12649# or a normalized number that rounded down to a 1. 
so, redo the entire 12650# operation using RZ as the rounding mode to see what the pre-rounded 12651# result is. this case should be relatively rare. 12652# 12653 fmovm.x FP_SCR1(%a6),&0x40 # load dst op into fp1 12654 12655 mov.l L_SCR3(%a6),%d1 12656 andi.b &0xc0,%d1 # keep rnd prec 12657 ori.b &rz_mode*0x10,%d1 # insert RZ 12658 12659 fmov.l %d1,%fpcr # set FPCR 12660 fmov.l &0x0,%fpsr # clear FPSR 12661 12662 fdiv.x FP_SCR0(%a6),%fp1 # execute divide 12663 12664 fmov.l &0x0,%fpcr # clear FPCR 12665 fabs.x %fp1 # make absolute value 12666 fcmp.b %fp1,&0x1 # is |result| < 1.b? 12667 fbge.w fdiv_normal_exit # no; no underflow occurred 12668 bra.w fdiv_unfl # yes; underflow occurred 12669 12670############################################################################ 12671 12672# 12673# Divide: inputs are not both normalized; what are they? 12674# 12675fdiv_not_norm: 12676 mov.w (tbl_fdiv_op.b,%pc,%d1.w*2),%d1 12677 jmp (tbl_fdiv_op.b,%pc,%d1.w*1) 12678 12679 swbeg &48 12680tbl_fdiv_op: 12681 short fdiv_norm - tbl_fdiv_op # NORM / NORM 12682 short fdiv_inf_load - tbl_fdiv_op # NORM / ZERO 12683 short fdiv_zero_load - tbl_fdiv_op # NORM / INF 12684 short fdiv_res_qnan - tbl_fdiv_op # NORM / QNAN 12685 short fdiv_norm - tbl_fdiv_op # NORM / DENORM 12686 short fdiv_res_snan - tbl_fdiv_op # NORM / SNAN 12687 short tbl_fdiv_op - tbl_fdiv_op # 12688 short tbl_fdiv_op - tbl_fdiv_op # 12689 12690 short fdiv_zero_load - tbl_fdiv_op # ZERO / NORM 12691 short fdiv_res_operr - tbl_fdiv_op # ZERO / ZERO 12692 short fdiv_zero_load - tbl_fdiv_op # ZERO / INF 12693 short fdiv_res_qnan - tbl_fdiv_op # ZERO / QNAN 12694 short fdiv_zero_load - tbl_fdiv_op # ZERO / DENORM 12695 short fdiv_res_snan - tbl_fdiv_op # ZERO / SNAN 12696 short tbl_fdiv_op - tbl_fdiv_op # 12697 short tbl_fdiv_op - tbl_fdiv_op # 12698 12699 short fdiv_inf_dst - tbl_fdiv_op # INF / NORM 12700 short fdiv_inf_dst - tbl_fdiv_op # INF / ZERO 12701 short fdiv_res_operr - tbl_fdiv_op # INF / INF 12702 short 
fdiv_res_qnan - tbl_fdiv_op # INF / QNAN 12703 short fdiv_inf_dst - tbl_fdiv_op # INF / DENORM 12704 short fdiv_res_snan - tbl_fdiv_op # INF / SNAN 12705 short tbl_fdiv_op - tbl_fdiv_op # 12706 short tbl_fdiv_op - tbl_fdiv_op # 12707 12708 short fdiv_res_qnan - tbl_fdiv_op # QNAN / NORM 12709 short fdiv_res_qnan - tbl_fdiv_op # QNAN / ZERO 12710 short fdiv_res_qnan - tbl_fdiv_op # QNAN / INF 12711 short fdiv_res_qnan - tbl_fdiv_op # QNAN / QNAN 12712 short fdiv_res_qnan - tbl_fdiv_op # QNAN / DENORM 12713 short fdiv_res_snan - tbl_fdiv_op # QNAN / SNAN 12714 short tbl_fdiv_op - tbl_fdiv_op # 12715 short tbl_fdiv_op - tbl_fdiv_op # 12716 12717 short fdiv_norm - tbl_fdiv_op # DENORM / NORM 12718 short fdiv_inf_load - tbl_fdiv_op # DENORM / ZERO 12719 short fdiv_zero_load - tbl_fdiv_op # DENORM / INF 12720 short fdiv_res_qnan - tbl_fdiv_op # DENORM / QNAN 12721 short fdiv_norm - tbl_fdiv_op # DENORM / DENORM 12722 short fdiv_res_snan - tbl_fdiv_op # DENORM / SNAN 12723 short tbl_fdiv_op - tbl_fdiv_op # 12724 short tbl_fdiv_op - tbl_fdiv_op # 12725 12726 short fdiv_res_snan - tbl_fdiv_op # SNAN / NORM 12727 short fdiv_res_snan - tbl_fdiv_op # SNAN / ZERO 12728 short fdiv_res_snan - tbl_fdiv_op # SNAN / INF 12729 short fdiv_res_snan - tbl_fdiv_op # SNAN / QNAN 12730 short fdiv_res_snan - tbl_fdiv_op # SNAN / DENORM 12731 short fdiv_res_snan - tbl_fdiv_op # SNAN / SNAN 12732 short tbl_fdiv_op - tbl_fdiv_op # 12733 short tbl_fdiv_op - tbl_fdiv_op # 12734 12735fdiv_res_qnan: 12736 bra.l res_qnan 12737fdiv_res_snan: 12738 bra.l res_snan 12739fdiv_res_operr: 12740 bra.l res_operr 12741 12742 global fdiv_zero_load # global for fsgldiv 12743fdiv_zero_load: 12744 mov.b SRC_EX(%a0),%d0 # result sign is exclusive 12745 mov.b DST_EX(%a1),%d1 # or of input signs. 
12746 eor.b %d0,%d1 12747 bpl.b fdiv_zero_load_p # result is positive 12748 fmov.s &0x80000000,%fp0 # load a -ZERO 12749 mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # set Z/N 12750 rts 12751fdiv_zero_load_p: 12752 fmov.s &0x00000000,%fp0 # load a +ZERO 12753 mov.b &z_bmask,FPSR_CC(%a6) # set Z 12754 rts 12755 12756# 12757# The destination was In Range and the source was a ZERO. The result, 12758# therefore, is an INF w/ the proper sign. 12759# So, determine the sign and return a new INF (w/ the j-bit cleared). 12760# 12761 global fdiv_inf_load # global for fsgldiv 12762fdiv_inf_load: 12763 ori.w &dz_mask+adz_mask,2+USER_FPSR(%a6) # no; set DZ/ADZ 12764 mov.b SRC_EX(%a0),%d0 # load both signs 12765 mov.b DST_EX(%a1),%d1 12766 eor.b %d0,%d1 12767 bpl.b fdiv_inf_load_p # result is positive 12768 fmov.s &0xff800000,%fp0 # make result -INF 12769 mov.b &inf_bmask+neg_bmask,FPSR_CC(%a6) # set INF/N 12770 rts 12771fdiv_inf_load_p: 12772 fmov.s &0x7f800000,%fp0 # make result +INF 12773 mov.b &inf_bmask,FPSR_CC(%a6) # set INF 12774 rts 12775 12776# 12777# The destination was an INF w/ an In Range or ZERO source, the result is 12778# an INF w/ the proper sign. 12779# The 68881/882 returns the destination INF w/ the new sign(if the j-bit of the 12780# dst INF is set, then then j-bit of the result INF is also set). 
12781# 12782 global fdiv_inf_dst # global for fsgldiv 12783fdiv_inf_dst: 12784 mov.b DST_EX(%a1),%d0 # load both signs 12785 mov.b SRC_EX(%a0),%d1 12786 eor.b %d0,%d1 12787 bpl.b fdiv_inf_dst_p # result is positive 12788 12789 fmovm.x DST(%a1),&0x80 # return result in fp0 12790 fabs.x %fp0 # clear sign bit 12791 fneg.x %fp0 # set sign bit 12792 mov.b &inf_bmask+neg_bmask,FPSR_CC(%a6) # set INF/NEG 12793 rts 12794 12795fdiv_inf_dst_p: 12796 fmovm.x DST(%a1),&0x80 # return result in fp0 12797 fabs.x %fp0 # return positive INF 12798 mov.b &inf_bmask,FPSR_CC(%a6) # set INF 12799 rts 12800 12801######################################################################### 12802# XDEF **************************************************************** # 12803# fneg(): emulates the fneg instruction # 12804# fsneg(): emulates the fsneg instruction # 12805# fdneg(): emulates the fdneg instruction # 12806# # 12807# XREF **************************************************************** # 12808# norm() - normalize a denorm to provide EXOP # 12809# scale_to_zero_src() - scale sgl/dbl source exponent # 12810# ovf_res() - return default overflow result # 12811# unf_res() - return default underflow result # 12812# res_qnan_1op() - return QNAN result # 12813# res_snan_1op() - return SNAN result # 12814# # 12815# INPUT *************************************************************** # 12816# a0 = pointer to extended precision source operand # 12817# d0 = rnd prec,mode # 12818# # 12819# OUTPUT ************************************************************** # 12820# fp0 = result # 12821# fp1 = EXOP (if exception occurred) # 12822# # 12823# ALGORITHM *********************************************************** # 12824# Handle NANs, zeroes, and infinities as special cases. Separate # 12825# norms/denorms into ext/sgl/dbl precisions. Extended precision can be # 12826# emulated by simply setting sign bit. 
#	Sgl/dbl operands must be scaled					#
#	and an actual fneg performed to see if overflow/underflow would have	#
#	occurred. If so, return default underflow/overflow result. Else,	#
#	scale the result exponent and return result. FPSR gets set based on	#
#	the result value.						#
#									#
#########################################################################

	global		fsneg
fsneg:
	andi.b		&0x30,%d0		# clear rnd prec
	ori.b		&s_mode*0x10,%d0	# insert sgl precision
	bra.b		fneg

	global		fdneg
fdneg:
	andi.b		&0x30,%d0		# clear rnd prec
	ori.b		&d_mode*0x10,%d0	# insert dbl prec
# falls through into fneg

	global		fneg
fneg:
	mov.l		%d0,L_SCR3(%a6)		# store rnd info
	mov.b		STAG(%a6),%d1
	bne.w		fneg_not_norm		# optimize on non-norm input

#
# NEGATE SIGN : norms and denorms ONLY!
#
fneg_norm:
	andi.b		&0xc0,%d0		# is precision extended?
	bne.w		fneg_not_ext		# no; go handle sgl or dbl

#
# precision selected is extended. so...we can not get an underflow
# or overflow because of rounding to the correct precision. so...
# skip the scaling and unscaling...
#
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	mov.w		SRC_EX(%a0),%d0
	eori.w		&0x8000,%d0		# negate sign
	bpl.b		fneg_norm_load		# sign is positive
	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
fneg_norm_load:
	mov.w		%d0,FP_SCR0_EX(%a6)
	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
	rts

#
# for an extended precision DENORM, the UNFL exception bit is set
# the accrued bit is NOT set in this instance(no inexactness!)
#
fneg_denorm:
	andi.b		&0xc0,%d0		# is precision extended?
	bne.b		fneg_not_ext		# no; go handle sgl or dbl

	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit

	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	mov.w		SRC_EX(%a0),%d0
	eori.w		&0x8000,%d0		# negate sign
	bpl.b		fneg_denorm_done	# result is positive
	mov.b		&neg_bmask,FPSR_CC(%a6)	# negative; set 'N' ccode bit
fneg_denorm_done:
	mov.w		%d0,FP_SCR0_EX(%a6)
	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0

	btst		&unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled?
	bne.b		fneg_ext_unfl_ena	# yes
	rts

#
# the input is an extended DENORM and underflow is enabled in the FPCR.
# normalize the mantissa and add the bias of 0x6000 to the resulting negative
# exponent and insert back into the operand.
#
fneg_ext_unfl_ena:
	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
	bsr.l		norm			# normalize result
	neg.w		%d0			# new exponent = -(shft val)
	addi.w		&0x6000,%d0		# add new bias to exponent
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch old sign,exp
	andi.w		&0x8000,%d1		# keep old sign
	andi.w		&0x7fff,%d0		# clear sign position
	or.w		%d1,%d0			# concat old sign, new exponent
	mov.w		%d0,FP_SCR0_EX(%a6)	# insert new exponent
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	rts

#
# operand is either single or double
#
fneg_not_ext:
	cmpi.b		%d0,&s_mode*0x10	# separate sgl/dbl prec
	bne.b		fneg_dbl

#
# operand is to be rounded to single precision
#
fneg_sgl:
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	bsr.l		scale_to_zero_src	# calculate scale factor

	cmpi.l		%d0,&0x3fff-0x3f80	# will move in underflow?
	bge.w		fneg_sd_unfl		# yes; go handle underflow
	cmpi.l		%d0,&0x3fff-0x407e	# will move in overflow?
	beq.w		fneg_sd_may_ovfl	# maybe; go check
	blt.w		fneg_sd_ovfl		# yes; go handle overflow

#
# operand will NOT overflow or underflow when moved in to the fp reg file
#
fneg_sd_normal:
	fmov.l		&0x0,%fpsr		# clear FPSR
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

	fneg.x		FP_SCR0(%a6),%fp0	# perform negation

	fmov.l		%fpsr,%d1		# save FPSR
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

# entered with d0 = scale factor and fp0 = scaled result
fneg_sd_normal_exit:
	mov.l		%d2,-(%sp)		# save d2
	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
	mov.w		FP_SCR0_EX(%a6),%d1	# load sgn,exp
	mov.w		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	sub.l		%d0,%d1			# apply scale factor
	andi.w		&0x8000,%d2		# keep old sign
	or.w		%d1,%d2			# concat old sign,new exp
	mov.w		%d2,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
	rts

#
# operand is to be rounded to double precision
#
fneg_dbl:
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	bsr.l		scale_to_zero_src	# calculate scale factor

	cmpi.l		%d0,&0x3fff-0x3c00	# will move in underflow?
	bge.b		fneg_sd_unfl		# yes; go handle underflow
	cmpi.l		%d0,&0x3fff-0x43fe	# will move in overflow?
	beq.w		fneg_sd_may_ovfl	# maybe; go check
	blt.w		fneg_sd_ovfl		# yes; go handle overflow
	bra.w		fneg_sd_normal		# no; go handle normalized op

#
# operand WILL underflow when moved in to the fp register file
#
fneg_sd_unfl:
	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit

	eori.b		&0x80,FP_SCR0_EX(%a6)	# negate sign
	bpl.b		fneg_sd_unfl_tst
	bset		&neg_bit,FPSR_CC(%a6)	# set 'N' ccode bit

# if underflow or inexact is enabled, go calculate EXOP first.
fneg_sd_unfl_tst:
	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
	bne.b		fneg_sd_unfl_ena	# yes

fneg_sd_unfl_dis:
	lea		FP_SCR0(%a6),%a0	# pass: result addr
	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
	bsr.l		unf_res			# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# unf_res may have set 'Z'
	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
	rts

#
# operand will underflow AND underflow is enabled.
# therefore, we must return the result rounded to extended precision.
#
fneg_sd_unfl_ena:
	mov.l		FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
	mov.l		FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
	mov.w		FP_SCR0_EX(%a6),%d1	# load current exponent

	mov.l		%d2,-(%sp)		# save d2
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# subtract scale factor
	addi.l		&0x6000,%d1		# add new bias
	andi.w		&0x7fff,%d1
	or.w		%d2,%d1			# concat new sign,new exp
	mov.w		%d1,FP_SCR1_EX(%a6)	# insert new exp
	fmovm.x		FP_SCR1(%a6),&0x40	# return EXOP in fp1
	mov.l		(%sp)+,%d2		# restore d2
	bra.b		fneg_sd_unfl_dis

#
# operand WILL overflow.
#
fneg_sd_ovfl:
	fmov.l		&0x0,%fpsr		# clear FPSR
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

	fneg.x		FP_SCR0(%a6),%fp0	# perform negation

	fmov.l		&0x0,%fpcr		# clear FPCR
	fmov.l		%fpsr,%d1		# save FPSR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

fneg_sd_ovfl_tst:
	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
	bne.b		fneg_sd_ovfl_ena	# yes

#
# OVFL is not enabled; therefore, we must create the default result by
# calling ovf_res().
#
fneg_sd_ovfl_dis:
	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
	sne		%d1			# set sign param accordingly
	mov.l		L_SCR3(%a6),%d0		# pass: prec,mode
	bsr.l		ovf_res			# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
	fmovm.x		(%a0),&0x80		# return default result in fp0
	rts

#
# OVFL is enabled.
# the INEX2 bit has already been updated by the round to the correct precision.
# now, round to extended(and don't alter the FPSR).
#
fneg_sd_ovfl_ena:
	mov.l		%d2,-(%sp)		# save d2
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# apply scale factor
	subi.l		&0x6000,%d1		# subtract bias
	andi.w		&0x7fff,%d1
	or.w		%d2,%d1			# concat sign,exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	mov.l		(%sp)+,%d2		# restore d2
	bra.b		fneg_sd_ovfl_dis

#
# the negate MAY overflow. so...
#
fneg_sd_may_ovfl:
	fmov.l		&0x0,%fpsr		# clear FPSR
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

	fneg.x		FP_SCR0(%a6),%fp0	# perform negation

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

	fabs.x		%fp0,%fp1		# make a copy of result
	fcmp.b		%fp1,&0x2		# is |result| >= 2.b?
	fbge.w		fneg_sd_ovfl_tst	# yes; overflow has occurred

# no, it didn't overflow; we have correct result
	bra.w		fneg_sd_normal_exit

##########################################################################

#
# input is not normalized; what is it?
#
fneg_not_norm:
	cmpi.b		%d1,&DENORM		# weed out DENORM
	beq.w		fneg_denorm
	cmpi.b		%d1,&SNAN		# weed out SNAN
	beq.l		res_snan_1op
	cmpi.b		%d1,&QNAN		# weed out QNAN
	beq.l		res_qnan_1op

#
# do the fneg; at this point, only possible ops are ZERO and INF.
# use fneg to determine ccodes.
# prec:mode should be zero at this point but it won't affect answer anyways.
#
	fneg.x		SRC_EX(%a0),%fp0	# do fneg
	fmov.l		%fpsr,%d0
	rol.l		&0x8,%d0		# put ccodes in lo byte
	mov.b		%d0,FPSR_CC(%a6)	# insert correct ccodes
	rts

#########################################################################
# XDEF ****************************************************************	#
#	ftst(): emulates the ftst instruction				#
#									#
# XREF ****************************************************************	#
#	res_{s,q}nan_1op() - set NAN result for monadic instruction	#
#									#
# INPUT ***************************************************************	#
#	a0 = pointer to extended precision source operand		#
#									#
# OUTPUT **************************************************************	#
#	none								#
#									#
# ALGORITHM ***********************************************************	#
#	Check the source operand tag (STAG) and set the FPSR condition	#
#	codes according to the operand type and sign.			#
#									#
#########################################################################

	global		ftst
ftst:
	mov.b		STAG(%a6),%d1
	bne.b		ftst_not_norm		# optimize on non-norm input

#
# Norm:
#
ftst_norm:
	tst.b		SRC_EX(%a0)		# is operand negative?
	bmi.b		ftst_norm_m		# yes
	rts
ftst_norm_m:
	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
	rts

#
# input is not normalized; what is it?
#
ftst_not_norm:
	cmpi.b		%d1,&ZERO		# weed out ZERO
	beq.b		ftst_zero
	cmpi.b		%d1,&INF		# weed out INF
	beq.b		ftst_inf
	cmpi.b		%d1,&SNAN		# weed out SNAN
	beq.l		res_snan_1op
	cmpi.b		%d1,&QNAN		# weed out QNAN
	beq.l		res_qnan_1op

#
# Denorm:
#
ftst_denorm:
	tst.b		SRC_EX(%a0)		# is operand negative?
	bmi.b		ftst_denorm_m		# yes
	rts
ftst_denorm_m:
	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
	rts

#
# Infinity:
#
ftst_inf:
	tst.b		SRC_EX(%a0)		# is operand negative?
	bmi.b		ftst_inf_m		# yes
ftst_inf_p:
	mov.b		&inf_bmask,FPSR_CC(%a6)	# set 'I' ccode bit
	rts
ftst_inf_m:
	mov.b		&inf_bmask+neg_bmask,FPSR_CC(%a6) # set 'I','N' ccode bits
	rts

#
# Zero:
#
ftst_zero:
	tst.b		SRC_EX(%a0)		# is operand negative?
	bmi.b		ftst_zero_m		# yes
ftst_zero_p:
	mov.b		&z_bmask,FPSR_CC(%a6)	# set 'Z' ccode bit
	rts
ftst_zero_m:
	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6)	# set 'Z','N' ccode bits
	rts

#########################################################################
# XDEF ****************************************************************	#
#	fint(): emulates the fint instruction				#
#									#
# XREF ****************************************************************	#
#	res_{s,q}nan_1op() - set NAN result for monadic operation	#
#									#
# INPUT ***************************************************************	#
#	a0 = pointer to extended precision source operand		#
#	d0 = round precision/mode					#
#									#
# OUTPUT **************************************************************	#
#	fp0 = result							#
#									#
# ALGORITHM ***********************************************************	#
#	Separate according to operand type. Unnorms don't pass through	#
#	here.
#	For norms, load the rounding mode/prec, execute a "fint", then	#
#	store the resulting FPSR bits.					#
#	For denorms, force the j-bit to a one and do the same as for	#
#	norms. Denorms are so low that the answer will either be a zero or a	#
#	one.								#
#	For zeroes/infs/NANs, return the same while setting the FPSR	#
#	as appropriate.							#
#									#
#########################################################################

	global		fint
fint:
	mov.b		STAG(%a6),%d1
	bne.b		fint_not_norm		# optimize on non-norm input

#
# Norm:
#
fint_norm:
	andi.b		&0x30,%d0		# set prec = ext

	fmov.l		%d0,%fpcr		# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fint.x		SRC(%a0),%fp0		# execute fint

	fmov.l		&0x0,%fpcr		# clear FPCR
	fmov.l		%fpsr,%d0		# save FPSR
	or.l		%d0,USER_FPSR(%a6)	# set exception bits

	rts

#
# input is not normalized; what is it?
#
fint_not_norm:
	cmpi.b		%d1,&ZERO		# weed out ZERO
	beq.b		fint_zero
	cmpi.b		%d1,&INF		# weed out INF
	beq.b		fint_inf
	cmpi.b		%d1,&DENORM		# weed out DENORM
	beq.b		fint_denorm
	cmpi.b		%d1,&SNAN		# weed out SNAN
	beq.l		res_snan_1op
	bra.l		res_qnan_1op		# weed out QNAN

#
# Denorm:
#
# for DENORMs, the result will be either (+/-)ZERO or (+/-)1.
# also, the INEX2 and AINEX exception bits will be set.
# so, we could either set these manually or force the DENORM
# to a very small NORM and ship it to the NORM routine.
# I do the latter.
#
fint_denorm:
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6) # copy sign, zero exp
	mov.b		&0x80,FP_SCR0_HI(%a6)	# force DENORM ==> small NORM
	lea		FP_SCR0(%a6),%a0
	bra.b		fint_norm

#
# Zero:
#
fint_zero:
	tst.b		SRC_EX(%a0)		# is ZERO negative?
	bmi.b		fint_zero_m		# yes
fint_zero_p:
	fmov.s		&0x00000000,%fp0	# return +ZERO in fp0
	mov.b		&z_bmask,FPSR_CC(%a6)	# set 'Z' ccode bit
	rts
fint_zero_m:
	fmov.s		&0x80000000,%fp0	# return -ZERO in fp0
	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits
	rts

#
# Infinity:
#
fint_inf:
	fmovm.x		SRC(%a0),&0x80		# return result in fp0
	tst.b		SRC_EX(%a0)		# is INF negative?
	bmi.b		fint_inf_m		# yes
fint_inf_p:
	mov.b		&inf_bmask,FPSR_CC(%a6)	# set 'I' ccode bit
	rts
fint_inf_m:
	mov.b		&inf_bmask+neg_bmask,FPSR_CC(%a6) # set 'N','I' ccode bits
	rts

#########################################################################
# XDEF ****************************************************************	#
#	fintrz(): emulates the fintrz instruction			#
#									#
# XREF ****************************************************************	#
#	res_{s,q}nan_1op() - set NAN result for monadic operation	#
#									#
# INPUT ***************************************************************	#
#	a0 = pointer to extended precision source operand		#
#	d0 = round precision/mode					#
#									#
# OUTPUT **************************************************************	#
#	fp0 = result							#
#									#
# ALGORITHM ***********************************************************	#
#	Separate according to operand type. Unnorms don't pass through	#
#	here. For norms, clear the FPSR, execute a "fintrz",		#
#	then store the resulting FPSR bits.				#
#	For denorms, force the j-bit to a one and do the same as for	#
#	norms. Denorms are so low that the answer will be a zero.	#
#	For zeroes/infs/NANs, return the same while setting the FPSR	#
#	as appropriate.
#
#									#
#########################################################################

	global		fintrz
fintrz:
	mov.b		STAG(%a6),%d1		# d1 = source operand tag
	bne.b		fintrz_not_norm		# optimize on non-norm input

#
# Norm:
#
# note: unlike fint_norm, this path does not write the FPCR; it only
# clears the FPSR before executing fintrz, so d0 (rnd prec/mode) is not
# consumed here.
#
fintrz_norm:
	fmov.l		&0x0,%fpsr		# clear FPSR

	fintrz.x	SRC(%a0),%fp0		# execute fintrz

	fmov.l		%fpsr,%d0		# save FPSR
	or.l		%d0,USER_FPSR(%a6)	# set exception bits

	rts

#
# input is not normalized; what is it?
#
# d1 still holds the source tag; anything that is not ZERO, INF, DENORM
# or SNAN falls through to the QNAN handler.
#
fintrz_not_norm:
	cmpi.b		%d1,&ZERO		# weed out ZERO
	beq.b		fintrz_zero
	cmpi.b		%d1,&INF		# weed out INF
	beq.b		fintrz_inf
	cmpi.b		%d1,&DENORM		# weed out DENORM
	beq.b		fintrz_denorm
	cmpi.b		%d1,&SNAN		# weed out SNAN
	beq.l		res_snan_1op
	bra.l		res_qnan_1op		# weed out QNAN

#
# Denorm:
#
# for DENORMs, the result will be (+/-)ZERO.
# also, the INEX2 and AINEX exception bits will be set.
# so, we could either set these manually or force the DENORM
# to a very small NORM and ship it to the NORM routine.
# I do the latter.
#
fintrz_denorm:
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6) # copy sign, zero exp
	mov.b		&0x80,FP_SCR0_HI(%a6)	# force DENORM ==> small NORM
	lea		FP_SCR0(%a6),%a0	# a0 = ptr to the patched copy
	bra.b		fintrz_norm

#
# Zero:
#
# return a zero of the same sign as the source and set the ccodes by hand.
#
fintrz_zero:
	tst.b		SRC_EX(%a0)		# is ZERO negative?
	bmi.b		fintrz_zero_m		# yes
fintrz_zero_p:
	fmov.s		&0x00000000,%fp0	# return +ZERO in fp0
	mov.b		&z_bmask,FPSR_CC(%a6)	# set 'Z' ccode bit
	rts
fintrz_zero_m:
	fmov.s		&0x80000000,%fp0	# return -ZERO in fp0
	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits
	rts

#
# Infinity:
#
# return the source INF unchanged and set the ccodes by hand.
#
fintrz_inf:
	fmovm.x		SRC(%a0),&0x80		# return result in fp0
	tst.b		SRC_EX(%a0)		# is INF negative?
	bmi.b		fintrz_inf_m		# yes
fintrz_inf_p:
	mov.b		&inf_bmask,FPSR_CC(%a6)	# set 'I' ccode bit
	rts
fintrz_inf_m:
	mov.b		&inf_bmask+neg_bmask,FPSR_CC(%a6) # set 'N','I' ccode bits
	rts

#########################################################################
# XDEF ****************************************************************	#
#	fabs():  emulates the fabs instruction				#
#	fsabs(): emulates the fsabs instruction				#
#	fdabs(): emulates the fdabs instruction				#
#									#
# XREF ****************************************************************	#
#	norm() - normalize denorm mantissa to provide EXOP		#
#	scale_to_zero_src() - make exponent. = 0; get scale factor	#
#	unf_res() - calculate underflow result				#
#	ovf_res() - calculate overflow result				#
#	res_{s,q}nan_1op() - set NAN result for monadic operation	#
#									#
# INPUT ***************************************************************	#
#	a0 = pointer to extended precision source operand		#
#	d0 = rnd precision/mode						#
#									#
# OUTPUT **************************************************************	#
#	fp0 = result							#
#	fp1 = EXOP (if exception occurred)				#
#									#
# ALGORITHM ***********************************************************	#
#	Handle NANs, infinities, and zeroes as special cases. Divide	#
#	norms into extended, single, and double precision.		#
#	Simply clear sign for extended precision norm. Ext prec denorm	#
#	gets an EXOP created for it since it's an underflow.		#
#	Double and single precision can overflow and underflow. First,	#
#	scale the operand such that the exponent is zero. Perform an "fabs"	#
#	using the correct rnd mode/prec. Check to see if the original	#
#	exponent would take an exception. If so, use unf_res() or ovf_res()	#
#	to calculate the default result. Also, create the EXOP for the	#
#	exceptional case.
If no exception should occur, insert the correct	#
#	result exponent and return.					#
#	Unnorms don't pass through here.				#
#									#
#########################################################################

#
# fsabs/fdabs: replace the rounding precision field of d0 with single or
# double precision, then share the fabs body below.
#
	global		fsabs
fsabs:
	andi.b		&0x30,%d0		# clear rnd prec
	ori.b		&s_mode*0x10,%d0	# insert sgl precision
	bra.b		fabs

	global		fdabs
fdabs:
	andi.b		&0x30,%d0		# clear rnd prec
	ori.b		&d_mode*0x10,%d0	# insert dbl precision
						# fall through into fabs

	global		fabs
fabs:
	mov.l		%d0,L_SCR3(%a6)		# store rnd info
	mov.b		STAG(%a6),%d1		# d1 = source operand tag
	bne.w		fabs_not_norm		# optimize on non-norm input

#
# ABSOLUTE VALUE: norms and denorms ONLY!
#
fabs_norm:
	andi.b		&0xc0,%d0		# is precision extended?
	bne.b		fabs_not_ext		# no; go handle sgl or dbl

#
# precision selected is extended. so...we can not get an underflow
# or overflow because of rounding to the correct precision. so...
# skip the scaling and unscaling...
#
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	mov.w		SRC_EX(%a0),%d1
	bclr		&15,%d1			# force absolute value
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert exponent
	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
	rts

#
# for an extended precision DENORM, the UNFL exception bit is set
# the accrued bit is NOT set in this instance(no inexactness!)
#
fabs_denorm:
	andi.b		&0xc0,%d0		# is precision extended?
	bne.b		fabs_not_ext		# no

	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit

	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	mov.w		SRC_EX(%a0),%d0
	bclr		&15,%d0			# clear sign
	mov.w		%d0,FP_SCR0_EX(%a6)	# insert exponent

	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0

	btst		&unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled?
	bne.b		fabs_ext_unfl_ena
	rts

#
# the input is an extended DENORM and underflow is enabled in the FPCR.
# normalize the mantissa and add the bias of 0x6000 to the resulting negative
# exponent and insert back into the operand.
#
fabs_ext_unfl_ena:
	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
	bsr.l		norm			# normalize result
	neg.w		%d0			# new exponent = -(shft val)
	addi.w		&0x6000,%d0		# add new bias to exponent
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch old sign,exp
	andi.w		&0x8000,%d1		# keep old sign
	andi.w		&0x7fff,%d0		# clear sign position
	or.w		%d1,%d0			# concat old sign, new exponent
	mov.w		%d0,FP_SCR0_EX(%a6)	# insert new exponent
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	rts

#
# operand is either single or double
#
fabs_not_ext:
	cmpi.b		%d0,&s_mode*0x10	# separate sgl/dbl prec
	bne.b		fabs_dbl

#
# operand is to be rounded to single precision
#
fabs_sgl:
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	bsr.l		scale_to_zero_src	# calculate scale factor

	cmpi.l		%d0,&0x3fff-0x3f80	# will move in underflow?
	bge.w		fabs_sd_unfl		# yes; go handle underflow
	cmpi.l		%d0,&0x3fff-0x407e	# will move in overflow?
	beq.w		fabs_sd_may_ovfl	# maybe; go check
	blt.w		fabs_sd_ovfl		# yes; go handle overflow

#
# operand will NOT overflow or underflow when moved in to the fp reg file
#
fabs_sd_normal:
	fmov.l		&0x0,%fpsr		# clear FPSR
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

	fabs.x		FP_SCR0(%a6),%fp0	# perform absolute

	fmov.l		%fpsr,%d1		# save FPSR
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

#
# d0 still holds the scale factor here; it is subtracted from the
# exponent of the scaled result to undo the scaling.
#
fabs_sd_normal_exit:
	mov.l		%d2,-(%sp)		# save d2
	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
	mov.w		FP_SCR0_EX(%a6),%d1	# load sgn,exp
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	sub.l		%d0,%d1			# subtract scale factor
	andi.w		&0x8000,%d2		# keep old sign
	or.w		%d1,%d2			# concat old sign,new exp
	mov.w		%d2,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
	rts

#
# operand is to be rounded to double precision
#
fabs_dbl:
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	bsr.l		scale_to_zero_src	# calculate scale factor

	cmpi.l		%d0,&0x3fff-0x3c00	# will move in underflow?
	bge.b		fabs_sd_unfl		# yes; go handle underflow
	cmpi.l		%d0,&0x3fff-0x43fe	# will move in overflow?
	beq.w		fabs_sd_may_ovfl	# maybe; go check
	blt.w		fabs_sd_ovfl		# yes; go handle overflow
	bra.w		fabs_sd_normal		# no; go handle normalized op

#
# operand WILL underflow when moved in to the fp register file
#
fabs_sd_unfl:
	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit

	bclr		&0x7,FP_SCR0_EX(%a6)	# force absolute value

# if underflow or inexact is enabled, go calculate EXOP first.
	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
	bne.b		fabs_sd_unfl_ena	# yes

fabs_sd_unfl_dis:
	lea		FP_SCR0(%a6),%a0	# pass: result addr
	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
	bsr.l		unf_res			# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# set possible 'Z' ccode
	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
	rts

#
# operand will underflow AND underflow is enabled.
# therefore, we must return the result rounded to extended precision.
# the EXOP is built in FP_SCR1 and returned in fp1; control then rejoins
# fabs_sd_unfl_dis to produce the default result in fp0.
#
fabs_sd_unfl_ena:
	mov.l		FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
	mov.l		FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
	mov.w		FP_SCR0_EX(%a6),%d1	# load current exponent

	mov.l		%d2,-(%sp)		# save d2
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# subtract scale factor
	addi.l		&0x6000,%d1		# add new bias
	andi.w		&0x7fff,%d1
	or.w		%d2,%d1			# concat new sign,new exp
	mov.w		%d1,FP_SCR1_EX(%a6)	# insert new exp
	fmovm.x		FP_SCR1(%a6),&0x40	# return EXOP in fp1
	mov.l		(%sp)+,%d2		# restore d2
	bra.b		fabs_sd_unfl_dis

#
# operand WILL overflow.
#
fabs_sd_ovfl:
	fmov.l		&0x0,%fpsr		# clear FPSR
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

	fabs.x		FP_SCR0(%a6),%fp0	# perform absolute

	fmov.l		&0x0,%fpcr		# clear FPCR
	fmov.l		%fpsr,%d1		# save FPSR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

# fabs_sd_may_ovfl also branches here once it has detected an overflow.
fabs_sd_ovfl_tst:
	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
	bne.b		fabs_sd_ovfl_ena	# yes

#
# OVFL is not enabled; therefore, we must create the default result by
# calling ovf_res().
#
fabs_sd_ovfl_dis:
	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
	sne		%d1			# set sign param accordingly
	mov.l		L_SCR3(%a6),%d0		# pass: prec,mode
	bsr.l		ovf_res			# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
	fmovm.x		(%a0),&0x80		# return default result in fp0
	rts

#
# OVFL is enabled.
# the INEX2 bit has already been updated by the round to the correct precision.
# now, round to extended(and don't alter the FPSR).
#
fabs_sd_ovfl_ena:
	mov.l		%d2,-(%sp)		# save d2
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# subtract scale factor
	subi.l		&0x6000,%d1		# subtract bias
	andi.w		&0x7fff,%d1
	or.w		%d2,%d1			# concat sign,exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	mov.l		(%sp)+,%d2		# restore d2
	bra.b		fabs_sd_ovfl_dis

#
# the move in MAY overflow. so...
#
fabs_sd_may_ovfl:
	fmov.l		&0x0,%fpsr		# clear FPSR
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

	fabs.x		FP_SCR0(%a6),%fp0	# perform absolute

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

	fabs.x		%fp0,%fp1		# make a copy of result
	fcmp.b		%fp1,&0x2		# is |result| >= 2.b?
	fbge.w		fabs_sd_ovfl_tst	# yes; overflow has occurred

# no, it didn't overflow; we have correct result
	bra.w		fabs_sd_normal_exit

##########################################################################

#
# input is not normalized; what is it?
#
# d1 still holds the source tag. after DENORM/SNAN/QNAN are weeded out,
# only INF and ZERO remain; the fabs instruction clears the sign and the
# ccodes are set by hand.
#
fabs_not_norm:
	cmpi.b		%d1,&DENORM		# weed out DENORM
	beq.w		fabs_denorm
	cmpi.b		%d1,&SNAN		# weed out SNAN
	beq.l		res_snan_1op
	cmpi.b		%d1,&QNAN		# weed out QNAN
	beq.l		res_qnan_1op

	fabs.x		SRC(%a0),%fp0		# force absolute value

	cmpi.b		%d1,&INF		# weed out INF
	beq.b		fabs_inf
fabs_zero:
	mov.b		&z_bmask,FPSR_CC(%a6)	# set 'Z' ccode bit
	rts
fabs_inf:
	mov.b		&inf_bmask,FPSR_CC(%a6)	# set 'I' ccode bit
	rts

#########################################################################
# XDEF ****************************************************************	#
#	fcmp(): fp compare op routine					#
#									#
# XREF ****************************************************************	#
#	res_qnan() - return QNAN result					#
#	res_snan() - return SNAN result					#
#									#
# INPUT ***************************************************************	#
#	a0 = pointer to extended precision source operand		#
#	a1 = pointer to extended precision destination operand		#
#	d0 = round prec/mode						#
#									#
# OUTPUT **************************************************************	#
#	None								#
#									#
# ALGORITHM ***********************************************************	#
#	Handle NANs and denorms as special cases. For everything else,	#
#	just use the actual fcmp instruction to produce the correct condition	#
#	codes.
#
#									#
#########################################################################

	global		fcmp
fcmp:
	clr.w		%d1
	mov.b		DTAG(%a6),%d1
	lsl.b		&0x3,%d1
	or.b		STAG(%a6),%d1		# d1 = (DTAG << 3) | STAG
	bne.b		fcmp_not_norm		# optimize on non-norm input

#
# COMPARE FP OPs : NORMs, ZEROs, INFs, and "corrected" DENORMs
#
fcmp_norm:
	fmovm.x		DST(%a1),&0x80		# load dst op

	fcmp.x		%fp0,SRC(%a0)		# do compare

	fmov.l		%fpsr,%d0		# save FPSR
	rol.l		&0x8,%d0		# extract ccode bits
	mov.b		%d0,FPSR_CC(%a6)	# set ccode bits(no exc bits are set)

	rts

#
# fcmp: inputs are not both normalized; what are they?
#
# d1 = (DTAG << 3) | STAG indexes a table of 16-bit offsets that are
# relative to tbl_fcmp_op; fetch the offset, then jump through it.
#
fcmp_not_norm:
	mov.w		(tbl_fcmp_op.b,%pc,%d1.w*2),%d1
	jmp		(tbl_fcmp_op.b,%pc,%d1.w*1)

	swbeg		&48
tbl_fcmp_op:
	short		fcmp_norm	- tbl_fcmp_op # NORM - NORM
	short		fcmp_norm	- tbl_fcmp_op # NORM - ZERO
	short		fcmp_norm	- tbl_fcmp_op # NORM - INF
	short		fcmp_res_qnan	- tbl_fcmp_op # NORM - QNAN
	short		fcmp_nrm_dnrm	- tbl_fcmp_op # NORM - DENORM
	short		fcmp_res_snan	- tbl_fcmp_op # NORM - SNAN
	short		tbl_fcmp_op	- tbl_fcmp_op #
	short		tbl_fcmp_op	- tbl_fcmp_op #

	short		fcmp_norm	- tbl_fcmp_op # ZERO - NORM
	short		fcmp_norm	- tbl_fcmp_op # ZERO - ZERO
	short		fcmp_norm	- tbl_fcmp_op # ZERO - INF
	short		fcmp_res_qnan	- tbl_fcmp_op # ZERO - QNAN
	short		fcmp_dnrm_s	- tbl_fcmp_op # ZERO - DENORM
	short		fcmp_res_snan	- tbl_fcmp_op # ZERO - SNAN
	short		tbl_fcmp_op	- tbl_fcmp_op #
	short		tbl_fcmp_op	- tbl_fcmp_op #

	short		fcmp_norm	- tbl_fcmp_op # INF - NORM
	short		fcmp_norm	- tbl_fcmp_op # INF - ZERO
	short		fcmp_norm	- tbl_fcmp_op # INF - INF
	short		fcmp_res_qnan	- tbl_fcmp_op # INF - QNAN
	short		fcmp_dnrm_s	- tbl_fcmp_op # INF - DENORM
	short		fcmp_res_snan	- tbl_fcmp_op # INF - SNAN
	short		tbl_fcmp_op	- tbl_fcmp_op #
	short		tbl_fcmp_op	- tbl_fcmp_op #

	short		fcmp_res_qnan	- tbl_fcmp_op # QNAN - NORM
	short		fcmp_res_qnan	- tbl_fcmp_op # QNAN - ZERO
	short		fcmp_res_qnan	- tbl_fcmp_op # QNAN - INF
	short		fcmp_res_qnan	- tbl_fcmp_op # QNAN - QNAN
	short		fcmp_res_qnan	- tbl_fcmp_op # QNAN - DENORM
	short		fcmp_res_snan	- tbl_fcmp_op # QNAN - SNAN
	short		tbl_fcmp_op	- tbl_fcmp_op #
	short		tbl_fcmp_op	- tbl_fcmp_op #

	short		fcmp_dnrm_nrm	- tbl_fcmp_op # DENORM - NORM
	short		fcmp_dnrm_d	- tbl_fcmp_op # DENORM - ZERO
	short		fcmp_dnrm_d	- tbl_fcmp_op # DENORM - INF
	short		fcmp_res_qnan	- tbl_fcmp_op # DENORM - QNAN
	short		fcmp_dnrm_sd	- tbl_fcmp_op # DENORM - DENORM
	short		fcmp_res_snan	- tbl_fcmp_op # DENORM - SNAN
	short		tbl_fcmp_op	- tbl_fcmp_op #
	short		tbl_fcmp_op	- tbl_fcmp_op #

	short		fcmp_res_snan	- tbl_fcmp_op # SNAN - NORM
	short		fcmp_res_snan	- tbl_fcmp_op # SNAN - ZERO
	short		fcmp_res_snan	- tbl_fcmp_op # SNAN - INF
	short		fcmp_res_snan	- tbl_fcmp_op # SNAN - QNAN
	short		fcmp_res_snan	- tbl_fcmp_op # SNAN - DENORM
	short		fcmp_res_snan	- tbl_fcmp_op # SNAN - SNAN
	short		tbl_fcmp_op	- tbl_fcmp_op #
	short		tbl_fcmp_op	- tbl_fcmp_op #

# unlike all other functions for QNAN and SNAN, fcmp does NOT set the
# 'N' bit for a negative QNAN or SNAN input so we must squelch it here.
fcmp_res_qnan:
	bsr.l		res_qnan
	andi.b		&0xf7,FPSR_CC(%a6)	# clear bit 3 of the ccode byte
	rts
fcmp_res_snan:
	bsr.l		res_snan
	andi.b		&0xf7,FPSR_CC(%a6)	# clear bit 3 of the ccode byte
	rts

#
# DENORMs are a little more difficult.
# If you have 2 DENORMs, then you can just force the j-bit to a one
# and use the fcmp_norm routine.
# If you have a DENORM and an INF or ZERO, just force the DENORM's j-bit to a one
# and use the fcmp_norm routine.
# If you have a DENORM and a NORM with opposite signs, then use fcmp_norm, also.
# But with a DENORM and a NORM of the same sign, the neg bit is set if the
# (1) signs are (+) and the DENORM is the dst or
# (2) signs are (-) and the DENORM is the src
#

# src is a DENORM: copy it to FP_SCR0 with the j-bit forced, repoint a0.
fcmp_dnrm_s:
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),%d0
	bset		&31,%d0			# DENORM src; make into small norm
	mov.l		%d0,FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	lea		FP_SCR0(%a6),%a0
	bra.w		fcmp_norm

# dst is a DENORM: copy it to FP_SCR0 with the j-bit forced, repoint a1.
# NOTE(review): this is the only exponent copy here done with mov.l; the
# sibling routines use mov.w. FP_SCR0_HI is rewritten right after --
# confirm the long move is intentional/harmless.
fcmp_dnrm_d:
	mov.l		DST_EX(%a1),FP_SCR0_EX(%a6)
	mov.l		DST_HI(%a1),%d0
	bset		&31,%d0			# DENORM dst; make into small norm
	mov.l		%d0,FP_SCR0_HI(%a6)
	mov.l		DST_LO(%a1),FP_SCR0_LO(%a6)
	lea		FP_SCR0(%a6),%a1
	bra.w		fcmp_norm

# both are DENORMs: dst copy goes to FP_SCR1, src copy goes to FP_SCR0.
fcmp_dnrm_sd:
	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		DST_HI(%a1),%d0
	bset		&31,%d0			# DENORM dst; make into small norm
	mov.l		%d0,FP_SCR1_HI(%a6)
	mov.l		SRC_HI(%a0),%d0
	bset		&31,%d0			# DENORM src; make into small norm
	mov.l		%d0,FP_SCR0_HI(%a6)
	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	lea		FP_SCR1(%a6),%a1
	lea		FP_SCR0(%a6),%a0
	bra.w		fcmp_norm

# dst is a NORM, src is a DENORM (table entry "NORM - DENORM").
fcmp_nrm_dnrm:
	mov.b		SRC_EX(%a0),%d0		# determine if like signs
	mov.b		DST_EX(%a1),%d1
	eor.b		%d0,%d1
	bmi.w		fcmp_dnrm_s

# signs are the same, so must determine the answer ourselves.
	tst.b		%d0			# is src op negative?
	bmi.b		fcmp_nrm_dnrm_m		# yes
	rts
fcmp_nrm_dnrm_m:
	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
	rts

# dst is a DENORM, src is a NORM (table entry "DENORM - NORM").
fcmp_dnrm_nrm:
	mov.b		SRC_EX(%a0),%d0		# determine if like signs
	mov.b		DST_EX(%a1),%d1
	eor.b		%d0,%d1
	bmi.w		fcmp_dnrm_d

# signs are the same, so must determine the answer ourselves.
	tst.b		%d0			# is src op negative?
	bpl.b		fcmp_dnrm_nrm_m		# no
	rts
fcmp_dnrm_nrm_m:
	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
	rts

#########################################################################
# XDEF ****************************************************************	#
#	fsglmul(): emulates the fsglmul instruction			#
#									#
# XREF ****************************************************************	#
#	scale_to_zero_src() - scale src exponent to zero		#
#	scale_to_zero_dst() - scale dst exponent to zero		#
#	unf_res4() - return default underflow result for sglop		#
#	ovf_res() - return default overflow result			#
#	res_qnan() - return QNAN result					#
#	res_snan() - return SNAN result					#
#									#
# INPUT ***************************************************************	#
#	a0 = pointer to extended precision source operand		#
#	a1 = pointer to extended precision destination operand		#
#	d0  rnd prec,mode						#
#									#
# OUTPUT **************************************************************	#
#	fp0 = result							#
#	fp1 = EXOP (if exception occurred)				#
#									#
# ALGORITHM ***********************************************************	#
#	Handle NANs, infinities, and zeroes as special cases. Divide	#
#	norms/denorms into ext/sgl/dbl precision.			#
#	For norms/denorms, scale the exponents such that a multiply	#
#	instruction won't cause an exception. Use the regular fsglmul to	#
#	compute a result. Check if the regular operands would have taken	#
#	an exception. If so, return the default overflow/underflow result	#
#	and return the EXOP if exceptions are enabled. Else, scale the	#
#	result operand to the proper exponent.
#
#									#
#########################################################################

	global		fsglmul
fsglmul:
	mov.l		%d0,L_SCR3(%a6)		# store rnd info

	clr.w		%d1
	mov.b		DTAG(%a6),%d1
	lsl.b		&0x3,%d1
	or.b		STAG(%a6),%d1		# d1 = (DTAG << 3) | STAG

	bne.w		fsglmul_not_norm	# optimize on non-norm input

# both operands are copied to scratch (dst -> FP_SCR1, src -> FP_SCR0)
# before being scaled; d0 ends up holding the combined scale factor.
fsglmul_norm:
	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)
	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)

	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)

	bsr.l		scale_to_zero_src	# scale exponent
	mov.l		%d0,-(%sp)		# save scale factor 1

	bsr.l		scale_to_zero_dst	# scale dst exponent

	add.l		(%sp)+,%d0		# SCALE_FACTOR = scale1 + scale2

	cmpi.l		%d0,&0x3fff-0x7ffe	# would result ovfl?
	beq.w		fsglmul_may_ovfl	# result may rnd to overflow
	blt.w		fsglmul_ovfl		# result will overflow

	cmpi.l		%d0,&0x3fff+0x0001	# would result unfl?
	beq.w		fsglmul_may_unfl	# result may rnd to no unfl
	bgt.w		fsglmul_unfl		# result will underflow

fsglmul_normal:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsglmul.x	FP_SCR0(%a6),%fp0	# execute sgl multiply

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

# d0 still holds the scale factor; subtract it from the exponent of the
# scaled result to undo the scaling.
fsglmul_normal_exit:
	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
	mov.l		%d2,-(%sp)		# save d2
	mov.w		FP_SCR0_EX(%a6),%d1	# load {sgn,exp}
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# subtract scale factor
	or.w		%d2,%d1			# concat old sign,new exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
	rts

fsglmul_ovfl:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsglmul.x	FP_SCR0(%a6),%fp0	# execute sgl multiply

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

fsglmul_ovfl_tst:

# save setting this until now because this is where fsglmul_may_ovfl may jump in
	or.l		&ovfl_inx_mask, USER_FPSR(%a6) # set ovfl/aovfl/ainex

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
	bne.b		fsglmul_ovfl_ena	# yes

fsglmul_ovfl_dis:
	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
	sne		%d1			# set sign param accordingly
	mov.l		L_SCR3(%a6),%d0		# pass prec:rnd
	andi.b		&0x30,%d0		# force prec = ext
	bsr.l		ovf_res			# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
	fmovm.x		(%a0),&0x80		# return default result in fp0
	rts

# OVFL or INEX is enabled: build the EXOP in fp1 from the scaled result,
# then rejoin fsglmul_ovfl_dis for the default result in fp0.
fsglmul_ovfl_ena:
	fmovm.x		&0x80,FP_SCR0(%a6)	# move result to stack

	mov.l		%d2,-(%sp)		# save d2
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	sub.l		%d0,%d1			# subtract scale factor
	subi.l		&0x6000,%d1		# subtract bias
	andi.w		&0x7fff,%d1
	andi.w		&0x8000,%d2		# keep old sign
	or.w		%d2,%d1			# concat old sign,new exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	bra.b		fsglmul_ovfl_dis

fsglmul_may_ovfl:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsglmul.x	FP_SCR0(%a6),%fp0	# execute sgl multiply

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

	fabs.x		%fp0,%fp1		# make a copy of result
	fcmp.b		%fp1,&0x2		# is |result| >= 2.b?
	fbge.w		fsglmul_ovfl_tst	# yes; overflow has occurred

# no, it didn't overflow; we have correct result
	bra.w		fsglmul_normal_exit

fsglmul_unfl:
	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit

	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsglmul.x	FP_SCR0(%a6),%fp0	# execute sgl multiply

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
	bne.b		fsglmul_unfl_ena	# yes

fsglmul_unfl_dis:
	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result

	lea		FP_SCR0(%a6),%a0	# pass: result addr
	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
	bsr.l		unf_res4		# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# 'Z' bit may have been set
	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
	rts

#
# UNFL is enabled.
#
# redo the multiply into fp1 with the user's rounding info, rebias the
# exponent to form the EXOP, then rejoin fsglmul_unfl_dis.
#
fsglmul_unfl_ena:
	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsglmul.x	FP_SCR0(%a6),%fp1	# execute sgl multiply

	fmov.l		&0x0,%fpcr		# clear FPCR

	fmovm.x		&0x40,FP_SCR0(%a6)	# save result to stack
	mov.l		%d2,-(%sp)		# save d2
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# subtract scale factor
	addi.l		&0x6000,%d1		# add bias
	andi.w		&0x7fff,%d1
	or.w		%d2,%d1			# concat old sign,new exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	bra.w		fsglmul_unfl_dis

fsglmul_may_unfl:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsglmul.x	FP_SCR0(%a6),%fp0	# execute sgl multiply

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

	fabs.x		%fp0,%fp1		# make a copy of result
	fcmp.b		%fp1,&0x2		# is |result| > 2.b?
	fbgt.w		fsglmul_normal_exit	# yes; no underflow occurred
	fblt.w		fsglmul_unfl		# |result| < 2.b; underflow occurred

#
# we still don't know if underflow occurred. result is ~ equal to 2. but,
# we don't know if the result was an underflow that rounded up to a 2 or
# a normalized number that rounded down to a 2. so, redo the entire operation
# using RZ as the rounding mode to see what the pre-rounded result is.
# this case should be relatively rare.
#
	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into fp1

	mov.l		L_SCR3(%a6),%d1
	andi.b		&0xc0,%d1		# keep rnd prec
	ori.b		&rz_mode*0x10,%d1	# insert RZ

	fmov.l		%d1,%fpcr		# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsglmul.x	FP_SCR0(%a6),%fp1	# execute sgl multiply

	fmov.l		&0x0,%fpcr		# clear FPCR
	fabs.x		%fp1			# make absolute value
	fcmp.b		%fp1,&0x2		# is |result| < 2.b?
	fbge.w		fsglmul_normal_exit	# no; no underflow occurred
	bra.w		fsglmul_unfl		# yes, underflow occurred

##############################################################################

#
# Single Precision Multiply: inputs are not both normalized; what are they?
#
# d1 = (DTAG << 3) | STAG indexes a table of 16-bit offsets that are
# relative to tbl_fsglmul_op; fetch the offset, then jump through it.
#
fsglmul_not_norm:
	mov.w		(tbl_fsglmul_op.b,%pc,%d1.w*2),%d1
	jmp		(tbl_fsglmul_op.b,%pc,%d1.w*1)

	swbeg		&48
tbl_fsglmul_op:
	short		fsglmul_norm		- tbl_fsglmul_op # NORM x NORM
	short		fsglmul_zero		- tbl_fsglmul_op # NORM x ZERO
	short		fsglmul_inf_src		- tbl_fsglmul_op # NORM x INF
	short		fsglmul_res_qnan	- tbl_fsglmul_op # NORM x QNAN
	short		fsglmul_norm		- tbl_fsglmul_op # NORM x DENORM
	short		fsglmul_res_snan	- tbl_fsglmul_op # NORM x SNAN
	short		tbl_fsglmul_op		- tbl_fsglmul_op #
	short		tbl_fsglmul_op		- tbl_fsglmul_op #

	short		fsglmul_zero		- tbl_fsglmul_op # ZERO x NORM
	short		fsglmul_zero		- tbl_fsglmul_op # ZERO x ZERO
	short		fsglmul_res_operr	- tbl_fsglmul_op # ZERO x INF
	short		fsglmul_res_qnan	- tbl_fsglmul_op # ZERO x QNAN
	short		fsglmul_zero		- tbl_fsglmul_op # ZERO x DENORM
	short		fsglmul_res_snan	- tbl_fsglmul_op # ZERO x SNAN
	short		tbl_fsglmul_op		- tbl_fsglmul_op #
	short		tbl_fsglmul_op		- tbl_fsglmul_op #

	short		fsglmul_inf_dst		- tbl_fsglmul_op # INF x NORM
	short		fsglmul_res_operr	- tbl_fsglmul_op # INF x ZERO
	short		fsglmul_inf_dst		- tbl_fsglmul_op # INF x INF
	short		fsglmul_res_qnan	- tbl_fsglmul_op # INF x QNAN
	short		fsglmul_inf_dst		- tbl_fsglmul_op # INF x DENORM
	short		fsglmul_res_snan	- tbl_fsglmul_op # INF x SNAN
	short		tbl_fsglmul_op		- tbl_fsglmul_op #
	short		tbl_fsglmul_op		- tbl_fsglmul_op #

	short		fsglmul_res_qnan	- tbl_fsglmul_op # QNAN x NORM
	short		fsglmul_res_qnan	- tbl_fsglmul_op # QNAN x ZERO
	short		fsglmul_res_qnan	- tbl_fsglmul_op # QNAN x INF
	short		fsglmul_res_qnan	- tbl_fsglmul_op # QNAN x QNAN
	short		fsglmul_res_qnan	- tbl_fsglmul_op # QNAN x DENORM
	short		fsglmul_res_snan	- tbl_fsglmul_op # QNAN x SNAN
	short		tbl_fsglmul_op		- tbl_fsglmul_op #
	short		tbl_fsglmul_op		- tbl_fsglmul_op #

	short		fsglmul_norm		- tbl_fsglmul_op # DENORM x NORM
	short		fsglmul_zero		- tbl_fsglmul_op # DENORM x ZERO
	short		fsglmul_inf_src		- tbl_fsglmul_op # DENORM x INF
	short		fsglmul_res_qnan	- tbl_fsglmul_op # DENORM x QNAN
	short		fsglmul_norm		- tbl_fsglmul_op # DENORM x DENORM
	short		fsglmul_res_snan	- tbl_fsglmul_op # DENORM x SNAN
	short		tbl_fsglmul_op		- tbl_fsglmul_op #
	short		tbl_fsglmul_op		- tbl_fsglmul_op #

	short		fsglmul_res_snan	- tbl_fsglmul_op # SNAN x NORM
	short		fsglmul_res_snan	- tbl_fsglmul_op # SNAN x ZERO
	short		fsglmul_res_snan	- tbl_fsglmul_op # SNAN x INF
	short		fsglmul_res_snan	- tbl_fsglmul_op # SNAN x QNAN
	short		fsglmul_res_snan	- tbl_fsglmul_op # SNAN x DENORM
	short		fsglmul_res_snan	- tbl_fsglmul_op # SNAN x SNAN
	short		tbl_fsglmul_op		- tbl_fsglmul_op #
	short		tbl_fsglmul_op		- tbl_fsglmul_op #

# local trampolines: the table above holds 16-bit offsets, so each special
# case lands here and takes a long branch to the shared handler.
fsglmul_res_operr:
	bra.l		res_operr
fsglmul_res_snan:
	bra.l		res_snan
fsglmul_res_qnan:
	bra.l		res_qnan
fsglmul_zero:
	bra.l		fmul_zero
fsglmul_inf_src:
	bra.l		fmul_inf_src
fsglmul_inf_dst:
	bra.l		fmul_inf_dst

#########################################################################
# XDEF
**************************************************************** # 14277# fsgldiv(): emulates the fsgldiv instruction # 14278# # 14279# XREF **************************************************************** # 14280# scale_to_zero_src() - scale src exponent to zero # 14281# scale_to_zero_dst() - scale dst exponent to zero # 14282# unf_res4() - return default underflow result for sglop # 14283# ovf_res() - return default overflow result # 14284# res_qnan() - return QNAN result # 14285# res_snan() - return SNAN result # 14286# # 14287# INPUT *************************************************************** # 14288# a0 = pointer to extended precision source operand # 14289# a1 = pointer to extended precision destination operand # 14290# d0 rnd prec,mode # 14291# # 14292# OUTPUT ************************************************************** # 14293# fp0 = result # 14294# fp1 = EXOP (if exception occurred) # 14295# # 14296# ALGORITHM *********************************************************** # 14297# Handle NANs, infinities, and zeroes as special cases. Divide # 14298# norms/denorms into ext/sgl/dbl precision. # 14299# For norms/denorms, scale the exponents such that a divide # 14300# instruction won't cause an exception. Use the regular fsgldiv to # 14301# compute a result. Check if the regular operands would have taken # 14302# an exception. If so, return the default overflow/underflow result # 14303# and return the EXOP if exceptions are enabled. Else, scale the # 14304# result operand to the proper exponent. # 14305# # 14306######################################################################### 14307 14308 global fsgldiv 14309fsgldiv: 14310 mov.l %d0,L_SCR3(%a6) # store rnd info 14311 14312 clr.w %d1 14313 mov.b DTAG(%a6),%d1 14314 lsl.b &0x3,%d1 14315 or.b STAG(%a6),%d1 # combine src tags 14316 14317 bne.w fsgldiv_not_norm # optimize on non-norm input 14318 14319# 14320# DIVIDE: NORMs and DENORMs ONLY! 
#
fsgldiv_norm:
	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)
	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)

	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)

	bsr.l		scale_to_zero_src	# calculate scale factor 1
	mov.l		%d0,-(%sp)		# save scale factor 1

	bsr.l		scale_to_zero_dst	# calculate scale factor 2

	neg.l		(%sp)			# S.F. = scale1 - scale2
	add.l		%d0,(%sp)

# NOTE(review): the value built in d1 by the next two instructions appears to
# be unused; every path below overwrites d1 before reading it. Left in place
# so the instruction stream is unchanged - confirm before removing.
	mov.w		2+L_SCR3(%a6),%d1	# fetch precision,mode
	lsr.b		&0x6,%d1
	mov.l		(%sp)+,%d0		# d0 = combined scale factor
	cmpi.l		%d0,&0x3fff-0x7ffe	# might the result overflow?
	ble.w		fsgldiv_may_ovfl	# maybe; go check

	cmpi.l		%d0,&0x3fff-0x0000	# will result underflow?
	beq.w		fsgldiv_may_unfl	# maybe
	bgt.w		fsgldiv_unfl		# yes; go handle underflow

fsgldiv_normal:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsgldiv.x	FP_SCR0(%a6),%fp0	# perform sgl divide

	fmov.l		%fpsr,%d1		# save FPSR
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

#
# Common exit: undo the operand scaling by subtracting d0 from the result
# exponent while keeping the sign bit.
#
fsgldiv_normal_exit:
	fmovm.x		&0x80,FP_SCR0(%a6)	# store result on stack
	mov.l		%d2,-(%sp)		# save d2
	mov.w		FP_SCR0_EX(%a6),%d1	# load {sgn,exp}
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# add scale factor
	or.w		%d2,%d1			# concat old sign,new exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
	rts

fsgldiv_may_ovfl:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsgldiv.x	FP_SCR0(%a6),%fp0	# execute divide

	fmov.l		%fpsr,%d1
	fmov.l		&0x0,%fpcr

	or.l		%d1,USER_FPSR(%a6)	# save INEX,N

# the -(An) form of fmovm uses the reversed register mask, so 0x01 selects fp0
	fmovm.x		&0x01,-(%sp)		# save result to stack
	mov.w		(%sp),%d1		# fetch new exponent
	add.l		&0xc,%sp		# clear result
	andi.l		&0x7fff,%d1		# strip sign
	sub.l		%d0,%d1			# add scale factor
	cmp.l		%d1,&0x7fff		# did divide overflow?
	blt.b		fsgldiv_normal_exit	# no; finish normally

fsgldiv_ovfl_tst:
	or.w		&ovfl_inx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
	bne.b		fsgldiv_ovfl_ena	# yes

fsgldiv_ovfl_dis:
	btst		&neg_bit,FPSR_CC(%a6)	# is result negative
	sne		%d1			# set sign param accordingly
	mov.l		L_SCR3(%a6),%d0		# pass prec:rnd
	andi.b		&0x30,%d0		# kill precision
	bsr.l		ovf_res			# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# set INF if applicable
	fmovm.x		(%a0),&0x80		# return default result in fp0
	rts

#
# OVFL or INEX is enabled: build the EXOP in fp1 from the result in fp0,
# then fall back into the disabled path for the default result.
#
fsgldiv_ovfl_ena:
	fmovm.x		&0x80,FP_SCR0(%a6)	# move result to stack

	mov.l		%d2,-(%sp)		# save d2
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# add scale factor
	subi.l		&0x6000,%d1		# subtract new bias
	andi.w		&0x7fff,%d1		# clear ms bit
	or.w		%d2,%d1			# concat old sign,new exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	bra.b		fsgldiv_ovfl_dis

fsgldiv_unfl:
	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit

	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsgldiv.x	FP_SCR0(%a6),%fp0	# execute sgl divide

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
	bne.b		fsgldiv_unfl_ena	# yes

fsgldiv_unfl_dis:
	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result

	lea		FP_SCR0(%a6),%a0	# pass: result addr
	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
	bsr.l		unf_res4		# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# 'Z' bit may have been set
	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
	rts

#
# UNFL is enabled.
#
fsgldiv_unfl_ena:
	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into fp1

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsgldiv.x	FP_SCR0(%a6),%fp1	# execute sgl divide

	fmov.l		&0x0,%fpcr		# clear FPCR

	fmovm.x		&0x40,FP_SCR0(%a6)	# save result to stack
	mov.l		%d2,-(%sp)		# save d2
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# add scale factor
	addi.l		&0x6000,%d1		# add bias
	andi.w		&0x7fff,%d1		# clear top bit
	or.w		%d2,%d1			# concat old sign, new exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	bra.b		fsgldiv_unfl_dis

#
# the divide operation MAY underflow:
#
fsgldiv_may_unfl:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsgldiv.x	FP_SCR0(%a6),%fp0	# execute sgl divide

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

	fabs.x		%fp0,%fp1		# make a copy of result
	fcmp.b		%fp1,&0x1		# is |result| > 1.b?
	fbgt.w		fsgldiv_normal_exit	# yes; no underflow occurred
	fblt.w		fsgldiv_unfl		# no, it is < 1.b; underflow occurred

#
# we still don't know if underflow occurred. result is ~ equal to 1. but,
# we don't know if the result was an underflow that rounded up to a 1
# or a normalized number that rounded down to a 1. so, redo the entire
# operation using RZ as the rounding mode to see what the pre-rounded
# result is. this case should be relatively rare.
#
	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into %fp1

	clr.l		%d1			# clear scratch register
	ori.b		&rz_mode*0x10,%d1	# force RZ rnd mode

	fmov.l		%d1,%fpcr		# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsgldiv.x	FP_SCR0(%a6),%fp1	# execute sgl divide

	fmov.l		&0x0,%fpcr		# clear FPCR
	fabs.x		%fp1			# make absolute value
	fcmp.b		%fp1,&0x1		# is |result| < 1.b?
	fbge.w		fsgldiv_normal_exit	# no; no underflow occurred
	bra.w		fsgldiv_unfl		# yes; underflow occurred

############################################################################

#
# Divide: inputs are not both normalized; what are they?
#
# d1 = (DTAG << 3) | STAG, built at the fsgldiv entry point. It is used as a
# word index into tbl_fsgldiv_op; each entry is a handler offset relative to
# the table. Rows are labelled "dst / src". The two trailing entries of every
# group of eight are unused filler.
#
fsgldiv_not_norm:
	mov.w		(tbl_fsgldiv_op.b,%pc,%d1.w*2),%d1
	jmp		(tbl_fsgldiv_op.b,%pc,%d1.w*1)

	swbeg		&48
tbl_fsgldiv_op:
	short		fsgldiv_norm		- tbl_fsgldiv_op # NORM / NORM
	short		fsgldiv_inf_load	- tbl_fsgldiv_op # NORM / ZERO
	short		fsgldiv_zero_load	- tbl_fsgldiv_op # NORM / INF
	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # NORM / QNAN
	short		fsgldiv_norm		- tbl_fsgldiv_op # NORM / DENORM
	short		fsgldiv_res_snan	- tbl_fsgldiv_op # NORM / SNAN
	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #

	short		fsgldiv_zero_load	- tbl_fsgldiv_op # ZERO / NORM
	short		fsgldiv_res_operr	- tbl_fsgldiv_op # ZERO / ZERO
	short		fsgldiv_zero_load	- tbl_fsgldiv_op # ZERO / INF
	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # ZERO / QNAN
	short		fsgldiv_zero_load	- tbl_fsgldiv_op # ZERO / DENORM
	short		fsgldiv_res_snan	- tbl_fsgldiv_op # ZERO / SNAN
	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #

	short		fsgldiv_inf_dst		- tbl_fsgldiv_op # INF / NORM
	short		fsgldiv_inf_dst		- tbl_fsgldiv_op # INF / ZERO
	short		fsgldiv_res_operr	- tbl_fsgldiv_op # INF / INF
	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # INF / QNAN
	short		fsgldiv_inf_dst		- tbl_fsgldiv_op # INF / DENORM
	short		fsgldiv_res_snan	- tbl_fsgldiv_op # INF / SNAN
	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #

	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # QNAN / NORM
	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # QNAN / ZERO
	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # QNAN / INF
	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # QNAN / QNAN
	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # QNAN / DENORM
	short		fsgldiv_res_snan	- tbl_fsgldiv_op # QNAN / SNAN
	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #

	short		fsgldiv_norm		- tbl_fsgldiv_op # DENORM / NORM
	short		fsgldiv_inf_load	- tbl_fsgldiv_op # DENORM / ZERO
	short		fsgldiv_zero_load	- tbl_fsgldiv_op # DENORM / INF
	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # DENORM / QNAN
	short		fsgldiv_norm		- tbl_fsgldiv_op # DENORM / DENORM
	short		fsgldiv_res_snan	- tbl_fsgldiv_op # DENORM / SNAN
	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #

	short		fsgldiv_res_snan	- tbl_fsgldiv_op # SNAN / NORM
	short		fsgldiv_res_snan	- tbl_fsgldiv_op # SNAN / ZERO
	short		fsgldiv_res_snan	- tbl_fsgldiv_op # SNAN / INF
	short		fsgldiv_res_snan	- tbl_fsgldiv_op # SNAN / QNAN
	short		fsgldiv_res_snan	- tbl_fsgldiv_op # SNAN / DENORM
	short		fsgldiv_res_snan	- tbl_fsgldiv_op # SNAN / SNAN
	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #

# Long-branch stubs: the table holds 16-bit offsets, so far handlers are
# reached through these local labels.
fsgldiv_res_qnan:
	bra.l		res_qnan
fsgldiv_res_snan:
	bra.l		res_snan
fsgldiv_res_operr:
	bra.l		res_operr
fsgldiv_inf_load:
	bra.l		fdiv_inf_load
fsgldiv_zero_load:
	bra.l		fdiv_zero_load
fsgldiv_inf_dst:
	bra.l		fdiv_inf_dst

#########################################################################
# XDEF ****************************************************************	#
#	fadd(): emulates the fadd instruction				#
#	fsadd(): emulates the fsadd instruction				#
#	fdadd(): emulates the fdadd instruction				#
#									#
# XREF ****************************************************************	#
#	addsub_scaler2() - scale the operands so they won't take exc	#
#	ovf_res() - return default overflow result			#
#	unf_res() - return default underflow result			#
#	res_qnan() - set QNAN result					#
#	res_snan() - set SNAN result					#
#	res_operr() - set OPERR result					#
#	scale_to_zero_src() - set src operand exponent equal to zero	#
#	scale_to_zero_dst() - set dst operand exponent equal to zero	#
#									#
# INPUT ***************************************************************	#
#	a0 = pointer to extended precision source operand		#
#	a1 = pointer to extended precision destination operand		#
#	d0 = rnd prec,mode (fsadd/fdadd replace the precision bits)	#
#									#
# OUTPUT **************************************************************	#
#	fp0 = result							#
#	fp1 = EXOP (if exception occurred)				#
#									#
# ALGORITHM ***********************************************************	#
#	Handle NANs, infinities, and zeroes as special cases. Divide	#
#	norms into extended, single, and double precision.		#
#	Do addition after scaling exponents such that exception won't	#
#	occur. Then, check result exponent to see if exception would have	#
#	occurred. If so, return default result and maybe EXOP. Else, insert	#
#	the correct result exponent and return. Set FPSR bits as appropriate.	#
#									#
#########################################################################

	global		fsadd
fsadd:
	andi.b		&0x30,%d0		# clear rnd prec
	ori.b		&s_mode*0x10,%d0	# insert sgl prec
	bra.b		fadd

	global		fdadd
fdadd:
	andi.b		&0x30,%d0		# clear rnd prec
	ori.b		&d_mode*0x10,%d0	# insert dbl prec
						# falls through into fadd

	global		fadd
fadd:
	mov.l		%d0,L_SCR3(%a6)		# store rnd info

	clr.w		%d1
	mov.b		DTAG(%a6),%d1
	lsl.b		&0x3,%d1		# d1 = DTAG << 3
	or.b		STAG(%a6),%d1		# combine src tags

	bne.w		fadd_not_norm		# optimize on non-norm input

#
# ADD: norms and denorms
#
fadd_norm:
	bsr.l		addsub_scaler2		# scale exponents

fadd_zero_entry:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		&0x0,%fpsr		# clear FPSR
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

	fadd.x		FP_SCR0(%a6),%fp0	# execute add

	fmov.l		&0x0,%fpcr		# clear FPCR
	fmov.l		%fpsr,%d1		# fetch INEX2,N,Z

	or.l		%d1,USER_FPSR(%a6)	# save exc and ccode bits

# a zero result is already in fp0 and needs no exponent fix-up
	fbeq.w		fadd_zero_exit		# if result is zero, end now

	mov.l		%d2,-(%sp)		# save d2

# the -(An) form of fmovm uses the reversed register mask, so 0x01 selects fp0
	fmovm.x		&0x01,-(%sp)		# save result to stack

	mov.w		2+L_SCR3(%a6),%d1	# low byte holds rnd prec,mode
	lsr.b		&0x6,%d1		# d1 = prec index: 0=ext,1=sgl,2=dbl

	mov.w		(%sp),%d2		# fetch new sign, exp
	andi.l		&0x7fff,%d2		# strip sign
	sub.l		%d0,%d2			# add scale factor

	cmp.l		%d2,(tbl_fadd_ovfl.b,%pc,%d1.w*4) # is it an overflow?
	bge.b		fadd_ovfl		# yes

	cmp.l		%d2,(tbl_fadd_unfl.b,%pc,%d1.w*4) # is it an underflow?
	blt.w		fadd_unfl		# yes
	beq.w		fadd_may_unfl		# maybe; go find out

fadd_normal:
	mov.w		(%sp),%d1
	andi.w		&0x8000,%d1		# keep sign
	or.w		%d2,%d1			# concat sign,new exp
	mov.w		%d1,(%sp)		# insert new exponent

	fmovm.x		(%sp)+,&0x80		# return result in fp0

	mov.l		(%sp)+,%d2		# restore d2
	rts

fadd_zero_exit:
#	fmov.s		&0x00000000,%fp0	# return zero in fp0
	rts

# exponent thresholds, indexed by the precision index computed above
tbl_fadd_ovfl:
	long		0x7fff			# ext ovfl
	long		0x407f			# sgl ovfl
	long		0x43ff			# dbl ovfl

tbl_fadd_unfl:
	long		0x0000			# ext unfl
	long		0x3f81			# sgl unfl
	long		0x3c01			# dbl unfl

fadd_ovfl:
	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
	bne.b		fadd_ovfl_ena		# yes

	add.l		&0xc,%sp		# discard saved result
fadd_ovfl_dis:
	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
	sne		%d1			# set sign param accordingly
	mov.l		L_SCR3(%a6),%d0		# pass prec:rnd
	bsr.l		ovf_res			# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
	fmovm.x		(%a0),&0x80		# return default result in fp0
	mov.l		(%sp)+,%d2		# restore d2
	rts

#
# OVFL or INEX is enabled: build the EXOP.
# The rounding precision lives in bits 7:6 of the LOW byte of the long at
# L_SCR3 (see the 2+L_SCR3 word fetch / lsr.b &0x6 above and the mov.l +
# andi.b &0xc0 test in fadd_unfl_ena). The test must therefore read byte
# 3+L_SCR3; reading byte 0 (the most significant byte) never sees the
# precision bits, so fadd_ovfl_ena_sd could not be selected by them.
# NOTE(review): fadd_ovfl_ena_cont reuses the exponent in d2 that was taken
# from the first (precision-rounded) result - confirm that is intended for the
# re-executed add.
#
fadd_ovfl_ena:
	mov.b		3+L_SCR3(%a6),%d1	# fetch rnd prec,mode byte
	andi.b		&0xc0,%d1		# is precision extended?
	bne.b		fadd_ovfl_ena_sd	# no; prec = sgl or dbl

fadd_ovfl_ena_cont:
	mov.w		(%sp),%d1
	andi.w		&0x8000,%d1		# keep sign
	subi.l		&0x6000,%d2		# subtract extra bias
	andi.w		&0x7fff,%d2		# clear top bit
	or.w		%d2,%d1			# concat sign,new exp
	mov.w		%d1,(%sp)		# insert new exponent

	fmovm.x		(%sp)+,&0x40		# return EXOP in fp1
	bra.b		fadd_ovfl_dis

fadd_ovfl_ena_sd:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	mov.l		L_SCR3(%a6),%d1
	andi.b		&0x30,%d1		# keep rnd mode
	fmov.l		%d1,%fpcr		# set FPCR

	fadd.x		FP_SCR0(%a6),%fp0	# execute add

	fmov.l		&0x0,%fpcr		# clear FPCR

	add.l		&0xc,%sp		# replace the saved result
	fmovm.x		&0x01,-(%sp)		# with the re-executed one
	bra.b		fadd_ovfl_ena_cont

fadd_unfl:
	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit

	add.l		&0xc,%sp		# discard saved result

	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fadd.x		FP_SCR0(%a6),%fp0	# execute add

	fmov.l		&0x0,%fpcr		# clear FPCR
	fmov.l		%fpsr,%d1		# save status

	or.l		%d1,USER_FPSR(%a6)	# save INEX,N

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
	bne.b		fadd_unfl_ena		# yes

fadd_unfl_dis:
	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result

	lea		FP_SCR0(%a6),%a0	# pass: result addr
	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
	bsr.l		unf_res			# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# 'Z' bit may have been set
	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
	mov.l		(%sp)+,%d2		# restore d2
	rts

fadd_unfl_ena:
	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into fp1

	mov.l		L_SCR3(%a6),%d1
	andi.b		&0xc0,%d1		# is precision extended?
	bne.b		fadd_unfl_ena_sd	# no; sgl or dbl

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

fadd_unfl_ena_cont:
	fmov.l		&0x0,%fpsr		# clear FPSR

	fadd.x		FP_SCR0(%a6),%fp1	# execute add

	fmov.l		&0x0,%fpcr		# clear FPCR

	fmovm.x		&0x40,FP_SCR0(%a6)	# save result to stack
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# add scale factor
	addi.l		&0x6000,%d1		# add new bias
	andi.w		&0x7fff,%d1		# clear top bit
	or.w		%d2,%d1			# concat sign,new exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	bra.w		fadd_unfl_dis

fadd_unfl_ena_sd:
	mov.l		L_SCR3(%a6),%d1
	andi.b		&0x30,%d1		# use only rnd mode
	fmov.l		%d1,%fpcr		# set FPCR

	bra.b		fadd_unfl_ena_cont

#
# result is equal to the smallest normalized number in the selected precision
# if the precision is extended, this result could not have come from an
# underflow that rounded up.
#
fadd_may_unfl:
	mov.l		L_SCR3(%a6),%d1
	andi.b		&0xc0,%d1		# is precision extended?
	beq.w		fadd_normal		# yes; no underflow occurred

	mov.l		0x4(%sp),%d1		# extract hi(man)
	cmpi.l		%d1,&0x80000000		# is hi(man) = 0x80000000?
	bne.w		fadd_normal		# no; no underflow occurred

	tst.l		0x8(%sp)		# is lo(man) = 0x0?
	bne.w		fadd_normal		# no; no underflow occurred

	btst		&inex2_bit,FPSR_EXCEPT(%a6) # is INEX2 set?
	beq.w		fadd_normal		# no; no underflow occurred

#
# ok, so now the result has an exponent equal to the smallest normalized
# exponent for the selected precision. also, the mantissa is equal to
# 0x8000000000000000 and this mantissa is the result of rounding non-zero
# g,r,s.
# now, we must determine whether the pre-rounded result was an underflow
# rounded "up" or a normalized number rounded "down".
# so, we do this by re-executing the add using RZ as the rounding mode and
# seeing if the new result is smaller or equal to the current result.
#
	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into fp1

	mov.l		L_SCR3(%a6),%d1
	andi.b		&0xc0,%d1		# keep rnd prec
	ori.b		&rz_mode*0x10,%d1	# insert rnd mode
	fmov.l		%d1,%fpcr		# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fadd.x		FP_SCR0(%a6),%fp1	# execute add

	fmov.l		&0x0,%fpcr		# clear FPCR

	fabs.x		%fp0			# compare absolute values
	fabs.x		%fp1
	fcmp.x		%fp0,%fp1		# is first result > second?

	fbgt.w		fadd_unfl		# yes; it's an underflow
	bra.w		fadd_normal		# no; it's not an underflow

##########################################################################

#
# Add: inputs are not both normalized; what are they?
#
# d1 is used as a word index into tbl_fadd_op; each entry is a handler offset
# relative to the table. Rows are labelled "dst + src" (the ZERO + NORM row
# goes to fadd_zero_dst, which zeroes the dst copy). The two trailing entries
# of every group of eight are unused filler.
#
fadd_not_norm:
	mov.w		(tbl_fadd_op.b,%pc,%d1.w*2),%d1
	jmp		(tbl_fadd_op.b,%pc,%d1.w*1)

	swbeg		&48
tbl_fadd_op:
	short		fadd_norm	- tbl_fadd_op # NORM + NORM
	short		fadd_zero_src	- tbl_fadd_op # NORM + ZERO
	short		fadd_inf_src	- tbl_fadd_op # NORM + INF
	short		fadd_res_qnan	- tbl_fadd_op # NORM + QNAN
	short		fadd_norm	- tbl_fadd_op # NORM + DENORM
	short		fadd_res_snan	- tbl_fadd_op # NORM + SNAN
	short		tbl_fadd_op	- tbl_fadd_op #
	short		tbl_fadd_op	- tbl_fadd_op #

	short		fadd_zero_dst	- tbl_fadd_op # ZERO + NORM
	short		fadd_zero_2	- tbl_fadd_op # ZERO + ZERO
	short		fadd_inf_src	- tbl_fadd_op # ZERO + INF
	short		fadd_res_qnan	- tbl_fadd_op # ZERO + QNAN
	short		fadd_zero_dst	- tbl_fadd_op # ZERO + DENORM
	short		fadd_res_snan	- tbl_fadd_op # ZERO + SNAN
	short		tbl_fadd_op	- tbl_fadd_op #
	short		tbl_fadd_op	- tbl_fadd_op #

	short		fadd_inf_dst	- tbl_fadd_op # INF + NORM
	short		fadd_inf_dst	- tbl_fadd_op # INF + ZERO
	short		fadd_inf_2	- tbl_fadd_op # INF + INF
	short		fadd_res_qnan	- tbl_fadd_op # INF + QNAN
	short		fadd_inf_dst	- tbl_fadd_op # INF + DENORM
	short		fadd_res_snan	- tbl_fadd_op # INF + SNAN
	short		tbl_fadd_op	- tbl_fadd_op #
	short		tbl_fadd_op	- tbl_fadd_op #

	short		fadd_res_qnan	- tbl_fadd_op # QNAN + NORM
	short		fadd_res_qnan	- tbl_fadd_op # QNAN + ZERO
	short		fadd_res_qnan	- tbl_fadd_op # QNAN + INF
	short		fadd_res_qnan	- tbl_fadd_op # QNAN + QNAN
	short		fadd_res_qnan	- tbl_fadd_op # QNAN + DENORM
	short		fadd_res_snan	- tbl_fadd_op # QNAN + SNAN
	short		tbl_fadd_op	- tbl_fadd_op #
	short		tbl_fadd_op	- tbl_fadd_op #

	short		fadd_norm	- tbl_fadd_op # DENORM + NORM
	short		fadd_zero_src	- tbl_fadd_op # DENORM + ZERO
	short		fadd_inf_src	- tbl_fadd_op # DENORM + INF
	short		fadd_res_qnan	- tbl_fadd_op # DENORM + QNAN
	short		fadd_norm	- tbl_fadd_op # DENORM + DENORM
	short		fadd_res_snan	- tbl_fadd_op # DENORM + SNAN
	short		tbl_fadd_op	- tbl_fadd_op #
	short		tbl_fadd_op	- tbl_fadd_op #

	short		fadd_res_snan	- tbl_fadd_op # SNAN + NORM
	short		fadd_res_snan	- tbl_fadd_op # SNAN + ZERO
	short		fadd_res_snan	- tbl_fadd_op # SNAN + INF
	short		fadd_res_snan	- tbl_fadd_op # SNAN + QNAN
	short		fadd_res_snan	- tbl_fadd_op # SNAN + DENORM
	short		fadd_res_snan	- tbl_fadd_op # SNAN + SNAN
	short		tbl_fadd_op	- tbl_fadd_op #
	short		tbl_fadd_op	- tbl_fadd_op #

fadd_res_qnan:
	bra.l		res_qnan
fadd_res_snan:
	bra.l		res_snan

#
# both operands are ZEROes
#
fadd_zero_2:
	mov.b		SRC_EX(%a0),%d0		# are the signs opposite
	mov.b		DST_EX(%a1),%d1
	eor.b		%d0,%d1
	bmi.w		fadd_zero_2_chk_rm	# weed out (-ZERO)+(+ZERO)

# the signs are the same. so determine whether they are positive or negative
# and return the appropriately signed zero.
	tst.b		%d0			# are ZEROes positive or negative?
	bmi.b		fadd_zero_rm		# negative
	fmov.s		&0x00000000,%fp0	# return +ZERO
	mov.b		&z_bmask,FPSR_CC(%a6)	# set Z
	rts

#
# the ZEROes have opposite signs:
# - therefore, we return +ZERO if the rounding modes are RN,RZ, or RP.
# - -ZERO is returned in the case of RM.
#
fadd_zero_2_chk_rm:
	mov.b		3+L_SCR3(%a6),%d1
	andi.b		&0x30,%d1		# extract rnd mode
	cmpi.b		%d1,&rm_mode*0x10	# is rnd mode == RM?
	beq.b		fadd_zero_rm		# yes
	fmov.s		&0x00000000,%fp0	# return +ZERO
	mov.b		&z_bmask,FPSR_CC(%a6)	# set Z
	rts

fadd_zero_rm:
	fmov.s		&0x80000000,%fp0	# return -ZERO
	mov.b		&neg_bmask+z_bmask,FPSR_CC(%a6) # set NEG/Z
	rts

#
# one operand is a ZERO and the other is a DENORM or NORM. scale
# the DENORM or NORM and jump to the regular fadd routine.
#
fadd_zero_dst:
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	bsr.l		scale_to_zero_src	# scale the operand
	clr.w		FP_SCR1_EX(%a6)		# dst copy = +ZERO
	clr.l		FP_SCR1_HI(%a6)
	clr.l		FP_SCR1_LO(%a6)
	bra.w		fadd_zero_entry		# go execute fadd

fadd_zero_src:
	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)
	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)
	bsr.l		scale_to_zero_dst	# scale the operand
	clr.w		FP_SCR0_EX(%a6)		# src copy = +ZERO
	clr.l		FP_SCR0_HI(%a6)
	clr.l		FP_SCR0_LO(%a6)
	bra.w		fadd_zero_entry		# go execute fadd

#
# both operands are INFs. an OPERR will result if the INFs have
# different signs. else, an INF of the same sign is returned
#
fadd_inf_2:
	mov.b		SRC_EX(%a0),%d0		# exclusive or the signs
	mov.b		DST_EX(%a1),%d1
	eor.b		%d1,%d0
	bmi.l		res_operr		# weed out (-INF)+(+INF)

# ok, so it's not an OPERR. but, we do have to remember to return the
# src INF since that's where the 881/882 gets the j-bit from...

#
# operands are INF and one of {ZERO, INF, DENORM, NORM}
#
fadd_inf_src:
	fmovm.x		SRC(%a0),&0x80		# return src INF
	tst.b		SRC_EX(%a0)		# is INF positive?
	bpl.b		fadd_inf_done		# yes; we're done
	mov.b		&neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
	rts

#
# operands are INF and one of {ZERO, INF, DENORM, NORM}
#
fadd_inf_dst:
	fmovm.x		DST(%a1),&0x80		# return dst INF
	tst.b		DST_EX(%a1)		# is INF positive?
# (fadd_inf_dst, continued) pick the condition codes for the returned dst INF
	bpl.b		fadd_inf_done		# yes; we're done
	mov.b		&neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
	rts

fadd_inf_done:
	mov.b		&inf_bmask,FPSR_CC(%a6) # set INF
	rts

#########################################################################
# XDEF ****************************************************************	#
#	fsub(): emulates the fsub instruction				#
#	fssub(): emulates the fssub instruction				#
#	fdsub(): emulates the fdsub instruction				#
#									#
# XREF ****************************************************************	#
#	addsub_scaler2() - scale the operands so they won't take exc	#
#	ovf_res() - return default overflow result			#
#	unf_res() - return default underflow result			#
#	res_qnan() - set QNAN result					#
#	res_snan() - set SNAN result					#
#	res_operr() - set OPERR result					#
#	scale_to_zero_src() - set src operand exponent equal to zero	#
#	scale_to_zero_dst() - set dst operand exponent equal to zero	#
#									#
# INPUT ***************************************************************	#
#	a0 = pointer to extended precision source operand		#
#	a1 = pointer to extended precision destination operand		#
#	d0 = rnd prec,mode (fssub/fdsub replace the precision bits)	#
#									#
# OUTPUT **************************************************************	#
#	fp0 = result							#
#	fp1 = EXOP (if exception occurred)				#
#									#
# ALGORITHM ***********************************************************	#
#	Handle NANs, infinities, and zeroes as special cases. Divide	#
#	norms into extended, single, and double precision.		#
#	Do subtraction after scaling exponents such that exception won't	#
#	occur. Then, check result exponent to see if exception would have	#
#	occurred. If so, return default result and maybe EXOP. Else, insert	#
#	the correct result exponent and return. Set FPSR bits as appropriate.	#
#									#
#########################################################################

	global		fssub
fssub:
	andi.b		&0x30,%d0		# clear rnd prec
	ori.b		&s_mode*0x10,%d0	# insert sgl prec
	bra.b		fsub

	global		fdsub
fdsub:
	andi.b		&0x30,%d0		# clear rnd prec
	ori.b		&d_mode*0x10,%d0	# insert dbl prec
						# falls through into fsub

	global		fsub
fsub:
	mov.l		%d0,L_SCR3(%a6)		# store rnd info

	clr.w		%d1
	mov.b		DTAG(%a6),%d1
	lsl.b		&0x3,%d1		# d1 = DTAG << 3
	or.b		STAG(%a6),%d1		# combine src tags

	bne.w		fsub_not_norm		# optimize on non-norm input

#
# SUB: norms and denorms
#
fsub_norm:
	bsr.l		addsub_scaler2		# scale exponents

fsub_zero_entry:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		&0x0,%fpsr		# clear FPSR
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

	fsub.x		FP_SCR0(%a6),%fp0	# execute subtract

	fmov.l		&0x0,%fpcr		# clear FPCR
	fmov.l		%fpsr,%d1		# fetch INEX2, N, Z

	or.l		%d1,USER_FPSR(%a6)	# save exc and ccode bits

	fbeq.w		fsub_zero_exit		# if result zero, end now

	mov.l		%d2,-(%sp)		# save d2

# the -(An) form of fmovm uses the reversed register mask, so 0x01 selects fp0
	fmovm.x		&0x01,-(%sp)		# save result to stack

	mov.w		2+L_SCR3(%a6),%d1	# low byte holds rnd prec,mode
	lsr.b		&0x6,%d1		# d1 = prec index: 0=ext,1=sgl,2=dbl

	mov.w		(%sp),%d2		# fetch new exponent
	andi.l		&0x7fff,%d2		# strip sign
	sub.l		%d0,%d2			# add scale factor

	cmp.l		%d2,(tbl_fsub_ovfl.b,%pc,%d1.w*4) # is it an overflow?
	bge.b		fsub_ovfl		# yes

	cmp.l		%d2,(tbl_fsub_unfl.b,%pc,%d1.w*4) # is it an underflow?
# d2 = unscaled result exponent, just compared against tbl_fsub_unfl
	blt.w		fsub_unfl		# yes
	beq.w		fsub_may_unfl		# maybe; go find out

fsub_normal:
	mov.w		(%sp),%d1
	andi.w		&0x8000,%d1		# keep sign
	or.w		%d2,%d1			# concat sign,new exp
	mov.w		%d1,(%sp)		# insert new exponent

	fmovm.x		(%sp)+,&0x80		# return result in fp0

	mov.l		(%sp)+,%d2		# restore d2
	rts

fsub_zero_exit:
#	fmov.s		&0x00000000,%fp0	# return zero in fp0
	rts

# exponent thresholds, indexed by rounding precision (ext, sgl, dbl)
tbl_fsub_ovfl:
	long		0x7fff			# ext ovfl
	long		0x407f			# sgl ovfl
	long		0x43ff			# dbl ovfl

tbl_fsub_unfl:
	long		0x0000			# ext unfl
	long		0x3f81			# sgl unfl
	long		0x3c01			# dbl unfl

fsub_ovfl:
	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
	bne.b		fsub_ovfl_ena		# yes

	add.l		&0xc,%sp		# discard saved result
fsub_ovfl_dis:
	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
	sne		%d1			# set sign param accordingly
	mov.l		L_SCR3(%a6),%d0		# pass prec:rnd
	bsr.l		ovf_res			# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
	fmovm.x		(%a0),&0x80		# return default result in fp0
	mov.l		(%sp)+,%d2		# restore d2
	rts

#
# OVFL or INEX is enabled: build the EXOP.
# The rounding precision lives in bits 7:6 of the LOW byte of the long at
# L_SCR3 (fsub_unfl_ena and fsub_may_unfl below test it with mov.l +
# andi.b &0xc0). The test must therefore read byte 3+L_SCR3; reading byte 0
# (the most significant byte) never sees the precision bits, so
# fsub_ovfl_ena_sd could not be selected by them.
# NOTE(review): fsub_ovfl_ena_cont reuses the exponent in d2 that was taken
# from the first (precision-rounded) result - confirm that is intended for the
# re-executed subtract.
#
fsub_ovfl_ena:
	mov.b		3+L_SCR3(%a6),%d1	# fetch rnd prec,mode byte
	andi.b		&0xc0,%d1		# is precision extended?
	bne.b		fsub_ovfl_ena_sd	# no

fsub_ovfl_ena_cont:
	mov.w		(%sp),%d1		# fetch {sgn,exp}
	andi.w		&0x8000,%d1		# keep sign
	subi.l		&0x6000,%d2		# subtract new bias
	andi.w		&0x7fff,%d2		# clear top bit
	or.w		%d2,%d1			# concat sign,exp
	mov.w		%d1,(%sp)		# insert new exponent

	fmovm.x		(%sp)+,&0x40		# return EXOP in fp1
	bra.b		fsub_ovfl_dis

fsub_ovfl_ena_sd:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	mov.l		L_SCR3(%a6),%d1
	andi.b		&0x30,%d1		# clear rnd prec
	fmov.l		%d1,%fpcr		# set FPCR

	fsub.x		FP_SCR0(%a6),%fp0	# execute subtract

	fmov.l		&0x0,%fpcr		# clear FPCR

	add.l		&0xc,%sp		# replace the saved result
	fmovm.x		&0x01,-(%sp)		# with the re-executed one
	bra.b		fsub_ovfl_ena_cont

fsub_unfl:
	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit

	add.l		&0xc,%sp		# discard saved result

	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsub.x		FP_SCR0(%a6),%fp0	# execute subtract

	fmov.l		&0x0,%fpcr		# clear FPCR
	fmov.l		%fpsr,%d1		# save status

	or.l		%d1,USER_FPSR(%a6)	# save INEX,N

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
	bne.b		fsub_unfl_ena		# yes

fsub_unfl_dis:
	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result

	lea		FP_SCR0(%a6),%a0	# pass: result addr
	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
	bsr.l		unf_res			# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# 'Z' may have been set
	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
	mov.l		(%sp)+,%d2		# restore d2
	rts

fsub_unfl_ena:
	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into fp1

	mov.l		L_SCR3(%a6),%d1
	andi.b		&0xc0,%d1		# is precision extended?
	bne.b		fsub_unfl_ena_sd	# no

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

fsub_unfl_ena_cont:
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsub.x		FP_SCR0(%a6),%fp1	# execute subtract

	fmov.l		&0x0,%fpcr		# clear FPCR

	fmovm.x		&0x40,FP_SCR0(%a6)	# store result to stack
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# add scale factor
	addi.l		&0x6000,%d1		# add new bias
	andi.w		&0x7fff,%d1		# clear top bit
	or.w		%d2,%d1			# concat sgn,exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	bra.w		fsub_unfl_dis

fsub_unfl_ena_sd:
	mov.l		L_SCR3(%a6),%d1
	andi.b		&0x30,%d1		# clear rnd prec
	fmov.l		%d1,%fpcr		# set FPCR

	bra.b		fsub_unfl_ena_cont

#
# result is equal to the smallest normalized number in the selected precision
# if the precision is extended, this result could not have come from an
# underflow that rounded up.
#
fsub_may_unfl:
	mov.l		L_SCR3(%a6),%d1
	andi.b		&0xc0,%d1		# is precision extended?
	beq.w		fsub_normal		# yes; no underflow occurred

	mov.l		0x4(%sp),%d1		# extract hi(man)
	cmpi.l		%d1,&0x80000000		# is hi(man) = 0x80000000?
	bne.w		fsub_normal		# no; no underflow occurred

	tst.l		0x8(%sp)		# is lo(man) = 0x0?
	bne.w		fsub_normal		# no; no underflow occurred

	btst		&inex2_bit,FPSR_EXCEPT(%a6) # is INEX2 set?
	beq.w		fsub_normal		# no; no underflow occurred

#
# ok, so now the result has an exponent equal to the smallest normalized
# exponent for the selected precision. also, the mantissa is equal to
# 0x8000000000000000 and this mantissa is the result of rounding non-zero
# g,r,s.
15325# now, we must determine whether the pre-rounded result was an underflow 15326# rounded "up" or a normalized number rounded "down". 15327# so, we do this be re-executing the add using RZ as the rounding mode and 15328# seeing if the new result is smaller or equal to the current result. 15329# 15330 fmovm.x FP_SCR1(%a6),&0x40 # load dst op into fp1 15331 15332 mov.l L_SCR3(%a6),%d1 15333 andi.b &0xc0,%d1 # keep rnd prec 15334 ori.b &rz_mode*0x10,%d1 # insert rnd mode 15335 fmov.l %d1,%fpcr # set FPCR 15336 fmov.l &0x0,%fpsr # clear FPSR 15337 15338 fsub.x FP_SCR0(%a6),%fp1 # execute subtract 15339 15340 fmov.l &0x0,%fpcr # clear FPCR 15341 15342 fabs.x %fp0 # compare absolute values 15343 fabs.x %fp1 15344 fcmp.x %fp0,%fp1 # is first result > second? 15345 15346 fbgt.w fsub_unfl # yes; it's an underflow 15347 bra.w fsub_normal # no; it's not an underflow 15348 15349########################################################################## 15350 15351# 15352# Sub: inputs are not both normalized; what are they? 
15353# 15354fsub_not_norm: 15355 mov.w (tbl_fsub_op.b,%pc,%d1.w*2),%d1 15356 jmp (tbl_fsub_op.b,%pc,%d1.w*1) 15357 15358 swbeg &48 15359tbl_fsub_op: 15360 short fsub_norm - tbl_fsub_op # NORM - NORM 15361 short fsub_zero_src - tbl_fsub_op # NORM - ZERO 15362 short fsub_inf_src - tbl_fsub_op # NORM - INF 15363 short fsub_res_qnan - tbl_fsub_op # NORM - QNAN 15364 short fsub_norm - tbl_fsub_op # NORM - DENORM 15365 short fsub_res_snan - tbl_fsub_op # NORM - SNAN 15366 short tbl_fsub_op - tbl_fsub_op # 15367 short tbl_fsub_op - tbl_fsub_op # 15368 15369 short fsub_zero_dst - tbl_fsub_op # ZERO - NORM 15370 short fsub_zero_2 - tbl_fsub_op # ZERO - ZERO 15371 short fsub_inf_src - tbl_fsub_op # ZERO - INF 15372 short fsub_res_qnan - tbl_fsub_op # NORM - QNAN 15373 short fsub_zero_dst - tbl_fsub_op # ZERO - DENORM 15374 short fsub_res_snan - tbl_fsub_op # NORM - SNAN 15375 short tbl_fsub_op - tbl_fsub_op # 15376 short tbl_fsub_op - tbl_fsub_op # 15377 15378 short fsub_inf_dst - tbl_fsub_op # INF - NORM 15379 short fsub_inf_dst - tbl_fsub_op # INF - ZERO 15380 short fsub_inf_2 - tbl_fsub_op # INF - INF 15381 short fsub_res_qnan - tbl_fsub_op # NORM - QNAN 15382 short fsub_inf_dst - tbl_fsub_op # INF - DENORM 15383 short fsub_res_snan - tbl_fsub_op # NORM - SNAN 15384 short tbl_fsub_op - tbl_fsub_op # 15385 short tbl_fsub_op - tbl_fsub_op # 15386 15387 short fsub_res_qnan - tbl_fsub_op # QNAN - NORM 15388 short fsub_res_qnan - tbl_fsub_op # QNAN - ZERO 15389 short fsub_res_qnan - tbl_fsub_op # QNAN - INF 15390 short fsub_res_qnan - tbl_fsub_op # QNAN - QNAN 15391 short fsub_res_qnan - tbl_fsub_op # QNAN - DENORM 15392 short fsub_res_snan - tbl_fsub_op # QNAN - SNAN 15393 short tbl_fsub_op - tbl_fsub_op # 15394 short tbl_fsub_op - tbl_fsub_op # 15395 15396 short fsub_norm - tbl_fsub_op # DENORM - NORM 15397 short fsub_zero_src - tbl_fsub_op # DENORM - ZERO 15398 short fsub_inf_src - tbl_fsub_op # DENORM - INF 15399 short fsub_res_qnan - tbl_fsub_op # NORM - QNAN 15400 short 
fsub_norm - tbl_fsub_op # DENORM - DENORM 15401 short fsub_res_snan - tbl_fsub_op # NORM - SNAN 15402 short tbl_fsub_op - tbl_fsub_op # 15403 short tbl_fsub_op - tbl_fsub_op # 15404 15405 short fsub_res_snan - tbl_fsub_op # SNAN - NORM 15406 short fsub_res_snan - tbl_fsub_op # SNAN - ZERO 15407 short fsub_res_snan - tbl_fsub_op # SNAN - INF 15408 short fsub_res_snan - tbl_fsub_op # SNAN - QNAN 15409 short fsub_res_snan - tbl_fsub_op # SNAN - DENORM 15410 short fsub_res_snan - tbl_fsub_op # SNAN - SNAN 15411 short tbl_fsub_op - tbl_fsub_op # 15412 short tbl_fsub_op - tbl_fsub_op # 15413 15414fsub_res_qnan: 15415 bra.l res_qnan 15416fsub_res_snan: 15417 bra.l res_snan 15418 15419# 15420# both operands are ZEROes 15421# 15422fsub_zero_2: 15423 mov.b SRC_EX(%a0),%d0 15424 mov.b DST_EX(%a1),%d1 15425 eor.b %d1,%d0 15426 bpl.b fsub_zero_2_chk_rm 15427 15428# the signs are opposite, so, return a ZERO w/ the sign of the dst ZERO 15429 tst.b %d0 # is dst negative? 15430 bmi.b fsub_zero_2_rm # yes 15431 fmov.s &0x00000000,%fp0 # no; return +ZERO 15432 mov.b &z_bmask,FPSR_CC(%a6) # set Z 15433 rts 15434 15435# 15436# the ZEROes have the same signs: 15437# - therefore, we return +ZERO if the rounding mode is RN,RZ, or RP 15438# - -ZERO is returned in the case of RM. 15439# 15440fsub_zero_2_chk_rm: 15441 mov.b 3+L_SCR3(%a6),%d1 15442 andi.b &0x30,%d1 # extract rnd mode 15443 cmpi.b %d1,&rm_mode*0x10 # is rnd mode = RM? 15444 beq.b fsub_zero_2_rm # yes 15445 fmov.s &0x00000000,%fp0 # no; return +ZERO 15446 mov.b &z_bmask,FPSR_CC(%a6) # set Z 15447 rts 15448 15449fsub_zero_2_rm: 15450 fmov.s &0x80000000,%fp0 # return -ZERO 15451 mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # set Z/NEG 15452 rts 15453 15454# 15455# one operand is a ZERO and the other is a DENORM or a NORM. 15456# scale the DENORM or NORM and jump to the regular fsub routine. 
15457# 15458fsub_zero_dst: 15459 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) 15460 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) 15461 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) 15462 bsr.l scale_to_zero_src # scale the operand 15463 clr.w FP_SCR1_EX(%a6) 15464 clr.l FP_SCR1_HI(%a6) 15465 clr.l FP_SCR1_LO(%a6) 15466 bra.w fsub_zero_entry # go execute fsub 15467 15468fsub_zero_src: 15469 mov.w DST_EX(%a1),FP_SCR1_EX(%a6) 15470 mov.l DST_HI(%a1),FP_SCR1_HI(%a6) 15471 mov.l DST_LO(%a1),FP_SCR1_LO(%a6) 15472 bsr.l scale_to_zero_dst # scale the operand 15473 clr.w FP_SCR0_EX(%a6) 15474 clr.l FP_SCR0_HI(%a6) 15475 clr.l FP_SCR0_LO(%a6) 15476 bra.w fsub_zero_entry # go execute fsub 15477 15478# 15479# both operands are INFs. an OPERR will result if the INFs have the 15480# same signs. else, 15481# 15482fsub_inf_2: 15483 mov.b SRC_EX(%a0),%d0 # exclusive or the signs 15484 mov.b DST_EX(%a1),%d1 15485 eor.b %d1,%d0 15486 bpl.l res_operr # weed out (-INF)+(+INF) 15487 15488# ok, so it's not an OPERR. but we do have to remember to return 15489# the src INF since that's where the 881/882 gets the j-bit. 15490 15491fsub_inf_src: 15492 fmovm.x SRC(%a0),&0x80 # return src INF 15493 fneg.x %fp0 # invert sign 15494 fbge.w fsub_inf_done # sign is now positive 15495 mov.b &neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG 15496 rts 15497 15498fsub_inf_dst: 15499 fmovm.x DST(%a1),&0x80 # return dst INF 15500 tst.b DST_EX(%a1) # is INF negative? 
15501 bpl.b fsub_inf_done # no 15502 mov.b &neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG 15503 rts 15504 15505fsub_inf_done: 15506 mov.b &inf_bmask,FPSR_CC(%a6) # set INF 15507 rts 15508 15509######################################################################### 15510# XDEF **************************************************************** # 15511# fsqrt(): emulates the fsqrt instruction # 15512# fssqrt(): emulates the fssqrt instruction # 15513# fdsqrt(): emulates the fdsqrt instruction # 15514# # 15515# XREF **************************************************************** # 15516# scale_sqrt() - scale the source operand # 15517# unf_res() - return default underflow result # 15518# ovf_res() - return default overflow result # 15519# res_qnan_1op() - return QNAN result # 15520# res_snan_1op() - return SNAN result # 15521# # 15522# INPUT *************************************************************** # 15523# a0 = pointer to extended precision source operand # 15524# d0 rnd prec,mode # 15525# # 15526# OUTPUT ************************************************************** # 15527# fp0 = result # 15528# fp1 = EXOP (if exception occurred) # 15529# # 15530# ALGORITHM *********************************************************** # 15531# Handle NANs, infinities, and zeroes as special cases. Divide # 15532# norms/denorms into ext/sgl/dbl precision. # 15533# For norms/denorms, scale the exponents such that a sqrt # 15534# instruction won't cause an exception. Use the regular fsqrt to # 15535# compute a result. Check if the regular operands would have taken # 15536# an exception. If so, return the default overflow/underflow result # 15537# and return the EXOP if exceptions are enabled. Else, scale the # 15538# result operand to the proper exponent. 
#									#
#########################################################################

	global		fssqrt
fssqrt:
	andi.b		&0x30,%d0		# clear rnd prec
	ori.b		&s_mode*0x10,%d0	# insert sgl precision
	bra.b		fsqrt

	global		fdsqrt
fdsqrt:
	andi.b		&0x30,%d0		# clear rnd prec
	ori.b		&d_mode*0x10,%d0	# insert dbl precision

	global		fsqrt
fsqrt:
	mov.l		%d0,L_SCR3(%a6)		# store rnd info
	clr.w		%d1
	mov.b		STAG(%a6),%d1
	bne.w		fsqrt_not_norm		# optimize on non-norm input

#
# SQUARE ROOT: norms and denorms ONLY!
#
fsqrt_norm:
	tst.b		SRC_EX(%a0)		# is operand negative?
	bmi.l		res_operr		# yes

	andi.b		&0xc0,%d0		# is precision extended?
	bne.b		fsqrt_not_ext		# no; go handle sgl or dbl

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsqrt.x		(%a0),%fp0		# execute square root

	fmov.l		%fpsr,%d1
	or.l		%d1,USER_FPSR(%a6)	# set N,INEX

	rts

fsqrt_denorm:
	tst.b		SRC_EX(%a0)		# is operand negative?
	bmi.l		res_operr		# yes

	andi.b		&0xc0,%d0		# is precision extended?
	bne.b		fsqrt_not_ext		# no; go handle sgl or dbl

	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)

	bsr.l		scale_sqrt		# calculate scale factor

	bra.w		fsqrt_sd_normal

#
# operand is either single or double
#
fsqrt_not_ext:
	cmpi.b		%d0,&s_mode*0x10	# separate sgl/dbl prec
	bne.w		fsqrt_dbl

#
# operand is to be rounded to single precision
#
fsqrt_sgl:
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)

	bsr.l		scale_sqrt		# calculate scale factor

	cmpi.l		%d0,&0x3fff-0x3f81	# will move in underflow?
	beq.w		fsqrt_sd_may_unfl	# maybe; go check
	bgt.w		fsqrt_sd_unfl		# yes; go handle underflow
	cmpi.l		%d0,&0x3fff-0x407f	# will move in overflow?
	beq.w		fsqrt_sd_may_ovfl	# maybe; go check
	blt.w		fsqrt_sd_ovfl		# yes; go handle overflow

#
# operand will NOT overflow or underflow when moved in to the fp reg file
#
fsqrt_sd_normal:
	fmov.l		&0x0,%fpsr		# clear FPSR
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

	fsqrt.x		FP_SCR0(%a6),%fp0	# perform square root

	fmov.l		%fpsr,%d1		# save FPSR
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

fsqrt_sd_normal_exit:
	mov.l		%d2,-(%sp)		# save d2
	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
	mov.w		FP_SCR0_EX(%a6),%d1	# load sgn,exp
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	sub.l		%d0,%d1			# undo scaling: exp - scale factor
	andi.w		&0x8000,%d2		# keep old sign
	or.w		%d1,%d2			# concat old sign,new exp
	mov.w		%d2,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
	rts

#
# operand is to be rounded to double precision
#
fsqrt_dbl:
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)

	bsr.l		scale_sqrt		# calculate scale factor

	cmpi.l		%d0,&0x3fff-0x3c01	# will move in underflow?
	beq.w		fsqrt_sd_may_unfl	# maybe; go check
	bgt.b		fsqrt_sd_unfl		# yes; go handle underflow
	cmpi.l		%d0,&0x3fff-0x43ff	# will move in overflow?
	beq.w		fsqrt_sd_may_ovfl	# maybe; go check
	blt.w		fsqrt_sd_ovfl		# yes; go handle overflow
	bra.w		fsqrt_sd_normal		# no; go handle normalized op

# we're on the line here and the distinguishing characteristic is whether
# the scaled exponent is 3fff or 3ffe. if it's 3fff, then it's a safe number;
# otherwise fall through to underflow.
fsqrt_sd_may_unfl:
	btst		&0x0,1+FP_SCR0_EX(%a6)	# is exponent 0x3fff?
	bne.w		fsqrt_sd_normal		# yes, so no underflow

#
# operand WILL underflow when moved in to the fp register file
#
fsqrt_sd_unfl:
	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit

	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsqrt.x		FP_SCR0(%a6),%fp0	# execute square root

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

# if underflow or inexact is enabled, go calculate EXOP first.
	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
	bne.b		fsqrt_sd_unfl_ena	# yes

fsqrt_sd_unfl_dis:
	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result

	lea		FP_SCR0(%a6),%a0	# pass: result addr
	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
	bsr.l		unf_res			# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# set possible 'Z' ccode
	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
	rts

#
# operand will underflow AND underflow is enabled.
# therefore, we must return the result rounded to extended precision.
#
# NOTE(review): FP_SCR0 still holds the scaled source operand at this point
# (the fsqrt result is only in fp0 and is stored later, in fsqrt_sd_unfl_dis),
# so the EXOP built below is derived from the operand, not from the square
# root - confirm this is intended.
#
fsqrt_sd_unfl_ena:
	mov.l		FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
	mov.l		FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
	mov.w		FP_SCR0_EX(%a6),%d1	# load current exponent

	mov.l		%d2,-(%sp)		# save d2
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# subtract scale factor
	addi.l		&0x6000,%d1		# add new bias
	andi.w		&0x7fff,%d1		# clear top bit
	or.w		%d2,%d1			# concat new sign,new exp
	mov.w		%d1,FP_SCR1_EX(%a6)	# insert new exp
	fmovm.x		FP_SCR1(%a6),&0x40	# return EXOP in fp1
	mov.l		(%sp)+,%d2		# restore d2
	bra.b		fsqrt_sd_unfl_dis

#
# operand WILL overflow.
#
fsqrt_sd_ovfl:
	fmov.l		&0x0,%fpsr		# clear FPSR
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

	fsqrt.x		FP_SCR0(%a6),%fp0	# perform square root

	fmov.l		&0x0,%fpcr		# clear FPCR
	fmov.l		%fpsr,%d1		# save FPSR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

fsqrt_sd_ovfl_tst:
	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
	bne.b		fsqrt_sd_ovfl_ena	# yes

#
# OVFL is not enabled; therefore, we must create the default result by
# calling ovf_res().
#
fsqrt_sd_ovfl_dis:
	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
	sne		%d1			# set sign param accordingly
	mov.l		L_SCR3(%a6),%d0		# pass: prec,mode
	bsr.l		ovf_res			# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
	fmovm.x		(%a0),&0x80		# return default result in fp0
	rts

#
# OVFL is enabled.
# the INEX2 bit has already been updated by the round to the correct precision.
# now, round to extended(and don't alter the FPSR).
#
# NOTE(review): as in fsqrt_sd_unfl_ena, FP_SCR0 has not been overwritten with
# the fsqrt result on any path reaching this label, so the EXOP is built from
# the scaled source operand - confirm this is intended.
#
fsqrt_sd_ovfl_ena:
	mov.l		%d2,-(%sp)		# save d2
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# undo scaling: exp - scale factor
	subi.l		&0x6000,%d1		# subtract bias
	andi.w		&0x7fff,%d1		# clear top bit
	or.w		%d2,%d1			# concat sign,exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	mov.l		(%sp)+,%d2		# restore d2
	bra.b		fsqrt_sd_ovfl_dis

#
# the move in MAY overflow. so...
#
fsqrt_sd_may_ovfl:
	btst		&0x0,1+FP_SCR0_EX(%a6)	# is exponent 0x3fff?
	bne.w		fsqrt_sd_ovfl		# yes, so overflow

	fmov.l		&0x0,%fpsr		# clear FPSR
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

	fsqrt.x		FP_SCR0(%a6),%fp0	# perform square root

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

	fmov.x		%fp0,%fp1		# make a copy of result
	fcmp.b		%fp1,&0x1		# is |result| >= 1.b?
	fbge.w		fsqrt_sd_ovfl_tst	# yes; overflow has occurred

# no, it didn't overflow; we have correct result
	bra.w		fsqrt_sd_normal_exit

##########################################################################

#
# input is not normalized; what is it?
#
fsqrt_not_norm:
	cmpi.b		%d1,&DENORM		# weed out DENORM
	beq.w		fsqrt_denorm
	cmpi.b		%d1,&ZERO		# weed out ZERO
	beq.b		fsqrt_zero
	cmpi.b		%d1,&INF		# weed out INF
	beq.b		fsqrt_inf
	cmpi.b		%d1,&SNAN		# weed out SNAN
	beq.l		res_snan_1op
	bra.l		res_qnan_1op		# anything else is handled as a QNAN

#
#	fsqrt(+0) = +0
#	fsqrt(-0) = -0
#	fsqrt(+INF) = +INF
#	fsqrt(-INF) = OPERR
#
fsqrt_zero:
	tst.b		SRC_EX(%a0)		# is ZERO positive or negative?
	bmi.b		fsqrt_zero_m		# negative
fsqrt_zero_p:
	fmov.s		&0x00000000,%fp0	# return +ZERO
	mov.b		&z_bmask,FPSR_CC(%a6)	# set 'Z' ccode bit
	rts
fsqrt_zero_m:
	fmov.s		&0x80000000,%fp0	# return -ZERO
	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6)	# set 'Z','N' ccode bits
	rts

fsqrt_inf:
	tst.b		SRC_EX(%a0)		# is INF positive or negative?
	bmi.l		res_operr		# negative
fsqrt_inf_p:
	fmovm.x		SRC(%a0),&0x80		# return +INF in fp0
	mov.b		&inf_bmask,FPSR_CC(%a6)	# set 'I' ccode bit
	rts

##########################################################################

#########################################################################
# XDEF ****************************************************************	#
#	addsub_scaler2(): scale inputs to fadd/fsub such that no	#
#			  OVFL/UNFL exceptions will result		#
#									#
# XREF ****************************************************************	#
#	norm() - normalize mantissa after adjusting exponent		#
#									#
# INPUT ***************************************************************	#
#	FP_SRC(a6) = fp op1(src)					#
#	FP_DST(a6) = fp op2(dst)					#
#									#
# OUTPUT **************************************************************	#
#	FP_SRC(a6) = fp op1 scaled(src)					#
#	FP_DST(a6) = fp op2 scaled(dst)					#
#	d0	   = scale amount					#
#									#
# ALGORITHM ***********************************************************	#
#	If the DST exponent is > the SRC exponent, set the DST exponent	#
#	equal to 0x3fff and scale the SRC exponent by the value that the #
#	DST exponent was scaled by. If the SRC exponent is greater or equal, #
#	do the opposite. Return this scale factor in d0.		#
#	If the two exponents differ by > the number of mantissa bits	#
#	plus two, then set the smallest exponent to a very small value as a #
#	quick shortcut.							#
#									#
#########################################################################

#
# NOTE: as coded, the operands are read through a0 (src) and a1 (dst) and the
# scaled copies are written to FP_SCR0(a6) (src) and FP_SCR1(a6) (dst).
# L_SCR1(a6) holds the src exponent in its upper word and the dst exponent
# in its lower word while the routine runs.
#
	global		addsub_scaler2
addsub_scaler2:
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)
	mov.w		SRC_EX(%a0),%d0
	mov.w		DST_EX(%a1),%d1
	mov.w		%d0,FP_SCR0_EX(%a6)
	mov.w		%d1,FP_SCR1_EX(%a6)

	andi.w		&0x7fff,%d0
	andi.w		&0x7fff,%d1
	mov.w		%d0,L_SCR1(%a6)		# store src exponent
	mov.w		%d1,2+L_SCR1(%a6)	# store dst exponent

	cmp.w		%d0, %d1		# is src exp >= dst exp?
	bge.l		src_exp_ge2

# dst exp is > src exp; scale dst to exp = 0x3fff
dst_exp_gt2:
	bsr.l		scale_to_zero_dst
	mov.l		%d0,-(%sp)		# save scale factor

	cmpi.b		STAG(%a6),&DENORM	# is src denormalized?
	bne.b		cmpexp12

	lea		FP_SCR0(%a6),%a0
	bsr.l		norm			# normalize the denorm; result is new exp
	neg.w		%d0			# new exp = -(shft val)
	mov.w		%d0,L_SCR1(%a6)		# insert new exp

cmpexp12:
	mov.w		2+L_SCR1(%a6),%d0
	subi.w		&mantissalen+2,%d0	# subtract mantissalen+2 from larger exp

	cmp.w		%d0,L_SCR1(%a6)		# is difference >= len(mantissa)+2?
	bge.b		quick_scale12

	mov.w		L_SCR1(%a6),%d0
	add.w		0x2(%sp),%d0		# scale src exponent by scale factor
	mov.w		FP_SCR0_EX(%a6),%d1
	and.w		&0x8000,%d1
	or.w		%d1,%d0			# concat {sgn,new exp}
	mov.w		%d0,FP_SCR0_EX(%a6)	# insert new src exponent

	mov.l		(%sp)+,%d0		# return SCALE factor
	rts

quick_scale12:
	andi.w		&0x8000,FP_SCR0_EX(%a6)	# zero src exponent
	bset		&0x0,1+FP_SCR0_EX(%a6)	# set exp = 1

	mov.l		(%sp)+,%d0		# return SCALE factor
	rts

# src exp is >= dst exp; scale src to exp = 0x3fff
src_exp_ge2:
	bsr.l		scale_to_zero_src
	mov.l		%d0,-(%sp)		# save scale factor

	cmpi.b		DTAG(%a6),&DENORM	# is dst denormalized?
	bne.b		cmpexp22
	lea		FP_SCR1(%a6),%a0
	bsr.l		norm			# normalize the denorm; result is new exp
	neg.w		%d0			# new exp = -(shft val)
	mov.w		%d0,2+L_SCR1(%a6)	# insert new exp

cmpexp22:
	mov.w		L_SCR1(%a6),%d0
	subi.w		&mantissalen+2,%d0	# subtract mantissalen+2 from larger exp

	cmp.w		%d0,2+L_SCR1(%a6)	# is difference >= len(mantissa)+2?
	bge.b		quick_scale22

	mov.w		2+L_SCR1(%a6),%d0
	add.w		0x2(%sp),%d0		# scale dst exponent by scale factor
	mov.w		FP_SCR1_EX(%a6),%d1
	andi.w		&0x8000,%d1
	or.w		%d1,%d0			# concat {sgn,new exp}
	mov.w		%d0,FP_SCR1_EX(%a6)	# insert new dst exponent

	mov.l		(%sp)+,%d0		# return SCALE factor
	rts

quick_scale22:
	andi.w		&0x8000,FP_SCR1_EX(%a6)	# zero dst exponent
	bset		&0x0,1+FP_SCR1_EX(%a6)	# set exp = 1

	mov.l		(%sp)+,%d0		# return SCALE factor
	rts

##########################################################################

#########################################################################
# XDEF ****************************************************************	#
#	scale_to_zero_src(): scale the exponent of extended precision	#
#			     value at FP_SCR0(a6).			#
#									#
# XREF ****************************************************************	#
#	norm() - normalize the mantissa if the operand was a DENORM	#
#									#
# INPUT ***************************************************************	#
#	FP_SCR0(a6) = extended precision operand to be scaled		#
#									#
# OUTPUT **************************************************************	#
#	FP_SCR0(a6) = scaled extended precision operand			#
#	d0	    = scale value					#
#									#
# ALGORITHM ***********************************************************	#
#	Set the exponent of the input operand to 0x3fff. Save the value	#
#	of the difference between the original and new exponent. Then,	#
#	normalize the operand if it was a DENORM. Add this normalization #
#	value to the previous value. Return the result.			#
#									#
#########################################################################

	global		scale_to_zero_src
scale_to_zero_src:
	mov.w		FP_SCR0_EX(%a6),%d0	# d0.w = operand's {sgn,exp}
	mov.w		%d0,%d1			# d1.w = copy used for the exponent

	andi.w		&0x8000,%d0		# isolate the sign bit
	ori.w		&0x3fff,%d0		# force the exponent field to 0x3fff
	mov.w		%d0,FP_SCR0_EX(%a6)	# write back {sgn,0x3fff}

	andi.l		&0x7fff,%d1		# d1 = original exponent, zero-extended

	cmpi.b		STAG(%a6),&DENORM	# was the operand tagged DENORM?
	bne.b		stzs_norm		# no; d1 already holds the exponent

# DENORM: normalize the mantissa and use -(shift count) as the exponent.
stzs_denorm:
	lea		FP_SCR0(%a6),%a0	# pass ptr to src op
	bsr.l		norm			# normalize denorm; d0 = shft val
	neg.l		%d0			# exponent = -(shft val)
	mov.l		%d0,%d1

stzs_norm:
	mov.l		&0x3fff,%d0
	sub.l		%d1,%d0			# scale = 0x3fff - exponent
	rts

###

#########################################################################
# XDEF ****************************************************************	#
#	scale_sqrt(): scale the input operand exponent so a subsequent	#
#		      fsqrt operation won't take an exception.		#
#									#
# XREF ****************************************************************	#
#	norm() - normalize the mantissa if the operand was a DENORM	#
#									#
# INPUT ***************************************************************	#
#	FP_SCR0(a6) = extended precision operand to be scaled		#
#									#
# OUTPUT **************************************************************	#
#	FP_SCR0(a6) = scaled extended precision operand			#
#	d0	    = scale value					#
#									#
# ALGORITHM ***********************************************************	#
#	If the input operand is a DENORM, normalize it.			#
#	If the exponent of the input operand is even, set the exponent	#
#	to 0x3ffe and return a scale factor of "(0x3ffe-exp)/2".
#	If the exponent of the input operand is odd, set the exponent	#
#	to 0x3fff and return a scale factor of "(0x3fff-exp)/2".	#
#									#
#########################################################################

	global		scale_sqrt
scale_sqrt:
	cmpi.b		STAG(%a6),&DENORM	# was the operand tagged DENORM?
	beq.b		ss_denorm		# yes; normalize it first

# NORM: the new exponent is 0x3ffe with bit 0 copied from the old exponent,
# i.e. 0x3ffe for an even exponent and 0x3fff for an odd one.
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch operand's {sgn,exp}
	andi.l		&0x7fff,%d1		# d1 = operand's exponent
	andi.w		&0x8000,FP_SCR0_EX(%a6)	# leave only the sign in memory

	mov.l		%d1,%d0
	andi.l		&0x1,%d0		# d0 = parity of the exponent
	ori.w		&0x3ffe,%d0		# d0 = 0x3ffe (even) or 0x3fff (odd)
	or.w		%d0,FP_SCR0_EX(%a6)	# insert the new exponent

	sub.l		%d1,%d0			# scale = new exponent - old exponent
	asr.l		&0x1,%d0		# divide scale factor by 2
	rts

# DENORM: norm() returns the shift count in d0; its parity selects the new
# exponent and the scale factor is (shift count + new exponent) / 2.
ss_denorm:
	lea		FP_SCR0(%a6),%a0	# pass ptr to src op
	bsr.l		norm			# normalize denorm

	btst		&0x0,%d0		# is the shift count even or odd?
	beq.b		ss_denorm_even

	ori.w		&0x3fff,FP_SCR0_EX(%a6)	# odd: OR 0x3fff into the exponent field

	add.l		&0x3fff,%d0
	asr.l		&0x1,%d0		# divide scale factor by 2
	rts

ss_denorm_even:
	ori.w		&0x3ffe,FP_SCR0_EX(%a6)	# even: OR 0x3ffe into the exponent field

	add.l		&0x3ffe,%d0
	asr.l		&0x1,%d0		# divide scale factor by 2
	rts

###

#########################################################################
# XDEF ****************************************************************	#
#	scale_to_zero_dst(): scale the exponent of extended precision	#
#			     value at FP_SCR1(a6).			#
#									#
# XREF ****************************************************************	#
#	norm() - normalize the mantissa if the operand was a DENORM	#
#									#
# INPUT ***************************************************************	#
#	FP_SCR1(a6) = extended precision operand to be scaled		#
#									#
# OUTPUT **************************************************************	#
#	FP_SCR1(a6) = scaled extended precision operand			#
#	d0	    = scale value					#
#									#
# ALGORITHM ***********************************************************	#
#	Set the exponent of the input operand to 0x3fff. Save the value	#
#	of the difference between the original and new exponent. Then,	#
#	normalize the operand if it was a DENORM. Add this normalization #
#	value to the previous value. Return the result.			#
#									#
#########################################################################

	global		scale_to_zero_dst
scale_to_zero_dst:
	mov.w		FP_SCR1_EX(%a6),%d0	# d0.w = operand's {sgn,exp}
	mov.w		%d0,%d1			# d1.w = copy used for the exponent

	andi.w		&0x8000,%d0		# isolate the sign bit
	ori.w		&0x3fff,%d0		# force the exponent field to 0x3fff
	mov.w		%d0,FP_SCR1_EX(%a6)	# write back {sgn,0x3fff}

	andi.l		&0x7fff,%d1		# d1 = original exponent, zero-extended

	cmpi.b		DTAG(%a6),&DENORM	# was the operand tagged DENORM?
	bne.b		stzd_norm		# no; d1 already holds the exponent

# DENORM: normalize the mantissa and use -(shift count) as the exponent.
stzd_denorm:
	lea		FP_SCR1(%a6),%a0	# pass ptr to dst op
	bsr.l		norm			# normalize denorm; d0 = shft val
	neg.l		%d0			# exponent = -(shft val)
	mov.l		%d0,%d1

stzd_norm:
	mov.l		&0x3fff,%d0
	sub.l		%d1,%d0			# scale = 0x3fff - exponent
	rts

##########################################################################

#########################################################################
# XDEF ****************************************************************	#
#	res_qnan(): return default result w/ QNAN operand for dyadic	#
#	res_snan(): return default result w/ SNAN operand for dyadic	#
#	res_qnan_1op(): return dflt result w/ QNAN operand for monadic	#
#	res_snan_1op(): return dflt result w/ SNAN operand for monadic	#
#									#
# XREF ****************************************************************	#
#	None								#
#									#
# INPUT ***************************************************************	#
#	FP_SRC(a6) = pointer to extended precision src operand		#
#	FP_DST(a6) = pointer to extended precision dst operand		#
#									#
# OUTPUT **************************************************************	#
#	fp0 = default result						#
#									#
# ALGORITHM ***********************************************************	#
#	If either operand (but not both operands) of an operation is a	#
#	nonsignalling NAN, then that NAN is returned as the result. If both #
#	operands are nonsignalling NANs, then the destination operand	#
#	nonsignalling NAN is returned as the result.			#
#	If either operand to an operation is a signalling NAN (SNAN),	#
#	then, the SNAN bit is set in the FPSR EXC byte. If the SNAN trap #
#	enable bit is set in the FPCR, then the trap is taken and the	#
#	destination is not modified.
#	If the SNAN trap enable bit is not set,
#	then the SNAN is converted to a nonsignalling NAN (by setting the
#	SNAN bit in the operand to one), and the operation continues as
#	described in the preceding paragraph, for nonsignalling NANs.
#	Make sure the appropriate FPSR bits are set before exiting.
#
#########################################################################

#
# Order of the tests as coded:
#   dyadic entries (res_qnan/res_snan share one address):
#	1. DTAG == SNAN -> dst_snan2: flag NAN+AIOP+SNAN, set bit 6 of
#	   FP_DST_HI, result is FP_DST.
#	2. DTAG == QNAN -> dst_qnan2: result is FP_DST; SNAN+AIOP are
#	   flagged as well only when STAG == SNAN (the src operand itself
#	   is left untouched on this path).
#	3. otherwise the src decides: STAG == QNAN -> src_qnan2, and any
#	   other STAG falls through into src_snan2 without a further test.
#   monadic entries:
#	res_snan_1op enters at src_snan2, res_qnan_1op at src_qnan2.
#
# Every path ends at nan_comp with a0 -> chosen operand; neg_mask is set
# if the operand's sign bit is set, and the operand is loaded into fp0.
#
	global		res_qnan
	global		res_snan
res_qnan:
res_snan:
	cmp.b		DTAG(%a6), &SNAN	# dst tagged SNAN?
	beq.b		dst_snan2		# yes
	cmp.b		DTAG(%a6), &QNAN	# dst tagged QNAN?
	beq.b		dst_qnan2		# yes
src_nan:
	cmp.b		STAG(%a6), &QNAN	# src tagged QNAN?
	beq.b		src_qnan2		# yes; else fall into SNAN case
	global		res_snan_1op
res_snan_1op:
src_snan2:
	bset		&0x6, FP_SRC_HI(%a6)	# set bit 6 of src mantissa top byte
	or.l		&nan_mask+aiop_mask+snan_mask, USER_FPSR(%a6)
	lea		FP_SRC(%a6), %a0	# result comes from src
	bra.b		nan_comp
	global		res_qnan_1op
res_qnan_1op:
src_qnan2:
	or.l		&nan_mask, USER_FPSR(%a6) # only the NAN cc is flagged
	lea		FP_SRC(%a6), %a0	# result comes from src
	bra.b		nan_comp
dst_snan2:
	or.l		&nan_mask+aiop_mask+snan_mask, USER_FPSR(%a6)
	bset		&0x6, FP_DST_HI(%a6)	# set bit 6 of dst mantissa top byte
	lea		FP_DST(%a6), %a0	# result comes from dst
	bra.b		nan_comp
dst_qnan2:
	lea		FP_DST(%a6), %a0	# result comes from dst
	cmp.b		STAG(%a6), &SNAN	# is the src an SNAN as well?
	bne		nan_done		# no; NAN cc only
	or.l		&aiop_mask+snan_mask, USER_FPSR(%a6) # yes; flag it too
nan_done:
	or.l		&nan_mask, USER_FPSR(%a6)
nan_comp:
	btst		&0x7, FTEMP_EX(%a0)	# sign bit of chosen NAN set?
	beq.b		nan_not_neg		# no
	or.l		&neg_mask, USER_FPSR(%a6) # yes; flag negative
nan_not_neg:
	fmovm.x		(%a0), &0x80		# fp0 = chosen NAN
	rts

#########################################################################
# XDEF ****************************************************************	#
#	res_operr(): return default result during operand error	#
#									#
# XREF ****************************************************************	#
#	None								#
#									#
# INPUT ***************************************************************	#
#	None								#
#									#
# OUTPUT **************************************************************	#
#	fp0 = default operand error result				#
#									#
# ALGORITHM ***********************************************************	#
#	A nonsignalling NAN is returned as the default result when	#
#	an operand error occurs for the following cases:		#
#									#
#	Multiply: (Infinity x Zero)					#
#	Divide  : (Zero / Zero) || (Infinity / Infinity)		#
#									#
#	NAN, OPERR and AIOP are or'ed into USER_FPSR(a6) and the	#
#	constant at nan_return is loaded into fp0.			#
#									#
#########################################################################

	global		res_operr
res_operr:
	or.l		&nan_mask+operr_mask+aiop_mask, USER_FPSR(%a6)
	fmovm.x		nan_return(%pc), &0x80	# fp0 = constant below
	rts

# sign 0, exponent 0x7fff, mantissa all ones
nan_return:
	long		0x7fff0000, 0xffffffff, 0xffffffff

#########################################################################
# fdbcc(): routine to emulate the fdbcc instruction			#
#									#
# XDEF ****************************************************************	#
#	_fdbcc()							#
#									#
# XREF ****************************************************************	#
#	fetch_dreg() - fetch Dn value					#
#	store_dreg_l() - store updated Dn value				#
#									#
# INPUT ***************************************************************	#
#	d0 = displacement						#
#									#
# OUTPUT **************************************************************	#
#	none								#
#									#
# ALGORITHM
# ***********************************************************	#
#	This routine checks which conditional predicate is specified by
#	the stacked fdbcc instruction opcode and then branches to a routine
#	for that predicate. The corresponding fbcc instruction is then used
#	to see whether the condition (specified by the stacked FPSR) is true
#	or false.
#	If a BSUN exception should be indicated, the BSUN and AIOP
#	bits are set in the stacked FPSR. If the BSUN exception is enabled,
#	the fbsun_flg is set in the SPCOND_FLG location on the stack and
#	nothing else is done.
#	Otherwise, if the predicate is true, nothing is done. If the
#	predicate is false, Dn is fetched and its low word is decremented
#	by one. If that word is not equal to -1, the displacement value is
#	added to the instruction PC (plus 4) and stored as the stacked PC
#	so that when an "rte" is finally executed, the branch occurs.
#
#	In the condition equations below, "v" is OR, "^" is AND and "~"
#	is NOT.
#
#########################################################################
	global		_fdbcc
_fdbcc:
	mov.l		%d0,L_SCR1(%a6)		# park displacement for later

	mov.w		EXC_CMDREG(%a6),%d0	# d0 = predicate (table index)

	clr.l		%d1			# start from a clean d1
	mov.b		FPSR_CC(%a6),%d1	# stacked cc byte -> low byte
	ror.l		&0x8,%d1		# move it up to the top byte
	fmov.l		%d1,%fpsr		# live FPSR now holds those cc's

	mov.w		(tbl_fdbcc.b,%pc,%d0.w*2),%d1 # handler offset
	jmp		(tbl_fdbcc.b,%pc,%d1.w) # go to the handler

# One 16-bit offset (relative to tbl_fdbcc) per predicate value.
tbl_fdbcc:
	short		fdbcc_f		-	tbl_fdbcc	# 00
	short		fdbcc_eq	-	tbl_fdbcc	# 01
	short		fdbcc_ogt	-	tbl_fdbcc	# 02
	short		fdbcc_oge	-	tbl_fdbcc	# 03
	short		fdbcc_olt	-	tbl_fdbcc	# 04
	short		fdbcc_ole	-	tbl_fdbcc	# 05
	short		fdbcc_ogl	-	tbl_fdbcc	# 06
	short		fdbcc_or	-	tbl_fdbcc	# 07
	short		fdbcc_un	-	tbl_fdbcc	# 08
	short		fdbcc_ueq	-	tbl_fdbcc	# 09
	short		fdbcc_ugt	-	tbl_fdbcc	# 10
	short		fdbcc_uge	-	tbl_fdbcc	# 11
	short		fdbcc_ult	-	tbl_fdbcc	# 12
	short		fdbcc_ule	-	tbl_fdbcc	# 13
	short		fdbcc_neq	-	tbl_fdbcc	# 14
	short		fdbcc_t		-	tbl_fdbcc	# 15
	short		fdbcc_sf	-	tbl_fdbcc	# 16
	short		fdbcc_seq	-	tbl_fdbcc	# 17
	short		fdbcc_gt	-	tbl_fdbcc	# 18
	short		fdbcc_ge	-	tbl_fdbcc	# 19
	short		fdbcc_lt	-	tbl_fdbcc	# 20
	short		fdbcc_le	-	tbl_fdbcc	# 21
	short		fdbcc_gl	-	tbl_fdbcc	# 22
	short		fdbcc_gle	-	tbl_fdbcc	# 23
	short		fdbcc_ngle	-	tbl_fdbcc	# 24
	short		fdbcc_ngl	-	tbl_fdbcc	# 25
	short		fdbcc_nle	-	tbl_fdbcc	# 26
	short		fdbcc_nlt	-	tbl_fdbcc	# 27
	short		fdbcc_nge	-	tbl_fdbcc	# 28
	short		fdbcc_ngt	-	tbl_fdbcc	# 29
	short		fdbcc_sneq	-	tbl_fdbcc	# 30
	short		fdbcc_st	-	tbl_fdbcc	# 31

#########################################################################
#									#
# IEEE Nonaware tests							#
#									#
# Only a false predicate touches the counter (via fdbcc_false).	#
# On whichever side of the test the NAN cc bit can be set, that bit	#
# is checked and, when set, BSUN and AIOP are flagged in		#
# USER_FPSR(a6). If BSUN is also enabled in FPCR_ENABLE(a6) the	#
# handler leaves through fdbcc_bsun instead of touching the counter.	#
#									#
# The cases EQ and NE are shared by the Aware and Nonaware groups	#
# and are incapable of setting the BSUN exception bit.			#
#									#
# Typically, only one of the two possible branch directions could	#
# have the NAN bit set.						#
# (This is assuming the mutual exclusiveness of FPSR cc bit groupings	#
# is preserved.)							#
#									#
#########################################################################

#
# equal:
#	Z
#
fdbcc_eq:
	fbeq.w		fdbcc_eq_yes		# predicate true?
fdbcc_eq_no:
	bra.w		fdbcc_false		# false; do the count
fdbcc_eq_yes:
	rts					# true; nothing to do

#
# not equal:
#	~Z
#
fdbcc_neq:
	fbneq.w		fdbcc_neq_yes		# predicate true?
fdbcc_neq_no:
	bra.w		fdbcc_false		# false; do the count
fdbcc_neq_yes:
	rts					# true; nothing to do

#
# greater than:
#	~(NAN v Z v N)
#
fdbcc_gt:
	fbgt.w		fdbcc_gt_yes		# predicate true?
	btst		&nan_bit, FPSR_CC(%a6)	# false; NAN cc bit set?
	beq.w		fdbcc_false		# clear; do the count
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # flag BSUN+AIOP
	btst		&bsun_bit, FPCR_ENABLE(%a6) # BSUN enabled?
	bne.w		fdbcc_bsun		# enabled; report it
	bra.w		fdbcc_false		# disabled; do the count
fdbcc_gt_yes:
	rts					# true; nothing to do

#
# not greater than:
#	NAN v Z v N
#
fdbcc_ngt:
	fbngt.w		fdbcc_ngt_yes		# predicate true?
fdbcc_ngt_no:
	bra.w		fdbcc_false		# false; do the count
fdbcc_ngt_yes:
	btst		&nan_bit, FPSR_CC(%a6)	# true; NAN cc bit set?
	beq.b		fdbcc_ngt_done		# clear; finished
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # flag BSUN+AIOP
	btst		&bsun_bit, FPCR_ENABLE(%a6) # BSUN enabled?
	bne.w		fdbcc_bsun		# enabled; report it
fdbcc_ngt_done:
	rts					# nothing more to do

#
# greater than or equal:
#	Z v ~(NAN v N)
#
fdbcc_ge:
	fbge.w		fdbcc_ge_yes		# predicate true?
fdbcc_ge_no:
	btst		&nan_bit, FPSR_CC(%a6)	# false; NAN cc bit set?
	beq.w		fdbcc_false		# clear; do the count
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # flag BSUN+AIOP
	btst		&bsun_bit, FPCR_ENABLE(%a6) # BSUN enabled?
	bne.w		fdbcc_bsun		# enabled; report it
	bra.w		fdbcc_false		# disabled; do the count
fdbcc_ge_yes:
	btst		&nan_bit, FPSR_CC(%a6)	# true; NAN cc bit set?
	beq.b		fdbcc_ge_yes_done	# clear; finished
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # flag BSUN+AIOP
	btst		&bsun_bit, FPCR_ENABLE(%a6) # BSUN enabled?
	bne.w		fdbcc_bsun		# enabled; report it
fdbcc_ge_yes_done:
	rts					# nothing more to do

#
# not (greater than or equal):
#	NAN v (N ^ ~Z)
#
fdbcc_nge:
	fbnge.w		fdbcc_nge_yes		# predicate true?
fdbcc_nge_no:
	bra.w		fdbcc_false		# false; do the count
fdbcc_nge_yes:
	btst		&nan_bit, FPSR_CC(%a6)	# true; NAN cc bit set?
	beq.b		fdbcc_nge_done		# clear; finished
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # flag BSUN+AIOP
	btst		&bsun_bit, FPCR_ENABLE(%a6) # BSUN enabled?
	bne.w		fdbcc_bsun		# enabled; report it
fdbcc_nge_done:
	rts					# nothing more to do

#
# less than:
#	N ^ ~(NAN v Z)
#
fdbcc_lt:
	fblt.w		fdbcc_lt_yes		# predicate true?
fdbcc_lt_no:
	btst		&nan_bit, FPSR_CC(%a6)	# false; NAN cc bit set?
	beq.w		fdbcc_false		# clear; do the count
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # flag BSUN+AIOP
	btst		&bsun_bit, FPCR_ENABLE(%a6) # BSUN enabled?
	bne.w		fdbcc_bsun		# enabled; report it
	bra.w		fdbcc_false		# disabled; do the count
fdbcc_lt_yes:
	rts					# true; nothing to do

#
# not less than:
#	NAN v (Z v ~N)
#
fdbcc_nlt:
	fbnlt.w		fdbcc_nlt_yes		# predicate true?
fdbcc_nlt_no:
	bra.w		fdbcc_false		# false; do the count
fdbcc_nlt_yes:
	btst		&nan_bit, FPSR_CC(%a6)	# true; NAN cc bit set?
	beq.b		fdbcc_nlt_done		# clear; finished
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # flag BSUN+AIOP
	btst		&bsun_bit, FPCR_ENABLE(%a6) # BSUN enabled?
	bne.w		fdbcc_bsun		# enabled; report it
fdbcc_nlt_done:
	rts					# nothing more to do

#
# less than or equal:
#	Z v (N ^ ~NAN)
#
fdbcc_le:
	fble.w		fdbcc_le_yes		# predicate true?
fdbcc_le_no:
	btst		&nan_bit, FPSR_CC(%a6)	# false; NAN cc bit set?
	beq.w		fdbcc_false		# clear; do the count
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # flag BSUN+AIOP
	btst		&bsun_bit, FPCR_ENABLE(%a6) # BSUN enabled?
	bne.w		fdbcc_bsun		# enabled; report it
	bra.w		fdbcc_false		# disabled; do the count
fdbcc_le_yes:
	btst		&nan_bit, FPSR_CC(%a6)	# true; NAN cc bit set?
	beq.b		fdbcc_le_yes_done	# clear; finished
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # flag BSUN+AIOP
	btst		&bsun_bit, FPCR_ENABLE(%a6) # BSUN enabled?
	bne.w		fdbcc_bsun		# enabled; report it
fdbcc_le_yes_done:
	rts					# nothing more to do

#
# not (less than or equal):
#	NAN v ~(N v Z)
#
fdbcc_nle:
	fbnle.w		fdbcc_nle_yes		# predicate true?
fdbcc_nle_no:
	bra.w		fdbcc_false		# false; do the count
fdbcc_nle_yes:
	btst		&nan_bit, FPSR_CC(%a6)	# true; NAN cc bit set?
	beq.w		fdbcc_nle_done		# clear; finished
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # flag BSUN+AIOP
	btst		&bsun_bit, FPCR_ENABLE(%a6) # BSUN enabled?
	bne.w		fdbcc_bsun		# enabled; report it
fdbcc_nle_done:
	rts					# nothing more to do

#
# greater or less than:
#	~(NAN v Z)
#
fdbcc_gl:
	fbgl.w		fdbcc_gl_yes		# predicate true?
fdbcc_gl_no:
	btst		&nan_bit, FPSR_CC(%a6)	# false; NAN cc bit set?
	beq.w		fdbcc_false		# clear; do the count
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # flag BSUN+AIOP
	btst		&bsun_bit, FPCR_ENABLE(%a6) # BSUN enabled?
	bne.w		fdbcc_bsun		# enabled; report it
	bra.w		fdbcc_false		# disabled; do the count
fdbcc_gl_yes:
	rts					# true; nothing to do

#
# not (greater or less than):
#	NAN v Z
#
fdbcc_ngl:
	fbngl.w		fdbcc_ngl_yes		# predicate true?
fdbcc_ngl_no:
	bra.w		fdbcc_false		# false; do the count
fdbcc_ngl_yes:
	btst		&nan_bit, FPSR_CC(%a6)	# true; NAN cc bit set?
	beq.b		fdbcc_ngl_done		# clear; finished
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # flag BSUN+AIOP
	btst		&bsun_bit, FPCR_ENABLE(%a6) # BSUN enabled?
	bne.w		fdbcc_bsun		# enabled; report it
fdbcc_ngl_done:
	rts					# nothing more to do

#
# greater, less, or equal:
#	~NAN
#
# The false side flags BSUN+AIOP without a separate NAN test.
#
fdbcc_gle:
	fbgle.w		fdbcc_gle_yes		# predicate true?
fdbcc_gle_no:
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # flag BSUN+AIOP
	btst		&bsun_bit, FPCR_ENABLE(%a6) # BSUN enabled?
	bne.w		fdbcc_bsun		# enabled; report it
	bra.w		fdbcc_false		# disabled; do the count
fdbcc_gle_yes:
	rts					# true; nothing to do

#
# not (greater, less, or equal):
#	NAN
#
# The true side flags BSUN+AIOP without a separate NAN test.
#
fdbcc_ngle:
	fbngle.w	fdbcc_ngle_yes		# predicate true?
fdbcc_ngle_no:
	bra.w		fdbcc_false		# false; do the count
fdbcc_ngle_yes:
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # flag BSUN+AIOP
	btst		&bsun_bit, FPCR_ENABLE(%a6) # BSUN enabled?
	bne.w		fdbcc_bsun		# enabled; report it
	rts					# disabled; nothing to do

#########################################################################
#									#
# Miscellaneous tests							#
#									#
# For the IEEE miscellaneous tests, all but fdbf and fdbt can set bsun. #
#									#
#########################################################################

#
# false:
#	False
#
fdbcc_f:					# no bsun possible
	bra.w		fdbcc_false		# always do the count

#
# true:
#	True
#
fdbcc_t:					# no bsun possible
	rts					# never touches the counter

#
# signalling false:
#	False
#
fdbcc_sf:
	btst		&nan_bit, FPSR_CC(%a6)	# NAN cc bit set?
	beq.w		fdbcc_false		# clear; do the count
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # flag BSUN+AIOP
	btst		&bsun_bit, FPCR_ENABLE(%a6) # BSUN enabled?
	bne.w		fdbcc_bsun		# enabled; report it
	bra.w		fdbcc_false		# disabled; do the count

#
# signalling true:
#	True
#
fdbcc_st:
	btst		&nan_bit, FPSR_CC(%a6)	# NAN cc bit set?
	beq.b		fdbcc_st_done		# clear; finished
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # flag BSUN+AIOP
	btst		&bsun_bit, FPCR_ENABLE(%a6) # BSUN enabled?
	bne.w		fdbcc_bsun		# enabled; report it
fdbcc_st_done:
	rts					# nothing more to do

#
# signalling equal:
#	Z
#
fdbcc_seq:
	fbseq.w		fdbcc_seq_yes		# predicate true?
fdbcc_seq_no:
	btst		&nan_bit, FPSR_CC(%a6)	# false; NAN cc bit set?
	beq.w		fdbcc_false		# clear; do the count
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # flag BSUN+AIOP
	btst		&bsun_bit, FPCR_ENABLE(%a6) # BSUN enabled?
	bne.w		fdbcc_bsun		# enabled; report it
	bra.w		fdbcc_false		# disabled; do the count
fdbcc_seq_yes:
	btst		&nan_bit, FPSR_CC(%a6)	# true; NAN cc bit set?
	beq.b		fdbcc_seq_yes_done	# clear; finished
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # flag BSUN+AIOP
	btst		&bsun_bit, FPCR_ENABLE(%a6) # BSUN enabled?
	bne.w		fdbcc_bsun		# enabled; report it
fdbcc_seq_yes_done:
	rts					# nothing more to do

#
# signalling not equal:
#	~Z
#
fdbcc_sneq:
	fbsneq.w	fdbcc_sneq_yes		# predicate true?
fdbcc_sneq_no:
	btst		&nan_bit, FPSR_CC(%a6)	# false; NAN cc bit set?
	beq.w		fdbcc_false		# clear; do the count
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # flag BSUN+AIOP
	btst		&bsun_bit, FPCR_ENABLE(%a6) # BSUN enabled?
	bne.w		fdbcc_bsun		# enabled; report it
	bra.w		fdbcc_false		# disabled; do the count
fdbcc_sneq_yes:
	btst		&nan_bit, FPSR_CC(%a6)	# true; NAN cc bit set?
	beq.w		fdbcc_sneq_done		# clear; finished
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # flag BSUN+AIOP
	btst		&bsun_bit, FPCR_ENABLE(%a6) # BSUN enabled?
	bne.w		fdbcc_bsun		# enabled; report it
fdbcc_sneq_done:
	rts					# nothing more to do

#########################################################################
#									#
# IEEE Aware tests							#
#									#
# For the IEEE aware tests, action is only taken if the result is false.#
# A true predicate returns at once; a false one goes to the decrement	#
# routine (fdbcc_false).						#
# The BSUN exception will not be set for any of these tests.		#
#									#
#########################################################################

#
# ordered greater than:
#	~(NAN v Z v N)
#
fdbcc_ogt:
	fbogt.w		fdbcc_ogt_yes		# predicate true?
fdbcc_ogt_no:
	bra.w		fdbcc_false		# false; do the count
fdbcc_ogt_yes:
	rts					# true; nothing to do

#
# unordered or less or equal:
#	NAN v Z v N
#
fdbcc_ule:
	fbule.w		fdbcc_ule_yes		# predicate true?
fdbcc_ule_no:
	bra.w		fdbcc_false		# false; do the count
fdbcc_ule_yes:
	rts					# true; nothing to do

#
# ordered greater than or equal:
#	Z v ~(NAN v N)
#
fdbcc_oge:
	fboge.w		fdbcc_oge_yes		# predicate true?
fdbcc_oge_no:
	bra.w		fdbcc_false		# false; do the count
fdbcc_oge_yes:
	rts					# true; nothing to do

#
# unordered or less than:
#	NAN v (N ^ ~Z)
#
fdbcc_ult:
	fbult.w		fdbcc_ult_yes		# predicate true?
fdbcc_ult_no:
	bra.w		fdbcc_false		# false; do the count
fdbcc_ult_yes:
	rts					# true; nothing to do

#
# ordered less than:
#	N ^ ~(NAN v Z)
#
fdbcc_olt:
	fbolt.w		fdbcc_olt_yes		# predicate true?
fdbcc_olt_no:
	bra.w		fdbcc_false		# false; do the count
fdbcc_olt_yes:
	rts					# true; nothing to do

#
# unordered or greater or equal:
#	NAN v Z v ~N
#
fdbcc_uge:
	fbuge.w		fdbcc_uge_yes		# predicate true?
fdbcc_uge_no:
	bra.w		fdbcc_false		# false; do the count
fdbcc_uge_yes:
	rts					# true; nothing to do

#
# ordered less than or equal:
#	Z v (N ^ ~NAN)
#
fdbcc_ole:
	fbole.w		fdbcc_ole_yes		# predicate true?
fdbcc_ole_no:
	bra.w		fdbcc_false		# false; do the count
fdbcc_ole_yes:
	rts					# true; nothing to do

#
# unordered or greater than:
#	NAN v ~(N v Z)
#
fdbcc_ugt:
	fbugt.w		fdbcc_ugt_yes		# predicate true?
fdbcc_ugt_no:
	bra.w		fdbcc_false		# false; do the count
fdbcc_ugt_yes:
	rts					# true; nothing to do

#
# ordered greater or less than:
#	~(NAN v Z)
#
fdbcc_ogl:
	fbogl.w		fdbcc_ogl_yes		# predicate true?
fdbcc_ogl_no:
	bra.w		fdbcc_false		# false; do the count
fdbcc_ogl_yes:
	rts					# true; nothing to do

#
# unordered or equal:
#	NAN v Z
#
fdbcc_ueq:
	fbueq.w		fdbcc_ueq_yes		# predicate true?
fdbcc_ueq_no:
	bra.w		fdbcc_false		# false; do the count
fdbcc_ueq_yes:
	rts					# true; nothing to do

#
# ordered:
#	~NAN
#
fdbcc_or:
	fbor.w		fdbcc_or_yes		# predicate true?
fdbcc_or_no:
	bra.w		fdbcc_false		# false; do the count
fdbcc_or_yes:
	rts					# true; nothing to do

#
# unordered:
#	NAN
#
fdbcc_un:
	fbun.w		fdbcc_un_yes		# predicate true?
fdbcc_un_no:
	bra.w		fdbcc_false		# false; do the count
fdbcc_un_yes:
	rts					# true; nothing to do

#######################################################################

#
# Predicate was false and no enabled BSUN is being reported.
#
# (1) subtract 1 from the low word of the count register
# (2) if (low word == -1) then
#	leave the stacked PC alone
#     else
#	EXC_PC = USER_FPIAR + 4 + saved displacement
#
fdbcc_false:
	mov.b		1+EXC_OPWORD(%a6), %d1	# low byte of the opword
	andi.w		&0x7, %d1		# d1 = count register number

	bsr.l		fetch_dreg		# d0 = Dn
# d0 has to survive unchanged from here through store_dreg_l and into
# the compare that follows it.

	subq.w		&0x1, %d0		# decrement the low word only

	bsr.l		store_dreg_l		# write Dn back

	cmpi.w		%d0, &-0x1		# counter word expired (-1)?
	bne.b		fdbcc_false_cont	# no; set up the branch
	rts					# yes; PC stays as stacked

fdbcc_false_cont:
	mov.l		L_SCR1(%a6),%d0		# d0 = saved displacement
	add.l		USER_FPIAR(%a6),%d0	# + instruction PC
	addq.l		&0x4,%d0		# + 4 (instruction length)
	mov.l		%d0,EXC_PC(%a6)		# becomes the stacked PC
	rts

# the emulation routine set bsun and BSUN was enabled. have to
# fix stack and jump to the bsun handler.
# let the caller of this routine shift the stack frame up to
# eliminate the effective address field.
fdbcc_bsun:
	mov.b		&fbsun_flg,SPCOND_FLG(%a6) # tell the caller about BSUN
	rts

#########################################################################
# ftrapcc(): routine to emulate the ftrapcc instruction			#
#									#
# XDEF ****************************************************************	#
#	_ftrapcc()							#
#									#
# XREF ****************************************************************	#
#	none								#
#									#
# INPUT ***************************************************************	#
#	none								#
#									#
# OUTPUT **************************************************************	#
#	none								#
#									#
# ALGORITHM ***********************************************************	#
#	This routine checks which conditional predicate is specified by	#
#	the stacked ftrapcc instruction opcode and then branches to a routine	#
#	for that predicate. The corresponding fbcc instruction is then used	#
#	to see whether the condition (specified by the stacked FPSR) is true	#
#	or false.							#
#	If a BSUN exception should be indicated, the BSUN and ABSUN	#
#	bits are set in the stacked FPSR. If the BSUN exception is enabled,	#
#	the fbsun_flg is set in the SPCOND_FLG location on the stack. If an	#
#	enabled BSUN should not be flagged and the predicate is true, then	#
#	the ftrapcc_flg is set in the SPCOND_FLG location. These special	#
#	flags indicate to the calling routine to emulate the exceptional	#
#	condition.
# 16914# # 16915######################################################################### 16916 16917 global _ftrapcc 16918_ftrapcc: 16919 mov.w EXC_CMDREG(%a6),%d0 # fetch predicate 16920 16921 clr.l %d1 # clear scratch reg 16922 mov.b FPSR_CC(%a6),%d1 # fetch fp ccodes 16923 ror.l &0x8,%d1 # rotate to top byte 16924 fmov.l %d1,%fpsr # insert into FPSR 16925 16926 mov.w (tbl_ftrapcc.b,%pc,%d0.w*2), %d1 # load table 16927 jmp (tbl_ftrapcc.b,%pc,%d1.w) # jump to ftrapcc routine 16928 16929tbl_ftrapcc: 16930 short ftrapcc_f - tbl_ftrapcc # 00 16931 short ftrapcc_eq - tbl_ftrapcc # 01 16932 short ftrapcc_ogt - tbl_ftrapcc # 02 16933 short ftrapcc_oge - tbl_ftrapcc # 03 16934 short ftrapcc_olt - tbl_ftrapcc # 04 16935 short ftrapcc_ole - tbl_ftrapcc # 05 16936 short ftrapcc_ogl - tbl_ftrapcc # 06 16937 short ftrapcc_or - tbl_ftrapcc # 07 16938 short ftrapcc_un - tbl_ftrapcc # 08 16939 short ftrapcc_ueq - tbl_ftrapcc # 09 16940 short ftrapcc_ugt - tbl_ftrapcc # 10 16941 short ftrapcc_uge - tbl_ftrapcc # 11 16942 short ftrapcc_ult - tbl_ftrapcc # 12 16943 short ftrapcc_ule - tbl_ftrapcc # 13 16944 short ftrapcc_neq - tbl_ftrapcc # 14 16945 short ftrapcc_t - tbl_ftrapcc # 15 16946 short ftrapcc_sf - tbl_ftrapcc # 16 16947 short ftrapcc_seq - tbl_ftrapcc # 17 16948 short ftrapcc_gt - tbl_ftrapcc # 18 16949 short ftrapcc_ge - tbl_ftrapcc # 19 16950 short ftrapcc_lt - tbl_ftrapcc # 20 16951 short ftrapcc_le - tbl_ftrapcc # 21 16952 short ftrapcc_gl - tbl_ftrapcc # 22 16953 short ftrapcc_gle - tbl_ftrapcc # 23 16954 short ftrapcc_ngle - tbl_ftrapcc # 24 16955 short ftrapcc_ngl - tbl_ftrapcc # 25 16956 short ftrapcc_nle - tbl_ftrapcc # 26 16957 short ftrapcc_nlt - tbl_ftrapcc # 27 16958 short ftrapcc_nge - tbl_ftrapcc # 28 16959 short ftrapcc_ngt - tbl_ftrapcc # 29 16960 short ftrapcc_sneq - tbl_ftrapcc # 30 16961 short ftrapcc_st - tbl_ftrapcc # 31 16962 16963######################################################################### 16964# # 16965# IEEE Nonaware tests # 16966# 
# 16967# For the IEEE nonaware tests, we set the result based on the # 16968# floating point condition codes. In addition, we check to see # 16969# if the NAN bit is set, in which case BSUN and AIOP will be set. # 16970# # 16971# The cases EQ and NE are shared by the Aware and Nonaware groups # 16972# and are incapable of setting the BSUN exception bit. # 16973# # 16974# Typically, only one of the two possible branch directions could # 16975# have the NAN bit set. # 16976# # 16977######################################################################### 16978 16979# 16980# equal: 16981# 16982# Z 16983# 16984ftrapcc_eq: 16985 fbeq.w ftrapcc_trap # equal? 16986ftrapcc_eq_no: 16987 rts # do nothing 16988 16989# 16990# not equal: 16991# _ 16992# Z 16993# 16994ftrapcc_neq: 16995 fbneq.w ftrapcc_trap # not equal? 16996ftrapcc_neq_no: 16997 rts # do nothing 16998 16999# 17000# greater than: 17001# _______ 17002# NANvZvN 17003# 17004ftrapcc_gt: 17005 fbgt.w ftrapcc_trap # greater than? 17006ftrapcc_gt_no: 17007 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc? 17008 beq.b ftrapcc_gt_done # no 17009 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit 17010 btst &bsun_bit, FPCR_ENABLE(%a6) # was BSUN set? 17011 bne.w ftrapcc_bsun # yes 17012ftrapcc_gt_done: 17013 rts # no; do nothing 17014 17015# 17016# not greater than: 17017# 17018# NANvZvN 17019# 17020ftrapcc_ngt: 17021 fbngt.w ftrapcc_ngt_yes # not greater than? 17022ftrapcc_ngt_no: 17023 rts # do nothing 17024ftrapcc_ngt_yes: 17025 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc? 17026 beq.w ftrapcc_trap # no; go take trap 17027 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit 17028 btst &bsun_bit, FPCR_ENABLE(%a6) # was BSUN set? 17029 bne.w ftrapcc_bsun # yes 17030 bra.w ftrapcc_trap # no; go take trap 17031 17032# 17033# greater than or equal: 17034# _____ 17035# Zv(NANvN) 17036# 17037ftrapcc_ge: 17038 fbge.w ftrapcc_ge_yes # greater than or equal? 
ftrapcc_ge_no:
	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.b		ftrapcc_ge_done		# no; go finish
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN/AIOP bits
	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
	bne.w		ftrapcc_bsun		# yes
ftrapcc_ge_done:
	rts					# no; do nothing
ftrapcc_ge_yes:
	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.w		ftrapcc_trap		# no; go take trap
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN/AIOP bits
	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
	bne.w		ftrapcc_bsun		# yes
	bra.w		ftrapcc_trap		# no; go take trap

#
# not (greater than or equal):
#	       _
#	NANv(N^Z)
#
ftrapcc_nge:
	fbnge.w		ftrapcc_nge_yes		# not (greater than or equal)?
ftrapcc_nge_no:
	rts					# do nothing
ftrapcc_nge_yes:
	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.w		ftrapcc_trap		# no; go take trap
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN/AIOP bits
	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
	bne.w		ftrapcc_bsun		# yes
	bra.w		ftrapcc_trap		# no; go take trap

#
# less than:
#	   _____
#	N^(NANvZ)
#
ftrapcc_lt:
	fblt.w		ftrapcc_trap		# less than?
ftrapcc_lt_no:
	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.b		ftrapcc_lt_done		# no; go finish
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN/AIOP bits
	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
	bne.w		ftrapcc_bsun		# yes
ftrapcc_lt_done:
	rts					# no; do nothing

#
# not less than:
#	       _
#	NANv(ZvN)
#
ftrapcc_nlt:
	fbnlt.w		ftrapcc_nlt_yes		# not less than?
ftrapcc_nlt_no:
	rts					# do nothing
ftrapcc_nlt_yes:
	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.w		ftrapcc_trap		# no; go take trap
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN/AIOP bits
	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
	bne.w		ftrapcc_bsun		# yes
	bra.w		ftrapcc_trap		# no; go take trap

#
# less than or equal:
#	     ___
#	Zv(N^NAN)
#
ftrapcc_le:
	fble.w		ftrapcc_le_yes		# less than or equal?
ftrapcc_le_no:
	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.b		ftrapcc_le_done		# no; go finish
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN/AIOP bits
	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
	bne.w		ftrapcc_bsun		# yes
ftrapcc_le_done:
	rts					# no; do nothing
ftrapcc_le_yes:
	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.w		ftrapcc_trap		# no; go take trap
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN/AIOP bits
	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
	bne.w		ftrapcc_bsun		# yes
	bra.w		ftrapcc_trap		# no; go take trap

#
# not (less than or equal):
#	     ___
#	NANv(NvZ)
#
ftrapcc_nle:
	fbnle.w		ftrapcc_nle_yes		# not (less than or equal)?
ftrapcc_nle_no:
	rts					# do nothing
ftrapcc_nle_yes:
	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.w		ftrapcc_trap		# no; go take trap
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN/AIOP bits
	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
	bne.w		ftrapcc_bsun		# yes
	bra.w		ftrapcc_trap		# no; go take trap

#
# greater or less than:
#	_____
#	NANvZ
#
ftrapcc_gl:
	fbgl.w		ftrapcc_trap		# greater or less than?
ftrapcc_gl_no:
	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.b		ftrapcc_gl_done		# no; go finish
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN/AIOP bits
	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
	bne.w		ftrapcc_bsun		# yes
ftrapcc_gl_done:
	rts					# no; do nothing

#
# not (greater or less than):
#
#	NANvZ
#
ftrapcc_ngl:
	fbngl.w		ftrapcc_ngl_yes		# not (greater or less than)?
ftrapcc_ngl_no:
	rts					# do nothing
ftrapcc_ngl_yes:
	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.w		ftrapcc_trap		# no; go take trap
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN/AIOP bits
	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
	bne.w		ftrapcc_bsun		# yes
	bra.w		ftrapcc_trap		# no; go take trap

#
# greater, less, or equal:
#	___
#	NAN
#
# the predicate is false only when NAN is set, so the false path
# signals BSUN without testing the NAN bit again.
ftrapcc_gle:
	fbgle.w		ftrapcc_trap		# greater, less, or equal?
ftrapcc_gle_no:
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN/AIOP bits
	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
	bne.w		ftrapcc_bsun		# yes
	rts					# no; do nothing

#
# not (greater, less, or equal):
#
#	NAN
#
# the predicate is true only when NAN is set, so the true path
# signals BSUN without testing the NAN bit again.
ftrapcc_ngle:
	fbngle.w	ftrapcc_ngle_yes	# not (greater, less, or equal)?
ftrapcc_ngle_no:
	rts					# do nothing
ftrapcc_ngle_yes:
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN/AIOP bits
	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
	bne.w		ftrapcc_bsun		# yes
	bra.w		ftrapcc_trap		# no; go take trap

#########################################################################
#									#
# Miscellaneous tests							#
#									#
# The "false" and "true" tests never signal. The signalling tests	#
# (sf, st, seq, sneq) set the BSUN and AIOP bits whenever the NAN	#
# bit is set in the floating point condition codes.			#
#									#
#########################################################################

#
# false:
#
#	False
#
ftrapcc_f:
	rts					# do nothing

#
# true:
#
#	True
#
ftrapcc_t:
	bra.w		ftrapcc_trap		# go take trap

#
# signalling false:
#
#	False
#
ftrapcc_sf:
	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.b		ftrapcc_sf_done		# no; go finish
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN/AIOP bits
	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
	bne.w		ftrapcc_bsun		# yes
ftrapcc_sf_done:
	rts					# no; do nothing

#
# signalling true:
#
#	True
#
ftrapcc_st:
	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.w		ftrapcc_trap		# no; go take trap
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN/AIOP bits
	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
	bne.w		ftrapcc_bsun		# yes
	bra.w		ftrapcc_trap		# no; go take trap

#
# signalling equal:
#
#	Z
#
ftrapcc_seq:
	fbseq.w		ftrapcc_seq_yes		# signalling equal?
ftrapcc_seq_no:
	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.w		ftrapcc_seq_done	# no; go finish
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN/AIOP bits
	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
	bne.w		ftrapcc_bsun		# yes
ftrapcc_seq_done:
	rts					# no; do nothing
ftrapcc_seq_yes:
	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.w		ftrapcc_trap		# no; go take trap
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN/AIOP bits
	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
	bne.w		ftrapcc_bsun		# yes
	bra.w		ftrapcc_trap		# no; go take trap

#
# signalling not equal:
#	_
#	Z
#
ftrapcc_sneq:
	fbsneq.w	ftrapcc_sneq_yes	# signalling not equal?
ftrapcc_sneq_no:
	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.w		ftrapcc_sneq_no_done	# no; go finish
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN/AIOP bits
	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
	bne.w		ftrapcc_bsun		# yes
ftrapcc_sneq_no_done:
	rts					# do nothing
ftrapcc_sneq_yes:
	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.w		ftrapcc_trap		# no; go take trap
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN/AIOP bits
	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
	bne.w		ftrapcc_bsun		# yes
	bra.w		ftrapcc_trap		# no; go take trap

#########################################################################
#									#
# IEEE Aware tests							#
#									#
# For the IEEE aware tests, we only have to set the result based on the	#
# floating point condition codes. The BSUN exception will not be	#
# set for any of these tests.						#
#									#
#########################################################################

#
# ordered greater than:
#	_______
#	NANvZvN
#
ftrapcc_ogt:
	fbogt.w		ftrapcc_trap		# ordered greater than?
ftrapcc_ogt_no:
	rts					# do nothing

#
# unordered or less or equal:
#
#	NANvZvN
#
ftrapcc_ule:
	fbule.w		ftrapcc_trap		# unordered or less or equal?
ftrapcc_ule_no:
	rts					# do nothing

#
# ordered greater than or equal:
#	   _____
#	Zv(NANvN)
#
ftrapcc_oge:
	fboge.w		ftrapcc_trap		# ordered greater than or equal?
ftrapcc_oge_no:
	rts					# do nothing

#
# unordered or less than:
#	       _
#	NANv(N^Z)
#
ftrapcc_ult:
	fbult.w		ftrapcc_trap		# unordered or less than?
ftrapcc_ult_no:
	rts					# do nothing

#
# ordered less than:
#	   _____
#	N^(NANvZ)
#
ftrapcc_olt:
	fbolt.w		ftrapcc_trap		# ordered less than?
ftrapcc_olt_no:
	rts					# do nothing

#
# unordered or greater or equal:
#	      _
#	NANvZvN
#
ftrapcc_uge:
	fbuge.w		ftrapcc_trap		# unordered or greater or equal?
ftrapcc_uge_no:
	rts					# do nothing

#
# ordered less than or equal:
#	     ___
#	Zv(N^NAN)
#
ftrapcc_ole:
	fbole.w		ftrapcc_trap		# ordered less than or equal?
ftrapcc_ole_no:
	rts					# do nothing

#
# unordered or greater than:
#	     ___
#	NANv(NvZ)
#
ftrapcc_ugt:
	fbugt.w		ftrapcc_trap		# unordered or greater than?
ftrapcc_ugt_no:
	rts					# do nothing

#
# ordered greater or less than:
#	_____
#	NANvZ
#
ftrapcc_ogl:
	fbogl.w		ftrapcc_trap		# ordered greater or less than?
ftrapcc_ogl_no:
	rts					# do nothing

#
# unordered or equal:
#
#	NANvZ
#
ftrapcc_ueq:
	fbueq.w		ftrapcc_trap		# unordered or equal?
ftrapcc_ueq_no:
	rts					# do nothing

#
# ordered:
#	___
#	NAN
#
ftrapcc_or:
	fbor.w		ftrapcc_trap		# ordered?
ftrapcc_or_no:
	rts					# do nothing

#
# unordered:
#
#	NAN
#
ftrapcc_un:
	fbun.w		ftrapcc_trap		# unordered?
ftrapcc_un_no:
	rts					# do nothing

#######################################################################

# the predicate is true and no enabled bsun exception is pending.
# we will need to jump to the ftrapcc vector. the stack frame
# is the same size as that of the fp unimp instruction. the
# only difference is that the <ea> field should hold the PC
# of the ftrapcc instruction and the vector offset field
# should denote the ftrapcc trap.
# this routine only records ftrapcc_flg in SPCOND_FLG and returns.
ftrapcc_trap:
	mov.b		&ftrapcc_flg,SPCOND_FLG(%a6)
	rts

# the emulation routine set bsun and BSUN was enabled. have to
# fix stack and jump to the bsun handler.
# let the caller of this routine shift the stack frame up to
# eliminate the effective address field.
# this routine only records fbsun_flg in SPCOND_FLG and returns.
ftrapcc_bsun:
	mov.b		&fbsun_flg,SPCOND_FLG(%a6)
	rts

#########################################################################
# fscc(): routine to emulate the fscc instruction			#
#									#
# XDEF **************************************************************** #
#	_fscc()								#
#									#
# XREF **************************************************************** #
#	store_dreg_b() - store result to data register file		#
#	dec_areg() - decrement an areg for -(an) mode			#
#	inc_areg() - increment an areg for (an)+ mode			#
#	_dmem_write_byte() - store result to memory			#
#									#
# INPUT *************************************************************** #
#	none								#
#									#
# OUTPUT ************************************************************** #
#	none								#
#									#
# ALGORITHM *********************************************************** #
#	This routine checks which conditional predicate is specified by	#
# the stacked fscc instruction opcode and then branches to a routine	#
# for that predicate. The corresponding fbcc instruction is then used	#
# to see whether the condition (specified by the stacked FPSR) is true	#
# or false.								#
#	If a BSUN exception should be indicated, the BSUN and AIOP	#
# bits are set in the stacked FPSR. If the BSUN exception is enabled,	#
# the fbsun_flg is set in the SPCOND_FLG location on the stack. If an	#
# enabled BSUN should not be flagged, the result byte (all ones for	#
# true, zero for false) is stored to the data register file or memory.	#
#									#
#########################################################################

	global		_fscc
_fscc:
	mov.w		EXC_CMDREG(%a6),%d0	# fetch predicate

# load the stacked condition codes into the live FPSR so that the
# fbcc instructions below evaluate the emulated instruction's ccodes.
	clr.l		%d1			# clear scratch reg
	mov.b		FPSR_CC(%a6),%d1	# fetch fp ccodes
	ror.l		&0x8,%d1		# rotate to top byte
	fmov.l		%d1,%fpsr		# insert into FPSR

# d0 indexes a table of 16-bit offsets relative to tbl_fscc.
	mov.w		(tbl_fscc.b,%pc,%d0.w*2),%d1 # load table
	jmp		(tbl_fscc.b,%pc,%d1.w)	# jump to fscc routine

tbl_fscc:
	short		fscc_f		-	tbl_fscc	# 00
	short		fscc_eq		-	tbl_fscc	# 01
	short		fscc_ogt	-	tbl_fscc	# 02
	short		fscc_oge	-	tbl_fscc	# 03
	short		fscc_olt	-	tbl_fscc	# 04
	short		fscc_ole	-	tbl_fscc	# 05
	short		fscc_ogl	-	tbl_fscc	# 06
	short		fscc_or		-	tbl_fscc	# 07
	short		fscc_un		-	tbl_fscc	# 08
	short		fscc_ueq	-	tbl_fscc	# 09
	short		fscc_ugt	-	tbl_fscc	# 10
	short		fscc_uge	-	tbl_fscc	# 11
	short		fscc_ult	-	tbl_fscc	# 12
	short		fscc_ule	-	tbl_fscc	# 13
	short		fscc_neq	-	tbl_fscc	# 14
	short		fscc_t		-	tbl_fscc	# 15
	short		fscc_sf		-	tbl_fscc	# 16
	short		fscc_seq	-	tbl_fscc	# 17
	short		fscc_gt		-	tbl_fscc	# 18
	short		fscc_ge		-	tbl_fscc	# 19
	short		fscc_lt		-	tbl_fscc	# 20
	short		fscc_le		-	tbl_fscc	# 21
	short		fscc_gl		-	tbl_fscc	# 22
	short		fscc_gle	-	tbl_fscc	# 23
	short		fscc_ngle	-	tbl_fscc	# 24
	short		fscc_ngl	-	tbl_fscc	# 25
	short		fscc_nle	-	tbl_fscc	# 26
	short		fscc_nlt	-	tbl_fscc	# 27
	short		fscc_nge	-	tbl_fscc	# 28
	short		fscc_ngt	-	tbl_fscc	# 29
	short		fscc_sneq	-	tbl_fscc	# 30
	short		fscc_st		-	tbl_fscc	# 31

#########################################################################
#									#
# IEEE Nonaware tests							#
#									#
# For the IEEE nonaware tests, we set the result based on the		#
# floating point condition codes. In addition, we check to see		#
# if the NAN bit is set, in which case BSUN and AIOP will be set.	#
#									#
# The cases EQ and NE are shared by the Aware and Nonaware groups	#
# and are incapable of setting the BSUN exception bit.			#
#									#
# Typically, only one of the two possible branch directions could	#
# have the NAN bit set.							#
#									#
#########################################################################

#
# equal:
#
#	Z
#
fscc_eq:
	fbeq.w		fscc_eq_yes		# equal?
fscc_eq_no:
	clr.b		%d0			# set false
	bra.w		fscc_done		# go finish
fscc_eq_yes:
	st		%d0			# set true
	bra.w		fscc_done		# go finish

#
# not equal:
#	_
#	Z
#
fscc_neq:
	fbneq.w		fscc_neq_yes		# not equal?
fscc_neq_no:
	clr.b		%d0			# set false
	bra.w		fscc_done		# go finish
fscc_neq_yes:
	st		%d0			# set true
	bra.w		fscc_done		# go finish

#
# greater than:
#	_______
#	NANvZvN
#
fscc_gt:
	fbgt.w		fscc_gt_yes		# greater than?
fscc_gt_no:
	clr.b		%d0			# set false
	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.w		fscc_done		# no;go finish
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN/AIOP bits
	bra.w		fscc_chk_bsun		# go finish
fscc_gt_yes:
	st		%d0			# set true
	bra.w		fscc_done		# go finish

#
# not greater than:
#
#	NANvZvN
#
fscc_ngt:
	fbngt.w		fscc_ngt_yes		# not greater than?
fscc_ngt_no:
	clr.b		%d0			# set false
	bra.w		fscc_done		# go finish
fscc_ngt_yes:
	st		%d0			# set true
	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.w		fscc_done		# no;go finish
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN/AIOP bits
	bra.w		fscc_chk_bsun		# go finish

#
# greater than or equal:
#	   _____
#	Zv(NANvN)
#
fscc_ge:
	fbge.w		fscc_ge_yes		# greater than or equal?
fscc_ge_no:
	clr.b		%d0			# set false
	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.w		fscc_done		# no;go finish
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN/AIOP bits
	bra.w		fscc_chk_bsun		# go finish
fscc_ge_yes:
	st		%d0			# set true
	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.w		fscc_done		# no;go finish
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN/AIOP bits
	bra.w		fscc_chk_bsun		# go finish

#
# not (greater than or equal):
#	       _
#	NANv(N^Z)
#
fscc_nge:
	fbnge.w		fscc_nge_yes		# not (greater than or equal)?
fscc_nge_no:
	clr.b		%d0			# set false
	bra.w		fscc_done		# go finish
fscc_nge_yes:
	st		%d0			# set true
	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.w		fscc_done		# no;go finish
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN/AIOP bits
	bra.w		fscc_chk_bsun		# go finish

#
# less than:
#	   _____
#	N^(NANvZ)
#
fscc_lt:
	fblt.w		fscc_lt_yes		# less than?
fscc_lt_no:
	clr.b		%d0			# set false
	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.w		fscc_done		# no;go finish
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN/AIOP bits
	bra.w		fscc_chk_bsun		# go finish
fscc_lt_yes:
	st		%d0			# set true
	bra.w		fscc_done		# go finish

#
# not less than:
#	       _
#	NANv(ZvN)
#
fscc_nlt:
	fbnlt.w		fscc_nlt_yes		# not less than?
fscc_nlt_no:
	clr.b		%d0			# set false
	bra.w		fscc_done		# go finish
fscc_nlt_yes:
	st		%d0			# set true
	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.w		fscc_done		# no;go finish
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN/AIOP bits
	bra.w		fscc_chk_bsun		# go finish

#
# less than or equal:
#	     ___
#	Zv(N^NAN)
#
fscc_le:
	fble.w		fscc_le_yes		# less than or equal?
fscc_le_no:
	clr.b		%d0			# set false
	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.w		fscc_done		# no;go finish
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN/AIOP bits
	bra.w		fscc_chk_bsun		# go finish
fscc_le_yes:
	st		%d0			# set true
	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.w		fscc_done		# no;go finish
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN/AIOP bits
	bra.w		fscc_chk_bsun		# go finish

#
# not (less than or equal):
#	     ___
#	NANv(NvZ)
#
fscc_nle:
	fbnle.w		fscc_nle_yes		# not (less than or equal)?
fscc_nle_no:
	clr.b		%d0			# set false
	bra.w		fscc_done		# go finish
fscc_nle_yes:
	st		%d0			# set true
	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.w		fscc_done		# no;go finish
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN/AIOP bits
	bra.w		fscc_chk_bsun		# go finish

#
# greater or less than:
#	_____
#	NANvZ
#
fscc_gl:
	fbgl.w		fscc_gl_yes		# greater or less than?
fscc_gl_no:
	clr.b		%d0			# set false
	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.w		fscc_done		# no;go finish
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN/AIOP bits
	bra.w		fscc_chk_bsun		# go finish
fscc_gl_yes:
	st		%d0			# set true
	bra.w		fscc_done		# go finish

#
# not (greater or less than):
#
#	NANvZ
#
fscc_ngl:
	fbngl.w		fscc_ngl_yes		# not (greater or less than)?
fscc_ngl_no:
	clr.b		%d0			# set false
	bra.w		fscc_done		# go finish
fscc_ngl_yes:
	st		%d0			# set true
	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.w		fscc_done		# no;go finish
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN/AIOP bits
	bra.w		fscc_chk_bsun		# go finish

#
# greater, less, or equal:
#	___
#	NAN
#
# the predicate is false only when NAN is set, so the false path
# signals BSUN without testing the NAN bit again.
fscc_gle:
	fbgle.w		fscc_gle_yes		# greater, less, or equal?
fscc_gle_no:
	clr.b		%d0			# set false
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN/AIOP bits
	bra.w		fscc_chk_bsun		# go finish
fscc_gle_yes:
	st		%d0			# set true
	bra.w		fscc_done		# go finish

#
# not (greater, less, or equal):
#
#	NAN
#
# the predicate is true only when NAN is set, so the true path
# signals BSUN without testing the NAN bit again.
fscc_ngle:
	fbngle.w	fscc_ngle_yes		# not (greater, less, or equal)?
fscc_ngle_no:
	clr.b		%d0			# set false
	bra.w		fscc_done		# go finish
fscc_ngle_yes:
	st		%d0			# set true
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN/AIOP bits
	bra.w		fscc_chk_bsun		# go finish

#########################################################################
#									#
# Miscellaneous tests							#
#									#
# The "false" and "true" tests never signal. The signalling tests	#
# (sf, st, seq, sneq) set the BSUN and AIOP bits whenever the NAN	#
# bit is set in the floating point condition codes.			#
#									#
#########################################################################

#
# false:
#
#	False
#
fscc_f:
	clr.b		%d0			# set false
	bra.w		fscc_done		# go finish

#
# true:
#
#	True
#
fscc_t:
	st		%d0			# set true
	bra.w		fscc_done		# go finish

#
# signalling false:
#
#	False
#
fscc_sf:
	clr.b		%d0			# set false
	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.w		fscc_done		# no;go finish
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN/AIOP bits
	bra.w		fscc_chk_bsun		# go finish

#
# signalling true:
#
#	True
#
fscc_st:
	st		%d0			# set true
	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.w		fscc_done		# no;go finish
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN/AIOP bits
	bra.w		fscc_chk_bsun		# go finish

#
# signalling equal:
#
#	Z
#
fscc_seq:
	fbseq.w		fscc_seq_yes		# signalling equal?
fscc_seq_no:
	clr.b		%d0			# set false
	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.w		fscc_done		# no;go finish
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN/AIOP bits
	bra.w		fscc_chk_bsun		# go finish
fscc_seq_yes:
	st		%d0			# set true
	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.w		fscc_done		# no;go finish
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN/AIOP bits
	bra.w		fscc_chk_bsun		# go finish

#
# signalling not equal:
#	_
#	Z
#
fscc_sneq:
	fbsneq.w	fscc_sneq_yes		# signalling not equal?
fscc_sneq_no:
	clr.b		%d0			# set false
	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.w		fscc_done		# no;go finish
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN/AIOP bits
	bra.w		fscc_chk_bsun		# go finish
fscc_sneq_yes:
	st		%d0			# set true
	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.w		fscc_done		# no;go finish
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN/AIOP bits
	bra.w		fscc_chk_bsun		# go finish

#########################################################################
#									#
# IEEE Aware tests							#
#									#
# For the IEEE aware tests, we only have to set the result based on the	#
# floating point condition codes. The BSUN exception will not be	#
# set for any of these tests.						#
#									#
#########################################################################

#
# ordered greater than:
#	_______
#	NANvZvN
#
fscc_ogt:
	fbogt.w		fscc_ogt_yes		# ordered greater than?
fscc_ogt_no:
	clr.b		%d0			# set false
	bra.w		fscc_done		# go finish
fscc_ogt_yes:
	st		%d0			# set true
	bra.w		fscc_done		# go finish

#
# unordered or less or equal:
#
#	NANvZvN
#
fscc_ule:
	fbule.w		fscc_ule_yes		# unordered or less or equal?
fscc_ule_no:
	clr.b		%d0			# set false
	bra.w		fscc_done		# go finish
fscc_ule_yes:
	st		%d0			# set true
	bra.w		fscc_done		# go finish

#
# ordered greater than or equal:
#	   _____
#	Zv(NANvN)
#
fscc_oge:
	fboge.w		fscc_oge_yes		# ordered greater than or equal?
fscc_oge_no:
	clr.b		%d0			# set false
	bra.w		fscc_done		# go finish
fscc_oge_yes:
	st		%d0			# set true
	bra.w		fscc_done		# go finish

#
# unordered or less than:
#	       _
#	NANv(N^Z)
#
fscc_ult:
	fbult.w		fscc_ult_yes		# unordered or less than?
fscc_ult_no:
	clr.b		%d0			# set false
	bra.w		fscc_done		# go finish
fscc_ult_yes:
	st		%d0			# set true
	bra.w		fscc_done		# go finish

#
# ordered less than:
#	   _____
#	N^(NANvZ)
#
fscc_olt:
	fbolt.w		fscc_olt_yes		# ordered less than?
fscc_olt_no:
	clr.b		%d0			# set false
	bra.w		fscc_done		# go finish
fscc_olt_yes:
	st		%d0			# set true
	bra.w		fscc_done		# go finish

#
# unordered or greater or equal:
#	      _
#	NANvZvN
#
fscc_uge:
	fbuge.w		fscc_uge_yes		# unordered or greater or equal?
fscc_uge_no:
	clr.b		%d0			# set false
	bra.w		fscc_done		# go finish
fscc_uge_yes:
	st		%d0			# set true
	bra.w		fscc_done		# go finish

#
# ordered less than or equal:
#	     ___
#	Zv(N^NAN)
#
fscc_ole:
	fbole.w		fscc_ole_yes		# ordered less than or equal?
fscc_ole_no:
	clr.b		%d0			# set false
	bra.w		fscc_done		# go finish
fscc_ole_yes:
	st		%d0			# set true
	bra.w		fscc_done		# go finish

#
# unordered or greater than:
#	     ___
#	NANv(NvZ)
#
fscc_ugt:
	fbugt.w		fscc_ugt_yes		# unordered or greater than?
fscc_ugt_no:
	clr.b		%d0			# set false
	bra.w		fscc_done		# go finish
fscc_ugt_yes:
	st		%d0			# set true
	bra.w		fscc_done		# go finish

#
# ordered greater or less than:
#	_____
#	NANvZ
#
fscc_ogl:
	fbogl.w		fscc_ogl_yes		# ordered greater or less than?
fscc_ogl_no:
	clr.b		%d0			# set false
	bra.w		fscc_done		# go finish
fscc_ogl_yes:
	st		%d0			# set true
	bra.w		fscc_done		# go finish

#
# unordered or equal:
#
#	NANvZ
#
fscc_ueq:
	fbueq.w		fscc_ueq_yes		# unordered or equal?
fscc_ueq_no:
	clr.b		%d0			# set false
	bra.w		fscc_done		# go finish
fscc_ueq_yes:
	st		%d0			# set true
	bra.w		fscc_done		# go finish

#
# ordered:
#	___
#	NAN
#
fscc_or:
	fbor.w		fscc_or_yes		# ordered?
fscc_or_no:
	clr.b		%d0			# set false
	bra.w		fscc_done		# go finish
fscc_or_yes:
	st		%d0			# set true
	bra.w		fscc_done		# go finish

#
# unordered:
#
#	NAN
#
fscc_un:
	fbun.w		fscc_un_yes		# unordered?
fscc_un_no:
	clr.b		%d0			# set false
	bra.w		fscc_done		# go finish
fscc_un_yes:
	st		%d0			# set true
	bra.w		fscc_done		# go finish

#######################################################################

#
# the bsun exception bit was set. now, check to see if BSUN
# is enabled. if so, don't store result and correct stack frame
# for a bsun exception. if not, fall through and store the result.
#
fscc_chk_bsun:
	btst		&bsun_bit,FPCR_ENABLE(%a6) # is BSUN enabled?
	bne.w		fscc_bsun

#
# no enabled bsun exception is pending.
# the result has been selected.
# now, check to see if the result is to be stored in the data register
# file or in memory.
#
fscc_done:
	mov.l		%d0,%a0			# save result for a moment

	mov.b		1+EXC_OPWORD(%a6),%d1	# fetch lo opword
	mov.l		%d1,%d0			# make a copy
	andi.b		&0x38,%d1		# extract src mode

	bne.b		fscc_mem_op		# it's a memory operation

	mov.l		%d0,%d1
	andi.w		&0x7,%d1		# pass index in d1
	mov.l		%a0,%d0			# pass result in d0
	bsr.l		store_dreg_b		# save result in regfile
	rts

#
# the stacked <ea> is correct with the exception of:
#	-> Dn : <ea> is garbage
#
# if the addressing mode is post-increment or pre-decrement,
# then the address registers have not been updated.
#
fscc_mem_op:
	cmpi.b		%d1,&0x18		# is <ea> (An)+ ?
	beq.b		fscc_mem_inc		# yes
	cmpi.b		%d1,&0x20		# is <ea> -(An) ?
	beq.b		fscc_mem_dec		# yes

	mov.l		%a0,%d0			# pass result in d0
	mov.l		EXC_EA(%a6),%a0		# fetch <ea>
	bsr.l		_dmem_write_byte	# write result byte

	tst.l		%d1			# did dstore fail?
	bne.w		fscc_err		# yes

	rts

# addressing mode is post-increment. write the result byte. if the write
# fails then don't update the address register. if write passes then
# call inc_areg() to update the address register.
fscc_mem_inc:
	mov.l		%a0,%d0			# pass result in d0
	mov.l		EXC_EA(%a6),%a0		# fetch <ea>
	bsr.l		_dmem_write_byte	# write result byte

	tst.l		%d1			# did dstore fail?
	bne.w		fscc_err		# yes

	mov.b		0x1+EXC_OPWORD(%a6),%d1	# fetch opword
	andi.w		&0x7,%d1		# pass index in d1
	movq.l		&0x1,%d0		# pass amt to inc by
	bsr.l		inc_areg		# increment address register

	rts

# addressing mode is pre-decrement. write the result byte. if the write
# fails then don't update the address register. if the write passes then
# call dec_areg() to update the address register.
fscc_mem_dec:
	mov.l		%a0,%d0			# pass result in d0
	mov.l		EXC_EA(%a6),%a0		# fetch <ea>
	bsr.l		_dmem_write_byte	# write result byte

	tst.l		%d1			# did dstore fail?
	bne.w		fscc_err		# yes

	mov.b		0x1+EXC_OPWORD(%a6),%d1	# fetch opword
	andi.w		&0x7,%d1		# pass index in d1
	movq.l		&0x1,%d0		# pass amt to dec by
	bsr.l		dec_areg		# decrement address register

	rts

# the emulation routine set bsun and BSUN was enabled. have to
# fix stack and jump to the bsun handler.
# let the caller of this routine shift the stack frame up to
# eliminate the effective address field.
fscc_bsun:
	mov.b		&fbsun_flg,SPCOND_FLG(%a6)
	rts

# the byte write to memory has failed. pass the failing effective address
# and a FSLW to funimp_dacc().
fscc_err:
	mov.w		&0x00a1,EXC_VOFF(%a6)
	bra.l		facc_finish

#########################################################################
# XDEF **************************************************************** #
#	fmovm_dynamic(): emulate "fmovm" dynamic instruction		#
#									#
# XREF **************************************************************** #
#	fetch_dreg() - fetch data register				#
#	{i,d,}mem_read() - fetch data from memory			#
#	_mem_write() - write data to memory				#
#	iea_iacc() - instruction memory access error occurred		#
#	iea_dacc() - data memory access error occurred			#
#	restore() - restore An index regs if access error occurred	#
#									#
# INPUT *************************************************************** #
#	None								#
#									#
# OUTPUT ************************************************************** #
#	If instr is "fmovm Dn,-(A7)" from supervisor mode,		#
#		d0 = size of dump					#
#		d1 = Dn							#
#	Else if instruction access error,				#
#		d0 = FSLW						#
#	Else if data access error,					#
#		d0 = FSLW						#
#		a0 = address of fault					#
#	Else								#
#		none.							#
#									#
# ALGORITHM *********************************************************** #
#	The effective address must be calculated since this is entered	#
# from an "Unimplemented Effective Address" exception handler. So, we	#
# have our own fcalc_ea() routine here. If an access error is flagged	#
# by a _{i,d,}mem_read() call, we must exit through the special		#
# handler.								#
#	The data register is determined and its value loaded to get the	#
# string of FP registers affected. This value is used as an index into	#
# a lookup table such that we can determine the number of bytes		#
# involved.								#
#	If the instruction is "fmovm.x <ea>,Dn", a _mem_read() is used	#
# to read in all FP values. Again, _mem_read() may fail and require a	#
# special exit.								#
#	If the instruction is "fmovm.x Dn,<ea>", a _mem_write() is used	#
# to write all FP values. _mem_write() may also fail.			#
#	If the instruction is "fmovm.x Dn,-(a7)" from supervisor mode,	#
# then we return the size of the dump and the string to the caller	#
# so that the move can occur outside of this routine. This special	#
# case is required so that moves to the system stack are handled	#
# correctly.								#
#									#
# DYNAMIC:								#
#	fmovm.x	dn, <ea>						#
#	fmovm.x	<ea>, dn						#
#									#
#	      <WORD 1>		      <WORD2>				#
#	1111 0010 00 |<ea>|	11@& 1000 0$$$ 0000			#
#									#
#	& = (0): predecrement addressing mode				#
#	    (1): postincrement or control addressing mode		#
#	@ = (0): move listed regs from memory to the FPU		#
#	    (1): move listed regs from the FPU to memory		#
#	$$$    : index of data register holding reg select mask		#
#									#
# NOTES:								#
#	If the data register holds a zero, then the			#
#	instruction is a nop.						#
#									#
#########################################################################

	global		fmovm_dynamic
fmovm_dynamic:

# extract the data register in which the bit string resides...
	mov.b		1+EXC_EXTWORD(%a6),%d1	# fetch extword
	andi.w		&0x70,%d1		# extract reg bits
	lsr.b		&0x4,%d1		# shift into lo bits

# fetch the bit string into d0...
	bsr.l		fetch_dreg		# fetch reg string

	andi.l		&0x000000ff,%d0		# keep only lo byte

# the bit string indexes a byte table giving the size of the dump.
# both values are kept on the stack across the <ea> calculation.
	mov.l		%d0,-(%sp)		# save strg
	mov.b		(tbl_fmovm_size.w,%pc,%d0),%d0
	mov.l		%d0,-(%sp)		# save size
	bsr.l		fmovm_calc_ea		# calculate <ea>
	mov.l		(%sp)+,%d0		# restore size
	mov.l		(%sp)+,%d1		# restore strg

# if the bit string is a zero, then the operation is a no-op
# but, make sure that we've calculated ea and advanced the opword pointer
# (the condition codes tested here come from the pop of strg just above.)
	beq.w		fmovm_data_done

# separate move ins from move outs...
	btst		&0x5,EXC_EXTWORD(%a6)	# is it a move in or out?
	beq.w		fmovm_data_in		# it's a move in

#############
# MOVE OUT: #
#############
fmovm_data_out:
	btst		&0x4,EXC_EXTWORD(%a6)	# control or predecrement?
	bne.w		fmovm_out_ctrl		# control

############################
fmovm_out_predec:
# for predecrement mode, the bit string is the opposite of both control
# operations and postincrement mode. (bit7 = FP7 ... bit0 = FP0)
# here, we convert it to be just like the others...
	mov.b		(tbl_fmovm_convert.w,%pc,%d1.w*1),%d1

	btst		&0x5,EXC_SR(%a6)	# user or supervisor mode?
	beq.b		fmovm_out_ctrl		# user

fmovm_out_predec_s:
	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # is <ea> mode -(a7)?
	bne.b		fmovm_out_ctrl

# the operation was unfortunately an: fmovm.x dn,-(sp)
# called from supervisor mode.
# we're also passing "size" and "strg" back to the calling routine
	rts

############################
# build the register image in a scratch area on the stack, one bit of
# the string at a time (bit 7 first), then copy it out in one write.
# FP0 and FP1 are taken from their saved copies in the exception frame;
# FP2-FP7 are stored directly from the FPU.
fmovm_out_ctrl:
	mov.l		%a0,%a1			# move <ea> to a1

	sub.l		%d0,%sp			# subtract size of dump
	lea		(%sp),%a0

	tst.b		%d1			# should FP0 be moved?
	bpl.b		fmovm_out_ctrl_fp1	# no

	mov.l		0x0+EXC_FP0(%a6),(%a0)+	# yes
	mov.l		0x4+EXC_FP0(%a6),(%a0)+
	mov.l		0x8+EXC_FP0(%a6),(%a0)+

fmovm_out_ctrl_fp1:
	lsl.b		&0x1,%d1		# should FP1 be moved?
	bpl.b		fmovm_out_ctrl_fp2	# no

	mov.l		0x0+EXC_FP1(%a6),(%a0)+	# yes
	mov.l		0x4+EXC_FP1(%a6),(%a0)+
	mov.l		0x8+EXC_FP1(%a6),(%a0)+

fmovm_out_ctrl_fp2:
	lsl.b		&0x1,%d1		# should FP2 be moved?
	bpl.b		fmovm_out_ctrl_fp3	# no

	fmovm.x		&0x20,(%a0)		# yes
	add.l		&0xc,%a0

fmovm_out_ctrl_fp3:
	lsl.b		&0x1,%d1		# should FP3 be moved?
	bpl.b		fmovm_out_ctrl_fp4	# no

	fmovm.x		&0x10,(%a0)		# yes
	add.l		&0xc,%a0

fmovm_out_ctrl_fp4:
	lsl.b		&0x1,%d1		# should FP4 be moved?
	bpl.b		fmovm_out_ctrl_fp5	# no

	fmovm.x		&0x08,(%a0)		# yes
	add.l		&0xc,%a0

fmovm_out_ctrl_fp5:
	lsl.b		&0x1,%d1		# should FP5 be moved?
	bpl.b		fmovm_out_ctrl_fp6	# no

	fmovm.x		&0x04,(%a0)		# yes
	add.l		&0xc,%a0

fmovm_out_ctrl_fp6:
	lsl.b		&0x1,%d1		# should FP6 be moved?
	bpl.b		fmovm_out_ctrl_fp7	# no

	fmovm.x		&0x02,(%a0)		# yes
	add.l		&0xc,%a0

fmovm_out_ctrl_fp7:
	lsl.b		&0x1,%d1		# should FP7 be moved?
	bpl.b		fmovm_out_ctrl_done	# no

	fmovm.x		&0x01,(%a0)		# yes
	add.l		&0xc,%a0

fmovm_out_ctrl_done:
	mov.l		%a1,L_SCR1(%a6)

	lea		(%sp),%a0		# pass: supervisor src
	mov.l		%d0,-(%sp)		# save size
	bsr.l		_dmem_write		# copy data to user mem

	mov.l		(%sp)+,%d0
	add.l		%d0,%sp			# clear fpreg data from stack

	tst.l		%d1			# did dstore err?
	bne.w		fmovm_out_err		# yes

	rts

############
# MOVE IN: #
############
# read the whole register image into a scratch area on the stack, then
# distribute it one bit of the string at a time (bit 7 first).
# FP0 and FP1 go to their saved copies in the exception frame;
# FP2-FP7 are loaded directly into the FPU.
fmovm_data_in:
	mov.l		%a0,L_SCR1(%a6)

	sub.l		%d0,%sp			# make room for fpregs
	lea		(%sp),%a1

	mov.l		%d1,-(%sp)		# save bit string for later
	mov.l		%d0,-(%sp)		# save # of bytes

	bsr.l		_dmem_read		# copy data from user mem

	mov.l		(%sp)+,%d0		# retrieve # of bytes

	tst.l		%d1			# did dfetch fail?
	bne.w		fmovm_in_err		# yes

	mov.l		(%sp)+,%d1		# load bit string

	lea		(%sp),%a0		# addr of stack

	tst.b		%d1			# should FP0 be moved?
	bpl.b		fmovm_data_in_fp1	# no

	mov.l		(%a0)+,0x0+EXC_FP0(%a6)	# yes
	mov.l		(%a0)+,0x4+EXC_FP0(%a6)
	mov.l		(%a0)+,0x8+EXC_FP0(%a6)

fmovm_data_in_fp1:
	lsl.b		&0x1,%d1		# should FP1 be moved?
	bpl.b		fmovm_data_in_fp2	# no

	mov.l		(%a0)+,0x0+EXC_FP1(%a6)	# yes
	mov.l		(%a0)+,0x4+EXC_FP1(%a6)
	mov.l		(%a0)+,0x8+EXC_FP1(%a6)

fmovm_data_in_fp2:
	lsl.b		&0x1,%d1		# should FP2 be moved?
	bpl.b		fmovm_data_in_fp3	# no

	fmovm.x		(%a0)+,&0x20		# yes

fmovm_data_in_fp3:
	lsl.b		&0x1,%d1		# should FP3 be moved?
	bpl.b		fmovm_data_in_fp4	# no

	fmovm.x		(%a0)+,&0x10		# yes

fmovm_data_in_fp4:
	lsl.b		&0x1,%d1		# should FP4 be moved?
	bpl.b		fmovm_data_in_fp5	# no

	fmovm.x		(%a0)+,&0x08		# yes

fmovm_data_in_fp5:
	lsl.b		&0x1,%d1		# should FP5 be moved?
	bpl.b		fmovm_data_in_fp6	# no

	fmovm.x		(%a0)+,&0x04		# yes

fmovm_data_in_fp6:
	lsl.b		&0x1,%d1		# should FP6 be moved?
	bpl.b		fmovm_data_in_fp7	# no

	fmovm.x		(%a0)+,&0x02		# yes

fmovm_data_in_fp7:
	lsl.b		&0x1,%d1		# should FP7 be moved?
18427 bpl.b fmovm_data_in_done # no 18428 18429 fmovm.x (%a0)+,&0x01 # yes 18430 18431fmovm_data_in_done: 18432 add.l %d0,%sp # remove fpregs from stack 18433 rts 18434 18435##################################### 18436 18437fmovm_data_done: 18438 rts 18439 18440############################################################################## 18441 18442# 18443# table indexed by the operation's bit string that gives the number 18444# of bytes that will be moved. 18445# 18446# number of bytes = (# of 1's in bit string) * 12(bytes/fpreg) 18447# 18448tbl_fmovm_size: 18449 byte 0x00,0x0c,0x0c,0x18,0x0c,0x18,0x18,0x24 18450 byte 0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30 18451 byte 0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30 18452 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c 18453 byte 0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30 18454 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c 18455 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c 18456 byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48 18457 byte 0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30 18458 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c 18459 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c 18460 byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48 18461 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c 18462 byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48 18463 byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48 18464 byte 0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54 18465 byte 0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30 18466 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c 18467 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c 18468 byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48 18469 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c 18470 byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48 18471 byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48 18472 byte 0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54 18473 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c 18474 byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48 18475 byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48 18476 byte 0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54 18477 
byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48 18478 byte 0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54 18479 byte 0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54 18480 byte 0x3c,0x48,0x48,0x54,0x48,0x54,0x54,0x60 18481 18482# 18483# table to convert a pre-decrement bit string into a post-increment 18484# or control bit string. 18485# ex: 0x00 ==> 0x00 18486# 0x01 ==> 0x80 18487# 0x02 ==> 0x40 18488# . 18489# . 18490# 0xfd ==> 0xbf 18491# 0xfe ==> 0x7f 18492# 0xff ==> 0xff 18493# 18494tbl_fmovm_convert: 18495 byte 0x00,0x80,0x40,0xc0,0x20,0xa0,0x60,0xe0 18496 byte 0x10,0x90,0x50,0xd0,0x30,0xb0,0x70,0xf0 18497 byte 0x08,0x88,0x48,0xc8,0x28,0xa8,0x68,0xe8 18498 byte 0x18,0x98,0x58,0xd8,0x38,0xb8,0x78,0xf8 18499 byte 0x04,0x84,0x44,0xc4,0x24,0xa4,0x64,0xe4 18500 byte 0x14,0x94,0x54,0xd4,0x34,0xb4,0x74,0xf4 18501 byte 0x0c,0x8c,0x4c,0xcc,0x2c,0xac,0x6c,0xec 18502 byte 0x1c,0x9c,0x5c,0xdc,0x3c,0xbc,0x7c,0xfc 18503 byte 0x02,0x82,0x42,0xc2,0x22,0xa2,0x62,0xe2 18504 byte 0x12,0x92,0x52,0xd2,0x32,0xb2,0x72,0xf2 18505 byte 0x0a,0x8a,0x4a,0xca,0x2a,0xaa,0x6a,0xea 18506 byte 0x1a,0x9a,0x5a,0xda,0x3a,0xba,0x7a,0xfa 18507 byte 0x06,0x86,0x46,0xc6,0x26,0xa6,0x66,0xe6 18508 byte 0x16,0x96,0x56,0xd6,0x36,0xb6,0x76,0xf6 18509 byte 0x0e,0x8e,0x4e,0xce,0x2e,0xae,0x6e,0xee 18510 byte 0x1e,0x9e,0x5e,0xde,0x3e,0xbe,0x7e,0xfe 18511 byte 0x01,0x81,0x41,0xc1,0x21,0xa1,0x61,0xe1 18512 byte 0x11,0x91,0x51,0xd1,0x31,0xb1,0x71,0xf1 18513 byte 0x09,0x89,0x49,0xc9,0x29,0xa9,0x69,0xe9 18514 byte 0x19,0x99,0x59,0xd9,0x39,0xb9,0x79,0xf9 18515 byte 0x05,0x85,0x45,0xc5,0x25,0xa5,0x65,0xe5 18516 byte 0x15,0x95,0x55,0xd5,0x35,0xb5,0x75,0xf5 18517 byte 0x0d,0x8d,0x4d,0xcd,0x2d,0xad,0x6d,0xed 18518 byte 0x1d,0x9d,0x5d,0xdd,0x3d,0xbd,0x7d,0xfd 18519 byte 0x03,0x83,0x43,0xc3,0x23,0xa3,0x63,0xe3 18520 byte 0x13,0x93,0x53,0xd3,0x33,0xb3,0x73,0xf3 18521 byte 0x0b,0x8b,0x4b,0xcb,0x2b,0xab,0x6b,0xeb 18522 byte 0x1b,0x9b,0x5b,0xdb,0x3b,0xbb,0x7b,0xfb 18523 byte 0x07,0x87,0x47,0xc7,0x27,0xa7,0x67,0xe7 18524 byte 
0x17,0x97,0x57,0xd7,0x37,0xb7,0x77,0xf7 18525 byte 0x0f,0x8f,0x4f,0xcf,0x2f,0xaf,0x6f,0xef 18526 byte 0x1f,0x9f,0x5f,0xdf,0x3f,0xbf,0x7f,0xff 18527 18528 global fmovm_calc_ea 18529############################################### 18530# _fmovm_calc_ea: calculate effective address # 18531############################################### 18532fmovm_calc_ea: 18533 mov.l %d0,%a0 # move # bytes to a0 18534 18535# currently, MODE and REG are taken from the EXC_OPWORD. this could be 18536# easily changed if they were inputs passed in registers. 18537 mov.w EXC_OPWORD(%a6),%d0 # fetch opcode word 18538 mov.w %d0,%d1 # make a copy 18539 18540 andi.w &0x3f,%d0 # extract mode field 18541 andi.l &0x7,%d1 # extract reg field 18542 18543# jump to the corresponding function for each {MODE,REG} pair. 18544 mov.w (tbl_fea_mode.b,%pc,%d0.w*2),%d0 # fetch jmp distance 18545 jmp (tbl_fea_mode.b,%pc,%d0.w*1) # jmp to correct ea mode 18546 18547 swbeg &64 18548tbl_fea_mode: 18549 short tbl_fea_mode - tbl_fea_mode 18550 short tbl_fea_mode - tbl_fea_mode 18551 short tbl_fea_mode - tbl_fea_mode 18552 short tbl_fea_mode - tbl_fea_mode 18553 short tbl_fea_mode - tbl_fea_mode 18554 short tbl_fea_mode - tbl_fea_mode 18555 short tbl_fea_mode - tbl_fea_mode 18556 short tbl_fea_mode - tbl_fea_mode 18557 18558 short tbl_fea_mode - tbl_fea_mode 18559 short tbl_fea_mode - tbl_fea_mode 18560 short tbl_fea_mode - tbl_fea_mode 18561 short tbl_fea_mode - tbl_fea_mode 18562 short tbl_fea_mode - tbl_fea_mode 18563 short tbl_fea_mode - tbl_fea_mode 18564 short tbl_fea_mode - tbl_fea_mode 18565 short tbl_fea_mode - tbl_fea_mode 18566 18567 short faddr_ind_a0 - tbl_fea_mode 18568 short faddr_ind_a1 - tbl_fea_mode 18569 short faddr_ind_a2 - tbl_fea_mode 18570 short faddr_ind_a3 - tbl_fea_mode 18571 short faddr_ind_a4 - tbl_fea_mode 18572 short faddr_ind_a5 - tbl_fea_mode 18573 short faddr_ind_a6 - tbl_fea_mode 18574 short faddr_ind_a7 - tbl_fea_mode 18575 18576 short faddr_ind_p_a0 - tbl_fea_mode 18577 short 
faddr_ind_p_a1 - tbl_fea_mode 18578 short faddr_ind_p_a2 - tbl_fea_mode 18579 short faddr_ind_p_a3 - tbl_fea_mode 18580 short faddr_ind_p_a4 - tbl_fea_mode 18581 short faddr_ind_p_a5 - tbl_fea_mode 18582 short faddr_ind_p_a6 - tbl_fea_mode 18583 short faddr_ind_p_a7 - tbl_fea_mode 18584 18585 short faddr_ind_m_a0 - tbl_fea_mode 18586 short faddr_ind_m_a1 - tbl_fea_mode 18587 short faddr_ind_m_a2 - tbl_fea_mode 18588 short faddr_ind_m_a3 - tbl_fea_mode 18589 short faddr_ind_m_a4 - tbl_fea_mode 18590 short faddr_ind_m_a5 - tbl_fea_mode 18591 short faddr_ind_m_a6 - tbl_fea_mode 18592 short faddr_ind_m_a7 - tbl_fea_mode 18593 18594 short faddr_ind_disp_a0 - tbl_fea_mode 18595 short faddr_ind_disp_a1 - tbl_fea_mode 18596 short faddr_ind_disp_a2 - tbl_fea_mode 18597 short faddr_ind_disp_a3 - tbl_fea_mode 18598 short faddr_ind_disp_a4 - tbl_fea_mode 18599 short faddr_ind_disp_a5 - tbl_fea_mode 18600 short faddr_ind_disp_a6 - tbl_fea_mode 18601 short faddr_ind_disp_a7 - tbl_fea_mode 18602 18603 short faddr_ind_ext - tbl_fea_mode 18604 short faddr_ind_ext - tbl_fea_mode 18605 short faddr_ind_ext - tbl_fea_mode 18606 short faddr_ind_ext - tbl_fea_mode 18607 short faddr_ind_ext - tbl_fea_mode 18608 short faddr_ind_ext - tbl_fea_mode 18609 short faddr_ind_ext - tbl_fea_mode 18610 short faddr_ind_ext - tbl_fea_mode 18611 18612 short fabs_short - tbl_fea_mode 18613 short fabs_long - tbl_fea_mode 18614 short fpc_ind - tbl_fea_mode 18615 short fpc_ind_ext - tbl_fea_mode 18616 short tbl_fea_mode - tbl_fea_mode 18617 short tbl_fea_mode - tbl_fea_mode 18618 short tbl_fea_mode - tbl_fea_mode 18619 short tbl_fea_mode - tbl_fea_mode 18620 18621################################### 18622# Address register indirect: (An) # 18623################################### 18624faddr_ind_a0: 18625 mov.l EXC_DREGS+0x8(%a6),%a0 # Get current a0 18626 rts 18627 18628faddr_ind_a1: 18629 mov.l EXC_DREGS+0xc(%a6),%a0 # Get current a1 18630 rts 18631 18632faddr_ind_a2: 18633 mov.l %a2,%a0 # Get current a2 
18634 rts 18635 18636faddr_ind_a3: 18637 mov.l %a3,%a0 # Get current a3 18638 rts 18639 18640faddr_ind_a4: 18641 mov.l %a4,%a0 # Get current a4 18642 rts 18643 18644faddr_ind_a5: 18645 mov.l %a5,%a0 # Get current a5 18646 rts 18647 18648faddr_ind_a6: 18649 mov.l (%a6),%a0 # Get current a6 18650 rts 18651 18652faddr_ind_a7: 18653 mov.l EXC_A7(%a6),%a0 # Get current a7 18654 rts 18655 18656##################################################### 18657# Address register indirect w/ postincrement: (An)+ # 18658##################################################### 18659faddr_ind_p_a0: 18660 mov.l EXC_DREGS+0x8(%a6),%d0 # Get current a0 18661 mov.l %d0,%d1 18662 add.l %a0,%d1 # Increment 18663 mov.l %d1,EXC_DREGS+0x8(%a6) # Save incr value 18664 mov.l %d0,%a0 18665 rts 18666 18667faddr_ind_p_a1: 18668 mov.l EXC_DREGS+0xc(%a6),%d0 # Get current a1 18669 mov.l %d0,%d1 18670 add.l %a0,%d1 # Increment 18671 mov.l %d1,EXC_DREGS+0xc(%a6) # Save incr value 18672 mov.l %d0,%a0 18673 rts 18674 18675faddr_ind_p_a2: 18676 mov.l %a2,%d0 # Get current a2 18677 mov.l %d0,%d1 18678 add.l %a0,%d1 # Increment 18679 mov.l %d1,%a2 # Save incr value 18680 mov.l %d0,%a0 18681 rts 18682 18683faddr_ind_p_a3: 18684 mov.l %a3,%d0 # Get current a3 18685 mov.l %d0,%d1 18686 add.l %a0,%d1 # Increment 18687 mov.l %d1,%a3 # Save incr value 18688 mov.l %d0,%a0 18689 rts 18690 18691faddr_ind_p_a4: 18692 mov.l %a4,%d0 # Get current a4 18693 mov.l %d0,%d1 18694 add.l %a0,%d1 # Increment 18695 mov.l %d1,%a4 # Save incr value 18696 mov.l %d0,%a0 18697 rts 18698 18699faddr_ind_p_a5: 18700 mov.l %a5,%d0 # Get current a5 18701 mov.l %d0,%d1 18702 add.l %a0,%d1 # Increment 18703 mov.l %d1,%a5 # Save incr value 18704 mov.l %d0,%a0 18705 rts 18706 18707faddr_ind_p_a6: 18708 mov.l (%a6),%d0 # Get current a6 18709 mov.l %d0,%d1 18710 add.l %a0,%d1 # Increment 18711 mov.l %d1,(%a6) # Save incr value 18712 mov.l %d0,%a0 18713 rts 18714 18715faddr_ind_p_a7: 18716 mov.b &mia7_flg,SPCOND_FLG(%a6) # set "special case" flag 
18717 18718 mov.l EXC_A7(%a6),%d0 # Get current a7 18719 mov.l %d0,%d1 18720 add.l %a0,%d1 # Increment 18721 mov.l %d1,EXC_A7(%a6) # Save incr value 18722 mov.l %d0,%a0 18723 rts 18724 18725#################################################### 18726# Address register indirect w/ predecrement: -(An) # 18727#################################################### 18728faddr_ind_m_a0: 18729 mov.l EXC_DREGS+0x8(%a6),%d0 # Get current a0 18730 sub.l %a0,%d0 # Decrement 18731 mov.l %d0,EXC_DREGS+0x8(%a6) # Save decr value 18732 mov.l %d0,%a0 18733 rts 18734 18735faddr_ind_m_a1: 18736 mov.l EXC_DREGS+0xc(%a6),%d0 # Get current a1 18737 sub.l %a0,%d0 # Decrement 18738 mov.l %d0,EXC_DREGS+0xc(%a6) # Save decr value 18739 mov.l %d0,%a0 18740 rts 18741 18742faddr_ind_m_a2: 18743 mov.l %a2,%d0 # Get current a2 18744 sub.l %a0,%d0 # Decrement 18745 mov.l %d0,%a2 # Save decr value 18746 mov.l %d0,%a0 18747 rts 18748 18749faddr_ind_m_a3: 18750 mov.l %a3,%d0 # Get current a3 18751 sub.l %a0,%d0 # Decrement 18752 mov.l %d0,%a3 # Save decr value 18753 mov.l %d0,%a0 18754 rts 18755 18756faddr_ind_m_a4: 18757 mov.l %a4,%d0 # Get current a4 18758 sub.l %a0,%d0 # Decrement 18759 mov.l %d0,%a4 # Save decr value 18760 mov.l %d0,%a0 18761 rts 18762 18763faddr_ind_m_a5: 18764 mov.l %a5,%d0 # Get current a5 18765 sub.l %a0,%d0 # Decrement 18766 mov.l %d0,%a5 # Save decr value 18767 mov.l %d0,%a0 18768 rts 18769 18770faddr_ind_m_a6: 18771 mov.l (%a6),%d0 # Get current a6 18772 sub.l %a0,%d0 # Decrement 18773 mov.l %d0,(%a6) # Save decr value 18774 mov.l %d0,%a0 18775 rts 18776 18777faddr_ind_m_a7: 18778 mov.b &mda7_flg,SPCOND_FLG(%a6) # set "special case" flag 18779 18780 mov.l EXC_A7(%a6),%d0 # Get current a7 18781 sub.l %a0,%d0 # Decrement 18782 mov.l %d0,EXC_A7(%a6) # Save decr value 18783 mov.l %d0,%a0 18784 rts 18785 18786######################################################## 18787# Address register indirect w/ displacement: (d16, An) # 
########################################################
# each handler fetches the 16-bit displacement from the instruction
# stream (advancing EXC_EXTWPTR by 2), sign extends it by moving it
# into a0 as a word, and adds the base register. an instruction fetch
# failure exits through iea_iacc.
faddr_ind_disp_a0:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.w		%d0,%a0			# sign extend displacement

	add.l		EXC_DREGS+0x8(%a6),%a0	# a0 + d16
	rts

faddr_ind_disp_a1:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.w		%d0,%a0			# sign extend displacement

	add.l		EXC_DREGS+0xc(%a6),%a0	# a1 + d16
	rts

faddr_ind_disp_a2:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.w		%d0,%a0			# sign extend displacement

	add.l		%a2,%a0			# a2 + d16
	rts

faddr_ind_disp_a3:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.w		%d0,%a0			# sign extend displacement

	add.l		%a3,%a0			# a3 + d16
	rts

faddr_ind_disp_a4:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.w		%d0,%a0			# sign extend displacement

	add.l		%a4,%a0			# a4 + d16
	rts

faddr_ind_disp_a5:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.w		%d0,%a0			# sign extend displacement

	add.l		%a5,%a0			# a5 + d16
	rts

faddr_ind_disp_a6:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.w		%d0,%a0			# sign extend displacement

	add.l		(%a6),%a0		# a6 + d16
	rts

faddr_ind_disp_a7:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.w		%d0,%a0			# sign extend displacement

	add.l		EXC_A7(%a6),%a0		# a7 + d16
	rts

########################################################################
# Address register indirect w/ index(8-bit displacement): (d8, An, Xn) #
#    "       "         "    w/   "  (base displacement): (bd, An, Xn)  #
# Memory indirect postindexed: ([bd, An], Xn, od)		       #
# Memory indirect preindexed: ([bd, An, Xn], od)		       #
########################################################################
# on entry d1 = base register number (0-7) from fmovm_calc_ea.
faddr_ind_ext:
	addq.l		&0x8,%d1		# select An (regs 8-15) for fetch_dreg
	bsr.l		fetch_dreg		# fetch base areg
	mov.l		%d0,-(%sp)

	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word		# fetch extword in d0

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		(%sp)+,%a0		# a0 = base An

	btst		&0x8,%d0		# full-format extension word?
	bne.w		fcalc_mem_ind		# yes

	mov.l		%d0,L_SCR1(%a6)		# hold opword

	mov.l		%d0,%d1
	rol.w		&0x4,%d1
	andi.w		&0xf,%d1		# extract index regno

# count on fetch_dreg() not to alter a0...
	bsr.l		fetch_dreg		# fetch index

	mov.l		%d2,-(%sp)		# save d2
	mov.l		L_SCR1(%a6),%d2		# fetch opword

	btst		&0xb,%d2		# is it word or long?
	bne.b		faii8_long
	ext.l		%d0			# sign extend word index
faii8_long:
	mov.l		%d2,%d1
	rol.w		&0x7,%d1
	andi.l		&0x3,%d1		# extract scale value

	lsl.l		%d1,%d0			# shift index by scale

	extb.l		%d2			# sign extend displacement
	add.l		%d2,%d0			# index + disp
	add.l		%d0,%a0			# An + (index + disp)

	mov.l		(%sp)+,%d2		# restore old d2
	rts

###########################
# Absolute short: (XXX).W #
###########################
fabs_short:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word		# fetch short address

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.w		%d0,%a0			# return <ea> in a0 (sign extended)
	rts

##########################
# Absolute long: (XXX).L #
##########################
fabs_long:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch long address

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		%d0,%a0			# return <ea> in a0
	rts

#######################################################
# Program counter indirect w/ displacement: (d16, PC) #
#######################################################
fpc_ind:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word		# fetch word displacement

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.w		%d0,%a0			# sign extend displacement

	add.l		EXC_EXTWPTR(%a6),%a0	# pc + d16

# EXC_EXTWPTR was already advanced by 2 (addq above) before it was added
# in, so back that out to get the address of the displacement word.
	subq.l		&0x2,%a0		# adjust <ea>
	rts

##########################################################
# PC indirect w/ index(8-bit displacement): (d8, PC, An) #
# "     "     w/   "  (base displacement): (bd, PC, An)  #
# PC memory indirect postindexed: ([bd, PC], Xn, od)     #
# PC memory indirect preindexed: ([bd, PC, Xn], od)      #
##########################################################
fpc_ind_ext:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word		# fetch ext word

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		EXC_EXTWPTR(%a6),%a0	# put base in a0
	subq.l		&0x2,%a0		# adjust base

	btst		&0x8,%d0		# is disp only 8 bits?
	bne.w		fcalc_mem_ind		# calc memory indirect

	mov.l		%d0,L_SCR1(%a6)		# store opword

	mov.l		%d0,%d1			# make extword copy
	rol.w		&0x4,%d1		# rotate reg num into place
	andi.w		&0xf,%d1		# extract register number

# count on fetch_dreg() not to alter a0...
	bsr.l		fetch_dreg		# fetch index

	mov.l		%d2,-(%sp)		# save d2
	mov.l		L_SCR1(%a6),%d2		# fetch opword

	btst		&0xb,%d2		# is index word or long?
	bne.b		fpii8_long		# long
	ext.l		%d0			# sign extend word index
fpii8_long:
	mov.l		%d2,%d1
	rol.w		&0x7,%d1		# rotate scale value into place
	andi.l		&0x3,%d1		# extract scale value

	lsl.l		%d1,%d0			# shift index by scale

	extb.l		%d2			# sign extend displacement
	add.l		%d2,%d0			# disp + index
	add.l		%d0,%a0			# An + (index + disp)

	mov.l		(%sp)+,%d2		# restore temp register
	rts

# full-format extension word handling.
# on entry: d0 = extension word, a0 = base (An, or PC for fpc_ind_ext)
# working registers (d2-d5 are saved on the stack and restored on exit):
# d2 = index
# d3 = base
# d4 = od
# d5 = extword
fcalc_mem_ind:
	btst		&0x6,%d0		# is the index suppressed?
	beq.b		fcalc_index

	movm.l		&0x3c00,-(%sp)		# save d2-d5

	mov.l		%d0,%d5			# put extword in d5
	mov.l		%a0,%d3			# put base in d3

	clr.l		%d2			# yes, so index = 0
	bra.b		fbase_supp_ck

# index:
fcalc_index:
	mov.l		%d0,L_SCR1(%a6)		# save d0 (opword)
	bfextu		%d0{&16:&4},%d1		# fetch dreg index
	bsr.l		fetch_dreg

	movm.l		&0x3c00,-(%sp)		# save d2-d5
	mov.l		%d0,%d2			# put index in d2
	mov.l		L_SCR1(%a6),%d5
	mov.l		%a0,%d3

	btst		&0xb,%d5		# is index word or long?
	bne.b		fno_ext
	ext.l		%d2

fno_ext:
	bfextu		%d5{&21:&2},%d0		# extract scale value
	lsl.l		%d0,%d2			# shift index by scale

# base address (passed as parameter in d3):
# we clear the value here if it should actually be suppressed.
fbase_supp_ck:
	btst		&0x7,%d5		# is the bd suppressed?
	beq.b		fno_base_sup
	clr.l		%d3

# base displacement:
fno_base_sup:
	bfextu		%d5{&26:&2},%d0		# get bd size
#	beq.l		fmovm_error		# if (size == 0) it's reserved

	cmpi.b		%d0,&0x2
	blt.b		fno_bd			# size < 2: no bd to fetch
	beq.b		fget_word_bd		# size == 2: word bd

	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long

	tst.l		%d1			# did ifetch fail?
	bne.l		fcea_iacc		# yes

	bra.b		fchk_ind

fget_word_bd:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word

	tst.l		%d1			# did ifetch fail?
	bne.l		fcea_iacc		# yes

	ext.l		%d0			# sign extend bd

fchk_ind:
	add.l		%d0,%d3			# base += bd

# outer displacement:
fno_bd:
	bfextu		%d5{&30:&2},%d0		# is od suppressed?
	beq.w		faii_bd			# field == 0: no memory indirection

	cmpi.b		%d0,&0x2
	blt.b		fnull_od		# field == 1: null od
	beq.b		fword_od		# field == 2: word od

	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long

	tst.l		%d1			# did ifetch fail?
	bne.l		fcea_iacc		# yes

	bra.b		fadd_them

fword_od:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word

	tst.l		%d1			# did ifetch fail?
	bne.l		fcea_iacc		# yes

	ext.l		%d0			# sign extend od
	bra.b		fadd_them

fnull_od:
	clr.l		%d0

fadd_them:
	mov.l		%d0,%d4			# d4 = od

	btst		&0x2,%d5		# pre or post indexing?
	beq.b		fpre_indexed

	mov.l		%d3,%a0
	bsr.l		_dmem_read_long		# fetch the indirect address

	tst.l		%d1			# did dfetch fail?
	bne.w		fcea_err		# yes

	add.l		%d2,%d0			# <ea> += index
	add.l		%d4,%d0			# <ea> += od
	bra.b		fdone_ea

fpre_indexed:
	add.l		%d2,%d3			# preindexing
	mov.l		%d3,%a0
	bsr.l		_dmem_read_long		# fetch the indirect address

	tst.l		%d1			# did dfetch fail?
	bne.w		fcea_err		# yes

	add.l		%d4,%d0			# ea += od
	bra.b		fdone_ea

faii_bd:
	add.l		%d2,%d3			# ea = (base + bd) + index
	mov.l		%d3,%d0
fdone_ea:
	mov.l		%d0,%a0

	movm.l		(%sp)+,&0x003c		# restore d2-d5
	rts

#########################################################
# error exits. d2-d5 are restored before leaving fcalc_mem_ind.
fcea_err:
	mov.l		%d3,%a0			# a0 = address of the failed read

	movm.l		(%sp)+,&0x003c		# restore d2-d5
	mov.w		&0x0101,%d0
	bra.l		iea_dacc

fcea_iacc:
	movm.l		(%sp)+,&0x003c		# restore d2-d5
	bra.l		iea_iacc

# data access error exits for fmovm_dynamic; L_SCR1(a6) holds the <ea>
# saved by the move-out / move-in code.
fmovm_out_err:
	bsr.l		restore
	mov.w		&0x00e1,%d0
	bra.b		fmovm_err

fmovm_in_err:
	bsr.l		restore
	mov.w		&0x0161,%d0

fmovm_err:
	mov.l		L_SCR1(%a6),%a0
	bra.l		iea_dacc

#########################################################################
# XDEF ****************************************************************	#
#	fmovm_ctrl(): emulate fmovm.l of control registers instr	#
#									#
# XREF ****************************************************************	#
#	_imem_read_long() - read longword from memory			#
#	iea_iacc() - _imem_read_long() failed; error recovery		#
#									#
# INPUT ***************************************************************	#
#	None								#
#									#
# OUTPUT **************************************************************	#
#	If _imem_read_long() doesn't fail:				#
#		USER_FPCR(a6)  = new FPCR value				#
#		USER_FPSR(a6)  = new FPSR value				#
#		USER_FPIAR(a6) = new FPIAR value			#
#									#
# ALGORITHM ***********************************************************	#
#	Decode the instruction type by looking at the extension word	#
# in order to see how many control registers to fetch from memory.	#
# Fetch them using _imem_read_long(). If this fetch fails, exit through	#
# the special access error exit handler iea_iacc().			#
#									#
# Instruction word decoding:						#
#									#
#	fmovem.l #<data>, {FPIAR&|FPCR&|FPSR}				#
#									#
#		WORD1			WORD2				#
#	1111 0010 00 111100	100$ $$00 0000 0000			#
#									#
#	$$$ (100): FPCR							#
#	    (010): FPSR							#
#	    (001): FPIAR						#
#	    (000): FPIAR						#
#									#
#########################################################################

	global		fmovm_ctrl
fmovm_ctrl:
# dispatch on the hi byte of the extension word (100$ $$00). any value
# other than the three tested below falls through to fctrl_in_3.
	mov.b		EXC_EXTWORD(%a6),%d0	# fetch reg select bits
	cmpi.b		%d0,&0x9c		# fpcr & fpsr & fpiar ?
	beq.w		fctrl_in_7		# yes
	cmpi.b		%d0,&0x98		# fpcr & fpsr ?
	beq.w		fctrl_in_6		# yes
	cmpi.b		%d0,&0x94		# fpcr & fpiar ?
	beq.b		fctrl_in_5		# yes

# fmovem.l	#<data>, fpsr/fpiar
fctrl_in_3:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch FPSR from mem

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		%d0,USER_FPSR(%a6)	# store new FPSR to stack
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch FPIAR from mem

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		%d0,USER_FPIAR(%a6)	# store new FPIAR to stack
	rts

# fmovem.l	#<data>, fpcr/fpiar
fctrl_in_5:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch FPCR from mem

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		%d0,USER_FPCR(%a6)	# store new FPCR to stack
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch FPIAR from mem

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		%d0,USER_FPIAR(%a6)	# store new FPIAR to stack
	rts

# fmovem.l	#<data>, fpcr/fpsr
fctrl_in_6:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch FPCR from mem

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		%d0,USER_FPCR(%a6)	# store new FPCR to stack
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch FPSR from mem

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		%d0,USER_FPSR(%a6)	# store new FPSR to stack
	rts

# fmovem.l	#<data>, fpcr/fpsr/fpiar
fctrl_in_7:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch FPCR from mem

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		%d0,USER_FPCR(%a6)	# store new FPCR to stack
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch FPSR from mem

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		%d0,USER_FPSR(%a6)	# store new FPSR to stack
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch FPIAR from mem

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		%d0,USER_FPIAR(%a6)	# store new FPIAR to stack
	rts

#########################################################################
# XDEF ****************************************************************	#
#	_dcalc_ea(): calc correct <ea> from <ea> stacked on exception	#
#									#
# XREF ****************************************************************	#
#	inc_areg() - increment an address register			#
#	dec_areg() - decrement an address register			#
#									#
# INPUT ***************************************************************	#
#	d0 = number of bytes to adjust <ea> by				#
#									#
# OUTPUT **************************************************************	#
#	a0 = <ea> (see the code below; the original header listed	#
#	     "None")							#
#									#
# ALGORITHM ***********************************************************	#
# "Dummy" CALCulate Effective Address:					#
#	The stacked <ea> for FP unimplemented instructions and opclass	#
#	two packed instructions is correct with the exception of...	#
#									#
#	1) -(An)   : The register is not updated regardless of size.	#
#		     Also, for extended precision and packed, the	#
#		     stacked <ea> value is 8 bytes too big		#
#	2) (An)+   : The register is not updated.			#
#	3) #<data> : The upper longword of the immediate operand is	#
#		     stacked b,w,l and s sizes are completely stacked.	#
#		     d,x, and p are not.				#
#									#
#########################################################################

	global		_dcalc_ea
_dcalc_ea:
	mov.l		%d0, %a0		# move # bytes to %a0

	mov.b		1+EXC_OPWORD(%a6), %d0	# fetch lo byte of opcode word
	mov.l		%d0, %d1		# make a copy

	andi.w		&0x38, %d0		# extract mode field
	andi.l		&0x7, %d1		# extract reg field

	cmpi.b		%d0,&0x18		# is mode (An)+ ?
	beq.b		dcea_pi			# yes

	cmpi.b		%d0,&0x20		# is mode -(An) ?
	beq.b		dcea_pd			# yes

	or.w		%d1,%d0			# concat mode,reg
	cmpi.b		%d0,&0x3c		# is mode #<data>?

	beq.b		dcea_imm		# yes

	mov.l		EXC_EA(%a6),%a0		# return <ea>
	rts

# need to set immediate data flag here since we'll need to do
# an imem_read to fetch this later.
dcea_imm:
	mov.b		&immed_flg,SPCOND_FLG(%a6)
	lea		([USER_FPIAR,%a6],0x4),%a0 # <ea> = (saved FPIAR) + 4
	rts

# here, the <ea> is stacked correctly. however, we must update the
# address register...
dcea_pi:
	mov.l		%a0,%d0			# pass amt to inc by
	bsr.l		inc_areg		# inc addr register

	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
	rts

# the <ea> is stacked correctly for all but extended and packed which
# the <ea>s are 8 bytes too large.
# it would make no sense to have a pre-decrement to a7 in supervisor
# mode so we don't even worry about this tricky case here : )
dcea_pd:
	mov.l		%a0,%d0			# pass amt to dec by
	bsr.l		dec_areg		# dec addr register

	mov.l		EXC_EA(%a6),%a0		# fetch stacked <ea>

# NOTE(review): this compare relies on d0 still holding the byte count
# after dec_areg() returns -- confirm that dec_areg() preserves d0.
	cmpi.b		%d0,&0xc		# is opsize ext or packed?
	beq.b		dcea_pd2		# yes
	rts
dcea_pd2:
	sub.l		&0x8,%a0		# correct <ea>
	mov.l		%a0,EXC_EA(%a6)		# put correct <ea> on stack
	rts

#########################################################################
# XDEF ****************************************************************	#
#	_calc_ea_fout(): calculate correct stacked <ea> for extended	#
#			 and packed data opclass 3 operations.		#
#									#
# XREF **************************************************************** #
#	None								#
#									#
# INPUT *************************************************************** #
#	None								#
#									#
# OUTPUT ************************************************************** #
#	a0 = return correct effective address				#
#									#
# ALGORITHM *********************************************************** #
#	For opclass 3 extended and packed data operations, the <ea>	#
# stacked for the exception is incorrect for -(an) and (an)+ addressing	#
# modes. Also, while we're at it, the index register itself must get	#
# updated.								#
#	So, for -(an), we must subtract 8 off of the stacked <ea> value	#
# and return that value as the correct <ea> and store that value in An.	#
# For (an)+, the stacked <ea> is correct but we must adjust An by +12.	#
#									#
#########################################################################

# This calc_ea is currently used to retrieve the correct <ea>
# for fmove outs of type extended and packed.
	global	_calc_ea_fout
_calc_ea_fout:
	mov.b	1+EXC_OPWORD(%a6),%d0	# fetch low byte of opcode word
	mov.l	%d0,%d1			# make a copy

	andi.w	&0x38,%d0		# extract mode field
	andi.l	&0x7,%d1		# extract reg field

	cmpi.b	%d0,&0x18		# is mode (An)+ ?
	beq.b	ceaf_pi			# yes

	cmpi.b	%d0,&0x20		# is mode -(An) ?
	beq.w	ceaf_pd			# yes

	mov.l	EXC_EA(%a6),%a0		# stacked <ea> is correct
	rts

# (An)+ : extended and packed fmove out
#	: stacked <ea> is correct
#	: "An" not updated
# d1 (register number) indexes a table of 16-bit offsets; the selected stub
# adds 12 to that address register. a2-a5 are adjusted directly; a0, a1, a6
# and a7 are adjusted through EXC_DREGS+0x8, EXC_DREGS+0xc, EXC_A6 and
# EXC_A7 in the a6 frame (presumably their saved copies).
ceaf_pi:
	mov.w	(tbl_ceaf_pi.b,%pc,%d1.w*2),%d1	# fetch offset of stub for An
	mov.l	EXC_EA(%a6),%a0		# return value: stacked <ea>
	jmp	(tbl_ceaf_pi.b,%pc,%d1.w*1)

	swbeg	&0x8
tbl_ceaf_pi:
	short	ceaf_pi0 - tbl_ceaf_pi
	short	ceaf_pi1 - tbl_ceaf_pi
	short	ceaf_pi2 - tbl_ceaf_pi
	short	ceaf_pi3 - tbl_ceaf_pi
	short	ceaf_pi4 - tbl_ceaf_pi
	short	ceaf_pi5 - tbl_ceaf_pi
	short	ceaf_pi6 - tbl_ceaf_pi
	short	ceaf_pi7 - tbl_ceaf_pi

ceaf_pi0:
	addi.l	&0xc,EXC_DREGS+0x8(%a6)
	rts
ceaf_pi1:
	addi.l	&0xc,EXC_DREGS+0xc(%a6)
	rts
ceaf_pi2:
	add.l	&0xc,%a2
	rts
ceaf_pi3:
	add.l	&0xc,%a3
	rts
ceaf_pi4:
	add.l	&0xc,%a4
	rts
ceaf_pi5:
	add.l	&0xc,%a5
	rts
ceaf_pi6:
	addi.l	&0xc,EXC_A6(%a6)
	rts
ceaf_pi7:
	mov.b	&mia7_flg,SPCOND_FLG(%a6) # flag the (a7)+ case for the caller
	addi.l	&0xc,EXC_A7(%a6)
	rts

# -(An) : extended and packed fmove out
#	: stacked <ea> = actual <ea> + 8
#	: "An" not updated
# Both the returned a0 and the stacked EXC_EA are lowered by 8, then the
# selected stub writes the corrected address into An.
ceaf_pd:
	mov.w	(tbl_ceaf_pd.b,%pc,%d1.w*2),%d1	# fetch offset of stub for An
	mov.l	EXC_EA(%a6),%a0
	sub.l	&0x8,%a0		# a0 = corrected <ea>
	sub.l	&0x8,EXC_EA(%a6)	# correct the stacked <ea> as well
	jmp	(tbl_ceaf_pd.b,%pc,%d1.w*1)

	swbeg	&0x8
tbl_ceaf_pd:
	short	ceaf_pd0 - tbl_ceaf_pd
	short	ceaf_pd1 - tbl_ceaf_pd
	short	ceaf_pd2 - tbl_ceaf_pd
	short	ceaf_pd3 - tbl_ceaf_pd
	short	ceaf_pd4 - tbl_ceaf_pd
	short	ceaf_pd5 - tbl_ceaf_pd
	short	ceaf_pd6 - tbl_ceaf_pd
	short	ceaf_pd7 - tbl_ceaf_pd

ceaf_pd0:
	mov.l	%a0,EXC_DREGS+0x8(%a6)
	rts
ceaf_pd1:
	mov.l	%a0,EXC_DREGS+0xc(%a6)
	rts
ceaf_pd2:
	mov.l	%a0,%a2
	rts
ceaf_pd3:
	mov.l	%a0,%a3
	rts
ceaf_pd4:
	mov.l	%a0,%a4
	rts
ceaf_pd5:
	mov.l	%a0,%a5
	rts
ceaf_pd6:
	mov.l	%a0,EXC_A6(%a6)
	rts
ceaf_pd7:
	mov.l	%a0,EXC_A7(%a6)
	mov.b	&mda7_flg,SPCOND_FLG(%a6) # flag the -(a7) case for the caller
	rts

#########################################################################
# XDEF **************************************************************** #
#	_load_fop(): load operand for unimplemented FP exception	#
#									#
# XREF **************************************************************** #
#	set_tag_x() - determine ext prec optype tag			#
#	set_tag_s() - determine sgl prec optype tag			#
#	set_tag_d() - determine dbl prec optype tag			#
#	unnorm_fix() - convert UNNORM number to denorm or zero		#
#	norm() - normalize a denormalized number			#
#	get_packed() - fetch a packed operand from memory		#
#	_dcalc_ea() - calculate <ea>, fixing An in process		#
#									#
#	_imem_read_{word,long}() - read from instruction memory		#
#	_dmem_read() - read from data memory				#
#	_dmem_read_{byte,word,long}() - read from data memory		#
#									#
#	facc_in_{b,w,l,d,x}() - mem read failed; special exit point	#
#									#
# INPUT *************************************************************** #
#	None								#
#									#
# OUTPUT ************************************************************** #
#	If memory access doesn't fail:					#
#		FP_SRC(a6) = source operand in extended precision	#
#		FP_DST(a6) = destination operand in extended precision	#
#									#
# ALGORITHM *********************************************************** #
#	This is called from the Unimplemented FP exception handler in	#
# order to load the source and maybe destination operand into		#
# FP_SRC(a6) and FP_DST(a6). If the instruction was opclass zero, load	#
# the source and destination from the FP register file. Set the optype	#
# tags for both if dyadic, one for monadic. If a number is an UNNORM,	#
# convert it to a DENORM or a ZERO.					#
#	If the instruction is opclass two (memory->reg), then fetch	#
# the destination from the register file and the source operand from	#
# memory. Tag and fix both as above w/ opclass zero instructions.	#
#	If the source operand is byte,word,long, or single, it may be	#
# in the data register file. If it's actually out in memory, use one of	#
# the mem_read() routines to fetch it. If the mem_read() access returns	#
# a failing value, exit through the special facc_in() routine which	#
# will create an access error exception frame from the current		#
# exception frame.							#
#	Immediate data and regular data accesses are separated because	#
# if an immediate data access fails, the resulting fault status		#
# longword stacked for the access error exception must have the		#
# instruction bit set.							#
#									#
#########################################################################

	global	_load_fop
_load_fop:

#  15     13 12 10  9 7  6       0
# /        \ /   \ /  \ /         \
# ---------------------------------
# | opclass | RX  | RY | EXTENSION |  (2nd word of general FP instruction)
# ---------------------------------
#

#	bfextu	EXC_CMDREG(%a6){&0:&3}, %d0 # extract opclass
#	cmpi.b	%d0, &0x2		# which class is it? ('000,'010,'011)
#	beq.w	op010			# handle <ea> -> fpn
#	bgt.w	op011			# handle fpn -> <ea>

# we're not using op011 for now...
# Opclass dispatch for _load_fop: bit 6 of the first byte at EXC_CMDREG
# selects the <ea> -> reg path (op010); otherwise fall into reg -> reg.
	btst	&0x6,EXC_CMDREG(%a6)
	bne.b	op010

############################
# OPCLASS '000: reg -> reg #
############################
op000:
	mov.b	1+EXC_CMDREG(%a6),%d0	# fetch extension word lo
	btst	&0x5,%d0		# testing extension bits
	beq.b	op000_src		# (bit 5 == 0) => monadic
	btst	&0x4,%d0		# (bit 5 == 1)
	beq.b	op000_dst		# (bit 4 == 0) => dyadic
	and.w	&0x007f,%d0		# extract extension bits {6:0}
	cmpi.w	%d0,&0x0038		# is it an fcmp (dyadic) ?
	bne.b	op000_src		# no => skip the dst load

op000_dst:
	bfextu	EXC_CMDREG(%a6){&6:&3}, %d0 # extract dst field
	bsr.l	load_fpn2		# fetch dst fpreg into FP_DST

	bsr.l	set_tag_x		# get dst optype tag

	cmpi.b	%d0, &UNNORM		# is dst fpreg an UNNORM?
	beq.b	op000_dst_unnorm	# yes
op000_dst_cont:
	mov.b	%d0, DTAG(%a6)		# store the dst optype tag

op000_src:
	bfextu	EXC_CMDREG(%a6){&3:&3}, %d0 # extract src field
	bsr.l	load_fpn1		# fetch src fpreg into FP_SRC

	bsr.l	set_tag_x		# get src optype tag

	cmpi.b	%d0, &UNNORM		# is src fpreg an UNNORM?
	beq.b	op000_src_unnorm	# yes
op000_src_cont:
	mov.b	%d0, STAG(%a6)		# store the src optype tag
	rts

op000_dst_unnorm:
	bsr.l	unnorm_fix		# fix the dst UNNORM
	bra.b	op000_dst_cont
op000_src_unnorm:
	bsr.l	unnorm_fix		# fix the src UNNORM
	bra.b	op000_src_cont

#############################
# OPCLASS '010: <ea> -> reg #
#############################
op010:
	mov.w	EXC_CMDREG(%a6),%d0	# fetch extension word
	btst	&0x5,%d0		# testing extension bits
	beq.b	op010_src		# (bit 5 == 0) => monadic
	btst	&0x4,%d0		# (bit 5 == 1)
	beq.b	op010_dst		# (bit 4 == 0) => dyadic
	and.w	&0x007f,%d0		# extract extension bits {6:0}
	cmpi.w	%d0,&0x0038		# is it an fcmp (dyadic) ?
	bne.b	op010_src		# no => skip the dst load

op010_dst:
	bfextu	EXC_CMDREG(%a6){&6:&3}, %d0 # extract dst field
	bsr.l	load_fpn2		# fetch dst fpreg ptr

	bsr.l	set_tag_x		# get dst type tag

	cmpi.b	%d0, &UNNORM		# is dst fpreg an UNNORM?
	beq.b	op010_dst_unnorm	# yes
op010_dst_cont:
	mov.b	%d0, DTAG(%a6)		# store the dst optype tag

op010_src:
	bfextu	EXC_CMDREG(%a6){&3:&3}, %d0 # extract src type field

# the branch below consumes the Z flag set by this bfextu: a non-zero
# <ea> mode field means the source is not a data register.
	bfextu	EXC_OPWORD(%a6){&10:&3}, %d1 # extract <ea> mode field
	bne.w	fetch_from_mem		# src op is in memory

op010_dreg:
	clr.b	STAG(%a6)		# either NORM or ZERO
	bfextu	EXC_OPWORD(%a6){&13:&3}, %d1 # extract src reg field

	mov.w	(tbl_op010_dreg.b,%pc,%d0.w*2), %d0 # jmp based on optype
	jmp	(tbl_op010_dreg.b,%pc,%d0.w*1) # fetch src from dreg

op010_dst_unnorm:
	bsr.l	unnorm_fix		# fix the dst UNNORM
	bra.b	op010_dst_cont

# indexed by src type field; only long(0), sgl(1), word(4) and byte(6)
# have handlers, the remaining slots hold a zero offset.
	swbeg	&0x8
tbl_op010_dreg:
	short	opd_long - tbl_op010_dreg
	short	opd_sgl - tbl_op010_dreg
	short	tbl_op010_dreg - tbl_op010_dreg
	short	tbl_op010_dreg - tbl_op010_dreg
	short	opd_word - tbl_op010_dreg
	short	tbl_op010_dreg - tbl_op010_dreg
	short	opd_byte - tbl_op010_dreg
	short	tbl_op010_dreg - tbl_op010_dreg

#
# LONG: can be either NORM or ZERO...
#
opd_long:
	bsr.l	fetch_dreg		# fetch long in d0
	fmov.l	%d0, %fp0		# load a long
	fmovm.x	&0x80, FP_SRC(%a6)	# return src op in FP_SRC
	fbeq.w	opd_long_zero		# long is a ZERO
	rts
opd_long_zero:
	mov.b	&ZERO, STAG(%a6)	# set ZERO optype flag
	rts

#
# WORD: can be either NORM or ZERO...
#
opd_word:
	bsr.l	fetch_dreg		# fetch word in d0
	fmov.w	%d0, %fp0		# load a word
	fmovm.x	&0x80, FP_SRC(%a6)	# return src op in FP_SRC
	fbeq.w	opd_word_zero		# WORD is a ZERO
	rts
opd_word_zero:
	mov.b	&ZERO, STAG(%a6)	# set ZERO optype flag
	rts

#
# BYTE: can be either NORM or ZERO...
#
opd_byte:
	bsr.l	fetch_dreg		# fetch byte in d0
	fmov.b	%d0, %fp0		# load a byte
	fmovm.x	&0x80, FP_SRC(%a6)	# return src op in FP_SRC
	fbeq.w	opd_byte_zero		# byte is a ZERO
	rts
opd_byte_zero:
	mov.b	&ZERO, STAG(%a6)	# set ZERO optype flag
	rts

#
# SGL: can be either NORM, DENORM, ZERO, INF, QNAN or SNAN but not UNNORM
#
# separate SNANs and DENORMs so they can be loaded w/ special care.
# all others can simply be moved "in" using fmove.
#
opd_sgl:
	bsr.l	fetch_dreg		# fetch sgl in d0
	mov.l	%d0,L_SCR1(%a6)		# park it in L_SCR1 so it can be passed by ptr

	lea	L_SCR1(%a6), %a0	# pass: ptr to the sgl
	bsr.l	set_tag_s		# determine sgl type
	mov.b	%d0, STAG(%a6)		# save the src tag

	cmpi.b	%d0, &SNAN		# is it an SNAN?
	beq.w	get_sgl_snan		# yes

	cmpi.b	%d0, &DENORM		# is it a DENORM?
	beq.w	get_sgl_denorm		# yes

	fmov.s	(%a0), %fp0		# no, so can load it regular
	fmovm.x	&0x80, FP_SRC(%a6)	# return src op in FP_SRC
	rts

##############################################################################

#########################################################################
# fetch_from_mem():							#
# - src is out in memory. must:						#
#	(1) calc ea - must read AFTER you know the src type since	#
#		      if the ea is -() or ()+, need to know # of bytes.	#
#	(2) read it in from either user or supervisor space		#
#	(3) if (b || w || l) then simply read in			#
#	    if (s || d || x) then check for SNAN,UNNORM,DENORM		#
#	    if (packed) then punt for now				#
# INPUT:								#
#	%d0 : src type field						#
#########################################################################
fetch_from_mem:
	clr.b	STAG(%a6)		# either NORM or ZERO

	mov.w	(tbl_fp_type.b,%pc,%d0.w*2), %d0 # index by src type field
	jmp	(tbl_fp_type.b,%pc,%d0.w*1)

# one 16-bit offset per src type value 0-7; slot 7 holds a zero offset.
	swbeg	&0x8
tbl_fp_type:
	short	load_long - tbl_fp_type
	short	load_sgl - tbl_fp_type
	short	load_ext - tbl_fp_type
	short	load_packed - tbl_fp_type
	short	load_word - tbl_fp_type
	short	load_dbl - tbl_fp_type
	short	load_byte - tbl_fp_type
	short	tbl_fp_type - tbl_fp_type

#########################################
# load a LONG into %fp0:		#
#	-number can't fault		#
#	(1) calc ea			#
#	(2) read 4 bytes into L_SCR1	#
#	(3) fmov.l into %fp0		#
#########################################
# (as coded below, the fetched long stays in d0 and is converted from there.)
load_long:
	movq.l	&0x4, %d0		# pass: 4 (bytes)
	bsr.l	_dcalc_ea		# calc <ea>; <ea> in %a0

	cmpi.b	SPCOND_FLG(%a6),&immed_flg # is the operand #<data>?
	beq.b	load_long_immed		# yes; read it as instruction data

	bsr.l	_dmem_read_long		# fetch src operand from memory

	tst.l	%d1			# did dfetch fail?
	bne.l	facc_in_l		# yes

load_long_cont:
	fmov.l	%d0, %fp0		# read into %fp0;convert to xprec
	fmovm.x	&0x80, FP_SRC(%a6)	# return src op in FP_SRC

	fbeq.w	load_long_zero		# src op is a ZERO
	rts
load_long_zero:
	mov.b	&ZERO, STAG(%a6)	# set optype tag to ZERO
	rts

load_long_immed:
	bsr.l	_imem_read_long		# fetch src operand immed data

	tst.l	%d1			# did ifetch fail?
	bne.l	funimp_iacc		# yes
	bra.b	load_long_cont

#########################################
# load a WORD into %fp0:		#
#	-number can't fault		#
#	(1) calc ea			#
#	(2) read 2 bytes into L_SCR1	#
#	(3) fmov.w into %fp0		#
#########################################
# (as coded below, the fetched word stays in d0 and is converted from there.)
load_word:
	movq.l	&0x2, %d0		# pass: 2 (bytes)
	bsr.l	_dcalc_ea		# calc <ea>; <ea> in %a0

	cmpi.b	SPCOND_FLG(%a6),&immed_flg # is the operand #<data>?
	beq.b	load_word_immed		# yes; read it as instruction data

	bsr.l	_dmem_read_word		# fetch src operand from memory

	tst.l	%d1			# did dfetch fail?
	bne.l	facc_in_w		# yes

load_word_cont:
	fmov.w	%d0, %fp0		# read into %fp0;convert to xprec
	fmovm.x	&0x80, FP_SRC(%a6)	# return src op in FP_SRC

	fbeq.w	load_word_zero		# src op is a ZERO
	rts
load_word_zero:
	mov.b	&ZERO, STAG(%a6)	# set optype tag to ZERO
	rts

load_word_immed:
	bsr.l	_imem_read_word		# fetch src operand immed data

	tst.l	%d1			# did ifetch fail?
	bne.l	funimp_iacc		# yes
	bra.b	load_word_cont

#########################################
# load a BYTE into %fp0:		#
#	-number can't fault		#
#	(1) calc ea			#
#	(2) read 1 byte into L_SCR1	#
#	(3) fmov.b into %fp0		#
#########################################
# (as coded below, the fetched byte stays in d0 and is converted from there.)
load_byte:
	movq.l	&0x1, %d0		# pass: 1 (byte)
	bsr.l	_dcalc_ea		# calc <ea>; <ea> in %a0

	cmpi.b	SPCOND_FLG(%a6),&immed_flg # is the operand #<data>?
	beq.b	load_byte_immed		# yes; read it as instruction data

	bsr.l	_dmem_read_byte		# fetch src operand from memory

	tst.l	%d1			# did dfetch fail?
19912 bne.l facc_in_b # yes 19913 19914load_byte_cont: 19915 fmov.b %d0, %fp0 # read into %fp0;convert to xprec 19916 fmovm.x &0x80, FP_SRC(%a6) # return src op in FP_SRC 19917 19918 fbeq.w load_byte_zero # src op is a ZERO 19919 rts 19920load_byte_zero: 19921 mov.b &ZERO, STAG(%a6) # set optype tag to ZERO 19922 rts 19923 19924load_byte_immed: 19925 bsr.l _imem_read_word # fetch src operand immed data 19926 19927 tst.l %d1 # did ifetch fail? 19928 bne.l funimp_iacc # yes 19929 bra.b load_byte_cont 19930 19931######################################### 19932# load a SGL into %fp0: # 19933# -number can't fault # 19934# (1) calc ea # 19935# (2) read 4 bytes into L_SCR1 # 19936# (3) fmov.s into %fp0 # 19937######################################### 19938load_sgl: 19939 movq.l &0x4, %d0 # pass: 4 (bytes) 19940 bsr.l _dcalc_ea # calc <ea>; <ea> in %a0 19941 19942 cmpi.b SPCOND_FLG(%a6),&immed_flg 19943 beq.b load_sgl_immed 19944 19945 bsr.l _dmem_read_long # fetch src operand from memory 19946 mov.l %d0, L_SCR1(%a6) # store src op on stack 19947 19948 tst.l %d1 # did dfetch fail? 19949 bne.l facc_in_l # yes 19950 19951load_sgl_cont: 19952 lea L_SCR1(%a6), %a0 # pass: ptr to sgl src op 19953 bsr.l set_tag_s # determine src type tag 19954 mov.b %d0, STAG(%a6) # save src optype tag on stack 19955 19956 cmpi.b %d0, &DENORM # is it a sgl DENORM? 19957 beq.w get_sgl_denorm # yes 19958 19959 cmpi.b %d0, &SNAN # is it a sgl SNAN? 19960 beq.w get_sgl_snan # yes 19961 19962 fmov.s L_SCR1(%a6), %fp0 # read into %fp0;convert to xprec 19963 fmovm.x &0x80, FP_SRC(%a6) # return src op in FP_SRC 19964 rts 19965 19966load_sgl_immed: 19967 bsr.l _imem_read_long # fetch src operand immed data 19968 19969 tst.l %d1 # did ifetch fail? 19970 bne.l funimp_iacc # yes 19971 bra.b load_sgl_cont 19972 19973# must convert sgl denorm format to an Xprec denorm fmt suitable for 19974# normalization... 
# %a0 : points to sgl denorm
get_sgl_denorm:
	clr.w	FP_SRC_EX(%a6)
	bfextu	(%a0){&9:&23}, %d0	# fetch sgl hi(_mantissa)
	lsl.l	&0x8, %d0		# 23-bit field now in bits 30-8; bit 31 clear
	mov.l	%d0, FP_SRC_HI(%a6)	# set ext hi(_mantissa)
	clr.l	FP_SRC_LO(%a6)		# set ext lo(_mantissa)

	clr.w	FP_SRC_EX(%a6)		# (already cleared at entry above)
	btst	&0x7, (%a0)		# is sgn bit set?
	beq.b	sgl_dnrm_norm
	bset	&0x7, FP_SRC_EX(%a6)	# set sgn of xprec value

sgl_dnrm_norm:
	lea	FP_SRC(%a6), %a0
	bsr.l	norm			# normalize number
	mov.w	&0x3f81, %d1		# xprec exp = 0x3f81
	sub.w	%d0, %d1		# exp = 0x3f81 - shft amt.
	or.w	%d1, FP_SRC_EX(%a6)	# {sgn,exp}

	mov.b	&NORM, STAG(%a6)	# fix src type tag
	rts

# convert sgl to ext SNAN
# %a0 : points to sgl SNAN
get_sgl_snan:
	mov.w	&0x7fff, FP_SRC_EX(%a6)	# set exp of SNAN
	bfextu	(%a0){&9:&23}, %d0
	lsl.l	&0x8, %d0		# extract and insert hi(man)
	mov.l	%d0, FP_SRC_HI(%a6)
	clr.l	FP_SRC_LO(%a6)

	btst	&0x7, (%a0)		# see if sign of SNAN is set
	beq.b	no_sgl_snan_sgn
	bset	&0x7, FP_SRC_EX(%a6)
no_sgl_snan_sgn:
	rts

#########################################
# load a DBL into %fp0:			#
#	-number can't fault		#
#	(1) calc ea			#
#	(2) read 8 bytes into L_SCR(1,2)#
#	(3) fmov.d into %fp0		#
#########################################
load_dbl:
	movq.l	&0x8, %d0		# pass: 8 (bytes)
	bsr.l	_dcalc_ea		# calc <ea>; <ea> in %a0

	cmpi.b	SPCOND_FLG(%a6),&immed_flg # is the operand #<data>?
	beq.b	load_dbl_immed		# yes; read it as instruction data

	lea	L_SCR1(%a6), %a1	# pass: ptr to input dbl tmp space
	movq.l	&0x8, %d0		# pass: # bytes to read
	bsr.l	_dmem_read		# fetch src operand from memory

	tst.l	%d1			# did dfetch fail?
	bne.l	facc_in_d		# yes

load_dbl_cont:
	lea	L_SCR1(%a6), %a0	# pass: ptr to input dbl
	bsr.l	set_tag_d		# determine src type tag
	mov.b	%d0, STAG(%a6)		# set src optype tag

	cmpi.b	%d0, &DENORM		# is it a dbl DENORM?
	beq.w	get_dbl_denorm		# yes

	cmpi.b	%d0, &SNAN		# is it a dbl SNAN?
	beq.w	get_dbl_snan		# yes

	fmov.d	L_SCR1(%a6), %fp0	# read into %fp0;convert to xprec
	fmovm.x	&0x80, FP_SRC(%a6)	# return src op in FP_SRC
	rts

load_dbl_immed:
	lea	L_SCR1(%a6), %a1	# pass: ptr to input dbl tmp space
	movq.l	&0x8, %d0		# pass: # bytes to read
	bsr.l	_imem_read		# fetch src operand immed data

	tst.l	%d1			# did ifetch fail?
	bne.l	funimp_iacc		# yes
	bra.b	load_dbl_cont

# must convert dbl denorm format to an Xprec denorm fmt suitable for
# normalization...
# %a0 : loc. of dbl denorm
get_dbl_denorm:
	clr.w	FP_SRC_EX(%a6)
	bfextu	(%a0){&12:&31}, %d0	# fetch hi(_mantissa)
	mov.l	%d0, FP_SRC_HI(%a6)
	bfextu	4(%a0){&11:&21}, %d0	# fetch lo(_mantissa)
	mov.l	&0xb, %d1
	lsl.l	%d1, %d0		# left-justify the 21 low mantissa bits
	mov.l	%d0, FP_SRC_LO(%a6)

	btst	&0x7, (%a0)		# is sgn bit set?
	beq.b	dbl_dnrm_norm
	bset	&0x7, FP_SRC_EX(%a6)	# set sgn of xprec value

dbl_dnrm_norm:
	lea	FP_SRC(%a6), %a0
	bsr.l	norm			# normalize number
	mov.w	&0x3c01, %d1		# xprec exp = 0x3c01
	sub.w	%d0, %d1		# exp = 0x3c01 - shft amt.
	or.w	%d1, FP_SRC_EX(%a6)	# {sgn,exp}

	mov.b	&NORM, STAG(%a6)	# fix src type tag
	rts

# convert dbl to ext SNAN
# %a0 : points to dbl SNAN
get_dbl_snan:
	mov.w	&0x7fff, FP_SRC_EX(%a6)	# set exp of SNAN

	bfextu	(%a0){&12:&31}, %d0	# fetch hi(_mantissa)
	mov.l	%d0, FP_SRC_HI(%a6)
	bfextu	4(%a0){&11:&21}, %d0	# fetch lo(_mantissa)
	mov.l	&0xb, %d1
	lsl.l	%d1, %d0		# left-justify the 21 low mantissa bits
	mov.l	%d0, FP_SRC_LO(%a6)

	btst	&0x7, (%a0)		# see if sign of SNAN is set
	beq.b	no_dbl_snan_sgn
	bset	&0x7, FP_SRC_EX(%a6)
no_dbl_snan_sgn:
	rts

#################################################
# load a Xprec into %fp0:			#
#	-number can't fault			#
#	(1) calc ea				#
#	(2) read 12 bytes into L_SCR(1,2)	#
#	(3) fmov.x into %fp0			#
#################################################
# (as coded below, the 12 bytes are read straight into FP_SRC.)
# NOTE(review): this path never tests SPCOND_FLG for immed_flg, so a #<data>
# extended operand is fetched with _dmem_read rather than an _imem_read
# routine -- confirm this is intended.
load_ext:
	mov.l	&0xc, %d0		# pass: 12 (bytes)
	bsr.l	_dcalc_ea		# calc <ea>

	lea	FP_SRC(%a6), %a1	# pass: ptr to input ext tmp space
	mov.l	&0xc, %d0		# pass: # of bytes to read
	bsr.l	_dmem_read		# fetch src operand from memory

	tst.l	%d1			# did dfetch fail?
	bne.l	facc_in_x		# yes

	lea	FP_SRC(%a6), %a0	# pass: ptr to src op
	bsr.l	set_tag_x		# determine src type tag

	cmpi.b	%d0, &UNNORM		# is the src op an UNNORM?
	beq.b	load_ext_unnorm		# yes

	mov.b	%d0, STAG(%a6)		# store the src optype tag
	rts

load_ext_unnorm:
	bsr.l	unnorm_fix		# fix the src UNNORM
	mov.b	%d0, STAG(%a6)		# store the src optype tag
	rts

#################################################
# load a packed into %fp0:			#
#	-number can't fault			#
#	(1) calc ea				#
#	(2) read 12 bytes into L_SCR(1,2,3)	#
#	(3) fmov.x into %fp0			#
#################################################
# (as coded below, the fetch is delegated to get_packed and the result is
# then tagged from FP_SRC.)
load_packed:
	bsr.l	get_packed

	lea	FP_SRC(%a6),%a0		# pass ptr to src op
	bsr.l	set_tag_x		# determine src type tag
	cmpi.b	%d0,&UNNORM		# is the src op an UNNORM ZERO?
	beq.b	load_packed_unnorm	# yes

	mov.b	%d0,STAG(%a6)		# store the src optype tag
	rts

load_packed_unnorm:
	bsr.l	unnorm_fix		# fix the UNNORM ZERO
	mov.b	%d0,STAG(%a6)		# store the src optype tag
	rts

#########################################################################
# XDEF **************************************************************** #
#	fout(): move from fp register to memory or data register	#
#									#
# XREF **************************************************************** #
#	_round() - needed to create EXOP for sgl/dbl precision		#
#	norm() - needed to create EXOP for extended precision		#
#	ovf_res() - create default overflow result for sgl/dbl precision#
#	unf_res() - create default underflow result for sgl/dbl prec.	#
#	dst_dbl() - create rounded dbl precision result.		#
#	dst_sgl() - create rounded sgl precision result.		#
#	fetch_dreg() - fetch dynamic k-factor reg for packed.		#
#	bindec() - convert FP binary number to packed number.		#
#	_mem_write() - write data to memory.				#
#	_mem_write2() - write data to memory unless supv mode -(a7) exc.#
#	_dmem_write_{byte,word,long}() - write data to memory.		#
#	store_dreg_{b,w,l}() - store data to data register file.	#
#	facc_out_{b,w,l,d,x}() - data access error occurred.		#
#									#
# INPUT *************************************************************** #
#	a0 = pointer to extended precision source operand		#
#	d0 = round prec,mode						#
#									#
# OUTPUT ************************************************************** #
#	fp0 : intermediate underflow or overflow result if		#
#	      OVFL/UNFL occurred for a sgl or dbl operand		#
#									#
# ALGORITHM *********************************************************** #
#	This routine is accessed by many handlers that need to do an	#
# opclass three move of an operand out to memory.			#
#	Decode an fmove out (opclass 3) instruction to determine if	#
# it's b,w,l,s,d,x, or p in size. b,w,l can be stored to either a data	#
# register or memory. The algorithm uses a standard "fmove" to create	#
# the rounded result. Also, since exceptions are disabled, this also	#
# creates the correct OPERR default result if appropriate.		#
#	For sgl or dbl precision, overflow or underflow can occur. If	#
# either occurs and is enabled, an EXOP must also be created.		#
#	For extended precision, the stacked <ea> must be fixed along	#
# w/ the address index register as appropriate w/ _calc_ea_fout(). If	#
# the source is a denorm and if underflow is enabled, an EXOP must be	#
# created.								#
#	For packed, the k-factor must be fetched from the instruction	#
# word or a data register. The <ea> must be fixed as w/ extended	#
# precision. Then, bindec() is called to create the appropriate		#
# packed result.							#
#	If at any time an access error is flagged by one of the move-	#
# to-memory routines, then a special exit must be made so that the	#
# access error can be handled properly.					#
#									#
#########################################################################

	global	fout
fout:
	bfextu	EXC_CMDREG(%a6){&3:&3},%d1 # extract dst fmt
	mov.w	(tbl_fout.b,%pc,%d1.w*2),%a1 # use as index
	jmp	(tbl_fout.b,%pc,%a1)	# jump to routine

# indexed by dst fmt; both slot 3 and slot 7 dispatch to fout_pack.
	swbeg	&0x8
tbl_fout:
	short	fout_long - tbl_fout
	short	fout_sgl - tbl_fout
	short	fout_ext - tbl_fout
	short	fout_pack - tbl_fout
	short	fout_word - tbl_fout
	short	fout_dbl - tbl_fout
	short	fout_byte - tbl_fout
	short	fout_pack - tbl_fout

#################################################################
# fmove.b out ###################################################
#################################################################

# Only "Unimplemented Data Type" exceptions enter here. The operand
# is either a DENORM or a NORM.
fout_byte:
	tst.b	STAG(%a6)		# is operand normalized?
	bne.b	fout_byte_denorm	# no

	fmovm.x	SRC(%a0),&0x80		# load value

fout_byte_norm:
	fmov.l	%d0,%fpcr		# insert rnd prec,mode

	fmov.b	%fp0,%d0		# exec move out w/ correct rnd mode

	fmov.l	&0x0,%fpcr		# clear FPCR
	fmov.l	%fpsr,%d1		# fetch FPSR
	or.w	%d1,2+USER_FPSR(%a6)	# save new exc,accrued bits

	mov.b	1+EXC_OPWORD(%a6),%d1	# extract dst mode
	andi.b	&0x38,%d1		# is mode == 0? (Dreg dst)
	beq.b	fout_byte_dn		# must save to integer regfile

	mov.l	EXC_EA(%a6),%a0		# stacked <ea> is correct
	bsr.l	_dmem_write_byte	# write byte

	tst.l	%d1			# did dstore fail?
	bne.l	facc_out_b		# yes

	rts

fout_byte_dn:
	mov.b	1+EXC_OPWORD(%a6),%d1	# extract Dn
	andi.w	&0x7,%d1
	bsr.l	store_dreg_b
	rts

# a DENORM is replaced by the single precision pattern 0x00800000 carrying
# the operand's sign before the integer conversion is executed.
fout_byte_denorm:
	mov.l	SRC_EX(%a0),%d1
	andi.l	&0x80000000,%d1		# keep DENORM sign
	ori.l	&0x00800000,%d1		# make smallest sgl
	fmov.s	%d1,%fp0
	bra.b	fout_byte_norm

#################################################################
# fmove.w out ###################################################
#################################################################

# Only "Unimplemented Data Type" exceptions enter here. The operand
# is either a DENORM or a NORM.
fout_word:
	tst.b	STAG(%a6)		# is operand normalized?
	bne.b	fout_word_denorm	# no

	fmovm.x	SRC(%a0),&0x80		# load value

fout_word_norm:
	fmov.l	%d0,%fpcr		# insert rnd prec:mode

	fmov.w	%fp0,%d0		# exec move out w/ correct rnd mode

	fmov.l	&0x0,%fpcr		# clear FPCR
	fmov.l	%fpsr,%d1		# fetch FPSR
	or.w	%d1,2+USER_FPSR(%a6)	# save new exc,accrued bits

	mov.b	1+EXC_OPWORD(%a6),%d1	# extract dst mode
	andi.b	&0x38,%d1		# is mode == 0? (Dreg dst)
	beq.b	fout_word_dn		# must save to integer regfile

	mov.l	EXC_EA(%a6),%a0		# stacked <ea> is correct
	bsr.l	_dmem_write_word	# write word

	tst.l	%d1			# did dstore fail?
	bne.l	facc_out_w		# yes

	rts

fout_word_dn:
	mov.b	1+EXC_OPWORD(%a6),%d1	# extract Dn
	andi.w	&0x7,%d1
	bsr.l	store_dreg_w
	rts

# same DENORM substitution as in fout_byte_denorm.
fout_word_denorm:
	mov.l	SRC_EX(%a0),%d1
	andi.l	&0x80000000,%d1		# keep DENORM sign
	ori.l	&0x00800000,%d1		# make smallest sgl
	fmov.s	%d1,%fp0
	bra.b	fout_word_norm

#################################################################
# fmove.l out ###################################################
#################################################################

# Only "Unimplemented Data Type" exceptions enter here. The operand
# is either a DENORM or a NORM.
fout_long:
	tst.b	STAG(%a6)		# is operand normalized?
	bne.b	fout_long_denorm	# no

	fmovm.x	SRC(%a0),&0x80		# load value

fout_long_norm:
	fmov.l	%d0,%fpcr		# insert rnd prec:mode

	fmov.l	%fp0,%d0		# exec move out w/ correct rnd mode

	fmov.l	&0x0,%fpcr		# clear FPCR
	fmov.l	%fpsr,%d1		# fetch FPSR
	or.w	%d1,2+USER_FPSR(%a6)	# save new exc,accrued bits

fout_long_write:
	mov.b	1+EXC_OPWORD(%a6),%d1	# extract dst mode
	andi.b	&0x38,%d1		# is mode == 0? (Dreg dst)
	beq.b	fout_long_dn		# must save to integer regfile

	mov.l	EXC_EA(%a6),%a0		# stacked <ea> is correct
	bsr.l	_dmem_write_long	# write long

	tst.l	%d1			# did dstore fail?
# long move-out: take the access-error exit if the store tested just above failed
	bne.l		facc_out_l	# yes

	rts

# long result goes to a data register: low 3 bits of the opword select Dn
fout_long_dn:
	mov.b		1+EXC_OPWORD(%a6),%d1	# extract Dn
	andi.w		&0x7,%d1
	bsr.l		store_dreg_l
	rts

# DENORM source: substitute a tiny single-precision value carrying the same sign
fout_long_denorm:
	mov.l		SRC_EX(%a0),%d1
	andi.l		&0x80000000,%d1	# keep DENORM sign
	ori.l		&0x00800000,%d1	# make smallest sgl
	fmov.s		%d1,%fp0
	bra.b		fout_long_norm

#################################################################
# fmove.x out ###################################################
#################################################################

# Only "Unimplemented Data Type" exceptions enter here. The operand
# is either a DENORM or a NORM.
# The DENORM causes an Underflow exception.
fout_ext:

# we copy the extended precision result to FP_SCR0 so that the reserved
# 16-bit field gets zeroed. we do this since we promise not to disturb
# what's at SRC(a0).
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	clr.w		2+FP_SCR0_EX(%a6)	# clear reserved field
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)

	fmovm.x		SRC(%a0),&0x80	# return result

	bsr.l		_calc_ea_fout	# fix stacked <ea>

	mov.l		%a0,%a1		# pass: dst addr
	lea		FP_SCR0(%a6),%a0	# pass: src addr
	mov.l		&0xc,%d0	# pass: opsize is 12 bytes

# we must not yet write the extended precision data to the stack
# in the pre-decrement case from supervisor mode or else we'll corrupt
# the stack frame. so, leave it in FP_SRC for now and deal with it later...
# NOTE(review): the copy prepared above lives in FP_SCR0; confirm which buffer
# the deferred write actually uses.
	cmpi.b		SPCOND_FLG(%a6),&mda7_flg
	beq.b		fout_ext_a7

	bsr.l		_dmem_write	# write ext prec number to memory

	tst.l		%d1		# did dstore fail?
	bne.w		fout_ext_err	# yes

	tst.b		STAG(%a6)	# is operand normalized?
	bne.b		fout_ext_denorm	# no
	rts

# the number is a DENORM. must set the underflow exception bit
fout_ext_denorm:
	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set underflow exc bit

	mov.b		FPCR_ENABLE(%a6),%d0
	andi.b		&0x0a,%d0	# is UNFL or INEX enabled?
	bne.b		fout_ext_exc	# yes
	rts

# we don't want to do the write if the exception occurred in supervisor mode
# so _mem_write2() handles this for us.
fout_ext_a7:
	bsr.l		_mem_write2	# write ext prec number to memory

	tst.l		%d1		# did dstore fail?
	bne.w		fout_ext_err	# yes

	tst.b		STAG(%a6)	# is operand normalized?
	bne.b		fout_ext_denorm	# no
	rts

# UNFL or INEX is enabled: build the exceptional operand (EXOP) from the
# normalized copy in FP_SCR0 and hand it back in fp1.
fout_ext_exc:
	lea		FP_SCR0(%a6),%a0
	bsr.l		norm		# normalize the mantissa
	neg.w		%d0		# new exp = -(shft amt)
	andi.w		&0x7fff,%d0
	andi.w		&0x8000,FP_SCR0_EX(%a6)	# keep only old sign
	or.w		%d0,FP_SCR0_EX(%a6)	# insert new exponent
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	rts

fout_ext_err:
	mov.l		EXC_A6(%a6),(%a6)	# fix stacked a6
	bra.l		facc_out_x

#########################################################################
# fmove.s out ###########################################################
#########################################################################
fout_sgl:
	andi.b		&0x30,%d0	# clear rnd prec
	ori.b		&s_mode*0x10,%d0	# insert sgl prec
	mov.l		%d0,L_SCR3(%a6)	# save rnd prec,mode on stack

#
# operand is a normalized number. first, we check to see if the move out
# would cause either an underflow or overflow. these cases are handled
# separately. otherwise, set the FPCR to the proper rounding mode and
# execute the move.
#
	mov.w		SRC_EX(%a0),%d0	# extract exponent
	andi.w		&0x7fff,%d0	# strip sign

	cmpi.w		%d0,&SGL_HI	# will operand overflow?
# single move-out range check: route on the exponent compare made just above
	bgt.w		fout_sgl_ovfl	# yes; go handle OVFL
	beq.w		fout_sgl_may_ovfl	# maybe; go handle possible OVFL
	cmpi.w		%d0,&SGL_LO	# will operand underflow?
	blt.w		fout_sgl_unfl	# yes; go handle underflow

#
# NORMs(in range) can be stored out by a simple "fmov.s"
# Unnormalized inputs can come through this point.
#
fout_sgl_exg:
	fmovm.x		SRC(%a0),&0x80	# fetch fop from stack

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr	# clear FPSR

	fmov.s		%fp0,%d0	# store does convert and round

	fmov.l		&0x0,%fpcr	# clear FPCR
	fmov.l		%fpsr,%d1	# save FPSR

	or.w		%d1,2+USER_FPSR(%a6)	# set possible inex2/ainex

fout_sgl_exg_write:
	mov.b		1+EXC_OPWORD(%a6),%d1	# extract dst mode
	andi.b		&0x38,%d1	# is mode == 0? (Dreg dst)
	beq.b		fout_sgl_exg_write_dn	# must save to integer regfile

	mov.l		EXC_EA(%a6),%a0	# stacked <ea> is correct
	bsr.l		_dmem_write_long	# write long

	tst.l		%d1		# did dstore fail?
	bne.l		facc_out_l	# yes

	rts

fout_sgl_exg_write_dn:
	mov.b		1+EXC_OPWORD(%a6),%d1	# extract Dn
	andi.w		&0x7,%d1
	bsr.l		store_dreg_l
	rts

#
# here, we know that the operand would UNFL if moved out to single prec,
# so, denorm and round and then use generic store single routine to
# write the value to memory.
#
fout_sgl_unfl:
	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set UNFL

	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	mov.l		%a0,-(%sp)	# keep src ptr; popped by fout_sd_exc_unfl
					# or discarded at fout_sgl_unfl_chkexc

	clr.l		%d0		# pass: S.F. = 0

	cmpi.b		STAG(%a6),&DENORM	# fetch src optype tag
	bne.b		fout_sgl_unfl_cont	# let DENORMs fall through

	lea		FP_SCR0(%a6),%a0
	bsr.l		norm		# normalize the DENORM

fout_sgl_unfl_cont:
	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
	mov.l		L_SCR3(%a6),%d1	# pass: rnd prec,mode
	bsr.l		unf_res		# calc default underflow result

	lea		FP_SCR0(%a6),%a0	# pass: ptr to fop
	bsr.l		dst_sgl		# convert to single prec

	mov.b		1+EXC_OPWORD(%a6),%d1	# extract dst mode
	andi.b		&0x38,%d1	# is mode == 0? (Dreg dst)
	beq.b		fout_sgl_unfl_dn	# must save to integer regfile

	mov.l		EXC_EA(%a6),%a0	# stacked <ea> is correct
	bsr.l		_dmem_write_long	# write long

	tst.l		%d1		# did dstore fail?
	bne.l		facc_out_l	# yes

	bra.b		fout_sgl_unfl_chkexc

fout_sgl_unfl_dn:
	mov.b		1+EXC_OPWORD(%a6),%d1	# extract Dn
	andi.w		&0x7,%d1
	bsr.l		store_dreg_l

fout_sgl_unfl_chkexc:
	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x0a,%d1	# is UNFL or INEX enabled?
	bne.w		fout_sd_exc_unfl	# yes
	addq.l		&0x4,%sp	# drop the saved src ptr
	rts

#
# it's definitely an overflow so call ovf_res to get the correct answer
#
fout_sgl_ovfl:
	tst.b		3+SRC_HI(%a0)	# is result inexact?
	bne.b		fout_sgl_ovfl_inex2
	tst.l		SRC_LO(%a0)	# is result inexact?
	bne.b		fout_sgl_ovfl_inex2
	ori.w		&ovfl_inx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex
	bra.b		fout_sgl_ovfl_cont
fout_sgl_ovfl_inex2:
	ori.w		&ovfinx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex/inex2

fout_sgl_ovfl_cont:
	mov.l		%a0,-(%sp)	# keep src ptr; popped by fout_sd_exc_ovfl
					# or discarded at fout_sgl_ovfl_chkexc

# call ovf_res() w/ sgl prec and the correct rnd mode to create the default
# overflow result. DON'T save the returned ccodes from ovf_res() since
# fmove out doesn't alter them.
	tst.b		SRC_EX(%a0)	# is operand negative?
	smi		%d1		# set if so
	mov.l		L_SCR3(%a6),%d0	# pass: sgl prec,rnd mode
	bsr.l		ovf_res		# calc OVFL result
	fmovm.x		(%a0),&0x80	# load default overflow result
	fmov.s		%fp0,%d0	# store to single

	mov.b		1+EXC_OPWORD(%a6),%d1	# extract dst mode
	andi.b		&0x38,%d1	# is mode == 0? (Dreg dst)
	beq.b		fout_sgl_ovfl_dn	# must save to integer regfile

	mov.l		EXC_EA(%a6),%a0	# stacked <ea> is correct
	bsr.l		_dmem_write_long	# write long

	tst.l		%d1		# did dstore fail?
	bne.l		facc_out_l	# yes

	bra.b		fout_sgl_ovfl_chkexc

fout_sgl_ovfl_dn:
	mov.b		1+EXC_OPWORD(%a6),%d1	# extract Dn
	andi.w		&0x7,%d1
	bsr.l		store_dreg_l

# NOTE(review): this overflow path tests the same 0x0a enable mask (and carries
# the same "UNFL or INEX" comment) as the underflow path above; confirm whether
# the OVFL enable bit was meant to be tested here.
fout_sgl_ovfl_chkexc:
	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x0a,%d1	# is UNFL or INEX enabled?
	bne.w		fout_sd_exc_ovfl	# yes
	addq.l		&0x4,%sp	# drop the saved src ptr
	rts

#
# move out MAY overflow:
# (1) force the exp to 0x3fff
# (2) do a move w/ appropriate rnd mode
# (3) if exp still equals zero, then insert original exponent
#	for the correct result.
#     if exp now equals one, then it overflowed so call ovf_res.
#
fout_sgl_may_ovfl:
	mov.w		SRC_EX(%a0),%d1	# fetch current sign
	andi.w		&0x8000,%d1	# keep it,clear exp
	ori.w		&0x3fff,%d1	# insert exp = 0
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert scaled exp
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6) # copy hi(man)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6) # copy lo(man)

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

	fmov.x		FP_SCR0(%a6),%fp0	# force fop to be rounded
	fmov.l		&0x0,%fpcr	# clear FPCR

	fabs.x		%fp0		# need absolute value
	fcmp.b		%fp0,&0x2	# did exponent increase?
# outcome of the may-overflow probe compared just above (|rounded value| vs 2)
	fblt.w		fout_sgl_exg	# no; go finish NORM
	bra.w		fout_sgl_ovfl	# yes; go handle overflow

################

# Shared by the single and double move-out paths: rebuild the exceptional
# operand (EXOP) from the original source, whose pointer the caller pushed
# on the stack, and return it in fp1.
fout_sd_exc_unfl:
	mov.l		(%sp)+,%a0

	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)

	cmpi.b		STAG(%a6),&DENORM	# was src a DENORM?
	bne.b		fout_sd_exc_cont	# no

	lea		FP_SCR0(%a6),%a0
	bsr.l		norm
	neg.l		%d0
	andi.w		&0x7fff,%d0
	bfins		%d0,FP_SCR0_EX(%a6){&1:&15}
	bra.b		fout_sd_exc_cont

fout_sd_exc:
fout_sd_exc_ovfl:
	mov.l		(%sp)+,%a0	# restore a0

	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)

fout_sd_exc_cont:
	bclr		&0x7,FP_SCR0_EX(%a6)	# clear sign bit
	sne.b		2+FP_SCR0_EX(%a6)	# set internal sign bit
	lea		FP_SCR0(%a6),%a0	# pass: ptr to DENORM

# split the low byte of L_SCR3 (shifted right 4) into the two halves of d1:
# bits 3:2 go to the upper word, bits 1:0 stay in the lower word.
# NOTE(review): presumably rnd prec / rnd mode as _round expects - confirm.
	mov.b		3+L_SCR3(%a6),%d1
	lsr.b		&0x4,%d1
	andi.w		&0x0c,%d1
	swap		%d1
	mov.b		3+L_SCR3(%a6),%d1
	lsr.b		&0x4,%d1
	andi.w		&0x03,%d1
	clr.l		%d0		# pass: zero g,r,s
	bsr.l		_round		# round the DENORM

	tst.b		2+FP_SCR0_EX(%a6)	# is EXOP negative?
	beq.b		fout_sd_exc_done	# no
	bset		&0x7,FP_SCR0_EX(%a6)	# yes

fout_sd_exc_done:
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	rts

#################################################################
# fmove.d out ###################################################
#################################################################
fout_dbl:
	andi.b		&0x30,%d0	# clear rnd prec
	ori.b		&d_mode*0x10,%d0	# insert dbl prec
	mov.l		%d0,L_SCR3(%a6)	# save rnd prec,mode on stack

#
# operand is a normalized number. first, we check to see if the move out
# would cause either an underflow or overflow. these cases are handled
# separately. otherwise, set the FPCR to the proper rounding mode and
# execute the move.
#
	mov.w		SRC_EX(%a0),%d0	# extract exponent
	andi.w		&0x7fff,%d0	# strip sign

	cmpi.w		%d0,&DBL_HI	# will operand overflow?
	bgt.w		fout_dbl_ovfl	# yes; go handle OVFL
	beq.w		fout_dbl_may_ovfl	# maybe; go handle possible OVFL
	cmpi.w		%d0,&DBL_LO	# will operand underflow?
	blt.w		fout_dbl_unfl	# yes; go handle underflow

#
# NORMs(in range) can be stored out by a simple "fmov.d"
# Unnormalized inputs can come through this point.
#
fout_dbl_exg:
	fmovm.x		SRC(%a0),&0x80	# fetch fop from stack

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr	# clear FPSR

	fmov.d		%fp0,L_SCR1(%a6)	# store does convert and round

	fmov.l		&0x0,%fpcr	# clear FPCR
	fmov.l		%fpsr,%d0	# save FPSR

	or.w		%d0,2+USER_FPSR(%a6)	# set possible inex2/ainex

	mov.l		EXC_EA(%a6),%a1	# pass: dst addr
	lea		L_SCR1(%a6),%a0	# pass: src addr
	movq.l		&0x8,%d0	# pass: opsize is 8 bytes
	bsr.l		_dmem_write	# store dbl fop to memory

	tst.l		%d1		# did dstore fail?
	bne.l		facc_out_d	# yes

	rts				# no; so we're finished

#
# here, we know that the operand would UNFL if moved out to double prec,
# so, denorm and round and then use generic store double routine to
# write the value to memory.
#
fout_dbl_unfl:
	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set UNFL

	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	mov.l		%a0,-(%sp)	# keep src ptr; popped by fout_sd_exc_unfl
					# or discarded before the rts below

	clr.l		%d0		# pass: S.F. = 0

	cmpi.b		STAG(%a6),&DENORM	# fetch src optype tag
	bne.b		fout_dbl_unfl_cont	# let DENORMs fall through

	lea		FP_SCR0(%a6),%a0
	bsr.l		norm		# normalize the DENORM

fout_dbl_unfl_cont:
	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
	mov.l		L_SCR3(%a6),%d1	# pass: rnd prec,mode
	bsr.l		unf_res		# calc default underflow result

	lea		FP_SCR0(%a6),%a0	# pass: ptr to fop
	bsr.l		dst_dbl		# convert to double prec
	mov.l		%d0,L_SCR1(%a6)	# hi lword of the double
	mov.l		%d1,L_SCR2(%a6)	# lo lword of the double

	mov.l		EXC_EA(%a6),%a1	# pass: dst addr
	lea		L_SCR1(%a6),%a0	# pass: src addr
	movq.l		&0x8,%d0	# pass: opsize is 8 bytes
	bsr.l		_dmem_write	# store dbl fop to memory

	tst.l		%d1		# did dstore fail?
	bne.l		facc_out_d	# yes

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x0a,%d1	# is UNFL or INEX enabled?
	bne.w		fout_sd_exc_unfl	# yes
	addq.l		&0x4,%sp	# drop the saved src ptr
	rts

#
# it's definitely an overflow so call ovf_res to get the correct answer
#
fout_dbl_ovfl:
	mov.w		2+SRC_LO(%a0),%d0
	andi.w		&0x7ff,%d0	# any of the low 11 mantissa bits set?
	bne.b		fout_dbl_ovfl_inex2

	ori.w		&ovfl_inx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex
	bra.b		fout_dbl_ovfl_cont
fout_dbl_ovfl_inex2:
	ori.w		&ovfinx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex/inex2

fout_dbl_ovfl_cont:
	mov.l		%a0,-(%sp)

# call ovf_res() w/ dbl prec and the correct rnd mode to create the default
# overflow result. DON'T save the returned ccodes from ovf_res() since
# fmove out doesn't alter them.
	tst.b		SRC_EX(%a0)	# is operand negative?
# record the operand sign (tested just above) in d1 before calling ovf_res
	smi		%d1		# set if so
	mov.l		L_SCR3(%a6),%d0	# pass: dbl prec,rnd mode
	bsr.l		ovf_res		# calc OVFL result
	fmovm.x		(%a0),&0x80	# load default overflow result
	fmov.d		%fp0,L_SCR1(%a6)	# store to double

	mov.l		EXC_EA(%a6),%a1	# pass: dst addr
	lea		L_SCR1(%a6),%a0	# pass: src addr
	movq.l		&0x8,%d0	# pass: opsize is 8 bytes
	bsr.l		_dmem_write	# store dbl fop to memory

	tst.l		%d1		# did dstore fail?
	bne.l		facc_out_d	# yes

# NOTE(review): this overflow path tests the same 0x0a enable mask (and carries
# the same "UNFL or INEX" comment) as the underflow paths; confirm whether the
# OVFL enable bit was meant to be tested here.
	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x0a,%d1	# is UNFL or INEX enabled?
	bne.w		fout_sd_exc_ovfl	# yes
	addq.l		&0x4,%sp	# drop the saved src ptr
	rts

#
# move out MAY overflow:
# (1) force the exp to 0x3fff
# (2) do a move w/ appropriate rnd mode
# (3) if exp still equals zero, then insert original exponent
#	for the correct result.
#     if exp now equals one, then it overflowed so call ovf_res.
#
fout_dbl_may_ovfl:
	mov.w		SRC_EX(%a0),%d1	# fetch current sign
	andi.w		&0x8000,%d1	# keep it,clear exp
	ori.w		&0x3fff,%d1	# insert exp = 0
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert scaled exp
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6) # copy hi(man)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6) # copy lo(man)

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

	fmov.x		FP_SCR0(%a6),%fp0	# force fop to be rounded
	fmov.l		&0x0,%fpcr	# clear FPCR

	fabs.x		%fp0		# need absolute value
	fcmp.b		%fp0,&0x2	# did exponent increase?
	fblt.w		fout_dbl_exg	# no; go finish NORM
	bra.w		fout_dbl_ovfl	# yes; go handle overflow

#########################################################################
# XDEF ****************************************************************	#
#	dst_dbl(): create double precision value from extended prec.	#
#									#
# XREF ****************************************************************	#
#	None								#
#									#
# INPUT ***************************************************************	#
#	a0 = pointer to source operand in extended precision		#
#									#
# OUTPUT **************************************************************	#
#	d0 = hi(double precision result)				#
#	d1 = lo(double precision result)				#
#									#
# ALGORITHM ***********************************************************	#
#									#
# Changes extended precision to double precision.			#
# Note: no attempt is made to round the extended value to double.	#
#	dbl_sign = ext_sign						#
#	dbl_exp = ext_exp - $3fff(ext bias) + $7ff(dbl bias)		#
#	get rid of ext integer bit					#
#	dbl_mant = ext_mant{62:11}					#
#									#
#               ---------------   ---------------    ---------------    #
#  extended ->  |s|    exp    |   |1| ms mant   |    | ls mant     |    #
#               ---------------   ---------------    ---------------    #
#                95         64    63 62       32      31     11   0     #
#                                    |                       |          #
#                                    |                       |          #
#                                    |                       |          #
#                                    v                       v          #
#                             ---------------   ---------------         #
#  double   ->                |s|exp| mant  |   |  mant       |         #
#                             ---------------   ---------------         #
#                             63     51   32   31            0         #
#									#
#########################################################################

dst_dbl:
	clr.l		%d0		# clear d0
	mov.w		FTEMP_EX(%a0),%d0	# get exponent
	subi.w		&EXT_BIAS,%d0	# subtract extended precision bias
	addi.w		&DBL_BIAS,%d0	# add double precision bias
	tst.b		FTEMP_HI(%a0)	# is number a denorm?
# dst_dbl continued: integer bit clear (tested just above) means a denorm
	bmi.b		dst_get_dupper	# no
	subq.w		&0x1,%d0	# yes; denorm bias = DBL_BIAS - 1
dst_get_dupper:
	swap		%d0		# d0 now in upper word
	lsl.l		&0x4,%d0	# d0 in proper place for dbl prec exp
	tst.b		FTEMP_EX(%a0)	# test sign
	bpl.b		dst_get_dman	# if positive, go process mantissa
	bset		&0x1f,%d0	# if negative, set sign
dst_get_dman:
	mov.l		FTEMP_HI(%a0),%d1	# get ms mantissa
	bfextu		%d1{&1:&20},%d1	# get upper 20 bits of ms
	or.l		%d1,%d0		# put these bits in ms word of double
	mov.l		%d0,L_SCR1(%a6)	# put the new exp back on the stack
	mov.l		FTEMP_HI(%a0),%d1	# get ms mantissa
	mov.l		&21,%d0		# load shift count
	lsl.l		%d0,%d1		# put lower 11 bits in upper bits
	mov.l		%d1,L_SCR2(%a6)	# build lower lword in memory
	mov.l		FTEMP_LO(%a0),%d1	# get ls mantissa
	bfextu		%d1{&0:&21},%d0	# get ls 21 bits of double
	mov.l		L_SCR2(%a6),%d1
	or.l		%d0,%d1		# put them in double result
	mov.l		L_SCR1(%a6),%d0
	rts

#########################################################################
# XDEF ****************************************************************	#
#	dst_sgl(): create single precision value from extended prec	#
#									#
# XREF ****************************************************************	#
#									#
# INPUT ***************************************************************	#
#	a0 = pointer to source operand in extended precision		#
#									#
# OUTPUT **************************************************************	#
#	d0 = single precision result					#
#									#
# ALGORITHM ***********************************************************	#
#									#
# Changes extended precision to single precision.			#
#	sgl_sign = ext_sign						#
#	sgl_exp = ext_exp - $3fff(ext bias) + $7f(sgl bias)		#
#	get rid of ext integer bit					#
#	sgl_mant = ext_mant{62:40}					#
#									#
#               ---------------   ---------------    ---------------    #
#  extended ->  |s|    exp    |   |1| ms mant   |    | ls mant     |    #
#               ---------------   ---------------    ---------------    #
#                95         64    63 62    40 32      31     12   0     #
#                                    |     |                            #
#                                    |     |                            #
#                                    |     |                            #
#                                    v     v                            #
#                             ---------------                           #
#  single   ->                |s|exp| mant  |                           #
#                             ---------------                           #
#                             31     22     0                           #
#									#
#########################################################################

dst_sgl:
	clr.l		%d0
	mov.w		FTEMP_EX(%a0),%d0	# get exponent
	subi.w		&EXT_BIAS,%d0	# subtract extended precision bias
	addi.w		&SGL_BIAS,%d0	# add single precision bias
	tst.b		FTEMP_HI(%a0)	# is number a denorm?
	bmi.b		dst_get_supper	# no
	subq.w		&0x1,%d0	# yes; denorm bias = SGL_BIAS - 1
dst_get_supper:
	swap		%d0		# put exp in upper word of d0
	lsl.l		&0x7,%d0	# shift it into single exp bits
	tst.b		FTEMP_EX(%a0)	# test sign
	bpl.b		dst_get_sman	# if positive, continue
	bset		&0x1f,%d0	# if negative, put in sign first
dst_get_sman:
	mov.l		FTEMP_HI(%a0),%d1	# get ms mantissa
	andi.l		&0x7fffff00,%d1	# get upper 23 bits of ms
	lsr.l		&0x8,%d1	# and put them flush right
	or.l		%d1,%d0		# put these bits in ms word of single
	rts

##############################################################################
# fmove.p out: the destination <ea> is pushed first and popped into a1 at
# fout_pack_write.
fout_pack:
	bsr.l		_calc_ea_fout	# fetch the <ea>
	mov.l		%a0,-(%sp)

	mov.b		STAG(%a6),%d0	# fetch input type
	bne.w		fout_pack_not_norm	# input is not NORM

fout_pack_norm:
	btst		&0x4,EXC_CMDREG(%a6)	# static or dynamic?
# k-factor source, per the bit tested just above: static field or data register
	beq.b		fout_pack_s	# static

fout_pack_d:
	mov.b		1+EXC_CMDREG(%a6),%d1	# fetch dynamic reg
	lsr.b		&0x4,%d1
	andi.w		&0x7,%d1

	bsr.l		fetch_dreg	# fetch Dn w/ k-factor

	bra.b		fout_pack_type
fout_pack_s:
	mov.b		1+EXC_CMDREG(%a6),%d0	# fetch static field

fout_pack_type:
	bfexts		%d0{&25:&7},%d0	# extract k-factor
	mov.l		%d0,-(%sp)	# save k-factor across bindec

	lea		FP_SRC(%a6),%a0	# pass: ptr to input

# bindec is currently scrambling FP_SRC for denorm inputs.
# we'll have to change this, but for now, tough luck!!!
	bsr.l		bindec		# convert xprec to packed

#	andi.l		&0xcfff000f,FP_SCR0(%a6) # clear unused fields
	andi.l		&0xcffff00f,FP_SCR0(%a6) # clear unused fields

	mov.l		(%sp)+,%d0	# recover k-factor

	tst.b		3+FP_SCR0_EX(%a6)
	bne.b		fout_pack_set
	tst.l		FP_SCR0_HI(%a6)
	bne.b		fout_pack_set
	tst.l		FP_SCR0_LO(%a6)
	bne.b		fout_pack_set

# add the extra condition that only if the k-factor was zero, too, should
# we zero the exponent
	tst.l		%d0
	bne.b		fout_pack_set
# "mantissa" is all zero which means that the answer is zero. but, the '040
# algorithm allows the exponent to be non-zero. the 881/2 do not. therefore,
# if the mantissa is zero, I will zero the exponent, too.
# the question now is whether the exponent's sign bit is allowed to be non-zero
# for a zero, also...
	andi.w		&0xf000,FP_SCR0(%a6)

fout_pack_set:

	lea		FP_SCR0(%a6),%a0	# pass: src addr

fout_pack_write:
	mov.l		(%sp)+,%a1	# pass: dst addr
	mov.l		&0xc,%d0	# pass: opsize is 12 bytes

	cmpi.b		SPCOND_FLG(%a6),&mda7_flg
	beq.b		fout_pack_a7

	bsr.l		_dmem_write	# write 12-byte result to memory

	tst.l		%d1		# did dstore fail?
	bne.w		fout_ext_err	# yes

	rts

# we don't want to do the write if the exception occurred in supervisor mode
# so _mem_write2() handles this for us.
fout_pack_a7:
	bsr.l		_mem_write2	# write 12-byte result to memory

	tst.l		%d1		# did dstore fail?
	bne.w		fout_ext_err	# yes

	rts

# neither NORM nor DENORM: FP_SRC itself (reserved word cleared) is written out
fout_pack_not_norm:
	cmpi.b		%d0,&DENORM	# is it a DENORM?
	beq.w		fout_pack_norm	# yes
	lea		FP_SRC(%a6),%a0
	clr.w		2+FP_SRC_EX(%a6)
	cmpi.b		%d0,&SNAN	# is it an SNAN?
	beq.b		fout_pack_snan	# yes
	bra.b		fout_pack_write	# no

fout_pack_snan:
	ori.w		&snaniop2_mask,FPSR_EXCEPT(%a6) # set SNAN/AIOP
	bset		&0x6,FP_SRC_HI(%a6)	# set snan bit
	bra.b		fout_pack_write

#########################################################################
# XDEF ****************************************************************	#
#	fetch_dreg(): fetch register according to index in d1		#
#									#
# XREF ****************************************************************	#
#	None								#
#									#
# INPUT ***************************************************************	#
#	d1 = index of register to fetch from				#
#									#
# OUTPUT **************************************************************	#
#	d0 = value of register fetched					#
#									#
# ALGORITHM ***********************************************************	#
#	According to the index value in d1 which can range from zero	#
# to fifteen, load the corresponding register file value (where		#
# address register indexes start at 8). D0/D1/A0/A1/A6/A7 are on the	#
# stack. The rest should still be in their original places.		#
#									#
#########################################################################

# this routine leaves d1 intact for subsequent store_dreg calls.
# fetch_dreg: jump through a pc-relative table of 16-bit offsets indexed by d1
	global		fetch_dreg
fetch_dreg:
	mov.w		(tbl_fdreg.b,%pc,%d1.w*2),%d0
	jmp		(tbl_fdreg.b,%pc,%d0.w*1)

tbl_fdreg:
	short		fdreg0 - tbl_fdreg
	short		fdreg1 - tbl_fdreg
	short		fdreg2 - tbl_fdreg
	short		fdreg3 - tbl_fdreg
	short		fdreg4 - tbl_fdreg
	short		fdreg5 - tbl_fdreg
	short		fdreg6 - tbl_fdreg
	short		fdreg7 - tbl_fdreg
	short		fdreg8 - tbl_fdreg
	short		fdreg9 - tbl_fdreg
	short		fdrega - tbl_fdreg
	short		fdregb - tbl_fdreg
	short		fdregc - tbl_fdreg
	short		fdregd - tbl_fdreg
	short		fdrege - tbl_fdreg
	short		fdregf - tbl_fdreg

# d0/d1 are read from their saved copies in the exception frame
fdreg0:
	mov.l		EXC_DREGS+0x0(%a6),%d0
	rts
fdreg1:
	mov.l		EXC_DREGS+0x4(%a6),%d0
	rts
fdreg2:
	mov.l		%d2,%d0
	rts
fdreg3:
	mov.l		%d3,%d0
	rts
fdreg4:
	mov.l		%d4,%d0
	rts
fdreg5:
	mov.l		%d5,%d0
	rts
fdreg6:
	mov.l		%d6,%d0
	rts
fdreg7:
	mov.l		%d7,%d0
	rts
# a0/a1 are read from their saved copies in the exception frame
fdreg8:
	mov.l		EXC_DREGS+0x8(%a6),%d0
	rts
fdreg9:
	mov.l		EXC_DREGS+0xc(%a6),%d0
	rts
fdrega:
	mov.l		%a2,%d0
	rts
fdregb:
	mov.l		%a3,%d0
	rts
fdregc:
	mov.l		%a4,%d0
	rts
fdregd:
	mov.l		%a5,%d0
	rts
# a6 is read from (%a6), a7 from EXC_A7(%a6)
fdrege:
	mov.l		(%a6),%d0
	rts
fdregf:
	mov.l		EXC_A7(%a6),%d0
	rts

#########################################################################
# XDEF ****************************************************************	#
#	store_dreg_l(): store longword to data register specified by d1	#
#									#
# XREF ****************************************************************	#
#	None								#
#									#
# INPUT ***************************************************************	#
#	d0 = longword value to store					#
#	d1 = index of register to store to				#
#									#
# OUTPUT **************************************************************	#
#	(data register is updated)					#
#									#
# ALGORITHM ***********************************************************	#
#	According to the index value in d1, store the longword value	#
# in d0 to the corresponding data register. D0/D1 are on the stack	#
# while the rest are in their initial places.				#
#									#
#########################################################################

	global		store_dreg_l
store_dreg_l:
	mov.w		(tbl_sdregl.b,%pc,%d1.w*2),%d1	# d1 = offset of handler
	jmp		(tbl_sdregl.b,%pc,%d1.w*1)

tbl_sdregl:
	short		sdregl0 - tbl_sdregl
	short		sdregl1 - tbl_sdregl
	short		sdregl2 - tbl_sdregl
	short		sdregl3 - tbl_sdregl
	short		sdregl4 - tbl_sdregl
	short		sdregl5 - tbl_sdregl
	short		sdregl6 - tbl_sdregl
	short		sdregl7 - tbl_sdregl

# d0/d1 are updated in their saved copies in the exception frame
sdregl0:
	mov.l		%d0,EXC_DREGS+0x0(%a6)
	rts
sdregl1:
	mov.l		%d0,EXC_DREGS+0x4(%a6)
	rts
sdregl2:
	mov.l		%d0,%d2
	rts
sdregl3:
	mov.l		%d0,%d3
	rts
sdregl4:
	mov.l		%d0,%d4
	rts
sdregl5:
	mov.l		%d0,%d5
	rts
sdregl6:
	mov.l		%d0,%d6
	rts
sdregl7:
	mov.l		%d0,%d7
	rts

#########################################################################
# XDEF ****************************************************************	#
#	store_dreg_w(): store word to data register specified by d1	#
#									#
# XREF ****************************************************************	#
#	None								#
#									#
# INPUT ***************************************************************	#
#	d0 = word value to store					#
#	d1 = index of register to store to				#
#									#
# OUTPUT **************************************************************	#
#	(data register is updated)					#
#									#
# ALGORITHM ***********************************************************	#
#	According to the index value in d1, store the word value	#
21231# in d0 to the corresponding data register. D0/D1 are on the stack # 21232# while the rest are in their initial places. # 21233# # 21234######################################################################### 21235 21236 global store_dreg_w 21237store_dreg_w: 21238 mov.w (tbl_sdregw.b,%pc,%d1.w*2),%d1 21239 jmp (tbl_sdregw.b,%pc,%d1.w*1) 21240 21241tbl_sdregw: 21242 short sdregw0 - tbl_sdregw 21243 short sdregw1 - tbl_sdregw 21244 short sdregw2 - tbl_sdregw 21245 short sdregw3 - tbl_sdregw 21246 short sdregw4 - tbl_sdregw 21247 short sdregw5 - tbl_sdregw 21248 short sdregw6 - tbl_sdregw 21249 short sdregw7 - tbl_sdregw 21250 21251sdregw0: 21252 mov.w %d0,2+EXC_DREGS+0x0(%a6) 21253 rts 21254sdregw1: 21255 mov.w %d0,2+EXC_DREGS+0x4(%a6) 21256 rts 21257sdregw2: 21258 mov.w %d0,%d2 21259 rts 21260sdregw3: 21261 mov.w %d0,%d3 21262 rts 21263sdregw4: 21264 mov.w %d0,%d4 21265 rts 21266sdregw5: 21267 mov.w %d0,%d5 21268 rts 21269sdregw6: 21270 mov.w %d0,%d6 21271 rts 21272sdregw7: 21273 mov.w %d0,%d7 21274 rts 21275 21276######################################################################### 21277# XDEF **************************************************************** # 21278# store_dreg_b(): store byte to data register specified by d1 # 21279# # 21280# XREF **************************************************************** # 21281# None # 21282# # 21283# INPUT *************************************************************** # 21284# d0 = byte value to store # 21285# d1 = index of register to fetch from # 21286# # 21287# OUTPUT ************************************************************** # 21288# (data register is updated) # 21289# # 21290# ALGORITHM *********************************************************** # 21291# According to the index value in d1, store the byte value # 21292# in d0 to the corresponding data register. D0/D1 are on the stack # 21293# while the rest are in their initial places. 
# 21294# # 21295######################################################################### 21296 21297 global store_dreg_b 21298store_dreg_b: 21299 mov.w (tbl_sdregb.b,%pc,%d1.w*2),%d1 21300 jmp (tbl_sdregb.b,%pc,%d1.w*1) 21301 21302tbl_sdregb: 21303 short sdregb0 - tbl_sdregb 21304 short sdregb1 - tbl_sdregb 21305 short sdregb2 - tbl_sdregb 21306 short sdregb3 - tbl_sdregb 21307 short sdregb4 - tbl_sdregb 21308 short sdregb5 - tbl_sdregb 21309 short sdregb6 - tbl_sdregb 21310 short sdregb7 - tbl_sdregb 21311 21312sdregb0: 21313 mov.b %d0,3+EXC_DREGS+0x0(%a6) 21314 rts 21315sdregb1: 21316 mov.b %d0,3+EXC_DREGS+0x4(%a6) 21317 rts 21318sdregb2: 21319 mov.b %d0,%d2 21320 rts 21321sdregb3: 21322 mov.b %d0,%d3 21323 rts 21324sdregb4: 21325 mov.b %d0,%d4 21326 rts 21327sdregb5: 21328 mov.b %d0,%d5 21329 rts 21330sdregb6: 21331 mov.b %d0,%d6 21332 rts 21333sdregb7: 21334 mov.b %d0,%d7 21335 rts 21336 21337######################################################################### 21338# XDEF **************************************************************** # 21339# inc_areg(): increment an address register by the value in d0 # 21340# # 21341# XREF **************************************************************** # 21342# None # 21343# # 21344# INPUT *************************************************************** # 21345# d0 = amount to increment by # 21346# d1 = index of address register to increment # 21347# # 21348# OUTPUT ************************************************************** # 21349# (address register is updated) # 21350# # 21351# ALGORITHM *********************************************************** # 21352# Typically used for an instruction w/ a post-increment <ea>, # 21353# this routine adds the increment value in d0 to the address register # 21354# specified by d1. A0/A1/A6/A7 reside on the stack. The rest reside # 21355# in their original places. # 21356# For a7, if the increment amount is one, then we have to # 21357# increment by two. 
#       For any a7 update, set the mia7_flag so that if
#       an access error exception occurs later in emulation, this address
#       register update can be undone.
#
#########################################################################

#
# inc_areg:
#   d0 = increment, d1 = address register number (0-7).
#   d1 is overwritten with the dispatch offset.
#
        global          inc_areg
inc_areg:
        mov.w           (tbl_iareg.b,%pc,%d1.w*2),%d1   # d1 = handler offset
        jmp             (tbl_iareg.b,%pc,%d1.w*1)       # go to iareg<n>

# one 16-bit, table-relative offset per address register
tbl_iareg:
        short           iareg0 - tbl_iareg
        short           iareg1 - tbl_iareg
        short           iareg2 - tbl_iareg
        short           iareg3 - tbl_iareg
        short           iareg4 - tbl_iareg
        short           iareg5 - tbl_iareg
        short           iareg6 - tbl_iareg
        short           iareg7 - tbl_iareg

# a0/a1: update the copies saved in the exception frame
iareg0:
        add.l           %d0,EXC_DREGS+0x8(%a6)
        rts
iareg1:
        add.l           %d0,EXC_DREGS+0xc(%a6)
        rts

# a2-a5: update the live registers
iareg2:
        add.l           %d0,%a2
        rts
iareg3:
        add.l           %d0,%a3
        rts
iareg4:
        add.l           %d0,%a4
        rts
iareg5:
        add.l           %d0,%a5
        rts

# a6: update the value stored at (a6)
iareg6:
        add.l           %d0,(%a6)
        rts

# a7: record the update in SPCOND_FLG, and turn a one-byte increment
# into a two-byte one.
iareg7:
        mov.b           &mia7_flg,SPCOND_FLG(%a6)       # flag the a7 post-increment
        cmpi.b          %d0,&0x1                        # increment of exactly one?
        beq.b           iareg7b                         # yes; use two instead
        add.l           %d0,EXC_A7(%a6)                 # no; add d0 as given
        rts
iareg7b:
        addq.l          &0x2,EXC_A7(%a6)                # byte access on a7 moves it by 2
        rts

#########################################################################
# XDEF **************************************************************** #
#       dec_areg(): decrement an address register by the value in d0
#
# XREF **************************************************************** #
#       None
#
# INPUT *************************************************************** #
#       d0 = amount to decrement by
#       d1 = index of address register to decrement
#
# OUTPUT ************************************************************** #
#       (address register is updated)
#
# ALGORITHM *********************************************************** #
#       Typically used for an instruction w/ a pre-decrement <ea>,
#       this routine subtracts the decrement value in d0 from the register
#       specified by d1. A0/A1/A6/A7 reside on the stack. The rest reside
#       in their original places.
#       For a7, if the decrement amount is one, then we have to
#       decrement by two. For any a7 update, set the mda7_flag so that if
#       an access error exception occurs later in emulation, this address
#       register update can be undone.
#
#########################################################################

#
# dec_areg:
#   d0 = decrement, d1 = address register number (0-7).
#   d1 is overwritten with the dispatch offset.
#
        global          dec_areg
dec_areg:
        mov.w           (tbl_dareg.b,%pc,%d1.w*2),%d1   # d1 = handler offset
        jmp             (tbl_dareg.b,%pc,%d1.w*1)       # go to dareg<n>

# one 16-bit, table-relative offset per address register
tbl_dareg:
        short           dareg0 - tbl_dareg
        short           dareg1 - tbl_dareg
        short           dareg2 - tbl_dareg
        short           dareg3 - tbl_dareg
        short           dareg4 - tbl_dareg
        short           dareg5 - tbl_dareg
        short           dareg6 - tbl_dareg
        short           dareg7 - tbl_dareg

# a0/a1: update the copies saved in the exception frame
dareg0:
        sub.l           %d0,EXC_DREGS+0x8(%a6)
        rts
dareg1:
        sub.l           %d0,EXC_DREGS+0xc(%a6)
        rts

# a2-a5: update the live registers
dareg2:
        sub.l           %d0,%a2
        rts
dareg3:
        sub.l           %d0,%a3
        rts
dareg4:
        sub.l           %d0,%a4
        rts
dareg5:
        sub.l           %d0,%a5
        rts

# a6: update the value stored at (a6)
dareg6:
        sub.l           %d0,(%a6)
        rts

# a7: record the update in SPCOND_FLG, and turn a one-byte decrement
# into a two-byte one.
dareg7:
        mov.b           &mda7_flg,SPCOND_FLG(%a6)       # flag the a7 pre-decrement
        cmpi.b          %d0,&0x1                        # decrement of exactly one?
        beq.b           dareg7b                         # yes; use two instead
        sub.l           %d0,EXC_A7(%a6)                 # no; subtract d0 as given
        rts
dareg7b:
        subq.l          &0x2,EXC_A7(%a6)                # byte access on a7 moves it by 2
        rts

##############################################################################

#########################################################################
# XDEF **************************************************************** #
#       load_fpn1(): load FP register value into FP_SRC(a6).
#
# XREF **************************************************************** #
#       None
#
# INPUT *************************************************************** #
#       d0 = index of FP register to load
#
# OUTPUT ************************************************************** #
#       FP_SRC(a6) = value loaded from FP register file
#
# ALGORITHM *********************************************************** #
#       Using the index in d0, load FP_SRC(a6) with a number from the
#       FP register file.
#
#########################################################################

#
# load_fpn1:
#   d0 = FP register number (0-7); destroyed by the table dispatch.
#   Every handler also leaves a0 pointing at FP_SRC(a6).
#
        global          load_fpn1
load_fpn1:
        mov.w           (tbl_load_fpn1.b,%pc,%d0.w*2), %d0      # d0 = handler offset
        jmp             (tbl_load_fpn1.b,%pc,%d0.w*1)           # go to load_fpn1_<n>

tbl_load_fpn1:
        short           load_fpn1_0 - tbl_load_fpn1
        short           load_fpn1_1 - tbl_load_fpn1
        short           load_fpn1_2 - tbl_load_fpn1
        short           load_fpn1_3 - tbl_load_fpn1
        short           load_fpn1_4 - tbl_load_fpn1
        short           load_fpn1_5 - tbl_load_fpn1
        short           load_fpn1_6 - tbl_load_fpn1
        short           load_fpn1_7 - tbl_load_fpn1

# fp0/fp1 are copied, three longwords each, from their save slots
# EXC_FP0/EXC_FP1 in the exception frame.
load_fpn1_0:
        mov.l           0+EXC_FP0(%a6), 0+FP_SRC(%a6)
        mov.l           4+EXC_FP0(%a6), 4+FP_SRC(%a6)
        mov.l           8+EXC_FP0(%a6), 8+FP_SRC(%a6)
        lea             FP_SRC(%a6), %a0                # a0 -> FP_SRC
        rts
load_fpn1_1:
        mov.l           0+EXC_FP1(%a6), 0+FP_SRC(%a6)
        mov.l           4+EXC_FP1(%a6), 4+FP_SRC(%a6)
        mov.l           8+EXC_FP1(%a6), 8+FP_SRC(%a6)
        lea             FP_SRC(%a6), %a0                # a0 -> FP_SRC
        rts

# fp2-fp7 are stored straight from the FPU with a one-register fmovm.
load_fpn1_2:
        fmovm.x         &0x20, FP_SRC(%a6)              # fp2
        lea             FP_SRC(%a6), %a0
        rts
load_fpn1_3:
        fmovm.x         &0x10, FP_SRC(%a6)              # fp3
        lea             FP_SRC(%a6), %a0
        rts
load_fpn1_4:
        fmovm.x         &0x08, FP_SRC(%a6)              # fp4
        lea             FP_SRC(%a6), %a0
        rts
load_fpn1_5:
        fmovm.x         &0x04, FP_SRC(%a6)              # fp5
        lea             FP_SRC(%a6), %a0
        rts
load_fpn1_6:
        fmovm.x         &0x02, FP_SRC(%a6)              # fp6
        lea             FP_SRC(%a6), %a0
        rts
load_fpn1_7:
        fmovm.x         &0x01, FP_SRC(%a6)              # fp7
        lea             FP_SRC(%a6), %a0
        rts

#############################################################################

#########################################################################
# XDEF **************************************************************** #
#       load_fpn2(): load FP register value into FP_DST(a6).
#
# XREF **************************************************************** #
#       None
#
# INPUT *************************************************************** #
#       d0 = index of FP register to load
#
# OUTPUT ************************************************************** #
#       FP_DST(a6) = value loaded from FP register file
#
# ALGORITHM *********************************************************** #
#       Using the index in d0, load FP_DST(a6) with a number from the
#       FP register file.
#
#########################################################################

#
# load_fpn2:
#   Same shape as load_fpn1 but the destination is FP_DST(a6); a0 is
#   left pointing at FP_DST(a6). d0 is destroyed by the table dispatch.
#
        global          load_fpn2
load_fpn2:
        mov.w           (tbl_load_fpn2.b,%pc,%d0.w*2), %d0      # d0 = handler offset
        jmp             (tbl_load_fpn2.b,%pc,%d0.w*1)           # go to load_fpn2_<n>

tbl_load_fpn2:
        short           load_fpn2_0 - tbl_load_fpn2
        short           load_fpn2_1 - tbl_load_fpn2
        short           load_fpn2_2 - tbl_load_fpn2
        short           load_fpn2_3 - tbl_load_fpn2
        short           load_fpn2_4 - tbl_load_fpn2
        short           load_fpn2_5 - tbl_load_fpn2
        short           load_fpn2_6 - tbl_load_fpn2
        short           load_fpn2_7 - tbl_load_fpn2

# fp0/fp1 come from their save slots in the exception frame
load_fpn2_0:
        mov.l           0+EXC_FP0(%a6), 0+FP_DST(%a6)
        mov.l           4+EXC_FP0(%a6), 4+FP_DST(%a6)
        mov.l           8+EXC_FP0(%a6), 8+FP_DST(%a6)
        lea             FP_DST(%a6), %a0                # a0 -> FP_DST
        rts
load_fpn2_1:
        mov.l           0+EXC_FP1(%a6), 0+FP_DST(%a6)
        mov.l           4+EXC_FP1(%a6), 4+FP_DST(%a6)
        mov.l           8+EXC_FP1(%a6), 8+FP_DST(%a6)
        lea             FP_DST(%a6), %a0                # a0 -> FP_DST
        rts

# fp2-fp7 are stored straight from the FPU
load_fpn2_2:
        fmovm.x         &0x20, FP_DST(%a6)              # fp2
        lea             FP_DST(%a6), %a0
        rts
load_fpn2_3:
        fmovm.x         &0x10, FP_DST(%a6)              # fp3
        lea             FP_DST(%a6), %a0
        rts
load_fpn2_4:
        fmovm.x         &0x08, FP_DST(%a6)              # fp4
        lea             FP_DST(%a6), %a0
        rts
load_fpn2_5:
        fmovm.x         &0x04, FP_DST(%a6)              # fp5
        lea             FP_DST(%a6), %a0
        rts
load_fpn2_6:
        fmovm.x         &0x02, FP_DST(%a6)              # fp6
        lea             FP_DST(%a6), %a0
        rts
load_fpn2_7:
        fmovm.x         &0x01, FP_DST(%a6)              # fp7
        lea             FP_DST(%a6), %a0
        rts

#############################################################################

#########################################################################
# XDEF **************************************************************** #
#       store_fpreg(): store an fp value to the fpreg designated d0.
#
# XREF **************************************************************** #
#       None
#
# INPUT *************************************************************** #
#       fp0 = extended precision value to store
#       d0  = index of floating-point register
#
# OUTPUT ************************************************************** #
#       None
#
# ALGORITHM *********************************************************** #
#       Store the value in fp0 to the FP register designated by the
#       value in d0. The FP number can be DENORM or SNAN so we have to be
#       careful that we don't take an exception here.
#
#########################################################################

#
# store_fpreg:
#   fp0 = value, d0 = destination FP register number (0-7).
#   d0 is destroyed by the table dispatch. Only fmovm.x is used to move
#   the value.
#
        global          store_fpreg
store_fpreg:
        mov.w           (tbl_store_fpreg.b,%pc,%d0.w*2), %d0    # d0 = handler offset
        jmp             (tbl_store_fpreg.b,%pc,%d0.w*1)         # go to store_fpreg_<n>

tbl_store_fpreg:
        short           store_fpreg_0 - tbl_store_fpreg
        short           store_fpreg_1 - tbl_store_fpreg
        short           store_fpreg_2 - tbl_store_fpreg
        short           store_fpreg_3 - tbl_store_fpreg
        short           store_fpreg_4 - tbl_store_fpreg
        short           store_fpreg_5 - tbl_store_fpreg
        short           store_fpreg_6 - tbl_store_fpreg
        short           store_fpreg_7 - tbl_store_fpreg

# destination fp0/fp1: write fp0 into the save slot in the exception frame
store_fpreg_0:
        fmovm.x         &0x80, EXC_FP0(%a6)
        rts
store_fpreg_1:
        fmovm.x         &0x80, EXC_FP1(%a6)
        rts

# destination fp2-fp7: push fp0 on the stack, then pop it into the
# target register. (The pre-decrement form of fmovm uses the reversed
# register mask, so &0x01 selects fp0 on the push.)
store_fpreg_2:
        fmovm.x         &0x01, -(%sp)                   # push fp0
        fmovm.x         (%sp)+, &0x20                   # pop into fp2
        rts
store_fpreg_3:
        fmovm.x         &0x01, -(%sp)                   # push fp0
        fmovm.x         (%sp)+, &0x10                   # pop into fp3
        rts
store_fpreg_4:
        fmovm.x         &0x01, -(%sp)                   # push fp0
        fmovm.x         (%sp)+, &0x08                   # pop into fp4
        rts
store_fpreg_5:
        fmovm.x         &0x01, -(%sp)                   # push fp0
        fmovm.x         (%sp)+, &0x04                   # pop into fp5
        rts
store_fpreg_6:
        fmovm.x         &0x01, -(%sp)                   # push fp0
        fmovm.x         (%sp)+, &0x02                   # pop into fp6
        rts
store_fpreg_7:
        fmovm.x         &0x01, -(%sp)                   # push fp0
        fmovm.x         (%sp)+, &0x01                   # pop into fp7
        rts

#########################################################################
# XDEF **************************************************************** #
#       _denorm(): denormalize an intermediate result
#
# XREF **************************************************************** #
#       None
#
# INPUT *************************************************************** #
#       a0 = points to the operand to be denormalized
#               (in the internal extended format)
#
#       d0 = rounding precision
#
# OUTPUT ************************************************************** #
#       a0 = pointer to the denormalized result
#               (in the internal extended format)
#
#       d0 = guard,round,sticky
#
# ALGORITHM *********************************************************** #
#       According to the exponent underflow threshold for the given
#       precision, shift the mantissa bits to the right in order raise the
#       exponent of the operand to the threshold value. While shifting the
#       mantissa bits right, maintain the value of the guard, round, and
#       sticky bits.
#       other notes:
#               (1) _denorm() is called by the underflow routines
#               (2) _denorm() does NOT affect the status register
#
#########################################################################

#
# table of exponent threshold values for each precision
#
tbl_thresh:
        short           0x0
        short           sgl_thresh
        short           dbl_thresh

        global          _denorm
_denorm:
#
# Load the exponent threshold for the precision selected and check
# to see if (threshold - exponent) is > 65 in which case we can
# simply calculate the sticky bit and zero the mantissa. otherwise
# we have to call the denormalization routine.
#
        lsr.b           &0x2, %d0               # shift prec to lo bits
        mov.w           (tbl_thresh.b,%pc,%d0.w*2), %d1 # load prec threshold
        mov.w           %d1, %d0                # copy d1 into d0
        sub.w           FTEMP_EX(%a0), %d0      # diff = threshold - exp
        cmpi.w          %d0, &66                # is diff > 65? (mant + g,r bits)
        bpl.b           denorm_set_stky         # yes; just calc sticky

        clr.l           %d0                     # no; start with g,r,s clear
        btst            &inex2_bit, FPSR_EXCEPT(%a6) # was INEX2 set?
        beq.b           denorm_call             # no; don't change anything
        bset            &29, %d0                # yes; set sticky bit

denorm_call:
        bsr.l           dnrm_lp                 # denormalize the number
        rts

#
# all bits would have been shifted off during the denorm so simply
# set the sticky bit and clear the entire mantissa.
#
denorm_set_stky:
        mov.l           &0x20000000, %d0        # set sticky bit in return value
        mov.w           %d1, FTEMP_EX(%a0)      # load exp with threshold
        clr.l           FTEMP_HI(%a0)           # ms mantissa = 0
        clr.l           FTEMP_LO(%a0)           # ls mantissa = 0
        rts

#
# dnrm_lp(): normalize exponent/mantissa to specified threshold
#
# INPUT:
#       %a0        : points to the operand to be denormalized
#       %d0{31:29} : initial guard,round,sticky
#       %d1{15:0}  : denormalization threshold
# OUTPUT:
#       %a0        : points to the denormalized operand
#       %d0{31:29} : final guard,round,sticky
#
# %d1 is used as scratch. %d2 is used by case_1/case_2 and is saved and
# restored on the stack there.
#

# *** Local Equates *** #
set     GRS,            L_SCR2                  # g,r,s temp storage
set     FTEMP_LO2,      L_SCR1                  # FTEMP_LO copy

        global          dnrm_lp
dnrm_lp:

#
# make a copy of FTEMP_LO and place the g,r,s bits directly after it
# in memory so as to make the bitfield extraction for denormalization easier.
#
        mov.l           FTEMP_LO(%a0), FTEMP_LO2(%a6)   # make FTEMP_LO copy
        mov.l           %d0, GRS(%a6)           # place g,r,s after it

#
# check to see how much less than the underflow threshold the operand
# exponent is.
#
        mov.l           %d1, %d0                # copy the denorm threshold
        sub.w           FTEMP_EX(%a0), %d1      # d1 = threshold - uns exponent
        ble.b           dnrm_no_lp              # d1 <= 0; nothing to shift
        cmpi.w          %d1, &0x20              # is ( 0 <  d1 < 32) ?
        blt.b           case_1                  # yes
        cmpi.w          %d1, &0x40              # is (32 <= d1 < 64) ?
        blt.b           case_2                  # yes
        bra.w           case_3                  # (d1 >= 64)

#
# No denormalization necessary
#
dnrm_no_lp:
        mov.l           GRS(%a6), %d0           # restore original g,r,s
        rts

#
# case (0<d1<32)
#
# %d0 = denorm threshold
# %d1 = "n" = amt to shift
#
# The 96-bit string {FTEMP_HI, FTEMP_LO, grs000...0} is shifted right by n:
#       new FTEMP_HI = top (32 - n) bits of FTEMP_HI, zero extended
#       new FTEMP_LO = the 32 bits starting n bits before FTEMP_LO
#       new g,r,s    = the 32 bits starting n bits before the grs longword;
#                      everything below g,r is collapsed into sticky.
#
case_1:
        mov.l           %d2, -(%sp)             # create temp storage

        mov.w           %d0, FTEMP_EX(%a0)      # exponent = denorm threshold
        mov.l           &32, %d0
        sub.w           %d1, %d0                # %d0 = 32 - %d1

        cmpi.w          %d1, &29                # is shft amt >= 29
        blt.b           case1_extract           # no; no fix needed
        mov.b           GRS(%a6), %d2           # yes; fold the incoming g,r,s byte
        or.b            %d2, 3+FTEMP_LO2(%a6)   # into the low byte of the LO copy

case1_extract:
        bfextu          FTEMP_HI(%a0){&0:%d0}, %d2      # %d2 = new FTEMP_HI
        bfextu          FTEMP_HI(%a0){%d0:&32}, %d1     # %d1 = new FTEMP_LO
        bfextu          FTEMP_LO2(%a6){%d0:&32}, %d0    # %d0 = new G,R,S

        mov.l           %d2, FTEMP_HI(%a0)      # store new FTEMP_HI
        mov.l           %d1, FTEMP_LO(%a0)      # store new FTEMP_LO

        bftst           %d0{&2:&30}             # were bits shifted off?
        beq.b           case1_sticky_clear      # no; go finish
        bset            &rnd_stky_bit, %d0      # yes; set sticky bit

case1_sticky_clear:
        and.l           &0xe0000000, %d0        # clear all but G,R,S
        mov.l           (%sp)+, %d2             # restore temp register
        rts

#
# case (32<=d1<64)
#
# %d0 = denorm threshold
# %d1 = "n" = amt to shift
#
# After the shift FTEMP_HI is all zero. With m = n - 32:
#       new FTEMP_LO = top (32 - m) bits of FTEMP_HI, zero extended
#       new g,r,s    = the 32 bits starting m bits before FTEMP_LO;
#                      the remaining low bits of FTEMP_LO and the old g,r,s
#                      only contribute to sticky.
#
case_2:
        mov.l           %d2, -(%sp)             # create temp storage

        mov.w           %d0, FTEMP_EX(%a0)      # exponent = denorm threshold
        subi.w          &0x20, %d1              # %d1 now between 0 and 32
        mov.l           &0x20, %d0
        sub.w           %d1, %d0                # %d0 = 32 - %d1

# subtle step here; or in the g,r,s at the bottom of FTEMP_LO to minimize
# the number of bits to check for the sticky detect.
# it only plays a role in shift amounts of 61-63.
        mov.b           GRS(%a6), %d2
        or.b            %d2, 3+FTEMP_LO2(%a6)

        bfextu          FTEMP_HI(%a0){&0:%d0}, %d2      # %d2 = new FTEMP_LO
        bfextu          FTEMP_HI(%a0){%d0:&32}, %d1     # %d1 = new G,R,S

        bftst           %d1{&2:&30}             # were any bits shifted off?
        bne.b           case2_set_sticky        # yes; set sticky bit
        bftst           FTEMP_LO2(%a6){%d0:&31} # were any bits shifted off?
        bne.b           case2_set_sticky        # yes; set sticky bit

        mov.l           %d1, %d0                # move new G,R,S to %d0
        bra.b           case2_end

case2_set_sticky:
        mov.l           %d1, %d0                # move new G,R,S to %d0
        bset            &rnd_stky_bit, %d0      # set sticky bit

case2_end:
        clr.l           FTEMP_HI(%a0)           # store FTEMP_HI = 0
        mov.l           %d2, FTEMP_LO(%a0)      # store FTEMP_LO
        and.l           &0xe0000000, %d0        # clear all but G,R,S

        mov.l           (%sp)+,%d2              # restore temp register
        rts

#
# case (d1>=64)
#
# %d0 = denorm threshold
# %d1 = amt to shift
#
case_3:
        mov.w           %d0, FTEMP_EX(%a0)      # insert denorm threshold

        cmpi.w          %d1, &65                # is shift amt > 65?
        blt.b           case3_64                # no; it's == 64
        beq.b           case3_65                # no; it's == 65

#
# case (d1>65)
#
# Shift value is > 65 and out of range. All bits are shifted off.
# Return a zero mantissa with the sticky bit set
#
        clr.l           FTEMP_HI(%a0)           # clear hi(mantissa)
        clr.l           FTEMP_LO(%a0)           # clear lo(mantissa)
        mov.l           &0x20000000, %d0        # set sticky bit
        rts

#
# case (d1 == 64)
#
# The whole mantissa moves below the lsb: the top two bits of FTEMP_HI
# become G,R; the other 30 bits of FTEMP_HI, all of FTEMP_LO and the old
# g,r,s are collapsed into sticky.
#
case3_64:
        mov.l           FTEMP_HI(%a0), %d0      # fetch hi(mantissa)
        mov.l           %d0, %d1                # make a copy
        and.l           &0xc0000000, %d0        # extract G,R
        and.l           &0x3fffffff, %d1        # extract other bits

        bra.b           case3_complete

#
# case (d1 == 65)
#
# One bit further than case3_64: G is zero, the top bit of FTEMP_HI becomes
# R; the other 31 bits of FTEMP_HI, all of FTEMP_LO and the old g,r,s are
# collapsed into sticky.
#
# %d1 still holds the shift amount (65) on entry, so hi(mantissa) must be
# copied into it before masking; otherwise the "other bits" test below is
# always non-zero and sticky is set even when nothing was shifted off.
#
case3_65:
        mov.l           FTEMP_HI(%a0), %d0      # fetch hi(mantissa)
        mov.l           %d0, %d1                # make a copy
        and.l           &0x80000000, %d0        # extract R bit
        lsr.l           &0x1, %d0               # shift high bit into R bit
        and.l           &0x7fffffff, %d1        # extract other bits

case3_complete:
# last operation done was an "and" of the bits shifted off so the condition
# codes are already set so branch accordingly.
        bne.b           case3_set_sticky        # yes; go set new sticky
        tst.l           FTEMP_LO(%a0)           # were any bits shifted off?
        bne.b           case3_set_sticky        # yes; go set new sticky
        tst.b           GRS(%a6)                # were any bits shifted off?
        bne.b           case3_set_sticky        # yes; go set new sticky

#
# no bits were shifted off so don't set the sticky bit.
# %d0 already holds the new guard/round bits and
# the entire mantissa is zero.
#
        clr.l           FTEMP_HI(%a0)           # clear hi(mantissa)
        clr.l           FTEMP_LO(%a0)           # clear lo(mantissa)
        rts

#
# some bits were shifted off so set the sticky bit.
# the entire mantissa is zero.
#
case3_set_sticky:
        bset            &rnd_stky_bit,%d0       # set new sticky bit
        clr.l           FTEMP_HI(%a0)           # clear hi(mantissa)
        clr.l           FTEMP_LO(%a0)           # clear lo(mantissa)
        rts

#########################################################################
# XDEF **************************************************************** #
#       _round(): round result according to precision/mode
#
# XREF **************************************************************** #
#       None
#
# INPUT *************************************************************** #
#       a0        = ptr to input operand in internal extended format
#       d1(hi)    = contains rounding precision:
#                       ext = $0000xxxx
#                       sgl = $0004xxxx
#                       dbl = $0008xxxx
#       d1(lo)    = contains rounding mode:
#                       RN  = $xxxx0000
#                       RZ  = $xxxx0001
#                       RM  = $xxxx0002
#                       RP  = $xxxx0003
#       d0{31:29} = contains the g,r,s bits (extended)
#
# OUTPUT ************************************************************** #
#       a0 = pointer to rounded result
#
# ALGORITHM *********************************************************** #
#       On return the value pointed to by a0 is correctly rounded,
#       a0 is preserved and the g-r-s bits in d0 are cleared.
#       The result is not typed - the tag field is invalid. The
#       result is still in the internal extended format.
#
#       The INEX bit of USER_FPSR will be set if the rounded result was
#       inexact (i.e. if any of the g-r-s bits were set).
#
#########################################################################

        global          _round
_round:
#
# ext_grs() looks at the rounding precision and sets the appropriate
# G,R,S bits.
# If (G,R,S == 0) then result is exact and round is done, else set
# the inex flag in status reg and continue.
#
        bsr.l           ext_grs                 # d0 = G,R,S for this precision

        tst.l           %d0                     # anything below the lsb?
        beq.w           truncate                # no; result is exact

        or.w            &inx2a_mask, 2+USER_FPSR(%a6) # inexact: set inex2/ainex

#
# Dispatch on the rounding mode held in d1.w. Every handler below may
# assume that g,r,s are not all zero.
#
        mov.w           (tbl_mode.b,%pc,%d1.w*2), %a1 # a1 = handler offset
        jmp             (tbl_mode.b,%pc,%a1)    # go to the mode handler

tbl_mode:
        short           rnd_near - tbl_mode     # RN
        short           truncate - tbl_mode     # RZ always truncates
        short           rnd_mnus - tbl_mode     # RM
        short           rnd_plus - tbl_mode     # RP

#################################################################
#       ROUND PLUS INFINITY                                     #
#                                                               #
#       Positive operands get 1 added to l; negative operands   #
#       are truncated.                                          #
#################################################################
rnd_plus:
        tst.b           FTEMP_SGN(%a0)          # look at the sign
        bmi.w           truncate                # negative; just truncate

        mov.l           &0xffffffff, %d0        # non-zero d0: never clear l below
        swap            %d1                     # d1.w = rounding precision

        cmpi.b          %d1, &s_mode            # which precision?
        beq.w           add_sgl                 # single
        bgt.w           add_dbl                 # double
        bra.w           add_ext                 # extended

#################################################################
#       ROUND MINUS INFINITY                                    #
#                                                               #
#       Negative operands get 1 added to l; positive operands   #
#       are truncated.                                          #
#################################################################
rnd_mnus:
        tst.b           FTEMP_SGN(%a0)          # look at the sign
        bpl.w           truncate                # positive; just truncate

        mov.l           &0xffffffff, %d0        # non-zero d0: never clear l below
        swap            %d1                     # d1.w = rounding precision

        cmpi.b          %d1, &s_mode            # which precision?
        beq.w           add_sgl                 # single
        bgt.w           add_dbl                 # double
        bra.w           add_ext                 # extended

#################################################################
#       ROUND NEAREST                                           #
#                                                               #
#       If g=1, add 1 to l; if additionally r=s=0 (a tie),      #
#       clear l afterwards so the result is rounded to even.    #
#################################################################
rnd_near:
        asl.l           &0x1, %d0               # g -> carry; d0 now holds r,s
        bcc.w           truncate                # g=0; just truncate

        swap            %d1                     # d1.w = rounding precision

        cmpi.b          %d1, &s_mode            # which precision?
        beq.w           add_sgl                 # single
        bgt.w           add_dbl                 # double
        bra.w           add_ext                 # extended

# *** LOCAL EQUATES ***
set     ad_1_sgl,       0x00000100      # constant to add 1 to l-bit in sgl prec
set     ad_1_dbl,       0x00000800      # constant to add 1 to l-bit in dbl prec

#########################
#       ADD SINGLE      #
#########################
add_sgl:
        add.l           &ad_1_sgl, FTEMP_HI(%a0) # bump the sgl l-bit
        bcc.b           scc_clr                 # no carry out of the mantissa
        roxr.w          FTEMP_HI(%a0)           # carry out: rotate it back in
        roxr.w          FTEMP_HI+2(%a0)         # through both words of hi(man)
        add.w           &0x1, FTEMP_EX(%a0)     # and bump the exponent
scc_clr:
        tst.l           %d0                     # r=s=0 ?
        bne.b           sgl_done                # no; keep l as is
        and.w           &0xfe00, FTEMP_HI+2(%a0) # yes; clear the l-bit
sgl_done:
        and.l           &0xffffff00, FTEMP_HI(%a0) # drop bits beyond sgl limit
        clr.l           FTEMP_LO(%a0)           # lo(man) is entirely beyond it
        rts

#########################
#       ADD EXTENDED    #
#########################
add_ext:
        addq.l          &1,FTEMP_LO(%a0)        # bump the ext l-bit
        bcc.b           xcc_clr                 # no carry out of lo(man)
        addq.l          &1,FTEMP_HI(%a0)        # propagate carry into hi(man)
        bcc.b           xcc_clr                 # no carry out of the mantissa
        roxr.w          FTEMP_HI(%a0)           # carry out: rotate it back in
        roxr.w          FTEMP_HI+2(%a0)         # through all four words
        roxr.w          FTEMP_LO(%a0)
        roxr.w          FTEMP_LO+2(%a0)
        add.w           &0x1,FTEMP_EX(%a0)      # and bump the exponent
xcc_clr:
        tst.l           %d0                     # r=s=0 ?
        bne.b           add_ext_done            # no; keep l as is
        and.b           &0xfe,FTEMP_LO+3(%a0)   # yes; clear the l-bit
add_ext_done:
        rts

#########################
#       ADD DOUBLE      #
#########################
add_dbl:
        add.l           &ad_1_dbl, FTEMP_LO(%a0) # bump the dbl l-bit
        bcc.b           dcc_clr                 # no carry out of lo(man)
        addq.l          &0x1, FTEMP_HI(%a0)     # propagate carry into hi(man)
        bcc.b           dcc_clr                 # no carry out of the mantissa

        roxr.w          FTEMP_HI(%a0)           # carry out: rotate it back in
        roxr.w          FTEMP_HI+2(%a0)         # through all four words
        roxr.w          FTEMP_LO(%a0)
        roxr.w          FTEMP_LO+2(%a0)
        addq.w          &0x1, FTEMP_EX(%a0)     # and bump the exponent
dcc_clr:
        tst.l           %d0                     # r=s=0 ?
        bne.b           dbl_done                # no; keep l as is
        and.w           &0xf000, FTEMP_LO+2(%a0) # yes; clear the l-bit

dbl_done:
        and.l           &0xfffff800,FTEMP_LO(%a0) # drop bits beyond dbl limit
        rts

###########################
# Truncate all other bits #
###########################
truncate:
        swap            %d1                     # d1.w = rounding precision

        cmpi.b          %d1, &s_mode            # which precision?
        beq.w           sgl_done                # single: mask to sgl width
        bgt.b           dbl_done                # double: mask to dbl width
        rts                                     # extended: nothing to mask


#
# ext_grs(): extract guard, round and sticky bits according to
#            rounding precision.
#
# INPUT
#       d0         = extended precision g,r,s (in d0{31:29})
#       d1         = {PREC,ROUND}
# OUTPUT
#       d0{31:29}  = guard, round, sticky
#
# The ext_grs extract the guard/round/sticky bits according to the
# selected rounding precision. It is called by the round subroutine
# only.  All registers except d0 are kept intact. d0 becomes an
# updated guard,round,sticky in d0{31:29}
#
# Notes: the ext_grs uses the round PREC, and therefore has to swap d1
#        prior to usage, and needs to restore d1 to original. this
#        routine is tightly tied to the round routine and not meant to
#        uphold standard subroutine calling practices.
#

ext_grs:
        swap            %d1                     # d1.w = rounding precision
        tst.b           %d1                     # extended precision?
        bne.b           ext_grs_not_ext         # no; derive g,r,s for sgl/dbl

#
# Extended precision: the g,r,s passed in d0 by _round() are already the
# right ones, so d0 is returned untouched.
#
ext_grs_ext:
        swap            %d1                     # put d1 back the way it was
        rts

ext_grs_not_ext:
        movm.l          &0x3000, -(%sp)         # save d2/d3 for use as temps

        cmpi.b          %d1, &s_mode            # single precision?
        bne.b           ext_grs_dbl             # no; it's double

#
# sgl:
#       The single precision mantissa is the top 24 bits of FTEMP_HI.
#       g,r are the next two bits (FTEMP_HI{24:2}); the new sticky is the
#       OR of the remaining 6 bits of FTEMP_HI, all of FTEMP_LO and the
#       incoming extended g,r,s.
#
ext_grs_sgl:
        bfextu          FTEMP_HI(%a0){&24:&2}, %d3      # d3 = g,r (low 2 bits)
        mov.l           &30, %d2
        lsl.l           %d2, %d3                # move g,r up to d3{31:30}
        mov.l           FTEMP_HI(%a0), %d2
        and.l           &0x0000003f, %d2        # any hi(man) bits below g,r?
        bne.b           ext_grs_st_stky         # yes; sticky
        tst.l           FTEMP_LO(%a0)           # any lo(man) bits?
        bne.b           ext_grs_st_stky         # yes; sticky
        tst.l           %d0                     # any incoming g,r,s?
        bne.b           ext_grs_st_stky         # yes; sticky
        bra.b           ext_grs_end_sd          # nothing set; sticky stays 0

#
# dbl:
#       The double precision mantissa is FTEMP_HI plus the top 21 bits of
#       FTEMP_LO. g,r are the next two bits (FTEMP_LO{21:2}); the new
#       sticky is the OR of the remaining 9 bits of FTEMP_LO and the
#       incoming extended g,r,s.
#
ext_grs_dbl:
        bfextu          FTEMP_LO(%a0){&21:&2}, %d3      # d3 = g,r (low 2 bits)
        mov.l           &30, %d2
        lsl.l           %d2, %d3                # move g,r up to d3{31:30}
        mov.l           FTEMP_LO(%a0), %d2
        and.l           &0x000001ff, %d2        # any lo(man) bits below g,r?
        bne.b           ext_grs_st_stky         # yes; sticky
        tst.l           %d0                     # any incoming g,r,s?
        bne.b           ext_grs_st_stky         # yes; sticky
        bra.b           ext_grs_end_sd          # nothing set; sticky stays 0

ext_grs_st_stky:
        bset            &rnd_stky_bit, %d3      # set sticky bit
ext_grs_end_sd:
        mov.l           %d3, %d0                # return g,r,s in d0

        movm.l          (%sp)+, &0xc            # restore d2/d3

        swap            %d1                     # put d1 back the way it was
        rts

#########################################################################
# norm(): normalize the mantissa of an extended precision input. the
#         input operand should not be normalized already.
#
# XDEF **************************************************************** #
#       norm()
#
# XREF **************************************************************** #
#       none
#
# INPUT *************************************************************** #
#       a0 = pointer fp extended precision operand to normalize
#
# OUTPUT ************************************************************** #
#       d0 = number of bit positions the mantissa was shifted
#       a0 = the input operand's mantissa is normalized; the exponent
#            is unchanged.
#
# d1 is used as scratch; d2/d3 are saved and restored on the stack.
#
#########################################################################
        global          norm
norm:
        mov.l           %d2, -(%sp)             # save the temps we use
        mov.l           %d3, -(%sp)

        mov.l           FTEMP_HI(%a0), %d0      # d0 = hi(man)
        mov.l           FTEMP_LO(%a0), %d1      # d1 = lo(man)

        bfffo           %d0{&0:&32}, %d2        # d2 = index of first 1 in hi(man)
        beq.b           norm_lo                 # none; hi(man) is all zeroes

# hi(man) is non-zero: shift the 64-bit mantissa left by d2
norm_hi:
        lsl.l           %d2, %d0                # make room at the bottom of hi(man)
        bfextu          %d1{&0:%d2}, %d3        # d3 = top d2 bits of lo(man)

        or.l            %d3, %d0                # merge them into hi(man)
        lsl.l           %d2, %d1                # shift lo(man) to match

        mov.l           %d0, FTEMP_HI(%a0)      # write back hi(man)
        mov.l           %d1, FTEMP_LO(%a0)      # write back lo(man)

        mov.l           %d2, %d0                # d0 = shift count

        mov.l           (%sp)+, %d3             # restore the temps
        mov.l           (%sp)+, %d2

        rts

# hi(man) is zero: lo(man), shifted, becomes the new hi(man)
norm_lo:
        bfffo           %d1{&0:&32}, %d2        # d2 = index of first 1 in lo(man)
        lsl.l           %d2, %d1                # bring it up to the msb
        add.l           &32, %d2                # total shift includes the 32 skipped

        mov.l           %d1, FTEMP_HI(%a0)      # shifted lo(man) is the new hi(man)
        clr.l           FTEMP_LO(%a0)           # and lo(man) is now zero

        mov.l           %d2, %d0                # d0 = shift count

        mov.l           (%sp)+, %d3             # restore the temps
        mov.l           (%sp)+, %d2

        rts

#########################################################################
# unnorm_fix(): - changes an UNNORM to one of NORM, DENORM, or ZERO
#               - returns corresponding optype tag
#
# XDEF **************************************************************** #
#       unnorm_fix()
#
# XREF **************************************************************** #
#       norm() - normalize the mantissa
#
# INPUT *************************************************************** #
#       a0 = pointer to unnormalized extended precision number
#
# OUTPUT ************************************************************** #
#       d0 = optype tag - is corrected to one of NORM, DENORM, or ZERO
#       a0 = input operand has been converted to a norm, denorm, or
#            zero; both the exponent and mantissa are changed.
#
#########################################################################

        global          unnorm_fix
unnorm_fix:
        bfffo           FTEMP_HI(%a0){&0:&32}, %d0 # how many shifts are needed?
22391 bne.b unnorm_shift # hi(man) is not all zeroes 22392 22393# 22394# hi(man) is all zeroes so see if any bits in lo(man) are set 22395# 22396unnorm_chk_lo: 22397 bfffo FTEMP_LO(%a0){&0:&32}, %d0 # is operand really a zero? 22398 beq.w unnorm_zero # yes 22399 22400 add.w &32, %d0 # no; fix shift distance 22401 22402# 22403# d0 = # shifts needed for complete normalization 22404# 22405unnorm_shift: 22406 clr.l %d1 # clear top word 22407 mov.w FTEMP_EX(%a0), %d1 # extract exponent 22408 and.w &0x7fff, %d1 # strip off sgn 22409 22410 cmp.w %d0, %d1 # will denorm push exp < 0? 22411 bgt.b unnorm_nrm_zero # yes; denorm only until exp = 0 22412 22413# 22414# exponent would not go < 0. therefore, number stays normalized 22415# 22416 sub.w %d0, %d1 # shift exponent value 22417 mov.w FTEMP_EX(%a0), %d0 # load old exponent 22418 and.w &0x8000, %d0 # save old sign 22419 or.w %d0, %d1 # {sgn,new exp} 22420 mov.w %d1, FTEMP_EX(%a0) # insert new exponent 22421 22422 bsr.l norm # normalize UNNORM 22423 22424 mov.b &NORM, %d0 # return new optype tag 22425 rts 22426 22427# 22428# exponent would go < 0, so only denormalize until exp = 0 22429# 22430unnorm_nrm_zero: 22431 cmp.b %d1, &32 # is exp <= 32? 
22432 bgt.b unnorm_nrm_zero_lrg # no; go handle large exponent 22433 22434 bfextu FTEMP_HI(%a0){%d1:&32}, %d0 # extract new hi(man) 22435 mov.l %d0, FTEMP_HI(%a0) # save new hi(man) 22436 22437 mov.l FTEMP_LO(%a0), %d0 # fetch old lo(man) 22438 lsl.l %d1, %d0 # extract new lo(man) 22439 mov.l %d0, FTEMP_LO(%a0) # save new lo(man) 22440 22441 and.w &0x8000, FTEMP_EX(%a0) # set exp = 0 22442 22443 mov.b &DENORM, %d0 # return new optype tag 22444 rts 22445 22446# 22447# only mantissa bits set are in lo(man) 22448# 22449unnorm_nrm_zero_lrg: 22450 sub.w &32, %d1 # adjust shft amt by 32 22451 22452 mov.l FTEMP_LO(%a0), %d0 # fetch old lo(man) 22453 lsl.l %d1, %d0 # left shift lo(man) 22454 22455 mov.l %d0, FTEMP_HI(%a0) # store new hi(man) 22456 clr.l FTEMP_LO(%a0) # lo(man) = 0 22457 22458 and.w &0x8000, FTEMP_EX(%a0) # set exp = 0 22459 22460 mov.b &DENORM, %d0 # return new optype tag 22461 rts 22462 22463# 22464# whole mantissa is zero so this UNNORM is actually a zero 22465# 22466unnorm_zero: 22467 and.w &0x8000, FTEMP_EX(%a0) # force exponent to zero 22468 22469 mov.b &ZERO, %d0 # fix optype tag 22470 rts 22471 22472######################################################################### 22473# XDEF **************************************************************** # 22474# set_tag_x(): return the optype of the input ext fp number # 22475# # 22476# XREF **************************************************************** # 22477# None # 22478# # 22479# INPUT *************************************************************** # 22480# a0 = pointer to extended precision operand # 22481# # 22482# OUTPUT ************************************************************** # 22483# d0 = value of type tag # 22484# one of: NORM, INF, QNAN, SNAN, DENORM, UNNORM, ZERO # 22485# # 22486# ALGORITHM *********************************************************** # 22487# Simply test the exponent, j-bit, and mantissa values to # 22488# determine the type of operand. 
#	If it's an unnormalized zero, alter the operand and force it	#
#	to be a normal zero.						#
#									#
#########################################################################

	global		set_tag_x
set_tag_x:
	mov.w		FTEMP_EX(%a0), %d0	# d0.w = {sgn,exp}
	andi.w		&0x7fff, %d0		# keep the exponent only
	cmpi.w		%d0, &0x7fff		# maximum exponent?
	beq.b		inf_or_nan_x		# yes; INF or NAN
not_inf_or_nan_x:
	btst		&0x7,FTEMP_HI(%a0)	# msb of the mantissa (j-bit) set?
	beq.b		not_norm_x		# no
is_norm_x:
	mov.b		&NORM, %d0
	rts
not_norm_x:
	tst.w		%d0			# is the exponent zero?
	bne.b		is_unnorm_x		# no; j-bit clear with exp != 0
#
# exp = 0 and j-bit clear: DENORM unless the mantissa is empty.
#
not_unnorm_x:
	tst.l		FTEMP_HI(%a0)
	bne.b		is_denorm_x
	tst.l		FTEMP_LO(%a0)
	beq.b		is_zero_x		# both mantissa halves are zero
is_denorm_x:
	mov.b		&DENORM, %d0
	rts
is_zero_x:
	mov.b		&ZERO, %d0
	rts
#
# exp != 0 and j-bit clear: UNNORM, except that an "unnormalized zero"
# (empty mantissa) is rewritten in place as a true zero.
#
is_unnorm_x:
	tst.l		FTEMP_HI(%a0)
	bne.b		is_unnorm_reg_x
	tst.l		FTEMP_LO(%a0)
	bne.b		is_unnorm_reg_x
	andi.w		&0x8000,FTEMP_EX(%a0)	# keep the sign; clear exponent
	mov.b		&ZERO, %d0
	rts
is_unnorm_reg_x:
	mov.b		&UNNORM, %d0
	rts
#
# exp = max: INF if the mantissa is empty below its msb, else a NAN.
#
inf_or_nan_x:
	tst.l		FTEMP_LO(%a0)
	bne.b		is_nan_x
	mov.l		FTEMP_HI(%a0), %d0
	and.l		&0x7fffffff, %d0	# the msb is ignored for this test
	bne.b		is_nan_x
is_inf_x:
	mov.b		&INF, %d0
	rts
is_nan_x:
	btst		&0x6, FTEMP_HI(%a0)	# bit below the msb picks QNAN/SNAN
	beq.b		is_snan_x		# clear; signalling
	mov.b		&QNAN, %d0
	rts
is_snan_x:
	mov.b		&SNAN, %d0
	rts

#########################################################################
# XDEF **************************************************************** #
#	set_tag_d(): return the optype of the input dbl fp number	#
#									#
# XREF **************************************************************** #
#	None								#
#									#
# INPUT *************************************************************** #
#	a0 = points to double precision operand				#
#									#
# OUTPUT ************************************************************** #
#	d0 = value of type tag						#
#		one of: NORM, INF, QNAN, SNAN, DENORM, ZERO		#
#									#
# ALGORITHM *********************************************************** #
#	Simply test the exponent, j-bit, and mantissa values to		#
#	determine the type of operand.
#									#
#########################################################################

	global		set_tag_d
set_tag_d:
	mov.l		FTEMP(%a0), %d1		# d1 = upper longword of the double
	mov.l		%d1, %d0		# d0 = copy for the exponent test

	andi.l		&0x7ff00000, %d0	# isolate the exponent field
	beq.b		zero_or_denorm_d	# exp = 0

	cmpi.l		%d0, &0x7ff00000	# exp = all ones?
	beq.b		inf_or_nan_d		# yes

is_norm_d:
	mov.b		&NORM, %d0
	rts
#
# exp = 0: ZERO if all fraction bits (both longwords) are clear.
#
zero_or_denorm_d:
	and.l		&0x000fffff, %d1	# upper fraction bits
	bne.b		is_denorm_d
	tst.l		4+FTEMP(%a0)		# lower fraction bits
	beq.b		is_zero_d
is_denorm_d:
	mov.b		&DENORM, %d0
	rts
is_zero_d:
	mov.b		&ZERO, %d0
	rts
#
# exp = max: INF if all fraction bits are clear, else a NAN.
#
inf_or_nan_d:
	and.l		&0x000fffff, %d1	# upper fraction bits
	bne.b		is_nan_d
	tst.l		4+FTEMP(%a0)		# lower fraction bits
	beq.b		is_inf_d
is_nan_d:
	btst		&19, %d1		# msb of the fraction picks QNAN/SNAN
	beq.b		is_snan_d		# clear; signalling
is_qnan_d:
	mov.b		&QNAN, %d0
	rts
is_snan_d:
	mov.b		&SNAN, %d0
	rts
is_inf_d:
	mov.b		&INF, %d0
	rts

#########################################################################
# XDEF **************************************************************** #
#	set_tag_s(): return the optype of the input sgl fp number	#
#									#
# XREF **************************************************************** #
#	None								#
#									#
# INPUT *************************************************************** #
#	a0 = pointer to single precision operand			#
#									#
# OUTPUT ************************************************************** #
#	d0 = value of type tag						#
#		one of: NORM, INF, QNAN, SNAN, DENORM, ZERO		#
#									#
# ALGORITHM *********************************************************** #
#	Simply test the exponent and fraction fields to			#
#	determine the type of operand.
#									#
#########################################################################

	global		set_tag_s
set_tag_s:
	mov.l		FTEMP(%a0), %d1		# d1 = single precision image
	mov.l		%d1, %d0		# d0 = copy for the exponent test

	andi.l		&0x7f800000, %d0	# isolate the exponent field
	beq.b		zero_or_denorm_s	# exp = 0

	cmpi.l		%d0, &0x7f800000	# exp = all ones?
	beq.b		inf_or_nan_s		# yes

is_norm_s:
	mov.b		&NORM, %d0
	rts
#
# exp = 0: ZERO if the fraction is empty, else DENORM.
#
zero_or_denorm_s:
	and.l		&0x007fffff, %d1	# isolate the fraction field
	beq.b		is_zero_s
is_denorm_s:
	mov.b		&DENORM, %d0
	rts
is_zero_s:
	mov.b		&ZERO, %d0
	rts
#
# exp = max: INF if the fraction is empty, else a NAN.
#
inf_or_nan_s:
	and.l		&0x007fffff, %d1	# isolate the fraction field
	beq.b		is_inf_s
is_nan_s:
	btst		&22, %d1		# msb of the fraction picks QNAN/SNAN
	beq.b		is_snan_s		# clear; signalling
is_qnan_s:
	mov.b		&QNAN, %d0
	rts
is_snan_s:
	mov.b		&SNAN, %d0
	rts
is_inf_s:
	mov.b		&INF, %d0
	rts

#########################################################################
# XDEF **************************************************************** #
#	unf_res(): routine to produce default underflow result of a	#
#		   scaled extended precision number; this is used by	#
#		   fadd/fdiv/fmul/etc. emulation routines.		#
#	unf_res4(): same as above but for fsglmul/fsgldiv which use	#
#		    single round prec and extended prec mode.
#									#
# XREF **************************************************************** #
#	_denorm() - denormalize according to scale factor		#
#	_round() - round denormalized number according to rnd prec	#
#									#
# INPUT *************************************************************** #
#	a0 = pointer to extended precision operand			#
#	d0 = scale factor						#
#	d1 = rounding precision/mode					#
#									#
# OUTPUT ************************************************************** #
#	a0 = pointer to default underflow result in extended precision	#
#	d0.b = result FPSR_cc which caller may or may not want to save	#
#									#
# ALGORITHM *********************************************************** #
#	Convert the input operand to "internal format" which means the	#
# exponent is extended to 16 bits and the sign is stored in the unused	#
# portion of the extended precision operand. Denormalize the number	#
# according to the scale factor passed in d0. Then, round the		#
# denormalized result.							#
#	Set the FPSR_exc bits as appropriate but return the cc bits in	#
# d0 in case the caller doesn't want to save them (as is the case for	#
# fmove out).								#
#	unf_res4() for fsglmul/fsgldiv forces the denorm to extended	#
# precision and the rounding mode to single.				#
#									#
#########################################################################
	global		unf_res
unf_res:
	mov.l		%d1, -(%sp)		# keep rnd prec,mode for later

#
# Build the "internal" format: sign parked in FTEMP_SGN, and a 16-bit
# exponent equal to (exponent - scale factor) in FTEMP_EX.
#
	btst		&0x7, FTEMP_EX(%a0)	# operand sign
	sne		FTEMP_SGN(%a0)		# FTEMP_SGN = 0xff if negative, else 0

	mov.w		FTEMP_EX(%a0), %d1
	and.w		&0x7fff, %d1		# d1 = exponent
	sub.w		%d0, %d1		# take off the scale factor
	mov.w		%d1, FTEMP_EX(%a0)	# store the 16-bit exponent

	mov.l		%a0, -(%sp)		# keep operand ptr across the calls

#
# stack: (%sp) = operand ptr, 0x4(%sp) = saved d1 (its low word is 0x6(%sp))
#
	mov.l		0x4(%sp),%d0		# saved rnd prec,mode
	andi.w		&0x00c0,%d0		# prec bits
	lsr.w		&0x4,%d0		# d0 = rnd prec as passed to _denorm
	bsr.l		_denorm			# denormalize the operand

	mov.l		(%sp),%a0		# operand ptr again
	mov.w		0x6(%sp),%d1
	andi.w		&0xc0,%d1		# prec bits
	lsr.w		&0x4,%d1
	swap		%d1			# hi(d1) = rnd prec
	mov.w		0x6(%sp),%d1
	andi.w		&0x30,%d1		# mode bits
	lsr.w		&0x4,%d1		# lo(d1) = rnd mode
	bsr.l		_round			# round the denormalized value

	mov.l		(%sp)+, %a0		# operand ptr; pop it

#
# Rounding is done; go back from "internal" to normal format.
#
	bclr		&0x7, FTEMP_EX(%a0)	# clear sgn first; may have residue
	tst.b		FTEMP_SGN(%a0)		# was the operand negative?
	beq.b		unf_res_chkifzero	# no; result is positive
	bset		&0x7, FTEMP_EX(%a0)	# yes; set the result sign
	clr.b		FTEMP_SGN(%a0)		# and clear the temporary sign

#
# The value may have become zero through rounding; report that in the
# ccode byte returned in d0 (FPSR_CC itself is not touched here).
#
unf_res_chkifzero:
	clr.l		%d0			# start with no ccodes
	tst.l		FTEMP_HI(%a0)
	bne.b		unf_res_cont		# not zero
	tst.l		FTEMP_LO(%a0)
	bne.b		unf_res_cont		# not zero
	bset		&z_bit, %d0		# zero; set the Z ccode bit

unf_res_cont:

#
# can inex1 also be set along with unfl and inex2???
#
# Underflow is known to have occurred; aunfl is set only if INEX2 is
# set as well.
#
	btst		&inex2_bit, FPSR_EXCEPT(%a6) # is INEX2 set?
	beq.b		unf_res_end		# no
	bset		&aunfl_bit, FPSR_AEXCEPT(%a6) # yes; set aunfl

unf_res_end:
	addq.l		&0x4, %sp		# drop the saved rnd prec,mode
	rts

# unf_res() for fsglmul() and fsgldiv().
# unf_res4(): same inputs and outputs as unf_res(), but the operand is
# denormalized with the precision forced to extended and then rounded
# with the precision forced to single; only the rounding mode is taken
# from d1.
	global		unf_res4
unf_res4:
	mov.l		%d1,-(%sp)		# keep rnd prec,mode for later

#
# Build the "internal" format: sign parked in FTEMP_SGN, and a 16-bit
# exponent equal to (exponent - scale factor) in FTEMP_EX.
#
	btst		&0x7,FTEMP_EX(%a0)	# operand sign
	sne		FTEMP_SGN(%a0)		# FTEMP_SGN = 0xff if negative, else 0

	mov.w		FTEMP_EX(%a0),%d1
	and.w		&0x7fff,%d1		# d1 = exponent
	sub.w		%d0,%d1			# take off the scale factor
	mov.w		%d1,FTEMP_EX(%a0)	# store the 16-bit exponent

	mov.l		%a0,-(%sp)		# keep operand ptr across the calls

	clr.l		%d0			# force rnd prec = ext
	bsr.l		_denorm			# denormalize the operand

	mov.l		(%sp),%a0		# operand ptr again
	mov.w		&s_mode,%d1		# force rnd prec = sgl
	swap		%d1			# hi(d1) = rnd prec
	mov.w		0x6(%sp),%d1		# low word of the saved d1
	andi.w		&0x30,%d1		# mode bits
	lsr.w		&0x4,%d1		# lo(d1) = rnd mode
	bsr.l		_round			# round the denormalized value

	mov.l		(%sp)+,%a0		# operand ptr; pop it

#
# Rounding is done; go back from "internal" to normal format.
#
	bclr		&0x7,FTEMP_EX(%a0)	# clear sgn first; may have residue
	tst.b		FTEMP_SGN(%a0)		# was the operand negative?
	beq.b		unf_res4_chkifzero	# no; result is positive
	bset		&0x7,FTEMP_EX(%a0)	# yes; set the result sign
	clr.b		FTEMP_SGN(%a0)		# and clear the temporary sign

#
# The value may have become zero through rounding; report that in the
# ccode byte returned in d0 (FPSR_CC itself is not touched here).
#
unf_res4_chkifzero:
	clr.l		%d0			# start with no ccodes
	tst.l		FTEMP_HI(%a0)
	bne.b		unf_res4_cont		# not zero
	tst.l		FTEMP_LO(%a0)
	bne.b		unf_res4_cont		# not zero
	bset		&z_bit,%d0		# zero; set the Z ccode bit

unf_res4_cont:

#
# can inex1 also be set along with unfl and inex2???
#
# Underflow is known to have occurred; aunfl is set only if INEX2 is
# set as well.
#
	btst		&inex2_bit,FPSR_EXCEPT(%a6) # is INEX2 set?
	beq.b		unf_res4_end		# no
	bset		&aunfl_bit,FPSR_AEXCEPT(%a6) # yes; set aunfl

unf_res4_end:
	addq.l		&0x4,%sp		# drop the saved rnd prec,mode
	rts

#########################################################################
# XDEF **************************************************************** #
#	ovf_res(): routine to produce the default overflow result of	#
#		   an overflowing number.				#
#	ovf_res2(): same as above but the rnd mode/prec are passed	#
#		    differently.					#
#									#
# XREF **************************************************************** #
#	none								#
#									#
# INPUT *************************************************************** #
#	d1.b	= '-1' => (-); '0' => (+)				#
#   ovf_res():								#
#	d0	= rnd mode/prec						#
#   ovf_res2():								#
#	hi(d0)	= rnd prec						#
#	lo(d0)	= rnd mode						#
#									#
# OUTPUT ************************************************************** #
#	a0	= points to extended precision result			#
#	d0.b	= condition code bits					#
#									#
# ALGORITHM *********************************************************** #
#	The default overflow result can be determined by the sign of	#
# the result and the rounding mode/prec in effect. These bits are	#
# concatenated together to create an index into the default result	#
# table. A pointer to the correct result is returned in a0. The		#
# resulting condition codes are returned in d0 in case the caller	#
# doesn't want FPSR_cc altered (as is the case for fmove out).
#									#
#########################################################################

#
# Both entry points build the same table index:
#	bit 4	 = result sign
#	bits 3-2 = rounding precision
#	bits 1-0 = rounding mode
# d0.w ends up holding the index and d1.w twice the index.
#
	global		ovf_res
ovf_res:
	andi.w		&0x10,%d1		# sign -> bit 4; rest of d1.w cleared
	lsr.b		&0x4,%d0		# prec/mode from d0{7:4} to d0{3:0}
	or.b		%d0,%d1			# d1 = {sign,prec,mode}
	mov.w		%d1,%d0			# d0.w = index into tbl_ovfl_cc
	lsl.b		&0x1,%d1		# d1.w = 2 * index
	bra.b		ovf_res_load

	global		ovf_res2
ovf_res2:
	and.w		&0x10, %d1		# sign -> bit 4; rest of d1.w cleared
	or.b		%d0, %d1		# merge in lo(d0) = rnd mode
	swap		%d0
	or.b		%d0, %d1		# merge in hi(d0) = rnd prec
	mov.w		%d1, %d0		# d0.w = index into tbl_ovfl_cc
	lsl.b		&0x1, %d1		# d1.w = 2 * index

#
# tbl_ovfl_cc has one byte per index; tbl_ovfl_result has 16 bytes per
# index, which is why it is addressed with (2 * index) scaled by 8.
#
ovf_res_load:
	lea		(tbl_ovfl_result.b,%pc,%d1.w*8), %a0 # a0 = ptr to result
	mov.b		(tbl_ovfl_cc.b,%pc,%d0.w*1), %d0 # d0.b = result ccodes

	rts

#
# Condition codes, four per row in the order RN, RZ, RM, RP.  Judging by
# the matching rows of tbl_ovfl_result, 0x2 goes with an infinite result
# and 0x8 with a negative one.
#
tbl_ovfl_cc:
	byte		0x2, 0x0, 0x0, 0x2	# (+) ext
	byte		0x2, 0x0, 0x0, 0x2	# (+) sgl
	byte		0x2, 0x0, 0x0, 0x2	# (+) dbl
	byte		0x0, 0x0, 0x0, 0x0	# (+) prec field = 3; zero filler
	byte		0x2+0x8, 0x8, 0x2+0x8, 0x8 # (-) ext
	byte		0x2+0x8, 0x8, 0x2+0x8, 0x8 # (-) sgl
	byte		0x2+0x8, 0x8, 0x2+0x8, 0x8 # (-) dbl

#
# Default results, 16 bytes each: either an infinity or the largest
# magnitude number of the selected precision.
#
tbl_ovfl_result:
	long		0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN
	long		0x7ffe0000,0xffffffff,0xffffffff,0x00000000 # +EXT; RZ
	long		0x7ffe0000,0xffffffff,0xffffffff,0x00000000 # +EXT; RM
	long		0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP

	long		0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN
	long		0x407e0000,0xffffff00,0x00000000,0x00000000 # +SGL; RZ
	long		0x407e0000,0xffffff00,0x00000000,0x00000000 # +SGL; RM
	long		0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP

	long		0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN
	long		0x43fe0000,0xffffffff,0xfffff800,0x00000000 # +DBL; RZ
	long		0x43fe0000,0xffffffff,0xfffff800,0x00000000 # +DBL; RM
	long		0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP

	long		0x00000000,0x00000000,0x00000000,0x00000000 # (+) prec field = 3;
	long		0x00000000,0x00000000,0x00000000,0x00000000 # zero filler so the
	long		0x00000000,0x00000000,0x00000000,0x00000000 # (-) entries start at
	long		0x00000000,0x00000000,0x00000000,0x00000000 # index 16

	long		0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN
	long		0xfffe0000,0xffffffff,0xffffffff,0x00000000 # -EXT; RZ
	long		0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM
	long		0xfffe0000,0xffffffff,0xffffffff,0x00000000 # -EXT; RP

	long		0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN
	long		0xc07e0000,0xffffff00,0x00000000,0x00000000 # -SGL; RZ
	long		0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM
	long		0xc07e0000,0xffffff00,0x00000000,0x00000000 # -SGL; RP

	long		0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN
	long		0xc3fe0000,0xffffffff,0xfffff800,0x00000000 # -DBL; RZ
	long		0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM
	long		0xc3fe0000,0xffffffff,0xfffff800,0x00000000 # -DBL; RP

#########################################################################
# XDEF **************************************************************** #
#	get_packed(): fetch a packed operand from memory and then	#
#		      convert it to a floating-point binary number.
#									#
# XREF **************************************************************** #
#	_dcalc_ea() - calculate the correct <ea>			#
#	_dmem_read() - fetch the packed operand from memory		#
#	facc_in_x() - the fetch failed so jump to special exit code	#
#	decbin()    - convert packed to binary extended precision	#
#									#
# INPUT *************************************************************** #
#	None								#
#									#
# OUTPUT ************************************************************** #
#	If no failure on _dmem_read():					#
#	FP_SRC(a6) = packed operand now as a binary FP number		#
#									#
# ALGORITHM *********************************************************** #
#	Get the correct <ea> which is the value on the exception stack	#
# frame w/ maybe a correction factor if the <ea> is -(an) or (an)+.	#
# Then, fetch the operand from memory. If the fetch fails, exit		#
# through facc_in_x().							#
#	If the packed operand is a ZERO, NAN, or INF, it is left in	#
# FP_SRC as fetched. Else, call decbin() which will convert the		#
# packed value to an extended precision binary value.			#
#									#
#########################################################################

# the stacked <ea> for packed is correct except for -(An).
# the base reg must be updated for both -(An) and (An)+.
	global		get_packed
get_packed:
	mov.l		&0xc,%d0		# pass: operand size (packed = 12 bytes)
	bsr.l		_dcalc_ea		# fetch <ea>; correct An

	mov.l		&0xc,%d0		# pass: 12 bytes to read
	lea		FP_SRC(%a6),%a1		# pass: ptr to super dst
	bsr.l		_dmem_read		# read packed operand into FP_SRC

	tst.l		%d1			# nonzero d1 reports a failed fetch
	bne.l		facc_in_x		# failed; take the access error exit

#
# An exponent field of all ones marks an INF or a NAN; return it as is.
#
	bfextu		FP_SRC(%a6){&1:&15},%d0	# 15 bits that follow the sign bit
	cmpi.w		%d0,&0x7fff		# INF or NAN?
	bne.b		gp_try_zero		# no
	rts					# yes; nothing to convert

#
# The operand is a zero if every mantissa digit is zero: the low nybble
# of byte 3 and both remaining longwords.  A zero is returned as is.
#
gp_try_zero:
	mov.b		3+FP_SRC(%a6),%d0	# fourth byte of the operand
	andi.b		&0x0f,%d0		# keep its low nybble only
	bne.b		gp_not_spec		# non-zero digit; a normal packed op
	tst.l		FP_SRC_LO(%a6)		# is lw 3 zero?
	bne.b		gp_not_spec		# no
	tst.l		FP_SRC_HI(%a6)		# is lw 2 zero?
	bne.b		gp_not_spec		# no
	rts					# operand is a ZERO
gp_not_spec:
	lea		FP_SRC(%a6),%a0		# pass: ptr to packed op
	bsr.l		decbin			# convert; result comes back in fp0
	fmovm.x		&0x80,FP_SRC(%a6)	# store fp0 as the new source operand
	rts

#########################################################################
# decbin(): Converts normalized packed bcd value pointed to by register	#
#	    a0 to extended-precision value in fp0.			#
#									#
# INPUT *************************************************************** #
#	a0 = pointer to normalized packed bcd value			#
#									#
# OUTPUT ************************************************************** #
#	fp0 = exact fp representation of the packed bcd value.		#
#									#
# ALGORITHM *********************************************************** #
#	Expected is a normal bcd (i.e. non-exceptional; all inf, zero,	#
#	and NaN operands are dispatched without entering this routine)	#
#	value in 68881/882 format at location (a0).			#
#									#
#	A1. Convert the bcd exponent to binary by successive adds and	#
#	muls. Set the sign according to SE. Subtract 16 to compensate	#
#	for the mantissa which is to be interpreted as 17 integer	#
#	digits, rather than 1 integer and 16 fraction digits.		#
#	Note: this operation can never overflow.			#
#									#
#	A2. Convert the bcd mantissa to binary by successive		#
#	adds and muls in FP0. Set the sign according to SM.
#	The mantissa digits will be converted with the decimal point	#
#	assumed following the least-significant digit.			#
#	Note: this operation can never overflow.			#
#									#
#	A3. Count the number of leading/trailing zeros in the		#
#	bcd string.  If SE is positive, count the leading zeros;	#
#	if negative, count the trailing zeros.  Set the adjusted	#
#	exponent equal to the exponent from A1 and the zero count	#
#	added if SE = 1 and subtracted if SE = 0.  Scale the		#
#	mantissa the equivalent of forcing in the bcd value:		#
#									#
#	SE = 0	a non-zero digit in the integer position		#
#	SE = 1	a non-zero digit in Mant0, lsd of the fraction		#
#									#
#	this will ensure that any value, regardless of its		#
#	representation (ex. 0.1E2, 1E1, 10E0, 100E-1), is converted	#
#	consistently.							#
#									#
#	A4. Calculate the factor 10^exp in FP1 using a table of		#
#	10^(2^n) values.  To reduce the error in forming factors	#
#	greater than 10^27, a directed rounding scheme is used with	#
#	tables rounded to RN, RM, and RP, according to the table	#
#	in the comments of the pwrten section.				#
#									#
#	A5. Form the final binary number by scaling the mantissa by	#
#	the exponent factor.  This is done by multiplying the		#
#	mantissa in FP0 by the factor in FP1 if the adjusted		#
#	exponent sign is positive, and dividing FP0 by FP1 if		#
#	it is negative.							#
#									#
#	Clean up and return. Check if the final mul or div was inexact.	#
#	If so, set INEX1 in USER_FPSR.					#
#									#
#########################################################################

#
# PTENRN, PTENRM, and PTENRP are arrays of powers of 10 rounded
# to nearest, minus, and plus, respectively.  The tables include
# 10**{1,2,4,8,16,32,64,128,256,512,1024,2048,4096}.
No rounding 23064# is required until the power is greater than 27, however, all 23065# tables include the first 5 for ease of indexing. 23066# 23067RTABLE: 23068 byte 0,0,0,0 23069 byte 2,3,2,3 23070 byte 2,3,3,2 23071 byte 3,2,2,3 23072 23073 set FNIBS,7 23074 set FSTRT,0 23075 23076 set ESTRT,4 23077 set EDIGITS,2 23078 23079 global decbin 23080decbin: 23081 mov.l 0x0(%a0),FP_SCR0_EX(%a6) # make a copy of input 23082 mov.l 0x4(%a0),FP_SCR0_HI(%a6) # so we don't alter it 23083 mov.l 0x8(%a0),FP_SCR0_LO(%a6) 23084 23085 lea FP_SCR0(%a6),%a0 23086 23087 movm.l &0x3c00,-(%sp) # save d2-d5 23088 fmovm.x &0x1,-(%sp) # save fp1 23089# 23090# Calculate exponent: 23091# 1. Copy bcd value in memory for use as a working copy. 23092# 2. Calculate absolute value of exponent in d1 by mul and add. 23093# 3. Correct for exponent sign. 23094# 4. Subtract 16 to compensate for interpreting the mant as all integer digits. 23095# (i.e., all digits assumed left of the decimal point.) 23096# 23097# Register usage: 23098# 23099# calc_e: 23100# (*) d0: temp digit storage 23101# (*) d1: accumulator for binary exponent 23102# (*) d2: digit count 23103# (*) d3: offset pointer 23104# ( ) d4: first word of bcd 23105# ( ) a0: pointer to working bcd value 23106# ( ) a6: pointer to original bcd value 23107# (*) FP_SCR1: working copy of original bcd value 23108# (*) L_SCR1: copy of original exponent word 23109# 23110calc_e: 23111 mov.l &EDIGITS,%d2 # # of nibbles (digits) in fraction part 23112 mov.l &ESTRT,%d3 # counter to pick up digits 23113 mov.l (%a0),%d4 # get first word of bcd 23114 clr.l %d1 # zero d1 for accumulator 23115e_gd: 23116 mulu.l &0xa,%d1 # mul partial product by one digit place 23117 bfextu %d4{%d3:&4},%d0 # get the digit and zero extend into d0 23118 add.l %d0,%d1 # d1 = d1 + d0 23119 addq.b &4,%d3 # advance d3 to the next digit 23120 dbf.w %d2,e_gd # if we have used all 3 digits, exit loop 23121 btst &30,%d4 # get SE 23122 beq.b e_pos # don't negate if pos 23123 neg.l %d1 # 
negate before subtracting 23124e_pos: 23125 sub.l &16,%d1 # sub to compensate for shift of mant 23126 bge.b e_save # if still pos, do not neg 23127 neg.l %d1 # now negative, make pos and set SE 23128 or.l &0x40000000,%d4 # set SE in d4, 23129 or.l &0x40000000,(%a0) # and in working bcd 23130e_save: 23131 mov.l %d1,-(%sp) # save exp on stack 23132# 23133# 23134# Calculate mantissa: 23135# 1. Calculate absolute value of mantissa in fp0 by mul and add. 23136# 2. Correct for mantissa sign. 23137# (i.e., all digits assumed left of the decimal point.) 23138# 23139# Register usage: 23140# 23141# calc_m: 23142# (*) d0: temp digit storage 23143# (*) d1: lword counter 23144# (*) d2: digit count 23145# (*) d3: offset pointer 23146# ( ) d4: words 2 and 3 of bcd 23147# ( ) a0: pointer to working bcd value 23148# ( ) a6: pointer to original bcd value 23149# (*) fp0: mantissa accumulator 23150# ( ) FP_SCR1: working copy of original bcd value 23151# ( ) L_SCR1: copy of original exponent word 23152# 23153calc_m: 23154 mov.l &1,%d1 # word counter, init to 1 23155 fmov.s &0x00000000,%fp0 # accumulator 23156# 23157# 23158# Since the packed number has a long word between the first & second parts, 23159# get the integer digit then skip down & get the rest of the 23160# mantissa. We will unroll the loop once. 23161# 23162 bfextu (%a0){&28:&4},%d0 # integer part is ls digit in long word 23163 fadd.b %d0,%fp0 # add digit to sum in fp0 23164# 23165# 23166# Get the rest of the mantissa. 
23167# 23168loadlw: 23169 mov.l (%a0,%d1.L*4),%d4 # load mantissa lonqword into d4 23170 mov.l &FSTRT,%d3 # counter to pick up digits 23171 mov.l &FNIBS,%d2 # reset number of digits per a0 ptr 23172md2b: 23173 fmul.s &0x41200000,%fp0 # fp0 = fp0 * 10 23174 bfextu %d4{%d3:&4},%d0 # get the digit and zero extend 23175 fadd.b %d0,%fp0 # fp0 = fp0 + digit 23176# 23177# 23178# If all the digits (8) in that long word have been converted (d2=0), 23179# then inc d1 (=2) to point to the next long word and reset d3 to 0 23180# to initialize the digit offset, and set d2 to 7 for the digit count; 23181# else continue with this long word. 23182# 23183 addq.b &4,%d3 # advance d3 to the next digit 23184 dbf.w %d2,md2b # check for last digit in this lw 23185nextlw: 23186 addq.l &1,%d1 # inc lw pointer in mantissa 23187 cmp.l %d1,&2 # test for last lw 23188 ble.b loadlw # if not, get last one 23189# 23190# Check the sign of the mant and make the value in fp0 the same sign. 23191# 23192m_sign: 23193 btst &31,(%a0) # test sign of the mantissa 23194 beq.b ap_st_z # if clear, go to append/strip zeros 23195 fneg.x %fp0 # if set, negate fp0 23196# 23197# Append/strip zeros: 23198# 23199# For adjusted exponents which have an absolute value greater than 27*, 23200# this routine calculates the amount needed to normalize the mantissa 23201# for the adjusted exponent. That number is subtracted from the exp 23202# if the exp was positive, and added if it was negative. The purpose 23203# of this is to reduce the value of the exponent and the possibility 23204# of error in calculation of pwrten. 23205# 23206# 1. Branch on the sign of the adjusted exponent. 23207# 2p.(positive exp) 23208# 2. Check M16 and the digits in lwords 2 and 3 in decending order. 23209# 3. Add one for each zero encountered until a non-zero digit. 23210# 4. Subtract the count from the exp. 23211# 5. Check if the exp has crossed zero in #3 above; make the exp abs 23212# and set SE. 23213# 6. 
Multiply the mantissa by 10**count. 23214# 2n.(negative exp) 23215# 2. Check the digits in lwords 3 and 2 in decending order. 23216# 3. Add one for each zero encountered until a non-zero digit. 23217# 4. Add the count to the exp. 23218# 5. Check if the exp has crossed zero in #3 above; clear SE. 23219# 6. Divide the mantissa by 10**count. 23220# 23221# *Why 27? If the adjusted exponent is within -28 < expA < 28, than 23222# any adjustment due to append/strip zeros will drive the resultane 23223# exponent towards zero. Since all pwrten constants with a power 23224# of 27 or less are exact, there is no need to use this routine to 23225# attempt to lessen the resultant exponent. 23226# 23227# Register usage: 23228# 23229# ap_st_z: 23230# (*) d0: temp digit storage 23231# (*) d1: zero count 23232# (*) d2: digit count 23233# (*) d3: offset pointer 23234# ( ) d4: first word of bcd 23235# (*) d5: lword counter 23236# ( ) a0: pointer to working bcd value 23237# ( ) FP_SCR1: working copy of original bcd value 23238# ( ) L_SCR1: copy of original exponent word 23239# 23240# 23241# First check the absolute value of the exponent to see if this 23242# routine is necessary. If so, then check the sign of the exponent 23243# and do append (+) or strip (-) zeros accordingly. 23244# This section handles a positive adjusted exponent. 
23245# Skip to pwrten when expA <= 27; go to ap_st_n when the exponent sign (SE) is set; otherwise count zero digits starting at M16 and subtract the count from the exponent. 23246ap_st_z: 23247 mov.l (%sp),%d1 # load expA for range test 23248 cmp.l %d1,&27 # test is with 27 23249 ble.w pwrten # if abs(expA) <28, skip ap/st zeros 23250 btst &30,(%a0) # check sign of exp 23251 bne.b ap_st_n # if neg, go to neg side 23252 clr.l %d1 # zero count reg 23253 mov.l (%a0),%d4 # load lword 1 to d4 23254 bfextu %d4{&28:&4},%d0 # get M16 in d0 23255 bne.b ap_p_fx # if M16 is non-zero, go fix exp 23256 addq.l &1,%d1 # inc zero count 23257 mov.l &1,%d5 # init lword counter 23258 mov.l (%a0,%d5.L*4),%d4 # get lword 2 to d4 23259 bne.b ap_p_cl # if lw 2 is non-zero, go scan its digits 23260 addq.l &8,%d1 # lw 2 is all zeros: inc count by 8 23261 addq.l &1,%d5 # inc lword counter 23262 mov.l (%a0,%d5.L*4),%d4 # get lword 3 to d4 23263ap_p_cl: 23264 clr.l %d3 # init offset reg 23265 mov.l &7,%d2 # init digit counter 23266ap_p_gd: 23267 bfextu %d4{%d3:&4},%d0 # get digit 23268 bne.b ap_p_fx # if non-zero, go to fix exp 23269 addq.l &4,%d3 # point to next digit 23270 addq.l &1,%d1 # inc zero count 23271 dbf.w %d2,ap_p_gd # get next digit 23272ap_p_fx: 23273 mov.l %d1,%d0 # copy zero count to d0 23274 mov.l (%sp),%d1 # get adjusted exp from memory 23275 sub.l %d0,%d1 # subtract count from exp 23276 bge.b ap_p_fm # if still pos, go fix mantissa 23277 neg.l %d1 # now it's neg; get abs 23278 mov.l (%a0),%d4 # load lword 1 to d4 23279 or.l &0x40000000,%d4 # and set SE in d4 23280 or.l &0x40000000,(%a0) # and in memory 23281# 23282# Calculate the mantissa multiplier to compensate for the stripping of 23283# zeros from the mantissa. 
23284# 23285ap_p_fm: 23286 lea.l PTENRN(%pc),%a1 # get address of power-of-ten table 23287 clr.l %d3 # init table index 23288 fmov.s &0x3f800000,%fp1 # init fp1 to 1 23289 mov.l &3,%d2 # init d2 to count bits in counter 23290ap_p_el: 23291 asr.l &1,%d0 # shift lsb into carry 23292 bcc.b ap_p_en # if 1, mul fp1 by pwrten factor 23293 fmul.x (%a1,%d3),%fp1 # mul by 10**(d3_bit_no) 23294ap_p_en: 23295 add.l &12,%d3 # inc d3 to next rtable entry 23296 tst.l %d0 # check if d0 is zero 23297 bne.b ap_p_el # if not, get next bit 23298 fmul.x %fp1,%fp0 # mul mantissa by 10**(no_bits_shifted) 23299 bra.b pwrten # go calc pwrten 23300# 23301# This section handles a negative adjusted exponent. 23302# 23303ap_st_n: 23304 clr.l %d1 # clr counter 23305 mov.l &2,%d5 # set up d5 to point to lword 3 23306 mov.l (%a0,%d5.L*4),%d4 # get lword 3 23307 bne.b ap_n_cl # if not zero, check digits 23308 sub.l &1,%d5 # dec d5 to point to lword 2 23309 addq.l &8,%d1 # inc counter by 8 23310 mov.l (%a0,%d5.L*4),%d4 # get lword 2 23311ap_n_cl: 23312 mov.l &28,%d3 # point to last digit 23313 mov.l &7,%d2 # init digit counter 23314ap_n_gd: 23315 bfextu %d4{%d3:&4},%d0 # get digit 23316 bne.b ap_n_fx # if non-zero, go to exp fix 23317 subq.l &4,%d3 # point to previous digit 23318 addq.l &1,%d1 # inc digit counter 23319 dbf.w %d2,ap_n_gd # get next digit 23320ap_n_fx: 23321 mov.l %d1,%d0 # copy counter to d0 23322 mov.l (%sp),%d1 # get adjusted exp from memory 23323 sub.l %d0,%d1 # subtract count from exp 23324 bgt.b ap_n_fm # if still pos, go fix mantissa 23325 neg.l %d1 # take abs of exp and clr SE 23326 mov.l (%a0),%d4 # load lword 1 to d4 23327 and.l &0xbfffffff,%d4 # and clr SE in d4 23328 and.l &0xbfffffff,(%a0) # and in memory 23329# 23330# Calculate the mantissa multiplier to compensate for the appending of 23331# zeros to the mantissa. 
23332# 23333ap_n_fm: 23334 lea.l PTENRN(%pc),%a1 # get address of power-of-ten table 23335 clr.l %d3 # init table index 23336 fmov.s &0x3f800000,%fp1 # init fp1 to 1 23337 mov.l &3,%d2 # init d2 to count bits in counter 23338ap_n_el: 23339 asr.l &1,%d0 # shift lsb into carry 23340 bcc.b ap_n_en # if 1, mul fp1 by pwrten factor 23341 fmul.x (%a1,%d3),%fp1 # mul by 10**(d3_bit_no) 23342ap_n_en: 23343 add.l &12,%d3 # inc d3 to next rtable entry 23344 tst.l %d0 # check if d0 is zero 23345 bne.b ap_n_el # if not, get next bit 23346 fdiv.x %fp1,%fp0 # div mantissa by 10**(no_bits_shifted) 23347# 23348# 23349# Calculate power-of-ten factor from adjusted and shifted exponent. 23350# 23351# Register usage: 23352# 23353# pwrten: 23354# (*) d0: temp 23355# ( ) d1: exponent 23356# (*) d2: {FPCR[6:5],SM,SE} as index in RTABLE; temp 23357# (*) d3: FPCR work copy 23358# ( ) d4: first word of bcd 23359# (*) a1: RTABLE pointer 23360# calc_p: 23361# (*) d0: temp 23362# ( ) d1: exponent 23363# (*) d3: PWRTxx table index 23364# ( ) a0: pointer to working copy of bcd 23365# (*) a1: PWRTxx pointer 23366# (*) fp1: power-of-ten accumulator 23367# 23368# Pwrten calculates the exponent factor in the selected rounding mode 23369# according to the following table: 23370# 23371# Sign of Mant Sign of Exp Rounding Mode PWRTEN Rounding Mode 23372# 23373# ANY ANY RN RN 23374# 23375# + + RP RP 23376# - + RP RM 23377# + - RP RM 23378# - - RP RP 23379# 23380# + + RM RM 23381# - + RM RP 23382# + - RM RP 23383# - - RM RM 23384# 23385# + + RZ RM 23386# - + RZ RM 23387# + - RZ RP 23388# - - RZ RP 23389# 23390# 23391pwrten: 23392 mov.l USER_FPCR(%a6),%d3 # get user's FPCR 23393 bfextu %d3{&26:&2},%d2 # isolate rounding mode bits 23394 mov.l (%a0),%d4 # reload 1st bcd word to d4 23395 asl.l &2,%d2 # format d2 to be 23396 bfextu %d4{&0:&2},%d0 # {FPCR[6],FPCR[5],SM,SE} 23397 add.l %d0,%d2 # in d2 as index into RTABLE 23398 lea.l RTABLE(%pc),%a1 # load rtable base 23399 mov.b (%a1,%d2),%d0 # load new 
rounding bits from table 23400 clr.l %d3 # clear d3 to force no exc and extended 23401 bfins %d0,%d3{&26:&2} # stuff new rounding bits in FPCR 23402 fmov.l %d3,%fpcr # write new FPCR 23403 asr.l &1,%d0 # write correct PTENxx table 23404 bcc.b not_rp # to a1 23405 lea.l PTENRP(%pc),%a1 # it is RP 23406 bra.b calc_p # go to init section 23407not_rp: 23408 asr.l &1,%d0 # keep checking 23409 bcc.b not_rm 23410 lea.l PTENRM(%pc),%a1 # it is RM 23411 bra.b calc_p # go to init section 23412not_rm: 23413 lea.l PTENRN(%pc),%a1 # it is RN 23414calc_p: 23415 mov.l %d1,%d0 # copy exp to d0;use d0 23416 bpl.b no_neg # if exp is negative, 23417 neg.l %d0 # invert it 23418 or.l &0x40000000,(%a0) # and set SE bit 23419no_neg: 23420 clr.l %d3 # table index 23421 fmov.s &0x3f800000,%fp1 # init fp1 to 1 23422e_loop: 23423 asr.l &1,%d0 # shift next bit into carry 23424 bcc.b e_next # if zero, skip the mul 23425 fmul.x (%a1,%d3),%fp1 # mul by 10**(d3_bit_no) 23426e_next: 23427 add.l &12,%d3 # inc d3 to next rtable entry 23428 tst.l %d0 # check if d0 is zero 23429 bne.b e_loop # not zero, continue shifting 23430# 23431# 23432# Check the sign of the adjusted exp and make the value in fp0 the 23433# same sign. If the exp was pos then multiply fp1*fp0; 23434# else divide fp0/fp1. 23435# 23436# Register Usage: 23437# norm: 23438# ( ) a0: pointer to working bcd value 23439# (*) fp0: mantissa accumulator 23440# ( ) fp1: scaling factor - 10**(abs(exp)) 23441# 23442pnorm: 23443 btst &30,(%a0) # test the sign of the exponent 23444 beq.b mul # if clear, go to multiply 23445div: 23446 fdiv.x %fp1,%fp0 # exp is negative, so divide mant by exp 23447 bra.b end_dec 23448mul: 23449 fmul.x %fp1,%fp0 # exp is positive, so multiply by exp 23450# 23451# 23452# Clean up and return with result in fp0. 23453# 23454# If the final mul/div in decbin incurred an inex exception, 23455# it will be inex2, but will be reported as inex1 by get_op. 
23456# 23457end_dec: 23458 fmov.l %fpsr,%d0 # get status register 23459 bclr &inex2_bit+8,%d0 # test for inex2 and clear it 23460 beq.b no_exc # skip this if no exc 23461 ori.w &inx1a_mask,2+USER_FPSR(%a6) # set INEX1/AINEX 23462no_exc: 23463 add.l &0x4,%sp # clear 1 lw param 23464 fmovm.x (%sp)+,&0x40 # restore fp1 23465 movm.l (%sp)+,&0x3c # restore d2-d5 23466 fmov.l &0x0,%fpcr 23467 fmov.l &0x0,%fpsr 23468 rts 23469 23470######################################################################### 23471# bindec(): Converts an input in extended precision format to bcd format# 23472# # 23473# INPUT *************************************************************** # 23474# a0 = pointer to the input extended precision value in memory. # 23475# the input may be either normalized, unnormalized, or # 23476# denormalized. # 23477# d0 = contains the k-factor sign-extended to 32-bits. # 23478# # 23479# OUTPUT ************************************************************** # 23480# FP_SCR0(a6) = bcd format result on the stack. # 23481# # 23482# ALGORITHM *********************************************************** # 23483# # 23484# A1. Set RM and size ext; Set SIGMA = sign of input. # 23485# The k-factor is saved for use in d7. Clear the # 23486# BINDEC_FLG for separating normalized/denormalized # 23487# input. If input is unnormalized or denormalized, # 23488# normalize it. # 23489# # 23490# A2. Set X = abs(input). # 23491# # 23492# A3. Compute ILOG. # 23493# ILOG is the log base 10 of the input value. It is # 23494# approximated by adding e + 0.f when the original # 23495# value is viewed as 2^^e * 1.f in extended precision. # 23496# This value is stored in d6. # 23497# # 23498# A4. Clr INEX bit. # 23499# The operation in A3 above may have set INEX2. # 23500# # 23501# A5. Set ICTR = 0; # 23502# ICTR is a flag used in A13. It must be set before the # 23503# loop entry A6. # 23504# # 23505# A6. Calculate LEN. # 23506# LEN is the number of digits to be displayed. 
The # 23507# k-factor can dictate either the total number of digits, # 23508# if it is a positive number, or the number of digits # 23509# after the decimal point which are to be included as # 23510# significant. See the 68882 manual for examples. # 23511# If LEN is computed to be greater than 17, LEN is set to # 23512# 17 and, if k > 0, OPERR is set in USER_FPSR. LEN is stored in d4. # 23513# # 23514# A7. Calculate SCALE. # 23515# SCALE is equal to 10^ISCALE, where ISCALE is the number # 23516# of decimal places needed to ensure LEN integer digits # 23517# in the output before conversion to bcd. LAMBDA is the # 23518# sign of ISCALE, used in A9. Fp1 contains # 23519# 10^^(abs(ISCALE)) using a rounding mode which is a # 23520# function of the original rounding mode and the signs # 23521# of ISCALE and X. A table is given in the code. # 23522# # 23523# A8. Clr INEX; Force RZ. # 23524# The operations in A7 above may have set INEX2. # 23525# RZ mode is forced for the scaling operation to ensure # 23526# only one rounding error. The grs bits are collected in # 23527# the INEX flag for use in A10. # 23528# # 23529# A9. Scale X -> Y. # 23530# The mantissa is scaled to the desired number of # 23531# significant digits. The excess digits are collected # 23532# in INEX2. # 23533# # 23534# A10. Or in INEX. # 23535# If INEX is set, round error occurred. This is # 23536# compensated for by 'or-ing' in the INEX2 flag to # 23537# the lsb of Y. # 23538# # 23539# A11. Restore original FPCR; set size ext. # 23540# Perform FINT operation in the user's rounding mode. # 23541# Keep the size to extended. # 23542# # 23543# A12. Calculate YINT = FINT(Y) according to user's rounding # 23544# mode. The fint instruction is used in place of the # 23545# FPSP sint routine. The output is in fp0. # 23546# # 23547# A13. Check for LEN digits. # 23548# If the int operation results in more than LEN digits, # 23549# or less than LEN -1 digits, adjust ILOG and repeat from # 23550# A6. This test occurs only on the first pass.
If the # 23551# result is exactly 10^LEN, increment ILOG and divide # 23552# the mantissa by 10. # 23553# # 23554# A14. Convert the mantissa to bcd. # 23555# The binstr routine is used to convert the LEN digit # 23556# mantissa to bcd in memory. The input to binstr is # 23557# to be a fraction; i.e. (mantissa)/10^LEN and adjusted # 23558# such that the decimal point is to the left of bit 63. # 23559# The bcd digits are stored in the correct position in # 23560# the final string area in memory. # 23561# # 23562# A15. Convert the exponent to bcd. # 23563# As in A14 above, the exp is converted to bcd and the # 23564# digits are stored in the final string. # 23565# Test the length of the final exponent string. If the # 23566# length is 4, set operr. # 23567# # 23568# A16. Write sign bits to final string. # 23569# # 23570######################################################################### 23571 23572set BINDEC_FLG, EXC_TEMP # DENORM flag 23573 23574# Constants in extended precision: PLOG2 is log10(2) (about 0.30103); PLOG2UP1 is the same value with the last mantissa bit one higher. A3 multiplies by PLOG2 for a non-negative log estimate and by PLOG2UP1 for a negative one. 23575PLOG2: 23576 long 0x3FFD0000,0x9A209A84,0xFBCFF798,0x00000000 23577PLOG2UP1: 23578 long 0x3FFD0000,0x9A209A84,0xFBCFF799,0x00000000 23579 23580# Constants in single precision: 1.0, 2.0, 10.0 and 4933.0 in the first longword of each entry; the other three longwords are zero. 23581FONE: 23582 long 0x3F800000,0x00000000,0x00000000,0x00000000 23583FTWO: 23584 long 0x40000000,0x00000000,0x00000000,0x00000000 23585FTEN: 23586 long 0x41200000,0x00000000,0x00000000,0x00000000 23587F4933: 23588 long 0x459A2800,0x00000000,0x00000000,0x00000000 23589# RBDTBL: rounding mode used for the 10^ISCALE computation in A7, indexed by {FPCR[6:5],LAMBDA,sign(X)}; entries are 0 = RN, 2 = RM, 3 = RP. 23590RBDTBL: 23591 byte 0,0,0,0 23592 byte 3,3,2,2 23593 byte 3,2,2,3 23594 byte 2,3,3,2 23595 23596# Implementation Notes: 23597# 23598# The registers are used as follows: 23599# 23600# d0: scratch; LEN input to binstr 23601# d1: scratch 23602# d2: upper 32-bits of mantissa for binstr 23603# d3: scratch;lower 32-bits of mantissa for binstr 23604# d4: LEN 23605# d5: LAMBDA/ICTR 23606# d6: ILOG 23607# d7: k-factor 23608# a0: ptr for original operand/final result 23609# a1: scratch pointer 23610# a2: pointer to FP_X; abs(original value) in ext 
23611# fp0: scratch 23612# fp1: scratch 23613# fp2: scratch 23614# F_SCR1: 23615# F_SCR2: 23616# L_SCR1: 23617# L_SCR2: 23618 23619 global bindec 23620bindec: 23621 movm.l &0x3f20,-(%sp) # {%d2-%d7/%a2} 23622 fmovm.x &0x7,-(%sp) # {%fp0-%fp2} 23623 23624# A1. Set RM and size ext. Set SIGMA = sign input; 23625# The k-factor is saved for use in d7. Clear BINDEC_FLG for 23626# separating normalized/denormalized input. If the input 23627# is a denormalized number, set the BINDEC_FLG memory word 23628# to signal denorm. If the input is unnormalized, normalize 23629# the input and test for denormalized result. 23630# 23631 fmov.l &rm_mode*0x10,%fpcr # set RM and ext 23632 mov.l (%a0),L_SCR2(%a6) # save exponent for sign check 23633 mov.l %d0,%d7 # move k-factor to d7 23634 23635 clr.b BINDEC_FLG(%a6) # clr norm/denorm flag 23636 cmpi.b STAG(%a6),&DENORM # is input a DENORM? 23637 bne.w A2_str # no; input is a NORM 23638 23639# 23640# Normalize the denorm 23641# 23642un_de_norm: 23643 mov.w (%a0),%d0 23644 and.w &0x7fff,%d0 # strip sign of normalized exp 23645 mov.l 4(%a0),%d1 23646 mov.l 8(%a0),%d2 23647norm_loop: 23648 sub.w &1,%d0 23649 lsl.l &1,%d2 23650 roxl.l &1,%d1 23651 tst.l %d1 23652 bge.b norm_loop 23653# 23654# Test if the normalized input is denormalized 23655# 23656 tst.w %d0 23657 bgt.b pos_exp # if greater than zero, it is a norm 23658 st BINDEC_FLG(%a6) # set flag for denorm 23659pos_exp: 23660 and.w &0x7fff,%d0 # strip sign of normalized exp 23661 mov.w %d0,(%a0) 23662 mov.l %d1,4(%a0) 23663 mov.l %d2,8(%a0) 23664 23665# A2. Set X = abs(input). 23666# 23667A2_str: 23668 mov.l (%a0),FP_SCR1(%a6) # move input to work space 23669 mov.l 4(%a0),FP_SCR1+4(%a6) # move input to work space 23670 mov.l 8(%a0),FP_SCR1+8(%a6) # move input to work space 23671 and.l &0x7fffffff,FP_SCR1(%a6) # create abs(X) 23672 23673# A3. Compute ILOG. 23674# ILOG is the log base 10 of the input value. 
It is approx- 23675# imated by adding e + 0.f when the original value is viewed 23676# as 2^^e * 1.f in extended precision. This value is stored 23677# in d6. 23678# 23679# Register usage: 23680# Input/Output 23681# d0: k-factor/exponent 23682# d2: x/x 23683# d3: x/x 23684# d4: x/x 23685# d5: x/x 23686# d6: x/ILOG 23687# d7: k-factor/Unchanged 23688# a0: ptr for original operand/final result 23689# a1: x/x 23690# a2: x/x 23691# fp0: x/float(ILOG) 23692# fp1: x/x 23693# fp2: x/x 23694# F_SCR1:x/x 23695# F_SCR2:Abs(X)/Abs(X) with $3fff exponent 23696# L_SCR1:x/x 23697# L_SCR2:first word of X packed/Unchanged 23698 23699 tst.b BINDEC_FLG(%a6) # check for denorm 23700 beq.b A3_cont # if clr, continue with norm 23701 mov.l &-4933,%d6 # force ILOG = -4933 23702 bra.b A4_str 23703A3_cont: 23704 mov.w FP_SCR1(%a6),%d0 # move exp to d0 23705 mov.w &0x3fff,FP_SCR1(%a6) # replace exponent with 0x3fff 23706 fmov.x FP_SCR1(%a6),%fp0 # now fp0 has 1.f 23707 sub.w &0x3fff,%d0 # strip off bias 23708 fadd.w %d0,%fp0 # add in exp 23709 fsub.s FONE(%pc),%fp0 # subtract off 1.0 23710 fbge.w pos_res # if pos, branch 23711 fmul.x PLOG2UP1(%pc),%fp0 # if neg, mul by LOG2UP1 23712 fmov.l %fp0,%d6 # put ILOG in d6 as a lword 23713 bra.b A4_str # go move out ILOG 23714pos_res: 23715 fmul.x PLOG2(%pc),%fp0 # if pos, mul by LOG2 23716 fmov.l %fp0,%d6 # put ILOG in d6 as a lword 23717 23718 23719# A4. Clr INEX bit. 23720# The operation in A3 above may have set INEX2. 23721 23722A4_str: 23723 fmov.l &0,%fpsr # zero all of fpsr - nothing needed 23724 23725 23726# A5. Set ICTR = 0; 23727# ICTR is a flag used in A13. It must be set before the 23728# loop entry A6. The lower word of d5 is used for ICTR. 23729 23730 clr.w %d5 # clear ICTR 23731 23732# A6. Calculate LEN. 23733# LEN is the number of digits to be displayed. 
The k-factor 23734# can dictate either the total number of digits, if it is 23735# a positive number, or the number of digits after the 23736# original decimal point which are to be included as 23737# significant. See the 68882 manual for examples. 23738# If LEN is computed to be greater than 17, set OPERR in 23739# USER_FPSR. LEN is stored in d4. 23740# 23741# Register usage: 23742# Input/Output 23743# d0: exponent/Unchanged 23744# d2: x/x/scratch 23745# d3: x/x 23746# d4: exc picture/LEN 23747# d5: ICTR/Unchanged 23748# d6: ILOG/Unchanged 23749# d7: k-factor/Unchanged 23750# a0: ptr for original operand/final result 23751# a1: x/x 23752# a2: x/x 23753# fp0: float(ILOG)/Unchanged 23754# fp1: x/x 23755# fp2: x/x 23756# F_SCR1:x/x 23757# F_SCR2:Abs(X) with $3fff exponent/Unchanged 23758# L_SCR1:x/x 23759# L_SCR2:first word of X packed/Unchanged 23760 23761A6_str: 23762 tst.l %d7 # branch on sign of k 23763 ble.b k_neg # if k <= 0, LEN = ILOG + 1 - k 23764 mov.l %d7,%d4 # if k > 0, LEN = k 23765 bra.b len_ck # skip to LEN check 23766k_neg: 23767 mov.l %d6,%d4 # first load ILOG to d4 23768 sub.l %d7,%d4 # subtract off k 23769 addq.l &1,%d4 # add in the 1 23770len_ck: 23771 tst.l %d4 # LEN check: branch on sign of LEN 23772 ble.b LEN_ng # if neg, set LEN = 1 23773 cmp.l %d4,&17 # test if LEN > 17 23774 ble.b A7_str # if not, forget it 23775 mov.l &17,%d4 # set max LEN = 17 23776 tst.l %d7 # if negative, never set OPERR 23777 ble.b A7_str # if positive, continue 23778 or.l &opaop_mask,USER_FPSR(%a6) # set OPERR & AIOP in USER_FPSR 23779 bra.b A7_str # finished here 23780LEN_ng: 23781 mov.l &1,%d4 # min LEN is 1 23782 23783 23784# A7. Calculate SCALE. 23785# SCALE is equal to 10^ISCALE, where ISCALE is the number 23786# of decimal places needed to insure LEN integer digits 23787# in the output before conversion to bcd. LAMBDA is the sign 23788# of ISCALE, used in A9. 
Fp1 contains 10^^(abs(ISCALE)) using 23789# the rounding mode as given in the following table (see 23790# Coonen, p. 7.23 as ref.; however, the SCALE variable is 23791# of opposite sign in bindec.sa from Coonen). 23792# 23793# Initial USE 23794# FPCR[6:5] LAMBDA SIGN(X) FPCR[6:5] 23795# ---------------------------------------------- 23796# RN 00 0 0 00/0 RN 23797# RN 00 0 1 00/0 RN 23798# RN 00 1 0 00/0 RN 23799# RN 00 1 1 00/0 RN 23800# RZ 01 0 0 11/3 RP 23801# RZ 01 0 1 11/3 RP 23802# RZ 01 1 0 10/2 RM 23803# RZ 01 1 1 10/2 RM 23804# RM 10 0 0 11/3 RP 23805# RM 10 0 1 10/2 RM 23806# RM 10 1 0 10/2 RM 23807# RM 10 1 1 11/3 RP 23808# RP 11 0 0 10/2 RM 23809# RP 11 0 1 11/3 RP 23810# RP 11 1 0 11/3 RP 23811# RP 11 1 1 10/2 RM 23812# 23813# Register usage: 23814# Input/Output 23815# d0: exponent/scratch - final is 0 23816# d2: x/0 or 24 for A9 23817# d3: x/scratch - offset ptr into PTENRM array 23818# d4: LEN/Unchanged 23819# d5: 0/ICTR:LAMBDA 23820# d6: ILOG/ILOG or k if ((k<=0)&(ILOG<k)) 23821# d7: k-factor/Unchanged 23822# a0: ptr for original operand/final result 23823# a1: x/ptr to PTENRM array 23824# a2: x/x 23825# fp0: float(ILOG)/Unchanged 23826# fp1: x/10^ISCALE 23827# fp2: x/x 23828# F_SCR1:x/x 23829# F_SCR2:Abs(X) with $3fff exponent/Unchanged 23830# L_SCR1:x/x 23831# L_SCR2:first word of X packed/Unchanged 23832 23833A7_str: 23834 tst.l %d7 # test sign of k 23835 bgt.b k_pos # if pos and > 0, skip this 23836 cmp.l %d7,%d6 # test k - ILOG 23837 blt.b k_pos # if ILOG >= k, skip this 23838 mov.l %d7,%d6 # if ((k<0) & (ILOG < k)) ILOG = k 23839k_pos: 23840 mov.l %d6,%d0 # calc ILOG + 1 - LEN in d0 23841 addq.l &1,%d0 # add the 1 23842 sub.l %d4,%d0 # sub off LEN 23843 swap %d5 # use upper word of d5 for LAMBDA 23844 clr.w %d5 # set it zero initially 23845 clr.w %d2 # set up d2 for very small case 23846 tst.l %d0 # test sign of ISCALE 23847 bge.b iscale # if pos, skip next inst 23848 addq.w &1,%d5 # if neg, set LAMBDA true 23849 cmp.l %d0,&0xffffecd4 # test 
iscale <= -4908 23850 bgt.b no_inf # if false, skip rest 23851 add.l &24,%d0 # add in 24 to iscale 23852 mov.l &24,%d2 # put 24 in d2 for A9 23853no_inf: 23854 neg.l %d0 # and take abs of ISCALE 23855iscale: 23856 fmov.s FONE(%pc),%fp1 # init fp1 to 1 23857 bfextu USER_FPCR(%a6){&26:&2},%d1 # get initial rmode bits 23858 lsl.w &1,%d1 # put them in bits 2:1 23859 add.w %d5,%d1 # add in LAMBDA 23860 lsl.w &1,%d1 # put them in bits 3:1 23861 tst.l L_SCR2(%a6) # test sign of original x 23862 bge.b x_pos # if pos, don't set bit 0 23863 addq.l &1,%d1 # if neg, set bit 0 23864x_pos: 23865 lea.l RBDTBL(%pc),%a2 # load rbdtbl base 23866 mov.b (%a2,%d1),%d3 # load d3 with new rmode 23867 lsl.l &4,%d3 # put bits in proper position 23868 fmov.l %d3,%fpcr # load bits into fpu 23869 lsr.l &4,%d3 # put bits in proper position 23870 tst.b %d3 # decode new rmode for pten table 23871 bne.b not_rn # if zero, it is RN 23872 lea.l PTENRN(%pc),%a1 # load a1 with RN table base 23873 bra.b rmode # exit decode 23874not_rn: 23875 lsr.b &1,%d3 # get lsb in carry 23876 bcc.b not_rp2 # if carry clear, it is RM 23877 lea.l PTENRP(%pc),%a1 # load a1 with RP table base 23878 bra.b rmode # exit decode 23879not_rp2: 23880 lea.l PTENRM(%pc),%a1 # load a1 with RM table base 23881rmode: 23882 clr.l %d3 # clr table index 23883e_loop2: 23884 lsr.l &1,%d0 # shift next bit into carry 23885 bcc.b e_next2 # if zero, skip the mul 23886 fmul.x (%a1,%d3),%fp1 # mul by 10**(d3_bit_no) 23887e_next2: 23888 add.l &12,%d3 # inc d3 to next pwrten table entry 23889 tst.l %d0 # test if ISCALE is zero 23890 bne.b e_loop2 # if not, loop 23891 23892# A8. Clr INEX; Force RZ. 23893# The operation in A3 above may have set INEX2. 23894# RZ mode is forced for the scaling operation to insure 23895# only one rounding error. The grs bits are collected in 23896# the INEX flag for use in A10. 
23897# 23898# Register usage: 23899# Input/Output 23900 23901 fmov.l &0,%fpsr # clr INEX 23902 fmov.l &rz_mode*0x10,%fpcr # set RZ rounding mode 23903 23904# A9. Scale X -> Y. 23905# The mantissa is scaled to the desired number of significant 23906# digits. The excess digits are collected in INEX2. If mul, 23907# Check d2 for excess 10 exponential value. If not zero, 23908# the iscale value would have caused the pwrten calculation 23909# to overflow. Only a negative iscale can cause this, so 23910# multiply by 10^(d2), which is now only allowed to be 24, 23911# with a multiply by 10^8 and 10^16, which is exact since 23912# 10^24 is exact. If the input was denormalized, we must 23913# create a busy stack frame with the mul command and the 23914# two operands, and allow the fpu to complete the multiply. 23915# 23916# Register usage: 23917# Input/Output 23918# d0: FPCR with RZ mode/Unchanged 23919# d2: 0 or 24/unchanged 23920# d3: x/x 23921# d4: LEN/Unchanged 23922# d5: ICTR:LAMBDA 23923# d6: ILOG/Unchanged 23924# d7: k-factor/Unchanged 23925# a0: ptr for original operand/final result 23926# a1: ptr to PTENRM array/Unchanged 23927# a2: x/x 23928# fp0: float(ILOG)/X adjusted for SCALE (Y) 23929# fp1: 10^ISCALE/Unchanged 23930# fp2: x/x 23931# F_SCR1:x/x 23932# F_SCR2:Abs(X) with $3fff exponent/Unchanged 23933# L_SCR1:x/x 23934# L_SCR2:first word of X packed/Unchanged 23935 23936A9_str: 23937 fmov.x (%a0),%fp0 # load X from memory 23938 fabs.x %fp0 # use abs(X) 23939 tst.w %d5 # LAMBDA is in lower word of d5 23940 bne.b sc_mul # if neg (LAMBDA = 1), scale by mul 23941 fdiv.x %fp1,%fp0 # calculate X / SCALE -> Y to fp0 23942 bra.w A10_st # branch to A10 23943 23944sc_mul: 23945 tst.b BINDEC_FLG(%a6) # check for denorm 23946 beq.w A9_norm # if norm, continue with mul 23947 23948# for DENORM, we must calculate: 23949# fp0 = input_op * 10^ISCALE * 10^24 23950# since the input operand is a DENORM, we can't multiply it directly. 
23951# so, we do the multiplication of the exponents and mantissas separately. 23952# in this way, we avoid underflow on intermediate stages of the 23953# multiplication and guarantee a result without exception. 23954 fmovm.x &0x2,-(%sp) # save 10^ISCALE to stack 23955 23956 mov.w (%sp),%d3 # grab exponent 23957 andi.w &0x7fff,%d3 # clear sign 23958 ori.w &0x8000,(%a0) # make DENORM exp negative 23959 add.w (%a0),%d3 # add DENORM exp to 10^ISCALE exp 23960 subi.w &0x3fff,%d3 # subtract BIAS 23961 add.w 36(%a1),%d3 23962 subi.w &0x3fff,%d3 # subtract BIAS 23963 add.w 48(%a1),%d3 23964 subi.w &0x3fff,%d3 # subtract BIAS 23965 23966 bmi.w sc_mul_err # is result is DENORM, punt!!! 23967 23968 andi.w &0x8000,(%sp) # keep sign 23969 or.w %d3,(%sp) # insert new exponent 23970 andi.w &0x7fff,(%a0) # clear sign bit on DENORM again 23971 mov.l 0x8(%a0),-(%sp) # put input op mantissa on stk 23972 mov.l 0x4(%a0),-(%sp) 23973 mov.l &0x3fff0000,-(%sp) # force exp to zero 23974 fmovm.x (%sp)+,&0x80 # load normalized DENORM into fp0 23975 fmul.x (%sp)+,%fp0 23976 23977# fmul.x 36(%a1),%fp0 # multiply fp0 by 10^8 23978# fmul.x 48(%a1),%fp0 # multiply fp0 by 10^16 23979 mov.l 36+8(%a1),-(%sp) # get 10^8 mantissa 23980 mov.l 36+4(%a1),-(%sp) 23981 mov.l &0x3fff0000,-(%sp) # force exp to zero 23982 mov.l 48+8(%a1),-(%sp) # get 10^16 mantissa 23983 mov.l 48+4(%a1),-(%sp) 23984 mov.l &0x3fff0000,-(%sp)# force exp to zero 23985 fmul.x (%sp)+,%fp0 # multiply fp0 by 10^8 23986 fmul.x (%sp)+,%fp0 # multiply fp0 by 10^16 23987 bra.b A10_st 23988 23989sc_mul_err: 23990 bra.b sc_mul_err 23991 23992A9_norm: 23993 tst.w %d2 # test for small exp case 23994 beq.b A9_con # if zero, continue as normal 23995 fmul.x 36(%a1),%fp0 # multiply fp0 by 10^8 23996 fmul.x 48(%a1),%fp0 # multiply fp0 by 10^16 23997A9_con: 23998 fmul.x %fp1,%fp0 # calculate X * SCALE -> Y to fp0 23999 24000# A10. Or in INEX. 24001# If INEX is set, round error occurred. 
This is compensated 24002# for by 'or-ing' in the INEX2 flag to the lsb of Y. 24003# 24004# Register usage: 24005# Input/Output 24006# d0: FPCR with RZ mode/FPSR with INEX2 isolated 24007# d2: x/x 24008# d3: x/x 24009# d4: LEN/Unchanged 24010# d5: ICTR:LAMBDA 24011# d6: ILOG/Unchanged 24012# d7: k-factor/Unchanged 24013# a0: ptr for original operand/final result 24014# a1: ptr to PTENxx array/Unchanged 24015# a2: x/ptr to FP_SCR1(a6) 24016# fp0: Y/Y with lsb adjusted 24017# fp1: 10^ISCALE/Unchanged 24018# fp2: x/x 24019 24020A10_st: 24021 fmov.l %fpsr,%d0 # get FPSR 24022 fmov.x %fp0,FP_SCR1(%a6) # move Y to memory 24023 lea.l FP_SCR1(%a6),%a2 # load a2 with ptr to FP_SCR1 24024 btst &9,%d0 # check if INEX2 set 24025 beq.b A11_st # if clear, skip rest 24026 or.l &1,8(%a2) # or in 1 to lsb of mantissa 24027 fmov.x FP_SCR1(%a6),%fp0 # write adjusted Y back to fpu 24028 24029 24030# A11. Restore original FPCR; set size ext. 24031# Perform FINT operation in the user's rounding mode. Keep 24032# the size to extended. The sintdo entry point in the sint 24033# routine expects the FPCR value to be in USER_FPCR for 24034# mode and precision. The original FPCR is saved in L_SCR1. 24035 24036A11_st: 24037 mov.l USER_FPCR(%a6),L_SCR1(%a6) # save it for later 24038 and.l &0x00000030,USER_FPCR(%a6) # set size to ext, 24039# ;block exceptions 24040 24041 24042# A12. Calculate YINT = FINT(Y) according to user's rounding mode. 24043# The FPSP routine sintd0 is used. The output is in fp0. 
24044# 24045# Register usage: 24046# Input/Output 24047# d0: FPSR with AINEX cleared/FPCR with size set to ext 24048# d2: x/x/scratch 24049# d3: x/x 24050# d4: LEN/Unchanged 24051# d5: ICTR:LAMBDA/Unchanged 24052# d6: ILOG/Unchanged 24053# d7: k-factor/Unchanged 24054# a0: ptr for original operand/src ptr for sintdo 24055# a1: ptr to PTENxx array/Unchanged 24056# a2: ptr to FP_SCR1(a6)/Unchanged 24057# a6: temp pointer to FP_SCR1(a6) - orig value saved and restored 24058# fp0: Y/YINT 24059# fp1: 10^ISCALE/Unchanged 24060# fp2: x/x 24061# F_SCR1:x/x 24062# F_SCR2:Y adjusted for inex/Y with original exponent 24063# L_SCR1:x/original USER_FPCR 24064# L_SCR2:first word of X packed/Unchanged 24065 24066A12_st: 24067 movm.l &0xc0c0,-(%sp) # save regs used by sintd0 {%d0-%d1/%a0-%a1} 24068 mov.l L_SCR1(%a6),-(%sp) 24069 mov.l L_SCR2(%a6),-(%sp) 24070 24071 lea.l FP_SCR1(%a6),%a0 # a0 is ptr to FP_SCR1(a6) 24072 fmov.x %fp0,(%a0) # move Y to memory at FP_SCR1(a6) 24073 tst.l L_SCR2(%a6) # test sign of original operand 24074 bge.b do_fint12 # if pos, use Y 24075 or.l &0x80000000,(%a0) # if neg, use -Y 24076do_fint12: 24077 mov.l USER_FPSR(%a6),-(%sp) 24078# bsr sintdo # sint routine returns int in fp0 24079 24080 fmov.l USER_FPCR(%a6),%fpcr 24081 fmov.l &0x0,%fpsr # clear the AEXC bits!!! 24082## mov.l USER_FPCR(%a6),%d0 # ext prec/keep rnd mode 24083## andi.l &0x00000030,%d0 24084## fmov.l %d0,%fpcr 24085 fint.x FP_SCR1(%a6),%fp0 # do fint() 24086 fmov.l %fpsr,%d0 24087 or.w %d0,FPSR_EXCEPT(%a6) 24088## fmov.l &0x0,%fpcr 24089## fmov.l %fpsr,%d0 # don't keep ccodes 24090## or.w %d0,FPSR_EXCEPT(%a6) 24091 24092 mov.b (%sp),USER_FPSR(%a6) 24093 add.l &4,%sp 24094 24095 mov.l (%sp)+,L_SCR2(%a6) 24096 mov.l (%sp)+,L_SCR1(%a6) 24097 movm.l (%sp)+,&0x303 # restore regs used by sint {%d0-%d1/%a0-%a1} 24098 24099 mov.l L_SCR2(%a6),FP_SCR1(%a6) # restore original exponent 24100 mov.l L_SCR1(%a6),USER_FPCR(%a6) # restore user's FPCR 24101 24102# A13. Check for LEN digits. 
If the int operation results in more than LEN digits, 24104# or less than LEN -1 digits, adjust ILOG and repeat from 24105# A6. This test occurs only on the first pass. If the 24106# result is exactly 10^LEN, increment ILOG and divide 24107# the mantissa by 10. The calculation of 10^LEN cannot 24108# be inexact, since all powers of ten up to 10^27 are exact 24109# in extended precision, so the use of a previous power-of-ten 24110# table will introduce no error. 24111# 24112# 24113# Register usage: 24114# Input/Output 24115# d0: FPCR with size set to ext/scratch final = 0 24116# d2: x/x 24117# d3: x/scratch final = x 24118# d4: LEN/LEN adjusted 24119# d5: ICTR:LAMBDA/LAMBDA:ICTR 24120# d6: ILOG/ILOG adjusted 24121# d7: k-factor/Unchanged 24122# a0: pointer into memory for packed bcd string formation 24123# a1: ptr to PTENxx array/Unchanged 24124# a2: ptr to FP_SCR1(a6)/Unchanged 24125# fp0: int portion of Y/abs(YINT) adjusted 24126# fp1: 10^ISCALE/Unchanged 24127# fp2: x/10^LEN 24128# F_SCR1:x/x 24129# F_SCR2:Y with original exponent/Unchanged 24130# L_SCR1:original USER_FPCR/Unchanged 24131# L_SCR2:first word of X packed/Unchanged 24132 24133A13_st: 24134 swap %d5 # put ICTR in lower word of d5 24135 tst.w %d5 # check if ICTR = 0 24136 bne not_zr # if non-zero, go to second test 24137# 24138# Compute 10^(LEN-1) 24139# 24140 fmov.s FONE(%pc),%fp2 # init fp2 to 1.0 24141 mov.l %d4,%d0 # put LEN in d0 24142 subq.l &1,%d0 # d0 = LEN -1 24143 clr.l %d3 # clr table index 24144l_loop: 24145 lsr.l &1,%d0 # shift next bit into carry 24146 bcc.b l_next # if zero, skip the mul 24147 fmul.x (%a1,%d3),%fp2 # mul by 10**(d3_bit_no) 24148l_next: 24149 add.l &12,%d3 # inc d3 to next pwrten table entry 24150 tst.l %d0 # test if any bits of LEN-1 remain 24151 bne.b l_loop # if so, loop 24152# 24153# 10^(LEN-1) is computed for this test and A14. If the input was 24154# denormalized, check only the case in which YINT > 10^LEN. 
24155# 24156 tst.b BINDEC_FLG(%a6) # check if input was norm 24157 beq.b A13_con # if norm, continue with checking 24158 fabs.x %fp0 # take abs of YINT 24159 bra test_2 24160# 24161# Compare abs(YINT) to 10^(LEN-1) and 10^LEN 24162# 24163A13_con: 24164 fabs.x %fp0 # take abs of YINT 24165 fcmp.x %fp0,%fp2 # compare abs(YINT) with 10^(LEN-1) 24166 fbge.w test_2 # if greater, do next test 24167 subq.l &1,%d6 # subtract 1 from ILOG 24168 mov.w &1,%d5 # set ICTR 24169 fmov.l &rm_mode*0x10,%fpcr # set rmode to RM 24170 fmul.s FTEN(%pc),%fp2 # compute 10^LEN 24171 bra.w A6_str # return to A6 and recompute YINT 24172test_2: 24173 fmul.s FTEN(%pc),%fp2 # compute 10^LEN 24174 fcmp.x %fp0,%fp2 # compare abs(YINT) with 10^LEN 24175 fblt.w A14_st # if less, all is ok, go to A14 24176 fbgt.w fix_ex # if greater, fix and redo 24177 fdiv.s FTEN(%pc),%fp0 # if equal, divide by 10 24178 addq.l &1,%d6 # and inc ILOG 24179 bra.b A14_st # and continue elsewhere 24180fix_ex: 24181 addq.l &1,%d6 # increment ILOG by 1 24182 mov.w &1,%d5 # set ICTR 24183 fmov.l &rm_mode*0x10,%fpcr # set rmode to RM 24184 bra.w A6_str # return to A6 and recompute YINT 24185# 24186# Since ICTR <> 0, we have already been through one adjustment, 24187# and shouldn't have another; this is to check if abs(YINT) = 10^LEN 24188# 10^LEN is again computed using whatever table is in a1 since the 24189# value calculated cannot be inexact. 
24190# 24191not_zr: 24192 fmov.s FONE(%pc),%fp2 # init fp2 to 1.0 24193 mov.l %d4,%d0 # put LEN in d0 24194 clr.l %d3 # clr table index 24195z_loop: 24196 lsr.l &1,%d0 # shift next bit into carry 24197 bcc.b z_next # if zero, skip the mul 24198 fmul.x (%a1,%d3),%fp2 # mul by 10**(d3_bit_no) 24199z_next: 24200 add.l &12,%d3 # inc d3 to next pwrten table entry 24201 tst.l %d0 # test if LEN is zero 24202 bne.b z_loop # if not, loop 24203 fabs.x %fp0 # get abs(YINT) 24204 fcmp.x %fp0,%fp2 # check if abs(YINT) = 10^LEN 24205 fbneq.w A14_st # if not, skip this 24206 fdiv.s FTEN(%pc),%fp0 # divide abs(YINT) by 10 24207 addq.l &1,%d6 # and inc ILOG by 1 24208 addq.l &1,%d4 # and inc LEN 24209 fmul.s FTEN(%pc),%fp2 # if LEN++, the get 10^^LEN 24210 24211# A14. Convert the mantissa to bcd. 24212# The binstr routine is used to convert the LEN digit 24213# mantissa to bcd in memory. The input to binstr is 24214# to be a fraction; i.e. (mantissa)/10^LEN and adjusted 24215# such that the decimal point is to the left of bit 63. 24216# The bcd digits are stored in the correct position in 24217# the final string area in memory. 
24218# 24219# 24220# Register usage: 24221# Input/Output 24222# d0: x/LEN call to binstr - final is 0 24223# d1: x/0 24224# d2: x/ms 32-bits of mant of abs(YINT) 24225# d3: x/ls 32-bits of mant of abs(YINT) 24226# d4: LEN/Unchanged 24227# d5: ICTR:LAMBDA/LAMBDA:ICTR 24228# d6: ILOG 24229# d7: k-factor/Unchanged 24230# a0: pointer into memory for packed bcd string formation 24231# /ptr to first mantissa byte in result string 24232# a1: ptr to PTENxx array/Unchanged 24233# a2: ptr to FP_SCR1(a6)/Unchanged 24234# fp0: int portion of Y/abs(YINT) adjusted 24235# fp1: 10^ISCALE/Unchanged 24236# fp2: 10^LEN/Unchanged 24237# F_SCR1:x/Work area for final result 24238# F_SCR2:Y with original exponent/Unchanged 24239# L_SCR1:original USER_FPCR/Unchanged 24240# L_SCR2:first word of X packed/Unchanged 24241 24242A14_st: 24243 fmov.l &rz_mode*0x10,%fpcr # force rz for conversion 24244 fdiv.x %fp2,%fp0 # divide abs(YINT) by 10^LEN 24245 lea.l FP_SCR0(%a6),%a0 24246 fmov.x %fp0,(%a0) # move abs(YINT)/10^LEN to memory 24247 mov.l 4(%a0),%d2 # move 2nd word of FP_RES to d2 24248 mov.l 8(%a0),%d3 # move 3rd word of FP_RES to d3 24249 clr.l 4(%a0) # zero word 2 of FP_RES 24250 clr.l 8(%a0) # zero word 3 of FP_RES 24251 mov.l (%a0),%d0 # move exponent to d0 24252 swap %d0 # put exponent in lower word 24253 beq.b no_sft # if zero, don't shift 24254 sub.l &0x3ffd,%d0 # sub bias less 2 to make fract 24255 tst.l %d0 # check if > 1 24256 bgt.b no_sft # if so, don't shift 24257 neg.l %d0 # make exp positive 24258m_loop: 24259 lsr.l &1,%d2 # shift d2:d3 right, add 0s 24260 roxr.l &1,%d3 # the number of places 24261 dbf.w %d0,m_loop # given in d0 24262no_sft: 24263 tst.l %d2 # check for mantissa of zero 24264 bne.b no_zr # if not, go on 24265 tst.l %d3 # continue zero check 24266 beq.b zer_m # if zero, go directly to binstr 24267no_zr: 24268 clr.l %d1 # put zero in d1 for addx 24269 add.l &0x00000080,%d3 # inc at bit 7 24270 addx.l %d1,%d2 # continue inc 24271 and.l &0xffffff80,%d3 # strip off 
lsb not used by 882 24272zer_m: 24273 mov.l %d4,%d0 # put LEN in d0 for binstr call 24274 addq.l &3,%a0 # a0 points to M16 byte in result 24275 bsr binstr # call binstr to convert mant 24276 24277 24278# A15. Convert the exponent to bcd. 24279# As in A14 above, the exp is converted to bcd and the 24280# digits are stored in the final string. 24281# 24282# Digits are stored in L_SCR1(a6) on return from BINDEC as: 24283# 24284# 32 16 15 0 24285# ----------------------------------------- 24286# | 0 | e3 | e2 | e1 | e4 | X | X | X | 24287# ----------------------------------------- 24288# 24289# And are moved into their proper places in FP_SCR0. If digit e4 24290# is non-zero, OPERR is signaled. In all cases, all 4 digits are 24291# written as specified in the 881/882 manual for packed decimal. 24292# 24293# Register usage: 24294# Input/Output 24295# d0: x/LEN call to binstr - final is 0 24296# d1: x/scratch (0);shift count for final exponent packing 24297# d2: x/ms 32-bits of exp fraction/scratch 24298# d3: x/ls 32-bits of exp fraction 24299# d4: LEN/Unchanged 24300# d5: ICTR:LAMBDA/LAMBDA:ICTR 24301# d6: ILOG 24302# d7: k-factor/Unchanged 24303# a0: ptr to result string/ptr to L_SCR1(a6) 24304# a1: ptr to PTENxx array/Unchanged 24305# a2: ptr to FP_SCR1(a6)/Unchanged 24306# fp0: abs(YINT) adjusted/float(ILOG) 24307# fp1: 10^ISCALE/Unchanged 24308# fp2: 10^LEN/Unchanged 24309# F_SCR1:Work area for final result/BCD result 24310# F_SCR2:Y with original exponent/ILOG/10^4 24311# L_SCR1:original USER_FPCR/Exponent digits on return from binstr 24312# L_SCR2:first word of X packed/Unchanged 24313 24314A15_st: 24315 tst.b BINDEC_FLG(%a6) # check for denorm 24316 beq.b not_denorm 24317 ftest.x %fp0 # test for zero 24318 fbeq.w den_zero # if zero, use k-factor or 4933 24319 fmov.l %d6,%fp0 # float ILOG 24320 fabs.x %fp0 # get abs of ILOG 24321 bra.b convrt 24322den_zero: 24323 tst.l %d7 # check sign of the k-factor 24324 blt.b use_ilog # if negative, use ILOG 24325 fmov.s 
F4933(%pc),%fp0 # force exponent to 4933 24326 bra.b convrt # do it 24327use_ilog: 24328 fmov.l %d6,%fp0 # float ILOG 24329 fabs.x %fp0 # get abs of ILOG 24330 bra.b convrt 24331not_denorm: 24332 ftest.x %fp0 # test for zero 24333 fbneq.w not_zero # if zero, force exponent 24334 fmov.s FONE(%pc),%fp0 # force exponent to 1 24335 bra.b convrt # do it 24336not_zero: 24337 fmov.l %d6,%fp0 # float ILOG 24338 fabs.x %fp0 # get abs of ILOG 24339convrt: 24340 fdiv.x 24(%a1),%fp0 # compute ILOG/10^4 24341 fmov.x %fp0,FP_SCR1(%a6) # store fp0 in memory 24342 mov.l 4(%a2),%d2 # move word 2 to d2 24343 mov.l 8(%a2),%d3 # move word 3 to d3 24344 mov.w (%a2),%d0 # move exp to d0 24345 beq.b x_loop_fin # if zero, skip the shift 24346 sub.w &0x3ffd,%d0 # subtract off bias 24347 neg.w %d0 # make exp positive 24348x_loop: 24349 lsr.l &1,%d2 # shift d2:d3 right 24350 roxr.l &1,%d3 # the number of places 24351 dbf.w %d0,x_loop # given in d0 24352x_loop_fin: 24353 clr.l %d1 # put zero in d1 for addx 24354 add.l &0x00000080,%d3 # inc at bit 6 24355 addx.l %d1,%d2 # continue inc 24356 and.l &0xffffff80,%d3 # strip off lsb not used by 882 24357 mov.l &4,%d0 # put 4 in d0 for binstr call 24358 lea.l L_SCR1(%a6),%a0 # a0 is ptr to L_SCR1 for exp digits 24359 bsr binstr # call binstr to convert exp 24360 mov.l L_SCR1(%a6),%d0 # load L_SCR1 lword to d0 24361 mov.l &12,%d1 # use d1 for shift count 24362 lsr.l %d1,%d0 # shift d0 right by 12 24363 bfins %d0,FP_SCR0(%a6){&4:&12} # put e3:e2:e1 in FP_SCR0 24364 lsr.l %d1,%d0 # shift d0 right by 12 24365 bfins %d0,FP_SCR0(%a6){&16:&4} # put e4 in FP_SCR0 24366 tst.b %d0 # check if e4 is zero 24367 beq.b A16_st # if zero, skip rest 24368 or.l &opaop_mask,USER_FPSR(%a6) # set OPERR & AIOP in USER_FPSR 24369 24370 24371# A16. Write sign bits to final string. 24372# Sigma is bit 31 of initial value; RHO is bit 31 of d6 (ILOG). 
24373# 24374# Register usage: 24375# Input/Output 24376# d0: x/scratch - final is x 24377# d2: x/x 24378# d3: x/x 24379# d4: LEN/Unchanged 24380# d5: ICTR:LAMBDA/LAMBDA:ICTR 24381# d6: ILOG/ILOG adjusted 24382# d7: k-factor/Unchanged 24383# a0: ptr to L_SCR1(a6)/Unchanged 24384# a1: ptr to PTENxx array/Unchanged 24385# a2: ptr to FP_SCR1(a6)/Unchanged 24386# fp0: float(ILOG)/Unchanged 24387# fp1: 10^ISCALE/Unchanged 24388# fp2: 10^LEN/Unchanged 24389# F_SCR1:BCD result with correct signs 24390# F_SCR2:ILOG/10^4 24391# L_SCR1:Exponent digits on return from binstr 24392# L_SCR2:first word of X packed/Unchanged 24393 24394A16_st: 24395 clr.l %d0 # clr d0 for collection of signs 24396 and.b &0x0f,FP_SCR0(%a6) # clear first nibble of FP_SCR0 24397 tst.l L_SCR2(%a6) # check sign of original mantissa 24398 bge.b mant_p # if pos, don't set SM 24399 mov.l &2,%d0 # move 2 in to d0 for SM 24400mant_p: 24401 tst.l %d6 # check sign of ILOG 24402 bge.b wr_sgn # if pos, don't set SE 24403 addq.l &1,%d0 # set bit 0 in d0 for SE 24404wr_sgn: 24405 bfins %d0,FP_SCR0(%a6){&0:&2} # insert SM and SE into FP_SCR0 24406 24407# Clean up and restore all registers used. 
# Epilogue of the conversion routine that ends here: clear FPSR, pop
# {fp0-fp2} and {d2-d7/a2} (presumably the registers that routine pushed
# on entry -- confirm against its prologue further up) and return.

	fmov.l	&0,%fpsr		# clear possible inex2/ainex bits
	fmovm.x	(%sp)+,&0xe0		# {%fp0-%fp2}
	movm.l	(%sp)+,&0x4fc		# {%d2-%d7/%a2}
	rts

#
# Power-of-ten tables PTENRN, PTENRP and PTENRM.
#
# Each table holds 13 entries of three longwords (12 bytes, loaded with
# fmul.x/fdiv.x as extended precision) for 10^(2^n), n = 0..12.  Users
# index them with a byte offset that advances by 12 per exponent bit,
# e.g. 24(table) is the 10^4 entry.
#
# The three tables are identical for 10^1 .. 10^16 and differ only in
# the low-order mantissa bits of some of the larger entries.
# NOTE(review): the RN/RP/RM suffixes presumably mean round-to-nearest /
# toward plus / toward minus -- confirm against the code that loads the
# table pointer.
#
	global	PTENRN
PTENRN:
	long	0x40020000,0xA0000000,0x00000000	# 10 ^ 1
	long	0x40050000,0xC8000000,0x00000000	# 10 ^ 2
	long	0x400C0000,0x9C400000,0x00000000	# 10 ^ 4
	long	0x40190000,0xBEBC2000,0x00000000	# 10 ^ 8
	long	0x40340000,0x8E1BC9BF,0x04000000	# 10 ^ 16
	long	0x40690000,0x9DC5ADA8,0x2B70B59E	# 10 ^ 32
	long	0x40D30000,0xC2781F49,0xFFCFA6D5	# 10 ^ 64
	long	0x41A80000,0x93BA47C9,0x80E98CE0	# 10 ^ 128
	long	0x43510000,0xAA7EEBFB,0x9DF9DE8E	# 10 ^ 256
	long	0x46A30000,0xE319A0AE,0xA60E91C7	# 10 ^ 512
	long	0x4D480000,0xC9767586,0x81750C17	# 10 ^ 1024
	long	0x5A920000,0x9E8B3B5D,0xC53D5DE5	# 10 ^ 2048
	long	0x75250000,0xC4605202,0x8A20979B	# 10 ^ 4096

	global	PTENRP
PTENRP:
	long	0x40020000,0xA0000000,0x00000000	# 10 ^ 1
	long	0x40050000,0xC8000000,0x00000000	# 10 ^ 2
	long	0x400C0000,0x9C400000,0x00000000	# 10 ^ 4
	long	0x40190000,0xBEBC2000,0x00000000	# 10 ^ 8
	long	0x40340000,0x8E1BC9BF,0x04000000	# 10 ^ 16
	long	0x40690000,0x9DC5ADA8,0x2B70B59E	# 10 ^ 32
	long	0x40D30000,0xC2781F49,0xFFCFA6D6	# 10 ^ 64
	long	0x41A80000,0x93BA47C9,0x80E98CE0	# 10 ^ 128
	long	0x43510000,0xAA7EEBFB,0x9DF9DE8E	# 10 ^ 256
	long	0x46A30000,0xE319A0AE,0xA60E91C7	# 10 ^ 512
	long	0x4D480000,0xC9767586,0x81750C18	# 10 ^ 1024
	long	0x5A920000,0x9E8B3B5D,0xC53D5DE5	# 10 ^ 2048
	long	0x75250000,0xC4605202,0x8A20979B	# 10 ^ 4096

	global	PTENRM
PTENRM:
	long	0x40020000,0xA0000000,0x00000000	# 10 ^ 1
	long	0x40050000,0xC8000000,0x00000000	# 10 ^ 2
	long	0x400C0000,0x9C400000,0x00000000	# 10 ^ 4
	long	0x40190000,0xBEBC2000,0x00000000	# 10 ^ 8
	long	0x40340000,0x8E1BC9BF,0x04000000	# 10 ^ 16
	long	0x40690000,0x9DC5ADA8,0x2B70B59D	# 10 ^ 32
	long	0x40D30000,0xC2781F49,0xFFCFA6D5	# 10 ^ 64
	long	0x41A80000,0x93BA47C9,0x80E98CDF	# 10 ^ 128
	long	0x43510000,0xAA7EEBFB,0x9DF9DE8D	# 10 ^ 256
	long	0x46A30000,0xE319A0AE,0xA60E91C6	# 10 ^ 512
	long	0x4D480000,0xC9767586,0x81750C17	# 10 ^ 1024
	long	0x5A920000,0x9E8B3B5D,0xC53D5DE4	# 10 ^ 2048
	long	0x75250000,0xC4605202,0x8A20979A	# 10 ^ 4096

#########################################################################
# binstr(): Converts a 64-bit binary integer to bcd.			#
#									#
# INPUT *************************************************************** #
#	d2:d3 = 64-bit binary integer					#
#	d0 = desired length (LEN)					#
#	a0 = pointer to start in memory for bcd characters		#
#		(This pointer must point to byte 4 of the first		#
#		 lword of the packed decimal memory string.)		#
#									#
# OUTPUT ************************************************************** #
#	a0 = pointer to LEN bcd digits representing the 64-bit integer.	#
#	NOTE: a0 is left pointing past the bytes written.		#
#									#
# ALGORITHM *********************************************************** #
#	The 64-bit binary is assumed to have a decimal point before	#
#	bit 63. The fraction is multiplied by 10 using a mul by 2	#
#	shift and a mul by 8 shift. The bits shifted out of the		#
#	msb form a decimal digit. This process is iterated until	#
#	LEN digits are formed.						#
#									#
# A1. Init d7 to 1. D7 is the byte digit counter, and if 1, the		#
#     digit formed will be assumed the least significant. This is	#
#     to force the first byte formed to have a 0 in the upper 4 bits.	#
#									#
# A2. Beginning of the loop:						#
#     Copy the fraction in d2:d3 to d4:d5.				#
#									#
# A3. Multiply the fraction in d2:d3 by 8 using bit-field		#
#     extracts and shifts. The three msbs from d2 will go into d1.	#
#									#
# A4. Multiply the fraction in d4:d5 by 2 using shifts. The msb		#
#     will be collected by the carry.					#
#									#
# A5. Add using the carry the 64-bit quantities in d2:d3 and d4:d5	#
#     into d2:d3. D1 will contain the bcd digit formed.			#
#									#
# A6. Test d7. If zero, the digit formed is the ms digit. If non-	#
#     zero, it is the ls digit. Put the digit in its place in the	#
#     upper word of d7. If it is the ls digit, write the byte		#
#     from d7 to memory.						#
#									#
# A7. Decrement d0 (LEN counter) and repeat the loop until zero.	#
#									#
#########################################################################

# Implementation Notes:
#
#	The registers are used as follows:
#
#		d0: LEN counter
#		d1: temp used to form the digit
#		d2: upper 32-bits of fraction for mul by 8
#		d3: lower 32-bits of fraction for mul by 8
#		d4: upper 32-bits of fraction for mul by 2
#		d5: lower 32-bits of fraction for mul by 2
#		d6: temp for bit-field extracts
#		d7: byte digit formation word;digit count {0,1}
#		    (low word = flag, high word = pending ms digit)
#		a0: pointer into memory for packed bcd string formation
#
#	d0-d7 are saved on entry and restored on exit; a0 is not saved
#	and is advanced by one for every byte stored.
#
#	NOTE(review): the loop is driven by dbf on LEN-1, so this
#	assumes LEN >= 1 -- TODO confirm that no caller passes 0.
#

	global	binstr
binstr:
	movm.l	&0xff00,-(%sp)		# {%d0-%d7}

#
# A1: Init d7
#
	mov.l	&1,%d7			# init d7 for second digit
	subq.l	&1,%d0			# for dbf d0 would have LEN+1 passes
#
# A2. Copy d2:d3 to d4:d5. Start loop.
#
loop:
	mov.l	%d2,%d4			# copy the fraction before muls
	mov.l	%d3,%d5			# to d4:d5
#
# A3. Multiply d2:d3 by 8; extract msbs into d1.
#
	bfextu	%d2{&0:&3},%d1		# copy 3 msbs of d2 into d1
	asl.l	&3,%d2			# shift d2 left by 3 places
	bfextu	%d3{&0:&3},%d6		# copy 3 msbs of d3 into d6
	asl.l	&3,%d3			# shift d3 left by 3 places
	or.l	%d6,%d2			# or in msbs from d3 into d2
#
# A4. Multiply d4:d5 by 2; add carry out to d1.
#
	asl.l	&1,%d5			# mul d5 by 2
	roxl.l	&1,%d4			# mul d4 by 2
	swap	%d6			# put 0 in d6 lower word
	addx.w	%d6,%d1			# add in extend from mul by 2
#
# A5. Add mul by 8 to mul by 2. D1 contains the digit formed.
#
	add.l	%d5,%d3			# add lower 32 bits
	nop				# ERRATA FIX #13 (Rev. 1.2 6/6/90)
	addx.l	%d4,%d2			# add with extend upper 32 bits
	nop				# ERRATA FIX #13 (Rev. 1.2 6/6/90)
	addx.w	%d6,%d1			# add in extend from add to d1
	swap	%d6			# with d6 = 0; put 0 in upper word
#
# A6. Test d7 and branch.
#
	tst.w	%d7			# if zero, store digit & to loop
	beq.b	first_d			# if non-zero, form byte & write
sec_d:
	swap	%d7			# bring first digit to word d7b
	asl.w	&4,%d7			# first digit in upper 4 bits d7b
	add.w	%d1,%d7			# add in ls digit to d7b
	mov.b	%d7,(%a0)+		# store d7b byte in memory
	swap	%d7			# bring digit flag back to word d7a
	clr.w	%d7			# set d7a to signal no digits done
	dbf.w	%d0,loop		# do loop some more!
	bra.b	end_bstr		# finished, so exit
first_d:
	swap	%d7			# put digit word in d7b
	mov.w	%d1,%d7			# put new digit in d7b
	swap	%d7			# bring digit flag back to word d7a
	addq.w	&1,%d7			# set d7a to signal first digit done
	dbf.w	%d0,loop		# do loop some more!
# fell out of the loop with an unpaired ms digit pending: write it alone
	swap	%d7			# put last digit in string
	lsl.w	&4,%d7			# move it to upper 4 bits
	mov.b	%d7,(%a0)+		# store it in memory string
#
# Clean up and return. The digits have been stored through a0; no
# floating-point register is touched by this routine.
#
end_bstr:
	movm.l	(%sp)+,&0xff		# {%d0-%d7}
	rts

#########################################################################
# XDEF **************************************************************** #
#	facc_in_b(): dmem_read_byte failed				#
#	facc_in_w(): dmem_read_word failed				#
#	facc_in_l(): dmem_read_long failed				#
#	facc_in_d(): dmem_read of dbl prec failed			#
#	facc_in_x(): dmem_read of ext prec failed			#
#									#
#	facc_out_b(): dmem_write_byte failed				#
#	facc_out_w(): dmem_write_word failed				#
#	facc_out_l(): dmem_write_long failed				#
#	facc_out_d(): dmem_write of dbl prec failed			#
#	facc_out_x(): dmem_write of ext prec failed			#
#									#
# XREF **************************************************************** #
#	_real_access() - exit through access error handler		#
#									#
# INPUT *************************************************************** #
#	None								#
#									#
# OUTPUT ************************************************************** #
#	None								#
#									#
# ALGORITHM *********************************************************** #
#	Flow jumps here when an FP data fetch call gets an error	#
# result. This means the operating system wants an access error frame	#
# made out of the current exception stack frame.			#
#	So, we first call restore() which makes sure that any updated	#
# -(an)+ register gets returned to its pre-exception value and then	#
# we change the stack to an access error stack frame.			#
#									#
#########################################################################

# Entry stubs, one per transfer direction and size.  Each stub:
#   1. loads d0 with the operand size in bytes,
#   2. calls restore() so that a -(An)/(An)+ register already updated by
#      the emulation is put back to its pre-instruction value,
#   3. stores a direction/size word into EXC_VOFF(a6), which
#      facc_finish later moves into the upper half of the FSLW.
# restore() has to run before step 3: its a7 case still examines the
# original contents of EXC_VOFF(a6).
# The read stubs store 0x01xx and the write stubs 0x00xx with bit 7 set;
# dbl and ext share one value per direction.
# NOTE(review): these are presumably the FSLW read/write and size
# fields -- confirm the encoding against the MC68060 User's Manual.

facc_in_b:
	movq.l	&0x1,%d0		# one byte
	bsr.w	restore			# fix An

	mov.w	&0x0121,EXC_VOFF(%a6)	# set FSLW
	bra.w	facc_finish		# farthest stub: needs a word branch

facc_in_w:
	movq.l	&0x2,%d0		# two bytes
	bsr.w	restore			# fix An

	mov.w	&0x0141,EXC_VOFF(%a6)	# set FSLW
	bra.b	facc_finish

facc_in_l:
	movq.l	&0x4,%d0		# four bytes
	bsr.w	restore			# fix An

	mov.w	&0x0101,EXC_VOFF(%a6)	# set FSLW
	bra.b	facc_finish

facc_in_d:
	movq.l	&0x8,%d0		# eight bytes
	bsr.w	restore			# fix An

	mov.w	&0x0161,EXC_VOFF(%a6)	# set FSLW
	bra.b	facc_finish

facc_in_x:
	movq.l	&0xc,%d0		# twelve bytes
	bsr.w	restore			# fix An

	mov.w	&0x0161,EXC_VOFF(%a6)	# set FSLW
	bra.b	facc_finish

################################################################

facc_out_b:
	movq.l	&0x1,%d0		# one byte
	bsr.w	restore			# restore An

	mov.w	&0x00a1,EXC_VOFF(%a6)	# set FSLW
	bra.b	facc_finish

facc_out_w:
	movq.l	&0x2,%d0		# two bytes
	bsr.w	restore			# restore An

	mov.w	&0x00c1,EXC_VOFF(%a6)	# set FSLW
	bra.b	facc_finish

facc_out_l:
	movq.l	&0x4,%d0		# four bytes
	bsr.w	restore			# restore An

	mov.w	&0x0081,EXC_VOFF(%a6)	# set FSLW
	bra.b	facc_finish

facc_out_d:
	movq.l	&0x8,%d0		# eight bytes
	bsr.w	restore			# restore An

	mov.w	&0x00e1,EXC_VOFF(%a6)	# set FSLW
	bra.b	facc_finish

facc_out_x:
	movq.l	&0xc,%d0		# twelve bytes (movq, as in facc_in_x)
	bsr.w	restore			# restore An

	mov.w	&0x00e1,EXC_VOFF(%a6)	# set FSLW
					# last stub: falls through

# here's where we actually create the access error frame from the
# current exception stack frame.
facc_finish:
# Common tail of all facc_* entries.  Restores the saved user state,
# unlinks a6, then grows the exception frame by one longword and
# rewrites it in place.  Resulting layout, as offsets from sp:
#	0x0 SR   0x2 PC   0x6 voff (0x4008)   0x8 EA   0xc FSLW
# Control then leaves through _real_access.
	mov.l	USER_FPIAR(%a6),EXC_PC(%a6) # store current PC

	fmovm.x	EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l	USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l	EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	unlk	%a6

# The order of the next six moves matters: each one reads a slot that a
# later move overwrites.
	mov.l	(%sp),-(%sp)		# store SR, hi(PC) (frame grows by 4)
	mov.l	0x8(%sp),0x4(%sp)	# store lo(PC) (old voff word rides along)
	mov.l	0xc(%sp),0x8(%sp)	# store EA
	mov.l	&0x00000001,0xc(%sp)	# store FSLW (low word = 0x0001)
	mov.w	0x6(%sp),0xc(%sp)	# fix FSLW (size): hi word = stub's word
	mov.w	&0x4008,0x6(%sp)	# store voff

	btst	&0x5,(%sp)		# supervisor or user mode?
	beq.b	facc_out2		# user
	bset	&0x2,0xd(%sp)		# set supervisor TM bit

facc_out2:
	bra.l	_real_access

##################################################################

# if the effective addressing mode was predecrement or postincrement,
# the emulation has already changed its value to the correct post-
# instruction value. but since we're exiting to the access error
# handler, then AN must be returned to its pre-instruction value.
# we do that here.
#
# In:	d0 = operand size in bytes (loaded by the facc_* entry)
# Uses:	d1 as scratch; d0 is negated on the predecrement path; a0 is
#	overwritten on the user-mode a7 path (facc_finish reloads
#	d0-d1/a0-a1 from EXC_DREGS afterwards).
# Any other addressing mode returns without changing a register.
restore:
	mov.b	EXC_OPWORD+0x1(%a6),%d1
	andi.b	&0x38,%d1		# extract <ea> mode field (bits 5-3)
	cmpi.b	%d1,&0x18		# postinc?
	beq.w	rest_inc
	cmpi.b	%d1,&0x20		# predec?
	beq.w	rest_dec
	rts

# subtract d0 from the address register named in bits 2-0 of the opword
rest_inc:
	mov.b	EXC_OPWORD+0x1(%a6),%d1
	andi.w	&0x0007,%d1		# fetch An

	mov.w	(tbl_rest_inc.b,%pc,%d1.w*2),%d1	# handler offset for An
	jmp	(tbl_rest_inc.b,%pc,%d1.w*1)		# dispatch to ri_aN

# 16-bit offsets of the per-register handlers, relative to the table
tbl_rest_inc:
	short	ri_a0 - tbl_rest_inc
	short	ri_a1 - tbl_rest_inc
	short	ri_a2 - tbl_rest_inc
	short	ri_a3 - tbl_rest_inc
	short	ri_a4 - tbl_rest_inc
	short	ri_a5 - tbl_rest_inc
	short	ri_a6 - tbl_rest_inc
	short	ri_a7 - tbl_rest_inc

# a0/a1 are fixed in their stacked copies, a2-a5 in the live registers,
# and a6 in the copy found at (a6).
ri_a0:
	sub.l	%d0,EXC_DREGS+0x8(%a6)	# fix stacked a0
	rts
ri_a1:
	sub.l	%d0,EXC_DREGS+0xc(%a6)	# fix stacked a1
	rts
ri_a2:
	sub.l	%d0,%a2			# fix a2
	rts
ri_a3:
	sub.l	%d0,%a3			# fix a3
	rts
ri_a4:
	sub.l	%d0,%a4			# fix a4
	rts
ri_a5:
	sub.l	%d0,%a5			# fix a5
	rts
ri_a6:
	sub.l	%d0,(%a6)		# fix stacked a6
	rts
# if it's a fmove out instruction, we don't have to fix a7
# because we hadn't changed it yet. if it's an opclass two
# instruction (data moved in) and the exception was in supervisor
# mode, then a7 also wasn't updated. if it was user mode, then
# restore the correct a7 which is in the USP currently.
# this test reads EXC_VOFF(a6), so restore() must be called before a
# facc_* entry overwrites that word.
ri_a7:
	cmpi.b	EXC_VOFF(%a6),&0x30	# move in or out?
	bne.b	ri_a7_done		# out

	btst	&0x5,EXC_SR(%a6)	# user or supervisor?
	bne.b	ri_a7_done		# supervisor
	movc	%usp,%a0		# restore USP
	sub.l	%d0,%a0
	movc	%a0,%usp
ri_a7_done:
	rts

# need to invert adjustment value if the <ea> was predec
rest_dec:
	neg.l	%d0			# subtracting -size adds it back
	bra.b	rest_inc