1# 2# $NetBSD: pfpsp.s,v 1.2 2001/09/16 16:34:31 wiz Exp $ 3# 4 5#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 6# MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP 7# M68000 Hi-Performance Microprocessor Division 8# M68060 Software Package Production Release 9# 10# M68060 Software Package Copyright (C) 1993, 1994, 1995, 1996 Motorola Inc. 11# All rights reserved. 12# 13# THE SOFTWARE is provided on an "AS IS" basis and without warranty. 14# To the maximum extent permitted by applicable law, 15# MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED, 16# INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS 17# FOR A PARTICULAR PURPOSE and any warranty against infringement with 18# regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF) 19# and any accompanying written materials. 20# 21# To the maximum extent permitted by applicable law, 22# IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER 23# (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS, 24# BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR OTHER PECUNIARY LOSS) 25# ARISING OF THE USE OR INABILITY TO USE THE SOFTWARE. 26# 27# Motorola assumes no responsibility for the maintenance and support 28# of the SOFTWARE. 29# 30# You are hereby granted a copyright license to use, modify, and distribute the 31# SOFTWARE so long as this entire notice is retained without alteration 32# in any modified and/or redistributed versions, and that such modified 33# versions are clearly identified as such. 34# No licenses are granted by implication, estoppel or otherwise under any 35# patents or trademarks of Motorola, Inc. 36#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 37 38# 39# freal.s: 40# This file is appended to the top of the 060FPSP package 41# and contains the entry points into the package. The user, in 42# effect, branches to one of the branch table entries located 43# after _060FPSP_TABLE. 
# Also, subroutine stubs exist in this file (_fpsp_done for
# example) that are referenced by the FPSP package itself in order
# to call a given routine. The stub routine actually performs the
# callout. The FPSP code does a "bsr" to the stub routine. This
# extra layer of hierarchy adds a slight performance penalty but
# it makes the FPSP code easier to read and more maintainable.
#

# Byte offsets of the operating-system "callout" entries that the OS
# places in the 0x80 bytes immediately preceding _060FPSP_TABLE.
# Each stub below fetches its entry from (_060FPSP_TABLE-0x80+_off_xxx)
# and transfers control through it (see the stub routines further down).
set		_off_bsun,	0x00	# bsun exception callout
set		_off_snan,	0x04	# signalling NAN exception callout
set		_off_operr,	0x08	# operand error exception callout
set		_off_ovfl,	0x0c	# overflow exception callout
set		_off_unfl,	0x10	# underflow exception callout
set		_off_dz,	0x14	# divide-by-zero exception callout
set		_off_inex,	0x18	# inexact exception callout
set		_off_fline,	0x1c	# F-line exception callout
set		_off_fpu_dis,	0x20	# FPU-disabled exception callout
set		_off_trap,	0x24	# trap exception callout
set		_off_trace,	0x28	# trace exception callout
set		_off_access,	0x2c	# access error callout
set		_off_done,	0x30	# FPSP exit ("all work done") callout

set		_off_imr,	0x40	# instruction memory read callout
set		_off_dmr,	0x44	# data memory read callout
set		_off_dmw,	0x48	# data memory write callout
set		_off_irw,	0x4c	# instruction read (word) callout
set		_off_irl,	0x50	# instruction read (long) callout
set		_off_drb,	0x54	# data read (byte) callout
set		_off_drw,	0x58	# data read (word) callout
set		_off_drl,	0x5c	# data read (long) callout
set		_off_dwb,	0x60	# data write (byte) callout
set		_off_dww,	0x64	# data write (word) callout
set		_off_dwl,	0x68	# data write (long) callout

_060FPSP_TABLE:

###############################################################

# Here's the table of ENTRY POINTS for those linking the package.
# Branch table of package ENTRY POINTS. Each entry is a 4-byte bra.l
# padded with a zero word so that entries are 8 bytes apart.
	bra.l		_fpsp_snan
	short		0x0000
	bra.l		_fpsp_operr
	short		0x0000
	bra.l		_fpsp_ovfl
	short		0x0000
	bra.l		_fpsp_unfl
	short		0x0000
	bra.l		_fpsp_dz
	short		0x0000
	bra.l		_fpsp_inex
	short		0x0000
	bra.l		_fpsp_fline
	short		0x0000
	bra.l		_fpsp_unsupp
	short		0x0000
	bra.l		_fpsp_effadd
	short		0x0000

	space		56			# pad table to fixed size

###############################################################

# Callout stub routines. Each stub:
#   (1) saves d0;
#   (2) loads the longword at (_060FPSP_TABLE-0x80+_off_xxx) — an offset,
#       relative to (_060FPSP_TABLE-0x80), installed by the host OS;
#   (3) pushes the resulting absolute target address with pea;
#   (4) restores the caller's d0 from beneath the pushed address;
#   (5) "returns" into the callout with rtd &0x4, which pops the target
#       address and discards the 4-byte saved-d0 slot.
# All registers and the stack are thus preserved across the stub itself.
	global		_fpsp_done
_fpsp_done:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_done,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_real_ovfl
_real_ovfl:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_ovfl,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_real_unfl
_real_unfl:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_unfl,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_real_inex
_real_inex:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_inex,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_real_bsun
_real_bsun:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_bsun,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_real_operr
_real_operr:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_operr,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_real_snan
_real_snan:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_snan,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_real_dz
_real_dz:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_dz,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_real_fline
_real_fline:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_fline,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_real_fpu_disabled
_real_fpu_disabled:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_fpu_dis,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_real_trap
_real_trap:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_trap,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_real_trace
_real_trace:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_trace,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_real_access
_real_access:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_access,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

#######################################

# Memory-access callout stubs — same trampoline mechanism as above.
	global		_imem_read
_imem_read:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_imr,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_dmem_read
_dmem_read:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_dmr,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_dmem_write
_dmem_write:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_dmw,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_imem_read_word
_imem_read_word:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_irw,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_imem_read_long
_imem_read_long:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_irl,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_dmem_read_byte
_dmem_read_byte:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_drb,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_dmem_read_word
_dmem_read_word:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_drw,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_dmem_read_long
_dmem_read_long:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_drl,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_dmem_write_byte
_dmem_write_byte:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_dwb,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_dmem_write_word
_dmem_write_word:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_dww,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_dmem_write_long
_dmem_write_long:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_dwl,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

#
# This file contains a set of define statements for constants
# in order to promote readability within the corecode itself.
#

# Stack-frame layout. The handlers below do "link.w %a6,&-LOCAL_SIZE",
# so all of the following are byte offsets relative to a6.
set		LOCAL_SIZE,	192		# stack frame size(bytes)
set		LV,		-LOCAL_SIZE	# stack offset

set		EXC_SR,		0x4		# stack status register
set		EXC_PC,		0x6		# stack pc
set		EXC_VOFF,	0xa		# stacked vector offset
set		EXC_EA,		0xc		# stacked <ea>

set		EXC_FP,		0x0		# frame pointer

set		EXC_AREGS,	-68		# offset of all address regs
set		EXC_DREGS,	-100		# offset of all data regs
set		EXC_FPREGS,	-36		# offset of all fp regs

set		EXC_A7,		EXC_AREGS+(7*4)	# offset of saved a7
set		OLD_A7,		EXC_AREGS+(6*4)	# extra copy of saved a7
set		EXC_A6,		EXC_AREGS+(6*4)	# offset of saved a6
set		EXC_A5,		EXC_AREGS+(5*4)
set		EXC_A4,		EXC_AREGS+(4*4)
set		EXC_A3,		EXC_AREGS+(3*4)
set		EXC_A2,		EXC_AREGS+(2*4)
set		EXC_A1,		EXC_AREGS+(1*4)
set		EXC_A0,		EXC_AREGS+(0*4)
set		EXC_D7,		EXC_DREGS+(7*4)
set		EXC_D6,		EXC_DREGS+(6*4)
set		EXC_D5,		EXC_DREGS+(5*4)
set		EXC_D4,		EXC_DREGS+(4*4)
set		EXC_D3,		EXC_DREGS+(3*4)
set		EXC_D2,		EXC_DREGS+(2*4)
set		EXC_D1,		EXC_DREGS+(1*4)
set		EXC_D0,		EXC_DREGS+(0*4)

set		EXC_FP0,	EXC_FPREGS+(0*12)	# offset of saved fp0
set		EXC_FP1,	EXC_FPREGS+(1*12)	# offset of saved fp1
set		EXC_FP2,	EXC_FPREGS+(2*12)	# offset of saved fp2 (not used)

set		FP_SCR1,	LV+80		# fp scratch 1
set		FP_SCR1_EX,	FP_SCR1+0
set		FP_SCR1_SGN,	FP_SCR1+2
set		FP_SCR1_HI,	FP_SCR1+4
set		FP_SCR1_LO,	FP_SCR1+8

set		FP_SCR0,	LV+68		# fp scratch 0
set		FP_SCR0_EX,	FP_SCR0+0
set		FP_SCR0_SGN,	FP_SCR0+2
set		FP_SCR0_HI,	FP_SCR0+4
set		FP_SCR0_LO,	FP_SCR0+8

set		FP_DST,		LV+56		# fp destination operand
set		FP_DST_EX,	FP_DST+0
set		FP_DST_SGN,	FP_DST+2
set		FP_DST_HI,	FP_DST+4
set		FP_DST_LO,	FP_DST+8

set		FP_SRC,		LV+44		# fp source operand
set		FP_SRC_EX,	FP_SRC+0
set		FP_SRC_SGN,	FP_SRC+2
set		FP_SRC_HI,	FP_SRC+4
set		FP_SRC_LO,	FP_SRC+8

set		USER_FPIAR,	LV+40		# FP instr address register

set		USER_FPSR,	LV+36		# FP status register
set		FPSR_CC,	USER_FPSR+0	# FPSR condition codes
set		FPSR_QBYTE,	USER_FPSR+1	# FPSR quotient byte
set		FPSR_EXCEPT,	USER_FPSR+2	# FPSR exception status byte
set		FPSR_AEXCEPT,	USER_FPSR+3	# FPSR accrued exception byte

set		USER_FPCR,	LV+32		# FP control register
set		FPCR_ENABLE,	USER_FPCR+2	# FPCR exception enable
set		FPCR_MODE,	USER_FPCR+3	# FPCR rounding mode control

set		L_SCR3,		LV+28		# integer scratch 3
set		L_SCR2,		LV+24		# integer scratch 2
set		L_SCR1,		LV+20		# integer scratch 1

set		STORE_FLG,	LV+19		# flag: operand store (ie. not fcmp/ftst)

set		EXC_TEMP2,	LV+24		# temporary space
set		EXC_TEMP,	LV+16		# temporary space

set		DTAG,		LV+15		# destination operand type
set		STAG,		LV+14		# source operand type

set		SPCOND_FLG,	LV+10		# flag: special case (see below)

set		EXC_CC,		LV+8		# saved condition codes
set		EXC_EXTWPTR,	LV+4		# saved current PC (active)
set		EXC_EXTWORD,	LV+2		# saved extension word
set		EXC_CMDREG,	LV+2		# saved extension word
set		EXC_OPWORD,	LV+0		# saved operation word

################################

# Helpful macros

set		FTEMP,		0		# offsets within an
set		FTEMP_EX,	0		# extended precision
set		FTEMP_SGN,	2		# value saved in memory.
set		FTEMP_HI,	4
set		FTEMP_LO,	8
set		FTEMP_GRS,	12

set		LOCAL,		0		# offsets within an
set		LOCAL_EX,	0		# extended precision
set		LOCAL_SGN,	2		# value saved in memory.
set		LOCAL_HI,	4
set		LOCAL_LO,	8
set		LOCAL_GRS,	12

set		DST,		0		# offsets within an
set		DST_EX,		0		# extended precision
set		DST_HI,		4		# value saved in memory.
set		DST_LO,		8

set		SRC,		0		# offsets within an
set		SRC_EX,		0		# extended precision
set		SRC_HI,		4		# value saved in memory.
set		SRC_LO,		8

set		SGL_LO,		0x3f81		# min sgl prec exponent
set		SGL_HI,		0x407e		# max sgl prec exponent
set		DBL_LO,		0x3c01		# min dbl prec exponent
set		DBL_HI,		0x43fe		# max dbl prec exponent
set		EXT_LO,		0x0		# min ext prec exponent
set		EXT_HI,		0x7ffe		# max ext prec exponent

set		EXT_BIAS,	0x3fff		# extended precision bias
set		SGL_BIAS,	0x007f		# single precision bias
set		DBL_BIAS,	0x03ff		# double precision bias

set		NORM,		0x00		# operand type for STAG/DTAG
set		ZERO,		0x01		# operand type for STAG/DTAG
set		INF,		0x02		# operand type for STAG/DTAG
set		QNAN,		0x03		# operand type for STAG/DTAG
set		DENORM,		0x04		# operand type for STAG/DTAG
set		SNAN,		0x05		# operand type for STAG/DTAG
set		UNNORM,		0x06		# operand type for STAG/DTAG

##################
# FPSR/FPCR bits #
##################
set		neg_bit,	0x3		# negative result
set		z_bit,		0x2		# zero result
set		inf_bit,	0x1		# infinite result
set		nan_bit,	0x0		# NAN result

set		q_sn_bit,	0x7		# sign bit of quotient byte

set		bsun_bit,	7		# branch on unordered
set		snan_bit,	6		# signalling NAN
set		operr_bit,	5		# operand error
set		ovfl_bit,	4		# overflow
set		unfl_bit,	3		# underflow
set		dz_bit,		2		# divide by zero
set		inex2_bit,	1		# inexact result 2
set		inex1_bit,	0		# inexact result 1

set		aiop_bit,	7		# accrued inexact operation bit
set		aovfl_bit,	6		# accrued overflow bit
set		aunfl_bit,	5		# accrued underflow bit
set		adz_bit,	4		# accrued dz bit
set		ainex_bit,	3		# accrued inexact bit

#############################
# FPSR individual bit masks #
#############################
set		neg_mask,	0x08000000	# negative bit mask (lw)
set		inf_mask,	0x02000000	# infinity bit mask (lw)
set		z_mask,		0x04000000	# zero bit mask (lw)
set		nan_mask,	0x01000000	# nan bit mask (lw)

set		neg_bmask,	0x08		# negative bit mask (byte)
set		inf_bmask,	0x02		# infinity bit mask (byte)
set		z_bmask,	0x04		# zero bit mask (byte)
set		nan_bmask,	0x01		# nan bit mask (byte)

set		bsun_mask,	0x00008000	# bsun exception mask
set		snan_mask,	0x00004000	# snan exception mask
set		operr_mask,	0x00002000	# operr exception mask
set		ovfl_mask,	0x00001000	# overflow exception mask
set		unfl_mask,	0x00000800	# underflow exception mask
set		dz_mask,	0x00000400	# dz exception mask
set		inex2_mask,	0x00000200	# inex2 exception mask
set		inex1_mask,	0x00000100	# inex1 exception mask

set		aiop_mask,	0x00000080	# accrued illegal operation
set		aovfl_mask,	0x00000040	# accrued overflow
set		aunfl_mask,	0x00000020	# accrued underflow
set		adz_mask,	0x00000010	# accrued divide by zero
set		ainex_mask,	0x00000008	# accrued inexact

######################################
# FPSR combinations used in the FPSP #
######################################
set		dzinf_mask,	inf_mask+dz_mask+adz_mask
set		opnan_mask,	nan_mask+operr_mask+aiop_mask
set		nzi_mask,	0x01ffffff	# clears N, Z, and I
set		unfinx_mask,	unfl_mask+inex2_mask+aunfl_mask+ainex_mask
set		unf2inx_mask,	unfl_mask+inex2_mask+ainex_mask
set		ovfinx_mask,	ovfl_mask+inex2_mask+aovfl_mask+ainex_mask
set		inx1a_mask,	inex1_mask+ainex_mask
set		inx2a_mask,	inex2_mask+ainex_mask
set		snaniop_mask,	nan_mask+snan_mask+aiop_mask
set		snaniop2_mask,	snan_mask+aiop_mask
set		naniop_mask,	nan_mask+aiop_mask
set		neginf_mask,	neg_mask+inf_mask
set		infaiop_mask,	inf_mask+aiop_mask
set		negz_mask,	neg_mask+z_mask
set		opaop_mask,	operr_mask+aiop_mask
set		unfl_inx_mask,	unfl_mask+aunfl_mask+ainex_mask
set		ovfl_inx_mask,	ovfl_mask+aovfl_mask+ainex_mask
#########
# misc. #
#########
set		rnd_stky_bit,	29	# stky bit pos in longword

set		sign_bit,	0x7	# sign bit
set		signan_bit,	0x6	# signalling nan bit

set		sgl_thresh,	0x3f81	# minimum sgl exponent
set		dbl_thresh,	0x3c01	# minimum dbl exponent

set		x_mode,		0x0	# extended precision
set		s_mode,		0x4	# single precision
set		d_mode,		0x8	# double precision

set		rn_mode,	0x0	# round-to-nearest
set		rz_mode,	0x1	# round-to-zero
set		rm_mode,	0x2	# round-to-minus-infinity
set		rp_mode,	0x3	# round-to-plus-infinity

set		mantissalen,	64	# length of mantissa in bits

set		BYTE,		1	# len(byte) == 1 byte
set		WORD,		2	# len(word) == 2 bytes
set		LONG,		4	# len(longword) == 4 bytes

set		BSUN_VEC,	0xc0	# bsun vector offset
set		INEX_VEC,	0xc4	# inexact vector offset
set		DZ_VEC,		0xc8	# dz vector offset
set		UNFL_VEC,	0xcc	# unfl vector offset
set		OPERR_VEC,	0xd0	# operr vector offset
set		OVFL_VEC,	0xd4	# ovfl vector offset
set		SNAN_VEC,	0xd8	# snan vector offset

###########################
# SPecial CONDition FLaGs #
###########################
set		ftrapcc_flg,	0x01	# flag bit: ftrapcc exception
set		fbsun_flg,	0x02	# flag bit: bsun exception
set		mia7_flg,	0x04	# flag bit: (a7)+ <ea>
set		mda7_flg,	0x08	# flag bit: -(a7) <ea>
set		fmovm_flg,	0x40	# flag bit: fmovm instruction
set		immed_flg,	0x80	# flag bit: &<data> <ea>

set		ftrapcc_bit,	0x0
set		fbsun_bit,	0x1
set		mia7_bit,	0x2
set		mda7_bit,	0x3
set		immed_bit,	0x7

##################################
# TRANSCENDENTAL "LAST-OP" FLAGS #
##################################
set		FMUL_OP,	0x0	# fmul instr performed last
set		FDIV_OP,	0x1	# fdiv performed last
set		FADD_OP,	0x2	# fadd performed last
set		FMOV_OP,	0x3	# fmov performed last

#############
# CONSTANTS #
#############
T1:	long		0x40C62D38,0xD3D64634	# 16381 LOG2 LEAD
T2:	long		0x3D6F90AE,0xB1E75CC7	# 16381 LOG2 TRAIL

PI:	long		0x40000000,0xC90FDAA2,0x2168C235,0x00000000
PIBY2:	long		0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000

TWOBYPI:
	long		0x3FE45F30,0x6DC9C883

#########################################################################
# XDEF ****************************************************************	#
#	_fpsp_ovfl(): 060FPSP entry point for FP Overflow exception.	#
#									#
#	This handler should be the first code executed upon taking the	#
#	FP Overflow exception in an operating system.			#
#									#
# XREF ****************************************************************	#
#	_imem_read_long() - read instruction longword			#
#	fix_skewed_ops() - adjust src operand in fsave frame		#
#	set_tag_x() - determine optype of src/dst operands		#
#	store_fpreg() - store opclass 0 or 2 result to FP regfile	#
#	unnorm_fix() - change UNNORM operands to NORM or ZERO		#
#	load_fpn2() - load dst operand from FP regfile			#
#	fout() - emulate an opclass 3 instruction			#
#	tbl_unsupp - address of the table of emulation routines for	#
#		     opclass 0,2					#
#	_fpsp_done() - "callout" for 060FPSP exit (all work done!)	#
#	_real_ovfl() - "callout" for Overflow exception enabled code	#
#	_real_inex() - "callout" for Inexact exception enabled code	#
#	_real_trace() - "callout" for Trace exception code		#
#									#
# INPUT ***************************************************************	#
#	- The system stack contains the FP Ovfl exception stack frame	#
#	- The fsave frame contains the source operand			#
#									#
# OUTPUT **************************************************************	#
#	Overflow Exception enabled:					#
#	- The system stack is unchanged					#
#	- The fsave frame contains the adjusted src op for opclass 0,2	#
#	Overflow Exception disabled:					#
#	- The system stack is unchanged					#
#	- The "exception present" flag in the fsave frame is cleared	#
#									#
# ALGORITHM ***********************************************************	#
#	On the 060, if an FP overflow is present as the result of any	#
# instruction, the 060 will take an overflow exception whether the	#
# exception is enabled or disabled in the FPCR. For the disabled case,	#
# this handler emulates the instruction to determine what the correct	#
# default result should be for the operation. This default result is	#
# then stored in either the FP regfile, data regfile, or memory.	#
# Finally, the handler exits through the "callout" _fpsp_done()		#
# denoting that no exceptional conditions exist within the machine.	#
#	If the exception is enabled, then this handler must create the	#
# exceptional operand and place it in the fsave state frame, and store	#
# the default result (only if the instruction is opclass 3). For	#
# exceptions enabled, this handler must exit through the "callout"	#
# _real_ovfl() so that the operating system enabled overflow handler	#
# can handle this case.							#
#	Two other conditions exist. First, if overflow was disabled	#
# but the inexact exception was enabled, this handler must exit		#
# through the "callout" _real_inex() regardless of whether the result	#
# was inexact.								#
#	Also, in the case of an opclass three instruction where		#
# overflow was disabled and the trace exception was enabled, this	#
# handler must exit through the "callout" _real_trace().		#
#									#
#########################################################################

	global		_fpsp_ovfl
_fpsp_ovfl:

#$#	sub.l		&24,%sp			# make room for src/dst

	link.w		%a6,&-LOCAL_SIZE	# init stack frame

	fsave		FP_SRC(%a6)		# grab the "busy" frame

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack

# the FPIAR holds the "current PC" of the faulting instruction
	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch the instruction words
	mov.l		%d0,EXC_OPWORD(%a6)

##############################################################################

	btst		&0x5,EXC_CMDREG(%a6)	# is instr an fmove out?
	bne.w		fovfl_out


	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
	bsr.l		fix_skewed_ops		# fix src op

# since, I believe, only NORMs and DENORMs can come through here,
# maybe we can avoid the subroutine call.
	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
	bsr.l		set_tag_x		# tag the operand type
	mov.b		%d0,STAG(%a6)		# maybe NORM,DENORM

# bit five of the fp extension word separates the monadic and dyadic operations
# that can pass through fpsp_ovfl(). remember that fcmp, ftst, and fsincos
# will never take this exception.
	btst		&0x5,1+EXC_CMDREG(%a6)	# is operation monadic or dyadic?
	beq.b		fovfl_extract		# monadic

	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
	bsr.l		load_fpn2		# load dst into FP_DST

	lea		FP_DST(%a6),%a0		# pass: ptr to dst op
	bsr.l		set_tag_x		# tag the operand type
	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
	bne.b		fovfl_op2_done		# no
	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
fovfl_op2_done:
	mov.b		%d0,DTAG(%a6)		# save dst optype tag

fovfl_extract:

#$#	mov.l		FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
#$#	mov.l		FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
#$#	mov.l		FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
#$#	mov.l		FP_DST_EX(%a6),TRAP_DSTOP_EX(%a6)
#$#	mov.l		FP_DST_HI(%a6),TRAP_DSTOP_HI(%a6)
#$#	mov.l		FP_DST_LO(%a6),TRAP_DSTOP_LO(%a6)

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec/mode

	mov.b		1+EXC_CMDREG(%a6),%d1
	andi.w		&0x007f,%d1		# extract extension

	andi.l		&0x00ff01ff,USER_FPSR(%a6) # zero all but accrued field

	fmov.l		&0x0,%fpcr		# zero current control regs
	fmov.l		&0x0,%fpsr

	lea		FP_SRC(%a6),%a0
	lea		FP_DST(%a6),%a1

# maybe we can make these entry points ONLY the OVFL entry points of each routine.
	mov.l		(tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
	jsr		(tbl_unsupp.l,%pc,%d1.l*1)

# the operation has been emulated. the result is in fp0.
# the EXOP, if an exception occurred, is in fp1.
# we must save the default result regardless of whether
# traps are enabled or disabled.
	bfextu		EXC_CMDREG(%a6){&6:&3},%d0
	bsr.l		store_fpreg

# the exceptional possibilities we have left ourselves with are ONLY overflow
# and inexact. and, the inexact is such that overflow occurred and was disabled
# but inexact was enabled.
	btst		&ovfl_bit,FPCR_ENABLE(%a6)
	bne.b		fovfl_ovfl_on

	btst		&inex2_bit,FPCR_ENABLE(%a6)
	bne.b		fovfl_inex_on

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	unlk		%a6
#$#	add.l		&24,%sp
	bra.l		_fpsp_done

# overflow is enabled AND overflow, of course, occurred. so, we have the EXOP
# in fp1. now, simply jump to _real_ovfl()!
fovfl_ovfl_on:
	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP (fp1) to stack

	mov.w		&0xe005,2+FP_SRC(%a6)	# save exc status

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	frestore	FP_SRC(%a6)		# do this after fmovm,other f<op>s!

	unlk		%a6

	bra.l		_real_ovfl

# overflow occurred but is disabled. meanwhile, inexact is enabled. therefore,
# we must jump to real_inex().
fovfl_inex_on:

	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP (fp1) to stack

	mov.b		&0xc4,1+EXC_VOFF(%a6)	# vector offset = 0xc4
	mov.w		&0xe001,2+FP_SRC(%a6)	# save exc status

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	frestore	FP_SRC(%a6)		# do this after fmovm,other f<op>s!

	unlk		%a6

	bra.l		_real_inex

########################################################################
fovfl_out:


#$#	mov.l		FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
#$#	mov.l		FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
#$#	mov.l		FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)

# the src operand is definitely a NORM(!), so tag it as such
	mov.b		&NORM,STAG(%a6)		# set src optype tag

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec/mode

	and.l		&0xffff00ff,USER_FPSR(%a6) # zero all but accrued field

	fmov.l		&0x0,%fpcr		# zero current control regs
	fmov.l		&0x0,%fpsr

	lea		FP_SRC(%a6),%a0		# pass ptr to src operand

	bsr.l		fout

	btst		&ovfl_bit,FPCR_ENABLE(%a6)
	bne.w		fovfl_ovfl_on

	btst		&inex2_bit,FPCR_ENABLE(%a6)
	bne.w		fovfl_inex_on

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	unlk		%a6
#$#	add.l		&24,%sp

	btst		&0x7,(%sp)		# is trace on?
	beq.l		_fpsp_done		# no

	fmov.l		%fpiar,0x8(%sp)		# "Current PC" is in FPIAR
	mov.w		&0x2024,0x6(%sp)	# stk fmt = 0x2; voff = 0x024
	bra.l		_real_trace

#########################################################################
# XDEF ****************************************************************	#
#	_fpsp_unfl(): 060FPSP entry point for FP Underflow exception.	#
#									#
#	This handler should be the first code executed upon taking the	#
#	FP Underflow exception in an operating system.			#
#									#
# XREF ****************************************************************	#
#	_imem_read_long() - read instruction longword			#
#	fix_skewed_ops() - adjust src operand in fsave frame		#
#	set_tag_x() - determine optype of src/dst operands		#
#	store_fpreg() - store opclass 0 or 2 result to FP regfile	#
#	unnorm_fix() - change UNNORM operands to NORM or ZERO		#
#	load_fpn2() - load dst operand from FP regfile			#
#	fout() - emulate an opclass 3 instruction			#
#	tbl_unsupp - address of the table of emulation routines for	#
#		     opclass 0,2					#
#	_fpsp_done() - "callout" for 060FPSP exit (all work done!)	#
#	_real_ovfl() - "callout" for Overflow exception enabled code	#
#	_real_inex() - "callout" for Inexact exception enabled code	#
#	_real_trace() - "callout" for Trace exception code		#
#									#
# INPUT ***************************************************************	#
#	- The system stack contains the FP Unfl exception stack frame	#
#	- The fsave frame contains the source operand			#
#									#
# OUTPUT **************************************************************	#
#	Underflow Exception enabled:					#
#	- The system stack is unchanged					#
#	- The fsave frame contains the adjusted src op for opclass 0,2	#
#	Underflow Exception disabled:					#
#	- The system stack is unchanged					#
#	- The "exception present" flag in the fsave frame is cleared	#
#									#
# ALGORITHM ***********************************************************	#
#	On the 060, if an FP underflow is present as the result of any	#
# instruction, the 060 will take an underflow exception whether the	#
# exception is enabled or disabled in the FPCR. For the disabled case,	#
# this handler emulates the instruction to determine what the correct	#
# default result should be for the operation. This default result is	#
# then stored in either the FP regfile, data regfile, or memory.	#
#	Finally, the handler exits through the "callout" _fpsp_done()	#
#	denoting that no exceptional conditions exist within the	#
#	machine.							#
#	If the exception is enabled, then this handler must create the	#
#	exceptional operand and place it in the fsave state frame, and	#
#	store the default result (only if the instruction is opclass 3).#
#	For exceptions enabled, this handler must exit through the	#
#	"callout" _real_unfl() so that the operating system enabled	#
#	underflow handler can handle this case.				#
#	Two other conditions exist. First, if underflow was disabled	#
#	but the inexact exception was enabled and the result was	#
#	inexact, this handler must exit through the "callout"		#
#	_real_inex().							#
#	Also, in the case of an opclass three instruction where		#
#	underflow was disabled and the trace exception was enabled,	#
#	this handler must exit through the "callout" _real_trace().	#
#									#
#########################################################################

	global		_fpsp_unfl
_fpsp_unfl:

#$#	sub.l		&24,%sp			# make room for src/dst

	link.w		%a6,&-LOCAL_SIZE	# init stack frame

	fsave		FP_SRC(%a6)		# grab the "busy" frame

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack

# the FPIAR holds the "current PC" of the faulting instruction
	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch the instruction words
	mov.l		%d0,EXC_OPWORD(%a6)

##############################################################################

	btst		&0x5,EXC_CMDREG(%a6)	# is instr an fmove out?
	bne.w		funfl_out


	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
	bsr.l		fix_skewed_ops		# fix src op

	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
	bsr.l		set_tag_x		# tag the operand type
	mov.b		%d0,STAG(%a6)		# maybe NORM,DENORM

# bit five of the fp ext word separates the monadic and dyadic operations
# that can pass through fpsp_unfl(). remember that fcmp, and ftst
# will never take this exception.
	btst		&0x5,1+EXC_CMDREG(%a6)	# is op monadic or dyadic?
	beq.b		funfl_extract		# monadic

# now, what's left that's not dyadic is fsincos. we can distinguish it
# from all dyadics by the '0110xxx pattern
	btst		&0x4,1+EXC_CMDREG(%a6)	# is op an fsincos?
	bne.b		funfl_extract		# yes

	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
	bsr.l		load_fpn2		# load dst into FP_DST

	lea		FP_DST(%a6),%a0		# pass: ptr to dst op
	bsr.l		set_tag_x		# tag the operand type
	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
	bne.b		funfl_op2_done		# no
	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
funfl_op2_done:
	mov.b		%d0,DTAG(%a6)		# save dst optype tag

funfl_extract:

#$#	mov.l		FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
#$#	mov.l		FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
#$#	mov.l		FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
#$#	mov.l		FP_DST_EX(%a6),TRAP_DSTOP_EX(%a6)
#$#	mov.l		FP_DST_HI(%a6),TRAP_DSTOP_HI(%a6)
#$#	mov.l		FP_DST_LO(%a6),TRAP_DSTOP_LO(%a6)

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec/mode

	mov.b		1+EXC_CMDREG(%a6),%d1
	andi.w		&0x007f,%d1		# extract extension

	andi.l		&0x00ff01ff,USER_FPSR(%a6)

	fmov.l		&0x0,%fpcr		# zero current control regs
	fmov.l		&0x0,%fpsr

	lea		FP_SRC(%a6),%a0
	lea		FP_DST(%a6),%a1

# maybe we can make these entry points ONLY the OVFL entry points of each routine.
	mov.l		(tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
	jsr		(tbl_unsupp.l,%pc,%d1.l*1)

	bfextu		EXC_CMDREG(%a6){&6:&3},%d0
	bsr.l		store_fpreg

# The `060 FPU multiplier hardware is such that if the result of a
# multiply operation is the smallest possible normalized number
# (0x00000000_80000000_00000000), then the machine will take an
# underflow exception. Since this is incorrect, we need to check
# if our emulation, after re-doing the operation, decided that
# no underflow was called for. We do these checks only in
# funfl_{unfl,inex}_on() because w/ both exceptions disabled, this
# special case will simply exit gracefully with the correct result.

# the exceptional possibilities we have left ourselves with are ONLY overflow
# and inexact. and, the inexact is such that overflow occurred and was disabled
# but inexact was enabled.
	btst		&unfl_bit,FPCR_ENABLE(%a6)
	bne.b		funfl_unfl_on

funfl_chkinex:
	btst		&inex2_bit,FPCR_ENABLE(%a6)
	bne.b		funfl_inex_on

funfl_exit:
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	unlk		%a6
#$#	add.l		&24,%sp
	bra.l		_fpsp_done

# underflow is enabled AND underflow, of course, occurred. so, we have the EXOP
# in fp1 (don't forget to save fp0). what to do now?
# well, we simply have to get to go to _real_unfl()!
funfl_unfl_on:

# The `060 FPU multiplier hardware is such that if the result of a
# multiply operation is the smallest possible normalized number
# (0x00000000_80000000_00000000), then the machine will take an
# underflow exception. Since this is incorrect, we check here to see
# if our emulation, after re-doing the operation, decided that
# no underflow was called for.
	btst		&unfl_bit,FPSR_EXCEPT(%a6)
	beq.w		funfl_chkinex

funfl_unfl_on2:
	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP (fp1) to stack

	mov.w		&0xe003,2+FP_SRC(%a6)	# save exc status

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	frestore	FP_SRC(%a6)		# do this after fmovm,other f<op>s!

	unlk		%a6

	bra.l		_real_unfl

# underflow occurred but is disabled. meanwhile, inexact is enabled. therefore,
# we must jump to _real_inex().
funfl_inex_on:

# The `060 FPU multiplier hardware is such that if the result of a
# multiply operation is the smallest possible normalized number
# (0x00000000_80000000_00000000), then the machine will take an
# underflow exception.
# But, whether bogus or not, if inexact is enabled AND it occurred,
# then we have to branch to real_inex.

	btst		&inex2_bit,FPSR_EXCEPT(%a6)
	beq.w		funfl_exit

funfl_inex_on2:

	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP to stack

	mov.b		&0xc4,1+EXC_VOFF(%a6)	# vector offset = 0xc4
	mov.w		&0xe001,2+FP_SRC(%a6)	# save exc status

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	frestore	FP_SRC(%a6)		# do this after fmovm,other f<op>s!

	unlk		%a6

	bra.l		_real_inex

#######################################################################
funfl_out:


#$#	mov.l		FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
#$#	mov.l		FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
#$#	mov.l		FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)

# the src operand is definitely a NORM(!), so tag it as such
	mov.b		&NORM,STAG(%a6)		# set src optype tag

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec/mode

	and.l		&0xffff00ff,USER_FPSR(%a6) # zero all but accrued field

	fmov.l		&0x0,%fpcr		# zero current control regs
	fmov.l		&0x0,%fpsr

	lea		FP_SRC(%a6),%a0		# pass ptr to src operand

	bsr.l		fout

	btst		&unfl_bit,FPCR_ENABLE(%a6)
	bne.w		funfl_unfl_on2

	btst		&inex2_bit,FPCR_ENABLE(%a6)
	bne.w		funfl_inex_on2

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	unlk		%a6
#$#	add.l		&24,%sp

	btst		&0x7,(%sp)		# is trace on?
	beq.l		_fpsp_done		# no

	fmov.l		%fpiar,0x8(%sp)		# "Current PC" is in FPIAR
	mov.w		&0x2024,0x6(%sp)	# stk fmt = 0x2; voff = 0x024
	bra.l		_real_trace

#########################################################################
# XDEF ****************************************************************	#
#	_fpsp_unsupp(): 060FPSP entry point for FP "Unimplemented	#
#			Data Type" exception.				#
#									#
#	This handler should be the first code executed upon taking the	#
#	FP Unimplemented Data Type exception in an operating system.	#
#									#
# XREF ****************************************************************	#
#	_imem_read_{word,long}() - read instruction word/longword	#
#	fix_skewed_ops() - adjust src operand in fsave frame		#
#	set_tag_x() - determine optype of src/dst operands		#
#	store_fpreg() - store opclass 0 or 2 result to FP regfile	#
#	unnorm_fix() - change UNNORM operands to NORM or ZERO		#
#	load_fpn2() - load dst operand from FP regfile			#
#	load_fpn1() - load src operand from FP regfile			#
#	fout() - emulate an opclass 3 instruction			#
#	tbl_unsupp - addr of a table of emulation routines for		#
#		     opclass 0,2					#
#	_real_inex() - "callout" to operating system inexact handler	#
#	_fpsp_done() - "callout" for exit; work all done		#
#	_real_trace() - "callout" for Trace enabled exception		#
#	funimp_skew() - adjust fsave src ops to "incorrect" value	#
#	_real_snan() - "callout" for SNAN exception			#
#	_real_operr() - "callout" for OPERR exception			#
#	_real_ovfl() - "callout" for OVFL exception			#
#	_real_unfl() - "callout" for UNFL exception			#
#	get_packed() - fetch packed operand from memory			#
#									#
# INPUT ***************************************************************	#
#	- The system stack contains the "Unimp Data Type" stk frame	#
#	- The fsave frame contains the src op (for UNNORM/DENORM)	#
#									#
# OUTPUT **************************************************************	#
#	If Inexact exception (opclass 3):				#
#	- The system stack is changed to an Inexact exception stk frame	#
#	If SNAN exception (opclass 3):					#
#	- The system stack is changed to an SNAN exception stk frame	#
#	If OPERR exception (opclass 3):					#
#	- The system stack is changed to an OPERR exception stk frame	#
#	If OVFL exception (opclass 3):					#
#	- The system stack is changed to an OVFL exception stk frame	#
#	If UNFL exception (opclass 3):					#
#	- The system stack is changed to an UNFL exception
stack frame # 1144# If Trace exception enabled: # 1145# - The system stack is changed to a Trace exception stack frame # 1146# Else: (normal case) # 1147# - Correct result has been stored as appropriate # 1148# # 1149# ALGORITHM *********************************************************** # 1150# Two main instruction types can enter here: (1) DENORM or UNNORM # 1151# unimplemented data types. These can be either opclass 0,2 or 3 # 1152# instructions, and (2) PACKED unimplemented data format instructions # 1153# also of opclasses 0,2, or 3. # 1154# For UNNORM/DENORM opclass 0 and 2, the handler fetches the src # 1155# operand from the fsave state frame and the dst operand (if dyadic) # 1156# from the FP register file. The instruction is then emulated by # 1157# choosing an emulation routine from a table of routines indexed by # 1158# instruction type. Once the instruction has been emulated and result # 1159# saved, then we check to see if any enabled exceptions resulted from # 1160# instruction emulation. If none, then we exit through the "callout" # 1161# _fpsp_done(). If there is an enabled FP exception, then we insert # 1162# this exception into the FPU in the fsave state frame and then exit # 1163# through _fpsp_done(). # 1164# PACKED opclass 0 and 2 is similar in how the instruction is # 1165# emulated and exceptions handled. The differences occur in how the # 1166# handler loads the packed op (by calling get_packed() routine) and # 1167# by the fact that a Trace exception could be pending for PACKED ops. # 1168# If a Trace exception is pending, then the current exception stack # 1169# frame is changed to a Trace exception stack frame and an exit is # 1170# made through _real_trace(). # 1171# For UNNORM/DENORM opclass 3, the actual move out to memory is # 1172# performed by calling the routine fout(). 
#	If no exception should occur as the result of emulation, then	#
#	an exit either occurs through _fpsp_done() or through		#
#	_real_trace() if a Trace exception is pending (a Trace stack	#
#	frame must be created here, too). If an FP exception should	#
#	occur, then we must create an exception stack frame of that	#
#	type and jump to either _real_snan(), _real_operr(),		#
#	_real_inex(), _real_unfl(), or _real_ovfl() as appropriate.	#
#	PACKED opclass 3 emulation is performed in a similar manner.	#
#									#
#########################################################################

#
# (1) DENORM and UNNORM (unimplemented) data types:
#
#				post-instruction
#				*****************
#				*      EA	*
#	pre-instruction		*		*
#	*****************	*****************
#	* 0x0 *  0x0dc	*	* 0x3 *  0x0dc	*
#	*****************	*****************
#	*    Next	*	*    Next	*
#	*     PC	*	*     PC	*
#	*****************	*****************
#	*     SR	*	*     SR	*
#	*****************	*****************
#
# (2) PACKED format (unsupported) opclasses two and three:
#	*****************
#	*      EA	*
#	*		*
#	*****************
#	* 0x2 *  0x0dc	*
#	*****************
#	*    Next	*
#	*     PC	*
#	*****************
#	*     SR	*
#	*****************
#
	global		_fpsp_unsupp
_fpsp_unsupp:

	link.w		%a6,&-LOCAL_SIZE	# init stack frame

	fsave		FP_SRC(%a6)		# save fp state

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack

	btst		&0x5,EXC_SR(%a6)	# user or supervisor mode?
	bne.b		fu_s
fu_u:
	mov.l		%usp,%a0		# fetch user stack pointer
	mov.l		%a0,EXC_A7(%a6)		# save on stack
	bra.b		fu_cont
# if the exception is an opclass zero or two unimplemented data type
# exception, then the a7' calculated here is wrong since it doesn't
# stack an ea. however, we don't need an a7' for this case anyways.
fu_s:
	lea		0x4+EXC_EA(%a6),%a0	# load old a7'
	mov.l		%a0,EXC_A7(%a6)		# save on stack

fu_cont:

# the FPIAR holds the "current PC" of the faulting instruction
# the FPIAR should be set correctly for ALL exceptions passing through
# this point.
	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch the instruction words
	mov.l		%d0,EXC_OPWORD(%a6)	# store OPWORD and EXTWORD

############################

	clr.b		SPCOND_FLG(%a6)		# clear special condition flag

# Separate opclass three (fpn-to-mem) ops since they have a different
# stack frame and protocol.
	btst		&0x5,EXC_CMDREG(%a6)	# is it an fmove out?
	bne.w		fu_out			# yes

# Separate packed opclass two instructions.
	bfextu		EXC_CMDREG(%a6){&0:&6},%d0
	cmpi.b		%d0,&0x13
	beq.w		fu_in_pack


# I'm not sure at this point what FPSR bits are valid for this instruction.
# so, since the emulation routines re-create them anyways, zero exception field
	andi.l		&0x00ff00ff,USER_FPSR(%a6) # zero exception field

	fmov.l		&0x0,%fpcr		# zero current control regs
	fmov.l		&0x0,%fpsr

# Opclass two w/ memory-to-fpn operation will have an incorrect extended
# precision format if the src format was single or double and the
# source data type was an INF, NAN, DENORM, or UNNORM
	lea		FP_SRC(%a6),%a0		# pass ptr to input
	bsr.l		fix_skewed_ops

# we don't know whether the src operand or the dst operand (or both) is the
# UNNORM or DENORM. call the function that tags the operand type. if the
# input is an UNNORM, then convert it to a NORM, DENORM, or ZERO.
	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
	bsr.l		set_tag_x		# tag the operand type
	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
	bne.b		fu_op2			# no
	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO

fu_op2:
	mov.b		%d0,STAG(%a6)		# save src optype tag

	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg

# bit five of the fp extension word separates the monadic and dyadic operations
# at this point
	btst		&0x5,1+EXC_CMDREG(%a6)	# is operation monadic or dyadic?
	beq.b		fu_extract		# monadic
	cmpi.b		1+EXC_CMDREG(%a6),&0x3a	# is operation an ftst?
	beq.b		fu_extract		# yes, so it's monadic, too

	bsr.l		load_fpn2		# load dst into FP_DST

	lea		FP_DST(%a6),%a0		# pass: ptr to dst op
	bsr.l		set_tag_x		# tag the operand type
	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
	bne.b		fu_op2_done		# no
	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
fu_op2_done:
	mov.b		%d0,DTAG(%a6)		# save dst optype tag

fu_extract:
	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# fetch rnd mode/prec

	bfextu		1+EXC_CMDREG(%a6){&1:&7},%d1 # extract extension

	lea		FP_SRC(%a6),%a0
	lea		FP_DST(%a6),%a1

	mov.l		(tbl_unsupp.l,%pc,%d1.l*4),%d1 # fetch routine addr
	jsr		(tbl_unsupp.l,%pc,%d1.l*1)

#
# Exceptions in order of precedence:
#	BSUN	: none
#	SNAN	: all dyadic ops
#	OPERR	: fsqrt(-NORM)
#	OVFL	: all except ftst,fcmp
#	UNFL	: all except ftst,fcmp
#	DZ	: fdiv
#	INEX2	: all except ftst,fcmp
#	INEX1	: none (packed doesn't go through here)
#

# we determine the highest priority exception(if any) set by the
# emulation routine that has also been enabled by the user.
	mov.b		FPCR_ENABLE(%a6),%d0	# fetch exceptions set
	bne.b		fu_in_ena		# some are enabled

fu_in_cont:
# fcmp and ftst do not store any result.
	mov.b		1+EXC_CMDREG(%a6),%d0	# fetch extension
	andi.b		&0x38,%d0		# extract bits 3-5
	cmpi.b		%d0,&0x38		# is instr fcmp or ftst?
	beq.b		fu_in_exit		# yes

	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
	bsr.l		store_fpreg		# store the result

fu_in_exit:

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	unlk		%a6

	bra.l		_fpsp_done

fu_in_ena:
	and.b		FPSR_EXCEPT(%a6),%d0	# keep only ones enabled
	bfffo		%d0{&24:&8},%d0		# find highest priority exception
	bne.b		fu_in_exc		# there is at least one set

#
# No exceptions occurred that were also enabled. Now:
#
#	if (OVFL && ovfl_disabled && inexact_enabled) {
#	    branch to _real_inex() (even if the result was exact!);
#	} else {
#	    save the result in the proper fp reg (unless the op is fcmp or ftst);
#	    return;
#	}
#
	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set?
	beq.b		fu_in_cont		# no

fu_in_ovflchk:
	btst		&inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled?
	beq.b		fu_in_cont		# no
	bra.w		fu_in_exc_ovfl		# go insert overflow frame

#
# An exception occurred and that exception was enabled:
#
#	shift enabled exception field into lo byte of d0;
#	if (((INEX2 || INEX1) && inex_enabled && OVFL && ovfl_disabled) ||
#	    ((INEX2 || INEX1) && inex_enabled && UNFL && unfl_disabled)) {
#	    /*
#	     * this is the case where we must call _real_inex() now or else
#	     * there will be no other way to pass it the exceptional operand
#	     */
#	    call _real_inex();
#	} else {
#	    restore exc state (SNAN||OPERR||OVFL||UNFL||DZ||INEX) into the FPU;
#	}
#
fu_in_exc:
	subi.l		&24,%d0			# fix offset to be 0-7
	cmpi.b		%d0,&0x6		# is exception INEX? (6)
	bne.b		fu_in_exc_exit		# no

# the enabled exception was inexact
	btst		&unfl_bit,FPSR_EXCEPT(%a6) # did disabled underflow occur?
	bne.w		fu_in_exc_unfl		# yes
	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # did disabled overflow occur?
	bne.w		fu_in_exc_ovfl		# yes

# here, we insert the correct fsave status value into the fsave frame for the
# corresponding exception. the operand in the fsave frame should be the original
# src operand.
fu_in_exc_exit:
	mov.l		%d0,-(%sp)		# save d0
	bsr.l		funimp_skew		# skew sgl or dbl inputs
	mov.l		(%sp)+,%d0		# restore d0

	mov.w		(tbl_except.b,%pc,%d0.w*2),2+FP_SRC(%a6) # create exc status

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	frestore	FP_SRC(%a6)		# restore src op

	unlk		%a6

	bra.l		_fpsp_done

# fsave status words, indexed by exception priority (0=BSUN..7=INEX1)
tbl_except:
	short		0xe000,0xe006,0xe004,0xe005
	short		0xe003,0xe002,0xe001,0xe001

fu_in_exc_unfl:
	mov.w		&0x4,%d0		# index of UNFL status
	bra.b		fu_in_exc_exit
fu_in_exc_ovfl:
	mov.w		&0x03,%d0		# index of OVFL status
	bra.b		fu_in_exc_exit

# If the input operand to this operation was opclass two and a single
# or double precision denorm, inf, or nan, the operand needs to be
# "corrected" in order to have the proper equivalent extended precision
# number.
# in:  a0 = ptr to extended precision operand in the fsave frame
	global		fix_skewed_ops
fix_skewed_ops:
	bfextu		EXC_CMDREG(%a6){&0:&6},%d0 # extract opclass,src fmt
	cmpi.b		%d0,&0x11		# is class = 2 & fmt = sgl?
	beq.b		fso_sgl			# yes
	cmpi.b		%d0,&0x15		# is class = 2 & fmt = dbl?
	beq.b		fso_dbl			# yes
	rts					# no

fso_sgl:
	mov.w		LOCAL_EX(%a0),%d0	# fetch src exponent
	andi.w		&0x7fff,%d0		# strip sign
	cmpi.w		%d0,&0x3f80		# is |exp| == $3f80?
	beq.b		fso_sgl_dnrm_zero	# yes
	cmpi.w		%d0,&0x407f		# no; is |exp| == $407f?
	beq.b		fso_infnan		# yes
	rts					# no

fso_sgl_dnrm_zero:
	andi.l		&0x7fffffff,LOCAL_HI(%a0) # clear j-bit
	beq.b		fso_zero		# it's a skewed zero
fso_sgl_dnrm:
# here, we count on norm not to alter a0...
	bsr.l		norm			# normalize mantissa
	neg.w		%d0			# -shft amt
	addi.w		&0x3f81,%d0		# adjust new exponent
	andi.w		&0x8000,LOCAL_EX(%a0)	# clear old exponent
	or.w		%d0,LOCAL_EX(%a0)	# insert new exponent
	rts

fso_zero:
	andi.w		&0x8000,LOCAL_EX(%a0)	# clear bogus exponent
	rts

fso_infnan:
	andi.b		&0x7f,LOCAL_HI(%a0)	# clear j-bit
	ori.w		&0x7fff,LOCAL_EX(%a0)	# make exponent = $7fff
	rts

fso_dbl:
	mov.w		LOCAL_EX(%a0),%d0	# fetch src exponent
	andi.w		&0x7fff,%d0		# strip sign
	cmpi.w		%d0,&0x3c00		# is |exp| == $3c00?
	beq.b		fso_dbl_dnrm_zero	# yes
	cmpi.w		%d0,&0x43ff		# no; is |exp| == $43ff?
	beq.b		fso_infnan		# yes
	rts					# no

fso_dbl_dnrm_zero:
	andi.l		&0x7fffffff,LOCAL_HI(%a0) # clear j-bit
	bne.b		fso_dbl_dnrm		# it's a skewed denorm
	tst.l		LOCAL_LO(%a0)		# is it a zero?
	beq.b		fso_zero		# yes
fso_dbl_dnrm:
# here, we count on norm not to alter a0...
	bsr.l		norm			# normalize mantissa
	neg.w		%d0			# -shft amt
	addi.w		&0x3c01,%d0		# adjust new exponent
	andi.w		&0x8000,LOCAL_EX(%a0)	# clear old exponent
	or.w		%d0,LOCAL_EX(%a0)	# insert new exponent
	rts

#################################################################

# fmove out took an unimplemented data type exception.
# the src operand is in FP_SRC. Call _fout() to write out the result and
# to determine which exceptions, if any, to take.
fu_out:

# Separate packed move outs from the UNNORM and DENORM move outs.
	bfextu		EXC_CMDREG(%a6){&3:&3},%d0
	cmpi.b		%d0,&0x3
	beq.w		fu_out_pack
	cmpi.b		%d0,&0x7
	beq.w		fu_out_pack


# I'm not sure at this point what FPSR bits are valid for this instruction.
# so, since the emulation routines re-create them anyways, zero exception field.
# fmove out doesn't affect ccodes.
	and.l		&0xffff00ff,USER_FPSR(%a6) # zero exception field

	fmov.l		&0x0,%fpcr		# zero current control regs
	fmov.l		&0x0,%fpsr

# the src can ONLY be a DENORM or an UNNORM! so, don't make any big subroutine
# call here. just figure out what it is...
	mov.w		FP_SRC_EX(%a6),%d0	# get exponent
	andi.w		&0x7fff,%d0		# strip sign
	beq.b		fu_out_denorm		# it's a DENORM

	lea		FP_SRC(%a6),%a0
	bsr.l		unnorm_fix		# yes; fix it

	mov.b		%d0,STAG(%a6)

	bra.b		fu_out_cont
fu_out_denorm:
	mov.b		&DENORM,STAG(%a6)
fu_out_cont:

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# fetch rnd mode/prec

	lea		FP_SRC(%a6),%a0		# pass ptr to src operand

	mov.l		(%a6),EXC_A6(%a6)	# in case a6 changes
	bsr.l		fout			# call fmove out routine

# Exceptions in order of precedence:
#	BSUN	: none
#	SNAN	: none
#	OPERR	: fmove.{b,w,l} out of large UNNORM
#	OVFL	: fmove.{s,d}
#	UNFL	: fmove.{s,d,x}
#	DZ	: none
#	INEX2	: all
#	INEX1	: none (packed doesn't travel through here)

# determine the highest priority exception(if any) set by the
# emulation routine that has also been enabled by the user.
	mov.b		FPCR_ENABLE(%a6),%d0	# fetch exceptions enabled
	bne.w		fu_out_ena		# some are enabled

fu_out_done:

	mov.l		EXC_A6(%a6),(%a6)	# in case a6 changed

# on extended precision opclass three instructions using pre-decrement or
# post-increment addressing mode, the address register is not updated. if the
# address register was the stack pointer used from user mode, then let's update
# it here. if it was used from supervisor mode, then we have to handle this
# as a special case.
	btst		&0x5,EXC_SR(%a6)
	bne.b		fu_out_done_s

	mov.l		EXC_A7(%a6),%a0		# restore a7
	mov.l		%a0,%usp

fu_out_done_cont:
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	unlk		%a6

	btst		&0x7,(%sp)		# is trace on?
	bne.b		fu_out_trace		# yes

	bra.l		_fpsp_done

# is the ea mode pre-decrement of the stack pointer from supervisor mode?
# ("fmov.x fpm,-(a7)") if so,
fu_out_done_s:
	cmpi.b		SPCOND_FLG(%a6),&mda7_flg
	bne.b		fu_out_done_cont

# the extended precision result is still in fp0. but, we need to save it
# somewhere on the stack until we can copy it to its final resting place.
# here, we're counting on the top of the stack to be the old place-holders
# for fp0/fp1 which have already been restored. that way, we can write
# over those destinations with the shifted stack frame.
	fmovm.x		&0x80,FP_SRC(%a6)	# put answer on stack

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	mov.l		(%a6),%a6		# restore frame pointer

# shift the sr/pc fields of the exception frame "down" by 0xc bytes
	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
	mov.l		LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)

# now, copy the result to the proper place on the stack
	mov.l		LOCAL_SIZE+FP_SRC_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp)
	mov.l		LOCAL_SIZE+FP_SRC_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp)
	mov.l		LOCAL_SIZE+FP_SRC_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp)

	add.l		&LOCAL_SIZE-0x8,%sp

	btst		&0x7,(%sp)
	bne.b		fu_out_trace

	bra.l		_fpsp_done

fu_out_ena:
	and.b		FPSR_EXCEPT(%a6),%d0	# keep only ones enabled
	bfffo		%d0{&24:&8},%d0		# find highest priority exception
	bne.b		fu_out_exc		# there is at least one set

# no exceptions were set.
# if a disabled overflow occurred and inexact was enabled but the result
# was exact, then a branch to _real_inex() is made.
	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set?
	beq.w		fu_out_done		# no

fu_out_ovflchk:
	btst		&inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled?
	beq.w		fu_out_done		# no
	bra.w		fu_inex			# yes

#
# The fp move out that took the "Unimplemented Data Type" exception was
# being traced. Since the stack frames are similar, get the "current" PC
# from FPIAR and put it in the trace stack frame then jump to _real_trace().
#
#		 UNSUPP FRAME		   TRACE FRAME
#		*****************	*****************
#		*      EA	*	*    Current	*
#		*		*	*      PC	*
#		*****************	*****************
#		* 0x3 *  0x0dc	*	* 0x2 * 0x024	*
#		*****************	*****************
#		*    Next	*	*    Next	*
#		*     PC	*	*     PC	*
#		*****************	*****************
#		*     SR	*	*     SR	*
#		*****************	*****************
#
fu_out_trace:
	mov.w		&0x2024,0x6(%sp)
	fmov.l		%fpiar,0x8(%sp)
	bra.l		_real_trace

# an exception occurred and that exception was enabled.
fu_out_exc:
	subi.l		&24,%d0			# fix offset to be 0-7

# we don't mess with the existing fsave frame. just re-insert it and
# jump to the "_real_{}()" handler...
	mov.w		(tbl_fu_out.b,%pc,%d0.w*2),%d0
	jmp		(tbl_fu_out.b,%pc,%d0.w*1)

	swbeg		&0x8
tbl_fu_out:
	short		tbl_fu_out - tbl_fu_out	# BSUN can't happen
	short		tbl_fu_out - tbl_fu_out	# SNAN can't happen
	short		fu_operr - tbl_fu_out	# OPERR
	short		fu_ovfl - tbl_fu_out	# OVFL
	short		fu_unfl - tbl_fu_out	# UNFL
	short		tbl_fu_out - tbl_fu_out	# DZ can't happen
	short		fu_inex - tbl_fu_out	# INEX2
	short		tbl_fu_out - tbl_fu_out	# INEX1 won't make it here

# for snan,operr,ovfl,unfl, src op is still in FP_SRC so just
# frestore it.
fu_snan:
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	mov.w		&0x30d8,EXC_VOFF(%a6)	# vector offset = 0xd8
	mov.w		&0xe006,2+FP_SRC(%a6)	# set fsave status = SNAN

	frestore	FP_SRC(%a6)

	unlk		%a6


	bra.l		_real_snan

fu_operr:
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	mov.w		&0x30d0,EXC_VOFF(%a6)	# vector offset = 0xd0
	mov.w		&0xe004,2+FP_SRC(%a6)	# set fsave status = OPERR

	frestore	FP_SRC(%a6)

	unlk		%a6


	bra.l		_real_operr

fu_ovfl:
	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP to the stack

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	mov.w		&0x30d4,EXC_VOFF(%a6)	# vector offset = 0xd4
	mov.w		&0xe005,2+FP_SRC(%a6)	# set fsave status = OVFL

	frestore	FP_SRC(%a6)		# restore EXOP

	unlk		%a6

	bra.l		_real_ovfl

# underflow can happen for extended precision. extended precision opclass
# three instruction exceptions don't update the stack pointer. so, if the
# exception occurred from user mode, then simply update a7 and exit normally.
# if the exception occurred from supervisor mode, check if the <ea> mode was
# -(a7), which requires the stack frame to be shifted (see fu_unfl_s below).
fu_unfl:
	mov.l		EXC_A6(%a6),(%a6)	# restore a6

	btst		&0x5,EXC_SR(%a6)
	bne.w		fu_unfl_s

	mov.l		EXC_A7(%a6),%a0		# restore a7 whether we need
	mov.l		%a0,%usp		# to or not...

fu_unfl_cont:
	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP to the stack

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	mov.w		&0x30cc,EXC_VOFF(%a6)	# vector offset = 0xcc
	mov.w		&0xe003,2+FP_SRC(%a6)	# set fsave status = UNFL

	frestore	FP_SRC(%a6)		# restore EXOP

	unlk		%a6

	bra.l		_real_unfl

fu_unfl_s:
	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # was the <ea> mode -(sp)?
	bne.b		fu_unfl_cont

# the extended precision result is still in fp0. but, we need to save it
# somewhere on the stack until we can copy it to its final resting place
# (where the exc frame is currently). make sure it's not at the top of the
# frame or it will get overwritten when the exc stack frame is shifted "down".
	fmovm.x		&0x80,FP_SRC(%a6)	# put answer on stack
	fmovm.x		&0x40,FP_DST(%a6)	# put EXOP on stack

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	mov.w		&0x30cc,EXC_VOFF(%a6)	# vector offset = 0xcc
	mov.w		&0xe003,2+FP_DST(%a6)	# set fsave status = UNFL

	frestore	FP_DST(%a6)		# restore EXOP

	mov.l		(%a6),%a6		# restore frame pointer

# shift the sr/pc/ea fields of the exception frame "down" by 0xc bytes
	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
	mov.l		LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
	mov.l		LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)

# now, copy the result to the proper place on the stack
	mov.l		LOCAL_SIZE+FP_SRC_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp)
	mov.l		LOCAL_SIZE+FP_SRC_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp)
	mov.l		LOCAL_SIZE+FP_SRC_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp)

	add.l		&LOCAL_SIZE-0x8,%sp

	bra.l		_real_unfl

# fmove in and out enter here.
fu_inex:
	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP to the stack

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	mov.w		&0x30c4,EXC_VOFF(%a6)	# vector offset = 0xc4
	mov.w		&0xe001,2+FP_SRC(%a6)	# set fsave status = INEX

	frestore	FP_SRC(%a6)		# restore EXOP

	unlk		%a6


	bra.l		_real_inex

#########################################################################
#########################################################################
fu_in_pack:


# I'm not sure at this point what FPSR bits are valid for this instruction.
# so, since the emulation routines re-create them anyways, zero exception field
	andi.l		&0x0ff00ff,USER_FPSR(%a6) # zero exception field

	fmov.l		&0x0,%fpcr		# zero current control regs
	fmov.l		&0x0,%fpsr

	bsr.l		get_packed		# fetch packed src operand

	lea		FP_SRC(%a6),%a0		# pass ptr to src
	bsr.l		set_tag_x		# set src optype tag

	mov.b		%d0,STAG(%a6)		# save src optype tag

	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg

# bit five of the fp extension word separates the monadic and dyadic operations
# at this point
	btst		&0x5,1+EXC_CMDREG(%a6)	# is operation monadic or dyadic?
	beq.b		fu_extract_p		# monadic
	cmpi.b		1+EXC_CMDREG(%a6),&0x3a	# is operation an ftst?
	beq.b		fu_extract_p		# yes, so it's monadic, too

	bsr.l		load_fpn2		# load dst into FP_DST

	lea		FP_DST(%a6),%a0		# pass: ptr to dst op
	bsr.l		set_tag_x		# tag the operand type
	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
	bne.b		fu_op2_done_p		# no
	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
fu_op2_done_p:
	mov.b		%d0,DTAG(%a6)		# save dst optype tag

fu_extract_p:
	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# fetch rnd mode/prec

	bfextu		1+EXC_CMDREG(%a6){&1:&7},%d1 # extract extension

	lea		FP_SRC(%a6),%a0
	lea		FP_DST(%a6),%a1

	mov.l		(tbl_unsupp.l,%pc,%d1.l*4),%d1 # fetch routine addr
	jsr		(tbl_unsupp.l,%pc,%d1.l*1)

#
# Exceptions in order of precedence:
#	BSUN	: none
#	SNAN	: all dyadic ops
#	OPERR	: fsqrt(-NORM)
#	OVFL	: all except ftst,fcmp
#	UNFL	: all except ftst,fcmp
#	DZ	: fdiv
#	INEX2	: all except ftst,fcmp
#	INEX1	: all
#

# we determine the highest priority exception(if any) set by the
# emulation routine that has also been enabled by the user.
	mov.b		FPCR_ENABLE(%a6),%d0	# fetch exceptions enabled
	bne.w		fu_in_ena_p		# some are enabled

fu_in_cont_p:
# fcmp and ftst do not store any result.
	mov.b		1+EXC_CMDREG(%a6),%d0	# fetch extension
	andi.b		&0x38,%d0		# extract bits 3-5
	cmpi.b		%d0,&0x38		# is instr fcmp or ftst?
	beq.b		fu_in_exit_p		# yes

	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
	bsr.l		store_fpreg		# store the result

fu_in_exit_p:

	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
	bne.w		fu_in_exit_s_p		# supervisor

	mov.l		EXC_A7(%a6),%a0		# update user a7
	mov.l		%a0,%usp

fu_in_exit_cont_p:
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	unlk		%a6			# unravel stack frame

	btst		&0x7,(%sp)		# is trace on?
	bne.w		fu_trace_p		# yes

	bra.l		_fpsp_done		# exit to os

# the exception occurred in supervisor mode. check to see if the
# addressing mode was (a7)+. if so, we'll need to shift the
# stack frame "up".
fu_in_exit_s_p:
	btst		&mia7_bit,SPCOND_FLG(%a6) # was ea mode (a7)+
	beq.b		fu_in_exit_cont_p	# no

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	unlk		%a6			# unravel stack frame

# shift the stack frame "up". we don't really care about the <ea> field.
	mov.l		0x4(%sp),0x10(%sp)
	mov.l		0x0(%sp),0xc(%sp)
	add.l		&0xc,%sp

	btst		&0x7,(%sp)		# is trace on?
	bne.w		fu_trace_p		# yes

	bra.l		_fpsp_done		# exit to os

fu_in_ena_p:
	and.b		FPSR_EXCEPT(%a6),%d0	# keep only ones enabled & set
	bfffo		%d0{&24:&8},%d0		# find highest priority exception
	bne.b		fu_in_exc_p		# at least one was set

#
# No exceptions occurred that were also enabled. Now:
#
#	if (OVFL && ovfl_disabled && inexact_enabled) {
#	    branch to _real_inex() (even if the result was exact!);
#	} else {
#	    save the result in the proper fp reg (unless the op is fcmp or ftst);
#	    return;
#	}
#
	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set?
	beq.w		fu_in_cont_p		# no

fu_in_ovflchk_p:
	btst		&inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled?
1953 beq.w fu_in_cont_p # no 1954 bra.w fu_in_exc_ovfl_p # do _real_inex() now 1955 1956# 1957# An exception occurred and that exception was enabled: 1958# 1959# shift enabled exception field into lo byte of d0; 1960# if (((INEX2 || INEX1) && inex_enabled && OVFL && ovfl_disabled) || 1961# ((INEX2 || INEX1) && inex_enabled && UNFL && unfl_disabled)) { 1962# /* 1963# * this is the case where we must call _real_inex() now or else 1964# * there will be no other way to pass it the exceptional operand 1965# */ 1966# call _real_inex(); 1967# } else { 1968# restore exc state (SNAN||OPERR||OVFL||UNFL||DZ||INEX) into the FPU; 1969# } 1970# 1971fu_in_exc_p: 1972 subi.l &24,%d0 # fix offset to be 0-8 1973 cmpi.b %d0,&0x6 # is exception INEX? (6 or 7) 1974 blt.b fu_in_exc_exit_p # no 1975 1976# the enabled exception was inexact 1977 btst &unfl_bit,FPSR_EXCEPT(%a6) # did disabled underflow occur? 1978 bne.w fu_in_exc_unfl_p # yes 1979 btst &ovfl_bit,FPSR_EXCEPT(%a6) # did disabled overflow occur? 1980 bne.w fu_in_exc_ovfl_p # yes 1981 1982# here, we insert the correct fsave status value into the fsave frame for the 1983# corresponding exception. the operand in the fsave frame should be the original 1984# src operand. 1985# as a reminder for future predicted pain and agony, we are passing in fsave the 1986# "non-skewed" operand for cases of sgl and dbl src INFs,NANs, and DENORMs. 1987# this is INCORRECT for enabled SNAN which would give to the user the skewed SNAN!!! 1988fu_in_exc_exit_p: 1989 btst &0x5,EXC_SR(%a6) # user or supervisor? 
1990 bne.w fu_in_exc_exit_s_p # supervisor 1991 1992 mov.l EXC_A7(%a6),%a0 # update user a7 1993 mov.l %a0,%usp 1994 1995fu_in_exc_exit_cont_p: 1996 mov.w (tbl_except_p.b,%pc,%d0.w*2),2+FP_SRC(%a6) 1997 1998 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1 1999 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs 2000 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 2001 2002 frestore FP_SRC(%a6) # restore src op 2003 2004 unlk %a6 2005 2006 btst &0x7,(%sp) # is trace enabled? 2007 bne.w fu_trace_p # yes 2008 2009 bra.l _fpsp_done 2010 2011tbl_except_p: 2012 short 0xe000,0xe006,0xe004,0xe005 2013 short 0xe003,0xe002,0xe001,0xe001 2014 2015fu_in_exc_ovfl_p: 2016 mov.w &0x3,%d0 2017 bra.w fu_in_exc_exit_p 2018 2019fu_in_exc_unfl_p: 2020 mov.w &0x4,%d0 2021 bra.w fu_in_exc_exit_p 2022 2023fu_in_exc_exit_s_p: 2024 btst &mia7_bit,SPCOND_FLG(%a6) 2025 beq.b fu_in_exc_exit_cont_p 2026 2027 mov.w (tbl_except_p.b,%pc,%d0.w*2),2+FP_SRC(%a6) 2028 2029 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1 2030 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs 2031 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 2032 2033 frestore FP_SRC(%a6) # restore src op 2034 2035 unlk %a6 # unravel stack frame 2036 2037# shift stack frame "up". who cares about <ea> field. 2038 mov.l 0x4(%sp),0x10(%sp) 2039 mov.l 0x0(%sp),0xc(%sp) 2040 add.l &0xc,%sp 2041 2042 btst &0x7,(%sp) # is trace on? 2043 bne.b fu_trace_p # yes 2044 2045 bra.l _fpsp_done # exit to os 2046 2047# 2048# The opclass two PACKED instruction that took an "Unimplemented Data Type" 2049# exception was being traced. Make the "current" PC the FPIAR and put it in the 2050# trace stack frame then jump to _real_trace(). 
#
#	     UNSUPP FRAME		    TRACE FRAME
#	*****************		*****************
#	*       EA	*		*    Current	*
#	*		*		*      PC	*
#	*****************		*****************
#	* 0x2 *  0x0dc	*		* 0x2 *  0x024	*
#	*****************		*****************
#	*     Next	*		*     Next	*
#	*      PC	*		*      PC	*
#	*****************		*****************
#	*      SR	*		*      SR	*
#	*****************		*****************
fu_trace_p:
	mov.w		&0x2024,0x6(%sp)	# stk fmt = 0x2; voff = 0x024
	fmov.l		%fpiar,0x8(%sp)		# "Current PC" is in FPIAR

	bra.l		_real_trace

#########################################################
#########################################################
# fu_out_pack: emulate an opclass-three (FP register -> memory) fmove
# whose destination format is PACKED decimal. The source register is
# loaded, tagged, and passed to fout() which does the conversion and the
# actual move out. Enabled SNAN, OPERR, or INEX exceptions are then
# reported through the corresponding "callout".
fu_out_pack:

# I'm not sure at this point what FPSR bits are valid for this instruction.
# so, since the emulation routines re-create them anyways, zero exception field.
# fmove out doesn't affect ccodes.
	and.l		&0xffff00ff,USER_FPSR(%a6) # zero exception field

	fmov.l		&0x0,%fpcr		# zero current control regs
	fmov.l		&0x0,%fpsr

	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # fetch src fp regno
	bsr.l		load_fpn1		# load src into FP_SRC

# unlike other opclass 3, unimplemented data type exceptions, packed must be
# able to detect all operand types.
	lea		FP_SRC(%a6),%a0		# pass ptr to src
	bsr.l		set_tag_x		# tag the operand type
	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
	bne.b		fu_op2_p		# no
	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO

fu_op2_p:
	mov.b		%d0,STAG(%a6)		# save src optype tag

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# fetch rnd mode/prec

	lea		FP_SRC(%a6),%a0		# pass ptr to src operand

	mov.l		(%a6),EXC_A6(%a6)	# in case a6 changes
	bsr.l		fout			# call fmove out routine

# Exceptions in order of precedence:
#	BSUN	: no
#	SNAN	: yes
#	OPERR	: if ((k_factor > +17) || (dec. exp exceeds 3 digits))
#	OVFL	: no
#	UNFL	: no
#	DZ	: no
#	INEX2	: yes
#	INEX1	: no

# determine the highest priority exception(if any) set by the
# emulation routine that has also been enabled by the user.
	mov.b		FPCR_ENABLE(%a6),%d0	# fetch exceptions enabled
	bne.w		fu_out_ena_p		# some are enabled

fu_out_exit_p:
	mov.l		EXC_A6(%a6),(%a6)	# restore a6

	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
	bne.b		fu_out_exit_s_p		# supervisor

	mov.l		EXC_A7(%a6),%a0		# update user a7
	mov.l		%a0,%usp

fu_out_exit_cont_p:
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	unlk		%a6			# unravel stack frame

	btst		&0x7,(%sp)		# is trace on?
	bne.w		fu_trace_p		# yes

	bra.l		_fpsp_done		# exit to os

# the exception occurred in supervisor mode. check to see if the
# addressing mode was -(a7). if so, we'll need to shift the
# stack frame "down".
fu_out_exit_s_p:
	btst		&mda7_bit,SPCOND_FLG(%a6) # was ea mode -(a7)
	beq.b		fu_out_exit_cont_p	# no

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	mov.l		(%a6),%a6		# restore frame pointer

# move the exception frame "down" 12 bytes
	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
	mov.l		LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)

# now, copy the result to the proper place on the stack
	mov.l		LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp)
	mov.l		LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp)
	mov.l		LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp)

	add.l		&LOCAL_SIZE-0x8,%sp

	btst		&0x7,(%sp)		# is trace on?
	bne.w		fu_trace_p		# yes

	bra.l		_fpsp_done		# exit to os

fu_out_ena_p:
	and.b		FPSR_EXCEPT(%a6),%d0	# keep only ones enabled
	bfffo		%d0{&24:&8},%d0		# find highest priority exception
	beq.w		fu_out_exit_p		# none both set and enabled

	mov.l		EXC_A6(%a6),(%a6)	# restore a6

# an exception occurred and that exception was enabled.
# the only exception possible on packed move out are INEX, OPERR, and SNAN.
fu_out_exc_p:
	cmpi.b		%d0,&0x1a		# dispatch on priority offset
	bgt.w		fu_inex_p2		# INEX
	beq.w		fu_operr_p		# OPERR

fu_snan_p:
	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
	bne.b		fu_snan_s_p		# supervisor

	mov.l		EXC_A7(%a6),%a0		# update user a7
	mov.l		%a0,%usp
	bra.w		fu_snan

fu_snan_s_p:
	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # was ea mode -(a7)?
	bne.w		fu_snan			# no

# the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
# the strategy is to move the exception frame "down" 12 bytes. then, we
# can store the default result where the exception frame was.
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	mov.w		&0x30d8,EXC_VOFF(%a6)	# vector offset = 0xd8 (SNAN)
	mov.w		&0xe006,2+FP_SRC(%a6)	# set fsave status

	frestore	FP_SRC(%a6)		# restore src operand

	mov.l		(%a6),%a6		# restore frame pointer

	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
	mov.l		LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
	mov.l		LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)

# now, we copy the default result to its proper location
	mov.l		LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp)
	mov.l		LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp)
	mov.l		LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp)

	add.l		&LOCAL_SIZE-0x8,%sp


	bra.l		_real_snan

fu_operr_p:
	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
	bne.w		fu_operr_p_s		# supervisor

	mov.l		EXC_A7(%a6),%a0		# update user a7
	mov.l		%a0,%usp
	bra.w		fu_operr

fu_operr_p_s:
	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # was ea mode -(a7)?
	bne.w		fu_operr		# no

# the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
# the strategy is to move the exception frame "down" 12 bytes. then, we
# can store the default result where the exception frame was.
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	mov.w		&0x30d0,EXC_VOFF(%a6)	# vector offset = 0xd0 (OPERR)
	mov.w		&0xe004,2+FP_SRC(%a6)	# set fsave status

	frestore	FP_SRC(%a6)		# restore src operand

	mov.l		(%a6),%a6		# restore frame pointer

	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
	mov.l		LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
	mov.l		LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)

# now, we copy the default result to its proper location
	mov.l		LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp)
	mov.l		LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp)
	mov.l		LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp)

	add.l		&LOCAL_SIZE-0x8,%sp


	bra.l		_real_operr

fu_inex_p2:
	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
	bne.w		fu_inex_s_p2		# supervisor

	mov.l		EXC_A7(%a6),%a0		# update user a7
	mov.l		%a0,%usp
	bra.w		fu_inex

fu_inex_s_p2:
	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # was ea mode -(a7)?
	bne.w		fu_inex			# no

# the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
# the strategy is to move the exception frame "down" 12 bytes. then, we
# can store the default result where the exception frame was.
# (continuation of fu_inex_s_p2: build the INEX frame and default result)
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	mov.w		&0x30c4,EXC_VOFF(%a6)	# vector offset = 0xc4 (INEX)
	mov.w		&0xe001,2+FP_SRC(%a6)	# set fsave status

	frestore	FP_SRC(%a6)		# restore src operand

	mov.l		(%a6),%a6		# restore frame pointer

	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
	mov.l		LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
	mov.l		LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)

# now, we copy the default result to its proper location
	mov.l		LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp)
	mov.l		LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp)
	mov.l		LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp)

	add.l		&LOCAL_SIZE-0x8,%sp


	bra.l		_real_inex

#########################################################################

#
# if we're stuffing a source operand back into an fsave frame then we
# have to make sure that for single or double source operands that the
# format stuffed is as weird as the hardware usually makes it.
# dispatches on the source-format specifier in the extension word:
# sgl (0x1) and dbl (0x5) get skewed; all other formats are left alone.
#
	global		funimp_skew
funimp_skew:
	bfextu		EXC_EXTWORD(%a6){&3:&3},%d0 # extract src specifier
	cmpi.b		%d0,&0x1		# was src sgl?
	beq.b		funimp_skew_sgl		# yes
	cmpi.b		%d0,&0x5		# was src dbl?
	beq.b		funimp_skew_dbl		# yes
	rts

# skew a single-precision DENORM in FP_SRC: shift the mantissa right,
# set the j-bit, and insert the exponent (0x3f80) the hardware produces.
funimp_skew_sgl:
	mov.w		FP_SRC_EX(%a6),%d0	# fetch DENORM exponent
	andi.w		&0x7fff,%d0		# strip sign
	beq.b		funimp_skew_sgl_not	# exponent field is zero; done
	cmpi.w		%d0,&0x3f80
	bgt.b		funimp_skew_sgl_not	# exp above sgl denorm range; done
	neg.w		%d0			# make exponent negative
	addi.w		&0x3f81,%d0		# find amt to shift
	mov.l		FP_SRC_HI(%a6),%d1	# fetch DENORM hi(man)
	lsr.l		%d0,%d1			# shift it
	bset		&31,%d1			# set j-bit
	mov.l		%d1,FP_SRC_HI(%a6)	# insert new hi(man)
	andi.w		&0x8000,FP_SRC_EX(%a6)	# clear old exponent
	ori.w		&0x3f80,FP_SRC_EX(%a6)	# insert new "skewed" exponent
funimp_skew_sgl_not:
	rts

# skew a double-precision DENORM in FP_SRC: denormalize the mantissa to
# the dbl threshold via dnrm_lp, set the j-bit, and insert the exponent
# (0x3c00, plus sign) the hardware produces.
funimp_skew_dbl:
	mov.w		FP_SRC_EX(%a6),%d0	# fetch DENORM exponent
	andi.w		&0x7fff,%d0		# strip sign
	beq.b		funimp_skew_dbl_not	# exponent field is zero; done
	cmpi.w		%d0,&0x3c00
	bgt.b		funimp_skew_dbl_not	# exp above dbl denorm range; done

	tst.b		FP_SRC_EX(%a6)		# make "internal format"
	smi.b		0x2+FP_SRC(%a6)		# remember sign (byte = 0xff if neg)
	mov.w		%d0,FP_SRC_EX(%a6)	# insert exponent with cleared sign
	clr.l		%d0			# clear g,r,s
	lea		FP_SRC(%a6),%a0		# pass ptr to src op
	mov.w		&0x3c01,%d1		# pass denorm threshold
	bsr.l		dnrm_lp			# denorm it
	mov.w		&0x3c00,%d0		# new exponent
	tst.b		0x2+FP_SRC(%a6)		# is sign set?
	beq.b		fss_dbl_denorm_done	# no
	bset		&15,%d0			# set sign
fss_dbl_denorm_done:
	bset		&0x7,FP_SRC_HI(%a6)	# set j-bit
	mov.w		%d0,FP_SRC_EX(%a6)	# insert new exponent
funimp_skew_dbl_not:
	rts

#########################################################################
# _mem_write2: write a 12-byte (extended/packed) result out.
# In user mode this tail-jumps to the _dmem_write "callout" (which
# performs the write and returns its own status in d1). In supervisor
# mode the 12 bytes at (a0) are stashed in the FP_DST temporaries of the
# exception frame instead, and d1 is cleared to signal success.
	global		_mem_write2
_mem_write2:
	btst		&0x5,EXC_SR(%a6)	# user or supervisor mode?
	beq.l		_dmem_write		# user; let the callout write it
	mov.l		0x0(%a0),FP_DST_EX(%a6)	# supervisor; stash result
	mov.l		0x4(%a0),FP_DST_HI(%a6)
	mov.l		0x8(%a0),FP_DST_LO(%a6)
	clr.l		%d1			# return success
	rts

#########################################################################
# XDEF ****************************************************************	#
#	_fpsp_effadd(): 060FPSP entry point for FP "Unimplemented	#
#	effective address" exception.					#
#									#
#	This handler should be the first code executed upon taking the	#
#	FP Unimplemented Effective Address exception in an operating	#
#	system.								#
#									#
# XREF ****************************************************************	#
#	_imem_read_long() - read instruction longword			#
#	fix_skewed_ops() - adjust src operand in fsave frame		#
#	set_tag_x() - determine optype of src/dst operands		#
#	store_fpreg() - store opclass 0 or 2 result to FP regfile	#
#	unnorm_fix() - change UNNORM operands to NORM or ZERO		#
#	load_fpn2() - load dst operand from FP regfile			#
#	tbl_unsupp - add of table of emulation routines for opclass 0,2	#
#	decbin() - convert packed data to FP binary data		#
#	_real_fpu_disabled() - "callout" for "FPU disabled" exception	#
#	_real_access() - "callout" for access error exception		#
#	_mem_read() - read extended immediate operand from memory	#
#	_fpsp_done() - "callout" for exit; work all done		#
#	_real_trace() - "callout" for Trace enabled exception		#
#	fmovm_dynamic() - emulate dynamic fmovm instruction		#
#	fmovm_ctrl() - emulate fmovm control instruction		#
#									#
# INPUT
*************************************************************** # 2398# - The system stack contains the "Unimplemented <ea>" stk frame # 2399# # 2400# OUTPUT ************************************************************** # 2401# If access error: # 2402# - The system stack is changed to an access error stack frame # 2403# If FPU disabled: # 2404# - The system stack is changed to an FPU disabled stack frame # 2405# If Trace exception enabled: # 2406# - The system stack is changed to a Trace exception stack frame # 2407# Else: (normal case) # 2408# - None (correct result has been stored as appropriate) # 2409# # 2410# ALGORITHM *********************************************************** # 2411# This exception handles 3 types of operations: # 2412# (1) FP Instructions using extended precision or packed immediate # 2413# addressing mode. # 2414# (2) The "fmovm.x" instruction w/ dynamic register specification. # 2415# (3) The "fmovm.l" instruction w/ 2 or 3 control registers. # 2416# # 2417# For immediate data operations, the data is read in w/ a # 2418# _mem_read() "callout", converted to FP binary (if packed), and used # 2419# as the source operand to the instruction specified by the instruction # 2420# word. If no FP exception should be reported ads a result of the # 2421# emulation, then the result is stored to the destination register and # 2422# the handler exits through _fpsp_done(). If an enabled exc has been # 2423# signalled as a result of emulation, then an fsave state frame # 2424# corresponding to the FP exception type must be entered into the 060 # 2425# FPU before exiting. In either the enabled or disabled cases, we # 2426# must also check if a Trace exception is pending, in which case, we # 2427# must create a Trace exception stack frame from the current exception # 2428# stack frame. If no Trace is pending, we simply exit through # 2429# _fpsp_done(). 
# 2430# For "fmovm.x", call the routine fmovm_dynamic() which will # 2431# decode and emulate the instruction. No FP exceptions can be pending # 2432# as a result of this operation emulation. A Trace exception can be # 2433# pending, though, which means the current stack frame must be changed # 2434# to a Trace stack frame and an exit made through _real_trace(). # 2435# For the case of "fmovm.x Dn,-(a7)", where the offending instruction # 2436# was executed from supervisor mode, this handler must store the FP # 2437# register file values to the system stack by itself since # 2438# fmovm_dynamic() can't handle this. A normal exit is made through # 2439# fpsp_done(). # 2440# For "fmovm.l", fmovm_ctrl() is used to emulate the instruction. # 2441# Again, a Trace exception may be pending and an exit made through # 2442# _real_trace(). Else, a normal exit is made through _fpsp_done(). # 2443# # 2444# Before any of the above is attempted, it must be checked to # 2445# see if the FPU is disabled. Since the "Unimp <ea>" exception is taken # 2446# before the "FPU disabled" exception, but the "FPU disabled" exception # 2447# has higher priority, we check the disabled bit in the PCR. If set, # 2448# then we must create an 8 word "FPU disabled" exception stack frame # 2449# from the current 4 word exception stack frame. This includes # 2450# reproducing the effective address of the instruction to put on the # 2451# new stack frame. # 2452# # 2453# In the process of all emulation work, if a _mem_read() # 2454# "callout" returns a failing result indicating an access error, then # 2455# we must create an access error stack frame from the current stack # 2456# frame. This information includes a faulting address and a fault- # 2457# status-longword. These are created within this handler. 
# 2458# # 2459######################################################################### 2460 2461 global _fpsp_effadd 2462_fpsp_effadd: 2463 2464# This exception type takes priority over the "Line F Emulator" 2465# exception. Therefore, the FPU could be disabled when entering here. 2466# So, we must check to see if it's disabled and handle that case separately. 2467 mov.l %d0,-(%sp) # save d0 2468 movc %pcr,%d0 # load proc cr 2469 btst &0x1,%d0 # is FPU disabled? 2470 bne.w iea_disabled # yes 2471 mov.l (%sp)+,%d0 # restore d0 2472 2473 link %a6,&-LOCAL_SIZE # init stack frame 2474 2475 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 2476 fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs 2477 fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack 2478 2479# PC of instruction that took the exception is the PC in the frame 2480 mov.l EXC_PC(%a6),EXC_EXTWPTR(%a6) 2481 2482 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr 2483 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr 2484 bsr.l _imem_read_long # fetch the instruction words 2485 mov.l %d0,EXC_OPWORD(%a6) # store OPWORD and EXTWORD 2486 2487######################################################################### 2488 2489 tst.w %d0 # is operation fmovem? 2490 bmi.w iea_fmovm # yes 2491 2492# 2493# here, we will have: 2494# fabs fdabs fsabs facos fmod 2495# fadd fdadd fsadd fasin frem 2496# fcmp fatan fscale 2497# fdiv fddiv fsdiv fatanh fsin 2498# fint fcos fsincos 2499# fintrz fcosh fsinh 2500# fmove fdmove fsmove fetox ftan 2501# fmul fdmul fsmul fetoxm1 ftanh 2502# fneg fdneg fsneg fgetexp ftentox 2503# fsgldiv fgetman ftwotox 2504# fsglmul flog10 2505# fsqrt flog2 2506# fsub fdsub fssub flogn 2507# ftst flognp1 2508# which can all use f<op>.{x,p} 2509# so, now it's immediate data extended precision AND PACKED FORMAT! 2510# 2511iea_op: 2512 andi.l &0x00ff00ff,USER_FPSR(%a6) 2513 2514 btst &0xa,%d0 # is src fmt x or p? 
2515 bne.b iea_op_pack # packed 2516 2517 2518 mov.l EXC_EXTWPTR(%a6),%a0 # pass: ptr to #<data> 2519 lea FP_SRC(%a6),%a1 # pass: ptr to super addr 2520 mov.l &0xc,%d0 # pass: 12 bytes 2521 bsr.l _imem_read # read extended immediate 2522 2523 tst.l %d1 # did ifetch fail? 2524 bne.w iea_iacc # yes 2525 2526 bra.b iea_op_setsrc 2527 2528iea_op_pack: 2529 2530 mov.l EXC_EXTWPTR(%a6),%a0 # pass: ptr to #<data> 2531 lea FP_SRC(%a6),%a1 # pass: ptr to super dst 2532 mov.l &0xc,%d0 # pass: 12 bytes 2533 bsr.l _imem_read # read packed operand 2534 2535 tst.l %d1 # did ifetch fail? 2536 bne.w iea_iacc # yes 2537 2538# The packed operand is an INF or a NAN if the exponent field is all ones. 2539 bfextu FP_SRC(%a6){&1:&15},%d0 # get exp 2540 cmpi.w %d0,&0x7fff # INF or NAN? 2541 beq.b iea_op_setsrc # operand is an INF or NAN 2542 2543# The packed operand is a zero if the mantissa is all zero, else it's 2544# a normal packed op. 2545 mov.b 3+FP_SRC(%a6),%d0 # get byte 4 2546 andi.b &0x0f,%d0 # clear all but last nybble 2547 bne.b iea_op_gp_not_spec # not a zero 2548 tst.l FP_SRC_HI(%a6) # is lw 2 zero? 2549 bne.b iea_op_gp_not_spec # not a zero 2550 tst.l FP_SRC_LO(%a6) # is lw 3 zero? 2551 beq.b iea_op_setsrc # operand is a ZERO 2552iea_op_gp_not_spec: 2553 lea FP_SRC(%a6),%a0 # pass: ptr to packed op 2554 bsr.l decbin # convert to extended 2555 fmovm.x &0x80,FP_SRC(%a6) # make this the srcop 2556 2557iea_op_setsrc: 2558 addi.l &0xc,EXC_EXTWPTR(%a6) # update extension word pointer 2559 2560# FP_SRC now holds the src operand. 2561 lea FP_SRC(%a6),%a0 # pass: ptr to src op 2562 bsr.l set_tag_x # tag the operand type 2563 mov.b %d0,STAG(%a6) # could be ANYTHING!!! 2564 cmpi.b %d0,&UNNORM # is operand an UNNORM? 
2565 bne.b iea_op_getdst # no 2566 bsr.l unnorm_fix # yes; convert to NORM/DENORM/ZERO 2567 mov.b %d0,STAG(%a6) # set new optype tag 2568iea_op_getdst: 2569 clr.b STORE_FLG(%a6) # clear "store result" boolean 2570 2571 btst &0x5,1+EXC_CMDREG(%a6) # is operation monadic or dyadic? 2572 beq.b iea_op_extract # monadic 2573 btst &0x4,1+EXC_CMDREG(%a6) # is operation fsincos,ftst,fcmp? 2574 bne.b iea_op_spec # yes 2575 2576iea_op_loaddst: 2577 bfextu EXC_CMDREG(%a6){&6:&3},%d0 # fetch dst regno 2578 bsr.l load_fpn2 # load dst operand 2579 2580 lea FP_DST(%a6),%a0 # pass: ptr to dst op 2581 bsr.l set_tag_x # tag the operand type 2582 mov.b %d0,DTAG(%a6) # could be ANYTHING!!! 2583 cmpi.b %d0,&UNNORM # is operand an UNNORM? 2584 bne.b iea_op_extract # no 2585 bsr.l unnorm_fix # yes; convert to NORM/DENORM/ZERO 2586 mov.b %d0,DTAG(%a6) # set new optype tag 2587 bra.b iea_op_extract 2588 2589# the operation is fsincos, ftst, or fcmp. only fcmp is dyadic 2590iea_op_spec: 2591 btst &0x3,1+EXC_CMDREG(%a6) # is operation fsincos? 2592 beq.b iea_op_extract # yes 2593# now, we're left with ftst and fcmp. so, first let's tag them so that they don't 2594# store a result. then, only fcmp will branch back and pick up a dst operand. 2595 st STORE_FLG(%a6) # don't store a final result 2596 btst &0x1,1+EXC_CMDREG(%a6) # is operation fcmp? 
2597 beq.b iea_op_loaddst # yes 2598 2599iea_op_extract: 2600 clr.l %d0 2601 mov.b FPCR_MODE(%a6),%d0 # pass: rnd mode,prec 2602 2603 mov.b 1+EXC_CMDREG(%a6),%d1 2604 andi.w &0x007f,%d1 # extract extension 2605 2606 fmov.l &0x0,%fpcr 2607 fmov.l &0x0,%fpsr 2608 2609 lea FP_SRC(%a6),%a0 2610 lea FP_DST(%a6),%a1 2611 2612 mov.l (tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr 2613 jsr (tbl_unsupp.l,%pc,%d1.l*1) 2614 2615# 2616# Exceptions in order of precedence: 2617# BSUN : none 2618# SNAN : all operations 2619# OPERR : all reg-reg or mem-reg operations that can normally operr 2620# OVFL : same as OPERR 2621# UNFL : same as OPERR 2622# DZ : same as OPERR 2623# INEX2 : same as OPERR 2624# INEX1 : all packed immediate operations 2625# 2626 2627# we determine the highest priority exception(if any) set by the 2628# emulation routine that has also been enabled by the user. 2629 mov.b FPCR_ENABLE(%a6),%d0 # fetch exceptions enabled 2630 bne.b iea_op_ena # some are enabled 2631 2632# now, we save the result, unless, of course, the operation was ftst or fcmp. 2633# these don't save results. 2634iea_op_save: 2635 tst.b STORE_FLG(%a6) # does this op store a result? 2636 bne.b iea_op_exit1 # exit with no frestore 2637 2638iea_op_store: 2639 bfextu EXC_CMDREG(%a6){&6:&3},%d0 # fetch dst regno 2640 bsr.l store_fpreg # store the result 2641 2642iea_op_exit1: 2643 mov.l EXC_PC(%a6),USER_FPIAR(%a6) # set FPIAR to "Current PC" 2644 mov.l EXC_EXTWPTR(%a6),EXC_PC(%a6) # set "Next PC" in exc frame 2645 2646 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1 2647 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs 2648 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 2649 2650 unlk %a6 # unravel the frame 2651 2652 btst &0x7,(%sp) # is trace on? 
2653 bne.w iea_op_trace # yes 2654 2655 bra.l _fpsp_done # exit to os 2656 2657iea_op_ena: 2658 and.b FPSR_EXCEPT(%a6),%d0 # keep only ones enable and set 2659 bfffo %d0{&24:&8},%d0 # find highest priority exception 2660 bne.b iea_op_exc # at least one was set 2661 2662# no exception occurred. now, did a disabled, exact overflow occur with inexact 2663# enabled? if so, then we have to stuff an overflow frame into the FPU. 2664 btst &ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur? 2665 beq.b iea_op_save 2666 2667iea_op_ovfl: 2668 btst &inex2_bit,FPCR_ENABLE(%a6) # is inexact enabled? 2669 beq.b iea_op_store # no 2670 bra.b iea_op_exc_ovfl # yes 2671 2672# an enabled exception occurred. we have to insert the exception type back into 2673# the machine. 2674iea_op_exc: 2675 subi.l &24,%d0 # fix offset to be 0-8 2676 cmpi.b %d0,&0x6 # is exception INEX? 2677 bne.b iea_op_exc_force # no 2678 2679# the enabled exception was inexact. so, if it occurs with an overflow 2680# or underflow that was disabled, then we have to force an overflow or 2681# underflow frame. 2682 btst &ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur? 2683 bne.b iea_op_exc_ovfl # yes 2684 btst &unfl_bit,FPSR_EXCEPT(%a6) # did underflow occur? 
2685 bne.b iea_op_exc_unfl # yes 2686 2687iea_op_exc_force: 2688 mov.w (tbl_iea_except.b,%pc,%d0.w*2),2+FP_SRC(%a6) 2689 bra.b iea_op_exit2 # exit with frestore 2690 2691tbl_iea_except: 2692 short 0xe002, 0xe006, 0xe004, 0xe005 2693 short 0xe003, 0xe002, 0xe001, 0xe001 2694 2695iea_op_exc_ovfl: 2696 mov.w &0xe005,2+FP_SRC(%a6) 2697 bra.b iea_op_exit2 2698 2699iea_op_exc_unfl: 2700 mov.w &0xe003,2+FP_SRC(%a6) 2701 2702iea_op_exit2: 2703 mov.l EXC_PC(%a6),USER_FPIAR(%a6) # set FPIAR to "Current PC" 2704 mov.l EXC_EXTWPTR(%a6),EXC_PC(%a6) # set "Next PC" in exc frame 2705 2706 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1 2707 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs 2708 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 2709 2710 frestore FP_SRC(%a6) # restore exceptional state 2711 2712 unlk %a6 # unravel the frame 2713 2714 btst &0x7,(%sp) # is trace on? 2715 bne.b iea_op_trace # yes 2716 2717 bra.l _fpsp_done # exit to os 2718 2719# 2720# The opclass two instruction that took an "Unimplemented Effective Address" 2721# exception was being traced. Make the "current" PC the FPIAR and put it in 2722# the trace stack frame then jump to _real_trace(). 
2723# 2724# UNIMP EA FRAME TRACE FRAME 2725# ***************** ***************** 2726# * 0x0 * 0x0f0 * * Current * 2727# ***************** * PC * 2728# * Current * ***************** 2729# * PC * * 0x2 * 0x024 * 2730# ***************** ***************** 2731# * SR * * Next * 2732# ***************** * PC * 2733# ***************** 2734# * SR * 2735# ***************** 2736iea_op_trace: 2737 mov.l (%sp),-(%sp) # shift stack frame "down" 2738 mov.w 0x8(%sp),0x4(%sp) 2739 mov.w &0x2024,0x6(%sp) # stk fmt = 0x2; voff = 0x024 2740 fmov.l %fpiar,0x8(%sp) # "Current PC" is in FPIAR 2741 2742 bra.l _real_trace 2743 2744######################################################################### 2745iea_fmovm: 2746 btst &14,%d0 # ctrl or data reg 2747 beq.w iea_fmovm_ctrl 2748 2749iea_fmovm_data: 2750 2751 btst &0x5,EXC_SR(%a6) # user or supervisor mode 2752 bne.b iea_fmovm_data_s 2753 2754iea_fmovm_data_u: 2755 mov.l %usp,%a0 2756 mov.l %a0,EXC_A7(%a6) # store current a7 2757 bsr.l fmovm_dynamic # do dynamic fmovm 2758 mov.l EXC_A7(%a6),%a0 # load possibly new a7 2759 mov.l %a0,%usp # update usp 2760 bra.w iea_fmovm_exit 2761 2762iea_fmovm_data_s: 2763 clr.b SPCOND_FLG(%a6) 2764 lea 0x2+EXC_VOFF(%a6),%a0 2765 mov.l %a0,EXC_A7(%a6) 2766 bsr.l fmovm_dynamic # do dynamic fmovm 2767 2768 cmpi.b SPCOND_FLG(%a6),&mda7_flg 2769 beq.w iea_fmovm_data_predec 2770 cmpi.b SPCOND_FLG(%a6),&mia7_flg 2771 bne.w iea_fmovm_exit 2772 2773# right now, d0 = the size. 2774# the data has been fetched from the supervisor stack, but we have not 2775# incremented the stack pointer by the appropriate number of bytes. 2776# do it here. 
# supervisor (a7)+ case: rebuild the exception frame d0 bytes higher
# on the system stack (the bytes the postincrement consumed).
iea_fmovm_data_postinc:
	btst		&0x7,EXC_SR(%a6)	# is trace mode enabled?
	bne.b		iea_fmovm_data_pi_trace

# no trace: rebuild a 4-word "Unimp EA" frame (fmt 0x0; voff 0x0f0)
# shifted up by d0 bytes; "Next PC" (EXC_EXTWPTR) becomes the frame PC.
	mov.w		EXC_SR(%a6),(EXC_SR,%a6,%d0)
	mov.l		EXC_EXTWPTR(%a6),(EXC_PC,%a6,%d0)
	mov.w		&0x00f0,(EXC_VOFF,%a6,%d0)

	lea		(EXC_SR,%a6,%d0),%a0	# a0 = new top of frame
	mov.l		%a0,EXC_SR(%a6)		# stash new sp for pop below

	fmovm.x		EXC_FP0(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	unlk		%a6
	mov.l		(%sp)+,%sp		# sp -> shifted frame
	bra.l		_fpsp_done

# trace enabled: build a 6-word trace frame (fmt 0x2; voff 0x024)
# instead; "Current PC" goes in the fmt $2 extra longword slot.
iea_fmovm_data_pi_trace:
	mov.w		EXC_SR(%a6),(EXC_SR-0x4,%a6,%d0)
	mov.l		EXC_EXTWPTR(%a6),(EXC_PC-0x4,%a6,%d0)
	mov.w		&0x2024,(EXC_VOFF-0x4,%a6,%d0)
	mov.l		EXC_PC(%a6),(EXC_VOFF+0x2-0x4,%a6,%d0)

	lea		(EXC_SR-0x4,%a6,%d0),%a0 # a0 = new top of frame
	mov.l		%a0,EXC_SR(%a6)		# stash new sp for pop below

	fmovm.x		EXC_FP0(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	unlk		%a6
	mov.l		(%sp)+,%sp		# sp -> shifted frame
	bra.l		_real_trace

# supervisor -(a7) case: the registers must be stored BELOW the
# exception frame, so the frame is rebuilt lower on the stack and the
# register images are written out afterwards, one at a time.
# right now, d1 = size and d0 = the strg.
iea_fmovm_data_predec:
	mov.b		%d1,EXC_VOFF(%a6)	# store strg
	mov.b		%d0,0x1+EXC_VOFF(%a6)	# store size

	fmovm.x		EXC_FP0(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	mov.l		(%a6),-(%sp)		# make a copy of a6
	mov.l		%d0,-(%sp)		# save d0
	mov.l		%d1,-(%sp)		# save d1
	mov.l		EXC_EXTWPTR(%a6),-(%sp)	# make a copy of Next PC

	clr.l		%d0
	mov.b		0x1+EXC_VOFF(%a6),%d0	# fetch size
	neg.l		%d0			# get negative of size

	btst		&0x7,EXC_SR(%a6)	# is trace enabled?
	beq.b		iea_fmovm_data_p2

# trace enabled: build a 6-word trace frame (fmt 0x2; voff 0x024)
# d0 (negative size) bytes lower; saved Next PC popped into the frame.
	mov.w		EXC_SR(%a6),(EXC_SR-0x4,%a6,%d0)
	mov.l		EXC_PC(%a6),(EXC_VOFF-0x2,%a6,%d0)
	mov.l		(%sp)+,(EXC_PC-0x4,%a6,%d0)
	mov.w		&0x2024,(EXC_VOFF-0x4,%a6,%d0)

	pea		(%a6,%d0)		# create final sp
	bra.b		iea_fmovm_data_p3

# no trace: build a 4-word frame (fmt 0x0; voff 0x0f0) lower down.
iea_fmovm_data_p2:
	mov.w		EXC_SR(%a6),(EXC_SR,%a6,%d0)
	mov.l		(%sp)+,(EXC_PC,%a6,%d0)
	mov.w		&0x00f0,(EXC_VOFF,%a6,%d0)

	pea		(0x4,%a6,%d0)		# create final sp

# write each selected FP register image to memory below the new frame;
# strg bits (msb first) select fp7..fp0, d0 tracks the running offset.
iea_fmovm_data_p3:
	clr.l		%d1
	mov.b		EXC_VOFF(%a6),%d1	# fetch strg

	tst.b		%d1			# fp7 selected?
	bpl.b		fm_1
	fmovm.x		&0x80,(0x4+0x8,%a6,%d0)
	addi.l		&0xc,%d0		# advance 12 bytes per ext reg
fm_1:
	lsl.b		&0x1,%d1		# fp6 selected?
	bpl.b		fm_2
	fmovm.x		&0x40,(0x4+0x8,%a6,%d0)
	addi.l		&0xc,%d0
fm_2:
	lsl.b		&0x1,%d1		# fp5 selected?
	bpl.b		fm_3
	fmovm.x		&0x20,(0x4+0x8,%a6,%d0)
	addi.l		&0xc,%d0
fm_3:
	lsl.b		&0x1,%d1		# fp4 selected?
	bpl.b		fm_4
	fmovm.x		&0x10,(0x4+0x8,%a6,%d0)
	addi.l		&0xc,%d0
fm_4:
	lsl.b		&0x1,%d1		# fp3 selected?
	bpl.b		fm_5
	fmovm.x		&0x08,(0x4+0x8,%a6,%d0)
	addi.l		&0xc,%d0
fm_5:
	lsl.b		&0x1,%d1		# fp2 selected?
	bpl.b		fm_6
	fmovm.x		&0x04,(0x4+0x8,%a6,%d0)
	addi.l		&0xc,%d0
fm_6:
	lsl.b		&0x1,%d1		# fp1 selected?
	bpl.b		fm_7
	fmovm.x		&0x02,(0x4+0x8,%a6,%d0)
	addi.l		&0xc,%d0
fm_7:
	lsl.b		&0x1,%d1		# fp0 selected?
	bpl.b		fm_end
	fmovm.x		&0x01,(0x4+0x8,%a6,%d0)
fm_end:
# recover d1/d0/a6 from the copies pushed above, then pop the
# precomputed final sp so the rebuilt frame is on top.
	mov.l		0x4(%sp),%d1
	mov.l		0x8(%sp),%d0
	mov.l		0xc(%sp),%a6
	mov.l		(%sp)+,%sp

	btst		&0x7,(%sp)		# is trace enabled?
	beq.l		_fpsp_done
	bra.l		_real_trace

#########################################################################
# fmovm of FP control registers (fpcr/fpsr/fpiar).
iea_fmovm_ctrl:

	bsr.l		fmovm_ctrl		# load ctrl regs

# common exit for the fmovm cases: restore user register state and
# leave through _fpsp_done (or _real_trace if tracing).
iea_fmovm_exit:
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	btst		&0x7,EXC_SR(%a6)	# is trace on?
	bne.b		iea_fmovm_trace		# yes

	mov.l		EXC_EXTWPTR(%a6),EXC_PC(%a6) # set Next PC

	unlk		%a6			# unravel the frame

	bra.l		_fpsp_done		# exit to os

#
# The control reg instruction that took an "Unimplemented Effective Address"
# exception was being traced. The "Current PC" for the trace frame is the
# PC stacked for Unimp EA. The "Next PC" is in EXC_EXTWPTR.
# After fixing the stack frame, jump to _real_trace().
#
#	   UNIMP EA FRAME		   TRACE FRAME
#	*****************		*****************
#	* 0x0 *  0x0f0	*		*    Current	*
#	*****************		*      PC	*
#	*    Current	*		*****************
#	*      PC	*		* 0x2 *  0x024	*
#	*****************		*****************
#	*      SR	*		*     Next	*
#	*****************		*      PC	*
#				 	*****************
#				 	*      SR	*
#				 	*****************
# this ain't a pretty solution, but it works:
# -restore a6 (not with unlk)
# -shift stack frame down over where old a6 used to be
# -add LOCAL_SIZE to stack pointer
iea_fmovm_trace:
	mov.l		(%a6),%a6		# restore frame pointer
	mov.w		EXC_SR+LOCAL_SIZE(%sp),0x0+LOCAL_SIZE(%sp)
	mov.l		EXC_PC+LOCAL_SIZE(%sp),0x8+LOCAL_SIZE(%sp)
	mov.l		EXC_EXTWPTR+LOCAL_SIZE(%sp),0x2+LOCAL_SIZE(%sp)
	mov.w		&0x2024,0x6+LOCAL_SIZE(%sp) # stk fmt = 0x2; voff = 0x024
	add.l		&LOCAL_SIZE,%sp		# clear stack frame

	bra.l		_real_trace

#########################################################################
# The FPU is disabled and so we should really have taken the "Line
# F Emulator" exception. So, here we create an 8-word stack frame
# from our 4-word stack frame. This means we must calculate the length
# of the faulting instruction to get the "next PC". This is trivial for
# immediate operands but requires some extra work for fmovm dynamic
# which can use most addressing modes.
iea_disabled:
	mov.l		(%sp)+,%d0		# restore d0

	link		%a6,&-LOCAL_SIZE	# init stack frame

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1

# PC of instruction that took the exception is the PC in the frame
	mov.l		EXC_PC(%a6),EXC_EXTWPTR(%a6)
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch the instruction words
	mov.l		%d0,EXC_OPWORD(%a6)	# store OPWORD and EXTWORD

	tst.w		%d0			# is instr fmovm?
	bmi.b		iea_dis_fmovm		# yes
# instruction is using an extended precision immediate operand. therefore,
# the total instruction length is 16 bytes.
iea_dis_immed:
	mov.l		&0x10,%d0		# 16 bytes of instruction
	bra.b		iea_dis_cont
iea_dis_fmovm:
	btst		&0xe,%d0		# is instr fmovm ctrl
	bne.b		iea_dis_fmovm_data	# no
# the instruction is a fmovm.l with 2 or 3 registers.
	bfextu		%d0{&19:&3},%d1		# extract reg select field
	mov.l		&0xc,%d0		# base length: 12 bytes
	cmpi.b		%d1,&0x7		# move all regs?
	bne.b		iea_dis_cont
	addq.l		&0x4,%d0		# 3 regs -> 16 bytes total
	bra.b		iea_dis_cont
# the instruction is an fmovm.x dynamic which can use many addressing
# modes and thus can have several different total instruction lengths.
# call fmovm_calc_ea which will go through the ea calc process and,
# as a by-product, will tell us how long the instruction is.
iea_dis_fmovm_data:
	clr.l		%d0
	bsr.l		fmovm_calc_ea
	mov.l		EXC_EXTWPTR(%a6),%d0	# length = ptr past instr...
	sub.l		EXC_PC(%a6),%d0		# ...minus instr start
iea_dis_cont:
	mov.w		%d0,EXC_VOFF(%a6)	# store stack shift value

	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	unlk		%a6

# here, we actually create the 8-word frame from the 4-word frame,
# with the "next PC" as additional info.
# the <ea> field is let as undefined.
	subq.l		&0x8,%sp		# make room for new stack
	mov.l		%d0,-(%sp)		# save d0
	mov.w		0xc(%sp),0x4(%sp)	# move SR
	mov.l		0xe(%sp),0x6(%sp)	# move Current PC
	clr.l		%d0
	mov.w		0x12(%sp),%d0		# fetch instr length (from voff slot)
	mov.l		0x6(%sp),0x10(%sp)	# move Current PC
	add.l		%d0,0x6(%sp)		# make Next PC
	mov.w		&0x402c,0xa(%sp)	# insert offset,frame format
	mov.l		(%sp)+,%d0		# restore d0

	bra.l		_real_fpu_disabled

##########

# instruction access error while emulating: build an access error
# (fmt 0x4) frame from the current one and exit through _real_access.
iea_iacc:
	movc		%pcr,%d0
	btst		&0x1,%d0		# FPU disabled in PCR?
	bne.b		iea_iacc_cont		# yes; skip FP state restore
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1 on stack
iea_iacc_cont:
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	unlk		%a6

	subq.w		&0x8,%sp		# make stack frame bigger
	mov.l		0x8(%sp),(%sp)		# store SR,hi(PC)
	mov.w		0xc(%sp),0x4(%sp)	# store lo(PC)
	mov.w		&0x4008,0x6(%sp)	# store voff
	mov.l		0x2(%sp),0x8(%sp)	# store ea
	mov.l		&0x09428001,0xc(%sp)	# store fslw

iea_acc_done:
	btst		&0x5,(%sp)		# user or supervisor mode?
	beq.b		iea_acc_done2		# user
	bset		&0x2,0xd(%sp)		# set supervisor TM bit

iea_acc_done2:
	bra.l		_real_access

# data access error while emulating: rebuild the frame in place
# (a0 = fault address, d0 = fault status from the failing access).
iea_dacc:
	lea		-LOCAL_SIZE(%a6),%sp

	movc		%pcr,%d1
	btst		&0x1,%d1		# FPU disabled in PCR?
	bne.b		iea_dacc_cont		# yes; skip FP state restore
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1 on stack
	fmovm.l		LOCAL_SIZE+USER_FPCR(%sp),%fpcr,%fpsr,%fpiar # restore ctrl regs
iea_dacc_cont:
	mov.l		(%a6),%a6		# restore frame pointer

	mov.l		0x4+LOCAL_SIZE(%sp),-0x8+0x4+LOCAL_SIZE(%sp)
	mov.w		0x8+LOCAL_SIZE(%sp),-0x8+0x8+LOCAL_SIZE(%sp)
	mov.w		&0x4008,-0x8+0xa+LOCAL_SIZE(%sp) # fmt 0x4; voff 0x008
	mov.l		%a0,-0x8+0xc+LOCAL_SIZE(%sp)	# fault <ea>
	mov.w		%d0,-0x8+0x10+LOCAL_SIZE(%sp)	# fault status word
	mov.w		&0x0001,-0x8+0x12+LOCAL_SIZE(%sp)

	movm.l		LOCAL_SIZE+EXC_DREGS(%sp),&0x0303 # restore d0-d1/a0-a1
	add.w		&LOCAL_SIZE-0x4,%sp

	bra.b		iea_acc_done

#########################################################################
# XDEF **************************************************************** #
#	_fpsp_operr(): 060FPSP entry point for FP Operr exception.	#
#									#
#	This handler should be the first code executed upon taking the	#
#	FP Operand Error exception in an operating system.
#
#									#
# XREF **************************************************************** #
#	_imem_read_long() - read instruction longword			#
#	fix_skewed_ops() - adjust src operand in fsave frame		#
#	_real_operr() - "callout" to operating system operr handler	#
#	_dmem_write_{byte,word,long}() - store data to mem (opclass 3)	#
#	store_dreg_{b,w,l}() - store data to data regfile (opclass 3)	#
#	facc_out_{b,w,l}() - store to memory took access error (opcl 3) #
#									#
# INPUT *************************************************************** #
#	- The system stack contains the FP Operr exception frame	#
#	- The fsave frame contains the source operand			#
#									#
# OUTPUT ************************************************************** #
#	No access error:						#
#	- The system stack is unchanged					#
#	- The fsave frame contains the adjusted src op for opclass 0,2	#
#									#
# ALGORITHM *********************************************************** #
#	In a system where the FP Operr exception is enabled, the goal	#
# is to get to the handler specified at _real_operr(). But, on the 060, #
# for opclass zero and two instruction taking this exception, the	#
# input operand in the fsave frame may be incorrect for some cases	#
# and needs to be corrected. This handler calls fix_skewed_ops() to	#
# do just this and then exits through _real_operr().			#
#	For opclass 3 instructions, the 060 doesn't store the default	#
# operr result out to memory or data register file as it should.	#
# This code must emulate the move out before finally exiting through	#
# _real_operr(). The move out, if to memory, is performed using	#
# _mem_write() "callout" routines that may return a failing result.	#
# In this special case, the handler must exit through facc_out()	#
# which creates an access error stack frame from the current operr	#
# stack frame.								#
#									#
#########################################################################

	global		_fpsp_operr
_fpsp_operr:

	link.w		%a6,&-LOCAL_SIZE	# init stack frame

	fsave		FP_SRC(%a6)		# grab the "busy" frame

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack

# the FPIAR holds the "current PC" of the faulting instruction
	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)

	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch the instruction words
	mov.l		%d0,EXC_OPWORD(%a6)

##############################################################################

	btst		&13,%d0			# is instr an fmove out?
	bne.b		foperr_out		# fmove out


# here, we simply see if the operand in the fsave frame needs to be "unskewed".
# this would be the case for opclass two operations with a source infinity or
# denorm operand in the sgl or dbl format. NANs also become skewed, but can't
# cause an operr so we don't need to check for them here.
	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
	bsr.l		fix_skewed_ops		# fix src op

# common exit: restore user register state, hand the (possibly fixed)
# fsave frame back to the FPU, and exit through the OS callout.
foperr_exit:
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	frestore	FP_SRC(%a6)

	unlk		%a6
	bra.l		_real_operr

########################################################################

#
# the hardware does not save the default result to memory on enabled
# operand error exceptions. we do this here before passing control to
# the user operand error handler.
#
# byte, word, and long destination format operations can pass
# through here. we simply need to test the sign of the src
# operand and save the appropriate minimum or maximum integer value
# to the effective address as pointed to by the stacked effective address.
#
# although packed opclass three operations can take operand error
# exceptions, they won't pass through here since they are caught
# first by the unsupported data format exception handler. that handler
# sends them directly to _real_operr() if necessary.
#
foperr_out:

	mov.w		FP_SRC_EX(%a6),%d1	# fetch exponent
	andi.w		&0x7fff,%d1		# strip sign
	cmpi.w		%d1,&0x7fff		# max exponent => INF or NAN
	bne.b		foperr_out_not_qnan
# the operand is either an infinity or a QNAN.
	tst.l		FP_SRC_LO(%a6)		# lo mantissa non-zero => QNAN
	bne.b		foperr_out_qnan
	mov.l		FP_SRC_HI(%a6),%d1
	andi.l		&0x7fffffff,%d1		# hi mantissa zero => INF
	beq.b		foperr_out_not_qnan
foperr_out_qnan:
	mov.l		FP_SRC_HI(%a6),L_SCR1(%a6) # QNAN: default = hi mantissa
	bra.b		foperr_out_jmp

# INF or normal src: default integer result is the most positive
# (0x7fffffff) or most negative (0x80000000) value, by src sign.
foperr_out_not_qnan:
	mov.l		&0x7fffffff,%d1
	tst.b		FP_SRC_EX(%a6)		# check src sign
	bpl.b		foperr_out_not_qnan2
	addq.l		&0x1,%d1		# 0x7fffffff+1 = 0x80000000
foperr_out_not_qnan2:
	mov.l		%d1,L_SCR1(%a6)

foperr_out_jmp:
	bfextu		%d0{&19:&3},%d0		# extract dst format field
	mov.b		1+EXC_OPWORD(%a6),%d1	# extract <ea> mode,reg
	mov.w		(tbl_operr.b,%pc,%d0.w*2),%a0
	jmp		(tbl_operr.b,%pc,%a0)

tbl_operr:
	short		foperr_out_l - tbl_operr # long word integer
	short		tbl_operr - tbl_operr	# sgl prec shouldn't happen
	short		tbl_operr - tbl_operr	# ext prec shouldn't happen
	short		foperr_exit - tbl_operr	# packed won't enter here
	short		foperr_out_w - tbl_operr # word integer
	short		tbl_operr - tbl_operr	# dbl prec shouldn't happen
	short		foperr_out_b - tbl_operr # byte integer
	short		tbl_operr - tbl_operr	# packed won't enter here

foperr_out_b:
	mov.b		L_SCR1(%a6),%d0		# load positive default result
	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
	ble.b		foperr_out_b_save_dn	# yes
	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
	bsr.l		_dmem_write_byte	# write the default result

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_b		# yes

	bra.w		foperr_exit
foperr_out_b_save_dn:
	andi.w		&0x0007,%d1		# isolate register number
	bsr.l		store_dreg_b		# store result to regfile
	bra.w		foperr_exit

foperr_out_w:
	mov.w		L_SCR1(%a6),%d0		# load positive default result
	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
	ble.b		foperr_out_w_save_dn	# yes
	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
	bsr.l		_dmem_write_word	# write the default result

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_w		# yes

	bra.w		foperr_exit
foperr_out_w_save_dn:
	andi.w		&0x0007,%d1		# isolate register number
	bsr.l		store_dreg_w		# store result to regfile
	bra.w		foperr_exit

foperr_out_l:
	mov.l		L_SCR1(%a6),%d0		# load positive default result
	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
	ble.b		foperr_out_l_save_dn	# yes
	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
	bsr.l		_dmem_write_long	# write the default result

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_l		# yes

	bra.w		foperr_exit
foperr_out_l_save_dn:
	andi.w		&0x0007,%d1		# isolate register number
	bsr.l		store_dreg_l		# store result to regfile
	bra.w		foperr_exit

#########################################################################
# XDEF **************************************************************** #
#	_fpsp_snan(): 060FPSP entry point for FP SNAN exception.	#
#									#
#	This handler should be the first code executed upon taking the	#
#	FP Signalling NAN exception in an operating system.
#
#									#
# XREF **************************************************************** #
#	_imem_read_long() - read instruction longword			#
#	fix_skewed_ops() - adjust src operand in fsave frame		#
#	_real_snan() - "callout" to operating system SNAN handler	#
#	_dmem_write_{byte,word,long}() - store data to mem (opclass 3)	#
#	store_dreg_{b,w,l}() - store data to data regfile (opclass 3)	#
#	facc_out_{b,w,l,d,x}() - store to mem took acc error (opcl 3)	#
#	_calc_ea_fout() - fix An if <ea> is -() or ()+; also get <ea>	#
#									#
# INPUT *************************************************************** #
#	- The system stack contains the FP SNAN exception frame	#
#	- The fsave frame contains the source operand			#
#									#
# OUTPUT ************************************************************** #
#	No access error:						#
#	- The system stack is unchanged					#
#	- The fsave frame contains the adjusted src op for opclass 0,2	#
#									#
# ALGORITHM *********************************************************** #
#	In a system where the FP SNAN exception is enabled, the goal	#
# is to get to the handler specified at _real_snan(). But, on the 060,	#
# for opclass zero and two instructions taking this exception, the	#
# input operand in the fsave frame may be incorrect for some cases	#
# and needs to be corrected. This handler calls fix_skewed_ops() to	#
# do just this and then exits through _real_snan().			#
#	For opclass 3 instructions, the 060 doesn't store the default	#
# SNAN result out to memory or data register file as it should.		#
# This code must emulate the move out before finally exiting through	#
# _real_snan(). The move out, if to memory, is performed using		#
# _mem_write() "callout" routines that may return a failing result.	#
# In this special case, the handler must exit through facc_out()	#
# which creates an access error stack frame from the current SNAN	#
# stack frame.								#
#	For the case of an extended precision opclass 3 instruction,	#
# if the effective addressing mode was -() or ()+, then the address	#
# register must get updated by calling _calc_ea_fout(). If the <ea>	#
# was -(a7) from supervisor mode, then the exception frame currently	#
# on the system stack must be carefully moved "down" to make room	#
# for the operand being moved.						#
#									#
#########################################################################

	global		_fpsp_snan
_fpsp_snan:

	link.w		%a6,&-LOCAL_SIZE	# init stack frame

	fsave		FP_SRC(%a6)		# grab the "busy" frame

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack

# the FPIAR holds the "current PC" of the faulting instruction
	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)

	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch the instruction words
	mov.l		%d0,EXC_OPWORD(%a6)

##############################################################################

	btst		&13,%d0			# is instr an fmove out?
	bne.w		fsnan_out		# fmove out


# here, we simply see if the operand in the fsave frame needs to be "unskewed".
# this would be the case for opclass two operations with a source infinity or
# denorm operand in the sgl or dbl format. NANs also become skewed and must be
# fixed here.
	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
	bsr.l		fix_skewed_ops		# fix src op

# common exit: restore user register state, hand the (possibly fixed)
# fsave frame back to the FPU, and exit through the OS callout.
fsnan_exit:
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	frestore	FP_SRC(%a6)

	unlk		%a6
	bra.l		_real_snan

########################################################################

#
# the hardware does not save the default result to memory on enabled
# snan exceptions. we do this here before passing control to
# the user snan handler.
#
# byte, word, long, and packed destination format operations can pass
# through here. since packed format operations already were handled by
# fpsp_unsupp(), then we need to do nothing else for them here.
# for byte, word, and long, we simply need to test the sign of the src
# operand and save the appropriate minimum or maximum integer value
# to the effective address as pointed to by the stacked effective address.
#
fsnan_out:

	bfextu		%d0{&19:&3},%d0		# extract dst format field
	mov.b		1+EXC_OPWORD(%a6),%d1	# extract <ea> mode,reg
	mov.w		(tbl_snan.b,%pc,%d0.w*2),%a0
	jmp		(tbl_snan.b,%pc,%a0)

tbl_snan:
	short		fsnan_out_l - tbl_snan	# long word integer
	short		fsnan_out_s - tbl_snan	# sgl prec shouldn't happen
	short		fsnan_out_x - tbl_snan	# ext prec shouldn't happen
	short		tbl_snan - tbl_snan	# packed needs no help
	short		fsnan_out_w - tbl_snan	# word integer
	short		fsnan_out_d - tbl_snan	# dbl prec shouldn't happen
	short		fsnan_out_b - tbl_snan	# byte integer
	short		tbl_snan - tbl_snan	# packed needs no help

# default result for each integer format is the SNAN's mantissa with
# the quiet bit set (i.e. converted to a QNAN).
fsnan_out_b:
	mov.b		FP_SRC_HI(%a6),%d0	# load upper byte of SNAN
	bset		&6,%d0			# set SNAN bit
	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
	ble.b		fsnan_out_b_dn		# yes
	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
	bsr.l		_dmem_write_byte	# write the default result

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_b		# yes

	bra.w		fsnan_exit
fsnan_out_b_dn:
	andi.w		&0x0007,%d1		# isolate register number
	bsr.l		store_dreg_b		# store result to regfile
	bra.w		fsnan_exit

fsnan_out_w:
	mov.w		FP_SRC_HI(%a6),%d0	# load upper word of SNAN
	bset		&14,%d0			# set SNAN bit
	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
	ble.b		fsnan_out_w_dn		# yes
	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
	bsr.l		_dmem_write_word	# write the default result

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_w		# yes

	bra.w		fsnan_exit
fsnan_out_w_dn:
	andi.w		&0x0007,%d1		# isolate register number
	bsr.l		store_dreg_w		# store result to regfile
	bra.w		fsnan_exit

fsnan_out_l:
	mov.l		FP_SRC_HI(%a6),%d0	# load upper longword of SNAN
	bset		&30,%d0			# set SNAN bit
	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
	ble.b		fsnan_out_l_dn		# yes
	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
	bsr.l		_dmem_write_long	# write the default result

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_l		# yes

	bra.w		fsnan_exit
fsnan_out_l_dn:
	andi.w		&0x0007,%d1		# isolate register number
	bsr.l		store_dreg_l		# store result to regfile
	bra.w		fsnan_exit

# single precision: build a sgl QNAN (sign | exp 0xff | quiet bit |
# top 23 mantissa bits) and store it.
fsnan_out_s:
	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
	ble.b		fsnan_out_d_dn		# yes (dreg variant below)
	mov.l		FP_SRC_EX(%a6),%d0	# fetch SNAN sign
	andi.l		&0x80000000,%d0		# keep sign
	ori.l		&0x7fc00000,%d0		# insert new exponent,SNAN bit
	mov.l		FP_SRC_HI(%a6),%d1	# load mantissa
	lsr.l		&0x8,%d1		# shift mantissa for sgl
	or.l		%d1,%d0			# create sgl SNAN
	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
	bsr.l		_dmem_write_long	# write the default result

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_l		# yes

	bra.w		fsnan_exit
# sgl result to a data register: preserve d1 (reg number) around the
# mantissa shuffle, then store through store_dreg_l.
fsnan_out_d_dn:
	mov.l		FP_SRC_EX(%a6),%d0	# fetch SNAN sign
	andi.l		&0x80000000,%d0		# keep sign
	ori.l		&0x7fc00000,%d0		# insert new exponent,SNAN bit
	mov.l		%d1,-(%sp)		# save mode,reg byte
	mov.l		FP_SRC_HI(%a6),%d1	# load mantissa
	lsr.l		&0x8,%d1		# shift mantissa for sgl
	or.l		%d1,%d0			# create sgl SNAN
	mov.l		(%sp)+,%d1		# recover mode,reg byte
	andi.w		&0x0007,%d1		# isolate register number
	bsr.l		store_dreg_l		# store result to regfile
	bra.w		fsnan_exit

# double precision: assemble the 8-byte dbl QNAN in FP_SCR0 and write
# it out with the block _dmem_write callout.
fsnan_out_d:
	mov.l		FP_SRC_EX(%a6),%d0	# fetch SNAN sign
	andi.l		&0x80000000,%d0		# keep sign
	ori.l		&0x7ff80000,%d0		# insert new exponent,SNAN bit
	mov.l		FP_SRC_HI(%a6),%d1	# load hi mantissa
	mov.l		%d0,FP_SCR0_EX(%a6)	# store to temp space
	mov.l		&11,%d0			# load shift amt
	lsr.l		%d0,%d1
	or.l		%d1,FP_SCR0_EX(%a6)	# create dbl hi
	mov.l		FP_SRC_HI(%a6),%d1	# load hi mantissa
	andi.l		&0x000007ff,%d1
	ror.l		%d0,%d1
	mov.l		%d1,FP_SCR0_HI(%a6)	# store to temp space
	mov.l		FP_SRC_LO(%a6),%d1	# load lo mantissa
	lsr.l		%d0,%d1
	or.l		%d1,FP_SCR0_HI(%a6)	# create dbl lo
	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
	mov.l		EXC_EA(%a6),%a1		# pass: dst addr
	movq.l		&0x8,%d0		# pass: size of 8 bytes
	bsr.l		_dmem_write		# write the default result

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_d		# yes

	bra.w		fsnan_exit

# for extended precision, if the addressing mode is pre-decrement or
# post-increment, then the address register did not get updated.
# in addition, for pre-decrement, the stacked <ea> is incorrect.
fsnan_out_x:
	clr.b		SPCOND_FLG(%a6)		# clear special case flag

# build the extended-precision QNAN (quiet bit set) in FP_SCR0.
	mov.w		FP_SRC_EX(%a6),FP_SCR0_EX(%a6)
	clr.w		2+FP_SCR0(%a6)
	mov.l		FP_SRC_HI(%a6),%d0
	bset		&30,%d0			# set SNAN bit
	mov.l		%d0,FP_SCR0_HI(%a6)
	mov.l		FP_SRC_LO(%a6),FP_SCR0_LO(%a6)

	btst		&0x5,EXC_SR(%a6)	# supervisor mode exception?
	bne.b		fsnan_out_x_s		# yes

# user mode: expose usp/a6 to _calc_ea_fout through the frame, fix the
# stacked <ea>, and write back any updated a7.
	mov.l		%usp,%a0		# fetch user stack pointer
	mov.l		%a0,EXC_A7(%a6)		# save on stack for calc_ea()
	mov.l		(%a6),EXC_A6(%a6)

	bsr.l		_calc_ea_fout		# find the correct ea,update An
	mov.l		%a0,%a1
	mov.l		%a0,EXC_EA(%a6)		# stack correct <ea>

	mov.l		EXC_A7(%a6),%a0
	mov.l		%a0,%usp		# restore user stack pointer
	mov.l		EXC_A6(%a6),(%a6)

fsnan_out_x_save:
	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
	movq.l		&0xc,%d0		# pass: size of extended
	bsr.l		_dmem_write		# write the default result

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_x		# yes

	bra.w		fsnan_exit

fsnan_out_x_s:
	mov.l		(%a6),EXC_A6(%a6)

	bsr.l		_calc_ea_fout		# find the correct ea,update An
	mov.l		%a0,%a1
	mov.l		%a0,EXC_EA(%a6)		# stack correct <ea>

	mov.l		EXC_A6(%a6),(%a6)

	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # is <ea> mode -(a7)?
	bne.b		fsnan_out_x_save	# no

# the operation was "fmove.x SNAN,-(a7)" from supervisor mode.
# the destination is the system stack itself, so the exception frame
# is moved "down" 0xc bytes and the 12-byte default result is written
# into the slots it vacated before exiting.
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	frestore	FP_SRC(%a6)

	mov.l		EXC_A6(%a6),%a6		# restore frame pointer

# shift the stacked SR, PC, and <ea> down 0xc bytes...
	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
	mov.l		LOCAL_SIZE+EXC_PC+0x2(%sp),LOCAL_SIZE+EXC_PC+0x2-0xc(%sp)
	mov.l		LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)

# ...then drop the extended-precision default result into the vacated space.
	mov.l		LOCAL_SIZE+FP_SCR0_EX(%sp),LOCAL_SIZE+EXC_SR(%sp)
	mov.l		LOCAL_SIZE+FP_SCR0_HI(%sp),LOCAL_SIZE+EXC_PC+0x2(%sp)
	mov.l		LOCAL_SIZE+FP_SCR0_LO(%sp),LOCAL_SIZE+EXC_EA(%sp)

	add.l		&LOCAL_SIZE-0x8,%sp	# clear locals; sp -> moved frame

	bra.l		_real_snan

#########################################################################
# XDEF **************************************************************** #
#	_fpsp_inex(): 060FPSP entry point for FP Inexact exception.	#
#									#
#	This handler should be the first code executed upon taking the	#
#	FP Inexact exception in an operating system.
#
#									#
# XREF **************************************************************** #
#	_imem_read_long() - read instruction longword			#
#	fix_skewed_ops() - adjust src operand in fsave frame		#
#	set_tag_x() - determine optype of src/dst operands		#
#	store_fpreg() - store opclass 0 or 2 result to FP regfile	#
#	unnorm_fix() - change UNNORM operands to NORM or ZERO		#
#	load_fpn2() - load dst operand from FP regfile			#
#	smovcr() - emulate an "fmovcr" instruction			#
#	fout() - emulate an opclass 3 instruction			#
#	tbl_unsupp - addr of table of emulation routines for opclass 0,2 #
#	_real_inex() - "callout" to operating system inexact handler	#
#									#
# INPUT *************************************************************** #
#	- The system stack contains the FP Inexact exception frame	#
#	- The fsave frame contains the source operand			#
#									#
# OUTPUT ************************************************************** #
#	- The system stack is unchanged					#
#	- The fsave frame contains the adjusted src op for opclass 0,2	#
#									#
# ALGORITHM *********************************************************** #
#	In a system where the FP Inexact exception is enabled, the goal #
# is to get to the handler specified at _real_inex(). But, on the 060,	#
# for opclass zero and two instruction taking this exception, the	#
# hardware doesn't store the correct result to the destination FP	#
# register as did the '040 and '881/2. This handler must emulate the	#
# instruction in order to get this value and then store it to the	#
# correct register before calling _real_inex().				#
#	For opclass 3 instructions, the 060 doesn't store the default	#
# inexact result out to memory or data register file as it should.	#
# This code must emulate the move out by calling fout() before finally	#
# exiting through _real_inex().						#
#									#
#########################################################################

	global		_fpsp_inex
_fpsp_inex:

	link.w		%a6,&-LOCAL_SIZE	# init stack frame

	fsave		FP_SRC(%a6)		# grab the "busy" frame

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack

# the FPIAR holds the "current PC" of the faulting instruction
	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)

	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch the instruction words
	mov.l		%d0,EXC_OPWORD(%a6)

##############################################################################

	btst		&13,%d0			# is instr an fmove out?
	bne.w		finex_out		# fmove out


# the hardware, for "fabs" and "fneg" w/ a long source format, puts the
# longword integer directly into the upper longword of the mantissa along
# w/ an exponent value of 0x401e. we convert this to extended precision here.
	bfextu		%d0{&19:&3},%d0		# fetch instr size
	bne.b		finex_cont		# instr size is not long
	cmpi.w		FP_SRC_EX(%a6),&0x401e	# is exponent 0x401e?
	bne.b		finex_cont		# no
	fmov.l		&0x0,%fpcr		# clear rounding controls
	fmov.l		FP_SRC_HI(%a6),%fp0	# load integer src
	fmov.x		%fp0,FP_SRC(%a6)	# store integer as extended precision
	mov.w		&0xe001,0x2+FP_SRC(%a6)

finex_cont:
	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
	bsr.l		fix_skewed_ops		# fix src op

# Here, we zero the ccode and exception byte field since we're going to
# emulate the whole instruction. Notice, though, that we don't kill the
# INEX1 bit. This is because a packed op has long since been converted
# to extended before arriving here. Therefore, we need to retain the
# INEX1 bit from when the operand was first converted.
	andi.l		&0x00ff01ff,USER_FPSR(%a6) # zero all but accrued field

	fmov.l		&0x0,%fpcr		# zero current control regs
	fmov.l		&0x0,%fpsr

	bfextu		EXC_EXTWORD(%a6){&0:&6},%d1 # extract upper 6 of cmdreg
	cmpi.b		%d1,&0x17		# is op an fmovecr?
	beq.w		finex_fmovcr		# yes

	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
	bsr.l		set_tag_x		# tag the operand type
	mov.b		%d0,STAG(%a6)		# maybe NORM,DENORM

# bits four and five of the fp extension word separate the monadic and dyadic
# operations that can pass through fpsp_inex(). remember that fcmp and ftst
# will never take this exception, but fsincos will.
	btst		&0x5,1+EXC_CMDREG(%a6)	# is operation monadic or dyadic?
	beq.b		finex_extract		# monadic

	btst		&0x4,1+EXC_CMDREG(%a6)	# is operation an fsincos?
	bne.b		finex_extract		# yes

	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
	bsr.l		load_fpn2		# load dst into FP_DST

	lea		FP_DST(%a6),%a0		# pass: ptr to dst op
	bsr.l		set_tag_x		# tag the operand type
	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
	bne.b		finex_op2_done		# no
	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
finex_op2_done:
	mov.b		%d0,DTAG(%a6)		# save dst optype tag

# dispatch to the opclass 0/2 emulation routine selected by the
# low 7 bits of the command word.
finex_extract:
	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec/mode

	mov.b		1+EXC_CMDREG(%a6),%d1
	andi.w		&0x007f,%d1		# extract extension

	lea		FP_SRC(%a6),%a0		# pass: ptr to src
	lea		FP_DST(%a6),%a1		# pass: ptr to dst

	mov.l		(tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
	jsr		(tbl_unsupp.l,%pc,%d1.l*1)

# the operation has been emulated. the result is in fp0.
finex_save:
	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # extract dst FP reg
	bsr.l		store_fpreg		# save emulated result

finex_exit:
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	frestore	FP_SRC(%a6)

	unlk		%a6
	bra.l		_real_inex

finex_fmovcr:
	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec,mode
	mov.b		1+EXC_CMDREG(%a6),%d1
	andi.l		&0x0000007f,%d1		# pass rom offset
	bsr.l		smovcr
	bra.b		finex_save

########################################################################

#
# the hardware does not save the default result to memory on enabled
# inexact exceptions. we do this here before passing control to
# the user inexact handler.
#
# byte, word, and long destination format operations can pass
# through here. so can double and single precision.
# although packed opclass three operations can take inexact
# exceptions, they won't pass through here since they are caught
# first by the unsupported data format exception handler. that handler
# sends them directly to _real_inex() if necessary.
#
finex_out:

	mov.b		&NORM,STAG(%a6)		# src is a NORM

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec,mode

	andi.l		&0xffff00ff,USER_FPSR(%a6) # zero exception field

	lea		FP_SRC(%a6),%a0		# pass ptr to src operand

	bsr.l		fout			# store the default result

	bra.b		finex_exit

#########################################################################
# XDEF **************************************************************** #
#	_fpsp_dz(): 060FPSP entry point for FP DZ exception.		#
#									#
#	This handler should be the first code executed upon taking	#
#	the FP DZ exception in an operating system.
# 3753# # 3754# XREF **************************************************************** # 3755# _imem_read_long() - read instruction longword from memory # 3756# fix_skewed_ops() - adjust fsave operand # 3757# _real_dz() - "callout" exit point from FP DZ handler # 3758# # 3759# INPUT *************************************************************** # 3760# - The system stack contains the FP DZ exception stack. # 3761# - The fsave frame contains the source operand. # 3762# # 3763# OUTPUT ************************************************************** # 3764# - The system stack contains the FP DZ exception stack. # 3765# - The fsave frame contains the adjusted source operand. # 3766# # 3767# ALGORITHM *********************************************************** # 3768# In a system where the DZ exception is enabled, the goal is to # 3769# get to the handler specified at _real_dz(). But, on the 060, when the # 3770# exception is taken, the input operand in the fsave state frame may # 3771# be incorrect for some cases and need to be adjusted. So, this package # 3772# adjusts the operand using fix_skewed_ops() and then branches to # 3773# _real_dz(). 
# 3774# # 3775######################################################################### 3776 3777 global _fpsp_dz 3778_fpsp_dz: 3779 3780 link.w %a6,&-LOCAL_SIZE # init stack frame 3781 3782 fsave FP_SRC(%a6) # grab the "busy" frame 3783 3784 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 3785 fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs 3786 fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack 3787 3788# the FPIAR holds the "current PC" of the faulting instruction 3789 mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6) 3790 3791 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr 3792 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr 3793 bsr.l _imem_read_long # fetch the instruction words 3794 mov.l %d0,EXC_OPWORD(%a6) 3795 3796############################################################################## 3797 3798 3799# here, we simply see if the operand in the fsave frame needs to be "unskewed". 3800# this would be the case for opclass two operations with a source zero 3801# in the sgl or dbl format. 3802 lea FP_SRC(%a6),%a0 # pass: ptr to src op 3803 bsr.l fix_skewed_ops # fix src op 3804 3805fdz_exit: 3806 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1 3807 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs 3808 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 3809 3810 frestore FP_SRC(%a6) 3811 3812 unlk %a6 3813 bra.l _real_dz 3814 3815######################################################################### 3816# XDEF **************************************************************** # 3817# _fpsp_fline(): 060FPSP entry point for "Line F emulator" # 3818# exception when the "reduced" version of the # 3819# FPSP is implemented that does not emulate # 3820# FP unimplemented instructions. # 3821# # 3822# This handler should be the first code executed upon taking a # 3823# "Line F Emulator" exception in an operating system integrating # 3824# the reduced version of 060FPSP. 
#									#
# XREF **************************************************************** #
#	_real_fpu_disabled() - Handle "FPU disabled" exceptions		#
#	_real_fline() - Handle all other cases (treated equally)	#
#									#
# INPUT *************************************************************** #
#	- The system stack contains a "Line F Emulator" exception	#
#	  stack frame.							#
#									#
# OUTPUT ************************************************************** #
#	- The system stack is unchanged.				#
#									#
# ALGORITHM *********************************************************** #
#	When a "Line F Emulator" exception occurs in a system where	#
# "FPU Unimplemented" instructions will not be emulated, the exception	#
# can occur because the FPU is disabled or the instruction is to be	#
# classified as "Line F". This module determines which case exists and	#
# calls the appropriate "callout".					#
#									#
#########################################################################

	global		_fpsp_fline
_fpsp_fline:

# check to see if the FPU is disabled. if so, jump to the OS entry
# point for that condition.
# NOTE(review): 0x6(%sp) is read as the frame's format/vector word and
# compared against 0x402c -- presumably the "FPU disabled" frame; confirm
# against the 68060 exception stack-frame definitions.
	cmpi.w		0x6(%sp),&0x402c
	beq.l		_real_fpu_disabled	# FPU disabled: OS handles it

	bra.l		_real_fline		# everything else: Line F callout

#########################################################################
# XDEF **************************************************************** #
#	_dcalc_ea(): calc correct <ea> from <ea> stacked on exception	#
#									#
# XREF **************************************************************** #
#	inc_areg() - increment an address register			#
#	dec_areg() - decrement an address register			#
#									#
# INPUT *************************************************************** #
#	d0 = number of bytes to adjust <ea> by				#
#									#
# OUTPUT ************************************************************** #
#	None								#
#									#
# ALGORITHM *********************************************************** #
# "Dummy" CALCulate Effective Address:					#
#	The stacked <ea> for FP unimplemented instructions and opclass	#
#	two packed instructions is correct with the exception of...	#
#									#
#	1) -(An)   : The register is not updated regardless of size.	#
#		     Also, for extended precision and packed, the	#
#		     stacked <ea> value is 8 bytes too big.		#
#	2) (An)+   : The register is not updated.			#
#	3) #<data> : The upper longword of the immediate operand is	#
#		     stacked; b,w,l and s sizes are completely stacked.	#
#		     d,x, and p are not.				#
#									#
#########################################################################

	global		_dcalc_ea
_dcalc_ea:
	mov.l		%d0, %a0		# move # bytes to %a0

	mov.b		1+EXC_OPWORD(%a6), %d0	# fetch opcode word
	mov.l		%d0, %d1		# make a copy

	andi.w		&0x38, %d0		# extract mode field
	andi.l		&0x7, %d1		# extract reg field

	cmpi.b		%d0,&0x18		# is mode (An)+ ?
	beq.b		dcea_pi			# yes

	cmpi.b		%d0,&0x20		# is mode -(An) ?
	beq.b		dcea_pd			# yes

	or.w		%d1,%d0			# concat mode,reg
	cmpi.b		%d0,&0x3c		# is mode #<data>?

	beq.b		dcea_imm		# yes

	mov.l		EXC_EA(%a6),%a0		# return <ea>
	rts

# need to set immediate data flag here since we'll need to do
# an imem_read to fetch this later.
dcea_imm:
	mov.b		&immed_flg,SPCOND_FLG(%a6)
	lea		([USER_FPIAR,%a6],0x4),%a0 # no; return <ea>
	rts

# here, the <ea> is stacked correctly. however, we must update the
# address register...
dcea_pi:
	mov.l		%a0,%d0			# pass amt to inc by
	bsr.l		inc_areg		# inc addr register

	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
	rts

# the <ea> is stacked correctly for all but extended and packed which
# the <ea>s are 8 bytes too large.
# it would make no sense to have a pre-decrement to a7 in supervisor
# mode so we don't even worry about this tricky case here : )
dcea_pd:
	mov.l		%a0,%d0			# pass amt to dec by
	bsr.l		dec_areg		# dec addr register

	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct

	cmpi.b		%d0,&0xc		# is opsize ext or packed?
	beq.b		dcea_pd2		# yes
	rts
dcea_pd2:
	sub.l		&0x8,%a0		# correct <ea>
	mov.l		%a0,EXC_EA(%a6)		# put correct <ea> on stack
	rts

#########################################################################
# XDEF **************************************************************** #
#	_calc_ea_fout(): calculate correct stacked <ea> for extended	#
#			 and packed data opclass 3 operations.		#
#									#
# XREF **************************************************************** #
#	None								#
#									#
# INPUT *************************************************************** #
#	None								#
#									#
# OUTPUT ************************************************************** #
#	a0 = return correct effective address				#
#									#
# ALGORITHM *********************************************************** #
#	For opclass 3 extended and packed data operations, the <ea>	#
# stacked for the exception is incorrect for -(an) and (an)+ addressing	#
# modes. Also, while we're at it, the index register itself must get	#
# updated.								#
#	So, for -(an), we must subtract 8 off of the stacked <ea> value	#
# and return that value as the correct <ea> and store that value in An.	#
# For (an)+, the stacked <ea> is correct but we must adjust An by +12.	#
#									#
#########################################################################

# This calc_ea is currently used to retrieve the correct <ea>
# for fmove outs of type extended and packed.
	global		_calc_ea_fout
_calc_ea_fout:
	mov.b		1+EXC_OPWORD(%a6),%d0	# fetch opcode word
	mov.l		%d0,%d1			# make a copy

	andi.w		&0x38,%d0		# extract mode field
	andi.l		&0x7,%d1		# extract reg field

	cmpi.b		%d0,&0x18		# is mode (An)+ ?
	beq.b		ceaf_pi			# yes

	cmpi.b		%d0,&0x20		# is mode -(An) ?
	beq.w		ceaf_pd			# yes

	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
	rts

# (An)+ : extended and packed fmove out
#	: stacked <ea> is correct
#	: "An" not updated
# dispatch on the register number through an 8-entry offset table.
ceaf_pi:
	mov.w		(tbl_ceaf_pi.b,%pc,%d1.w*2),%d1
	mov.l		EXC_EA(%a6),%a0
	jmp		(tbl_ceaf_pi.b,%pc,%d1.w*1)

	swbeg		&0x8
tbl_ceaf_pi:
	short		ceaf_pi0 - tbl_ceaf_pi
	short		ceaf_pi1 - tbl_ceaf_pi
	short		ceaf_pi2 - tbl_ceaf_pi
	short		ceaf_pi3 - tbl_ceaf_pi
	short		ceaf_pi4 - tbl_ceaf_pi
	short		ceaf_pi5 - tbl_ceaf_pi
	short		ceaf_pi6 - tbl_ceaf_pi
	short		ceaf_pi7 - tbl_ceaf_pi

# a0/a1/a6/a7 live in the exception save area; a2-a5 are still live.
ceaf_pi0:
	addi.l		&0xc,EXC_DREGS+0x8(%a6)
	rts
ceaf_pi1:
	addi.l		&0xc,EXC_DREGS+0xc(%a6)
	rts
ceaf_pi2:
	add.l		&0xc,%a2
	rts
ceaf_pi3:
	add.l		&0xc,%a3
	rts
ceaf_pi4:
	add.l		&0xc,%a4
	rts
ceaf_pi5:
	add.l		&0xc,%a5
	rts
ceaf_pi6:
	addi.l		&0xc,EXC_A6(%a6)
	rts
ceaf_pi7:
	mov.b		&mia7_flg,SPCOND_FLG(%a6) # flag the (a7)+ special case
	addi.l		&0xc,EXC_A7(%a6)
	rts

# -(An) : extended and packed fmove out
#	: stacked <ea> = actual <ea> + 8
#	: "An" not updated
ceaf_pd:
	mov.w		(tbl_ceaf_pd.b,%pc,%d1.w*2),%d1
	mov.l		EXC_EA(%a6),%a0
	sub.l		&0x8,%a0		# a0 = corrected <ea>
	sub.l		&0x8,EXC_EA(%a6)	# fix the stacked copy too
	jmp		(tbl_ceaf_pd.b,%pc,%d1.w*1)

	swbeg		&0x8
tbl_ceaf_pd:
	short		ceaf_pd0 - tbl_ceaf_pd
	short		ceaf_pd1 - tbl_ceaf_pd
	short		ceaf_pd2 - tbl_ceaf_pd
	short		ceaf_pd3 - tbl_ceaf_pd
	short		ceaf_pd4 - tbl_ceaf_pd
	short		ceaf_pd5 - tbl_ceaf_pd
	short		ceaf_pd6 - tbl_ceaf_pd
	short		ceaf_pd7 - tbl_ceaf_pd

# store the corrected <ea> back into the affected An.
ceaf_pd0:
	mov.l		%a0,EXC_DREGS+0x8(%a6)
	rts
ceaf_pd1:
	mov.l		%a0,EXC_DREGS+0xc(%a6)
	rts
ceaf_pd2:
	mov.l		%a0,%a2
	rts
ceaf_pd3:
	mov.l		%a0,%a3
	rts
ceaf_pd4:
	mov.l		%a0,%a4
	rts
ceaf_pd5:
	mov.l		%a0,%a5
	rts
ceaf_pd6:
	mov.l		%a0,EXC_A6(%a6)
	rts
ceaf_pd7:
	mov.l		%a0,EXC_A7(%a6)
	mov.b		&mda7_flg,SPCOND_FLG(%a6) # flag the -(a7) special case
	rts

#
# This table holds the offsets of the emulation routines for each individual
# math operation relative to the address of this table. Included are
# routines like fadd/fmul/fabs. The transcendentals ARE NOT. This is because
# this table is for the version of the 060FPSP without transcendentals.
# The location within the table is determined by the extension bits of the
# operation longword.
# A self-referencing entry (tbl_unsupp - tbl_unsupp == 0) marks an
# opcode with no emulation routine in this build.
#

	swbeg		&109
tbl_unsupp:
	long		fin - tbl_unsupp	# 00: fmove
	long		fint - tbl_unsupp	# 01: fint
	long		tbl_unsupp - tbl_unsupp	# 02: fsinh
	long		fintrz - tbl_unsupp	# 03: fintrz
	long		fsqrt - tbl_unsupp	# 04: fsqrt
	long		tbl_unsupp - tbl_unsupp	# 05: (unused)
	long		tbl_unsupp - tbl_unsupp	# 06: flognp1
	long		tbl_unsupp - tbl_unsupp	# 07: (unused)
	long		tbl_unsupp - tbl_unsupp	# 08: fetoxm1
	long		tbl_unsupp - tbl_unsupp	# 09: ftanh
	long		tbl_unsupp - tbl_unsupp	# 0a: fatan
	long		tbl_unsupp - tbl_unsupp	# 0b: (unused)
	long		tbl_unsupp - tbl_unsupp	# 0c: fasin
	long		tbl_unsupp - tbl_unsupp	# 0d: fatanh
	long		tbl_unsupp - tbl_unsupp	# 0e: fsin
	long		tbl_unsupp - tbl_unsupp	# 0f: ftan
	long		tbl_unsupp - tbl_unsupp	# 10: fetox
	long		tbl_unsupp - tbl_unsupp	# 11: ftwotox
	long		tbl_unsupp - tbl_unsupp	# 12: ftentox
	long		tbl_unsupp - tbl_unsupp	# 13: (unused)
	long		tbl_unsupp - tbl_unsupp	# 14: flogn
	long		tbl_unsupp - tbl_unsupp	# 15: flog10
	long		tbl_unsupp - tbl_unsupp	# 16: flog2
	long		tbl_unsupp - tbl_unsupp	# 17: (unused)
	long		fabs - tbl_unsupp	# 18: fabs
	long		tbl_unsupp - tbl_unsupp	# 19: fcosh
	long		fneg - tbl_unsupp	# 1a: fneg
	long		tbl_unsupp - tbl_unsupp	# 1b: (unused)
	long		tbl_unsupp - tbl_unsupp	# 1c: facos
	long		tbl_unsupp - tbl_unsupp	# 1d: fcos
	long		tbl_unsupp - tbl_unsupp	# 1e: fgetexp
	long		tbl_unsupp - tbl_unsupp	# 1f: fgetman
	long		fdiv - tbl_unsupp	# 20: fdiv
	long		tbl_unsupp - tbl_unsupp	# 21: fmod
	long		fadd - tbl_unsupp	# 22: fadd
	long		fmul - tbl_unsupp	# 23: fmul
	long		fsgldiv - tbl_unsupp	# 24: fsgldiv
	long		tbl_unsupp - tbl_unsupp	# 25: frem
	long		tbl_unsupp - tbl_unsupp	# 26: fscale
	long		fsglmul - tbl_unsupp	# 27: fsglmul
	long		fsub - tbl_unsupp	# 28: fsub
	long		tbl_unsupp - tbl_unsupp	# 29: (unused)
	long		tbl_unsupp - tbl_unsupp	# 2a: (unused)
	long		tbl_unsupp - tbl_unsupp	# 2b: (unused)
	long		tbl_unsupp - tbl_unsupp	# 2c: (unused)
	long		tbl_unsupp - tbl_unsupp	# 2d: (unused)
	long		tbl_unsupp - tbl_unsupp	# 2e: (unused)
	long		tbl_unsupp - tbl_unsupp	# 2f: (unused)
	long		tbl_unsupp - tbl_unsupp	# 30: fsincos
	long		tbl_unsupp - tbl_unsupp	# 31: fsincos
	long		tbl_unsupp - tbl_unsupp	# 32: fsincos
	long		tbl_unsupp - tbl_unsupp	# 33: fsincos
	long		tbl_unsupp - tbl_unsupp	# 34: fsincos
	long		tbl_unsupp - tbl_unsupp	# 35: fsincos
	long		tbl_unsupp - tbl_unsupp	# 36: fsincos
	long		tbl_unsupp - tbl_unsupp	# 37: fsincos
	long		fcmp - tbl_unsupp	# 38: fcmp
	long		tbl_unsupp - tbl_unsupp	# 39: (unused)
	long		ftst - tbl_unsupp	# 3a: ftst
	long		tbl_unsupp - tbl_unsupp	# 3b: (unused)
	long		tbl_unsupp - tbl_unsupp	# 3c: (unused)
	long		tbl_unsupp - tbl_unsupp	# 3d: (unused)
	long		tbl_unsupp - tbl_unsupp	# 3e: (unused)
	long		tbl_unsupp - tbl_unsupp	# 3f: (unused)
	long		fsin - tbl_unsupp	# 40: fsmove
	long		fssqrt - tbl_unsupp	# 41: fssqrt
	long		tbl_unsupp - tbl_unsupp	# 42: (unused)
	long		tbl_unsupp - tbl_unsupp	# 43: (unused)
	long		fdin - tbl_unsupp	# 44: fdmove
	long		fdsqrt - tbl_unsupp	# 45: fdsqrt
	long		tbl_unsupp - tbl_unsupp	# 46: (unused)
	long		tbl_unsupp - tbl_unsupp	# 47: (unused)
	long		tbl_unsupp - tbl_unsupp	# 48: (unused)
	long		tbl_unsupp - tbl_unsupp	# 49: (unused)
	long		tbl_unsupp - tbl_unsupp	# 4a: (unused)
	long		tbl_unsupp - tbl_unsupp	# 4b: (unused)
	long		tbl_unsupp - tbl_unsupp	# 4c: (unused)
	long		tbl_unsupp - tbl_unsupp	# 4d: (unused)
	long		tbl_unsupp - tbl_unsupp	# 4e: (unused)
	long		tbl_unsupp - tbl_unsupp	# 4f: (unused)
	long		tbl_unsupp - tbl_unsupp	# 50: (unused)
	long		tbl_unsupp - tbl_unsupp	# 51: (unused)
	long		tbl_unsupp - tbl_unsupp	# 52: (unused)
	long		tbl_unsupp - tbl_unsupp	# 53: (unused)
	long		tbl_unsupp - tbl_unsupp	# 54: (unused)
	long		tbl_unsupp - tbl_unsupp	# 55: (unused)
	long		tbl_unsupp - tbl_unsupp	# 56: (unused)
	long		tbl_unsupp - tbl_unsupp	# 57: (unused)
	long		fsabs - tbl_unsupp	# 58: fsabs
	long		tbl_unsupp - tbl_unsupp	# 59: (unused)
	long		fsneg - tbl_unsupp	# 5a: fsneg
	long		tbl_unsupp - tbl_unsupp	# 5b: (unused)
	long		fdabs - tbl_unsupp	# 5c: fdabs
	long		tbl_unsupp - tbl_unsupp	# 5d: (unused)
	long		fdneg - tbl_unsupp	# 5e: fdneg
	long		tbl_unsupp - tbl_unsupp	# 5f: (unused)
	long		fsdiv - tbl_unsupp	# 60: fsdiv
	long		tbl_unsupp - tbl_unsupp	# 61: (unused)
	long		fsadd - tbl_unsupp	# 62: fsadd
	long		fsmul - tbl_unsupp	# 63: fsmul
	long		fddiv - tbl_unsupp	# 64: fddiv
	long		tbl_unsupp - tbl_unsupp	# 65: (unused)
	long		fdadd - tbl_unsupp	# 66: fdadd
	long		fdmul - tbl_unsupp	# 67: fdmul
	long		fssub - tbl_unsupp	# 68: fssub
	long		tbl_unsupp - tbl_unsupp	# 69: (unused)
	long		tbl_unsupp - tbl_unsupp	# 6a: (unused)
	long		tbl_unsupp - tbl_unsupp	# 6b: (unused)
	long		fdsub - tbl_unsupp	# 6c: fdsub

#################################################
# Add this here so non-fp modules can compile.
# (smovcr is called from fpsp_inex.)
# smovcr: placeholder stub so that builds without the full FP package
# still link.  NOTE: `bra.b smovcr` branches to itself -- this stub is
# an intentional dead-end and must never actually be reached at runtime.
	global		smovcr
smovcr:
	bra.b		smovcr

#########################################################################
# XDEF **************************************************************** #
#	fmovm_dynamic(): emulate "fmovm" dynamic instruction		#
#									#
# XREF **************************************************************** #
#	fetch_dreg() - fetch data register				#
#	{i,d,}mem_read() - fetch data from memory			#
#	_mem_write() - write data to memory				#
#	iea_iacc() - instruction memory access error occurred		#
#	iea_dacc() - data memory access error occurred			#
#	restore() - restore An index regs if access error occurred	#
#									#
# INPUT *************************************************************** #
#	None								#
#									#
# OUTPUT ************************************************************** #
#	If instr is "fmovm Dn,-(A7)" from supervisor mode,		#
#		d0 = size of dump					#
#		d1 = Dn							#
#	Else if instruction access error,				#
#		d0 = FSLW						#
#	Else if data access error,					#
#		d0 = FSLW						#
#		a0 = address of fault					#
#	Else								#
#		none.							#
#									#
# ALGORITHM *********************************************************** #
#	The effective address must be calculated since this is entered	#
# from an "Unimplemented Effective Address" exception handler. So, we	#
# have our own fcalc_ea() routine here. If an access error is flagged	#
# by a _{i,d,}mem_read() call, we must exit through the special		#
# handler.								#
#	The data register is determined and its value loaded to get the	#
# string of FP registers affected. This value is used as an index into	#
# a lookup table such that we can determine the number of bytes		#
# involved.								#
#	If the instruction is "fmovm.x <ea>,Dn", a _mem_read() is used	#
# to read in all FP values. Again, _mem_read() may fail and require a	#
# special exit.								#
#	If the instruction is "fmovm.x DN,<ea>", a _mem_write() is used	#
# to write all FP values. _mem_write() may also fail.			#
#	If the instruction is "fmovm.x DN,-(a7)" from supervisor mode,	#
# then we return the size of the dump and the string to the caller	#
# so that the move can occur outside of this routine. This special	#
# case is required so that moves to the system stack are handled	#
# correctly.								#
#									#
# DYNAMIC:								#
#	fmovm.x	dn, <ea>						#
#	fmovm.x	<ea>, dn						#
#									#
#	<WORD 1>		<WORD2>					#
#	1111 0010 00 |<ea>|	11@& 1000 0$$$ 0000			#
#									#
#	& = (0): predecrement addressing mode				#
#	    (1): postincrement or control addressing mode		#
#	@ = (0): move listed regs from memory to the FPU		#
#	    (1): move listed regs from the FPU to memory		#
#	$$$    : index of data register holding reg select mask		#
#									#
# NOTES:								#
#	If the data register holds a zero, then the			#
#	instruction is a nop.						#
#									#
#########################################################################

	global		fmovm_dynamic
fmovm_dynamic:

# extract the data register in which the bit string resides...
	mov.b		1+EXC_EXTWORD(%a6),%d1	# fetch extword
	andi.w		&0x70,%d1		# extract reg bits
	lsr.b		&0x4,%d1		# shift into lo bits

# fetch the bit string into d0...
	bsr.l		fetch_dreg		# fetch reg string

	andi.l		&0x000000ff,%d0		# keep only lo byte

	mov.l		%d0,-(%sp)		# save strg
	mov.b		(tbl_fmovm_size.w,%pc,%d0),%d0 # look up transfer size
	mov.l		%d0,-(%sp)		# save size
	bsr.l		fmovm_calc_ea		# calculate <ea>
	mov.l		(%sp)+,%d0		# restore size
	mov.l		(%sp)+,%d1		# restore strg

# if the bit string is a zero, then the operation is a no-op
# but, make sure that we've calculated ea and advanced the opword pointer
	beq.w		fmovm_data_done

# separate move ins from move outs...
# extension-word bit 5 is the direction bit; bit clear => memory-to-FPU.
	btst		&0x5,EXC_EXTWORD(%a6)	# is it a move in or out?
	beq.w		fmovm_data_in		# bit clear => it's a move in

#############
# MOVE OUT: #
#############
fmovm_data_out:
	btst		&0x4,EXC_EXTWORD(%a6)	# control or predecrement?
	bne.w		fmovm_out_ctrl		# control

############################
fmovm_out_predec:
# for predecrement mode, the bit string is the opposite of both control
# operations and postincrement mode. (bit7 = FP7 ... bit0 = FP0)
# here, we convert it to be just like the others...
	mov.b		(tbl_fmovm_convert.w,%pc,%d1.w*1),%d1

	btst		&0x5,EXC_SR(%a6)	# user or supervisor mode?
	beq.b		fmovm_out_ctrl		# user

fmovm_out_predec_s:
	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # is <ea> mode -(a7)?
	bne.b		fmovm_out_ctrl

# the operation was unfortunately an: fmovm.x dn,-(sp)
# called from supervisor mode.
# we're also passing "size" and "strg" back to the calling routine
	rts

############################
fmovm_out_ctrl:
	mov.l		%a0,%a1			# move <ea> to a1

	sub.l		%d0,%sp			# subtract size of dump
	lea		(%sp),%a0		# a0 = supervisor staging buffer

# fp0/fp1 come from the exception save area; fp2-fp7 are still live
# in the FPU, hence the fmovm.x forms below.
	tst.b		%d1			# should FP0 be moved?
	bpl.b		fmovm_out_ctrl_fp1	# no

	mov.l		0x0+EXC_FP0(%a6),(%a0)+	# yes
	mov.l		0x4+EXC_FP0(%a6),(%a0)+
	mov.l		0x8+EXC_FP0(%a6),(%a0)+

fmovm_out_ctrl_fp1:
	lsl.b		&0x1,%d1		# should FP1 be moved?
	bpl.b		fmovm_out_ctrl_fp2	# no

	mov.l		0x0+EXC_FP1(%a6),(%a0)+	# yes
	mov.l		0x4+EXC_FP1(%a6),(%a0)+
	mov.l		0x8+EXC_FP1(%a6),(%a0)+

fmovm_out_ctrl_fp2:
	lsl.b		&0x1,%d1		# should FP2 be moved?
	bpl.b		fmovm_out_ctrl_fp3	# no

	fmovm.x		&0x20,(%a0)		# yes
	add.l		&0xc,%a0

fmovm_out_ctrl_fp3:
	lsl.b		&0x1,%d1		# should FP3 be moved?
	bpl.b		fmovm_out_ctrl_fp4	# no

	fmovm.x		&0x10,(%a0)		# yes
	add.l		&0xc,%a0

fmovm_out_ctrl_fp4:
	lsl.b		&0x1,%d1		# should FP4 be moved?
	bpl.b		fmovm_out_ctrl_fp5	# no

	fmovm.x		&0x08,(%a0)		# yes
	add.l		&0xc,%a0

fmovm_out_ctrl_fp5:
	lsl.b		&0x1,%d1		# should FP5 be moved?
	bpl.b		fmovm_out_ctrl_fp6	# no

	fmovm.x		&0x04,(%a0)		# yes
	add.l		&0xc,%a0

fmovm_out_ctrl_fp6:
	lsl.b		&0x1,%d1		# should FP6 be moved?
	bpl.b		fmovm_out_ctrl_fp7	# no

	fmovm.x		&0x02,(%a0)		# yes
	add.l		&0xc,%a0

fmovm_out_ctrl_fp7:
	lsl.b		&0x1,%d1		# should FP7 be moved?
	bpl.b		fmovm_out_ctrl_done	# no

	fmovm.x		&0x01,(%a0)		# yes
	add.l		&0xc,%a0

fmovm_out_ctrl_done:
	mov.l		%a1,L_SCR1(%a6)		# stash user dst addr

	lea		(%sp),%a0		# pass: supervisor src
	mov.l		%d0,-(%sp)		# save size
	bsr.l		_dmem_write		# copy data to user mem

	mov.l		(%sp)+,%d0
	add.l		%d0,%sp			# clear fpreg data from stack

	tst.l		%d1			# did dstore err?
	bne.w		fmovm_out_err		# yes

	rts

############
# MOVE IN: #
############
fmovm_data_in:
	mov.l		%a0,L_SCR1(%a6)		# stash user src addr

	sub.l		%d0,%sp			# make room for fpregs
	lea		(%sp),%a1

	mov.l		%d1,-(%sp)		# save bit string for later
	mov.l		%d0,-(%sp)		# save # of bytes

	bsr.l		_dmem_read		# copy data from user mem

	mov.l		(%sp)+,%d0		# retrieve # of bytes

	tst.l		%d1			# did dfetch fail?
	bne.w		fmovm_in_err		# yes

	mov.l		(%sp)+,%d1		# load bit string

	lea		(%sp),%a0		# addr of stack

# fp0/fp1 go to the exception save area; fp2-fp7 are loaded directly.
	tst.b		%d1			# should FP0 be moved?
	bpl.b		fmovm_data_in_fp1	# no

	mov.l		(%a0)+,0x0+EXC_FP0(%a6)	# yes
	mov.l		(%a0)+,0x4+EXC_FP0(%a6)
	mov.l		(%a0)+,0x8+EXC_FP0(%a6)

fmovm_data_in_fp1:
	lsl.b		&0x1,%d1		# should FP1 be moved?
	bpl.b		fmovm_data_in_fp2	# no

	mov.l		(%a0)+,0x0+EXC_FP1(%a6)	# yes
	mov.l		(%a0)+,0x4+EXC_FP1(%a6)
	mov.l		(%a0)+,0x8+EXC_FP1(%a6)

fmovm_data_in_fp2:
	lsl.b		&0x1,%d1		# should FP2 be moved?
	bpl.b		fmovm_data_in_fp3	# no

	fmovm.x		(%a0)+,&0x20		# yes

fmovm_data_in_fp3:
	lsl.b		&0x1,%d1		# should FP3 be moved?
	bpl.b		fmovm_data_in_fp4	# no

	fmovm.x		(%a0)+,&0x10		# yes

fmovm_data_in_fp4:
	lsl.b		&0x1,%d1		# should FP4 be moved?
	bpl.b		fmovm_data_in_fp5	# no

	fmovm.x		(%a0)+,&0x08		# yes

fmovm_data_in_fp5:
	lsl.b		&0x1,%d1		# should FP5 be moved?
	bpl.b		fmovm_data_in_fp6	# no

	fmovm.x		(%a0)+,&0x04		# yes

fmovm_data_in_fp6:
	lsl.b		&0x1,%d1		# should FP6 be moved?
	bpl.b		fmovm_data_in_fp7	# no

	fmovm.x		(%a0)+,&0x02		# yes

fmovm_data_in_fp7:
	lsl.b		&0x1,%d1		# should FP7 be moved?
	bpl.b		fmovm_data_in_done	# no

	fmovm.x		(%a0)+,&0x01		# yes

fmovm_data_in_done:
	add.l		%d0,%sp			# remove fpregs from stack
	rts

#####################################

fmovm_data_done:
	rts

##############################################################################

#
# table indexed by the operation's bit string that gives the number
# of bytes that will be moved.
4495# 4496# number of bytes = (# of 1's in bit string) * 12(bytes/fpreg) 4497# 4498tbl_fmovm_size: 4499 byte 0x00,0x0c,0x0c,0x18,0x0c,0x18,0x18,0x24 4500 byte 0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30 4501 byte 0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30 4502 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c 4503 byte 0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30 4504 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c 4505 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c 4506 byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48 4507 byte 0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30 4508 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c 4509 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c 4510 byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48 4511 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c 4512 byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48 4513 byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48 4514 byte 0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54 4515 byte 0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30 4516 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c 4517 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c 4518 byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48 4519 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c 4520 byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48 4521 byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48 4522 byte 0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54 4523 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c 4524 byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48 4525 byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48 4526 byte 0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54 4527 byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48 4528 byte 0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54 4529 byte 0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54 4530 byte 0x3c,0x48,0x48,0x54,0x48,0x54,0x54,0x60 4531 4532# 4533# table to convert a pre-decrement bit string into a post-increment 4534# or control bit string. 4535# ex: 0x00 ==> 0x00 4536# 0x01 ==> 0x80 4537# 0x02 ==> 0x40 4538# . 4539# . 
4540# 0xfd ==> 0xbf 4541# 0xfe ==> 0x7f 4542# 0xff ==> 0xff 4543# 4544tbl_fmovm_convert: 4545 byte 0x00,0x80,0x40,0xc0,0x20,0xa0,0x60,0xe0 4546 byte 0x10,0x90,0x50,0xd0,0x30,0xb0,0x70,0xf0 4547 byte 0x08,0x88,0x48,0xc8,0x28,0xa8,0x68,0xe8 4548 byte 0x18,0x98,0x58,0xd8,0x38,0xb8,0x78,0xf8 4549 byte 0x04,0x84,0x44,0xc4,0x24,0xa4,0x64,0xe4 4550 byte 0x14,0x94,0x54,0xd4,0x34,0xb4,0x74,0xf4 4551 byte 0x0c,0x8c,0x4c,0xcc,0x2c,0xac,0x6c,0xec 4552 byte 0x1c,0x9c,0x5c,0xdc,0x3c,0xbc,0x7c,0xfc 4553 byte 0x02,0x82,0x42,0xc2,0x22,0xa2,0x62,0xe2 4554 byte 0x12,0x92,0x52,0xd2,0x32,0xb2,0x72,0xf2 4555 byte 0x0a,0x8a,0x4a,0xca,0x2a,0xaa,0x6a,0xea 4556 byte 0x1a,0x9a,0x5a,0xda,0x3a,0xba,0x7a,0xfa 4557 byte 0x06,0x86,0x46,0xc6,0x26,0xa6,0x66,0xe6 4558 byte 0x16,0x96,0x56,0xd6,0x36,0xb6,0x76,0xf6 4559 byte 0x0e,0x8e,0x4e,0xce,0x2e,0xae,0x6e,0xee 4560 byte 0x1e,0x9e,0x5e,0xde,0x3e,0xbe,0x7e,0xfe 4561 byte 0x01,0x81,0x41,0xc1,0x21,0xa1,0x61,0xe1 4562 byte 0x11,0x91,0x51,0xd1,0x31,0xb1,0x71,0xf1 4563 byte 0x09,0x89,0x49,0xc9,0x29,0xa9,0x69,0xe9 4564 byte 0x19,0x99,0x59,0xd9,0x39,0xb9,0x79,0xf9 4565 byte 0x05,0x85,0x45,0xc5,0x25,0xa5,0x65,0xe5 4566 byte 0x15,0x95,0x55,0xd5,0x35,0xb5,0x75,0xf5 4567 byte 0x0d,0x8d,0x4d,0xcd,0x2d,0xad,0x6d,0xed 4568 byte 0x1d,0x9d,0x5d,0xdd,0x3d,0xbd,0x7d,0xfd 4569 byte 0x03,0x83,0x43,0xc3,0x23,0xa3,0x63,0xe3 4570 byte 0x13,0x93,0x53,0xd3,0x33,0xb3,0x73,0xf3 4571 byte 0x0b,0x8b,0x4b,0xcb,0x2b,0xab,0x6b,0xeb 4572 byte 0x1b,0x9b,0x5b,0xdb,0x3b,0xbb,0x7b,0xfb 4573 byte 0x07,0x87,0x47,0xc7,0x27,0xa7,0x67,0xe7 4574 byte 0x17,0x97,0x57,0xd7,0x37,0xb7,0x77,0xf7 4575 byte 0x0f,0x8f,0x4f,0xcf,0x2f,0xaf,0x6f,0xef 4576 byte 0x1f,0x9f,0x5f,0xdf,0x3f,0xbf,0x7f,0xff 4577 4578 global fmovm_calc_ea 4579############################################### 4580# _fmovm_calc_ea: calculate effective address # 4581############################################### 4582fmovm_calc_ea: 4583 mov.l %d0,%a0 # move # bytes to a0 4584 4585# currently, MODE and REG are taken from the 
EXC_OPWORD. this could be 4586# easily changed if they were inputs passed in registers. 4587 mov.w EXC_OPWORD(%a6),%d0 # fetch opcode word 4588 mov.w %d0,%d1 # make a copy 4589 4590 andi.w &0x3f,%d0 # extract mode field 4591 andi.l &0x7,%d1 # extract reg field 4592 4593# jump to the corresponding function for each {MODE,REG} pair. 4594 mov.w (tbl_fea_mode.b,%pc,%d0.w*2),%d0 # fetch jmp distance 4595 jmp (tbl_fea_mode.b,%pc,%d0.w*1) # jmp to correct ea mode 4596 4597 swbeg &64 4598tbl_fea_mode: 4599 short tbl_fea_mode - tbl_fea_mode 4600 short tbl_fea_mode - tbl_fea_mode 4601 short tbl_fea_mode - tbl_fea_mode 4602 short tbl_fea_mode - tbl_fea_mode 4603 short tbl_fea_mode - tbl_fea_mode 4604 short tbl_fea_mode - tbl_fea_mode 4605 short tbl_fea_mode - tbl_fea_mode 4606 short tbl_fea_mode - tbl_fea_mode 4607 4608 short tbl_fea_mode - tbl_fea_mode 4609 short tbl_fea_mode - tbl_fea_mode 4610 short tbl_fea_mode - tbl_fea_mode 4611 short tbl_fea_mode - tbl_fea_mode 4612 short tbl_fea_mode - tbl_fea_mode 4613 short tbl_fea_mode - tbl_fea_mode 4614 short tbl_fea_mode - tbl_fea_mode 4615 short tbl_fea_mode - tbl_fea_mode 4616 4617 short faddr_ind_a0 - tbl_fea_mode 4618 short faddr_ind_a1 - tbl_fea_mode 4619 short faddr_ind_a2 - tbl_fea_mode 4620 short faddr_ind_a3 - tbl_fea_mode 4621 short faddr_ind_a4 - tbl_fea_mode 4622 short faddr_ind_a5 - tbl_fea_mode 4623 short faddr_ind_a6 - tbl_fea_mode 4624 short faddr_ind_a7 - tbl_fea_mode 4625 4626 short faddr_ind_p_a0 - tbl_fea_mode 4627 short faddr_ind_p_a1 - tbl_fea_mode 4628 short faddr_ind_p_a2 - tbl_fea_mode 4629 short faddr_ind_p_a3 - tbl_fea_mode 4630 short faddr_ind_p_a4 - tbl_fea_mode 4631 short faddr_ind_p_a5 - tbl_fea_mode 4632 short faddr_ind_p_a6 - tbl_fea_mode 4633 short faddr_ind_p_a7 - tbl_fea_mode 4634 4635 short faddr_ind_m_a0 - tbl_fea_mode 4636 short faddr_ind_m_a1 - tbl_fea_mode 4637 short faddr_ind_m_a2 - tbl_fea_mode 4638 short faddr_ind_m_a3 - tbl_fea_mode 4639 short faddr_ind_m_a4 - tbl_fea_mode 4640 short 
faddr_ind_m_a5 - tbl_fea_mode 4641 short faddr_ind_m_a6 - tbl_fea_mode 4642 short faddr_ind_m_a7 - tbl_fea_mode 4643 4644 short faddr_ind_disp_a0 - tbl_fea_mode 4645 short faddr_ind_disp_a1 - tbl_fea_mode 4646 short faddr_ind_disp_a2 - tbl_fea_mode 4647 short faddr_ind_disp_a3 - tbl_fea_mode 4648 short faddr_ind_disp_a4 - tbl_fea_mode 4649 short faddr_ind_disp_a5 - tbl_fea_mode 4650 short faddr_ind_disp_a6 - tbl_fea_mode 4651 short faddr_ind_disp_a7 - tbl_fea_mode 4652 4653 short faddr_ind_ext - tbl_fea_mode 4654 short faddr_ind_ext - tbl_fea_mode 4655 short faddr_ind_ext - tbl_fea_mode 4656 short faddr_ind_ext - tbl_fea_mode 4657 short faddr_ind_ext - tbl_fea_mode 4658 short faddr_ind_ext - tbl_fea_mode 4659 short faddr_ind_ext - tbl_fea_mode 4660 short faddr_ind_ext - tbl_fea_mode 4661 4662 short fabs_short - tbl_fea_mode 4663 short fabs_long - tbl_fea_mode 4664 short fpc_ind - tbl_fea_mode 4665 short fpc_ind_ext - tbl_fea_mode 4666 short tbl_fea_mode - tbl_fea_mode 4667 short tbl_fea_mode - tbl_fea_mode 4668 short tbl_fea_mode - tbl_fea_mode 4669 short tbl_fea_mode - tbl_fea_mode 4670 4671################################### 4672# Address register indirect: (An) # 4673################################### 4674faddr_ind_a0: 4675 mov.l EXC_DREGS+0x8(%a6),%a0 # Get current a0 4676 rts 4677 4678faddr_ind_a1: 4679 mov.l EXC_DREGS+0xc(%a6),%a0 # Get current a1 4680 rts 4681 4682faddr_ind_a2: 4683 mov.l %a2,%a0 # Get current a2 4684 rts 4685 4686faddr_ind_a3: 4687 mov.l %a3,%a0 # Get current a3 4688 rts 4689 4690faddr_ind_a4: 4691 mov.l %a4,%a0 # Get current a4 4692 rts 4693 4694faddr_ind_a5: 4695 mov.l %a5,%a0 # Get current a5 4696 rts 4697 4698faddr_ind_a6: 4699 mov.l (%a6),%a0 # Get current a6 4700 rts 4701 4702faddr_ind_a7: 4703 mov.l EXC_A7(%a6),%a0 # Get current a7 4704 rts 4705 4706##################################################### 4707# Address register indirect w/ postincrement: (An)+ # 4708##################################################### 
4709faddr_ind_p_a0: 4710 mov.l EXC_DREGS+0x8(%a6),%d0 # Get current a0 4711 mov.l %d0,%d1 4712 add.l %a0,%d1 # Increment 4713 mov.l %d1,EXC_DREGS+0x8(%a6) # Save incr value 4714 mov.l %d0,%a0 4715 rts 4716 4717faddr_ind_p_a1: 4718 mov.l EXC_DREGS+0xc(%a6),%d0 # Get current a1 4719 mov.l %d0,%d1 4720 add.l %a0,%d1 # Increment 4721 mov.l %d1,EXC_DREGS+0xc(%a6) # Save incr value 4722 mov.l %d0,%a0 4723 rts 4724 4725faddr_ind_p_a2: 4726 mov.l %a2,%d0 # Get current a2 4727 mov.l %d0,%d1 4728 add.l %a0,%d1 # Increment 4729 mov.l %d1,%a2 # Save incr value 4730 mov.l %d0,%a0 4731 rts 4732 4733faddr_ind_p_a3: 4734 mov.l %a3,%d0 # Get current a3 4735 mov.l %d0,%d1 4736 add.l %a0,%d1 # Increment 4737 mov.l %d1,%a3 # Save incr value 4738 mov.l %d0,%a0 4739 rts 4740 4741faddr_ind_p_a4: 4742 mov.l %a4,%d0 # Get current a4 4743 mov.l %d0,%d1 4744 add.l %a0,%d1 # Increment 4745 mov.l %d1,%a4 # Save incr value 4746 mov.l %d0,%a0 4747 rts 4748 4749faddr_ind_p_a5: 4750 mov.l %a5,%d0 # Get current a5 4751 mov.l %d0,%d1 4752 add.l %a0,%d1 # Increment 4753 mov.l %d1,%a5 # Save incr value 4754 mov.l %d0,%a0 4755 rts 4756 4757faddr_ind_p_a6: 4758 mov.l (%a6),%d0 # Get current a6 4759 mov.l %d0,%d1 4760 add.l %a0,%d1 # Increment 4761 mov.l %d1,(%a6) # Save incr value 4762 mov.l %d0,%a0 4763 rts 4764 4765faddr_ind_p_a7: 4766 mov.b &mia7_flg,SPCOND_FLG(%a6) # set "special case" flag 4767 4768 mov.l EXC_A7(%a6),%d0 # Get current a7 4769 mov.l %d0,%d1 4770 add.l %a0,%d1 # Increment 4771 mov.l %d1,EXC_A7(%a6) # Save incr value 4772 mov.l %d0,%a0 4773 rts 4774 4775#################################################### 4776# Address register indirect w/ predecrement: -(An) # 4777#################################################### 4778faddr_ind_m_a0: 4779 mov.l EXC_DREGS+0x8(%a6),%d0 # Get current a0 4780 sub.l %a0,%d0 # Decrement 4781 mov.l %d0,EXC_DREGS+0x8(%a6) # Save decr value 4782 mov.l %d0,%a0 4783 rts 4784 4785faddr_ind_m_a1: 4786 mov.l EXC_DREGS+0xc(%a6),%d0 # Get current a1 4787 sub.l 
%a0,%d0 # Decrement 4788 mov.l %d0,EXC_DREGS+0xc(%a6) # Save decr value 4789 mov.l %d0,%a0 4790 rts 4791 4792faddr_ind_m_a2: 4793 mov.l %a2,%d0 # Get current a2 4794 sub.l %a0,%d0 # Decrement 4795 mov.l %d0,%a2 # Save decr value 4796 mov.l %d0,%a0 4797 rts 4798 4799faddr_ind_m_a3: 4800 mov.l %a3,%d0 # Get current a3 4801 sub.l %a0,%d0 # Decrement 4802 mov.l %d0,%a3 # Save decr value 4803 mov.l %d0,%a0 4804 rts 4805 4806faddr_ind_m_a4: 4807 mov.l %a4,%d0 # Get current a4 4808 sub.l %a0,%d0 # Decrement 4809 mov.l %d0,%a4 # Save decr value 4810 mov.l %d0,%a0 4811 rts 4812 4813faddr_ind_m_a5: 4814 mov.l %a5,%d0 # Get current a5 4815 sub.l %a0,%d0 # Decrement 4816 mov.l %d0,%a5 # Save decr value 4817 mov.l %d0,%a0 4818 rts 4819 4820faddr_ind_m_a6: 4821 mov.l (%a6),%d0 # Get current a6 4822 sub.l %a0,%d0 # Decrement 4823 mov.l %d0,(%a6) # Save decr value 4824 mov.l %d0,%a0 4825 rts 4826 4827faddr_ind_m_a7: 4828 mov.b &mda7_flg,SPCOND_FLG(%a6) # set "special case" flag 4829 4830 mov.l EXC_A7(%a6),%d0 # Get current a7 4831 sub.l %a0,%d0 # Decrement 4832 mov.l %d0,EXC_A7(%a6) # Save decr value 4833 mov.l %d0,%a0 4834 rts 4835 4836######################################################## 4837# Address register indirect w/ displacement: (d16, An) # 4838######################################################## 4839faddr_ind_disp_a0: 4840 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr 4841 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr 4842 bsr.l _imem_read_word 4843 4844 tst.l %d1 # did ifetch fail? 4845 bne.l iea_iacc # yes 4846 4847 mov.w %d0,%a0 # sign extend displacement 4848 4849 add.l EXC_DREGS+0x8(%a6),%a0 # a0 + d16 4850 rts 4851 4852faddr_ind_disp_a1: 4853 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr 4854 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr 4855 bsr.l _imem_read_word 4856 4857 tst.l %d1 # did ifetch fail? 
4858 bne.l iea_iacc # yes 4859 4860 mov.w %d0,%a0 # sign extend displacement 4861 4862 add.l EXC_DREGS+0xc(%a6),%a0 # a1 + d16 4863 rts 4864 4865faddr_ind_disp_a2: 4866 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr 4867 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr 4868 bsr.l _imem_read_word 4869 4870 tst.l %d1 # did ifetch fail? 4871 bne.l iea_iacc # yes 4872 4873 mov.w %d0,%a0 # sign extend displacement 4874 4875 add.l %a2,%a0 # a2 + d16 4876 rts 4877 4878faddr_ind_disp_a3: 4879 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr 4880 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr 4881 bsr.l _imem_read_word 4882 4883 tst.l %d1 # did ifetch fail? 4884 bne.l iea_iacc # yes 4885 4886 mov.w %d0,%a0 # sign extend displacement 4887 4888 add.l %a3,%a0 # a3 + d16 4889 rts 4890 4891faddr_ind_disp_a4: 4892 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr 4893 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr 4894 bsr.l _imem_read_word 4895 4896 tst.l %d1 # did ifetch fail? 4897 bne.l iea_iacc # yes 4898 4899 mov.w %d0,%a0 # sign extend displacement 4900 4901 add.l %a4,%a0 # a4 + d16 4902 rts 4903 4904faddr_ind_disp_a5: 4905 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr 4906 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr 4907 bsr.l _imem_read_word 4908 4909 tst.l %d1 # did ifetch fail? 4910 bne.l iea_iacc # yes 4911 4912 mov.w %d0,%a0 # sign extend displacement 4913 4914 add.l %a5,%a0 # a5 + d16 4915 rts 4916 4917faddr_ind_disp_a6: 4918 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr 4919 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr 4920 bsr.l _imem_read_word 4921 4922 tst.l %d1 # did ifetch fail? 4923 bne.l iea_iacc # yes 4924 4925 mov.w %d0,%a0 # sign extend displacement 4926 4927 add.l (%a6),%a0 # a6 + d16 4928 rts 4929 4930faddr_ind_disp_a7: 4931 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr 4932 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr 4933 bsr.l _imem_read_word 4934 4935 tst.l %d1 # did ifetch fail? 
4936 bne.l iea_iacc # yes 4937 4938 mov.w %d0,%a0 # sign extend displacement 4939 4940 add.l EXC_A7(%a6),%a0 # a7 + d16 4941 rts 4942 4943######################################################################## 4944# Address register indirect w/ index(8-bit displacement): (d8, An, Xn) # 4945# " " " w/ " (base displacement): (bd, An, Xn) # 4946# Memory indirect postindexed: ([bd, An], Xn, od) # 4947# Memory indirect preindexed: ([bd, An, Xn], od) # 4948######################################################################## 4949faddr_ind_ext: 4950 addq.l &0x8,%d1 4951 bsr.l fetch_dreg # fetch base areg 4952 mov.l %d0,-(%sp) 4953 4954 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr 4955 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr 4956 bsr.l _imem_read_word # fetch extword in d0 4957 4958 tst.l %d1 # did ifetch fail? 4959 bne.l iea_iacc # yes 4960 4961 mov.l (%sp)+,%a0 4962 4963 btst &0x8,%d0 4964 bne.w fcalc_mem_ind 4965 4966 mov.l %d0,L_SCR1(%a6) # hold opword 4967 4968 mov.l %d0,%d1 4969 rol.w &0x4,%d1 4970 andi.w &0xf,%d1 # extract index regno 4971 4972# count on fetch_dreg() not to alter a0... 4973 bsr.l fetch_dreg # fetch index 4974 4975 mov.l %d2,-(%sp) # save d2 4976 mov.l L_SCR1(%a6),%d2 # fetch opword 4977 4978 btst &0xb,%d2 # is it word or long? 4979 bne.b faii8_long 4980 ext.l %d0 # sign extend word index 4981faii8_long: 4982 mov.l %d2,%d1 4983 rol.w &0x7,%d1 4984 andi.l &0x3,%d1 # extract scale value 4985 4986 lsl.l %d1,%d0 # shift index by scale 4987 4988 extb.l %d2 # sign extend displacement 4989 add.l %d2,%d0 # index + disp 4990 add.l %d0,%a0 # An + (index + disp) 4991 4992 mov.l (%sp)+,%d2 # restore old d2 4993 rts 4994 4995########################### 4996# Absolute short: (XXX).W # 4997########################### 4998fabs_short: 4999 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr 5000 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr 5001 bsr.l _imem_read_word # fetch short address 5002 5003 tst.l %d1 # did ifetch fail? 
5004 bne.l iea_iacc # yes 5005 5006 mov.w %d0,%a0 # return <ea> in a0 5007 rts 5008 5009########################## 5010# Absolute long: (XXX).L # 5011########################## 5012fabs_long: 5013 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr 5014 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr 5015 bsr.l _imem_read_long # fetch long address 5016 5017 tst.l %d1 # did ifetch fail? 5018 bne.l iea_iacc # yes 5019 5020 mov.l %d0,%a0 # return <ea> in a0 5021 rts 5022 5023####################################################### 5024# Program counter indirect w/ displacement: (d16, PC) # 5025####################################################### 5026fpc_ind: 5027 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr 5028 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr 5029 bsr.l _imem_read_word # fetch word displacement 5030 5031 tst.l %d1 # did ifetch fail? 5032 bne.l iea_iacc # yes 5033 5034 mov.w %d0,%a0 # sign extend displacement 5035 5036 add.l EXC_EXTWPTR(%a6),%a0 # pc + d16 5037 5038# _imem_read_word() increased the extwptr by 2. need to adjust here. 5039 subq.l &0x2,%a0 # adjust <ea> 5040 rts 5041 5042########################################################## 5043# PC indirect w/ index(8-bit displacement): (d8, PC, An) # 5044# " " w/ " (base displacement): (bd, PC, An) # 5045# PC memory indirect postindexed: ([bd, PC], Xn, od) # 5046# PC memory indirect preindexed: ([bd, PC, Xn], od) # 5047########################################################## 5048fpc_ind_ext: 5049 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr 5050 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr 5051 bsr.l _imem_read_word # fetch ext word 5052 5053 tst.l %d1 # did ifetch fail? 5054 bne.l iea_iacc # yes 5055 5056 mov.l EXC_EXTWPTR(%a6),%a0 # put base in a0 5057 subq.l &0x2,%a0 # adjust base 5058 5059 btst &0x8,%d0 # is disp only 8 bits? 
5060 bne.w fcalc_mem_ind # calc memory indirect 5061 5062 mov.l %d0,L_SCR1(%a6) # store opword 5063 5064 mov.l %d0,%d1 # make extword copy 5065 rol.w &0x4,%d1 # rotate reg num into place 5066 andi.w &0xf,%d1 # extract register number 5067 5068# count on fetch_dreg() not to alter a0... 5069 bsr.l fetch_dreg # fetch index 5070 5071 mov.l %d2,-(%sp) # save d2 5072 mov.l L_SCR1(%a6),%d2 # fetch opword 5073 5074 btst &0xb,%d2 # is index word or long? 5075 bne.b fpii8_long # long 5076 ext.l %d0 # sign extend word index 5077fpii8_long: 5078 mov.l %d2,%d1 5079 rol.w &0x7,%d1 # rotate scale value into place 5080 andi.l &0x3,%d1 # extract scale value 5081 5082 lsl.l %d1,%d0 # shift index by scale 5083 5084 extb.l %d2 # sign extend displacement 5085 add.l %d2,%d0 # disp + index 5086 add.l %d0,%a0 # An + (index + disp) 5087 5088 mov.l (%sp)+,%d2 # restore temp register 5089 rts 5090 5091# d2 = index 5092# d3 = base 5093# d4 = od 5094# d5 = extword 5095fcalc_mem_ind: 5096 btst &0x6,%d0 # is the index suppressed? 5097 beq.b fcalc_index 5098 5099 movm.l &0x3c00,-(%sp) # save d2-d5 5100 5101 mov.l %d0,%d5 # put extword in d5 5102 mov.l %a0,%d3 # put base in d3 5103 5104 clr.l %d2 # yes, so index = 0 5105 bra.b fbase_supp_ck 5106 5107# index: 5108fcalc_index: 5109 mov.l %d0,L_SCR1(%a6) # save d0 (opword) 5110 bfextu %d0{&16:&4},%d1 # fetch dreg index 5111 bsr.l fetch_dreg 5112 5113 movm.l &0x3c00,-(%sp) # save d2-d5 5114 mov.l %d0,%d2 # put index in d2 5115 mov.l L_SCR1(%a6),%d5 5116 mov.l %a0,%d3 5117 5118 btst &0xb,%d5 # is index word or long? 5119 bne.b fno_ext 5120 ext.l %d2 5121 5122fno_ext: 5123 bfextu %d5{&21:&2},%d0 5124 lsl.l %d0,%d2 5125 5126# base address (passed as parameter in d3): 5127# we clear the value here if it should actually be suppressed. 5128fbase_supp_ck: 5129 btst &0x7,%d5 # is the bd suppressed? 
5130 beq.b fno_base_sup 5131 clr.l %d3 5132 5133# base displacement: 5134fno_base_sup: 5135 bfextu %d5{&26:&2},%d0 # get bd size 5136# beq.l fmovm_error # if (size == 0) it's reserved 5137 5138 cmpi.b %d0,&0x2 5139 blt.b fno_bd 5140 beq.b fget_word_bd 5141 5142 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr 5143 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr 5144 bsr.l _imem_read_long 5145 5146 tst.l %d1 # did ifetch fail? 5147 bne.l fcea_iacc # yes 5148 5149 bra.b fchk_ind 5150 5151fget_word_bd: 5152 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr 5153 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr 5154 bsr.l _imem_read_word 5155 5156 tst.l %d1 # did ifetch fail? 5157 bne.l fcea_iacc # yes 5158 5159 ext.l %d0 # sign extend bd 5160 5161fchk_ind: 5162 add.l %d0,%d3 # base += bd 5163 5164# outer displacement: 5165fno_bd: 5166 bfextu %d5{&30:&2},%d0 # is od suppressed? 5167 beq.w faii_bd 5168 5169 cmpi.b %d0,&0x2 5170 blt.b fnull_od 5171 beq.b fword_od 5172 5173 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr 5174 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr 5175 bsr.l _imem_read_long 5176 5177 tst.l %d1 # did ifetch fail? 5178 bne.l fcea_iacc # yes 5179 5180 bra.b fadd_them 5181 5182fword_od: 5183 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr 5184 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr 5185 bsr.l _imem_read_word 5186 5187 tst.l %d1 # did ifetch fail? 5188 bne.l fcea_iacc # yes 5189 5190 ext.l %d0 # sign extend od 5191 bra.b fadd_them 5192 5193fnull_od: 5194 clr.l %d0 5195 5196fadd_them: 5197 mov.l %d0,%d4 5198 5199 btst &0x2,%d5 # pre or post indexing? 5200 beq.b fpre_indexed 5201 5202 mov.l %d3,%a0 5203 bsr.l _dmem_read_long 5204 5205 tst.l %d1 # did dfetch fail? 5206 bne.w fcea_err # yes 5207 5208 add.l %d2,%d0 # <ea> += index 5209 add.l %d4,%d0 # <ea> += od 5210 bra.b fdone_ea 5211 5212fpre_indexed: 5213 add.l %d2,%d3 # preindexing 5214 mov.l %d3,%a0 5215 bsr.l _dmem_read_long 5216 5217 tst.l %d1 # did dfetch fail? 
5218 bne.w fcea_err # yes 5219 5220 add.l %d4,%d0 # ea += od 5221 bra.b fdone_ea 5222 5223faii_bd: 5224 add.l %d2,%d3 # ea = (base + bd) + index 5225 mov.l %d3,%d0 5226fdone_ea: 5227 mov.l %d0,%a0 5228 5229 movm.l (%sp)+,&0x003c # restore d2-d5 5230 rts 5231 5232######################################################### 5233fcea_err: 5234 mov.l %d3,%a0 5235 5236 movm.l (%sp)+,&0x003c # restore d2-d5 5237 mov.w &0x0101,%d0 5238 bra.l iea_dacc 5239 5240fcea_iacc: 5241 movm.l (%sp)+,&0x003c # restore d2-d5 5242 bra.l iea_iacc 5243 5244fmovm_out_err: 5245 bsr.l restore 5246 mov.w &0x00e1,%d0 5247 bra.b fmovm_err 5248 5249fmovm_in_err: 5250 bsr.l restore 5251 mov.w &0x0161,%d0 5252 5253fmovm_err: 5254 mov.l L_SCR1(%a6),%a0 5255 bra.l iea_dacc 5256 5257######################################################################### 5258# XDEF **************************************************************** # 5259# fmovm_ctrl(): emulate fmovm.l of control registers instr # 5260# # 5261# XREF **************************************************************** # 5262# _imem_read_long() - read longword from memory # 5263# iea_iacc() - _imem_read_long() failed; error recovery # 5264# # 5265# INPUT *************************************************************** # 5266# None # 5267# # 5268# OUTPUT ************************************************************** # 5269# If _imem_read_long() doesn't fail: # 5270# USER_FPCR(a6) = new FPCR value # 5271# USER_FPSR(a6) = new FPSR value # 5272# USER_FPIAR(a6) = new FPIAR value # 5273# # 5274# ALGORITHM *********************************************************** # 5275# Decode the instruction type by looking at the extension word # 5276# in order to see how many control registers to fetch from memory. # 5277# Fetch them using _imem_read_long(). If this fetch fails, exit through # 5278# the special access error exit handler iea_iacc(). 
# 5279# # 5280# Instruction word decoding: # 5281# # 5282# fmovem.l #<data>, {FPIAR&|FPCR&|FPSR} # 5283# # 5284# WORD1 WORD2 # 5285# 1111 0010 00 111100 100$ $$00 0000 0000 # 5286# # 5287# $$$ (100): FPCR # 5288# (010): FPSR # 5289# (001): FPIAR # 5290# (000): FPIAR # 5291# # 5292######################################################################### 5293 5294 global fmovm_ctrl 5295fmovm_ctrl: 5296 mov.b EXC_EXTWORD(%a6),%d0 # fetch reg select bits 5297 cmpi.b %d0,&0x9c # fpcr & fpsr & fpiar ? 5298 beq.w fctrl_in_7 # yes 5299 cmpi.b %d0,&0x98 # fpcr & fpsr ? 5300 beq.w fctrl_in_6 # yes 5301 cmpi.b %d0,&0x94 # fpcr & fpiar ? 5302 beq.b fctrl_in_5 # yes 5303 5304# fmovem.l #<data>, fpsr/fpiar 5305fctrl_in_3: 5306 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr 5307 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr 5308 bsr.l _imem_read_long # fetch FPSR from mem 5309 5310 tst.l %d1 # did ifetch fail? 5311 bne.l iea_iacc # yes 5312 5313 mov.l %d0,USER_FPSR(%a6) # store new FPSR to stack 5314 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr 5315 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr 5316 bsr.l _imem_read_long # fetch FPIAR from mem 5317 5318 tst.l %d1 # did ifetch fail? 5319 bne.l iea_iacc # yes 5320 5321 mov.l %d0,USER_FPIAR(%a6) # store new FPIAR to stack 5322 rts 5323 5324# fmovem.l #<data>, fpcr/fpiar 5325fctrl_in_5: 5326 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr 5327 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr 5328 bsr.l _imem_read_long # fetch FPCR from mem 5329 5330 tst.l %d1 # did ifetch fail? 5331 bne.l iea_iacc # yes 5332 5333 mov.l %d0,USER_FPCR(%a6) # store new FPCR to stack 5334 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr 5335 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr 5336 bsr.l _imem_read_long # fetch FPIAR from mem 5337 5338 tst.l %d1 # did ifetch fail? 
5339 bne.l iea_iacc # yes 5340 5341 mov.l %d0,USER_FPIAR(%a6) # store new FPIAR to stack 5342 rts 5343 5344# fmovem.l #<data>, fpcr/fpsr 5345fctrl_in_6: 5346 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr 5347 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr 5348 bsr.l _imem_read_long # fetch FPCR from mem 5349 5350 tst.l %d1 # did ifetch fail? 5351 bne.l iea_iacc # yes 5352 5353 mov.l %d0,USER_FPCR(%a6) # store new FPCR to mem 5354 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr 5355 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr 5356 bsr.l _imem_read_long # fetch FPSR from mem 5357 5358 tst.l %d1 # did ifetch fail? 5359 bne.l iea_iacc # yes 5360 5361 mov.l %d0,USER_FPSR(%a6) # store new FPSR to mem 5362 rts 5363 5364# fmovem.l #<data>, fpcr/fpsr/fpiar 5365fctrl_in_7: 5366 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr 5367 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr 5368 bsr.l _imem_read_long # fetch FPCR from mem 5369 5370 tst.l %d1 # did ifetch fail? 5371 bne.l iea_iacc # yes 5372 5373 mov.l %d0,USER_FPCR(%a6) # store new FPCR to mem 5374 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr 5375 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr 5376 bsr.l _imem_read_long # fetch FPSR from mem 5377 5378 tst.l %d1 # did ifetch fail? 5379 bne.l iea_iacc # yes 5380 5381 mov.l %d0,USER_FPSR(%a6) # store new FPSR to mem 5382 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr 5383 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr 5384 bsr.l _imem_read_long # fetch FPIAR from mem 5385 5386 tst.l %d1 # did ifetch fail? 
5387 bne.l iea_iacc # yes 5388 5389 mov.l %d0,USER_FPIAR(%a6) # store new FPIAR to mem 5390 rts 5391 5392########################################################################## 5393 5394######################################################################### 5395# XDEF **************************************************************** # 5396# addsub_scaler2(): scale inputs to fadd/fsub such that no # 5397# OVFL/UNFL exceptions will result # 5398# # 5399# XREF **************************************************************** # 5400# norm() - normalize mantissa after adjusting exponent # 5401# # 5402# INPUT *************************************************************** # 5403# FP_SRC(a6) = fp op1(src) # 5404# FP_DST(a6) = fp op2(dst) # 5405# # 5406# OUTPUT ************************************************************** # 5407# FP_SRC(a6) = fp op1 scaled(src) # 5408# FP_DST(a6) = fp op2 scaled(dst) # 5409# d0 = scale amount # 5410# # 5411# ALGORITHM *********************************************************** # 5412# If the DST exponent is > the SRC exponent, set the DST exponent # 5413# equal to 0x3fff and scale the SRC exponent by the value that the # 5414# DST exponent was scaled by. If the SRC exponent is greater or equal, # 5415# do the opposite. Return this scale factor in d0. # 5416# If the two exponents differ by > the number of mantissa bits # 5417# plus two, then set the smallest exponent to a very small value as a # 5418# quick shortcut. 
# 5419# # 5420######################################################################### 5421 5422 global addsub_scaler2 5423addsub_scaler2: 5424 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) 5425 mov.l DST_HI(%a1),FP_SCR1_HI(%a6) 5426 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) 5427 mov.l DST_LO(%a1),FP_SCR1_LO(%a6) 5428 mov.w SRC_EX(%a0),%d0 5429 mov.w DST_EX(%a1),%d1 5430 mov.w %d0,FP_SCR0_EX(%a6) 5431 mov.w %d1,FP_SCR1_EX(%a6) 5432 5433 andi.w &0x7fff,%d0 5434 andi.w &0x7fff,%d1 5435 mov.w %d0,L_SCR1(%a6) # store src exponent 5436 mov.w %d1,2+L_SCR1(%a6) # store dst exponent 5437 5438 cmp.w %d0, %d1 # is src exp >= dst exp? 5439 bge.l src_exp_ge2 5440 5441# dst exp is > src exp; scale dst to exp = 0x3fff 5442dst_exp_gt2: 5443 bsr.l scale_to_zero_dst 5444 mov.l %d0,-(%sp) # save scale factor 5445 5446 cmpi.b STAG(%a6),&DENORM # is dst denormalized? 5447 bne.b cmpexp12 5448 5449 lea FP_SCR0(%a6),%a0 5450 bsr.l norm # normalize the denorm; result is new exp 5451 neg.w %d0 # new exp = -(shft val) 5452 mov.w %d0,L_SCR1(%a6) # inset new exp 5453 5454cmpexp12: 5455 mov.w 2+L_SCR1(%a6),%d0 5456 subi.w &mantissalen+2,%d0 # subtract mantissalen+2 from larger exp 5457 5458 cmp.w %d0,L_SCR1(%a6) # is difference >= len(mantissa)+2? 5459 bge.b quick_scale12 5460 5461 mov.w L_SCR1(%a6),%d0 5462 add.w 0x2(%sp),%d0 # scale src exponent by scale factor 5463 mov.w FP_SCR0_EX(%a6),%d1 5464 and.w &0x8000,%d1 5465 or.w %d1,%d0 # concat {sgn,new exp} 5466 mov.w %d0,FP_SCR0_EX(%a6) # insert new dst exponent 5467 5468 mov.l (%sp)+,%d0 # return SCALE factor 5469 rts 5470 5471quick_scale12: 5472 andi.w &0x8000,FP_SCR0_EX(%a6) # zero src exponent 5473 bset &0x0,1+FP_SCR0_EX(%a6) # set exp = 1 5474 5475 mov.l (%sp)+,%d0 # return SCALE factor 5476 rts 5477 5478# src exp is >= dst exp; scale src to exp = 0x3fff 5479src_exp_ge2: 5480 bsr.l scale_to_zero_src 5481 mov.l %d0,-(%sp) # save scale factor 5482 5483 cmpi.b DTAG(%a6),&DENORM # is dst denormalized? 
5484 bne.b cmpexp22 5485 lea FP_SCR1(%a6),%a0 5486 bsr.l norm # normalize the denorm; result is new exp 5487 neg.w %d0 # new exp = -(shft val) 5488 mov.w %d0,2+L_SCR1(%a6) # inset new exp 5489 5490cmpexp22: 5491 mov.w L_SCR1(%a6),%d0 5492 subi.w &mantissalen+2,%d0 # subtract mantissalen+2 from larger exp 5493 5494 cmp.w %d0,2+L_SCR1(%a6) # is difference >= len(mantissa)+2? 5495 bge.b quick_scale22 5496 5497 mov.w 2+L_SCR1(%a6),%d0 5498 add.w 0x2(%sp),%d0 # scale dst exponent by scale factor 5499 mov.w FP_SCR1_EX(%a6),%d1 5500 andi.w &0x8000,%d1 5501 or.w %d1,%d0 # concat {sgn,new exp} 5502 mov.w %d0,FP_SCR1_EX(%a6) # insert new dst exponent 5503 5504 mov.l (%sp)+,%d0 # return SCALE factor 5505 rts 5506 5507quick_scale22: 5508 andi.w &0x8000,FP_SCR1_EX(%a6) # zero dst exponent 5509 bset &0x0,1+FP_SCR1_EX(%a6) # set exp = 1 5510 5511 mov.l (%sp)+,%d0 # return SCALE factor 5512 rts 5513 5514########################################################################## 5515 5516######################################################################### 5517# XDEF **************************************************************** # 5518# scale_to_zero_src(): scale the exponent of extended precision # 5519# value at FP_SCR0(a6). # 5520# # 5521# XREF **************************************************************** # 5522# norm() - normalize the mantissa if the operand was a DENORM # 5523# # 5524# INPUT *************************************************************** # 5525# FP_SCR0(a6) = extended precision operand to be scaled # 5526# # 5527# OUTPUT ************************************************************** # 5528# FP_SCR0(a6) = scaled extended precision operand # 5529# d0 = scale value # 5530# # 5531# ALGORITHM *********************************************************** # 5532# Set the exponent of the input operand to 0x3fff. Save the value # 5533# of the difference between the original and new exponent. Then, # 5534# normalize the operand if it was a DENORM. 
Add this normalization # 5535# value to the previous value. Return the result. # 5536# # 5537######################################################################### 5538 5539 global scale_to_zero_src 5540scale_to_zero_src: 5541 mov.w FP_SCR0_EX(%a6),%d1 # extract operand's {sgn,exp} 5542 mov.w %d1,%d0 # make a copy 5543 5544 andi.l &0x7fff,%d1 # extract operand's exponent 5545 5546 andi.w &0x8000,%d0 # extract operand's sgn 5547 or.w &0x3fff,%d0 # insert new operand's exponent(=0) 5548 5549 mov.w %d0,FP_SCR0_EX(%a6) # insert biased exponent 5550 5551 cmpi.b STAG(%a6),&DENORM # is operand normalized? 5552 beq.b stzs_denorm # normalize the DENORM 5553 5554stzs_norm: 5555 mov.l &0x3fff,%d0 5556 sub.l %d1,%d0 # scale = BIAS + (-exp) 5557 5558 rts 5559 5560stzs_denorm: 5561 lea FP_SCR0(%a6),%a0 # pass ptr to src op 5562 bsr.l norm # normalize denorm 5563 neg.l %d0 # new exponent = -(shft val) 5564 mov.l %d0,%d1 # prepare for op_norm call 5565 bra.b stzs_norm # finish scaling 5566 5567### 5568 5569######################################################################### 5570# XDEF **************************************************************** # 5571# scale_sqrt(): scale the input operand exponent so a subsequent # 5572# fsqrt operation won't take an exception. # 5573# # 5574# XREF **************************************************************** # 5575# norm() - normalize the mantissa if the operand was a DENORM # 5576# # 5577# INPUT *************************************************************** # 5578# FP_SCR0(a6) = extended precision operand to be scaled # 5579# # 5580# OUTPUT ************************************************************** # 5581# FP_SCR0(a6) = scaled extended precision operand # 5582# d0 = scale value # 5583# # 5584# ALGORITHM *********************************************************** # 5585# If the input operand is a DENORM, normalize it. 
# 5586# If the exponent of the input operand is even, set the exponent # 5587# to 0x3ffe and return a scale factor of "(exp-0x3ffe)/2". If the # 5588# exponent of the input operand is off, set the exponent to ox3fff and # 5589# return a scale factor of "(exp-0x3fff)/2". # 5590# # 5591######################################################################### 5592 5593 global scale_sqrt 5594scale_sqrt: 5595 cmpi.b STAG(%a6),&DENORM # is operand normalized? 5596 beq.b ss_denorm # normalize the DENORM 5597 5598 mov.w FP_SCR0_EX(%a6),%d1 # extract operand's {sgn,exp} 5599 andi.l &0x7fff,%d1 # extract operand's exponent 5600 5601 andi.w &0x8000,FP_SCR0_EX(%a6) # extract operand's sgn 5602 5603 btst &0x0,%d1 # is exp even or odd? 5604 beq.b ss_norm_even 5605 5606 ori.w &0x3fff,FP_SCR0_EX(%a6) # insert new operand's exponent(=0) 5607 5608 mov.l &0x3fff,%d0 5609 sub.l %d1,%d0 # scale = BIAS + (-exp) 5610 asr.l &0x1,%d0 # divide scale factor by 2 5611 rts 5612 5613ss_norm_even: 5614 ori.w &0x3ffe,FP_SCR0_EX(%a6) # insert new operand's exponent(=0) 5615 5616 mov.l &0x3ffe,%d0 5617 sub.l %d1,%d0 # scale = BIAS + (-exp) 5618 asr.l &0x1,%d0 # divide scale factor by 2 5619 rts 5620 5621ss_denorm: 5622 lea FP_SCR0(%a6),%a0 # pass ptr to src op 5623 bsr.l norm # normalize denorm 5624 5625 btst &0x0,%d0 # is exp even or odd? 5626 beq.b ss_denorm_even 5627 5628 ori.w &0x3fff,FP_SCR0_EX(%a6) # insert new operand's exponent(=0) 5629 5630 add.l &0x3fff,%d0 5631 asr.l &0x1,%d0 # divide scale factor by 2 5632 rts 5633 5634ss_denorm_even: 5635 ori.w &0x3ffe,FP_SCR0_EX(%a6) # insert new operand's exponent(=0) 5636 5637 add.l &0x3ffe,%d0 5638 asr.l &0x1,%d0 # divide scale factor by 2 5639 rts 5640 5641### 5642 5643######################################################################### 5644# XDEF **************************************************************** # 5645# scale_to_zero_dst(): scale the exponent of extended precision # 5646# value at FP_SCR1(a6). 
# 5647# # 5648# XREF **************************************************************** # 5649# norm() - normalize the mantissa if the operand was a DENORM # 5650# # 5651# INPUT *************************************************************** # 5652# FP_SCR1(a6) = extended precision operand to be scaled # 5653# # 5654# OUTPUT ************************************************************** # 5655# FP_SCR1(a6) = scaled extended precision operand # 5656# d0 = scale value # 5657# # 5658# ALGORITHM *********************************************************** # 5659# Set the exponent of the input operand to 0x3fff. Save the value # 5660# of the difference between the original and new exponent. Then, # 5661# normalize the operand if it was a DENORM. Add this normalization # 5662# value to the previous value. Return the result. # 5663# # 5664######################################################################### 5665 5666 global scale_to_zero_dst 5667scale_to_zero_dst: 5668 mov.w FP_SCR1_EX(%a6),%d1 # extract operand's {sgn,exp} 5669 mov.w %d1,%d0 # make a copy 5670 5671 andi.l &0x7fff,%d1 # extract operand's exponent 5672 5673 andi.w &0x8000,%d0 # extract operand's sgn 5674 or.w &0x3fff,%d0 # insert new operand's exponent(=0) 5675 5676 mov.w %d0,FP_SCR1_EX(%a6) # insert biased exponent 5677 5678 cmpi.b DTAG(%a6),&DENORM # is operand normalized? 
5679 beq.b stzd_denorm # normalize the DENORM 5680 5681stzd_norm: 5682 mov.l &0x3fff,%d0 5683 sub.l %d1,%d0 # scale = BIAS + (-exp) 5684 rts 5685 5686stzd_denorm: 5687 lea FP_SCR1(%a6),%a0 # pass ptr to dst op 5688 bsr.l norm # normalize denorm 5689 neg.l %d0 # new exponent = -(shft val) 5690 mov.l %d0,%d1 # prepare for op_norm call 5691 bra.b stzd_norm # finish scaling 5692 5693########################################################################## 5694 5695######################################################################### 5696# XDEF **************************************************************** # 5697# res_qnan(): return default result w/ QNAN operand for dyadic # 5698# res_snan(): return default result w/ SNAN operand for dyadic # 5699# res_qnan_1op(): return dflt result w/ QNAN operand for monadic # 5700# res_snan_1op(): return dflt result w/ SNAN operand for monadic # 5701# # 5702# XREF **************************************************************** # 5703# None # 5704# # 5705# INPUT *************************************************************** # 5706# FP_SRC(a6) = pointer to extended precision src operand # 5707# FP_DST(a6) = pointer to extended precision dst operand # 5708# # 5709# OUTPUT ************************************************************** # 5710# fp0 = default result # 5711# # 5712# ALGORITHM *********************************************************** # 5713# If either operand (but not both operands) of an operation is a # 5714# nonsignalling NAN, then that NAN is returned as the result. If both # 5715# operands are nonsignalling NANs, then the destination operand # 5716# nonsignalling NAN is returned as the result. # 5717# If either operand to an operation is a signalling NAN (SNAN), # 5718# then, the SNAN bit is set in the FPSR EXC byte. If the SNAN trap # 5719# enable bit is set in the FPCR, then the trap is taken and the # 5720# destination is not modified. 
# If the SNAN trap enable bit is not set,				#
# then the SNAN is converted to a nonsignalling NAN (by setting the	#
# SNAN bit in the operand to one), and the operation continues as	#
# described in the preceding paragraph, for nonsignalling NANs.		#
#	Make sure the appropriate FPSR bits are set before exiting.	#
#									#
#########################################################################

	global		res_qnan
	global		res_snan
res_qnan:
res_snan:
	cmp.b		DTAG(%a6), &SNAN	# is the dst an SNAN?
	beq.b		dst_snan2
	cmp.b		DTAG(%a6), &QNAN	# is the dst a QNAN?
	beq.b		dst_qnan2
# dst is not a NAN, so the NAN must be the src operand
src_nan:
	cmp.b		STAG(%a6), &QNAN
	beq.b		src_qnan2
	global		res_snan_1op
res_snan_1op:
src_snan2:
	bset		&0x6, FP_SRC_HI(%a6)	# set SNAN bit (quiets it to a QNAN)
	or.l		&nan_mask+aiop_mask+snan_mask, USER_FPSR(%a6)
	lea		FP_SRC(%a6), %a0
	bra.b		nan_comp
	global		res_qnan_1op
res_qnan_1op:
src_qnan2:
	or.l		&nan_mask, USER_FPSR(%a6)
	lea		FP_SRC(%a6), %a0
	bra.b		nan_comp
dst_snan2:
	or.l		&nan_mask+aiop_mask+snan_mask, USER_FPSR(%a6)
	bset		&0x6, FP_DST_HI(%a6)	# set SNAN bit (quiets it to a QNAN)
	lea		FP_DST(%a6), %a0
	bra.b		nan_comp
dst_qnan2:
	lea		FP_DST(%a6), %a0
	cmp.b		STAG(%a6), &SNAN	# dst is a QNAN; was src an SNAN?
	bne		nan_done
	or.l		&aiop_mask+snan_mask, USER_FPSR(%a6)
nan_done:
	or.l		&nan_mask, USER_FPSR(%a6)
# common tail: a0 -> NAN to return; mirror its sign into the FPSR N bit
nan_comp:
	btst		&0x7, FTEMP_EX(%a0)	# is NAN neg?
	beq.b		nan_not_neg
	or.l		&neg_mask, USER_FPSR(%a6)
nan_not_neg:
	fmovm.x		(%a0), &0x80		# return the NAN in fp0
	rts

#########################################################################
# XDEF ****************************************************************	#
#	res_operr(): return default result during operand error	#
#									#
# XREF ****************************************************************	#
#	None								#
#									#
# INPUT ***************************************************************	#
#	None								#
#									#
# OUTPUT **************************************************************	#
#	fp0 = default operand error result				#
#									#
# ALGORITHM ***********************************************************	#
#	A nonsignalling NAN is returned as the default result when	#
# an operand error occurs for the following cases:			#
#									#
#	Multiply: (Infinity x Zero)					#
#	Divide  : (Zero / Zero) || (Infinity / Infinity)		#
#									#
#########################################################################

	global		res_operr
res_operr:
	or.l		&nan_mask+operr_mask+aiop_mask, USER_FPSR(%a6)
	fmovm.x		nan_return(%pc), &0x80	# return the default QNAN in fp0
	rts

nan_return:
	long		0x7fff0000, 0xffffffff, 0xffffffff

#########################################################################
# XDEF ****************************************************************	#
#	_denorm(): denormalize an intermediate result			#
#									#
# XREF ****************************************************************	#
#	None								#
#									#
# INPUT ***************************************************************	#
#	a0 = points to the operand to be denormalized			#
#		(in the internal extended format)			#
#									#
#	d0 = rounding precision						#
#									#
# OUTPUT **************************************************************	#
#	a0 = pointer to the denormalized result				#
#		(in the internal extended format)			#
#									#
#	d0
#	   = guard,round,sticky						#
#									#
# ALGORITHM ***********************************************************	#
#	According to the exponent underflow threshold for the given	#
# precision, shift the mantissa bits to the right in order to raise	#
# the exponent of the operand to the threshold value. While shifting	#
# the mantissa bits right, maintain the value of the guard, round,	#
# and sticky bits.							#
# other notes:								#
#	(1) _denorm() is called by the underflow routines		#
#	(2) _denorm() does NOT affect the status register		#
#									#
#########################################################################

#
# table of exponent threshold values for each precision
#
tbl_thresh:
	short		0x0
	short		sgl_thresh
	short		dbl_thresh

	global		_denorm
_denorm:
#
# Load the exponent threshold for the precision selected and check
# to see if (threshold - exponent) is > 65 in which case we can
# simply calculate the sticky bit and zero the mantissa. otherwise
# we have to call the denormalization routine.
#
	lsr.b		&0x2, %d0		# shift prec to lo bits
	mov.w		(tbl_thresh.b,%pc,%d0.w*2), %d1 # load prec threshold
	mov.w		%d1, %d0		# copy d1 into d0
	sub.w		FTEMP_EX(%a0), %d0	# diff = threshold - exp
	cmpi.w		%d0, &66		# is diff > 65? (mant + g,r bits)
	bpl.b		denorm_set_stky		# yes; just calc sticky

	clr.l		%d0			# clear g,r,s
	btst		&inex2_bit, FPSR_EXCEPT(%a6) # yes; was INEX2 set?
	beq.b		denorm_call		# no; don't change anything
	bset		&29, %d0		# yes; set sticky bit

denorm_call:
	bsr.l		dnrm_lp			# denormalize the number
	rts

#
# all bits would have been shifted off during the denorm so simply
# calculate if the sticky should be set and clear the entire mantissa.
#
denorm_set_stky:
	mov.l		&0x20000000, %d0	# set sticky bit in return value
	mov.w		%d1, FTEMP_EX(%a0)	# load exp with threshold
	clr.l		FTEMP_HI(%a0)		# zero hi(mantissa)
	clr.l		FTEMP_LO(%a0)		# zero lo(mantissa)
	rts

#									#
# dnrm_lp(): normalize exponent/mantissa to specified threshold		#
#									#
# INPUT:								#
#	%a0	   : points to the operand to be denormalized		#
#	%d0{31:29} : initial guard,round,sticky				#
#	%d1{15:0}  : denormalization threshold				#
# OUTPUT:								#
#	%a0	   : points to the denormalized operand			#
#	%d0{31:29} : final guard,round,sticky				#
#									#

# *** Local Equates *** #
set	GRS,		L_SCR2			# g,r,s temp storage
set	FTEMP_LO2,	L_SCR1			# FTEMP_LO copy

	global		dnrm_lp
dnrm_lp:

#
# make a copy of FTEMP_LO and place the g,r,s bits directly after it
# in memory so as to make the bitfield extraction for denormalization easier.
#
	mov.l		FTEMP_LO(%a0), FTEMP_LO2(%a6)	# make FTEMP_LO copy
	mov.l		%d0, GRS(%a6)		# place g,r,s after it

#
# check to see how much less than the underflow threshold the operand
# exponent is.
#
	mov.l		%d1, %d0		# copy the denorm threshold
	sub.w		FTEMP_EX(%a0), %d1	# d1 = threshold - uns exponent
	ble.b		dnrm_no_lp		# d1 <= 0; no denorm needed
	cmpi.w		%d1, &0x20		# is ( 0 <= d1 < 32) ?
	blt.b		case_1			# yes
	cmpi.w		%d1, &0x40		# is (32 <= d1 < 64) ?
	blt.b		case_2			# yes
	bra.w		case_3			# (d1 >= 64)

#
# No normalization necessary
#
dnrm_no_lp:
	mov.l		GRS(%a6), %d0		# restore original g,r,s
	rts

#
# case (0<d1<32)
#
# %d0 = denorm threshold
# %d1 = "n" = amt to shift
#
#	---------------------------------------------------------
#	|     FTEMP_HI     |     FTEMP_LO     |grs000.........000|
#	---------------------------------------------------------
#	<-(32 - n)-><-(n)-><-(32 - n)-><-(n)-><-(32 - n)-><-(n)->
#	\	    \		       \		   \
#	 \	     \			\		    \
#	  \	      \			 \		     \
#	   \	       \		  \		      \
#	    \		\		   \		       \
#	     \		 \		    \			\
#	      \		  \		     \			 \
#	       \	   \		      \			  \
#	<-(n)-><-(32 - n)-><------(32)-------><------(32)------->
#	---------------------------------------------------------
#	|0.....0| NEW_HI   |  NEW_FTEMP_LO    |grs		 |
#	---------------------------------------------------------
#
case_1:
	mov.l		%d2, -(%sp)		# create temp storage

	mov.w		%d0, FTEMP_EX(%a0)	# exponent = denorm threshold
	mov.l		&32, %d0
	sub.w		%d1, %d0		# %d0 = 32 - %d1

	cmpi.w		%d1, &29		# is shft amt >= 29
	blt.b		case1_extract		# no; no fix needed
	mov.b		GRS(%a6), %d2		# yes; or the g,r,s bits into the
	or.b		%d2, 3+FTEMP_LO2(%a6)	# FTEMP_LO copy so none are lost

case1_extract:
	bfextu		FTEMP_HI(%a0){&0:%d0}, %d2	# %d2 = new FTEMP_HI
	bfextu		FTEMP_HI(%a0){%d0:&32}, %d1	# %d1 = new FTEMP_LO
	bfextu		FTEMP_LO2(%a6){%d0:&32}, %d0	# %d0 = new G,R,S

	mov.l		%d2, FTEMP_HI(%a0)	# store new FTEMP_HI
	mov.l		%d1, FTEMP_LO(%a0)	# store new FTEMP_LO

	bftst		%d0{&2:&30}		# were bits shifted off?
	beq.b		case1_sticky_clear	# no; go finish
	bset		&rnd_stky_bit, %d0	# yes; set sticky bit

case1_sticky_clear:
	and.l		&0xe0000000, %d0	# clear all but G,R,S
	mov.l		(%sp)+, %d2		# restore temp register
	rts

#
# case (32<=d1<64)
#
# %d0 = denorm threshold
# %d1 = "n" = amt to shift
#
#	---------------------------------------------------------
#	|     FTEMP_HI     |     FTEMP_LO     |grs000.........000|
#	---------------------------------------------------------
#	<-(32 - n)-><-(n)-><-(32 - n)-><-(n)-><-(32 - n)-><-(n)->
#	\	    \		       \
#	 \	     \			\
#	  \	      \			 -------------------
#	   \	       --------------------		    \
#	    -------------------		   \		     \
#			       \	    \		      \
#				\	     \		       \
#				 \	      \			\
#	<-------(32)------><-(n)-><-(32 - n)-><------(32)------->
#	---------------------------------------------------------
#	|0...............0|0....0|   NEW_LO   |grs		 |
#	---------------------------------------------------------
#
case_2:
	mov.l		%d2, -(%sp)		# create temp storage

	mov.w		%d0, FTEMP_EX(%a0)	# exponent = denorm threshold
	subi.w		&0x20, %d1		# %d1 now between 0 and 32
	mov.l		&0x20, %d0
	sub.w		%d1, %d0		# %d0 = 32 - %d1

# subtle step here; or in the g,r,s at the bottom of FTEMP_LO to minimize
# the number of bits to check for the sticky detect.
# it only plays a role in shift amounts of 61-63.
	mov.b		GRS(%a6), %d2
	or.b		%d2, 3+FTEMP_LO2(%a6)

	bfextu		FTEMP_HI(%a0){&0:%d0}, %d2	# %d2 = new FTEMP_LO
	bfextu		FTEMP_HI(%a0){%d0:&32}, %d1	# %d1 = new G,R,S

	bftst		%d1{&2:&30}		# were any bits shifted off?
	bne.b		case2_set_sticky	# yes; set sticky bit
	bftst		FTEMP_LO2(%a6){%d0:&31}	# were any bits shifted off?
	bne.b		case2_set_sticky	# yes; set sticky bit

	mov.l		%d1, %d0		# move new G,R,S to %d0
	bra.b		case2_end

case2_set_sticky:
	mov.l		%d1, %d0		# move new G,R,S to %d0
	bset		&rnd_stky_bit, %d0	# set sticky bit

case2_end:
	clr.l		FTEMP_HI(%a0)		# store FTEMP_HI = 0
	mov.l		%d2, FTEMP_LO(%a0)	# store FTEMP_LO
	and.l		&0xe0000000, %d0	# clear all but G,R,S

	mov.l		(%sp)+,%d2		# restore temp register
	rts

#
# case (d1>=64)
#
# %d0 = denorm threshold
# %d1 = amt to shift
#
case_3:
	mov.w		%d0, FTEMP_EX(%a0)	# insert denorm threshold

	cmpi.w		%d1, &65		# is shift amt > 65?
	blt.b		case3_64		# no; it's == 64
	beq.b		case3_65		# no; it's == 65

#
# case (d1>65)
#
# Shift value is > 65 and out of range. All bits are shifted off.
# Return a zero mantissa with the sticky bit set
#
	clr.l		FTEMP_HI(%a0)		# clear hi(mantissa)
	clr.l		FTEMP_LO(%a0)		# clear lo(mantissa)
	mov.l		&0x20000000, %d0	# set sticky bit
	rts

#
# case (d1 == 64)
#
#	---------------------------------------------------------
#	|     FTEMP_HI     |     FTEMP_LO     |grs000.........000|
#	---------------------------------------------------------
#	<-------(32)------>
#	\		   \
#	 \		    \
#	  \		     \
#	   \		      ------------------------------
#	    -------------------------------		    \
#					   \		     \
#					    \		      \
#					     \		       \
#					      <-------(32)------>
#	---------------------------------------------------------
#	|0...............0|0................0|grs		 |
#	---------------------------------------------------------
#
case3_64:
	mov.l		FTEMP_HI(%a0), %d0	# fetch hi(mantissa)
	mov.l		%d0, %d1		# make a copy
	and.l		&0xc0000000, %d0	# extract G,R
	and.l		&0x3fffffff, %d1	# extract other bits

	bra.b		case3_complete

#
# case (d1 == 65)
#
#	---------------------------------------------------------
#	|     FTEMP_HI     |     FTEMP_LO     |grs000.........000|
#	---------------------------------------------------------
#	<-------(32)------>
#	\		   \
#	 \		    \
#	  \		     \
#	   \		      ------------------------------
#	    --------------------------------		    \
#					    \		     \
#					     \		      \
#					      \		       \
#					       <-------(31)----->
#	---------------------------------------------------------
#	|0...............0|0................0|0rs		 |
#	---------------------------------------------------------
#
case3_65:
	mov.l		FTEMP_HI(%a0), %d0	# fetch hi(mantissa)
	and.l		&0x80000000, %d0	# extract R bit
	lsr.l		&0x1, %d0		# shift high bit into R bit
	and.l		&0x7fffffff, %d1	# extract other bits
						# NOTE(review): %d1 here still
						# holds the shift amount (65),
						# not hi(mantissa), so this is
						# always nonzero and sticky is
						# always set on a 65-bit shift.
						# Presumably bits 30:0 of
						# hi(mantissa) were intended --
						# confirm against the upstream
						# 060SP sources before changing.

case3_complete:
# last operation done was an "and" of the bits shifted off so the condition
# codes are already set so branch accordingly.
	bne.b		case3_set_sticky	# yes; go set new sticky
	tst.l		FTEMP_LO(%a0)		# were any bits shifted off?
	bne.b		case3_set_sticky	# yes; go set new sticky
	tst.b		GRS(%a6)		# were any bits shifted off?
	bne.b		case3_set_sticky	# yes; go set new sticky

#
# no bits were shifted off so don't set the sticky bit.
# the guard and
# the entire mantissa is zero.
#
	clr.l		FTEMP_HI(%a0)		# clear hi(mantissa)
	clr.l		FTEMP_LO(%a0)		# clear lo(mantissa)
	rts

#
# some bits were shifted off so set the sticky bit.
# the entire mantissa is zero.
#
case3_set_sticky:
	bset		&rnd_stky_bit,%d0	# set new sticky bit
	clr.l		FTEMP_HI(%a0)		# clear hi(mantissa)
	clr.l		FTEMP_LO(%a0)		# clear lo(mantissa)
	rts

#########################################################################
# XDEF ****************************************************************	#
#	_round(): round result according to precision/mode		#
#									#
# XREF ****************************************************************	#
#	None								#
#									#
# INPUT ***************************************************************	#
#	a0	  = ptr to input operand in internal extended format	#
#	d1(hi)	  = contains rounding precision:			#
#			ext = $0000xxxx					#
#			sgl = $0004xxxx					#
#			dbl = $0008xxxx					#
#	d1(lo)	  = contains rounding mode:				#
#			RN  = $xxxx0000					#
#			RZ  = $xxxx0001					#
#			RM  = $xxxx0002					#
#			RP  = $xxxx0003					#
#	d0{31:29} = contains the g,r,s bits (extended)			#
#									#
# OUTPUT **************************************************************	#
#	a0 = pointer to rounded result					#
#									#
# ALGORITHM ***********************************************************	#
#	On return the value pointed to by a0 is correctly rounded,	#
#	a0 is preserved and the g-r-s bits in d0 are cleared.		#
#	The result is not typed - the tag field is invalid. The		#
#	result is still in the internal extended format.		#
#									#
#	The INEX bit of USER_FPSR will be set if the rounded result was	#
#	inexact (i.e. if any of the g-r-s bits were set).		#
#									#
#########################################################################

	global		_round
_round:
#
# ext_grs() looks at the rounding precision and sets the appropriate
# G,R,S bits.
# If (G,R,S == 0) then result is exact and round is done, else set
# the inex flag in status reg and continue.
#
	bsr.l		ext_grs			# extract G,R,S

	tst.l		%d0			# are G,R,S zero?
	beq.w		truncate		# yes; round is complete

	or.w		&inx2a_mask, 2+USER_FPSR(%a6) # set inex2/ainex

#
# Use rounding mode as an index into a jump table for these modes.
# All of the following assumes grs != 0.
#
	mov.w		(tbl_mode.b,%pc,%d1.w*2), %a1 # load jump offset
	jmp		(tbl_mode.b,%pc,%a1)	# jmp to rnd mode handler

tbl_mode:
	short		rnd_near - tbl_mode
	short		truncate - tbl_mode	# RZ always truncates
	short		rnd_mnus - tbl_mode
	short		rnd_plus - tbl_mode

#################################################################
#	ROUND PLUS INFINITY					#
#								#
#	If sign of fp number = 0 (positive), then add 1 to l.	#
#################################################################
rnd_plus:
	tst.b		FTEMP_SGN(%a0)		# check for sign
	bmi.w		truncate		# if negative then truncate

	mov.l		&0xffffffff, %d0	# force g,r,s to be all f's
	swap		%d1			# set up d1 for round prec.

	cmpi.b		%d1, &s_mode		# is prec = sgl?
	beq.w		add_sgl			# yes
	bgt.w		add_dbl			# no; it's dbl
	bra.w		add_ext			# no; it's ext

#################################################################
#	ROUND MINUS INFINITY					#
#								#
#	If sign of fp number = 1 (negative), then add 1 to l.	#
#################################################################
rnd_mnus:
	tst.b		FTEMP_SGN(%a0)		# check for sign
	bpl.w		truncate		# if positive then truncate

	mov.l		&0xffffffff, %d0	# force g,r,s to be all f's
	swap		%d1			# set up d1 for round prec.

	cmpi.b		%d1, &s_mode		# is prec = sgl?
	beq.w		add_sgl			# yes
	bgt.w		add_dbl			# no; it's dbl
	bra.w		add_ext			# no; it's ext

#################################################################
#	ROUND NEAREST						#
#								#
#	If (g=1), then add 1 to l and if (r=s=0), then clear l	#
#	Note that this will round to even in case of a tie.	#
#################################################################
rnd_near:
	asl.l		&0x1, %d0		# shift g-bit to c-bit
	bcc.w		truncate		# if (g=0) then truncate

	swap		%d1			# set up d1 for round prec.

	cmpi.b		%d1, &s_mode		# is prec = sgl?
	beq.w		add_sgl			# yes
	bgt.w		add_dbl			# no; it's dbl
	bra.w		add_ext			# no; it's ext

# *** LOCAL EQUATES ***
set	ad_1_sgl,	0x00000100	# constant to add 1 to l-bit in sgl prec
set	ad_1_dbl,	0x00000800	# constant to add 1 to l-bit in dbl prec

#########################
#	ADD SINGLE	#
#########################
add_sgl:
	add.l		&ad_1_sgl, FTEMP_HI(%a0)
	bcc.b		scc_clr			# no mantissa overflow
	roxr.w		FTEMP_HI(%a0)		# shift v-bit back in
	roxr.w		FTEMP_HI+2(%a0)		# shift v-bit back in
	add.w		&0x1, FTEMP_EX(%a0)	# and incr exponent
scc_clr:
	tst.l		%d0			# test for rs = 0
	bne.b		sgl_done
	and.w		&0xfe00, FTEMP_HI+2(%a0) # clear the l-bit
sgl_done:
	and.l		&0xffffff00, FTEMP_HI(%a0) # truncate bits beyond sgl limit
	clr.l		FTEMP_LO(%a0)		# clear lo(mantissa)
	rts

#########################
#	ADD EXTENDED	#
#########################
add_ext:
	addq.l		&1,FTEMP_LO(%a0)	# add 1 to l-bit
	bcc.b		xcc_clr			# test for carry out
	addq.l		&1,FTEMP_HI(%a0)	# propagate carry
	bcc.b		xcc_clr
	roxr.w		FTEMP_HI(%a0)		# mant is 0 so restore v-bit
	roxr.w		FTEMP_HI+2(%a0)		# mant is 0 so restore v-bit
	roxr.w		FTEMP_LO(%a0)
	roxr.w		FTEMP_LO+2(%a0)
	add.w		&0x1,FTEMP_EX(%a0)	# and inc exp
xcc_clr:
	tst.l		%d0			# test rs = 0
	bne.b		add_ext_done
	and.b		&0xfe,FTEMP_LO+3(%a0)	# clear the l bit
add_ext_done:
	rts

#########################
#	ADD DOUBLE	#
#########################
add_dbl:
	add.l		&ad_1_dbl, FTEMP_LO(%a0) # add 1 to lsb
	bcc.b		dcc_clr			# no carry
	addq.l		&0x1, FTEMP_HI(%a0)	# propagate carry
	bcc.b		dcc_clr			# no carry

	roxr.w		FTEMP_HI(%a0)		# mant is 0 so restore v-bit
	roxr.w		FTEMP_HI+2(%a0)		# mant is 0 so restore v-bit
	roxr.w		FTEMP_LO(%a0)
	roxr.w		FTEMP_LO+2(%a0)
	addq.w		&0x1, FTEMP_EX(%a0)	# incr exponent
dcc_clr:
	tst.l		%d0			# test for rs = 0
	bne.b		dbl_done
	and.w		&0xf000, FTEMP_LO+2(%a0) # clear the l-bit

dbl_done:
	and.l		&0xfffff800,FTEMP_LO(%a0) # truncate bits beyond dbl limit
	rts

###########################
# Truncate all other bits #
###########################
truncate:
	swap		%d1			# select rnd prec

	cmpi.b		%d1, &s_mode		# is prec sgl?
	beq.w		sgl_done		# yes
	bgt.b		dbl_done		# no; it's dbl
	rts					# no; it's ext


#
# ext_grs(): extract guard, round and sticky bits according to
#	     rounding precision.
#
# INPUT
#	d0	  = extended precision g,r,s (in d0{31:29})
#	d1	  = {PREC,ROUND}
# OUTPUT
#	d0{31:29} = guard, round, sticky
#
# The ext_grs extracts the guard/round/sticky bits according to the
# selected rounding precision. It is called by the round subroutine
# only. All registers except d0 are kept intact. d0 becomes an
# updated guard,round,sticky in d0{31:29}
#
# Notes: the ext_grs uses the round PREC, and therefore has to swap d1
#	 prior to usage, and needs to restore d1 to original. this
#	 routine is tightly tied to the round routine and not meant to
#	 uphold standard subroutine calling practices.
#

ext_grs:
	swap		%d1			# have d1.w point to round precision
	tst.b		%d1			# is rnd prec = extended?
	bne.b		ext_grs_not_ext		# no; go handle sgl or dbl

#
# %d0 actually already holds g,r,s since _round() had it before calling
# this function. so, as long as we don't disturb it, we are "returning" it.
#
ext_grs_ext:
	swap		%d1			# yes; return to correct positions
	rts

ext_grs_not_ext:
	movm.l		&0x3000, -(%sp)		# make some temp registers {d2/d3}

	cmpi.b		%d1, &s_mode		# is rnd prec = sgl?
	bne.b		ext_grs_dbl		# no; go handle dbl

#
# sgl:
#	96		64	  40	32		0
#	-----------------------------------------------------
#	| EXP	|XXXXXXX|	  |xx	|		|grs|
#	-----------------------------------------------------
#			<--(24)--->nn\			  /
#				   ee -------------------
#				   ww		|
#						v
#				   gr	 new sticky
#
ext_grs_sgl:
	bfextu		FTEMP_HI(%a0){&24:&2}, %d3	# sgl prec. g-r are 2 bits right
	mov.l		&30, %d2		# of the sgl prec. limits
	lsl.l		%d2, %d3		# shift g-r bits to MSB of d3
	mov.l		FTEMP_HI(%a0), %d2	# get word 2 for s-bit test
	and.l		&0x0000003f, %d2	# s bit is the or of all other
	bne.b		ext_grs_st_stky		# bits to the right of g-r
	tst.l		FTEMP_LO(%a0)		# test lower mantissa
	bne.b		ext_grs_st_stky		# if any are set, set sticky
	tst.l		%d0			# test original g,r,s
	bne.b		ext_grs_st_stky		# if any are set, set sticky
	bra.b		ext_grs_end_sd		# if words 3 and 4 are clr, exit

#
# dbl:
#	96		64		32	 11	0
#	-----------------------------------------------------
#	| EXP	|XXXXXXX|		|	 |xx	|grs|
#	-----------------------------------------------------
#						 nn\	  /
#						 ee -------
#						 ww	|
#							v
#						 gr	new sticky
#
ext_grs_dbl:
	bfextu		FTEMP_LO(%a0){&21:&2}, %d3	# dbl-prec. g-r are 2 bits right
	mov.l		&30, %d2		# of the dbl prec. limits
	lsl.l		%d2, %d3		# shift g-r bits to the MSB of d3
	mov.l		FTEMP_LO(%a0), %d2	# get lower mantissa for s-bit test
	and.l		&0x000001ff, %d2	# s bit is the or-ing of all
	bne.b		ext_grs_st_stky		# other bits to the right of g-r
	tst.l		%d0			# test word original g,r,s
	bne.b		ext_grs_st_stky		# if any are set, set sticky
	bra.b		ext_grs_end_sd		# if clear, exit

ext_grs_st_stky:
	bset		&rnd_stky_bit, %d3	# set sticky bit
ext_grs_end_sd:
	mov.l		%d3, %d0		# return grs to d0

	movm.l		(%sp)+, &0xc		# restore scratch registers {d2/d3}

	swap		%d1			# restore d1 to original
	rts

#########################################################################
# norm(): normalize the mantissa of an extended precision input. the	#
#	  input operand should not be normalized already.		#
#									#
# XDEF ****************************************************************	#
#	norm()								#
#									#
# XREF ****************************************************************	#
#	none								#
#									#
# INPUT ***************************************************************	#
#	a0 = pointer fp extended precision operand to normalize		#
#									#
# OUTPUT **************************************************************	#
#	d0 = number of bit positions the mantissa was shifted		#
#	a0 = the input operand's mantissa is normalized; the exponent	#
#	     is unchanged.						#
#									#
#########################################################################
	global		norm
norm:
	mov.l		%d2, -(%sp)		# create some temp regs
	mov.l		%d3, -(%sp)

	mov.l		FTEMP_HI(%a0), %d0	# load hi(mantissa)
	mov.l		FTEMP_LO(%a0), %d1	# load lo(mantissa)

	bfffo		%d0{&0:&32}, %d2	# how many places to shift?
	beq.b		norm_lo			# hi(man) is all zeroes!

norm_hi:
	lsl.l		%d2, %d0		# left shift hi(man)
	bfextu		%d1{&0:%d2}, %d3	# extract lo bits

	or.l		%d3, %d0		# create hi(man)
	lsl.l		%d2, %d1		# create lo(man)

	mov.l		%d0, FTEMP_HI(%a0)	# store new hi(man)
	mov.l		%d1, FTEMP_LO(%a0)	# store new lo(man)

	mov.l		%d2, %d0		# return shift amount

	mov.l		(%sp)+, %d3		# restore temp regs
	mov.l		(%sp)+, %d2

	rts

norm_lo:
	bfffo		%d1{&0:&32}, %d2	# how many places to shift?
	lsl.l		%d2, %d1		# shift lo(man)
	add.l		&32, %d2		# add 32 to shft amount

	mov.l		%d1, FTEMP_HI(%a0)	# store hi(man)
	clr.l		FTEMP_LO(%a0)		# lo(man) is now zero

	mov.l		%d2, %d0		# return shift amount

	mov.l		(%sp)+, %d3		# restore temp regs
	mov.l		(%sp)+, %d2

	rts

#########################################################################
# unnorm_fix(): - changes an UNNORM to one of NORM, DENORM, or ZERO	#
#		- returns corresponding optype tag			#
#									#
# XDEF ****************************************************************	#
#	unnorm_fix()							#
#									#
# XREF ****************************************************************	#
#	norm() - normalize the mantissa					#
#									#
# INPUT ***************************************************************	#
#	a0 = pointer to unnormalized extended precision number		#
#									#
# OUTPUT **************************************************************	#
#	d0 = optype tag - is corrected to one of NORM, DENORM, or ZERO	#
#	a0 = input operand has been converted to a norm, denorm, or	#
#	     zero; both the exponent and mantissa are changed.		#
#									#
#########################################################################

	global		unnorm_fix
unnorm_fix:
	bfffo		FTEMP_HI(%a0){&0:&32}, %d0	# how many shifts are needed?
	bne.b		unnorm_shift		# hi(man) is not all zeroes

#
# hi(man) is all zeroes so see if any bits in lo(man) are set
#
unnorm_chk_lo:
	bfffo		FTEMP_LO(%a0){&0:&32}, %d0	# is operand really a zero?
	beq.w		unnorm_zero		# yes

	add.w		&32, %d0		# no; fix shift distance

#
# d0 = # shifts needed for complete normalization
#
unnorm_shift:
	clr.l		%d1			# clear top word
	mov.w		FTEMP_EX(%a0), %d1	# extract exponent
	and.w		&0x7fff, %d1		# strip off sgn

	cmp.w		%d0, %d1		# will denorm push exp < 0?
	bgt.b		unnorm_nrm_zero		# yes; denorm only until exp = 0

#
# exponent would not go < 0. therefore, number stays normalized
#
	sub.w		%d0, %d1		# shift exponent value
	mov.w		FTEMP_EX(%a0), %d0	# load old exponent
	and.w		&0x8000, %d0		# save old sign
	or.w		%d0, %d1		# {sgn,new exp}
	mov.w		%d1, FTEMP_EX(%a0)	# insert new exponent

	bsr.l		norm			# normalize UNNORM

	mov.b		&NORM, %d0		# return new optype tag
	rts

#
# exponent would go < 0, so only denormalize until exp = 0
#
unnorm_nrm_zero:
	cmp.b		%d1, &32		# is exp <= 32?
	bgt.b		unnorm_nrm_zero_lrg	# no; go handle large exponent

	bfextu		FTEMP_HI(%a0){%d1:&32}, %d0 # extract new hi(man)
	mov.l		%d0, FTEMP_HI(%a0)	# save new hi(man)

	mov.l		FTEMP_LO(%a0), %d0	# fetch old lo(man)
	lsl.l		%d1, %d0		# extract new lo(man)
	mov.l		%d0, FTEMP_LO(%a0)	# save new lo(man)

	and.w		&0x8000, FTEMP_EX(%a0)	# set exp = 0

	mov.b		&DENORM, %d0		# return new optype tag
	rts

#
# only mantissa bits set are in lo(man)
#
unnorm_nrm_zero_lrg:
	sub.w		&32, %d1		# adjust shft amt by 32

	mov.l		FTEMP_LO(%a0), %d0	# fetch old lo(man)
	lsl.l		%d1, %d0		# left shift lo(man)

	mov.l		%d0, FTEMP_HI(%a0)	# store new hi(man)
	clr.l		FTEMP_LO(%a0)		# lo(man) = 0

	and.w		&0x8000, FTEMP_EX(%a0)	# set exp = 0

	mov.b		&DENORM, %d0		# return new optype tag
	rts

#
# whole mantissa is zero so this UNNORM is actually a zero
#
unnorm_zero:
	and.w		&0x8000, FTEMP_EX(%a0)	# force exponent to zero

	mov.b		&ZERO, %d0		# fix optype tag
	rts

#########################################################################
# XDEF ****************************************************************	#
#	set_tag_x(): return the optype of the input ext fp number	#
#									#
# XREF ****************************************************************	#
#	None								#
#									#
# INPUT ***************************************************************	#
#	a0 = pointer to extended precision operand			#
#									#
# OUTPUT **************************************************************	#
#	d0 = value of type tag						#
#		one of: NORM, INF, QNAN, SNAN, DENORM, UNNORM, ZERO	#
#									#
# ALGORITHM ***********************************************************	#
#	Simply test the exponent, j-bit, and mantissa values to		#
# determine the type of operand.
#									#
#	If it's an unnormalized zero, alter the operand and force it	#
# to be a normal zero.							#
#									#
#########################################################################

	global		set_tag_x
set_tag_x:
	mov.w		FTEMP_EX(%a0), %d0	# extract exponent
	andi.w		&0x7fff, %d0		# strip off sign
	cmpi.w		%d0, &0x7fff		# is (EXP == MAX)?
	beq.b		inf_or_nan_x
not_inf_or_nan_x:
	btst		&0x7,FTEMP_HI(%a0)	# is the j-bit set?
	beq.b		not_norm_x
is_norm_x:
	mov.b		&NORM, %d0
	rts
not_norm_x:
	tst.w		%d0			# is exponent = 0?
	bne.b		is_unnorm_x
not_unnorm_x:
	tst.l		FTEMP_HI(%a0)		# any mantissa bits set?
	bne.b		is_denorm_x
	tst.l		FTEMP_LO(%a0)
	bne.b		is_denorm_x
is_zero_x:
	mov.b		&ZERO, %d0
	rts
is_denorm_x:
	mov.b		&DENORM, %d0
	rts
# must distinguish now "Unnormalized zeroes" which we
# must convert to zero.
is_unnorm_x:
	tst.l		FTEMP_HI(%a0)
	bne.b		is_unnorm_reg_x
	tst.l		FTEMP_LO(%a0)
	bne.b		is_unnorm_reg_x
# it's an "unnormalized zero". let's convert it to an actual zero...
	andi.w		&0x8000,FTEMP_EX(%a0)	# clear exponent
	mov.b		&ZERO, %d0
	rts
is_unnorm_reg_x:
	mov.b		&UNNORM, %d0
	rts
inf_or_nan_x:
	tst.l		FTEMP_LO(%a0)		# any mantissa bits set?
	bne.b		is_nan_x
	mov.l		FTEMP_HI(%a0), %d0
	and.l		&0x7fffffff, %d0	# msb is a don't care!
	bne.b		is_nan_x
is_inf_x:
	mov.b		&INF, %d0
	rts
is_nan_x:
	btst		&0x6, FTEMP_HI(%a0)	# is the quiet bit set?
	beq.b		is_snan_x
	mov.b		&QNAN, %d0
	rts
is_snan_x:
	mov.b		&SNAN, %d0
	rts

#########################################################################
# XDEF ****************************************************************	#
#	set_tag_d(): return the optype of the input dbl fp number	#
#									#
# XREF ****************************************************************	#
#	None								#
#									#
# INPUT ***************************************************************	#
#	a0 = points to double precision operand				#
#									#
# OUTPUT **************************************************************	#
#	d0 = value of type tag						#
#		one of: NORM, INF, QNAN, SNAN, DENORM, ZERO		#
#									#
# ALGORITHM ***********************************************************	#
#	Simply test the exponent, j-bit, and mantissa values to		#
# determine the type of operand.
# 6692# # 6693######################################################################### 6694 6695 global set_tag_d 6696set_tag_d: 6697 mov.l FTEMP(%a0), %d0 6698 mov.l %d0, %d1 6699 6700 andi.l &0x7ff00000, %d0 6701 beq.b zero_or_denorm_d 6702 6703 cmpi.l %d0, &0x7ff00000 6704 beq.b inf_or_nan_d 6705 6706is_norm_d: 6707 mov.b &NORM, %d0 6708 rts 6709zero_or_denorm_d: 6710 and.l &0x000fffff, %d1 6711 bne is_denorm_d 6712 tst.l 4+FTEMP(%a0) 6713 bne is_denorm_d 6714is_zero_d: 6715 mov.b &ZERO, %d0 6716 rts 6717is_denorm_d: 6718 mov.b &DENORM, %d0 6719 rts 6720inf_or_nan_d: 6721 and.l &0x000fffff, %d1 6722 bne is_nan_d 6723 tst.l 4+FTEMP(%a0) 6724 bne is_nan_d 6725is_inf_d: 6726 mov.b &INF, %d0 6727 rts 6728is_nan_d: 6729 btst &19, %d1 6730 bne is_qnan_d 6731is_snan_d: 6732 mov.b &SNAN, %d0 6733 rts 6734is_qnan_d: 6735 mov.b &QNAN, %d0 6736 rts 6737 6738######################################################################### 6739# XDEF **************************************************************** # 6740# set_tag_s(): return the optype of the input sgl fp number # 6741# # 6742# XREF **************************************************************** # 6743# None # 6744# # 6745# INPUT *************************************************************** # 6746# a0 = pointer to single precision operand # 6747# # 6748# OUTPUT ************************************************************** # 6749# d0 = value of type tag # 6750# one of: NORM, INF, QNAN, SNAN, DENORM, ZERO # 6751# # 6752# ALGORITHM *********************************************************** # 6753# Simply test the exponent, j-bit, and mantissa values to # 6754# determine the type of operand. 
#									#
#########################################################################

# Classify a single precision operand by inspecting its exponent and
# mantissa fields directly (no FPU instructions are used).
	global		set_tag_s
set_tag_s:
	mov.l		FTEMP(%a0), %d0		# load sign|exp|mantissa longword
	mov.l		%d0, %d1		# keep a copy for the mantissa tests

	andi.l		&0x7f800000, %d0	# isolate the 8-bit biased exponent
	beq.b		zero_or_denorm_s	# exp == 0 => ZERO or DENORM

	cmpi.l		%d0, &0x7f800000	# exp == max (all ones)?
	beq.b		inf_or_nan_s		# yes => INF or NaN

is_norm_s:
	mov.b		&NORM, %d0		# exponent in (0,max) => NORM
	rts
zero_or_denorm_s:
	and.l		&0x007fffff, %d1	# isolate 23-bit mantissa
	bne		is_denorm_s		# nonzero mantissa => DENORM
is_zero_s:
	mov.b		&ZERO, %d0		# exp == 0 and mantissa == 0 => ZERO
	rts
is_denorm_s:
	mov.b		&DENORM, %d0
	rts
inf_or_nan_s:
	and.l		&0x007fffff, %d1	# isolate 23-bit mantissa
	bne		is_nan_s		# nonzero mantissa => NaN
is_inf_s:
	mov.b		&INF, %d0		# exp == max and mantissa == 0 => INF
	rts
is_nan_s:
	btst		&22, %d1		# test msb of sgl fraction (quiet bit)
	bne		is_qnan_s		# set => quiet NaN
is_snan_s:
	mov.b		&SNAN, %d0		# clear => signalling NaN
	rts
is_qnan_s:
	mov.b		&QNAN, %d0
	rts

#########################################################################
# XDEF ****************************************************************	#
#	unf_res(): routine to produce default underflow result of a	#
#		   scaled extended precision number; this is used by	#
#		   fadd/fdiv/fmul/etc. emulation routines.		#
#	unf_res4(): same as above but for fsglmul/fsgldiv which use	#
#		    single round prec and extended prec mode.
#									#
# XREF ****************************************************************	#
#	_denorm() - denormalize according to scale factor		#
#	_round() - round denormalized number according to rnd prec	#
#									#
# INPUT ***************************************************************	#
#	a0 = pointer to extended precision operand			#
#	d0 = scale factor						#
#	d1 = rounding precision/mode					#
#									#
# OUTPUT **************************************************************	#
#	a0 = pointer to default underflow result in extended precision	#
#	d0.b = result FPSR_cc which caller may or may not want to save	#
#									#
# ALGORITHM ***********************************************************	#
#	Convert the input operand to "internal format" which means the	#
# exponent is extended to 16 bits and the sign is stored in the unused	#
# portion of the extended precision operand. Denormalize the number	#
# according to the scale factor passed in d0. Then, round the		#
# denormalized result.							#
#	Set the FPSR_exc bits as appropriate but return the cc bits in	#
# d0 in case the caller doesn't want to save them (as is the case for	#
# fmove out).								#
#	unf_res4() for fsglmul/fsgldiv forces the denorm to extended	#
# precision and the rounding mode to single.				#
#									#
#########################################################################
	global		unf_res
unf_res:
	mov.l		%d1, -(%sp)		# save rnd prec,mode on stack

# "internal" format: the sign bit is copied out of the exponent word into
# FTEMP_SGN so the full 16-bit exponent field can hold a scaled exponent.
	btst		&0x7, FTEMP_EX(%a0)	# make "internal" format
	sne		FTEMP_SGN(%a0)

	mov.w		FTEMP_EX(%a0), %d1	# extract exponent
	and.w		&0x7fff, %d1
	sub.w		%d0, %d1		# bias exponent down by scale factor
	mov.w		%d1, FTEMP_EX(%a0)	# insert 16 bit exponent

	mov.l		%a0, -(%sp)		# save operand ptr during calls

# stack layout here: 0x0(sp) = operand ptr, 0x4(sp) = saved rnd prec,mode
	mov.l		0x4(%sp),%d0		# pass rnd prec.
	andi.w		&0x00c0,%d0
	lsr.w		&0x4,%d0
	bsr.l		_denorm			# denorm result

	mov.l		(%sp),%a0		# restore operand ptr
# _round() takes rnd prec in hi(d1) and rnd mode in lo(d1); build both
# halves from the saved prec,mode word at 0x6(sp).
	mov.w		0x6(%sp),%d1		# load prec:mode into %d1
	andi.w		&0xc0,%d1		# extract rnd prec
	lsr.w		&0x4,%d1
	swap		%d1			# prec to upper word
	mov.w		0x6(%sp),%d1
	andi.w		&0x30,%d1		# extract rnd mode
	lsr.w		&0x4,%d1
	bsr.l		_round			# round the denorm

	mov.l		(%sp)+, %a0

# result is now rounded properly. convert back to normal format
	bclr		&0x7, FTEMP_EX(%a0)	# clear sgn first; may have residue
	tst.b		FTEMP_SGN(%a0)		# is "internal result" sign set?
	beq.b		unf_res_chkifzero	# no; result is positive
	bset		&0x7, FTEMP_EX(%a0)	# set result sgn
	clr.b		FTEMP_SGN(%a0)		# clear temp sign

# the number may have become zero after rounding. set ccodes accordingly.
unf_res_chkifzero:
	clr.l		%d0
	tst.l		FTEMP_HI(%a0)		# is value now a zero?
	bne.b		unf_res_cont		# no
	tst.l		FTEMP_LO(%a0)
	bne.b		unf_res_cont		# no
#	bset		&z_bit, FPSR_CC(%a6)	# yes; set zero ccode bit
	bset		&z_bit, %d0		# yes; set zero ccode bit

unf_res_cont:

#
# can inex1 also be set along with unfl and inex2???
#
# we know that underflow has occurred. aunfl should be set if INEX2 is also set.
#
	btst		&inex2_bit, FPSR_EXCEPT(%a6) # is INEX2 set?
	beq.b		unf_res_end		# no
	bset		&aunfl_bit, FPSR_AEXCEPT(%a6) # yes; set aunfl

unf_res_end:
	add.l		&0x4, %sp		# clear stack
	rts

# unf_res() for fsglmul() and fsgldiv().
	global		unf_res4
unf_res4:
	mov.l		%d1,-(%sp)		# save rnd prec,mode on stack

# "internal" format: the sign bit is copied out of the exponent word into
# FTEMP_SGN so the full 16-bit exponent field can hold a scaled exponent.
	btst		&0x7,FTEMP_EX(%a0)	# make "internal" format
	sne		FTEMP_SGN(%a0)

	mov.w		FTEMP_EX(%a0),%d1	# extract exponent
	and.w		&0x7fff,%d1
	sub.w		%d0,%d1			# bias exponent down by scale factor
	mov.w		%d1,FTEMP_EX(%a0)	# insert 16 bit exponent

	mov.l		%a0,-(%sp)		# save operand ptr during calls

# unlike unf_res(), the denorm precision is forced to extended here...
	clr.l		%d0			# force rnd prec = ext
	bsr.l		_denorm			# denorm result

	mov.l		(%sp),%a0		# restore operand ptr
# ...and the rounding precision passed to _round() is forced to single,
# while the rounding mode still comes from the caller's saved word.
	mov.w		&s_mode,%d1		# force rnd prec = sgl
	swap		%d1			# prec to upper word
	mov.w		0x6(%sp),%d1		# load rnd mode
	andi.w		&0x30,%d1		# extract rnd mode bits
	lsr.w		&0x4,%d1
	bsr.l		_round			# round the denorm

	mov.l		(%sp)+,%a0

# result is now rounded properly. convert back to normal format
	bclr		&0x7,FTEMP_EX(%a0)	# clear sgn first; may have residue
	tst.b		FTEMP_SGN(%a0)		# is "internal result" sign set?
	beq.b		unf_res4_chkifzero	# no; result is positive
	bset		&0x7,FTEMP_EX(%a0)	# set result sgn
	clr.b		FTEMP_SGN(%a0)		# clear temp sign

# the number may have become zero after rounding. set ccodes accordingly.
unf_res4_chkifzero:
	clr.l		%d0
	tst.l		FTEMP_HI(%a0)		# is value now a zero?
	bne.b		unf_res4_cont		# no
	tst.l		FTEMP_LO(%a0)
	bne.b		unf_res4_cont		# no
#	bset		&z_bit,FPSR_CC(%a6)	# yes; set zero ccode bit
	bset		&z_bit,%d0		# yes; set zero ccode bit

unf_res4_cont:

#
# can inex1 also be set along with unfl and inex2???
#
# we know that underflow has occurred. aunfl should be set if INEX2 is also set.
#
	btst		&inex2_bit,FPSR_EXCEPT(%a6) # is INEX2 set?
	beq.b		unf_res4_end		# no
	bset		&aunfl_bit,FPSR_AEXCEPT(%a6) # yes; set aunfl

unf_res4_end:
	add.l		&0x4,%sp		# clear stack
	rts

#########################################################################
# XDEF ****************************************************************	#
#	ovf_res(): routine to produce the default overflow result of	#
#		   an overflowing number.				#
#	ovf_res2(): same as above but the rnd mode/prec are passed	#
#		    differently.					#
#									#
# XREF ****************************************************************	#
#	none								#
#									#
# INPUT ***************************************************************	#
#	d1.b	= '-1' => (-); '0' => (+)				#
#	ovf_res():							#
#		d0 = rnd mode/prec					#
#	ovf_res2():							#
#		hi(d0) = rnd prec					#
#		lo(d0) = rnd mode					#
#									#
# OUTPUT **************************************************************	#
#	a0 = points to extended precision result			#
#	d0.b = condition code bits					#
#									#
# ALGORITHM ***********************************************************	#
#	The default overflow result can be determined by the sign of	#
# the result and the rounding mode/prec in effect. These bits are	#
# concatenated together to create an index into the default result	#
# table. A pointer to the correct result is returned in a0. The		#
# resulting condition codes are returned in d0 in case the caller	#
# doesn't want FPSR_cc altered (as is the case for fmove out).
#									#
#########################################################################

	global		ovf_res
ovf_res:
	andi.w		&0x10,%d1		# keep result sign
	lsr.b		&0x4,%d0		# shift prec/mode
	or.b		%d0,%d1			# concat the two
	mov.w		%d1,%d0			# make a copy
	lsl.b		&0x1,%d1		# multiply d1 by 2
	bra.b		ovf_res_load

	global		ovf_res2
ovf_res2:
	and.w		&0x10, %d1		# keep result sign
	or.b		%d0, %d1		# insert rnd mode
	swap		%d0
	or.b		%d0, %d1		# insert rnd prec
	mov.w		%d1, %d0		# make a copy
	lsl.b		&0x1, %d1		# shift left by 1

#
# use the rounding mode, precision, and result sign as an index into the
# two tables below to fetch the default result and the result ccodes.
# d0 indexes tbl_ovfl_cc bytewise; d1 (= d0*2, scaled *8 by the ea mode)
# indexes the 16-byte tbl_ovfl_result entries.
#
ovf_res_load:
	mov.b		(tbl_ovfl_cc.b,%pc,%d0.w*1), %d0 # fetch result ccodes
	lea		(tbl_ovfl_result.b,%pc,%d1.w*8), %a0 # return result ptr

	rts

# condition codes per {prec, sign} group; rows follow the result table
# below (0x2 = infinity bit, 0x8 = negative bit).
tbl_ovfl_cc:
	byte		0x2, 0x0, 0x0, 0x2
	byte		0x2, 0x0, 0x0, 0x2
	byte		0x2, 0x0, 0x0, 0x2
	byte		0x0, 0x0, 0x0, 0x0
	byte		0x2+0x8, 0x8, 0x2+0x8, 0x8
	byte		0x2+0x8, 0x8, 0x2+0x8, 0x8
	byte		0x2+0x8, 0x8, 0x2+0x8, 0x8

# default overflow results in extended precision: one 16-byte entry per
# rounding mode (RN,RZ,RM,RP), grouped by rounding precision (ext,sgl,dbl)
# and then by sign; the all-zero group pads the gap between + and - halves.
tbl_ovfl_result:
	long		0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN
	long		0x7ffe0000,0xffffffff,0xffffffff,0x00000000 # +EXT; RZ
	long		0x7ffe0000,0xffffffff,0xffffffff,0x00000000 # +EXT; RM
	long		0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP

	long		0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN
	long		0x407e0000,0xffffff00,0x00000000,0x00000000 # +SGL; RZ
	long		0x407e0000,0xffffff00,0x00000000,0x00000000 # +SGL; RM
	long		0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP

	long		0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN
	long		0x43fe0000,0xffffffff,0xfffff800,0x00000000 # +DBL; RZ
	long		0x43fe0000,0xffffffff,0xfffff800,0x00000000 # +DBL; RM
	long		0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP

	long		0x00000000,0x00000000,0x00000000,0x00000000
	long		0x00000000,0x00000000,0x00000000,0x00000000
	long		0x00000000,0x00000000,0x00000000,0x00000000
	long		0x00000000,0x00000000,0x00000000,0x00000000

	long		0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN
	long		0xfffe0000,0xffffffff,0xffffffff,0x00000000 # -EXT; RZ
	long		0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM
	long		0xfffe0000,0xffffffff,0xffffffff,0x00000000 # -EXT; RP

	long		0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN
	long		0xc07e0000,0xffffff00,0x00000000,0x00000000 # -SGL; RZ
	long		0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM
	long		0xc07e0000,0xffffff00,0x00000000,0x00000000 # -SGL; RP

	long		0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN
	long		0xc3fe0000,0xffffffff,0xfffff800,0x00000000 # -DBL; RZ
	long		0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM
	long		0xc3fe0000,0xffffffff,0xfffff800,0x00000000 # -DBL; RP

#########################################################################
# XDEF ****************************************************************	#
#	fout(): move from fp register to memory or data register	#
#									#
# XREF ****************************************************************	#
#	_round() - needed to create EXOP for sgl/dbl precision		#
#	norm() - needed to create EXOP for extended precision		#
#	ovf_res() - create default overflow result for sgl/dbl precision#
#	unf_res() - create default underflow result for sgl/dbl prec.	#
#	dst_dbl() - create rounded dbl precision result.		#
#	dst_sgl() - create rounded sgl precision result.		#
#	fetch_dreg() - fetch dynamic k-factor reg for packed.		#
#	bindec() - convert FP binary number to packed number.		#
#	_mem_write() - write data to memory.				#
#	_mem_write2() - write data to memory unless supv mode -(a7) exc.#
#	_dmem_write_{byte,word,long}() - write data to memory.
#	store_dreg_{b,w,l}() - store data to data register file.	#
#	facc_out_{b,w,l,d,x}() - data access error occurred.		#
#									#
# INPUT ***************************************************************	#
#	a0 = pointer to extended precision source operand		#
#	d0 = round prec,mode						#
#									#
# OUTPUT **************************************************************	#
#	fp0 : intermediate underflow or overflow result if		#
#	      OVFL/UNFL occurred for a sgl or dbl operand		#
#									#
# ALGORITHM ***********************************************************	#
#	This routine is accessed by many handlers that need to do an	#
# opclass three move of an operand out to memory.			#
#	Decode an fmove out (opclass 3) instruction to determine if	#
# it's b,w,l,s,d,x, or p in size. b,w,l can be stored to either a data	#
# register or memory. The algorithm uses a standard "fmove" to create	#
# the rounded result. Also, since exceptions are disabled, this also	#
# creates the correct OPERR default result if appropriate.		#
#	For sgl or dbl precision, overflow or underflow can occur. If	#
# either occurs and is enabled, the EXOP must be created.		#
#	For extended precision, the stacked <ea> must be fixed along	#
# w/ the address index register as appropriate w/ _calc_ea_fout(). If	#
# the source is a denorm and if underflow is enabled, an EXOP must be	#
# created.								#
#	For packed, the k-factor must be fetched from the instruction	#
# word or a data register. The <ea> must be fixed as w/ extended	#
# precision. Then, bindec() is called to create the appropriate		#
# packed result.							#
#	If at any time an access error is flagged by one of the move-	#
# to-memory routines, then a special exit must be made so that the	#
# access error can be handled properly.					#
#									#
#########################################################################

	global		fout
fout:
	bfextu		EXC_CMDREG(%a6){&3:&3},%d1 # extract dst fmt
	mov.w		(tbl_fout.b,%pc,%d1.w*2),%a1 # use as index
	jmp		(tbl_fout.b,%pc,%a1)	# jump to routine

	swbeg		&0x8
# jump table indexed by the 3-bit destination format field of the cmdreg.
tbl_fout:
	short		fout_long - tbl_fout
	short		fout_sgl - tbl_fout
	short		fout_ext - tbl_fout
	short		fout_pack - tbl_fout
	short		fout_word - tbl_fout
	short		fout_dbl - tbl_fout
	short		fout_byte - tbl_fout
	short		fout_pack - tbl_fout

#################################################################
# fmove.b out ###################################################
#################################################################

# Only "Unimplemented Data Type" exceptions enter here. The operand
# is either a DENORM or a NORM.
fout_byte:
	tst.b		STAG(%a6)		# is operand normalized?
	bne.b		fout_byte_denorm	# no

	fmovm.x		SRC(%a0),&0x80		# load value into fp0

fout_byte_norm:
	fmov.l		%d0,%fpcr		# insert rnd prec,mode

	fmov.b		%fp0,%d0		# exec move out w/ correct rnd mode

	fmov.l		&0x0,%fpcr		# clear FPCR
	fmov.l		%fpsr,%d1		# fetch FPSR
	or.w		%d1,2+USER_FPSR(%a6)	# save new exc,accrued bits

	mov.b		1+EXC_OPWORD(%a6),%d1	# extract dst mode
	andi.b		&0x38,%d1		# is mode == 0? (Dreg dst)
	beq.b		fout_byte_dn		# must save to integer regfile

	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
	bsr.l		_dmem_write_byte	# write byte

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_b		# yes

	rts

fout_byte_dn:
	mov.b		1+EXC_OPWORD(%a6),%d1	# extract Dn
	andi.w		&0x7,%d1
	bsr.l		store_dreg_b
	rts

# DENORM source: substitute the smallest single precision magnitude with
# the denorm's sign so the fmove.b above yields the correct default result.
fout_byte_denorm:
	mov.l		SRC_EX(%a0),%d1
	andi.l		&0x80000000,%d1		# keep DENORM sign
	ori.l		&0x00800000,%d1		# make smallest sgl
	fmov.s		%d1,%fp0
	bra.b		fout_byte_norm

#################################################################
# fmove.w out ###################################################
#################################################################

# Only "Unimplemented Data Type" exceptions enter here. The operand
# is either a DENORM or a NORM.
fout_word:
	tst.b		STAG(%a6)		# is operand normalized?
	bne.b		fout_word_denorm	# no

	fmovm.x		SRC(%a0),&0x80		# load value into fp0

fout_word_norm:
	fmov.l		%d0,%fpcr		# insert rnd prec:mode

	fmov.w		%fp0,%d0		# exec move out w/ correct rnd mode

	fmov.l		&0x0,%fpcr		# clear FPCR
	fmov.l		%fpsr,%d1		# fetch FPSR
	or.w		%d1,2+USER_FPSR(%a6)	# save new exc,accrued bits

	mov.b		1+EXC_OPWORD(%a6),%d1	# extract dst mode
	andi.b		&0x38,%d1		# is mode == 0? (Dreg dst)
	beq.b		fout_word_dn		# must save to integer regfile

	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
	bsr.l		_dmem_write_word	# write word

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_w		# yes

	rts

fout_word_dn:
	mov.b		1+EXC_OPWORD(%a6),%d1	# extract Dn
	andi.w		&0x7,%d1
	bsr.l		store_dreg_w
	rts

# DENORM source: substitute the smallest single precision magnitude with
# the denorm's sign (same scheme as fout_byte_denorm).
fout_word_denorm:
	mov.l		SRC_EX(%a0),%d1
	andi.l		&0x80000000,%d1		# keep DENORM sign
	ori.l		&0x00800000,%d1		# make smallest sgl
	fmov.s		%d1,%fp0
	bra.b		fout_word_norm

#################################################################
# fmove.l out ###################################################
#################################################################

# Only "Unimplemented Data Type" exceptions enter here. The operand
# is either a DENORM or a NORM.
fout_long:
	tst.b		STAG(%a6)		# is operand normalized?
	bne.b		fout_long_denorm	# no

	fmovm.x		SRC(%a0),&0x80		# load value into fp0

fout_long_norm:
	fmov.l		%d0,%fpcr		# insert rnd prec:mode

	fmov.l		%fp0,%d0		# exec move out w/ correct rnd mode

	fmov.l		&0x0,%fpcr		# clear FPCR
	fmov.l		%fpsr,%d1		# fetch FPSR
	or.w		%d1,2+USER_FPSR(%a6)	# save new exc,accrued bits

fout_long_write:
	mov.b		1+EXC_OPWORD(%a6),%d1	# extract dst mode
	andi.b		&0x38,%d1		# is mode == 0? (Dreg dst)
	beq.b		fout_long_dn		# must save to integer regfile

	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
	bsr.l		_dmem_write_long	# write long

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_l		# yes

	rts

fout_long_dn:
	mov.b		1+EXC_OPWORD(%a6),%d1	# extract Dn
	andi.w		&0x7,%d1
	bsr.l		store_dreg_l
	rts

# DENORM source: substitute the smallest single precision magnitude with
# the denorm's sign (same scheme as fout_byte_denorm).
fout_long_denorm:
	mov.l		SRC_EX(%a0),%d1
	andi.l		&0x80000000,%d1		# keep DENORM sign
	ori.l		&0x00800000,%d1		# make smallest sgl
	fmov.s		%d1,%fp0
	bra.b		fout_long_norm

#################################################################
# fmove.x out ###################################################
#################################################################

# Only "Unimplemented Data Type" exceptions enter here. The operand
# is either a DENORM or a NORM.
# The DENORM causes an Underflow exception.
fout_ext:

# we copy the extended precision result to FP_SCR0 so that the reserved
# 16-bit field gets zeroed. we do this since we promise not to disturb
# what's at SRC(a0).
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	clr.w		2+FP_SCR0_EX(%a6)	# clear reserved field
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)

	fmovm.x		SRC(%a0),&0x80		# return result

	bsr.l		_calc_ea_fout		# fix stacked <ea>

	mov.l		%a0,%a1			# pass: dst addr
	lea		FP_SCR0(%a6),%a0	# pass: src addr
	mov.l		&0xc,%d0		# pass: opsize is 12 bytes

# we must not yet write the extended precision data to the stack
# in the pre-decrement case from supervisor mode or else we'll corrupt
# the stack frame. so, leave it in FP_SRC for now and deal with it later...
	cmpi.b		SPCOND_FLG(%a6),&mda7_flg
	beq.b		fout_ext_a7

	bsr.l		_dmem_write		# write ext prec number to memory

	tst.l		%d1			# did dstore fail?
	bne.w		fout_ext_err		# yes

	tst.b		STAG(%a6)		# is operand normalized?
	bne.b		fout_ext_denorm		# no
	rts

# the number is a DENORM. must set the underflow exception bit
fout_ext_denorm:
	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set underflow exc bit

	mov.b		FPCR_ENABLE(%a6),%d0
	andi.b		&0x0a,%d0		# is UNFL or INEX enabled?
	bne.b		fout_ext_exc		# yes
	rts

# we don't want to do the write if the exception occurred in supervisor mode
# so _mem_write2() handles this for us.
fout_ext_a7:
	bsr.l		_mem_write2		# write ext prec number to memory

	tst.l		%d1			# did dstore fail?
	bne.w		fout_ext_err		# yes

	tst.b		STAG(%a6)		# is operand normalized?
	bne.b		fout_ext_denorm		# no
	rts

# build the EXOP: normalize the denorm mantissa and rebuild the exponent
# word as sign | (-(shift amount) & 0x7fff).
fout_ext_exc:
	lea		FP_SCR0(%a6),%a0
	bsr.l		norm			# normalize the mantissa
	neg.w		%d0			# new exp = -(shft amt)
	andi.w		&0x7fff,%d0
	andi.w		&0x8000,FP_SCR0_EX(%a6)	# keep only old sign
	or.w		%d0,FP_SCR0_EX(%a6)	# insert new exponent
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	rts

fout_ext_err:
	mov.l		EXC_A6(%a6),(%a6)	# fix stacked a6
	bra.l		facc_out_x

#########################################################################
# fmove.s out ###########################################################
#########################################################################
fout_sgl:
	andi.b		&0x30,%d0		# clear rnd prec
	ori.b		&s_mode*0x10,%d0	# insert sgl prec
	mov.l		%d0,L_SCR3(%a6)		# save rnd prec,mode on stack

#
# operand is a normalized number. first, we check to see if the move out
# would cause either an underflow or overflow. these cases are handled
# separately. otherwise, set the FPCR to the proper rounding mode and
# execute the move.
#
	mov.w		SRC_EX(%a0),%d0		# extract exponent
	andi.w		&0x7fff,%d0		# strip sign

	cmpi.w		%d0,&SGL_HI		# will operand overflow?
	bgt.w		fout_sgl_ovfl		# yes; go handle OVFL
	beq.w		fout_sgl_may_ovfl	# maybe; go handle possible OVFL
	cmpi.w		%d0,&SGL_LO		# will operand underflow?
	blt.w		fout_sgl_unfl		# yes; go handle underflow

#
# NORMs(in range) can be stored out by a simple "fmov.s"
# Unnormalized inputs can come through this point.
#
fout_sgl_exg:
	fmovm.x		SRC(%a0),&0x80		# fetch fop from stack

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fmov.s		%fp0,%d0		# store does convert and round

	fmov.l		&0x0,%fpcr		# clear FPCR
	fmov.l		%fpsr,%d1		# save FPSR

	or.w		%d1,2+USER_FPSR(%a6)	# set possible inex2/ainex

fout_sgl_exg_write:
	mov.b		1+EXC_OPWORD(%a6),%d1	# extract dst mode
	andi.b		&0x38,%d1		# is mode == 0? (Dreg dst)
	beq.b		fout_sgl_exg_write_dn	# must save to integer regfile

	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
	bsr.l		_dmem_write_long	# write long

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_l		# yes

	rts

fout_sgl_exg_write_dn:
	mov.b		1+EXC_OPWORD(%a6),%d1	# extract Dn
	andi.w		&0x7,%d1
	bsr.l		store_dreg_l
	rts

#
# here, we know that the operand would UNFL if moved out to single prec,
# so, denorm and round and then use generic store single routine to
# write the value to memory.
#
fout_sgl_unfl:
	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set UNFL

	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	mov.l		%a0,-(%sp)		# save src ptr for EXOP path

	clr.l		%d0			# pass: S.F. = 0

	cmpi.b		STAG(%a6),&DENORM	# fetch src optype tag
	bne.b		fout_sgl_unfl_cont	# let DENORMs fall through

	lea		FP_SCR0(%a6),%a0
	bsr.l		norm			# normalize the DENORM

fout_sgl_unfl_cont:
	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
	bsr.l		unf_res			# calc default underflow result

	lea		FP_SCR0(%a6),%a0	# pass: ptr to fop
	bsr.l		dst_sgl			# convert to single prec

	mov.b		1+EXC_OPWORD(%a6),%d1	# extract dst mode
	andi.b		&0x38,%d1		# is mode == 0? (Dreg dst)
	beq.b		fout_sgl_unfl_dn	# must save to integer regfile

	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
	bsr.l		_dmem_write_long	# write long

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_l		# yes

	bra.b		fout_sgl_unfl_chkexc

fout_sgl_unfl_dn:
	mov.b		1+EXC_OPWORD(%a6),%d1	# extract Dn
	andi.w		&0x7,%d1
	bsr.l		store_dreg_l

fout_sgl_unfl_chkexc:
	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x0a,%d1		# is UNFL or INEX enabled?
	bne.w		fout_sd_exc_unfl	# yes
	addq.l		&0x4,%sp		# discard saved src ptr
	rts

#
# it's definitely an overflow so call ovf_res to get the correct answer
#
fout_sgl_ovfl:
	tst.b		3+SRC_HI(%a0)		# is result inexact?
	bne.b		fout_sgl_ovfl_inex2
	tst.l		SRC_LO(%a0)		# is result inexact?
	bne.b		fout_sgl_ovfl_inex2
	ori.w		&ovfl_inx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex
	bra.b		fout_sgl_ovfl_cont
fout_sgl_ovfl_inex2:
	ori.w		&ovfinx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex/inex2

fout_sgl_ovfl_cont:
	mov.l		%a0,-(%sp)		# save src ptr for EXOP path

# call ovf_res() w/ sgl prec and the correct rnd mode to create the default
# overflow result. DON'T save the returned ccodes from ovf_res() since
# fmove out doesn't alter them.
	tst.b		SRC_EX(%a0)		# is operand negative?
	smi		%d1			# set if so
	mov.l		L_SCR3(%a6),%d0		# pass: sgl prec,rnd mode
	bsr.l		ovf_res			# calc OVFL result
	fmovm.x		(%a0),&0x80		# load default overflow result
	fmov.s		%fp0,%d0		# store to single

	mov.b		1+EXC_OPWORD(%a6),%d1	# extract dst mode
	andi.b		&0x38,%d1		# is mode == 0? (Dreg dst)
	beq.b		fout_sgl_ovfl_dn	# must save to integer regfile

	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
	bsr.l		_dmem_write_long	# write long

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_l		# yes

	bra.b		fout_sgl_ovfl_chkexc

fout_sgl_ovfl_dn:
	mov.b		1+EXC_OPWORD(%a6),%d1	# extract Dn
	andi.w		&0x7,%d1
	bsr.l		store_dreg_l

fout_sgl_ovfl_chkexc:
	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x0a,%d1		# is UNFL or INEX enabled?
	bne.w		fout_sd_exc_ovfl	# yes
	addq.l		&0x4,%sp		# discard saved src ptr
	rts

#
# move out MAY overflow:
# (1) force the exp to 0x3fff
# (2) do a move w/ appropriate rnd mode
# (3) if exp still equals zero, then insert original exponent
#	for the correct result.
#     if exp now equals one, then it overflowed so call ovf_res.
#
fout_sgl_may_ovfl:
	mov.w		SRC_EX(%a0),%d1		# fetch current sign
	andi.w		&0x8000,%d1		# keep it,clear exp
	ori.w		&0x3fff,%d1		# insert exp = 0
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert scaled exp
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6) # copy hi(man)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6) # copy lo(man)

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

	fmov.x		FP_SCR0(%a6),%fp0	# force fop to be rounded
	fmov.l		&0x0,%fpcr		# clear FPCR

	fabs.x		%fp0			# need absolute value
	fcmp.b		%fp0,&0x2		# did exponent increase?
	fblt.w		fout_sgl_exg		# no; go finish NORM
	bra.w		fout_sgl_ovfl		# yes; go handle overflow

################

# underflow EXOP path shared by sgl and dbl: rebuild the operand in
# FP_SCR0 (normalizing a DENORM first) before rounding it below.
fout_sd_exc_unfl:
	mov.l		(%sp)+,%a0		# restore src ptr

	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)

	cmpi.b		STAG(%a6),&DENORM	# was src a DENORM?
	bne.b		fout_sd_exc_cont	# no

	lea		FP_SCR0(%a6),%a0
	bsr.l		norm
	neg.l		%d0
	andi.w		&0x7fff,%d0
	bfins		%d0,FP_SCR0_EX(%a6){&1:&15}
	bra.b		fout_sd_exc_cont

fout_sd_exc:
fout_sd_exc_ovfl:
	mov.l		(%sp)+,%a0		# restore a0

	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)

fout_sd_exc_cont:
	bclr		&0x7,FP_SCR0_EX(%a6)	# clear sign bit
	sne.b		2+FP_SCR0_EX(%a6)	# set internal sign bit
	lea		FP_SCR0(%a6),%a0	# pass: ptr to DENORM

# build _round()'s argument: rnd prec in hi(d1), rnd mode in lo(d1),
# both extracted from the saved prec,mode byte in L_SCR3.
	mov.b		3+L_SCR3(%a6),%d1
	lsr.b		&0x4,%d1
	andi.w		&0x0c,%d1
	swap		%d1
	mov.b		3+L_SCR3(%a6),%d1
	lsr.b		&0x4,%d1
	andi.w		&0x03,%d1
	clr.l		%d0			# pass: zero g,r,s
	bsr.l		_round			# round the DENORM

	tst.b		2+FP_SCR0_EX(%a6)	# is EXOP negative?
	beq.b		fout_sd_exc_done	# no
	bset		&0x7,FP_SCR0_EX(%a6)	# yes

fout_sd_exc_done:
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	rts

#################################################################
# fmove.d out ###################################################
#################################################################
fout_dbl:
	andi.b		&0x30,%d0		# clear rnd prec
	ori.b		&d_mode*0x10,%d0	# insert dbl prec
	mov.l		%d0,L_SCR3(%a6)		# save rnd prec,mode on stack

#
# operand is a normalized number. first, we check to see if the move out
# would cause either an underflow or overflow. these cases are handled
# separately. otherwise, set the FPCR to the proper rounding mode and
# execute the move.
#
	mov.w		SRC_EX(%a0),%d0		# extract exponent
	andi.w		&0x7fff,%d0		# strip sign

	cmpi.w		%d0,&DBL_HI		# will operand overflow?
	bgt.w		fout_dbl_ovfl		# yes; go handle OVFL
	beq.w		fout_dbl_may_ovfl	# maybe; go handle possible OVFL
	cmpi.w		%d0,&DBL_LO		# will operand underflow?
	blt.w		fout_dbl_unfl		# yes; go handle underflow

#
# NORMs(in range) can be stored out by a simple "fmov.d"
# Unnormalized inputs can come through this point.
#
fout_dbl_exg:
	fmovm.x		SRC(%a0),&0x80		# fetch fop from stack

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fmov.d		%fp0,L_SCR1(%a6)	# store does convert and round

	fmov.l		&0x0,%fpcr		# clear FPCR
	fmov.l		%fpsr,%d0		# save FPSR

	or.w		%d0,2+USER_FPSR(%a6)	# set possible inex2/ainex

	mov.l		EXC_EA(%a6),%a1		# pass: dst addr
	lea		L_SCR1(%a6),%a0		# pass: src addr
	movq.l		&0x8,%d0		# pass: opsize is 8 bytes
	bsr.l		_dmem_write		# store dbl fop to memory

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_d		# yes

	rts					# no; so we're finished

#
# here, we know that the operand would UNFL if moved out to double prec,
# so, denorm and round and then use generic store double routine to
# write the value to memory.
#
fout_dbl_unfl:
	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set UNFL

	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	mov.l		%a0,-(%sp)		# save src ptr for EXOP path

	clr.l		%d0			# pass: S.F. = 0

	cmpi.b		STAG(%a6),&DENORM	# fetch src optype tag
	bne.b		fout_dbl_unfl_cont	# let DENORMs fall through

	lea		FP_SCR0(%a6),%a0
	bsr.l		norm			# normalize the DENORM

fout_dbl_unfl_cont:
	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
	bsr.l		unf_res			# calc default underflow result

	lea		FP_SCR0(%a6),%a0	# pass: ptr to fop
	bsr.l		dst_dbl			# convert to double prec
	mov.l		%d0,L_SCR1(%a6)		# stash hi(dbl result)
	mov.l		%d1,L_SCR2(%a6)		# stash lo(dbl result)

	mov.l		EXC_EA(%a6),%a1		# pass: dst addr
	lea		L_SCR1(%a6),%a0		# pass: src addr
	movq.l		&0x8,%d0		# pass: opsize is 8 bytes
	bsr.l		_dmem_write		# store dbl fop to memory

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_d		# yes

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x0a,%d1		# is UNFL or INEX enabled?
	bne.w		fout_sd_exc_unfl	# yes
	addq.l		&0x4,%sp		# discard saved src ptr
	rts

#
# it's definitely an overflow so call ovf_res to get the correct answer
#
fout_dbl_ovfl:
	mov.w		2+SRC_LO(%a0),%d0
	andi.w		&0x7ff,%d0		# bits below dbl mantissa nonzero?
	bne.b		fout_dbl_ovfl_inex2

	ori.w		&ovfl_inx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex
	bra.b		fout_dbl_ovfl_cont
fout_dbl_ovfl_inex2:
	ori.w		&ovfinx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex/inex2

fout_dbl_ovfl_cont:
	mov.l		%a0,-(%sp)		# save src ptr for EXOP path

# call ovf_res() w/ dbl prec and the correct rnd mode to create the default
# overflow result. DON'T save the returned ccodes from ovf_res() since
# fmove out doesn't alter them.
	tst.b		SRC_EX(%a0)		# is operand negative?
	smi		%d1			# set if so
	mov.l		L_SCR3(%a6),%d0		# pass: dbl prec,rnd mode
	bsr.l		ovf_res			# calc OVFL result
	fmovm.x		(%a0),&0x80		# load default overflow result
	fmov.d		%fp0,L_SCR1(%a6)	# store to double

	mov.l		EXC_EA(%a6),%a1		# pass: dst addr
	lea		L_SCR1(%a6),%a0		# pass: src addr
	movq.l		&0x8,%d0		# pass: opsize is 8 bytes
	bsr.l		_dmem_write		# store dbl fop to memory

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_d		# yes

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x0a,%d1		# is UNFL or INEX enabled?
	bne.w		fout_sd_exc_ovfl	# yes
	addq.l		&0x4,%sp		# discard saved src ptr
	rts

#
# move out MAY overflow:
# (1) force the exp to 0x3fff
# (2) do a move w/ appropriate rnd mode
# (3) if exp still equals zero, then insert original exponent
#	for the correct result.
#     if exp now equals one, then it overflowed so call ovf_res.
#
fout_dbl_may_ovfl:
	mov.w		SRC_EX(%a0),%d1		# fetch current sign
	andi.w		&0x8000,%d1		# keep it,clear exp
	ori.w		&0x3fff,%d1		# insert exp = 0
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert scaled exp
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6) # copy hi(man)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6) # copy lo(man)

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

	fmov.x		FP_SCR0(%a6),%fp0	# force fop to be rounded
	fmov.l		&0x0,%fpcr		# clear FPCR

	fabs.x		%fp0			# need absolute value
	fcmp.b		%fp0,&0x2		# did exponent increase?
	fblt.w		fout_dbl_exg		# no; go finish NORM
	bra.w		fout_dbl_ovfl		# yes; go handle overflow

#########################################################################
# XDEF ****************************************************************	#
#	dst_dbl(): create double precision value from extended prec.
# 
7743#									#
7744# XREF **************************************************************** #
7745# None #
7746# #
7747# INPUT *************************************************************** #
7748# a0 = pointer to source operand in extended precision #
7749# #
7750# OUTPUT ************************************************************** #
7751# d0 = hi(double precision result) #
7752# d1 = lo(double precision result) #
7753# #
7754# ALGORITHM *********************************************************** #
7755# #
7756# Changes extended precision to double precision. #
7757# Note: no attempt is made to round the extended value to double. #
7758# dbl_sign = ext_sign #
7759# dbl_exp = ext_exp - $3fff(ext bias) + $7ff(dbl bias) #
7760# get rid of ext integer bit #
7761# dbl_mant = ext_mant{62:11} #
7762# #
7763# --------------- --------------- --------------- #
7764# extended -> |s| exp | |1| ms mant | | ls mant | #
7765# --------------- --------------- --------------- #
7766# 95 64 63 62 32 31 11 0 #
7767# | | #
7768# | | #
7769# | | #
7770# v v #
7771# --------------- --------------- #
7772# double -> |s|exp| mant | | mant | #
7773# --------------- --------------- #
7774# 63 51 32 31 0 #
7775# #
7776#########################################################################

7777
7778dst_dbl:
7779 clr.l %d0 # clear d0
7780 mov.w FTEMP_EX(%a0),%d0 # get exponent
7781 subi.w &EXT_BIAS,%d0 # subtract extended precision bias
7782 addi.w &DBL_BIAS,%d0 # add double precision bias
7783 tst.b FTEMP_HI(%a0) # is number a denorm?
7784 bmi.b dst_get_dupper # no
7785 subq.w &0x1,%d0 # yes; denorm bias = DBL_BIAS - 1
7786dst_get_dupper:
7787 swap %d0 # d0 now in upper word
7788 lsl.l &0x4,%d0 # d0 in proper place for dbl prec exp
7789 tst.b FTEMP_EX(%a0) # test sign
7790 bpl.b dst_get_dman # if positive, go process mantissa
7791 bset &0x1f,%d0 # if negative, set sign
7792dst_get_dman:
7793 mov.l FTEMP_HI(%a0),%d1 # get ms mantissa
7794 bfextu %d1{&1:&20},%d1 # get upper 20 bits of ms
7795 or.l %d1,%d0 # put these bits in ms word of double
7796 mov.l %d0,L_SCR1(%a6) # put the new exp back on the stack
7797 mov.l FTEMP_HI(%a0),%d1 # get ms mantissa
7798 mov.l &21,%d0 # load shift count
7799 lsl.l %d0,%d1 # put lower 11 bits in upper bits
7800 mov.l %d1,L_SCR2(%a6) # build lower lword in memory
7801 mov.l FTEMP_LO(%a0),%d1 # get ls mantissa
7802 bfextu %d1{&0:&21},%d0 # get ls 21 bits of double
7803 mov.l L_SCR2(%a6),%d1 # reload partial lower lword
7804 or.l %d0,%d1 # put them in double result
7805 mov.l L_SCR1(%a6),%d0 # d0 = hi(double result)
7806 rts
7807
7808#########################################################################
7809# XDEF **************************************************************** #
7810# dst_sgl(): create single precision value from extended prec #
7811# #
7812# XREF **************************************************************** #
7813# #
7814# INPUT *************************************************************** #
7815# a0 = pointer to source operand in extended precision #
7816# #
7817# OUTPUT ************************************************************** #
7818# d0 = single precision result #
7819# #
7820# ALGORITHM *********************************************************** #
7821# #
7822# Changes extended precision to single precision.
#
7823# sgl_sign = ext_sign #
7824# sgl_exp = ext_exp - $3fff(ext bias) + $7f(sgl bias) #
7825# get rid of ext integer bit #
7826# sgl_mant = ext_mant{62:40} #
7827# #
7828# --------------- --------------- --------------- #
7829# extended -> |s| exp | |1| ms mant | | ls mant | #
7830# --------------- --------------- --------------- #
7831# 95 64 63 62 40 32 31 12 0 #
7832# | | #
7833# | | #
7834# | | #
7835# v v #
7836# --------------- #
7837# single -> |s|exp| mant | #
7838# --------------- #
7839# 31 22 0 #
7840# #
7841#########################################################################

7842
7843dst_sgl:
7844 clr.l %d0 # clear d0
7845 mov.w FTEMP_EX(%a0),%d0 # get exponent
7846 subi.w &EXT_BIAS,%d0 # subtract extended precision bias
7847 addi.w &SGL_BIAS,%d0 # add single precision bias
7848 tst.b FTEMP_HI(%a0) # is number a denorm?
7849 bmi.b dst_get_supper # no
7850 subq.w &0x1,%d0 # yes; denorm bias = SGL_BIAS - 1
7851dst_get_supper:
7852 swap %d0 # put exp in upper word of d0
7853 lsl.l &0x7,%d0 # shift it into single exp bits
7854 tst.b FTEMP_EX(%a0) # test sign
7855 bpl.b dst_get_sman # if positive, continue
7856 bset &0x1f,%d0 # if negative, put in sign first
7857dst_get_sman:
7858 mov.l FTEMP_HI(%a0),%d1 # get ms mantissa
7859 andi.l &0x7fffff00,%d1 # get upper 23 bits of ms
7860 lsr.l &0x8,%d1 # and put them flush right
7861 or.l %d1,%d0 # put these bits in ms word of single
7862 rts
7863
7864##############################################################################
# fout_pack(): store a packed-decimal result to memory for "fmove out".
7865fout_pack:
7866 bsr.l _calc_ea_fout # fetch the <ea>
7867 mov.l %a0,-(%sp) # save dst <ea> for the final write
7868
7869 mov.b STAG(%a6),%d0 # fetch input type
7870 bne.w fout_pack_not_norm # input is not NORM
7871
7872fout_pack_norm:
7873 btst &0x4,EXC_CMDREG(%a6) # static or dynamic?
7874 beq.b fout_pack_s # static
7875
7876fout_pack_d:
7877 mov.b 1+EXC_CMDREG(%a6),%d1 # fetch dynamic reg
7878 lsr.b &0x4,%d1 # extract register number field
7879 andi.w &0x7,%d1
7880
7881 bsr.l fetch_dreg # fetch Dn w/ k-factor
7882
7883 bra.b fout_pack_type
7884fout_pack_s:
7885 mov.b 1+EXC_CMDREG(%a6),%d0 # fetch static field
7886
7887fout_pack_type:
7888 bfexts %d0{&25:&7},%d0 # extract k-factor
7889 mov.l %d0,-(%sp) # save k-factor across bindec call
7890
7891 lea FP_SRC(%a6),%a0 # pass: ptr to input
7892
7893# bindec is currently scrambling FP_SRC for denorm inputs.
7894# we'll have to change this, but for now, tough luck!!!
7895 bsr.l bindec # convert xprec to packed
7896
7897# andi.l &0xcfff000f,FP_SCR0(%a6) # clear unused fields
7898 andi.l &0xcffff00f,FP_SCR0(%a6) # clear unused fields
7899
7900 mov.l (%sp)+,%d0 # restore k-factor
7901
7902 tst.b 3+FP_SCR0_EX(%a6)
7903 bne.b fout_pack_set
7904 tst.l FP_SCR0_HI(%a6)
7905 bne.b fout_pack_set
7906 tst.l FP_SCR0_LO(%a6)
7907 bne.b fout_pack_set
7908
7909# add the extra condition that only if the k-factor was zero, too, should
7910# we zero the exponent
7911 tst.l %d0
7912 bne.b fout_pack_set
7913# "mantissa" is all zero which means that the answer is zero. but, the '040
7914# algorithm allows the exponent to be non-zero. the 881/2 do not. therefore,
7915# if the mantissa is zero, I will zero the exponent, too.
7916# the question now is whether the exponents sign bit is allowed to be non-zero
7917# for a zero, also...
7918 andi.w &0xf000,FP_SCR0(%a6)
7919
7920fout_pack_set:
7921
7922 lea FP_SCR0(%a6),%a0 # pass: src addr
7923
7924fout_pack_write:
7925 mov.l (%sp)+,%a1 # pass: dst addr
7926 mov.l &0xc,%d0 # pass: opsize is 12 bytes
7927
7928 cmpi.b SPCOND_FLG(%a6),&mda7_flg
7929 beq.b fout_pack_a7
7930
7931 bsr.l _dmem_write # write ext prec number to memory
7932
7933 tst.l %d1 # did dstore fail?
7934 bne.w fout_ext_err # yes
7935
7936 rts
7937
7938# we don't want to do the write if the exception occurred in supervisor mode
7939# so _mem_write2() handles this for us.
7940fout_pack_a7:
7941 bsr.l _mem_write2 # write ext prec number to memory
7942
7943 tst.l %d1 # did dstore fail?
7944 bne.w fout_ext_err # yes
7945
7946 rts
7947
7948fout_pack_not_norm:
7949 cmpi.b %d0,&DENORM # is it a DENORM?
7950 beq.w fout_pack_norm # yes
7951 lea FP_SRC(%a6),%a0
7952 clr.w 2+FP_SRC_EX(%a6)
7953 cmpi.b %d0,&SNAN # is it an SNAN?
7954 beq.b fout_pack_snan # yes
7955 bra.b fout_pack_write # no
7956
7957fout_pack_snan:
7958 ori.w &snaniop2_mask,FPSR_EXCEPT(%a6) # set SNAN/AIOP
7959 bset &0x6,FP_SRC_HI(%a6) # set snan bit
7960 bra.b fout_pack_write
7961
7962#########################################################################
7963# XDEF **************************************************************** #
7964# fmul(): emulates the fmul instruction #
7965# fsmul(): emulates the fsmul instruction #
7966# fdmul(): emulates the fdmul instruction #
7967# #
7968# XREF **************************************************************** #
7969# scale_to_zero_src() - scale src exponent to zero #
7970# scale_to_zero_dst() - scale dst exponent to zero #
7971# unf_res() - return default underflow result #
7972# ovf_res() - return default overflow result #
7973# res_qnan() - return QNAN result #
7974# res_snan() - return SNAN result #
7975# #
7976# INPUT *************************************************************** #
7977# a0 = pointer to extended precision source operand #
7978# a1 = pointer to extended precision destination operand #
7979# d0 = rnd prec,mode #
7980# #
7981# OUTPUT ************************************************************** #
7982# fp0 = result #
7983# fp1 = EXOP (if exception occurred) #
7984# #
7985# ALGORITHM *********************************************************** #
7986# Handle NANs, infinities, and zeroes as special cases. Divide #
7987# norms/denorms into ext/sgl/dbl precision. #
7988# For norms/denorms, scale the exponents such that a multiply #
7989# instruction won't cause an exception.
Use the regular fmul to #
7990# compute a result. Check if the regular operands would have taken #
7991# an exception. If so, return the default overflow/underflow result #
7992# and return the EXOP if exceptions are enabled. Else, scale the #
7993# result operand to the proper exponent. #
7994# #
7995#########################################################################

7996
# scale-factor thresholds, indexed by rnd precision (ext=0, sgl=1, dbl=2)
7997 align 0x10
7998tbl_fmul_ovfl:
7999 long 0x3fff - 0x7ffe # ext_max
8000 long 0x3fff - 0x407e # sgl_max
8001 long 0x3fff - 0x43fe # dbl_max
8002tbl_fmul_unfl:
8003 long 0x3fff + 0x0001 # ext_unfl
8004 long 0x3fff - 0x3f80 # sgl_unfl
8005 long 0x3fff - 0x3c00 # dbl_unfl
8006
8007 global fsmul
8008fsmul:
8009 andi.b &0x30,%d0 # clear rnd prec
8010 ori.b &s_mode*0x10,%d0 # insert sgl prec
8011 bra.b fmul
8012
8013 global fdmul
8014fdmul:
8015 andi.b &0x30,%d0 # clear rnd prec
8016 ori.b &d_mode*0x10,%d0 # insert dbl prec
8017
8018 global fmul
8019fmul:
8020 mov.l %d0,L_SCR3(%a6) # store rnd info
8021
8022 clr.w %d1
8023 mov.b DTAG(%a6),%d1
8024 lsl.b &0x3,%d1
8025 or.b STAG(%a6),%d1 # combine src tags: d1 = (DTAG<<3)|STAG
8026 bne.w fmul_not_norm # optimize on non-norm input
8027
8028fmul_norm:
8029 mov.w DST_EX(%a1),FP_SCR1_EX(%a6)
8030 mov.l DST_HI(%a1),FP_SCR1_HI(%a6)
8031 mov.l DST_LO(%a1),FP_SCR1_LO(%a6)
8032
8033 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
8034 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
8035 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
8036
8037 bsr.l scale_to_zero_src # scale src exponent
8038 mov.l %d0,-(%sp) # save scale factor 1
8039
8040 bsr.l scale_to_zero_dst # scale dst exponent
8041
8042 add.l %d0,(%sp) # SCALE_FACTOR = scale1 + scale2
8043
8044 mov.w 2+L_SCR3(%a6),%d1 # fetch precision
8045 lsr.b &0x6,%d1 # shift to lo bits
8046 mov.l (%sp)+,%d0 # load S.F.
8047 cmp.l %d0,(tbl_fmul_ovfl.w,%pc,%d1.w*4) # would result ovfl?
8048 beq.w fmul_may_ovfl # result may rnd to overflow
8049 blt.w fmul_ovfl # result will overflow
8050
8051 cmp.l %d0,(tbl_fmul_unfl.w,%pc,%d1.w*4) # would result unfl?
8052 beq.w fmul_may_unfl # result may rnd to no unfl
8053 bgt.w fmul_unfl # result will underflow
8054
8055#
8056# NORMAL:
8057# - the result of the multiply operation will neither overflow nor underflow.
8058# - do the multiply to the proper precision and rounding mode.
8059# - scale the result exponent using the scale factor. if both operands were
8060# normalized then we really don't need to go through this scaling. but for now,
8061# this will do.
8062#
8063fmul_normal:
8064 fmovm.x FP_SCR1(%a6),&0x80 # load dst operand
8065
8066 fmov.l L_SCR3(%a6),%fpcr # set FPCR
8067 fmov.l &0x0,%fpsr # clear FPSR
8068
8069 fmul.x FP_SCR0(%a6),%fp0 # execute multiply
8070
8071 fmov.l %fpsr,%d1 # save status
8072 fmov.l &0x0,%fpcr # clear FPCR
8073
8074 or.l %d1,USER_FPSR(%a6) # save INEX2,N
8075
8076fmul_normal_exit:
8077 fmovm.x &0x80,FP_SCR0(%a6) # store out result
8078 mov.l %d2,-(%sp) # save d2
8079 mov.w FP_SCR0_EX(%a6),%d1 # load {sgn,exp}
8080 mov.l %d1,%d2 # make a copy
8081 andi.l &0x7fff,%d1 # strip sign
8082 andi.w &0x8000,%d2 # keep old sign
8083 sub.l %d0,%d1 # add scale factor
8084 or.w %d2,%d1 # concat old sign,new exp
8085 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
8086 mov.l (%sp)+,%d2 # restore d2
8087 fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
8088 rts
8089
8090#
8091# OVERFLOW:
8092# - the result of the multiply operation is an overflow.
8093# - do the multiply to the proper precision and rounding mode in order to
8094# set the inexact bits.
8095# - calculate the default result and return it in fp0.
8096# - if overflow or inexact is enabled, we need a multiply result rounded to
8097# extended precision. if the original operation was extended, then we have this
8098# result. if the original operation was single or double, we have to do another
8099# multiply using extended precision and the correct rounding mode. the result
8100# of this operation then has its exponent scaled by -0x6000 to create the
8101# exceptional operand.
8102#
8103fmul_ovfl:
8104 fmovm.x FP_SCR1(%a6),&0x80 # load dst operand
8105
8106 fmov.l L_SCR3(%a6),%fpcr # set FPCR
8107 fmov.l &0x0,%fpsr # clear FPSR
8108
8109 fmul.x FP_SCR0(%a6),%fp0 # execute multiply
8110
8111 fmov.l %fpsr,%d1 # save status
8112 fmov.l &0x0,%fpcr # clear FPCR
8113
8114 or.l %d1,USER_FPSR(%a6) # save INEX2,N
8115
8116# save setting this until now because this is where fmul_may_ovfl may jump in
8117fmul_ovfl_tst:
8118 or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
8119
8120 mov.b FPCR_ENABLE(%a6),%d1
8121 andi.b &0x13,%d1 # is OVFL or INEX enabled?
8122 bne.b fmul_ovfl_ena # yes
8123
8124# calculate the default result
8125fmul_ovfl_dis:
8126 btst &neg_bit,FPSR_CC(%a6) # is result negative?
8127 sne %d1 # set sign param accordingly
8128 mov.l L_SCR3(%a6),%d0 # pass rnd prec,mode
8129 bsr.l ovf_res # calculate default result
8130 or.b %d0,FPSR_CC(%a6) # set INF,N if applicable
8131 fmovm.x (%a0),&0x80 # return default result in fp0
8132 rts
8133
8134#
8135# OVFL is enabled; Create EXOP:
8136# - if precision is extended, then we have the EXOP. simply bias the exponent
8137# with an extra -0x6000. if the precision is single or double, we need to
8138# calculate a result rounded to extended precision.
8139#
8140fmul_ovfl_ena:
8141 mov.l L_SCR3(%a6),%d1
8142 andi.b &0xc0,%d1 # test the rnd prec
8143 bne.b fmul_ovfl_ena_sd # it's sgl or dbl
8144
8145fmul_ovfl_ena_cont:
8146 fmovm.x &0x80,FP_SCR0(%a6) # move result to stack
8147
8148 mov.l %d2,-(%sp) # save d2
8149 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
8150 mov.w %d1,%d2 # make a copy
8151 andi.l &0x7fff,%d1 # strip sign
8152 sub.l %d0,%d1 # add scale factor
8153 subi.l &0x6000,%d1 # subtract bias
8154 andi.w &0x7fff,%d1 # clear sign bit
8155 andi.w &0x8000,%d2 # keep old sign
8156 or.w %d2,%d1 # concat old sign,new exp
8157 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
8158 mov.l (%sp)+,%d2 # restore d2
8159 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
8160 bra.b fmul_ovfl_dis
8161
8162fmul_ovfl_ena_sd:
8163 fmovm.x FP_SCR1(%a6),&0x80 # load dst operand
8164
8165 mov.l L_SCR3(%a6),%d1
8166 andi.b &0x30,%d1 # keep rnd mode only
8167 fmov.l %d1,%fpcr # set FPCR
8168
8169 fmul.x FP_SCR0(%a6),%fp0 # execute multiply
8170
8171 fmov.l &0x0,%fpcr # clear FPCR
8172 bra.b fmul_ovfl_ena_cont
8173
8174#
8175# may OVERFLOW:
8176# - the result of the multiply operation MAY overflow.
8177# - do the multiply to the proper precision and rounding mode in order to
8178# set the inexact bits.
8179# - calculate the default result and return it in fp0.
8180#
8181fmul_may_ovfl:
8182 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
8183
8184 fmov.l L_SCR3(%a6),%fpcr # set FPCR
8185 fmov.l &0x0,%fpsr # clear FPSR
8186
8187 fmul.x FP_SCR0(%a6),%fp0 # execute multiply
8188
8189 fmov.l %fpsr,%d1 # save status
8190 fmov.l &0x0,%fpcr # clear FPCR
8191
8192 or.l %d1,USER_FPSR(%a6) # save INEX2,N
8193
8194 fabs.x %fp0,%fp1 # make a copy of result
8195 fcmp.b %fp1,&0x2 # is |result| >= 2.b?
8196 fbge.w fmul_ovfl_tst # yes; overflow has occurred
8197
8198# no, it didn't overflow; we have correct result
8199 bra.w fmul_normal_exit
8200
8201#
8202# UNDERFLOW:
8203# - the result of the multiply operation is an underflow.
8204# - do the multiply to the proper precision and rounding mode in order to
8205# set the inexact bits.
8206# - calculate the default result and return it in fp0.
8207# - if overflow or inexact is enabled, we need a multiply result rounded to
8208# extended precision. if the original operation was extended, then we have this
8209# result. if the original operation was single or double, we have to do another
8210# multiply using extended precision and the correct rounding mode. the result
8211# of this operation then has its exponent scaled by -0x6000 to create the
8212# exceptional operand.
8213#
8214fmul_unfl:
8215 bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
8216
8217# for fun, let's use only extended precision, round to zero. then, let
8218# the unf_res() routine figure out all the rest.
8219# will we get the correct answer.
8220 fmovm.x FP_SCR1(%a6),&0x80 # load dst operand
8221
8222 fmov.l &rz_mode*0x10,%fpcr # set FPCR
8223 fmov.l &0x0,%fpsr # clear FPSR
8224
8225 fmul.x FP_SCR0(%a6),%fp0 # execute multiply
8226
8227 fmov.l %fpsr,%d1 # save status
8228 fmov.l &0x0,%fpcr # clear FPCR
8229
8230 or.l %d1,USER_FPSR(%a6) # save INEX2,N
8231
8232 mov.b FPCR_ENABLE(%a6),%d1
8233 andi.b &0x0b,%d1 # is UNFL or INEX enabled?
8234 bne.b fmul_unfl_ena # yes
8235
8236fmul_unfl_dis:
8237 fmovm.x &0x80,FP_SCR0(%a6) # store out result
8238
8239 lea FP_SCR0(%a6),%a0 # pass: result addr
8240 mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
8241 bsr.l unf_res # calculate default result
8242 or.b %d0,FPSR_CC(%a6) # unf_res2 may have set 'Z'
8243 fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
8244 rts
8245
8246#
8247# UNFL is enabled.
8248#
8249fmul_unfl_ena:
8250 fmovm.x FP_SCR1(%a6),&0x40 # load dst op
8251
8252 mov.l L_SCR3(%a6),%d1
8253 andi.b &0xc0,%d1 # is precision extended?
8254 bne.b fmul_unfl_ena_sd # no, sgl or dbl
8255
8256# if the rnd mode is anything but RZ, then we have to re-do the above
8257# multiplication because we used RZ for all.
8258 fmov.l L_SCR3(%a6),%fpcr # set FPCR
8259
8260fmul_unfl_ena_cont:
8261 fmov.l &0x0,%fpsr # clear FPSR
8262
8263 fmul.x FP_SCR0(%a6),%fp1 # execute multiply
8264
8265 fmov.l &0x0,%fpcr # clear FPCR
8266
8267 fmovm.x &0x40,FP_SCR0(%a6) # save result to stack
8268 mov.l %d2,-(%sp) # save d2
8269 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
8270 mov.l %d1,%d2 # make a copy
8271 andi.l &0x7fff,%d1 # strip sign
8272 andi.w &0x8000,%d2 # keep old sign
8273 sub.l %d0,%d1 # add scale factor
8274 addi.l &0x6000,%d1 # add bias
8275 andi.w &0x7fff,%d1
8276 or.w %d2,%d1 # concat old sign,new exp
8277 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
8278 mov.l (%sp)+,%d2 # restore d2
8279 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
8280 bra.w fmul_unfl_dis
8281
8282fmul_unfl_ena_sd:
8283 mov.l L_SCR3(%a6),%d1
8284 andi.b &0x30,%d1 # use only rnd mode
8285 fmov.l %d1,%fpcr # set FPCR
8286
8287 bra.b fmul_unfl_ena_cont
8288
8289# MAY UNDERFLOW:
8290# -use the correct rounding mode and precision. this code favors operations
8291# that do not underflow.
8292fmul_may_unfl:
8293 fmovm.x FP_SCR1(%a6),&0x80 # load dst operand
8294
8295 fmov.l L_SCR3(%a6),%fpcr # set FPCR
8296 fmov.l &0x0,%fpsr # clear FPSR
8297
8298 fmul.x FP_SCR0(%a6),%fp0 # execute multiply
8299
8300 fmov.l %fpsr,%d1 # save status
8301 fmov.l &0x0,%fpcr # clear FPCR
8302
8303 or.l %d1,USER_FPSR(%a6) # save INEX2,N
8304
8305 fabs.x %fp0,%fp1 # make a copy of result
8306 fcmp.b %fp1,&0x2 # is |result| > 2.b?
8307 fbgt.w fmul_normal_exit # no; no underflow occurred
8308 fblt.w fmul_unfl # yes; underflow occurred
8309
8310#
8311# we still don't know if underflow occurred. result is ~ equal to 2. but,
8312# we don't know if the result was an underflow that rounded up to a 2 or
8313# a normalized number that rounded down to a 2. so, redo the entire operation
8314# using RZ as the rounding mode to see what the pre-rounded result is.
8315# this case should be relatively rare.
8316#
8317 fmovm.x FP_SCR1(%a6),&0x40 # load dst operand
8318
8319 mov.l L_SCR3(%a6),%d1
8320 andi.b &0xc0,%d1 # keep rnd prec
8321 ori.b &rz_mode*0x10,%d1 # insert RZ
8322
8323 fmov.l %d1,%fpcr # set FPCR
8324 fmov.l &0x0,%fpsr # clear FPSR
8325
8326 fmul.x FP_SCR0(%a6),%fp1 # execute multiply
8327
8328 fmov.l &0x0,%fpcr # clear FPCR
8329 fabs.x %fp1 # make absolute value
8330 fcmp.b %fp1,&0x2 # is |result| < 2.b?
8331 fbge.w fmul_normal_exit # no; no underflow occurred
8332 bra.w fmul_unfl # yes, underflow occurred
8333
8334################################################################################

8335
8336#
8337# Multiply: inputs are not both normalized; what are they?
8338# table index is (DTAG<<3)|STAG from above; 6 groups of 8 entries
8339fmul_not_norm:
8340 mov.w (tbl_fmul_op.b,%pc,%d1.w*2),%d1
8341 jmp (tbl_fmul_op.b,%pc,%d1.w)
8342
8343 swbeg &48
8344tbl_fmul_op:
8345 short fmul_norm - tbl_fmul_op # NORM x NORM
8346 short fmul_zero - tbl_fmul_op # NORM x ZERO
8347 short fmul_inf_src - tbl_fmul_op # NORM x INF
8348 short fmul_res_qnan - tbl_fmul_op # NORM x QNAN
8349 short fmul_norm - tbl_fmul_op # NORM x DENORM
8350 short fmul_res_snan - tbl_fmul_op # NORM x SNAN
8351 short tbl_fmul_op - tbl_fmul_op #
8352 short tbl_fmul_op - tbl_fmul_op #
8353
8354 short fmul_zero - tbl_fmul_op # ZERO x NORM
8355 short fmul_zero - tbl_fmul_op # ZERO x ZERO
8356 short fmul_res_operr - tbl_fmul_op # ZERO x INF
8357 short fmul_res_qnan - tbl_fmul_op # ZERO x QNAN
8358 short fmul_zero - tbl_fmul_op # ZERO x DENORM
8359 short fmul_res_snan - tbl_fmul_op # ZERO x SNAN
8360 short tbl_fmul_op - tbl_fmul_op #
8361 short tbl_fmul_op - tbl_fmul_op #
8362
8363 short fmul_inf_dst - tbl_fmul_op # INF x NORM
8364 short fmul_res_operr - tbl_fmul_op # INF x ZERO
8365 short fmul_inf_dst - tbl_fmul_op # INF x INF
8366 short fmul_res_qnan - tbl_fmul_op # INF x QNAN
8367 short fmul_inf_dst - tbl_fmul_op # INF x DENORM
8368 short fmul_res_snan - tbl_fmul_op # INF x SNAN
8369 short tbl_fmul_op - tbl_fmul_op #
8370 short tbl_fmul_op - tbl_fmul_op #
8371
8372 short fmul_res_qnan - tbl_fmul_op # QNAN x NORM
8373 short fmul_res_qnan - tbl_fmul_op # QNAN x ZERO
8374 short fmul_res_qnan - tbl_fmul_op # QNAN x INF
8375 short fmul_res_qnan - tbl_fmul_op # QNAN x QNAN
8376 short fmul_res_qnan - tbl_fmul_op # QNAN x DENORM
8377 short fmul_res_snan - tbl_fmul_op # QNAN x SNAN
8378 short tbl_fmul_op - tbl_fmul_op #
8379 short tbl_fmul_op - tbl_fmul_op #
8380
8381 short fmul_norm - tbl_fmul_op # DENORM x NORM
8382 short fmul_zero - tbl_fmul_op # DENORM x ZERO
8383 short fmul_inf_src - tbl_fmul_op # DENORM x INF
8384 short fmul_res_qnan - tbl_fmul_op # DENORM x QNAN
8385 short fmul_norm - tbl_fmul_op # DENORM x DENORM
8386 short fmul_res_snan - tbl_fmul_op # DENORM x SNAN
8387 short tbl_fmul_op - tbl_fmul_op #
8388 short tbl_fmul_op - tbl_fmul_op #
8389
8390 short fmul_res_snan - tbl_fmul_op # SNAN x NORM
8391 short fmul_res_snan - tbl_fmul_op # SNAN x ZERO
8392 short fmul_res_snan - tbl_fmul_op # SNAN x INF
8393 short fmul_res_snan - tbl_fmul_op # SNAN x QNAN
8394 short fmul_res_snan - tbl_fmul_op # SNAN x DENORM
8395 short fmul_res_snan - tbl_fmul_op # SNAN x SNAN
8396 short tbl_fmul_op - tbl_fmul_op #
8397 short tbl_fmul_op - tbl_fmul_op #
8398
8399fmul_res_operr:
8400 bra.l res_operr
8401fmul_res_snan:
8402 bra.l res_snan
8403fmul_res_qnan:
8404 bra.l res_qnan
8405
8406#
8407# Multiply: (Zero x Zero) || (Zero x norm) || (Zero x denorm)
8408#
8409 global fmul_zero # global for fsglmul
8410fmul_zero:
8411 mov.b SRC_EX(%a0),%d0 # exclusive or the signs
8412 mov.b DST_EX(%a1),%d1
8413 eor.b %d0,%d1
8414 bpl.b fmul_zero_p # result ZERO is pos.
8415fmul_zero_n:
8416 fmov.s &0x80000000,%fp0 # load -ZERO
8417 mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # set Z/N
8418 rts
8419fmul_zero_p:
8420 fmov.s &0x00000000,%fp0 # load +ZERO
8421 mov.b &z_bmask,FPSR_CC(%a6) # set Z
8422 rts
8423
8424#
8425# Multiply: (inf x inf) || (inf x norm) || (inf x denorm)
8426#
8427# Note: The j-bit for an infinity is a don't-care. However, to be
8428# strictly compatible w/ the 68881/882, we make sure to return an
8429# INF w/ the j-bit set if the input INF j-bit was set. Destination
8430# INFs take priority.
8431#
8432 global fmul_inf_dst # global for fsglmul
8433fmul_inf_dst:
8434 fmovm.x DST(%a1),&0x80 # return INF result in fp0
8435 mov.b SRC_EX(%a0),%d0 # exclusive or the signs
8436 mov.b DST_EX(%a1),%d1
8437 eor.b %d0,%d1
8438 bpl.b fmul_inf_dst_p # result INF is pos.
8439fmul_inf_dst_n:
8440 fabs.x %fp0 # clear result sign
8441 fneg.x %fp0 # set result sign
8442 mov.b &inf_bmask+neg_bmask,FPSR_CC(%a6) # set INF/N
8443 rts
8444fmul_inf_dst_p:
8445 fabs.x %fp0 # clear result sign
8446 mov.b &inf_bmask,FPSR_CC(%a6) # set INF
8447 rts
8448
8449 global fmul_inf_src # global for fsglmul
8450fmul_inf_src:
8451 fmovm.x SRC(%a0),&0x80 # return INF result in fp0
8452 mov.b SRC_EX(%a0),%d0 # exclusive or the signs
8453 mov.b DST_EX(%a1),%d1
8454 eor.b %d0,%d1
8455 bpl.b fmul_inf_dst_p # result INF is pos.
8456 bra.b fmul_inf_dst_n
8457
8458#########################################################################
8459# XDEF **************************************************************** #
8460# fin(): emulates the fmove instruction #
8461# fsin(): emulates the fsmove instruction #
8462# fdin(): emulates the fdmove instruction #
8463# #
8464# XREF **************************************************************** #
8465# norm() - normalize mantissa for EXOP on denorm #
8466# scale_to_zero_src() - scale src exponent to zero #
8467# ovf_res() - return default overflow result #
8468# unf_res() - return default underflow result #
8469# res_qnan_1op() - return QNAN result #
8470# res_snan_1op() - return SNAN result #
8471# #
8472# INPUT *************************************************************** #
8473# a0 = pointer to extended precision source operand #
8474# d0 = round prec/mode #
8475# #
8476# OUTPUT ************************************************************** #
8477# fp0 = result #
8478# fp1 =
EXOP (if exception occurred) # 8479# # 8480# ALGORITHM *********************************************************** # 8481# Handle NANs, infinities, and zeroes as special cases. Divide # 8482# norms into extended, single, and double precision. # 8483# Norms can be emulated w/ a regular fmove instruction. For # 8484# sgl/dbl, must scale exponent and perform an "fmove". Check to see # 8485# if the result would have overflowed/underflowed. If so, use unf_res() # 8486# or ovf_res() to return the default result. Also return EXOP if # 8487# exception is enabled. If no exception, return the default result. # 8488# Unnorms don't pass through here. # 8489# # 8490######################################################################### 8491 8492 global fsin 8493fsin: 8494 andi.b &0x30,%d0 # clear rnd prec 8495 ori.b &s_mode*0x10,%d0 # insert sgl precision 8496 bra.b fin 8497 8498 global fdin 8499fdin: 8500 andi.b &0x30,%d0 # clear rnd prec 8501 ori.b &d_mode*0x10,%d0 # insert dbl precision 8502 8503 global fin 8504fin: 8505 mov.l %d0,L_SCR3(%a6) # store rnd info 8506 8507 mov.b STAG(%a6),%d1 # fetch src optype tag 8508 bne.w fin_not_norm # optimize on non-norm input 8509 8510# 8511# FP MOVE IN: NORMs and DENORMs ONLY! 8512# 8513fin_norm: 8514 andi.b &0xc0,%d0 # is precision extended? 8515 bne.w fin_not_ext # no, so go handle dbl or sgl 8516 8517# 8518# precision selected is extended. so...we cannot get an underflow 8519# or overflow because of rounding to the correct precision. so... 8520# skip the scaling and unscaling... 8521# 8522 tst.b SRC_EX(%a0) # is the operand negative? 8523 bpl.b fin_norm_done # no 8524 bset &neg_bit,FPSR_CC(%a6) # yes, so set 'N' ccode bit 8525fin_norm_done: 8526 fmovm.x SRC(%a0),&0x80 # return result in fp0 8527 rts 8528 8529# 8530# for an extended precision DENORM, the UNFL exception bit is set 8531# the accrued bit is NOT set in this instance(no inexactness!) 8532# 8533fin_denorm: 8534 andi.b &0xc0,%d0 # is precision extended? 
	bne.w		fin_not_ext		# no, so go handle dbl or sgl

# extended-precision DENORM: set the UNFL exception bit, set 'N' if the
# operand is negative, and return the operand unchanged in fp0.
	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
	tst.b		SRC_EX(%a0)		# is the operand negative?
	bpl.b		fin_denorm_done		# no
	bset		&neg_bit,FPSR_CC(%a6)	# yes, so set 'N' ccode bit
fin_denorm_done:
	fmovm.x		SRC(%a0),&0x80		# return result in fp0
	btst		&unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled?
	bne.b		fin_denorm_unfl_ena	# yes
	rts

#
# the input is an extended DENORM and underflow is enabled in the FPCR.
# normalize the mantissa and add the bias of 0x6000 to the resulting negative
# exponent and insert back into the operand.
#
fin_denorm_unfl_ena:
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
	bsr.l		norm			# normalize result
	neg.w		%d0			# new exponent = -(shft val)
	addi.w		&0x6000,%d0		# add new bias to exponent
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch old sign,exp
	andi.w		&0x8000,%d1		# keep old sign
	andi.w		&0x7fff,%d0		# clear sign position
	or.w		%d1,%d0			# concat new exp,old sign
	mov.w		%d0,FP_SCR0_EX(%a6)	# insert new exponent
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	rts

#
# operand is to be rounded to single or double precision
#
fin_not_ext:
	cmpi.b		%d0,&s_mode*0x10	# separate sgl/dbl prec
	bne.b		fin_dbl

#
# operand is to be rounded to single precision
#
fin_sgl:
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	bsr.l		scale_to_zero_src	# calculate scale factor

	cmpi.l		%d0,&0x3fff-0x3f80	# will move in underflow?
	bge.w		fin_sd_unfl		# yes; go handle underflow
	cmpi.l		%d0,&0x3fff-0x407e	# will move in overflow?
	beq.w		fin_sd_may_ovfl		# maybe; go check
	blt.w		fin_sd_ovfl		# yes; go handle overflow

#
# operand will NOT overflow or underflow when moved into the fp reg file
#
fin_sd_normal:
	fmov.l		&0x0,%fpsr		# clear FPSR
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

	fmov.x		FP_SCR0(%a6),%fp0	# perform move

	fmov.l		%fpsr,%d1		# save FPSR
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

# common exit: undo the scaling applied by scale_to_zero_src (d0 holds
# the scale factor) and return the properly-scaled result in fp0.
fin_sd_normal_exit:
	mov.l		%d2,-(%sp)		# save d2
	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
	mov.w		FP_SCR0_EX(%a6),%d1	# load {sgn,exp}
	mov.w		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	sub.l		%d0,%d1			# add scale factor
	andi.w		&0x8000,%d2		# keep old sign
	or.w		%d1,%d2			# concat old sign,new exponent
	mov.w		%d2,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
	rts

#
# operand is to be rounded to double precision
#
fin_dbl:
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	bsr.l		scale_to_zero_src	# calculate scale factor

	cmpi.l		%d0,&0x3fff-0x3c00	# will move in underflow?
	bge.w		fin_sd_unfl		# yes; go handle underflow
	cmpi.l		%d0,&0x3fff-0x43fe	# will move in overflow?
	beq.w		fin_sd_may_ovfl		# maybe; go check
	blt.w		fin_sd_ovfl		# yes; go handle overflow
	bra.w		fin_sd_normal		# no; go handle normalized op

#
# operand WILL underflow when moved in to the fp register file
#
fin_sd_unfl:
	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit

	tst.b		FP_SCR0_EX(%a6)		# is operand negative?
	bpl.b		fin_sd_unfl_tst
	bset		&neg_bit,FPSR_CC(%a6)	# set 'N' ccode bit

# if underflow or inexact is enabled, then go calculate the EXOP first.
fin_sd_unfl_tst:
	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
	bne.b		fin_sd_unfl_ena		# yes

fin_sd_unfl_dis:
	lea		FP_SCR0(%a6),%a0	# pass: result addr
	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
	bsr.l		unf_res			# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# unf_res may have set 'Z'
	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
	rts

#
# operand will underflow AND underflow or inexact is enabled.
# therefore, we must return the result rounded to extended precision.
#
fin_sd_unfl_ena:
	mov.l		FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
	mov.l		FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
	mov.w		FP_SCR0_EX(%a6),%d1	# load current exponent

	mov.l		%d2,-(%sp)		# save d2
	mov.w		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	sub.l		%d0,%d1			# subtract scale factor
	andi.w		&0x8000,%d2		# extract old sign
	addi.l		&0x6000,%d1		# add new bias
	andi.w		&0x7fff,%d1
	or.w		%d1,%d2			# concat old sign,new exp
	mov.w		%d2,FP_SCR1_EX(%a6)	# insert new exponent
	fmovm.x		FP_SCR1(%a6),&0x40	# return EXOP in fp1
	mov.l		(%sp)+,%d2		# restore d2
	bra.b		fin_sd_unfl_dis

#
# operand WILL overflow.
#
fin_sd_ovfl:
	fmov.l		&0x0,%fpsr		# clear FPSR
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

	fmov.x		FP_SCR0(%a6),%fp0	# perform move

	fmov.l		&0x0,%fpcr		# clear FPCR
	fmov.l		%fpsr,%d1		# save FPSR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

fin_sd_ovfl_tst:
	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
	bne.b		fin_sd_ovfl_ena		# yes

#
# OVFL is not enabled; therefore, we must create the default result by
# calling ovf_res().
#
fin_sd_ovfl_dis:
	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
	sne		%d1			# set sign param accordingly
	mov.l		L_SCR3(%a6),%d0		# pass: prec,mode
	bsr.l		ovf_res			# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
	fmovm.x		(%a0),&0x80		# return default result in fp0
	rts

#
# OVFL is enabled.
# the INEX2 bit has already been updated by the round to the correct precision.
# now, round to extended(and don't alter the FPSR).
#
fin_sd_ovfl_ena:
	mov.l		%d2,-(%sp)		# save d2
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# add scale factor
	sub.l		&0x6000,%d1		# subtract bias
	andi.w		&0x7fff,%d1
	or.w		%d2,%d1			# concat old sign,new exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	bra.b		fin_sd_ovfl_dis

#
# the move in MAY overflow. so...
#
fin_sd_may_ovfl:
	fmov.l		&0x0,%fpsr		# clear FPSR
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

	fmov.x		FP_SCR0(%a6),%fp0	# perform the move

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

	fabs.x		%fp0,%fp1		# make a copy of result
	fcmp.b		%fp1,&0x2		# is |result| >= 2.b?
	fbge.w		fin_sd_ovfl_tst		# yes; overflow has occurred

# no, it didn't overflow; we have correct result
	bra.w		fin_sd_normal_exit

##########################################################################

#
# operand is not a NORM: check its optype and branch accordingly
#
fin_not_norm:
	cmpi.b		%d1,&DENORM		# weed out DENORM
	beq.w		fin_denorm
	cmpi.b		%d1,&SNAN		# weed out SNANs
	beq.l		res_snan_1op
	cmpi.b		%d1,&QNAN		# weed out QNANs
	beq.l		res_qnan_1op

#
# do the fmove in; at this point, only possible ops are ZERO and INF.
# use fmov to determine ccodes.
# prec:mode should be zero at this point but it won't affect answer anyways.
#
	fmov.x		SRC(%a0),%fp0		# do fmove in
	fmov.l		%fpsr,%d0		# no exceptions possible
	rol.l		&0x8,%d0		# put ccodes in lo byte
	mov.b		%d0,FPSR_CC(%a6)	# insert correct ccodes
	rts

#########################################################################
# XDEF ****************************************************************	#
#	fdiv(): emulates the fdiv instruction				#
#	fsdiv(): emulates the fsdiv instruction				#
#	fddiv(): emulates the fddiv instruction				#
#									#
# XREF ****************************************************************	#
#	scale_to_zero_src() - scale src exponent to zero		#
#	scale_to_zero_dst() - scale dst exponent to zero		#
#	unf_res() - return default underflow result			#
#	ovf_res() - return default overflow result			#
#	res_qnan() - return QNAN result					#
#	res_snan() - return SNAN result					#
#									#
# INPUT ***************************************************************	#
#	a0 = pointer to extended precision source operand		#
#	a1 = pointer to extended precision destination operand		#
#	d0 = rnd prec,mode						#
#									#
# OUTPUT **************************************************************	#
#	fp0 = result							#
#	fp1 = EXOP (if exception occurred)				#
#									#
# ALGORITHM ***********************************************************	#
#	Handle NANs, infinities, and zeroes as special cases. Divide	#
# norms/denorms into ext/sgl/dbl precision.				#
#	For norms/denorms, scale the exponents such that a divide	#
# instruction won't cause an exception. Use the regular fdiv to	#
# compute a result. Check if the regular operands would have taken	#
# an exception. If so, return the default overflow/underflow result	#
# and return the EXOP if exceptions are enabled. Else, scale the	#
# result operand to the proper exponent.				#
# 8811# # 8812######################################################################### 8813 8814 align 0x10 8815tbl_fdiv_unfl: 8816 long 0x3fff - 0x0000 # ext_unfl 8817 long 0x3fff - 0x3f81 # sgl_unfl 8818 long 0x3fff - 0x3c01 # dbl_unfl 8819 8820tbl_fdiv_ovfl: 8821 long 0x3fff - 0x7ffe # ext overflow exponent 8822 long 0x3fff - 0x407e # sgl overflow exponent 8823 long 0x3fff - 0x43fe # dbl overflow exponent 8824 8825 global fsdiv 8826fsdiv: 8827 andi.b &0x30,%d0 # clear rnd prec 8828 ori.b &s_mode*0x10,%d0 # insert sgl prec 8829 bra.b fdiv 8830 8831 global fddiv 8832fddiv: 8833 andi.b &0x30,%d0 # clear rnd prec 8834 ori.b &d_mode*0x10,%d0 # insert dbl prec 8835 8836 global fdiv 8837fdiv: 8838 mov.l %d0,L_SCR3(%a6) # store rnd info 8839 8840 clr.w %d1 8841 mov.b DTAG(%a6),%d1 8842 lsl.b &0x3,%d1 8843 or.b STAG(%a6),%d1 # combine src tags 8844 8845 bne.w fdiv_not_norm # optimize on non-norm input 8846 8847# 8848# DIVIDE: NORMs and DENORMs ONLY! 8849# 8850fdiv_norm: 8851 mov.w DST_EX(%a1),FP_SCR1_EX(%a6) 8852 mov.l DST_HI(%a1),FP_SCR1_HI(%a6) 8853 mov.l DST_LO(%a1),FP_SCR1_LO(%a6) 8854 8855 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) 8856 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) 8857 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) 8858 8859 bsr.l scale_to_zero_src # scale src exponent 8860 mov.l %d0,-(%sp) # save scale factor 1 8861 8862 bsr.l scale_to_zero_dst # scale dst exponent 8863 8864 neg.l (%sp) # SCALE FACTOR = scale1 - scale2 8865 add.l %d0,(%sp) 8866 8867 mov.w 2+L_SCR3(%a6),%d1 # fetch precision 8868 lsr.b &0x6,%d1 # shift to lo bits 8869 mov.l (%sp)+,%d0 # load S.F. 8870 cmp.l %d0,(tbl_fdiv_ovfl.b,%pc,%d1.w*4) # will result overflow? 8871 ble.w fdiv_may_ovfl # result will overflow 8872 8873 cmp.l %d0,(tbl_fdiv_unfl.w,%pc,%d1.w*4) # will result underflow? 
8874 beq.w fdiv_may_unfl # maybe 8875 bgt.w fdiv_unfl # yes; go handle underflow 8876 8877fdiv_normal: 8878 fmovm.x FP_SCR1(%a6),&0x80 # load dst op 8879 8880 fmov.l L_SCR3(%a6),%fpcr # save FPCR 8881 fmov.l &0x0,%fpsr # clear FPSR 8882 8883 fdiv.x FP_SCR0(%a6),%fp0 # perform divide 8884 8885 fmov.l %fpsr,%d1 # save FPSR 8886 fmov.l &0x0,%fpcr # clear FPCR 8887 8888 or.l %d1,USER_FPSR(%a6) # save INEX2,N 8889 8890fdiv_normal_exit: 8891 fmovm.x &0x80,FP_SCR0(%a6) # store result on stack 8892 mov.l %d2,-(%sp) # store d2 8893 mov.w FP_SCR0_EX(%a6),%d1 # load {sgn,exp} 8894 mov.l %d1,%d2 # make a copy 8895 andi.l &0x7fff,%d1 # strip sign 8896 andi.w &0x8000,%d2 # keep old sign 8897 sub.l %d0,%d1 # add scale factor 8898 or.w %d2,%d1 # concat old sign,new exp 8899 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent 8900 mov.l (%sp)+,%d2 # restore d2 8901 fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0 8902 rts 8903 8904tbl_fdiv_ovfl2: 8905 long 0x7fff 8906 long 0x407f 8907 long 0x43ff 8908 8909fdiv_no_ovfl: 8910 mov.l (%sp)+,%d0 # restore scale factor 8911 bra.b fdiv_normal_exit 8912 8913fdiv_may_ovfl: 8914 mov.l %d0,-(%sp) # save scale factor 8915 8916 fmovm.x FP_SCR1(%a6),&0x80 # load dst op 8917 8918 fmov.l L_SCR3(%a6),%fpcr # set FPCR 8919 fmov.l &0x0,%fpsr # set FPSR 8920 8921 fdiv.x FP_SCR0(%a6),%fp0 # execute divide 8922 8923 fmov.l %fpsr,%d0 8924 fmov.l &0x0,%fpcr 8925 8926 or.l %d0,USER_FPSR(%a6) # save INEX,N 8927 8928 fmovm.x &0x01,-(%sp) # save result to stack 8929 mov.w (%sp),%d0 # fetch new exponent 8930 add.l &0xc,%sp # clear result from stack 8931 andi.l &0x7fff,%d0 # strip sign 8932 sub.l (%sp),%d0 # add scale factor 8933 cmp.l %d0,(tbl_fdiv_ovfl2.b,%pc,%d1.w*4) 8934 blt.b fdiv_no_ovfl 8935 mov.l (%sp)+,%d0 8936 8937fdiv_ovfl_tst: 8938 or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex 8939 8940 mov.b FPCR_ENABLE(%a6),%d1 8941 andi.b &0x13,%d1 # is OVFL or INEX enabled? 
8942 bne.b fdiv_ovfl_ena # yes 8943 8944fdiv_ovfl_dis: 8945 btst &neg_bit,FPSR_CC(%a6) # is result negative? 8946 sne %d1 # set sign param accordingly 8947 mov.l L_SCR3(%a6),%d0 # pass prec:rnd 8948 bsr.l ovf_res # calculate default result 8949 or.b %d0,FPSR_CC(%a6) # set INF if applicable 8950 fmovm.x (%a0),&0x80 # return default result in fp0 8951 rts 8952 8953fdiv_ovfl_ena: 8954 mov.l L_SCR3(%a6),%d1 8955 andi.b &0xc0,%d1 # is precision extended? 8956 bne.b fdiv_ovfl_ena_sd # no, do sgl or dbl 8957 8958fdiv_ovfl_ena_cont: 8959 fmovm.x &0x80,FP_SCR0(%a6) # move result to stack 8960 8961 mov.l %d2,-(%sp) # save d2 8962 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp} 8963 mov.w %d1,%d2 # make a copy 8964 andi.l &0x7fff,%d1 # strip sign 8965 sub.l %d0,%d1 # add scale factor 8966 subi.l &0x6000,%d1 # subtract bias 8967 andi.w &0x7fff,%d1 # clear sign bit 8968 andi.w &0x8000,%d2 # keep old sign 8969 or.w %d2,%d1 # concat old sign,new exp 8970 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent 8971 mov.l (%sp)+,%d2 # restore d2 8972 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1 8973 bra.b fdiv_ovfl_dis 8974 8975fdiv_ovfl_ena_sd: 8976 fmovm.x FP_SCR1(%a6),&0x80 # load dst operand 8977 8978 mov.l L_SCR3(%a6),%d1 8979 andi.b &0x30,%d1 # keep rnd mode 8980 fmov.l %d1,%fpcr # set FPCR 8981 8982 fdiv.x FP_SCR0(%a6),%fp0 # execute divide 8983 8984 fmov.l &0x0,%fpcr # clear FPCR 8985 bra.b fdiv_ovfl_ena_cont 8986 8987fdiv_unfl: 8988 bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit 8989 8990 fmovm.x FP_SCR1(%a6),&0x80 # load dst op 8991 8992 fmov.l &rz_mode*0x10,%fpcr # set FPCR 8993 fmov.l &0x0,%fpsr # clear FPSR 8994 8995 fdiv.x FP_SCR0(%a6),%fp0 # execute divide 8996 8997 fmov.l %fpsr,%d1 # save status 8998 fmov.l &0x0,%fpcr # clear FPCR 8999 9000 or.l %d1,USER_FPSR(%a6) # save INEX2,N 9001 9002 mov.b FPCR_ENABLE(%a6),%d1 9003 andi.b &0x0b,%d1 # is UNFL or INEX enabled? 
9004 bne.b fdiv_unfl_ena # yes 9005 9006fdiv_unfl_dis: 9007 fmovm.x &0x80,FP_SCR0(%a6) # store out result 9008 9009 lea FP_SCR0(%a6),%a0 # pass: result addr 9010 mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode 9011 bsr.l unf_res # calculate default result 9012 or.b %d0,FPSR_CC(%a6) # 'Z' may have been set 9013 fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0 9014 rts 9015 9016# 9017# UNFL is enabled. 9018# 9019fdiv_unfl_ena: 9020 fmovm.x FP_SCR1(%a6),&0x40 # load dst op 9021 9022 mov.l L_SCR3(%a6),%d1 9023 andi.b &0xc0,%d1 # is precision extended? 9024 bne.b fdiv_unfl_ena_sd # no, sgl or dbl 9025 9026 fmov.l L_SCR3(%a6),%fpcr # set FPCR 9027 9028fdiv_unfl_ena_cont: 9029 fmov.l &0x0,%fpsr # clear FPSR 9030 9031 fdiv.x FP_SCR0(%a6),%fp1 # execute divide 9032 9033 fmov.l &0x0,%fpcr # clear FPCR 9034 9035 fmovm.x &0x40,FP_SCR0(%a6) # save result to stack 9036 mov.l %d2,-(%sp) # save d2 9037 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp} 9038 mov.l %d1,%d2 # make a copy 9039 andi.l &0x7fff,%d1 # strip sign 9040 andi.w &0x8000,%d2 # keep old sign 9041 sub.l %d0,%d1 # add scale factoer 9042 addi.l &0x6000,%d1 # add bias 9043 andi.w &0x7fff,%d1 9044 or.w %d2,%d1 # concat old sign,new exp 9045 mov.w %d1,FP_SCR0_EX(%a6) # insert new exp 9046 mov.l (%sp)+,%d2 # restore d2 9047 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1 9048 bra.w fdiv_unfl_dis 9049 9050fdiv_unfl_ena_sd: 9051 mov.l L_SCR3(%a6),%d1 9052 andi.b &0x30,%d1 # use only rnd mode 9053 fmov.l %d1,%fpcr # set FPCR 9054 9055 bra.b fdiv_unfl_ena_cont 9056 9057# 9058# the divide operation MAY underflow: 9059# 9060fdiv_may_unfl: 9061 fmovm.x FP_SCR1(%a6),&0x80 # load dst op 9062 9063 fmov.l L_SCR3(%a6),%fpcr # set FPCR 9064 fmov.l &0x0,%fpsr # clear FPSR 9065 9066 fdiv.x FP_SCR0(%a6),%fp0 # execute divide 9067 9068 fmov.l %fpsr,%d1 # save status 9069 fmov.l &0x0,%fpcr # clear FPCR 9070 9071 or.l %d1,USER_FPSR(%a6) # save INEX2,N 9072 9073 fabs.x %fp0,%fp1 # make a copy of result 9074 fcmp.b %fp1,&0x1 # is |result| > 
1.b? 9075 fbgt.w fdiv_normal_exit # no; no underflow occurred 9076 fblt.w fdiv_unfl # yes; underflow occurred 9077 9078# 9079# we still don't know if underflow occurred. result is ~ equal to 1. but, 9080# we don't know if the result was an underflow that rounded up to a 1 9081# or a normalized number that rounded down to a 1. so, redo the entire 9082# operation using RZ as the rounding mode to see what the pre-rounded 9083# result is. this case should be relatively rare. 9084# 9085 fmovm.x FP_SCR1(%a6),&0x40 # load dst op into fp1 9086 9087 mov.l L_SCR3(%a6),%d1 9088 andi.b &0xc0,%d1 # keep rnd prec 9089 ori.b &rz_mode*0x10,%d1 # insert RZ 9090 9091 fmov.l %d1,%fpcr # set FPCR 9092 fmov.l &0x0,%fpsr # clear FPSR 9093 9094 fdiv.x FP_SCR0(%a6),%fp1 # execute divide 9095 9096 fmov.l &0x0,%fpcr # clear FPCR 9097 fabs.x %fp1 # make absolute value 9098 fcmp.b %fp1,&0x1 # is |result| < 1.b? 9099 fbge.w fdiv_normal_exit # no; no underflow occurred 9100 bra.w fdiv_unfl # yes; underflow occurred 9101 9102############################################################################ 9103 9104# 9105# Divide: inputs are not both normalized; what are they? 
9106# 9107fdiv_not_norm: 9108 mov.w (tbl_fdiv_op.b,%pc,%d1.w*2),%d1 9109 jmp (tbl_fdiv_op.b,%pc,%d1.w*1) 9110 9111 swbeg &48 9112tbl_fdiv_op: 9113 short fdiv_norm - tbl_fdiv_op # NORM / NORM 9114 short fdiv_inf_load - tbl_fdiv_op # NORM / ZERO 9115 short fdiv_zero_load - tbl_fdiv_op # NORM / INF 9116 short fdiv_res_qnan - tbl_fdiv_op # NORM / QNAN 9117 short fdiv_norm - tbl_fdiv_op # NORM / DENORM 9118 short fdiv_res_snan - tbl_fdiv_op # NORM / SNAN 9119 short tbl_fdiv_op - tbl_fdiv_op # 9120 short tbl_fdiv_op - tbl_fdiv_op # 9121 9122 short fdiv_zero_load - tbl_fdiv_op # ZERO / NORM 9123 short fdiv_res_operr - tbl_fdiv_op # ZERO / ZERO 9124 short fdiv_zero_load - tbl_fdiv_op # ZERO / INF 9125 short fdiv_res_qnan - tbl_fdiv_op # ZERO / QNAN 9126 short fdiv_zero_load - tbl_fdiv_op # ZERO / DENORM 9127 short fdiv_res_snan - tbl_fdiv_op # ZERO / SNAN 9128 short tbl_fdiv_op - tbl_fdiv_op # 9129 short tbl_fdiv_op - tbl_fdiv_op # 9130 9131 short fdiv_inf_dst - tbl_fdiv_op # INF / NORM 9132 short fdiv_inf_dst - tbl_fdiv_op # INF / ZERO 9133 short fdiv_res_operr - tbl_fdiv_op # INF / INF 9134 short fdiv_res_qnan - tbl_fdiv_op # INF / QNAN 9135 short fdiv_inf_dst - tbl_fdiv_op # INF / DENORM 9136 short fdiv_res_snan - tbl_fdiv_op # INF / SNAN 9137 short tbl_fdiv_op - tbl_fdiv_op # 9138 short tbl_fdiv_op - tbl_fdiv_op # 9139 9140 short fdiv_res_qnan - tbl_fdiv_op # QNAN / NORM 9141 short fdiv_res_qnan - tbl_fdiv_op # QNAN / ZERO 9142 short fdiv_res_qnan - tbl_fdiv_op # QNAN / INF 9143 short fdiv_res_qnan - tbl_fdiv_op # QNAN / QNAN 9144 short fdiv_res_qnan - tbl_fdiv_op # QNAN / DENORM 9145 short fdiv_res_snan - tbl_fdiv_op # QNAN / SNAN 9146 short tbl_fdiv_op - tbl_fdiv_op # 9147 short tbl_fdiv_op - tbl_fdiv_op # 9148 9149 short fdiv_norm - tbl_fdiv_op # DENORM / NORM 9150 short fdiv_inf_load - tbl_fdiv_op # DENORM / ZERO 9151 short fdiv_zero_load - tbl_fdiv_op # DENORM / INF 9152 short fdiv_res_qnan - tbl_fdiv_op # DENORM / QNAN 9153 short fdiv_norm - tbl_fdiv_op # DENORM 
/ DENORM 9154 short fdiv_res_snan - tbl_fdiv_op # DENORM / SNAN 9155 short tbl_fdiv_op - tbl_fdiv_op # 9156 short tbl_fdiv_op - tbl_fdiv_op # 9157 9158 short fdiv_res_snan - tbl_fdiv_op # SNAN / NORM 9159 short fdiv_res_snan - tbl_fdiv_op # SNAN / ZERO 9160 short fdiv_res_snan - tbl_fdiv_op # SNAN / INF 9161 short fdiv_res_snan - tbl_fdiv_op # SNAN / QNAN 9162 short fdiv_res_snan - tbl_fdiv_op # SNAN / DENORM 9163 short fdiv_res_snan - tbl_fdiv_op # SNAN / SNAN 9164 short tbl_fdiv_op - tbl_fdiv_op # 9165 short tbl_fdiv_op - tbl_fdiv_op # 9166 9167fdiv_res_qnan: 9168 bra.l res_qnan 9169fdiv_res_snan: 9170 bra.l res_snan 9171fdiv_res_operr: 9172 bra.l res_operr 9173 9174 global fdiv_zero_load # global for fsgldiv 9175fdiv_zero_load: 9176 mov.b SRC_EX(%a0),%d0 # result sign is exclusive 9177 mov.b DST_EX(%a1),%d1 # or of input signs. 9178 eor.b %d0,%d1 9179 bpl.b fdiv_zero_load_p # result is positive 9180 fmov.s &0x80000000,%fp0 # load a -ZERO 9181 mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # set Z/N 9182 rts 9183fdiv_zero_load_p: 9184 fmov.s &0x00000000,%fp0 # load a +ZERO 9185 mov.b &z_bmask,FPSR_CC(%a6) # set Z 9186 rts 9187 9188# 9189# The destination was In Range and the source was a ZERO. The result, 9190# therefore, is an INF w/ the proper sign. 9191# So, determine the sign and return a new INF (w/ the j-bit cleared). 9192# 9193 global fdiv_inf_load # global for fsgldiv 9194fdiv_inf_load: 9195 ori.w &dz_mask+adz_mask,2+USER_FPSR(%a6) # no; set DZ/ADZ 9196 mov.b SRC_EX(%a0),%d0 # load both signs 9197 mov.b DST_EX(%a1),%d1 9198 eor.b %d0,%d1 9199 bpl.b fdiv_inf_load_p # result is positive 9200 fmov.s &0xff800000,%fp0 # make result -INF 9201 mov.b &inf_bmask+neg_bmask,FPSR_CC(%a6) # set INF/N 9202 rts 9203fdiv_inf_load_p: 9204 fmov.s &0x7f800000,%fp0 # make result +INF 9205 mov.b &inf_bmask,FPSR_CC(%a6) # set INF 9206 rts 9207 9208# 9209# The destination was an INF w/ an In Range or ZERO source, the result is 9210# an INF w/ the proper sign. 
9211# The 68881/882 returns the destination INF w/ the new sign(if the j-bit of the 9212# dst INF is set, then then j-bit of the result INF is also set). 9213# 9214 global fdiv_inf_dst # global for fsgldiv 9215fdiv_inf_dst: 9216 mov.b DST_EX(%a1),%d0 # load both signs 9217 mov.b SRC_EX(%a0),%d1 9218 eor.b %d0,%d1 9219 bpl.b fdiv_inf_dst_p # result is positive 9220 9221 fmovm.x DST(%a1),&0x80 # return result in fp0 9222 fabs.x %fp0 # clear sign bit 9223 fneg.x %fp0 # set sign bit 9224 mov.b &inf_bmask+neg_bmask,FPSR_CC(%a6) # set INF/NEG 9225 rts 9226 9227fdiv_inf_dst_p: 9228 fmovm.x DST(%a1),&0x80 # return result in fp0 9229 fabs.x %fp0 # return positive INF 9230 mov.b &inf_bmask,FPSR_CC(%a6) # set INF 9231 rts 9232 9233######################################################################### 9234# XDEF **************************************************************** # 9235# fneg(): emulates the fneg instruction # 9236# fsneg(): emulates the fsneg instruction # 9237# fdneg(): emulates the fdneg instruction # 9238# # 9239# XREF **************************************************************** # 9240# norm() - normalize a denorm to provide EXOP # 9241# scale_to_zero_src() - scale sgl/dbl source exponent # 9242# ovf_res() - return default overflow result # 9243# unf_res() - return default underflow result # 9244# res_qnan_1op() - return QNAN result # 9245# res_snan_1op() - return SNAN result # 9246# # 9247# INPUT *************************************************************** # 9248# a0 = pointer to extended precision source operand # 9249# d0 = rnd prec,mode # 9250# # 9251# OUTPUT ************************************************************** # 9252# fp0 = result # 9253# fp1 = EXOP (if exception occurred) # 9254# # 9255# ALGORITHM *********************************************************** # 9256# Handle NANs, zeroes, and infinities as special cases. Separate # 9257# norms/denorms into ext/sgl/dbl precisions. 
Extended precision can be # 9258# emulated by simply setting sign bit. Sgl/dbl operands must be scaled # 9259# and an actual fneg performed to see if overflow/underflow would have # 9260# occurred. If so, return default underflow/overflow result. Else, # 9261# scale the result exponent and return result. FPSR gets set based on # 9262# the result value. # 9263# # 9264######################################################################### 9265 9266 global fsneg 9267fsneg: 9268 andi.b &0x30,%d0 # clear rnd prec 9269 ori.b &s_mode*0x10,%d0 # insert sgl precision 9270 bra.b fneg 9271 9272 global fdneg 9273fdneg: 9274 andi.b &0x30,%d0 # clear rnd prec 9275 ori.b &d_mode*0x10,%d0 # insert dbl prec 9276 9277 global fneg 9278fneg: 9279 mov.l %d0,L_SCR3(%a6) # store rnd info 9280 mov.b STAG(%a6),%d1 9281 bne.w fneg_not_norm # optimize on non-norm input 9282 9283# 9284# NEGATE SIGN : norms and denorms ONLY! 9285# 9286fneg_norm: 9287 andi.b &0xc0,%d0 # is precision extended? 9288 bne.w fneg_not_ext # no; go handle sgl or dbl 9289 9290# 9291# precision selected is extended. so...we can not get an underflow 9292# or overflow because of rounding to the correct precision. so... 9293# skip the scaling and unscaling... 9294# 9295 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) 9296 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) 9297 mov.w SRC_EX(%a0),%d0 9298 eori.w &0x8000,%d0 # negate sign 9299 bpl.b fneg_norm_load # sign is positive 9300 mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit 9301fneg_norm_load: 9302 mov.w %d0,FP_SCR0_EX(%a6) 9303 fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0 9304 rts 9305 9306# 9307# for an extended precision DENORM, the UNFL exception bit is set 9308# the accrued bit is NOT set in this instance(no inexactness!) 9309# 9310fneg_denorm: 9311 andi.b &0xc0,%d0 # is precision extended? 
9312 bne.b fneg_not_ext # no; go handle sgl or dbl 9313 9314 bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit 9315 9316 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) 9317 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) 9318 mov.w SRC_EX(%a0),%d0 9319 eori.w &0x8000,%d0 # negate sign 9320 bpl.b fneg_denorm_done # no 9321 mov.b &neg_bmask,FPSR_CC(%a6) # yes, set 'N' ccode bit 9322fneg_denorm_done: 9323 mov.w %d0,FP_SCR0_EX(%a6) 9324 fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0 9325 9326 btst &unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled? 9327 bne.b fneg_ext_unfl_ena # yes 9328 rts 9329 9330# 9331# the input is an extended DENORM and underflow is enabled in the FPCR. 9332# normalize the mantissa and add the bias of 0x6000 to the resulting negative 9333# exponent and insert back into the operand. 9334# 9335fneg_ext_unfl_ena: 9336 lea FP_SCR0(%a6),%a0 # pass: ptr to operand 9337 bsr.l norm # normalize result 9338 neg.w %d0 # new exponent = -(shft val) 9339 addi.w &0x6000,%d0 # add new bias to exponent 9340 mov.w FP_SCR0_EX(%a6),%d1 # fetch old sign,exp 9341 andi.w &0x8000,%d1 # keep old sign 9342 andi.w &0x7fff,%d0 # clear sign position 9343 or.w %d1,%d0 # concat old sign, new exponent 9344 mov.w %d0,FP_SCR0_EX(%a6) # insert new exponent 9345 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1 9346 rts 9347 9348# 9349# operand is either single or double 9350# 9351fneg_not_ext: 9352 cmpi.b %d0,&s_mode*0x10 # separate sgl/dbl prec 9353 bne.b fneg_dbl 9354 9355# 9356# operand is to be rounded to single precision 9357# 9358fneg_sgl: 9359 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) 9360 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) 9361 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) 9362 bsr.l scale_to_zero_src # calculate scale factor 9363 9364 cmpi.l %d0,&0x3fff-0x3f80 # will move in underflow? 9365 bge.w fneg_sd_unfl # yes; go handle underflow 9366 cmpi.l %d0,&0x3fff-0x407e # will move in overflow? 
9367 beq.w fneg_sd_may_ovfl # maybe; go check 9368 blt.w fneg_sd_ovfl # yes; go handle overflow 9369 9370# 9371# operand will NOT overflow or underflow when moved in to the fp reg file 9372# 9373fneg_sd_normal: 9374 fmov.l &0x0,%fpsr # clear FPSR 9375 fmov.l L_SCR3(%a6),%fpcr # set FPCR 9376 9377 fneg.x FP_SCR0(%a6),%fp0 # perform negation 9378 9379 fmov.l %fpsr,%d1 # save FPSR 9380 fmov.l &0x0,%fpcr # clear FPCR 9381 9382 or.l %d1,USER_FPSR(%a6) # save INEX2,N 9383 9384fneg_sd_normal_exit: 9385 mov.l %d2,-(%sp) # save d2 9386 fmovm.x &0x80,FP_SCR0(%a6) # store out result 9387 mov.w FP_SCR0_EX(%a6),%d1 # load sgn,exp 9388 mov.w %d1,%d2 # make a copy 9389 andi.l &0x7fff,%d1 # strip sign 9390 sub.l %d0,%d1 # add scale factor 9391 andi.w &0x8000,%d2 # keep old sign 9392 or.w %d1,%d2 # concat old sign,new exp 9393 mov.w %d2,FP_SCR0_EX(%a6) # insert new exponent 9394 mov.l (%sp)+,%d2 # restore d2 9395 fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0 9396 rts 9397 9398# 9399# operand is to be rounded to double precision 9400# 9401fneg_dbl: 9402 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) 9403 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) 9404 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) 9405 bsr.l scale_to_zero_src # calculate scale factor 9406 9407 cmpi.l %d0,&0x3fff-0x3c00 # will move in underflow? 9408 bge.b fneg_sd_unfl # yes; go handle underflow 9409 cmpi.l %d0,&0x3fff-0x43fe # will move in overflow? 9410 beq.w fneg_sd_may_ovfl # maybe; go check 9411 blt.w fneg_sd_ovfl # yes; go handle overflow 9412 bra.w fneg_sd_normal # no; ho handle normalized op 9413 9414# 9415# operand WILL underflow when moved in to the fp register file 9416# 9417fneg_sd_unfl: 9418 bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit 9419 9420 eori.b &0x80,FP_SCR0_EX(%a6) # negate sign 9421 bpl.b fneg_sd_unfl_tst 9422 bset &neg_bit,FPSR_CC(%a6) # set 'N' ccode bit 9423 9424# if underflow or inexact is enabled, go calculate EXOP first. 
9425fneg_sd_unfl_tst: 9426 mov.b FPCR_ENABLE(%a6),%d1 9427 andi.b &0x0b,%d1 # is UNFL or INEX enabled? 9428 bne.b fneg_sd_unfl_ena # yes 9429 9430fneg_sd_unfl_dis: 9431 lea FP_SCR0(%a6),%a0 # pass: result addr 9432 mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode 9433 bsr.l unf_res # calculate default result 9434 or.b %d0,FPSR_CC(%a6) # unf_res may have set 'Z' 9435 fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0 9436 rts 9437 9438# 9439# operand will underflow AND underflow is enabled. 9440# therefore, we must return the result rounded to extended precision. 9441# 9442fneg_sd_unfl_ena: 9443 mov.l FP_SCR0_HI(%a6),FP_SCR1_HI(%a6) 9444 mov.l FP_SCR0_LO(%a6),FP_SCR1_LO(%a6) 9445 mov.w FP_SCR0_EX(%a6),%d1 # load current exponent 9446 9447 mov.l %d2,-(%sp) # save d2 9448 mov.l %d1,%d2 # make a copy 9449 andi.l &0x7fff,%d1 # strip sign 9450 andi.w &0x8000,%d2 # keep old sign 9451 sub.l %d0,%d1 # subtract scale factor 9452 addi.l &0x6000,%d1 # add new bias 9453 andi.w &0x7fff,%d1 9454 or.w %d2,%d1 # concat new sign,new exp 9455 mov.w %d1,FP_SCR1_EX(%a6) # insert new exp 9456 fmovm.x FP_SCR1(%a6),&0x40 # return EXOP in fp1 9457 mov.l (%sp)+,%d2 # restore d2 9458 bra.b fneg_sd_unfl_dis 9459 9460# 9461# operand WILL overflow. 9462# 9463fneg_sd_ovfl: 9464 fmov.l &0x0,%fpsr # clear FPSR 9465 fmov.l L_SCR3(%a6),%fpcr # set FPCR 9466 9467 fneg.x FP_SCR0(%a6),%fp0 # perform negation 9468 9469 fmov.l &0x0,%fpcr # clear FPCR 9470 fmov.l %fpsr,%d1 # save FPSR 9471 9472 or.l %d1,USER_FPSR(%a6) # save INEX2,N 9473 9474fneg_sd_ovfl_tst: 9475 or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex 9476 9477 mov.b FPCR_ENABLE(%a6),%d1 9478 andi.b &0x13,%d1 # is OVFL or INEX enabled? 9479 bne.b fneg_sd_ovfl_ena # yes 9480 9481# 9482# OVFL is not enabled; therefore, we must create the default result by 9483# calling ovf_res(). 9484# 9485fneg_sd_ovfl_dis: 9486 btst &neg_bit,FPSR_CC(%a6) # is result negative? 
9487 sne %d1 # set sign param accordingly 9488 mov.l L_SCR3(%a6),%d0 # pass: prec,mode 9489 bsr.l ovf_res # calculate default result 9490 or.b %d0,FPSR_CC(%a6) # set INF,N if applicable 9491 fmovm.x (%a0),&0x80 # return default result in fp0 9492 rts 9493 9494# 9495# OVFL is enabled. 9496# the INEX2 bit has already been updated by the round to the correct precision. 9497# now, round to extended(and don't alter the FPSR). 9498# 9499fneg_sd_ovfl_ena: 9500 mov.l %d2,-(%sp) # save d2 9501 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp} 9502 mov.l %d1,%d2 # make a copy 9503 andi.l &0x7fff,%d1 # strip sign 9504 andi.w &0x8000,%d2 # keep old sign 9505 sub.l %d0,%d1 # add scale factor 9506 subi.l &0x6000,%d1 # subtract bias 9507 andi.w &0x7fff,%d1 9508 or.w %d2,%d1 # concat sign,exp 9509 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent 9510 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1 9511 mov.l (%sp)+,%d2 # restore d2 9512 bra.b fneg_sd_ovfl_dis 9513 9514# 9515# the move in MAY underflow. so... 9516# 9517fneg_sd_may_ovfl: 9518 fmov.l &0x0,%fpsr # clear FPSR 9519 fmov.l L_SCR3(%a6),%fpcr # set FPCR 9520 9521 fneg.x FP_SCR0(%a6),%fp0 # perform negation 9522 9523 fmov.l %fpsr,%d1 # save status 9524 fmov.l &0x0,%fpcr # clear FPCR 9525 9526 or.l %d1,USER_FPSR(%a6) # save INEX2,N 9527 9528 fabs.x %fp0,%fp1 # make a copy of result 9529 fcmp.b %fp1,&0x2 # is |result| >= 2.b? 9530 fbge.w fneg_sd_ovfl_tst # yes; overflow has occurred 9531 9532# no, it didn't overflow; we have correct result 9533 bra.w fneg_sd_normal_exit 9534 9535########################################################################## 9536 9537# 9538# input is not normalized; what is it? 9539# 9540fneg_not_norm: 9541 cmpi.b %d1,&DENORM # weed out DENORM 9542 beq.w fneg_denorm 9543 cmpi.b %d1,&SNAN # weed out SNAN 9544 beq.l res_snan_1op 9545 cmpi.b %d1,&QNAN # weed out QNAN 9546 beq.l res_qnan_1op 9547 9548# 9549# do the fneg; at this point, only possible ops are ZERO and INF. 9550# use fneg to determine ccodes. 
9551# prec:mode should be zero at this point but it won't affect answer anyways. 9552# 9553 fneg.x SRC_EX(%a0),%fp0 # do fneg 9554 fmov.l %fpsr,%d0 9555 rol.l &0x8,%d0 # put ccodes in lo byte 9556 mov.b %d0,FPSR_CC(%a6) # insert correct ccodes 9557 rts 9558 9559######################################################################### 9560# XDEF **************************************************************** # 9561# ftst(): emulates the ftest instruction # 9562# # 9563# XREF **************************************************************** # 9564# res{s,q}nan_1op() - set NAN result for monadic instruction # 9565# # 9566# INPUT *************************************************************** # 9567# a0 = pointer to extended precision source operand # 9568# # 9569# OUTPUT ************************************************************** # 9570# none # 9571# # 9572# ALGORITHM *********************************************************** # 9573# Check the source operand tag (STAG) and set the FPCR according # 9574# to the operand type and sign. # 9575# # 9576######################################################################### 9577 9578 global ftst 9579ftst: 9580 mov.b STAG(%a6),%d1 9581 bne.b ftst_not_norm # optimize on non-norm input 9582 9583# 9584# Norm: 9585# 9586ftst_norm: 9587 tst.b SRC_EX(%a0) # is operand negative? 9588 bmi.b ftst_norm_m # yes 9589 rts 9590ftst_norm_m: 9591 mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit 9592 rts 9593 9594# 9595# input is not normalized; what is it? 9596# 9597ftst_not_norm: 9598 cmpi.b %d1,&ZERO # weed out ZERO 9599 beq.b ftst_zero 9600 cmpi.b %d1,&INF # weed out INF 9601 beq.b ftst_inf 9602 cmpi.b %d1,&SNAN # weed out SNAN 9603 beq.l res_snan_1op 9604 cmpi.b %d1,&QNAN # weed out QNAN 9605 beq.l res_qnan_1op 9606 9607# 9608# Denorm: 9609# 9610ftst_denorm: 9611 tst.b SRC_EX(%a0) # is operand negative? 
9612 bmi.b ftst_denorm_m # yes 9613 rts 9614ftst_denorm_m: 9615 mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit 9616 rts 9617 9618# 9619# Infinity: 9620# 9621ftst_inf: 9622 tst.b SRC_EX(%a0) # is operand negative? 9623 bmi.b ftst_inf_m # yes 9624ftst_inf_p: 9625 mov.b &inf_bmask,FPSR_CC(%a6) # set 'I' ccode bit 9626 rts 9627ftst_inf_m: 9628 mov.b &inf_bmask+neg_bmask,FPSR_CC(%a6) # set 'I','N' ccode bits 9629 rts 9630 9631# 9632# Zero: 9633# 9634ftst_zero: 9635 tst.b SRC_EX(%a0) # is operand negative? 9636 bmi.b ftst_zero_m # yes 9637ftst_zero_p: 9638 mov.b &z_bmask,FPSR_CC(%a6) # set 'N' ccode bit 9639 rts 9640ftst_zero_m: 9641 mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits 9642 rts 9643 9644######################################################################### 9645# XDEF **************************************************************** # 9646# fint(): emulates the fint instruction # 9647# # 9648# XREF **************************************************************** # 9649# res_{s,q}nan_1op() - set NAN result for monadic operation # 9650# # 9651# INPUT *************************************************************** # 9652# a0 = pointer to extended precision source operand # 9653# d0 = round precision/mode # 9654# # 9655# OUTPUT ************************************************************** # 9656# fp0 = result # 9657# # 9658# ALGORITHM *********************************************************** # 9659# Separate according to operand type. Unnorms don't pass through # 9660# here. For norms, load the rounding mode/prec, execute a "fint", then # 9661# store the resulting FPSR bits. # 9662# For denorms, force the j-bit to a one and do the same as for # 9663# norms. Denorms are so low that the answer will either be a zero or a # 9664# one. # 9665# For zeroes/infs/NANs, return the same while setting the FPSR # 9666# as appropriate. 
#									#
#########################################################################

	global		fint
fint:
	mov.b		STAG(%a6),%d1		# STAG == 0 means NORM
	bne.b		fint_not_norm		# optimize on non-norm input

#
# Norm:
#
fint_norm:
	andi.b		&0x30,%d0		# keep rnd mode; prec = ext

	fmov.l		%d0,%fpcr		# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fint.x		SRC(%a0),%fp0		# execute fint

	fmov.l		&0x0,%fpcr		# clear FPCR
	fmov.l		%fpsr,%d0		# save FPSR
	or.l		%d0,USER_FPSR(%a6)	# set exception bits

	rts

#
# input is not normalized; what is it?
#
fint_not_norm:
	cmpi.b		%d1,&ZERO		# weed out ZERO
	beq.b		fint_zero
	cmpi.b		%d1,&INF		# weed out INF
	beq.b		fint_inf
	cmpi.b		%d1,&DENORM		# weed out DENORM
	beq.b		fint_denorm
	cmpi.b		%d1,&SNAN		# weed out SNAN
	beq.l		res_snan_1op
	bra.l		res_qnan_1op		# weed out QNAN

#
# Denorm:
#
# for DENORMs, the result will be either (+/-)ZERO or (+/-)1.
# also, the INEX2 and AINEX exception bits will be set.
# so, we could either set these manually or force the DENORM
# to a very small NORM and ship it to the NORM routine.
# I do the latter.
#
fint_denorm:
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6) # copy sign, zero exp
	mov.b		&0x80,FP_SCR0_HI(%a6)	# force DENORM ==> small NORM
	lea		FP_SCR0(%a6),%a0	# pass ptr to fabricated NORM
	bra.b		fint_norm

#
# Zero: result is the same signed zero.
#
fint_zero:
	tst.b		SRC_EX(%a0)		# is ZERO negative?
	bmi.b		fint_zero_m		# yes
fint_zero_p:
	fmov.s		&0x00000000,%fp0	# return +ZERO in fp0
	mov.b		&z_bmask,FPSR_CC(%a6)	# set 'Z' ccode bit
	rts
fint_zero_m:
	fmov.s		&0x80000000,%fp0	# return -ZERO in fp0
	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits
	rts

#
# Infinity: result is the same signed infinity.
#
fint_inf:
	fmovm.x		SRC(%a0),&0x80		# return result in fp0
	tst.b		SRC_EX(%a0)		# is INF negative?
	bmi.b		fint_inf_m		# yes
fint_inf_p:
	mov.b		&inf_bmask,FPSR_CC(%a6)	# set 'I' ccode bit
	rts
fint_inf_m:
	mov.b		&inf_bmask+neg_bmask,FPSR_CC(%a6) # set 'N','I' ccode bits
	rts

#########################################################################
# XDEF ****************************************************************	#
#	fintrz(): emulates the fintrz instruction			#
#									#
# XREF ****************************************************************	#
#	res_{s,q}nan_1op() - set NAN result for monadic operation	#
#									#
# INPUT ***************************************************************	#
#	a0 = pointer to extended precision source operand		#
#	d0 = round precision/mode					#
#									#
# OUTPUT **************************************************************	#
#	fp0 = result							#
#									#
# ALGORITHM ***********************************************************	#
#	Separate according to operand type. Unnorms don't pass through	#
# here. For norms, load the rounding mode/prec, execute a "fintrz",	#
# then store the resulting FPSR bits.					#
#	For denorms, force the j-bit to a one and do the same as for	#
# norms. Denorms are so low that the answer will either be a zero or a	#
# one.									#
#	For zeroes/infs/NANs, return the same while setting the FPSR	#
# as appropriate.							#
#									#
#########################################################################

	global		fintrz
fintrz:
	mov.b		STAG(%a6),%d1		# STAG == 0 means NORM
	bne.b		fintrz_not_norm		# optimize on non-norm input

#
# Norm:
# (fintrz always truncates, so the user's rnd mode/prec need not be
# loaded into the FPCR here)
#
fintrz_norm:
	fmov.l		&0x0,%fpsr		# clear FPSR

	fintrz.x	SRC(%a0),%fp0		# execute fintrz

	fmov.l		%fpsr,%d0		# save FPSR
	or.l		%d0,USER_FPSR(%a6)	# set exception bits

	rts

#
# input is not normalized; what is it?
#
fintrz_not_norm:
	cmpi.b		%d1,&ZERO		# weed out ZERO
	beq.b		fintrz_zero
	cmpi.b		%d1,&INF		# weed out INF
	beq.b		fintrz_inf
	cmpi.b		%d1,&DENORM		# weed out DENORM
	beq.b		fintrz_denorm
	cmpi.b		%d1,&SNAN		# weed out SNAN
	beq.l		res_snan_1op
	bra.l		res_qnan_1op		# weed out QNAN

#
# Denorm:
#
# for DENORMs, the result will be (+/-)ZERO.
# also, the INEX2 and AINEX exception bits will be set.
# so, we could either set these manually or force the DENORM
# to a very small NORM and ship it to the NORM routine.
# I do the latter.
#
fintrz_denorm:
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6) # copy sign, zero exp
	mov.b		&0x80,FP_SCR0_HI(%a6)	# force DENORM ==> small NORM
	lea		FP_SCR0(%a6),%a0	# pass ptr to fabricated NORM
	bra.b		fintrz_norm

#
# Zero: result is the same signed zero.
#
fintrz_zero:
	tst.b		SRC_EX(%a0)		# is ZERO negative?
	bmi.b		fintrz_zero_m		# yes
fintrz_zero_p:
	fmov.s		&0x00000000,%fp0	# return +ZERO in fp0
	mov.b		&z_bmask,FPSR_CC(%a6)	# set 'Z' ccode bit
	rts
fintrz_zero_m:
	fmov.s		&0x80000000,%fp0	# return -ZERO in fp0
	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits
	rts

#
# Infinity: result is the same signed infinity.
#
fintrz_inf:
	fmovm.x		SRC(%a0),&0x80		# return result in fp0
	tst.b		SRC_EX(%a0)		# is INF negative?
	bmi.b		fintrz_inf_m		# yes
fintrz_inf_p:
	mov.b		&inf_bmask,FPSR_CC(%a6)	# set 'I' ccode bit
	rts
fintrz_inf_m:
	mov.b		&inf_bmask+neg_bmask,FPSR_CC(%a6) # set 'N','I' ccode bits
	rts

#########################################################################
# XDEF ****************************************************************	#
#	fabs():  emulates the fabs instruction				#
#	fsabs(): emulates the fsabs instruction				#
#	fdabs(): emulates the fdabs instruction				#
#									#
# XREF ****************************************************************	#
#	norm() - normalize denorm mantissa to provide EXOP		#
#	scale_to_zero_src() - make exponent. = 0; get scale factor	#
#	unf_res() - calculate underflow result				#
#	ovf_res() - calculate overflow result				#
#	res_{s,q}nan_1op() - set NAN result for monadic operation	#
#									#
# INPUT ***************************************************************	#
#	a0 = pointer to extended precision source operand		#
#	d0 = rnd precision/mode						#
#									#
# OUTPUT **************************************************************	#
#	fp0 = result							#
#	fp1 = EXOP (if exception occurred)				#
#									#
# ALGORITHM ***********************************************************	#
#	Handle NANs, infinities, and zeroes as special cases. Divide	#
# norms into extended, single, and double precision.			#
#	Simply clear sign for extended precision norm. Ext prec denorm	#
# gets an EXOP created for it since it's an underflow.			#
#	Double and single precision can overflow and underflow. First,	#
# scale the operand such that the exponent is zero. Perform an "fabs"	#
# using the correct rnd mode/prec. Check to see if the original		#
# exponent would take an exception. If so, use unf_res() or ovf_res()	#
# to calculate the default result. Also, create the EXOP for the	#
# exceptional case.
# If no exception should occur, insert the correct			#
# result exponent and return.						#
#	Unnorms don't pass through here.				#
#									#
#########################################################################

	global		fsabs
fsabs:
	andi.b		&0x30,%d0		# clear rnd prec
	ori.b		&s_mode*0x10,%d0	# insert sgl precision
	bra.b		fabs

	global		fdabs
fdabs:
	andi.b		&0x30,%d0		# clear rnd prec
	ori.b		&d_mode*0x10,%d0	# insert dbl precision

	global		fabs
fabs:
	mov.l		%d0,L_SCR3(%a6)		# store rnd info
	mov.b		STAG(%a6),%d1		# STAG == 0 means NORM
	bne.w		fabs_not_norm		# optimize on non-norm input

#
# ABSOLUTE VALUE: norms and denorms ONLY!
#
fabs_norm:
	andi.b		&0xc0,%d0		# is precision extended?
	bne.b		fabs_not_ext		# no; go handle sgl or dbl

#
# precision selected is extended. so...we can not get an underflow
# or overflow because of rounding to the correct precision. so...
# skip the scaling and unscaling...
#
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	mov.w		SRC_EX(%a0),%d1
	bclr		&15,%d1			# force absolute value
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert exponent
	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
	rts

#
# for an extended precision DENORM, the UNFL exception bit is set
# the accrued bit is NOT set in this instance(no inexactness!)
#
fabs_denorm:
	andi.b		&0xc0,%d0		# is precision extended?
	bne.b		fabs_not_ext		# no

	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit

	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	mov.w		SRC_EX(%a0),%d0
	bclr		&15,%d0			# clear sign
	mov.w		%d0,FP_SCR0_EX(%a6)	# insert exponent

	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0

	btst		&unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled?
	bne.b		fabs_ext_unfl_ena
	rts

#
# the input is an extended DENORM and underflow is enabled in the FPCR.
# normalize the mantissa and add the bias of 0x6000 to the resulting negative
# exponent and insert back into the operand.
#
fabs_ext_unfl_ena:
	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
	bsr.l		norm			# normalize result
	neg.w		%d0			# new exponent = -(shft val)
	addi.w		&0x6000,%d0		# add new bias to exponent
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch old sign,exp
	andi.w		&0x8000,%d1		# keep old sign
	andi.w		&0x7fff,%d0		# clear sign position
	or.w		%d1,%d0			# concat old sign, new exponent
	mov.w		%d0,FP_SCR0_EX(%a6)	# insert new exponent
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	rts

#
# operand is either single or double
#
fabs_not_ext:
	cmpi.b		%d0,&s_mode*0x10	# separate sgl/dbl prec
	bne.b		fabs_dbl

#
# operand is to be rounded to single precision
#
fabs_sgl:
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	bsr.l		scale_to_zero_src	# calculate scale factor

	cmpi.l		%d0,&0x3fff-0x3f80	# will move in underflow?
	bge.w		fabs_sd_unfl		# yes; go handle underflow
	cmpi.l		%d0,&0x3fff-0x407e	# will move in overflow?
	beq.w		fabs_sd_may_ovfl	# maybe; go check
	blt.w		fabs_sd_ovfl		# yes; go handle overflow

#
# operand will NOT overflow or underflow when moved in to the fp reg file
#
fabs_sd_normal:
	fmov.l		&0x0,%fpsr		# clear FPSR
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

	fabs.x		FP_SCR0(%a6),%fp0	# perform absolute

	fmov.l		%fpsr,%d1		# save FPSR
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

fabs_sd_normal_exit:
	mov.l		%d2,-(%sp)		# save d2
	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
	mov.w		FP_SCR0_EX(%a6),%d1	# load sgn,exp
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	sub.l		%d0,%d1			# subtract scale factor
	andi.w		&0x8000,%d2		# keep old sign
	or.w		%d1,%d2			# concat old sign,new exp
	mov.w		%d2,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
	rts

#
# operand is to be rounded to double precision
#
fabs_dbl:
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	bsr.l		scale_to_zero_src	# calculate scale factor

	cmpi.l		%d0,&0x3fff-0x3c00	# will move in underflow?
	bge.b		fabs_sd_unfl		# yes; go handle underflow
	cmpi.l		%d0,&0x3fff-0x43fe	# will move in overflow?
	beq.w		fabs_sd_may_ovfl	# maybe; go check
	blt.w		fabs_sd_ovfl		# yes; go handle overflow
	bra.w		fabs_sd_normal		# no; go handle normalized op

#
# operand WILL underflow when moved in to the fp register file
#
fabs_sd_unfl:
	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit

	bclr		&0x7,FP_SCR0_EX(%a6)	# force absolute value

# if underflow or inexact is enabled, go calculate EXOP first.
	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
	bne.b		fabs_sd_unfl_ena	# yes

fabs_sd_unfl_dis:
	lea		FP_SCR0(%a6),%a0	# pass: result addr
	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
	bsr.l		unf_res			# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# set possible 'Z' ccode
	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
	rts

#
# operand will underflow AND underflow is enabled.
# therefore, we must return the result rounded to extended precision.
#
fabs_sd_unfl_ena:
	mov.l		FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
	mov.l		FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
	mov.w		FP_SCR0_EX(%a6),%d1	# load current exponent

	mov.l		%d2,-(%sp)		# save d2
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# subtract scale factor
	addi.l		&0x6000,%d1		# add new bias
	andi.w		&0x7fff,%d1
	or.w		%d2,%d1			# concat new sign,new exp
	mov.w		%d1,FP_SCR1_EX(%a6)	# insert new exp
	fmovm.x		FP_SCR1(%a6),&0x40	# return EXOP in fp1
	mov.l		(%sp)+,%d2		# restore d2
	bra.b		fabs_sd_unfl_dis

#
# operand WILL overflow.
#
fabs_sd_ovfl:
	fmov.l		&0x0,%fpsr		# clear FPSR
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

	fabs.x		FP_SCR0(%a6),%fp0	# perform absolute

	fmov.l		&0x0,%fpcr		# clear FPCR
	fmov.l		%fpsr,%d1		# save FPSR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

fabs_sd_ovfl_tst:
	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
	bne.b		fabs_sd_ovfl_ena	# yes

#
# OVFL is not enabled; therefore, we must create the default result by
# calling ovf_res().
#
fabs_sd_ovfl_dis:
	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
	sne		%d1			# set sign param accordingly
	mov.l		L_SCR3(%a6),%d0		# pass: prec,mode
	bsr.l		ovf_res			# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
	fmovm.x		(%a0),&0x80		# return default result in fp0
	rts

#
# OVFL is enabled.
# the INEX2 bit has already been updated by the round to the correct precision.
# now, round to extended(and don't alter the FPSR).
#
fabs_sd_ovfl_ena:
	mov.l		%d2,-(%sp)		# save d2
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# subtract scale factor
	subi.l		&0x6000,%d1		# subtract bias
	andi.w		&0x7fff,%d1
	or.w		%d2,%d1			# concat sign,exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	mov.l		(%sp)+,%d2		# restore d2
	bra.b		fabs_sd_ovfl_dis

#
# the move in MAY overflow. so...
#
fabs_sd_may_ovfl:
	fmov.l		&0x0,%fpsr		# clear FPSR
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

	fabs.x		FP_SCR0(%a6),%fp0	# perform absolute

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

	fabs.x		%fp0,%fp1		# make a copy of result
	fcmp.b		%fp1,&0x2		# is |result| >= 2.b?
	fbge.w		fabs_sd_ovfl_tst	# yes; overflow has occurred

# no, it didn't overflow; we have correct result
	bra.w		fabs_sd_normal_exit

##########################################################################

#
# input is not normalized; what is it?
#
fabs_not_norm:
	cmpi.b		%d1,&DENORM		# weed out DENORM
	beq.w		fabs_denorm
	cmpi.b		%d1,&SNAN		# weed out SNAN
	beq.l		res_snan_1op
	cmpi.b		%d1,&QNAN		# weed out QNAN
	beq.l		res_qnan_1op

# only ZERO and INF remain; both just have the sign bit cleared.
	fabs.x		SRC(%a0),%fp0		# force absolute value

	cmpi.b		%d1,&INF		# weed out INF
	beq.b		fabs_inf
fabs_zero:
	mov.b		&z_bmask,FPSR_CC(%a6)	# set 'Z' ccode bit
	rts
fabs_inf:
	mov.b		&inf_bmask,FPSR_CC(%a6)	# set 'I' ccode bit
	rts

#########################################################################
# XDEF ****************************************************************	#
#	fcmp(): fp compare op routine					#
#									#
# XREF ****************************************************************	#
#	res_qnan() - return QNAN result					#
#	res_snan() - return SNAN result					#
#									#
# INPUT ***************************************************************	#
#	a0 = pointer to extended precision source operand		#
#	a1 = pointer to extended precision destination operand		#
#	d0 = round prec/mode						#
#									#
# OUTPUT **************************************************************	#
#	None								#
#									#
# ALGORITHM ***********************************************************	#
#	Handle NANs and denorms as special cases. For everything else,	#
# just use the actual fcmp instruction to produce the correct condition	#
# codes.								#
#									#
#########################################################################

	global		fcmp
fcmp:
	clr.w		%d1
	mov.b		DTAG(%a6),%d1
	lsl.b		&0x3,%d1
	or.b		STAG(%a6),%d1		# d1 = (DTAG << 3) | STAG
	bne.b		fcmp_not_norm		# optimize on non-norm input

#
# COMPARE FP OPs : NORMs, ZEROs, INFs, and "corrected" DENORMs
#
fcmp_norm:
	fmovm.x		DST(%a1),&0x80		# load dst op

	fcmp.x		%fp0,SRC(%a0)		# do compare

	fmov.l		%fpsr,%d0		# save FPSR
	rol.l		&0x8,%d0		# extract ccode bits
	mov.b		%d0,FPSR_CC(%a6)	# set ccode bits(no exc bits are set)

	rts

#
# fcmp: inputs are not both normalized; what are they?
# jump through tbl_fcmp_op, indexed by the combined DTAG/STAG in d1.
#
fcmp_not_norm:
	mov.w		(tbl_fcmp_op.b,%pc,%d1.w*2),%d1
	jmp		(tbl_fcmp_op.b,%pc,%d1.w*1)

	swbeg		&48
tbl_fcmp_op:
	short		fcmp_norm	- tbl_fcmp_op # NORM - NORM
	short		fcmp_norm	- tbl_fcmp_op # NORM - ZERO
	short		fcmp_norm	- tbl_fcmp_op # NORM - INF
	short		fcmp_res_qnan	- tbl_fcmp_op # NORM - QNAN
	short		fcmp_nrm_dnrm	- tbl_fcmp_op # NORM - DENORM
	short		fcmp_res_snan	- tbl_fcmp_op # NORM - SNAN
	short		tbl_fcmp_op	- tbl_fcmp_op #
	short		tbl_fcmp_op	- tbl_fcmp_op #

	short		fcmp_norm	- tbl_fcmp_op # ZERO - NORM
	short		fcmp_norm	- tbl_fcmp_op # ZERO - ZERO
	short		fcmp_norm	- tbl_fcmp_op # ZERO - INF
	short		fcmp_res_qnan	- tbl_fcmp_op # ZERO - QNAN
	short		fcmp_dnrm_s	- tbl_fcmp_op # ZERO - DENORM
	short		fcmp_res_snan	- tbl_fcmp_op # ZERO - SNAN
	short		tbl_fcmp_op	- tbl_fcmp_op #
	short		tbl_fcmp_op	- tbl_fcmp_op #

	short		fcmp_norm	- tbl_fcmp_op # INF - NORM
	short		fcmp_norm	- tbl_fcmp_op # INF - ZERO
	short		fcmp_norm	- tbl_fcmp_op # INF - INF
	short		fcmp_res_qnan	- tbl_fcmp_op # INF - QNAN
	short		fcmp_dnrm_s	- tbl_fcmp_op # INF - DENORM
	short		fcmp_res_snan	- tbl_fcmp_op # INF - SNAN
	short		tbl_fcmp_op	- tbl_fcmp_op #
	short		tbl_fcmp_op	- tbl_fcmp_op #

	short		fcmp_res_qnan	- tbl_fcmp_op # QNAN - NORM
	short		fcmp_res_qnan	- tbl_fcmp_op # QNAN - ZERO
	short		fcmp_res_qnan	- tbl_fcmp_op # QNAN - INF
	short		fcmp_res_qnan	- tbl_fcmp_op # QNAN - QNAN
	short		fcmp_res_qnan	- tbl_fcmp_op # QNAN - DENORM
	short		fcmp_res_snan	- tbl_fcmp_op # QNAN - SNAN
	short		tbl_fcmp_op	- tbl_fcmp_op #
	short		tbl_fcmp_op	- tbl_fcmp_op #

	short		fcmp_dnrm_nrm	- tbl_fcmp_op # DENORM - NORM
	short		fcmp_dnrm_d	- tbl_fcmp_op # DENORM - ZERO
	short		fcmp_dnrm_d	- tbl_fcmp_op # DENORM - INF
	short		fcmp_res_qnan	- tbl_fcmp_op # DENORM - QNAN
	short		fcmp_dnrm_sd	- tbl_fcmp_op # DENORM - DENORM
	short		fcmp_res_snan	- tbl_fcmp_op # DENORM - SNAN
	short		tbl_fcmp_op	- tbl_fcmp_op #
	short		tbl_fcmp_op	- tbl_fcmp_op #

	short		fcmp_res_snan	- tbl_fcmp_op # SNAN - NORM
	short		fcmp_res_snan	- tbl_fcmp_op # SNAN - ZERO
	short		fcmp_res_snan	- tbl_fcmp_op # SNAN - INF
	short		fcmp_res_snan	- tbl_fcmp_op # SNAN - QNAN
	short		fcmp_res_snan	- tbl_fcmp_op # SNAN - DENORM
	short		fcmp_res_snan	- tbl_fcmp_op # SNAN - SNAN
	short		tbl_fcmp_op	- tbl_fcmp_op #
	short		tbl_fcmp_op	- tbl_fcmp_op #

# unlike all other functions for QNAN and SNAN, fcmp does NOT set the
# 'N' bit for a negative QNAN or SNAN input so we must squelch it here.
fcmp_res_qnan:
	bsr.l		res_qnan
	andi.b		&0xf7,FPSR_CC(%a6)	# clear 'N' ccode bit
	rts
fcmp_res_snan:
	bsr.l		res_snan
	andi.b		&0xf7,FPSR_CC(%a6)	# clear 'N' ccode bit
	rts

#
# DENORMs are a little more difficult.
# If you have a 2 DENORMs, then you can just force the j-bit to a one
# and use the fcmp_norm routine.
# If you have a DENORM and an INF or ZERO, just force the DENORM's j-bit to a one
# and use the fcmp_norm routine.
# If you have a DENORM and a NORM with opposite signs, then use fcmp_norm, also.
# But with a DENORM and a NORM of the same sign, the neg bit is set if the
#	(1) signs are (+) and the DENORM is the dst or
#	(2) signs are (-) and the DENORM is the src
#

fcmp_dnrm_s:
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),%d0
	bset		&31,%d0			# DENORM src; make into small norm
	mov.l		%d0,FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	lea		FP_SCR0(%a6),%a0
	bra.w		fcmp_norm

fcmp_dnrm_d:
# NOTE(review): the .l copy of DST_EX also writes the unused pad word of
# FP_SCR0; only the exponent word is significant — confirm against FP_SCR0 layout.
	mov.l		DST_EX(%a1),FP_SCR0_EX(%a6)
	mov.l		DST_HI(%a1),%d0
	bset		&31,%d0			# DENORM dst; make into small norm
	mov.l		%d0,FP_SCR0_HI(%a6)
	mov.l		DST_LO(%a1),FP_SCR0_LO(%a6)
	lea		FP_SCR0(%a6),%a1
	bra.w		fcmp_norm

fcmp_dnrm_sd:
	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		DST_HI(%a1),%d0
	bset		&31,%d0			# DENORM dst; make into small norm
	mov.l		%d0,FP_SCR1_HI(%a6)
	mov.l		SRC_HI(%a0),%d0
	bset		&31,%d0			# DENORM src; make into small norm
	mov.l		%d0,FP_SCR0_HI(%a6)
	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	lea		FP_SCR1(%a6),%a1
	lea		FP_SCR0(%a6),%a0
	bra.w		fcmp_norm

fcmp_nrm_dnrm:
	mov.b		SRC_EX(%a0),%d0		# determine if like signs
	mov.b		DST_EX(%a1),%d1
	eor.b		%d0,%d1
	bmi.w		fcmp_dnrm_s

# signs are the same, so must determine the answer ourselves.
	tst.b		%d0			# is src op negative?
	bmi.b		fcmp_nrm_dnrm_m		# yes
	rts
fcmp_nrm_dnrm_m:
	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
	rts

fcmp_dnrm_nrm:
	mov.b		SRC_EX(%a0),%d0		# determine if like signs
	mov.b		DST_EX(%a1),%d1
	eor.b		%d0,%d1
	bmi.w		fcmp_dnrm_d

# signs are the same, so must determine the answer ourselves.
	tst.b		%d0			# is src op negative?
	bpl.b		fcmp_dnrm_nrm_m		# no
	rts
fcmp_dnrm_nrm_m:
	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
	rts

#########################################################################
# XDEF ****************************************************************	#
#	fsglmul(): emulates the fsglmul instruction			#
#									#
# XREF ****************************************************************	#
#	scale_to_zero_src() - scale src exponent to zero		#
#	scale_to_zero_dst() - scale dst exponent to zero		#
#	unf_res4() - return default underflow result for sglop		#
#	ovf_res() - return default overflow result			#
#	res_qnan() - return QNAN result					#
#	res_snan() - return SNAN result					#
#									#
# INPUT ***************************************************************	#
#	a0 = pointer to extended precision source operand		#
#	a1 = pointer to extended precision destination operand		#
#	d0  rnd prec,mode						#
#									#
# OUTPUT **************************************************************	#
#	fp0 = result							#
#	fp1 = EXOP (if exception occurred)				#
#									#
# ALGORITHM ***********************************************************	#
#	Handle NANs, infinities, and zeroes as special cases. Divide	#
# norms/denorms into ext/sgl/dbl precision.				#
#	For norms/denorms, scale the exponents such that a multiply	#
# instruction won't cause an exception. Use the regular fsglmul to	#
# compute a result. Check if the regular operands would have taken	#
# an exception. If so, return the default overflow/underflow result	#
# and return the EXOP if exceptions are enabled. Else, scale the	#
# result operand to the proper exponent.
#									#
#########################################################################

	global		fsglmul
fsglmul:
	mov.l		%d0,L_SCR3(%a6)		# store rnd info

	clr.w		%d1
	mov.b		DTAG(%a6),%d1
	lsl.b		&0x3,%d1
	or.b		STAG(%a6),%d1		# d1 = (DTAG << 3) | STAG

	bne.w		fsglmul_not_norm	# optimize on non-norm input

fsglmul_norm:
	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)
	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)

	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)

	bsr.l		scale_to_zero_src	# scale src exponent
	mov.l		%d0,-(%sp)		# save scale factor 1

	bsr.l		scale_to_zero_dst	# scale dst exponent

	add.l		(%sp)+,%d0		# SCALE_FACTOR = scale1 + scale2

	cmpi.l		%d0,&0x3fff-0x7ffe	# would result ovfl?
	beq.w		fsglmul_may_ovfl	# result may rnd to overflow
	blt.w		fsglmul_ovfl		# result will overflow

	cmpi.l		%d0,&0x3fff+0x0001	# would result unfl?
	beq.w		fsglmul_may_unfl	# result may rnd up; no unfl
	bgt.w		fsglmul_unfl		# result will underflow

fsglmul_normal:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsglmul.x	FP_SCR0(%a6),%fp0	# execute sgl multiply

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

fsglmul_normal_exit:
	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
	mov.l		%d2,-(%sp)		# save d2
	mov.w		FP_SCR0_EX(%a6),%d1	# load {sgn,exp}
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# subtract scale factor
	or.w		%d2,%d1			# concat old sign,new exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
	rts

fsglmul_ovfl:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsglmul.x	FP_SCR0(%a6),%fp0	# execute sgl multiply

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

fsglmul_ovfl_tst:

# save setting this until now because this is where fsglmul_may_ovfl may jump in
	or.l		&ovfl_inx_mask, USER_FPSR(%a6) # set ovfl/aovfl/ainex

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
	bne.b		fsglmul_ovfl_ena	# yes

fsglmul_ovfl_dis:
	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
	sne		%d1			# set sign param accordingly
	mov.l		L_SCR3(%a6),%d0		# pass prec:rnd
	andi.b		&0x30,%d0		# force prec = ext
	bsr.l		ovf_res			# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
	fmovm.x		(%a0),&0x80		# return default result in fp0
	rts

fsglmul_ovfl_ena:
	fmovm.x		&0x80,FP_SCR0(%a6)	# move result to stack

	mov.l		%d2,-(%sp)		# save d2
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	sub.l		%d0,%d1			# subtract scale factor
	subi.l		&0x6000,%d1		# subtract bias
	andi.w		&0x7fff,%d1
	andi.w		&0x8000,%d2		# keep old sign
	or.w		%d2,%d1			# concat old sign,new exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	bra.b		fsglmul_ovfl_dis

fsglmul_may_ovfl:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsglmul.x	FP_SCR0(%a6),%fp0	# execute sgl multiply

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

	fabs.x		%fp0,%fp1		# make a copy of result
	fcmp.b		%fp1,&0x2		# is |result| >= 2.b?
	fbge.w		fsglmul_ovfl_tst	# yes; overflow has occurred

# no, it didn't overflow; we have correct result
	bra.w		fsglmul_normal_exit

fsglmul_unfl:
	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit

	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR (round-to-zero)
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsglmul.x	FP_SCR0(%a6),%fp0	# execute sgl multiply

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
	bne.b		fsglmul_unfl_ena	# yes

fsglmul_unfl_dis:
	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result

	lea		FP_SCR0(%a6),%a0	# pass: result addr
	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
	bsr.l		unf_res4		# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# 'Z' bit may have been set
	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
	rts

#
# UNFL is enabled.
#
fsglmul_unfl_ena:
	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsglmul.x	FP_SCR0(%a6),%fp1	# execute sgl multiply

	fmov.l		&0x0,%fpcr		# clear FPCR

	fmovm.x		&0x40,FP_SCR0(%a6)	# save result to stack
	mov.l		%d2,-(%sp)		# save d2
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# subtract scale factor
	addi.l		&0x6000,%d1		# add bias
	andi.w		&0x7fff,%d1
	or.w		%d2,%d1			# concat old sign,new exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	bra.w		fsglmul_unfl_dis

fsglmul_may_unfl:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsglmul.x	FP_SCR0(%a6),%fp0	# execute sgl multiply

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

	fabs.x		%fp0,%fp1		# make a copy of result
	fcmp.b		%fp1,&0x2		# is |result| > 2.b?
	fbgt.w		fsglmul_normal_exit	# no; no underflow occurred
	fblt.w		fsglmul_unfl		# yes; underflow occurred

#
# we still don't know if underflow occurred. result is ~ equal to 2. but,
# we don't know if the result was an underflow that rounded up to a 2 or
# a normalized number that rounded down to a 2. so, redo the entire operation
# using RZ as the rounding mode to see what the pre-rounded result is.
# this case should be relatively rare.
#
	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into fp1

	mov.l		L_SCR3(%a6),%d1
	andi.b		&0xc0,%d1		# keep rnd prec
	ori.b		&rz_mode*0x10,%d1	# insert RZ

	fmov.l		%d1,%fpcr		# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsglmul.x	FP_SCR0(%a6),%fp1	# execute sgl multiply

	fmov.l		&0x0,%fpcr		# clear FPCR
	fabs.x		%fp1			# make absolute value
	fcmp.b		%fp1,&0x2		# is |result| < 2.b?
	fbge.w		fsglmul_normal_exit	# no; no underflow occurred
	bra.w		fsglmul_unfl		# yes, underflow occurred

##############################################################################

#
# Single Precision Multiply: inputs are not both normalized; what are they?
# jump through tbl_fsglmul_op, indexed by the combined DTAG/STAG in d1.
#
fsglmul_not_norm:
	mov.w		(tbl_fsglmul_op.b,%pc,%d1.w*2),%d1
	jmp		(tbl_fsglmul_op.b,%pc,%d1.w*1)

	swbeg		&48
tbl_fsglmul_op:
	short		fsglmul_norm		- tbl_fsglmul_op # NORM x NORM
	short		fsglmul_zero		- tbl_fsglmul_op # NORM x ZERO
	short		fsglmul_inf_src		- tbl_fsglmul_op # NORM x INF
	short		fsglmul_res_qnan	- tbl_fsglmul_op # NORM x QNAN
	short		fsglmul_norm		- tbl_fsglmul_op # NORM x DENORM
	short		fsglmul_res_snan	- tbl_fsglmul_op # NORM x SNAN
	short		tbl_fsglmul_op		- tbl_fsglmul_op #
	short		tbl_fsglmul_op		- tbl_fsglmul_op #

	short		fsglmul_zero		- tbl_fsglmul_op # ZERO x NORM
	short		fsglmul_zero		- tbl_fsglmul_op # ZERO x ZERO
	short		fsglmul_res_operr	- tbl_fsglmul_op # ZERO x INF
	short		fsglmul_res_qnan	- tbl_fsglmul_op # ZERO x QNAN
	short		fsglmul_zero		- tbl_fsglmul_op # ZERO x DENORM
	short		fsglmul_res_snan	- tbl_fsglmul_op # ZERO x SNAN
	short		tbl_fsglmul_op		- tbl_fsglmul_op #
	short		tbl_fsglmul_op		- tbl_fsglmul_op #

	short		fsglmul_inf_dst		- tbl_fsglmul_op # INF x NORM
	short		fsglmul_res_operr	- tbl_fsglmul_op # INF x ZERO
	short		fsglmul_inf_dst		- tbl_fsglmul_op # INF x INF
	short		fsglmul_res_qnan	- tbl_fsglmul_op # INF x QNAN
	short		fsglmul_inf_dst		- tbl_fsglmul_op # INF x DENORM
	short		fsglmul_res_snan	- tbl_fsglmul_op # INF x SNAN
	short		tbl_fsglmul_op		- tbl_fsglmul_op #
	short		tbl_fsglmul_op		- tbl_fsglmul_op #

	short		fsglmul_res_qnan	- tbl_fsglmul_op # QNAN x NORM
	short		fsglmul_res_qnan	- tbl_fsglmul_op # QNAN x ZERO
	short		fsglmul_res_qnan	- tbl_fsglmul_op # QNAN x INF
	short		fsglmul_res_qnan	- tbl_fsglmul_op # QNAN x QNAN
	short		fsglmul_res_qnan	- tbl_fsglmul_op # QNAN x DENORM
	short		fsglmul_res_snan	- tbl_fsglmul_op # QNAN x SNAN
	short		tbl_fsglmul_op		- tbl_fsglmul_op #
	short		tbl_fsglmul_op		- tbl_fsglmul_op #

	short		fsglmul_norm		- tbl_fsglmul_op # DENORM x NORM
	short		fsglmul_zero		- tbl_fsglmul_op # DENORM x ZERO
	short		fsglmul_inf_src		- tbl_fsglmul_op # DENORM x INF
	short		fsglmul_res_qnan	- tbl_fsglmul_op # DENORM x QNAN
	short		fsglmul_norm		- tbl_fsglmul_op # DENORM x DENORM
	short		fsglmul_res_snan	- tbl_fsglmul_op # DENORM x SNAN
	short		tbl_fsglmul_op		- tbl_fsglmul_op #
	short		tbl_fsglmul_op		- tbl_fsglmul_op #

	short		fsglmul_res_snan	- tbl_fsglmul_op # SNAN x NORM
	short		fsglmul_res_snan	- tbl_fsglmul_op # SNAN x ZERO
	short		fsglmul_res_snan	- tbl_fsglmul_op # SNAN x INF
	short		fsglmul_res_snan	- tbl_fsglmul_op # SNAN x QNAN
	short		fsglmul_res_snan	- tbl_fsglmul_op # SNAN x DENORM
	short		fsglmul_res_snan	- tbl_fsglmul_op # SNAN x SNAN
	short		tbl_fsglmul_op		- tbl_fsglmul_op #
	short		tbl_fsglmul_op		- tbl_fsglmul_op #

# special-case stubs: share the fmul special-case handlers.
fsglmul_res_operr:
	bra.l		res_operr
fsglmul_res_snan:
	bra.l		res_snan
fsglmul_res_qnan:
	bra.l		res_qnan
fsglmul_zero:
	bra.l		fmul_zero
fsglmul_inf_src:
	bra.l		fmul_inf_src
fsglmul_inf_dst:
	bra.l		fmul_inf_dst

#########################################################################
# XDEF
**************************************************************** # 10709# fsgldiv(): emulates the fsgldiv instruction # 10710# # 10711# XREF **************************************************************** # 10712# scale_to_zero_src() - scale src exponent to zero # 10713# scale_to_zero_dst() - scale dst exponent to zero # 10714# unf_res4() - return default underflow result for sglop # 10715# ovf_res() - return default overflow result # 10716# res_qnan() - return QNAN result # 10717# res_snan() - return SNAN result # 10718# # 10719# INPUT *************************************************************** # 10720# a0 = pointer to extended precision source operand # 10721# a1 = pointer to extended precision destination operand # 10722# d0 rnd prec,mode # 10723# # 10724# OUTPUT ************************************************************** # 10725# fp0 = result # 10726# fp1 = EXOP (if exception occurred) # 10727# # 10728# ALGORITHM *********************************************************** # 10729# Handle NANs, infinities, and zeroes as special cases. Divide # 10730# norms/denorms into ext/sgl/dbl precision. # 10731# For norms/denorms, scale the exponents such that a divide # 10732# instruction won't cause an exception. Use the regular fsgldiv to # 10733# compute a result. Check if the regular operands would have taken # 10734# an exception. If so, return the default overflow/underflow result # 10735# and return the EXOP if exceptions are enabled. Else, scale the # 10736# result operand to the proper exponent. # 10737# # 10738######################################################################### 10739 10740 global fsgldiv 10741fsgldiv: 10742 mov.l %d0,L_SCR3(%a6) # store rnd info 10743 10744 clr.w %d1 10745 mov.b DTAG(%a6),%d1 10746 lsl.b &0x3,%d1 10747 or.b STAG(%a6),%d1 # combine src tags 10748 10749 bne.w fsgldiv_not_norm # optimize on non-norm input 10750 10751# 10752# DIVIDE: NORMs and DENORMs ONLY! 
#
# Normal-case single-precision divide: copy both operands to the scratch
# frame, scale both exponents to zero, and classify the combined scale
# factor (dst_scale - src_scale) against the overflow/underflow thresholds.
fsgldiv_norm:
	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)
	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)

	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)

	bsr.l		scale_to_zero_src	# calculate scale factor 1
	mov.l		%d0,-(%sp)		# save scale factor 1

	bsr.l		scale_to_zero_dst	# calculate scale factor 2

	neg.l		(%sp)			# S.F. = scale1 - scale2
	add.l		%d0,(%sp)

	mov.w		2+L_SCR3(%a6),%d1	# fetch precision,mode
	lsr.b		&0x6,%d1
	mov.l		(%sp)+,%d0		# d0 = combined scale factor
	cmpi.l		%d0,&0x3fff-0x7ffe	# will result overflow?
	ble.w		fsgldiv_may_ovfl	# maybe; go check

	cmpi.l		%d0,&0x3fff-0x0000	# will result underflow?
	beq.w		fsgldiv_may_unfl	# maybe
	bgt.w		fsgldiv_unfl		# yes; go handle underflow

# no overflow/underflow possible: divide, then restore the true exponent.
fsgldiv_normal:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsgldiv.x	FP_SCR0(%a6),%fp0	# perform sgl divide

	fmov.l		%fpsr,%d1		# save FPSR
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

fsgldiv_normal_exit:
	fmovm.x		&0x80,FP_SCR0(%a6)	# store result on stack
	mov.l		%d2,-(%sp)		# save d2
	mov.w		FP_SCR0_EX(%a6),%d1	# load {sgn,exp}
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# apply scale factor (exp - scale)
	or.w		%d2,%d1			# concat old sign,new exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
	rts

# the scaled divide cannot trap, but the unscaled result might overflow;
# divide, then test the rescaled exponent against the max extended exponent.
fsgldiv_may_ovfl:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsgldiv.x	FP_SCR0(%a6),%fp0	# execute divide

	fmov.l		%fpsr,%d1
	fmov.l		&0x0,%fpcr

	or.l		%d1,USER_FPSR(%a6)	# save INEX,N

	fmovm.x		&0x01,-(%sp)		# save result to stack
	mov.w		(%sp),%d1		# fetch new exponent
	add.l		&0xc,%sp		# clear result
	andi.l		&0x7fff,%d1		# strip sign
	sub.l		%d0,%d1			# apply scale factor
	cmp.l		%d1,&0x7fff		# did divide overflow?
	blt.b		fsgldiv_normal_exit	# no; finish normally

fsgldiv_ovfl_tst:
	or.w		&ovfl_inx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
	bne.b		fsgldiv_ovfl_ena	# yes

# overflow exceptions disabled: return the default overflow result.
fsgldiv_ovfl_dis:
	btst		&neg_bit,FPSR_CC(%a6)	# is result negative
	sne		%d1			# set sign param accordingly
	mov.l		L_SCR3(%a6),%d0		# pass prec:rnd
	andi.b		&0x30,%d0		# kill precision
	bsr.l		ovf_res			# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# set INF if applicable
	fmovm.x		(%a0),&0x80		# return default result in fp0
	rts

# overflow exceptions enabled: also build the EXOP (exceptional operand)
# with the 0x6000 bias adjustment and return it in fp1.
fsgldiv_ovfl_ena:
	fmovm.x		&0x80,FP_SCR0(%a6)	# move result to stack

	mov.l		%d2,-(%sp)		# save d2
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# apply scale factor
	subi.l		&0x6000,%d1		# subtract new bias
	andi.w		&0x7fff,%d1		# clear ms bit
	or.w		%d2,%d1			# concat old sign,new exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	bra.b		fsgldiv_ovfl_dis

# the result underflows: redo with RZ so the rescaled result is usable,
# then hand off to unf_res4 for the default single-precision result.
fsgldiv_unfl:
	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit

	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsgldiv.x	FP_SCR0(%a6),%fp0	# execute sgl divide

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
	bne.b		fsgldiv_unfl_ena	# yes

fsgldiv_unfl_dis:
	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result

	lea		FP_SCR0(%a6),%a0	# pass: result addr
	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
	bsr.l		unf_res4		# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# 'Z' bit may have been set
	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
	rts

#
# UNFL is enabled: also build the EXOP (bias added back by 0x6000)
# and return it in fp1 before delivering the default result.
#
fsgldiv_unfl_ena:
	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsgldiv.x	FP_SCR0(%a6),%fp1	# execute sgl divide

	fmov.l		&0x0,%fpcr		# clear FPCR

	fmovm.x		&0x40,FP_SCR0(%a6)	# save result to stack
	mov.l		%d2,-(%sp)		# save d2
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# apply scale factor
	addi.l		&0x6000,%d1		# add bias
	andi.w		&0x7fff,%d1		# clear top bit
	or.w		%d2,%d1			# concat old sign, new exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	bra.b		fsgldiv_unfl_dis

#
# the divide operation MAY underflow:
#
fsgldiv_may_unfl:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsgldiv.x	FP_SCR0(%a6),%fp0	# execute sgl divide

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

	fabs.x		%fp0,%fp1		# make a copy of result
	fcmp.b		%fp1,&0x1		# is |result| > 1.b?
10938 fbgt.w fsgldiv_normal_exit # no; no underflow occurred 10939 fblt.w fsgldiv_unfl # yes; underflow occurred 10940 10941# 10942# we still don't know if underflow occurred. result is ~ equal to 1. but, 10943# we don't know if the result was an underflow that rounded up to a 1 10944# or a normalized number that rounded down to a 1. so, redo the entire 10945# operation using RZ as the rounding mode to see what the pre-rounded 10946# result is. this case should be relatively rare. 10947# 10948 fmovm.x FP_SCR1(%a6),&0x40 # load dst op into %fp1 10949 10950 clr.l %d1 # clear scratch register 10951 ori.b &rz_mode*0x10,%d1 # force RZ rnd mode 10952 10953 fmov.l %d1,%fpcr # set FPCR 10954 fmov.l &0x0,%fpsr # clear FPSR 10955 10956 fsgldiv.x FP_SCR0(%a6),%fp1 # execute sgl divide 10957 10958 fmov.l &0x0,%fpcr # clear FPCR 10959 fabs.x %fp1 # make absolute value 10960 fcmp.b %fp1,&0x1 # is |result| < 1.b? 10961 fbge.w fsgldiv_normal_exit # no; no underflow occurred 10962 bra.w fsgldiv_unfl # yes; underflow occurred 10963 10964############################################################################ 10965 10966# 10967# Divide: inputs are not both normalized; what are they? 
#
# Dispatch through tbl_fsgldiv_op indexed by the combined DTAG:STAG byte
# already assembled in d1 (dst tag in bits 5-3, src tag in bits 2-0).
fsgldiv_not_norm:
	mov.w		(tbl_fsgldiv_op.b,%pc,%d1.w*2),%d1
	jmp		(tbl_fsgldiv_op.b,%pc,%d1.w*1)

	swbeg		&48
tbl_fsgldiv_op:
	short		fsgldiv_norm		- tbl_fsgldiv_op # NORM / NORM
	short		fsgldiv_inf_load	- tbl_fsgldiv_op # NORM / ZERO
	short		fsgldiv_zero_load	- tbl_fsgldiv_op # NORM / INF
	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # NORM / QNAN
	short		fsgldiv_norm		- tbl_fsgldiv_op # NORM / DENORM
	short		fsgldiv_res_snan	- tbl_fsgldiv_op # NORM / SNAN
	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #

	short		fsgldiv_zero_load	- tbl_fsgldiv_op # ZERO / NORM
	short		fsgldiv_res_operr	- tbl_fsgldiv_op # ZERO / ZERO
	short		fsgldiv_zero_load	- tbl_fsgldiv_op # ZERO / INF
	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # ZERO / QNAN
	short		fsgldiv_zero_load	- tbl_fsgldiv_op # ZERO / DENORM
	short		fsgldiv_res_snan	- tbl_fsgldiv_op # ZERO / SNAN
	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #

	short		fsgldiv_inf_dst		- tbl_fsgldiv_op # INF / NORM
	short		fsgldiv_inf_dst		- tbl_fsgldiv_op # INF / ZERO
	short		fsgldiv_res_operr	- tbl_fsgldiv_op # INF / INF
	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # INF / QNAN
	short		fsgldiv_inf_dst		- tbl_fsgldiv_op # INF / DENORM
	short		fsgldiv_res_snan	- tbl_fsgldiv_op # INF / SNAN
	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #

	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # QNAN / NORM
	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # QNAN / ZERO
	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # QNAN / INF
	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # QNAN / QNAN
	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # QNAN / DENORM
	short		fsgldiv_res_snan	- tbl_fsgldiv_op # QNAN / SNAN
	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #

	short		fsgldiv_norm		- tbl_fsgldiv_op # DENORM / NORM
	short		fsgldiv_inf_load	- tbl_fsgldiv_op # DENORM / ZERO
	short		fsgldiv_zero_load	- tbl_fsgldiv_op # DENORM / INF
	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # DENORM / QNAN
	short		fsgldiv_norm		- tbl_fsgldiv_op # DENORM / DENORM
	short		fsgldiv_res_snan	- tbl_fsgldiv_op # DENORM / SNAN
	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #

	short		fsgldiv_res_snan	- tbl_fsgldiv_op # SNAN / NORM
	short		fsgldiv_res_snan	- tbl_fsgldiv_op # SNAN / ZERO
	short		fsgldiv_res_snan	- tbl_fsgldiv_op # SNAN / INF
	short		fsgldiv_res_snan	- tbl_fsgldiv_op # SNAN / QNAN
	short		fsgldiv_res_snan	- tbl_fsgldiv_op # SNAN / DENORM
	short		fsgldiv_res_snan	- tbl_fsgldiv_op # SNAN / SNAN
	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #

# stubs: long-branch to the shared fdiv special-case handlers
fsgldiv_res_qnan:
	bra.l		res_qnan
fsgldiv_res_snan:
	bra.l		res_snan
fsgldiv_res_operr:
	bra.l		res_operr
fsgldiv_inf_load:
	bra.l		fdiv_inf_load
fsgldiv_zero_load:
	bra.l		fdiv_zero_load
fsgldiv_inf_dst:
	bra.l		fdiv_inf_dst

#########################################################################
# XDEF **************************************************************** #
#	fadd(): emulates the fadd instruction				#
#	fsadd(): emulates the fsadd instruction				#
#	fdadd(): emulates the fdadd instruction				#
#									#
# XREF **************************************************************** #
#	addsub_scaler2() - scale the operands so they won't take exc	#
#	ovf_res() - return default overflow result			#
#	unf_res() - return default underflow result			#
#	res_qnan() - set QNAN result					#
#	res_snan() - set SNAN result					#
#	res_operr() - set OPERR result					#
#	scale_to_zero_src() - set src operand exponent equal to zero	#
#	scale_to_zero_dst() - set dst operand exponent equal to zero	#
#									#
# INPUT *************************************************************** #
#	a0 = pointer to extended precision source operand		#
#	a1 = pointer to extended precision destination operand		#
#									#
# OUTPUT ************************************************************** #
#	fp0 = result							#
#	fp1 = EXOP (if exception occurred)				#
#									#
# ALGORITHM *********************************************************** #
#	Handle NANs, infinities, and zeroes as special cases. Divide	#
# norms into extended, single, and double precision.			#
#	Do addition after scaling exponents such that exception won't	#
# occur. Then, check result exponent to see if exception would have	#
# occurred. If so, return default result and maybe EXOP. Else, insert	#
# the correct result exponent and return. Set FPSR bits as appropriate.	#
#									#
#########################################################################

	global		fsadd
fsadd:
	andi.b		&0x30,%d0		# clear rnd prec
	ori.b		&s_mode*0x10,%d0	# insert sgl prec
	bra.b		fadd

	global		fdadd
fdadd:
	andi.b		&0x30,%d0		# clear rnd prec
	ori.b		&d_mode*0x10,%d0	# insert dbl prec

	global		fadd
fadd:
	mov.l		%d0,L_SCR3(%a6)		# store rnd info

# build the dispatch index: dst tag in bits 5-3, src tag in bits 2-0
	clr.w		%d1
	mov.b		DTAG(%a6),%d1
	lsl.b		&0x3,%d1
	or.b		STAG(%a6),%d1		# combine src tags

	bne.w		fadd_not_norm		# optimize on non-norm input

#
# ADD: norms and denorms
#
fadd_norm:
	bsr.l		addsub_scaler2		# scale exponents

# entry point also used when one operand is a (scaled-in) ZERO
fadd_zero_entry:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		&0x0,%fpsr		# clear FPSR
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

	fadd.x		FP_SCR0(%a6),%fp0	# execute add

	fmov.l		&0x0,%fpcr		# clear FPCR
	fmov.l		%fpsr,%d1		# fetch INEX2,N,Z

	or.l		%d1,USER_FPSR(%a6)	# save exc and ccode bits
	fbeq.w		fadd_zero_exit		# if result is zero, end now

	mov.l		%d2,-(%sp)		# save d2

	fmovm.x		&0x01,-(%sp)		# save result to stack

	mov.w		2+L_SCR3(%a6),%d1	# fetch rnd precision
	lsr.b		&0x6,%d1		# d1 = precision index (0/1/2)

	mov.w		(%sp),%d2		# fetch new sign, exp
	andi.l		&0x7fff,%d2		# strip sign
	sub.l		%d0,%d2			# apply scale factor

# compare the rescaled exponent against the per-precision thresholds
	cmp.l		%d2,(tbl_fadd_ovfl.b,%pc,%d1.w*4) # is it an overflow?
	bge.b		fadd_ovfl		# yes

	cmp.l		%d2,(tbl_fadd_unfl.b,%pc,%d1.w*4) # is it an underflow?
	blt.w		fadd_unfl		# yes
	beq.w		fadd_may_unfl		# maybe; go find out

fadd_normal:
	mov.w		(%sp),%d1
	andi.w		&0x8000,%d1		# keep sign
	or.w		%d2,%d1			# concat sign,new exp
	mov.w		%d1,(%sp)		# insert new exponent

	fmovm.x		(%sp)+,&0x80		# return result in fp0

	mov.l		(%sp)+,%d2		# restore d2
	rts

fadd_zero_exit:
#	fmov.s		&0x00000000,%fp0	# return zero in fp0
	rts

# per-precision exponent thresholds indexed by precision (ext/sgl/dbl)
tbl_fadd_ovfl:
	long		0x7fff			# ext ovfl
	long		0x407f			# sgl ovfl
	long		0x43ff			# dbl ovfl

tbl_fadd_unfl:
	long		0x0000			# ext unfl
	long		0x3f81			# sgl unfl
	long		0x3c01			# dbl unfl

fadd_ovfl:
	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
	bne.b		fadd_ovfl_ena		# yes

	add.l		&0xc,%sp		# discard stacked result
fadd_ovfl_dis:
	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
	sne		%d1			# set sign param accordingly
	mov.l		L_SCR3(%a6),%d0		# pass prec:rnd
	bsr.l		ovf_res			# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
	fmovm.x		(%a0),&0x80		# return default result in fp0
	mov.l		(%sp)+,%d2		# restore d2
	rts

# overflow exceptions enabled: build the EXOP in fp1 before returning
# the default result.
fadd_ovfl_ena:
	mov.b		L_SCR3(%a6),%d1
	andi.b		&0xc0,%d1		# is precision extended?
	bne.b		fadd_ovfl_ena_sd	# no; prec = sgl or dbl

fadd_ovfl_ena_cont:
	mov.w		(%sp),%d1
	andi.w		&0x8000,%d1		# keep sign
	subi.l		&0x6000,%d2		# subtract bias adjustment
	andi.w		&0x7fff,%d2		# clear top bit
	or.w		%d2,%d1			# concat sign,new exp
	mov.w		%d1,(%sp)		# insert new exponent

	fmovm.x		(%sp)+,&0x40		# return EXOP in fp1
	bra.b		fadd_ovfl_dis

# sgl/dbl precision: redo the add rounded to extended so the EXOP
# mantissa is correct for the narrower precision.
fadd_ovfl_ena_sd:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	mov.l		L_SCR3(%a6),%d1
	andi.b		&0x30,%d1		# keep rnd mode
	fmov.l		%d1,%fpcr		# set FPCR

	fadd.x		FP_SCR0(%a6),%fp0	# execute add

	fmov.l		&0x0,%fpcr		# clear FPCR

	add.l		&0xc,%sp		# discard old stacked result
	fmovm.x		&0x01,-(%sp)		# stack the redone result
	bra.b		fadd_ovfl_ena_cont

fadd_unfl:
	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit

	add.l		&0xc,%sp		# discard stacked result

	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fadd.x		FP_SCR0(%a6),%fp0	# execute add

	fmov.l		&0x0,%fpcr		# clear FPCR
	fmov.l		%fpsr,%d1		# save status

	or.l		%d1,USER_FPSR(%a6)	# save INEX,N

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
	bne.b		fadd_unfl_ena		# yes

fadd_unfl_dis:
	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result

	lea		FP_SCR0(%a6),%a0	# pass: result addr
	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
	bsr.l		unf_res			# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# 'Z' bit may have been set
	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
	mov.l		(%sp)+,%d2		# restore d2
	rts

# underflow exceptions enabled: build the EXOP (bias added back by 0x6000)
# and return it in fp1.
fadd_unfl_ena:
	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op

	mov.l		L_SCR3(%a6),%d1
	andi.b		&0xc0,%d1		# is precision extended?
	bne.b		fadd_unfl_ena_sd	# no; sgl or dbl

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

fadd_unfl_ena_cont:
	fmov.l		&0x0,%fpsr		# clear FPSR

	fadd.x		FP_SCR0(%a6),%fp1	# execute add

	fmov.l		&0x0,%fpcr		# clear FPCR

	fmovm.x		&0x40,FP_SCR0(%a6)	# save result to stack
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# apply scale factor
	addi.l		&0x6000,%d1		# add new bias
	andi.w		&0x7fff,%d1		# clear top bit
	or.w		%d2,%d1			# concat sign,new exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	bra.w		fadd_unfl_dis

fadd_unfl_ena_sd:
	mov.l		L_SCR3(%a6),%d1
	andi.b		&0x30,%d1		# use only rnd mode
	fmov.l		%d1,%fpcr		# set FPCR

	bra.b		fadd_unfl_ena_cont

#
# result is equal to the smallest normalized number in the selected precision
# if the precision is extended, this result could not have come from an
# underflow that rounded up.
#
fadd_may_unfl:
	mov.l		L_SCR3(%a6),%d1
	andi.b		&0xc0,%d1		# is precision extended?
	beq.w		fadd_normal		# yes; no underflow occurred

	mov.l		0x4(%sp),%d1		# extract hi(man)
	cmpi.l		%d1,&0x80000000		# is hi(man) = 0x80000000?
	bne.w		fadd_normal		# no; no underflow occurred

	tst.l		0x8(%sp)		# is lo(man) = 0x0?
	bne.w		fadd_normal		# no; no underflow occurred

	btst		&inex2_bit,FPSR_EXCEPT(%a6) # is INEX2 set?
	beq.w		fadd_normal		# no; no underflow occurred

#
# ok, so now the result has a exponent equal to the smallest normalized
# exponent for the selected precision. also, the mantissa is equal to
# 0x8000000000000000 and this mantissa is the result of rounding non-zero
# g,r,s.
# now, we must determine whether the pre-rounded result was an underflow
# rounded "up" or a normalized number rounded "down".
# so, we do this by re-executing the add using RZ as the rounding mode and
# seeing if the new result is smaller or equal to the current result.
#
	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into fp1

	mov.l		L_SCR3(%a6),%d1
	andi.b		&0xc0,%d1		# keep rnd prec
	ori.b		&rz_mode*0x10,%d1	# insert RZ rnd mode

	fmov.l		%d1,%fpcr		# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fadd.x		FP_SCR0(%a6),%fp1	# execute add

	fmov.l		&0x0,%fpcr		# clear FPCR

	fabs.x		%fp0			# compare absolute values
	fabs.x		%fp1
	fcmp.x		%fp0,%fp1		# is first result > second?

	fbgt.w		fadd_unfl		# yes; it's an underflow
	bra.w		fadd_normal		# no; it's not an underflow

##########################################################################

#
# Add: inputs are not both normalized; what are they?
#
# Dispatch through tbl_fadd_op indexed by the combined DTAG:STAG byte
# already assembled in d1 (dst tag in bits 5-3, src tag in bits 2-0).
fadd_not_norm:
	mov.w		(tbl_fadd_op.b,%pc,%d1.w*2),%d1
	jmp		(tbl_fadd_op.b,%pc,%d1.w*1)

	swbeg		&48
tbl_fadd_op:
	short		fadd_norm	- tbl_fadd_op # NORM + NORM
	short		fadd_zero_src	- tbl_fadd_op # NORM + ZERO
	short		fadd_inf_src	- tbl_fadd_op # NORM + INF
	short		fadd_res_qnan	- tbl_fadd_op # NORM + QNAN
	short		fadd_norm	- tbl_fadd_op # NORM + DENORM
	short		fadd_res_snan	- tbl_fadd_op # NORM + SNAN
	short		tbl_fadd_op	- tbl_fadd_op #
	short		tbl_fadd_op	- tbl_fadd_op #

	short		fadd_zero_dst	- tbl_fadd_op # ZERO + NORM
	short		fadd_zero_2	- tbl_fadd_op # ZERO + ZERO
	short		fadd_inf_src	- tbl_fadd_op # ZERO + INF
	short		fadd_res_qnan	- tbl_fadd_op # ZERO + QNAN
	short		fadd_zero_dst	- tbl_fadd_op # ZERO + DENORM
	short		fadd_res_snan	- tbl_fadd_op # ZERO + SNAN
	short		tbl_fadd_op	- tbl_fadd_op #
	short		tbl_fadd_op	- tbl_fadd_op #

	short		fadd_inf_dst	- tbl_fadd_op # INF + NORM
	short		fadd_inf_dst	- tbl_fadd_op # INF + ZERO
	short		fadd_inf_2	- tbl_fadd_op # INF + INF
	short		fadd_res_qnan	- tbl_fadd_op # INF + QNAN
	short		fadd_inf_dst	- tbl_fadd_op # INF + DENORM
	short		fadd_res_snan	- tbl_fadd_op # INF + SNAN
	short		tbl_fadd_op	- tbl_fadd_op #
	short		tbl_fadd_op	- tbl_fadd_op #

	short		fadd_res_qnan	- tbl_fadd_op # QNAN + NORM
	short		fadd_res_qnan	- tbl_fadd_op # QNAN + ZERO
	short		fadd_res_qnan	- tbl_fadd_op # QNAN + INF
	short		fadd_res_qnan	- tbl_fadd_op # QNAN + QNAN
	short		fadd_res_qnan	- tbl_fadd_op # QNAN + DENORM
	short		fadd_res_snan	- tbl_fadd_op # QNAN + SNAN
	short		tbl_fadd_op	- tbl_fadd_op #
	short		tbl_fadd_op	- tbl_fadd_op #

	short		fadd_norm	- tbl_fadd_op # DENORM + NORM
	short		fadd_zero_src	- tbl_fadd_op # DENORM + ZERO
	short		fadd_inf_src	- tbl_fadd_op # DENORM + INF
	short		fadd_res_qnan	- tbl_fadd_op # DENORM + QNAN
	short		fadd_norm	- tbl_fadd_op # DENORM + DENORM
	short		fadd_res_snan	- tbl_fadd_op # DENORM + SNAN
	short		tbl_fadd_op	- tbl_fadd_op #
	short		tbl_fadd_op	- tbl_fadd_op #

	short		fadd_res_snan	- tbl_fadd_op # SNAN + NORM
	short		fadd_res_snan	- tbl_fadd_op # SNAN + ZERO
	short		fadd_res_snan	- tbl_fadd_op # SNAN + INF
	short		fadd_res_snan	- tbl_fadd_op # SNAN + QNAN
	short		fadd_res_snan	- tbl_fadd_op # SNAN + DENORM
	short		fadd_res_snan	- tbl_fadd_op # SNAN + SNAN
	short		tbl_fadd_op	- tbl_fadd_op #
	short		tbl_fadd_op	- tbl_fadd_op #

# stubs: long-branch to the shared NAN handlers
fadd_res_qnan:
	bra.l		res_qnan
fadd_res_snan:
	bra.l		res_snan

#
# both operands are ZEROes
#
fadd_zero_2:
	mov.b		SRC_EX(%a0),%d0		# are the signs opposite
	mov.b		DST_EX(%a1),%d1
	eor.b		%d0,%d1
	bmi.w		fadd_zero_2_chk_rm	# weed out (-ZERO)+(+ZERO)

# the signs are the same. so determine whether they are positive or negative
# and return the appropriately signed zero.
	tst.b		%d0			# are ZEROes positive or negative?
	bmi.b		fadd_zero_rm		# negative
	fmov.s		&0x00000000,%fp0	# return +ZERO
	mov.b		&z_bmask,FPSR_CC(%a6)	# set Z
	rts

#
# the ZEROes have opposite signs:
# - therefore, we return +ZERO if the rounding modes are RN,RZ, or RP.
# - -ZERO is returned in the case of RM.
#
fadd_zero_2_chk_rm:
	mov.b		3+L_SCR3(%a6),%d1
	andi.b		&0x30,%d1		# extract rnd mode
	cmpi.b		%d1,&rm_mode*0x10	# is rnd mode == RM?
	beq.b		fadd_zero_rm		# yes
	fmov.s		&0x00000000,%fp0	# return +ZERO
	mov.b		&z_bmask,FPSR_CC(%a6)	# set Z
	rts

fadd_zero_rm:
	fmov.s		&0x80000000,%fp0	# return -ZERO
	mov.b		&neg_bmask+z_bmask,FPSR_CC(%a6) # set NEG/Z
	rts

#
# one operand is a ZERO and the other is a DENORM or NORM. scale
# the DENORM or NORM and jump to the regular fadd routine.
#
fadd_zero_dst:
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	bsr.l		scale_to_zero_src	# scale the operand
	clr.w		FP_SCR1_EX(%a6)		# dst = +0.0
	clr.l		FP_SCR1_HI(%a6)
	clr.l		FP_SCR1_LO(%a6)
	bra.w		fadd_zero_entry		# go execute fadd

fadd_zero_src:
	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)
	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)
	bsr.l		scale_to_zero_dst	# scale the operand
	clr.w		FP_SCR0_EX(%a6)		# src = +0.0
	clr.l		FP_SCR0_HI(%a6)
	clr.l		FP_SCR0_LO(%a6)
	bra.w		fadd_zero_entry		# go execute fadd

#
# both operands are INFs. an OPERR will result if the INFs have
# different signs. else, an INF of the same sign is returned
#
fadd_inf_2:
	mov.b		SRC_EX(%a0),%d0		# exclusive or the signs
	mov.b		DST_EX(%a1),%d1
	eor.b		%d1,%d0
	bmi.l		res_operr		# weed out (-INF)+(+INF)

# ok, so it's not an OPERR. but, we do have to remember to return the
# src INF since that's where the 881/882 gets the j-bit from...

#
# operands are INF and one of {ZERO, INF, DENORM, NORM}
#
fadd_inf_src:
	fmovm.x		SRC(%a0),&0x80		# return src INF
	tst.b		SRC_EX(%a0)		# is INF positive?
	bpl.b		fadd_inf_done		# yes; we're done
	mov.b		&neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
	rts

#
# operands are INF and one of {ZERO, INF, DENORM, NORM}
#
fadd_inf_dst:
	fmovm.x		DST(%a1),&0x80		# return dst INF
	tst.b		DST_EX(%a1)		# is INF positive?
11487 bpl.b fadd_inf_done # yes; we're done 11488 mov.b &neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG 11489 rts 11490 11491fadd_inf_done: 11492 mov.b &inf_bmask,FPSR_CC(%a6) # set INF 11493 rts 11494 11495######################################################################### 11496# XDEF **************************************************************** # 11497# fsub(): emulates the fsub instruction # 11498# fssub(): emulates the fssub instruction # 11499# fdsub(): emulates the fdsub instruction # 11500# # 11501# XREF **************************************************************** # 11502# addsub_scaler2() - scale the operands so they won't take exc # 11503# ovf_res() - return default overflow result # 11504# unf_res() - return default underflow result # 11505# res_qnan() - set QNAN result # 11506# res_snan() - set SNAN result # 11507# res_operr() - set OPERR result # 11508# scale_to_zero_src() - set src operand exponent equal to zero # 11509# scale_to_zero_dst() - set dst operand exponent equal to zero # 11510# # 11511# INPUT *************************************************************** # 11512# a0 = pointer to extended precision source operand # 11513# a1 = pointer to extended precision destination operand # 11514# # 11515# OUTPUT ************************************************************** # 11516# fp0 = result # 11517# fp1 = EXOP (if exception occurred) # 11518# # 11519# ALGORITHM *********************************************************** # 11520# Handle NANs, infinities, and zeroes as special cases. Divide # 11521# norms into extended, single, and double precision. # 11522# Do subtraction after scaling exponents such that exception won't# 11523# occur. Then, check result exponent to see if exception would have # 11524# occurred. If so, return default result and maybe EXOP. Else, insert # 11525# the correct result exponent and return. Set FPSR bits as appropriate. 
# 11526# # 11527######################################################################### 11528 11529 global fssub 11530fssub: 11531 andi.b &0x30,%d0 # clear rnd prec 11532 ori.b &s_mode*0x10,%d0 # insert sgl prec 11533 bra.b fsub 11534 11535 global fdsub 11536fdsub: 11537 andi.b &0x30,%d0 # clear rnd prec 11538 ori.b &d_mode*0x10,%d0 # insert dbl prec 11539 11540 global fsub 11541fsub: 11542 mov.l %d0,L_SCR3(%a6) # store rnd info 11543 11544 clr.w %d1 11545 mov.b DTAG(%a6),%d1 11546 lsl.b &0x3,%d1 11547 or.b STAG(%a6),%d1 # combine src tags 11548 11549 bne.w fsub_not_norm # optimize on non-norm input 11550 11551# 11552# SUB: norms and denorms 11553# 11554fsub_norm: 11555 bsr.l addsub_scaler2 # scale exponents 11556 11557fsub_zero_entry: 11558 fmovm.x FP_SCR1(%a6),&0x80 # load dst op 11559 11560 fmov.l &0x0,%fpsr # clear FPSR 11561 fmov.l L_SCR3(%a6),%fpcr # set FPCR 11562 11563 fsub.x FP_SCR0(%a6),%fp0 # execute subtract 11564 11565 fmov.l &0x0,%fpcr # clear FPCR 11566 fmov.l %fpsr,%d1 # fetch INEX2, N, Z 11567 11568 or.l %d1,USER_FPSR(%a6) # save exc and ccode bits 11569 11570 fbeq.w fsub_zero_exit # if result zero, end now 11571 11572 mov.l %d2,-(%sp) # save d2 11573 11574 fmovm.x &0x01,-(%sp) # save result to stack 11575 11576 mov.w 2+L_SCR3(%a6),%d1 11577 lsr.b &0x6,%d1 11578 11579 mov.w (%sp),%d2 # fetch new exponent 11580 andi.l &0x7fff,%d2 # strip sign 11581 sub.l %d0,%d2 # add scale factor 11582 11583 cmp.l %d2,(tbl_fsub_ovfl.b,%pc,%d1.w*4) # is it an overflow? 11584 bge.b fsub_ovfl # yes 11585 11586 cmp.l %d2,(tbl_fsub_unfl.b,%pc,%d1.w*4) # is it an underflow? 
11587 blt.w fsub_unfl # yes 11588 beq.w fsub_may_unfl # maybe; go find out 11589 11590fsub_normal: 11591 mov.w (%sp),%d1 11592 andi.w &0x8000,%d1 # keep sign 11593 or.w %d2,%d1 # insert new exponent 11594 mov.w %d1,(%sp) # insert new exponent 11595 11596 fmovm.x (%sp)+,&0x80 # return result in fp0 11597 11598 mov.l (%sp)+,%d2 # restore d2 11599 rts 11600 11601fsub_zero_exit: 11602# fmov.s &0x00000000,%fp0 # return zero in fp0 11603 rts 11604 11605tbl_fsub_ovfl: 11606 long 0x7fff # ext ovfl 11607 long 0x407f # sgl ovfl 11608 long 0x43ff # dbl ovfl 11609 11610tbl_fsub_unfl: 11611 long 0x0000 # ext unfl 11612 long 0x3f81 # sgl unfl 11613 long 0x3c01 # dbl unfl 11614 11615fsub_ovfl: 11616 or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex 11617 11618 mov.b FPCR_ENABLE(%a6),%d1 11619 andi.b &0x13,%d1 # is OVFL or INEX enabled? 11620 bne.b fsub_ovfl_ena # yes 11621 11622 add.l &0xc,%sp 11623fsub_ovfl_dis: 11624 btst &neg_bit,FPSR_CC(%a6) # is result negative? 11625 sne %d1 # set sign param accordingly 11626 mov.l L_SCR3(%a6),%d0 # pass prec:rnd 11627 bsr.l ovf_res # calculate default result 11628 or.b %d0,FPSR_CC(%a6) # set INF,N if applicable 11629 fmovm.x (%a0),&0x80 # return default result in fp0 11630 mov.l (%sp)+,%d2 # restore d2 11631 rts 11632 11633fsub_ovfl_ena: 11634 mov.b L_SCR3(%a6),%d1 11635 andi.b &0xc0,%d1 # is precision extended? 
11636 bne.b fsub_ovfl_ena_sd # no 11637 11638fsub_ovfl_ena_cont: 11639 mov.w (%sp),%d1 # fetch {sgn,exp} 11640 andi.w &0x8000,%d1 # keep sign 11641 subi.l &0x6000,%d2 # subtract new bias 11642 andi.w &0x7fff,%d2 # clear top bit 11643 or.w %d2,%d1 # concat sign,exp 11644 mov.w %d1,(%sp) # insert new exponent 11645 11646 fmovm.x (%sp)+,&0x40 # return EXOP in fp1 11647 bra.b fsub_ovfl_dis 11648 11649fsub_ovfl_ena_sd: 11650 fmovm.x FP_SCR1(%a6),&0x80 # load dst op 11651 11652 mov.l L_SCR3(%a6),%d1 11653 andi.b &0x30,%d1 # clear rnd prec 11654 fmov.l %d1,%fpcr # set FPCR 11655 11656 fsub.x FP_SCR0(%a6),%fp0 # execute subtract 11657 11658 fmov.l &0x0,%fpcr # clear FPCR 11659 11660 add.l &0xc,%sp 11661 fmovm.x &0x01,-(%sp) 11662 bra.b fsub_ovfl_ena_cont 11663 11664fsub_unfl: 11665 bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit 11666 11667 add.l &0xc,%sp 11668 11669 fmovm.x FP_SCR1(%a6),&0x80 # load dst op 11670 11671 fmov.l &rz_mode*0x10,%fpcr # set FPCR 11672 fmov.l &0x0,%fpsr # clear FPSR 11673 11674 fsub.x FP_SCR0(%a6),%fp0 # execute subtract 11675 11676 fmov.l &0x0,%fpcr # clear FPCR 11677 fmov.l %fpsr,%d1 # save status 11678 11679 or.l %d1,USER_FPSR(%a6) 11680 11681 mov.b FPCR_ENABLE(%a6),%d1 11682 andi.b &0x0b,%d1 # is UNFL or INEX enabled? 11683 bne.b fsub_unfl_ena # yes 11684 11685fsub_unfl_dis: 11686 fmovm.x &0x80,FP_SCR0(%a6) # store out result 11687 11688 lea FP_SCR0(%a6),%a0 # pass: result addr 11689 mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode 11690 bsr.l unf_res # calculate default result 11691 or.b %d0,FPSR_CC(%a6) # 'Z' may have been set 11692 fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0 11693 mov.l (%sp)+,%d2 # restore d2 11694 rts 11695 11696fsub_unfl_ena: 11697 fmovm.x FP_SCR1(%a6),&0x40 11698 11699 mov.l L_SCR3(%a6),%d1 11700 andi.b &0xc0,%d1 # is precision extended? 
11701 bne.b fsub_unfl_ena_sd # no 11702 11703 fmov.l L_SCR3(%a6),%fpcr # set FPCR 11704 11705fsub_unfl_ena_cont: 11706 fmov.l &0x0,%fpsr # clear FPSR 11707 11708 fsub.x FP_SCR0(%a6),%fp1 # execute subtract 11709 11710 fmov.l &0x0,%fpcr # clear FPCR 11711 11712 fmovm.x &0x40,FP_SCR0(%a6) # store result to stack 11713 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp} 11714 mov.l %d1,%d2 # make a copy 11715 andi.l &0x7fff,%d1 # strip sign 11716 andi.w &0x8000,%d2 # keep old sign 11717 sub.l %d0,%d1 # add scale factor 11718 addi.l &0x6000,%d1 # subtract new bias 11719 andi.w &0x7fff,%d1 # clear top bit 11720 or.w %d2,%d1 # concat sgn,exp 11721 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent 11722 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1 11723 bra.w fsub_unfl_dis 11724 11725fsub_unfl_ena_sd: 11726 mov.l L_SCR3(%a6),%d1 11727 andi.b &0x30,%d1 # clear rnd prec 11728 fmov.l %d1,%fpcr # set FPCR 11729 11730 bra.b fsub_unfl_ena_cont 11731 11732# 11733# result is equal to the smallest normalized number in the selected precision 11734# if the precision is extended, this result could not have come from an 11735# underflow that rounded up. 11736# 11737fsub_may_unfl: 11738 mov.l L_SCR3(%a6),%d1 11739 andi.b &0xc0,%d1 # fetch rnd prec 11740 beq.w fsub_normal # yes; no underflow occurred 11741 11742 mov.l 0x4(%sp),%d1 11743 cmpi.l %d1,&0x80000000 # is hi(man) = 0x80000000? 11744 bne.w fsub_normal # no; no underflow occurred 11745 11746 tst.l 0x8(%sp) # is lo(man) = 0x0? 11747 bne.w fsub_normal # no; no underflow occurred 11748 11749 btst &inex2_bit,FPSR_EXCEPT(%a6) # is INEX2 set? 11750 beq.w fsub_normal # no; no underflow occurred 11751 11752# 11753# ok, so now the result has a exponent equal to the smallest normalized 11754# exponent for the selected precision. also, the mantissa is equal to 11755# 0x8000000000000000 and this mantissa is the result of rounding non-zero 11756# g,r,s. 
11757# now, we must determine whether the pre-rounded result was an underflow 11758# rounded "up" or a normalized number rounded "down". 11759# so, we do this be re-executing the add using RZ as the rounding mode and 11760# seeing if the new result is smaller or equal to the current result. 11761# 11762 fmovm.x FP_SCR1(%a6),&0x40 # load dst op into fp1 11763 11764 mov.l L_SCR3(%a6),%d1 11765 andi.b &0xc0,%d1 # keep rnd prec 11766 ori.b &rz_mode*0x10,%d1 # insert rnd mode 11767 fmov.l %d1,%fpcr # set FPCR 11768 fmov.l &0x0,%fpsr # clear FPSR 11769 11770 fsub.x FP_SCR0(%a6),%fp1 # execute subtract 11771 11772 fmov.l &0x0,%fpcr # clear FPCR 11773 11774 fabs.x %fp0 # compare absolute values 11775 fabs.x %fp1 11776 fcmp.x %fp0,%fp1 # is first result > second? 11777 11778 fbgt.w fsub_unfl # yes; it's an underflow 11779 bra.w fsub_normal # no; it's not an underflow 11780 11781########################################################################## 11782 11783# 11784# Sub: inputs are not both normalized; what are they? 
11785# 11786fsub_not_norm: 11787 mov.w (tbl_fsub_op.b,%pc,%d1.w*2),%d1 11788 jmp (tbl_fsub_op.b,%pc,%d1.w*1) 11789 11790 swbeg &48 11791tbl_fsub_op: 11792 short fsub_norm - tbl_fsub_op # NORM - NORM 11793 short fsub_zero_src - tbl_fsub_op # NORM - ZERO 11794 short fsub_inf_src - tbl_fsub_op # NORM - INF 11795 short fsub_res_qnan - tbl_fsub_op # NORM - QNAN 11796 short fsub_norm - tbl_fsub_op # NORM - DENORM 11797 short fsub_res_snan - tbl_fsub_op # NORM - SNAN 11798 short tbl_fsub_op - tbl_fsub_op # 11799 short tbl_fsub_op - tbl_fsub_op # 11800 11801 short fsub_zero_dst - tbl_fsub_op # ZERO - NORM 11802 short fsub_zero_2 - tbl_fsub_op # ZERO - ZERO 11803 short fsub_inf_src - tbl_fsub_op # ZERO - INF 11804 short fsub_res_qnan - tbl_fsub_op # NORM - QNAN 11805 short fsub_zero_dst - tbl_fsub_op # ZERO - DENORM 11806 short fsub_res_snan - tbl_fsub_op # NORM - SNAN 11807 short tbl_fsub_op - tbl_fsub_op # 11808 short tbl_fsub_op - tbl_fsub_op # 11809 11810 short fsub_inf_dst - tbl_fsub_op # INF - NORM 11811 short fsub_inf_dst - tbl_fsub_op # INF - ZERO 11812 short fsub_inf_2 - tbl_fsub_op # INF - INF 11813 short fsub_res_qnan - tbl_fsub_op # NORM - QNAN 11814 short fsub_inf_dst - tbl_fsub_op # INF - DENORM 11815 short fsub_res_snan - tbl_fsub_op # NORM - SNAN 11816 short tbl_fsub_op - tbl_fsub_op # 11817 short tbl_fsub_op - tbl_fsub_op # 11818 11819 short fsub_res_qnan - tbl_fsub_op # QNAN - NORM 11820 short fsub_res_qnan - tbl_fsub_op # QNAN - ZERO 11821 short fsub_res_qnan - tbl_fsub_op # QNAN - INF 11822 short fsub_res_qnan - tbl_fsub_op # QNAN - QNAN 11823 short fsub_res_qnan - tbl_fsub_op # QNAN - DENORM 11824 short fsub_res_snan - tbl_fsub_op # QNAN - SNAN 11825 short tbl_fsub_op - tbl_fsub_op # 11826 short tbl_fsub_op - tbl_fsub_op # 11827 11828 short fsub_norm - tbl_fsub_op # DENORM - NORM 11829 short fsub_zero_src - tbl_fsub_op # DENORM - ZERO 11830 short fsub_inf_src - tbl_fsub_op # DENORM - INF 11831 short fsub_res_qnan - tbl_fsub_op # NORM - QNAN 11832 short 
fsub_norm - tbl_fsub_op # DENORM - DENORM 11833 short fsub_res_snan - tbl_fsub_op # NORM - SNAN 11834 short tbl_fsub_op - tbl_fsub_op # 11835 short tbl_fsub_op - tbl_fsub_op # 11836 11837 short fsub_res_snan - tbl_fsub_op # SNAN - NORM 11838 short fsub_res_snan - tbl_fsub_op # SNAN - ZERO 11839 short fsub_res_snan - tbl_fsub_op # SNAN - INF 11840 short fsub_res_snan - tbl_fsub_op # SNAN - QNAN 11841 short fsub_res_snan - tbl_fsub_op # SNAN - DENORM 11842 short fsub_res_snan - tbl_fsub_op # SNAN - SNAN 11843 short tbl_fsub_op - tbl_fsub_op # 11844 short tbl_fsub_op - tbl_fsub_op # 11845 11846fsub_res_qnan: 11847 bra.l res_qnan 11848fsub_res_snan: 11849 bra.l res_snan 11850 11851# 11852# both operands are ZEROes 11853# 11854fsub_zero_2: 11855 mov.b SRC_EX(%a0),%d0 11856 mov.b DST_EX(%a1),%d1 11857 eor.b %d1,%d0 11858 bpl.b fsub_zero_2_chk_rm 11859 11860# the signs are opposite, so, return a ZERO w/ the sign of the dst ZERO 11861 tst.b %d0 # is dst negative? 11862 bmi.b fsub_zero_2_rm # yes 11863 fmov.s &0x00000000,%fp0 # no; return +ZERO 11864 mov.b &z_bmask,FPSR_CC(%a6) # set Z 11865 rts 11866 11867# 11868# the ZEROes have the same signs: 11869# - therefore, we return +ZERO if the rounding mode is RN,RZ, or RP 11870# - -ZERO is returned in the case of RM. 11871# 11872fsub_zero_2_chk_rm: 11873 mov.b 3+L_SCR3(%a6),%d1 11874 andi.b &0x30,%d1 # extract rnd mode 11875 cmpi.b %d1,&rm_mode*0x10 # is rnd mode = RM? 11876 beq.b fsub_zero_2_rm # yes 11877 fmov.s &0x00000000,%fp0 # no; return +ZERO 11878 mov.b &z_bmask,FPSR_CC(%a6) # set Z 11879 rts 11880 11881fsub_zero_2_rm: 11882 fmov.s &0x80000000,%fp0 # return -ZERO 11883 mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # set Z/NEG 11884 rts 11885 11886# 11887# one operand is a ZERO and the other is a DENORM or a NORM. 11888# scale the DENORM or NORM and jump to the regular fsub routine. 
11889# 11890fsub_zero_dst: 11891 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) 11892 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) 11893 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) 11894 bsr.l scale_to_zero_src # scale the operand 11895 clr.w FP_SCR1_EX(%a6) 11896 clr.l FP_SCR1_HI(%a6) 11897 clr.l FP_SCR1_LO(%a6) 11898 bra.w fsub_zero_entry # go execute fsub 11899 11900fsub_zero_src: 11901 mov.w DST_EX(%a1),FP_SCR1_EX(%a6) 11902 mov.l DST_HI(%a1),FP_SCR1_HI(%a6) 11903 mov.l DST_LO(%a1),FP_SCR1_LO(%a6) 11904 bsr.l scale_to_zero_dst # scale the operand 11905 clr.w FP_SCR0_EX(%a6) 11906 clr.l FP_SCR0_HI(%a6) 11907 clr.l FP_SCR0_LO(%a6) 11908 bra.w fsub_zero_entry # go execute fsub 11909 11910# 11911# both operands are INFs. an OPERR will result if the INFs have the 11912# same signs. else, 11913# 11914fsub_inf_2: 11915 mov.b SRC_EX(%a0),%d0 # exclusive or the signs 11916 mov.b DST_EX(%a1),%d1 11917 eor.b %d1,%d0 11918 bpl.l res_operr # weed out (-INF)+(+INF) 11919 11920# ok, so it's not an OPERR. but we do have to remember to return 11921# the src INF since that's where the 881/882 gets the j-bit. 11922 11923fsub_inf_src: 11924 fmovm.x SRC(%a0),&0x80 # return src INF 11925 fneg.x %fp0 # invert sign 11926 fbge.w fsub_inf_done # sign is now positive 11927 mov.b &neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG 11928 rts 11929 11930fsub_inf_dst: 11931 fmovm.x DST(%a1),&0x80 # return dst INF 11932 tst.b DST_EX(%a1) # is INF negative? 
11933 bpl.b fsub_inf_done # no 11934 mov.b &neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG 11935 rts 11936 11937fsub_inf_done: 11938 mov.b &inf_bmask,FPSR_CC(%a6) # set INF 11939 rts 11940 11941######################################################################### 11942# XDEF **************************************************************** # 11943# fsqrt(): emulates the fsqrt instruction # 11944# fssqrt(): emulates the fssqrt instruction # 11945# fdsqrt(): emulates the fdsqrt instruction # 11946# # 11947# XREF **************************************************************** # 11948# scale_sqrt() - scale the source operand # 11949# unf_res() - return default underflow result # 11950# ovf_res() - return default overflow result # 11951# res_qnan_1op() - return QNAN result # 11952# res_snan_1op() - return SNAN result # 11953# # 11954# INPUT *************************************************************** # 11955# a0 = pointer to extended precision source operand # 11956# d0 rnd prec,mode # 11957# # 11958# OUTPUT ************************************************************** # 11959# fp0 = result # 11960# fp1 = EXOP (if exception occurred) # 11961# # 11962# ALGORITHM *********************************************************** # 11963# Handle NANs, infinities, and zeroes as special cases. Divide # 11964# norms/denorms into ext/sgl/dbl precision. # 11965# For norms/denorms, scale the exponents such that a sqrt # 11966# instruction won't cause an exception. Use the regular fsqrt to # 11967# compute a result. Check if the regular operands would have taken # 11968# an exception. If so, return the default overflow/underflow result # 11969# and return the EXOP if exceptions are enabled. Else, scale the # 11970# result operand to the proper exponent. 
# 11971# # 11972######################################################################### 11973 11974 global fssqrt 11975fssqrt: 11976 andi.b &0x30,%d0 # clear rnd prec 11977 ori.b &s_mode*0x10,%d0 # insert sgl precision 11978 bra.b fsqrt 11979 11980 global fdsqrt 11981fdsqrt: 11982 andi.b &0x30,%d0 # clear rnd prec 11983 ori.b &d_mode*0x10,%d0 # insert dbl precision 11984 11985 global fsqrt 11986fsqrt: 11987 mov.l %d0,L_SCR3(%a6) # store rnd info 11988 clr.w %d1 11989 mov.b STAG(%a6),%d1 11990 bne.w fsqrt_not_norm # optimize on non-norm input 11991 11992# 11993# SQUARE ROOT: norms and denorms ONLY! 11994# 11995fsqrt_norm: 11996 tst.b SRC_EX(%a0) # is operand negative? 11997 bmi.l res_operr # yes 11998 11999 andi.b &0xc0,%d0 # is precision extended? 12000 bne.b fsqrt_not_ext # no; go handle sgl or dbl 12001 12002 fmov.l L_SCR3(%a6),%fpcr # set FPCR 12003 fmov.l &0x0,%fpsr # clear FPSR 12004 12005 fsqrt.x (%a0),%fp0 # execute square root 12006 12007 fmov.l %fpsr,%d1 12008 or.l %d1,USER_FPSR(%a6) # set N,INEX 12009 12010 rts 12011 12012fsqrt_denorm: 12013 tst.b SRC_EX(%a0) # is operand negative? 12014 bmi.l res_operr # yes 12015 12016 andi.b &0xc0,%d0 # is precision extended? 12017 bne.b fsqrt_not_ext # no; go handle sgl or dbl 12018 12019 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) 12020 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) 12021 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) 12022 12023 bsr.l scale_sqrt # calculate scale factor 12024 12025 bra.w fsqrt_sd_normal 12026 12027# 12028# operand is either single or double 12029# 12030fsqrt_not_ext: 12031 cmpi.b %d0,&s_mode*0x10 # separate sgl/dbl prec 12032 bne.w fsqrt_dbl 12033 12034# 12035# operand is to be rounded to single precision 12036# 12037fsqrt_sgl: 12038 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) 12039 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) 12040 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) 12041 12042 bsr.l scale_sqrt # calculate scale factor 12043 12044 cmpi.l %d0,&0x3fff-0x3f81 # will move in underflow? 
12045 beq.w fsqrt_sd_may_unfl 12046 bgt.w fsqrt_sd_unfl # yes; go handle underflow 12047 cmpi.l %d0,&0x3fff-0x407f # will move in overflow? 12048 beq.w fsqrt_sd_may_ovfl # maybe; go check 12049 blt.w fsqrt_sd_ovfl # yes; go handle overflow 12050 12051# 12052# operand will NOT overflow or underflow when moved in to the fp reg file 12053# 12054fsqrt_sd_normal: 12055 fmov.l &0x0,%fpsr # clear FPSR 12056 fmov.l L_SCR3(%a6),%fpcr # set FPCR 12057 12058 fsqrt.x FP_SCR0(%a6),%fp0 # perform absolute 12059 12060 fmov.l %fpsr,%d1 # save FPSR 12061 fmov.l &0x0,%fpcr # clear FPCR 12062 12063 or.l %d1,USER_FPSR(%a6) # save INEX2,N 12064 12065fsqrt_sd_normal_exit: 12066 mov.l %d2,-(%sp) # save d2 12067 fmovm.x &0x80,FP_SCR0(%a6) # store out result 12068 mov.w FP_SCR0_EX(%a6),%d1 # load sgn,exp 12069 mov.l %d1,%d2 # make a copy 12070 andi.l &0x7fff,%d1 # strip sign 12071 sub.l %d0,%d1 # add scale factor 12072 andi.w &0x8000,%d2 # keep old sign 12073 or.w %d1,%d2 # concat old sign,new exp 12074 mov.w %d2,FP_SCR0_EX(%a6) # insert new exponent 12075 mov.l (%sp)+,%d2 # restore d2 12076 fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0 12077 rts 12078 12079# 12080# operand is to be rounded to double precision 12081# 12082fsqrt_dbl: 12083 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) 12084 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) 12085 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) 12086 12087 bsr.l scale_sqrt # calculate scale factor 12088 12089 cmpi.l %d0,&0x3fff-0x3c01 # will move in underflow? 12090 beq.w fsqrt_sd_may_unfl 12091 bgt.b fsqrt_sd_unfl # yes; go handle underflow 12092 cmpi.l %d0,&0x3fff-0x43ff # will move in overflow? 12093 beq.w fsqrt_sd_may_ovfl # maybe; go check 12094 blt.w fsqrt_sd_ovfl # yes; go handle overflow 12095 bra.w fsqrt_sd_normal # no; ho handle normalized op 12096 12097# we're on the line here and the distinguising characteristic is whether 12098# the exponent is 3fff or 3ffe. if it's 3ffe, then it's a safe number 12099# elsewise fall through to underflow. 
12100fsqrt_sd_may_unfl: 12101 btst &0x0,1+FP_SCR0_EX(%a6) # is exponent 0x3fff? 12102 bne.w fsqrt_sd_normal # yes, so no underflow 12103 12104# 12105# operand WILL underflow when moved in to the fp register file 12106# 12107fsqrt_sd_unfl: 12108 bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit 12109 12110 fmov.l &rz_mode*0x10,%fpcr # set FPCR 12111 fmov.l &0x0,%fpsr # clear FPSR 12112 12113 fsqrt.x FP_SCR0(%a6),%fp0 # execute square root 12114 12115 fmov.l %fpsr,%d1 # save status 12116 fmov.l &0x0,%fpcr # clear FPCR 12117 12118 or.l %d1,USER_FPSR(%a6) # save INEX2,N 12119 12120# if underflow or inexact is enabled, go calculate EXOP first. 12121 mov.b FPCR_ENABLE(%a6),%d1 12122 andi.b &0x0b,%d1 # is UNFL or INEX enabled? 12123 bne.b fsqrt_sd_unfl_ena # yes 12124 12125fsqrt_sd_unfl_dis: 12126 fmovm.x &0x80,FP_SCR0(%a6) # store out result 12127 12128 lea FP_SCR0(%a6),%a0 # pass: result addr 12129 mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode 12130 bsr.l unf_res # calculate default result 12131 or.b %d0,FPSR_CC(%a6) # set possible 'Z' ccode 12132 fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0 12133 rts 12134 12135# 12136# operand will underflow AND underflow is enabled. 12137# therefore, we must return the result rounded to extended precision. 12138# 12139fsqrt_sd_unfl_ena: 12140 mov.l FP_SCR0_HI(%a6),FP_SCR1_HI(%a6) 12141 mov.l FP_SCR0_LO(%a6),FP_SCR1_LO(%a6) 12142 mov.w FP_SCR0_EX(%a6),%d1 # load current exponent 12143 12144 mov.l %d2,-(%sp) # save d2 12145 mov.l %d1,%d2 # make a copy 12146 andi.l &0x7fff,%d1 # strip sign 12147 andi.w &0x8000,%d2 # keep old sign 12148 sub.l %d0,%d1 # subtract scale factor 12149 addi.l &0x6000,%d1 # add new bias 12150 andi.w &0x7fff,%d1 12151 or.w %d2,%d1 # concat new sign,new exp 12152 mov.w %d1,FP_SCR1_EX(%a6) # insert new exp 12153 fmovm.x FP_SCR1(%a6),&0x40 # return EXOP in fp1 12154 mov.l (%sp)+,%d2 # restore d2 12155 bra.b fsqrt_sd_unfl_dis 12156 12157# 12158# operand WILL overflow. 
12159# 12160fsqrt_sd_ovfl: 12161 fmov.l &0x0,%fpsr # clear FPSR 12162 fmov.l L_SCR3(%a6),%fpcr # set FPCR 12163 12164 fsqrt.x FP_SCR0(%a6),%fp0 # perform square root 12165 12166 fmov.l &0x0,%fpcr # clear FPCR 12167 fmov.l %fpsr,%d1 # save FPSR 12168 12169 or.l %d1,USER_FPSR(%a6) # save INEX2,N 12170 12171fsqrt_sd_ovfl_tst: 12172 or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex 12173 12174 mov.b FPCR_ENABLE(%a6),%d1 12175 andi.b &0x13,%d1 # is OVFL or INEX enabled? 12176 bne.b fsqrt_sd_ovfl_ena # yes 12177 12178# 12179# OVFL is not enabled; therefore, we must create the default result by 12180# calling ovf_res(). 12181# 12182fsqrt_sd_ovfl_dis: 12183 btst &neg_bit,FPSR_CC(%a6) # is result negative? 12184 sne %d1 # set sign param accordingly 12185 mov.l L_SCR3(%a6),%d0 # pass: prec,mode 12186 bsr.l ovf_res # calculate default result 12187 or.b %d0,FPSR_CC(%a6) # set INF,N if applicable 12188 fmovm.x (%a0),&0x80 # return default result in fp0 12189 rts 12190 12191# 12192# OVFL is enabled. 12193# the INEX2 bit has already been updated by the round to the correct precision. 12194# now, round to extended(and don't alter the FPSR). 12195# 12196fsqrt_sd_ovfl_ena: 12197 mov.l %d2,-(%sp) # save d2 12198 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp} 12199 mov.l %d1,%d2 # make a copy 12200 andi.l &0x7fff,%d1 # strip sign 12201 andi.w &0x8000,%d2 # keep old sign 12202 sub.l %d0,%d1 # add scale factor 12203 subi.l &0x6000,%d1 # subtract bias 12204 andi.w &0x7fff,%d1 12205 or.w %d2,%d1 # concat sign,exp 12206 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent 12207 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1 12208 mov.l (%sp)+,%d2 # restore d2 12209 bra.b fsqrt_sd_ovfl_dis 12210 12211# 12212# the move in MAY underflow. so... 12213# 12214fsqrt_sd_may_ovfl: 12215 btst &0x0,1+FP_SCR0_EX(%a6) # is exponent 0x3fff? 
12216 bne.w fsqrt_sd_ovfl # yes, so overflow 12217 12218 fmov.l &0x0,%fpsr # clear FPSR 12219 fmov.l L_SCR3(%a6),%fpcr # set FPCR 12220 12221 fsqrt.x FP_SCR0(%a6),%fp0 # perform absolute 12222 12223 fmov.l %fpsr,%d1 # save status 12224 fmov.l &0x0,%fpcr # clear FPCR 12225 12226 or.l %d1,USER_FPSR(%a6) # save INEX2,N 12227 12228 fmov.x %fp0,%fp1 # make a copy of result 12229 fcmp.b %fp1,&0x1 # is |result| >= 1.b? 12230 fbge.w fsqrt_sd_ovfl_tst # yes; overflow has occurred 12231 12232# no, it didn't overflow; we have correct result 12233 bra.w fsqrt_sd_normal_exit 12234 12235########################################################################## 12236 12237# 12238# input is not normalized; what is it? 12239# 12240fsqrt_not_norm: 12241 cmpi.b %d1,&DENORM # weed out DENORM 12242 beq.w fsqrt_denorm 12243 cmpi.b %d1,&ZERO # weed out ZERO 12244 beq.b fsqrt_zero 12245 cmpi.b %d1,&INF # weed out INF 12246 beq.b fsqrt_inf 12247 cmpi.b %d1,&SNAN # weed out SNAN 12248 beq.l res_snan_1op 12249 bra.l res_qnan_1op 12250 12251# 12252# fsqrt(+0) = +0 12253# fsqrt(-0) = -0 12254# fsqrt(+INF) = +INF 12255# fsqrt(-INF) = OPERR 12256# 12257fsqrt_zero: 12258 tst.b SRC_EX(%a0) # is ZERO positive or negative? 12259 bmi.b fsqrt_zero_m # negative 12260fsqrt_zero_p: 12261 fmov.s &0x00000000,%fp0 # return +ZERO 12262 mov.b &z_bmask,FPSR_CC(%a6) # set 'Z' ccode bit 12263 rts 12264fsqrt_zero_m: 12265 fmov.s &0x80000000,%fp0 # return -ZERO 12266 mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits 12267 rts 12268 12269fsqrt_inf: 12270 tst.b SRC_EX(%a0) # is INF positive or negative? 
12271 bmi.l res_operr # negative 12272fsqrt_inf_p: 12273 fmovm.x SRC(%a0),&0x80 # return +INF in fp0 12274 mov.b &inf_bmask,FPSR_CC(%a6) # set 'I' ccode bit 12275 rts 12276 12277######################################################################### 12278# XDEF **************************************************************** # 12279# fetch_dreg(): fetch register according to index in d1 # 12280# # 12281# XREF **************************************************************** # 12282# None # 12283# # 12284# INPUT *************************************************************** # 12285# d1 = index of register to fetch from # 12286# # 12287# OUTPUT ************************************************************** # 12288# d0 = value of register fetched # 12289# # 12290# ALGORITHM *********************************************************** # 12291# According to the index value in d1 which can range from zero # 12292# to fifteen, load the corresponding register file value (where # 12293# address register indexes start at 8). D0/D1/A0/A1/A6/A7 are on the # 12294# stack. The rest should still be in their original places. # 12295# # 12296######################################################################### 12297 12298# this routine leaves d1 intact for subsequent store_dreg calls. 
	global		fetch_dreg
fetch_dreg:
	mov.w		(tbl_fdreg.b,%pc,%d1.w*2),%d0	# fetch jump offset
	jmp		(tbl_fdreg.b,%pc,%d0.w*1)	# jump to register handler

tbl_fdreg:
	short		fdreg0 - tbl_fdreg
	short		fdreg1 - tbl_fdreg
	short		fdreg2 - tbl_fdreg
	short		fdreg3 - tbl_fdreg
	short		fdreg4 - tbl_fdreg
	short		fdreg5 - tbl_fdreg
	short		fdreg6 - tbl_fdreg
	short		fdreg7 - tbl_fdreg
	short		fdreg8 - tbl_fdreg
	short		fdreg9 - tbl_fdreg
	short		fdrega - tbl_fdreg
	short		fdregb - tbl_fdreg
	short		fdregc - tbl_fdreg
	short		fdregd - tbl_fdreg
	short		fdrege - tbl_fdreg
	short		fdregf - tbl_fdreg

fdreg0:
	mov.l		EXC_DREGS+0x0(%a6),%d0	# d0 is saved on the stack frame
	rts
fdreg1:
	mov.l		EXC_DREGS+0x4(%a6),%d0	# d1 is saved on the stack frame
	rts
fdreg2:
	mov.l		%d2,%d0
	rts
fdreg3:
	mov.l		%d3,%d0
	rts
fdreg4:
	mov.l		%d4,%d0
	rts
fdreg5:
	mov.l		%d5,%d0
	rts
fdreg6:
	mov.l		%d6,%d0
	rts
fdreg7:
	mov.l		%d7,%d0
	rts
fdreg8:
	mov.l		EXC_DREGS+0x8(%a6),%d0	# a0 is saved on the stack frame
	rts
fdreg9:
	mov.l		EXC_DREGS+0xc(%a6),%d0	# a1 is saved on the stack frame
	rts
fdrega:
	mov.l		%a2,%d0
	rts
fdregb:
	mov.l		%a3,%d0
	rts
fdregc:
	mov.l		%a4,%d0
	rts
fdregd:
	mov.l		%a5,%d0
	rts
fdrege:
	mov.l		(%a6),%d0		# old a6 is saved at (%a6)
	rts
fdregf:
	mov.l		EXC_A7(%a6),%d0		# a7 is saved on the stack frame
	rts

#########################################################################
# XDEF ****************************************************************	#
#	store_dreg_l(): store longword to data register specified by d1	#
#									#
# XREF ****************************************************************	#
#	None								#
#									#
# INPUT ***************************************************************	#
#	d0 = longword value to store					#
#	d1 = index of register to fetch from				#
#									#
# OUTPUT **************************************************************	#
#	(data register is updated)					#
#									#
# ALGORITHM ***********************************************************	#
#	According to the index value in d1, store the longword value	#
# in d0 to the corresponding data register. D0/D1 are on the stack	#
# while the rest are in their initial places.				#
#									#
#########################################################################

	global		store_dreg_l
store_dreg_l:
	mov.w		(tbl_sdregl.b,%pc,%d1.w*2),%d1	# fetch jump offset
	jmp		(tbl_sdregl.b,%pc,%d1.w*1)	# jump to register handler

tbl_sdregl:
	short		sdregl0 - tbl_sdregl
	short		sdregl1 - tbl_sdregl
	short		sdregl2 - tbl_sdregl
	short		sdregl3 - tbl_sdregl
	short		sdregl4 - tbl_sdregl
	short		sdregl5 - tbl_sdregl
	short		sdregl6 - tbl_sdregl
	short		sdregl7 - tbl_sdregl

sdregl0:
	mov.l		%d0,EXC_DREGS+0x0(%a6)	# d0 is saved on the stack frame
	rts
sdregl1:
	mov.l		%d0,EXC_DREGS+0x4(%a6)	# d1 is saved on the stack frame
	rts
sdregl2:
	mov.l		%d0,%d2
	rts
sdregl3:
	mov.l		%d0,%d3
	rts
sdregl4:
	mov.l		%d0,%d4
	rts
sdregl5:
	mov.l		%d0,%d5
	rts
sdregl6:
	mov.l		%d0,%d6
	rts
sdregl7:
	mov.l		%d0,%d7
	rts

#########################################################################
# XDEF ****************************************************************	#
#	store_dreg_w(): store word to data register specified by d1	#
#									#
# XREF ****************************************************************	#
#	None								#
#									#
# INPUT ***************************************************************	#
#	d0 = word value to store					#
#	d1 = index of register to fetch from				#
#									#
# OUTPUT **************************************************************	#
#	(data register is updated)					#
#									#
# ALGORITHM ***********************************************************	#
#	According to the index value in d1, store the word value	#
# in d0 to the corresponding data register. D0/D1 are on the stack	#
# while the rest are in their initial places.				#
#									#
#########################################################################

	global		store_dreg_w
store_dreg_w:
	mov.w		(tbl_sdregw.b,%pc,%d1.w*2),%d1	# fetch jump offset
	jmp		(tbl_sdregw.b,%pc,%d1.w*1)	# jump to register handler

tbl_sdregw:
	short		sdregw0 - tbl_sdregw
	short		sdregw1 - tbl_sdregw
	short		sdregw2 - tbl_sdregw
	short		sdregw3 - tbl_sdregw
	short		sdregw4 - tbl_sdregw
	short		sdregw5 - tbl_sdregw
	short		sdregw6 - tbl_sdregw
	short		sdregw7 - tbl_sdregw

sdregw0:
	mov.w		%d0,2+EXC_DREGS+0x0(%a6) # low word of saved d0
	rts
sdregw1:
	mov.w		%d0,2+EXC_DREGS+0x4(%a6) # low word of saved d1
	rts
sdregw2:
	mov.w		%d0,%d2
	rts
sdregw3:
	mov.w		%d0,%d3
	rts
sdregw4:
	mov.w		%d0,%d4
	rts
sdregw5:
	mov.w		%d0,%d5
	rts
sdregw6:
	mov.w		%d0,%d6
	rts
sdregw7:
	mov.w		%d0,%d7
	rts

#########################################################################
# XDEF ****************************************************************	#
#	store_dreg_b(): store byte to data register specified by d1	#
#									#
# XREF ****************************************************************	#
#	None								#
#									#
# INPUT ***************************************************************	#
#	d0 = byte value to store					#
#	d1 = index of register to fetch from				#
#									#
# OUTPUT **************************************************************	#
#	(data register is updated)					#
#									#
# ALGORITHM ***********************************************************	#
#	According to the index value in d1, store the byte value	#
# in d0 to the corresponding data register. D0/D1 are on the stack	#
# while the rest are in their initial places.
# 12511# # 12512######################################################################### 12513 12514 global store_dreg_b 12515store_dreg_b: 12516 mov.w (tbl_sdregb.b,%pc,%d1.w*2),%d1 12517 jmp (tbl_sdregb.b,%pc,%d1.w*1) 12518 12519tbl_sdregb: 12520 short sdregb0 - tbl_sdregb 12521 short sdregb1 - tbl_sdregb 12522 short sdregb2 - tbl_sdregb 12523 short sdregb3 - tbl_sdregb 12524 short sdregb4 - tbl_sdregb 12525 short sdregb5 - tbl_sdregb 12526 short sdregb6 - tbl_sdregb 12527 short sdregb7 - tbl_sdregb 12528 12529sdregb0: 12530 mov.b %d0,3+EXC_DREGS+0x0(%a6) 12531 rts 12532sdregb1: 12533 mov.b %d0,3+EXC_DREGS+0x4(%a6) 12534 rts 12535sdregb2: 12536 mov.b %d0,%d2 12537 rts 12538sdregb3: 12539 mov.b %d0,%d3 12540 rts 12541sdregb4: 12542 mov.b %d0,%d4 12543 rts 12544sdregb5: 12545 mov.b %d0,%d5 12546 rts 12547sdregb6: 12548 mov.b %d0,%d6 12549 rts 12550sdregb7: 12551 mov.b %d0,%d7 12552 rts 12553 12554######################################################################### 12555# XDEF **************************************************************** # 12556# inc_areg(): increment an address register by the value in d0 # 12557# # 12558# XREF **************************************************************** # 12559# None # 12560# # 12561# INPUT *************************************************************** # 12562# d0 = amount to increment by # 12563# d1 = index of address register to increment # 12564# # 12565# OUTPUT ************************************************************** # 12566# (address register is updated) # 12567# # 12568# ALGORITHM *********************************************************** # 12569# Typically used for an instruction w/ a post-increment <ea>, # 12570# this routine adds the increment value in d0 to the address register # 12571# specified by d1. A0/A1/A6/A7 reside on the stack. The rest reside # 12572# in their original places. # 12573# For a7, if the increment amount is one, then we have to # 12574# increment by two. 
# For any a7 update, set the mia7_flag so that if			#
# an access error exception occurs later in emulation, this address	#
# register update can be undone.					#
#									#
#########################################################################

	global		inc_areg
inc_areg:
	mov.w		(tbl_iareg.b,%pc,%d1.w*2),%d1	# fetch jump offset
	jmp		(tbl_iareg.b,%pc,%d1.w*1)	# jump to register handler

tbl_iareg:
	short		iareg0 - tbl_iareg
	short		iareg1 - tbl_iareg
	short		iareg2 - tbl_iareg
	short		iareg3 - tbl_iareg
	short		iareg4 - tbl_iareg
	short		iareg5 - tbl_iareg
	short		iareg6 - tbl_iareg
	short		iareg7 - tbl_iareg

iareg0:	add.l		%d0,EXC_DREGS+0x8(%a6)	# a0 is saved on the stack frame
	rts
iareg1:	add.l		%d0,EXC_DREGS+0xc(%a6)	# a1 is saved on the stack frame
	rts
iareg2:	add.l		%d0,%a2
	rts
iareg3:	add.l		%d0,%a3
	rts
iareg4:	add.l		%d0,%a4
	rts
iareg5:	add.l		%d0,%a5
	rts
iareg6:	add.l		%d0,(%a6)		# old a6 is saved at (%a6)
	rts
iareg7:	mov.b		&mia7_flg,SPCOND_FLG(%a6) # flag a7 update for undo
	cmpi.b		%d0,&0x1		# increment amount of one?
	beq.b		iareg7b			# yes; use two instead (see header)
	add.l		%d0,EXC_A7(%a6)
	rts
iareg7b:
	addq.l		&0x2,EXC_A7(%a6)
	rts

#########################################################################
# XDEF ****************************************************************	#
#	dec_areg(): decrement an address register by the value in d0	#
#									#
# XREF ****************************************************************	#
#	None								#
#									#
# INPUT ***************************************************************	#
#	d0 = amount to decrement by					#
#	d1 = index of address register to decrement			#
#									#
# OUTPUT **************************************************************	#
#	(address register is updated)					#
#									#
# ALGORITHM ***********************************************************	#
#	Typically used for an instruction w/ a pre-decrement <ea>,	#
# this routine adds the decrement value in d0 to the address register	#
12635# specified by d1. A0/A1/A6/A7 reside on the stack. The rest reside # 12636# in their original places. # 12637# For a7, if the decrement amount is one, then we have to # 12638# decrement by two. For any a7 update, set the mda7_flag so that if # 12639# an access error exception occurs later in emulation, this address # 12640# register update can be undone. # 12641# # 12642######################################################################### 12643 12644 global dec_areg 12645dec_areg: 12646 mov.w (tbl_dareg.b,%pc,%d1.w*2),%d1 12647 jmp (tbl_dareg.b,%pc,%d1.w*1) 12648 12649tbl_dareg: 12650 short dareg0 - tbl_dareg 12651 short dareg1 - tbl_dareg 12652 short dareg2 - tbl_dareg 12653 short dareg3 - tbl_dareg 12654 short dareg4 - tbl_dareg 12655 short dareg5 - tbl_dareg 12656 short dareg6 - tbl_dareg 12657 short dareg7 - tbl_dareg 12658 12659dareg0: sub.l %d0,EXC_DREGS+0x8(%a6) 12660 rts 12661dareg1: sub.l %d0,EXC_DREGS+0xc(%a6) 12662 rts 12663dareg2: sub.l %d0,%a2 12664 rts 12665dareg3: sub.l %d0,%a3 12666 rts 12667dareg4: sub.l %d0,%a4 12668 rts 12669dareg5: sub.l %d0,%a5 12670 rts 12671dareg6: sub.l %d0,(%a6) 12672 rts 12673dareg7: mov.b &mda7_flg,SPCOND_FLG(%a6) 12674 cmpi.b %d0,&0x1 12675 beq.b dareg7b 12676 sub.l %d0,EXC_A7(%a6) 12677 rts 12678dareg7b: 12679 subq.l &0x2,EXC_A7(%a6) 12680 rts 12681 12682############################################################################## 12683 12684######################################################################### 12685# XDEF **************************************************************** # 12686# load_fpn1(): load FP register value into FP_SRC(a6). 
#									#
# XREF **************************************************************** #
#	None								#
#									#
# INPUT *************************************************************** #
#	d0 = index of FP register to load				#
#									#
# OUTPUT ************************************************************** #
#	FP_SRC(a6) = value loaded from FP register file			#
#	a0 = pointer to FP_SRC(a6)					#
#									#
# ALGORITHM *********************************************************** #
#	Using the index in d0, load FP_SRC(a6) with a number from the	#
# FP register file. Fp0/fp1 come from the exception stack frame;	#
# fp2-fp7 are read directly with fmovm.					#
#									#
#########################################################################

	global		load_fpn1
load_fpn1:
	mov.w		(tbl_load_fpn1.b,%pc,%d0.w*2), %d0 # fetch table offset
	jmp		(tbl_load_fpn1.b,%pc,%d0.w*1)	# jmp to "load" routine

tbl_load_fpn1:
	short		load_fpn1_0 - tbl_load_fpn1
	short		load_fpn1_1 - tbl_load_fpn1
	short		load_fpn1_2 - tbl_load_fpn1
	short		load_fpn1_3 - tbl_load_fpn1
	short		load_fpn1_4 - tbl_load_fpn1
	short		load_fpn1_5 - tbl_load_fpn1
	short		load_fpn1_6 - tbl_load_fpn1
	short		load_fpn1_7 - tbl_load_fpn1

load_fpn1_0:
	mov.l		0+EXC_FP0(%a6), 0+FP_SRC(%a6)	# copy stacked fp0
	mov.l		4+EXC_FP0(%a6), 4+FP_SRC(%a6)
	mov.l		8+EXC_FP0(%a6), 8+FP_SRC(%a6)
	lea		FP_SRC(%a6), %a0
	rts
load_fpn1_1:
	mov.l		0+EXC_FP1(%a6), 0+FP_SRC(%a6)	# copy stacked fp1
	mov.l		4+EXC_FP1(%a6), 4+FP_SRC(%a6)
	mov.l		8+EXC_FP1(%a6), 8+FP_SRC(%a6)
	lea		FP_SRC(%a6), %a0
	rts
load_fpn1_2:
	fmovm.x		&0x20, FP_SRC(%a6)		# store fp2
	lea		FP_SRC(%a6), %a0
	rts
load_fpn1_3:
	fmovm.x		&0x10, FP_SRC(%a6)		# store fp3
	lea		FP_SRC(%a6), %a0
	rts
load_fpn1_4:
	fmovm.x		&0x08, FP_SRC(%a6)		# store fp4
	lea		FP_SRC(%a6), %a0
	rts
load_fpn1_5:
	fmovm.x		&0x04, FP_SRC(%a6)		# store fp5
	lea		FP_SRC(%a6), %a0
	rts
load_fpn1_6:
	fmovm.x		&0x02, FP_SRC(%a6)		# store fp6
	lea		FP_SRC(%a6), %a0
	rts
load_fpn1_7:
	fmovm.x		&0x01, FP_SRC(%a6)		# store fp7
	lea		FP_SRC(%a6), %a0
	rts

#############################################################################

#########################################################################
# XDEF **************************************************************** #
#	load_fpn2(): load FP register value into FP_DST(a6).		#
#									#
# XREF **************************************************************** #
#	None								#
#									#
# INPUT *************************************************************** #
#	d0 = index of FP register to load				#
#									#
# OUTPUT ************************************************************** #
#	FP_DST(a6) = value loaded from FP register file			#
#	a0 = pointer to FP_DST(a6)					#
#									#
# ALGORITHM *********************************************************** #
#	Using the index in d0, load FP_DST(a6) with a number from the	#
# FP register file. Fp0/fp1 come from the exception stack frame;	#
# fp2-fp7 are read directly with fmovm.					#
#									#
#########################################################################

	global		load_fpn2
load_fpn2:
	mov.w		(tbl_load_fpn2.b,%pc,%d0.w*2), %d0 # fetch table offset
	jmp		(tbl_load_fpn2.b,%pc,%d0.w*1)	# jmp to "load" routine

tbl_load_fpn2:
	short		load_fpn2_0 - tbl_load_fpn2
	short		load_fpn2_1 - tbl_load_fpn2
	short		load_fpn2_2 - tbl_load_fpn2
	short		load_fpn2_3 - tbl_load_fpn2
	short		load_fpn2_4 - tbl_load_fpn2
	short		load_fpn2_5 - tbl_load_fpn2
	short		load_fpn2_6 - tbl_load_fpn2
	short		load_fpn2_7 - tbl_load_fpn2

load_fpn2_0:
	mov.l		0+EXC_FP0(%a6), 0+FP_DST(%a6)	# copy stacked fp0
	mov.l		4+EXC_FP0(%a6), 4+FP_DST(%a6)
	mov.l		8+EXC_FP0(%a6), 8+FP_DST(%a6)
	lea		FP_DST(%a6), %a0
	rts
load_fpn2_1:
	mov.l		0+EXC_FP1(%a6), 0+FP_DST(%a6)	# copy stacked fp1
	mov.l		4+EXC_FP1(%a6), 4+FP_DST(%a6)
	mov.l		8+EXC_FP1(%a6), 8+FP_DST(%a6)
	lea		FP_DST(%a6), %a0
	rts
load_fpn2_2:
	fmovm.x		&0x20, FP_DST(%a6)		# store fp2
	lea		FP_DST(%a6), %a0
	rts
load_fpn2_3:
	fmovm.x		&0x10, FP_DST(%a6)		# store fp3
	lea		FP_DST(%a6), %a0
	rts
load_fpn2_4:
	fmovm.x		&0x08, FP_DST(%a6)		# store fp4
	lea		FP_DST(%a6), %a0
	rts
load_fpn2_5:
	fmovm.x		&0x04, FP_DST(%a6)		# store fp5
	lea		FP_DST(%a6), %a0
	rts
load_fpn2_6:
	fmovm.x		&0x02, FP_DST(%a6)		# store fp6
	lea		FP_DST(%a6), %a0
	rts
load_fpn2_7:
	fmovm.x		&0x01, FP_DST(%a6)		# store fp7
	lea		FP_DST(%a6), %a0
	rts

#############################################################################

#########################################################################
# XDEF **************************************************************** #
#	store_fpreg(): store an fp value to the fpreg designated d0.	#
#									#
# XREF **************************************************************** #
#	None								#
#									#
# INPUT *************************************************************** #
#	fp0 = extended precision value to store				#
#	d0 = index of floating-point register				#
#									#
# OUTPUT ************************************************************** #
#	None								#
#									#
# ALGORITHM *********************************************************** #
#	Store the value in fp0 to the FP register designated by the	#
# value in d0. The FP number can be DENORM or SNAN so we have to be	#
# careful that we don't take an exception here.
#									#
#########################################################################

	global		store_fpreg
store_fpreg:
	mov.w		(tbl_store_fpreg.b,%pc,%d0.w*2), %d0 # fetch table offset
	jmp		(tbl_store_fpreg.b,%pc,%d0.w*1)	# jmp to "store" routine

tbl_store_fpreg:
	short		store_fpreg_0 - tbl_store_fpreg
	short		store_fpreg_1 - tbl_store_fpreg
	short		store_fpreg_2 - tbl_store_fpreg
	short		store_fpreg_3 - tbl_store_fpreg
	short		store_fpreg_4 - tbl_store_fpreg
	short		store_fpreg_5 - tbl_store_fpreg
	short		store_fpreg_6 - tbl_store_fpreg
	short		store_fpreg_7 - tbl_store_fpreg

store_fpreg_0:
	fmovm.x		&0x80, EXC_FP0(%a6)	# into stacked fp0
	rts
store_fpreg_1:
	fmovm.x		&0x80, EXC_FP1(%a6)	# into stacked fp1
	rts
# for fp2-fp7, bounce the value through the stack with fmovm so no
# arithmetic move (and hence no FP exception) takes place.
store_fpreg_2:
	fmovm.x		&0x01, -(%sp)		# push fp0
	fmovm.x		(%sp)+, &0x20		# pop into fp2
	rts
store_fpreg_3:
	fmovm.x		&0x01, -(%sp)		# push fp0
	fmovm.x		(%sp)+, &0x10		# pop into fp3
	rts
store_fpreg_4:
	fmovm.x		&0x01, -(%sp)		# push fp0
	fmovm.x		(%sp)+, &0x08		# pop into fp4
	rts
store_fpreg_5:
	fmovm.x		&0x01, -(%sp)		# push fp0
	fmovm.x		(%sp)+, &0x04		# pop into fp5
	rts
store_fpreg_6:
	fmovm.x		&0x01, -(%sp)		# push fp0
	fmovm.x		(%sp)+, &0x02		# pop into fp6
	rts
store_fpreg_7:
	fmovm.x		&0x01, -(%sp)		# push fp0
	fmovm.x		(%sp)+, &0x01		# pop into fp7
	rts

#########################################################################
# XDEF **************************************************************** #
#	get_packed(): fetch a packed operand from memory and then	#
#		      convert it to a floating-point binary number.	#
#									#
# XREF **************************************************************** #
#	_dcalc_ea() - calculate the correct <ea>			#
#	_dmem_read() - fetch the packed operand from memory		#
#	facc_in_x() - the fetch failed so jump to special exit code	#
#	decbin() - convert packed to binary extended precision		#
#									#
# INPUT *************************************************************** #
#	None								#
#									#
# OUTPUT ************************************************************** #
#	If no failure on _dmem_read():					#
#		FP_SRC(a6) = packed operand now as a binary FP number	#
#									#
# ALGORITHM *********************************************************** #
#	Get the correct <ea> which is the value on the exception stack	#
# frame w/ maybe a correction factor if the <ea> is -(an) or (an)+.	#
# Then, fetch the operand from memory. If the fetch fails, exit		#
# through facc_in_x().							#
#	If the packed operand is a ZERO,NAN, or INF, convert it to	#
# its binary representation here. Else, call decbin() which will	#
# convert the packed value to an extended precision binary value.	#
#									#
#########################################################################

# the stacked <ea> for packed is correct except for -(An).
# the base reg must be updated for both -(An) and (An)+.
	global		get_packed
get_packed:
	mov.l		&0xc,%d0		# packed is 12 bytes
	bsr.l		_dcalc_ea		# fetch <ea>; correct An

	lea		FP_SRC(%a6),%a1		# pass: ptr to super dst
	mov.l		&0xc,%d0		# pass: 12 bytes
	bsr.l		_dmem_read		# read packed operand

	tst.l		%d1			# did dfetch fail?
	bne.l		facc_in_x		# yes

# The packed operand is an INF or a NAN if the exponent field is all ones.
	bfextu		FP_SRC(%a6){&1:&15},%d0	# get exp
	cmpi.w		%d0,&0x7fff		# INF or NAN?
	bne.b		gp_try_zero		# no
	rts					# operand is an INF or NAN

# The packed operand is a zero if the mantissa is all zero, else it's
# a normal packed op.
gp_try_zero:
	mov.b		3+FP_SRC(%a6),%d0	# get byte 4
	andi.b		&0x0f,%d0		# clear all but last nybble
	bne.b		gp_not_spec		# not a zero
	tst.l		FP_SRC_HI(%a6)		# is lw 2 zero?
	bne.b		gp_not_spec		# not a zero
	tst.l		FP_SRC_LO(%a6)		# is lw 3 zero?
	bne.b		gp_not_spec		# not a zero
	rts					# operand is a ZERO
gp_not_spec:
	lea		FP_SRC(%a6),%a0		# pass: ptr to packed op
	bsr.l		decbin			# convert to extended
	fmovm.x		&0x80,FP_SRC(%a6)	# make this the srcop
	rts

#########################################################################
# decbin(): Converts normalized packed bcd value pointed to by register	#
#	    a0 to extended-precision value in fp0.			#
#									#
# INPUT *************************************************************** #
#	a0 = pointer to normalized packed bcd value			#
#									#
# OUTPUT ************************************************************** #
#	fp0 = exact fp representation of the packed bcd value.		#
#									#
# ALGORITHM *********************************************************** #
#	Expected is a normal bcd (i.e. non-exceptional; all inf, zero,	#
#	and NaN operands are dispatched without entering this routine)	#
#	value in 68881/882 format at location (a0).			#
#									#
#	A1. Convert the bcd exponent to binary by successive adds and	#
#	muls. Set the sign according to SE. Subtract 16 to compensate	#
#	for the mantissa which is to be interpreted as 17 integer	#
#	digits, rather than 1 integer and 16 fraction digits.		#
#	Note: this operation can never overflow.			#
#									#
#	A2. Convert the bcd mantissa to binary by successive		#
#	adds and muls in FP0. Set the sign according to SM.
#	The mantissa digits will be converted with the decimal point	#
#	assumed following the least-significant digit.			#
#	Note: this operation can never overflow.			#
#									#
#	A3. Count the number of leading/trailing zeros in the		#
#	bcd string. If SE is positive, count the leading zeros;		#
#	if negative, count the trailing zeros. Set the adjusted		#
#	exponent equal to the exponent from A1 and the zero count	#
#	added if SM = 1 and subtracted if SM = 0. Scale the		#
#	mantissa the equivalent of forcing in the bcd value:		#
#									#
#	SM = 0	a non-zero digit in the integer position		#
#	SM = 1	a non-zero digit in Mant0, lsd of the fraction		#
#									#
#	this will insure that any value, regardless of its		#
#	representation (ex. 0.1E2, 1E1, 10E0, 100E-1), is converted	#
#	consistently.							#
#									#
#	A4. Calculate the factor 10^exp in FP1 using a table of		#
#	10^(2^n) values. To reduce the error in forming factors		#
#	greater than 10^27, a directed rounding scheme is used with	#
#	tables rounded to RN, RM, and RP, according to the table	#
#	in the comments of the pwrten section.				#
#									#
#	A5. Form the final binary number by scaling the mantissa by	#
#	the exponent factor. This is done by multiplying the		#
#	mantissa in FP0 by the factor in FP1 if the adjusted		#
#	exponent sign is positive, and dividing FP0 by FP1 if		#
#	it is negative.							#
#									#
#	Clean up and return. Check if the final mul or div was inexact.	#
#	If so, set INEX1 in USER_FPSR.					#
#									#
#########################################################################

#
# PTENRN, PTENRM, and PTENRP are arrays of powers of 10 rounded
# to nearest, minus, and plus, respectively. The tables include
# 10**{1,2,4,8,16,32,64,128,256,512,1024,2048,4096}. No rounding
# is required until the power is greater than 27; however, all
# tables include the first 5 for ease of indexing.
#
RTABLE:
	byte		0,0,0,0
	byte		2,3,2,3
	byte		2,3,3,2
	byte		3,2,2,3

	set		FNIBS,7
	set		FSTRT,0

	set		ESTRT,4
	set		EDIGITS,2

	global		decbin
decbin:
	mov.l		0x0(%a0),FP_SCR0_EX(%a6) # make a copy of input
	mov.l		0x4(%a0),FP_SCR0_HI(%a6) # so we don't alter it
	mov.l		0x8(%a0),FP_SCR0_LO(%a6)

	lea		FP_SCR0(%a6),%a0	# a0 -> working copy

	movm.l		&0x3c00,-(%sp)		# save d2-d5
	fmovm.x		&0x1,-(%sp)		# save fp1
#
# Calculate exponent:
#  1. Copy bcd value in memory for use as a working copy.
#  2. Calculate absolute value of exponent in d1 by mul and add.
#  3. Correct for exponent sign.
#  4. Subtract 16 to compensate for interpreting the mant as all integer digits.
#     (i.e., all digits assumed left of the decimal point.)
#
# Register usage:
#
#  calc_e:
#	(*)  d0: temp digit storage
#	(*)  d1: accumulator for binary exponent
#	(*)  d2: digit count
#	(*)  d3: offset pointer
#	( )  d4: first word of bcd
#	( )  a0: pointer to working bcd value
#	( )  a6: pointer to original bcd value
#	(*)  FP_SCR1: working copy of original bcd value
#	(*)  L_SCR1: copy of original exponent word
#
calc_e:
	mov.l		&EDIGITS,%d2	# # of nibbles (digits) in fraction part
	mov.l		&ESTRT,%d3	# counter to pick up digits
	mov.l		(%a0),%d4	# get first word of bcd
	clr.l		%d1		# zero d1 for accumulator
e_gd:
	mulu.l		&0xa,%d1	# mul partial product by one digit place
	bfextu		%d4{%d3:&4},%d0	# get the digit and zero extend into d0
	add.l		%d0,%d1		# d1 = d1 + d0
	addq.b		&4,%d3		# advance d3 to the next digit
	dbf.w		%d2,e_gd	# if we have used all 3 digits, exit loop
	btst		&30,%d4		# get SE
	beq.b		e_pos		# don't negate if pos
	neg.l		%d1		# negate before subtracting
e_pos:
	sub.l		&16,%d1		# sub to compensate for shift of mant
	bge.b		e_save		# if still pos, do not neg
	neg.l		%d1		# now negative, make pos and set SE
	or.l		&0x40000000,%d4	# set SE in d4,
	or.l		&0x40000000,(%a0) # and in working bcd
e_save:
	mov.l		%d1,-(%sp)	# save exp on stack
#
# Calculate mantissa:
#  1. Calculate absolute value of mantissa in fp0 by mul and add.
#  2. Correct for mantissa sign.
#     (i.e., all digits assumed left of the decimal point.)
#
# Register usage:
#
#  calc_m:
#	(*)  d0: temp digit storage
#	(*)  d1: lword counter
#	(*)  d2: digit count
#	(*)  d3: offset pointer
#	( )  d4: words 2 and 3 of bcd
#	( )  a0: pointer to working bcd value
#	( )  a6: pointer to original bcd value
#	(*) fp0: mantissa accumulator
#	( )  FP_SCR1: working copy of original bcd value
#	( )  L_SCR1: copy of original exponent word
#
calc_m:
	mov.l		&1,%d1		# word counter, init to 1
	fmov.s		&0x00000000,%fp0 # accumulator
#
# Since the packed number has a long word between the first & second parts,
# get the integer digit then skip down & get the rest of the
# mantissa. We will unroll the loop once.
#
	bfextu		(%a0){&28:&4},%d0 # integer part is ls digit in long word
	fadd.b		%d0,%fp0	# add digit to sum in fp0
#
# Get the rest of the mantissa.
#
loadlw:
	mov.l		(%a0,%d1.L*4),%d4 # load mantissa longword into d4
	mov.l		&FSTRT,%d3	# counter to pick up digits
	mov.l		&FNIBS,%d2	# reset number of digits per a0 ptr
md2b:
	fmul.s		&0x41200000,%fp0 # fp0 = fp0 * 10
	bfextu		%d4{%d3:&4},%d0	# get the digit and zero extend
	fadd.b		%d0,%fp0	# fp0 = fp0 + digit
#
# If all the digits (8) in that long word have been converted (d2=0),
# then inc d1 (=2) to point to the next long word and reset d3 to 0
# to initialize the digit offset, and set d2 to 7 for the digit count;
# else continue with this long word.
#
	addq.b		&4,%d3		# advance d3 to the next digit
	dbf.w		%d2,md2b	# check for last digit in this lw
nextlw:
	addq.l		&1,%d1		# inc lw pointer in mantissa
	cmp.l		%d1,&2		# test for last lw
	ble.b		loadlw		# if not, get last one
#
# Check the sign of the mant and make the value in fp0 the same sign.
#
m_sign:
	btst		&31,(%a0)	# test sign of the mantissa
	beq.b		ap_st_z		# if clear, go to append/strip zeros
	fneg.x		%fp0		# if set, negate fp0
#
# Append/strip zeros:
#
#  For adjusted exponents which have an absolute value greater than 27*,
#  this routine calculates the amount needed to normalize the mantissa
#  for the adjusted exponent. That number is subtracted from the exp
#  if the exp was positive, and added if it was negative. The purpose
#  of this is to reduce the value of the exponent and the possibility
#  of error in calculation of pwrten.
#
#  1. Branch on the sign of the adjusted exponent.
#  2p.(positive exp)
#   2. Check M16 and the digits in lwords 2 and 3 in descending order.
#   3. Add one for each zero encountered until a non-zero digit.
#   4. Subtract the count from the exp.
#   5. Check if the exp has crossed zero in #3 above; make the exp abs
#      and set SE.
#   6. Multiply the mantissa by 10**count.
#  2n.(negative exp)
#   2. Check the digits in lwords 3 and 2 in descending order.
#   3. Add one for each zero encountered until a non-zero digit.
#   4. Add the count to the exp.
#   5. Check if the exp has crossed zero in #3 above; clear SE.
#   6. Divide the mantissa by 10**count.
#
#  *Why 27? If the adjusted exponent is within -28 < expA < 28, then
#  any adjustment due to append/strip zeros will drive the resultant
#  exponent towards zero. Since all pwrten constants with a power
#  of 27 or less are exact, there is no need to use this routine to
#  attempt to lessen the resultant exponent.
#
# Register usage:
#
#  ap_st_z:
#	(*)  d0: temp digit storage
#	(*)  d1: zero count
#	(*)  d2: digit count
#	(*)  d3: offset pointer
#	( )  d4: first word of bcd
#	(*)  d5: lword counter
#	( )  a0: pointer to working bcd value
#	( )  FP_SCR1: working copy of original bcd value
#	( )  L_SCR1: copy of original exponent word
#
# First check the absolute value of the exponent to see if this
# routine is necessary. If so, then check the sign of the exponent
# and do append (+) or strip (-) zeros accordingly.
# This section handles a positive adjusted exponent.
#
ap_st_z:
	mov.l		(%sp),%d1	# load expA for range test
	cmp.l		%d1,&27		# test is within 27
	ble.w		pwrten		# if abs(expA) <28, skip ap/st zeros
	btst		&30,(%a0)	# check sign of exp
	bne.b		ap_st_n		# if neg, go to neg side
	clr.l		%d1		# zero count reg
	mov.l		(%a0),%d4	# load lword 1 to d4
	bfextu		%d4{&28:&4},%d0	# get M16 in d0
	bne.b		ap_p_fx		# if M16 is non-zero, go fix exp
	addq.l		&1,%d1		# inc zero count
	mov.l		&1,%d5		# init lword counter
	mov.l		(%a0,%d5.L*4),%d4 # get lword 2 to d4
	bne.b		ap_p_cl		# if lw 2 is non-zero, don't count it
	addq.l		&8,%d1		# lw 2 all zero; inc count by 8
	addq.l		&1,%d5		# inc lword counter
	mov.l		(%a0,%d5.L*4),%d4 # get lword 3 to d4
ap_p_cl:
	clr.l		%d3		# init offset reg
	mov.l		&7,%d2		# init digit counter
ap_p_gd:
	bfextu		%d4{%d3:&4},%d0	# get digit
	bne.b		ap_p_fx		# if non-zero, go to fix exp
	addq.l		&4,%d3		# point to next digit
	addq.l		&1,%d1		# inc digit counter
	dbf.w		%d2,ap_p_gd	# get next digit
ap_p_fx:
	mov.l		%d1,%d0		# copy counter to d0
	mov.l		(%sp),%d1	# get adjusted exp from memory
	sub.l		%d0,%d1		# subtract count from exp
	bge.b		ap_p_fm		# if still pos, go to pwrten
	neg.l		%d1		# now its neg; get abs
	mov.l		(%a0),%d4	# load lword 1 to d4
	or.l		&0x40000000,%d4	# and set SE in d4
	or.l		&0x40000000,(%a0) # and in memory
#
# Calculate the mantissa multiplier to compensate for the stripping of
# zeros from the mantissa.
#
ap_p_fm:
	lea.l		PTENRN(%pc),%a1	# get address of power-of-ten table
	clr.l		%d3		# init table index
	fmov.s		&0x3f800000,%fp1 # init fp1 to 1
	mov.l		&3,%d2		# init d2 to count bits in counter
ap_p_el:
	asr.l		&1,%d0		# shift lsb into carry
	bcc.b		ap_p_en		# if bit clear, skip the multiply
	fmul.x		(%a1,%d3),%fp1	# mul by 10**(d3_bit_no)
ap_p_en:
	add.l		&12,%d3		# inc d3 to next rtable entry
	tst.l		%d0		# check if d0 is zero
	bne.b		ap_p_el		# if not, get next bit
	fmul.x		%fp1,%fp0	# mul mantissa by 10**(no_bits_shifted)
	bra.b		pwrten		# go calc pwrten
#
# This section handles a negative adjusted exponent.
#
ap_st_n:
	clr.l		%d1		# clr counter
	mov.l		&2,%d5		# set up d5 to point to lword 3
	mov.l		(%a0,%d5.L*4),%d4 # get lword 3
	bne.b		ap_n_cl		# if not zero, check digits
	sub.l		&1,%d5		# dec d5 to point to lword 2
	addq.l		&8,%d1		# inc counter by 8
	mov.l		(%a0,%d5.L*4),%d4 # get lword 2
ap_n_cl:
	mov.l		&28,%d3		# point to last digit
	mov.l		&7,%d2		# init digit counter
ap_n_gd:
	bfextu		%d4{%d3:&4},%d0	# get digit
	bne.b		ap_n_fx		# if non-zero, go to exp fix
	subq.l		&4,%d3		# point to previous digit
	addq.l		&1,%d1		# inc digit counter
	dbf.w		%d2,ap_n_gd	# get next digit
ap_n_fx:
	mov.l		%d1,%d0		# copy counter to d0
	mov.l		(%sp),%d1	# get adjusted exp from memory
	sub.l		%d0,%d1		# subtract count from exp
	bgt.b		ap_n_fm		# if still pos, go fix mantissa
	neg.l		%d1		# take abs of exp and clr SE
	mov.l		(%a0),%d4	# load lword 1 to d4
	and.l		&0xbfffffff,%d4	# and clr SE in d4
	and.l		&0xbfffffff,(%a0) # and in memory
#
# Calculate the mantissa multiplier to compensate for the appending of
# zeros to the mantissa.
#
ap_n_fm:
	lea.l		PTENRN(%pc),%a1	# get address of power-of-ten table
	clr.l		%d3		# init table index
	fmov.s		&0x3f800000,%fp1 # init fp1 to 1
	mov.l		&3,%d2		# init d2 to count bits in counter
ap_n_el:
	asr.l		&1,%d0		# shift lsb into carry
	bcc.b		ap_n_en		# if bit clear, skip the multiply
	fmul.x		(%a1,%d3),%fp1	# mul by 10**(d3_bit_no)
ap_n_en:
	add.l		&12,%d3		# inc d3 to next rtable entry
	tst.l		%d0		# check if d0 is zero
	bne.b		ap_n_el		# if not, get next bit
	fdiv.x		%fp1,%fp0	# div mantissa by 10**(no_bits_shifted)
#
# Calculate power-of-ten factor from adjusted and shifted exponent.
#
# Register usage:
#
#  pwrten:
#	(*)  d0: temp
#	( )  d1: exponent
#	(*)  d2: {FPCR[6:5],SM,SE} as index in RTABLE; temp
#	(*)  d3: FPCR work copy
#	( )  d4: first word of bcd
#	(*)  a1: RTABLE pointer
#  calc_p:
#	(*)  d0: temp
#	( )  d1: exponent
#	(*)  d3: PWRTxx table index
#	( )  a0: pointer to working copy of bcd
#	(*)  a1: PWRTxx pointer
#	(*) fp1: power-of-ten accumulator
#
# Pwrten calculates the exponent factor in the selected rounding mode
# according to the following table:
#
#	Sign of Mant  Sign of Exp  Rounding Mode  PWRTEN Rounding Mode
#
#	ANY	      ANY	   RN		  RN
#
#	 +	       +	   RP		  RP
#	 -	       +	   RP		  RM
#	 +	       -	   RP		  RM
#	 -	       -	   RP		  RP
#
#	 +	       +	   RM		  RM
#	 -	       +	   RM		  RP
#	 +	       -	   RM		  RP
#	 -	       -	   RM		  RM
#
#	 +	       +	   RZ		  RM
#	 -	       +	   RZ		  RM
#	 +	       -	   RZ		  RP
#	 -	       -	   RZ		  RP
#
pwrten:
	mov.l		USER_FPCR(%a6),%d3 # get user's FPCR
	bfextu		%d3{&26:&2},%d2	# isolate rounding mode bits
	mov.l		(%a0),%d4	# reload 1st bcd word to d4
	asl.l		&2,%d2		# format d2 to be
	bfextu		%d4{&0:&2},%d0	# {FPCR[6],FPCR[5],SM,SE}
	add.l		%d0,%d2		# in d2 as index into RTABLE
	lea.l		RTABLE(%pc),%a1	# load rtable base
	mov.b		(%a1,%d2),%d0	# load new rounding bits from table
	clr.l		%d3		# clear d3 to force no exc and extended
	bfins		%d0,%d3{&26:&2}	# stuff new rounding bits in FPCR
	fmov.l		%d3,%fpcr	# write new FPCR
	asr.l		&1,%d0		# write correct PTENxx table
	bcc.b		not_rp		# to a1
	lea.l		PTENRP(%pc),%a1	# it is RP
	bra.b		calc_p		# go to init section
not_rp:
	asr.l		&1,%d0		# keep checking
	bcc.b		not_rm
	lea.l		PTENRM(%pc),%a1	# it is RM
	bra.b		calc_p		# go to init section
not_rm:
	lea.l		PTENRN(%pc),%a1	# it is RN
calc_p:
	mov.l		%d1,%d0		# copy exp to d0;use d0
	bpl.b		no_neg		# if exp is negative,
	neg.l		%d0		# invert it
	or.l		&0x40000000,(%a0) # and set SE bit
no_neg:
	clr.l		%d3		# table index
	fmov.s		&0x3f800000,%fp1 # init fp1 to 1
e_loop:
	asr.l		&1,%d0		# shift next bit into carry
	bcc.b		e_next		# if zero, skip the mul
	fmul.x		(%a1,%d3),%fp1	# mul by 10**(d3_bit_no)
e_next:
	add.l		&12,%d3		# inc d3 to next rtable entry
	tst.l		%d0		# check if d0 is zero
	bne.b		e_loop		# not zero, continue shifting
#
# Check the sign of the adjusted exp and make the value in fp0 the
# same sign. If the exp was pos then multiply fp1*fp0;
# else divide fp0/fp1.
#
# Register Usage:
#  norm:
#	( )  a0: pointer to working bcd value
#	(*) fp0: mantissa accumulator
#	( ) fp1: scaling factor - 10**(abs(exp))
#
pnorm:
	btst		&30,(%a0)	# test the sign of the exponent
	beq.b		mul		# if clear, go to multiply
div:
	fdiv.x		%fp1,%fp0	# exp is negative, so divide mant by exp
	bra.b		end_dec
mul:
	fmul.x		%fp1,%fp0	# exp is positive, so multiply by exp
#
# Clean up and return with result in fp0.
#
# If the final mul/div in decbin incurred an inex exception,
# it will be inex2, but will be reported as inex1 by get_op.
#
end_dec:
	fmov.l		%fpsr,%d0	# get status register
	bclr		&inex2_bit+8,%d0 # test for inex2 and clear it
	beq.b		no_exc		# skip this if no exc
	ori.w		&inx1a_mask,2+USER_FPSR(%a6) # set INEX1/AINEX
no_exc:
	add.l		&0x4,%sp	# clear 1 lw param
	fmovm.x		(%sp)+,&0x40	# restore fp1
	movm.l		(%sp)+,&0x3c	# restore d2-d5
	fmov.l		&0x0,%fpcr	# clear FPCR
	fmov.l		&0x0,%fpsr	# clear FPSR
	rts

#########################################################################
# bindec(): Converts an input in extended precision format to bcd format#
#									#
# INPUT *************************************************************** #
#	a0 = pointer to the input extended precision value in memory.	#
#	     the input may be either normalized, unnormalized, or	#
#	     denormalized.						#
#	d0 = contains the k-factor sign-extended to 32-bits.		#
#									#
# OUTPUT ************************************************************** #
#	FP_SCR0(a6) = bcd format result on the stack.			#
#									#
# ALGORITHM *********************************************************** #
#									#
#	A1. Set RM and size ext; Set SIGMA = sign of input.		#
#	    The k-factor is saved for use in d7. Clear the		#
#	    BINDEC_FLG for separating normalized/denormalized		#
#	    input. If input is unnormalized or denormalized,		#
#	    normalize it.						#
#									#
#	A2. Set X = abs(input).						#
#									#
#	A3. Compute ILOG.						#
#	    ILOG is the log base 10 of the input value. It is		#
#	    approximated by adding e + 0.f when the original		#
#	    value is viewed as 2^^e * 1.f in extended precision.	#
#	    This value is stored in d6.					#
#									#
#	A4. Clr INEX bit.						#
#	    The operation in A3 above may have set INEX2.		#
#									#
#	A5. Set ICTR = 0;						#
#	    ICTR is a flag used in A13. It must be set before the	#
#	    loop entry A6.						#
#									#
#	A6. Calculate LEN.						#
#	    LEN is the number of digits to be displayed.
The # 13468# k-factor can dictate either the total number of digits, # 13469# if it is a positive number, or the number of digits # 13470# after the decimal point which are to be included as # 13471# significant. See the 68882 manual for examples. # 13472# If LEN is computed to be greater than 17, set OPERR in # 13473# USER_FPSR. LEN is stored in d4. # 13474# # 13475# A7. Calculate SCALE. # 13476# SCALE is equal to 10^ISCALE, where ISCALE is the number # 13477# of decimal places needed to insure LEN integer digits # 13478# in the output before conversion to bcd. LAMBDA is the # 13479# sign of ISCALE, used in A9. Fp1 contains # 13480# 10^^(abs(ISCALE)) using a rounding mode which is a # 13481# function of the original rounding mode and the signs # 13482# of ISCALE and X. A table is given in the code. # 13483# # 13484# A8. Clr INEX; Force RZ. # 13485# The operation in A3 above may have set INEX2. # 13486# RZ mode is forced for the scaling operation to insure # 13487# only one rounding error. The grs bits are collected in # 13488# the INEX flag for use in A10. # 13489# # 13490# A9. Scale X -> Y. # 13491# The mantissa is scaled to the desired number of # 13492# significant digits. The excess digits are collected # 13493# in INEX2. # 13494# # 13495# A10. Or in INEX. # 13496# If INEX is set, round error occurred. This is # 13497# compensated for by 'or-ing' in the INEX2 flag to # 13498# the lsb of Y. # 13499# # 13500# A11. Restore original FPCR; set size ext. # 13501# Perform FINT operation in the user's rounding mode. # 13502# Keep the size to extended. # 13503# # 13504# A12. Calculate YINT = FINT(Y) according to user's rounding # 13505# mode. The FPSP routine sintd0 is used. The output # 13506# is in fp0. # 13507# # 13508# A13. Check for LEN digits. # 13509# If the int operation results in more than LEN digits, # 13510# or less than LEN -1 digits, adjust ILOG and repeat from # 13511# A6. This test occurs only on the first pass. 
If the # 13512# result is exactly 10^LEN, decrement ILOG and divide # 13513# the mantissa by 10. # 13514# # 13515# A14. Convert the mantissa to bcd. # 13516# The binstr routine is used to convert the LEN digit # 13517# mantissa to bcd in memory. The input to binstr is # 13518# to be a fraction; i.e. (mantissa)/10^LEN and adjusted # 13519# such that the decimal point is to the left of bit 63. # 13520# The bcd digits are stored in the correct position in # 13521# the final string area in memory. # 13522# # 13523# A15. Convert the exponent to bcd. # 13524# As in A14 above, the exp is converted to bcd and the # 13525# digits are stored in the final string. # 13526# Test the length of the final exponent string. If the # 13527# length is 4, set operr. # 13528# # 13529# A16. Write sign bits to final string. # 13530# # 13531######################################################################### 13532 13533set BINDEC_FLG, EXC_TEMP # DENORM flag 13534 13535# Constants in extended precision 13536PLOG2: 13537 long 0x3FFD0000,0x9A209A84,0xFBCFF798,0x00000000 13538PLOG2UP1: 13539 long 0x3FFD0000,0x9A209A84,0xFBCFF799,0x00000000 13540 13541# Constants in single precision 13542FONE: 13543 long 0x3F800000,0x00000000,0x00000000,0x00000000 13544FTWO: 13545 long 0x40000000,0x00000000,0x00000000,0x00000000 13546FTEN: 13547 long 0x41200000,0x00000000,0x00000000,0x00000000 13548F4933: 13549 long 0x459A2800,0x00000000,0x00000000,0x00000000 13550 13551RBDTBL: 13552 byte 0,0,0,0 13553 byte 3,3,2,2 13554 byte 3,2,2,3 13555 byte 2,3,3,2 13556 13557# Implementation Notes: 13558# 13559# The registers are used as follows: 13560# 13561# d0: scratch; LEN input to binstr 13562# d1: scratch 13563# d2: upper 32-bits of mantissa for binstr 13564# d3: scratch;lower 32-bits of mantissa for binstr 13565# d4: LEN 13566# d5: LAMBDA/ICTR 13567# d6: ILOG 13568# d7: k-factor 13569# a0: ptr for original operand/final result 13570# a1: scratch pointer 13571# a2: pointer to FP_X; abs(original value) in ext 
13572# fp0: scratch 13573# fp1: scratch 13574# fp2: scratch 13575# F_SCR1: 13576# F_SCR2: 13577# L_SCR1: 13578# L_SCR2: 13579 13580 global bindec 13581bindec: 13582 movm.l &0x3f20,-(%sp) # {%d2-%d7/%a2} 13583 fmovm.x &0x7,-(%sp) # {%fp0-%fp2} 13584 13585# A1. Set RM and size ext. Set SIGMA = sign input; 13586# The k-factor is saved for use in d7. Clear BINDEC_FLG for 13587# separating normalized/denormalized input. If the input 13588# is a denormalized number, set the BINDEC_FLG memory word 13589# to signal denorm. If the input is unnormalized, normalize 13590# the input and test for denormalized result. 13591# 13592 fmov.l &rm_mode*0x10,%fpcr # set RM and ext 13593 mov.l (%a0),L_SCR2(%a6) # save exponent for sign check 13594 mov.l %d0,%d7 # move k-factor to d7 13595 13596 clr.b BINDEC_FLG(%a6) # clr norm/denorm flag 13597 cmpi.b STAG(%a6),&DENORM # is input a DENORM? 13598 bne.w A2_str # no; input is a NORM 13599 13600# 13601# Normalize the denorm 13602# 13603un_de_norm: 13604 mov.w (%a0),%d0 13605 and.w &0x7fff,%d0 # strip sign of normalized exp 13606 mov.l 4(%a0),%d1 13607 mov.l 8(%a0),%d2 13608norm_loop: 13609 sub.w &1,%d0 13610 lsl.l &1,%d2 13611 roxl.l &1,%d1 13612 tst.l %d1 13613 bge.b norm_loop 13614# 13615# Test if the normalized input is denormalized 13616# 13617 tst.w %d0 13618 bgt.b pos_exp # if greater than zero, it is a norm 13619 st BINDEC_FLG(%a6) # set flag for denorm 13620pos_exp: 13621 and.w &0x7fff,%d0 # strip sign of normalized exp 13622 mov.w %d0,(%a0) 13623 mov.l %d1,4(%a0) 13624 mov.l %d2,8(%a0) 13625 13626# A2. Set X = abs(input). 13627# 13628A2_str: 13629 mov.l (%a0),FP_SCR1(%a6) # move input to work space 13630 mov.l 4(%a0),FP_SCR1+4(%a6) # move input to work space 13631 mov.l 8(%a0),FP_SCR1+8(%a6) # move input to work space 13632 and.l &0x7fffffff,FP_SCR1(%a6) # create abs(X) 13633 13634# A3. Compute ILOG. 13635# ILOG is the log base 10 of the input value. 
It is approx- 13636# imated by adding e + 0.f when the original value is viewed 13637# as 2^^e * 1.f in extended precision. This value is stored 13638# in d6. 13639# 13640# Register usage: 13641# Input/Output 13642# d0: k-factor/exponent 13643# d2: x/x 13644# d3: x/x 13645# d4: x/x 13646# d5: x/x 13647# d6: x/ILOG 13648# d7: k-factor/Unchanged 13649# a0: ptr for original operand/final result 13650# a1: x/x 13651# a2: x/x 13652# fp0: x/float(ILOG) 13653# fp1: x/x 13654# fp2: x/x 13655# F_SCR1:x/x 13656# F_SCR2:Abs(X)/Abs(X) with $3fff exponent 13657# L_SCR1:x/x 13658# L_SCR2:first word of X packed/Unchanged 13659 13660 tst.b BINDEC_FLG(%a6) # check for denorm 13661 beq.b A3_cont # if clr, continue with norm 13662 mov.l &-4933,%d6 # force ILOG = -4933 13663 bra.b A4_str 13664A3_cont: 13665 mov.w FP_SCR1(%a6),%d0 # move exp to d0 13666 mov.w &0x3fff,FP_SCR1(%a6) # replace exponent with 0x3fff 13667 fmov.x FP_SCR1(%a6),%fp0 # now fp0 has 1.f 13668 sub.w &0x3fff,%d0 # strip off bias 13669 fadd.w %d0,%fp0 # add in exp 13670 fsub.s FONE(%pc),%fp0 # subtract off 1.0 13671 fbge.w pos_res # if pos, branch 13672 fmul.x PLOG2UP1(%pc),%fp0 # if neg, mul by LOG2UP1 13673 fmov.l %fp0,%d6 # put ILOG in d6 as a lword 13674 bra.b A4_str # go move out ILOG 13675pos_res: 13676 fmul.x PLOG2(%pc),%fp0 # if pos, mul by LOG2 13677 fmov.l %fp0,%d6 # put ILOG in d6 as a lword 13678 13679 13680# A4. Clr INEX bit. 13681# The operation in A3 above may have set INEX2. 13682 13683A4_str: 13684 fmov.l &0,%fpsr # zero all of fpsr - nothing needed 13685 13686 13687# A5. Set ICTR = 0; 13688# ICTR is a flag used in A13. It must be set before the 13689# loop entry A6. The lower word of d5 is used for ICTR. 13690 13691 clr.w %d5 # clear ICTR 13692 13693# A6. Calculate LEN. 13694# LEN is the number of digits to be displayed. 
The k-factor 13695# can dictate either the total number of digits, if it is 13696# a positive number, or the number of digits after the 13697# original decimal point which are to be included as 13698# significant. See the 68882 manual for examples. 13699# If LEN is computed to be greater than 17, set OPERR in 13700# USER_FPSR. LEN is stored in d4. 13701# 13702# Register usage: 13703# Input/Output 13704# d0: exponent/Unchanged 13705# d2: x/x/scratch 13706# d3: x/x 13707# d4: exc picture/LEN 13708# d5: ICTR/Unchanged 13709# d6: ILOG/Unchanged 13710# d7: k-factor/Unchanged 13711# a0: ptr for original operand/final result 13712# a1: x/x 13713# a2: x/x 13714# fp0: float(ILOG)/Unchanged 13715# fp1: x/x 13716# fp2: x/x 13717# F_SCR1:x/x 13718# F_SCR2:Abs(X) with $3fff exponent/Unchanged 13719# L_SCR1:x/x 13720# L_SCR2:first word of X packed/Unchanged 13721 13722A6_str: 13723 tst.l %d7 # branch on sign of k 13724 ble.b k_neg # if k <= 0, LEN = ILOG + 1 - k 13725 mov.l %d7,%d4 # if k > 0, LEN = k 13726 bra.b len_ck # skip to LEN check 13727k_neg: 13728 mov.l %d6,%d4 # first load ILOG to d4 13729 sub.l %d7,%d4 # subtract off k 13730 addq.l &1,%d4 # add in the 1 13731len_ck: 13732 tst.l %d4 # LEN check: branch on sign of LEN 13733 ble.b LEN_ng # if neg, set LEN = 1 13734 cmp.l %d4,&17 # test if LEN > 17 13735 ble.b A7_str # if not, forget it 13736 mov.l &17,%d4 # set max LEN = 17 13737 tst.l %d7 # if negative, never set OPERR 13738 ble.b A7_str # if positive, continue 13739 or.l &opaop_mask,USER_FPSR(%a6) # set OPERR & AIOP in USER_FPSR 13740 bra.b A7_str # finished here 13741LEN_ng: 13742 mov.l &1,%d4 # min LEN is 1 13743 13744 13745# A7. Calculate SCALE. 13746# SCALE is equal to 10^ISCALE, where ISCALE is the number 13747# of decimal places needed to insure LEN integer digits 13748# in the output before conversion to bcd. LAMBDA is the sign 13749# of ISCALE, used in A9. 
Fp1 contains 10^^(abs(ISCALE)) using 13750# the rounding mode as given in the following table (see 13751# Coonen, p. 7.23 as ref.; however, the SCALE variable is 13752# of opposite sign in bindec.sa from Coonen). 13753# 13754# Initial USE 13755# FPCR[6:5] LAMBDA SIGN(X) FPCR[6:5] 13756# ---------------------------------------------- 13757# RN 00 0 0 00/0 RN 13758# RN 00 0 1 00/0 RN 13759# RN 00 1 0 00/0 RN 13760# RN 00 1 1 00/0 RN 13761# RZ 01 0 0 11/3 RP 13762# RZ 01 0 1 11/3 RP 13763# RZ 01 1 0 10/2 RM 13764# RZ 01 1 1 10/2 RM 13765# RM 10 0 0 11/3 RP 13766# RM 10 0 1 10/2 RM 13767# RM 10 1 0 10/2 RM 13768# RM 10 1 1 11/3 RP 13769# RP 11 0 0 10/2 RM 13770# RP 11 0 1 11/3 RP 13771# RP 11 1 0 11/3 RP 13772# RP 11 1 1 10/2 RM 13773# 13774# Register usage: 13775# Input/Output 13776# d0: exponent/scratch - final is 0 13777# d2: x/0 or 24 for A9 13778# d3: x/scratch - offset ptr into PTENRM array 13779# d4: LEN/Unchanged 13780# d5: 0/ICTR:LAMBDA 13781# d6: ILOG/ILOG or k if ((k<=0)&(ILOG<k)) 13782# d7: k-factor/Unchanged 13783# a0: ptr for original operand/final result 13784# a1: x/ptr to PTENRM array 13785# a2: x/x 13786# fp0: float(ILOG)/Unchanged 13787# fp1: x/10^ISCALE 13788# fp2: x/x 13789# F_SCR1:x/x 13790# F_SCR2:Abs(X) with $3fff exponent/Unchanged 13791# L_SCR1:x/x 13792# L_SCR2:first word of X packed/Unchanged 13793 13794A7_str: 13795 tst.l %d7 # test sign of k 13796 bgt.b k_pos # if pos and > 0, skip this 13797 cmp.l %d7,%d6 # test k - ILOG 13798 blt.b k_pos # if ILOG >= k, skip this 13799 mov.l %d7,%d6 # if ((k<0) & (ILOG < k)) ILOG = k 13800k_pos: 13801 mov.l %d6,%d0 # calc ILOG + 1 - LEN in d0 13802 addq.l &1,%d0 # add the 1 13803 sub.l %d4,%d0 # sub off LEN 13804 swap %d5 # use upper word of d5 for LAMBDA 13805 clr.w %d5 # set it zero initially 13806 clr.w %d2 # set up d2 for very small case 13807 tst.l %d0 # test sign of ISCALE 13808 bge.b iscale # if pos, skip next inst 13809 addq.w &1,%d5 # if neg, set LAMBDA true 13810 cmp.l %d0,&0xffffecd4 # test 
iscale <= -4908 13811 bgt.b no_inf # if false, skip rest 13812 add.l &24,%d0 # add in 24 to iscale 13813 mov.l &24,%d2 # put 24 in d2 for A9 13814no_inf: 13815 neg.l %d0 # and take abs of ISCALE 13816iscale: 13817 fmov.s FONE(%pc),%fp1 # init fp1 to 1 13818 bfextu USER_FPCR(%a6){&26:&2},%d1 # get initial rmode bits 13819 lsl.w &1,%d1 # put them in bits 2:1 13820 add.w %d5,%d1 # add in LAMBDA 13821 lsl.w &1,%d1 # put them in bits 3:1 13822 tst.l L_SCR2(%a6) # test sign of original x 13823 bge.b x_pos # if pos, don't set bit 0 13824 addq.l &1,%d1 # if neg, set bit 0 13825x_pos: 13826 lea.l RBDTBL(%pc),%a2 # load rbdtbl base 13827 mov.b (%a2,%d1),%d3 # load d3 with new rmode 13828 lsl.l &4,%d3 # put bits in proper position 13829 fmov.l %d3,%fpcr # load bits into fpu 13830 lsr.l &4,%d3 # put bits in proper position 13831 tst.b %d3 # decode new rmode for pten table 13832 bne.b not_rn # if zero, it is RN 13833 lea.l PTENRN(%pc),%a1 # load a1 with RN table base 13834 bra.b rmode # exit decode 13835not_rn: 13836 lsr.b &1,%d3 # get lsb in carry 13837 bcc.b not_rp2 # if carry clear, it is RM 13838 lea.l PTENRP(%pc),%a1 # load a1 with RP table base 13839 bra.b rmode # exit decode 13840not_rp2: 13841 lea.l PTENRM(%pc),%a1 # load a1 with RM table base 13842rmode: 13843 clr.l %d3 # clr table index 13844e_loop2: 13845 lsr.l &1,%d0 # shift next bit into carry 13846 bcc.b e_next2 # if zero, skip the mul 13847 fmul.x (%a1,%d3),%fp1 # mul by 10**(d3_bit_no) 13848e_next2: 13849 add.l &12,%d3 # inc d3 to next pwrten table entry 13850 tst.l %d0 # test if ISCALE is zero 13851 bne.b e_loop2 # if not, loop 13852 13853# A8. Clr INEX; Force RZ. 13854# The operation in A3 above may have set INEX2. 13855# RZ mode is forced for the scaling operation to insure 13856# only one rounding error. The grs bits are collected in 13857# the INEX flag for use in A10. 
13858# 13859# Register usage: 13860# Input/Output 13861 13862 fmov.l &0,%fpsr # clr INEX 13863 fmov.l &rz_mode*0x10,%fpcr # set RZ rounding mode 13864 13865# A9. Scale X -> Y. 13866# The mantissa is scaled to the desired number of significant 13867# digits. The excess digits are collected in INEX2. If mul, 13868# Check d2 for excess 10 exponential value. If not zero, 13869# the iscale value would have caused the pwrten calculation 13870# to overflow. Only a negative iscale can cause this, so 13871# multiply by 10^(d2), which is now only allowed to be 24, 13872# with a multiply by 10^8 and 10^16, which is exact since 13873# 10^24 is exact. If the input was denormalized, we must 13874# create a busy stack frame with the mul command and the 13875# two operands, and allow the fpu to complete the multiply. 13876# 13877# Register usage: 13878# Input/Output 13879# d0: FPCR with RZ mode/Unchanged 13880# d2: 0 or 24/unchanged 13881# d3: x/x 13882# d4: LEN/Unchanged 13883# d5: ICTR:LAMBDA 13884# d6: ILOG/Unchanged 13885# d7: k-factor/Unchanged 13886# a0: ptr for original operand/final result 13887# a1: ptr to PTENRM array/Unchanged 13888# a2: x/x 13889# fp0: float(ILOG)/X adjusted for SCALE (Y) 13890# fp1: 10^ISCALE/Unchanged 13891# fp2: x/x 13892# F_SCR1:x/x 13893# F_SCR2:Abs(X) with $3fff exponent/Unchanged 13894# L_SCR1:x/x 13895# L_SCR2:first word of X packed/Unchanged 13896 13897A9_str: 13898 fmov.x (%a0),%fp0 # load X from memory 13899 fabs.x %fp0 # use abs(X) 13900 tst.w %d5 # LAMBDA is in lower word of d5 13901 bne.b sc_mul # if neg (LAMBDA = 1), scale by mul 13902 fdiv.x %fp1,%fp0 # calculate X / SCALE -> Y to fp0 13903 bra.w A10_st # branch to A10 13904 13905sc_mul: 13906 tst.b BINDEC_FLG(%a6) # check for denorm 13907 beq.w A9_norm # if norm, continue with mul 13908 13909# for DENORM, we must calculate: 13910# fp0 = input_op * 10^ISCALE * 10^24 13911# since the input operand is a DENORM, we can't multiply it directly. 
13912# so, we do the multiplication of the exponents and mantissas separately.
13913# in this way, we avoid underflow on intermediate stages of the
13914# multiplication and guarantee a result without exception.
13915 fmovm.x &0x2,-(%sp) # save 10^ISCALE to stack
13916
13917 mov.w (%sp),%d3 # grab exponent
13918 andi.w &0x7fff,%d3 # clear sign
13919 ori.w &0x8000,(%a0) # make DENORM exp negative
13920 add.w (%a0),%d3 # add DENORM exp to 10^ISCALE exp
13921 subi.w &0x3fff,%d3 # subtract BIAS
13922 add.w 36(%a1),%d3 # add in exponent of 10^8 (PTEN entries are 12 bytes)
13923 subi.w &0x3fff,%d3 # subtract BIAS
13924 add.w 48(%a1),%d3 # add in exponent of 10^16
13925 subi.w &0x3fff,%d3 # subtract BIAS
13926
13927 bmi.w sc_mul_err # if result is DENORM, punt!!!
13928
13929 andi.w &0x8000,(%sp) # keep sign
13930 or.w %d3,(%sp) # insert new exponent
13931 andi.w &0x7fff,(%a0) # clear sign bit on DENORM again
13932 mov.l 0x8(%a0),-(%sp) # put input op mantissa on stk
13933 mov.l 0x4(%a0),-(%sp)
13934 mov.l &0x3fff0000,-(%sp) # force exp to zero
13935 fmovm.x (%sp)+,&0x80 # load normalized DENORM into fp0
13936 fmul.x (%sp)+,%fp0
13937
13938# fmul.x 36(%a1),%fp0 # multiply fp0 by 10^8
13939# fmul.x 48(%a1),%fp0 # multiply fp0 by 10^16
13940 mov.l 36+8(%a1),-(%sp) # get 10^8 mantissa
13941 mov.l 36+4(%a1),-(%sp)
13942 mov.l &0x3fff0000,-(%sp) # force exp to zero
13943 mov.l 48+8(%a1),-(%sp) # get 10^16 mantissa
13944 mov.l 48+4(%a1),-(%sp)
13945 mov.l &0x3fff0000,-(%sp)# force exp to zero
13946 fmul.x (%sp)+,%fp0 # multiply fp0 by 10^8
13947 fmul.x (%sp)+,%fp0 # multiply fp0 by 10^16
13948 bra.b A10_st
13949
13950sc_mul_err:
# NOTE(review): deliberate infinite loop -- a denormalized scale result is
# not handled ("punt"); nothing ever branches out of here.
13951 bra.b sc_mul_err
13952
13953A9_norm:
13954 tst.w %d2 # test for small exp case
13955 beq.b A9_con # if zero, continue as normal
13956 fmul.x 36(%a1),%fp0 # multiply fp0 by 10^8
13957 fmul.x 48(%a1),%fp0 # multiply fp0 by 10^16
13958A9_con:
13959 fmul.x %fp1,%fp0 # calculate X * SCALE -> Y to fp0
13960
13961# A10. Or in INEX.
13962# If INEX is set, round error occurred.
This is compensated 13963# for by 'or-ing' in the INEX2 flag to the lsb of Y. 13964# 13965# Register usage: 13966# Input/Output 13967# d0: FPCR with RZ mode/FPSR with INEX2 isolated 13968# d2: x/x 13969# d3: x/x 13970# d4: LEN/Unchanged 13971# d5: ICTR:LAMBDA 13972# d6: ILOG/Unchanged 13973# d7: k-factor/Unchanged 13974# a0: ptr for original operand/final result 13975# a1: ptr to PTENxx array/Unchanged 13976# a2: x/ptr to FP_SCR1(a6) 13977# fp0: Y/Y with lsb adjusted 13978# fp1: 10^ISCALE/Unchanged 13979# fp2: x/x 13980 13981A10_st: 13982 fmov.l %fpsr,%d0 # get FPSR 13983 fmov.x %fp0,FP_SCR1(%a6) # move Y to memory 13984 lea.l FP_SCR1(%a6),%a2 # load a2 with ptr to FP_SCR1 13985 btst &9,%d0 # check if INEX2 set 13986 beq.b A11_st # if clear, skip rest 13987 or.l &1,8(%a2) # or in 1 to lsb of mantissa 13988 fmov.x FP_SCR1(%a6),%fp0 # write adjusted Y back to fpu 13989 13990 13991# A11. Restore original FPCR; set size ext. 13992# Perform FINT operation in the user's rounding mode. Keep 13993# the size to extended. The sintdo entry point in the sint 13994# routine expects the FPCR value to be in USER_FPCR for 13995# mode and precision. The original FPCR is saved in L_SCR1. 13996 13997A11_st: 13998 mov.l USER_FPCR(%a6),L_SCR1(%a6) # save it for later 13999 and.l &0x00000030,USER_FPCR(%a6) # set size to ext, 14000# ;block exceptions 14001 14002 14003# A12. Calculate YINT = FINT(Y) according to user's rounding mode. 14004# The FPSP routine sintd0 is used. The output is in fp0. 
14005# 14006# Register usage: 14007# Input/Output 14008# d0: FPSR with AINEX cleared/FPCR with size set to ext 14009# d2: x/x/scratch 14010# d3: x/x 14011# d4: LEN/Unchanged 14012# d5: ICTR:LAMBDA/Unchanged 14013# d6: ILOG/Unchanged 14014# d7: k-factor/Unchanged 14015# a0: ptr for original operand/src ptr for sintdo 14016# a1: ptr to PTENxx array/Unchanged 14017# a2: ptr to FP_SCR1(a6)/Unchanged 14018# a6: temp pointer to FP_SCR1(a6) - orig value saved and restored 14019# fp0: Y/YINT 14020# fp1: 10^ISCALE/Unchanged 14021# fp2: x/x 14022# F_SCR1:x/x 14023# F_SCR2:Y adjusted for inex/Y with original exponent 14024# L_SCR1:x/original USER_FPCR 14025# L_SCR2:first word of X packed/Unchanged 14026 14027A12_st: 14028 movm.l &0xc0c0,-(%sp) # save regs used by sintd0 {%d0-%d1/%a0-%a1} 14029 mov.l L_SCR1(%a6),-(%sp) 14030 mov.l L_SCR2(%a6),-(%sp) 14031 14032 lea.l FP_SCR1(%a6),%a0 # a0 is ptr to FP_SCR1(a6) 14033 fmov.x %fp0,(%a0) # move Y to memory at FP_SCR1(a6) 14034 tst.l L_SCR2(%a6) # test sign of original operand 14035 bge.b do_fint12 # if pos, use Y 14036 or.l &0x80000000,(%a0) # if neg, use -Y 14037do_fint12: 14038 mov.l USER_FPSR(%a6),-(%sp) 14039# bsr sintdo # sint routine returns int in fp0 14040 14041 fmov.l USER_FPCR(%a6),%fpcr 14042 fmov.l &0x0,%fpsr # clear the AEXC bits!!! 14043## mov.l USER_FPCR(%a6),%d0 # ext prec/keep rnd mode 14044## andi.l &0x00000030,%d0 14045## fmov.l %d0,%fpcr 14046 fint.x FP_SCR1(%a6),%fp0 # do fint() 14047 fmov.l %fpsr,%d0 14048 or.w %d0,FPSR_EXCEPT(%a6) 14049## fmov.l &0x0,%fpcr 14050## fmov.l %fpsr,%d0 # don't keep ccodes 14051## or.w %d0,FPSR_EXCEPT(%a6) 14052 14053 mov.b (%sp),USER_FPSR(%a6) 14054 add.l &4,%sp 14055 14056 mov.l (%sp)+,L_SCR2(%a6) 14057 mov.l (%sp)+,L_SCR1(%a6) 14058 movm.l (%sp)+,&0x303 # restore regs used by sint {%d0-%d1/%a0-%a1} 14059 14060 mov.l L_SCR2(%a6),FP_SCR1(%a6) # restore original exponent 14061 mov.l L_SCR1(%a6),USER_FPCR(%a6) # restore user's FPCR 14062 14063# A13. Check for LEN digits. 
14064# If the int operation results in more than LEN digits,
14065# or less than LEN -1 digits, adjust ILOG and repeat from
14066# A6. This test occurs only on the first pass. If the
14067# result is exactly 10^LEN, decrement ILOG and divide
14068# the mantissa by 10. The calculation of 10^LEN cannot
14069# be inexact, since all powers of ten up to 10^27 are exact
14070# in extended precision, so the use of a previous power-of-ten
14071# table will introduce no error.
14072#
14073#
14074# Register usage:
14075# Input/Output
14076# d0: FPCR with size set to ext/scratch final = 0
14077# d2: x/x
14078# d3: x/scratch final = x
14079# d4: LEN/LEN adjusted
14080# d5: ICTR:LAMBDA/LAMBDA:ICTR
14081# d6: ILOG/ILOG adjusted
14082# d7: k-factor/Unchanged
14083# a0: pointer into memory for packed bcd string formation
14084# a1: ptr to PTENxx array/Unchanged
14085# a2: ptr to FP_SCR1(a6)/Unchanged
14086# fp0: int portion of Y/abs(YINT) adjusted
14087# fp1: 10^ISCALE/Unchanged
14088# fp2: x/10^LEN
14089# F_SCR1:x/x
14090# F_SCR2:Y with original exponent/Unchanged
14091# L_SCR1:original USER_FPCR/Unchanged
14092# L_SCR2:first word of X packed/Unchanged
14093
14094A13_st:
14095 swap %d5 # put ICTR in lower word of d5
14096 tst.w %d5 # check if ICTR = 0
14097 bne not_zr # if non-zero, go to second test
14098#
14099# Compute 10^(LEN-1) in fp2 by binary decomposition of LEN-1,
14100# multiplying in entries from the power-of-ten table at (a1)
14101#
14101 fmov.s FONE(%pc),%fp2 # init fp2 to 1.0
14102 mov.l %d4,%d0 # put LEN in d0
14103 subq.l &1,%d0 # d0 = LEN -1
14104 clr.l %d3 # clr table index
14105l_loop:
14106 lsr.l &1,%d0 # shift next bit into carry
14107 bcc.b l_next # if zero, skip the mul
14108 fmul.x (%a1,%d3),%fp2 # mul by 10**(d3_bit_no)
14109l_next:
14110 add.l &12,%d3 # inc d3 to next pwrten table entry
14111 tst.l %d0 # test if LEN is zero
14112 bne.b l_loop # if not, loop
14113#
14114# 10^(LEN-1) is computed for this test and A14. If the input was
14115# denormalized, check only the case in which YINT > 10^LEN.
14116# 14117 tst.b BINDEC_FLG(%a6) # check if input was norm 14118 beq.b A13_con # if norm, continue with checking 14119 fabs.x %fp0 # take abs of YINT 14120 bra test_2 14121# 14122# Compare abs(YINT) to 10^(LEN-1) and 10^LEN 14123# 14124A13_con: 14125 fabs.x %fp0 # take abs of YINT 14126 fcmp.x %fp0,%fp2 # compare abs(YINT) with 10^(LEN-1) 14127 fbge.w test_2 # if greater, do next test 14128 subq.l &1,%d6 # subtract 1 from ILOG 14129 mov.w &1,%d5 # set ICTR 14130 fmov.l &rm_mode*0x10,%fpcr # set rmode to RM 14131 fmul.s FTEN(%pc),%fp2 # compute 10^LEN 14132 bra.w A6_str # return to A6 and recompute YINT 14133test_2: 14134 fmul.s FTEN(%pc),%fp2 # compute 10^LEN 14135 fcmp.x %fp0,%fp2 # compare abs(YINT) with 10^LEN 14136 fblt.w A14_st # if less, all is ok, go to A14 14137 fbgt.w fix_ex # if greater, fix and redo 14138 fdiv.s FTEN(%pc),%fp0 # if equal, divide by 10 14139 addq.l &1,%d6 # and inc ILOG 14140 bra.b A14_st # and continue elsewhere 14141fix_ex: 14142 addq.l &1,%d6 # increment ILOG by 1 14143 mov.w &1,%d5 # set ICTR 14144 fmov.l &rm_mode*0x10,%fpcr # set rmode to RM 14145 bra.w A6_str # return to A6 and recompute YINT 14146# 14147# Since ICTR <> 0, we have already been through one adjustment, 14148# and shouldn't have another; this is to check if abs(YINT) = 10^LEN 14149# 10^LEN is again computed using whatever table is in a1 since the 14150# value calculated cannot be inexact. 
14151#
14152not_zr:
14153 fmov.s FONE(%pc),%fp2 # init fp2 to 1.0
14154 mov.l %d4,%d0 # put LEN in d0
14155 clr.l %d3 # clr table index
14156z_loop:
14157 lsr.l &1,%d0 # shift next bit into carry
14158 bcc.b z_next # if zero, skip the mul
14159 fmul.x (%a1,%d3),%fp2 # mul by 10**(d3_bit_no)
14160z_next:
14161 add.l &12,%d3 # inc d3 to next pwrten table entry
14162 tst.l %d0 # test if LEN is zero
14163 bne.b z_loop # if not, loop
14164 fabs.x %fp0 # get abs(YINT)
14165 fcmp.x %fp0,%fp2 # check if abs(YINT) = 10^LEN
14166 fbneq.w A14_st # if not, skip this
14167 fdiv.s FTEN(%pc),%fp0 # divide abs(YINT) by 10
14168 addq.l &1,%d6 # and inc ILOG by 1
14169 addq.l &1,%d4 # and inc LEN
14170 fmul.s FTEN(%pc),%fp2 # if LEN++, then get 10^^LEN
14171
14172# A14. Convert the mantissa to bcd.
14173# The binstr routine is used to convert the LEN digit
14174# mantissa to bcd in memory. The input to binstr is
14175# to be a fraction; i.e. (mantissa)/10^LEN and adjusted
14176# such that the decimal point is to the left of bit 63.
14177# The bcd digits are stored in the correct position in
14178# the final string area in memory.
14179# 14180# 14181# Register usage: 14182# Input/Output 14183# d0: x/LEN call to binstr - final is 0 14184# d1: x/0 14185# d2: x/ms 32-bits of mant of abs(YINT) 14186# d3: x/ls 32-bits of mant of abs(YINT) 14187# d4: LEN/Unchanged 14188# d5: ICTR:LAMBDA/LAMBDA:ICTR 14189# d6: ILOG 14190# d7: k-factor/Unchanged 14191# a0: pointer into memory for packed bcd string formation 14192# /ptr to first mantissa byte in result string 14193# a1: ptr to PTENxx array/Unchanged 14194# a2: ptr to FP_SCR1(a6)/Unchanged 14195# fp0: int portion of Y/abs(YINT) adjusted 14196# fp1: 10^ISCALE/Unchanged 14197# fp2: 10^LEN/Unchanged 14198# F_SCR1:x/Work area for final result 14199# F_SCR2:Y with original exponent/Unchanged 14200# L_SCR1:original USER_FPCR/Unchanged 14201# L_SCR2:first word of X packed/Unchanged 14202 14203A14_st: 14204 fmov.l &rz_mode*0x10,%fpcr # force rz for conversion 14205 fdiv.x %fp2,%fp0 # divide abs(YINT) by 10^LEN 14206 lea.l FP_SCR0(%a6),%a0 14207 fmov.x %fp0,(%a0) # move abs(YINT)/10^LEN to memory 14208 mov.l 4(%a0),%d2 # move 2nd word of FP_RES to d2 14209 mov.l 8(%a0),%d3 # move 3rd word of FP_RES to d3 14210 clr.l 4(%a0) # zero word 2 of FP_RES 14211 clr.l 8(%a0) # zero word 3 of FP_RES 14212 mov.l (%a0),%d0 # move exponent to d0 14213 swap %d0 # put exponent in lower word 14214 beq.b no_sft # if zero, don't shift 14215 sub.l &0x3ffd,%d0 # sub bias less 2 to make fract 14216 tst.l %d0 # check if > 1 14217 bgt.b no_sft # if so, don't shift 14218 neg.l %d0 # make exp positive 14219m_loop: 14220 lsr.l &1,%d2 # shift d2:d3 right, add 0s 14221 roxr.l &1,%d3 # the number of places 14222 dbf.w %d0,m_loop # given in d0 14223no_sft: 14224 tst.l %d2 # check for mantissa of zero 14225 bne.b no_zr # if not, go on 14226 tst.l %d3 # continue zero check 14227 beq.b zer_m # if zero, go directly to binstr 14228no_zr: 14229 clr.l %d1 # put zero in d1 for addx 14230 add.l &0x00000080,%d3 # inc at bit 7 14231 addx.l %d1,%d2 # continue inc 14232 and.l &0xffffff80,%d3 # strip off 
lsb not used by 882 14233zer_m: 14234 mov.l %d4,%d0 # put LEN in d0 for binstr call 14235 addq.l &3,%a0 # a0 points to M16 byte in result 14236 bsr binstr # call binstr to convert mant 14237 14238 14239# A15. Convert the exponent to bcd. 14240# As in A14 above, the exp is converted to bcd and the 14241# digits are stored in the final string. 14242# 14243# Digits are stored in L_SCR1(a6) on return from BINDEC as: 14244# 14245# 32 16 15 0 14246# ----------------------------------------- 14247# | 0 | e3 | e2 | e1 | e4 | X | X | X | 14248# ----------------------------------------- 14249# 14250# And are moved into their proper places in FP_SCR0. If digit e4 14251# is non-zero, OPERR is signaled. In all cases, all 4 digits are 14252# written as specified in the 881/882 manual for packed decimal. 14253# 14254# Register usage: 14255# Input/Output 14256# d0: x/LEN call to binstr - final is 0 14257# d1: x/scratch (0);shift count for final exponent packing 14258# d2: x/ms 32-bits of exp fraction/scratch 14259# d3: x/ls 32-bits of exp fraction 14260# d4: LEN/Unchanged 14261# d5: ICTR:LAMBDA/LAMBDA:ICTR 14262# d6: ILOG 14263# d7: k-factor/Unchanged 14264# a0: ptr to result string/ptr to L_SCR1(a6) 14265# a1: ptr to PTENxx array/Unchanged 14266# a2: ptr to FP_SCR1(a6)/Unchanged 14267# fp0: abs(YINT) adjusted/float(ILOG) 14268# fp1: 10^ISCALE/Unchanged 14269# fp2: 10^LEN/Unchanged 14270# F_SCR1:Work area for final result/BCD result 14271# F_SCR2:Y with original exponent/ILOG/10^4 14272# L_SCR1:original USER_FPCR/Exponent digits on return from binstr 14273# L_SCR2:first word of X packed/Unchanged 14274 14275A15_st: 14276 tst.b BINDEC_FLG(%a6) # check for denorm 14277 beq.b not_denorm 14278 ftest.x %fp0 # test for zero 14279 fbeq.w den_zero # if zero, use k-factor or 4933 14280 fmov.l %d6,%fp0 # float ILOG 14281 fabs.x %fp0 # get abs of ILOG 14282 bra.b convrt 14283den_zero: 14284 tst.l %d7 # check sign of the k-factor 14285 blt.b use_ilog # if negative, use ILOG 14286 fmov.s 
F4933(%pc),%fp0 # force exponent to 4933
14287 bra.b convrt # do it
14288use_ilog:
14289 fmov.l %d6,%fp0 # float ILOG
14290 fabs.x %fp0 # get abs of ILOG
14291 bra.b convrt
14292not_denorm:
14293 ftest.x %fp0 # test for zero
14294 fbneq.w not_zero # if zero, force exponent
14295 fmov.s FONE(%pc),%fp0 # force exponent to 1
14296 bra.b convrt # do it
14297not_zero:
14298 fmov.l %d6,%fp0 # float ILOG
14299 fabs.x %fp0 # get abs of ILOG
14300convrt:
14301 fdiv.x 24(%a1),%fp0 # compute ILOG/10^4
14302 fmov.x %fp0,FP_SCR1(%a6) # store fp0 in memory
14303 mov.l 4(%a2),%d2 # move word 2 to d2
14304 mov.l 8(%a2),%d3 # move word 3 to d3
14305 mov.w (%a2),%d0 # move exp to d0
14306 beq.b x_loop_fin # if zero, skip the shift
14307 sub.w &0x3ffd,%d0 # subtract off bias
14308 neg.w %d0 # make exp positive
14309x_loop:
14310 lsr.l &1,%d2 # shift d2:d3 right
14311 roxr.l &1,%d3 # the number of places
14312 dbf.w %d0,x_loop # given in d0
14313x_loop_fin:
14314 clr.l %d1 # put zero in d1 for addx
14315 add.l &0x00000080,%d3 # inc at bit 7 (round; same as mantissa path above)
14316 addx.l %d1,%d2 # continue inc
14317 and.l &0xffffff80,%d3 # strip off lsb not used by 882
14318 mov.l &4,%d0 # put 4 in d0 for binstr call
14319 lea.l L_SCR1(%a6),%a0 # a0 is ptr to L_SCR1 for exp digits
14320 bsr binstr # call binstr to convert exp
14321 mov.l L_SCR1(%a6),%d0 # load L_SCR1 lword to d0
14322 mov.l &12,%d1 # use d1 for shift count
14323 lsr.l %d1,%d0 # shift d0 right by 12
14324 bfins %d0,FP_SCR0(%a6){&4:&12} # put e3:e2:e1 in FP_SCR0
14325 lsr.l %d1,%d0 # shift d0 right by 12
14326 bfins %d0,FP_SCR0(%a6){&16:&4} # put e4 in FP_SCR0
14327 tst.b %d0 # check if e4 is zero
14328 beq.b A16_st # if zero, skip rest
14329 or.l &opaop_mask,USER_FPSR(%a6) # set OPERR & AIOP in USER_FPSR
14330
14331
14332# A16. Write sign bits to final string.
14333# Sigma is bit 31 of initial value; RHO is bit 31 of d6 (ILOG).
14334# 14335# Register usage: 14336# Input/Output 14337# d0: x/scratch - final is x 14338# d2: x/x 14339# d3: x/x 14340# d4: LEN/Unchanged 14341# d5: ICTR:LAMBDA/LAMBDA:ICTR 14342# d6: ILOG/ILOG adjusted 14343# d7: k-factor/Unchanged 14344# a0: ptr to L_SCR1(a6)/Unchanged 14345# a1: ptr to PTENxx array/Unchanged 14346# a2: ptr to FP_SCR1(a6)/Unchanged 14347# fp0: float(ILOG)/Unchanged 14348# fp1: 10^ISCALE/Unchanged 14349# fp2: 10^LEN/Unchanged 14350# F_SCR1:BCD result with correct signs 14351# F_SCR2:ILOG/10^4 14352# L_SCR1:Exponent digits on return from binstr 14353# L_SCR2:first word of X packed/Unchanged 14354 14355A16_st: 14356 clr.l %d0 # clr d0 for collection of signs 14357 and.b &0x0f,FP_SCR0(%a6) # clear first nibble of FP_SCR0 14358 tst.l L_SCR2(%a6) # check sign of original mantissa 14359 bge.b mant_p # if pos, don't set SM 14360 mov.l &2,%d0 # move 2 in to d0 for SM 14361mant_p: 14362 tst.l %d6 # check sign of ILOG 14363 bge.b wr_sgn # if pos, don't set SE 14364 addq.l &1,%d0 # set bit 0 in d0 for SE 14365wr_sgn: 14366 bfins %d0,FP_SCR0(%a6){&0:&2} # insert SM and SE into FP_SCR0 14367 14368# Clean up and restore all registers used. 
#
# (tail of a routine that begins before this chunk -- kept verbatim)
# Clear any inexact bits raised while computing, restore the caller's
# FP and integer register sets from the stack, and return.
#
	fmov.l		&0,%fpsr		# clear possible inex2/ainex bits
	fmovm.x		(%sp)+,&0xe0		# {%fp0-%fp2}
	movm.l		(%sp)+,&0x4fc		# {%d2-%d7/%a2}
	rts

#########################################################################
# Tables of powers of ten: entry n holds 10^(2^n) for n = 0..12, each	#
# as a 96-bit extended-precision constant (exponent lword followed by	#
# two mantissa lwords).  Three copies are provided; they are identical	#
# except in the low-order mantissa bits of the inexact entries		#
# (10^32 and up), presumably one table per rounding direction		#
# (RN = to-nearest, RP = toward +inf, RM = toward -inf, going by the	#
# suffixes) -- TODO confirm against the callers that index them.	#
#########################################################################

	global		PTENRN
PTENRN:
	long		0x40020000,0xA0000000,0x00000000	# 10 ^ 1
	long		0x40050000,0xC8000000,0x00000000	# 10 ^ 2
	long		0x400C0000,0x9C400000,0x00000000	# 10 ^ 4
	long		0x40190000,0xBEBC2000,0x00000000	# 10 ^ 8
	long		0x40340000,0x8E1BC9BF,0x04000000	# 10 ^ 16
	long		0x40690000,0x9DC5ADA8,0x2B70B59E	# 10 ^ 32
	long		0x40D30000,0xC2781F49,0xFFCFA6D5	# 10 ^ 64
	long		0x41A80000,0x93BA47C9,0x80E98CE0	# 10 ^ 128
	long		0x43510000,0xAA7EEBFB,0x9DF9DE8E	# 10 ^ 256
	long		0x46A30000,0xE319A0AE,0xA60E91C7	# 10 ^ 512
	long		0x4D480000,0xC9767586,0x81750C17	# 10 ^ 1024
	long		0x5A920000,0x9E8B3B5D,0xC53D5DE5	# 10 ^ 2048
	long		0x75250000,0xC4605202,0x8A20979B	# 10 ^ 4096

	global		PTENRP
PTENRP:
	long		0x40020000,0xA0000000,0x00000000	# 10 ^ 1
	long		0x40050000,0xC8000000,0x00000000	# 10 ^ 2
	long		0x400C0000,0x9C400000,0x00000000	# 10 ^ 4
	long		0x40190000,0xBEBC2000,0x00000000	# 10 ^ 8
	long		0x40340000,0x8E1BC9BF,0x04000000	# 10 ^ 16
	long		0x40690000,0x9DC5ADA8,0x2B70B59E	# 10 ^ 32
	long		0x40D30000,0xC2781F49,0xFFCFA6D6	# 10 ^ 64
	long		0x41A80000,0x93BA47C9,0x80E98CE0	# 10 ^ 128
	long		0x43510000,0xAA7EEBFB,0x9DF9DE8E	# 10 ^ 256
	long		0x46A30000,0xE319A0AE,0xA60E91C7	# 10 ^ 512
	long		0x4D480000,0xC9767586,0x81750C18	# 10 ^ 1024
	long		0x5A920000,0x9E8B3B5D,0xC53D5DE5	# 10 ^ 2048
	long		0x75250000,0xC4605202,0x8A20979B	# 10 ^ 4096

	global		PTENRM
PTENRM:
	long		0x40020000,0xA0000000,0x00000000	# 10 ^ 1
	long		0x40050000,0xC8000000,0x00000000	# 10 ^ 2
	long		0x400C0000,0x9C400000,0x00000000	# 10 ^ 4
	long		0x40190000,0xBEBC2000,0x00000000	# 10 ^ 8
	long		0x40340000,0x8E1BC9BF,0x04000000	# 10 ^ 16
	long		0x40690000,0x9DC5ADA8,0x2B70B59D	# 10 ^ 32
	long		0x40D30000,0xC2781F49,0xFFCFA6D5	# 10 ^ 64
	long		0x41A80000,0x93BA47C9,0x80E98CDF	# 10 ^ 128
	long		0x43510000,0xAA7EEBFB,0x9DF9DE8D	# 10 ^ 256
	long		0x46A30000,0xE319A0AE,0xA60E91C6	# 10 ^ 512
	long		0x4D480000,0xC9767586,0x81750C17	# 10 ^ 1024
	long		0x5A920000,0x9E8B3B5D,0xC53D5DE4	# 10 ^ 2048
	long		0x75250000,0xC4605202,0x8A20979A	# 10 ^ 4096

#########################################################################
# binstr(): Converts a 64-bit binary integer to bcd.			#
#									#
# INPUT *************************************************************** #
#	d2:d3 = 64-bit binary integer					#
#	d0    = desired length (LEN)					#
#	a0    = pointer to start in memory for bcd characters		#
#		(This pointer must point to byte 4 of the first		#
#		 lword of the packed decimal memory string.)		#
#									#
# OUTPUT ************************************************************** #
#	a0 = pointer to LEN bcd digits representing the 64-bit integer.	#
#									#
# ALGORITHM *********************************************************** #
#	The 64-bit binary is assumed to have a decimal point before	#
#	bit 63.  The fraction is multiplied by 10 using a mul by 2	#
#	shift and a mul by 8 shift.  The bits shifted out of the	#
#	msb form a decimal digit.  This process is iterated until	#
#	LEN digits are formed.						#
#									#
#	A1.  Init d7 to 1.  D7 is the byte digit counter, and if 1,	#
#	     the digit formed will be assumed the least significant.	#
#	     This is to force the first byte formed to have a 0 in the	#
#	     upper 4 bits.						#
#									#
#	A2.  Beginning of the loop:					#
#	     Copy the fraction in d2:d3 to d4:d5.			#
#									#
#	A3.  Multiply the fraction in d2:d3 by 8 using bit-field	#
#	     extracts and shifts.  The three msbs from d2 will go	#
#	     into d1.							#
#									#
#	A4.  Multiply the fraction in d4:d5 by 2 using shifts.  The	#
#	     msb will be collected by the carry.			#
#									#
#	A5.  Add using the carry the 64-bit quantities in d2:d3 and	#
#	     d4:d5 into d2:d3.  D1 will contain the bcd digit formed.	#
#									#
#	A6.  Test d7.  If zero, the digit formed is the ms digit.  If	#
#	     non-zero, it is the ls digit.  Put the digit in its place	#
#	     in the upper word of d0.  If it is the ls digit, write	#
#	     the word from d0 to memory.				#
#									#
#	A7.  Decrement d6 (LEN counter) and repeat the loop until	#
#	     zero.							#
#									#
#########################################################################

# Implementation Notes:
#
# The registers are used as follows:
#
#	d0: LEN counter
#	d1: temp used to form the digit
#	d2: upper 32-bits of fraction for mul by 8
#	d3: lower 32-bits of fraction for mul by 8
#	d4: upper 32-bits of fraction for mul by 2
#	d5: lower 32-bits of fraction for mul by 2
#	d6: temp for bit-field extracts
#	d7: byte digit formation word; digit count {0,1}
#	a0: pointer into memory for packed bcd string formation
#

	global		binstr
binstr:
	movm.l		&0xff00,-(%sp)		# {%d0-%d7}

#
# A1: Init d7
#
	mov.l		&1,%d7			# init d7 for second digit
	subq.l		&1,%d0			# for dbf d0 would have LEN+1 passes
#
# A2. Copy d2:d3 to d4:d5. Start loop.
#
loop:
	mov.l		%d2,%d4			# copy the fraction before muls
	mov.l		%d3,%d5			# to d4:d5
#
# A3. Multiply d2:d3 by 8; extract msbs into d1.
# (frac*8 + frac*2 below == frac*10; the bits that overflow bit 63
# are the next decimal digit.)
#
	bfextu		%d2{&0:&3},%d1		# copy 3 msbs of d2 into d1
	asl.l		&3,%d2			# shift d2 left by 3 places
	bfextu		%d3{&0:&3},%d6		# copy 3 msbs of d3 into d6
	asl.l		&3,%d3			# shift d3 left by 3 places
	or.l		%d6,%d2			# or in msbs from d3 into d2
#
# A4. Multiply d4:d5 by 2; add carry out to d1.
#
	asl.l		&1,%d5			# mul d5 by 2
	roxl.l		&1,%d4			# mul d4 by 2
	swap		%d6			# put 0 in d6 lower word
	addx.w		%d6,%d1			# add in extend from mul by 2
#
# A5. Add mul by 8 to mul by 2. D1 contains the digit formed.
# (The nops are deliberate -- do not remove; see errata note.)
#
	add.l		%d5,%d3			# add lower 32 bits
	nop					# ERRATA FIX #13 (Rev. 1.2 6/6/90)
	addx.l		%d4,%d2			# add with extend upper 32 bits
	nop					# ERRATA FIX #13 (Rev. 1.2 6/6/90)
	addx.w		%d6,%d1			# add in extend from add to d1
	swap		%d6			# with d6 = 0; put 0 in upper word
#
# A6. Test d7 and branch: first digit of a byte goes to first_d,
# second digit is merged and the completed byte is stored by sec_d.
#
	tst.w		%d7			# if zero, store digit & to loop
	beq.b		first_d			# if non-zero, form byte & write
sec_d:
	swap		%d7			# bring first digit to word d7b
	asl.w		&4,%d7			# first digit in upper 4 bits d7b
	add.w		%d1,%d7			# add in ls digit to d7b
	mov.b		%d7,(%a0)+		# store d7b byte in memory
	swap		%d7			# put LEN counter in word d7a
	clr.w		%d7			# set d7a to signal no digits done
	dbf.w		%d0,loop		# do loop some more!
	bra.b		end_bstr		# finished, so exit
first_d:
	swap		%d7			# put digit word in d7b
	mov.w		%d1,%d7			# put new digit in d7b
	swap		%d7			# put LEN counter in word d7a
	addq.w		&1,%d7			# set d7a to signal first digit done
	dbf.w		%d0,loop		# do loop some more!
	swap		%d7			# put last digit in string
	lsl.w		&4,%d7			# move it to upper 4 bits
	mov.b		%d7,(%a0)+		# store it in memory string
#
# Clean up and return with result in fp0.
#
end_bstr:
	movm.l		(%sp)+,&0xff		# {%d0-%d7}
	rts

#########################################################################
# XDEF **************************************************************** #
#	facc_in_b(): dmem_read_byte failed				#
#	facc_in_w(): dmem_read_word failed				#
#	facc_in_l(): dmem_read_long failed				#
#	facc_in_d(): dmem_read of dbl prec failed			#
#	facc_in_x(): dmem_read of ext prec failed			#
#									#
#	facc_out_b(): dmem_write_byte failed				#
#	facc_out_w(): dmem_write_word failed				#
#	facc_out_l(): dmem_write_long failed				#
#	facc_out_d(): dmem_write of dbl prec failed			#
#	facc_out_x(): dmem_write of ext prec failed			#
#									#
# XREF **************************************************************** #
#	_real_access() - exit through access error handler		#
#									#
# INPUT *************************************************************** #
#	None								#
#									#
# OUTPUT ************************************************************** #
#	None								#
#									#
# ALGORITHM *********************************************************** #
#	Flow jumps here when an FP data fetch call gets an error	#
# result.  This means the operating system wants an access error	#
# frame made out of the current exception stack frame.			#
#	So, we first call restore() which makes sure that any updated	#
# -(an)+ register gets returned to its pre-exception value and then	#
# we change the stack to an access error stack frame.			#
#									#
#########################################################################

#
# Per-size failure stubs.  Each one loads d0 with the byte count of the
# failed transfer, calls restore() to undo any (An)+/-(An) side effect,
# writes a precomputed FSLW (fault status) value into EXC_VOFF(%a6),
# and joins facc_finish which rebuilds the frame.
# FSLW bit meanings are presumably per the 68060 access error frame
# layout -- TODO confirm against the MC68060 User's Manual.
#
facc_in_b:
	movq.l		&0x1,%d0		# one byte
	bsr.w		restore			# fix An

	mov.w		&0x0121,EXC_VOFF(%a6)	# set FSLW
	bra.w		facc_finish		# word branch: farthest stub

facc_in_w:
	movq.l		&0x2,%d0		# two bytes
	bsr.w		restore			# fix An

	mov.w		&0x0141,EXC_VOFF(%a6)	# set FSLW
	bra.b		facc_finish

facc_in_l:
	movq.l		&0x4,%d0		# four bytes
	bsr.w		restore			# fix An

	mov.w		&0x0101,EXC_VOFF(%a6)	# set FSLW
	bra.b		facc_finish

facc_in_d:
	movq.l		&0x8,%d0		# eight bytes
	bsr.w		restore			# fix An

	mov.w		&0x0161,EXC_VOFF(%a6)	# set FSLW
	bra.b		facc_finish

facc_in_x:
	movq.l		&0xc,%d0		# twelve bytes
	bsr.w		restore			# fix An

	mov.w		&0x0161,EXC_VOFF(%a6)	# set FSLW
	bra.b		facc_finish

################################################################

facc_out_b:
	movq.l		&0x1,%d0		# one byte
	bsr.w		restore			# restore An

	mov.w		&0x00a1,EXC_VOFF(%a6)	# set FSLW
	bra.b		facc_finish

facc_out_w:
	movq.l		&0x2,%d0		# two bytes
	bsr.w		restore			# restore An

	mov.w		&0x00c1,EXC_VOFF(%a6)	# set FSLW
	bra.b		facc_finish

facc_out_l:
	movq.l		&0x4,%d0		# four bytes
	bsr.w		restore			# restore An

	mov.w		&0x0081,EXC_VOFF(%a6)	# set FSLW
	bra.b		facc_finish

facc_out_d:
	movq.l		&0x8,%d0		# eight bytes
	bsr.w		restore			# restore An

	mov.w		&0x00e1,EXC_VOFF(%a6)	# set FSLW
	bra.b		facc_finish

facc_out_x:
	movq.l		&0xc,%d0		# twelve bytes (was mov.l; movq.l for consistency w/ siblings)
	bsr.w		restore			# restore An

	mov.w		&0x00e1,EXC_VOFF(%a6)	# set FSLW
						# falls through to facc_finish

# here's where we actually create the access error frame from the
# current exception stack frame.  The user's fp0/fp1, control regs and
# d0-d1/a0-a1 are restored from the save area, the local frame is
# unlinked, and the residual frame is rewritten in place into an
# access error frame before exiting through _real_access().
facc_finish:
	mov.l		USER_FPIAR(%a6),EXC_PC(%a6) # store current PC

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	unlk		%a6

	mov.l		(%sp),-(%sp)		# store SR, hi(PC)
	mov.l		0x8(%sp),0x4(%sp)	# store lo(PC)
	mov.l		0xc(%sp),0x8(%sp)	# store EA
	mov.l		&0x00000001,0xc(%sp)	# store FSLW
	mov.w		0x6(%sp),0xc(%sp)	# fix FSLW (size)
	mov.w		&0x4008,0x6(%sp)	# store voff

	btst		&0x5,(%sp)		# supervisor or user mode?
	beq.b		facc_out2		# user
	bset		&0x2,0xd(%sp)		# set supervisor TM bit

facc_out2:
	bra.l		_real_access

##################################################################

# if the effective addressing mode was predecrement or postincrement,
# the emulation has already changed its value to the correct post-
# instruction value. but since we're exiting to the access error
# handler, then AN must be returned to its pre-instruction value.
# we do that here.
#
# In:  d0 = number of bytes the emulation advanced/retreated An by.
# Uses EXC_OPWORD(%a6) to decode the addressing mode and register.
restore:
	mov.b		EXC_OPWORD+0x1(%a6),%d1
	andi.b		&0x38,%d1		# extract opmode
	cmpi.b		%d1,&0x18		# postinc?
	beq.w		rest_inc
	cmpi.b		%d1,&0x20		# predec?
	beq.w		rest_dec
	rts					# other modes: nothing to undo

# postincrement: subtract d0 back out of the proper An.  The An number
# from the opword indexes a displacement table; stacked copies are
# fixed for a0/a1/a6, live registers for a2-a5, and the USP for a7.
rest_inc:
	mov.b		EXC_OPWORD+0x1(%a6),%d1
	andi.w		&0x0007,%d1		# fetch An

	mov.w		(tbl_rest_inc.b,%pc,%d1.w*2),%d1
	jmp		(tbl_rest_inc.b,%pc,%d1.w*1)

tbl_rest_inc:
	short		ri_a0 - tbl_rest_inc
	short		ri_a1 - tbl_rest_inc
	short		ri_a2 - tbl_rest_inc
	short		ri_a3 - tbl_rest_inc
	short		ri_a4 - tbl_rest_inc
	short		ri_a5 - tbl_rest_inc
	short		ri_a6 - tbl_rest_inc
	short		ri_a7 - tbl_rest_inc

ri_a0:
	sub.l		%d0,EXC_DREGS+0x8(%a6)	# fix stacked a0
	rts
ri_a1:
	sub.l		%d0,EXC_DREGS+0xc(%a6)	# fix stacked a1
	rts
ri_a2:
	sub.l		%d0,%a2			# fix a2
	rts
ri_a3:
	sub.l		%d0,%a3			# fix a3
	rts
ri_a4:
	sub.l		%d0,%a4			# fix a4
	rts
ri_a5:
	sub.l		%d0,%a5			# fix a5
	rts
ri_a6:
	sub.l		%d0,(%a6)		# fix stacked a6
	rts
# if it's a fmove out instruction, we don't have to fix a7
# because we hadn't changed it yet. if it's an opclass two
# instruction (data moved in) and the exception was in supervisor
# mode, then a7 also wasn't updated. if it was user mode, then
# restore the correct a7 which is in the USP currently.
ri_a7:
	cmpi.b		EXC_VOFF(%a6),&0x30	# move in or out?
	bne.b		ri_a7_done		# out

	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
	bne.b		ri_a7_done		# supervisor
	movc		%usp,%a0		# restore USP
	sub.l		%d0,%a0
	movc		%a0,%usp
ri_a7_done:
	rts

# need to invert adjustment value if the <ea> was predec
rest_dec:
	neg.l		%d0
	bra.b		rest_inc