#
# $NetBSD: ilsp.s,v 1.1 2000/04/14 20:24:39 is Exp $
#

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
# M68000 Hi-Performance Microprocessor Division
# M68060 Software Package Production Release
#
# M68060 Software Package Copyright (C) 1993, 1994, 1995, 1996 Motorola Inc.
# All rights reserved.
#
# THE SOFTWARE is provided on an "AS IS" basis and without warranty.
# To the maximum extent permitted by applicable law,
# MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
# INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS
# FOR A PARTICULAR PURPOSE and any warranty against infringement with
# regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
# and any accompanying written materials.
#
# To the maximum extent permitted by applicable law,
# IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
# (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS,
# BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR OTHER PECUNIARY LOSS)
# ARISING OF THE USE OR INABILITY TO USE THE SOFTWARE.
#
# Motorola assumes no responsibility for the maintenance and support
# of the SOFTWARE.
#
# You are hereby granted a copyright license to use, modify, and distribute the
# SOFTWARE so long as this entire notice is retained without alteration
# in any modified and/or redistributed versions, and that such modified
# versions are clearly identified as such.
# No licenses are granted by implication, estoppel or otherwise under any
# patents or trademarks of Motorola, Inc.
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

#
# litop.s:
#	This file is appended to the top of the 060FPLSP package
# and contains the entry points into the package. The user, in
# effect, branches to one of the branch table entries located here.
#

# Entry branch table. Each entry is a long branch to the real routine
# followed by one zero pad word.
	bra.l		_060LSP__idivs64_
	short		0x0000
	bra.l		_060LSP__idivu64_
	short		0x0000

	bra.l		_060LSP__imuls64_
	short		0x0000
	bra.l		_060LSP__imulu64_
	short		0x0000

	bra.l		_060LSP__cmp2_Ab_
	short		0x0000
	bra.l		_060LSP__cmp2_Aw_
	short		0x0000
	bra.l		_060LSP__cmp2_Al_
	short		0x0000
	bra.l		_060LSP__cmp2_Db_
	short		0x0000
	bra.l		_060LSP__cmp2_Dw_
	short		0x0000
	bra.l		_060LSP__cmp2_Dl_
	short		0x0000

# leave room for future possible additions.
	align		0x200

#########################################################################
# XDEF ****************************************************************	#
#	_060LSP__idivu64_(): Emulate 64-bit unsigned div instruction.	#
#	_060LSP__idivs64_(): Emulate 64-bit signed div instruction.	#
#									#
#	This is the library version which is accessed as a subroutine	#
#	and therefore does not work exactly like the 680X0 div{s,u}.l	#
#	64-bit divide instruction.					#
#									#
# XREF ****************************************************************	#
#	None.								#
#									#
# INPUT ***************************************************************	#
#	0x4(sp)  = divisor						#
#	0x8(sp)  = hi(dividend)						#
#	0xc(sp)  = lo(dividend)						#
#	0x10(sp) = pointer to location to place quotient/remainder	#
#									#
# OUTPUT **************************************************************	#
#	0x10(sp) = points to location of remainder/quotient.		#
#		   remainder is in first longword, quotient is in 2nd.	#
#									#
# ALGORITHM ***********************************************************	#
#	If the operands are signed, make them unsigned and save the	#
# sign info for later. Separate out special cases like divide-by-zero	#
# or 32-bit divides if possible. Else, use a special math algorithm	#
# to calculate the result.						#
#	Restore sign info if signed instruction. Set the condition	#
# codes before performing the final "rts". If the divisor was equal to	#
# zero, then perform a divide-by-zero using a 16-bit implemented	#
# divide instruction. This way, the operating system can record that	#
# the event occurred even though it may not point to the correct place.	#
#									#
#########################################################################

# stack frame locals, as offsets from %a6 (frame is 16 bytes):
set	POSNEG,		-1			# byte: 0xff = signed, 0x00 = unsigned
set	NDIVISOR,	-2			# byte: 0xff if divisor was negative
set	NDIVIDEND,	-3			# byte: 0xff if dividend was negative
set	DDSECOND,	-4			# byte: set once 1st quotient word is done
set	DDNORMAL,	-8			# long: normalization shift count
set	DDQUOTIENT,	-12			# long: quotient being assembled
set	DIV64_CC,	-16			# word: condition codes on entry

##########
# divs.l #
##########
	global		_060LSP__idivs64_
_060LSP__idivs64_:
# PROLOGUE BEGIN ########################################################
	link.w		%a6,&-16
	movm.l		&0x3f00,-(%sp)		# save d2-d7
#	fmovm.l		&0x0,-(%sp)		# save no fpregs
# PROLOGUE END ##########################################################

	mov.w		%cc,DIV64_CC(%a6)
	st		POSNEG(%a6)		# signed operation
	bra.b		ldiv64_cont

##########
# divu.l #
##########
	global		_060LSP__idivu64_
_060LSP__idivu64_:
# PROLOGUE BEGIN ########################################################
	link.w		%a6,&-16
	movm.l		&0x3f00,-(%sp)		# save d2-d7
#	fmovm.l		&0x0,-(%sp)		# save no fpregs
# PROLOGUE END ##########################################################

	mov.w		%cc,DIV64_CC(%a6)
	sf		POSNEG(%a6)		# unsigned operation

# common body: d7 = divisor, d5:d6 = dividend (hi:lo)
ldiv64_cont:
	mov.l		0x8(%a6),%d7		# fetch divisor

	beq.w		ldiv64eq0		# divisor is = 0!!!

	mov.l		0xc(%a6), %d5		# get dividend hi
	mov.l		0x10(%a6), %d6		# get dividend lo

# separate signed and unsigned divide
	tst.b		POSNEG(%a6)		# signed or unsigned?
	beq.b		ldspecialcases		# use positive divide

# save the sign of the divisor
# make divisor unsigned if it's negative
	tst.l		%d7			# chk sign of divisor
	slt		NDIVISOR(%a6)		# save sign of divisor
	bpl.b		ldsgndividend
	neg.l		%d7			# complement negative divisor

# save the sign of the dividend
# make dividend unsigned if it's negative
ldsgndividend:
	tst.l		%d5			# chk sign of hi(dividend)
	slt		NDIVIDEND(%a6)		# save sign of dividend
	bpl.b		ldspecialcases

	mov.w		&0x0, %cc		# clear 'X' cc bit
	negx.l		%d6			# complement signed dividend
	negx.l		%d5

# extract some special cases:
#	- is (dividend == 0) ?
#	- is (hi(dividend) == 0 && (divisor <= lo(dividend))) ? (32-bit div)
ldspecialcases:
	tst.l		%d5			# is (hi(dividend) == 0)
	bne.b		ldnormaldivide		# no, so try it the long way

	tst.l		%d6			# is (lo(dividend) == 0), too
	beq.w		lddone			# yes, so (dividend == 0)

	cmp.l		%d7,%d6			# is (divisor <= lo(dividend))
	bls.b		ld32bitdivide		# yes, so use 32 bit divide

	exg		%d5,%d6			# q = 0, r = dividend
	bra.w		ldivfinish		# can't divide, we're done.

ld32bitdivide:
	tdivu.l		%d7, %d5:%d6		# it's only a 32/32 bit div!

	bra.b		ldivfinish

ldnormaldivide:
# last special case:
#	- is hi(dividend) >= divisor ? if yes, then overflow
	cmp.l		%d7,%d5
	bls.b		lddovf			# answer won't fit in 32 bits

# perform the divide algorithm:
	bsr.l		ldclassical		# do int divide

# separate into signed and unsigned finishes.
ldivfinish:
	tst.b		POSNEG(%a6)		# do divs, divu separately
	beq.b		lddone			# divu has no processing!!!

# it was a divs.l, so ccode setting is a little more complicated...
	tst.b		NDIVIDEND(%a6)		# remainder has same sign
	beq.b		ldcc			# as dividend.
	neg.l		%d5			# sgn(rem) = sgn(dividend)
ldcc:
	mov.b		NDIVISOR(%a6), %d0
	eor.b		%d0, NDIVIDEND(%a6)	# chk if quotient is negative
	beq.b		ldqpos			# branch to quot positive

# 0x80000000 is the largest number representable as a 32-bit negative
# number. the negative of 0x80000000 is 0x80000000.
	cmpi.l		%d6, &0x80000000	# will (-quot) fit in 32 bits?
	bhi.b		lddovf

	neg.l		%d6			# make (-quot) 2's comp

	bra.b		lddone

ldqpos:
	btst		&0x1f, %d6		# will (+quot) fit in 32 bits?
	bne.b		lddovf

# successful divide (or dividend == 0): keep only the incoming 'X' bit
# and let the quotient decide 'N' and 'Z'.
lddone:
	andi.w		&0x10,DIV64_CC(%a6)
	mov.w		DIV64_CC(%a6),%cc
	tst.l		%d6			# sets 'N'/'Z' from the quotient

# here, the remainder is in d5 and the quotient is in d6. save them at
# the location pointed to by the result pointer argument (0x14(%a6)):
# d5 goes to the first longword, d6 to the second.
# use movm here to not disturb the condition codes.
ldexit:
	movm.l		&0x0060,([0x14,%a6])	# save result

# EPILOGUE BEGIN ########################################################
#	fmovm.l		(%sp)+,&0x0		# restore no fpregs
	movm.l		(%sp)+,&0x00fc		# restore d2-d7
	unlk		%a6
# EPILOGUE END ##########################################################

	rts

# overflow: the result should be the unchanged dividend
lddovf:
	mov.l		0xc(%a6), %d5		# get dividend hi
	mov.l		0x10(%a6), %d6		# get dividend lo

	andi.w		&0x1c,DIV64_CC(%a6)	# keep 'X','N','Z'
	ori.w		&0x02,DIV64_CC(%a6)	# set 'V' ccode bit
	mov.w		DIV64_CC(%a6),%cc

	bra.b		ldexit

# divide by zero: hand back the unchanged dividend and the incoming
# condition codes, unwind the frame and then take a real divide-by-zero
# exception.
ldiv64eq0:
	mov.l		0xc(%a6),([0x14,%a6])
	mov.l		0x10(%a6),([0x14,%a6],0x4)

	mov.w		DIV64_CC(%a6),%cc

# EPILOGUE BEGIN ########################################################
#	fmovm.l		(%sp)+,&0x0		# restore no fpregs
	movm.l		(%sp)+,&0x00fc		# restore d2-d7
	unlk		%a6
# EPILOGUE END ##########################################################

	divu.w		&0x0,%d0		# force a divbyzero exception
	rts

###########################################################################
#########################################################################
# This routine uses the 'classical' Algorithm D from Donald Knuth's	#
# Art of Computer Programming, vol II, Seminumerical Algorithms.	#
# For this implementation b=2**16, and the target is U1U2U3U4/V1V2,	#
# where U,V are words of the quadword dividend and longword divisor,	#
# and U1, V1 are the most significant words.				#
#									#
# The most sig. longword of the 64 bit dividend must be in %d5, least	#
# in %d6, and the unsigned, non-zero divisor in %d7. The caller must	#
# already have rejected overflow (hi(dividend) >= divisor).		#
# The quotient is returned in %d6, remainder in %d5.			#
# %d0-%d4 and %d7 are used as scratch, as are the DDNORMAL, DDSECOND	#
# and DDQUOTIENT slots of the caller's %a6 frame.			#
#########################################################################
ldclassical:
# if the divisor msw is 0, use simpler algorithm than the full blown
# one at lddknuth:

	cmpi.l		%d7, &0xffff
	bhi.b		lddknuth		# go use D. Knuth algorithm

# Since the divisor is only a word (and larger than the mslw of the dividend),
# a simpler algorithm may be used :
# In the general case, four quotient words would be created by
# dividing the divisor word into each dividend word. In this case,
# the first two quotient words must be zero, or overflow would occur.
# Since we already checked this case above, we can treat the most significant
# longword of the dividend as (0) remainder (see Knuth) and merely complete
# the last two divisions to get a quotient longword and word remainder:

	clr.l		%d1
	swap		%d5			# same as r*b if previous step rqd
	swap		%d6			# get u3 to lsw position
	mov.w		%d6, %d5		# rb + u3

	divu.w		%d7, %d5

	mov.w		%d5, %d1		# first quotient word
	swap		%d6			# get u4
	mov.w		%d6, %d5		# rb + u4

	divu.w		%d7, %d5

	swap		%d1
	mov.w		%d5, %d1		# 2nd quotient 'digit'
	clr.w		%d5
	swap		%d5			# now remainder
	mov.l		%d1, %d6		# and quotient

	rts

lddknuth:
# In this algorithm, the divisor is treated as a 2 digit (word) number
# which is divided into a 3 digit (word) dividend to get one quotient
# digit (word). After subtraction, the dividend is shifted and the
# process repeated. Before beginning, the divisor and dividend are
# 'normalized' so that the process of estimating the quotient digit
# will yield verifiably correct results.

	clr.l		DDNORMAL(%a6)		# count of shifts for normalization
	clr.b		DDSECOND(%a6)		# clear flag for quotient digits
	clr.l		%d1			# %d1 will hold trial quotient
lddnchk:
	btst		&31, %d7		# must we normalize? first word of
	bne.b		lddnormalized		# divisor (V1) must be >= 65536/2
	addq.l		&0x1, DDNORMAL(%a6)	# count normalization shifts
	lsl.l		&0x1, %d7		# shift the divisor
	lsl.l		&0x1, %d6		# shift u4,u3 with overflow to u2
	roxl.l		&0x1, %d5		# shift u1,u2
	bra.w		lddnchk
lddnormalized:

# Now calculate an estimate of the quotient words (msw first, then lsw).
# The comments use subscripts for the first quotient digit determination.
	mov.l		%d7, %d3		# divisor
	mov.l		%d5, %d2		# dividend mslw
	swap		%d2
	swap		%d3
	cmp.w		%d2, %d3		# V1 = U1 ?
	bne.b		lddqcalc1
	mov.w		&0xffff, %d1		# use max trial quotient word
	bra.b		lddadj0
lddqcalc1:
	mov.l		%d5, %d1

	divu.w		%d3, %d1		# use quotient of mslw/msw

	andi.l		&0x0000ffff, %d1	# zero any remainder
lddadj0:

# now test the trial quotient and adjust. This step plus the
# normalization assures (according to Knuth) that the trial
# quotient will be at worst 1 too large.
	mov.l		%d6, -(%sp)
	clr.w		%d6			# word u3 left
	swap		%d6			# in lsw position
lddadj1:
	mov.l		%d7, %d3
	mov.l		%d1, %d2
	mulu.w		%d7, %d2		# V2q
	swap		%d3
	mulu.w		%d1, %d3		# V1q
	mov.l		%d5, %d4		# U1U2
	sub.l		%d3, %d4		# U1U2 - V1q

	swap		%d4

	mov.w		%d4,%d0
	mov.w		%d6,%d4			# insert lower word (U3)

	tst.w		%d0			# is upper word set?
	bne.w		lddadjd1

#	add.l		%d6, %d4		# (U1U2 - V1q) + U3

	cmp.l		%d2, %d4
	bls.b		lddadjd1		# is V2q > (U1U2-V1q) + U3 ?
	subq.l		&0x1, %d1		# yes, decrement and recheck
	bra.b		lddadj1
lddadjd1:
# now test the word by multiplying it by the divisor (V1V2) and comparing
# the 3 digit (word) result with the current dividend words
	mov.l		%d5, -(%sp)		# save %d5 (%d6 already saved)
	mov.l		%d1, %d6
	swap		%d6			# shift answer to ms 3 words
	mov.l		%d7, %d5
	bsr.l		ldmm2
	mov.l		%d5, %d2		# now %d2,%d3 are trial*divisor
	mov.l		%d6, %d3
	mov.l		(%sp)+, %d5		# restore dividend
	mov.l		(%sp)+, %d6
	sub.l		%d3, %d6
	subx.l		%d2, %d5		# subtract double precision
	bcc		ldd2nd			# no carry, do next quotient digit
	subq.l		&0x1, %d1		# q is one too large
# need to add back divisor longword to current ms 3 digits of dividend
# - according to Knuth, this is done only 2 out of 65536 times for random
# divisor, dividend selection.
	clr.l		%d2
	mov.l		%d7, %d3
	swap		%d3
	clr.w		%d3			# %d3 now ls word of divisor
	add.l		%d3, %d6		# aligned with 3rd word of dividend
	addx.l		%d2, %d5
	mov.l		%d7, %d3
	clr.w		%d3			# %d3 now ms word of divisor
	swap		%d3			# aligned with 2nd word of dividend
	add.l		%d3, %d5
ldd2nd:
	tst.b		DDSECOND(%a6)		# both q words done?
	bne.b		lddremain
# first quotient digit now correct. store digit and shift the
# (subtracted) dividend
	mov.w		%d1, DDQUOTIENT(%a6)
	clr.l		%d1
	swap		%d5
	swap		%d6
	mov.w		%d6, %d5
	clr.w		%d6
	st		DDSECOND(%a6)		# second digit
	bra.w		lddnormalized
lddremain:
# add 2nd word to quotient, get the remainder.
	mov.w		%d1, DDQUOTIENT+2(%a6)
# shift down one word/digit to renormalize remainder.
	mov.w		%d5, %d6
	swap		%d6
	swap		%d5
	mov.l		DDNORMAL(%a6), %d7	# get norm shift count
	beq.b		lddrn
	subq.l		&0x1, %d7		# set for loop count
lddnlp:
	lsr.l		&0x1, %d5		# shift into %d6
	roxr.l		&0x1, %d6
	dbf		%d7, lddnlp
lddrn:
	mov.l		%d6, %d5		# remainder
	mov.l		DDQUOTIENT(%a6), %d6	# quotient

	rts
ldmm2:
# factors for the 32X32->64 multiplication are in %d5 and %d6.
# returns 64 bit result in %d5 (hi) %d6(lo).
# destroys %d2,%d3,%d4.

# multiply hi,lo words of each factor to get 4 intermediate products
	mov.l		%d6, %d2
	mov.l		%d6, %d3
	mov.l		%d5, %d4
	swap		%d3
	swap		%d4
	mulu.w		%d5, %d6		# %d6 <- lsw*lsw
	mulu.w		%d3, %d5		# %d5 <- msw-dest*lsw-source
	mulu.w		%d4, %d2		# %d2 <- msw-source*lsw-dest
	mulu.w		%d4, %d3		# %d3 <- msw*msw
# now use swap and addx to consolidate to two longwords
# NOTE: changed from the Motorola original, which used addx.w for the two
# carry adds below. A word-sized add would lose a carry out of bit 15 of the
# m*m product; addx.l matches the _060LSP__imul{u,s}64_ code further down.
	clr.l		%d4
	swap		%d6
	add.w		%d5, %d6		# add msw of l*l to lsw of m*l product
	addx.l		%d4, %d3		# add any carry to m*m product
	add.w		%d2, %d6		# add in lsw of other m*l product
	addx.l		%d4, %d3		# add any carry to m*m product
	swap		%d6			# %d6 is low 32 bits of final product
	clr.w		%d5
	clr.w		%d2			# lsw of two mixed products used,
	swap		%d5			# now use msws of longwords
	swap		%d2
	add.l		%d2, %d5
	add.l		%d3, %d5		# %d5 now ms 32 bits of final product
	rts

#########################################################################
# XDEF ****************************************************************	#
#	_060LSP__imulu64_(): Emulate 64-bit unsigned mul instruction	#
#	_060LSP__imuls64_(): Emulate 64-bit signed mul instruction.	#
#									#
#	This is the library version which is accessed as a subroutine	#
#	and therefore does not work exactly like the 680X0 mul{s,u}.l	#
#	64-bit multiply instruction.					#
#									#
# XREF ****************************************************************	#
#	None								#
#									#
# INPUT ***************************************************************	#
#	0x4(sp) = multiplier						#
#	0x8(sp) = multiplicand						#
#	0xc(sp) = pointer to location to place 64-bit result		#
#									#
# OUTPUT **************************************************************	#
#	0xc(sp) = points to location of 64-bit result			#
#	hi(result) is stored in the first longword, lo(result) in 2nd.	#
#									#
# ALGORITHM ***********************************************************	#
#	Perform the multiply in pieces using 16x16->32 unsigned		#
# multiplies and "add" instructions.					#
#	Set the condition codes as appropriate before performing an	#
# "rts".								#
#									#
#########################################################################

set	MUL64_CC,	-4			# word: condition codes on entry

	global		_060LSP__imulu64_
_060LSP__imulu64_:

# PROLOGUE BEGIN ########################################################
	link.w		%a6,&-4
	movm.l		&0x3800,-(%sp)		# save d2-d4
#	fmovm.l		&0x0,-(%sp)		# save no fpregs
# PROLOGUE END ##########################################################

	mov.w		%cc,MUL64_CC(%a6)	# save incoming ccodes

	mov.l		0x8(%a6),%d0		# store multiplier in d0
	beq.w		mulu64_zero		# handle zero separately

	mov.l		0xc(%a6),%d1		# get multiplicand in d1
	beq.w		mulu64_zero		# handle zero separately

#########################################################################
#	63                         32                           0	#
#	----------------------------					#
#	| hi(mplier) * hi(mplicand)|					#
#	----------------------------					#
#	             -----------------------------			#
#	             | hi(mplier) * lo(mplicand) |			#
#	             -----------------------------			#
#	             -----------------------------			#
#	             | lo(mplier) * hi(mplicand) |			#
#	             -----------------------------			#
#	  |                        -----------------------------	#
#	--|--                      | lo(mplier) * lo(mplicand) |	#
#	  |                        -----------------------------	#
#	========================================================	#
#	--------------------------------------------------------	#
#	|        hi(result)        |        lo(result)         |	#
#	--------------------------------------------------------	#
#########################################################################
mulu64_alg:
# load temp registers with operands
	mov.l		%d0,%d2			# mr in d2
	mov.l		%d0,%d3			# mr in d3
	mov.l		%d1,%d4			# md in d4
	swap		%d3			# hi(mr) in lo d3
	swap		%d4			# hi(md) in lo d4

# complete necessary multiplies:
	mulu.w		%d1,%d0			# [1] lo(mr) * lo(md)
	mulu.w		%d3,%d1			# [2] hi(mr) * lo(md)
	mulu.w		%d4,%d2			# [3] lo(mr) * hi(md)
	mulu.w		%d4,%d3			# [4] hi(mr) * hi(md)

# add lo portions of [2],[3] to hi portion of [1].
# add carries produced from these adds to [4].
# lo([1]) is the final lo 16 bits of the result.
	clr.l		%d4			# load d4 w/ zero value
	swap		%d0			# hi([1]) <==> lo([1])
	add.w		%d1,%d0			# hi([1]) + lo([2])
	addx.l		%d4,%d3			# [4] + carry
	add.w		%d2,%d0			# hi([1]) + lo([3])
	addx.l		%d4,%d3			# [4] + carry
	swap		%d0			# lo([1]) <==> hi([1])

# lo portions of [2],[3] have been added in to final result.
# now, clear lo, put hi in lo reg, and add to [4]
	clr.w		%d1			# clear lo([2])
	clr.w		%d2			# clear lo([3])
	swap		%d1			# hi([2]) in lo d1
	swap		%d2			# hi([3]) in lo d2
	add.l		%d2,%d1			# hi([2]) + hi([3])
	add.l		%d3,%d1			# ... + [4] = hi(result)

# now, grab the condition codes. only one that can be set is 'N'.
# 'N' CAN be set if the operation is unsigned if bit 63 is set.
	mov.w		MUL64_CC(%a6),%d4
	andi.b		&0x10,%d4		# keep old 'X' bit
	tst.l		%d1			# may set 'N' bit
	bpl.b		mulu64_ddone
	ori.b		&0x8,%d4		# set 'N' bit
mulu64_ddone:
	mov.w		%d4,%cc

# here, the result is in d1 (hi) and d0 (lo). swap the two so that movm
# stores hi(result) first, at the location pointed to by the result
# pointer argument (0x10(%a6)).
# use movm here to not disturb the condition codes.
mulu64_end:
	exg		%d1,%d0
	movm.l		&0x0003,([0x10,%a6])	# save result

# EPILOGUE BEGIN ########################################################
#	fmovm.l		(%sp)+,&0x0		# restore no fpregs
	movm.l		(%sp)+,&0x001c		# restore d2-d4
	unlk		%a6
# EPILOGUE END ##########################################################

	rts

# one or both of the operands is zero so the result is also zero.
# store a zero result and set the 'Z' ccode bit.
mulu64_zero:
	clr.l		%d0
	clr.l		%d1

	mov.w		MUL64_CC(%a6),%d4
	andi.b		&0x10,%d4		# keep old 'X' bit
	ori.b		&0x4,%d4
	mov.w		%d4,%cc			# set 'Z' ccode bit

	bra.b		mulu64_end

##########
# muls.l #
##########
	global		_060LSP__imuls64_
_060LSP__imuls64_:

# PROLOGUE BEGIN ########################################################
	link.w		%a6,&-4
	movm.l		&0x3c00,-(%sp)		# save d2-d5
#	fmovm.l		&0x0,-(%sp)		# save no fpregs
# PROLOGUE END ##########################################################

	mov.w		%cc,MUL64_CC(%a6)	# save incoming ccodes

# NOTE: changed from the Motorola original, which branched to mulu64_zero
# for a zero operand. That left muls64_zero unreachable and made this
# routine exit through the unsigned epilogue, which restores only d2-d4
# although d2-d5 were saved above (harmless only because d5 is still
# untouched at this point). beq.w is needed to reach muls64_zero.
	mov.l		0x8(%a6),%d0		# store multiplier in d0
	beq.w		muls64_zero		# handle zero separately

	mov.l		0xc(%a6),%d1		# get multiplicand in d1
	beq.w		muls64_zero		# handle zero separately

	clr.b		%d5			# clear sign tag
	tst.l		%d0			# is multiplier negative?
	bge.b		muls64_chk_md_sgn	# no
	neg.l		%d0			# make multiplier positive

	ori.b		&0x1,%d5		# save multiplier sgn

# the result sign is the exclusive or of the operand sign bits.
muls64_chk_md_sgn:
	tst.l		%d1			# is multiplicand negative?
	bge.b		muls64_alg		# no
	neg.l		%d1			# make multiplicand positive

	eori.b		&0x1,%d5		# calculate correct sign

#########################################################################
#	63                         32                           0	#
#	----------------------------					#
#	| hi(mplier) * hi(mplicand)|					#
#	----------------------------					#
#	             -----------------------------			#
#	             | hi(mplier) * lo(mplicand) |			#
#	             -----------------------------			#
#	             -----------------------------			#
#	             | lo(mplier) * hi(mplicand) |			#
#	             -----------------------------			#
#	  |                        -----------------------------	#
#	--|--                      | lo(mplier) * lo(mplicand) |	#
#	  |                        -----------------------------	#
#	========================================================	#
#	--------------------------------------------------------	#
#	|        hi(result)        |        lo(result)         |	#
#	--------------------------------------------------------	#
#########################################################################
muls64_alg:
# load temp registers with operands
	mov.l		%d0,%d2			# mr in d2
	mov.l		%d0,%d3			# mr in d3
	mov.l		%d1,%d4			# md in d4
	swap		%d3			# hi(mr) in lo d3
	swap		%d4			# hi(md) in lo d4

# complete necessary multiplies:
	mulu.w		%d1,%d0			# [1] lo(mr) * lo(md)
	mulu.w		%d3,%d1			# [2] hi(mr) * lo(md)
	mulu.w		%d4,%d2			# [3] lo(mr) * hi(md)
	mulu.w		%d4,%d3			# [4] hi(mr) * hi(md)

# add lo portions of [2],[3] to hi portion of [1].
# add carries produced from these adds to [4].
# lo([1]) is the final lo 16 bits of the result.
	clr.l		%d4			# load d4 w/ zero value
	swap		%d0			# hi([1]) <==> lo([1])
	add.w		%d1,%d0			# hi([1]) + lo([2])
	addx.l		%d4,%d3			# [4] + carry
	add.w		%d2,%d0			# hi([1]) + lo([3])
	addx.l		%d4,%d3			# [4] + carry
	swap		%d0			# lo([1]) <==> hi([1])

# lo portions of [2],[3] have been added in to final result.
# now, clear lo, put hi in lo reg, and add to [4]
	clr.w		%d1			# clear lo([2])
	clr.w		%d2			# clear lo([3])
	swap		%d1			# hi([2]) in lo d1
	swap		%d2			# hi([3]) in lo d2
	add.l		%d2,%d1			# hi([2]) + hi([3])
	add.l		%d3,%d1			# ... + [4] = hi(result)

	tst.b		%d5			# should result be signed?
	beq.b		muls64_done		# no

# result should be a signed negative number.
# compute 2's complement of the unsigned number:
#   -negate all bits and add 1
muls64_neg:
	not.l		%d0			# negate lo(result) bits
	not.l		%d1			# negate hi(result) bits
	addq.l		&1,%d0			# add 1 to lo(result)
	addx.l		%d4,%d1			# add carry to hi(result)

muls64_done:
	mov.w		MUL64_CC(%a6),%d4
	andi.b		&0x10,%d4		# keep old 'X' bit
	tst.l		%d1			# may set 'N' bit
	bpl.b		muls64_ddone
	ori.b		&0x8,%d4		# set 'N' bit
muls64_ddone:
	mov.w		%d4,%cc

# here, the result is in d1 (hi) and d0 (lo). swap the two so that movm
# stores hi(result) first, at the location pointed to by the result
# pointer argument (0x10(%a6)).
# use movm here to not disturb the condition codes.
muls64_end:
	exg		%d1,%d0
	movm.l		&0x0003,([0x10,%a6])	# save result

# EPILOGUE BEGIN ########################################################
#	fmovm.l		(%sp)+,&0x0		# restore no fpregs
	movm.l		(%sp)+,&0x003c		# restore d2-d5
	unlk		%a6
# EPILOGUE END ##########################################################

	rts

# one or both of the operands is zero so the result is also zero.
# store a zero result and set the 'Z' ccode bit.
muls64_zero:
	clr.l		%d0
	clr.l		%d1

	mov.w		MUL64_CC(%a6),%d4
	andi.b		&0x10,%d4		# keep old 'X' bit
	ori.b		&0x4,%d4
	mov.w		%d4,%cc			# set 'Z' ccode bit

	bra.b		muls64_end

#########################################################################
# XDEF ****************************************************************	#
#	_060LSP__cmp2_Ab_(): Emulate "cmp2.b An,<ea>".			#
#	_060LSP__cmp2_Aw_(): Emulate "cmp2.w An,<ea>".			#
#	_060LSP__cmp2_Al_(): Emulate "cmp2.l An,<ea>".			#
#	_060LSP__cmp2_Db_(): Emulate "cmp2.b Dn,<ea>".			#
#	_060LSP__cmp2_Dw_(): Emulate "cmp2.w Dn,<ea>".			#
#	_060LSP__cmp2_Dl_(): Emulate "cmp2.l Dn,<ea>".			#
#									#
#	This is the library version which is accessed as a subroutine	#
#	and therefore does not work exactly like the 680X0 "cmp2"	#
#	instruction.							#
#									#
# XREF ****************************************************************	#
#	None								#
#									#
# INPUT ***************************************************************	#
#	0x4(sp) = Rn							#
#	0x8(sp) = pointer to boundary pair				#
#									#
# OUTPUT **************************************************************	#
#	cc = condition codes are set correctly				#
#									#
# ALGORITHM ***********************************************************	#
#	In the interest of simplicity, all operands are converted to	#
# longword size whether the operation is byte, word, or long. The	#
# bounds are sign extended accordingly. If Rn is a data register, Rn is	#
# also sign extended. If Rn is an address register, it need not be sign	#
# extended since the full register is always used.			#
#	The condition codes are set correctly before the final "rts".	#
#									#
#########################################################################

set	CMP2_CC,	-4			# word: condition codes on entry

# Every entry point below loads d2 = Rn, d0 = lo bound, d1 = hi bound
# (all as longwords) and then joins the common code at l_cmp2_cmp.

	global		_060LSP__cmp2_Ab_
_060LSP__cmp2_Ab_:

# PROLOGUE BEGIN ########################################################
	link.w		%a6,&-4
	movm.l		&0x3800,-(%sp)		# save d2-d4
#	fmovm.l		&0x0,-(%sp)		# save no fpregs
# PROLOGUE END ##########################################################

	mov.w		%cc,CMP2_CC(%a6)
	mov.l		0x8(%a6), %d2		# get regval

	mov.b		([0xc,%a6],0x0),%d0
	mov.b		([0xc,%a6],0x1),%d1

	extb.l		%d0			# sign extend lo bnd
	extb.l		%d1			# sign extend hi bnd
	bra.w		l_cmp2_cmp		# go do the compare emulation

	global		_060LSP__cmp2_Aw_
_060LSP__cmp2_Aw_:

# PROLOGUE BEGIN ########################################################
	link.w		%a6,&-4
	movm.l		&0x3800,-(%sp)		# save d2-d4
#	fmovm.l		&0x0,-(%sp)		# save no fpregs
# PROLOGUE END ##########################################################

	mov.w		%cc,CMP2_CC(%a6)
	mov.l		0x8(%a6), %d2		# get regval

	mov.w		([0xc,%a6],0x0),%d0
	mov.w		([0xc,%a6],0x2),%d1

	ext.l		%d0			# sign extend lo bnd
	ext.l		%d1			# sign extend hi bnd
	bra.w		l_cmp2_cmp		# go do the compare emulation

	global		_060LSP__cmp2_Al_
_060LSP__cmp2_Al_:

# PROLOGUE BEGIN ########################################################
	link.w		%a6,&-4
	movm.l		&0x3800,-(%sp)		# save d2-d4
#	fmovm.l		&0x0,-(%sp)		# save no fpregs
# PROLOGUE END ##########################################################

	mov.w		%cc,CMP2_CC(%a6)
	mov.l		0x8(%a6), %d2		# get regval

	mov.l		([0xc,%a6],0x0),%d0
	mov.l		([0xc,%a6],0x4),%d1
	bra.w		l_cmp2_cmp		# go do the compare emulation

	global		_060LSP__cmp2_Db_
_060LSP__cmp2_Db_:

# PROLOGUE BEGIN ########################################################
	link.w		%a6,&-4
	movm.l		&0x3800,-(%sp)		# save d2-d4
#	fmovm.l		&0x0,-(%sp)		# save no fpregs
# PROLOGUE END ##########################################################

	mov.w		%cc,CMP2_CC(%a6)
	mov.l		0x8(%a6), %d2		# get regval

	mov.b		([0xc,%a6],0x0),%d0
	mov.b		([0xc,%a6],0x1),%d1

	extb.l		%d0			# sign extend lo bnd
	extb.l		%d1			# sign extend hi bnd

# operation is a data register compare.
# sign extend byte to long so we can do simple longword compares.
	extb.l		%d2			# sign extend data byte
	bra.w		l_cmp2_cmp		# go do the compare emulation

	global		_060LSP__cmp2_Dw_
_060LSP__cmp2_Dw_:

# PROLOGUE BEGIN ########################################################
	link.w		%a6,&-4
	movm.l		&0x3800,-(%sp)		# save d2-d4
#	fmovm.l		&0x0,-(%sp)		# save no fpregs
# PROLOGUE END ##########################################################

	mov.w		%cc,CMP2_CC(%a6)
	mov.l		0x8(%a6), %d2		# get regval

	mov.w		([0xc,%a6],0x0),%d0
	mov.w		([0xc,%a6],0x2),%d1

	ext.l		%d0			# sign extend lo bnd
	ext.l		%d1			# sign extend hi bnd

# operation is a data register compare.
# sign extend word to long so we can do simple longword compares.
	ext.l		%d2			# sign extend data word
	bra.w		l_cmp2_cmp		# go emulate compare

	global		_060LSP__cmp2_Dl_
_060LSP__cmp2_Dl_:

# PROLOGUE BEGIN ########################################################
	link.w		%a6,&-4
	movm.l		&0x3800,-(%sp)		# save d2-d4
#	fmovm.l		&0x0,-(%sp)		# save no fpregs
# PROLOGUE END ##########################################################

	mov.w		%cc,CMP2_CC(%a6)
	mov.l		0x8(%a6), %d2		# get regval

	mov.l		([0xc,%a6],0x0),%d0
	mov.l		([0xc,%a6],0x4),%d1

#
# To set the ccodes correctly:
#	(1) save 'Z' bit from (Rn - lo)
#	(2) save 'Z' and 'C' bits from ((hi - lo) - (Rn - lo))
#	(3) keep 'X', 'N', and 'V' from before instruction
#	(4) combine ccodes
#
l_cmp2_cmp:
	sub.l		%d0, %d2		# (Rn - lo)
	mov.w		%cc, %d3		# fetch resulting ccodes
	andi.b		&0x4, %d3		# keep 'Z' bit
	sub.l		%d0, %d1		# (hi - lo)
	cmp.l		%d1,%d2			# ((hi - lo) - (Rn - lo))

	mov.w		%cc, %d4		# fetch resulting ccodes
	or.b		%d4, %d3		# combine w/ earlier ccodes
	andi.b		&0x5, %d3		# keep 'Z' and 'C'

	mov.w		CMP2_CC(%a6), %d4	# fetch old ccodes
	andi.b		&0x1a, %d4		# keep 'X','N','V' bits
	or.b		%d3, %d4		# insert new ccodes
	mov.w		%d4,%cc			# save new ccodes

# EPILOGUE BEGIN ########################################################
#	fmovm.l		(%sp)+,&0x0		# restore no fpregs
	movm.l		(%sp)+,&0x001c		# restore d2-d4
	unlk		%a6
# EPILOGUE END ##########################################################

	rts