xref: /netbsd/sys/arch/m68k/060sp/dist/ilsp.s (revision bf9ec67e)
1#
2# $NetBSD: ilsp.s,v 1.1 2000/04/14 20:24:39 is Exp $
3#
4
5#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
6# MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
7# M68000 Hi-Performance Microprocessor Division
8# M68060 Software Package Production Release
9#
10# M68060 Software Package Copyright (C) 1993, 1994, 1995, 1996 Motorola Inc.
11# All rights reserved.
12#
13# THE SOFTWARE is provided on an "AS IS" basis and without warranty.
14# To the maximum extent permitted by applicable law,
15# MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
16# INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS
17# FOR A PARTICULAR PURPOSE and any warranty against infringement with
18# regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
19# and any accompanying written materials.
20#
21# To the maximum extent permitted by applicable law,
22# IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
23# (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS,
24# BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR OTHER PECUNIARY LOSS)
25# ARISING OF THE USE OR INABILITY TO USE THE SOFTWARE.
26#
27# Motorola assumes no responsibility for the maintenance and support
28# of the SOFTWARE.
29#
30# You are hereby granted a copyright license to use, modify, and distribute the
31# SOFTWARE so long as this entire notice is retained without alteration
32# in any modified and/or redistributed versions, and that such modified
33# versions are clearly identified as such.
34# No licenses are granted by implication, estoppel or otherwise under any
35# patents or trademarks of Motorola, Inc.
36#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
37
38#
39# litop.s:
40# 	This file is appended to the top of the 060FPLSP package
41# and contains the entry points into the package. The user, in
42# effect, branches to one of the branch table entries located here.
43#
44
45	bra.l	_060LSP__idivs64_
46	short	0x0000
47	bra.l	_060LSP__idivu64_
48	short	0x0000
49
50	bra.l	_060LSP__imuls64_
51	short	0x0000
52	bra.l	_060LSP__imulu64_
53	short	0x0000
54
55	bra.l	_060LSP__cmp2_Ab_
56	short	0x0000
57	bra.l	_060LSP__cmp2_Aw_
58	short	0x0000
59	bra.l	_060LSP__cmp2_Al_
60	short	0x0000
61	bra.l	_060LSP__cmp2_Db_
62	short	0x0000
63	bra.l	_060LSP__cmp2_Dw_
64	short	0x0000
65	bra.l	_060LSP__cmp2_Dl_
66	short	0x0000
67
68# leave room for future possible aditions.
69	align	0x200
70
71#########################################################################
72# XDEF ****************************************************************	#
73#	_060LSP__idivu64_(): Emulate 64-bit unsigned div instruction.	#
74#	_060LSP__idivs64_(): Emulate 64-bit signed div instruction.	#
75#									#
76#	This is the library version which is accessed as a subroutine	#
77# 	and therefore does not work exactly like the 680X0 div{s,u}.l	#
78#	64-bit divide instruction.					#
79#									#
80# XREF ****************************************************************	#
81#	None.								#
82#									#
83# INPUT ***************************************************************	#
84#	0x4(sp)  = divisor						#
85#	0x8(sp)  = hi(dividend)						#
86#	0xc(sp)  = lo(dividend)						#
87#	0x10(sp) = pointer to location to place quotient/remainder	#
88# 									#
89# OUTPUT **************************************************************	#
90#	0x10(sp) = points to location of remainder/quotient.		#
91#		   remainder is in first longword, quotient is in 2nd.	#
92#									#
93# ALGORITHM ***********************************************************	#
94#	If the operands are signed, make them unsigned and save the 	#
95# sign info for later. Separate out special cases like divide-by-zero	#
96# or 32-bit divides if possible. Else, use a special math algorithm	#
97# to calculate the result.						#
98#	Restore sign info if signed instruction. Set the condition 	#
99# codes before performing the final "rts". If the divisor was equal to	#
100# zero, then perform a divide-by-zero using a 16-bit implemented	#
101# divide instruction. This way, the operating system can record that	#
102# the event occurred even though it may not point to the correct place.	#
103#									#
104#########################################################################
105
106set	POSNEG,		-1
107set	NDIVISOR,	-2
108set	NDIVIDEND,	-3
109set	DDSECOND,	-4
110set	DDNORMAL,	-8
111set	DDQUOTIENT,	-12
112set	DIV64_CC,	-16
113
114##########
115# divs.l #
116##########
117	global		_060LSP__idivs64_
118_060LSP__idivs64_:
119# PROLOGUE BEGIN ########################################################
120	link.w		%a6,&-16
121	movm.l		&0x3f00,-(%sp)		# save d2-d7
122#	fmovm.l		&0x0,-(%sp)		# save no fpregs
123# PROLOGUE END ##########################################################
124
125	mov.w		%cc,DIV64_CC(%a6)
126	st		POSNEG(%a6)		# signed operation
127	bra.b		ldiv64_cont
128
129##########
130# divu.l #
131##########
132	global		_060LSP__idivu64_
133_060LSP__idivu64_:
134# PROLOGUE BEGIN ########################################################
135	link.w		%a6,&-16
136	movm.l		&0x3f00,-(%sp)		# save d2-d7
137#	fmovm.l		&0x0,-(%sp)		# save no fpregs
138# PROLOGUE END ##########################################################
139
140	mov.w		%cc,DIV64_CC(%a6)
141	sf		POSNEG(%a6)		# unsigned operation
142
143ldiv64_cont:
144	mov.l		0x8(%a6),%d7		# fetch divisor
145
146	beq.w		ldiv64eq0		# divisor is = 0!!!
147
148	mov.l		0xc(%a6), %d5 		# get dividend hi
149	mov.l		0x10(%a6), %d6 		# get dividend lo
150
151# separate signed and unsigned divide
152	tst.b		POSNEG(%a6)		# signed or unsigned?
153	beq.b		ldspecialcases		# use positive divide
154
155# save the sign of the divisor
156# make divisor unsigned if it's negative
157	tst.l		%d7			# chk sign of divisor
158	slt		NDIVISOR(%a6)		# save sign of divisor
159	bpl.b		ldsgndividend
160	neg.l		%d7			# complement negative divisor
161
162# save the sign of the dividend
163# make dividend unsigned if it's negative
164ldsgndividend:
165	tst.l		%d5			# chk sign of hi(dividend)
166	slt		NDIVIDEND(%a6)		# save sign of dividend
167	bpl.b		ldspecialcases
168
169	mov.w		&0x0, %cc		# clear 'X' cc bit
170	negx.l		%d6			# complement signed dividend
171	negx.l		%d5
172
173# extract some special cases:
174# 	- is (dividend == 0) ?
175#	- is (hi(dividend) == 0 && (divisor <= lo(dividend))) ? (32-bit div)
176ldspecialcases:
177	tst.l		%d5			# is (hi(dividend) == 0)
178	bne.b		ldnormaldivide		# no, so try it the long way
179
180	tst.l		%d6			# is (lo(dividend) == 0), too
181	beq.w		lddone			# yes, so (dividend == 0)
182
183	cmp.l	 	%d7,%d6			# is (divisor <= lo(dividend))
184	bls.b		ld32bitdivide		# yes, so use 32 bit divide
185
186	exg		%d5,%d6			# q = 0, r = dividend
187	bra.w		ldivfinish		# can't divide, we're done.
188
189ld32bitdivide:
190	tdivu.l		%d7, %d5:%d6		# it's only a 32/32 bit div!
191
192	bra.b		ldivfinish
193
194ldnormaldivide:
195# last special case:
196# 	- is hi(dividend) >= divisor ? if yes, then overflow
197	cmp.l		%d7,%d5
198	bls.b		lddovf			# answer won't fit in 32 bits
199
200# perform the divide algorithm:
201	bsr.l		ldclassical		# do int divide
202
203# separate into signed and unsigned finishes.
204ldivfinish:
205	tst.b		POSNEG(%a6)		# do divs, divu separately
206	beq.b		lddone			# divu has no processing!!!
207
208# it was a divs.l, so ccode setting is a little more complicated...
209	tst.b		NDIVIDEND(%a6)		# remainder has same sign
210	beq.b		ldcc			# as dividend.
211	neg.l		%d5			# sgn(rem) = sgn(dividend)
212ldcc:
213	mov.b		NDIVISOR(%a6), %d0
214	eor.b		%d0, NDIVIDEND(%a6)	# chk if quotient is negative
215	beq.b		ldqpos			# branch to quot positive
216
217# 0x80000000 is the largest number representable as a 32-bit negative
218# number. the negative of 0x80000000 is 0x80000000.
219	cmpi.l		%d6, &0x80000000	# will (-quot) fit in 32 bits?
220	bhi.b		lddovf
221
222	neg.l		%d6			# make (-quot) 2's comp
223
224	bra.b		lddone
225
226ldqpos:
227	btst		&0x1f, %d6		# will (+quot) fit in 32 bits?
228	bne.b		lddovf
229
230lddone:
231# if the register numbers are the same, only the quotient gets saved.
232# so, if we always save the quotient second, we save ourselves a cmp&beq
233	andi.w		&0x10,DIV64_CC(%a6)
234	mov.w		DIV64_CC(%a6),%cc
235	tst.l		%d6			# may set 'N' ccode bit
236
237# here, the result is in d1 and d0. the current strategy is to save
238# the values at the location pointed to by a0.
239# use movm here to not disturb the condition codes.
240ldexit:
241	movm.l		&0x0060,([0x14,%a6])	# save result
242
243# EPILOGUE BEGIN ########################################################
244#	fmovm.l		(%sp)+,&0x0		# restore no fpregs
245	movm.l		(%sp)+,&0x00fc		# restore d2-d7
246	unlk		%a6
247# EPILOGUE END ##########################################################
248
249	rts
250
251# the result should be the unchanged dividend
252lddovf:
253	mov.l		0xc(%a6), %d5 		# get dividend hi
254	mov.l		0x10(%a6), %d6 		# get dividend lo
255
256	andi.w		&0x1c,DIV64_CC(%a6)
257	ori.w		&0x02,DIV64_CC(%a6)	# set 'V' ccode bit
258	mov.w		DIV64_CC(%a6),%cc
259
260	bra.b		ldexit
261
262ldiv64eq0:
263	mov.l		0xc(%a6),([0x14,%a6])
264	mov.l		0x10(%a6),([0x14,%a6],0x4)
265
266	mov.w		DIV64_CC(%a6),%cc
267
268# EPILOGUE BEGIN ########################################################
269#	fmovm.l		(%sp)+,&0x0		# restore no fpregs
270	movm.l		(%sp)+,&0x00fc		# restore d2-d7
271	unlk		%a6
272# EPILOGUE END ##########################################################
273
274	divu.w		&0x0,%d0		# force a divbyzero exception
275	rts
276
277###########################################################################
278#########################################################################
279# This routine uses the 'classical' Algorithm D from Donald Knuth's	#
280# Art of Computer Programming, vol II, Seminumerical Algorithms.	#
281# For this implementation b=2**16, and the target is U1U2U3U4/V1V2,	#
282# where U,V are words of the quadword dividend and longword divisor,	#
283# and U1, V1 are the most significant words.				#
284# 									#
285# The most sig. longword of the 64 bit dividend must be in %d5, least 	#
286# in %d6. The divisor must be in the variable ddivisor, and the		#
287# signed/unsigned flag ddusign must be set (0=unsigned,1=signed).	#
288# The quotient is returned in %d6, remainder in %d5, unless the		#
289# v (overflow) bit is set in the saved %ccr. If overflow, the dividend	#
290# is unchanged.								#
291#########################################################################
292ldclassical:
293# if the divisor msw is 0, use simpler algorithm then the full blown
294# one at ddknuth:
295
296	cmpi.l		%d7, &0xffff
297	bhi.b		lddknuth		# go use D. Knuth algorithm
298
299# Since the divisor is only a word (and larger than the mslw of the dividend),
300# a simpler algorithm may be used :
301# In the general case, four quotient words would be created by
302# dividing the divisor word into each dividend word. In this case,
303# the first two quotient words must be zero, or overflow would occur.
304# Since we already checked this case above, we can treat the most significant
305# longword of the dividend as (0) remainder (see Knuth) and merely complete
306# the last two divisions to get a quotient longword and word remainder:
307
308	clr.l		%d1
309	swap		%d5			# same as r*b if previous step rqd
310	swap		%d6			# get u3 to lsw position
311	mov.w		%d6, %d5		# rb + u3
312
313	divu.w		%d7, %d5
314
315	mov.w		%d5, %d1		# first quotient word
316	swap		%d6			# get u4
317	mov.w		%d6, %d5		# rb + u4
318
319	divu.w		%d7, %d5
320
321	swap		%d1
322	mov.w		%d5, %d1		# 2nd quotient 'digit'
323	clr.w		%d5
324	swap		%d5			# now remainder
325	mov.l		%d1, %d6		# and quotient
326
327	rts
328
329lddknuth:
330# In this algorithm, the divisor is treated as a 2 digit (word) number
331# which is divided into a 3 digit (word) dividend to get one quotient
332# digit (word). After subtraction, the dividend is shifted and the
333# process repeated. Before beginning, the divisor and quotient are
334# 'normalized' so that the process of estimating the quotient digit
335# will yield verifiably correct results..
336
337	clr.l		DDNORMAL(%a6)		# count of shifts for normalization
338	clr.b		DDSECOND(%a6)		# clear flag for quotient digits
339	clr.l		%d1			# %d1 will hold trial quotient
340lddnchk:
341	btst		&31, %d7		# must we normalize? first word of
342	bne.b		lddnormalized		# divisor (V1) must be >= 65536/2
343	addq.l		&0x1, DDNORMAL(%a6)	# count normalization shifts
344	lsl.l		&0x1, %d7		# shift the divisor
345	lsl.l		&0x1, %d6		# shift u4,u3 with overflow to u2
346	roxl.l		&0x1, %d5		# shift u1,u2
347	bra.w		lddnchk
348lddnormalized:
349
350# Now calculate an estimate of the quotient words (msw first, then lsw).
351# The comments use subscripts for the first quotient digit determination.
352	mov.l		%d7, %d3		# divisor
353	mov.l		%d5, %d2		# dividend mslw
354	swap		%d2
355	swap		%d3
356	cmp.w	 	%d2, %d3		# V1 = U1 ?
357	bne.b		lddqcalc1
358	mov.w		&0xffff, %d1		# use max trial quotient word
359	bra.b		lddadj0
360lddqcalc1:
361	mov.l		%d5, %d1
362
363	divu.w		%d3, %d1		# use quotient of mslw/msw
364
365	andi.l		&0x0000ffff, %d1	# zero any remainder
366lddadj0:
367
368# now test the trial quotient and adjust. This step plus the
369# normalization assures (according to Knuth) that the trial
370# quotient will be at worst 1 too large.
371	mov.l		%d6, -(%sp)
372	clr.w		%d6			# word u3 left
373	swap		%d6			# in lsw position
374lddadj1: mov.l		%d7, %d3
375	mov.l		%d1, %d2
376	mulu.w		%d7, %d2		# V2q
377	swap		%d3
378	mulu.w		%d1, %d3		# V1q
379	mov.l		%d5, %d4		# U1U2
380	sub.l		%d3, %d4		# U1U2 - V1q
381
382	swap		%d4
383
384	mov.w		%d4,%d0
385	mov.w		%d6,%d4			# insert lower word (U3)
386
387	tst.w		%d0			# is upper word set?
388	bne.w		lddadjd1
389
390#	add.l		%d6, %d4		# (U1U2 - V1q) + U3
391
392	cmp.l	 	%d2, %d4
393	bls.b		lddadjd1		# is V2q > (U1U2-V1q) + U3 ?
394	subq.l		&0x1, %d1		# yes, decrement and recheck
395	bra.b		lddadj1
396lddadjd1:
397# now test the word by multiplying it by the divisor (V1V2) and comparing
398# the 3 digit (word) result with the current dividend words
399	mov.l		%d5, -(%sp)		# save %d5 (%d6 already saved)
400	mov.l		%d1, %d6
401	swap		%d6			# shift answer to ms 3 words
402	mov.l		%d7, %d5
403	bsr.l		ldmm2
404	mov.l		%d5, %d2		# now %d2,%d3 are trial*divisor
405	mov.l		%d6, %d3
406	mov.l		(%sp)+, %d5		# restore dividend
407	mov.l		(%sp)+, %d6
408	sub.l		%d3, %d6
409	subx.l		%d2, %d5		# subtract double precision
410	bcc		ldd2nd			# no carry, do next quotient digit
411	subq.l		&0x1, %d1		# q is one too large
412# need to add back divisor longword to current ms 3 digits of dividend
413# - according to Knuth, this is done only 2 out of 65536 times for random
414# divisor, dividend selection.
415	clr.l		%d2
416	mov.l		%d7, %d3
417	swap		%d3
418	clr.w		%d3			# %d3 now ls word of divisor
419	add.l		%d3, %d6		# aligned with 3rd word of dividend
420	addx.l		%d2, %d5
421	mov.l		%d7, %d3
422	clr.w		%d3			# %d3 now ms word of divisor
423	swap		%d3			# aligned with 2nd word of dividend
424	add.l		%d3, %d5
425ldd2nd:
426	tst.b		DDSECOND(%a6)	# both q words done?
427	bne.b		lddremain
428# first quotient digit now correct. store digit and shift the
429# (subtracted) dividend
430	mov.w		%d1, DDQUOTIENT(%a6)
431	clr.l		%d1
432	swap		%d5
433	swap		%d6
434	mov.w		%d6, %d5
435	clr.w		%d6
436	st		DDSECOND(%a6)		# second digit
437	bra.w		lddnormalized
438lddremain:
439# add 2nd word to quotient, get the remainder.
440	mov.w 		%d1, DDQUOTIENT+2(%a6)
441# shift down one word/digit to renormalize remainder.
442	mov.w		%d5, %d6
443	swap		%d6
444	swap		%d5
445	mov.l		DDNORMAL(%a6), %d7	# get norm shift count
446	beq.b		lddrn
447	subq.l		&0x1, %d7		# set for loop count
448lddnlp:
449	lsr.l		&0x1, %d5		# shift into %d6
450	roxr.l		&0x1, %d6
451	dbf		%d7, lddnlp
452lddrn:
453	mov.l		%d6, %d5		# remainder
454	mov.l		DDQUOTIENT(%a6), %d6 	# quotient
455
456	rts
457ldmm2:
458# factors for the 32X32->64 multiplication are in %d5 and %d6.
459# returns 64 bit result in %d5 (hi) %d6(lo).
460# destroys %d2,%d3,%d4.
461
462# multiply hi,lo words of each factor to get 4 intermediate products
463	mov.l		%d6, %d2
464	mov.l		%d6, %d3
465	mov.l		%d5, %d4
466	swap		%d3
467	swap		%d4
468	mulu.w		%d5, %d6		# %d6 <- lsw*lsw
469	mulu.w		%d3, %d5		# %d5 <- msw-dest*lsw-source
470	mulu.w		%d4, %d2		# %d2 <- msw-source*lsw-dest
471	mulu.w		%d4, %d3		# %d3 <- msw*msw
472# now use swap and addx to consolidate to two longwords
473	clr.l		%d4
474	swap		%d6
475	add.w		%d5, %d6		# add msw of l*l to lsw of m*l product
476	addx.w		%d4, %d3		# add any carry to m*m product
477	add.w		%d2, %d6		# add in lsw of other m*l product
478	addx.w		%d4, %d3		# add any carry to m*m product
479	swap		%d6			# %d6 is low 32 bits of final product
480	clr.w		%d5
481	clr.w		%d2			# lsw of two mixed products used,
482	swap		%d5			# now use msws of longwords
483	swap		%d2
484	add.l		%d2, %d5
485	add.l		%d3, %d5	# %d5 now ms 32 bits of final product
486	rts
487
488#########################################################################
489# XDEF ****************************************************************	#
490#	_060LSP__imulu64_(): Emulate 64-bit unsigned mul instruction	#
491#	_060LSP__imuls64_(): Emulate 64-bit signed mul instruction.	#
492#									#
493#	This is the library version which is accessed as a subroutine	#
494#	and therefore does not work exactly like the 680X0 mul{s,u}.l	#
495#	64-bit multiply instruction.					#
496#									#
497# XREF ****************************************************************	#
498#	None								#
499#									#
500# INPUT ***************************************************************	#
501#	0x4(sp) = multiplier						#
502#	0x8(sp) = multiplicand						#
503#	0xc(sp) = pointer to location to place 64-bit result		#
504# 									#
505# OUTPUT **************************************************************	#
506#	0xc(sp) = points to location of 64-bit result			#
507#									#
508# ALGORITHM ***********************************************************	#
509#	Perform the multiply in pieces using 16x16->32 unsigned		#
510# multiplies and "add" instructions.					#
511#	Set the condition codes as appropriate before performing an	#
512# "rts".								#
513#									#
514#########################################################################
515
516set MUL64_CC, -4
517
518	global		_060LSP__imulu64_
519_060LSP__imulu64_:
520
521# PROLOGUE BEGIN ########################################################
522	link.w		%a6,&-4
523	movm.l		&0x3800,-(%sp)		# save d2-d4
524#	fmovm.l		&0x0,-(%sp)		# save no fpregs
525# PROLOGUE END ##########################################################
526
527	mov.w		%cc,MUL64_CC(%a6)	# save incomming ccodes
528
529	mov.l		0x8(%a6),%d0		# store multiplier in d0
530	beq.w		mulu64_zero		# handle zero separately
531
532	mov.l		0xc(%a6),%d1		# get multiplicand in d1
533	beq.w		mulu64_zero		# handle zero separately
534
535#########################################################################
536#	63			   32				0	#
537# 	----------------------------					#
538# 	| hi(mplier) * hi(mplicand)|					#
539# 	----------------------------					#
540#		     -----------------------------			#
541#		     | hi(mplier) * lo(mplicand) |			#
542#		     -----------------------------			#
543#		     -----------------------------			#
544#		     | lo(mplier) * hi(mplicand) |			#
545#		     -----------------------------			#
546#	  |			   -----------------------------	#
547#	--|--			   | lo(mplier) * lo(mplicand) |	#
548#	  |			   -----------------------------	#
549#	========================================================	#
550#	--------------------------------------------------------	#
551#	|	hi(result)	   |	    lo(result)         |	#
552#	--------------------------------------------------------	#
553#########################################################################
554mulu64_alg:
555# load temp registers with operands
556	mov.l		%d0,%d2			# mr in d2
557	mov.l		%d0,%d3			# mr in d3
558	mov.l		%d1,%d4			# md in d4
559	swap		%d3			# hi(mr) in lo d3
560	swap		%d4			# hi(md) in lo d4
561
562# complete necessary multiplies:
563	mulu.w		%d1,%d0			# [1] lo(mr) * lo(md)
564	mulu.w		%d3,%d1			# [2] hi(mr) * lo(md)
565	mulu.w		%d4,%d2			# [3] lo(mr) * hi(md)
566	mulu.w		%d4,%d3			# [4] hi(mr) * hi(md)
567
568# add lo portions of [2],[3] to hi portion of [1].
569# add carries produced from these adds to [4].
570# lo([1]) is the final lo 16 bits of the result.
571	clr.l		%d4			# load d4 w/ zero value
572	swap		%d0			# hi([1]) <==> lo([1])
573	add.w		%d1,%d0			# hi([1]) + lo([2])
574	addx.l		%d4,%d3			#    [4]  + carry
575	add.w		%d2,%d0			# hi([1]) + lo([3])
576	addx.l		%d4,%d3			#    [4]  + carry
577	swap		%d0			# lo([1]) <==> hi([1])
578
579# lo portions of [2],[3] have been added in to final result.
580# now, clear lo, put hi in lo reg, and add to [4]
581	clr.w		%d1			# clear lo([2])
582	clr.w		%d2			# clear hi([3])
583	swap		%d1			# hi([2]) in lo d1
584	swap		%d2			# hi([3]) in lo d2
585	add.l		%d2,%d1			#    [4]  + hi([2])
586	add.l		%d3,%d1			#    [4]  + hi([3])
587
588# now, grab the condition codes. only one that can be set is 'N'.
589# 'N' CAN be set if the operation is unsigned if bit 63 is set.
590	mov.w		MUL64_CC(%a6),%d4
591	andi.b		&0x10,%d4		# keep old 'X' bit
592	tst.l		%d1			# may set 'N' bit
593	bpl.b		mulu64_ddone
594	ori.b		&0x8,%d4		# set 'N' bit
595mulu64_ddone:
596	mov.w		%d4,%cc
597
598# here, the result is in d1 and d0. the current strategy is to save
599# the values at the location pointed to by a0.
600# use movm here to not disturb the condition codes.
601mulu64_end:
602	exg		%d1,%d0
603	movm.l		&0x0003,([0x10,%a6])		# save result
604
605# EPILOGUE BEGIN ########################################################
606#	fmovm.l		(%sp)+,&0x0		# restore no fpregs
607	movm.l		(%sp)+,&0x001c		# restore d2-d4
608	unlk		%a6
609# EPILOGUE END ##########################################################
610
611	rts
612
613# one or both of the operands is zero so the result is also zero.
614# save the zero result to the register file and set the 'Z' ccode bit.
615mulu64_zero:
616	clr.l		%d0
617	clr.l		%d1
618
619	mov.w		MUL64_CC(%a6),%d4
620	andi.b		&0x10,%d4
621	ori.b		&0x4,%d4
622	mov.w		%d4,%cc			# set 'Z' ccode bit
623
624	bra.b		mulu64_end
625
626##########
627# muls.l #
628##########
629	global		_060LSP__imuls64_
630_060LSP__imuls64_:
631
632# PROLOGUE BEGIN ########################################################
633	link.w		%a6,&-4
634	movm.l		&0x3c00,-(%sp)		# save d2-d5
635#	fmovm.l		&0x0,-(%sp)		# save no fpregs
636# PROLOGUE END ##########################################################
637
638	mov.w		%cc,MUL64_CC(%a6)	# save incomming ccodes
639
640	mov.l		0x8(%a6),%d0		# store multiplier in d0
641	beq.b		mulu64_zero		# handle zero separately
642
643	mov.l		0xc(%a6),%d1		# get multiplicand in d1
644	beq.b		mulu64_zero		# handle zero separately
645
646	clr.b		%d5			# clear sign tag
647	tst.l		%d0			# is multiplier negative?
648	bge.b		muls64_chk_md_sgn	# no
649	neg.l		%d0			# make multiplier positive
650
651	ori.b		&0x1,%d5		# save multiplier sgn
652
653# the result sign is the exclusive or of the operand sign bits.
654muls64_chk_md_sgn:
655	tst.l		%d1			# is multiplicand negative?
656	bge.b		muls64_alg		# no
657	neg.l		%d1			# make multiplicand positive
658
659	eori.b		&0x1,%d5		# calculate correct sign
660
661#########################################################################
662#	63			   32				0	#
663# 	----------------------------					#
664# 	| hi(mplier) * hi(mplicand)|					#
665# 	----------------------------					#
666#		     -----------------------------			#
667#		     | hi(mplier) * lo(mplicand) |			#
668#		     -----------------------------			#
669#		     -----------------------------			#
670#		     | lo(mplier) * hi(mplicand) |			#
671#		     -----------------------------			#
672#	  |			   -----------------------------	#
673#	--|--			   | lo(mplier) * lo(mplicand) |	#
674#	  |			   -----------------------------	#
675#	========================================================	#
676#	--------------------------------------------------------	#
677#	|	hi(result)	   |	    lo(result)         |	#
678#	--------------------------------------------------------	#
679#########################################################################
680muls64_alg:
681# load temp registers with operands
682	mov.l		%d0,%d2			# mr in d2
683	mov.l		%d0,%d3			# mr in d3
684	mov.l		%d1,%d4			# md in d4
685	swap		%d3			# hi(mr) in lo d3
686	swap		%d4			# hi(md) in lo d4
687
688# complete necessary multiplies:
689	mulu.w		%d1,%d0			# [1] lo(mr) * lo(md)
690	mulu.w		%d3,%d1			# [2] hi(mr) * lo(md)
691	mulu.w		%d4,%d2			# [3] lo(mr) * hi(md)
692	mulu.w		%d4,%d3			# [4] hi(mr) * hi(md)
693
694# add lo portions of [2],[3] to hi portion of [1].
695# add carries produced from these adds to [4].
696# lo([1]) is the final lo 16 bits of the result.
697	clr.l		%d4			# load d4 w/ zero value
698	swap		%d0			# hi([1]) <==> lo([1])
699	add.w		%d1,%d0			# hi([1]) + lo([2])
700	addx.l		%d4,%d3			#    [4]  + carry
701	add.w		%d2,%d0			# hi([1]) + lo([3])
702	addx.l		%d4,%d3			#    [4]  + carry
703	swap		%d0			# lo([1]) <==> hi([1])
704
705# lo portions of [2],[3] have been added in to final result.
706# now, clear lo, put hi in lo reg, and add to [4]
707	clr.w		%d1			# clear lo([2])
708	clr.w		%d2			# clear hi([3])
709	swap		%d1			# hi([2]) in lo d1
710	swap		%d2			# hi([3]) in lo d2
711	add.l		%d2,%d1			#    [4]  + hi([2])
712	add.l		%d3,%d1			#    [4]  + hi([3])
713
714	tst.b		%d5			# should result be signed?
715	beq.b		muls64_done		# no
716
717# result should be a signed negative number.
718# compute 2's complement of the unsigned number:
719#   -negate all bits and add 1
720muls64_neg:
721	not.l		%d0			# negate lo(result) bits
722	not.l		%d1			# negate hi(result) bits
723	addq.l		&1,%d0			# add 1 to lo(result)
724	addx.l		%d4,%d1			# add carry to hi(result)
725
726muls64_done:
727	mov.w		MUL64_CC(%a6),%d4
728	andi.b		&0x10,%d4		# keep old 'X' bit
729	tst.l		%d1			# may set 'N' bit
730	bpl.b		muls64_ddone
731	ori.b		&0x8,%d4		# set 'N' bit
732muls64_ddone:
733	mov.w		%d4,%cc
734
735# here, the result is in d1 and d0. the current strategy is to save
736# the values at the location pointed to by a0.
737# use movm here to not disturb the condition codes.
738muls64_end:
739	exg		%d1,%d0
740	movm.l		&0x0003,([0x10,%a6])	# save result at (a0)
741
742# EPILOGUE BEGIN ########################################################
743#	fmovm.l		(%sp)+,&0x0		# restore no fpregs
744	movm.l		(%sp)+,&0x003c		# restore d2-d5
745	unlk		%a6
746# EPILOGUE END ##########################################################
747
748	rts
749
750# one or both of the operands is zero so the result is also zero.
751# save the zero result to the register file and set the 'Z' ccode bit.
752muls64_zero:
753	clr.l		%d0
754	clr.l		%d1
755
756	mov.w		MUL64_CC(%a6),%d4
757	andi.b		&0x10,%d4
758	ori.b		&0x4,%d4
759	mov.w		%d4,%cc			# set 'Z' ccode bit
760
761	bra.b		muls64_end
762
763#########################################################################
764# XDEF ****************************************************************	#
765#	_060LSP__cmp2_Ab_(): Emulate "cmp2.b An,<ea>".			#
766#	_060LSP__cmp2_Aw_(): Emulate "cmp2.w An,<ea>".			#
767#	_060LSP__cmp2_Al_(): Emulate "cmp2.l An,<ea>".			#
768#	_060LSP__cmp2_Db_(): Emulate "cmp2.b Dn,<ea>".			#
769#	_060LSP__cmp2_Dw_(): Emulate "cmp2.w Dn,<ea>".			#
770#	_060LSP__cmp2_Dl_(): Emulate "cmp2.l Dn,<ea>".			#
771#									#
772#	This is the library version which is accessed as a subroutine	#
773#	and therefore does not work exactly like the 680X0 "cmp2"	#
774#	instruction.							#
775#									#
776# XREF ****************************************************************	#
777#	None								#
778#									#
779# INPUT ***************************************************************	#
780#	0x4(sp) = Rn							#
781#	0x8(sp) = pointer to boundary pair				#
782# 									#
783# OUTPUT **************************************************************	#
784#	cc = condition codes are set correctly				#
785#									#
786# ALGORITHM ***********************************************************	#
787# 	In the interest of simplicity, all operands are converted to	#
788# longword size whether the operation is byte, word, or long. The	#
789# bounds are sign extended accordingly. If Rn is a data regsiter, Rn is #
790# also sign extended. If Rn is an address register, it need not be sign #
791# extended since the full register is always used.			#
792#	The condition codes are set correctly before the final "rts".	#
793#									#
794#########################################################################
795
796set	CMP2_CC,	-4
797
798	global 		_060LSP__cmp2_Ab_
799_060LSP__cmp2_Ab_:
800
801# PROLOGUE BEGIN ########################################################
802	link.w		%a6,&-4
803	movm.l		&0x3800,-(%sp)		# save d2-d4
804#	fmovm.l		&0x0,-(%sp)		# save no fpregs
805# PROLOGUE END ##########################################################
806
807	mov.w		%cc,CMP2_CC(%a6)
808	mov.l		0x8(%a6), %d2 		# get regval
809
810	mov.b		([0xc,%a6],0x0),%d0
811	mov.b		([0xc,%a6],0x1),%d1
812
813	extb.l		%d0			# sign extend lo bnd
814	extb.l		%d1			# sign extend hi bnd
815	bra.w		l_cmp2_cmp		# go do the compare emulation
816
817	global 		_060LSP__cmp2_Aw_
818_060LSP__cmp2_Aw_:
819
820# PROLOGUE BEGIN ########################################################
821	link.w		%a6,&-4
822	movm.l		&0x3800,-(%sp)		# save d2-d4
823#	fmovm.l		&0x0,-(%sp)		# save no fpregs
824# PROLOGUE END ##########################################################
825
826	mov.w		%cc,CMP2_CC(%a6)
827	mov.l		0x8(%a6), %d2 		# get regval
828
829	mov.w		([0xc,%a6],0x0),%d0
830	mov.w		([0xc,%a6],0x2),%d1
831
832	ext.l		%d0			# sign extend lo bnd
833	ext.l		%d1			# sign extend hi bnd
834	bra.w		l_cmp2_cmp		# go do the compare emulation
835
836	global 		_060LSP__cmp2_Al_
837_060LSP__cmp2_Al_:
838
839# PROLOGUE BEGIN ########################################################
840	link.w		%a6,&-4
841	movm.l		&0x3800,-(%sp)		# save d2-d4
842#	fmovm.l		&0x0,-(%sp)		# save no fpregs
843# PROLOGUE END ##########################################################
844
845	mov.w		%cc,CMP2_CC(%a6)
846	mov.l		0x8(%a6), %d2 		# get regval
847
848	mov.l		([0xc,%a6],0x0),%d0
849	mov.l		([0xc,%a6],0x4),%d1
850	bra.w		l_cmp2_cmp		# go do the compare emulation
851
852	global 		_060LSP__cmp2_Db_
853_060LSP__cmp2_Db_:
854
855# PROLOGUE BEGIN ########################################################
856	link.w		%a6,&-4
857	movm.l		&0x3800,-(%sp)		# save d2-d4
858#	fmovm.l		&0x0,-(%sp)		# save no fpregs
859# PROLOGUE END ##########################################################
860
861	mov.w		%cc,CMP2_CC(%a6)
862	mov.l		0x8(%a6), %d2 		# get regval
863
864	mov.b		([0xc,%a6],0x0),%d0
865	mov.b		([0xc,%a6],0x1),%d1
866
867	extb.l		%d0			# sign extend lo bnd
868	extb.l		%d1			# sign extend hi bnd
869
870# operation is a data register compare.
871# sign extend byte to long so we can do simple longword compares.
872	extb.l		%d2			# sign extend data byte
873	bra.w		l_cmp2_cmp		# go do the compare emulation
874
875	global 		_060LSP__cmp2_Dw_
876_060LSP__cmp2_Dw_:
877
878# PROLOGUE BEGIN ########################################################
879	link.w		%a6,&-4
880	movm.l		&0x3800,-(%sp)		# save d2-d4
881#	fmovm.l		&0x0,-(%sp)		# save no fpregs
882# PROLOGUE END ##########################################################
883
884	mov.w		%cc,CMP2_CC(%a6)
885	mov.l		0x8(%a6), %d2 		# get regval
886
887	mov.w		([0xc,%a6],0x0),%d0
888	mov.w		([0xc,%a6],0x2),%d1
889
890	ext.l		%d0			# sign extend lo bnd
891	ext.l		%d1			# sign extend hi bnd
892
893# operation is a data register compare.
894# sign extend word to long so we can do simple longword compares.
895	ext.l		%d2			# sign extend data word
896	bra.w		l_cmp2_cmp		# go emulate compare
897
898	global 		_060LSP__cmp2_Dl_
899_060LSP__cmp2_Dl_:
900
901# PROLOGUE BEGIN ########################################################
902	link.w		%a6,&-4
903	movm.l		&0x3800,-(%sp)		# save d2-d4
904#	fmovm.l		&0x0,-(%sp)		# save no fpregs
905# PROLOGUE END ##########################################################
906
907	mov.w		%cc,CMP2_CC(%a6)
908	mov.l		0x8(%a6), %d2 		# get regval
909
910	mov.l		([0xc,%a6],0x0),%d0
911	mov.l		([0xc,%a6],0x4),%d1
912
913#
914# To set the ccodes correctly:
915# 	(1) save 'Z' bit from (Rn - lo)
916#	(2) save 'Z' and 'N' bits from ((hi - lo) - (Rn - hi))
917#	(3) keep 'X', 'N', and 'V' from before instruction
918#	(4) combine ccodes
919#
920l_cmp2_cmp:
921	sub.l		%d0, %d2		# (Rn - lo)
922	mov.w		%cc, %d3		# fetch resulting ccodes
923	andi.b		&0x4, %d3		# keep 'Z' bit
924	sub.l		%d0, %d1		# (hi - lo)
925	cmp.l	 	%d1,%d2			# ((hi - lo) - (Rn - hi))
926
927	mov.w		%cc, %d4		# fetch resulting ccodes
928	or.b		%d4, %d3		# combine w/ earlier ccodes
929	andi.b		&0x5, %d3		# keep 'Z' and 'N'
930
931	mov.w		CMP2_CC(%a6), %d4	# fetch old ccodes
932	andi.b		&0x1a, %d4		# keep 'X','N','V' bits
933	or.b		%d3, %d4		# insert new ccodes
934	mov.w		%d4,%cc			# save new ccodes
935
936# EPILOGUE BEGIN ########################################################
937#	fmovm.l		(%sp)+,&0x0		# restore no fpregs
938	movm.l		(%sp)+,&0x001c		# restore d2-d4
939	unlk		%a6
940# EPILOGUE END ##########################################################
941
942	rts
943