xref: /netbsd/sys/arch/m68k/fpsp/srem_mod.sa (revision 6550d01e)
1*	$NetBSD: srem_mod.sa,v 1.3 1994/10/26 07:49:58 cgd Exp $
2
3*	MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
4*	M68000 Hi-Performance Microprocessor Division
5*	M68040 Software Package
6*
7*	M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc.
8*	All rights reserved.
9*
10*	THE SOFTWARE is provided on an "AS IS" basis and without warranty.
11*	To the maximum extent permitted by applicable law,
12*	MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
13*	INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A
14*	PARTICULAR PURPOSE and any warranty against infringement with
15*	regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
16*	and any accompanying written materials.
17*
18*	To the maximum extent permitted by applicable law,
19*	IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
20*	(INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS
21*	PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR
22*	OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE
23*	SOFTWARE.  Motorola assumes no responsibility for the maintenance
24*	and support of the SOFTWARE.
25*
26*	You are hereby granted a copyright license to use, modify, and
27*	distribute the SOFTWARE so long as this entire notice is retained
28*	without alteration in any modified and/or redistributed versions,
29*	and that such modified versions are clearly identified as such.
30*	No licenses are granted by implication, estoppel or otherwise
31*	under any patents or trademarks of Motorola, Inc.
32
33*
34*	srem_mod.sa 3.1 12/10/90
35*
36*      The entry point sMOD computes the floating point MOD of the
37*      input values X and Y. The entry point sREM computes the floating
38*      point (IEEE) REM of the input values X and Y.
39*
40*      INPUT
41*      -----
42*      Double-extended value Y is pointed to by address in register
43*      A0. Double-extended value X is located in -12(A0). The values
44*      of X and Y are both nonzero and finite; although either or both
45*      of them can be denormalized. The special cases of zeros, NaNs,
46*      and infinities are handled elsewhere.
47*
48*      OUTPUT
49*      ------
50*      FREM(X,Y) or FMOD(X,Y), depending on entry point.
51*
52*       ALGORITHM
53*       ---------
54*
55*       Step 1.  Save and strip signs of X and Y: signX := sign(X),
56*                signY := sign(Y), X := |X|, Y := |Y|,
57*                signQ := signX EOR signY. Record whether MOD or REM
58*                is requested.
59*
60*       Step 2.  Set L := expo(X)-expo(Y), k := 0, Q := 0.
61*                If (L < 0) then
62*                   R := X, go to Step 4.
63*                else
64*                   R := 2^(-L)X, j := L.
65*                endif
66*
67*       Step 3.  Perform MOD(X,Y)
68*            3.1 If R = Y, go to Step 9.
69*            3.2 If R > Y, then { R := R - Y, Q := Q + 1}
70*            3.3 If j = 0, go to Step 4.
71*            3.4 k := k + 1, j := j - 1, Q := 2Q, R := 2R. Go to
72*                Step 3.1.
73*
74*       Step 4.  At this point, R = X - QY = MOD(X,Y). Set
75*                Last_Subtract := false (used in Step 7 below). If
76*                MOD is requested, go to Step 6.
77*
78*       Step 5.  R = MOD(X,Y), but REM(X,Y) is requested.
79*            5.1 If R < Y/2, then R = MOD(X,Y) = REM(X,Y). Go to
80*                Step 6.
81*            5.2 If R > Y/2, then { set Last_Subtract := true,
82*                Q := Q + 1, Y := signY*Y }. Go to Step 6.
83*            5.3 This is the tricky case of R = Y/2. If Q is odd,
84*                then { Q := Q + 1, signX := -signX }.
85*
86*       Step 6.  R := signX*R.
87*
88*       Step 7.  If Last_Subtract = true, R := R - Y.
89*
90*       Step 8.  Return signQ, last 7 bits of Q, and R as required.
91*
92*       Step 9.  At this point, R = 2^(-j)*X - Q Y = Y. Thus,
93*                X = 2^(j)*(Q+1)Y. set Q := 2^(j)*(Q+1),
94*                R := 0. Return signQ, last 7 bits of Q, and R.
95*
96
97SREM_MOD    IDNT    2,1 Motorola 040 Floating Point Software Package
98
99	section    8
100
101	include	fpsp.h
102
103Mod_Flag  equ	L_SCR3		;long: 0 = entered via smod, 1 = entered via srem
104SignY     equ	FP_SCR3+4	;word: sign/exponent word of Y as read from (A0)
105SignX     equ	FP_SCR3+8	;word: sign/exponent word of X as read from -12(A0)
106SignQ     equ	FP_SCR3+12	;word: $8000 if the signs of X and Y differ, else 0
107Sc_Flag   equ	FP_SCR4		;long: 1 = multiply fp0 by Scale on exit, 0 = do not
108
109Y         equ	FP_SCR1		;memory image of |Y| used by FSub.X: exponent word,
110Y_Hi      equ	Y+4		;  then the high mantissa long at +4
111Y_Lo      equ	Y+8		;  and the low mantissa long at +8
112
113R         equ	FP_SCR2		;memory image of the remainder loaded into fp0
114R_Hi      equ	R+4		;  high mantissa long
115R_Lo      equ	R+8		;  low mantissa long
116
117*..Multiplier applied to fp0 on exit when Sc_Flag is set (exponent word $0001)
118Scale     DC.L	$00010000,$80000000,$00000000,$00000000
119
120	xref	t_avoid_unsupp
121
122        xdef        smod
123smod:
124*..FMOD entry: Mod_Flag := 0, then join the common code at Mod_Rem
125   Clr.L                Mod_Flag(a6)
126   BRA.B                Mod_Rem
127
128        xdef        srem
129srem:
130*..FREM entry: Mod_Flag := 1, then fall through into Mod_Rem
131   Move.L               #1,Mod_Flag(a6)
132
133Mod_Rem:
134*..Save data registers and the sign of Y; build normalized |Y| in (D3,D4,D5)
135   MoveM.L              D2-D7,-(A7)     ...save data registers
136   Move.W               (A0),D3	;sign/exponent word of Y
137   Move.W               D3,SignY(a6)	;read back in Chk_X to form sign(Q)
138   AndI.L               #$00007FFF,D3   ...Y := |Y|
139
140*..Fetch the 64-bit mantissa of Y
141   Move.L               4(A0),D4
142   Move.L               8(A0),D5        ...(D3,D4,D5) is |Y|
143
144   Tst.L                D3	;exponent field zero means Y is denormalized
145   BNE.B                Y_Normal
146*..Denormalized Y: start from $3FFE and subtract the normalizing shift
147   Move.L               #$00003FFE,D3	...$3FFD + 1
148   Tst.L                D4
149   BNE.B                HiY_not0
150
151HiY_0:
152   Move.L               D5,D4	;hi long is zero: shift mantissa left 32 bits
153   CLR.L                D5
154   SubI.L               #32,D3	;account for that 32-bit shift
155   CLR.L                D6
156   BFFFO                D4{0:32},D6	;D6 := count of leading zero bits in D4
157   LSL.L                D6,D4	;bring the leading 1 to bit 31
158   Sub.L                D6,D3           ...(D3,D4,D5) is normalized
159*                                       ...with bias $7FFD
160   BRA.B                Chk_X
161
162HiY_not0:
163   CLR.L                D6
164   BFFFO                D4{0:32},D6	;D6 := count of leading zero bits in D4
165   Sub.L                D6,D3	;exponent drops by the shift count
166   LSL.L                D6,D4
167   Move.L               D5,D7           ...a copy of D5
168   LSL.L                D6,D5
169   Neg.L                D6
170   AddI.L               #32,D6	;D6 := 32 - shift count
171   LSR.L                D6,D7	;D7 := bits of lo(Y) that cross into hi(Y)
172   Or.L                 D7,D4           ...(D3,D4,D5) normalized
173*                                       ...with bias $7FFD
174   BRA.B                Chk_X
175
176Y_Normal:
177   AddI.L               #$00003FFE,D3   ...(D3,D4,D5) normalized
178*                                       ...with bias $7FFD
179*..Record the sign of X and sign(Q); build normalized |X| in (D0,D1,D2)
180Chk_X:
181   Move.W               -12(A0),D0	;sign/exponent word of X
182   Move.W               D0,SignX(a6)	;read back by Fix_Sign
183   Move.W               SignY(a6),D1
184   EOr.L                D0,D1	;bit 15 := sign(X) EOR sign(Y)
185   AndI.L               #$00008000,D1	;keep only that bit
186   Move.W               D1,SignQ(a6)	...sign(Q) obtained
187   AndI.L               #$00007FFF,D0	;X := |X|
188   Move.L               -8(A0),D1
189   Move.L               -4(A0),D2       ...(D0,D1,D2) is |X|
190   Tst.L                D0	;exponent field zero means X is denormalized
191   BNE.B                X_Normal
192   Move.L               #$00003FFE,D0	;denormalized: start from $3FFE, minus the shift
193   Tst.L                D1
194   BNE.B                HiX_not0
195
196HiX_0:
197   Move.L               D2,D1	;hi long is zero: shift mantissa left 32 bits
198   CLR.L                D2
199   SubI.L               #32,D0	;account for that 32-bit shift
200   CLR.L                D6
201   BFFFO                D1{0:32},D6	;D6 := count of leading zero bits in D1
202   LSL.L                D6,D1
203   Sub.L                D6,D0           ...(D0,D1,D2) is normalized
204*                                       ...with bias $7FFD
205   BRA.B                Init
206
207HiX_not0:
208   CLR.L                D6
209   BFFFO                D1{0:32},D6	;D6 := count of leading zero bits in D1
210   Sub.L                D6,D0	;exponent drops by the shift count
211   LSL.L                D6,D1
212   Move.L               D2,D7           ...a copy of D2
213   LSL.L                D6,D2
214   Neg.L                D6
215   AddI.L               #32,D6	;D6 := 32 - shift count
216   LSR.L                D6,D7	;D7 := bits of lo(X) that cross into hi(X)
217   Or.L                 D7,D1           ...(D0,D1,D2) normalized
218*                                       ...with bias $7FFD
219   BRA.B                Init
220
221X_Normal:
222   AddI.L               #$00003FFE,D0   ...(D0,D1,D2) normalized
223*                                       ...with bias $7FFD
224
225Init:
226*..Set up the shift-and-subtract loop: j := L, Q := 0, carry := 0
227   Move.L               D3,L_SCR1(a6)   ...save biased expo(Y)
228   move.l		d0,L_SCR2(a6)	;save biased expo(X) for the L < 0 exit
229   Sub.L                D3,D0           ...L := expo(X)-expo(Y)
230*   Move.L               D0,L            ...D0 is j
231   CLR.L                D6              ...D6 := carry <- 0
232   CLR.L                D3              ...D3 is Q
233   MoveA.L              #0,A1           ...A1 is k; j+k=L, Q=0
234*..NOTE(review): k (A1) is only written in this file, never read
235*..(Carry,D1,D2) is R
236   Tst.L                D0
237   BGE.B                Mod_Loop
238
239*..expo(X) < expo(Y). Thus X = mod(X,Y)
240*..(D1,D2) already hold the mantissa of X; only its exponent is needed
241   move.l		L_SCR2(a6),d0	;restore biased expo(X) as expo(R)
242   BRA.W                Get_Mod
243
244*..At this point  R = 2^(-L)X; Q = 0; k = 0; and  k+j = L
245*..Each pass compares R with Y, subtracts Y when R > Y (Q := Q + 1),
246*..then doubles Q and R until j reaches 0. R = Y leaves via Rem_is_0.
247Mod_Loop:
248   Tst.L                D6              ...test carry bit
249   BGT.B                R_GT_Y	;D6 nonzero: the last doubling carried out of D1
250
251*..At this point carry = 0, R = (D1,D2), Y = (D4,D5)
252   Cmp.L                D4,D1           ...compare hi(R) and hi(Y)
253   BNE.B                R_NE_Y
254   Cmp.L                D5,D2           ...compare lo(R) and lo(Y)
255   BNE.B                R_NE_Y
256
257*..At this point, R = Y
258   BRA.W                Rem_is_0
259
260R_NE_Y:
261*..use the borrow of the previous compare
262   BCS.B                R_LT_Y          ...borrow is set iff R < Y
263
264R_GT_Y:
265*..If Carry is set, then Y < (Carry,D1,D2) < 2Y. Otherwise, Carry = 0
266*..and Y < (D1,D2) < 2Y. Either way, perform R - Y
267   Sub.L                D5,D2           ...lo(R) - lo(Y)
268   SubX.L               D4,D1           ...hi(R) - hi(Y)
269   CLR.L                D6              ...clear carry
270   AddQ.L               #1,D3           ...Q := Q + 1
271
272R_LT_Y:
273*..At this point, Carry=0, R < Y. R = 2^(k-L)X - QY; k+j = L; j >= 0.
274   Tst.L                D0              ...see if j = 0.
275   BEQ.B                PostLoop
276
277   Add.L                D3,D3           ...Q := 2Q
278   Add.L                D2,D2           ...lo(R) = 2lo(R)
279   AddX.L               D1,D1           ...hi(R) = 2hi(R) + carry
280   SCS                  D6              ...set Carry if 2(R) overflows
281   AddQ.L               #1,A1           ...k := k+1
282   SubQ.L               #1,D0           ...j := j - 1
283*..At this point, R=(Carry,D1,D2) = 2^(k-L)X - QY, j+k=L, j >= 0, R < 2Y.
284
285   BRA.B                Mod_Loop
286
287PostLoop:
288*..k = L, j = 0, Carry = 0, R = (D1,D2) = X - QY, R < Y.
289*..R takes the exponent of Y saved in L_SCR1, reduced by the shift applied
290*..normalize R.
291   Move.L               L_SCR1(a6),D0           ...new biased expo of R
292   Tst.L                D1
293   BNE.B                HiR_not0
294
295HiR_0:
296   Move.L               D2,D1	;hi long is zero: shift mantissa left 32 bits
297   CLR.L                D2
298   SubI.L               #32,D0	;account for that 32-bit shift
299   CLR.L                D6
300   BFFFO                D1{0:32},D6	;D6 := count of leading zero bits in D1
301   LSL.L                D6,D1
302   Sub.L                D6,D0           ...(D0,D1,D2) is normalized
303*                                       ...with bias $7FFD
304   BRA.B                Get_Mod
305
306HiR_not0:
307   CLR.L                D6
308   BFFFO                D1{0:32},D6	;D6 := count of leading zero bits in D1
309   BMI.B                Get_Mod         ...already normalized
310   Sub.L                D6,D0	;exponent drops by the shift count
311   LSL.L                D6,D1
312   Move.L               D2,D7           ...a copy of D2
313   LSL.L                D6,D2
314   Neg.L                D6
315   AddI.L               #32,D6	;D6 := 32 - shift count
316   LSR.L                D6,D7	;D7 := bits of lo(R) that cross into hi(R)
317   Or.L                 D7,D1           ...(D0,D1,D2) normalized
318
319*..Load R (D0,D1,D2) into fp0 and store |Y| for a possible FSub in Last_Sub
320Get_Mod:
321   CmpI.L		#$000041FE,D0		;is the working exponent of R at least $41FE?
322   BGE.B		No_Scale		;yes: rebias the exponents before loading
323Do_Scale:
324   Move.W		D0,R(a6)		;store R with its working exponent unchanged
325   clr.w		R+2(a6)
326   Move.L		D1,R_Hi(a6)
327   Move.L		D2,R_Lo(a6)
328   Move.L		L_SCR1(a6),D6		;working exponent of Y
329   Move.W		D6,Y(a6)
330   clr.w		Y+2(a6)
331   Move.L		D4,Y_Hi(a6)
332   Move.L		D5,Y_Lo(a6)
333   FMove.X		R(a6),fp0		...no exception
334   Move.L		#1,Sc_Flag(a6)		;Restore must multiply fp0 by Scale
335   BRA.B		ModOrRem
336No_Scale:
337*..Working exponent of R is $41FE or more: take $3FFE off the exponents of
338*..R and Y so both load unscaled, and leave Sc_Flag clear for Restore.
339   Move.L		D1,R_Hi(a6)
340   Move.L		D2,R_Lo(a6)
341   SubI.L		#$3FFE,D0		;D0 := rebiased exponent of R
342   Move.W		D0,R(a6)
343   clr.w		R+2(a6)			;zero the word after the exponent of R
344   Move.L		L_SCR1(a6),D6
345   SubI.L		#$3FFE,D6		;same rebias for expo(Y)
346   Move.L		D6,L_SCR1(a6)		;ModOrRem compares this against D0
347   FMove.X		R(a6),fp0
348   Move.W		D6,Y(a6)
349   clr.w		Y+2(a6)			;zero the word after the exponent of Y, as Do_Scale does
350   Move.L		D4,Y_Hi(a6)
351   Move.L		D5,Y_Lo(a6)
352   Clr.L		Sc_Flag(a6)		;result needs no rescaling
353*..REM only: compare R with Y/2 to decide whether one more Y is subtracted
354ModOrRem:
355   Move.L               Mod_Flag(a6),D6
356   BEQ.B                Fix_Sign	;Mod_Flag = 0 (smod): R is the result
357
358   Move.L               L_SCR1(a6),D6           ...new biased expo(Y)
359   SubQ.L               #1,D6           ...biased expo(Y/2)
360   Cmp.L                D6,D0	;expo(R) against expo(Y/2)
361   BLT.B                Fix_Sign	;smaller exponent: R < Y/2
362   BGT.B                Last_Sub	;larger exponent: R > Y/2
363*..Exponents equal: compare the mantissas of R and Y
364   Cmp.L                D4,D1
365   BNE.B                Not_EQ
366   Cmp.L                D5,D2
367   BNE.B                Not_EQ
368   BRA.W                Tie_Case	;R = Y/2 exactly
369
370Not_EQ:
371   BCS.B                Fix_Sign	;borrow from the last Cmp: R < Y/2
372
373Last_Sub:
374*..R > Y/2: take one more Y away and count it in Q
375   FSub.X		Y(a6),fp0		...no exceptions
376   AddQ.L               #1,D3           ...Q := Q + 1
377
378*
379
380Fix_Sign:
381*..Apply the sign held in SignX (Tie_Case may have flipped it) to fp0
382   Move.W               SignX(a6),D6	;N flag := bit 15 of the saved sign word
383   BGE.B		Get_Q		;sign bit clear: fp0 keeps its sign
384   FNeg.X		fp0		;sign bit set: negate the result
385
386*..Get Q
387*
388Get_Q:
389*..Pack sign(Q) and the low 7 bits of Q into bits 23-16 of fpsr
390   AndI.L               #$0000007F,D3   ...7 bits of Q
391   MoveQ                #0,D6		;upper word of D6 must be zero
392   Move.W               SignQ(a6),D6        ...D6 is sign(Q), $8000 or 0
393   LSR.L                #8,D6		;move the sign from bit 15 to bit 7
394   Or.L                 D6,D3           ...sign and bits of Q
395   Swap                 D3		;byte now sits in bits 23-16
396   FMove.L              fpsr,D6
397   AndI.L               #$FF00FFFF,D6	;clear the old byte in bits 23-16
398   Or.L                 D3,D6
399   FMove.L              D6,fpsr         ...put Q in fpsr
400
401*..Common exit: restore registers and fpcr, undo the scaling if it was used
402Restore:
403   MoveM.L              (A7)+,D2-D7	;undo the MoveM.L done at Mod_Rem
404   FMove.L              USER_FPCR(a6),fpcr	;reload fpcr from USER_FPCR
405   Move.L               Sc_Flag(a6),D0
406   BEQ.B                Finish	;Sc_Flag = 0: fp0 is already unscaled
407   FMul.X		Scale(pc),fp0	...may cause underflow
408   bra			t_avoid_unsupp	;check for denorm as a
409*					;result of the scaling
410*..Unscaled exit
411Finish:
412	fmove.x		fp0,fp0		;capture exceptions & round
413	rts
414
415Rem_is_0:
416*..R = 2^(-j)X - Q Y = Y, so the remainder is 0 and the quotient is 2^j (Q+1)
417   AddQ.L               #1,D3		;D3 := Q + 1
418   CmpI.L               #8,D0           ...D0 is j
419   BLT.B                Q_Small
420*..j >= 8: every bit that the 7-bit mask in Get_Q keeps is zero
421Q_Big:
422   MoveQ                #0,D3
423   BRA.B                Set_R_0
424Q_Small:
425   LSL.L                D0,D3		;D3 := 2^j (Q+1)
426
427Set_R_0:
428   Clr.L		Sc_Flag(a6)	;a zero result is never rescaled
429   FMove.S		#:00000000,fp0
430   BRA.W                Fix_Sign
431
432Tie_Case:
433*..R = Y/2 exactly: the quotient must end up even
434*..Bit 0 of D3 is the parity of Q
435   BTst                 #0,D3
436   BEq.W                Fix_Sign	...Q is even
437
438*..Q is odd: Q := Q + 1 and flip the sign that Fix_Sign will apply
439   AddQ.L               #1,D3
440*..SignX is stored high byte first, so bit 7 of the byte at SignX is
441*..bit 15 (the sign) of the saved sign/exponent word
442   BChg                 #7,SignX(a6)
443   BRA.W                Fix_Sign
444
445
446   End
447