xref: /netbsd/sys/arch/m68k/fpsp/stwotox.sa (revision bf9ec67e)
1*	$NetBSD: stwotox.sa,v 1.3 1994/10/26 07:50:15 cgd Exp $
2
3*	MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
4*	M68000 Hi-Performance Microprocessor Division
5*	M68040 Software Package
6*
7*	M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc.
8*	All rights reserved.
9*
10*	THE SOFTWARE is provided on an "AS IS" basis and without warranty.
11*	To the maximum extent permitted by applicable law,
12*	MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
13*	INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A
14*	PARTICULAR PURPOSE and any warranty against infringement with
15*	regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
16*	and any accompanying written materials.
17*
18*	To the maximum extent permitted by applicable law,
19*	IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
20*	(INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS
21*	PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR
22*	OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE
23*	SOFTWARE.  Motorola assumes no responsibility for the maintenance
24*	and support of the SOFTWARE.
25*
26*	You are hereby granted a copyright license to use, modify, and
27*	distribute the SOFTWARE so long as this entire notice is retained
28*	without alteration in any modified and/or redistributed versions,
29*	and that such modified versions are clearly identified as such.
30*	No licenses are granted by implication, estoppel or otherwise
31*	under any patents or trademarks of Motorola, Inc.
32
33*
34*	stwotox.sa 3.1 12/10/90
35*
36*	stwotox  --- 2**X
37*	stwotoxd --- 2**X for denormalized X
38*	stentox  --- 10**X
39*	stentoxd --- 10**X for denormalized X
40*
41*	Input: Double-extended number X in location pointed to
42*		by address register a0.
43*
44*	Output: The function values are returned in Fp0.
45*
46*	Accuracy and Monotonicity: The returned result is within 2 ulps in
47*		64 significant bit, i.e. within 0.5001 ulp to 53 bits if the
48*		result is subsequently rounded to double precision. The
49*		result is provably monotonic in double precision.
50*
51*	Speed: The program stwotox takes approximately 190 cycles and the
52*		program stentox takes approximately 200 cycles.
53*
54*	Algorithm:
55*
56*	twotox
57*	1. If |X| > 16480, go to ExpBig.
58*
59*	2. If |X| < 2**(-70), go to ExpSm.
60*
61*	3. Decompose X as X = N/64 + r where |r| <= 1/128. Furthermore
62*		decompose N as
63*		 N = 64(M + M') + j,  j = 0,1,2,...,63.
64*
65*	4. Overwrite r := r * log2. Then
66*		2**X = 2**(M') * 2**(M) * 2**(j/64) * exp(r).
67*		Go to expr to compute that expression.
68*
69*	tentox
70*	1. If |X| > 16480*log_10(2) (base 10 log of 2), go to ExpBig.
71*
72*	2. If |X| < 2**(-70), go to ExpSm.
73*
74*	3. Set y := X*log_2(10)*64 (base 2 log of 10). Set
75*		N := round-to-int(y). Decompose N as
76*		 N = 64(M + M') + j,  j = 0,1,2,...,63.
77*
78*	4. Define r as
79*		r := ((X - N*L1)-N*L2) * L10
80*		where L1, L2 are the leading and trailing parts of log_10(2)/64
81*		and L10 is the natural log of 10. Then
82*		10**X = 2**(M') * 2**(M) * 2**(j/64) * exp(r).
83*		Go to expr to compute that expression.
84*
85*	expr
86*	1. Fetch 2**(j/64) from table as Fact1 and Fact2.
87*
88*	2. Overwrite Fact1 and Fact2 by
89*		Fact1 := 2**(M) * Fact1
90*		Fact2 := 2**(M) * Fact2
91*		Thus Fact1 + Fact2 = 2**(M) * 2**(j/64).
92*
93*	3. Calculate P where 1 + P approximates exp(r):
94*		P = r + r*r*(A1+r*(A2+...+r*A5)).
95*
96*	4. Let AdjFact := 2**(M'). Return
97*		AdjFact * ( Fact1 + ((Fact1*P) + Fact2) ).
98*		Exit.
99*
100*	ExpBig
101*	1. Generate overflow by Huge * Huge if X > 0; otherwise, generate
102*		underflow by Tiny * Tiny.
103*
104*	ExpSm
105*	1. Return 1 + X.
106*
107
108STWOTOX	IDNT	2,1 Motorola 040 Floating Point Software Package
109
110	section	8
111
112	include	fpsp.h
113
114BOUNDS1	DC.L $3FB98000,$400D80C0 ... 2^(-70),16480
115BOUNDS2	DC.L $3FB98000,$400B9B07 ... 2^(-70),16480 LOG2/LOG10
116
117L2TEN64	DC.L $406A934F,$0979A371 ... 64LOG10/LOG2
118L10TWO1	DC.L $3F734413,$509F8000 ... LOG2/64LOG10
119
120L10TWO2	DC.L $BFCD0000,$C0219DC1,$DA994FD2,$00000000
121
122LOG10	DC.L $40000000,$935D8DDD,$AAA8AC17,$00000000
123
124LOG2	DC.L $3FFE0000,$B17217F7,$D1CF79AC,$00000000
125
126EXPA5	DC.L $3F56C16D,$6F7BD0B2
127EXPA4	DC.L $3F811112,$302C712C
128EXPA3	DC.L $3FA55555,$55554CC1
129EXPA2	DC.L $3FC55555,$55554A54
130EXPA1	DC.L $3FE00000,$00000000,$00000000,$00000000
131
132HUGE	DC.L $7FFE0000,$FFFFFFFF,$FFFFFFFF,$00000000
133TINY	DC.L $00010000,$FFFFFFFF,$FFFFFFFF,$00000000
134
135EXPTBL
136	DC.L  $3FFF0000,$80000000,$00000000,$3F738000
137	DC.L  $3FFF0000,$8164D1F3,$BC030773,$3FBEF7CA
138	DC.L  $3FFF0000,$82CD8698,$AC2BA1D7,$3FBDF8A9
139	DC.L  $3FFF0000,$843A28C3,$ACDE4046,$3FBCD7C9
140	DC.L  $3FFF0000,$85AAC367,$CC487B15,$BFBDE8DA
141	DC.L  $3FFF0000,$871F6196,$9E8D1010,$3FBDE85C
142	DC.L  $3FFF0000,$88980E80,$92DA8527,$3FBEBBF1
143	DC.L  $3FFF0000,$8A14D575,$496EFD9A,$3FBB80CA
144	DC.L  $3FFF0000,$8B95C1E3,$EA8BD6E7,$BFBA8373
145	DC.L  $3FFF0000,$8D1ADF5B,$7E5BA9E6,$BFBE9670
146	DC.L  $3FFF0000,$8EA4398B,$45CD53C0,$3FBDB700
147	DC.L  $3FFF0000,$9031DC43,$1466B1DC,$3FBEEEB0
148	DC.L  $3FFF0000,$91C3D373,$AB11C336,$3FBBFD6D
149	DC.L  $3FFF0000,$935A2B2F,$13E6E92C,$BFBDB319
150	DC.L  $3FFF0000,$94F4EFA8,$FEF70961,$3FBDBA2B
151	DC.L  $3FFF0000,$96942D37,$20185A00,$3FBE91D5
152	DC.L  $3FFF0000,$9837F051,$8DB8A96F,$3FBE8D5A
153	DC.L  $3FFF0000,$99E04593,$20B7FA65,$BFBCDE7B
154	DC.L  $3FFF0000,$9B8D39B9,$D54E5539,$BFBEBAAF
155	DC.L  $3FFF0000,$9D3ED9A7,$2CFFB751,$BFBD86DA
156	DC.L  $3FFF0000,$9EF53260,$91A111AE,$BFBEBEDD
157	DC.L  $3FFF0000,$A0B0510F,$B9714FC2,$3FBCC96E
158	DC.L  $3FFF0000,$A2704303,$0C496819,$BFBEC90B
159	DC.L  $3FFF0000,$A43515AE,$09E6809E,$3FBBD1DB
160	DC.L  $3FFF0000,$A5FED6A9,$B15138EA,$3FBCE5EB
161	DC.L  $3FFF0000,$A7CD93B4,$E965356A,$BFBEC274
162	DC.L  $3FFF0000,$A9A15AB4,$EA7C0EF8,$3FBEA83C
163	DC.L  $3FFF0000,$AB7A39B5,$A93ED337,$3FBECB00
164	DC.L  $3FFF0000,$AD583EEA,$42A14AC6,$3FBE9301
165	DC.L  $3FFF0000,$AF3B78AD,$690A4375,$BFBD8367
166	DC.L  $3FFF0000,$B123F581,$D2AC2590,$BFBEF05F
167	DC.L  $3FFF0000,$B311C412,$A9112489,$3FBDFB3C
168	DC.L  $3FFF0000,$B504F333,$F9DE6484,$3FBEB2FB
169	DC.L  $3FFF0000,$B6FD91E3,$28D17791,$3FBAE2CB
170	DC.L  $3FFF0000,$B8FBAF47,$62FB9EE9,$3FBCDC3C
171	DC.L  $3FFF0000,$BAFF5AB2,$133E45FB,$3FBEE9AA
172	DC.L  $3FFF0000,$BD08A39F,$580C36BF,$BFBEAEFD
173	DC.L  $3FFF0000,$BF1799B6,$7A731083,$BFBCBF51
174	DC.L  $3FFF0000,$C12C4CCA,$66709456,$3FBEF88A
175	DC.L  $3FFF0000,$C346CCDA,$24976407,$3FBD83B2
176	DC.L  $3FFF0000,$C5672A11,$5506DADD,$3FBDF8AB
177	DC.L  $3FFF0000,$C78D74C8,$ABB9B15D,$BFBDFB17
178	DC.L  $3FFF0000,$C9B9BD86,$6E2F27A3,$BFBEFE3C
179	DC.L  $3FFF0000,$CBEC14FE,$F2727C5D,$BFBBB6F8
180	DC.L  $3FFF0000,$CE248C15,$1F8480E4,$BFBCEE53
181	DC.L  $3FFF0000,$D06333DA,$EF2B2595,$BFBDA4AE
182	DC.L  $3FFF0000,$D2A81D91,$F12AE45A,$3FBC9124
183	DC.L  $3FFF0000,$D4F35AAB,$CFEDFA1F,$3FBEB243
184	DC.L  $3FFF0000,$D744FCCA,$D69D6AF4,$3FBDE69A
185	DC.L  $3FFF0000,$D99D15C2,$78AFD7B6,$BFB8BC61
186	DC.L  $3FFF0000,$DBFBB797,$DAF23755,$3FBDF610
187	DC.L  $3FFF0000,$DE60F482,$5E0E9124,$BFBD8BE1
188	DC.L  $3FFF0000,$E0CCDEEC,$2A94E111,$3FBACB12
189	DC.L  $3FFF0000,$E33F8972,$BE8A5A51,$3FBB9BFE
190	DC.L  $3FFF0000,$E5B906E7,$7C8348A8,$3FBCF2F4
191	DC.L  $3FFF0000,$E8396A50,$3C4BDC68,$3FBEF22F
192	DC.L  $3FFF0000,$EAC0C6E7,$DD24392F,$BFBDBF4A
193	DC.L  $3FFF0000,$ED4F301E,$D9942B84,$3FBEC01A
194	DC.L  $3FFF0000,$EFE4B99B,$DCDAF5CB,$3FBE8CAC
195	DC.L  $3FFF0000,$F281773C,$59FFB13A,$BFBCBB3F
196	DC.L  $3FFF0000,$F5257D15,$2486CC2C,$3FBEF73A
197	DC.L  $3FFF0000,$F7D0DF73,$0AD13BB9,$BFB8B795
198	DC.L  $3FFF0000,$FA83B2DB,$722A033A,$3FBEF84B
199	DC.L  $3FFF0000,$FD3E0C0C,$F486C175,$BFBEF581
200
201N	equ	L_SCR1
202
203X	equ	FP_SCR1
204XDCARE	equ	X+2
205XFRAC	equ	X+4
206
207ADJFACT	equ	FP_SCR2
208
209FACT1	equ	FP_SCR3
210FACT1HI	equ	FACT1+4
211FACT1LOW equ	FACT1+8
212
213FACT2	equ	FP_SCR4
214FACT2HI	equ	FACT2+4
215FACT2LOW equ	FACT2+8
216
217	xref	t_unfl
218	xref	t_ovfl
219	xref	t_frcinx
220
221	xdef	stwotoxd
222stwotoxd:
223*--ENTRY POINT FOR 2**(X) FOR DENORMALIZED ARGUMENT
224
225	fmove.l		d1,fpcr		...set user's rounding mode/precision
226	Fmove.S		#:3F800000,FP0  ...RETURN 1 + X
227	move.l		(a0),d0
228	or.l		#$00800001,d0
229	fadd.s		d0,fp0
230	bra		t_frcinx
231
232	xdef	stwotox
233stwotox:
234*--ENTRY POINT FOR 2**(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
235	FMOVEM.X	(a0),FP0	...LOAD INPUT, do not set cc's
236
237	MOVE.L		(A0),D0
238	MOVE.W		4(A0),D0
239	FMOVE.X		FP0,X(a6)
240	ANDI.L		#$7FFFFFFF,D0
241
242	CMPI.L		#$3FB98000,D0		...|X| >= 2**(-70)?
243	BGE.B		TWOOK1
244	BRA.W		EXPBORS
245
246TWOOK1:
247	CMPI.L		#$400D80C0,D0		...|X| > 16480?
248	BLE.B		TWOMAIN
249	BRA.W		EXPBORS
250
251
252TWOMAIN:
253*--USUAL CASE, 2^(-70) <= |X| <= 16480
254
255	FMOVE.X		FP0,FP1
256	FMUL.S		#:42800000,FP1  ...64 * X
257
258	FMOVE.L		FP1,N(a6)		...N = ROUND-TO-INT(64 X)
259	MOVE.L		d2,-(sp)
260	LEA		EXPTBL,a1 	...LOAD ADDRESS OF TABLE OF 2^(J/64)
261	FMOVE.L		N(a6),FP1		...N --> FLOATING FMT
262	MOVE.L		N(a6),D0
263	MOVE.L		D0,d2
264	ANDI.L		#$3F,D0		...D0 IS J
265	ASL.L		#4,D0		...DISPLACEMENT FOR 2^(J/64)
266	ADDA.L		D0,a1		...ADDRESS FOR 2^(J/64)
267	ASR.L		#6,d2		...d2 IS L, N = 64L + J
268	MOVE.L		d2,D0
269	ASR.L		#1,D0		...D0 IS M
270	SUB.L		D0,d2		...d2 IS M', N = 64(M+M') + J
271	ADDI.L		#$3FFF,d2
272	MOVE.W		d2,ADJFACT(a6) 	...ADJFACT IS 2^(M')
273	MOVE.L		(sp)+,d2
274*--SUMMARY: a1 IS ADDRESS FOR THE LEADING PORTION OF 2^(J/64),
275*--D0 IS M WHERE N = 64(M+M') + J. NOTE THAT |M| <= 16140 BY DESIGN.
276*--ADJFACT = 2^(M').
277*--REGISTERS SAVED SO FAR ARE (IN ORDER) FPCR, D0, FP1, a1, AND FP2.
278
279	FMUL.S		#:3C800000,FP1  ...(1/64)*N
280	MOVE.L		(a1)+,FACT1(a6)
281	MOVE.L		(a1)+,FACT1HI(a6)
282	MOVE.L		(a1)+,FACT1LOW(a6)
283	MOVE.W		(a1)+,FACT2(a6)
284	clr.w		FACT2+2(a6)
285
286	FSUB.X		FP1,FP0	 	...X - (1/64)*INT(64 X)
287
288	MOVE.W		(a1)+,FACT2HI(a6)
289	clr.w		FACT2HI+2(a6)
290	clr.l		FACT2LOW(a6)
291	ADD.W		D0,FACT1(a6)
292
293	FMUL.X		LOG2,FP0	...FP0 IS R
294	ADD.W		D0,FACT2(a6)
295
296	BRA.W		expr
297
298EXPBORS:
299*--FPCR, D0 SAVED
300	CMPI.L		#$3FFF8000,D0
301	BGT.B		EXPBIG
302
303EXPSM:
304*--|X| IS SMALL, RETURN 1 + X
305
306	FMOVE.L		d1,FPCR		;restore users exceptions
307	FADD.S		#:3F800000,FP0  ...RETURN 1 + X
308
309	bra		t_frcinx
310
311EXPBIG:
312*--|X| IS LARGE, GENERATE OVERFLOW IF X > 0; ELSE GENERATE UNDERFLOW
313*--REGISTERS SAVE SO FAR ARE FPCR AND  D0
314	MOVE.L		X(a6),D0
315	TST.L		D0
316	BLT.B		EXPNEG
317
318	bclr.b		#7,(a0)		;t_ovfl expects positive value
319	bra		t_ovfl
320
321EXPNEG:
322	bclr.b		#7,(a0)		;t_unfl expects positive value
323	bra		t_unfl
324
325	xdef	stentoxd
326stentoxd:
327*--ENTRY POINT FOR 10**(X) FOR DENORMALIZED ARGUMENT
328
329	fmove.l		d1,fpcr		...set user's rounding mode/precision
330	Fmove.S		#:3F800000,FP0  ...RETURN 1 + X
331	move.l		(a0),d0
332	or.l		#$00800001,d0
333	fadd.s		d0,fp0
334	bra		t_frcinx
335
336	xdef	stentox
337stentox:
338*--ENTRY POINT FOR 10**(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
339	FMOVEM.X	(a0),FP0	...LOAD INPUT, do not set cc's
340
341	MOVE.L		(A0),D0
342	MOVE.W		4(A0),D0
343	FMOVE.X		FP0,X(a6)
344	ANDI.L		#$7FFFFFFF,D0
345
346	CMPI.L		#$3FB98000,D0		...|X| >= 2**(-70)?
347	BGE.B		TENOK1
348	BRA.W		EXPBORS
349
350TENOK1:
351	CMPI.L		#$400B9B07,D0		...|X| <= 16480*log2/log10 ?
352	BLE.B		TENMAIN
353	BRA.W		EXPBORS
354
355TENMAIN:
356*--USUAL CASE, 2^(-70) <= |X| <= 16480 LOG 2 / LOG 10
357
358	FMOVE.X		FP0,FP1
359	FMUL.D		L2TEN64,FP1	...X*64*LOG10/LOG2
360
361	FMOVE.L		FP1,N(a6)		...N=INT(X*64*LOG10/LOG2)
362	MOVE.L		d2,-(sp)
363	LEA		EXPTBL,a1 	...LOAD ADDRESS OF TABLE OF 2^(J/64)
364	FMOVE.L		N(a6),FP1		...N --> FLOATING FMT
365	MOVE.L		N(a6),D0
366	MOVE.L		D0,d2
367	ANDI.L		#$3F,D0		...D0 IS J
368	ASL.L		#4,D0		...DISPLACEMENT FOR 2^(J/64)
369	ADDA.L		D0,a1		...ADDRESS FOR 2^(J/64)
370	ASR.L		#6,d2		...d2 IS L, N = 64L + J
371	MOVE.L		d2,D0
372	ASR.L		#1,D0		...D0 IS M
373	SUB.L		D0,d2		...d2 IS M', N = 64(M+M') + J
374	ADDI.L		#$3FFF,d2
375	MOVE.W		d2,ADJFACT(a6) 	...ADJFACT IS 2^(M')
376	MOVE.L		(sp)+,d2
377
378*--SUMMARY: a1 IS ADDRESS FOR THE LEADING PORTION OF 2^(J/64),
379*--D0 IS M WHERE N = 64(M+M') + J. NOTE THAT |M| <= 16140 BY DESIGN.
380*--ADJFACT = 2^(M').
381*--REGISTERS SAVED SO FAR ARE (IN ORDER) FPCR, D0, FP1, a1, AND FP2.
382
383	FMOVE.X		FP1,FP2
384
385	FMUL.D		L10TWO1,FP1	...N*(LOG2/64LOG10)_LEAD
386	MOVE.L		(a1)+,FACT1(a6)
387
388	FMUL.X		L10TWO2,FP2	...N*(LOG2/64LOG10)_TRAIL
389
390	MOVE.L		(a1)+,FACT1HI(a6)
391	MOVE.L		(a1)+,FACT1LOW(a6)
392	FSUB.X		FP1,FP0		...X - N L_LEAD
393	MOVE.W		(a1)+,FACT2(a6)
394
395	FSUB.X		FP2,FP0		...X - N L_TRAIL
396
397	clr.w		FACT2+2(a6)
398	MOVE.W		(a1)+,FACT2HI(a6)
399	clr.w		FACT2HI+2(a6)
400	clr.l		FACT2LOW(a6)
401
402	FMUL.X		LOG10,FP0	...FP0 IS R
403
404	ADD.W		D0,FACT1(a6)
405	ADD.W		D0,FACT2(a6)
406
407expr:
408*--FPCR, FP2, FP3 ARE SAVED IN ORDER AS SHOWN.
409*--ADJFACT CONTAINS 2**(M'), FACT1 + FACT2 = 2**(M) * 2**(J/64).
410*--FP0 IS R. THE FOLLOWING CODE COMPUTES
411*--	2**(M'+M) * 2**(J/64) * EXP(R)
412
413	FMOVE.X		FP0,FP1
414	FMUL.X		FP1,FP1		...FP1 IS S = R*R
415
416	FMOVE.D		EXPA5,FP2	...FP2 IS A5
417	FMOVE.D		EXPA4,FP3	...FP3 IS A4
418
419	FMUL.X		FP1,FP2		...FP2 IS S*A5
420	FMUL.X		FP1,FP3		...FP3 IS S*A4
421
422	FADD.D		EXPA3,FP2	...FP2 IS A3+S*A5
423	FADD.D		EXPA2,FP3	...FP3 IS A2+S*A4
424
425	FMUL.X		FP1,FP2		...FP2 IS S*(A3+S*A5)
426	FMUL.X		FP1,FP3		...FP3 IS S*(A2+S*A4)
427
428	FADD.D		EXPA1,FP2	...FP2 IS A1+S*(A3+S*A5)
429	FMUL.X		FP0,FP3		...FP3 IS R*S*(A2+S*A4)
430
431	FMUL.X		FP1,FP2		...FP2 IS S*(A1+S*(A3+S*A5))
432	FADD.X		FP3,FP0		...FP0 IS R+R*S*(A2+S*A4)
433
434	FADD.X		FP2,FP0		...FP0 IS EXP(R) - 1
435
436
437*--FINAL RECONSTRUCTION PROCESS
438*--EXP(X) = 2^M*2^(J/64) + 2^M*2^(J/64)*(EXP(R)-1)  -  (1 OR 0)
439
440	FMUL.X		FACT1(a6),FP0
441	FADD.X		FACT2(a6),FP0
442	FADD.X		FACT1(a6),FP0
443
444	FMOVE.L		d1,FPCR		;restore users exceptions
445	clr.w		ADJFACT+2(a6)
446	move.l		#$80000000,ADJFACT+4(a6)
447	clr.l		ADJFACT+8(a6)
448	FMUL.X		ADJFACT(a6),FP0	...FINAL ADJUSTMENT
449
450	bra		t_frcinx
451
452	end
453