xref: /netbsd/sys/arch/m68k/fpsp/slogn.sa (revision 6550d01e)
1*	$NetBSD: slogn.sa,v 1.4 2000/03/13 23:52:32 soren Exp $
2
3*	MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
4*	M68000 Hi-Performance Microprocessor Division
5*	M68040 Software Package
6*
7*	M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc.
8*	All rights reserved.
9*
10*	THE SOFTWARE is provided on an "AS IS" basis and without warranty.
11*	To the maximum extent permitted by applicable law,
12*	MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
13*	INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A
14*	PARTICULAR PURPOSE and any warranty against infringement with
15*	regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
16*	and any accompanying written materials.
17*
18*	To the maximum extent permitted by applicable law,
19*	IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
20*	(INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS
21*	PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR
22*	OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE
23*	SOFTWARE.  Motorola assumes no responsibility for the maintenance
24*	and support of the SOFTWARE.
25*
26*	You are hereby granted a copyright license to use, modify, and
27*	distribute the SOFTWARE so long as this entire notice is retained
28*	without alteration in any modified and/or redistributed versions,
29*	and that such modified versions are clearly identified as such.
30*	No licenses are granted by implication, estoppel or otherwise
31*	under any patents or trademarks of Motorola, Inc.
32
33*
34*	slogn.sa 3.1 12/10/90
35*
36*	slogn computes the natural logarithm of an
37*	input value. slognd does the same except the input value is a
38*	denormalized number. slognp1 computes log(1+X), and slognp1d
39*	computes log(1+X) for denormalized X.
40*
41*	Input: Double-extended value in memory location pointed to by address
42*		register a0.
43*
44*	Output:	log(X) or log(1+X) returned in floating-point register Fp0.
45*
46*	Accuracy and Monotonicity: The returned result is within 2 ulps in
47*		64 significant bit, i.e. within 0.5001 ulp to 53 bits if the
48*		result is subsequently rounded to double precision. The
49*		result is provably monotonic in double precision.
50*
51*	Speed: The program slogn takes approximately 190 cycles for input
52*		argument X such that |X-1| >= 1/16, which is the usual
53*		situation. For those arguments, slognp1 takes approximately
54*		 210 cycles. For the less common arguments, the program will
55*		 run no worse than 10% slower.
56*
57*	Algorithm:
58*	LOGN:
59*	Step 1. If |X-1| < 1/16, approximate log(X) by an odd polynomial in
60*		u, where u = 2(X-1)/(X+1). Otherwise, move on to Step 2.
61*
62*	Step 2. X = 2**k * Y where 1 <= Y < 2. Define F to be the first seven
63*		significant bits of Y plus 2**(-7), i.e. F = 1.xxxxxx1 in base
64*		2 where the six "x" match those of Y. Note that |Y-F| <= 2**(-7).
65*
66*	Step 3. Define u = (Y-F)/F. Approximate log(1+u) by a polynomial in u,
67*		log(1+u) = poly.
68*
69*	Step 4. Reconstruct log(X) = log( 2**k * Y ) = k*log(2) + log(F) + log(1+u)
70*		by k*log(2) + (log(F) + poly). The values of log(F) are calculated
71*		beforehand and stored in the program.
72*
73*	lognp1:
74*	Step 1: If |X| < 1/16, approximate log(1+X) by an odd polynomial in
75*		u where u = 2X/(2+X). Otherwise, move on to Step 2.
76*
77*	Step 2: Let 1+X = 2**k * Y, where 1 <= Y < 2. Define F as done in Step 2
78*		of the algorithm for LOGN and compute log(1+X) as
79*		k*log(2) + log(F) + poly where poly approximates log(1+u),
80*		u = (Y-F)/F.
81*
82*	Implementation Notes:
83*	Note 1. There are 64 different possible values for F, thus 64 log(F)'s
84*		need to be tabulated. Moreover, the values of 1/F are also
85*		tabulated so that the division in (Y-F)/F can be performed by a
86*		multiplication.
87*
88*	Note 2. In Step 2 of lognp1, in order to preserved accuracy, the value
89*		Y-F has to be calculated carefully when 1/2 <= X < 3/2.
90*
91*	Note 3. To fully exploit the pipeline, polynomials are usually separated
92*		into two parts evaluated independently before being added up.
93*
94
95slogn	IDNT	2,1 Motorola 040 Floating Point Software Package
96
97	section	8
98
99	include fpsp.h
100
101BOUNDS1  DC.L $3FFEF07D,$3FFF8841
102BOUNDS2  DC.L $3FFE8000,$3FFFC000
103
104LOGOF2	DC.L $3FFE0000,$B17217F7,$D1CF79AC,$00000000
105
106one	DC.L $3F800000
107zero	DC.L $00000000
108infty	DC.L $7F800000
109negone	DC.L $BF800000
110
111LOGA6	DC.L $3FC2499A,$B5E4040B
112LOGA5	DC.L $BFC555B5,$848CB7DB
113
114LOGA4	DC.L $3FC99999,$987D8730
115LOGA3	DC.L $BFCFFFFF,$FF6F7E97
116
117LOGA2	DC.L $3FD55555,$555555A4
118LOGA1	DC.L $BFE00000,$00000008
119
120LOGB5	DC.L $3F175496,$ADD7DAD6
121LOGB4	DC.L $3F3C71C2,$FE80C7E0
122
123LOGB3	DC.L $3F624924,$928BCCFF
124LOGB2	DC.L $3F899999,$999995EC
125
126LOGB1	DC.L $3FB55555,$55555555
127TWO	DC.L $40000000,$00000000
128
129LTHOLD	DC.L $3f990000,$80000000,$00000000,$00000000
130
131LOGTBL:
132	DC.L  $3FFE0000,$FE03F80F,$E03F80FE,$00000000
133	DC.L  $3FF70000,$FF015358,$833C47E2,$00000000
134	DC.L  $3FFE0000,$FA232CF2,$52138AC0,$00000000
135	DC.L  $3FF90000,$BDC8D83E,$AD88D549,$00000000
136	DC.L  $3FFE0000,$F6603D98,$0F6603DA,$00000000
137	DC.L  $3FFA0000,$9CF43DCF,$F5EAFD48,$00000000
138	DC.L  $3FFE0000,$F2B9D648,$0F2B9D65,$00000000
139	DC.L  $3FFA0000,$DA16EB88,$CB8DF614,$00000000
140	DC.L  $3FFE0000,$EF2EB71F,$C4345238,$00000000
141	DC.L  $3FFB0000,$8B29B775,$1BD70743,$00000000
142	DC.L  $3FFE0000,$EBBDB2A5,$C1619C8C,$00000000
143	DC.L  $3FFB0000,$A8D839F8,$30C1FB49,$00000000
144	DC.L  $3FFE0000,$E865AC7B,$7603A197,$00000000
145	DC.L  $3FFB0000,$C61A2EB1,$8CD907AD,$00000000
146	DC.L  $3FFE0000,$E525982A,$F70C880E,$00000000
147	DC.L  $3FFB0000,$E2F2A47A,$DE3A18AF,$00000000
148	DC.L  $3FFE0000,$E1FC780E,$1FC780E2,$00000000
149	DC.L  $3FFB0000,$FF64898E,$DF55D551,$00000000
150	DC.L  $3FFE0000,$DEE95C4C,$A037BA57,$00000000
151	DC.L  $3FFC0000,$8DB956A9,$7B3D0148,$00000000
152	DC.L  $3FFE0000,$DBEB61EE,$D19C5958,$00000000
153	DC.L  $3FFC0000,$9B8FE100,$F47BA1DE,$00000000
154	DC.L  $3FFE0000,$D901B203,$6406C80E,$00000000
155	DC.L  $3FFC0000,$A9372F1D,$0DA1BD17,$00000000
156	DC.L  $3FFE0000,$D62B80D6,$2B80D62C,$00000000
157	DC.L  $3FFC0000,$B6B07F38,$CE90E46B,$00000000
158	DC.L  $3FFE0000,$D3680D36,$80D3680D,$00000000
159	DC.L  $3FFC0000,$C3FD0329,$06488481,$00000000
160	DC.L  $3FFE0000,$D0B69FCB,$D2580D0B,$00000000
161	DC.L  $3FFC0000,$D11DE0FF,$15AB18CA,$00000000
162	DC.L  $3FFE0000,$CE168A77,$25080CE1,$00000000
163	DC.L  $3FFC0000,$DE1433A1,$6C66B150,$00000000
164	DC.L  $3FFE0000,$CB8727C0,$65C393E0,$00000000
165	DC.L  $3FFC0000,$EAE10B5A,$7DDC8ADD,$00000000
166	DC.L  $3FFE0000,$C907DA4E,$871146AD,$00000000
167	DC.L  $3FFC0000,$F7856E5E,$E2C9B291,$00000000
168	DC.L  $3FFE0000,$C6980C69,$80C6980C,$00000000
169	DC.L  $3FFD0000,$82012CA5,$A68206D7,$00000000
170	DC.L  $3FFE0000,$C4372F85,$5D824CA6,$00000000
171	DC.L  $3FFD0000,$882C5FCD,$7256A8C5,$00000000
172	DC.L  $3FFE0000,$C1E4BBD5,$95F6E947,$00000000
173	DC.L  $3FFD0000,$8E44C60B,$4CCFD7DE,$00000000
174	DC.L  $3FFE0000,$BFA02FE8,$0BFA02FF,$00000000
175	DC.L  $3FFD0000,$944AD09E,$F4351AF6,$00000000
176	DC.L  $3FFE0000,$BD691047,$07661AA3,$00000000
177	DC.L  $3FFD0000,$9A3EECD4,$C3EAA6B2,$00000000
178	DC.L  $3FFE0000,$BB3EE721,$A54D880C,$00000000
179	DC.L  $3FFD0000,$A0218434,$353F1DE8,$00000000
180	DC.L  $3FFE0000,$B92143FA,$36F5E02E,$00000000
181	DC.L  $3FFD0000,$A5F2FCAB,$BBC506DA,$00000000
182	DC.L  $3FFE0000,$B70FBB5A,$19BE3659,$00000000
183	DC.L  $3FFD0000,$ABB3B8BA,$2AD362A5,$00000000
184	DC.L  $3FFE0000,$B509E68A,$9B94821F,$00000000
185	DC.L  $3FFD0000,$B1641795,$CE3CA97B,$00000000
186	DC.L  $3FFE0000,$B30F6352,$8917C80B,$00000000
187	DC.L  $3FFD0000,$B7047551,$5D0F1C61,$00000000
188	DC.L  $3FFE0000,$B11FD3B8,$0B11FD3C,$00000000
189	DC.L  $3FFD0000,$BC952AFE,$EA3D13E1,$00000000
190	DC.L  $3FFE0000,$AF3ADDC6,$80AF3ADE,$00000000
191	DC.L  $3FFD0000,$C2168ED0,$F458BA4A,$00000000
192	DC.L  $3FFE0000,$AD602B58,$0AD602B6,$00000000
193	DC.L  $3FFD0000,$C788F439,$B3163BF1,$00000000
194	DC.L  $3FFE0000,$AB8F69E2,$8359CD11,$00000000
195	DC.L  $3FFD0000,$CCECAC08,$BF04565D,$00000000
196	DC.L  $3FFE0000,$A9C84A47,$A07F5638,$00000000
197	DC.L  $3FFD0000,$D2420487,$2DD85160,$00000000
198	DC.L  $3FFE0000,$A80A80A8,$0A80A80B,$00000000
199	DC.L  $3FFD0000,$D7894992,$3BC3588A,$00000000
200	DC.L  $3FFE0000,$A655C439,$2D7B73A8,$00000000
201	DC.L  $3FFD0000,$DCC2C4B4,$9887DACC,$00000000
202	DC.L  $3FFE0000,$A4A9CF1D,$96833751,$00000000
203	DC.L  $3FFD0000,$E1EEBD3E,$6D6A6B9E,$00000000
204	DC.L  $3FFE0000,$A3065E3F,$AE7CD0E0,$00000000
205	DC.L  $3FFD0000,$E70D785C,$2F9F5BDC,$00000000
206	DC.L  $3FFE0000,$A16B312E,$A8FC377D,$00000000
207	DC.L  $3FFD0000,$EC1F392C,$5179F283,$00000000
208	DC.L  $3FFE0000,$9FD809FD,$809FD80A,$00000000
209	DC.L  $3FFD0000,$F12440D3,$E36130E6,$00000000
210	DC.L  $3FFE0000,$9E4CAD23,$DD5F3A20,$00000000
211	DC.L  $3FFD0000,$F61CCE92,$346600BB,$00000000
212	DC.L  $3FFE0000,$9CC8E160,$C3FB19B9,$00000000
213	DC.L  $3FFD0000,$FB091FD3,$8145630A,$00000000
214	DC.L  $3FFE0000,$9B4C6F9E,$F03A3CAA,$00000000
215	DC.L  $3FFD0000,$FFE97042,$BFA4C2AD,$00000000
216	DC.L  $3FFE0000,$99D722DA,$BDE58F06,$00000000
217	DC.L  $3FFE0000,$825EFCED,$49369330,$00000000
218	DC.L  $3FFE0000,$9868C809,$868C8098,$00000000
219	DC.L  $3FFE0000,$84C37A7A,$B9A905C9,$00000000
220	DC.L  $3FFE0000,$97012E02,$5C04B809,$00000000
221	DC.L  $3FFE0000,$87224C2E,$8E645FB7,$00000000
222	DC.L  $3FFE0000,$95A02568,$095A0257,$00000000
223	DC.L  $3FFE0000,$897B8CAC,$9F7DE298,$00000000
224	DC.L  $3FFE0000,$94458094,$45809446,$00000000
225	DC.L  $3FFE0000,$8BCF55DE,$C4CD05FE,$00000000
226	DC.L  $3FFE0000,$92F11384,$0497889C,$00000000
227	DC.L  $3FFE0000,$8E1DC0FB,$89E125E5,$00000000
228	DC.L  $3FFE0000,$91A2B3C4,$D5E6F809,$00000000
229	DC.L  $3FFE0000,$9066E68C,$955B6C9B,$00000000
230	DC.L  $3FFE0000,$905A3863,$3E06C43B,$00000000
231	DC.L  $3FFE0000,$92AADE74,$C7BE59E0,$00000000
232	DC.L  $3FFE0000,$8F1779D9,$FDC3A219,$00000000
233	DC.L  $3FFE0000,$94E9BFF6,$15845643,$00000000
234	DC.L  $3FFE0000,$8DDA5202,$37694809,$00000000
235	DC.L  $3FFE0000,$9723A1B7,$20134203,$00000000
236	DC.L  $3FFE0000,$8CA29C04,$6514E023,$00000000
237	DC.L  $3FFE0000,$995899C8,$90EB8990,$00000000
238	DC.L  $3FFE0000,$8B70344A,$139BC75A,$00000000
239	DC.L  $3FFE0000,$9B88BDAA,$3A3DAE2F,$00000000
240	DC.L  $3FFE0000,$8A42F870,$5669DB46,$00000000
241	DC.L  $3FFE0000,$9DB4224F,$FFE1157C,$00000000
242	DC.L  $3FFE0000,$891AC73A,$E9819B50,$00000000
243	DC.L  $3FFE0000,$9FDADC26,$8B7A12DA,$00000000
244	DC.L  $3FFE0000,$87F78087,$F78087F8,$00000000
245	DC.L  $3FFE0000,$A1FCFF17,$CE733BD4,$00000000
246	DC.L  $3FFE0000,$86D90544,$7A34ACC6,$00000000
247	DC.L  $3FFE0000,$A41A9E8F,$5446FB9F,$00000000
248	DC.L  $3FFE0000,$85BF3761,$2CEE3C9B,$00000000
249	DC.L  $3FFE0000,$A633CD7E,$6771CD8B,$00000000
250	DC.L  $3FFE0000,$84A9F9C8,$084A9F9D,$00000000
251	DC.L  $3FFE0000,$A8489E60,$0B435A5E,$00000000
252	DC.L  $3FFE0000,$83993052,$3FBE3368,$00000000
253	DC.L  $3FFE0000,$AA59233C,$CCA4BD49,$00000000
254	DC.L  $3FFE0000,$828CBFBE,$B9A020A3,$00000000
255	DC.L  $3FFE0000,$AC656DAE,$6BCC4985,$00000000
256	DC.L  $3FFE0000,$81848DA8,$FAF0D277,$00000000
257	DC.L  $3FFE0000,$AE6D8EE3,$60BB2468,$00000000
258	DC.L  $3FFE0000,$80808080,$80808081,$00000000
259	DC.L  $3FFE0000,$B07197A2,$3C46C654,$00000000
260
261ADJK	equ	L_SCR1
262
263X	equ	FP_SCR1
264XDCARE	equ	X+2
265XFRAC	equ	X+4
266
267F	equ	FP_SCR2
268FFRAC	equ	F+4
269
270KLOG2	equ	FP_SCR3
271
272SAVEU	equ	FP_SCR4
273
274	xref	t_frcinx
275	xref	t_extdnrm
276	xref	t_operr
277	xref	t_dz
278
279	xdef	slognd
280slognd:
281*--ENTRY POINT FOR LOG(X) FOR DENORMALIZED INPUT
282
283	MOVE.L		#-100,ADJK(a6)	...INPUT = 2^(ADJK) * FP0
284
285*----normalize the input value by left shifting k bits (k to be determined
286*----below), adjusting exponent and storing -k to  ADJK
287*----the value TWOTO100 is no longer needed.
288*----Note that this code assumes the denormalized input is NON-ZERO.
289
290     MoveM.L	D2-D7,-(A7)		...save some registers
291     Clr.L	D3			...D3 is exponent of smallest norm. #
292     Move.L	4(A0),D4
293     Move.L	8(A0),D5		...(D4,D5) is (Hi_X,Lo_X)
294     Clr.L	D2			...D2 used for holding K
295
296     Tst.L	D4
297     BNE.B	HiX_not0
298
299HiX_0:
300     Move.L	D5,D4
301     Clr.L	D5
302     Move.L	#32,D2
303     Clr.L	D6
304     BFFFO      D4{0:32},D6
305     LSL.L      D6,D4
306     Add.L	D6,D2			...(D3,D4,D5) is normalized
307
308     Move.L	D3,X(a6)
309     Move.L	D4,XFRAC(a6)
310     Move.L	D5,XFRAC+4(a6)
311     Neg.L	D2
312     Move.L	D2,ADJK(a6)
313     FMove.X	X(a6),FP0
314     MoveM.L	(A7)+,D2-D7		...restore registers
315     LEA	X(a6),A0
316     Bra.B	LOGBGN			...begin regular log(X)
317
318
319HiX_not0:
320     Clr.L	D6
321     BFFFO	D4{0:32},D6		...find first 1
322     Move.L	D6,D2			...get k
323     LSL.L	D6,D4
324     Move.L	D5,D7			...a copy of D5
325     LSL.L	D6,D5
326     Neg.L	D6
327     AddI.L	#32,D6
328     LSR.L	D6,D7
329     Or.L	D7,D4			...(D3,D4,D5) normalized
330
331     Move.L	D3,X(a6)
332     Move.L	D4,XFRAC(a6)
333     Move.L	D5,XFRAC+4(a6)
334     Neg.L	D2
335     Move.L	D2,ADJK(a6)
336     FMove.X	X(a6),FP0
337     MoveM.L	(A7)+,D2-D7		...restore registers
338     LEA	X(a6),A0
339     Bra.B	LOGBGN			...begin regular log(X)
340
341
342	xdef	slogn
343slogn:
344*--ENTRY POINT FOR LOG(X) FOR X FINITE, NON-ZERO, NOT NAN'S
345
346	FMOVE.X		(A0),FP0	...LOAD INPUT
347	CLR.L		ADJK(a6)
348
349LOGBGN:
350*--FPCR SAVED AND CLEARED, INPUT IS 2^(ADJK)*FP0, FP0 CONTAINS
351*--A FINITE, NON-ZERO, NORMALIZED NUMBER.
352
353	move.l	(a0),d0
354	move.w	4(a0),d0
355
356	move.l	(a0),X(a6)
357	move.l	4(a0),X+4(a6)
358	move.l	8(a0),X+8(a6)
359
360	TST.L	D0		...CHECK IF X IS NEGATIVE
361	BLT.W	LOGNEG		...LOG OF NEGATIVE ARGUMENT IS INVALID
362	CMP2.L	BOUNDS1,D0	...X IS POSITIVE, CHECK IF X IS NEAR 1
363	BCC.W	LOGNEAR1	...BOUNDS IS ROUGHLY [15/16, 17/16]
364
365LOGMAIN:
366*--THIS SHOULD BE THE USUAL CASE, X NOT VERY CLOSE TO 1
367
368*--X = 2^(K) * Y, 1 <= Y < 2. THUS, Y = 1.XXXXXXXX....XX IN BINARY.
369*--WE DEFINE F = 1.XXXXXX1, I.E. FIRST 7 BITS OF Y AND ATTACH A 1.
370*--THE IDEA IS THAT LOG(X) = K*LOG2 + LOG(Y)
371*--			 = K*LOG2 + LOG(F) + LOG(1 + (Y-F)/F).
372*--NOTE THAT U = (Y-F)/F IS VERY SMALL AND THUS APPROXIMATING
373*--LOG(1+U) CAN BE VERY EFFICIENT.
374*--ALSO NOTE THAT THE VALUE 1/F IS STORED IN A TABLE SO THAT NO
375*--DIVISION IS NEEDED TO CALCULATE (Y-F)/F.
376
377*--GET K, Y, F, AND ADDRESS OF 1/F.
378	ASR.L	#8,D0
379	ASR.L	#8,D0		...SHIFTED 16 BITS, BIASED EXPO. OF X
380	SUBI.L	#$3FFF,D0 	...THIS IS K
381	ADD.L	ADJK(a6),D0	...ADJUST K, ORIGINAL INPUT MAY BE  DENORM.
382	LEA	LOGTBL,A0 	...BASE ADDRESS OF 1/F AND LOG(F)
383	FMOVE.L	D0,FP1		...CONVERT K TO FLOATING-POINT FORMAT
384
385*--WHILE THE CONVERSION IS GOING ON, WE GET F AND ADDRESS OF 1/F
386	MOVE.L	#$3FFF0000,X(a6)	...X IS NOW Y, I.E. 2^(-K)*X
387	MOVE.L	XFRAC(a6),FFRAC(a6)
388	ANDI.L	#$FE000000,FFRAC(a6) ...FIRST 7 BITS OF Y
389	ORI.L	#$01000000,FFRAC(a6) ...GET F: ATTACH A 1 AT THE EIGHTH BIT
390	MOVE.L	FFRAC(a6),D0	...READY TO GET ADDRESS OF 1/F
391	ANDI.L	#$7E000000,D0
392	ASR.L	#8,D0
393	ASR.L	#8,D0
394	ASR.L	#4,D0		...SHIFTED 20, D0 IS THE DISPLACEMENT
395	ADDA.L	D0,A0		...A0 IS THE ADDRESS FOR 1/F
396
397	FMOVE.X	X(a6),FP0
398	move.l	#$3fff0000,F(a6)
399	clr.l	F+8(a6)
400	FSUB.X	F(a6),FP0		...Y-F
401	FMOVEm.X FP2/fp3,-(sp)	...SAVE FP2 WHILE FP0 IS NOT READY
402*--SUMMARY: FP0 IS Y-F, A0 IS ADDRESS OF 1/F, FP1 IS K
403*--REGISTERS SAVED: FPCR, FP1, FP2
404
405LP1CONT1:
406*--AN RE-ENTRY POINT FOR LOGNP1
407	FMUL.X	(A0),FP0	...FP0 IS U = (Y-F)/F
408	FMUL.X	LOGOF2,FP1	...GET K*LOG2 WHILE FP0 IS NOT READY
409	FMOVE.X	FP0,FP2
410	FMUL.X	FP2,FP2		...FP2 IS V=U*U
411	FMOVE.X	FP1,KLOG2(a6)	...PUT K*LOG2 IN MEMEORY, FREE FP1
412
413*--LOG(1+U) IS APPROXIMATED BY
414*--U + V*(A1+U*(A2+U*(A3+U*(A4+U*(A5+U*A6))))) WHICH IS
415*--[U + V*(A1+V*(A3+V*A5))]  +  [U*V*(A2+V*(A4+V*A6))]
416
417	FMOVE.X	FP2,FP3
418	FMOVE.X	FP2,FP1
419
420	FMUL.D	LOGA6,FP1	...V*A6
421	FMUL.D	LOGA5,FP2	...V*A5
422
423	FADD.D	LOGA4,FP1	...A4+V*A6
424	FADD.D	LOGA3,FP2	...A3+V*A5
425
426	FMUL.X	FP3,FP1		...V*(A4+V*A6)
427	FMUL.X	FP3,FP2		...V*(A3+V*A5)
428
429	FADD.D	LOGA2,FP1	...A2+V*(A4+V*A6)
430	FADD.D	LOGA1,FP2	...A1+V*(A3+V*A5)
431
432	FMUL.X	FP3,FP1		...V*(A2+V*(A4+V*A6))
433	ADDA.L	#16,A0		...ADDRESS OF LOG(F)
434	FMUL.X	FP3,FP2		...V*(A1+V*(A3+V*A5)), FP3 RELEASED
435
436	FMUL.X	FP0,FP1		...U*V*(A2+V*(A4+V*A6))
437	FADD.X	FP2,FP0		...U+V*(A1+V*(A3+V*A5)), FP2 RELEASED
438
439	FADD.X	(A0),FP1	...LOG(F)+U*V*(A2+V*(A4+V*A6))
440	FMOVEm.X  (sp)+,FP2/fp3	...RESTORE FP2
441	FADD.X	FP1,FP0		...FP0 IS LOG(F) + LOG(1+U)
442
443	fmove.l	d1,fpcr
444	FADD.X	KLOG2(a6),FP0	...FINAL ADD
445	bra	t_frcinx
446
447
448LOGNEAR1:
449*--REGISTERS SAVED: FPCR, FP1. FP0 CONTAINS THE INPUT.
450	FMOVE.X	FP0,FP1
451	FSUB.S	one,FP1		...FP1 IS X-1
452	FADD.S	one,FP0		...FP0 IS X+1
453	FADD.X	FP1,FP1		...FP1 IS 2(X-1)
454*--LOG(X) = LOG(1+U/2)-LOG(1-U/2) WHICH IS AN ODD POLYNOMIAL
455*--IN U, U = 2(X-1)/(X+1) = FP1/FP0
456
457LP1CONT2:
458*--THIS IS AN RE-ENTRY POINT FOR LOGNP1
459	FDIV.X	FP0,FP1		...FP1 IS U
460	FMOVEm.X FP2/fp3,-(sp)	 ...SAVE FP2
461*--REGISTERS SAVED ARE NOW FPCR,FP1,FP2,FP3
462*--LET V=U*U, W=V*V, CALCULATE
463*--U + U*V*(B1 + V*(B2 + V*(B3 + V*(B4 + V*B5)))) BY
464*--U + U*V*(  [B1 + W*(B3 + W*B5)]  +  [V*(B2 + W*B4)]  )
465	FMOVE.X	FP1,FP0
466	FMUL.X	FP0,FP0	...FP0 IS V
467	FMOVE.X	FP1,SAVEU(a6) ...STORE U IN MEMORY, FREE FP1
468	FMOVE.X	FP0,FP1
469	FMUL.X	FP1,FP1	...FP1 IS W
470
471	FMOVE.D	LOGB5,FP3
472	FMOVE.D	LOGB4,FP2
473
474	FMUL.X	FP1,FP3	...W*B5
475	FMUL.X	FP1,FP2	...W*B4
476
477	FADD.D	LOGB3,FP3 ...B3+W*B5
478	FADD.D	LOGB2,FP2 ...B2+W*B4
479
480	FMUL.X	FP3,FP1	...W*(B3+W*B5), FP3 RELEASED
481
482	FMUL.X	FP0,FP2	...V*(B2+W*B4)
483
484	FADD.D	LOGB1,FP1 ...B1+W*(B3+W*B5)
485	FMUL.X	SAVEU(a6),FP0 ...FP0 IS U*V
486
487	FADD.X	FP2,FP1	...B1+W*(B3+W*B5) + V*(B2+W*B4), FP2 RELEASED
488	FMOVEm.X (sp)+,FP2/fp3 ...FP2 RESTORED
489
490	FMUL.X	FP1,FP0	...U*V*( [B1+W*(B3+W*B5)] + [V*(B2+W*B4)] )
491
492	fmove.l	d1,fpcr
493	FADD.X	SAVEU(a6),FP0
494	bra	t_frcinx
495	rts
496
497LOGNEG:
498*--REGISTERS SAVED FPCR. LOG(-VE) IS INVALID
499	bra	t_operr
500
501	xdef	slognp1d
502slognp1d:
503*--ENTRY POINT FOR LOG(1+Z) FOR DENORMALIZED INPUT
504* Simply return the denorm
505
506	bra	t_extdnrm
507
508	xdef	slognp1
509slognp1:
510*--ENTRY POINT FOR LOG(1+X) FOR X FINITE, NON-ZERO, NOT NAN'S
511
512	FMOVE.X	(A0),FP0	...LOAD INPUT
513	fabs.x	fp0		;test magnitude
514	fcmp.x	LTHOLD,fp0	;compare with min threshold
515	fbgt.w	LP1REAL		;if greater, continue
516	fmove.l	#0,fpsr		;clr N flag from compare
517	fmove.l	d1,fpcr
518	fmove.x	(a0),fp0	;return signed argument
519	bra	t_frcinx
520
521LP1REAL:
522	FMOVE.X	(A0),FP0	...LOAD INPUT
523	CLR.L	ADJK(a6)
524	FMOVE.X	FP0,FP1	...FP1 IS INPUT Z
525	FADD.S	one,FP0	...X := ROUND(1+Z)
526	FMOVE.X	FP0,X(a6)
527	MOVE.W	XFRAC(a6),XDCARE(a6)
528	MOVE.L	X(a6),D0
529	TST.L	D0
530	BLE.W	LP1NEG0	...LOG OF ZERO OR -VE
531	CMP2.L	BOUNDS2,D0
532	BCS.W	LOGMAIN	...BOUNDS2 IS [1/2,3/2]
533*--IF 1+Z > 3/2 OR 1+Z < 1/2, THEN X, WHICH IS ROUNDING 1+Z,
534*--CONTAINS AT LEAST 63 BITS OF INFORMATION OF Z. IN THAT CASE,
535*--SIMPLY INVOKE LOG(X) FOR LOG(1+Z).
536
537LP1NEAR1:
538*--NEXT SEE IF EXP(-1/16) < X < EXP(1/16)
539	CMP2.L	BOUNDS1,D0
540	BCS.B	LP1CARE
541
542LP1ONE16:
543*--EXP(-1/16) < X < EXP(1/16). LOG(1+Z) = LOG(1+U/2) - LOG(1-U/2)
544*--WHERE U = 2Z/(2+Z) = 2Z/(1+X).
545	FADD.X	FP1,FP1	...FP1 IS 2Z
546	FADD.S	one,FP0	...FP0 IS 1+X
547*--U = FP1/FP0
548	BRA.W	LP1CONT2
549
550LP1CARE:
551*--HERE WE USE THE USUAL TABLE DRIVEN APPROACH. CARE HAS TO BE
552*--TAKEN BECAUSE 1+Z CAN HAVE 67 BITS OF INFORMATION AND WE MUST
553*--PRESERVE ALL THE INFORMATION. BECAUSE 1+Z IS IN [1/2,3/2],
554*--THERE ARE ONLY TWO CASES.
555*--CASE 1: 1+Z < 1, THEN K = -1 AND Y-F = (2-F) + 2Z
556*--CASE 2: 1+Z > 1, THEN K = 0  AND Y-F = (1-F) + Z
557*--ON RETURNING TO LP1CONT1, WE MUST HAVE K IN FP1, ADDRESS OF
558*--(1/F) IN A0, Y-F IN FP0, AND FP2 SAVED.
559
560	MOVE.L	XFRAC(a6),FFRAC(a6)
561	ANDI.L	#$FE000000,FFRAC(a6)
562	ORI.L	#$01000000,FFRAC(a6)	...F OBTAINED
563	CMPI.L	#$3FFF8000,D0	...SEE IF 1+Z > 1
564	BGE.B	KISZERO
565
566KISNEG1:
567	FMOVE.S	TWO,FP0
568	move.l	#$3fff0000,F(a6)
569	clr.l	F+8(a6)
570	FSUB.X	F(a6),FP0	...2-F
571	MOVE.L	FFRAC(a6),D0
572	ANDI.L	#$7E000000,D0
573	ASR.L	#8,D0
574	ASR.L	#8,D0
575	ASR.L	#4,D0		...D0 CONTAINS DISPLACEMENT FOR 1/F
576	FADD.X	FP1,FP1		...GET 2Z
577	FMOVEm.X FP2/fp3,-(sp)	...SAVE FP2
578	FADD.X	FP1,FP0		...FP0 IS Y-F = (2-F)+2Z
579	LEA	LOGTBL,A0	...A0 IS ADDRESS OF 1/F
580	ADDA.L	D0,A0
581	FMOVE.S	negone,FP1	...FP1 IS K = -1
582	BRA.W	LP1CONT1
583
584KISZERO:
585	FMOVE.S	one,FP0
586	move.l	#$3fff0000,F(a6)
587	clr.l	F+8(a6)
588	FSUB.X	F(a6),FP0		...1-F
589	MOVE.L	FFRAC(a6),D0
590	ANDI.L	#$7E000000,D0
591	ASR.L	#8,D0
592	ASR.L	#8,D0
593	ASR.L	#4,D0
594	FADD.X	FP1,FP0		...FP0 IS Y-F
595	FMOVEm.X FP2/fp3,-(sp)	...FP2 SAVED
596	LEA	LOGTBL,A0
597	ADDA.L	D0,A0	 	...A0 IS ADDRESS OF 1/F
598	FMOVE.S	zero,FP1	...FP1 IS K = 0
599	BRA.W	LP1CONT1
600
601LP1NEG0:
602*--FPCR SAVED. D0 IS X IN COMPACT FORM.
603	TST.L	D0
604	BLT.B	LP1NEG
605LP1ZERO:
606	FMOVE.S	negone,FP0
607
608	fmove.l	d1,fpcr
609	bra t_dz
610
611LP1NEG:
612	FMOVE.S	zero,FP0
613
614	fmove.l	d1,fpcr
615	bra	t_operr
616
617	end
618