xref: /netbsd/sys/arch/m68k/fpsp/ssin.sa (revision 6550d01e)
1*	$NetBSD: ssin.sa,v 1.4 2000/03/13 23:52:32 soren Exp $
2
3*	MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
4*	M68000 Hi-Performance Microprocessor Division
5*	M68040 Software Package
6*
7*	M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc.
8*	All rights reserved.
9*
10*	THE SOFTWARE is provided on an "AS IS" basis and without warranty.
11*	To the maximum extent permitted by applicable law,
12*	MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
13*	INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A
14*	PARTICULAR PURPOSE and any warranty against infringement with
15*	regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
16*	and any accompanying written materials.
17*
18*	To the maximum extent permitted by applicable law,
19*	IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
20*	(INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS
21*	PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR
22*	OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE
23*	SOFTWARE.  Motorola assumes no responsibility for the maintenance
24*	and support of the SOFTWARE.
25*
26*	You are hereby granted a copyright license to use, modify, and
27*	distribute the SOFTWARE so long as this entire notice is retained
28*	without alteration in any modified and/or redistributed versions,
29*	and that such modified versions are clearly identified as such.
30*	No licenses are granted by implication, estoppel or otherwise
31*	under any patents or trademarks of Motorola, Inc.
32
33*
34*	ssin.sa 3.3 7/29/91
35*
36*	The entry point sSIN computes the sine of an input argument
37*	sCOS computes the cosine, and sSINCOS computes both. The
38*	corresponding entry points with a "d" computes the same
39*	corresponding function values for denormalized inputs.
40*
41*	Input: Double-extended number X in location pointed to
42*		by address register a0.
43*
44*	Output: The funtion value sin(X) or cos(X) returned in Fp0 if SIN or
45*		COS is requested. Otherwise, for SINCOS, sin(X) is returned
46*		in Fp0, and cos(X) is returned in Fp1.
47*
48*	Modifies: Fp0 for SIN or COS; both Fp0 and Fp1 for SINCOS.
49*
50*	Accuracy and Monotonicity: The returned result is within 1 ulp in
51*		64 significant bit, i.e. within 0.5001 ulp to 53 bits if the
52*		result is subsequently rounded to double precision. The
53*		result is provably monotonic in double precision.
54*
55*	Speed: The programs sSIN and sCOS take approximately 150 cycles for
56*		input argument X such that |X| < 15Pi, which is the usual
57*		situation. The speed for sSINCOS is approximately 190 cycles.
58*
59*	Algorithm:
60*
61*	SIN and COS:
62*	1. If SIN is invoked, set AdjN := 0; otherwise, set AdjN := 1.
63*
64*	2. If |X| >= 15Pi or |X| < 2**(-40), go to 7.
65*
66*	3. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let
67*		k = N mod 4, so in particular, k = 0,1,2,or 3. Overwirte
68*		k by k := k + AdjN.
69*
70*	4. If k is even, go to 6.
71*
72*	5. (k is odd) Set j := (k-1)/2, sgn := (-1)**j. Return sgn*cos(r)
73*		where cos(r) is approximated by an even polynomial in r,
74*		1 + r*r*(B1+s*(B2+ ... + s*B8)),	s = r*r.
75*		Exit.
76*
77*	6. (k is even) Set j := k/2, sgn := (-1)**j. Return sgn*sin(r)
78*		where sin(r) is approximated by an odd polynomial in r
79*		r + r*s*(A1+s*(A2+ ... + s*A7)),	s = r*r.
80*		Exit.
81*
82*	7. If |X| > 1, go to 9.
83*
84*	8. (|X|<2**(-40)) If SIN is invoked, return X; otherwise return 1.
85*
86*	9. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, go back to 3.
87*
88*	SINCOS:
89*	1. If |X| >= 15Pi or |X| < 2**(-40), go to 6.
90*
91*	2. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let
92*		k = N mod 4, so in particular, k = 0,1,2,or 3.
93*
94*	3. If k is even, go to 5.
95*
96*	4. (k is odd) Set j1 := (k-1)/2, j2 := j1 (EOR) (k mod 2), i.e.
97*		j1 exclusive or with the l.s.b. of k.
98*		sgn1 := (-1)**j1, sgn2 := (-1)**j2.
99*		SIN(X) = sgn1 * cos(r) and COS(X) = sgn2*sin(r) where
100*		sin(r) and cos(r) are computed as odd and even polynomials
101*		in r, respectively. Exit
102*
103*	5. (k is even) Set j1 := k/2, sgn1 := (-1)**j1.
104*		SIN(X) = sgn1 * sin(r) and COS(X) = sgn1*cos(r) where
105*		sin(r) and cos(r) are computed as odd and even polynomials
106*		in r, respectively. Exit
107*
108*	6. If |X| > 1, go to 8.
109*
110*	7. (|X|<2**(-40)) SIN(X) = X and COS(X) = 1. Exit.
111*
112*	8. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, go back to 2.
113*
114
115SSIN	IDNT	2,1 Motorola 040 Floating Point Software Package
116
117	section	8
118
119	include	fpsp.h
120
121BOUNDS1	DC.L $3FD78000,$4004BC7E
122TWOBYPI	DC.L $3FE45F30,$6DC9C883
123
124SINA7	DC.L $BD6AAA77,$CCC994F5
125SINA6	DC.L $3DE61209,$7AAE8DA1
126
127SINA5	DC.L $BE5AE645,$2A118AE4
128SINA4	DC.L $3EC71DE3,$A5341531
129
130SINA3	DC.L $BF2A01A0,$1A018B59,$00000000,$00000000
131
132SINA2	DC.L $3FF80000,$88888888,$888859AF,$00000000
133
134SINA1	DC.L $BFFC0000,$AAAAAAAA,$AAAAAA99,$00000000
135
136COSB8	DC.L $3D2AC4D0,$D6011EE3
137COSB7	DC.L $BDA9396F,$9F45AC19
138
139COSB6	DC.L $3E21EED9,$0612C972
140COSB5	DC.L $BE927E4F,$B79D9FCF
141
142COSB4	DC.L $3EFA01A0,$1A01D423,$00000000,$00000000
143
144COSB3	DC.L $BFF50000,$B60B60B6,$0B61D438,$00000000
145
146COSB2	DC.L $3FFA0000,$AAAAAAAA,$AAAAAB5E
147COSB1	DC.L $BF000000
148
149INVTWOPI DC.L $3FFC0000,$A2F9836E,$4E44152A
150
151TWOPI1	DC.L $40010000,$C90FDAA2,$00000000,$00000000
152TWOPI2	DC.L $3FDF0000,$85A308D4,$00000000,$00000000
153
154	xref	PITBL
155
156INARG	equ	FP_SCR4
157
158X	equ	FP_SCR5
159XDCARE	equ	X+2
160XFRAC	equ	X+4
161
162RPRIME	equ	FP_SCR1
163SPRIME	equ	FP_SCR2
164
165POSNEG1	equ	L_SCR1
166TWOTO63	equ	L_SCR1
167
168ENDFLAG	equ	L_SCR2
169N	equ	L_SCR2
170
171ADJN	equ	L_SCR3
172
173	xref	t_frcinx
174	xref	t_extdnrm
175	xref	sto_cos
176
177	xdef	ssind
178ssind:
179*--SIN(X) = X FOR DENORMALIZED X
180	bra		t_extdnrm
181
182	xdef	scosd
183scosd:
184*--COS(X) = 1 FOR DENORMALIZED X
185
186	FMOVE.S		#:3F800000,FP0
187*
188*	9D25B Fix: Sometimes the previous fmove.s sets fpsr bits
189*
190	fmove.l		#0,fpsr
191*
192	bra		t_frcinx
193
194	xdef	ssin
195ssin:
196*--SET ADJN TO 0
197	CLR.L		ADJN(a6)
198	BRA.B		SINBGN
199
200	xdef	scos
201scos:
202*--SET ADJN TO 1
203	MOVE.L		#1,ADJN(a6)
204
205SINBGN:
206*--SAVE FPCR, FP1. CHECK IF |X| IS TOO SMALL OR LARGE
207
208	FMOVE.X		(a0),FP0	...LOAD INPUT
209
210	MOVE.L		(A0),D0
211	MOVE.W		4(A0),D0
212	FMOVE.X		FP0,X(a6)
213	ANDI.L		#$7FFFFFFF,D0		...COMPACTIFY X
214
215	CMPI.L		#$3FD78000,D0		...|X| >= 2**(-40)?
216	BGE.B		SOK1
217	BRA.W		SINSM
218
219SOK1:
220	CMPI.L		#$4004BC7E,D0		...|X| < 15 PI?
221	BLT.B		SINMAIN
222	BRA.W		REDUCEX
223
224SINMAIN:
225*--THIS IS THE USUAL CASE, |X| <= 15 PI.
226*--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
227	FMOVE.X		FP0,FP1
228	FMUL.D		TWOBYPI,FP1	...X*2/PI
229
230*--HIDE THE NEXT THREE INSTRUCTIONS
231	LEA		PITBL+$200,A1 ...TABLE OF N*PI/2, N = -32,...,32
232
233
234*--FP1 IS NOW READY
235	FMOVE.L		FP1,N(a6)		...CONVERT TO INTEGER
236
237	MOVE.L		N(a6),D0
238	ASL.L		#4,D0
239	ADDA.L		D0,A1	...A1 IS THE ADDRESS OF N*PIBY2
240*				...WHICH IS IN TWO PIECES Y1 & Y2
241
242	FSUB.X		(A1)+,FP0	...X-Y1
243*--HIDE THE NEXT ONE
244	FSUB.S		(A1),FP0	...FP0 IS R = (X-Y1)-Y2
245
246SINCONT:
247*--continuation from REDUCEX
248
249*--GET N+ADJN AND SEE IF SIN(R) OR COS(R) IS NEEDED
250	MOVE.L		N(a6),D0
251	ADD.L		ADJN(a6),D0	...SEE IF D0 IS ODD OR EVEN
252	ROR.L		#1,D0	...D0 WAS ODD IFF D0 IS NEGATIVE
253	TST.L		D0
254	BLT.W		COSPOLY
255
256SINPOLY:
257*--LET J BE THE LEAST SIG. BIT OF D0, LET SGN := (-1)**J.
258*--THEN WE RETURN	SGN*SIN(R). SGN*SIN(R) IS COMPUTED BY
259*--R' + R'*S*(A1 + S(A2 + S(A3 + S(A4 + ... + SA7)))), WHERE
260*--R' = SGN*R, S=R*R. THIS CAN BE REWRITTEN AS
261*--R' + R'*S*( [A1+T(A3+T(A5+TA7))] + [S(A2+T(A4+TA6))])
262*--WHERE T=S*S.
263*--NOTE THAT A3 THROUGH A7 ARE STORED IN DOUBLE PRECISION
264*--WHILE A1 AND A2 ARE IN DOUBLE-EXTENDED FORMAT.
265	FMOVE.X		FP0,X(a6)	...X IS R
266	FMUL.X		FP0,FP0	...FP0 IS S
267*---HIDE THE NEXT TWO WHILE WAITING FOR FP0
268	FMOVE.D		SINA7,FP3
269	FMOVE.D		SINA6,FP2
270*--FP0 IS NOW READY
271	FMOVE.X		FP0,FP1
272	FMUL.X		FP1,FP1	...FP1 IS T
273*--HIDE THE NEXT TWO WHILE WAITING FOR FP1
274
275	ROR.L		#1,D0
276	ANDI.L		#$80000000,D0
277*				...LEAST SIG. BIT OF D0 IN SIGN POSITION
278	EOR.L		D0,X(a6)	...X IS NOW R'= SGN*R
279
280	FMUL.X		FP1,FP3	...TA7
281	FMUL.X		FP1,FP2	...TA6
282
283	FADD.D		SINA5,FP3 ...A5+TA7
284	FADD.D		SINA4,FP2 ...A4+TA6
285
286	FMUL.X		FP1,FP3	...T(A5+TA7)
287	FMUL.X		FP1,FP2	...T(A4+TA6)
288
289	FADD.D		SINA3,FP3 ...A3+T(A5+TA7)
290	FADD.X		SINA2,FP2 ...A2+T(A4+TA6)
291
292	FMUL.X		FP3,FP1	...T(A3+T(A5+TA7))
293
294	FMUL.X		FP0,FP2	...S(A2+T(A4+TA6))
295	FADD.X		SINA1,FP1 ...A1+T(A3+T(A5+TA7))
296	FMUL.X		X(a6),FP0	...R'*S
297
298	FADD.X		FP2,FP1	...[A1+T(A3+T(A5+TA7))]+[S(A2+T(A4+TA6))]
299*--FP3 RELEASED, RESTORE NOW AND TAKE SOME ADVANTAGE OF HIDING
300*--FP2 RELEASED, RESTORE NOW AND TAKE FULL ADVANTAGE OF HIDING
301
302
303	FMUL.X		FP1,FP0		...SIN(R')-R'
304*--FP1 RELEASED.
305
306	FMOVE.L		d1,FPCR		;restore users exceptions
307	FADD.X		X(a6),FP0		;last inst - possible exception set
308	bra		t_frcinx
309
310
311COSPOLY:
312*--LET J BE THE LEAST SIG. BIT OF D0, LET SGN := (-1)**J.
313*--THEN WE RETURN	SGN*COS(R). SGN*COS(R) IS COMPUTED BY
314*--SGN + S'*(B1 + S(B2 + S(B3 + S(B4 + ... + SB8)))), WHERE
315*--S=R*R AND S'=SGN*S. THIS CAN BE REWRITTEN AS
316*--SGN + S'*([B1+T(B3+T(B5+TB7))] + [S(B2+T(B4+T(B6+TB8)))])
317*--WHERE T=S*S.
318*--NOTE THAT B4 THROUGH B8 ARE STORED IN DOUBLE PRECISION
319*--WHILE B2 AND B3 ARE IN DOUBLE-EXTENDED FORMAT, B1 IS -1/2
320*--AND IS THEREFORE STORED AS SINGLE PRECISION.
321
322	FMUL.X		FP0,FP0	...FP0 IS S
323*---HIDE THE NEXT TWO WHILE WAITING FOR FP0
324	FMOVE.D		COSB8,FP2
325	FMOVE.D		COSB7,FP3
326*--FP0 IS NOW READY
327	FMOVE.X		FP0,FP1
328	FMUL.X		FP1,FP1	...FP1 IS T
329*--HIDE THE NEXT TWO WHILE WAITING FOR FP1
330	FMOVE.X		FP0,X(a6)	...X IS S
331	ROR.L		#1,D0
332	ANDI.L		#$80000000,D0
333*			...LEAST SIG. BIT OF D0 IN SIGN POSITION
334
335	FMUL.X		FP1,FP2	...TB8
336*--HIDE THE NEXT TWO WHILE WAITING FOR THE XU
337	EOR.L		D0,X(a6)	...X IS NOW S'= SGN*S
338	ANDI.L		#$80000000,D0
339
340	FMUL.X		FP1,FP3	...TB7
341*--HIDE THE NEXT TWO WHILE WAITING FOR THE XU
342	ORI.L		#$3F800000,D0	...D0 IS SGN IN SINGLE
343	MOVE.L		D0,POSNEG1(a6)
344
345	FADD.D		COSB6,FP2 ...B6+TB8
346	FADD.D		COSB5,FP3 ...B5+TB7
347
348	FMUL.X		FP1,FP2	...T(B6+TB8)
349	FMUL.X		FP1,FP3	...T(B5+TB7)
350
351	FADD.D		COSB4,FP2 ...B4+T(B6+TB8)
352	FADD.X		COSB3,FP3 ...B3+T(B5+TB7)
353
354	FMUL.X		FP1,FP2	...T(B4+T(B6+TB8))
355	FMUL.X		FP3,FP1	...T(B3+T(B5+TB7))
356
357	FADD.X		COSB2,FP2 ...B2+T(B4+T(B6+TB8))
358	FADD.S		COSB1,FP1 ...B1+T(B3+T(B5+TB7))
359
360	FMUL.X		FP2,FP0	...S(B2+T(B4+T(B6+TB8)))
361*--FP3 RELEASED, RESTORE NOW AND TAKE SOME ADVANTAGE OF HIDING
362*--FP2 RELEASED.
363
364
365	FADD.X		FP1,FP0
366*--FP1 RELEASED
367
368	FMUL.X		X(a6),FP0
369
370	FMOVE.L		d1,FPCR		;restore users exceptions
371	FADD.S		POSNEG1(a6),FP0	;last inst - possible exception set
372	bra		t_frcinx
373
374
375SINBORS:
376*--IF |X| > 15PI, WE USE THE GENERAL ARGUMENT REDUCTION.
377*--IF |X| < 2**(-40), RETURN X OR 1.
378	CMPI.L		#$3FFF8000,D0
379	BGT.B		REDUCEX
380
381
382SINSM:
383	MOVE.L		ADJN(a6),D0
384	TST.L		D0
385	BGT.B		COSTINY
386
387SINTINY:
388	CLR.W		XDCARE(a6)	...JUST IN CASE
389	FMOVE.L		d1,FPCR		;restore users exceptions
390	FMOVE.X		X(a6),FP0		;last inst - possible exception set
391	bra		t_frcinx
392
393
394COSTINY:
395	FMOVE.S		#:3F800000,FP0
396
397	FMOVE.L		d1,FPCR		;restore users exceptions
398	FSUB.S		#:00800000,FP0	;last inst - possible exception set
399	bra		t_frcinx
400
401
402REDUCEX:
403*--WHEN REDUCEX IS USED, THE CODE WILL INEVITABLY BE SLOW.
404*--THIS REDUCTION METHOD, HOWEVER, IS MUCH FASTER THAN USING
405*--THE REMAINDER INSTRUCTION WHICH IS NOW IN SOFTWARE.
406
407	FMOVEM.X	FP2-FP5,-(A7)	...save FP2 through FP5
408	MOVE.L		D2,-(A7)
409        FMOVE.S         #:00000000,FP1
410*--If compact form of abs(arg) in d0=$7ffeffff, argument is so large that
411*--there is a danger of unwanted overflow in first LOOP iteration.  In this
412*--case, reduce argument by one remainder step to make subsequent reduction
413*--safe.
414	cmpi.l	#$7ffeffff,d0		;is argument dangerously large?
415	bne.b	LOOP
416	move.l	#$7ffe0000,FP_SCR2(a6)	;yes
417*					;create 2**16383*PI/2
418	move.l	#$c90fdaa2,FP_SCR2+4(a6)
419	clr.l	FP_SCR2+8(a6)
420	ftst.x	fp0			;test sign of argument
421	move.l	#$7fdc0000,FP_SCR3(a6)	;create low half of 2**16383*
422*					;PI/2 at FP_SCR3
423	move.l	#$85a308d3,FP_SCR3+4(a6)
424	clr.l   FP_SCR3+8(a6)
425	fblt.w	red_neg
426	or.w	#$8000,FP_SCR2(a6)	;positive arg
427	or.w	#$8000,FP_SCR3(a6)
428red_neg:
429	fadd.x  FP_SCR2(a6),fp0		;high part of reduction is exact
430	fmove.x  fp0,fp1		;save high result in fp1
431	fadd.x  FP_SCR3(a6),fp0		;low part of reduction
432	fsub.x  fp0,fp1			;determine low component of result
433	fadd.x  FP_SCR3(a6),fp1		;fp0/fp1 are reduced argument.
434
435*--ON ENTRY, FP0 IS X, ON RETURN, FP0 IS X REM PI/2, |X| <= PI/4.
436*--integer quotient will be stored in N
437*--Intermeditate remainder is 66-bit long; (R,r) in (FP0,FP1)
438
439LOOP:
440	FMOVE.X		FP0,INARG(a6)	...+-2**K * F, 1 <= F < 2
441	MOVE.W		INARG(a6),D0
442        MOVE.L          D0,A1		...save a copy of D0
443	ANDI.L		#$00007FFF,D0
444	SUBI.L		#$00003FFF,D0	...D0 IS K
445	CMPI.L		#28,D0
446	BLE.B		LASTLOOP
447CONTLOOP:
448	SUBI.L		#27,D0	 ...D0 IS L := K-27
449	CLR.L		ENDFLAG(a6)
450	BRA.B		WORK
451LASTLOOP:
452	CLR.L		D0		...D0 IS L := 0
453	MOVE.L		#1,ENDFLAG(a6)
454
455WORK:
456*--FIND THE REMAINDER OF (R,r) W.R.T.	2**L * (PI/2). L IS SO CHOSEN
457*--THAT	INT( X * (2/PI) / 2**(L) ) < 2**29.
458
459*--CREATE 2**(-L) * (2/PI), SIGN(INARG)*2**(63),
460*--2**L * (PIby2_1), 2**L * (PIby2_2)
461
462	MOVE.L		#$00003FFE,D2	...BIASED EXPO OF 2/PI
463	SUB.L		D0,D2		...BIASED EXPO OF 2**(-L)*(2/PI)
464
465	MOVE.L		#$A2F9836E,FP_SCR1+4(a6)
466	MOVE.L		#$4E44152A,FP_SCR1+8(a6)
467	MOVE.W		D2,FP_SCR1(a6)	...FP_SCR1 is 2**(-L)*(2/PI)
468
469	FMOVE.X		FP0,FP2
470	FMUL.X		FP_SCR1(a6),FP2
471*--WE MUST NOW FIND INT(FP2). SINCE WE NEED THIS VALUE IN
472*--FLOATING POINT FORMAT, THE TWO FMOVE'S	FMOVE.L FP <--> N
473*--WILL BE TOO INEFFICIENT. THE WAY AROUND IT IS THAT
474*--(SIGN(INARG)*2**63	+	FP2) - SIGN(INARG)*2**63 WILL GIVE
475*--US THE DESIRED VALUE IN FLOATING POINT.
476
477*--HIDE SIX CYCLES OF INSTRUCTION
478        MOVE.L		A1,D2
479        SWAP		D2
480	ANDI.L		#$80000000,D2
481	ORI.L		#$5F000000,D2	...D2 IS SIGN(INARG)*2**63 IN SGL
482	MOVE.L		D2,TWOTO63(a6)
483
484	MOVE.L		D0,D2
485	ADDI.L		#$00003FFF,D2	...BIASED EXPO OF 2**L * (PI/2)
486
487*--FP2 IS READY
488	FADD.S		TWOTO63(a6),FP2	...THE FRACTIONAL PART OF FP1 IS ROUNDED
489
490*--HIDE 4 CYCLES OF INSTRUCTION; creating 2**(L)*Piby2_1  and  2**(L)*Piby2_2
491        MOVE.W		D2,FP_SCR2(a6)
492	CLR.W           FP_SCR2+2(a6)
493	MOVE.L		#$C90FDAA2,FP_SCR2+4(a6)
494	CLR.L		FP_SCR2+8(a6)		...FP_SCR2 is  2**(L) * Piby2_1
495
496*--FP2 IS READY
497	FSUB.S		TWOTO63(a6),FP2		...FP2 is N
498
499	ADDI.L		#$00003FDD,D0
500        MOVE.W		D0,FP_SCR3(a6)
501	CLR.W           FP_SCR3+2(a6)
502	MOVE.L		#$85A308D3,FP_SCR3+4(a6)
503	CLR.L		FP_SCR3+8(a6)		...FP_SCR3 is 2**(L) * Piby2_2
504
505	MOVE.L		ENDFLAG(a6),D0
506
507*--We are now ready to perform (R+r) - N*P1 - N*P2, P1 = 2**(L) * Piby2_1 and
508*--P2 = 2**(L) * Piby2_2
509	FMOVE.X		FP2,FP4
510	FMul.X		FP_SCR2(a6),FP4		...W = N*P1
511	FMove.X		FP2,FP5
512	FMul.X		FP_SCR3(a6),FP5		...w = N*P2
513	FMove.X		FP4,FP3
514*--we want P+p = W+w  but  |p| <= half ulp of P
515*--Then, we need to compute  A := R-P   and  a := r-p
516	FAdd.X		FP5,FP3			...FP3 is P
517	FSub.X		FP3,FP4			...W-P
518
519	FSub.X		FP3,FP0			...FP0 is A := R - P
520        FAdd.X		FP5,FP4			...FP4 is p = (W-P)+w
521
522	FMove.X		FP0,FP3			...FP3 A
523	FSub.X		FP4,FP1			...FP1 is a := r - p
524
525*--Now we need to normalize (A,a) to  "new (R,r)" where R+r = A+a but
526*--|r| <= half ulp of R.
527	FAdd.X		FP1,FP0			...FP0 is R := A+a
528*--No need to calculate r if this is the last loop
529	TST.L		D0
530	BGT.W		RESTORE
531
532*--Need to calculate r
533	FSub.X		FP0,FP3			...A-R
534	FAdd.X		FP3,FP1			...FP1 is r := (A-R)+a
535	BRA.W		LOOP
536
537RESTORE:
538        FMOVE.L		FP2,N(a6)
539	MOVE.L		(A7)+,D2
540	FMOVEM.X	(A7)+,FP2-FP5
541
542
543	MOVE.L		ADJN(a6),D0
544	CMPI.L		#4,D0
545
546	BLT.W		SINCONT
547	BRA.B		SCCONT
548
549	xdef	ssincosd
550ssincosd:
551*--SIN AND COS OF X FOR DENORMALIZED X
552
553	FMOVE.S		#:3F800000,FP1
554	bsr		sto_cos		;store cosine result
555	bra		t_extdnrm
556
557	xdef	ssincos
558ssincos:
559*--SET ADJN TO 4
560	MOVE.L		#4,ADJN(a6)
561
562	FMOVE.X		(a0),FP0	...LOAD INPUT
563
564	MOVE.L		(A0),D0
565	MOVE.W		4(A0),D0
566	FMOVE.X		FP0,X(a6)
567	ANDI.L		#$7FFFFFFF,D0		...COMPACTIFY X
568
569	CMPI.L		#$3FD78000,D0		...|X| >= 2**(-40)?
570	BGE.B		SCOK1
571	BRA.W		SCSM
572
573SCOK1:
574	CMPI.L		#$4004BC7E,D0		...|X| < 15 PI?
575	BLT.B		SCMAIN
576	BRA.W		REDUCEX
577
578
579SCMAIN:
580*--THIS IS THE USUAL CASE, |X| <= 15 PI.
581*--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
582	FMOVE.X		FP0,FP1
583	FMUL.D		TWOBYPI,FP1	...X*2/PI
584
585*--HIDE THE NEXT THREE INSTRUCTIONS
586	LEA		PITBL+$200,A1 ...TABLE OF N*PI/2, N = -32,...,32
587
588
589*--FP1 IS NOW READY
590	FMOVE.L		FP1,N(a6)		...CONVERT TO INTEGER
591
592	MOVE.L		N(a6),D0
593	ASL.L		#4,D0
594	ADDA.L		D0,A1		...ADDRESS OF N*PIBY2, IN Y1, Y2
595
596	FSUB.X		(A1)+,FP0	...X-Y1
597        FSUB.S		(A1),FP0	...FP0 IS R = (X-Y1)-Y2
598
599SCCONT:
600*--continuation point from REDUCEX
601
602*--HIDE THE NEXT TWO
603	MOVE.L		N(a6),D0
604	ROR.L		#1,D0
605
606	TST.L		D0		...D0 < 0 IFF N IS ODD
607	BGE.W		NEVEN
608
609NODD:
610*--REGISTERS SAVED SO FAR: D0, A0, FP2.
611
612	FMOVE.X		FP0,RPRIME(a6)
613	FMUL.X		FP0,FP0	 ...FP0 IS S = R*R
614	FMOVE.D		SINA7,FP1	...A7
615	FMOVE.D		COSB8,FP2	...B8
616	FMUL.X		FP0,FP1	 ...SA7
617	MOVE.L		d2,-(A7)
618	MOVE.L		D0,d2
619	FMUL.X		FP0,FP2	 ...SB8
620	ROR.L		#1,d2
621	ANDI.L		#$80000000,d2
622
623	FADD.D		SINA6,FP1	...A6+SA7
624	EOR.L		D0,d2
625	ANDI.L		#$80000000,d2
626	FADD.D		COSB7,FP2	...B7+SB8
627
628	FMUL.X		FP0,FP1	 ...S(A6+SA7)
629	EOR.L		d2,RPRIME(a6)
630	MOVE.L		(A7)+,d2
631	FMUL.X		FP0,FP2	 ...S(B7+SB8)
632	ROR.L		#1,D0
633	ANDI.L		#$80000000,D0
634
635	FADD.D		SINA5,FP1	...A5+S(A6+SA7)
636	MOVE.L		#$3F800000,POSNEG1(a6)
637	EOR.L		D0,POSNEG1(a6)
638	FADD.D		COSB6,FP2	...B6+S(B7+SB8)
639
640	FMUL.X		FP0,FP1	 ...S(A5+S(A6+SA7))
641	FMUL.X		FP0,FP2	 ...S(B6+S(B7+SB8))
642	FMOVE.X		FP0,SPRIME(a6)
643
644	FADD.D		SINA4,FP1	...A4+S(A5+S(A6+SA7))
645	EOR.L		D0,SPRIME(a6)
646	FADD.D		COSB5,FP2	...B5+S(B6+S(B7+SB8))
647
648	FMUL.X		FP0,FP1	 ...S(A4+...)
649	FMUL.X		FP0,FP2	 ...S(B5+...)
650
651	FADD.D		SINA3,FP1	...A3+S(A4+...)
652	FADD.D		COSB4,FP2	...B4+S(B5+...)
653
654	FMUL.X		FP0,FP1	 ...S(A3+...)
655	FMUL.X		FP0,FP2	 ...S(B4+...)
656
657	FADD.X		SINA2,FP1	...A2+S(A3+...)
658	FADD.X		COSB3,FP2	...B3+S(B4+...)
659
660	FMUL.X		FP0,FP1	 ...S(A2+...)
661	FMUL.X		FP0,FP2	 ...S(B3+...)
662
663	FADD.X		SINA1,FP1	...A1+S(A2+...)
664	FADD.X		COSB2,FP2	...B2+S(B3+...)
665
666	FMUL.X		FP0,FP1	 ...S(A1+...)
667	FMUL.X		FP2,FP0	 ...S(B2+...)
668
669
670
671	FMUL.X		RPRIME(a6),FP1	...R'S(A1+...)
672	FADD.S		COSB1,FP0	...B1+S(B2...)
673	FMUL.X		SPRIME(a6),FP0	...S'(B1+S(B2+...))
674
675	move.l		d1,-(sp)	;restore users mode & precision
676	andi.l		#$ff,d1		;mask off all exceptions
677	fmove.l		d1,FPCR
678	FADD.X		RPRIME(a6),FP1	...COS(X)
679	bsr		sto_cos		;store cosine result
680	FMOVE.L		(sp)+,FPCR	;restore users exceptions
681	FADD.S		POSNEG1(a6),FP0	...SIN(X)
682
683	bra		t_frcinx
684
685
686NEVEN:
687*--REGISTERS SAVED SO FAR: FP2.
688
689	FMOVE.X		FP0,RPRIME(a6)
690	FMUL.X		FP0,FP0	 ...FP0 IS S = R*R
691	FMOVE.D		COSB8,FP1			...B8
692	FMOVE.D		SINA7,FP2			...A7
693	FMUL.X		FP0,FP1	 ...SB8
694	FMOVE.X		FP0,SPRIME(a6)
695	FMUL.X		FP0,FP2	 ...SA7
696	ROR.L		#1,D0
697	ANDI.L		#$80000000,D0
698	FADD.D		COSB7,FP1	...B7+SB8
699	FADD.D		SINA6,FP2	...A6+SA7
700	EOR.L		D0,RPRIME(a6)
701	EOR.L		D0,SPRIME(a6)
702	FMUL.X		FP0,FP1	 ...S(B7+SB8)
703	ORI.L		#$3F800000,D0
704	MOVE.L		D0,POSNEG1(a6)
705	FMUL.X		FP0,FP2	 ...S(A6+SA7)
706
707	FADD.D		COSB6,FP1	...B6+S(B7+SB8)
708	FADD.D		SINA5,FP2	...A5+S(A6+SA7)
709
710	FMUL.X		FP0,FP1	 ...S(B6+S(B7+SB8))
711	FMUL.X		FP0,FP2	 ...S(A5+S(A6+SA7))
712
713	FADD.D		COSB5,FP1	...B5+S(B6+S(B7+SB8))
714	FADD.D		SINA4,FP2	...A4+S(A5+S(A6+SA7))
715
716	FMUL.X		FP0,FP1	 ...S(B5+...)
717	FMUL.X		FP0,FP2	 ...S(A4+...)
718
719	FADD.D		COSB4,FP1	...B4+S(B5+...)
720	FADD.D		SINA3,FP2	...A3+S(A4+...)
721
722	FMUL.X		FP0,FP1	 ...S(B4+...)
723	FMUL.X		FP0,FP2	 ...S(A3+...)
724
725	FADD.X		COSB3,FP1	...B3+S(B4+...)
726	FADD.X		SINA2,FP2	...A2+S(A3+...)
727
728	FMUL.X		FP0,FP1	 ...S(B3+...)
729	FMUL.X		FP0,FP2	 ...S(A2+...)
730
731	FADD.X		COSB2,FP1	...B2+S(B3+...)
732	FADD.X		SINA1,FP2	...A1+S(A2+...)
733
734	FMUL.X		FP0,FP1	 ...S(B2+...)
735	fmul.x		fp2,fp0	 ...s(a1+...)
736
737
738
739	FADD.S		COSB1,FP1	...B1+S(B2...)
740	FMUL.X		RPRIME(a6),FP0	...R'S(A1+...)
741	FMUL.X		SPRIME(a6),FP1	...S'(B1+S(B2+...))
742
743	move.l		d1,-(sp)	;save users mode & precision
744	andi.l		#$ff,d1		;mask off all exceptions
745	fmove.l		d1,FPCR
746	FADD.S		POSNEG1(a6),FP1	...COS(X)
747	bsr		sto_cos		;store cosine result
748	FMOVE.L		(sp)+,FPCR	;restore users exceptions
749	FADD.X		RPRIME(a6),FP0	...SIN(X)
750
751	bra		t_frcinx
752
753SCBORS:
754	CMPI.L		#$3FFF8000,D0
755	BGT.W		REDUCEX
756
757
758SCSM:
759	CLR.W		XDCARE(a6)
760	FMOVE.S		#:3F800000,FP1
761
762	move.l		d1,-(sp)	;save users mode & precision
763	andi.l		#$ff,d1		;mask off all exceptions
764	fmove.l		d1,FPCR
765	FSUB.S		#:00800000,FP1
766	bsr		sto_cos		;store cosine result
767	FMOVE.L		(sp)+,FPCR	;restore users exceptions
768	FMOVE.X		X(a6),FP0
769	bra		t_frcinx
770
771	end
772