1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
23 */
24/*
25 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
26 * Use is subject to license terms.
27 */
28
	.file	"__vcos_ultra3.S"

#include "libm.h"
#if defined(LIBMVEC_SO_BUILD)
	! When building the libmvec shared object, publish __vcos as a weak
	! alias for this UltraSPARC-III-tuned implementation so callers bind
	! to the generic name.
	.weak	__vcos
	.type	__vcos,#function
	__vcos = __vcos_ultra3
#endif
37
	RO_DATA
	.align	64
! Constant pool, loaded once at entry into %f40-%f62 (see the #defines
! below).  64-byte alignment keeps the whole table in one cache line.
constants:
	.word	0x42c80000,0x00000000	! 3 * 2^44 (rounding constant; the sum's
					! low word is stored to nk0..nk3 and
					! scaled by 32 to form a table index)
	.word	0x43380000,0x00000000	! 3 * 2^51 (round-to-nearest-integer
					! constant used on the .medium path)
	.word	0x3fe45f30,0x6dc9c883	! invpio2 = 2/pi
	.word	0x3ff921fb,0x54442c00	! pio2_1 (leading bits of pi/2)
	.word	0x3d318469,0x898cc400	! pio2_2 (middle bits of pi/2)
	.word	0x3a71701b,0x839a2520	! pio2_3 (trailing bits of pi/2)
	.word	0xbfc55555,0x55555533	! pp1 \
	.word	0x3f811111,0x10e7d53b	! pp2  } coefficients used on the sin(x) paths
	.word	0xbf2a0167,0xe6b3cf9b	! pp3 /
	.word	0xbfdfffff,0xffffff65	! qq1 \
	.word	0x3fa55555,0x54f88ed0	! qq2  } coefficients used on the cos(x) paths
	.word	0xbf56c12c,0xdd185f60	! qq3 /
53
! local storage indices
!
! Frame-pointer-relative scratch offsets (STACK_BIAS is the V9 stack bias;
! zero on V8+).  NOTE: do not append "!" comments to these #define lines --
! cpp would paste the comment into every use site and truncate the
! instruction.

! saved copies of the incoming arguments (x, y, n, stridex, stridey)
#define xsave		STACK_BIAS-0x8
#define ysave		STACK_BIAS-0x10
#define nsave		STACK_BIAS-0x14
#define sxsave		STACK_BIAS-0x18
#define sysave		STACK_BIAS-0x1c
! flag word, cleared at entry ("biguns = 0")
#define biguns		STACK_BIAS-0x20
! per-lane slots used to move low FP words to the integer side
! (st %fN,[%fp+nkM] ... ld [%fp+nkM],%lM)
#define nk3		STACK_BIAS-0x24
#define nk2		STACK_BIAS-0x28
#define nk1		STACK_BIAS-0x2c
#define nk0		STACK_BIAS-0x30
! dummy store target used by the loop prologue before real py pointers exist
#define junk		STACK_BIAS-0x38
! sizeof temp storage - must be a multiple of 16 for V9
#define tmps		0x40
69
70! register use
71
72! i0  n
73! i1  x
74! i2  stridex
75! i3  y
76! i4  stridey
77! i5  0x80000000
78
79! l0  hx0
80! l1  hx1
81! l2  hx2
82! l3  hx3
83! l4  k0
84! l5  k1
85! l6  k2
86! l7  k3
87
88! the following are 64-bit registers in both V8+ and V9
89
90! g1  __vlibm_TBL_sincos2
91! g5  scratch
92
93! o0  py0
94! o1  py1
95! o2  py2
96! o3  py3
97! o4  0x3e400000
98! o5  0x3fe921fb,0x4099251e
99! o7  scratch
100
101! f0  hx0
102! f2
103! f4
104! f6
105! f8  hx1
106! f10
107! f12
108! f14
109! f16 hx2
110! f18
111! f20
112! f22
113! f24 hx3
114! f26
115! f28
116! f30
117! f32
118! f34
119! f36
120! f38
121
! Double-precision constant registers: loaded once from the constants
! table at entry and live for the whole routine.  Kept as #defines so the
! code reads symbolically; no "!" comments on these lines (cpp would
! inject them into instructions).
#define c3two44	%f40
#define c3two51	%f42
#define invpio2	%f44
#define pio2_1	%f46
#define pio2_2	%f48
#define pio2_3	%f50
#define pp1	%f52
#define pp2	%f54
#define pp3	%f56
#define qq1	%f58
#define qq2	%f60
#define qq3	%f62
134
135	ENTRY(__vcos_ultra3)
136	save	%sp,-SA(MINFRAME)-tmps,%sp
137	PIC_SETUP(l7)
138	PIC_SET(l7,constants,o0)
139	PIC_SET(l7,__vlibm_TBL_sincos2,o1)
140	mov	%o1,%g1
141	wr	%g0,0x82,%asi		! set %asi for non-faulting loads
142#ifdef __sparcv9
143	stx	%i1,[%fp+xsave]		! save arguments
144	stx	%i3,[%fp+ysave]
145#else
146	st	%i1,[%fp+xsave]		! save arguments
147	st	%i3,[%fp+ysave]
148#endif
149	st	%i0,[%fp+nsave]
150	st	%i2,[%fp+sxsave]
151	st	%i4,[%fp+sysave]
152	st	%g0,[%fp+biguns]	! biguns = 0
153	ldd	[%o0+0x00],c3two44	! load/set up constants
154	ldd	[%o0+0x08],c3two51
155	ldd	[%o0+0x10],invpio2
156	ldd	[%o0+0x18],pio2_1
157	ldd	[%o0+0x20],pio2_2
158	ldd	[%o0+0x28],pio2_3
159	ldd	[%o0+0x30],pp1
160	ldd	[%o0+0x38],pp2
161	ldd	[%o0+0x40],pp3
162	ldd	[%o0+0x48],qq1
163	ldd	[%o0+0x50],qq2
164	ldd	[%o0+0x58],qq3
165	sethi	%hi(0x80000000),%i5
166	sethi	%hi(0x3e400000),%o4
167	sethi	%hi(0x3fe921fb),%o5
168	or	%o5,%lo(0x3fe921fb),%o5
169	sllx	%o5,32,%o5
170	sethi	%hi(0x4099251e),%o7
171	or	%o7,%lo(0x4099251e),%o7
172	or	%o5,%o7,%o5
173	sll	%i2,3,%i2		! scale strides
174	sll	%i4,3,%i4
175	add	%fp,junk,%o1		! loop prologue
176	add	%fp,junk,%o2
177	add	%fp,junk,%o3
178	ld	[%i1],%l0		! *x
179	ld	[%i1],%f0
180	ld	[%i1+4],%f3
181	andn	%l0,%i5,%l0		! mask off sign
182	add	%i1,%i2,%i1		! x += stridex
183	ba	.loop0
184	nop
185
186! 16-byte aligned
187	.align	16
188.loop0:
189	lda	[%i1]%asi,%l1		! preload next argument
190	sub	%l0,%o4,%g5
191	sub	%o5,%l0,%o7
192	fabss	%f0,%f2
193
194	lda	[%i1]%asi,%f8
195	orcc	%o7,%g5,%g0
196	mov	%i3,%o0			! py0 = y
197	bl,pn	%icc,.range0		! hx < 0x3e400000 or hx > 0x4099251e
198
199! delay slot
200	lda	[%i1+4]%asi,%f11
201	addcc	%i0,-1,%i0
202	add	%i3,%i4,%i3		! y += stridey
203	ble,pn	%icc,.last1
204
205! delay slot
206	andn	%l1,%i5,%l1
207	add	%i1,%i2,%i1		! x += stridex
208	faddd	%f2,c3two44,%f4
209	st	%f15,[%o1+4]
210
211.loop1:
212	lda	[%i1]%asi,%l2		! preload next argument
213	sub	%l1,%o4,%g5
214	sub	%o5,%l1,%o7
215	fabss	%f8,%f10
216
217	lda	[%i1]%asi,%f16
218	orcc	%o7,%g5,%g0
219	mov	%i3,%o1			! py1 = y
220	bl,pn	%icc,.range1		! hx < 0x3e400000 or hx > 0x4099251e
221
222! delay slot
223	lda	[%i1+4]%asi,%f19
224	addcc	%i0,-1,%i0
225	add	%i3,%i4,%i3		! y += stridey
226	ble,pn	%icc,.last2
227
228! delay slot
229	andn	%l2,%i5,%l2
230	add	%i1,%i2,%i1		! x += stridex
231	faddd	%f10,c3two44,%f12
232	st	%f23,[%o2+4]
233
234.loop2:
235	lda	[%i1]%asi,%l3		! preload next argument
236	sub	%l2,%o4,%g5
237	sub	%o5,%l2,%o7
238	fabss	%f16,%f18
239
240	lda	[%i1]%asi,%f24
241	orcc	%o7,%g5,%g0
242	mov	%i3,%o2			! py2 = y
243	bl,pn	%icc,.range2		! hx < 0x3e400000 or hx > 0x4099251e
244
245! delay slot
246	lda	[%i1+4]%asi,%f27
247	addcc	%i0,-1,%i0
248	add	%i3,%i4,%i3		! y += stridey
249	ble,pn	%icc,.last3
250
251! delay slot
252	andn	%l3,%i5,%l3
253	add	%i1,%i2,%i1		! x += stridex
254	faddd	%f18,c3two44,%f20
255	st	%f31,[%o3+4]
256
257.loop3:
258	sub	%l3,%o4,%g5
259	sub	%o5,%l3,%o7
260	fabss	%f24,%f26
261	st	%f5,[%fp+nk0]
262
263	orcc	%o7,%g5,%g0
264	mov	%i3,%o3			! py3 = y
	bl,pn	%icc,.range3		! hx < 0x3e400000 or hx > 0x4099251e
266! delay slot
267	st	%f13,[%fp+nk1]
268
269!!! DONE?
270.cont:
271	srlx	%o5,32,%o7
272	add	%i3,%i4,%i3		! y += stridey
273	fmovs	%f3,%f1
274	st	%f21,[%fp+nk2]
275
276	sub	%o7,%l0,%l0
277	sub	%o7,%l1,%l1
278	faddd	%f26,c3two44,%f28
279	st	%f29,[%fp+nk3]
280
281	sub	%o7,%l2,%l2
282	sub	%o7,%l3,%l3
283	fmovs	%f11,%f9
284
285	or	%l0,%l1,%l0
286	or	%l2,%l3,%l2
287	fmovs	%f19,%f17
288
289	fmovs	%f27,%f25
290	fmuld	%f0,invpio2,%f6		! x * invpio2, for medium range
291
292	fmuld	%f8,invpio2,%f14
293	ld	[%fp+nk0],%l4
294
295	fmuld	%f16,invpio2,%f22
296	ld	[%fp+nk1],%l5
297
298	orcc	%l0,%l2,%g0
299	bl,pn	%icc,.medium
300! delay slot
301	fmuld	%f24,invpio2,%f30
302	ld	[%fp+nk2],%l6
303
304	ld	[%fp+nk3],%l7
305	sll	%l4,5,%l4		! k
306	fcmpd	%fcc0,%f0,pio2_3	! x < pio2_3 iff x < 0
307
308	sll	%l5,5,%l5
309	ldd	[%l4+%g1],%f4
310	fcmpd	%fcc1,%f8,pio2_3
311
312	sll	%l6,5,%l6
313	ldd	[%l5+%g1],%f12
314	fcmpd	%fcc2,%f16,pio2_3
315
316	sll	%l7,5,%l7
317	ldd	[%l6+%g1],%f20
318	fcmpd	%fcc3,%f24,pio2_3
319
320	ldd	[%l7+%g1],%f28
321	fsubd	%f2,%f4,%f2		! x -= __vlibm_TBL_sincos2[k]
322
323	fsubd	%f10,%f12,%f10
324
325	fsubd	%f18,%f20,%f18
326
327	fsubd	%f26,%f28,%f26
328
329	fmuld	%f2,%f2,%f0		! z = x * x
330
331	fmuld	%f10,%f10,%f8
332
333	fmuld	%f18,%f18,%f16
334
335	fmuld	%f26,%f26,%f24
336
337	fmuld	%f0,qq3,%f6
338
339	fmuld	%f8,qq3,%f14
340
341	fmuld	%f16,qq3,%f22
342
343	fmuld	%f24,qq3,%f30
344
345	faddd	%f6,qq2,%f6
346	fmuld	%f0,pp2,%f4
347
348	faddd	%f14,qq2,%f14
349	fmuld	%f8,pp2,%f12
350
351	faddd	%f22,qq2,%f22
352	fmuld	%f16,pp2,%f20
353
354	faddd	%f30,qq2,%f30
355	fmuld	%f24,pp2,%f28
356
357	fmuld	%f0,%f6,%f6
358	faddd	%f4,pp1,%f4
359
360	fmuld	%f8,%f14,%f14
361	faddd	%f12,pp1,%f12
362
363	fmuld	%f16,%f22,%f22
364	faddd	%f20,pp1,%f20
365
366	fmuld	%f24,%f30,%f30
367	faddd	%f28,pp1,%f28
368
369	faddd	%f6,qq1,%f6
370	fmuld	%f0,%f4,%f4
371	add	%l4,%g1,%l4
372
373	faddd	%f14,qq1,%f14
374	fmuld	%f8,%f12,%f12
375	add	%l5,%g1,%l5
376
377	faddd	%f22,qq1,%f22
378	fmuld	%f16,%f20,%f20
379	add	%l6,%g1,%l6
380
381	faddd	%f30,qq1,%f30
382	fmuld	%f24,%f28,%f28
383	add	%l7,%g1,%l7
384
385	fmuld	%f2,%f4,%f4
386
387	fmuld	%f10,%f12,%f12
388
389	fmuld	%f18,%f20,%f20
390
391	fmuld	%f26,%f28,%f28
392
393	fmuld	%f0,%f6,%f6
394	faddd	%f4,%f2,%f4
395	ldd	[%l4+16],%f32
396
397	fmuld	%f8,%f14,%f14
398	faddd	%f12,%f10,%f12
399	ldd	[%l5+16],%f34
400
401	fmuld	%f16,%f22,%f22
402	faddd	%f20,%f18,%f20
403	ldd	[%l6+16],%f36
404
405	fmuld	%f24,%f30,%f30
406	faddd	%f28,%f26,%f28
407	ldd	[%l7+16],%f38
408
409	fmuld	%f32,%f6,%f6
410	ldd	[%l4+8],%f2
411
412	fmuld	%f34,%f14,%f14
413	ldd	[%l5+8],%f10
414
415	fmuld	%f36,%f22,%f22
416	ldd	[%l6+8],%f18
417
418	fmuld	%f38,%f30,%f30
419	ldd	[%l7+8],%f26
420
421	fmuld	%f2,%f4,%f4
422
423	fmuld	%f10,%f12,%f12
424
425	fmuld	%f18,%f20,%f20
426
427	fmuld	%f26,%f28,%f28
428
429	fsubd	%f6,%f4,%f6
430	lda	[%i1]%asi,%l0		! preload next argument
431
432	fsubd	%f14,%f12,%f14
433	lda	[%i1]%asi,%f0
434
435	fsubd	%f22,%f20,%f22
436	lda	[%i1+4]%asi,%f3
437
438	fsubd	%f30,%f28,%f30
439	andn	%l0,%i5,%l0
440	add	%i1,%i2,%i1
441
442	faddd	%f6,%f32,%f6
443	st	%f6,[%o0]
444
445	faddd	%f14,%f34,%f14
446	st	%f14,[%o1]
447
448	faddd	%f22,%f36,%f22
449	st	%f22,[%o2]
450
451	faddd	%f30,%f38,%f30
452	st	%f30,[%o3]
453	addcc	%i0,-1,%i0
454
455	bg,pt	%icc,.loop0
456! delay slot
457	st	%f7,[%o0+4]
458
459	ba,pt	%icc,.end
460! delay slot
461	nop
462
463
464	.align	16
465.medium:
466	faddd	%f6,c3two51,%f4
467	st	%f5,[%fp+nk0]
468
469	faddd	%f14,c3two51,%f12
470	st	%f13,[%fp+nk1]
471
472	faddd	%f22,c3two51,%f20
473	st	%f21,[%fp+nk2]
474
475	faddd	%f30,c3two51,%f28
476	st	%f29,[%fp+nk3]
477
478	fsubd	%f4,c3two51,%f6
479
480	fsubd	%f12,c3two51,%f14
481
482	fsubd	%f20,c3two51,%f22
483
484	fsubd	%f28,c3two51,%f30
485
486	fmuld	%f6,pio2_1,%f2
487	ld	[%fp+nk0],%l0		! n
488
489	fmuld	%f14,pio2_1,%f10
490	ld	[%fp+nk1],%l1
491
492	fmuld	%f22,pio2_1,%f18
493	ld	[%fp+nk2],%l2
494
495	fmuld	%f30,pio2_1,%f26
496	ld	[%fp+nk3],%l3
497
498	fsubd	%f0,%f2,%f0
499	fmuld	%f6,pio2_2,%f4
500	add	%l0,1,%l0
501
502	fsubd	%f8,%f10,%f8
503	fmuld	%f14,pio2_2,%f12
504	add	%l1,1,%l1
505
506	fsubd	%f16,%f18,%f16
507	fmuld	%f22,pio2_2,%f20
508	add	%l2,1,%l2
509
510	fsubd	%f24,%f26,%f24
511	fmuld	%f30,pio2_2,%f28
512	add	%l3,1,%l3
513
514	fsubd	%f0,%f4,%f32
515
516	fsubd	%f8,%f12,%f34
517
518	fsubd	%f16,%f20,%f36
519
520	fsubd	%f24,%f28,%f38
521
522	fsubd	%f0,%f32,%f0
523	fcmple32 %f32,pio2_3,%l4	! x <= pio2_3 iff x < 0
524
525	fsubd	%f8,%f34,%f8
526	fcmple32 %f34,pio2_3,%l5
527
528	fsubd	%f16,%f36,%f16
529	fcmple32 %f36,pio2_3,%l6
530
531	fsubd	%f24,%f38,%f24
532	fcmple32 %f38,pio2_3,%l7
533
534	fsubd	%f0,%f4,%f0
535	fmuld	%f6,pio2_3,%f6
536	sll	%l4,30,%l4		! if (x < 0) n = -n ^ 2
537
538	fsubd	%f8,%f12,%f8
539	fmuld	%f14,pio2_3,%f14
540	sll	%l5,30,%l5
541
542	fsubd	%f16,%f20,%f16
543	fmuld	%f22,pio2_3,%f22
544	sll	%l6,30,%l6
545
546	fsubd	%f24,%f28,%f24
547	fmuld	%f30,pio2_3,%f30
548	sll	%l7,30,%l7
549
550	fsubd	%f6,%f0,%f6
551	sra	%l4,31,%l4
552
553	fsubd	%f14,%f8,%f14
554	sra	%l5,31,%l5
555
556	fsubd	%f22,%f16,%f22
557	sra	%l6,31,%l6
558
559	fsubd	%f30,%f24,%f30
560	sra	%l7,31,%l7
561
562	fsubd	%f32,%f6,%f0		! reduced x
563	xor	%l0,%l4,%l0
564
565	fsubd	%f34,%f14,%f8
566	xor	%l1,%l5,%l1
567
568	fsubd	%f36,%f22,%f16
569	xor	%l2,%l6,%l2
570
571	fsubd	%f38,%f30,%f24
572	xor	%l3,%l7,%l3
573
574	fabsd	%f0,%f2
575	sub	%l0,%l4,%l0
576
577	fabsd	%f8,%f10
578	sub	%l1,%l5,%l1
579
580	fabsd	%f16,%f18
581	sub	%l2,%l6,%l2
582
583	fabsd	%f24,%f26
584	sub	%l3,%l7,%l3
585
586	faddd	%f2,c3two44,%f4
587	st	%f5,[%fp+nk0]
588	and	%l4,2,%l4
589
590	faddd	%f10,c3two44,%f12
591	st	%f13,[%fp+nk1]
592	and	%l5,2,%l5
593
594	faddd	%f18,c3two44,%f20
595	st	%f21,[%fp+nk2]
596	and	%l6,2,%l6
597
598	faddd	%f26,c3two44,%f28
599	st	%f29,[%fp+nk3]
600	and	%l7,2,%l7
601
602	fsubd	%f32,%f0,%f4
603	xor	%l0,%l4,%l0
604
605	fsubd	%f34,%f8,%f12
606	xor	%l1,%l5,%l1
607
608	fsubd	%f36,%f16,%f20
609	xor	%l2,%l6,%l2
610
611	fsubd	%f38,%f24,%f28
612	xor	%l3,%l7,%l3
613
614	fzero	%f38
615	ld	[%fp+nk0],%l4
616
617	fsubd	%f4,%f6,%f6		! w
618	ld	[%fp+nk1],%l5
619
620	fsubd	%f12,%f14,%f14
621	ld	[%fp+nk2],%l6
622
623	fnegd	%f38,%f38
624	ld	[%fp+nk3],%l7
625	sll	%l4,5,%l4		! k
626
627	fsubd	%f20,%f22,%f22
628	sll	%l5,5,%l5
629
630	fsubd	%f28,%f30,%f30
631	sll	%l6,5,%l6
632
633	fand	%f0,%f38,%f32		! sign bit of x
634	ldd	[%l4+%g1],%f4
635	sll	%l7,5,%l7
636
637	fand	%f8,%f38,%f34
638	ldd	[%l5+%g1],%f12
639
640	fand	%f16,%f38,%f36
641	ldd	[%l6+%g1],%f20
642
643	fand	%f24,%f38,%f38
644	ldd	[%l7+%g1],%f28
645
646	fsubd	%f2,%f4,%f2		! x -= __vlibm_TBL_sincos2[k]
647
648	fsubd	%f10,%f12,%f10
649
650	fsubd	%f18,%f20,%f18
651	nop
652
653	fsubd	%f26,%f28,%f26
654	nop
655
656! 16-byte aligned
657	fmuld	%f2,%f2,%f0		! z = x * x
658	andcc	%l0,1,%g0
659	bz,pn	%icc,.case8
660! delay slot
661	fxor	%f6,%f32,%f32
662
663	fmuld	%f10,%f10,%f8
664	andcc	%l1,1,%g0
665	bz,pn	%icc,.case4
666! delay slot
667	fxor	%f14,%f34,%f34
668
669	fmuld	%f18,%f18,%f16
670	andcc	%l2,1,%g0
671	bz,pn	%icc,.case2
672! delay slot
673	fxor	%f22,%f36,%f36
674
675	fmuld	%f26,%f26,%f24
676	andcc	%l3,1,%g0
677	bz,pn	%icc,.case1
678! delay slot
679	fxor	%f30,%f38,%f38
680
681!.case0:
682	fmuld	%f0,qq3,%f6		! cos(x0)
683
684	fmuld	%f8,qq3,%f14		! cos(x1)
685
686	fmuld	%f16,qq3,%f22		! cos(x2)
687
688	fmuld	%f24,qq3,%f30		! cos(x3)
689
690	faddd	%f6,qq2,%f6
691	fmuld	%f0,pp2,%f4
692
693	faddd	%f14,qq2,%f14
694	fmuld	%f8,pp2,%f12
695
696	faddd	%f22,qq2,%f22
697	fmuld	%f16,pp2,%f20
698
699	faddd	%f30,qq2,%f30
700	fmuld	%f24,pp2,%f28
701
702	fmuld	%f0,%f6,%f6
703	faddd	%f4,pp1,%f4
704
705	fmuld	%f8,%f14,%f14
706	faddd	%f12,pp1,%f12
707
708	fmuld	%f16,%f22,%f22
709	faddd	%f20,pp1,%f20
710
711	fmuld	%f24,%f30,%f30
712	faddd	%f28,pp1,%f28
713
714	faddd	%f6,qq1,%f6
715	fmuld	%f0,%f4,%f4
716	add	%l4,%g1,%l4
717
718	faddd	%f14,qq1,%f14
719	fmuld	%f8,%f12,%f12
720	add	%l5,%g1,%l5
721
722	faddd	%f22,qq1,%f22
723	fmuld	%f16,%f20,%f20
724	add	%l6,%g1,%l6
725
726	faddd	%f30,qq1,%f30
727	fmuld	%f24,%f28,%f28
728	add	%l7,%g1,%l7
729
730	fmuld	%f2,%f4,%f4
731
732	fmuld	%f10,%f12,%f12
733
734	fmuld	%f18,%f20,%f20
735
736	fmuld	%f26,%f28,%f28
737
738	fmuld	%f0,%f6,%f6
739	faddd	%f4,%f32,%f4
740	ldd	[%l4+16],%f0
741
742	fmuld	%f8,%f14,%f14
743	faddd	%f12,%f34,%f12
744	ldd	[%l5+16],%f8
745
746	fmuld	%f16,%f22,%f22
747	faddd	%f20,%f36,%f20
748	ldd	[%l6+16],%f16
749
750	fmuld	%f24,%f30,%f30
751	faddd	%f28,%f38,%f28
752	ldd	[%l7+16],%f24
753
754	fmuld	%f0,%f6,%f6
755	faddd	%f4,%f2,%f4
756	ldd	[%l4+8],%f32
757
758	fmuld	%f8,%f14,%f14
759	faddd	%f12,%f10,%f12
760	ldd	[%l5+8],%f34
761
762	fmuld	%f16,%f22,%f22
763	faddd	%f20,%f18,%f20
764	ldd	[%l6+8],%f36
765
766	fmuld	%f24,%f30,%f30
767	faddd	%f28,%f26,%f28
768	ldd	[%l7+8],%f38
769
770	fmuld	%f32,%f4,%f4
771
772	fmuld	%f34,%f12,%f12
773
774	fmuld	%f36,%f20,%f20
775
776	fmuld	%f38,%f28,%f28
777
778	fsubd	%f6,%f4,%f6
779
780	fsubd	%f14,%f12,%f14
781
782	fsubd	%f22,%f20,%f22
783
784	fsubd	%f30,%f28,%f30
785
786	faddd	%f6,%f0,%f6
787
788	faddd	%f14,%f8,%f14
789
790	faddd	%f22,%f16,%f22
791
792	faddd	%f30,%f24,%f30
793	mov	%l0,%l4
794
795	fnegd	%f6,%f4
796	lda	[%i1]%asi,%l0		! preload next argument
797
798	fnegd	%f14,%f12
799	lda	[%i1]%asi,%f0
800
801	fnegd	%f22,%f20
802	lda	[%i1+4]%asi,%f3
803
804	fnegd	%f30,%f28
805	andn	%l0,%i5,%l0
806	add	%i1,%i2,%i1
807
808	andcc	%l4,2,%g0
809	fmovdnz	%icc,%f4,%f6
810	st	%f6,[%o0]
811
812	andcc	%l1,2,%g0
813	fmovdnz	%icc,%f12,%f14
814	st	%f14,[%o1]
815
816	andcc	%l2,2,%g0
817	fmovdnz	%icc,%f20,%f22
818	st	%f22,[%o2]
819
820	andcc	%l3,2,%g0
821	fmovdnz	%icc,%f28,%f30
822	st	%f30,[%o3]
823
824	addcc	%i0,-1,%i0
825	bg,pt	%icc,.loop0
826! delay slot
827	st	%f7,[%o0+4]
828
829	ba,pt	%icc,.end
830! delay slot
831	nop
832
833	.align	16
834.case1:
835	fmuld	%f24,pp3,%f30		! sin(x3)
836
837	fmuld	%f0,qq3,%f6		! cos(x0)
838
839	fmuld	%f8,qq3,%f14		! cos(x1)
840
841	fmuld	%f16,qq3,%f22		! cos(x2)
842
843	faddd	%f30,pp2,%f30
844	fmuld	%f24,qq2,%f28
845
846	faddd	%f6,qq2,%f6
847	fmuld	%f0,pp2,%f4
848
849	faddd	%f14,qq2,%f14
850	fmuld	%f8,pp2,%f12
851
852	faddd	%f22,qq2,%f22
853	fmuld	%f16,pp2,%f20
854
855	fmuld	%f24,%f30,%f30
856	faddd	%f28,qq1,%f28
857
858	fmuld	%f0,%f6,%f6
859	faddd	%f4,pp1,%f4
860
861	fmuld	%f8,%f14,%f14
862	faddd	%f12,pp1,%f12
863
864	fmuld	%f16,%f22,%f22
865	faddd	%f20,pp1,%f20
866
867	faddd	%f30,pp1,%f30
868	fmuld	%f24,%f28,%f28
869	add	%l7,%g1,%l7
870
871	faddd	%f6,qq1,%f6
872	fmuld	%f0,%f4,%f4
873	add	%l4,%g1,%l4
874
875	faddd	%f14,qq1,%f14
876	fmuld	%f8,%f12,%f12
877	add	%l5,%g1,%l5
878
879	faddd	%f22,qq1,%f22
880	fmuld	%f16,%f20,%f20
881	add	%l6,%g1,%l6
882
883	fmuld	%f24,%f30,%f30
884
885	fmuld	%f2,%f4,%f4
886
887	fmuld	%f10,%f12,%f12
888
889	fmuld	%f18,%f20,%f20
890
891	fmuld	%f26,%f30,%f30
892	ldd	[%l7+8],%f24
893
894	fmuld	%f0,%f6,%f6
895	faddd	%f4,%f32,%f4
896	ldd	[%l4+16],%f0
897
898	fmuld	%f8,%f14,%f14
899	faddd	%f12,%f34,%f12
900	ldd	[%l5+16],%f8
901
902	fmuld	%f16,%f22,%f22
903	faddd	%f20,%f36,%f20
904	ldd	[%l6+16],%f16
905
906	fmuld	%f24,%f28,%f28
907	faddd	%f38,%f30,%f30
908
909	fmuld	%f0,%f6,%f6
910	faddd	%f4,%f2,%f4
911	ldd	[%l4+8],%f32
912
913	fmuld	%f8,%f14,%f14
914	faddd	%f12,%f10,%f12
915	ldd	[%l5+8],%f34
916
917	fmuld	%f16,%f22,%f22
918	faddd	%f20,%f18,%f20
919	ldd	[%l6+8],%f36
920
921	faddd	%f26,%f30,%f30
922	ldd	[%l7+16],%f38
923
924	fmuld	%f32,%f4,%f4
925
926	fmuld	%f34,%f12,%f12
927
928	fmuld	%f36,%f20,%f20
929
930	fmuld	%f38,%f30,%f30
931
932	fsubd	%f6,%f4,%f6
933
934	fsubd	%f14,%f12,%f14
935
936	fsubd	%f22,%f20,%f22
937
938	faddd	%f30,%f28,%f30
939
940	faddd	%f6,%f0,%f6
941
942	faddd	%f14,%f8,%f14
943
944	faddd	%f22,%f16,%f22
945
946	faddd	%f30,%f24,%f30
947	mov	%l0,%l4
948
949	fnegd	%f6,%f4
950	lda	[%i1]%asi,%l0		! preload next argument
951
952	fnegd	%f14,%f12
953	lda	[%i1]%asi,%f0
954
955	fnegd	%f22,%f20
956	lda	[%i1+4]%asi,%f3
957
958	fnegd	%f30,%f28
959	andn	%l0,%i5,%l0
960	add	%i1,%i2,%i1
961
962	andcc	%l4,2,%g0
963	fmovdnz	%icc,%f4,%f6
964	st	%f6,[%o0]
965
966	andcc	%l1,2,%g0
967	fmovdnz	%icc,%f12,%f14
968	st	%f14,[%o1]
969
970	andcc	%l2,2,%g0
971	fmovdnz	%icc,%f20,%f22
972	st	%f22,[%o2]
973
974	andcc	%l3,2,%g0
975	fmovdnz	%icc,%f28,%f30
976	st	%f30,[%o3]
977
978	addcc	%i0,-1,%i0
979	bg,pt	%icc,.loop0
980! delay slot
981	st	%f7,[%o0+4]
982
983	ba,pt	%icc,.end
984! delay slot
985	nop
986
987	.align	16
988.case2:
989	fmuld	%f26,%f26,%f24
990	andcc	%l3,1,%g0
991	bz,pn	%icc,.case3
992! delay slot
993	fxor	%f30,%f38,%f38
994
995	fmuld	%f16,pp3,%f22		! sin(x2)
996
997	fmuld	%f0,qq3,%f6		! cos(x0)
998
999	fmuld	%f8,qq3,%f14		! cos(x1)
1000
1001	faddd	%f22,pp2,%f22
1002	fmuld	%f16,qq2,%f20
1003
1004	fmuld	%f24,qq3,%f30		! cos(x3)
1005
1006	faddd	%f6,qq2,%f6
1007	fmuld	%f0,pp2,%f4
1008
1009	faddd	%f14,qq2,%f14
1010	fmuld	%f8,pp2,%f12
1011
1012	fmuld	%f16,%f22,%f22
1013	faddd	%f20,qq1,%f20
1014
1015	faddd	%f30,qq2,%f30
1016	fmuld	%f24,pp2,%f28
1017
1018	fmuld	%f0,%f6,%f6
1019	faddd	%f4,pp1,%f4
1020
1021	fmuld	%f8,%f14,%f14
1022	faddd	%f12,pp1,%f12
1023
1024	faddd	%f22,pp1,%f22
1025	fmuld	%f16,%f20,%f20
1026	add	%l6,%g1,%l6
1027
1028	fmuld	%f24,%f30,%f30
1029	faddd	%f28,pp1,%f28
1030
1031	faddd	%f6,qq1,%f6
1032	fmuld	%f0,%f4,%f4
1033	add	%l4,%g1,%l4
1034
1035	faddd	%f14,qq1,%f14
1036	fmuld	%f8,%f12,%f12
1037	add	%l5,%g1,%l5
1038
1039	fmuld	%f16,%f22,%f22
1040
1041	faddd	%f30,qq1,%f30
1042	fmuld	%f24,%f28,%f28
1043	add	%l7,%g1,%l7
1044
1045	fmuld	%f2,%f4,%f4
1046
1047	fmuld	%f10,%f12,%f12
1048
1049	fmuld	%f18,%f22,%f22
1050	ldd	[%l6+8],%f16
1051
1052	fmuld	%f26,%f28,%f28
1053
1054	fmuld	%f0,%f6,%f6
1055	faddd	%f4,%f32,%f4
1056	ldd	[%l4+16],%f0
1057
1058	fmuld	%f8,%f14,%f14
1059	faddd	%f12,%f34,%f12
1060	ldd	[%l5+16],%f8
1061
1062	fmuld	%f16,%f20,%f20
1063	faddd	%f36,%f22,%f22
1064
1065	fmuld	%f24,%f30,%f30
1066	faddd	%f28,%f38,%f28
1067	ldd	[%l7+16],%f24
1068
1069	fmuld	%f0,%f6,%f6
1070	faddd	%f4,%f2,%f4
1071	ldd	[%l4+8],%f32
1072
1073	fmuld	%f8,%f14,%f14
1074	faddd	%f12,%f10,%f12
1075	ldd	[%l5+8],%f34
1076
1077	faddd	%f18,%f22,%f22
1078	ldd	[%l6+16],%f36
1079
1080	fmuld	%f24,%f30,%f30
1081	faddd	%f28,%f26,%f28
1082	ldd	[%l7+8],%f38
1083
1084	fmuld	%f32,%f4,%f4
1085
1086	fmuld	%f34,%f12,%f12
1087
1088	fmuld	%f36,%f22,%f22
1089
1090	fmuld	%f38,%f28,%f28
1091
1092	fsubd	%f6,%f4,%f6
1093
1094	fsubd	%f14,%f12,%f14
1095
1096	faddd	%f22,%f20,%f22
1097
1098	fsubd	%f30,%f28,%f30
1099
1100	faddd	%f6,%f0,%f6
1101
1102	faddd	%f14,%f8,%f14
1103
1104	faddd	%f22,%f16,%f22
1105
1106	faddd	%f30,%f24,%f30
1107	mov	%l0,%l4
1108
1109	fnegd	%f6,%f4
1110	lda	[%i1]%asi,%l0		! preload next argument
1111
1112	fnegd	%f14,%f12
1113	lda	[%i1]%asi,%f0
1114
1115	fnegd	%f22,%f20
1116	lda	[%i1+4]%asi,%f3
1117
1118	fnegd	%f30,%f28
1119	andn	%l0,%i5,%l0
1120	add	%i1,%i2,%i1
1121
1122	andcc	%l4,2,%g0
1123	fmovdnz	%icc,%f4,%f6
1124	st	%f6,[%o0]
1125
1126	andcc	%l1,2,%g0
1127	fmovdnz	%icc,%f12,%f14
1128	st	%f14,[%o1]
1129
1130	andcc	%l2,2,%g0
1131	fmovdnz	%icc,%f20,%f22
1132	st	%f22,[%o2]
1133
1134	andcc	%l3,2,%g0
1135	fmovdnz	%icc,%f28,%f30
1136	st	%f30,[%o3]
1137
1138	addcc	%i0,-1,%i0
1139	bg,pt	%icc,.loop0
1140! delay slot
1141	st	%f7,[%o0+4]
1142
1143	ba,pt	%icc,.end
1144! delay slot
1145	nop
1146
1147	.align	16
1148.case3:
1149	fmuld	%f16,pp3,%f22		! sin(x2)
1150
1151	fmuld	%f24,pp3,%f30		! sin(x3)
1152
1153	fmuld	%f0,qq3,%f6		! cos(x0)
1154
1155	fmuld	%f8,qq3,%f14		! cos(x1)
1156
1157	faddd	%f22,pp2,%f22
1158	fmuld	%f16,qq2,%f20
1159
1160	faddd	%f30,pp2,%f30
1161	fmuld	%f24,qq2,%f28
1162
1163	faddd	%f6,qq2,%f6
1164	fmuld	%f0,pp2,%f4
1165
1166	faddd	%f14,qq2,%f14
1167	fmuld	%f8,pp2,%f12
1168
1169	fmuld	%f16,%f22,%f22
1170	faddd	%f20,qq1,%f20
1171
1172	fmuld	%f24,%f30,%f30
1173	faddd	%f28,qq1,%f28
1174
1175	fmuld	%f0,%f6,%f6
1176	faddd	%f4,pp1,%f4
1177
1178	fmuld	%f8,%f14,%f14
1179	faddd	%f12,pp1,%f12
1180
1181	faddd	%f22,pp1,%f22
1182	fmuld	%f16,%f20,%f20
1183	add	%l6,%g1,%l6
1184
1185	faddd	%f30,pp1,%f30
1186	fmuld	%f24,%f28,%f28
1187	add	%l7,%g1,%l7
1188
1189	faddd	%f6,qq1,%f6
1190	fmuld	%f0,%f4,%f4
1191	add	%l4,%g1,%l4
1192
1193	faddd	%f14,qq1,%f14
1194	fmuld	%f8,%f12,%f12
1195	add	%l5,%g1,%l5
1196
1197	fmuld	%f16,%f22,%f22
1198
1199	fmuld	%f24,%f30,%f30
1200
1201	fmuld	%f2,%f4,%f4
1202
1203	fmuld	%f10,%f12,%f12
1204
1205	fmuld	%f18,%f22,%f22
1206	ldd	[%l6+8],%f16
1207
1208	fmuld	%f26,%f30,%f30
1209	ldd	[%l7+8],%f24
1210
1211	fmuld	%f0,%f6,%f6
1212	faddd	%f4,%f32,%f4
1213	ldd	[%l4+16],%f0
1214
1215	fmuld	%f8,%f14,%f14
1216	faddd	%f12,%f34,%f12
1217	ldd	[%l5+16],%f8
1218
1219	fmuld	%f16,%f20,%f20
1220	faddd	%f36,%f22,%f22
1221
1222	fmuld	%f24,%f28,%f28
1223	faddd	%f38,%f30,%f30
1224
1225	fmuld	%f0,%f6,%f6
1226	faddd	%f4,%f2,%f4
1227	ldd	[%l4+8],%f32
1228
1229	fmuld	%f8,%f14,%f14
1230	faddd	%f12,%f10,%f12
1231	ldd	[%l5+8],%f34
1232
1233	faddd	%f18,%f22,%f22
1234	ldd	[%l6+16],%f36
1235
1236	faddd	%f26,%f30,%f30
1237	ldd	[%l7+16],%f38
1238
1239	fmuld	%f32,%f4,%f4
1240
1241	fmuld	%f34,%f12,%f12
1242
1243	fmuld	%f36,%f22,%f22
1244
1245	fmuld	%f38,%f30,%f30
1246
1247	fsubd	%f6,%f4,%f6
1248
1249	fsubd	%f14,%f12,%f14
1250
1251	faddd	%f22,%f20,%f22
1252
1253	faddd	%f30,%f28,%f30
1254
1255	faddd	%f6,%f0,%f6
1256
1257	faddd	%f14,%f8,%f14
1258
1259	faddd	%f22,%f16,%f22
1260
1261	faddd	%f30,%f24,%f30
1262	mov	%l0,%l4
1263
1264	fnegd	%f6,%f4
1265	lda	[%i1]%asi,%l0		! preload next argument
1266
1267	fnegd	%f14,%f12
1268	lda	[%i1]%asi,%f0
1269
1270	fnegd	%f22,%f20
1271	lda	[%i1+4]%asi,%f3
1272
1273	fnegd	%f30,%f28
1274	andn	%l0,%i5,%l0
1275	add	%i1,%i2,%i1
1276
1277	andcc	%l4,2,%g0
1278	fmovdnz	%icc,%f4,%f6
1279	st	%f6,[%o0]
1280
1281	andcc	%l1,2,%g0
1282	fmovdnz	%icc,%f12,%f14
1283	st	%f14,[%o1]
1284
1285	andcc	%l2,2,%g0
1286	fmovdnz	%icc,%f20,%f22
1287	st	%f22,[%o2]
1288
1289	andcc	%l3,2,%g0
1290	fmovdnz	%icc,%f28,%f30
1291	st	%f30,[%o3]
1292
1293	addcc	%i0,-1,%i0
1294	bg,pt	%icc,.loop0
1295! delay slot
1296	st	%f7,[%o0+4]
1297
1298	ba,pt	%icc,.end
1299! delay slot
1300	nop
1301
1302	.align	16
1303.case4:
1304	fmuld	%f18,%f18,%f16
1305	andcc	%l2,1,%g0
1306	bz,pn	%icc,.case6
1307! delay slot
1308	fxor	%f22,%f36,%f36
1309
1310	fmuld	%f26,%f26,%f24
1311	andcc	%l3,1,%g0
1312	bz,pn	%icc,.case5
1313! delay slot
1314	fxor	%f30,%f38,%f38
1315
1316	fmuld	%f8,pp3,%f14		! sin(x1)
1317
1318	fmuld	%f0,qq3,%f6		! cos(x0)
1319
1320	faddd	%f14,pp2,%f14
1321	fmuld	%f8,qq2,%f12
1322
1323	fmuld	%f16,qq3,%f22		! cos(x2)
1324
1325	fmuld	%f24,qq3,%f30		! cos(x3)
1326
1327	faddd	%f6,qq2,%f6
1328	fmuld	%f0,pp2,%f4
1329
1330	fmuld	%f8,%f14,%f14
1331	faddd	%f12,qq1,%f12
1332
1333	faddd	%f22,qq2,%f22
1334	fmuld	%f16,pp2,%f20
1335
1336	faddd	%f30,qq2,%f30
1337	fmuld	%f24,pp2,%f28
1338
1339	fmuld	%f0,%f6,%f6
1340	faddd	%f4,pp1,%f4
1341
1342	faddd	%f14,pp1,%f14
1343	fmuld	%f8,%f12,%f12
1344	add	%l5,%g1,%l5
1345
1346	fmuld	%f16,%f22,%f22
1347	faddd	%f20,pp1,%f20
1348
1349	fmuld	%f24,%f30,%f30
1350	faddd	%f28,pp1,%f28
1351
1352	faddd	%f6,qq1,%f6
1353	fmuld	%f0,%f4,%f4
1354	add	%l4,%g1,%l4
1355
1356	fmuld	%f8,%f14,%f14
1357
1358	faddd	%f22,qq1,%f22
1359	fmuld	%f16,%f20,%f20
1360	add	%l6,%g1,%l6
1361
1362	faddd	%f30,qq1,%f30
1363	fmuld	%f24,%f28,%f28
1364	add	%l7,%g1,%l7
1365
1366	fmuld	%f2,%f4,%f4
1367
1368	fmuld	%f10,%f14,%f14
1369	ldd	[%l5+8],%f8
1370
1371	fmuld	%f18,%f20,%f20
1372
1373	fmuld	%f26,%f28,%f28
1374
1375	fmuld	%f0,%f6,%f6
1376	faddd	%f4,%f32,%f4
1377	ldd	[%l4+16],%f0
1378
1379	fmuld	%f8,%f12,%f12
1380	faddd	%f34,%f14,%f14
1381
1382	fmuld	%f16,%f22,%f22
1383	faddd	%f20,%f36,%f20
1384	ldd	[%l6+16],%f16
1385
1386	fmuld	%f24,%f30,%f30
1387	faddd	%f28,%f38,%f28
1388	ldd	[%l7+16],%f24
1389
1390	fmuld	%f0,%f6,%f6
1391	faddd	%f4,%f2,%f4
1392	ldd	[%l4+8],%f32
1393
1394	faddd	%f10,%f14,%f14
1395	ldd	[%l5+16],%f34
1396
1397	fmuld	%f16,%f22,%f22
1398	faddd	%f20,%f18,%f20
1399	ldd	[%l6+8],%f36
1400
1401	fmuld	%f24,%f30,%f30
1402	faddd	%f28,%f26,%f28
1403	ldd	[%l7+8],%f38
1404
1405	fmuld	%f32,%f4,%f4
1406
1407	fmuld	%f34,%f14,%f14
1408
1409	fmuld	%f36,%f20,%f20
1410
1411	fmuld	%f38,%f28,%f28
1412
1413	fsubd	%f6,%f4,%f6
1414
1415	faddd	%f14,%f12,%f14
1416
1417	fsubd	%f22,%f20,%f22
1418
1419	fsubd	%f30,%f28,%f30
1420
1421	faddd	%f6,%f0,%f6
1422
1423	faddd	%f14,%f8,%f14
1424
1425	faddd	%f22,%f16,%f22
1426
1427	faddd	%f30,%f24,%f30
1428	mov	%l0,%l4
1429
1430	fnegd	%f6,%f4
1431	lda	[%i1]%asi,%l0		! preload next argument
1432
1433	fnegd	%f14,%f12
1434	lda	[%i1]%asi,%f0
1435
1436	fnegd	%f22,%f20
1437	lda	[%i1+4]%asi,%f3
1438
1439	fnegd	%f30,%f28
1440	andn	%l0,%i5,%l0
1441	add	%i1,%i2,%i1
1442
1443	andcc	%l4,2,%g0
1444	fmovdnz	%icc,%f4,%f6
1445	st	%f6,[%o0]
1446
1447	andcc	%l1,2,%g0
1448	fmovdnz	%icc,%f12,%f14
1449	st	%f14,[%o1]
1450
1451	andcc	%l2,2,%g0
1452	fmovdnz	%icc,%f20,%f22
1453	st	%f22,[%o2]
1454
1455	andcc	%l3,2,%g0
1456	fmovdnz	%icc,%f28,%f30
1457	st	%f30,[%o3]
1458
1459	addcc	%i0,-1,%i0
1460	bg,pt	%icc,.loop0
1461! delay slot
1462	st	%f7,[%o0+4]
1463
1464	ba,pt	%icc,.end
1465! delay slot
1466	nop
1467
1468	.align	16
1469.case5:
1470	fmuld	%f8,pp3,%f14		! sin(x1)
1471
1472	fmuld	%f24,pp3,%f30		! sin(x3)
1473
1474	fmuld	%f0,qq3,%f6		! cos(x0)
1475
1476	faddd	%f14,pp2,%f14
1477	fmuld	%f8,qq2,%f12
1478
1479	fmuld	%f16,qq3,%f22		! cos(x2)
1480
1481	faddd	%f30,pp2,%f30
1482	fmuld	%f24,qq2,%f28
1483
1484	faddd	%f6,qq2,%f6
1485	fmuld	%f0,pp2,%f4
1486
1487	fmuld	%f8,%f14,%f14
1488	faddd	%f12,qq1,%f12
1489
1490	faddd	%f22,qq2,%f22
1491	fmuld	%f16,pp2,%f20
1492
1493	fmuld	%f24,%f30,%f30
1494	faddd	%f28,qq1,%f28
1495
1496	fmuld	%f0,%f6,%f6
1497	faddd	%f4,pp1,%f4
1498
1499	faddd	%f14,pp1,%f14
1500	fmuld	%f8,%f12,%f12
1501	add	%l5,%g1,%l5
1502
1503	fmuld	%f16,%f22,%f22
1504	faddd	%f20,pp1,%f20
1505
1506	faddd	%f30,pp1,%f30
1507	fmuld	%f24,%f28,%f28
1508	add	%l7,%g1,%l7
1509
1510	faddd	%f6,qq1,%f6
1511	fmuld	%f0,%f4,%f4
1512	add	%l4,%g1,%l4
1513
1514	fmuld	%f8,%f14,%f14
1515
1516	faddd	%f22,qq1,%f22
1517	fmuld	%f16,%f20,%f20
1518	add	%l6,%g1,%l6
1519
1520	fmuld	%f24,%f30,%f30
1521
1522	fmuld	%f2,%f4,%f4
1523
1524	fmuld	%f10,%f14,%f14
1525	ldd	[%l5+8],%f8
1526
1527	fmuld	%f18,%f20,%f20
1528
1529	fmuld	%f26,%f30,%f30
1530	ldd	[%l7+8],%f24
1531
1532	fmuld	%f0,%f6,%f6
1533	faddd	%f4,%f32,%f4
1534	ldd	[%l4+16],%f0
1535
1536	fmuld	%f8,%f12,%f12
1537	faddd	%f34,%f14,%f14
1538
1539	fmuld	%f16,%f22,%f22
1540	faddd	%f20,%f36,%f20
1541	ldd	[%l6+16],%f16
1542
1543	fmuld	%f24,%f28,%f28
1544	faddd	%f38,%f30,%f30
1545
1546	fmuld	%f0,%f6,%f6
1547	faddd	%f4,%f2,%f4
1548	ldd	[%l4+8],%f32
1549
1550	faddd	%f10,%f14,%f14
1551	ldd	[%l5+16],%f34
1552
1553	fmuld	%f16,%f22,%f22
1554	faddd	%f20,%f18,%f20
1555	ldd	[%l6+8],%f36
1556
1557	faddd	%f26,%f30,%f30
1558	ldd	[%l7+16],%f38
1559
1560	fmuld	%f32,%f4,%f4
1561
1562	fmuld	%f34,%f14,%f14
1563
1564	fmuld	%f36,%f20,%f20
1565
1566	fmuld	%f38,%f30,%f30
1567
1568	fsubd	%f6,%f4,%f6
1569
1570	faddd	%f14,%f12,%f14
1571
1572	fsubd	%f22,%f20,%f22
1573
1574	faddd	%f30,%f28,%f30
1575
1576	faddd	%f6,%f0,%f6
1577
1578	faddd	%f14,%f8,%f14
1579
1580	faddd	%f22,%f16,%f22
1581
1582	faddd	%f30,%f24,%f30
1583	mov	%l0,%l4
1584
1585	fnegd	%f6,%f4
1586	lda	[%i1]%asi,%l0		! preload next argument
1587
1588	fnegd	%f14,%f12
1589	lda	[%i1]%asi,%f0
1590
1591	fnegd	%f22,%f20
1592	lda	[%i1+4]%asi,%f3
1593
1594	fnegd	%f30,%f28
1595	andn	%l0,%i5,%l0
1596	add	%i1,%i2,%i1
1597
1598	andcc	%l4,2,%g0
1599	fmovdnz	%icc,%f4,%f6
1600	st	%f6,[%o0]
1601
1602	andcc	%l1,2,%g0
1603	fmovdnz	%icc,%f12,%f14
1604	st	%f14,[%o1]
1605
1606	andcc	%l2,2,%g0
1607	fmovdnz	%icc,%f20,%f22
1608	st	%f22,[%o2]
1609
1610	andcc	%l3,2,%g0
1611	fmovdnz	%icc,%f28,%f30
1612	st	%f30,[%o3]
1613
1614	addcc	%i0,-1,%i0
1615	bg,pt	%icc,.loop0
1616! delay slot
1617	st	%f7,[%o0+4]
1618
1619	ba,pt	%icc,.end
1620! delay slot
1621	nop
1622
1623	.align	16
1624.case6:
1625	fmuld	%f26,%f26,%f24
1626	andcc	%l3,1,%g0
1627	bz,pn	%icc,.case7
1628! delay slot
1629	fxor	%f30,%f38,%f38
1630
1631	fmuld	%f8,pp3,%f14		! sin(x1)
1632
1633	fmuld	%f16,pp3,%f22		! sin(x2)
1634
1635	fmuld	%f0,qq3,%f6		! cos(x0)
1636
1637	faddd	%f14,pp2,%f14
1638	fmuld	%f8,qq2,%f12
1639
1640	faddd	%f22,pp2,%f22
1641	fmuld	%f16,qq2,%f20
1642
1643	fmuld	%f24,qq3,%f30		! cos(x3)
1644
1645	faddd	%f6,qq2,%f6
1646	fmuld	%f0,pp2,%f4
1647
1648	fmuld	%f8,%f14,%f14
1649	faddd	%f12,qq1,%f12
1650
1651	fmuld	%f16,%f22,%f22
1652	faddd	%f20,qq1,%f20
1653
1654	faddd	%f30,qq2,%f30
1655	fmuld	%f24,pp2,%f28
1656
1657	fmuld	%f0,%f6,%f6
1658	faddd	%f4,pp1,%f4
1659
1660	faddd	%f14,pp1,%f14
1661	fmuld	%f8,%f12,%f12
1662	add	%l5,%g1,%l5
1663
1664	faddd	%f22,pp1,%f22
1665	fmuld	%f16,%f20,%f20
1666	add	%l6,%g1,%l6
1667
1668	fmuld	%f24,%f30,%f30
1669	faddd	%f28,pp1,%f28
1670
1671	faddd	%f6,qq1,%f6
1672	fmuld	%f0,%f4,%f4
1673	add	%l4,%g1,%l4
1674
1675	fmuld	%f8,%f14,%f14
1676
1677	fmuld	%f16,%f22,%f22
1678
1679	faddd	%f30,qq1,%f30
1680	fmuld	%f24,%f28,%f28
1681	add	%l7,%g1,%l7
1682
1683	fmuld	%f2,%f4,%f4
1684
1685	fmuld	%f10,%f14,%f14
1686	ldd	[%l5+8],%f8
1687
1688	fmuld	%f18,%f22,%f22
1689	ldd	[%l6+8],%f16
1690
1691	fmuld	%f26,%f28,%f28
1692
1693	fmuld	%f0,%f6,%f6
1694	faddd	%f4,%f32,%f4
1695	ldd	[%l4+16],%f0
1696
1697	fmuld	%f8,%f12,%f12
1698	faddd	%f34,%f14,%f14
1699
1700	fmuld	%f16,%f20,%f20
1701	faddd	%f36,%f22,%f22
1702
1703	fmuld	%f24,%f30,%f30
1704	faddd	%f28,%f38,%f28
1705	ldd	[%l7+16],%f24
1706
1707	fmuld	%f0,%f6,%f6
1708	faddd	%f4,%f2,%f4
1709	ldd	[%l4+8],%f32
1710
1711	faddd	%f10,%f14,%f14
1712	ldd	[%l5+16],%f34
1713
1714	faddd	%f18,%f22,%f22
1715	ldd	[%l6+16],%f36
1716
1717	fmuld	%f24,%f30,%f30
1718	faddd	%f28,%f26,%f28
1719	ldd	[%l7+8],%f38
1720
1721	fmuld	%f32,%f4,%f4
1722
1723	fmuld	%f34,%f14,%f14
1724
1725	fmuld	%f36,%f22,%f22
1726
1727	fmuld	%f38,%f28,%f28
1728
1729	fsubd	%f6,%f4,%f6
1730
1731	faddd	%f14,%f12,%f14
1732
1733	faddd	%f22,%f20,%f22
1734
1735	fsubd	%f30,%f28,%f30
1736
1737	faddd	%f6,%f0,%f6
1738
1739	faddd	%f14,%f8,%f14
1740
1741	faddd	%f22,%f16,%f22
1742
1743	faddd	%f30,%f24,%f30
1744	mov	%l0,%l4
1745
1746	fnegd	%f6,%f4
1747	lda	[%i1]%asi,%l0		! preload next argument
1748
1749	fnegd	%f14,%f12
1750	lda	[%i1]%asi,%f0
1751
1752	fnegd	%f22,%f20
1753	lda	[%i1+4]%asi,%f3
1754
1755	fnegd	%f30,%f28
1756	andn	%l0,%i5,%l0
1757	add	%i1,%i2,%i1
1758
1759	andcc	%l4,2,%g0
1760	fmovdnz	%icc,%f4,%f6
1761	st	%f6,[%o0]
1762
1763	andcc	%l1,2,%g0
1764	fmovdnz	%icc,%f12,%f14
1765	st	%f14,[%o1]
1766
1767	andcc	%l2,2,%g0
1768	fmovdnz	%icc,%f20,%f22
1769	st	%f22,[%o2]
1770
1771	andcc	%l3,2,%g0
1772	fmovdnz	%icc,%f28,%f30
1773	st	%f30,[%o3]
1774
1775	addcc	%i0,-1,%i0
1776	bg,pt	%icc,.loop0
1777! delay slot
1778	st	%f7,[%o0+4]
1779
1780	ba,pt	%icc,.end
1781! delay slot
1782	nop
1783
1784	.align	16
1785.case7:
1786	fmuld	%f8,pp3,%f14		! sin(x1)
1787
1788	fmuld	%f16,pp3,%f22		! sin(x2)
1789
1790	fmuld	%f24,pp3,%f30		! sin(x3)
1791
1792	fmuld	%f0,qq3,%f6		! cos(x0)
1793
1794	faddd	%f14,pp2,%f14
1795	fmuld	%f8,qq2,%f12
1796
1797	faddd	%f22,pp2,%f22
1798	fmuld	%f16,qq2,%f20
1799
1800	faddd	%f30,pp2,%f30
1801	fmuld	%f24,qq2,%f28
1802
1803	faddd	%f6,qq2,%f6
1804	fmuld	%f0,pp2,%f4
1805
1806	fmuld	%f8,%f14,%f14
1807	faddd	%f12,qq1,%f12
1808
1809	fmuld	%f16,%f22,%f22
1810	faddd	%f20,qq1,%f20
1811
1812	fmuld	%f24,%f30,%f30
1813	faddd	%f28,qq1,%f28
1814
1815	fmuld	%f0,%f6,%f6
1816	faddd	%f4,pp1,%f4
1817
1818	faddd	%f14,pp1,%f14
1819	fmuld	%f8,%f12,%f12
1820	add	%l5,%g1,%l5
1821
1822	faddd	%f22,pp1,%f22
1823	fmuld	%f16,%f20,%f20
1824	add	%l6,%g1,%l6
1825
1826	faddd	%f30,pp1,%f30
1827	fmuld	%f24,%f28,%f28
1828	add	%l7,%g1,%l7
1829
1830	faddd	%f6,qq1,%f6
1831	fmuld	%f0,%f4,%f4
1832	add	%l4,%g1,%l4
1833
1834	fmuld	%f8,%f14,%f14
1835
1836	fmuld	%f16,%f22,%f22
1837
1838	fmuld	%f24,%f30,%f30
1839
1840	fmuld	%f2,%f4,%f4
1841
1842	fmuld	%f10,%f14,%f14
1843	ldd	[%l5+8],%f8
1844
1845	fmuld	%f18,%f22,%f22
1846	ldd	[%l6+8],%f16
1847
1848	fmuld	%f26,%f30,%f30
1849	ldd	[%l7+8],%f24
1850
1851	fmuld	%f0,%f6,%f6
1852	faddd	%f4,%f32,%f4
1853	ldd	[%l4+16],%f0
1854
1855	fmuld	%f8,%f12,%f12
1856	faddd	%f34,%f14,%f14
1857
1858	fmuld	%f16,%f20,%f20
1859	faddd	%f36,%f22,%f22
1860
1861	fmuld	%f24,%f28,%f28
1862	faddd	%f38,%f30,%f30
1863
1864	fmuld	%f0,%f6,%f6
1865	faddd	%f4,%f2,%f4
1866	ldd	[%l4+8],%f32
1867
1868	faddd	%f10,%f14,%f14
1869	ldd	[%l5+16],%f34
1870
1871	faddd	%f18,%f22,%f22
1872	ldd	[%l6+16],%f36
1873
1874	faddd	%f26,%f30,%f30
1875	ldd	[%l7+16],%f38
1876
1877	fmuld	%f32,%f4,%f4
1878
1879	fmuld	%f34,%f14,%f14
1880
1881	fmuld	%f36,%f22,%f22
1882
1883	fmuld	%f38,%f30,%f30
1884
1885	fsubd	%f6,%f4,%f6
1886
1887	faddd	%f14,%f12,%f14
1888
1889	faddd	%f22,%f20,%f22
1890
1891	faddd	%f30,%f28,%f30
1892
1893	faddd	%f6,%f0,%f6
1894
1895	faddd	%f14,%f8,%f14
1896
1897	faddd	%f22,%f16,%f22
1898
1899	faddd	%f30,%f24,%f30
1900	mov	%l0,%l4
1901
1902	fnegd	%f6,%f4
1903	lda	[%i1]%asi,%l0		! preload next argument
1904
1905	fnegd	%f14,%f12
1906	lda	[%i1]%asi,%f0
1907
1908	fnegd	%f22,%f20
1909	lda	[%i1+4]%asi,%f3
1910
1911	fnegd	%f30,%f28
1912	andn	%l0,%i5,%l0
1913	add	%i1,%i2,%i1
1914
1915	andcc	%l4,2,%g0
1916	fmovdnz	%icc,%f4,%f6
1917	st	%f6,[%o0]
1918
1919	andcc	%l1,2,%g0
1920	fmovdnz	%icc,%f12,%f14
1921	st	%f14,[%o1]
1922
1923	andcc	%l2,2,%g0
1924	fmovdnz	%icc,%f20,%f22
1925	st	%f22,[%o2]
1926
1927	andcc	%l3,2,%g0
1928	fmovdnz	%icc,%f28,%f30
1929	st	%f30,[%o3]
1930
1931	addcc	%i0,-1,%i0
1932	bg,pt	%icc,.loop0
1933! delay slot
1934	st	%f7,[%o0+4]
1935
1936	ba,pt	%icc,.end
1937! delay slot
1938	nop
1939
	.align	16
.case8:
! Quadrant-parity dispatch continues here with bit 0 of n0 clear
! (sine polynomial for x0).  The low bits of n1..n3 are tested in
! turn; a clear bit branches off to the case handling that mix.
! Fall-through: bits set for n1..n3, i.e. sin(x0), cos(x1..x3).
	fmuld	%f10,%f10,%f8
	andcc	%l1,1,%g0
	bz,pn	%icc,.case12
! delay slot
	fxor	%f14,%f34,%f34

	fmuld	%f18,%f18,%f16
	andcc	%l2,1,%g0
	bz,pn	%icc,.case10
! delay slot
	fxor	%f22,%f36,%f36

	fmuld	%f26,%f26,%f24
	andcc	%l3,1,%g0
	bz,pn	%icc,.case9
! delay slot
	fxor	%f30,%f38,%f38

	fmuld	%f0,pp3,%f6		! sin(x0)

	faddd	%f6,pp2,%f6
	fmuld	%f0,qq2,%f4

	fmuld	%f8,qq3,%f14		! cos(x1)

	fmuld	%f16,qq3,%f22		! cos(x2)

	fmuld	%f24,qq3,%f30		! cos(x3)

	fmuld	%f0,%f6,%f6
	faddd	%f4,qq1,%f4

	faddd	%f14,qq2,%f14
	fmuld	%f8,pp2,%f12

	faddd	%f22,qq2,%f22
	fmuld	%f16,pp2,%f20

	faddd	%f30,qq2,%f30
	fmuld	%f24,pp2,%f28

	faddd	%f6,pp1,%f6
	fmuld	%f0,%f4,%f4
	add	%l4,%g1,%l4

	fmuld	%f8,%f14,%f14
	faddd	%f12,pp1,%f12

	fmuld	%f16,%f22,%f22
	faddd	%f20,pp1,%f20

	fmuld	%f24,%f30,%f30
	faddd	%f28,pp1,%f28

	fmuld	%f0,%f6,%f6

	faddd	%f14,qq1,%f14
	fmuld	%f8,%f12,%f12
	add	%l5,%g1,%l5

	faddd	%f22,qq1,%f22
	fmuld	%f16,%f20,%f20
	add	%l6,%g1,%l6

	faddd	%f30,qq1,%f30
	fmuld	%f24,%f28,%f28
	add	%l7,%g1,%l7

	fmuld	%f2,%f6,%f6
	ldd	[%l4+8],%f0

	fmuld	%f10,%f12,%f12

	fmuld	%f18,%f20,%f20

	fmuld	%f26,%f28,%f28

! Combine the polynomial values with the table entries at
! [%lK+8]/[%lK+16] for each lane.
	fmuld	%f0,%f4,%f4
	faddd	%f32,%f6,%f6

	fmuld	%f8,%f14,%f14
	faddd	%f12,%f34,%f12
	ldd	[%l5+16],%f8

	fmuld	%f16,%f22,%f22
	faddd	%f20,%f36,%f20
	ldd	[%l6+16],%f16

	fmuld	%f24,%f30,%f30
	faddd	%f28,%f38,%f28
	ldd	[%l7+16],%f24

	faddd	%f2,%f6,%f6
	ldd	[%l4+16],%f32

	fmuld	%f8,%f14,%f14
	faddd	%f12,%f10,%f12
	ldd	[%l5+8],%f34

	fmuld	%f16,%f22,%f22
	faddd	%f20,%f18,%f20
	ldd	[%l6+8],%f36

	fmuld	%f24,%f30,%f30
	faddd	%f28,%f26,%f28
	ldd	[%l7+8],%f38

	fmuld	%f32,%f6,%f6

	fmuld	%f34,%f12,%f12

	fmuld	%f36,%f20,%f20

	fmuld	%f38,%f28,%f28

! Sine lane adds the correction term, cosine lanes subtract it.
	faddd	%f6,%f4,%f6

	fsubd	%f14,%f12,%f14

	fsubd	%f22,%f20,%f22

	fsubd	%f30,%f28,%f30

	faddd	%f6,%f0,%f6

	faddd	%f14,%f8,%f14

	faddd	%f22,%f16,%f22

	faddd	%f30,%f24,%f30
	mov	%l0,%l4			! save n0; %l0 is reused for the next hx

	fnegd	%f6,%f4
	lda	[%i1]%asi,%l0		! preload next argument

	fnegd	%f14,%f12
	lda	[%i1]%asi,%f0

	fnegd	%f22,%f20
	lda	[%i1+4]%asi,%f3

	fnegd	%f30,%f28
	andn	%l0,%i5,%l0
	add	%i1,%i2,%i1

! Bit 1 of each quadrant number selects the negated result.
	andcc	%l4,2,%g0
	fmovdnz	%icc,%f4,%f6
	st	%f6,[%o0]

	andcc	%l1,2,%g0
	fmovdnz	%icc,%f12,%f14
	st	%f14,[%o1]

	andcc	%l2,2,%g0
	fmovdnz	%icc,%f20,%f22
	st	%f22,[%o2]

	andcc	%l3,2,%g0
	fmovdnz	%icc,%f28,%f30
	st	%f30,[%o3]

	addcc	%i0,-1,%i0
	bg,pt	%icc,.loop0
! delay slot
	st	%f7,[%o0+4]

	ba,pt	%icc,.end
! delay slot
	nop
2111
	.align	16
.case9:
! Quadrant-parity case: bits 0 of n0 and n3 clear, bits 0 of n1
! and n2 set.  Sine polynomial (pp1..pp3) for x0 and x3, cosine
! polynomial (qq1..qq3) for x1 and x2.
	fmuld	%f0,pp3,%f6		! sin(x0)

	fmuld	%f24,pp3,%f30		! sin(x3)

	faddd	%f6,pp2,%f6
	fmuld	%f0,qq2,%f4

	fmuld	%f8,qq3,%f14		! cos(x1)

	fmuld	%f16,qq3,%f22		! cos(x2)

	faddd	%f30,pp2,%f30
	fmuld	%f24,qq2,%f28

	fmuld	%f0,%f6,%f6
	faddd	%f4,qq1,%f4

	faddd	%f14,qq2,%f14
	fmuld	%f8,pp2,%f12

	faddd	%f22,qq2,%f22
	fmuld	%f16,pp2,%f20

	fmuld	%f24,%f30,%f30
	faddd	%f28,qq1,%f28

	faddd	%f6,pp1,%f6
	fmuld	%f0,%f4,%f4
	add	%l4,%g1,%l4

	fmuld	%f8,%f14,%f14
	faddd	%f12,pp1,%f12

	fmuld	%f16,%f22,%f22
	faddd	%f20,pp1,%f20

	faddd	%f30,pp1,%f30
	fmuld	%f24,%f28,%f28
	add	%l7,%g1,%l7

	fmuld	%f0,%f6,%f6

	faddd	%f14,qq1,%f14
	fmuld	%f8,%f12,%f12
	add	%l5,%g1,%l5

	faddd	%f22,qq1,%f22
	fmuld	%f16,%f20,%f20
	add	%l6,%g1,%l6

	fmuld	%f24,%f30,%f30

	fmuld	%f2,%f6,%f6
	ldd	[%l4+8],%f0

	fmuld	%f10,%f12,%f12

	fmuld	%f18,%f20,%f20

	fmuld	%f26,%f30,%f30
	ldd	[%l7+8],%f24

! Combine the polynomial values with the table entries at
! [%lK+8]/[%lK+16] for each lane.
	fmuld	%f0,%f4,%f4
	faddd	%f32,%f6,%f6

	fmuld	%f8,%f14,%f14
	faddd	%f12,%f34,%f12
	ldd	[%l5+16],%f8

	fmuld	%f16,%f22,%f22
	faddd	%f20,%f36,%f20
	ldd	[%l6+16],%f16

	fmuld	%f24,%f28,%f28
	faddd	%f38,%f30,%f30

	faddd	%f2,%f6,%f6
	ldd	[%l4+16],%f32

	fmuld	%f8,%f14,%f14
	faddd	%f12,%f10,%f12
	ldd	[%l5+8],%f34

	fmuld	%f16,%f22,%f22
	faddd	%f20,%f18,%f20
	ldd	[%l6+8],%f36

	faddd	%f26,%f30,%f30
	ldd	[%l7+16],%f38

	fmuld	%f32,%f6,%f6

	fmuld	%f34,%f12,%f12

	fmuld	%f36,%f20,%f20

	fmuld	%f38,%f30,%f30

! Sine lanes add the correction term, cosine lanes subtract it.
	faddd	%f6,%f4,%f6

	fsubd	%f14,%f12,%f14

	fsubd	%f22,%f20,%f22

	faddd	%f30,%f28,%f30

	faddd	%f6,%f0,%f6

	faddd	%f14,%f8,%f14

	faddd	%f22,%f16,%f22

	faddd	%f30,%f24,%f30
	mov	%l0,%l4			! save n0; %l0 is reused for the next hx

	fnegd	%f6,%f4
	lda	[%i1]%asi,%l0		! preload next argument

	fnegd	%f14,%f12
	lda	[%i1]%asi,%f0

	fnegd	%f22,%f20
	lda	[%i1+4]%asi,%f3

	fnegd	%f30,%f28
	andn	%l0,%i5,%l0
	add	%i1,%i2,%i1

! Bit 1 of each quadrant number selects the negated result.
	andcc	%l4,2,%g0
	fmovdnz	%icc,%f4,%f6
	st	%f6,[%o0]

	andcc	%l1,2,%g0
	fmovdnz	%icc,%f12,%f14
	st	%f14,[%o1]

	andcc	%l2,2,%g0
	fmovdnz	%icc,%f20,%f22
	st	%f22,[%o2]

	andcc	%l3,2,%g0
	fmovdnz	%icc,%f28,%f30
	st	%f30,[%o3]

	addcc	%i0,-1,%i0
	bg,pt	%icc,.loop0
! delay slot
	st	%f7,[%o0+4]

	ba,pt	%icc,.end
! delay slot
	nop
2266
	.align	16
.case10:
! Quadrant-parity case: bits 0 of n0 and n2 clear, bit 0 of n1 set.
! Bit 0 of n3 is still undetermined and is tested here (clear ->
! .case11).  Fall-through: sin(x0), cos(x1), sin(x2), cos(x3).
	fmuld	%f26,%f26,%f24
	andcc	%l3,1,%g0
	bz,pn	%icc,.case11
! delay slot
	fxor	%f30,%f38,%f38

	fmuld	%f0,pp3,%f6		! sin(x0)

	fmuld	%f16,pp3,%f22		! sin(x2)

	faddd	%f6,pp2,%f6
	fmuld	%f0,qq2,%f4

	fmuld	%f8,qq3,%f14		! cos(x1)

	faddd	%f22,pp2,%f22
	fmuld	%f16,qq2,%f20

	fmuld	%f24,qq3,%f30		! cos(x3)

	fmuld	%f0,%f6,%f6
	faddd	%f4,qq1,%f4

	faddd	%f14,qq2,%f14
	fmuld	%f8,pp2,%f12

	fmuld	%f16,%f22,%f22
	faddd	%f20,qq1,%f20

	faddd	%f30,qq2,%f30
	fmuld	%f24,pp2,%f28

	faddd	%f6,pp1,%f6
	fmuld	%f0,%f4,%f4
	add	%l4,%g1,%l4

	fmuld	%f8,%f14,%f14
	faddd	%f12,pp1,%f12

	faddd	%f22,pp1,%f22
	fmuld	%f16,%f20,%f20
	add	%l6,%g1,%l6

	fmuld	%f24,%f30,%f30
	faddd	%f28,pp1,%f28

	fmuld	%f0,%f6,%f6

	faddd	%f14,qq1,%f14
	fmuld	%f8,%f12,%f12
	add	%l5,%g1,%l5

	fmuld	%f16,%f22,%f22

	faddd	%f30,qq1,%f30
	fmuld	%f24,%f28,%f28
	add	%l7,%g1,%l7

	fmuld	%f2,%f6,%f6
	ldd	[%l4+8],%f0

	fmuld	%f10,%f12,%f12

	fmuld	%f18,%f22,%f22
	ldd	[%l6+8],%f16

	fmuld	%f26,%f28,%f28

! Combine the polynomial values with the table entries at
! [%lK+8]/[%lK+16] for each lane.
	fmuld	%f0,%f4,%f4
	faddd	%f32,%f6,%f6

	fmuld	%f8,%f14,%f14
	faddd	%f12,%f34,%f12
	ldd	[%l5+16],%f8

	fmuld	%f16,%f20,%f20
	faddd	%f36,%f22,%f22

	fmuld	%f24,%f30,%f30
	faddd	%f28,%f38,%f28
	ldd	[%l7+16],%f24

	faddd	%f2,%f6,%f6
	ldd	[%l4+16],%f32

	fmuld	%f8,%f14,%f14
	faddd	%f12,%f10,%f12
	ldd	[%l5+8],%f34

	faddd	%f18,%f22,%f22
	ldd	[%l6+16],%f36

	fmuld	%f24,%f30,%f30
	faddd	%f28,%f26,%f28
	ldd	[%l7+8],%f38

	fmuld	%f32,%f6,%f6

	fmuld	%f34,%f12,%f12

	fmuld	%f36,%f22,%f22

	fmuld	%f38,%f28,%f28

! Sine lanes add the correction term, cosine lanes subtract it.
	faddd	%f6,%f4,%f6

	fsubd	%f14,%f12,%f14

	faddd	%f22,%f20,%f22

	fsubd	%f30,%f28,%f30

	faddd	%f6,%f0,%f6

	faddd	%f14,%f8,%f14

	faddd	%f22,%f16,%f22

	faddd	%f30,%f24,%f30
	mov	%l0,%l4			! save n0; %l0 is reused for the next hx

	fnegd	%f6,%f4
	lda	[%i1]%asi,%l0		! preload next argument

	fnegd	%f14,%f12
	lda	[%i1]%asi,%f0

	fnegd	%f22,%f20
	lda	[%i1+4]%asi,%f3

	fnegd	%f30,%f28
	andn	%l0,%i5,%l0
	add	%i1,%i2,%i1

! Bit 1 of each quadrant number selects the negated result.
	andcc	%l4,2,%g0
	fmovdnz	%icc,%f4,%f6
	st	%f6,[%o0]

	andcc	%l1,2,%g0
	fmovdnz	%icc,%f12,%f14
	st	%f14,[%o1]

	andcc	%l2,2,%g0
	fmovdnz	%icc,%f20,%f22
	st	%f22,[%o2]

	andcc	%l3,2,%g0
	fmovdnz	%icc,%f28,%f30
	st	%f30,[%o3]

	addcc	%i0,-1,%i0
	bg,pt	%icc,.loop0
! delay slot
	st	%f7,[%o0+4]

	ba,pt	%icc,.end
! delay slot
	nop
2427
	.align	16
.case11:
! Quadrant-parity case: bits 0 of n0, n2, n3 clear; bit 0 of n1
! set.  Sine polynomial for x0, x2, x3; cosine polynomial for x1.
	fmuld	%f0,pp3,%f6		! sin(x0)

	fmuld	%f16,pp3,%f22		! sin(x2)

	fmuld	%f24,pp3,%f30		! sin(x3)

	faddd	%f6,pp2,%f6
	fmuld	%f0,qq2,%f4

	fmuld	%f8,qq3,%f14		! cos(x1)

	faddd	%f22,pp2,%f22
	fmuld	%f16,qq2,%f20

	faddd	%f30,pp2,%f30
	fmuld	%f24,qq2,%f28

	fmuld	%f0,%f6,%f6
	faddd	%f4,qq1,%f4

	faddd	%f14,qq2,%f14
	fmuld	%f8,pp2,%f12

	fmuld	%f16,%f22,%f22
	faddd	%f20,qq1,%f20

	fmuld	%f24,%f30,%f30
	faddd	%f28,qq1,%f28

	faddd	%f6,pp1,%f6
	fmuld	%f0,%f4,%f4
	add	%l4,%g1,%l4

	fmuld	%f8,%f14,%f14
	faddd	%f12,pp1,%f12

	faddd	%f22,pp1,%f22
	fmuld	%f16,%f20,%f20
	add	%l6,%g1,%l6

	faddd	%f30,pp1,%f30
	fmuld	%f24,%f28,%f28
	add	%l7,%g1,%l7

	fmuld	%f0,%f6,%f6

	faddd	%f14,qq1,%f14
	fmuld	%f8,%f12,%f12
	add	%l5,%g1,%l5

	fmuld	%f16,%f22,%f22

	fmuld	%f24,%f30,%f30

	fmuld	%f2,%f6,%f6
	ldd	[%l4+8],%f0

	fmuld	%f10,%f12,%f12

	fmuld	%f18,%f22,%f22
	ldd	[%l6+8],%f16

	fmuld	%f26,%f30,%f30
	ldd	[%l7+8],%f24

! Combine the polynomial values with the table entries at
! [%lK+8]/[%lK+16] for each lane.
	fmuld	%f0,%f4,%f4
	faddd	%f32,%f6,%f6

	fmuld	%f8,%f14,%f14
	faddd	%f12,%f34,%f12
	ldd	[%l5+16],%f8

	fmuld	%f16,%f20,%f20
	faddd	%f36,%f22,%f22

	fmuld	%f24,%f28,%f28
	faddd	%f38,%f30,%f30

	faddd	%f2,%f6,%f6
	ldd	[%l4+16],%f32

	fmuld	%f8,%f14,%f14
	faddd	%f12,%f10,%f12
	ldd	[%l5+8],%f34

	faddd	%f18,%f22,%f22
	ldd	[%l6+16],%f36

	faddd	%f26,%f30,%f30
	ldd	[%l7+16],%f38

	fmuld	%f32,%f6,%f6

	fmuld	%f34,%f12,%f12

	fmuld	%f36,%f22,%f22

	fmuld	%f38,%f30,%f30

! Sine lanes add the correction term, the cosine lane subtracts it.
	faddd	%f6,%f4,%f6

	fsubd	%f14,%f12,%f14

	faddd	%f22,%f20,%f22

	faddd	%f30,%f28,%f30

	faddd	%f6,%f0,%f6

	faddd	%f14,%f8,%f14

	faddd	%f22,%f16,%f22

	faddd	%f30,%f24,%f30
	mov	%l0,%l4			! save n0; %l0 is reused for the next hx

	fnegd	%f6,%f4
	lda	[%i1]%asi,%l0		! preload next argument

	fnegd	%f14,%f12
	lda	[%i1]%asi,%f0

	fnegd	%f22,%f20
	lda	[%i1+4]%asi,%f3

	fnegd	%f30,%f28
	andn	%l0,%i5,%l0
	add	%i1,%i2,%i1

! Bit 1 of each quadrant number selects the negated result.
	andcc	%l4,2,%g0
	fmovdnz	%icc,%f4,%f6
	st	%f6,[%o0]

	andcc	%l1,2,%g0
	fmovdnz	%icc,%f12,%f14
	st	%f14,[%o1]

	andcc	%l2,2,%g0
	fmovdnz	%icc,%f20,%f22
	st	%f22,[%o2]

	andcc	%l3,2,%g0
	fmovdnz	%icc,%f28,%f30
	st	%f30,[%o3]

	addcc	%i0,-1,%i0
	bg,pt	%icc,.loop0
! delay slot
	st	%f7,[%o0+4]

	ba,pt	%icc,.end
! delay slot
	nop
2583
	.align	16
.case12:
! Quadrant-parity case: bits 0 of n0 and n1 clear.  Bits 0 of n2
! and n3 are tested here (clear -> .case14 / .case13).
! Fall-through: sin(x0), sin(x1), cos(x2), cos(x3).
	fmuld	%f18,%f18,%f16
	andcc	%l2,1,%g0
	bz,pn	%icc,.case14
! delay slot
	fxor	%f22,%f36,%f36

	fmuld	%f26,%f26,%f24
	andcc	%l3,1,%g0
	bz,pn	%icc,.case13
! delay slot
	fxor	%f30,%f38,%f38

	fmuld	%f0,pp3,%f6		! sin(x0)

	fmuld	%f8,pp3,%f14		! sin(x1)

	faddd	%f6,pp2,%f6
	fmuld	%f0,qq2,%f4

	faddd	%f14,pp2,%f14
	fmuld	%f8,qq2,%f12

	fmuld	%f16,qq3,%f22		! cos(x2)

	fmuld	%f24,qq3,%f30		! cos(x3)

	fmuld	%f0,%f6,%f6
	faddd	%f4,qq1,%f4

	fmuld	%f8,%f14,%f14
	faddd	%f12,qq1,%f12

	faddd	%f22,qq2,%f22
	fmuld	%f16,pp2,%f20

	faddd	%f30,qq2,%f30
	fmuld	%f24,pp2,%f28

	faddd	%f6,pp1,%f6
	fmuld	%f0,%f4,%f4
	add	%l4,%g1,%l4

	faddd	%f14,pp1,%f14
	fmuld	%f8,%f12,%f12
	add	%l5,%g1,%l5

	fmuld	%f16,%f22,%f22
	faddd	%f20,pp1,%f20

	fmuld	%f24,%f30,%f30
	faddd	%f28,pp1,%f28

	fmuld	%f0,%f6,%f6

	fmuld	%f8,%f14,%f14

	faddd	%f22,qq1,%f22
	fmuld	%f16,%f20,%f20
	add	%l6,%g1,%l6

	faddd	%f30,qq1,%f30
	fmuld	%f24,%f28,%f28
	add	%l7,%g1,%l7

	fmuld	%f2,%f6,%f6
	ldd	[%l4+8],%f0

	fmuld	%f10,%f14,%f14
	ldd	[%l5+8],%f8

	fmuld	%f18,%f20,%f20

	fmuld	%f26,%f28,%f28

! Combine the polynomial values with the table entries at
! [%lK+8]/[%lK+16] for each lane.
	fmuld	%f0,%f4,%f4
	faddd	%f32,%f6,%f6

	fmuld	%f8,%f12,%f12
	faddd	%f34,%f14,%f14

	fmuld	%f16,%f22,%f22
	faddd	%f20,%f36,%f20
	ldd	[%l6+16],%f16

	fmuld	%f24,%f30,%f30
	faddd	%f28,%f38,%f28
	ldd	[%l7+16],%f24

	faddd	%f2,%f6,%f6
	ldd	[%l4+16],%f32

	faddd	%f10,%f14,%f14
	ldd	[%l5+16],%f34

	fmuld	%f16,%f22,%f22
	faddd	%f20,%f18,%f20
	ldd	[%l6+8],%f36

	fmuld	%f24,%f30,%f30
	faddd	%f28,%f26,%f28
	ldd	[%l7+8],%f38

	fmuld	%f32,%f6,%f6

	fmuld	%f34,%f14,%f14

	fmuld	%f36,%f20,%f20

	fmuld	%f38,%f28,%f28

! Sine lanes add the correction term, cosine lanes subtract it.
	faddd	%f6,%f4,%f6

	faddd	%f14,%f12,%f14

	fsubd	%f22,%f20,%f22

	fsubd	%f30,%f28,%f30

	faddd	%f6,%f0,%f6

	faddd	%f14,%f8,%f14

	faddd	%f22,%f16,%f22

	faddd	%f30,%f24,%f30
	mov	%l0,%l4			! save n0; %l0 is reused for the next hx

	fnegd	%f6,%f4
	lda	[%i1]%asi,%l0		! preload next argument

	fnegd	%f14,%f12
	lda	[%i1]%asi,%f0

	fnegd	%f22,%f20
	lda	[%i1+4]%asi,%f3

	fnegd	%f30,%f28
	andn	%l0,%i5,%l0
	add	%i1,%i2,%i1

! Bit 1 of each quadrant number selects the negated result.
	andcc	%l4,2,%g0
	fmovdnz	%icc,%f4,%f6
	st	%f6,[%o0]

	andcc	%l1,2,%g0
	fmovdnz	%icc,%f12,%f14
	st	%f14,[%o1]

	andcc	%l2,2,%g0
	fmovdnz	%icc,%f20,%f22
	st	%f22,[%o2]

	andcc	%l3,2,%g0
	fmovdnz	%icc,%f28,%f30
	st	%f30,[%o3]

	addcc	%i0,-1,%i0
	bg,pt	%icc,.loop0
! delay slot
	st	%f7,[%o0+4]

	ba,pt	%icc,.end
! delay slot
	nop
2750
	.align	16
.case13:
! Quadrant-parity case: bits 0 of n0, n1, n3 clear; bit 0 of n2
! set.  Sine polynomial for x0, x1, x3; cosine polynomial for x2.
	fmuld	%f0,pp3,%f6		! sin(x0)

	fmuld	%f8,pp3,%f14		! sin(x1)

	fmuld	%f24,pp3,%f30		! sin(x3)

	faddd	%f6,pp2,%f6
	fmuld	%f0,qq2,%f4

	faddd	%f14,pp2,%f14
	fmuld	%f8,qq2,%f12

	fmuld	%f16,qq3,%f22		! cos(x2)

	faddd	%f30,pp2,%f30
	fmuld	%f24,qq2,%f28

	fmuld	%f0,%f6,%f6
	faddd	%f4,qq1,%f4

	fmuld	%f8,%f14,%f14
	faddd	%f12,qq1,%f12

	faddd	%f22,qq2,%f22
	fmuld	%f16,pp2,%f20

	fmuld	%f24,%f30,%f30
	faddd	%f28,qq1,%f28

	faddd	%f6,pp1,%f6
	fmuld	%f0,%f4,%f4
	add	%l4,%g1,%l4

	faddd	%f14,pp1,%f14
	fmuld	%f8,%f12,%f12
	add	%l5,%g1,%l5

	fmuld	%f16,%f22,%f22
	faddd	%f20,pp1,%f20

	faddd	%f30,pp1,%f30
	fmuld	%f24,%f28,%f28
	add	%l7,%g1,%l7

	fmuld	%f0,%f6,%f6

	fmuld	%f8,%f14,%f14

	faddd	%f22,qq1,%f22
	fmuld	%f16,%f20,%f20
	add	%l6,%g1,%l6

	fmuld	%f24,%f30,%f30

	fmuld	%f2,%f6,%f6
	ldd	[%l4+8],%f0

	fmuld	%f10,%f14,%f14
	ldd	[%l5+8],%f8

	fmuld	%f18,%f20,%f20

	fmuld	%f26,%f30,%f30
	ldd	[%l7+8],%f24

! Combine the polynomial values with the table entries at
! [%lK+8]/[%lK+16] for each lane.
	fmuld	%f0,%f4,%f4
	faddd	%f32,%f6,%f6

	fmuld	%f8,%f12,%f12
	faddd	%f34,%f14,%f14

	fmuld	%f16,%f22,%f22
	faddd	%f20,%f36,%f20
	ldd	[%l6+16],%f16

	fmuld	%f24,%f28,%f28
	faddd	%f38,%f30,%f30

	faddd	%f2,%f6,%f6
	ldd	[%l4+16],%f32

	faddd	%f10,%f14,%f14
	ldd	[%l5+16],%f34

	fmuld	%f16,%f22,%f22
	faddd	%f20,%f18,%f20
	ldd	[%l6+8],%f36

	faddd	%f26,%f30,%f30
	ldd	[%l7+16],%f38

	fmuld	%f32,%f6,%f6

	fmuld	%f34,%f14,%f14

	fmuld	%f36,%f20,%f20

	fmuld	%f38,%f30,%f30

! Sine lanes add the correction term, the cosine lane subtracts it.
	faddd	%f6,%f4,%f6

	faddd	%f14,%f12,%f14

	fsubd	%f22,%f20,%f22

	faddd	%f30,%f28,%f30

	faddd	%f6,%f0,%f6

	faddd	%f14,%f8,%f14

	faddd	%f22,%f16,%f22

	faddd	%f30,%f24,%f30
	mov	%l0,%l4			! save n0; %l0 is reused for the next hx

	fnegd	%f6,%f4
	lda	[%i1]%asi,%l0		! preload next argument

	fnegd	%f14,%f12
	lda	[%i1]%asi,%f0

	fnegd	%f22,%f20
	lda	[%i1+4]%asi,%f3

	fnegd	%f30,%f28
	andn	%l0,%i5,%l0
	add	%i1,%i2,%i1

! Bit 1 of each quadrant number selects the negated result.
	andcc	%l4,2,%g0
	fmovdnz	%icc,%f4,%f6
	st	%f6,[%o0]

	andcc	%l1,2,%g0
	fmovdnz	%icc,%f12,%f14
	st	%f14,[%o1]

	andcc	%l2,2,%g0
	fmovdnz	%icc,%f20,%f22
	st	%f22,[%o2]

	andcc	%l3,2,%g0
	fmovdnz	%icc,%f28,%f30
	st	%f30,[%o3]

	addcc	%i0,-1,%i0
	bg,pt	%icc,.loop0
! delay slot
	st	%f7,[%o0+4]

	ba,pt	%icc,.end
! delay slot
	nop
2906
	.align	16
.case14:
! Quadrant-parity case: bits 0 of n0, n1, n2 clear.  Bit 0 of n3
! is tested here (clear -> .case15, all-sine).  Fall-through:
! sin(x0), sin(x1), sin(x2), cos(x3).
	fmuld	%f26,%f26,%f24
	andcc	%l3,1,%g0
	bz,pn	%icc,.case15
! delay slot
	fxor	%f30,%f38,%f38

	fmuld	%f0,pp3,%f6		! sin(x0)

	fmuld	%f8,pp3,%f14		! sin(x1)

	fmuld	%f16,pp3,%f22		! sin(x2)

	faddd	%f6,pp2,%f6
	fmuld	%f0,qq2,%f4

	faddd	%f14,pp2,%f14
	fmuld	%f8,qq2,%f12

	faddd	%f22,pp2,%f22
	fmuld	%f16,qq2,%f20

	fmuld	%f24,qq3,%f30		! cos(x3)

	fmuld	%f0,%f6,%f6
	faddd	%f4,qq1,%f4

	fmuld	%f8,%f14,%f14
	faddd	%f12,qq1,%f12

	fmuld	%f16,%f22,%f22
	faddd	%f20,qq1,%f20

	faddd	%f30,qq2,%f30
	fmuld	%f24,pp2,%f28

	faddd	%f6,pp1,%f6
	fmuld	%f0,%f4,%f4
	add	%l4,%g1,%l4

	faddd	%f14,pp1,%f14
	fmuld	%f8,%f12,%f12
	add	%l5,%g1,%l5

	faddd	%f22,pp1,%f22
	fmuld	%f16,%f20,%f20
	add	%l6,%g1,%l6

	fmuld	%f24,%f30,%f30
	faddd	%f28,pp1,%f28

	fmuld	%f0,%f6,%f6

	fmuld	%f8,%f14,%f14

	fmuld	%f16,%f22,%f22

	faddd	%f30,qq1,%f30
	fmuld	%f24,%f28,%f28
	add	%l7,%g1,%l7

	fmuld	%f2,%f6,%f6
	ldd	[%l4+8],%f0

	fmuld	%f10,%f14,%f14
	ldd	[%l5+8],%f8

	fmuld	%f18,%f22,%f22
	ldd	[%l6+8],%f16

	fmuld	%f26,%f28,%f28

! Combine the polynomial values with the table entries at
! [%lK+8]/[%lK+16] for each lane.
	fmuld	%f0,%f4,%f4
	faddd	%f32,%f6,%f6

	fmuld	%f8,%f12,%f12
	faddd	%f34,%f14,%f14

	fmuld	%f16,%f20,%f20
	faddd	%f36,%f22,%f22

	fmuld	%f24,%f30,%f30
	faddd	%f28,%f38,%f28
	ldd	[%l7+16],%f24

	faddd	%f2,%f6,%f6
	ldd	[%l4+16],%f32

	faddd	%f10,%f14,%f14
	ldd	[%l5+16],%f34

	faddd	%f18,%f22,%f22
	ldd	[%l6+16],%f36

	fmuld	%f24,%f30,%f30
	faddd	%f28,%f26,%f28
	ldd	[%l7+8],%f38

	fmuld	%f32,%f6,%f6

	fmuld	%f34,%f14,%f14

	fmuld	%f36,%f22,%f22

	fmuld	%f38,%f28,%f28

! Sine lanes add the correction term, the cosine lane subtracts it.
	faddd	%f6,%f4,%f6

	faddd	%f14,%f12,%f14

	faddd	%f22,%f20,%f22

	fsubd	%f30,%f28,%f30

	faddd	%f6,%f0,%f6

	faddd	%f14,%f8,%f14

	faddd	%f22,%f16,%f22

	faddd	%f30,%f24,%f30
	mov	%l0,%l4			! save n0; %l0 is reused for the next hx

	fnegd	%f6,%f4
	lda	[%i1]%asi,%l0		! preload next argument

	fnegd	%f14,%f12
	lda	[%i1]%asi,%f0

	fnegd	%f22,%f20
	lda	[%i1+4]%asi,%f3

	fnegd	%f30,%f28
	andn	%l0,%i5,%l0
	add	%i1,%i2,%i1

! Bit 1 of each quadrant number selects the negated result.
	andcc	%l4,2,%g0
	fmovdnz	%icc,%f4,%f6
	st	%f6,[%o0]

	andcc	%l1,2,%g0
	fmovdnz	%icc,%f12,%f14
	st	%f14,[%o1]

	andcc	%l2,2,%g0
	fmovdnz	%icc,%f20,%f22
	st	%f22,[%o2]

	andcc	%l3,2,%g0
	fmovdnz	%icc,%f28,%f30
	st	%f30,[%o3]

	addcc	%i0,-1,%i0
	bg,pt	%icc,.loop0
! delay slot
	st	%f7,[%o0+4]

	ba,pt	%icc,.end
! delay slot
	nop
3068
	.align	16
.case15:
! Quadrant-parity case: bits 0 of all four quadrant numbers are
! clear -- the sine polynomial (pp1..pp3) is used for every lane.
	fmuld	%f0,pp3,%f6		! sin(x0)

	fmuld	%f8,pp3,%f14		! sin(x1)

	fmuld	%f16,pp3,%f22		! sin(x2)

	fmuld	%f24,pp3,%f30		! sin(x3)

	faddd	%f6,pp2,%f6
	fmuld	%f0,qq2,%f4

	faddd	%f14,pp2,%f14
	fmuld	%f8,qq2,%f12

	faddd	%f22,pp2,%f22
	fmuld	%f16,qq2,%f20

	faddd	%f30,pp2,%f30
	fmuld	%f24,qq2,%f28

	fmuld	%f0,%f6,%f6
	faddd	%f4,qq1,%f4

	fmuld	%f8,%f14,%f14
	faddd	%f12,qq1,%f12

	fmuld	%f16,%f22,%f22
	faddd	%f20,qq1,%f20

	fmuld	%f24,%f30,%f30
	faddd	%f28,qq1,%f28

	faddd	%f6,pp1,%f6
	fmuld	%f0,%f4,%f4
	add	%l4,%g1,%l4

	faddd	%f14,pp1,%f14
	fmuld	%f8,%f12,%f12
	add	%l5,%g1,%l5

	faddd	%f22,pp1,%f22
	fmuld	%f16,%f20,%f20
	add	%l6,%g1,%l6

	faddd	%f30,pp1,%f30
	fmuld	%f24,%f28,%f28
	add	%l7,%g1,%l7

	fmuld	%f0,%f6,%f6

	fmuld	%f8,%f14,%f14

	fmuld	%f16,%f22,%f22

	fmuld	%f24,%f30,%f30

	fmuld	%f2,%f6,%f6
	ldd	[%l4+8],%f0

	fmuld	%f10,%f14,%f14
	ldd	[%l5+8],%f8

	fmuld	%f18,%f22,%f22
	ldd	[%l6+8],%f16

	fmuld	%f26,%f30,%f30
	ldd	[%l7+8],%f24

! Combine the polynomial values with the table entries at
! [%lK+8]/[%lK+16] for each lane.
	fmuld	%f0,%f4,%f4
	faddd	%f32,%f6,%f6

	fmuld	%f8,%f12,%f12
	faddd	%f34,%f14,%f14

	fmuld	%f16,%f20,%f20
	faddd	%f36,%f22,%f22

	fmuld	%f24,%f28,%f28
	faddd	%f38,%f30,%f30

	faddd	%f2,%f6,%f6
	ldd	[%l4+16],%f32

	faddd	%f10,%f14,%f14
	ldd	[%l5+16],%f34

	faddd	%f18,%f22,%f22
	ldd	[%l6+16],%f36

	faddd	%f26,%f30,%f30
	ldd	[%l7+16],%f38

	fmuld	%f32,%f6,%f6

	fmuld	%f34,%f14,%f14

	fmuld	%f36,%f22,%f22

	fmuld	%f38,%f30,%f30

! All-sine: every lane adds its correction term.
	faddd	%f6,%f4,%f6

	faddd	%f14,%f12,%f14

	faddd	%f22,%f20,%f22

	faddd	%f30,%f28,%f30

	faddd	%f6,%f0,%f6

	faddd	%f14,%f8,%f14

	faddd	%f22,%f16,%f22

	faddd	%f30,%f24,%f30
	mov	%l0,%l4			! save n0; %l0 is reused for the next hx

	fnegd	%f6,%f4
	lda	[%i1]%asi,%l0		! preload next argument

	fnegd	%f14,%f12
	lda	[%i1]%asi,%f0

	fnegd	%f22,%f20
	lda	[%i1+4]%asi,%f3

	fnegd	%f30,%f28
	andn	%l0,%i5,%l0
	add	%i1,%i2,%i1

! Bit 1 of each quadrant number selects the negated result.
	andcc	%l4,2,%g0
	fmovdnz	%icc,%f4,%f6
	st	%f6,[%o0]

	andcc	%l1,2,%g0
	fmovdnz	%icc,%f12,%f14
	st	%f14,[%o1]

	andcc	%l2,2,%g0
	fmovdnz	%icc,%f20,%f22
	st	%f22,[%o2]

	andcc	%l3,2,%g0
	fmovdnz	%icc,%f28,%f30
	st	%f30,[%o3]

	addcc	%i0,-1,%i0
	bg,pt	%icc,.loop0
! delay slot
	st	%f7,[%o0+4]

	ba,pt	%icc,.end
! delay slot
	nop
3225
3226
	.align	16
.end:
! Loop exit: store the low halves of the last three pending
! results, then, if any argument was flagged as huge (biguns set
! in one of the .range handlers), reload the saved x/y pointers,
! strides, and count and hand those elements to the out-of-line
! large-argument routine.
	st	%f15,[%o1+4]
	st	%f23,[%o2+4]
	st	%f31,[%o3+4]
	ld	[%fp+biguns],%i5
	tst	%i5			! check for huge arguments remaining
	be,pt	%icc,.exit
! delay slot
	nop
#ifdef __sparcv9
	ldx	[%fp+xsave],%o1
	ldx	[%fp+ysave],%o3
#else
	ld	[%fp+xsave],%o1
	ld	[%fp+ysave],%o3
#endif
	ld	[%fp+nsave],%o0
	ld	[%fp+sxsave],%o2
	ld	[%fp+sysave],%o4
	sra	%o2,0,%o2		! sign-extend for V9
	sra	%o4,0,%o4
	call	__vlibm_vcos_big_ultra3
	sra	%o5,0,%o5		! delay slot
.exit:
	ret
	restore
3255
3256
	.align	16
! Fewer than four arguments remain: pad the trailing group with
! zero arguments so the four-lane pipeline can run to completion.
! Padding lanes get a zero quadrant number and write their
! (meaningless) results to the scratch slot at [%fp+junk].
.last1:
	faddd	%f2,c3two44,%f4
	st	%f15,[%o1+4]
.last1_from_range1:
	mov	0,%l1
	fzeros	%f8
	fzero	%f10
	add	%fp,junk,%o1
.last2:
	faddd	%f10,c3two44,%f12
	st	%f23,[%o2+4]
.last2_from_range2:
	mov	0,%l2
	fzeros	%f16
	fzero	%f18
	add	%fp,junk,%o2
.last3:
	faddd	%f18,c3two44,%f20
	st	%f31,[%o3+4]
	st	%f5,[%fp+nk0]
	st	%f13,[%fp+nk1]
.last3_from_range3:
	mov	0,%l3
	fzeros	%f24
	fzero	%f26
	ba,pt	%icc,.cont
! delay slot
	add	%fp,junk,%o3
3286
3287
	.align	16
.range0:
! Lane 0 argument outside the primary range (%o4 holds 0x3e400000).
! Tiny |x|: result is 1.0, with inexact raised for nonzero x.
! Inf/NaN: result is x*0 (QNaN; invalid raised for Inf).
! Large finite: set biguns so .end calls the big-argument routine.
	cmp	%l0,%o4
	bl,pt	%icc,1f			! hx < 0x3e400000
! delay slot, harmless if branch taken
	sethi	%hi(0x7ff00000),%o7
	cmp	%l0,%o7
	bl,a,pt	%icc,2f			! branch if finite
! delay slot, squashed if branch not taken
	st	%o4,[%fp+biguns]	! set biguns
	fzero	%f0
	fmuld	%f2,%f0,%f2
	st	%f2,[%o0]
	ba,pt	%icc,2f
! delay slot
	st	%f3,[%o0+4]
1:
	fdtoi	%f2,%f4			! raise inexact if not zero
	sethi	%hi(0x3ff00000),%o7
	st	%o7,[%o0]
	st	%g0,[%o0+4]
2:
! Advance to the next element and re-enter the pipeline.
	addcc	%i0,-1,%i0
	ble,pn	%icc,.end
! delay slot, harmless if branch taken
	add	%i3,%i4,%i3		! y += stridey
	andn	%l1,%i5,%l0		! hx &= ~0x80000000
	fmovs	%f8,%f0
	fmovs	%f11,%f3
	ba,pt	%icc,.loop0
! delay slot
	add	%i1,%i2,%i1		! x += stridex
3320
3321
	.align	16
.range1:
! Lane 1 argument outside the primary range; same handling as
! .range0 (tiny -> 1.0 + inexact; Inf/NaN -> x*0; large finite ->
! set biguns for the big-argument routine).
	cmp	%l1,%o4
	bl,pt	%icc,1f			! hx < 0x3e400000
! delay slot, harmless if branch taken
	sethi	%hi(0x7ff00000),%o7
	cmp	%l1,%o7
	bl,a,pt	%icc,2f			! branch if finite
! delay slot, squashed if branch not taken
	st	%o4,[%fp+biguns]	! set biguns
	fzero	%f8
	fmuld	%f10,%f8,%f10
	st	%f10,[%o1]
	ba,pt	%icc,2f
! delay slot
	st	%f11,[%o1+4]
1:
	fdtoi	%f10,%f12		! raise inexact if not zero
	sethi	%hi(0x3ff00000),%o7
	st	%o7,[%o1]
	st	%g0,[%o1+4]
2:
! Advance; if no elements remain, pad the group via .last1.
	addcc	%i0,-1,%i0
	ble,pn	%icc,.last1_from_range1
! delay slot, harmless if branch taken
	add	%i3,%i4,%i3		! y += stridey
	andn	%l2,%i5,%l1		! hx &= ~0x80000000
	fmovs	%f16,%f8
	fmovs	%f19,%f11
	ba,pt	%icc,.loop1
! delay slot
	add	%i1,%i2,%i1		! x += stridex
3354
3355
	.align	16
.range2:
! Lane 2 argument outside the primary range; same handling as
! .range0 (tiny -> 1.0 + inexact; Inf/NaN -> x*0; large finite ->
! set biguns for the big-argument routine).
	cmp	%l2,%o4
	bl,pt	%icc,1f			! hx < 0x3e400000
! delay slot, harmless if branch taken
	sethi	%hi(0x7ff00000),%o7
	cmp	%l2,%o7
	bl,a,pt	%icc,2f			! branch if finite
! delay slot, squashed if branch not taken
	st	%o4,[%fp+biguns]	! set biguns
	fzero	%f16
	fmuld	%f18,%f16,%f18
	st	%f18,[%o2]
	ba,pt	%icc,2f
! delay slot
	st	%f19,[%o2+4]
1:
	fdtoi	%f18,%f20		! raise inexact if not zero
	sethi	%hi(0x3ff00000),%o7
	st	%o7,[%o2]
	st	%g0,[%o2+4]
2:
! Advance; if no elements remain, pad the group via .last2.
	addcc	%i0,-1,%i0
	ble,pn	%icc,.last2_from_range2
! delay slot, harmless if branch taken
	add	%i3,%i4,%i3		! y += stridey
	andn	%l3,%i5,%l2		! hx &= ~0x80000000
	fmovs	%f24,%f16
	fmovs	%f27,%f19
	ba,pt	%icc,.loop2
! delay slot
	add	%i1,%i2,%i1		! x += stridex
3388
3389
	.align	16
.range3:
! Lane 3 argument outside the primary range; same handling as
! .range0.  Unlike lanes 0-2, the replacement argument is loaded
! directly from memory rather than shuffled from the next lane.
	cmp	%l3,%o4
	bl,pt	%icc,1f			! hx < 0x3e400000
! delay slot, harmless if branch taken
	sethi	%hi(0x7ff00000),%o7
	cmp	%l3,%o7
	bl,a,pt	%icc,2f			! branch if finite
! delay slot, squashed if branch not taken
	st	%o4,[%fp+biguns]	! set biguns
	fzero	%f24
	fmuld	%f26,%f24,%f26
	st	%f26,[%o3]
	ba,pt	%icc,2f
! delay slot
	st	%f27,[%o3+4]
1:
	fdtoi	%f26,%f28		! raise inexact if not zero
	sethi	%hi(0x3ff00000),%o7
	st	%o7,[%o3]
	st	%g0,[%o3+4]
2:
! Advance; if no elements remain, pad the group via .last3.
	addcc	%i0,-1,%i0
	ble,pn	%icc,.last3_from_range3
! delay slot, harmless if branch taken
	add	%i3,%i4,%i3		! y += stridey
	ld	[%i1],%l3
	ld	[%i1],%f24
	ld	[%i1+4],%f27
	andn	%l3,%i5,%l3		! hx &= ~0x80000000
	ba,pt	%icc,.loop3
! delay slot
	add	%i1,%i2,%i1		! x += stridex
3423
3424	SET_SIZE(__vcos_ultra3)
3425
3426