1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26	.ident	"@(#)__vsin.S	1.9	06/01/23 SMI"
27
28	.file	"__vsin.S"
29
30#include "libm.h"
31
! Read-only constant pool for __vsin: 23 eight-byte entries, each written
! as a high word followed by a low word.  The byte offsets defined after
! this table select individual entries; the matching offset and name are
! noted on each line.
32	RO_DATA
33	.align	64
34constants:
35	.word	0x3ec718e3,0xa6972785	! 0x00 p4
36	.word	0x3ef9fd39,0x94293940	! 0x08 q4
37	.word	0xbf2a019f,0x75ee4be1	! 0x10 p3
38	.word	0xbf56c16b,0xba552569	! 0x18 q3
39	.word	0x3f811111,0x1108c703	! 0x20 p2
40	.word	0x3fa55555,0x554f5b35	! 0x28 q2
41	.word	0xbfc55555,0x555554d0	! 0x30 p1
42	.word	0xbfdfffff,0xffffff85	! 0x38 q1
43	.word	0x3ff00000,0x00000000	! 0x40 one
44	.word	0xbfc55555,0x5551fc28	! 0x48 pp1
45	.word	0x3f811107,0x62eacc9d	! 0x50 pp2
46	.word	0xbfdfffff,0xffff6328	! 0x58 qq1
47	.word	0x3fa55551,0x5f7acf0c	! 0x60 qq2
48	.word	0x3fe45f30,0x6dc9c883	! 0x68 invpio2
49	.word	0x43380000,0x00000000	! 0x70 round
50	.word	0x3ff921fb,0x54400000	! 0x78 pio2_1
51	.word	0x3dd0b461,0x1a600000	! 0x80 pio2_2
52	.word	0x3ba3198a,0x2e000000	! 0x88 pio2_3
53	.word	0x397b839a,0x252049c1	! 0x90 pio2_3t
54	.word	0x80000000,0x00004000	! 0x98 f30val
! 0xa0 is the mask entry; 0xa8 and 0xb0 are the two thresh entries, which
! the medium-range code indexes with (n & 1) * 8.
55	.word	0xffff8000,0x00000000	! N.B.: low-order words used
56	.word	0x3fc90000,0x80000000	! for sign bit hacking; see
57	.word	0x3fc40000,0x00000000	! references to "thresh" below
58
! Byte offsets of the entries in the constants table above.  They are
! added to a register holding the address of the table, for example in
! the ldd instructions that load the polynomial coefficients.
! The offsets advance by 8, matching the eight-byte table entries.
59#define p4		0x0
60#define q4		0x08
61#define p3		0x10
62#define q3		0x18
63#define p2		0x20
64#define q2		0x28
65#define p1		0x30
66#define q1		0x38
67#define one		0x40
68#define pp1		0x48
69#define pp2		0x50
70#define qq1		0x58
71#define qq2		0x60
72#define invpio2		0x68
73#define round		0x70
74#define pio2_1		0x78
75#define pio2_2		0x80
76#define pio2_3		0x88
77#define pio2_3t		0x90
78#define f30val		0x98
79#define mask		0xa0
80#define thresh		0xa8
81
82! local storage indices
! Each name is an offset that is added to %fp to address a stack slot.
!   xsave, ysave    saved x and y pointers
!   nsave           saved element count
!   sxsave, sysave  saved strides
!   n0, n1, n2      word slots used to move the quadrant numbers from the
!                   floating point registers to the integer registers
!   x?_1, y?_0      16-byte scratch slots; x0_1 doubles as the dummy
!                   store target when the primary loop is first entered
!   biguns          not referenced in the part of the file reviewed here
!                   NOTE(review): presumably a flag for huge arguments
83
84#define xsave		STACK_BIAS-0x8
85#define ysave		STACK_BIAS-0x10
86#define nsave		STACK_BIAS-0x14
87#define sxsave		STACK_BIAS-0x18
88#define sysave		STACK_BIAS-0x1c
89#define biguns		STACK_BIAS-0x20
90#define n2		STACK_BIAS-0x24
91#define n1		STACK_BIAS-0x28
92#define n0		STACK_BIAS-0x2c
93#define x2_1		STACK_BIAS-0x40
94#define x1_1		STACK_BIAS-0x50
95#define x0_1		STACK_BIAS-0x60
96#define y2_0		STACK_BIAS-0x70
97#define y1_0		STACK_BIAS-0x80
98#define y0_0		STACK_BIAS-0x90
99! sizeof temp storage - must be a multiple of 16 for V9
! (this size is subtracted from %sp, together with the minimum frame,
! by the save instruction at function entry)
100#define tmps		0x90
101
102!--------------------------------------------------------------
103!	Some defines to keep code more readable
104#define LIM_l6		%l6
105!	in primary range, holds 0x3e400000: an argument whose high word
!	(sign bit cleared) is below this value is stored to *y unchanged.
106!	in transferring to medium range, denotes what loop was active
!	(1, 2 or 3 for .loop0, .loop1 or .loop2).
107!--------------------------------------------------------------
108
! __vsin entry and setup.
! Incoming arguments (see the register list below): i0 = n, i1 = x,
! i2 = stridex, i3 = y, i4 = stridey.
! This part builds the frame, resolves the addresses of the two tables
! and of the constants, saves the arguments on the stack, loads the
! primary-range constants into registers, scales the strides to bytes,
! fetches the first argument, and enters .loop0.
109	ENTRY(__vsin)
110	save	%sp,-SA(MINFRAME)-tmps,%sp
111	PIC_SETUP(g5)
112	PIC_SET(g5,__vlibm_TBL_sincos_hi,l3)
113	PIC_SET(g5,__vlibm_TBL_sincos_lo,l4)
114	PIC_SET(g5,constants,l5)
115	mov	%l5,%g1			! g1 = constants base; l5 is reused below
116	wr	%g0,0x82,%asi		! set %asi for non-faulting loads
117
118! ========== primary range ==========
119
120! register use
121
122! i0  n
123! i1  x
124! i2  stridex
125! i3  y
126! i4  stridey
127! i5  0x80000000
128
129! l0  hx0
130! l1  hx1
131! l2  hx2
132! l3  __vlibm_TBL_sincos_hi
133! l4  __vlibm_TBL_sincos_lo
134! l5  0x3fc90000
135! l6  0x3e400000
136! l7  0x3fe921fb
137
138! the following are 64-bit registers in both V8+ and V9
139
140! g1  scratch
141! g5
142
143! o0  py0
144! o1  py1
145! o2  py2
146! o3  oy0
147! o4  oy1
148! o5  oy2
149! o7  scratch
150
151! f0  x0
152! f2
153! f4
154! f6
155! f8  scratch for table base
156! f9  signbit0
157! f10 x1
158! f12
159! f14
160! f16
161! f18 scratch for table base
162! f19 signbit1
163! f20 x2
164! f22
165! f24
166! f26
167! f28 scratch for table base
168! f29 signbit2
169! f30 0x80000000
170! f31 0x4000
171! f32
172! f34
173! f36
174! f38
175! f40
176! f42
177! f44 0xffff800000000000
178! f46 p1
179! f48 p2
180! f50 p3
181! f52 p4
182! f54 one
183! f56 pp1
184! f58 pp2
185! f60 qq1
186! f62 qq2
187
! The pointers are saved with 64-bit stores when built for V9 and with
! 32-bit stores otherwise.
188#ifdef __sparcv9
189	stx	%i1,[%fp+xsave]		! save arguments
190	stx	%i3,[%fp+ysave]
191#else
192	st	%i1,[%fp+xsave]		! save arguments
193	st	%i3,[%fp+ysave]
194#endif
195	st	%i0,[%fp+nsave]
196	st	%i2,[%fp+sxsave]
197	st	%i4,[%fp+sysave]
198	sethi	%hi(0x80000000),%i5	! load/set up constants
199	sethi	%hi(0x3fc90000),%l5
200	sethi	%hi(0x3e400000),LIM_l6
201	sethi	%hi(0x3fe921fb),%l7
202	or	%l7,%lo(0x3fe921fb),%l7
203	ldd	[%g1+f30val],%f30
204	ldd	[%g1+mask],%f44
205	ldd	[%g1+p1],%f46
206	ldd	[%g1+p2],%f48
207	ldd	[%g1+p3],%f50
208	ldd	[%g1+p4],%f52
209	ldd	[%g1+one],%f54
210	ldd	[%g1+pp1],%f56
211	ldd	[%g1+pp2],%f58
212	ldd	[%g1+qq1],%f60
213	ldd	[%g1+qq2],%f62
214	sll	%i2,3,%i2		! scale strides
215	sll	%i4,3,%i4
! .loop2 stores the results of the previous pass through o3, o4 and o5
! before any result exists, so all three start out pointing at a scratch
! stack slot.
216	add	%fp,x0_1,%o3		! precondition loop
217	add	%fp,x0_1,%o4
218	add	%fp,x0_1,%o5
219	ld	[%i1],%l0		! hx = *x
220	ld	[%i1],%f0
221	ld	[%i1+4],%f1
222	andn	%l0,%i5,%l0		! hx &= ~0x80000000
223	add	%i1,%i2,%i1		! x += stridex
224
225	ba,pt	%icc,.loop0
226! delay slot
227	nop
228
! Primary-range main loop; three arguments (x0, x1, x2) per pass.
!   .loop0, .loop1 and .loop2 each preload the following argument and
!   classify the current one.  An argument whose high word is below
!   0x3e400000 or above 0x3fe921fb leaves through .range0, .range1 or
!   .range2.  When the count in i0 runs out after x0 or x1, control
!   leaves through .endloop1 or .endloop2.
!   .loop2 also stores the results of the previous pass (f6, f16, f26)
!   through the saved output pointers o3, o4 and o5; those stores are
!   spread over the instructions and branch delay slots that follow.
!   An argument whose high word is below 0x3fc90000 (held in l5) is
!   evaluated with a polynomial in .case1 to .case7; the code that falls
!   through all three tests evaluates all three arguments with the
!   table-based path.
229	.align 32
230.loop0:
231	lda	[%i1]%asi,%l1		! preload next argument
232	sub	%l0,LIM_l6,%g1		! negative if hx0 < 0x3e400000
233	sub	%l7,%l0,%o7		! negative if hx0 > 0x3fe921fb
234	fands	%f0,%f30,%f9		! save signbit
235
236	lda	[%i1]%asi,%f10
237	orcc	%o7,%g1,%g0		! negative if either difference is
238	mov	%i3,%o0			! py0 = y
239	bl,pn	%icc,.range0		! if hx < 0x3e400000 or > 0x3fe921fb
240
241! delay slot
242	lda	[%i1+4]%asi,%f11
243	addcc	%i0,-1,%i0
244	add	%i3,%i4,%i3		! y += stridey
245	ble,pn	%icc,.endloop1
246
247! delay slot
248	andn	%l1,%i5,%l1
249	add	%i1,%i2,%i1		! x += stridex
250	fabsd	%f0,%f0
251	fmuld	%f54,%f54,%f54		! one*one; a nop for alignment only
252
253.loop1:
254	lda	[%i1]%asi,%l2		! preload next argument
255	sub	%l1,LIM_l6,%g1
256	sub	%l7,%l1,%o7
257	fands	%f10,%f30,%f19		! save signbit
258
259	lda	[%i1]%asi,%f20
260	orcc	%o7,%g1,%g0
261	mov	%i3,%o1			! py1 = y
262	bl,pn	%icc,.range1		! if hx < 0x3e400000 or > 0x3fe921fb
263
264! delay slot
265	lda	[%i1+4]%asi,%f21
266	addcc	%i0,-1,%i0
267	add	%i3,%i4,%i3		! y += stridey
268	ble,pn	%icc,.endloop2
269
270! delay slot
271	andn	%l2,%i5,%l2
272	add	%i1,%i2,%i1		! x += stridex
273	fabsd	%f10,%f10
274	fmuld	%f54,%f54,%f54		! one*one; a nop for alignment only
275
276.loop2:
277	st	%f6,[%o3]		! store previous x0 result
278	sub	%l2,LIM_l6,%g1
279	sub	%l7,%l2,%o7
280	fands	%f20,%f30,%f29		! save signbit
281
282	st	%f7,[%o3+4]
283	orcc	%g1,%o7,%g0
284	mov	%i3,%o2			! py2 = y
285	bl,pn	%icc,.range2		! if hx < 0x3e400000 or > 0x3fe921fb
286
287! delay slot
288	add	%i3,%i4,%i3		! y += stridey
289	cmp	%l0,%l5
290	fabsd	%f20,%f20
291	bl,pn	%icc,.case4		! if hx0 < 0x3fc90000
292
293! delay slot
294	st	%f16,[%o4]		! store previous x1 result
295	cmp	%l1,%l5
296	fpadd32s %f0,%f31,%f8		! high word of x0 plus 0x4000
297	bl,pn	%icc,.case2		! if hx1 < 0x3fc90000
298
299! delay slot
300	st	%f17,[%o4+4]
301	cmp	%l2,%l5
302	fpadd32s %f10,%f31,%f18
303	bl,pn	%icc,.case1		! if hx2 < 0x3fc90000
304
! From here on all three arguments take the table-based path.
305! delay slot
306	st	%f26,[%o5]		! store previous x2 result
307	mov	%o0,%o3			! remember py0 for the next pass
308	sethi	%hi(0x3fc3c000),%o7
309	fpadd32s %f20,%f31,%f28
310
311	st	%f27,[%o5+4]
312	fand	%f8,%f44,%f2		! f2 = rounded high word of x0, low word 0
313	mov	%o1,%o4
314
315	fand	%f18,%f44,%f12
316	mov	%o2,%o5
317	sub	%l0,%o7,%l0		! table offset = ((hx - 0x3fc3c000) >> 10) & ~0x1f
318
319	fand	%f28,%f44,%f22
320	sub	%l1,%o7,%l1
321	sub	%l2,%o7,%l2
322
323	fsubd	%f0,%f2,%f0		! f0 = x0 - f2
324	srl	%l0,10,%l0
325	add	%l3,8,%g1		! g1 = second double of each hi table entry
326
327	fsubd	%f10,%f12,%f10
328	srl	%l1,10,%l1
329
330	fsubd	%f20,%f22,%f20
331	srl	%l2,10,%l2
332
333	fmuld	%f0,%f0,%f2		! f2 = f0 * f0
334	andn	%l0,0x1f,%l0
335
336	fmuld	%f10,%f10,%f12
337	andn	%l1,0x1f,%l1
338
339	fmuld	%f20,%f20,%f22
340	andn	%l2,0x1f,%l2
341
342	fmuld	%f2,%f58,%f6		! pp2 polynomial, built up in f6
343	ldd	[%l3+%l0],%f32
344
345	fmuld	%f12,%f58,%f16
346	ldd	[%l3+%l1],%f36
347
348	fmuld	%f22,%f58,%f26
349	ldd	[%l3+%l2],%f40
350
351	faddd	%f6,%f56,%f6
352	fmuld	%f2,%f62,%f4		! qq2 polynomial, built up in f4
353	ldd	[%g1+%l0],%f34
354
355	faddd	%f16,%f56,%f16
356	fmuld	%f12,%f62,%f14
357	ldd	[%g1+%l1],%f38
358
359	faddd	%f26,%f56,%f26
360	fmuld	%f22,%f62,%f24
361	ldd	[%g1+%l2],%f42
362
363	fmuld	%f2,%f6,%f6
364	faddd	%f4,%f60,%f4
365
366	fmuld	%f12,%f16,%f16
367	faddd	%f14,%f60,%f14
368
369	fmuld	%f22,%f26,%f26
370	faddd	%f24,%f60,%f24
371
372	faddd	%f6,%f54,%f6
373	fmuld	%f2,%f4,%f4
374
375	faddd	%f16,%f54,%f16
376	fmuld	%f12,%f14,%f14
377
378	faddd	%f26,%f54,%f26
379	fmuld	%f22,%f24,%f24
380
381	fmuld	%f0,%f6,%f6
382	ldd	[%l4+%l0],%f2		! entry from the lo table
383
384	fmuld	%f10,%f16,%f16
385	ldd	[%l4+%l1],%f12
386
387	fmuld	%f20,%f26,%f26
388	ldd	[%l4+%l2],%f22
389
390	fmuld	%f4,%f32,%f4
391	lda	[%i1]%asi,%l0		! preload next argument
392
393	fmuld	%f14,%f36,%f14
394	lda	[%i1]%asi,%f0
395
396	fmuld	%f24,%f40,%f24
397	lda	[%i1+4]%asi,%f1
398
399	fmuld	%f6,%f34,%f6
400	add	%i1,%i2,%i1		! x += stridex
401
402	fmuld	%f16,%f38,%f16
403
404	fmuld	%f26,%f42,%f26
405
406	faddd	%f6,%f4,%f6
407
408	faddd	%f16,%f14,%f16
409
410	faddd	%f26,%f24,%f26
411
412	faddd	%f6,%f2,%f6
413
414	faddd	%f16,%f12,%f16
415
416	faddd	%f26,%f22,%f26
417
418	faddd	%f6,%f32,%f6
419
420	faddd	%f16,%f36,%f16
421
422	faddd	%f26,%f40,%f26
423	andn	%l0,%i5,%l0		! hx &= ~0x80000000
424
425	fors	%f6,%f9,%f6		! put the saved sign bit back
426	addcc	%i0,-1,%i0
427
428	fors	%f16,%f19,%f16
429	bg,pt	%icc,.loop0
430
431! delay slot
432	fors	%f26,%f29,%f26
433
! Count exhausted: .endloop0 stores the three results just computed.
434	ba,pt	%icc,.endloop0
435! delay slot
436	nop
437
! .case1: x0 and x1 take the table-based path, x2 (high word below
! 0x3fc90000) takes the polynomial path
!   x2 + x2*z*(p1 + z*(p2 + z*(p3 + z*p4))),  z = x2*x2.
! Reached from .loop2; the first word of the previous x2 result was
! stored in the delay slot of that branch and the second word is stored
! by the first instruction here.  Ends like the main path: preload the
! next x0, put the sign bits back, and loop or finish through .endloop0.
438	.align	32
439.case1:
440	st	%f27,[%o5+4]
441	sethi	%hi(0x3fc3c000),%o7
442	add	%l3,8,%g1
443	fand	%f8,%f44,%f2
444
445	sub	%l0,%o7,%l0
446	sub	%l1,%o7,%l1
447	fand	%f18,%f44,%f12
448	fmuld	%f20,%f20,%f22		! z = x2 * x2
449
450	fsubd	%f0,%f2,%f0
451	srl	%l0,10,%l0
452	mov	%o0,%o3
453
454	fsubd	%f10,%f12,%f10
455	srl	%l1,10,%l1
456	mov	%o1,%o4
457
458	fmuld	%f22,%f52,%f24		! z * p4
459	mov	%o2,%o5
460
461	fmuld	%f0,%f0,%f2
462	andn	%l0,0x1f,%l0
463
464	fmuld	%f10,%f10,%f12
465	andn	%l1,0x1f,%l1
466
467	faddd	%f24,%f50,%f24		! + p3
468
469	fmuld	%f2,%f58,%f6
470	ldd	[%l3+%l0],%f32
471
472	fmuld	%f12,%f58,%f16
473	ldd	[%l3+%l1],%f36
474
475	fmuld	%f22,%f24,%f24
476
477	faddd	%f6,%f56,%f6
478	fmuld	%f2,%f62,%f4
479	ldd	[%g1+%l0],%f34
480
481	faddd	%f16,%f56,%f16
482	fmuld	%f12,%f62,%f14
483	ldd	[%g1+%l1],%f38
484
485	faddd	%f24,%f48,%f24		! + p2
486
487	fmuld	%f2,%f6,%f6
488	faddd	%f4,%f60,%f4
489
490	fmuld	%f12,%f16,%f16
491	faddd	%f14,%f60,%f14
492
493	fmuld	%f22,%f24,%f24
494
495	faddd	%f6,%f54,%f6
496	fmuld	%f2,%f4,%f4
497
498	faddd	%f16,%f54,%f16
499	fmuld	%f12,%f14,%f14
500
501	faddd	%f24,%f46,%f24		! + p1
502
503	fmuld	%f0,%f6,%f6
504	ldd	[%l4+%l0],%f2
505
506	fmuld	%f10,%f16,%f16
507	ldd	[%l4+%l1],%f12
508
509	fmuld	%f4,%f32,%f4
510	lda	[%i1]%asi,%l0		! preload next argument
511
512	fmuld	%f14,%f36,%f14
513	lda	[%i1]%asi,%f0
514
515	fmuld	%f6,%f34,%f6
516	lda	[%i1+4]%asi,%f1
517
518	fmuld	%f16,%f38,%f16
519	add	%i1,%i2,%i1		! x += stridex
520
521	fmuld	%f22,%f24,%f24
522
523	faddd	%f6,%f4,%f6
524
525	faddd	%f16,%f14,%f16
526
527	fmuld	%f20,%f24,%f24
528
529	faddd	%f6,%f2,%f6
530
531	faddd	%f16,%f12,%f16
532
533	faddd	%f20,%f24,%f26		! x2 result
534
535	faddd	%f6,%f32,%f6
536
537	faddd	%f16,%f36,%f16
538	andn	%l0,%i5,%l0		! hx &= ~0x80000000
539
540	fors	%f26,%f29,%f26
541	addcc	%i0,-1,%i0
542
543	fors	%f6,%f9,%f6
544	bg,pt	%icc,.loop0
545
546! delay slot
547	fors	%f16,%f19,%f16
548
549	ba,pt	%icc,.endloop0
550! delay slot
551	nop
552
! .case2: x1 has a high word below 0x3fc90000 and takes the polynomial
! path.  x2 is tested first; if it is also below the limit control moves
! on to .case3.  Otherwise x0 and x2 take the table-based path here.
! Reached from .loop2 after both words of the previous x1 result were
! stored; the previous x2 result is stored by the first instruction and
! by the delay slot of the branch to .case3, which executes on both
! outcomes of that branch.
553	.align	32
554.case2:
555	st	%f26,[%o5]
556	cmp	%l2,%l5
557	fpadd32s %f20,%f31,%f28
558	bl,pn	%icc,.case3
559
560! delay slot
561	st	%f27,[%o5+4]
562	sethi	%hi(0x3fc3c000),%o7
563	add	%l3,8,%g1
564	fand	%f8,%f44,%f2
565
566	sub	%l0,%o7,%l0
567	sub	%l2,%o7,%l2
568	fand	%f28,%f44,%f22
569	fmuld	%f10,%f10,%f12		! z = x1 * x1
570
571	fsubd	%f0,%f2,%f0
572	srl	%l0,10,%l0
573	mov	%o0,%o3
574
575	fsubd	%f20,%f22,%f20
576	srl	%l2,10,%l2
577	mov	%o2,%o5
578
579	fmuld	%f12,%f52,%f14		! z * p4
580	mov	%o1,%o4
581
582	fmuld	%f0,%f0,%f2
583	andn	%l0,0x1f,%l0
584
585	fmuld	%f20,%f20,%f22
586	andn	%l2,0x1f,%l2
587
588	faddd	%f14,%f50,%f14		! + p3
589
590	fmuld	%f2,%f58,%f6
591	ldd	[%l3+%l0],%f32
592
593	fmuld	%f22,%f58,%f26
594	ldd	[%l3+%l2],%f40
595
596	fmuld	%f12,%f14,%f14
597
598	faddd	%f6,%f56,%f6
599	fmuld	%f2,%f62,%f4
600	ldd	[%g1+%l0],%f34
601
602	faddd	%f26,%f56,%f26
603	fmuld	%f22,%f62,%f24
604	ldd	[%g1+%l2],%f42
605
606	faddd	%f14,%f48,%f14		! + p2
607
608	fmuld	%f2,%f6,%f6
609	faddd	%f4,%f60,%f4
610
611	fmuld	%f22,%f26,%f26
612	faddd	%f24,%f60,%f24
613
614	fmuld	%f12,%f14,%f14
615
616	faddd	%f6,%f54,%f6
617	fmuld	%f2,%f4,%f4
618
619	faddd	%f26,%f54,%f26
620	fmuld	%f22,%f24,%f24
621
622	faddd	%f14,%f46,%f14		! + p1
623
624	fmuld	%f0,%f6,%f6
625	ldd	[%l4+%l0],%f2
626
627	fmuld	%f20,%f26,%f26
628	ldd	[%l4+%l2],%f22
629
630	fmuld	%f4,%f32,%f4
631	lda	[%i1]%asi,%l0		! preload next argument
632
633	fmuld	%f24,%f40,%f24
634	lda	[%i1]%asi,%f0
635
636	fmuld	%f6,%f34,%f6
637	lda	[%i1+4]%asi,%f1
638
639	fmuld	%f26,%f42,%f26
640	add	%i1,%i2,%i1		! x += stridex
641
642	fmuld	%f12,%f14,%f14
643
644	faddd	%f6,%f4,%f6
645
646	faddd	%f26,%f24,%f26
647
648	fmuld	%f10,%f14,%f14
649
650	faddd	%f6,%f2,%f6
651
652	faddd	%f26,%f22,%f26
653
654	faddd	%f10,%f14,%f16		! x1 result
655
656	faddd	%f6,%f32,%f6
657
658	faddd	%f26,%f40,%f26
659	andn	%l0,%i5,%l0		! hx &= ~0x80000000
660
661	fors	%f16,%f19,%f16
662	addcc	%i0,-1,%i0
663
664	fors	%f6,%f9,%f6
665	bg,pt	%icc,.loop0
666
667! delay slot
668	fors	%f26,%f29,%f26
669
670	ba,pt	%icc,.endloop0
671! delay slot
672	nop
673
! .case3: x1 and x2 take the polynomial path, x0 takes the table-based
! path.  Reached from .case2; all results of the previous pass have been
! stored by then, so no stores are needed here.
674	.align	32
675.case3:
676	sethi	%hi(0x3fc3c000),%o7
677	add	%l3,8,%g1
678	fand	%f8,%f44,%f2
679	fmuld	%f10,%f10,%f12		! x1 * x1
680
681	sub	%l0,%o7,%l0
682	fmuld	%f20,%f20,%f22		! x2 * x2
683
684	fsubd	%f0,%f2,%f0
685	srl	%l0,10,%l0
686	mov	%o0,%o3
687
688	fmuld	%f12,%f52,%f14
689	mov	%o1,%o4
690
691	fmuld	%f22,%f52,%f24
692	mov	%o2,%o5
693
694	fmuld	%f0,%f0,%f2
695	andn	%l0,0x1f,%l0
696
697	faddd	%f14,%f50,%f14
698
699	faddd	%f24,%f50,%f24
700
701	fmuld	%f2,%f58,%f6
702	ldd	[%l3+%l0],%f32
703
704	fmuld	%f12,%f14,%f14
705
706	fmuld	%f22,%f24,%f24
707
708	faddd	%f6,%f56,%f6
709	fmuld	%f2,%f62,%f4
710	ldd	[%g1+%l0],%f34
711
712	faddd	%f14,%f48,%f14
713
714	faddd	%f24,%f48,%f24
715
716	fmuld	%f2,%f6,%f6
717	faddd	%f4,%f60,%f4
718
719	fmuld	%f12,%f14,%f14
720
721	fmuld	%f22,%f24,%f24
722
723	faddd	%f6,%f54,%f6
724	fmuld	%f2,%f4,%f4
725
726	faddd	%f14,%f46,%f14
727
728	faddd	%f24,%f46,%f24
729
730	fmuld	%f0,%f6,%f6
731	ldd	[%l4+%l0],%f2
732
733	fmuld	%f4,%f32,%f4
734	lda	[%i1]%asi,%l0		! preload next argument
735
736	fmuld	%f12,%f14,%f14
737	lda	[%i1]%asi,%f0
738
739	fmuld	%f6,%f34,%f6
740	lda	[%i1+4]%asi,%f1
741
742	fmuld	%f22,%f24,%f24
743	add	%i1,%i2,%i1		! x += stridex
744
745	fmuld	%f10,%f14,%f14
746
747	faddd	%f6,%f4,%f6
748
749	fmuld	%f20,%f24,%f24
750
751	faddd	%f10,%f14,%f16		! x1 result
752
753	faddd	%f6,%f2,%f6
754
755	faddd	%f20,%f24,%f26		! x2 result
756
757	fors	%f16,%f19,%f16
758	andn	%l0,%i5,%l0		! hx &= ~0x80000000
759
760	faddd	%f6,%f32,%f6
761	addcc	%i0,-1,%i0
762
763	fors	%f26,%f29,%f26
764	bg,pt	%icc,.loop0
765
766! delay slot
767	fors	%f6,%f9,%f6
768
769	ba,pt	%icc,.endloop0
770! delay slot
771	nop
772
! .case4: x0 has a high word below 0x3fc90000 and takes the polynomial
! path.  x1 is tested (below the limit: .case6), then x2 (below the
! limit: .case5); if neither is, x1 and x2 take the table-based path
! here.  Reached from .loop2 after the first word of the previous x1
! result was stored; the remaining pending stores are done by the first
! instruction and in the two branch delay slots.
! x0 is copied to f6 because f0 is overwritten by the preload of the
! next argument before the polynomial is finished.
773	.align	32
774.case4:
775	st	%f17,[%o4+4]
776	cmp	%l1,%l5
777	fpadd32s %f10,%f31,%f18
778	bl,pn	%icc,.case6
779
780! delay slot
781	st	%f26,[%o5]
782	cmp	%l2,%l5
783	fpadd32s %f20,%f31,%f28
784	bl,pn	%icc,.case5
785
786! delay slot
787	st	%f27,[%o5+4]
788	sethi	%hi(0x3fc3c000),%o7
789	add	%l3,8,%g1
790	fand	%f18,%f44,%f12
791
792	sub	%l1,%o7,%l1
793	sub	%l2,%o7,%l2
794	fand	%f28,%f44,%f22
795	fmuld	%f0,%f0,%f2		! z = x0 * x0
796
797	fsubd	%f10,%f12,%f10
798	srl	%l1,10,%l1
799	mov	%o1,%o4
800
801	fsubd	%f20,%f22,%f20
802	srl	%l2,10,%l2
803	mov	%o2,%o5
804
805	fmovd	%f0,%f6			! keep x0; f0 is reloaded below
806	fmuld	%f2,%f52,%f4
807	mov	%o0,%o3
808
809	fmuld	%f10,%f10,%f12
810	andn	%l1,0x1f,%l1
811
812	fmuld	%f20,%f20,%f22
813	andn	%l2,0x1f,%l2
814
815	faddd	%f4,%f50,%f4
816
817	fmuld	%f12,%f58,%f16
818	ldd	[%l3+%l1],%f36
819
820	fmuld	%f22,%f58,%f26
821	ldd	[%l3+%l2],%f40
822
823	fmuld	%f2,%f4,%f4
824
825	faddd	%f16,%f56,%f16
826	fmuld	%f12,%f62,%f14
827	ldd	[%g1+%l1],%f38
828
829	faddd	%f26,%f56,%f26
830	fmuld	%f22,%f62,%f24
831	ldd	[%g1+%l2],%f42
832
833	faddd	%f4,%f48,%f4
834
835	fmuld	%f12,%f16,%f16
836	faddd	%f14,%f60,%f14
837
838	fmuld	%f22,%f26,%f26
839	faddd	%f24,%f60,%f24
840
841	fmuld	%f2,%f4,%f4
842
843	faddd	%f16,%f54,%f16
844	fmuld	%f12,%f14,%f14
845
846	faddd	%f26,%f54,%f26
847	fmuld	%f22,%f24,%f24
848
849	faddd	%f4,%f46,%f4
850
851	fmuld	%f10,%f16,%f16
852	ldd	[%l4+%l1],%f12
853
854	fmuld	%f20,%f26,%f26
855	ldd	[%l4+%l2],%f22
856
857	fmuld	%f14,%f36,%f14
858	lda	[%i1]%asi,%l0		! preload next argument
859
860	fmuld	%f24,%f40,%f24
861	lda	[%i1]%asi,%f0
862
863	fmuld	%f16,%f38,%f16
864	lda	[%i1+4]%asi,%f1
865
866	fmuld	%f26,%f42,%f26
867	add	%i1,%i2,%i1		! x += stridex
868
869	fmuld	%f2,%f4,%f4
870
871	faddd	%f16,%f14,%f16
872
873	faddd	%f26,%f24,%f26
874
875	fmuld	%f6,%f4,%f4
876
877	faddd	%f16,%f12,%f16
878
879	faddd	%f26,%f22,%f26
880
881	faddd	%f6,%f4,%f6		! x0 result
882
883	faddd	%f16,%f36,%f16
884
885	faddd	%f26,%f40,%f26
886	andn	%l0,%i5,%l0		! hx &= ~0x80000000
887
888	fors	%f6,%f9,%f6
889	addcc	%i0,-1,%i0
890
891	fors	%f16,%f19,%f16
892	bg,pt	%icc,.loop0
893
894! delay slot
895	fors	%f26,%f29,%f26
896
897	ba,pt	%icc,.endloop0
898! delay slot
899	nop
900
! .case5: x0 and x2 take the polynomial path, x1 takes the table-based
! path.  Reached from .case4; all results of the previous pass have been
! stored by then.  x0 is copied to f6 because f0 is overwritten by the
! preload of the next argument.
901	.align	32
902.case5:
903	sethi	%hi(0x3fc3c000),%o7
904	add	%l3,8,%g1
905	fand	%f18,%f44,%f12
906	fmuld	%f0,%f0,%f2		! x0 * x0
907
908	sub	%l1,%o7,%l1
909	fmuld	%f20,%f20,%f22		! x2 * x2
910
911	fsubd	%f10,%f12,%f10
912	srl	%l1,10,%l1
913	mov	%o1,%o4
914
915	fmovd	%f0,%f6			! keep x0; f0 is reloaded below
916	fmuld	%f2,%f52,%f4
917	mov	%o0,%o3
918
919	fmuld	%f22,%f52,%f24
920	mov	%o2,%o5
921
922	fmuld	%f10,%f10,%f12
923	andn	%l1,0x1f,%l1
924
925	faddd	%f4,%f50,%f4
926
927	faddd	%f24,%f50,%f24
928
929	fmuld	%f12,%f58,%f16
930	ldd	[%l3+%l1],%f36
931
932	fmuld	%f2,%f4,%f4
933
934	fmuld	%f22,%f24,%f24
935
936	faddd	%f16,%f56,%f16
937	fmuld	%f12,%f62,%f14
938	ldd	[%g1+%l1],%f38
939
940	faddd	%f4,%f48,%f4
941
942	faddd	%f24,%f48,%f24
943
944	fmuld	%f12,%f16,%f16
945	faddd	%f14,%f60,%f14
946
947	fmuld	%f2,%f4,%f4
948
949	fmuld	%f22,%f24,%f24
950
951	faddd	%f16,%f54,%f16
952	fmuld	%f12,%f14,%f14
953
954	faddd	%f4,%f46,%f4
955
956	faddd	%f24,%f46,%f24
957
958	fmuld	%f10,%f16,%f16
959	ldd	[%l4+%l1],%f12
960
961	fmuld	%f14,%f36,%f14
962	lda	[%i1]%asi,%l0		! preload next argument
963
964	fmuld	%f2,%f4,%f4
965	lda	[%i1]%asi,%f0
966
967	fmuld	%f16,%f38,%f16
968	lda	[%i1+4]%asi,%f1
969
970	fmuld	%f22,%f24,%f24
971	add	%i1,%i2,%i1		! x += stridex
972
973	fmuld	%f6,%f4,%f4
974
975	faddd	%f16,%f14,%f16
976
977	fmuld	%f20,%f24,%f24
978
979	faddd	%f6,%f4,%f6		! x0 result
980
981	faddd	%f16,%f12,%f16
982
983	faddd	%f20,%f24,%f26		! x2 result
984
985	fors	%f6,%f9,%f6
986	andn	%l0,%i5,%l0		! hx &= ~0x80000000
987
988	faddd	%f16,%f36,%f16
989	addcc	%i0,-1,%i0
990
991	fors	%f26,%f29,%f26
992	bg,pt	%icc,.loop0
993
994! delay slot
995	fors	%f16,%f19,%f16
996
997	ba,pt	%icc,.endloop0
998! delay slot
999	nop
1000
! .case6: x0 and x1 take the polynomial path.  x2 is tested; if it is
! also below 0x3fc90000 control moves on to .case7, otherwise x2 takes
! the table-based path here.  Reached from .case4 after the first word
! of the previous x2 result was stored; the second word is stored by the
! first instruction.  The sethi in the delay slot of the branch to
! .case7 also executes when that branch is taken.
1001	.align	32
1002.case6:
1003	st	%f27,[%o5+4]
1004	cmp	%l2,%l5
1005	fpadd32s %f20,%f31,%f28
1006	bl,pn	%icc,.case7
1007
1008! delay slot
1009	sethi	%hi(0x3fc3c000),%o7
1010	add	%l3,8,%g1
1011	fand	%f28,%f44,%f22
1012	fmuld	%f0,%f0,%f2		! x0 * x0
1013
1014	sub	%l2,%o7,%l2
1015	fmuld	%f10,%f10,%f12		! x1 * x1
1016
1017	fsubd	%f20,%f22,%f20
1018	srl	%l2,10,%l2
1019	mov	%o2,%o5
1020
1021	fmovd	%f0,%f6			! keep x0; f0 is reloaded below
1022	fmuld	%f2,%f52,%f4
1023	mov	%o0,%o3
1024
1025	fmuld	%f12,%f52,%f14
1026	mov	%o1,%o4
1027
1028	fmuld	%f20,%f20,%f22
1029	andn	%l2,0x1f,%l2
1030
1031	faddd	%f4,%f50,%f4
1032
1033	faddd	%f14,%f50,%f14
1034
1035	fmuld	%f22,%f58,%f26
1036	ldd	[%l3+%l2],%f40
1037
1038	fmuld	%f2,%f4,%f4
1039
1040	fmuld	%f12,%f14,%f14
1041
1042	faddd	%f26,%f56,%f26
1043	fmuld	%f22,%f62,%f24
1044	ldd	[%g1+%l2],%f42
1045
1046	faddd	%f4,%f48,%f4
1047
1048	faddd	%f14,%f48,%f14
1049
1050	fmuld	%f22,%f26,%f26
1051	faddd	%f24,%f60,%f24
1052
1053	fmuld	%f2,%f4,%f4
1054
1055	fmuld	%f12,%f14,%f14
1056
1057	faddd	%f26,%f54,%f26
1058	fmuld	%f22,%f24,%f24
1059
1060	faddd	%f4,%f46,%f4
1061
1062	faddd	%f14,%f46,%f14
1063
1064	fmuld	%f20,%f26,%f26
1065	ldd	[%l4+%l2],%f22
1066
1067	fmuld	%f24,%f40,%f24
1068	lda	[%i1]%asi,%l0		! preload next argument
1069
1070	fmuld	%f2,%f4,%f4
1071	lda	[%i1]%asi,%f0
1072
1073	fmuld	%f26,%f42,%f26
1074	lda	[%i1+4]%asi,%f1
1075
1076	fmuld	%f12,%f14,%f14
1077	add	%i1,%i2,%i1		! x += stridex
1078
1079	fmuld	%f6,%f4,%f4
1080
1081	faddd	%f26,%f24,%f26
1082
1083	fmuld	%f10,%f14,%f14
1084
1085	faddd	%f6,%f4,%f6		! x0 result
1086
1087	faddd	%f26,%f22,%f26
1088
1089	faddd	%f10,%f14,%f16		! x1 result
1090
1091	fors	%f6,%f9,%f6
1092	andn	%l0,%i5,%l0		! hx &= ~0x80000000
1093
1094	faddd	%f26,%f40,%f26
1095	addcc	%i0,-1,%i0
1096
1097	fors	%f16,%f19,%f16
1098	bg,pt	%icc,.loop0
1099
1100! delay slot
1101	fors	%f26,%f29,%f26
1102
1103	ba,pt	%icc,.endloop0
1104! delay slot
1105	nop
1106
! .case7: all three arguments have a high word below 0x3fc90000 and take
! the polynomial path
!   x + x*z*(p1 + z*(p2 + z*(p3 + z*p4))),  z = x*x,
! with p4, p3, p2, p1 in f52, f50, f48, f46.  Reached from .case6; all
! results of the previous pass have been stored by then.  x0 is copied
! to f6 because f0 is overwritten by the preload of the next argument.
1107	.align	32
1108.case7:
1109	fmuld	%f0,%f0,%f2
1110	fmovd	%f0,%f6
1111	mov	%o0,%o3
1112
1113	fmuld	%f10,%f10,%f12
1114	mov	%o1,%o4
1115
1116	fmuld	%f20,%f20,%f22
1117	mov	%o2,%o5
1118
1119	fmuld	%f2,%f52,%f4		! z * p4
1120	lda	[%i1]%asi,%l0		! preload next argument
1121
1122	fmuld	%f12,%f52,%f14
1123	lda	[%i1]%asi,%f0
1124
1125	fmuld	%f22,%f52,%f24
1126	lda	[%i1+4]%asi,%f1
1127
1128	faddd	%f4,%f50,%f4		! + p3
1129	add	%i1,%i2,%i1		! x += stridex
1130
1131	faddd	%f14,%f50,%f14
1132
1133	faddd	%f24,%f50,%f24
1134
1135	fmuld	%f2,%f4,%f4
1136
1137	fmuld	%f12,%f14,%f14
1138
1139	fmuld	%f22,%f24,%f24
1140
1141	faddd	%f4,%f48,%f4		! + p2
1142
1143	faddd	%f14,%f48,%f14
1144
1145	faddd	%f24,%f48,%f24
1146
1147	fmuld	%f2,%f4,%f4
1148
1149	fmuld	%f12,%f14,%f14
1150
1151	fmuld	%f22,%f24,%f24
1152
1153	faddd	%f4,%f46,%f4		! + p1
1154
1155	faddd	%f14,%f46,%f14
1156
1157	faddd	%f24,%f46,%f24
1158
1159	fmuld	%f2,%f4,%f4
1160
1161	fmuld	%f12,%f14,%f14
1162
1163	fmuld	%f22,%f24,%f24
1164
1165	fmuld	%f6,%f4,%f4		! times x
1166
1167	fmuld	%f10,%f14,%f14
1168
1169	fmuld	%f20,%f24,%f24
1170
1171	faddd	%f6,%f4,%f6		! plus x
1172
1173	faddd	%f10,%f14,%f16
1174
1175	faddd	%f20,%f24,%f26
1176	andn	%l0,%i5,%l0		! hx &= ~0x80000000
1177
1178	fors	%f6,%f9,%f6
1179	addcc	%i0,-1,%i0
1180
1181	fors	%f16,%f19,%f16
1182	bg,pt	%icc,.loop0
1183
1184! delay slot
1185	fors	%f26,%f29,%f26
1186
1187	ba,pt	%icc,.endloop0
1188! delay slot
1189	nop
1190
1191
! .endloop2: the element count ran out with x0 and x1 still pending.
! This part evaluates x1 on its own, using the table-based path or, when
! its high word is below 0x3fc90000, the polynomial path at local label
! 1.  The result is built in f20, gets the saved sign bit f19, and is
! stored through o1.  Control then falls into .endloop1 to finish x0.
1192	.align	32
1193.endloop2:
1194	cmp	%l1,%l5
1195	bl,pn	%icc,1f
1196! delay slot
1197	fabsd	%f10,%f10
1198	sethi	%hi(0x3fc3c000),%o7
1199	fpadd32s %f10,%f31,%f18
1200	add	%l3,8,%g1
1201	fand	%f18,%f44,%f12
1202	sub	%l1,%o7,%l1
1203	fsubd	%f10,%f12,%f10
1204	srl	%l1,10,%l1
1205	fmuld	%f10,%f10,%f12
1206	andn	%l1,0x1f,%l1
1207	fmuld	%f12,%f58,%f20
1208	ldd	[%l3+%l1],%f36
1209	faddd	%f20,%f56,%f20
1210	fmuld	%f12,%f62,%f14
1211	ldd	[%g1+%l1],%f38
1212	fmuld	%f12,%f20,%f20
1213	faddd	%f14,%f60,%f14
1214	faddd	%f20,%f54,%f20
1215	fmuld	%f12,%f14,%f14
1216	fmuld	%f10,%f20,%f20
1217	ldd	[%l4+%l1],%f12
1218	fmuld	%f14,%f36,%f14
1219	fmuld	%f20,%f38,%f20
1220	faddd	%f20,%f14,%f20
1221	faddd	%f20,%f12,%f20
1222	ba,pt	%icc,2f
1223! delay slot
1224	faddd	%f20,%f36,%f20
! polynomial path for x1
12251:
1226	fmuld	%f10,%f10,%f12
1227	fmuld	%f12,%f52,%f14
1228	faddd	%f14,%f50,%f14
1229	fmuld	%f12,%f14,%f14
1230	faddd	%f14,%f48,%f14
1231	fmuld	%f12,%f14,%f14
1232	faddd	%f14,%f46,%f14
1233	fmuld	%f12,%f14,%f14
1234	fmuld	%f10,%f14,%f14
1235	faddd	%f10,%f14,%f20
! both paths join here: apply the sign and store *py1
12362:
1237	fors	%f20,%f19,%f20
1238	st	%f20,[%o1]
1239	st	%f21,[%o1+4]
1240
! .endloop1: x0 is the only argument still pending (reached directly when
! the count runs out after x0, or by falling through from .endloop2).
! x0 is evaluated with the table-based path or, when its high word is
! below 0x3fc90000, with the polynomial path at local label 1.  The
! result is built in f20, gets the saved sign bit f9, and is stored
! through o0.  Control then falls into .endloop0.
1241.endloop1:
1242	cmp	%l0,%l5
1243	bl,pn	%icc,1f
1244! delay slot
1245	fabsd	%f0,%f0
1246	sethi	%hi(0x3fc3c000),%o7
1247	fpadd32s %f0,%f31,%f8
1248	add	%l3,8,%g1
1249	fand	%f8,%f44,%f2
1250	sub	%l0,%o7,%l0
1251	fsubd	%f0,%f2,%f0
1252	srl	%l0,10,%l0
1253	fmuld	%f0,%f0,%f2
1254	andn	%l0,0x1f,%l0
1255	fmuld	%f2,%f58,%f20
1256	ldd	[%l3+%l0],%f32
1257	faddd	%f20,%f56,%f20
1258	fmuld	%f2,%f62,%f4
1259	ldd	[%g1+%l0],%f34
1260	fmuld	%f2,%f20,%f20
1261	faddd	%f4,%f60,%f4
1262	faddd	%f20,%f54,%f20
1263	fmuld	%f2,%f4,%f4
1264	fmuld	%f0,%f20,%f20
1265	ldd	[%l4+%l0],%f2
1266	fmuld	%f4,%f32,%f4
1267	fmuld	%f20,%f34,%f20
1268	faddd	%f20,%f4,%f20
1269	faddd	%f20,%f2,%f20
1270	ba,pt	%icc,2f
1271! delay slot
1272	faddd	%f20,%f32,%f20
! polynomial path for x0
12731:
1274	fmuld	%f0,%f0,%f2
1275	fmuld	%f2,%f52,%f4
1276	faddd	%f4,%f50,%f4
1277	fmuld	%f2,%f4,%f4
1278	faddd	%f4,%f48,%f4
1279	fmuld	%f2,%f4,%f4
1280	faddd	%f4,%f46,%f4
1281	fmuld	%f2,%f4,%f4
1282	fmuld	%f0,%f4,%f4
1283	faddd	%f0,%f4,%f20
! both paths join here: apply the sign and store *py0
12842:
1285	fors	%f20,%f9,%f20
1286	st	%f20,[%o0]
1287	st	%f21,[%o0+4]
1288
! .endloop0: common exit of the primary-range code.  Stores the three
! results held in f6, f16 and f26 through the saved output pointers o3,
! o4 and o5, then returns.  When no full pass has completed, those
! pointers still address the scratch stack slot set up at entry.
1289.endloop0:
1290	st	%f6,[%o3]
1291	st	%f7,[%o3+4]
1292	st	%f16,[%o4]
1293	st	%f17,[%o4+4]
1294	st	%f26,[%o5]
1295	st	%f27,[%o5+4]
1296
1297! return.  finished off with only primary range arguments.
1298
1299	ret
1300	restore
1301
1302
! .range0: x0 is outside the primary range (high word below 0x3e400000
! or above 0x3fe921fb).
!   Not tiny: go to .MEDIUM with LIM_l6 = 1 to record that .loop0 was
!   active (the mov sits in an annulled delay slot, so it only executes
!   when the branch is taken).
!   Tiny: store x0 itself as the result.  If no elements remain, finish
!   through .endloop0; otherwise move the preloaded argument (l1, f10)
!   into the x0 slot, advance x, and restart .loop0.
1303	.align	32
1304.range0:
1305	cmp	%l0,LIM_l6
1306	bg,a,pt	%icc,.MEDIUM		! branch if x is not tiny
1307! delay slot, annulled if branch not taken
1308	mov	0x1,LIM_l6		! set "processing loop0"
1309	st	%f0,[%o0]		! *y = *x with inexact if x nonzero
1310	st	%f1,[%o0+4]
1311	fdtoi	%f0,%f2
1312	addcc	%i0,-1,%i0
1313	ble,pn	%icc,.endloop0
1314! delay slot, harmless if branch taken
1315	add	%i3,%i4,%i3		! y += stridey
1316	andn	%l1,%i5,%l0		! hx &= ~0x80000000
1317	fmovd	%f10,%f0
1318	ba,pt	%icc,.loop0
1319! delay slot
1320	add	%i1,%i2,%i1		! x += stridex
1321
1322
! .range1: x1 is outside the primary range (high word below 0x3e400000
! or above 0x3fe921fb).
!   Not tiny: go to .MEDIUM with LIM_l6 = 2 to record that .loop1 was
!   active (set in an annulled delay slot).
!   Tiny: store x1 itself as the result.  If no elements remain, finish
!   the pending x0 through .endloop1; otherwise move the preloaded
!   argument (l2, f20) into the x1 slot, advance x, and restart .loop1.
1323	.align	32
1324.range1:
1325	cmp	%l1,LIM_l6
1326	bg,a,pt	%icc,.MEDIUM		! branch if x is not tiny
1327! delay slot, annulled if branch not taken
1328	mov	0x2,LIM_l6		! set "processing loop1"
1329	st	%f10,[%o1]		! *y = *x with inexact if x nonzero
1330	st	%f11,[%o1+4]
1331	fdtoi	%f10,%f12
1332	addcc	%i0,-1,%i0
1333	ble,pn	%icc,.endloop1
1334! delay slot, harmless if branch taken
1335	add	%i3,%i4,%i3		! y += stridey
1336	andn	%l2,%i5,%l1		! hx &= ~0x80000000
1337	fmovd	%f20,%f10
1338	ba,pt	%icc,.loop1
1339! delay slot
1340	add	%i1,%i2,%i1		! x += stridex
1341
1342
! .range2: x2 is outside the primary range (high word below 0x3e400000
! or above 0x3fe921fb).
!   Not tiny: go to .MEDIUM with LIM_l6 = 3 to record that .loop2 was
!   active (set in an annulled delay slot).
!   Tiny: store x2 itself as the result.  If no elements remain, finish
!   the pending x0 and x1 through .endloop2; otherwise load the next
!   argument into the x2 slot, advance x, and restart .loop2.
! y was already advanced in the delay slot of the branch that led here,
! so it is not advanced again.
1343	.align	32
1344.range2:
1345	cmp	%l2,LIM_l6
1346	bg,a,pt	%icc,.MEDIUM		! branch if x is not tiny
1347! delay slot, annulled if branch not taken
1348	mov	0x3,LIM_l6		! set "processing loop2"
1349	st	%f20,[%o2]		! *y = *x with inexact if x nonzero
1350	st	%f21,[%o2+4]
1351	fdtoi	%f20,%f22
13521:
1353	addcc	%i0,-1,%i0
1354	ble,pn	%icc,.endloop2
1355! delay slot
1356	nop
1357	ld	[%i1],%l2		! next argument becomes x2
1358	ld	[%i1],%f20
1359	ld	[%i1+4],%f21
1360	andn	%l2,%i5,%l2		! hx &= ~0x80000000
1361	ba,pt	%icc,.loop2
1362! delay slot
1363	add	%i1,%i2,%i1		! x += stridex
1364
1365
! .MEDIUM: transition from the primary-range loop to the medium-range
! loop.  Reached from .range0, .range1 or .range2 with LIM_l6 = 1, 2 or 3
! naming the loop stage that was active.
!   1. Reload the address of the constants into l5 (l5 held 0x3fc90000
!      in the primary loop) and set l7 to the medium-range limit.
!   2. Store the results of the previous primary pass through o3, o4, o5.
!   3. Load the argument-reduction constants and initialise the stack
!      scratch slots.
!   4. Re-enter at .LOOP0, .LOOP1 or .LOOP2, first redoing for the
!      arguments already accepted by the primary loop the work that the
!      earlier medium-range stages would have done.
1366	.align	32
1367.MEDIUM:
1368
1369! ========== medium range ==========
1370
1371! register use
1372
1373! i0  n
1374! i1  x
1375! i2  stridex
1376! i3  y
1377! i4  stridey
1378! i5  0x80000000
1379
1380! l0  hx0
1381! l1  hx1
1382! l2  hx2
1383! l3  __vlibm_TBL_sincos_hi
1384! l4  __vlibm_TBL_sincos_lo
1385! l5  constants
1386! l6  in transition from pri-range and here, use for biguns
1387! l7  0x413921fb
1388
1389! the following are 64-bit registers in both V8+ and V9
1390
1391! g1  scratch
1392! g5
1393
1394! o0  py0
1395! o1  py1
1396! o2  py2
1397! o3  n0
1398! o4  n1
1399! o5  n2
1400! o7  scratch
1401
1402! f0  x0
1403! f2  n0,y0
1404! f4
1405! f6
1406! f8  scratch for table base
1407! f9  signbit0
1408! f10 x1
1409! f12 n1,y1
1410! f14
1411! f16
1412! f18 scratch for table base
1413! f19 signbit1
1414! f20 x2
1415! f22 n2,y2
1416! f24
1417! f26
1418! f28 scratch for table base
1419! f29 signbit2
1420! f30 0x80000000
1421! f31 0x4000
1422! f32
1423! f34
1424! f36
1425! f38
1426! f40 invpio2
1427! f42 round
1428! f44 0xffff800000000000
1429! f46 pio2_1
1430! f48 pio2_2
1431! f50 pio2_3
1432! f52 pio2_3t
1433! f54 one
1434! f56 pp1
1435! f58 pp2
1436! f60 qq1
1437! f62 qq2
1438
1439	PIC_SET(g5,constants,l5)
1440
1441	! %o3,%o4,%o5 need to be stored
1442	st	%f6,[%o3]
1443	sethi	%hi(0x413921fb),%l7
1444	st	%f7,[%o3+4]
1445	or	%l7,%lo(0x413921fb),%l7
1446	st	%f16,[%o4]
1447	st	%f17,[%o4+4]
1448	st	%f26,[%o5]
1449	st	%f27,[%o5+4]
1450	ldd	[%l5+invpio2],%f40
1451	ldd	[%l5+round],%f42
1452	ldd	[%l5+pio2_1],%f46
1453	ldd	[%l5+pio2_2],%f48
1454	ldd	[%l5+pio2_3],%f50
1455	ldd	[%l5+pio2_3t],%f52
1456	std	%f54,[%fp+x0_1+8]	! set up stack data
1457	std	%f54,[%fp+x1_1+8]
1458	std	%f54,[%fp+x2_1+8]
1459	stx	%g0,[%fp+y0_0+8]
1460	stx	%g0,[%fp+y1_0+8]
1461	stx	%g0,[%fp+y2_0+8]
1462
1463!	branched here in the middle of the array.  Need to adjust
1464!	for the members of the triple that were selected in the primary
1465!	loop.
1466
1467!	no adjustment since all three selected here
1468	subcc	LIM_l6,0x1,%g0		! continue in LOOP0?
1469	bz,a	%icc,.LOOP0
1470	mov	0x0,LIM_l6		! delay slot set biguns=0
1471
1472!	adjust 1st triple since 2nd and 3rd done here
!	(x0 had its sign cleared in the primary loop)
1473	subcc	LIM_l6,0x2,%g0		! continue in LOOP1?
1474	fors	%f0,%f9,%f0		! restore sign bit
1475	fmuld	%f0,%f40,%f2		! adj LOOP0
1476	bz,a	%icc,.LOOP1
1477	mov	0x0,LIM_l6		! delay slot set biguns=0
1478
1479!	adjust 1st and 2nd triple since 3rd done here
!	(y was already advanced for x2 in the primary loop, and the
!	medium-range stage advances it again)
1480	subcc	LIM_l6,0x3,%g0		! continue in LOOP2?
1481	!done fmuld	%f0,%f40,%f2		! adj LOOP0
1482	sub	%i3,%i4,%i3		! adjust to not double increment
1483	fors	%f10,%f19,%f10		! restore sign bit
1484	fmuld	%f10,%f40,%f12		! adj LOOP1
1485	faddd	%f2,%f42,%f2		! adj LOOP1
1486	bz,a	%icc,.LOOP2
1487	mov	0x0,LIM_l6		! delay slot set biguns=0
1488
1489	.align 32
1490.LOOP0:
1491	lda	[%i1]%asi,%l1		! preload next argument
1492	mov	%i3,%o0			! py0 = y
1493	lda	[%i1]%asi,%f10
1494	cmp	%l0,%l7
1495	add	%i3,%i4,%i3		! y += stridey
1496	bg,pn	%icc,.BIG0		! if hx > 0x413921fb
1497
1498! delay slot
1499	lda	[%i1+4]%asi,%f11
1500	addcc	%i0,-1,%i0
1501	add	%i1,%i2,%i1		! x += stridex
1502	ble,pn	%icc,.ENDLOOP1
1503
1504! delay slot
1505	andn	%l1,%i5,%l1
1506	nop
1507	fmuld	%f0,%f40,%f2
1508	fabsd	%f54,%f54		! a nop for alignment only
1509
1510.LOOP1:
1511	lda	[%i1]%asi,%l2		! preload next argument
1512	mov	%i3,%o1			! py1 = y
1513
1514	lda	[%i1]%asi,%f20
1515	cmp	%l1,%l7
1516	add	%i3,%i4,%i3		! y += stridey
1517	bg,pn	%icc,.BIG1		! if hx > 0x413921fb
1518
1519! delay slot
1520	lda	[%i1+4]%asi,%f21
1521	addcc	%i0,-1,%i0
1522	add	%i1,%i2,%i1		! x += stridex
1523	ble,pn	%icc,.ENDLOOP2
1524
1525! delay slot
1526	andn	%l2,%i5,%l2
1527	nop
1528	fmuld	%f10,%f40,%f12
1529	faddd	%f2,%f42,%f2
1530
1531.LOOP2:
1532	st	%f3,[%fp+n0]
1533	mov	%i3,%o2			! py2 = y
1534
1535	cmp	%l2,%l7
1536	add	%i3,%i4,%i3		! y += stridey
1537	fmuld	%f20,%f40,%f22
1538	bg,pn	%icc,.BIG2		! if hx > 0x413921fb
1539
1540! delay slot
1541	add	%l5,thresh+4,%o7
1542	faddd	%f12,%f42,%f12
1543	st	%f13,[%fp+n1]
1544
1545! -
1546
1547	add	%l5,thresh,%g1
1548	faddd	%f22,%f42,%f22
1549	st	%f23,[%fp+n2]
1550
1551	fsubd	%f2,%f42,%f2		! n
1552
1553	fsubd	%f12,%f42,%f12		! n
1554
1555	fsubd	%f22,%f42,%f22		! n
1556
1557	fmuld	%f2,%f46,%f4
1558
1559	fmuld	%f12,%f46,%f14
1560
1561	fmuld	%f22,%f46,%f24
1562
1563	fsubd	%f0,%f4,%f4
1564	fmuld	%f2,%f48,%f6
1565
1566	fsubd	%f10,%f14,%f14
1567	fmuld	%f12,%f48,%f16
1568
1569	fsubd	%f20,%f24,%f24
1570	fmuld	%f22,%f48,%f26
1571
1572	fsubd	%f4,%f6,%f0
1573	ld	[%fp+n0],%o3
1574
1575	fsubd	%f14,%f16,%f10
1576	ld	[%fp+n1],%o4
1577
1578	fsubd	%f24,%f26,%f20
1579	ld	[%fp+n2],%o5
1580
1581	fsubd	%f4,%f0,%f32
1582	and	%o3,1,%o3
1583
1584	fsubd	%f14,%f10,%f34
1585	and	%o4,1,%o4
1586
1587	fsubd	%f24,%f20,%f36
1588	and	%o5,1,%o5
1589
1590	fsubd	%f32,%f6,%f32
1591	fmuld	%f2,%f50,%f8
1592	sll	%o3,3,%o3
1593
1594	fsubd	%f34,%f16,%f34
1595	fmuld	%f12,%f50,%f18
1596	sll	%o4,3,%o4
1597
1598	fsubd	%f36,%f26,%f36
1599	fmuld	%f22,%f50,%f28
1600	sll	%o5,3,%o5
1601
1602	fsubd	%f8,%f32,%f8
1603	ld	[%g1+%o3],%f6
1604
1605	fsubd	%f18,%f34,%f18
1606	ld	[%g1+%o4],%f16
1607
1608	fsubd	%f28,%f36,%f28
1609	ld	[%g1+%o5],%f26
1610
1611	fsubd	%f0,%f8,%f4
1612
1613	fsubd	%f10,%f18,%f14
1614
1615	fsubd	%f20,%f28,%f24
1616
1617	fsubd	%f0,%f4,%f32
1618
1619	fsubd	%f10,%f14,%f34
1620
1621	fsubd	%f20,%f24,%f36
1622
1623	fsubd	%f32,%f8,%f32
1624	fmuld	%f2,%f52,%f2
1625
1626	fsubd	%f34,%f18,%f34
1627	fmuld	%f12,%f52,%f12
1628
1629	fsubd	%f36,%f28,%f36
1630	fmuld	%f22,%f52,%f22
1631
1632	fsubd	%f2,%f32,%f2
1633	ld	[%o7+%o3],%f8
1634
1635	fsubd	%f12,%f34,%f12
1636	ld	[%o7+%o4],%f18
1637
1638	fsubd	%f22,%f36,%f22
1639	ld	[%o7+%o5],%f28
1640
1641	fsubd	%f4,%f2,%f0		! x
1642
1643	fsubd	%f14,%f12,%f10		! x
1644
1645	fsubd	%f24,%f22,%f20		! x
1646
1647	fsubd	%f4,%f0,%f4
1648
1649	fsubd	%f14,%f10,%f14
1650
1651	fsubd	%f24,%f20,%f24
1652
1653	fands	%f0,%f30,%f9		! save signbit
1654
1655	fands	%f10,%f30,%f19		! save signbit
1656
1657	fands	%f20,%f30,%f29		! save signbit
1658
1659	fabsd	%f0,%f0
1660	std	%f0,[%fp+x0_1]
1661
1662	fabsd	%f10,%f10
1663	std	%f10,[%fp+x1_1]
1664
1665	fabsd	%f20,%f20
1666	std	%f20,[%fp+x2_1]
1667
1668	fsubd	%f4,%f2,%f2		! y
1669
1670	fsubd	%f14,%f12,%f12		! y
1671
1672	fsubd	%f24,%f22,%f22		! y
1673
1674	fcmpgt32 %f6,%f0,%l0
1675
1676	fcmpgt32 %f16,%f10,%l1
1677
1678	fcmpgt32 %f26,%f20,%l2
1679
1680! -- 16 byte aligned
1681	fxors	%f2,%f9,%f2
1682
1683	fxors	%f12,%f19,%f12
1684
1685	fxors	%f22,%f29,%f22
1686
1687	fands	%f9,%f8,%f9		! if (n & 1) clear sign bit
1688	andcc	%l0,2,%g0
1689	bne,pn	%icc,.CASE4
1690
1691! delay slot
1692	fands	%f19,%f18,%f19		! if (n & 1) clear sign bit
1693	andcc	%l1,2,%g0
1694	bne,pn	%icc,.CASE2
1695
1696! delay slot
1697	fands	%f29,%f28,%f29		! if (n & 1) clear sign bit
1698	andcc	%l2,2,%g0
1699	bne,pn	%icc,.CASE1
1700
1701! delay slot
1702	fpadd32s %f0,%f31,%f8
1703	sethi	%hi(0x3fc3c000),%o7
1704	ld	[%fp+x0_1],%l0
1705
1706	fpadd32s %f10,%f31,%f18
1707	add	%l3,8,%g1
1708	ld	[%fp+x1_1],%l1
1709
1710	fpadd32s %f20,%f31,%f28
1711	ld	[%fp+x2_1],%l2
1712
1713	fand	%f8,%f44,%f4
1714	sub	%l0,%o7,%l0
1715
1716	fand	%f18,%f44,%f14
1717	sub	%l1,%o7,%l1
1718
1719	fand	%f28,%f44,%f24
1720	sub	%l2,%o7,%l2
1721
1722	fsubd	%f0,%f4,%f0
1723	srl	%l0,10,%l0
1724
1725	fsubd	%f10,%f14,%f10
1726	srl	%l1,10,%l1
1727
1728	fsubd	%f20,%f24,%f20
1729	srl	%l2,10,%l2
1730
1731	faddd	%f0,%f2,%f0
1732	andn	%l0,0x1f,%l0
1733
1734	faddd	%f10,%f12,%f10
1735	andn	%l1,0x1f,%l1
1736
1737	faddd	%f20,%f22,%f20
1738	andn	%l2,0x1f,%l2
1739
1740	fmuld	%f0,%f0,%f2
1741	add	%l0,%o3,%l0
1742
1743	fmuld	%f10,%f10,%f12
1744	add	%l1,%o4,%l1
1745
1746	fmuld	%f20,%f20,%f22
1747	add	%l2,%o5,%l2
1748
1749	fmuld	%f2,%f58,%f6
1750	ldd	[%l3+%l0],%f32
1751
1752	fmuld	%f12,%f58,%f16
1753	ldd	[%l3+%l1],%f34
1754
1755	fmuld	%f22,%f58,%f26
1756	ldd	[%l3+%l2],%f36
1757
1758	faddd	%f6,%f56,%f6
1759	fmuld	%f2,%f62,%f4
1760
1761	faddd	%f16,%f56,%f16
1762	fmuld	%f12,%f62,%f14
1763
1764	faddd	%f26,%f56,%f26
1765	fmuld	%f22,%f62,%f24
1766
1767	fmuld	%f2,%f6,%f6
1768	faddd	%f4,%f60,%f4
1769
1770	fmuld	%f12,%f16,%f16
1771	faddd	%f14,%f60,%f14
1772
1773	fmuld	%f22,%f26,%f26
1774	faddd	%f24,%f60,%f24
1775
1776	faddd	%f6,%f54,%f6
1777	fmuld	%f2,%f4,%f4
1778
1779	faddd	%f16,%f54,%f16
1780	fmuld	%f12,%f14,%f14
1781
1782	faddd	%f26,%f54,%f26
1783	fmuld	%f22,%f24,%f24
1784
1785	fmuld	%f0,%f6,%f6
1786	ldd	[%g1+%l0],%f2
1787
1788	fmuld	%f10,%f16,%f16
1789	ldd	[%g1+%l1],%f12
1790
1791	fmuld	%f20,%f26,%f26
1792	ldd	[%g1+%l2],%f22
1793
1794	fmuld	%f4,%f32,%f4
1795	ldd	[%l4+%l0],%f0
1796
1797	fmuld	%f14,%f34,%f14
1798	ldd	[%l4+%l1],%f10
1799
1800	fmuld	%f24,%f36,%f24
1801	ldd	[%l4+%l2],%f20
1802
1803	fmuld	%f6,%f2,%f6
1804
1805	fmuld	%f16,%f12,%f16
1806
1807	fmuld	%f26,%f22,%f26
1808
1809	faddd	%f6,%f4,%f6
1810
1811	faddd	%f16,%f14,%f16
1812
1813	faddd	%f26,%f24,%f26
1814
1815	faddd	%f6,%f0,%f6
1816
1817	faddd	%f16,%f10,%f16
1818
1819	faddd	%f26,%f20,%f26
1820
1821	faddd	%f6,%f32,%f6
1822
1823	faddd	%f16,%f34,%f16
1824
1825	faddd	%f26,%f36,%f26
1826
! .FIXSIGN -- shared epilogue of one three-element pass.
!
! In:	%f6, %f16, %f26		results for lanes 0, 1, 2 (sign not yet applied)
!	%f9, %f19, %f29		sign words saved during argument reduction
!	[%fp+n0], [%fp+n1], [%fp+n2]	integer n stored by the reduction
!	%o0, %o1, %o2		destination addresses for lanes 0, 1, 2
!
! For each lane, (n & 2) << 2 selects one of the two 32-bit words at
! thresh-4 and thresh+4 relative to %l5.  That word is XORed into the
! saved sign, and the sign is then ORed onto the high word of the result.
! NOTE(review): presumably the two words are 0 and 0x80000000, so the
! sign flips when bit 1 of n is set -- confirm against thresh's #define.
!
! The next lane-0 argument is fetched through %asi before the element
! count is tested.  NOTE(review): presumably %asi names a non-faulting
! address space so that the read past the last element is harmless --
! confirm where %asi is initialised.
.FIXSIGN:
	ld	[%fp+n0],%o3
	add	%l5,thresh-4,%g1	! base of the sign-flip word pair

	ld	[%fp+n1],%o4

	ld	[%fp+n2],%o5
	and	%o3,2,%o3

	sll	%o3,2,%o3		! byte offset 0 or 8
	and	%o4,2,%o4
	lda	[%i1]%asi,%l0		! preload next argument

	sll	%o4,2,%o4
	and	%o5,2,%o5
	ld	[%g1+%o3],%f8

	sll	%o5,2,%o5
	ld	[%g1+%o4],%f18

	ld	[%g1+%o5],%f28
	fxors	%f9,%f8,%f9

	lda	[%i1]%asi,%f0
	fxors	%f29,%f28,%f29

	lda	[%i1+4]%asi,%f1
	fxors	%f19,%f18,%f19

	fors	%f6,%f9,%f6		! tack on sign
	add	%i1,%i2,%i1		! x += stridex
	st	%f6,[%o0]

	fors	%f26,%f29,%f26		! tack on sign
	st	%f7,[%o0+4]

	fors	%f16,%f19,%f16		! tack on sign
	st	%f26,[%o2]

	st	%f27,[%o2+4]
	addcc	%i0,-1,%i0		! one fewer element to process

	st	%f16,[%o1]
	andn	%l0,%i5,%l0		! hx &= ~0x80000000
	bg,pt	%icc,.LOOP0

! delay slot
	st	%f17,[%o1+4]

	ba,pt	%icc,.ENDLOOP0
! delay slot
	nop
1879
! .CASE1 -- lanes 0 and 1 use the table-driven evaluation, lane 2 uses
! the polynomial-only evaluation.
!
! Reached from the main path when only the lane-2 threshold test
! (andcc %l2,2 on the fcmpgt32 mask) is set.  The delay slot of that
! branch has already executed "fpadd32s %f0,%f31,%f8" for lane 0.
!
! Table lanes (0 and 1): the high word of |x| is turned into a byte
! index ((hx - 0x3fc3c000) >> 10) & ~0x1f, plus 8*(n & 1) from
! %o3/%o4.  That index addresses entries at %l3, %l3+8 (%g1) and %l4.
! The argument is replaced by the residual (x - (x' & %f44)) + y, where
! x' is x after fpadd32s with %f31, and two short polynomials in the
! squared residual (coefficients %f54..%f62) are combined with the
! table entries.
!
! Polynomial lane (2): z = x*x is run through a four-coefficient Horner
! chain loaded from %l5 + 8*(n & 1) + {0, 0x10, 0x20, 0x30}.  The
! result is scaled by the double at %fp + 8*(n & 1) + x2_1, the double
! at %fp + 8*(n & 1) + y2_0 is added, and the scale factor is added back.
! NOTE(review): presumably %l5 is the address of "constants", making
! the first three coefficients p4/q4, p3/q3, p2/q2 -- confirm.
	.align	32
.CASE1:
	fpadd32s %f10,%f31,%f18
	sethi	%hi(0x3fc3c000),%o7
	ld	[%fp+x0_1],%l0

	fand	%f8,%f44,%f4
	add	%l3,8,%g1
	ld	[%fp+x1_1],%l1

	fand	%f18,%f44,%f14
	sub	%l0,%o7,%l0

	fsubd	%f0,%f4,%f0
	srl	%l0,10,%l0
	sub	%l1,%o7,%l1

	fsubd	%f10,%f14,%f10
	srl	%l1,10,%l1

	fmuld	%f20,%f20,%f20		! lane 2: z = x*x
	ldd	[%l5+%o5],%f36
	add	%l5,%o5,%l2		! lane 2: coefficient base

	faddd	%f0,%f2,%f0
	andn	%l0,0x1f,%l0

	faddd	%f10,%f12,%f10
	andn	%l1,0x1f,%l1

	fmuld	%f20,%f36,%f24
	ldd	[%l2+0x10],%f26
	add	%fp,%o5,%o5		! lane 2: stack slot selector

	fmuld	%f0,%f0,%f2
	add	%l0,%o3,%l0		! lane 0: table index

	fmuld	%f10,%f10,%f12
	add	%l1,%o4,%l1		! lane 1: table index

	faddd	%f24,%f26,%f24
	ldd	[%l2+0x20],%f36

	fmuld	%f2,%f58,%f6
	ldd	[%l3+%l0],%f32

	fmuld	%f12,%f58,%f16
	ldd	[%l3+%l1],%f34

	fmuld	%f20,%f24,%f24
	ldd	[%l2+0x30],%f26

	faddd	%f6,%f56,%f6
	fmuld	%f2,%f62,%f4

	faddd	%f16,%f56,%f16
	fmuld	%f12,%f62,%f14

	faddd	%f24,%f36,%f24
	ldd	[%o5+x2_1],%f36

	fmuld	%f2,%f6,%f6
	faddd	%f4,%f60,%f4

	fmuld	%f12,%f16,%f16
	faddd	%f14,%f60,%f14

	fmuld	%f20,%f24,%f24

	faddd	%f6,%f54,%f6
	fmuld	%f2,%f4,%f4
	ldd	[%g1+%l0],%f2

	faddd	%f16,%f54,%f16
	fmuld	%f12,%f14,%f14
	ldd	[%g1+%l1],%f12

	faddd	%f24,%f26,%f24

	fmuld	%f0,%f6,%f6
	ldd	[%l4+%l0],%f0

	fmuld	%f10,%f16,%f16
	ldd	[%l4+%l1],%f10

	fmuld	%f4,%f32,%f4
	std	%f22,[%fp+y2_0]		! lane 2: park y where the selector can reach it

	fmuld	%f14,%f34,%f14

	fmuld	%f6,%f2,%f6

	fmuld	%f16,%f12,%f16

	fmuld	%f20,%f24,%f24

	faddd	%f6,%f4,%f6

	faddd	%f16,%f14,%f16

	fmuld	%f36,%f24,%f24
	ldd	[%o5+y2_0],%f22

	faddd	%f6,%f0,%f6

	faddd	%f16,%f10,%f16

	faddd	%f24,%f22,%f24

	faddd	%f6,%f32,%f6

	faddd	%f16,%f34,%f16
	ba,pt	%icc,.FIXSIGN

! delay slot
	faddd	%f36,%f24,%f26
1996
! .CASE2 -- lanes 0 and 2 use the table-driven evaluation, lane 1 uses
! the polynomial-only evaluation.
!
! Reached from the main path when the lane-1 threshold test
! (andcc %l1,2) is set and the lane-0 test is not.  The lane-2 test is
! repeated here first; if it is also set, control moves on to .CASE3
! with %f8, %l0 and %o7 already prepared for lane 0.
!
! Table lanes: index = ((hx - 0x3fc3c000) >> 10) & ~0x1f, plus
! 8*(n & 1), addressing entries at %l3, %l3+8 (%g1) and %l4.
! Polynomial lane: four-coefficient Horner chain in z = x*x from
! %l5 + 8*(n & 1) + {0, 0x10, 0x20, 0x30}, then scaled and offset by
! the doubles at %fp + 8*(n & 1) + x1_1 and + y1_0.
	.align	32
.CASE2:
	fpadd32s %f0,%f31,%f8
	ld	[%fp+x0_1],%l0
	andcc	%l2,2,%g0
	bne,pn	%icc,.CASE3

! delay slot
	sethi	%hi(0x3fc3c000),%o7
	fpadd32s %f20,%f31,%f28
	ld	[%fp+x2_1],%l2

	fand	%f8,%f44,%f4
	sub	%l0,%o7,%l0
	add	%l3,8,%g1

	fand	%f28,%f44,%f24
	sub	%l2,%o7,%l2

	fsubd	%f0,%f4,%f0
	srl	%l0,10,%l0

	fsubd	%f20,%f24,%f20
	srl	%l2,10,%l2

	fmuld	%f10,%f10,%f10		! lane 1: z = x*x
	ldd	[%l5+%o4],%f34
	add	%l5,%o4,%l1		! lane 1: coefficient base

	faddd	%f0,%f2,%f0
	andn	%l0,0x1f,%l0

	faddd	%f20,%f22,%f20
	andn	%l2,0x1f,%l2

	fmuld	%f10,%f34,%f14
	ldd	[%l1+0x10],%f16
	add	%fp,%o4,%o4		! lane 1: stack slot selector

	fmuld	%f0,%f0,%f2
	add	%l0,%o3,%l0		! lane 0: table index

	fmuld	%f20,%f20,%f22
	add	%l2,%o5,%l2		! lane 2: table index

	faddd	%f14,%f16,%f14
	ldd	[%l1+0x20],%f34

	fmuld	%f2,%f58,%f6
	ldd	[%l3+%l0],%f32

	fmuld	%f22,%f58,%f26
	ldd	[%l3+%l2],%f36

	fmuld	%f10,%f14,%f14
	ldd	[%l1+0x30],%f16

	faddd	%f6,%f56,%f6
	fmuld	%f2,%f62,%f4

	faddd	%f26,%f56,%f26
	fmuld	%f22,%f62,%f24

	faddd	%f14,%f34,%f14
	ldd	[%o4+x1_1],%f34

	fmuld	%f2,%f6,%f6
	faddd	%f4,%f60,%f4

	fmuld	%f22,%f26,%f26
	faddd	%f24,%f60,%f24

	fmuld	%f10,%f14,%f14

	faddd	%f6,%f54,%f6
	fmuld	%f2,%f4,%f4
	ldd	[%g1+%l0],%f2

	faddd	%f26,%f54,%f26
	fmuld	%f22,%f24,%f24
	ldd	[%g1+%l2],%f22

	faddd	%f14,%f16,%f14

	fmuld	%f0,%f6,%f6
	ldd	[%l4+%l0],%f0

	fmuld	%f20,%f26,%f26
	ldd	[%l4+%l2],%f20

	fmuld	%f4,%f32,%f4
	std	%f12,[%fp+y1_0]		! lane 1: park y where the selector can reach it

	fmuld	%f24,%f36,%f24

	fmuld	%f6,%f2,%f6

	fmuld	%f26,%f22,%f26

	fmuld	%f10,%f14,%f14

	faddd	%f6,%f4,%f6

	faddd	%f26,%f24,%f26

	fmuld	%f34,%f14,%f14
	ldd	[%o4+y1_0],%f12

	faddd	%f6,%f0,%f6

	faddd	%f26,%f20,%f26

	faddd	%f14,%f12,%f14

	faddd	%f6,%f32,%f6

	faddd	%f26,%f36,%f26
	ba,pt	%icc,.FIXSIGN

! delay slot
	faddd	%f34,%f14,%f16
2118
! .CASE3 -- lane 0 uses the table-driven evaluation, lanes 1 and 2 use
! the polynomial-only evaluation.
!
! Reached only from .CASE2, which has already executed
! "fpadd32s %f0,%f31,%f8", loaded [%fp+x0_1] into %l0 and, in the
! branch delay slot, set %o7 = %hi(0x3fc3c000).
!
! Table lane: index = ((hx - 0x3fc3c000) >> 10) & ~0x1f, plus
! 8*(n & 1), addressing entries at %l3, %l3+8 (%g1) and %l4.
! Polynomial lanes: four-coefficient Horner chain in z = x*x from
! %l5 + 8*(n & 1) + {0, 0x10, 0x20, 0x30}, then scaled and offset by
! the doubles at %fp + 8*(n & 1) + x1_1 / x2_1 and + y1_0 / y2_0.
	.align	32
.CASE3:
	fand	%f8,%f44,%f4
	add	%l3,8,%g1
	sub	%l0,%o7,%l0

	fmuld	%f10,%f10,%f10		! lane 1: z = x*x
	ldd	[%l5+%o4],%f34
	add	%l5,%o4,%l1		! lane 1: coefficient base

	fsubd	%f0,%f4,%f0
	srl	%l0,10,%l0

	fmuld	%f20,%f20,%f20		! lane 2: z = x*x
	ldd	[%l5+%o5],%f36
	add	%l5,%o5,%l2		! lane 2: coefficient base

	fmuld	%f10,%f34,%f14
	ldd	[%l1+0x10],%f16
	add	%fp,%o4,%o4		! lane 1: stack slot selector

	faddd	%f0,%f2,%f0
	andn	%l0,0x1f,%l0

	fmuld	%f20,%f36,%f24
	ldd	[%l2+0x10],%f26
	add	%fp,%o5,%o5		! lane 2: stack slot selector

	faddd	%f14,%f16,%f14
	ldd	[%l1+0x20],%f34

	fmuld	%f0,%f0,%f2
	add	%l0,%o3,%l0		! lane 0: table index

	faddd	%f24,%f26,%f24
	ldd	[%l2+0x20],%f36

	fmuld	%f10,%f14,%f14
	ldd	[%l1+0x30],%f16

	fmuld	%f2,%f58,%f6
	ldd	[%l3+%l0],%f32

	fmuld	%f20,%f24,%f24
	ldd	[%l2+0x30],%f26

	faddd	%f14,%f34,%f14
	ldd	[%o4+x1_1],%f34

	faddd	%f6,%f56,%f6
	fmuld	%f2,%f62,%f4

	faddd	%f24,%f36,%f24
	ldd	[%o5+x2_1],%f36

	fmuld	%f10,%f14,%f14
	std	%f12,[%fp+y1_0]

	fmuld	%f2,%f6,%f6
	faddd	%f4,%f60,%f4

	fmuld	%f20,%f24,%f24
	std	%f22,[%fp+y2_0]

	faddd	%f14,%f16,%f14

	faddd	%f6,%f54,%f6
	fmuld	%f2,%f4,%f4
	ldd	[%g1+%l0],%f2

	faddd	%f24,%f26,%f24

	fmuld	%f10,%f14,%f14

	fmuld	%f0,%f6,%f6
	ldd	[%l4+%l0],%f0

	fmuld	%f4,%f32,%f4

	fmuld	%f20,%f24,%f24

	fmuld	%f6,%f2,%f6

	fmuld	%f34,%f14,%f14
	ldd	[%o4+y1_0],%f12

	fmuld	%f36,%f24,%f24
	ldd	[%o5+y2_0],%f22

	faddd	%f6,%f4,%f6

	faddd	%f14,%f12,%f14

	faddd	%f24,%f22,%f24

	faddd	%f6,%f0,%f6

	faddd	%f34,%f14,%f16

	faddd	%f36,%f24,%f26
	ba,pt	%icc,.FIXSIGN

! delay slot
	faddd	%f6,%f32,%f6
2223
! .CASE4 -- lane 0 uses the polynomial-only evaluation, lanes 1 and 2
! use the table-driven evaluation.
!
! Reached from the main path when the lane-0 threshold test
! (andcc %l0,2) is set; the delay slot of that branch has already
! applied "fands %f19,%f18,%f19" for lane 1, so only lane 2's sign
! masking remains to be done here.
!
! The lane-1 and lane-2 tests are then repeated:
!   lane 1 set		-> .CASE6 (the delay slot leaves the lane-2
!			   test in the condition codes for .CASE6)
!   lane 2 set		-> .CASE5 (the delay slot sets %g1 = %l3+8)
!   neither set		-> fall through
!
! Polynomial lane: four-coefficient Horner chain in z = x*x from
! %l5 + 8*(n & 1) + {0, 0x10, 0x20, 0x30}, then scaled and offset by
! the doubles at %fp + 8*(n & 1) + x0_1 and + y0_0.
! Table lanes: index = ((hx - 0x3fc3c000) >> 10) & ~0x1f, plus
! 8*(n & 1), addressing entries at %l3, %l3+8 (%g1) and %l4.
	.align	32
.CASE4:
	fands	%f29,%f28,%f29		! if (n & 1) clear sign bit
	sethi	%hi(0x3fc3c000),%o7
	andcc	%l1,2,%g0
	bne,pn	%icc,.CASE6

! delay slot
	andcc	%l2,2,%g0
	fpadd32s %f10,%f31,%f18
	ld	[%fp+x1_1],%l1
	bne,pn	%icc,.CASE5

! delay slot
	add	%l3,8,%g1
	ld	[%fp+x2_1],%l2
	fpadd32s %f20,%f31,%f28

	fand	%f18,%f44,%f14
	sub	%l1,%o7,%l1

	fand	%f28,%f44,%f24
	sub	%l2,%o7,%l2

	fsubd	%f10,%f14,%f10
	srl	%l1,10,%l1

	fsubd	%f20,%f24,%f20
	srl	%l2,10,%l2

	fmuld	%f0,%f0,%f0		! lane 0: z = x*x
	ldd	[%l5+%o3],%f32
	add	%l5,%o3,%l0		! lane 0: coefficient base

	faddd	%f10,%f12,%f10
	andn	%l1,0x1f,%l1

	faddd	%f20,%f22,%f20
	andn	%l2,0x1f,%l2

	fmuld	%f0,%f32,%f4
	ldd	[%l0+0x10],%f6
	add	%fp,%o3,%o3		! lane 0: stack slot selector

	fmuld	%f10,%f10,%f12
	add	%l1,%o4,%l1		! lane 1: table index

	fmuld	%f20,%f20,%f22
	add	%l2,%o5,%l2		! lane 2: table index

	faddd	%f4,%f6,%f4
	ldd	[%l0+0x20],%f32

	fmuld	%f12,%f58,%f16
	ldd	[%l3+%l1],%f34

	fmuld	%f22,%f58,%f26
	ldd	[%l3+%l2],%f36

	fmuld	%f0,%f4,%f4
	ldd	[%l0+0x30],%f6

	faddd	%f16,%f56,%f16
	fmuld	%f12,%f62,%f14

	faddd	%f26,%f56,%f26
	fmuld	%f22,%f62,%f24

	faddd	%f4,%f32,%f4
	ldd	[%o3+x0_1],%f32

	fmuld	%f12,%f16,%f16
	faddd	%f14,%f60,%f14

	fmuld	%f22,%f26,%f26
	faddd	%f24,%f60,%f24

	fmuld	%f0,%f4,%f4

	faddd	%f16,%f54,%f16
	fmuld	%f12,%f14,%f14
	ldd	[%g1+%l1],%f12

	faddd	%f26,%f54,%f26
	fmuld	%f22,%f24,%f24
	ldd	[%g1+%l2],%f22

	faddd	%f4,%f6,%f4

	fmuld	%f10,%f16,%f16
	ldd	[%l4+%l1],%f10

	fmuld	%f20,%f26,%f26
	ldd	[%l4+%l2],%f20

	fmuld	%f14,%f34,%f14
	std	%f2,[%fp+y0_0]		! lane 0: park y where the selector can reach it

	fmuld	%f24,%f36,%f24

	fmuld	%f0,%f4,%f4

	fmuld	%f16,%f12,%f16

	fmuld	%f26,%f22,%f26

	fmuld	%f32,%f4,%f4
	ldd	[%o3+y0_0],%f2

	faddd	%f16,%f14,%f16

	faddd	%f26,%f24,%f26

	faddd	%f4,%f2,%f4

	faddd	%f16,%f10,%f16

	faddd	%f26,%f20,%f26

	faddd	%f32,%f4,%f6

	faddd	%f16,%f34,%f16
	ba,pt	%icc,.FIXSIGN

! delay slot
	faddd	%f26,%f36,%f26
2350
! .CASE5 -- lanes 0 and 2 use the polynomial-only evaluation, lane 1
! uses the table-driven evaluation.
!
! Reached only from .CASE4, which has already set
! %o7 = %hi(0x3fc3c000), executed "fpadd32s %f10,%f31,%f18", loaded
! [%fp+x1_1] into %l1 and, in the branch delay slot, set %g1 = %l3+8.
!
! Polynomial lanes: four-coefficient Horner chain in z = x*x from
! %l5 + 8*(n & 1) + {0, 0x10, 0x20, 0x30}, then scaled and offset by
! the doubles at %fp + 8*(n & 1) + x0_1 / x2_1 and + y0_0 / y2_0.
! Table lane: index = ((hx - 0x3fc3c000) >> 10) & ~0x1f, plus
! 8*(n & 1), addressing entries at %l3, %l3+8 (%g1) and %l4.
	.align	32
.CASE5:
	fand	%f18,%f44,%f14
	sub	%l1,%o7,%l1

	fmuld	%f0,%f0,%f0		! lane 0: z = x*x
	ldd	[%l5+%o3],%f32
	add	%l5,%o3,%l0		! lane 0: coefficient base

	fsubd	%f10,%f14,%f10
	srl	%l1,10,%l1

	fmuld	%f20,%f20,%f20		! lane 2: z = x*x
	ldd	[%l5+%o5],%f36
	add	%l5,%o5,%l2		! lane 2: coefficient base

	fmuld	%f0,%f32,%f4
	ldd	[%l0+0x10],%f6
	add	%fp,%o3,%o3		! lane 0: stack slot selector

	faddd	%f10,%f12,%f10
	andn	%l1,0x1f,%l1

	fmuld	%f20,%f36,%f24
	ldd	[%l2+0x10],%f26
	add	%fp,%o5,%o5		! lane 2: stack slot selector

	faddd	%f4,%f6,%f4
	ldd	[%l0+0x20],%f32

	fmuld	%f10,%f10,%f12
	add	%l1,%o4,%l1		! lane 1: table index

	faddd	%f24,%f26,%f24
	ldd	[%l2+0x20],%f36

	fmuld	%f0,%f4,%f4
	ldd	[%l0+0x30],%f6

	fmuld	%f12,%f58,%f16
	ldd	[%l3+%l1],%f34

	fmuld	%f20,%f24,%f24
	ldd	[%l2+0x30],%f26

	faddd	%f4,%f32,%f4
	ldd	[%o3+x0_1],%f32

	faddd	%f16,%f56,%f16
	fmuld	%f12,%f62,%f14

	faddd	%f24,%f36,%f24
	ldd	[%o5+x2_1],%f36

	fmuld	%f0,%f4,%f4
	std	%f2,[%fp+y0_0]

	fmuld	%f12,%f16,%f16
	faddd	%f14,%f60,%f14

	fmuld	%f20,%f24,%f24
	std	%f22,[%fp+y2_0]

	faddd	%f4,%f6,%f4

	faddd	%f16,%f54,%f16
	fmuld	%f12,%f14,%f14
	ldd	[%g1+%l1],%f12

	faddd	%f24,%f26,%f24

	fmuld	%f0,%f4,%f4

	fmuld	%f10,%f16,%f16
	ldd	[%l4+%l1],%f10

	fmuld	%f14,%f34,%f14

	fmuld	%f20,%f24,%f24

	fmuld	%f16,%f12,%f16

	fmuld	%f32,%f4,%f4
	ldd	[%o3+y0_0],%f2

	fmuld	%f36,%f24,%f24
	ldd	[%o5+y2_0],%f22

	faddd	%f16,%f14,%f16

	faddd	%f4,%f2,%f4

	faddd	%f24,%f22,%f24

	faddd	%f16,%f10,%f16

	faddd	%f32,%f4,%f6

	faddd	%f36,%f24,%f26
	ba,pt	%icc,.FIXSIGN

! delay slot
	faddd	%f16,%f34,%f16
2454
! .CASE6 -- lanes 0 and 1 use the polynomial-only evaluation, lane 2
! uses the table-driven evaluation.
!
! Reached only from .CASE4, which has already set
! %o7 = %hi(0x3fc3c000) and, in the branch delay slot, left the result
! of "andcc %l2,2,%g0" in the condition codes.  The bne below therefore
! tests lane 2 and moves on to .CASE7 when that lane is polynomial too;
! the ld and add ahead of it do not modify the condition codes.
!
! Polynomial lanes: four-coefficient Horner chain in z = x*x from
! %l5 + 8*(n & 1) + {0, 0x10, 0x20, 0x30}, then scaled and offset by
! the doubles at %fp + 8*(n & 1) + x0_1 / x1_1 and + y0_0 / y1_0.
! Table lane: index = ((hx - 0x3fc3c000) >> 10) & ~0x1f, plus
! 8*(n & 1), addressing entries at %l3, %l3+8 (%g1) and %l4.
	.align	32
.CASE6:
	ld	[%fp+x2_1],%l2
	add	%l3,8,%g1
	bne,pn	%icc,.CASE7
! delay slot
	fpadd32s %f20,%f31,%f28

	fand	%f28,%f44,%f24
	ldd	[%l5+%o3],%f32
	add	%l5,%o3,%l0		! lane 0: coefficient base

	fmuld	%f0,%f0,%f0		! lane 0: z = x*x
	sub	%l2,%o7,%l2

	fsubd	%f20,%f24,%f20
	srl	%l2,10,%l2

	fmuld	%f10,%f10,%f10		! lane 1: z = x*x
	ldd	[%l5+%o4],%f34
	add	%l5,%o4,%l1		! lane 1: coefficient base

	fmuld	%f0,%f32,%f4
	ldd	[%l0+0x10],%f6
	add	%fp,%o3,%o3		! lane 0: stack slot selector

	faddd	%f20,%f22,%f20
	andn	%l2,0x1f,%l2

	fmuld	%f10,%f34,%f14
	ldd	[%l1+0x10],%f16
	add	%fp,%o4,%o4		! lane 1: stack slot selector

	faddd	%f4,%f6,%f4
	ldd	[%l0+0x20],%f32

	fmuld	%f20,%f20,%f22
	add	%l2,%o5,%l2		! lane 2: table index

	faddd	%f14,%f16,%f14
	ldd	[%l1+0x20],%f34

	fmuld	%f0,%f4,%f4
	ldd	[%l0+0x30],%f6

	fmuld	%f22,%f58,%f26
	ldd	[%l3+%l2],%f36

	fmuld	%f10,%f14,%f14
	ldd	[%l1+0x30],%f16

	faddd	%f4,%f32,%f4
	ldd	[%o3+x0_1],%f32

	faddd	%f26,%f56,%f26
	fmuld	%f22,%f62,%f24

	faddd	%f14,%f34,%f14
	ldd	[%o4+x1_1],%f34

	fmuld	%f0,%f4,%f4
	std	%f2,[%fp+y0_0]

	fmuld	%f22,%f26,%f26
	faddd	%f24,%f60,%f24

	fmuld	%f10,%f14,%f14
	std	%f12,[%fp+y1_0]

	faddd	%f4,%f6,%f4

	faddd	%f26,%f54,%f26
	fmuld	%f22,%f24,%f24
	ldd	[%g1+%l2],%f22

	faddd	%f14,%f16,%f14

	fmuld	%f0,%f4,%f4

	fmuld	%f20,%f26,%f26
	ldd	[%l4+%l2],%f20

	fmuld	%f24,%f36,%f24

	fmuld	%f10,%f14,%f14

	fmuld	%f26,%f22,%f26

	fmuld	%f32,%f4,%f4
	ldd	[%o3+y0_0],%f2

	fmuld	%f34,%f14,%f14
	ldd	[%o4+y1_0],%f12

	faddd	%f26,%f24,%f26

	faddd	%f4,%f2,%f4

	faddd	%f14,%f12,%f14

	faddd	%f26,%f20,%f26

	faddd	%f32,%f4,%f6

	faddd	%f34,%f14,%f16
	ba,pt	%icc,.FIXSIGN

! delay slot
	faddd	%f26,%f36,%f26
2564
! .CASE7 -- all three lanes use the polynomial-only evaluation.
!
! Reached only from .CASE6.  For each lane k, with s = 8*(n & 1) held in
! %o3/%o4/%o5 on entry:
!	z  = x*x
!	t  = ((c0*z + c1)*z + c2)*z + c3, where c0..c3 are the doubles at
!	     %l5 + s + {0, 0x10, 0x20, 0x30}
!	a  = double at %fp + s + xk_1,  b = double at %fp + s + yk_0
!	result = a + (a*(z*t) + b)
! The reduced low part y is stored to [%fp+yk_0] first so that the
! selector can read it back.
! NOTE(review): presumably the slots at s = 8 hold fixed values chosen
! for odd n -- their setup is not in this part of the file; confirm.
	.align	32
.CASE7:
	fmuld	%f0,%f0,%f0		! lane 0: z = x*x
	ldd	[%l5+%o3],%f32
	add	%l5,%o3,%l0

	fmuld	%f10,%f10,%f10		! lane 1: z = x*x
	ldd	[%l5+%o4],%f34
	add	%l5,%o4,%l1

	fmuld	%f20,%f20,%f20		! lane 2: z = x*x
	ldd	[%l5+%o5],%f36
	add	%l5,%o5,%l2

	fmuld	%f0,%f32,%f4
	ldd	[%l0+0x10],%f6
	add	%fp,%o3,%o3

	fmuld	%f10,%f34,%f14
	ldd	[%l1+0x10],%f16
	add	%fp,%o4,%o4

	fmuld	%f20,%f36,%f24
	ldd	[%l2+0x10],%f26
	add	%fp,%o5,%o5

	faddd	%f4,%f6,%f4
	ldd	[%l0+0x20],%f32

	faddd	%f14,%f16,%f14
	ldd	[%l1+0x20],%f34

	faddd	%f24,%f26,%f24
	ldd	[%l2+0x20],%f36

	fmuld	%f0,%f4,%f4
	ldd	[%l0+0x30],%f6

	fmuld	%f10,%f14,%f14
	ldd	[%l1+0x30],%f16

	fmuld	%f20,%f24,%f24
	ldd	[%l2+0x30],%f26

	faddd	%f4,%f32,%f4
	ldd	[%o3+x0_1],%f32

	faddd	%f14,%f34,%f14
	ldd	[%o4+x1_1],%f34

	faddd	%f24,%f36,%f24
	ldd	[%o5+x2_1],%f36

	fmuld	%f0,%f4,%f4
	std	%f2,[%fp+y0_0]

	fmuld	%f10,%f14,%f14
	std	%f12,[%fp+y1_0]

	fmuld	%f20,%f24,%f24
	std	%f22,[%fp+y2_0]

	faddd	%f4,%f6,%f4

	faddd	%f14,%f16,%f14

	faddd	%f24,%f26,%f24

	fmuld	%f0,%f4,%f4

	fmuld	%f10,%f14,%f14

	fmuld	%f20,%f24,%f24

	fmuld	%f32,%f4,%f4
	ldd	[%o3+y0_0],%f2

	fmuld	%f34,%f14,%f14
	ldd	[%o4+y1_0],%f12

	fmuld	%f36,%f24,%f24
	ldd	[%o5+y2_0],%f22

	faddd	%f4,%f2,%f4

	faddd	%f14,%f12,%f14

	faddd	%f24,%f22,%f24

	faddd	%f32,%f4,%f6

	faddd	%f34,%f14,%f16
	ba,pt	%icc,.FIXSIGN

! delay slot
	faddd	%f36,%f24,%f26
2661
2662
! .ENDLOOP2 -- the element count ran out while the third lane was being
! fetched (see the branches in .SKIP2 and .BIG2).  Finish the pending
! lane-1 argument in %f10 on its own, store it through %o1, then fall
! through into .ENDLOOP1 to finish lane 0.
!
! Steps, all on lane-1 registers:
!   1. n = x*%f40 rounded by adding and subtracting %f42; the low word of
!      the biased sum is kept at [%fp+n1] as the integer n.
!   2. x is reduced by n*%f46, n*%f48, n*%f50 and n*%f52 in turn, with
!      the rounding error of each subtraction carried forward, giving a
!      high part x (%f10) and a low part y (%f12).
!   3. The sign of x is saved in %f19 and x is made non-negative.
!   4. fcmpgt32 against the word at thresh + 8*(n & 1) picks the
!      polynomial-only path (label 1) or the table-driven path.
!   5. Label 2 applies the sign, flipped by the word at
!      thresh-4 + 4*(n & 2), and stores the result.
! NOTE(review): presumably %f40 is 2/pi, %f42 is the 1.5*2^52 rounding
! constant and %f46..%f52 are successive pieces of pi/2, matching the
! values in the constants table -- confirm where those registers are
! loaded.
	.align	32
.ENDLOOP2:
	fmuld	%f10,%f40,%f12
	add	%l5,thresh,%g1
	faddd	%f12,%f42,%f12
	st	%f13,[%fp+n1]
	fsubd	%f12,%f42,%f12		! n
	fmuld	%f12,%f46,%f14
	fsubd	%f10,%f14,%f14
	fmuld	%f12,%f48,%f16
	fsubd	%f14,%f16,%f10
	ld	[%fp+n1],%o4
	fsubd	%f14,%f10,%f34
	and	%o4,1,%o4
	fsubd	%f34,%f16,%f34
	fmuld	%f12,%f50,%f18
	sll	%o4,3,%o4		! 8*(n & 1)
	fsubd	%f18,%f34,%f18
	ld	[%g1+%o4],%f16		! threshold word for this parity
	fsubd	%f10,%f18,%f14
	fsubd	%f10,%f14,%f34
	add	%l5,thresh+4,%o7
	fsubd	%f34,%f18,%f34
	fmuld	%f12,%f52,%f12
	fsubd	%f12,%f34,%f12
	ld	[%o7+%o4],%f18		! sign mask for this parity
	fsubd	%f14,%f12,%f10		! x
	fsubd	%f14,%f10,%f14
	fands	%f10,%f30,%f19		! save signbit
	fabsd	%f10,%f10
	std	%f10,[%fp+x1_1]
	fsubd	%f14,%f12,%f12		! y
	fcmpgt32 %f16,%f10,%l1
	fxors	%f12,%f19,%f12
	fands	%f19,%f18,%f19		! if (n & 1) clear sign bit
	andcc	%l1,2,%g0
	bne,pn	%icc,1f
! delay slot
	nop
! table-driven path
	fpadd32s %f10,%f31,%f18
	ld	[%fp+x1_1],%l1
	fand	%f18,%f44,%f14
	sethi	%hi(0x3fc3c000),%o7
	add	%l3,8,%g1
	fsubd	%f10,%f14,%f10
	sub	%l1,%o7,%l1
	srl	%l1,10,%l1
	faddd	%f10,%f12,%f10
	andn	%l1,0x1f,%l1
	fmuld	%f10,%f10,%f12
	add	%l1,%o4,%l1		! table index
	fmuld	%f12,%f58,%f16
	ldd	[%l3+%l1],%f34
	faddd	%f16,%f56,%f16
	fmuld	%f12,%f62,%f14
	fmuld	%f12,%f16,%f16
	faddd	%f14,%f60,%f14
	faddd	%f16,%f54,%f16
	fmuld	%f12,%f14,%f14
	ldd	[%g1+%l1],%f12
	fmuld	%f10,%f16,%f16
	ldd	[%l4+%l1],%f10
	fmuld	%f14,%f34,%f14
	fmuld	%f16,%f12,%f16
	faddd	%f16,%f14,%f16
	faddd	%f16,%f10,%f16
	ba,pt	%icc,2f
	faddd	%f16,%f34,%f16
1:
! polynomial-only path
	fmuld	%f10,%f10,%f10
	ldd	[%l5+%o4],%f34
	add	%l5,%o4,%l1
	fmuld	%f10,%f34,%f14
	ldd	[%l1+0x10],%f16
	add	%fp,%o4,%o4
	faddd	%f14,%f16,%f14
	ldd	[%l1+0x20],%f34
	fmuld	%f10,%f14,%f14
	ldd	[%l1+0x30],%f16
	faddd	%f14,%f34,%f14
	ldd	[%o4+x1_1],%f34
	fmuld	%f10,%f14,%f14
	std	%f12,[%fp+y1_0]
	faddd	%f14,%f16,%f14
	fmuld	%f10,%f14,%f14
	fmuld	%f34,%f14,%f14
	ldd	[%o4+y1_0],%f12
	faddd	%f14,%f12,%f14
	faddd	%f34,%f14,%f16
2:
	add	%l5,thresh-4,%g1
	ld	[%fp+n1],%o4
	and	%o4,2,%o4
	sll	%o4,2,%o4
	ld	[%g1+%o4],%f18
	fxors	%f19,%f18,%f19
	fors	%f16,%f19,%f16		! tack on sign
	st	%f16,[%o1]
	st	%f17,[%o1+4]
2762
! .ENDLOOP1 -- finish the single pending lane-0 argument in %f0, store
! it through %o0, then fall through into .ENDLOOP0.  Entered either by
! falling out of .ENDLOOP2 or directly when the count runs out while the
! second lane is being fetched (see the branches in .SKIP1 and .BIG1).
!
! Steps, all on lane-0 registers:
!   1. n = x*%f40 rounded by adding and subtracting %f42; the low word of
!      the biased sum is kept at [%fp+n0] as the integer n.
!   2. x is reduced by n*%f46, n*%f48, n*%f50 and n*%f52 in turn, with
!      the rounding error of each subtraction carried forward, giving a
!      high part x (%f0) and a low part y (%f2).
!   3. The sign of x is saved in %f9 and x is made non-negative.
!   4. fcmpgt32 against the word at thresh + 8*(n & 1) picks the
!      polynomial-only path (label 1) or the table-driven path.
!   5. Label 2 applies the sign, flipped by the word at
!      thresh-4 + 4*(n & 2), and stores the result.
! NOTE(review): presumably %f40 is 2/pi, %f42 is the 1.5*2^52 rounding
! constant and %f46..%f52 are successive pieces of pi/2, matching the
! values in the constants table -- confirm where those registers are
! loaded.
.ENDLOOP1:
	fmuld	%f0,%f40,%f2
	add	%l5,thresh,%g1
	faddd	%f2,%f42,%f2
	st	%f3,[%fp+n0]
	fsubd	%f2,%f42,%f2		! n
	fmuld	%f2,%f46,%f4
	fsubd	%f0,%f4,%f4
	fmuld	%f2,%f48,%f6
	fsubd	%f4,%f6,%f0
	ld	[%fp+n0],%o3
	fsubd	%f4,%f0,%f32
	and	%o3,1,%o3
	fsubd	%f32,%f6,%f32
	fmuld	%f2,%f50,%f8
	sll	%o3,3,%o3		! 8*(n & 1)
	fsubd	%f8,%f32,%f8
	ld	[%g1+%o3],%f6		! threshold word for this parity
	fsubd	%f0,%f8,%f4
	fsubd	%f0,%f4,%f32
	add	%l5,thresh+4,%o7
	fsubd	%f32,%f8,%f32
	fmuld	%f2,%f52,%f2
	fsubd	%f2,%f32,%f2
	ld	[%o7+%o3],%f8		! sign mask for this parity
	fsubd	%f4,%f2,%f0		! x
	fsubd	%f4,%f0,%f4
	fands	%f0,%f30,%f9		! save signbit
	fabsd	%f0,%f0
	std	%f0,[%fp+x0_1]
	fsubd	%f4,%f2,%f2		! y
	fcmpgt32 %f6,%f0,%l0
	fxors	%f2,%f9,%f2
	fands	%f9,%f8,%f9		! if (n & 1) clear sign bit
	andcc	%l0,2,%g0
	bne,pn	%icc,1f
! delay slot
	nop
! table-driven path
	fpadd32s %f0,%f31,%f8
	ld	[%fp+x0_1],%l0
	fand	%f8,%f44,%f4
	sethi	%hi(0x3fc3c000),%o7
	add	%l3,8,%g1
	fsubd	%f0,%f4,%f0
	sub	%l0,%o7,%l0
	srl	%l0,10,%l0
	faddd	%f0,%f2,%f0
	andn	%l0,0x1f,%l0
	fmuld	%f0,%f0,%f2
	add	%l0,%o3,%l0		! table index
	fmuld	%f2,%f58,%f6
	ldd	[%l3+%l0],%f32
	faddd	%f6,%f56,%f6
	fmuld	%f2,%f62,%f4
	fmuld	%f2,%f6,%f6
	faddd	%f4,%f60,%f4
	faddd	%f6,%f54,%f6
	fmuld	%f2,%f4,%f4
	ldd	[%g1+%l0],%f2
	fmuld	%f0,%f6,%f6
	ldd	[%l4+%l0],%f0
	fmuld	%f4,%f32,%f4
	fmuld	%f6,%f2,%f6
	faddd	%f6,%f4,%f6
	faddd	%f6,%f0,%f6
	ba,pt	%icc,2f
	faddd	%f6,%f32,%f6
1:
! polynomial-only path
	fmuld	%f0,%f0,%f0
	ldd	[%l5+%o3],%f32
	add	%l5,%o3,%l0
	fmuld	%f0,%f32,%f4
	ldd	[%l0+0x10],%f6
	add	%fp,%o3,%o3
	faddd	%f4,%f6,%f4
	ldd	[%l0+0x20],%f32
	fmuld	%f0,%f4,%f4
	ldd	[%l0+0x30],%f6
	faddd	%f4,%f32,%f4
	ldd	[%o3+x0_1],%f32
	fmuld	%f0,%f4,%f4
	std	%f2,[%fp+y0_0]
	faddd	%f4,%f6,%f4
	fmuld	%f0,%f4,%f4
	fmuld	%f32,%f4,%f4
	ldd	[%o3+y0_0],%f2
	faddd	%f4,%f2,%f4
	faddd	%f32,%f4,%f6
2:
	add	%l5,thresh-4,%g1
	ld	[%fp+n0],%o3
	and	%o3,2,%o3
	sll	%o3,2,%o3
	ld	[%g1+%o3],%f8
	fxors	%f9,%f8,%f9
	fors	%f6,%f9,%f6		! tack on sign
	st	%f6,[%o0]
	st	%f7,[%o0+4]
2861
! .ENDLOOP0 -- every element has been handled by the fast paths or
! deferred.  If LIM_l6 is zero nothing was deferred and the routine
! returns.  Otherwise __vlibm_vsin_big is called with the original
! arguments reloaded from the frame:
!	%o0 = [%fp+nsave]	%o1 = [%fp+xsave]	%o2 = [%fp+sxsave]
!	%o3 = [%fp+ysave]	%o4 = [%fp+sysave]	%o5 = %l7
! The .BIG0/.BIG1/.BIG2 handlers set LIM_l6 from %l7 when they meet a
! finite argument that is too large for the in-line reduction.
! NOTE(review): the meaning of the value in %l7 is established outside
! this part of the file -- confirm before relying on it.
.ENDLOOP0:

! check for huge arguments remaining

	tst	LIM_l6
	be,pt	%icc,.exit
! delay slot
	nop

! ========== huge range (use C code) ==========

#ifdef __sparcv9
	ldx	[%fp+xsave],%o1
	ldx	[%fp+ysave],%o3
#else
	ld	[%fp+xsave],%o1
	ld	[%fp+ysave],%o3
#endif
	ld	[%fp+nsave],%o0
	ld	[%fp+sxsave],%o2
	ld	[%fp+sysave],%o4
	sra	%o2,0,%o2		! sign-extend for V9
	sra	%o4,0,%o4
	call	__vlibm_vsin_big
	mov	%l7,%o5			! delay slot

! return to the caller and pop the register window
.exit:
	ret
	restore
2891
2892
! .SKIP0 -- drop the current lane-0 element without computing it and
! restart the pipeline at .LOOP0.  The count is decremented; if nothing
! is left control goes to .ENDLOOP0.  Otherwise the high word already
! held in %l1 becomes the new lane-0 hx (sign bit cleared), the high
! word of the argument is copied from %f10 to %f0, its low word is
! loaded from [%i1+4], and x is advanced.
! NOTE(review): the branch that reaches this label is not in this part
! of the file, so the condition that triggers a skip is not documented
! here -- confirm at the call site.
	.align	32
.SKIP0:
	addcc	%i0,-1,%i0
	ble,pn	%icc,.ENDLOOP0
! delay slot, harmless if branch taken
	add	%i3,%i4,%i3		! y += stridey
	andn	%l1,%i5,%l0		! hx &= ~0x80000000
	fmovs	%f10,%f0
	ld	[%i1+4],%f1
	ba,pt	%icc,.LOOP0
! delay slot
	add	%i1,%i2,%i1		! x += stridex
2905
2906
! .SKIP1 -- drop the current lane-1 element without computing it and
! resume at .LOOP1.  The count is decremented; if nothing is left
! control goes to .ENDLOOP1 so that the pending lane-0 element is still
! finished.  Otherwise the high word already held in %l2 becomes the new
! lane-1 hx (sign bit cleared), the high word of the argument is copied
! from %f20 to %f10, its low word is loaded from [%i1+4], and x is
! advanced.
! NOTE(review): the branch that reaches this label is not in this part
! of the file, so the condition that triggers a skip is not documented
! here -- confirm at the call site.
	.align	32
.SKIP1:
	addcc	%i0,-1,%i0
	ble,pn	%icc,.ENDLOOP1
! delay slot, harmless if branch taken
	add	%i3,%i4,%i3		! y += stridey
	andn	%l2,%i5,%l1		! hx &= ~0x80000000
	fmovs	%f20,%f10
	ld	[%i1+4],%f11
	ba,pt	%icc,.LOOP1
! delay slot
	add	%i1,%i2,%i1		! x += stridex
2919
2920
! .SKIP2 -- drop the current lane-2 element without computing it and
! resume at .LOOP2.  The count is decremented; if nothing is left
! control goes to .ENDLOOP2 so that the pending lane-1 and lane-0
! elements are still finished.  Unlike .SKIP0/.SKIP1 there is no
! following lane to take a preloaded value from, so the next argument
! is read from memory here: high word into both %l2 and %f20, low word
! into %f21.
! NOTE(review): the branch that reaches this label is not in this part
! of the file, so the condition that triggers a skip is not documented
! here -- confirm at the call site.
	.align	32
.SKIP2:
	addcc	%i0,-1,%i0
	ble,pn	%icc,.ENDLOOP2
! delay slot, harmless if branch taken
	add	%i3,%i4,%i3		! y += stridey
	ld	[%i1],%l2
	ld	[%i1],%f20
	ld	[%i1+4],%f21
	andn	%l2,%i5,%l2		! hx &= ~0x80000000
	ba,pt	%icc,.LOOP2
! delay slot
	add	%i1,%i2,%i1		! x += stridex
2934
2935
! .BIG0 -- lane-0 argument is too large for the in-line reduction.
!
!   hx <  0x7ff00000 (finite):	set LIM_l6 from %l7 so that .ENDLOOP0
!				hands the whole vector to the C fallback;
!				nothing is stored for this element here.
!   hx >= 0x7ff00000 (Inf/NaN):	store x - x through %o0.
!
! The branch is annulling, so the "mov" in its delay slot runs only when
! the branch is taken.  Afterwards the count is decremented and, if
! elements remain, the value already fetched for lane 1 (%l1, %f10) is
! moved into lane 0 and the pipeline restarts at .LOOP0.
	.align	32
.BIG0:
	sethi	%hi(0x7ff00000),%o7
	cmp	%l0,%o7
	bl,a,pt	%icc,1f			! if hx < 0x7ff00000
! delay slot, annulled if branch not taken
	mov	%l7,LIM_l6		! set biguns flag or
	fsubd	%f0,%f0,%f0		! y = x - x
	st	%f0,[%o0]
	st	%f1,[%o0+4]
1:
	addcc	%i0,-1,%i0
	ble,pn	%icc,.ENDLOOP0
! delay slot, harmless if branch taken
	andn	%l1,%i5,%l0		! hx &= ~0x80000000
	fmovd	%f10,%f0
	ba,pt	%icc,.LOOP0
! delay slot
	add	%i1,%i2,%i1		! x += stridex
2955
2956
! .BIG1 -- lane-1 argument is too large for the in-line reduction.
!
!   hx <  0x7ff00000 (finite):	set LIM_l6 from %l7 so that .ENDLOOP0
!				hands the whole vector to the C fallback;
!				nothing is stored for this element here.
!   hx >= 0x7ff00000 (Inf/NaN):	store x - x through %o1.
!
! The branch is annulling, so the "mov" in its delay slot runs only when
! the branch is taken.  Afterwards the count is decremented; with nothing
! left, .ENDLOOP1 finishes the pending lane-0 element.  Otherwise the
! value already fetched for lane 2 (%l2, %f20) is moved into lane 1 and
! the pipeline resumes at .LOOP1.
	.align	32
.BIG1:
	sethi	%hi(0x7ff00000),%o7
	cmp	%l1,%o7
	bl,a,pt	%icc,1f			! if hx < 0x7ff00000
! delay slot, annulled if branch not taken
	mov	%l7,LIM_l6		! set biguns flag or
	fsubd	%f10,%f10,%f10		! y = x - x
	st	%f10,[%o1]
	st	%f11,[%o1+4]
1:
	addcc	%i0,-1,%i0
	ble,pn	%icc,.ENDLOOP1
! delay slot, harmless if branch taken
	andn	%l2,%i5,%l1		! hx &= ~0x80000000
	fmovd	%f20,%f10
	ba,pt	%icc,.LOOP1
! delay slot
	add	%i1,%i2,%i1		! x += stridex
2976
2977
! .BIG2 -- lane-2 argument is too large for the in-line reduction.
!
!   hx <  0x7ff00000 (finite):	set LIM_l6 from %l7 so that .ENDLOOP0
!				hands the whole vector to the C fallback;
!				nothing is stored for this element here.
!   hx >= 0x7ff00000 (Inf/NaN):	store x - x through %o2.
!
! The branch is annulling, so the "mov" in its delay slot runs only when
! the branch is taken.  Afterwards the count is decremented; with nothing
! left, .ENDLOOP2 finishes the pending lane-1 and lane-0 elements.
! Otherwise the next argument is read from memory (there is no later
! lane holding a prefetched value) and the pipeline resumes at .LOOP2.
! The delay slot of the ble is a nop here because the loads that follow
! must not run once the input is exhausted.
	.align	32
.BIG2:
	sethi	%hi(0x7ff00000),%o7
	cmp	%l2,%o7
	bl,a,pt	%icc,1f			! if hx < 0x7ff00000
! delay slot, annulled if branch not taken
	mov	%l7,LIM_l6		! set biguns flag or
	fsubd	%f20,%f20,%f20		! y = x - x
	st	%f20,[%o2]
	st	%f21,[%o2+4]
1:
	addcc	%i0,-1,%i0
	ble,pn	%icc,.ENDLOOP2
! delay slot
	nop
	ld	[%i1],%l2
	ld	[%i1],%f20
	ld	[%i1+4],%f21
	andn	%l2,%i5,%l2		! hx &= ~0x80000000
	ba,pt	%icc,.LOOP2
! delay slot
	add	%i1,%i2,%i1		! x += stridex

	SET_SIZE(__vsin)
3002
3003