1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 
28 #include <sys/isa_defs.h>
29 
30 #ifdef _LITTLE_ENDIAN
31 #define HI(x)	*(1+(int*)x)
32 #define LO(x)	*(unsigned*)x
33 #else
34 #define HI(x)	*(int*)x
35 #define LO(x)	*(1+(unsigned*)x)
36 #endif
37 
38 #ifdef __RESTRICT
39 #define restrict _Restrict
40 #else
41 #define restrict
42 #endif
43 
44 extern const double __vlibm_TBL_sincos_hi[], __vlibm_TBL_sincos_lo[];
45 
46 static const double
47 	half[2]	= { 0.5, -0.5 },
48 	one		= 1.0,
49 	invpio2	= 0.636619772367581343075535,
50 	pio2_1	= 1.570796326734125614166,
51 	pio2_2	= 6.077100506303965976596e-11,
52 	pio2_3	= 2.022266248711166455796e-21,
53 	pio2_3t	= 8.478427660368899643959e-32,
54 	pp1		= -1.666666666605760465276263943134982554676e-0001,
55 	pp2		=  8.333261209690963126718376566146180944442e-0003,
56 	qq1		= -4.999999999977710986407023955908711557870e-0001,
57 	qq2		=  4.166654863857219350645055881018842089580e-0002,
58 	poly1[2]= { -1.666666666666629669805215138920301589656e-0001,
59 				-4.999999999999931701464060878888294524481e-0001 },
60 	poly2[2]= {  8.333333332390951295683993455280336376663e-0003,
61 				 4.166666666394861917535640593963708222319e-0002 },
62 	poly3[2]= { -1.984126237997976692791551778230098403960e-0004,
63 				-1.388888552656142867832756687736851681462e-0003 },
64 	poly4[2]= {  2.753403624854277237649987622848330351110e-0006,
65 				 2.478519423681460796618128289454530524759e-0005 };
66 
67 static const unsigned thresh[2] = { 0x3fc90000, 0x3fc40000 };
68 
69 extern void __vlibm_vcos_big( int, double *, int, double *, int, int );
70 
71 void
__vlibm_vcos_big_ultra3(int n,double * restrict x,int stridex,double * restrict y,int stridey,int pthresh)72 __vlibm_vcos_big_ultra3( int n, double * restrict x, int stridex, double * restrict y,
73 	int stridey, int pthresh )
74 {
75 	double		x0_or_one[4], x1_or_one[4], x2_or_one[4];
76 	double		y0_or_zero[4], y1_or_zero[4], y2_or_zero[4];
77 	double		x0, x1, x2, *py0, *py1, *py2, *xsave, *ysave;
78 	unsigned	xsb0, xsb1, xsb2;
79 	int			i, biguns, nsave, sxsave, sysave;
80 
81 	nsave = n;
82 	xsave = x;
83 	sxsave = stridex;
84 	ysave = y;
85 	sysave = stridey;
86 	biguns = 0;
87 
88 	x0_or_one[1] = 1.0;
89 	x1_or_one[1] = 1.0;
90 	x2_or_one[1] = 1.0;
91 	x0_or_one[3] = -1.0;
92 	x1_or_one[3] = -1.0;
93 	x2_or_one[3] = -1.0;
94 	y0_or_zero[1] = 0.0;
95 	y1_or_zero[1] = 0.0;
96 	y2_or_zero[1] = 0.0;
97 	y0_or_zero[3] = 0.0;
98 	y1_or_zero[3] = 0.0;
99 	y2_or_zero[3] = 0.0;
100 
101 	do
102 	{
103 		double		fn0, fn1, fn2, a0, a1, a2, w0, w1, w2, y0, y1, y2;
104 		unsigned	hx;
105 		int			n0, n1, n2;
106 
107 loop0:
108 		hx = HI(x);
109 		xsb0 = hx >> 31;
110 		hx &= ~0x80000000;
111 		if ( hx <= pthresh || hx > 0x413921fb )
112 		{
113 			if ( hx > 0x413921fb && hx < 0x7ff00000)
114 				biguns = 1;
115 			x += stridex;
116 			y += stridey;
117 			i = 0;
118 			if ( --n <= 0 )
119 				break;
120 			goto loop0;
121 		}
122 		x0 = *x;
123 		py0 = y;
124 		x += stridex;
125 		y += stridey;
126 		i = 1;
127 		if ( --n <= 0 )
128 			break;
129 
130 loop1:
131 		hx = HI(x);
132 		xsb1 = hx >> 31;
133 		hx &= ~0x80000000;
134 		if ( hx <= pthresh || hx > 0x413921fb )
135 		{
136 			if ( hx > 0x413921fb && hx < 0x7ff00000)
137 				biguns = 1;
138 			x += stridex;
139 			y += stridey;
140 			i = 1;
141 			if ( --n <= 0 )
142 				break;
143 			goto loop1;
144 		}
145 		x1 = *x;
146 		py1 = y;
147 		x += stridex;
148 		y += stridey;
149 		i = 2;
150 		if ( --n <= 0 )
151 			break;
152 
153 loop2:
154 		hx = HI(x);
155 		xsb2 = hx >> 31;
156 		hx &= ~0x80000000;
157 		if ( hx <= pthresh || hx > 0x413921fb )
158 		{
159 			if ( hx > 0x413921fb && hx < 0x7ff00000)
160 				biguns = 1;
161 			x += stridex;
162 			y += stridey;
163 			i = 2;
164 			if ( --n <= 0 )
165 				break;
166 			goto loop2;
167 		}
168 		x2 = *x;
169 		py2 = y;
170 
171 		n0 = (int) ( x0 * invpio2 + half[xsb0] );
172 		n1 = (int) ( x1 * invpio2 + half[xsb1] );
173 		n2 = (int) ( x2 * invpio2 + half[xsb2] );
174 		fn0 = (double) n0;
175 		fn1 = (double) n1;
176 		fn2 = (double) n2;
177 		n0 = (n0 + 1) & 3; /* Add 1 (before the mod) to make sin into cos */
178 		n1 = (n1 + 1) & 3;
179 		n2 = (n2 + 1) & 3;
180 		a0 = x0 - fn0 * pio2_1;
181 		a1 = x1 - fn1 * pio2_1;
182 		a2 = x2 - fn2 * pio2_1;
183 		w0 = fn0 * pio2_2;
184 		w1 = fn1 * pio2_2;
185 		w2 = fn2 * pio2_2;
186 		x0 = a0 - w0;
187 		x1 = a1 - w1;
188 		x2 = a2 - w2;
189 		y0 = ( a0 - x0 ) - w0;
190 		y1 = ( a1 - x1 ) - w1;
191 		y2 = ( a2 - x2 ) - w2;
192 		a0 = x0;
193 		a1 = x1;
194 		a2 = x2;
195 		w0 = fn0 * pio2_3 - y0;
196 		w1 = fn1 * pio2_3 - y1;
197 		w2 = fn2 * pio2_3 - y2;
198 		x0 = a0 - w0;
199 		x1 = a1 - w1;
200 		x2 = a2 - w2;
201 		y0 = ( a0 - x0 ) - w0;
202 		y1 = ( a1 - x1 ) - w1;
203 		y2 = ( a2 - x2 ) - w2;
204 		a0 = x0;
205 		a1 = x1;
206 		a2 = x2;
207 		w0 = fn0 * pio2_3t - y0;
208 		w1 = fn1 * pio2_3t - y1;
209 		w2 = fn2 * pio2_3t - y2;
210 		x0 = a0 - w0;
211 		x1 = a1 - w1;
212 		x2 = a2 - w2;
213 		y0 = ( a0 - x0 ) - w0;
214 		y1 = ( a1 - x1 ) - w1;
215 		y2 = ( a2 - x2 ) - w2;
216 		xsb0 = HI(&x0);
217 		i = ( ( xsb0 & ~0x80000000 ) - thresh[n0&1] ) >> 31;
218 		xsb1 = HI(&x1);
219 		i |= ( ( ( xsb1 & ~0x80000000 ) - thresh[n1&1] ) >> 30 ) & 2;
220 		xsb2 = HI(&x2);
221 		i |= ( ( ( xsb2 & ~0x80000000 ) - thresh[n2&1] ) >> 29 ) & 4;
222 		switch ( i )
223 		{
224 			double		t0, t1, t2, z0, z1, z2;
225 			unsigned	j0, j1, j2;
226 
227 		case 0:
228 			j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
229 			j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
230 			j2 = ( xsb2 + 0x4000 ) & 0xffff8000;
231 			HI(&t0) = j0;
232 			HI(&t1) = j1;
233 			HI(&t2) = j2;
234 			LO(&t0) = 0;
235 			LO(&t1) = 0;
236 			LO(&t2) = 0;
237 			x0 = ( x0 - t0 ) + y0;
238 			x1 = ( x1 - t1 ) + y1;
239 			x2 = ( x2 - t2 ) + y2;
240 			z0 = x0 * x0;
241 			z1 = x1 * x1;
242 			z2 = x2 * x2;
243 			t0 = z0 * ( qq1 + z0 * qq2 );
244 			t1 = z1 * ( qq1 + z1 * qq2 );
245 			t2 = z2 * ( qq1 + z2 * qq2 );
246 			w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
247 			w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
248 			w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) );
249 			j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
250 			j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
251 			j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
252 			xsb0 = ( xsb0 >> 30 ) & 2;
253 			xsb1 = ( xsb1 >> 30 ) & 2;
254 			xsb2 = ( xsb2 >> 30 ) & 2;
255 			n0 ^= ( xsb0 & ~( n0 << 1 ) );
256 			n1 ^= ( xsb1 & ~( n1 << 1 ) );
257 			n2 ^= ( xsb2 & ~( n2 << 1 ) );
258 			xsb0 |= 1;
259 			xsb1 |= 1;
260 			xsb2 |= 1;
261 			a0 = __vlibm_TBL_sincos_hi[j0+n0];
262 			a1 = __vlibm_TBL_sincos_hi[j1+n1];
263 			a2 = __vlibm_TBL_sincos_hi[j2+n2];
264 			t0 = ( __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0 ) + __vlibm_TBL_sincos_lo[j0+n0];
265 			t1 = ( __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1 ) + __vlibm_TBL_sincos_lo[j1+n1];
266 			t2 = ( __vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2 ) + __vlibm_TBL_sincos_lo[j2+n2];
267 			*py0 = ( a0 + t0 );
268 			*py1 = ( a1 + t1 );
269 			*py2 = ( a2 + t2 );
270 			break;
271 
272 		case 1:
273 			j0 = n0 & 1;
274 			j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
275 			j2 = ( xsb2 + 0x4000 ) & 0xffff8000;
276 			HI(&t1) = j1;
277 			HI(&t2) = j2;
278 			LO(&t1) = 0;
279 			LO(&t2) = 0;
280 			x0_or_one[0] = x0;
281 			x0_or_one[2] = -x0;
282 			y0_or_zero[0] = y0;
283 			y0_or_zero[2] = -y0;
284 			x1 = ( x1 - t1 ) + y1;
285 			x2 = ( x2 - t2 ) + y2;
286 			z0 = x0 * x0;
287 			z1 = x1 * x1;
288 			z2 = x2 * x2;
289 			t0 = z0 * ( poly3[j0] + z0 * poly4[j0] );
290 			t1 = z1 * ( qq1 + z1 * qq2 );
291 			t2 = z2 * ( qq1 + z2 * qq2 );
292 			t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) );
293 			w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
294 			w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) );
295 			j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
296 			j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
297 			xsb1 = ( xsb1 >> 30 ) & 2;
298 			xsb2 = ( xsb2 >> 30 ) & 2;
299 			n1 ^= ( xsb1 & ~( n1 << 1 ) );
300 			n2 ^= ( xsb2 & ~( n2 << 1 ) );
301 			xsb1 |= 1;
302 			xsb2 |= 1;
303 			a1 = __vlibm_TBL_sincos_hi[j1+n1];
304 			a2 = __vlibm_TBL_sincos_hi[j2+n2];
305 			t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 );
306 			t1 = ( __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1 ) + __vlibm_TBL_sincos_lo[j1+n1];
307 			t2 = ( __vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2 ) + __vlibm_TBL_sincos_lo[j2+n2];
308 			*py0 = t0;
309 			*py1 = ( a1 + t1 );
310 			*py2 = ( a2 + t2 );
311 			break;
312 
313 		case 2:
314 			j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
315 			j1 = n1 & 1;
316 			j2 = ( xsb2 + 0x4000 ) & 0xffff8000;
317 			HI(&t0) = j0;
318 			HI(&t2) = j2;
319 			LO(&t0) = 0;
320 			LO(&t2) = 0;
321 			x1_or_one[0] = x1;
322 			x1_or_one[2] = -x1;
323 			x0 = ( x0 - t0 ) + y0;
324 			y1_or_zero[0] = y1;
325 			y1_or_zero[2] = -y1;
326 			x2 = ( x2 - t2 ) + y2;
327 			z0 = x0 * x0;
328 			z1 = x1 * x1;
329 			z2 = x2 * x2;
330 			t0 = z0 * ( qq1 + z0 * qq2 );
331 			t1 = z1 * ( poly3[j1] + z1 * poly4[j1] );
332 			t2 = z2 * ( qq1 + z2 * qq2 );
333 			w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
334 			t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) );
335 			w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) );
336 			j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
337 			j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
338 			xsb0 = ( xsb0 >> 30 ) & 2;
339 			xsb2 = ( xsb2 >> 30 ) & 2;
340 			n0 ^= ( xsb0 & ~( n0 << 1 ) );
341 			n2 ^= ( xsb2 & ~( n2 << 1 ) );
342 			xsb0 |= 1;
343 			xsb2 |= 1;
344 			a0 = __vlibm_TBL_sincos_hi[j0+n0];
345 			a2 = __vlibm_TBL_sincos_hi[j2+n2];
346 			t0 = ( __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0 ) + __vlibm_TBL_sincos_lo[j0+n0];
347 			t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 );
348 			t2 = ( __vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2 ) + __vlibm_TBL_sincos_lo[j2+n2];
349 			*py0 = ( a0 + t0 );
350 			*py1 = t1;
351 			*py2 = ( a2 + t2 );
352 			break;
353 
354 		case 3:
355 			j0 = n0 & 1;
356 			j1 = n1 & 1;
357 			j2 = ( xsb2 + 0x4000 ) & 0xffff8000;
358 			HI(&t2) = j2;
359 			LO(&t2) = 0;
360 			x0_or_one[0] = x0;
361 			x0_or_one[2] = -x0;
362 			x1_or_one[0] = x1;
363 			x1_or_one[2] = -x1;
364 			y0_or_zero[0] = y0;
365 			y0_or_zero[2] = -y0;
366 			y1_or_zero[0] = y1;
367 			y1_or_zero[2] = -y1;
368 			x2 = ( x2 - t2 ) + y2;
369 			z0 = x0 * x0;
370 			z1 = x1 * x1;
371 			z2 = x2 * x2;
372 			t0 = z0 * ( poly3[j0] + z0 * poly4[j0] );
373 			t1 = z1 * ( poly3[j1] + z1 * poly4[j1] );
374 			t2 = z2 * ( qq1 + z2 * qq2 );
375 			t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) );
376 			t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) );
377 			w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) );
378 			j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
379 			xsb2 = ( xsb2 >> 30 ) & 2;
380 			n2 ^= ( xsb2 & ~( n2 << 1 ) );
381 			xsb2 |= 1;
382 			a2 = __vlibm_TBL_sincos_hi[j2+n2];
383 			t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 );
384 			t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 );
385 			t2 = ( __vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2 ) + __vlibm_TBL_sincos_lo[j2+n2];
386 			*py0 = t0;
387 			*py1 = t1;
388 			*py2 = ( a2 + t2 );
389 			break;
390 
391 		case 4:
392 			j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
393 			j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
394 			j2 = n2 & 1;
395 			HI(&t0) = j0;
396 			HI(&t1) = j1;
397 			LO(&t0) = 0;
398 			LO(&t1) = 0;
399 			x2_or_one[0] = x2;
400 			x2_or_one[2] = -x2;
401 			x0 = ( x0 - t0 ) + y0;
402 			x1 = ( x1 - t1 ) + y1;
403 			y2_or_zero[0] = y2;
404 			y2_or_zero[2] = -y2;
405 			z0 = x0 * x0;
406 			z1 = x1 * x1;
407 			z2 = x2 * x2;
408 			t0 = z0 * ( qq1 + z0 * qq2 );
409 			t1 = z1 * ( qq1 + z1 * qq2 );
410 			t2 = z2 * ( poly3[j2] + z2 * poly4[j2] );
411 			w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
412 			w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
413 			t2 = z2 * ( poly1[j2] + z2 * ( poly2[j2] + t2 ) );
414 			j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
415 			j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
416 			xsb0 = ( xsb0 >> 30 ) & 2;
417 			xsb1 = ( xsb1 >> 30 ) & 2;
418 			n0 ^= ( xsb0 & ~( n0 << 1 ) );
419 			n1 ^= ( xsb1 & ~( n1 << 1 ) );
420 			xsb0 |= 1;
421 			xsb1 |= 1;
422 			a0 = __vlibm_TBL_sincos_hi[j0+n0];
423 			a1 = __vlibm_TBL_sincos_hi[j1+n1];
424 			t0 = ( __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0 ) + __vlibm_TBL_sincos_lo[j0+n0];
425 			t1 = ( __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1 ) + __vlibm_TBL_sincos_lo[j1+n1];
426 			t2 = x2_or_one[n2] + ( y2_or_zero[n2] + x2_or_one[n2] * t2 );
427 			*py0 = ( a0 + t0 );
428 			*py1 = ( a1 + t1 );
429 			*py2 = t2;
430 			break;
431 
432 		case 5:
433 			j0 = n0 & 1;
434 			j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
435 			j2 = n2 & 1;
436 			HI(&t1) = j1;
437 			LO(&t1) = 0;
438 			x0_or_one[0] = x0;
439 			x0_or_one[2] = -x0;
440 			x2_or_one[0] = x2;
441 			x2_or_one[2] = -x2;
442 			y0_or_zero[0] = y0;
443 			y0_or_zero[2] = -y0;
444 			x1 = ( x1 - t1 ) + y1;
445 			y2_or_zero[0] = y2;
446 			y2_or_zero[2] = -y2;
447 			z0 = x0 * x0;
448 			z1 = x1 * x1;
449 			z2 = x2 * x2;
450 			t0 = z0 * ( poly3[j0] + z0 * poly4[j0] );
451 			t1 = z1 * ( qq1 + z1 * qq2 );
452 			t2 = z2 * ( poly3[j2] + z2 * poly4[j2] );
453 			t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) );
454 			w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
455 			t2 = z2 * ( poly1[j2] + z2 * ( poly2[j2] + t2 ) );
456 			j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
457 			xsb1 = ( xsb1 >> 30 ) & 2;
458 			n1 ^= ( xsb1 & ~( n1 << 1 ) );
459 			xsb1 |= 1;
460 			a1 = __vlibm_TBL_sincos_hi[j1+n1];
461 			t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 );
462 			t1 = ( __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1 ) + __vlibm_TBL_sincos_lo[j1+n1];
463 			t2 = x2_or_one[n2] + ( y2_or_zero[n2] + x2_or_one[n2] * t2 );
464 			*py0 = t0;
465 			*py1 = ( a1 + t1 );
466 			*py2 = t2;
467 			break;
468 
469 		case 6:
470 			j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
471 			j1 = n1 & 1;
472 			j2 = n2 & 1;
473 			HI(&t0) = j0;
474 			LO(&t0) = 0;
475 			x1_or_one[0] = x1;
476 			x1_or_one[2] = -x1;
477 			x2_or_one[0] = x2;
478 			x2_or_one[2] = -x2;
479 			x0 = ( x0 - t0 ) + y0;
480 			y1_or_zero[0] = y1;
481 			y1_or_zero[2] = -y1;
482 			y2_or_zero[0] = y2;
483 			y2_or_zero[2] = -y2;
484 			z0 = x0 * x0;
485 			z1 = x1 * x1;
486 			z2 = x2 * x2;
487 			t0 = z0 * ( qq1 + z0 * qq2 );
488 			t1 = z1 * ( poly3[j1] + z1 * poly4[j1] );
489 			t2 = z2 * ( poly3[j2] + z2 * poly4[j2] );
490 			w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
491 			t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) );
492 			t2 = z2 * ( poly1[j2] + z2 * ( poly2[j2] + t2 ) );
493 			j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
494 			xsb0 = ( xsb0 >> 30 ) & 2;
495 			n0 ^= ( xsb0 & ~( n0 << 1 ) );
496 			xsb0 |= 1;
497 			a0 = __vlibm_TBL_sincos_hi[j0+n0];
498 			t0 = ( __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0 ) + __vlibm_TBL_sincos_lo[j0+n0];
499 			t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 );
500 			t2 = x2_or_one[n2] + ( y2_or_zero[n2] + x2_or_one[n2] * t2 );
501 			*py0 = ( a0 + t0 );
502 			*py1 = t1;
503 			*py2 = t2;
504 			break;
505 
506 		case 7:
507 			j0 = n0 & 1;
508 			j1 = n1 & 1;
509 			j2 = n2 & 1;
510 			x0_or_one[0] = x0;
511 			x0_or_one[2] = -x0;
512 			x1_or_one[0] = x1;
513 			x1_or_one[2] = -x1;
514 			x2_or_one[0] = x2;
515 			x2_or_one[2] = -x2;
516 			y0_or_zero[0] = y0;
517 			y0_or_zero[2] = -y0;
518 			y1_or_zero[0] = y1;
519 			y1_or_zero[2] = -y1;
520 			y2_or_zero[0] = y2;
521 			y2_or_zero[2] = -y2;
522 			z0 = x0 * x0;
523 			z1 = x1 * x1;
524 			z2 = x2 * x2;
525 			t0 = z0 * ( poly3[j0] + z0 * poly4[j0] );
526 			t1 = z1 * ( poly3[j1] + z1 * poly4[j1] );
527 			t2 = z2 * ( poly3[j2] + z2 * poly4[j2] );
528 			t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) );
529 			t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) );
530 			t2 = z2 * ( poly1[j2] + z2 * ( poly2[j2] + t2 ) );
531 			t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 );
532 			t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 );
533 			t2 = x2_or_one[n2] + ( y2_or_zero[n2] + x2_or_one[n2] * t2 );
534 			*py0 = t0;
535 			*py1 = t1;
536 			*py2 = t2;
537 			break;
538 		}
539 
540 		x += stridex;
541 		y += stridey;
542 		i = 0;
543 	} while ( --n > 0 );
544 
545 	if ( i > 0 )
546 	{
547 		double		fn0, fn1, a0, a1, w0, w1, y0, y1;
548 		double		t0, t1, z0, z1;
549 		unsigned	j0, j1;
550 		int			n0, n1;
551 
552 		if ( i > 1 )
553 		{
554 			n1 = (int) ( x1 * invpio2 + half[xsb1] );
555 			fn1 = (double) n1;
556 			n1 = (n1 + 1) & 3; /* Add 1 (before the mod) to make sin into cos */
557 			a1 = x1 - fn1 * pio2_1;
558 			w1 = fn1 * pio2_2;
559 			x1 = a1 - w1;
560 			y1 = ( a1 - x1 ) - w1;
561 			a1 = x1;
562 			w1 = fn1 * pio2_3 - y1;
563 			x1 = a1 - w1;
564 			y1 = ( a1 - x1 ) - w1;
565 			a1 = x1;
566 			w1 = fn1 * pio2_3t - y1;
567 			x1 = a1 - w1;
568 			y1 = ( a1 - x1 ) - w1;
569 			xsb1 = HI(&x1);
570 			if ( ( xsb1 & ~0x80000000 ) < thresh[n1&1] )
571 			{
572 				j1 = n1 & 1;
573 				x1_or_one[0] = x1;
574 				x1_or_one[2] = -x1;
575 				y1_or_zero[0] = y1;
576 				y1_or_zero[2] = -y1;
577 				z1 = x1 * x1;
578 				t1 = z1 * ( poly3[j1] + z1 * poly4[j1] );
579 				t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) );
580 				t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 );
581 				*py1 = t1;
582 			}
583 			else
584 			{
585 				j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
586 				HI(&t1) = j1;
587 				LO(&t1) = 0;
588 				x1 = ( x1 - t1 ) + y1;
589 				z1 = x1 * x1;
590 				t1 = z1 * ( qq1 + z1 * qq2 );
591 				w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
592 				j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
593 				xsb1 = ( xsb1 >> 30 ) & 2;
594 				n1 ^= ( xsb1 & ~( n1 << 1 ) );
595 				xsb1 |= 1;
596 				a1 = __vlibm_TBL_sincos_hi[j1+n1];
597 				t1 = ( __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1 ) + __vlibm_TBL_sincos_lo[j1+n1];
598 				*py1 = ( a1 + t1 );
599 			}
600 		}
601 		n0 = (int) ( x0 * invpio2 + half[xsb0] );
602 		fn0 = (double) n0;
603 		n0 = (n0 + 1) & 3; /* Add 1 (before the mod) to make sin into cos */
604 		a0 = x0 - fn0 * pio2_1;
605 		w0 = fn0 * pio2_2;
606 		x0 = a0 - w0;
607 		y0 = ( a0 - x0 ) - w0;
608 		a0 = x0;
609 		w0 = fn0 * pio2_3 - y0;
610 		x0 = a0 - w0;
611 		y0 = ( a0 - x0 ) - w0;
612 		a0 = x0;
613 		w0 = fn0 * pio2_3t - y0;
614 		x0 = a0 - w0;
615 		y0 = ( a0 - x0 ) - w0;
616 		xsb0 = HI(&x0);
617 		if ( ( xsb0 & ~0x80000000 ) < thresh[n0&1] )
618 		{
619 			j0 = n0 & 1;
620 			x0_or_one[0] = x0;
621 			x0_or_one[2] = -x0;
622 			y0_or_zero[0] = y0;
623 			y0_or_zero[2] = -y0;
624 			z0 = x0 * x0;
625 			t0 = z0 * ( poly3[j0] + z0 * poly4[j0] );
626 			t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) );
627 			t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 );
628 			*py0 = t0;
629 		}
630 		else
631 		{
632 			j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
633 			HI(&t0) = j0;
634 			LO(&t0) = 0;
635 			x0 = ( x0 - t0 ) + y0;
636 			z0 = x0 * x0;
637 			t0 = z0 * ( qq1 + z0 * qq2 );
638 			w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
639 			j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
640 			xsb0 = ( xsb0 >> 30 ) & 2;
641 			n0 ^= ( xsb0 & ~( n0 << 1 ) );
642 			xsb0 |= 1;
643 			a0 = __vlibm_TBL_sincos_hi[j0+n0];
644 			t0 = ( __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0 ) + __vlibm_TBL_sincos_lo[j0+n0];
645 			*py0 = ( a0 + t0 );
646 		}
647 	}
648 
649 	if ( biguns )
650 		__vlibm_vcos_big( nsave, xsave, sxsave, ysave, sysave, 0x413921fb );
651 }
652