1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27
28 #include <sys/isa_defs.h>
29
30 #ifdef _LITTLE_ENDIAN
31 #define HI(x) *(1+(int*)x)
32 #define LO(x) *(unsigned*)x
33 #else
34 #define HI(x) *(int*)x
35 #define LO(x) *(1+(unsigned*)x)
36 #endif
37
38 #ifdef __RESTRICT
39 #define restrict _Restrict
40 #else
41 #define restrict
42 #endif
43
44 extern const double __vlibm_TBL_sincos_hi[], __vlibm_TBL_sincos_lo[];
45
46 static const double
47 half[2] = { 0.5, -0.5 },
48 one = 1.0,
49 invpio2 = 0.636619772367581343075535,
50 pio2_1 = 1.570796326734125614166,
51 pio2_2 = 6.077100506303965976596e-11,
52 pio2_3 = 2.022266248711166455796e-21,
53 pio2_3t = 8.478427660368899643959e-32,
54 pp1 = -1.666666666605760465276263943134982554676e-0001,
55 pp2 = 8.333261209690963126718376566146180944442e-0003,
56 qq1 = -4.999999999977710986407023955908711557870e-0001,
57 qq2 = 4.166654863857219350645055881018842089580e-0002,
58 poly1[2]= { -1.666666666666629669805215138920301589656e-0001,
59 -4.999999999999931701464060878888294524481e-0001 },
60 poly2[2]= { 8.333333332390951295683993455280336376663e-0003,
61 4.166666666394861917535640593963708222319e-0002 },
62 poly3[2]= { -1.984126237997976692791551778230098403960e-0004,
63 -1.388888552656142867832756687736851681462e-0003 },
64 poly4[2]= { 2.753403624854277237649987622848330351110e-0006,
65 2.478519423681460796618128289454530524759e-0005 };
66
67 static const unsigned thresh[2] = { 0x3fc90000, 0x3fc40000 };
68
69 extern void __vlibm_vcos_big( int, double *, int, double *, int, int );
70
71 void
__vlibm_vcos_big_ultra3(int n,double * restrict x,int stridex,double * restrict y,int stridey,int pthresh)72 __vlibm_vcos_big_ultra3( int n, double * restrict x, int stridex, double * restrict y,
73 int stridey, int pthresh )
74 {
75 double x0_or_one[4], x1_or_one[4], x2_or_one[4];
76 double y0_or_zero[4], y1_or_zero[4], y2_or_zero[4];
77 double x0, x1, x2, *py0, *py1, *py2, *xsave, *ysave;
78 unsigned xsb0, xsb1, xsb2;
79 int i, biguns, nsave, sxsave, sysave;
80
81 nsave = n;
82 xsave = x;
83 sxsave = stridex;
84 ysave = y;
85 sysave = stridey;
86 biguns = 0;
87
88 x0_or_one[1] = 1.0;
89 x1_or_one[1] = 1.0;
90 x2_or_one[1] = 1.0;
91 x0_or_one[3] = -1.0;
92 x1_or_one[3] = -1.0;
93 x2_or_one[3] = -1.0;
94 y0_or_zero[1] = 0.0;
95 y1_or_zero[1] = 0.0;
96 y2_or_zero[1] = 0.0;
97 y0_or_zero[3] = 0.0;
98 y1_or_zero[3] = 0.0;
99 y2_or_zero[3] = 0.0;
100
101 do
102 {
103 double fn0, fn1, fn2, a0, a1, a2, w0, w1, w2, y0, y1, y2;
104 unsigned hx;
105 int n0, n1, n2;
106
107 loop0:
108 hx = HI(x);
109 xsb0 = hx >> 31;
110 hx &= ~0x80000000;
111 if ( hx <= pthresh || hx > 0x413921fb )
112 {
113 if ( hx > 0x413921fb && hx < 0x7ff00000)
114 biguns = 1;
115 x += stridex;
116 y += stridey;
117 i = 0;
118 if ( --n <= 0 )
119 break;
120 goto loop0;
121 }
122 x0 = *x;
123 py0 = y;
124 x += stridex;
125 y += stridey;
126 i = 1;
127 if ( --n <= 0 )
128 break;
129
130 loop1:
131 hx = HI(x);
132 xsb1 = hx >> 31;
133 hx &= ~0x80000000;
134 if ( hx <= pthresh || hx > 0x413921fb )
135 {
136 if ( hx > 0x413921fb && hx < 0x7ff00000)
137 biguns = 1;
138 x += stridex;
139 y += stridey;
140 i = 1;
141 if ( --n <= 0 )
142 break;
143 goto loop1;
144 }
145 x1 = *x;
146 py1 = y;
147 x += stridex;
148 y += stridey;
149 i = 2;
150 if ( --n <= 0 )
151 break;
152
153 loop2:
154 hx = HI(x);
155 xsb2 = hx >> 31;
156 hx &= ~0x80000000;
157 if ( hx <= pthresh || hx > 0x413921fb )
158 {
159 if ( hx > 0x413921fb && hx < 0x7ff00000)
160 biguns = 1;
161 x += stridex;
162 y += stridey;
163 i = 2;
164 if ( --n <= 0 )
165 break;
166 goto loop2;
167 }
168 x2 = *x;
169 py2 = y;
170
171 n0 = (int) ( x0 * invpio2 + half[xsb0] );
172 n1 = (int) ( x1 * invpio2 + half[xsb1] );
173 n2 = (int) ( x2 * invpio2 + half[xsb2] );
174 fn0 = (double) n0;
175 fn1 = (double) n1;
176 fn2 = (double) n2;
177 n0 = (n0 + 1) & 3; /* Add 1 (before the mod) to make sin into cos */
178 n1 = (n1 + 1) & 3;
179 n2 = (n2 + 1) & 3;
180 a0 = x0 - fn0 * pio2_1;
181 a1 = x1 - fn1 * pio2_1;
182 a2 = x2 - fn2 * pio2_1;
183 w0 = fn0 * pio2_2;
184 w1 = fn1 * pio2_2;
185 w2 = fn2 * pio2_2;
186 x0 = a0 - w0;
187 x1 = a1 - w1;
188 x2 = a2 - w2;
189 y0 = ( a0 - x0 ) - w0;
190 y1 = ( a1 - x1 ) - w1;
191 y2 = ( a2 - x2 ) - w2;
192 a0 = x0;
193 a1 = x1;
194 a2 = x2;
195 w0 = fn0 * pio2_3 - y0;
196 w1 = fn1 * pio2_3 - y1;
197 w2 = fn2 * pio2_3 - y2;
198 x0 = a0 - w0;
199 x1 = a1 - w1;
200 x2 = a2 - w2;
201 y0 = ( a0 - x0 ) - w0;
202 y1 = ( a1 - x1 ) - w1;
203 y2 = ( a2 - x2 ) - w2;
204 a0 = x0;
205 a1 = x1;
206 a2 = x2;
207 w0 = fn0 * pio2_3t - y0;
208 w1 = fn1 * pio2_3t - y1;
209 w2 = fn2 * pio2_3t - y2;
210 x0 = a0 - w0;
211 x1 = a1 - w1;
212 x2 = a2 - w2;
213 y0 = ( a0 - x0 ) - w0;
214 y1 = ( a1 - x1 ) - w1;
215 y2 = ( a2 - x2 ) - w2;
216 xsb0 = HI(&x0);
217 i = ( ( xsb0 & ~0x80000000 ) - thresh[n0&1] ) >> 31;
218 xsb1 = HI(&x1);
219 i |= ( ( ( xsb1 & ~0x80000000 ) - thresh[n1&1] ) >> 30 ) & 2;
220 xsb2 = HI(&x2);
221 i |= ( ( ( xsb2 & ~0x80000000 ) - thresh[n2&1] ) >> 29 ) & 4;
222 switch ( i )
223 {
224 double t0, t1, t2, z0, z1, z2;
225 unsigned j0, j1, j2;
226
227 case 0:
228 j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
229 j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
230 j2 = ( xsb2 + 0x4000 ) & 0xffff8000;
231 HI(&t0) = j0;
232 HI(&t1) = j1;
233 HI(&t2) = j2;
234 LO(&t0) = 0;
235 LO(&t1) = 0;
236 LO(&t2) = 0;
237 x0 = ( x0 - t0 ) + y0;
238 x1 = ( x1 - t1 ) + y1;
239 x2 = ( x2 - t2 ) + y2;
240 z0 = x0 * x0;
241 z1 = x1 * x1;
242 z2 = x2 * x2;
243 t0 = z0 * ( qq1 + z0 * qq2 );
244 t1 = z1 * ( qq1 + z1 * qq2 );
245 t2 = z2 * ( qq1 + z2 * qq2 );
246 w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
247 w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
248 w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) );
249 j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
250 j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
251 j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
252 xsb0 = ( xsb0 >> 30 ) & 2;
253 xsb1 = ( xsb1 >> 30 ) & 2;
254 xsb2 = ( xsb2 >> 30 ) & 2;
255 n0 ^= ( xsb0 & ~( n0 << 1 ) );
256 n1 ^= ( xsb1 & ~( n1 << 1 ) );
257 n2 ^= ( xsb2 & ~( n2 << 1 ) );
258 xsb0 |= 1;
259 xsb1 |= 1;
260 xsb2 |= 1;
261 a0 = __vlibm_TBL_sincos_hi[j0+n0];
262 a1 = __vlibm_TBL_sincos_hi[j1+n1];
263 a2 = __vlibm_TBL_sincos_hi[j2+n2];
264 t0 = ( __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0 ) + __vlibm_TBL_sincos_lo[j0+n0];
265 t1 = ( __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1 ) + __vlibm_TBL_sincos_lo[j1+n1];
266 t2 = ( __vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2 ) + __vlibm_TBL_sincos_lo[j2+n2];
267 *py0 = ( a0 + t0 );
268 *py1 = ( a1 + t1 );
269 *py2 = ( a2 + t2 );
270 break;
271
272 case 1:
273 j0 = n0 & 1;
274 j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
275 j2 = ( xsb2 + 0x4000 ) & 0xffff8000;
276 HI(&t1) = j1;
277 HI(&t2) = j2;
278 LO(&t1) = 0;
279 LO(&t2) = 0;
280 x0_or_one[0] = x0;
281 x0_or_one[2] = -x0;
282 y0_or_zero[0] = y0;
283 y0_or_zero[2] = -y0;
284 x1 = ( x1 - t1 ) + y1;
285 x2 = ( x2 - t2 ) + y2;
286 z0 = x0 * x0;
287 z1 = x1 * x1;
288 z2 = x2 * x2;
289 t0 = z0 * ( poly3[j0] + z0 * poly4[j0] );
290 t1 = z1 * ( qq1 + z1 * qq2 );
291 t2 = z2 * ( qq1 + z2 * qq2 );
292 t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) );
293 w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
294 w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) );
295 j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
296 j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
297 xsb1 = ( xsb1 >> 30 ) & 2;
298 xsb2 = ( xsb2 >> 30 ) & 2;
299 n1 ^= ( xsb1 & ~( n1 << 1 ) );
300 n2 ^= ( xsb2 & ~( n2 << 1 ) );
301 xsb1 |= 1;
302 xsb2 |= 1;
303 a1 = __vlibm_TBL_sincos_hi[j1+n1];
304 a2 = __vlibm_TBL_sincos_hi[j2+n2];
305 t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 );
306 t1 = ( __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1 ) + __vlibm_TBL_sincos_lo[j1+n1];
307 t2 = ( __vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2 ) + __vlibm_TBL_sincos_lo[j2+n2];
308 *py0 = t0;
309 *py1 = ( a1 + t1 );
310 *py2 = ( a2 + t2 );
311 break;
312
313 case 2:
314 j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
315 j1 = n1 & 1;
316 j2 = ( xsb2 + 0x4000 ) & 0xffff8000;
317 HI(&t0) = j0;
318 HI(&t2) = j2;
319 LO(&t0) = 0;
320 LO(&t2) = 0;
321 x1_or_one[0] = x1;
322 x1_or_one[2] = -x1;
323 x0 = ( x0 - t0 ) + y0;
324 y1_or_zero[0] = y1;
325 y1_or_zero[2] = -y1;
326 x2 = ( x2 - t2 ) + y2;
327 z0 = x0 * x0;
328 z1 = x1 * x1;
329 z2 = x2 * x2;
330 t0 = z0 * ( qq1 + z0 * qq2 );
331 t1 = z1 * ( poly3[j1] + z1 * poly4[j1] );
332 t2 = z2 * ( qq1 + z2 * qq2 );
333 w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
334 t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) );
335 w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) );
336 j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
337 j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
338 xsb0 = ( xsb0 >> 30 ) & 2;
339 xsb2 = ( xsb2 >> 30 ) & 2;
340 n0 ^= ( xsb0 & ~( n0 << 1 ) );
341 n2 ^= ( xsb2 & ~( n2 << 1 ) );
342 xsb0 |= 1;
343 xsb2 |= 1;
344 a0 = __vlibm_TBL_sincos_hi[j0+n0];
345 a2 = __vlibm_TBL_sincos_hi[j2+n2];
346 t0 = ( __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0 ) + __vlibm_TBL_sincos_lo[j0+n0];
347 t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 );
348 t2 = ( __vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2 ) + __vlibm_TBL_sincos_lo[j2+n2];
349 *py0 = ( a0 + t0 );
350 *py1 = t1;
351 *py2 = ( a2 + t2 );
352 break;
353
354 case 3:
355 j0 = n0 & 1;
356 j1 = n1 & 1;
357 j2 = ( xsb2 + 0x4000 ) & 0xffff8000;
358 HI(&t2) = j2;
359 LO(&t2) = 0;
360 x0_or_one[0] = x0;
361 x0_or_one[2] = -x0;
362 x1_or_one[0] = x1;
363 x1_or_one[2] = -x1;
364 y0_or_zero[0] = y0;
365 y0_or_zero[2] = -y0;
366 y1_or_zero[0] = y1;
367 y1_or_zero[2] = -y1;
368 x2 = ( x2 - t2 ) + y2;
369 z0 = x0 * x0;
370 z1 = x1 * x1;
371 z2 = x2 * x2;
372 t0 = z0 * ( poly3[j0] + z0 * poly4[j0] );
373 t1 = z1 * ( poly3[j1] + z1 * poly4[j1] );
374 t2 = z2 * ( qq1 + z2 * qq2 );
375 t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) );
376 t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) );
377 w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) );
378 j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
379 xsb2 = ( xsb2 >> 30 ) & 2;
380 n2 ^= ( xsb2 & ~( n2 << 1 ) );
381 xsb2 |= 1;
382 a2 = __vlibm_TBL_sincos_hi[j2+n2];
383 t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 );
384 t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 );
385 t2 = ( __vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2 ) + __vlibm_TBL_sincos_lo[j2+n2];
386 *py0 = t0;
387 *py1 = t1;
388 *py2 = ( a2 + t2 );
389 break;
390
391 case 4:
392 j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
393 j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
394 j2 = n2 & 1;
395 HI(&t0) = j0;
396 HI(&t1) = j1;
397 LO(&t0) = 0;
398 LO(&t1) = 0;
399 x2_or_one[0] = x2;
400 x2_or_one[2] = -x2;
401 x0 = ( x0 - t0 ) + y0;
402 x1 = ( x1 - t1 ) + y1;
403 y2_or_zero[0] = y2;
404 y2_or_zero[2] = -y2;
405 z0 = x0 * x0;
406 z1 = x1 * x1;
407 z2 = x2 * x2;
408 t0 = z0 * ( qq1 + z0 * qq2 );
409 t1 = z1 * ( qq1 + z1 * qq2 );
410 t2 = z2 * ( poly3[j2] + z2 * poly4[j2] );
411 w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
412 w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
413 t2 = z2 * ( poly1[j2] + z2 * ( poly2[j2] + t2 ) );
414 j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
415 j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
416 xsb0 = ( xsb0 >> 30 ) & 2;
417 xsb1 = ( xsb1 >> 30 ) & 2;
418 n0 ^= ( xsb0 & ~( n0 << 1 ) );
419 n1 ^= ( xsb1 & ~( n1 << 1 ) );
420 xsb0 |= 1;
421 xsb1 |= 1;
422 a0 = __vlibm_TBL_sincos_hi[j0+n0];
423 a1 = __vlibm_TBL_sincos_hi[j1+n1];
424 t0 = ( __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0 ) + __vlibm_TBL_sincos_lo[j0+n0];
425 t1 = ( __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1 ) + __vlibm_TBL_sincos_lo[j1+n1];
426 t2 = x2_or_one[n2] + ( y2_or_zero[n2] + x2_or_one[n2] * t2 );
427 *py0 = ( a0 + t0 );
428 *py1 = ( a1 + t1 );
429 *py2 = t2;
430 break;
431
432 case 5:
433 j0 = n0 & 1;
434 j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
435 j2 = n2 & 1;
436 HI(&t1) = j1;
437 LO(&t1) = 0;
438 x0_or_one[0] = x0;
439 x0_or_one[2] = -x0;
440 x2_or_one[0] = x2;
441 x2_or_one[2] = -x2;
442 y0_or_zero[0] = y0;
443 y0_or_zero[2] = -y0;
444 x1 = ( x1 - t1 ) + y1;
445 y2_or_zero[0] = y2;
446 y2_or_zero[2] = -y2;
447 z0 = x0 * x0;
448 z1 = x1 * x1;
449 z2 = x2 * x2;
450 t0 = z0 * ( poly3[j0] + z0 * poly4[j0] );
451 t1 = z1 * ( qq1 + z1 * qq2 );
452 t2 = z2 * ( poly3[j2] + z2 * poly4[j2] );
453 t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) );
454 w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
455 t2 = z2 * ( poly1[j2] + z2 * ( poly2[j2] + t2 ) );
456 j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
457 xsb1 = ( xsb1 >> 30 ) & 2;
458 n1 ^= ( xsb1 & ~( n1 << 1 ) );
459 xsb1 |= 1;
460 a1 = __vlibm_TBL_sincos_hi[j1+n1];
461 t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 );
462 t1 = ( __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1 ) + __vlibm_TBL_sincos_lo[j1+n1];
463 t2 = x2_or_one[n2] + ( y2_or_zero[n2] + x2_or_one[n2] * t2 );
464 *py0 = t0;
465 *py1 = ( a1 + t1 );
466 *py2 = t2;
467 break;
468
469 case 6:
470 j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
471 j1 = n1 & 1;
472 j2 = n2 & 1;
473 HI(&t0) = j0;
474 LO(&t0) = 0;
475 x1_or_one[0] = x1;
476 x1_or_one[2] = -x1;
477 x2_or_one[0] = x2;
478 x2_or_one[2] = -x2;
479 x0 = ( x0 - t0 ) + y0;
480 y1_or_zero[0] = y1;
481 y1_or_zero[2] = -y1;
482 y2_or_zero[0] = y2;
483 y2_or_zero[2] = -y2;
484 z0 = x0 * x0;
485 z1 = x1 * x1;
486 z2 = x2 * x2;
487 t0 = z0 * ( qq1 + z0 * qq2 );
488 t1 = z1 * ( poly3[j1] + z1 * poly4[j1] );
489 t2 = z2 * ( poly3[j2] + z2 * poly4[j2] );
490 w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
491 t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) );
492 t2 = z2 * ( poly1[j2] + z2 * ( poly2[j2] + t2 ) );
493 j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
494 xsb0 = ( xsb0 >> 30 ) & 2;
495 n0 ^= ( xsb0 & ~( n0 << 1 ) );
496 xsb0 |= 1;
497 a0 = __vlibm_TBL_sincos_hi[j0+n0];
498 t0 = ( __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0 ) + __vlibm_TBL_sincos_lo[j0+n0];
499 t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 );
500 t2 = x2_or_one[n2] + ( y2_or_zero[n2] + x2_or_one[n2] * t2 );
501 *py0 = ( a0 + t0 );
502 *py1 = t1;
503 *py2 = t2;
504 break;
505
506 case 7:
507 j0 = n0 & 1;
508 j1 = n1 & 1;
509 j2 = n2 & 1;
510 x0_or_one[0] = x0;
511 x0_or_one[2] = -x0;
512 x1_or_one[0] = x1;
513 x1_or_one[2] = -x1;
514 x2_or_one[0] = x2;
515 x2_or_one[2] = -x2;
516 y0_or_zero[0] = y0;
517 y0_or_zero[2] = -y0;
518 y1_or_zero[0] = y1;
519 y1_or_zero[2] = -y1;
520 y2_or_zero[0] = y2;
521 y2_or_zero[2] = -y2;
522 z0 = x0 * x0;
523 z1 = x1 * x1;
524 z2 = x2 * x2;
525 t0 = z0 * ( poly3[j0] + z0 * poly4[j0] );
526 t1 = z1 * ( poly3[j1] + z1 * poly4[j1] );
527 t2 = z2 * ( poly3[j2] + z2 * poly4[j2] );
528 t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) );
529 t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) );
530 t2 = z2 * ( poly1[j2] + z2 * ( poly2[j2] + t2 ) );
531 t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 );
532 t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 );
533 t2 = x2_or_one[n2] + ( y2_or_zero[n2] + x2_or_one[n2] * t2 );
534 *py0 = t0;
535 *py1 = t1;
536 *py2 = t2;
537 break;
538 }
539
540 x += stridex;
541 y += stridey;
542 i = 0;
543 } while ( --n > 0 );
544
545 if ( i > 0 )
546 {
547 double fn0, fn1, a0, a1, w0, w1, y0, y1;
548 double t0, t1, z0, z1;
549 unsigned j0, j1;
550 int n0, n1;
551
552 if ( i > 1 )
553 {
554 n1 = (int) ( x1 * invpio2 + half[xsb1] );
555 fn1 = (double) n1;
556 n1 = (n1 + 1) & 3; /* Add 1 (before the mod) to make sin into cos */
557 a1 = x1 - fn1 * pio2_1;
558 w1 = fn1 * pio2_2;
559 x1 = a1 - w1;
560 y1 = ( a1 - x1 ) - w1;
561 a1 = x1;
562 w1 = fn1 * pio2_3 - y1;
563 x1 = a1 - w1;
564 y1 = ( a1 - x1 ) - w1;
565 a1 = x1;
566 w1 = fn1 * pio2_3t - y1;
567 x1 = a1 - w1;
568 y1 = ( a1 - x1 ) - w1;
569 xsb1 = HI(&x1);
570 if ( ( xsb1 & ~0x80000000 ) < thresh[n1&1] )
571 {
572 j1 = n1 & 1;
573 x1_or_one[0] = x1;
574 x1_or_one[2] = -x1;
575 y1_or_zero[0] = y1;
576 y1_or_zero[2] = -y1;
577 z1 = x1 * x1;
578 t1 = z1 * ( poly3[j1] + z1 * poly4[j1] );
579 t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) );
580 t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 );
581 *py1 = t1;
582 }
583 else
584 {
585 j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
586 HI(&t1) = j1;
587 LO(&t1) = 0;
588 x1 = ( x1 - t1 ) + y1;
589 z1 = x1 * x1;
590 t1 = z1 * ( qq1 + z1 * qq2 );
591 w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
592 j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
593 xsb1 = ( xsb1 >> 30 ) & 2;
594 n1 ^= ( xsb1 & ~( n1 << 1 ) );
595 xsb1 |= 1;
596 a1 = __vlibm_TBL_sincos_hi[j1+n1];
597 t1 = ( __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1 ) + __vlibm_TBL_sincos_lo[j1+n1];
598 *py1 = ( a1 + t1 );
599 }
600 }
601 n0 = (int) ( x0 * invpio2 + half[xsb0] );
602 fn0 = (double) n0;
603 n0 = (n0 + 1) & 3; /* Add 1 (before the mod) to make sin into cos */
604 a0 = x0 - fn0 * pio2_1;
605 w0 = fn0 * pio2_2;
606 x0 = a0 - w0;
607 y0 = ( a0 - x0 ) - w0;
608 a0 = x0;
609 w0 = fn0 * pio2_3 - y0;
610 x0 = a0 - w0;
611 y0 = ( a0 - x0 ) - w0;
612 a0 = x0;
613 w0 = fn0 * pio2_3t - y0;
614 x0 = a0 - w0;
615 y0 = ( a0 - x0 ) - w0;
616 xsb0 = HI(&x0);
617 if ( ( xsb0 & ~0x80000000 ) < thresh[n0&1] )
618 {
619 j0 = n0 & 1;
620 x0_or_one[0] = x0;
621 x0_or_one[2] = -x0;
622 y0_or_zero[0] = y0;
623 y0_or_zero[2] = -y0;
624 z0 = x0 * x0;
625 t0 = z0 * ( poly3[j0] + z0 * poly4[j0] );
626 t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) );
627 t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 );
628 *py0 = t0;
629 }
630 else
631 {
632 j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
633 HI(&t0) = j0;
634 LO(&t0) = 0;
635 x0 = ( x0 - t0 ) + y0;
636 z0 = x0 * x0;
637 t0 = z0 * ( qq1 + z0 * qq2 );
638 w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
639 j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
640 xsb0 = ( xsb0 >> 30 ) & 2;
641 n0 ^= ( xsb0 & ~( n0 << 1 ) );
642 xsb0 |= 1;
643 a0 = __vlibm_TBL_sincos_hi[j0+n0];
644 t0 = ( __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0 ) + __vlibm_TBL_sincos_lo[j0+n0];
645 *py0 = ( a0 + t0 );
646 }
647 }
648
649 if ( biguns )
650 __vlibm_vcos_big( nsave, xsave, sxsave, ysave, sysave, 0x413921fb );
651 }
652