1 #include <stdio.h>
2 #include <stdlib.h>
3 #include "crlibm.h"
4 /*
5 * Correctly rounded trigpi functions
6 *
7 * Authors : F. de Dinechin, S. Chevillard, C. Lauter (the latter two
8 * didn't write a line of this file, but wrote a tool that wrote a
9 * tool that wrote etc that wrote bits of code related to polynomial
10 * evaluation.)
11 *
12 * This file is part of the crlibm library developed by the Arenaire
13 * project at Ecole Normale Superieure de Lyon
14 *
15 * This program is free software; you can redistribute it and/or modify
16 * it under the terms of the GNU Lesser General Public License as published by
17 * the Free Software Foundation; either version 2 of the License, or
18 * (at your option) any later version.
19 *
20 * This program is distributed in the hope that it will be useful,
21 * but WITHOUT ANY WARRANTY; without even the implied warranty of
22 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 * GNU General Public License for more details.
24 *
25 * You should have received a copy of the GNU Lesser General Public License
26 * along with this program; if not, write to the Free Software
27 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
28 */
29 #include "crlibm_private.h"
30 #include "triple-double.h"
31 #include "trigpi.h"
32
33
34 /* TODO
35
36
37 Use the symmetries of the tables
38
39 Actually, use the tables from the standard trigo.
40
41 Write decent quick steps. Or hammer Christoph and Sylvain to do so.
42
43 */
44
45
46
47
48
49 /* This ugly bits of code in the beginning are polynomial evaluations
50 automagically generated and proven by Chevillard and Lauter's
51 tools
52 */
53
54
55 #define sinpiacc_coeff_1h 3.14159265358979311599796346854418516159057617187500000000000000000000000000000000e+00
56 #define sinpiacc_coeff_1m 1.22464679914735320717376402945839660462569212467758006379625612680683843791484833e-16
57 #define sinpiacc_coeff_1l -2.87889731599645993207191707893463395148177292198731390393739579574603302514349608e-33
58 #define sinpiacc_coeff_3h -5.16771278004997025590228076907806098461151123046875000000000000000000000000000000e+00
59 #define sinpiacc_coeff_3m 2.26656228257550136196266687046492287115561324595258696490418515168130397796630859e-16
60 #define sinpiacc_coeff_5h 2.55016403987734552316624103696085512638092041015625000000000000000000000000000000e+00
61 #define sinpiacc_coeff_5m -7.93098961936403945684716222915171282926664203267314023904077657789457589387893677e-17
62 #define sinpiacc_coeff_7h -5.99264529320792105338000510528218001127243041992187500000000000000000000000000000e-01
63 #define sinpiacc_coeff_9h 8.21458866130424236740026344705256633460521697998046875000000000000000000000000000e-02
64 #define sinpiacc_coeff_11h -7.37046804820839888960914976223648409359157085418701171875000000000000000000000000e-03
65
66
67 #define cospiacc_coeff_0h 1.00000000000000000000000000000000000000000000000000000000000000000000000000000000e+00
68 #define cospiacc_coeff_2h -4.93480220054467899615247006295248866081237792968750000000000000000000000000000000e+00
69 #define cospiacc_coeff_2m -3.13264775437072047133490817894057799839785556899468543790021612949203699827194214e-16
70 #define cospiacc_coeff_4h 4.05871212641676848420502210501581430435180664062500000000000000000000000000000000e+00
71 #define cospiacc_coeff_4m -2.66019969731660223662555032718185048048635055542576743903282476821914315223693848e-16
72 #define cospiacc_coeff_6h -1.33526276885458949905682857206556946039199829101562500000000000000000000000000000e+00
73 #define cospiacc_coeff_8h 2.35330630358513925859398341344785876572132110595703125000000000000000000000000000e-01
74 #define cospiacc_coeff_10h -2.58068327360992909313974763563237502239644527435302734375000000000000000000000000e-02
75
sincospiacc(double * sinpiacc_resh,double * sinpiacc_resm,double * sinpiacc_resl,double * cospiacc_resh,double * cospiacc_resm,double * cospiacc_resl,double x)76 static void sincospiacc(double *sinpiacc_resh, double *sinpiacc_resm, double *sinpiacc_resl,
77 double *cospiacc_resh, double *cospiacc_resm, double *cospiacc_resl,
78 double x) {
79 double x2h, x2m;
80 double sinpiacc_t_1_0h;
81 double sinpiacc_t_2_0h;
82 double sinpiacc_t_3_0h;
83 double sinpiacc_t_4_0h;
84 double sinpiacc_t_5_0h, sinpiacc_t_5_0m;
85 double sinpiacc_t_6_0h, sinpiacc_t_6_0m;
86 double sinpiacc_t_7_0h, sinpiacc_t_7_0m;
87 double sinpiacc_t_8_0h, sinpiacc_t_8_0m;
88 double sinpiacc_t_9_0h, sinpiacc_t_9_0m, sinpiacc_t_9_0l;
89 double sinpiacc_t_10_0h, sinpiacc_t_10_0m, sinpiacc_t_10_0l;
90
91 double cospiacc_t_1_0h;
92 double cospiacc_t_2_0h;
93 double cospiacc_t_3_0h;
94 double cospiacc_t_4_0h;
95 double cospiacc_t_5_0h, cospiacc_t_5_0m;
96 double cospiacc_t_6_0h, cospiacc_t_6_0m;
97 double cospiacc_t_7_0h, cospiacc_t_7_0m;
98 double cospiacc_t_8_0h, cospiacc_t_8_0m;
99 double cospiacc_t_9_0h, cospiacc_t_9_0m, cospiacc_t_9_0l;
100
101 Mul12(&x2h,&x2m,x,x);
102
103 sinpiacc_t_1_0h = sinpiacc_coeff_11h;
104 sinpiacc_t_2_0h = sinpiacc_t_1_0h * x2h;
105 sinpiacc_t_3_0h = sinpiacc_coeff_9h + sinpiacc_t_2_0h;
106 sinpiacc_t_4_0h = sinpiacc_t_3_0h * x2h;
107 Add12(sinpiacc_t_5_0h,sinpiacc_t_5_0m,sinpiacc_coeff_7h,sinpiacc_t_4_0h);
108 MulAdd22(&sinpiacc_t_6_0h,&sinpiacc_t_6_0m,sinpiacc_coeff_5h,sinpiacc_coeff_5m,x2h,x2m,sinpiacc_t_5_0h,sinpiacc_t_5_0m);
109 MulAdd22(&sinpiacc_t_7_0h,&sinpiacc_t_7_0m,sinpiacc_coeff_3h,sinpiacc_coeff_3m,x2h,x2m,sinpiacc_t_6_0h,sinpiacc_t_6_0m);
110 Mul22(&sinpiacc_t_8_0h,&sinpiacc_t_8_0m,sinpiacc_t_7_0h,sinpiacc_t_7_0m,x2h,x2m);
111 Add233Cond(&sinpiacc_t_9_0h,&sinpiacc_t_9_0m,&sinpiacc_t_9_0l,sinpiacc_t_8_0h,sinpiacc_t_8_0m,sinpiacc_coeff_1h,sinpiacc_coeff_1m,sinpiacc_coeff_1l);
112 Mul133(&sinpiacc_t_10_0h,&sinpiacc_t_10_0m,&sinpiacc_t_10_0l,x,sinpiacc_t_9_0h,sinpiacc_t_9_0m,sinpiacc_t_9_0l);
113 Renormalize3(sinpiacc_resh,sinpiacc_resm,sinpiacc_resl,sinpiacc_t_10_0h,sinpiacc_t_10_0m,sinpiacc_t_10_0l);
114
115
116 cospiacc_t_1_0h = cospiacc_coeff_10h;
117 cospiacc_t_2_0h = cospiacc_t_1_0h * x2h;
118 cospiacc_t_3_0h = cospiacc_coeff_8h + cospiacc_t_2_0h;
119 cospiacc_t_4_0h = cospiacc_t_3_0h * x2h;
120 Add12(cospiacc_t_5_0h,cospiacc_t_5_0m,cospiacc_coeff_6h,cospiacc_t_4_0h);
121 MulAdd22(&cospiacc_t_6_0h,&cospiacc_t_6_0m,cospiacc_coeff_4h,cospiacc_coeff_4m,x2h,x2m,cospiacc_t_5_0h,cospiacc_t_5_0m);
122 MulAdd22(&cospiacc_t_7_0h,&cospiacc_t_7_0m,cospiacc_coeff_2h,cospiacc_coeff_2m,x2h,x2m,cospiacc_t_6_0h,cospiacc_t_6_0m);
123 Mul22(&cospiacc_t_8_0h,&cospiacc_t_8_0m,cospiacc_t_7_0h,cospiacc_t_7_0m,x2h,x2m);
124 Add123(&cospiacc_t_9_0h,&cospiacc_t_9_0m,&cospiacc_t_9_0l,cospiacc_coeff_0h,cospiacc_t_8_0h,cospiacc_t_8_0m);
125 *cospiacc_resh = cospiacc_t_9_0h; *cospiacc_resm = cospiacc_t_9_0m; *cospiacc_resl = cospiacc_t_9_0l;
126
127 }
128
129
130
131
132
133
134
135
136
137
138
139 /* Comment on comparing sa, ca, sy and cy
140 either index=0, then sa=0 and ca=1, therefore t2=0, and the Add33 will be exact
141 or index !=0, and
142 -eps1 < sy < eps1 (but sy may be negative)
143 sa > eps1 (sa>0)
144 1-eps2 < cy < 1
145 ca < 1-eps2
146 therefore
147 sacy = t2 >0
148 casy = t1 may be negative
149 abs(t1) <= abs(t2)
150 Unfortunately we need a stronger condition to use the Add33
151 */
152
153
154
155
sinpi_accurate(double * rh,double * rm,double * rl,double y,int index,int quadrant)156 static void sinpi_accurate(double *rh, double *rm, double *rl,
157 double y, int index, int quadrant)
158 {
159 double syh, sym, syl, cyh, cym, cyl, sah, sam, sal, cah, cam, cal;
160 double t1h, t1m, t1l, t2h, t2m, t2l;
161
162 sincospiacc(&syh, &sym, &syl, &cyh, &cym, &cyl, y);
163
164 sah=sincosTable[index].sh;
165 cah=sincosTable[index].ch;
166 sam=sincosTable[index].sm;
167 cam=sincosTable[index].cm;
168 sal=sincosTable[index].sl;
169 cal=sincosTable[index].cl;
170
171 if(quadrant==0 || quadrant==2) {
172 /* compute sy*ca+sa*cy : t1 = sy*ca, t2 = sa*cy*/
173 Mul33(&t1h,&t1m,&t1l, syh,sym,syl, cah,cam,cal);
174 Mul33(&t2h,&t2m,&t2l, sah,sam,sal, cyh,cym,cyl);
175 Add33Cond(rh, rm, rl, t2h,t2m,t2l, t1h,t1m,t1l);
176 }
177 else {
178 /* compute cy*ca - sa*sy : t1 = cy*ca, t2 = sa*sy */
179 Mul33(&t1h,&t1m,&t1l, cyh,cym,cyl, cah,cam,cal);
180 Mul33(&t2h,&t2m,&t2l, sah,sam,sal, syh,sym,syl);
181 Add33Cond(rh, rm, rl, t1h,t1m,t1l, -t2h,-t2m,-t2l);
182 }
183
184 if (quadrant>=2) {
185 *rh = -*rh;
186 *rm = -*rm;
187 *rl = -*rl;
188 }
189
190 };
191
192
193
194
195
196
197 #define sinpiquick_coeff_1h 3.14159265358979311599796346854418516159057617187500000000000000000000000000000000e+00
198 #define sinpiquick_coeff_1m 1.22464971683184787123862072310058851157814368464452070561776508839102461934089661e-16
199 #define sinpiquick_coeff_3h -5.16771278004997025590228076907806098461151123046875000000000000000000000000000000e+00
200 #define sinpiquick_coeff_5h 2.55016403989992213041659852024167776107788085937500000000000000000000000000000000e+00
201 #define sinpiquick_coeff_7h -5.99263913290728922333983064163476228713989257812500000000000000000000000000000000e-01
202 #define cospiquick_coeff_0h 1.00000000000000000000000000000000000000000000000000000000000000000000000000000000e+00
203 #define cospiquick_coeff_2h -4.93480220054467899615247006295248866081237792968750000000000000000000000000000000e+00
204 #define cospiquick_coeff_4h 4.05871212632582167856298838160000741481781005859375000000000000000000000000000000e+00
205 #define cospiquick_coeff_6h -1.33525456323720947970912220625905320048332214355468750000000000000000000000000000e+00
206
207
208
cospi_accurate(double * rh,double * rm,double * rl,double y,int index,int quadrant)209 static void cospi_accurate(double *rh, double *rm, double *rl,
210 double y, int index, int quadrant)
211 {
212 double syh, sym, syl, cyh, cym, cyl, sah, sam, sal, cah, cam, cal;
213 double t1h, t1m, t1l, t2h, t2m, t2l;
214
215 sincospiacc(&syh, &sym, &syl, &cyh, &cym, &cyl, y);
216
217 sah=sincosTable[index].sh;
218 cah=sincosTable[index].ch;
219 sam=sincosTable[index].sm;
220 cam=sincosTable[index].cm;
221 sal=sincosTable[index].sl;
222 cal=sincosTable[index].cl;
223
224 if(quadrant==0 || quadrant==2) {
225 /* compute cy*ca - sa*sy : t1 = cy*ca, t2 = sa*sy */
226 Mul33(&t1h,&t1m,&t1l, cyh,cym,cyl, cah,cam,cal);
227 Mul33(&t2h,&t2m,&t2l, sah,sam,sal, syh,sym,syl);
228 Add33Cond(rh, rm, rl, t1h,t1m,t1l, -t2h,-t2m,-t2l);
229 }
230 else {
231 /* compute sy*ca+sa*cy : t1 = sy*ca, t2 = sa*cy*/
232 Mul33(&t1h,&t1m,&t1l, syh,sym,syl, cah,cam,cal);
233 Mul33(&t2h,&t2m,&t2l, sah,sam,sal, cyh,cym,cyl);
234 Add33Cond(rh, rm, rl, t2h,t2m,t2l, t1h,t1m,t1l);
235 }
236
237 if (quadrant==1 || quadrant==2) {
238 *rh = -*rh;
239 *rm = -*rm;
240 *rl = -*rl;
241 }
242
243 };
244
245
246
247
248
249
250 /* This one can clearly be improved. It was set up in less than one hour */
sinpiquick(double * rh,double * rm,double x,int index,int quadrant)251 void sinpiquick(double *rh, double *rm, double x, int index, int quadrant) {
252 double x2h, x2m;
253 double sinpiquick_t_1_0h;
254 double sinpiquick_t_2_0h;
255 double sinpiquick_t_3_0h;
256 double sinpiquick_t_4_0h;
257 double sinpiquick_t_5_0h, sinpiquick_t_5_0m;
258 double sinpiquick_t_6_0h, sinpiquick_t_6_0m;
259 double sinpiquick_t_7_0h, sinpiquick_t_7_0m;
260 double syh, sym;
261 double cospiquick_t_1_0h;
262 double cospiquick_t_2_0h;
263 double cospiquick_t_3_0h;
264 double cospiquick_t_4_0h;
265 double cospiquick_t_5_0h, cospiquick_t_5_0m;
266 double cospiquick_t_6_0h, cospiquick_t_6_0m;
267 double cospiquick_t_7_0h, cospiquick_t_7_0m;
268 double cyh, cym;
269 double t1h, t1m, t2h, t2m, sah, sam, cah,cam;
270
271 Mul12(&x2h,&x2m,x,x);
272
273 sah=sincosTable[index].sh;
274 cah=sincosTable[index].ch;
275 sam=sincosTable[index].sm;
276 cam=sincosTable[index].cm;
277
278 sinpiquick_t_1_0h = sinpiquick_coeff_7h;
279 sinpiquick_t_2_0h = sinpiquick_t_1_0h * x2h;
280 sinpiquick_t_3_0h = sinpiquick_coeff_5h + sinpiquick_t_2_0h;
281 sinpiquick_t_4_0h = sinpiquick_t_3_0h * x2h;
282 Add12(sinpiquick_t_5_0h,sinpiquick_t_5_0m,sinpiquick_coeff_3h,sinpiquick_t_4_0h);
283 MulAdd22(&sinpiquick_t_6_0h,&sinpiquick_t_6_0m,sinpiquick_coeff_1h,sinpiquick_coeff_1m,x2h,x2m,sinpiquick_t_5_0h,sinpiquick_t_5_0m);
284 Mul122(&sinpiquick_t_7_0h,&sinpiquick_t_7_0m,x,sinpiquick_t_6_0h,sinpiquick_t_6_0m);
285 syh = sinpiquick_t_7_0h; sym = sinpiquick_t_7_0m;
286
287 cospiquick_t_1_0h = cospiquick_coeff_6h;
288 cospiquick_t_2_0h = cospiquick_t_1_0h * x2h;
289 cospiquick_t_3_0h = cospiquick_coeff_4h + cospiquick_t_2_0h;
290 cospiquick_t_4_0h = cospiquick_t_3_0h * x2h;
291 Add12(cospiquick_t_5_0h,cospiquick_t_5_0m,cospiquick_coeff_2h,cospiquick_t_4_0h);
292 Mul22(&cospiquick_t_6_0h,&cospiquick_t_6_0m,cospiquick_t_5_0h,cospiquick_t_5_0m,x2h,x2m);
293 Add122(&cospiquick_t_7_0h,&cospiquick_t_7_0m,cospiquick_coeff_0h,cospiquick_t_6_0h,cospiquick_t_6_0m);
294 cyh = cospiquick_t_7_0h; cym = cospiquick_t_7_0m;
295
296 /* Here comes the hand-written, unproven yet code */
297 if(quadrant==0 || quadrant==2) {
298 /* compute sy*ca+sa*cy : t1 = sy*ca, t2 = sa*cy*/
299 Mul22(&t1h,&t1m, syh,sym, cah,cam);
300 Mul22(&t2h,&t2m, sah,sam, cyh,cym);
301 Add22Cond(rh, rm, t2h,t2m, t1h,t1m);
302 }
303 else {
304 /* compute cy*ca - sa*sy : t1 = cy*ca, t2 = sa*sy */
305 Mul22(&t1h,&t1m, cyh,cym, cah,cam);
306 Mul22(&t2h,&t2m, sah,sam, syh,sym);
307 Add22Cond(rh, rm, t1h,t1m, -t2h,-t2m);
308 }
309
310 if (quadrant>=2) {
311 *rh = -*rh;
312 *rm = -*rm;
313 }
314
315 }
316
317
318
319
320
321
322
sinpi_rn(double x)323 double sinpi_rn(double x){
324 double xs, y,u, rh, rm, rl, sign,absx;
325 db_number xdb, t;
326 int32_t xih, absxih, index, quadrant;
327
328 if (x<0) absx = -x; else absx = x;
329
330 xdb.d = x;
331
332 xs = x*128.0;
333
334 /* argument reduction */
335 if(absx> TWOTO42 ) { /* x is very large, let us first subtract a large integer from it */
336 t.d = xs;
337 t.i[LO] =0; /* remove the low part. The point is somewhere there since x > 2^42.
338 So what remains in t is an FP integer almost as large as x */
339 xs = xs-t.d; /* we are going to throw away the int part anyway */
340 }
341
342 t.d = TWOTO5251 + xs;
343 u = t.d - TWOTO5251;
344 y = xs - u;
345 index = t.i[LO] & 0x3f;
346 quadrant = (t.i[LO] & 0xff) >>6;
347
348 /* Special case tests come late because the conversion FP to int is slow */
349 xih = xdb.i[HI];
350 absxih = xih & 0x7fffffff;
351 if (xih>>31) sign=-1.; else sign=1.; /* consider the sign bit */
352
353 if(index==0 && y==0.0 && ((quadrant&1)==0)) return sign*0.0; /*signed, inspired by LIA-2 */
354
355 y = y * INV128;
356
357 /* SPECIAL CASES: x=(Nan, Inf) sin(pi*x)=Nan */
358 if (absxih>=0x7ff00000) {
359 xdb.l=0xfff8000000000000LL;
360 return xdb.d - xdb.d;
361 }
362
363 if(absxih>=0x43300000) /* 2^52, which entails that x is an integer */
364 return sign*0.0; /*signed */
365
366 if(absxih<=0x3E000000) /*2^{-31}*/ {
367 if (absxih<0x01700000) { /* 2^{-1000} : Get rid of possible subnormals */
368 /* in this case, SCS computation, accurate to 2^-210 which is provably enough */
369 scs_t result;
370 scs_set_d(result, x );
371 scs_mul(result, PiSCS_ptr, result);
372 scs_get_d(&rh, result);
373 return rh;
374 }
375 /* First step for Pi*x. TODO: FMA-based optimisation */
376 const double DekkerConst = 134217729.; /* 2^27 +1 */
377 double tt, xh, xl;
378 /* Splitting of x. Both xh and xl have at least 26 consecutive LSB zeroes */
379 tt = x*DekkerConst;
380 xh = (x-tt)+tt;
381 xl = x-xh;
382 Add12(rh,rl, xh*PIHH, (xl*PIHH + xh*PIHM) + (xh*PIM + xl*PIHM) );
383 if(rh == (rh + (rl * PIX_RNCST_SIN)))
384 return rh;
385 }
386 /* Fall here either if we have a large input, or if we have a small
387 input and the rounding test fails. */
388 sinpiquick(&rh, &rm, y, index, quadrant);
389 if (rh==rh+1.00001*rm) /* See trigpiquick.gappa. This first step is ridiculously too accurate */
390 return rh;
391 sinpi_accurate(&rh, &rm, &rl, y, index, quadrant);
392 ReturnRoundToNearest3(rh,rm,rl);
393 }
394
395
396
397
398
399
400
401
402
sinpi_rd(double x)403 double sinpi_rd(double x){
404 double xs, y,u, rh, rm, rl, sign,absx;
405 db_number xdb, t;
406 int32_t xih, absxih, index, quadrant;
407
408 if (x<0) absx = -x; else absx = x;
409
410 xdb.d = x;
411
412 xs = x*128.0;
413
414 /* argument reduction */
415 if(absx> TWOTO42 ) { /* x is very large, let us first subtract a large integer from it */
416 t.d = xs;
417 t.i[LO] =0; /* remove the low part. The point is somewhere there since x > 2^42.
418 So what remains in t is an FP integer almost as large as x */
419 xs = xs-t.d; /* we are going to throw away the int part anyway */
420 }
421
422 t.d = TWOTO5251 + xs;
423 u = t.d - TWOTO5251;
424 y = xs - u;
425 index = t.i[LO] & 0x3f;
426 quadrant = (t.i[LO] & 0xff) >>6;
427
428 /* Special case tests come late because the conversion FP to int is slow */
429 xih = xdb.i[HI];
430 absxih = xih & 0x7fffffff;
431 if (xih>>31) sign=-1.; else sign=1.; /* consider the sign bit */
432
433 if(index==0 && y==0.0 && ((quadrant&1)==0)) return -0.0; /*signed, inspired by LIA-2 */
434
435 y = y * INV128;
436
437 /* SPECIAL CASES: x=(Nan, Inf) sin(pi*x)=Nan */
438 if (absxih>=0x7ff00000) {
439 xdb.l=0xfff8000000000000LL;
440 return xdb.d - xdb.d;
441 }
442
443 if(absxih>=0x43300000) /* 2^52, which entails that x is an integer */
444 return sign*0.0; /*signed */
445
446 if(absxih<=0x3E000000) /*2^{-31}*/ {
447 if (absxih<0x01700000) { /* 2^{-1000} : Get rid of possible subnormals */
448 /* in this case, SCS computation, accurate to 2^-210 which is provably enough */
449 scs_t result;
450 scs_set_d(result, x );
451 scs_mul(result, PiSCS_ptr, result);
452 scs_get_d_minf(&rh, result);
453 return rh;
454 }
455 /* First step for Pi*x. TODO: FMA-based optimisation */
456 const double DekkerConst = 134217729.; /* 2^27 +1 */
457 double tt, xh, xl;
458 /* Splitting of x. Both xh and xl have at least 26 consecutive LSB zeroes */
459 tt = x*DekkerConst;
460 xh = (x-tt)+tt;
461 xl = x-xh;
462 Add12(rh,rl, xh*PIHH, (xl*PIHH + xh*PIHM) + (xh*PIM + xl*PIHM) );
463 TEST_AND_RETURN_RD(rh,rl,PIX_EPS_SIN);
464 }
465 /* Fall here either if we have a large input, or if we have a small
466 input and the rounding test fails. */
467 sinpi_accurate(&rh, &rm, &rl, y, index, quadrant);
468
469 ReturnRoundDownwards3(rh,rm,rl);
470 };
471
472
473
sinpi_ru(double x)474 double sinpi_ru(double x){
475 double xs, y,u, rh, rm, rl, sign,absx;
476 db_number xdb, t;
477 int32_t xih, absxih, index, quadrant;
478
479 if (x<0) absx = -x; else absx = x;
480
481 xdb.d = x;
482
483 xs = x*128.0;
484
485 /* argument reduction */
486 if(absx> TWOTO42 ) { /* x is very large, let us first subtract a large integer from it */
487 t.d = xs;
488 t.i[LO] =0; /* remove the low part. The point is somewhere there since x > 2^42.
489 So what remains in t is an FP integer almost as large as x */
490 xs = xs-t.d; /* we are going to throw away the int part anyway */
491 }
492
493 t.d = TWOTO5251 + xs;
494 u = t.d - TWOTO5251;
495 y = xs - u;
496 index = t.i[LO] & 0x3f;
497 quadrant = (t.i[LO] & 0xff) >>6;
498
499 /* Special case tests come late because the conversion FP to int is slow */
500 xih = xdb.i[HI];
501 absxih = xih & 0x7fffffff;
502 if (xih>>31) sign=-1.; else sign=1.; /* consider the sign bit */
503
504 if(index==0 && y==0.0 && ((quadrant&1)==0)) return +0.0; /*signed, inspired by LIA-2 */
505
506 y = y * INV128;
507
508 /* SPECIAL CASES: x=(Nan, Inf) sin(pi*x)=Nan */
509 if (absxih>=0x7ff00000) {
510 xdb.l=0xfff8000000000000LL;
511 return xdb.d - xdb.d;
512 }
513
514 if(absxih>=0x43300000) /* 2^52, which entails that x is an integer */
515 return sign*0.0; /*signed */
516
517 if(absxih<=0x3E000000) /*2^{-31}*/ {
518 if (absxih<0x01700000) { /* 2^{-1000} : Get rid of possible subnormals */
519 /* in this case, SCS computation, accurate to 2^-210 which is provably enough */
520 scs_t result;
521 scs_set_d(result, x );
522 scs_mul(result, PiSCS_ptr, result);
523 scs_get_d_pinf(&rh, result);
524 return rh;
525 }
526 /* First step for Pi*x. TODO: FMA-based optimisation */
527 const double DekkerConst = 134217729.; /* 2^27 +1 */
528 double tt, xh, xl;
529 /* Splitting of x. Both xh and xl have at least 26 consecutive LSB zeroes */
530 tt = x*DekkerConst;
531 xh = (x-tt)+tt;
532 xl = x-xh;
533 Add12(rh,rl, xh*PIHH, (xl*PIHH + xh*PIHM) + (xh*PIM + xl*PIHM) );
534 TEST_AND_RETURN_RU(rh,rl,PIX_EPS_SIN);
535 }
536 /* Fall here either if we have a large input, or if we have a small
537 input and the rounding test fails. */
538 sinpi_accurate(&rh, &rm, &rl, y, index, quadrant);
539
540 ReturnRoundUpwards3(rh,rm,rl);
541
542 };
543
544
545
546
sinpi_rz(double x)547 double sinpi_rz(double x){
548 double xs, y,u, rh, rm, rl, sign,absx;
549 db_number xdb, t;
550 int32_t xih, absxih, index, quadrant;
551
552 if (x<0) absx = -x; else absx = x;
553
554 xdb.d = x;
555
556 xs = x*128.0;
557
558 /* argument reduction */
559 if(absx> TWOTO42 ) { /* x is very large, let us first subtract a large integer from it */
560 t.d = xs;
561 t.i[LO] =0; /* remove the low part. The point is somewhere there since x > 2^42.
562 So what remains in t is an FP integer almost as large as x */
563 xs = xs-t.d; /* we are going to throw away the int part anyway */
564 }
565
566 t.d = TWOTO5251 + xs;
567 u = t.d - TWOTO5251;
568 y = xs - u;
569 index = t.i[LO] & 0x3f;
570 quadrant = (t.i[LO] & 0xff) >>6;
571
572 /* Special case tests come late because the conversion FP to int is slow */
573 xih = xdb.i[HI];
574 absxih = xih & 0x7fffffff;
575 if (xih>>31) sign=-1.; else sign=1.; /* consider the sign bit */
576
577 if(index==0 && y==0.0 && ((quadrant&1)==0)) return sign*0.0; /*signed, inspired by LIA-2 */
578
579 y = y * INV128;
580
581 /* SPECIAL CASES: x=(Nan, Inf) sin(pi*x)=Nan */
582 if (absxih>=0x7ff00000) {
583 xdb.l=0xfff8000000000000LL;
584 return xdb.d - xdb.d;
585 }
586
587 if(absxih>=0x43300000) /* 2^52, which entails that x is an integer */
588 return sign*0.0; /*signed */
589
590 if(absxih<=0x3E000000) /*2^{-31}*/ {
591 if (absxih<0x01700000) { /* 2^{-1000} : Get rid of possible subnormals */
592 /* in this case, SCS computation, accurate to 2^-210 which is provably enough */
593 scs_t result;
594 scs_set_d(result, x );
595 scs_mul(result, PiSCS_ptr, result);
596 scs_get_d_zero(&rh, result);
597 return rh;
598 }
599 /* First step for Pi*x. TODO: FMA-based optimisation */
600 const double DekkerConst = 134217729.; /* 2^27 +1 */
601 double tt, xh, xl;
602 /* Splitting of x. Both xh and xl have at least 26 consecutive LSB zeroes */
603 tt = x*DekkerConst;
604 xh = (x-tt)+tt;
605 xl = x-xh;
606 Add12(rh,rl, xh*PIHH, (xl*PIHH + xh*PIHM) + (xh*PIM + xl*PIHM) );
607 TEST_AND_RETURN_RZ(rh,rl,PIX_EPS_SIN);
608 }
609 /* Fall here either if we have a large input, or if we have a small
610 input and the rounding test fails. */
611 sinpi_accurate(&rh, &rm, &rl, y, index, quadrant);
612
613 ReturnRoundTowardsZero3(rh,rm,rl);
614 };
615
616
617
618
619
620
621
622
623
624 /* to nearest */
cospi_rn(double x)625 double cospi_rn(double x){
626 double xs, y,u, rh, rm, rl, absx;
627 db_number xdb, t;
628 int32_t xih, absxih, index, quadrant;
629
630 if (x<0) absx =-x; else absx=x;
631
632 xdb.d=x;
633 xs = x*128.0;
634
635 /* argument reduction.
636 We do it before the special case tests for performance,
637 it might compute garbage for inf, very large inputs, etc */
638 if(absx> TWOTO42 ) { /* 2^42, x is very large, let us first subtract a large integer from it */
639 t.d = xs;
640 t.i[LO] =0; /* remove the low part, in which was the coma since x > 2^42.
641 So what remains in t is an FP integer almost as large as x */
642 xs = xs-t.d; /* we are going to throw away the int part anyway */
643 }
644
645 t.d = TWOTO5251 + xs;
646 u = t.d - TWOTO5251;
647 y = xs - u;
648 y = y * INV128;
649 index = t.i[LO] & 0x3f;
650 quadrant = (t.i[LO] & 0xff) >>6;
651
652 /* Special case tests come late because the conversion FP to int is slow */
653 xih = xdb.i[HI];
654 absxih = xih & 0x7fffffff;
655
656 /* SPECIAL CASES: x=(Nan, Inf) cos(pi*x)=Nan */
657
658 if (absxih>=0x7ff00000) {
659 xdb.l=0xfff8000000000000LL;
660 return xdb.d - xdb.d;
661 }
662
663 if(absxih>=0x43400000) /* 2^53, which entails that x is an even integer */
664 return 1.0;
665
666 if(index==0 && y==0. && ((quadrant&1)==1)) return +0.;
667 /* Always +0, inpired by LIA2; We do not have cos(x+pi) == - cos(x)
668 in this case */
669
670 if(index==0 && y==0. && quadrant==0) return 1.;
671 if(index==0 && y==0. && quadrant==2) return -1.;
672
673 if (absxih<0x3E26A09E) /* sqrt(2^-53)/4 */
674 return 1.0;
675 /* printf("\n\nint part = %f frac part = %f index=%d quadrant=%d \n", u, y, index, quadrant);
676 */
677
678 cospi_accurate(&rh, &rm, &rl, y, index, quadrant);
679 ReturnRoundToNearest3(rh,rm,rl);
680 };
681
682
683
684
685
cospi_rd(double x)686 double cospi_rd(double x){
687 double xs, y,u, rh, rm, rl, absx;
688 db_number xdb, t;
689 int32_t xih, absxih, index, quadrant;
690
691 if (x<0) absx =-x; else absx=x;
692
693 xdb.d=x;
694 xs = x*128.0;
695
696
697 /* argument reduction.
698 We do it before the special case tests for performance,
699 it might compute garbage for inf, very large inputs, etc */
700 if(absx> TWOTO42 ) { /* 2^42, x is very large, let us first subtract a large integer from it */
701 t.d = xs;
702 t.i[LO] =0; /* remove the low part, in which was the coma since x > 2^42.
703 So what remains in t is an FP integer almost as large as x */
704 xs = xs-t.d; /* we are going to throw away the int part anyway */
705 }
706
707 t.d = TWOTO5251 + xs;
708 u = t.d - TWOTO5251;
709 y = xs - u;
710 y = y * INV128;
711 index = t.i[LO] & 0x3f;
712 quadrant = (t.i[LO] & 0xff) >>6;
713
714
715 /* Special case tests come late because the conversion FP to int is slow */
716 xih = xdb.i[HI];
717 absxih = xih & 0x7fffffff;
718
719 /* SPECIAL CASES: x=(Nan, Inf) cos(pi*x)=Nan */
720
721 if (absxih>=0x7ff00000) {
722 xdb.l=0xfff8000000000000LL;
723 return xdb.d - xdb.d;
724 }
725
726 if(absxih>=0x43400000) /* 2^53, which entails that x is an even integer */
727 return 1.0; /*signed */
728
729 if(index==0 && y==0. && ((quadrant&1)==1)) return -0.;
730
731 if(index==0 && y==0. && quadrant==0) return 1.;
732 if(index==0 && y==0. && quadrant==2) return -1.;
733
734 if (absxih<0x3E200000) /* 2^-29 */
735 return 0.9999999999999998889776975374843459576368331909179687500; /* 1-2^-53 */
736 /* Always +0, inpired by LIA2; We do not have cos(x+pi) == - cos(x)
737 in this case */
738
739 cospi_accurate(&rh, &rm, &rl, y, index, quadrant);
740 ReturnRoundDownwards3(rh,rm,rl);
741 };
742
743
744
745
cospi_ru(double x)746 double cospi_ru(double x){
747 double xs, y,u, rh, rm, rl, absx;
748 db_number xdb, t;
749 int32_t xih, absxih, index, quadrant;
750
751 if (x<0) absx =-x; else absx=x;
752
753 xdb.d=x;
754 xs = x*128.0;
755
756
757 /* argument reduction.
758 We do it before the special case tests for performance,
759 it might compute garbage for inf, very large inputs, etc */
760 if(absx> TWOTO42 ) { /* 2^42, x is very large, let us first subtract a large integer from it */
761 t.d = xs;
762 t.i[LO] =0; /* remove the low part, in which was the coma since x > 2^42.
763 So what remains in t is an FP integer almost as large as x */
764 xs = xs-t.d; /* we are going to throw away the int part anyway */
765 }
766
767 t.d = TWOTO5251 + xs;
768 u = t.d - TWOTO5251;
769 y = xs - u;
770 y = y * INV128;
771 index = t.i[LO] & 0x3f;
772 quadrant = (t.i[LO] & 0xff) >>6;
773
774
775 /* Special case tests come late because the conversion FP to int is slow */
776 xih = xdb.i[HI];
777 absxih = xih & 0x7fffffff;
778
779 /* SPECIAL CASES: x=(Nan, Inf) cos(pi*x)=Nan */
780 if (absxih>=0x7ff00000) {
781 xdb.l=0xfff8000000000000LL;
782 return xdb.d - xdb.d;
783 }
784
785 if(absxih>=0x43400000) /* 2^53, which entails that x is an even integer */
786 return 1.0; /*signed */
787
788 if(index==0 && y==0. && quadrant==0) return 1.;
789 if(index==0 && y==0. && quadrant==2) return -1.;
790
791 if(index==0 && y==0. && ((quadrant&1)==1)) return +0.;
792 /* Always +0, inpired by LIA2; We do not have cos(x+pi) == - cos(x)
793 in this case */
794
795 if (absxih<0x3E200000) /* 2^-29 */
796 return 1;
797
798 cospi_accurate(&rh, &rm, &rl, y, index, quadrant);
799 ReturnRoundUpwards3(rh,rm,rl);
800 };
801
802
803
804
cospi_rz(double x)805 double cospi_rz(double x){
806 double xs, y,u, rh, rm, rl, absx;
807 db_number xdb, t;
808 int32_t xih, absxih, index, quadrant;
809
810 if (x<0) absx =-x; else absx=x;
811
812 xdb.d=x;
813 xs = x*128.0;
814
815
816 /* argument reduction.
817 We do it before the special case tests for performance,
818 it might compute garbage for inf, very large inputs, etc */
819 if(absx> TWOTO42 ) { /* 2^42, x is very large, let us first subtract a large integer from it */
820 t.d = xs;
821 t.i[LO] =0; /* remove the low part, in which was the coma since x > 2^42.
822 So what remains in t is an FP integer almost as large as x */
823 xs = xs-t.d; /* we are going to throw away the int part anyway */
824 }
825
826 t.d = TWOTO5251 + xs;
827 u = t.d - TWOTO5251;
828 y = xs - u;
829 y = y * INV128;
830 index = t.i[LO] & 0x3f;
831 quadrant = (t.i[LO] & 0xff) >>6;
832
833
834 /* Special case tests come late because the conversion FP to int is slow */
835 xih = xdb.i[HI];
836 absxih = xih & 0x7fffffff;
837
838 /* SPECIAL CASES: x=(Nan, Inf) cos(pi*x)=Nan */
839
840 if (absxih>=0x7ff00000) {
841 xdb.l=0xfff8000000000000LL;
842 return xdb.d - xdb.d;
843 }
844
845 if(absxih>=0x43400000) /* 2^53, which entails that x is an even integer */
846 return 1.0; /*signed */
847
848 if(index==0 && y==0. && ((quadrant&1)==1)) return +0.;
849 /* Always +0, inpired by LIA2; We do not have cos(x+pi) == - cos(x)
850 in this case */
851
852 if(index==0 && y==0. && quadrant==0) return 1.;
853 if(index==0 && y==0. && quadrant==2) return -1.;
854
855 if (absxih<0x3E200000) /* 2^-29 */
856 return 0.9999999999999998889776975374843459576368331909179687500; /* 1-2^-53 */
857
858 cospi_accurate(&rh, &rm, &rl, y, index, quadrant);
859 ReturnRoundTowardsZero3(rh,rm,rl);
860 };
861
862
863
864
865
866
867 /* tangent of pi times x */
tanpi_rn(double x)868 double tanpi_rn(double x){
869 double xs, y,u, rh, rm, rl, ch,cm,cl, ich,icm,icl, sh,sm,sl, sign,absx;
870 db_number xdb, t;
871 int32_t xih, absxih, index, quadrant;
872
873 if (x<0) absx = -x; else absx = x;
874
875 xdb.d = x;
876
877 xs = x*128.0;
878
879 /* argument reduction */
880 if(absx> TWOTO42 ) { /* x is very large, let us first subtract a large integer from it */
881 t.d = xs;
882 t.i[LO] =0; /* remove the low part. The point is somewhere there since x > 2^42.
883 So what remains in t is an FP integer almost as large as x */
884 xs = xs-t.d; /* we are going to throw away the int part anyway */
885 }
886
887 t.d = TWOTO5251 + xs;
888 u = t.d - TWOTO5251;
889 y = xs - u;
890 index = t.i[LO] & 0x3f;
891 quadrant = (t.i[LO] & 0xff) >>6;
892
893 /* Special case tests come late because the conversion FP to int is slow */
894 xih = xdb.i[HI];
895 absxih = xih & 0x7fffffff;
896 if (xih>>31) sign=-1.; else sign=1.; /* consider the sign bit */
897
898 if(index==0 && y==0.0 && ((quadrant&1)==0)) return sign*0.0; /*signed, inspired by LIA-2 */
899 /* TODO ? No test for Pi/4. Such a value will lauch the accurate phase. */
900
901 y = y * INV128;
902
903 /* SPECIAL CASES: x=(Nan, Inf) sin(pi*x)=Nan */
904 if (absxih>=0x7ff00000) {
905 xdb.l=0xfff8000000000000LL;
906 return xdb.d - xdb.d;
907 }
908
909 if(absxih>=0x43300000) /* 2^52, which entails that x is an integer */
910 return sign*0.0; /*signed */
911
912 if(absxih<=0x3E000000) /*2^{-31}*/ {
913 if (absxih<0x01700000) { /* 2^{-1000} : Get rid of possible subnormals */
914 /* in this case, SCS computation, accurate to 2^-210 which is provably enough */
915 scs_t result;
916 scs_set_d(result, x );
917 scs_mul(result, PiSCS_ptr, result);
918 scs_get_d(&rh, result);
919 return rh;
920 }
921 /* First step for Pi*x. TODO: FMA-based optimisation */
922 const double DekkerConst = 134217729.; /* 2^27 +1 */
923 double tt, xh, xl;
924 /* Splitting of x. Both xh and xl have at least 26 consecutive LSB zeroes */
925 tt = x*DekkerConst;
926 xh = (x-tt)+tt;
927 xl = x-xh;
928 Add12(rh,rl, xh*PIHH, (xl*PIHH + xh*PIHM) + (xh*PIM + xl*PIHM) );
929 if(rh == (rh + (rl * PIX_RNCST_TAN)))
930 return rh;
931 }
932 /* Fall here either if we have a large input, or if we have a small
933 input and the rounding test fails. */
934 cospi_accurate(&ch, &cm, &cl, y, index, quadrant);
935 Recpr33(&ich, &icm, &icl, ch, cm, cl);
936 sinpi_accurate(&sh, &sm, &sl, y, index, quadrant);
937 Mul33(&rh,&rm,&rl, sh,sm,sl, ich,icm,icl);
938 ReturnRoundToNearest3(rh,rm,rl);
939
940 };
941
942
943
944
tanpi_rd(double x)945 double tanpi_rd(double x){
946 double xs, y,u, rh, rm, rl, ch,cm,cl, ich,icm,icl, sh,sm,sl, sign,absx;
947 db_number xdb, t;
948 int32_t xih, absxih, index, quadrant;
949
950 if (x<0) absx = -x; else absx = x;
951
952 xdb.d = x;
953
954 xs = x*128.0;
955
956 /* argument reduction */
957 if(absx> TWOTO42 ) { /* x is very large, let us first subtract a large integer from it */
958 t.d = xs;
959 t.i[LO] =0; /* remove the low part. The point is somewhere there since x > 2^42.
960 So what remains in t is an FP integer almost as large as x */
961 xs = xs-t.d; /* we are going to throw away the int part anyway */
962 }
963
964 t.d = TWOTO5251 + xs;
965 u = t.d - TWOTO5251;
966 y = xs - u;
967 index = t.i[LO] & 0x3f;
968 quadrant = (t.i[LO] & 0xff) >>6;
969
970 /* Special case tests come late because the conversion FP to int is slow */
971 xih = xdb.i[HI];
972 absxih = xih & 0x7fffffff;
973 if (xih>>31) sign=-1.; else sign=1.; /* consider the sign bit */
974
975 if(index==0 && y==0.0 && ((quadrant&1)==0)) return sign*0.0; /*signed, inspired by LIA-2 */
976
977 y = y * INV128;
978
979 /* SPECIAL CASES: x=(Nan, Inf) sin(pi*x)=Nan */
980 if (absxih>=0x7ff00000) {
981 xdb.l=0xfff8000000000000LL;
982 return xdb.d - xdb.d;
983 }
984
985 if(absxih>=0x43300000) /* 2^52, which entails that x is an integer */
986 return sign*0.0; /*signed */
987
988 if(absxih<=0x3E000000) /*2^{-31}*/ {
989 if (absxih<0x01700000) { /* 2^{-1000} : Get rid of possible subnormals */
990 /* in this case, SCS computation, accurate to 2^-210 which is provably enough */
991 scs_t result;
992 scs_set_d(result, x );
993 scs_mul(result, PiSCS_ptr, result);
994 scs_get_d_minf(&rh, result);
995 return rh;
996 }
997 /* First step for Pi*x. TODO: FMA-based optimisation */
998 const double DekkerConst = 134217729.; /* 2^27 +1 */
999 double tt, xh, xl;
1000 /* Splitting of x. Both xh and xl have at least 26 consecutive LSB zeroes */
1001 tt = x*DekkerConst;
1002 xh = (x-tt)+tt;
1003 xl = x-xh;
1004 Add12(rh,rl, xh*PIHH, (xl*PIHH + xh*PIHM) + (xh*PIM + xl*PIHM) );
1005 TEST_AND_RETURN_RD(rh,rl,PIX_EPS_SIN);
1006 }
1007 /* Fall here either if we have a large input, or if we have a small
1008 input and the rounding test fails. */
1009 cospi_accurate(&ch, &cm, &cl, y, index, quadrant);
1010 Recpr33(&ich, &icm, &icl, ch, cm, cl);
1011 sinpi_accurate(&sh, &sm, &sl, y, index, quadrant);
1012 Mul33(&rh,&rm,&rl, sh,sm,sl, ich,icm,icl);
1013 ReturnRoundDownwards3(rh,rm,rl);
1014 };
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
tanpi_ru(double x)1025 double tanpi_ru(double x){
1026 double xs, y,u, rh, rm, rl, ch,cm,cl, ich,icm,icl, sh,sm,sl, sign,absx;
1027 db_number xdb, t;
1028 int32_t xih, absxih, index, quadrant;
1029
1030 if (x<0) absx = -x; else absx = x;
1031
1032 xdb.d = x;
1033
1034 xs = x*128.0;
1035
1036 /* argument reduction */
1037 if(absx> TWOTO42 ) { /* x is very large, let us first subtract a large integer from it */
1038 t.d = xs;
1039 t.i[LO] =0; /* remove the low part. The point is somewhere there since x > 2^42.
1040 So what remains in t is an FP integer almost as large as x */
1041 xs = xs-t.d; /* we are going to throw away the int part anyway */
1042 }
1043
1044 t.d = TWOTO5251 + xs;
1045 u = t.d - TWOTO5251;
1046 y = xs - u;
1047 index = t.i[LO] & 0x3f;
1048 quadrant = (t.i[LO] & 0xff) >>6;
1049
1050 /* Special case tests come late because the conversion FP to int is slow */
1051 xih = xdb.i[HI];
1052 absxih = xih & 0x7fffffff;
1053 if (xih>>31) sign=-1.; else sign=1.; /* consider the sign bit */
1054
1055 if(index==0 && y==0.0 && ((quadrant&1)==0)) return sign*0.0; /*signed, inspired by LIA-2 */
1056
1057 y = y * INV128;
1058
1059 /* SPECIAL CASES: x=(Nan, Inf) sin(pi*x)=Nan */
1060 if (absxih>=0x7ff00000) {
1061 xdb.l=0xfff8000000000000LL;
1062 return xdb.d - xdb.d;
1063 }
1064
1065 if(absxih>=0x43300000) /* 2^52, which entails that x is an integer */
1066 return sign*0.0; /*signed */
1067
1068 if(absxih<=0x3E000000) /*2^{-31}*/ {
1069 if (absxih<0x01700000) { /* 2^{-1000} : Get rid of possible subnormals */
1070 /* in this case, SCS computation, accurate to 2^-210 which is provably enough */
1071 scs_t result;
1072 scs_set_d(result, x );
1073 scs_mul(result, PiSCS_ptr, result);
1074 scs_get_d_pinf(&rh, result);
1075 return rh;
1076 }
1077 /* First step for Pi*x. TODO: FMA-based optimisation */
1078 const double DekkerConst = 134217729.; /* 2^27 +1 */
1079 double tt, xh, xl;
1080 /* Splitting of x. Both xh and xl have at least 26 consecutive LSB zeroes */
1081 tt = x*DekkerConst;
1082 xh = (x-tt)+tt;
1083 xl = x-xh;
1084 Add12(rh,rl, xh*PIHH, (xl*PIHH + xh*PIHM) + (xh*PIM + xl*PIHM) );
1085 TEST_AND_RETURN_RU(rh,rl,PIX_EPS_TAN);
1086 }
1087 /* Fall here either if we have a large input, or if we have a small
1088 input and the rounding test fails. */
1089 cospi_accurate(&ch, &cm, &cl, y, index, quadrant);
1090 Recpr33(&ich, &icm, &icl, ch, cm, cl);
1091 sinpi_accurate(&sh, &sm, &sl, y, index, quadrant);
1092 Mul33(&rh,&rm,&rl, sh,sm,sl, ich,icm,icl);
1093 ReturnRoundUpwards3(rh,rm,rl);
1094 };
1095
1096
1097
tanpi_rz(double x)1098 double tanpi_rz(double x){
1099 double xs, y,u, rh, rm, rl, ch,cm,cl, ich,icm,icl, sh,sm,sl, sign,absx;
1100 db_number xdb, t;
1101 int32_t xih, absxih, index, quadrant;
1102
1103 if (x<0) absx = -x; else absx = x;
1104
1105 xdb.d = x;
1106
1107 xs = x*128.0;
1108
1109 /* argument reduction */
1110 if(absx> TWOTO42 ) { /* x is very large, let us first subtract a large integer from it */
1111 t.d = xs;
1112 t.i[LO] =0; /* remove the low part. The point is somewhere there since x > 2^42.
1113 So what remains in t is an FP integer almost as large as x */
1114 xs = xs-t.d; /* we are going to throw away the int part anyway */
1115 }
1116
1117 t.d = TWOTO5251 + xs;
1118 u = t.d - TWOTO5251;
1119 y = xs - u;
1120 index = t.i[LO] & 0x3f;
1121 quadrant = (t.i[LO] & 0xff) >>6;
1122
1123 /* Special case tests come late because the conversion FP to int is slow */
1124 xih = xdb.i[HI];
1125 absxih = xih & 0x7fffffff;
1126 if (xih>>31) sign=-1.; else sign=1.; /* consider the sign bit */
1127
1128 if(index==0 && y==0.0 && ((quadrant&1)==0)) return sign*0.0; /*signed, inspired by LIA-2 */
1129
1130 y = y * INV128;
1131
1132 /* SPECIAL CASES: x=(Nan, Inf) sin(pi*x)=Nan */
1133 if (absxih>=0x7ff00000) {
1134 xdb.l=0xfff8000000000000LL;
1135 return xdb.d - xdb.d;
1136 }
1137
1138 if(absxih>=0x43300000) /* 2^52, which entails that x is an integer */
1139 return sign*0.0; /*signed */
1140
1141 if(absxih<=0x3E000000) /*2^{-31}*/ {
1142 if (absxih<0x01700000) { /* 2^{-1000} : Get rid of possible subnormals */
1143 /* in this case, SCS computation, accurate to 2^-210 which is provably enough */
1144 scs_t result;
1145 scs_set_d(result, x );
1146 scs_mul(result, PiSCS_ptr, result);
1147 scs_get_d_zero(&rh, result);
1148 return rh;
1149 }
1150 /* First step for Pi*x. TODO: FMA-based optimisation */
1151 const double DekkerConst = 134217729.; /* 2^27 +1 */
1152 double tt, xh, xl;
1153 /* Splitting of x. Both xh and xl have at least 26 consecutive LSB zeroes */
1154 tt = x*DekkerConst;
1155 xh = (x-tt)+tt;
1156 xl = x-xh;
1157 Add12(rh,rl, xh*PIHH, (xl*PIHH + xh*PIHM) + (xh*PIM + xl*PIHM) );
1158 TEST_AND_RETURN_RZ(rh,rl,PIX_EPS_SIN);
1159 }
1160 /* Fall here either if we have a large input, or if we have a small
1161 input and the rounding test fails. */
1162 cospi_accurate(&ch, &cm, &cl, y, index, quadrant);
1163 Recpr33(&ich, &icm, &icl, ch, cm, cl);
1164 sinpi_accurate(&sh, &sm, &sl, y, index, quadrant);
1165 Mul33(&rh,&rm,&rl, sh,sm,sl, ich,icm,icl);
1166 ReturnRoundTowardsZero3(rh,rm,rl);
1167 };
1168
1169
1170