1 #include <stdio.h>
2 #include <stdlib.h>
3 #include "crlibm.h"
4 /*
5  * Correctly rounded trigpi functions
6  *
7  * Authors : F. de Dinechin, S. Chevillard, C. Lauter (the latter two
8  * didn't write a line of this file, but wrote a tool that wrote a
9  * tool that wrote etc that wrote bits of code related to polynomial
10  * evaluation.)
11  *
12  * This file is part of the crlibm library developed by the Arenaire
13  * project at Ecole Normale Superieure de Lyon
14  *
15  * This program is free software; you can redistribute it and/or modify
16  * it under the terms of the GNU Lesser General Public License as published by
17  * the Free Software Foundation; either version 2 of the License, or
18  * (at your option) any later version.
19  *
20  * This program is distributed in the hope that it will be useful,
21  * but WITHOUT ANY WARRANTY; without even the implied warranty of
22  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
23  * GNU General Public License for more details.
24  *
25  * You should have received a copy of the GNU Lesser General Public License
26  * along with this program; if not, write to the Free Software
27  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
28 */
29 #include "crlibm_private.h"
30 #include "triple-double.h"
31 #include "trigpi.h"
32 
33 
34 /*   TODO
35 
36 
37 Use the symmetries of the tables
38 
39 Actually, use the tables from the standard trigo.
40 
41 Write decent quick steps. Or hammer Christoph and Sylvain to do so.
42 
43  */
44 
45 
46 
47 
48 
49 /* This ugly bits of code in the beginning are polynomial evaluations
50    automagically generated and proven by Chevillard and Lauter's
51    tools
52 */
53 
54 
55 #define sinpiacc_coeff_1h 3.14159265358979311599796346854418516159057617187500000000000000000000000000000000e+00
56 #define sinpiacc_coeff_1m 1.22464679914735320717376402945839660462569212467758006379625612680683843791484833e-16
57 #define sinpiacc_coeff_1l -2.87889731599645993207191707893463395148177292198731390393739579574603302514349608e-33
58 #define sinpiacc_coeff_3h -5.16771278004997025590228076907806098461151123046875000000000000000000000000000000e+00
59 #define sinpiacc_coeff_3m 2.26656228257550136196266687046492287115561324595258696490418515168130397796630859e-16
60 #define sinpiacc_coeff_5h 2.55016403987734552316624103696085512638092041015625000000000000000000000000000000e+00
61 #define sinpiacc_coeff_5m -7.93098961936403945684716222915171282926664203267314023904077657789457589387893677e-17
62 #define sinpiacc_coeff_7h -5.99264529320792105338000510528218001127243041992187500000000000000000000000000000e-01
63 #define sinpiacc_coeff_9h 8.21458866130424236740026344705256633460521697998046875000000000000000000000000000e-02
64 #define sinpiacc_coeff_11h -7.37046804820839888960914976223648409359157085418701171875000000000000000000000000e-03
65 
66 
67 #define cospiacc_coeff_0h 1.00000000000000000000000000000000000000000000000000000000000000000000000000000000e+00
68 #define cospiacc_coeff_2h -4.93480220054467899615247006295248866081237792968750000000000000000000000000000000e+00
69 #define cospiacc_coeff_2m -3.13264775437072047133490817894057799839785556899468543790021612949203699827194214e-16
70 #define cospiacc_coeff_4h 4.05871212641676848420502210501581430435180664062500000000000000000000000000000000e+00
71 #define cospiacc_coeff_4m -2.66019969731660223662555032718185048048635055542576743903282476821914315223693848e-16
72 #define cospiacc_coeff_6h -1.33526276885458949905682857206556946039199829101562500000000000000000000000000000e+00
73 #define cospiacc_coeff_8h 2.35330630358513925859398341344785876572132110595703125000000000000000000000000000e-01
74 #define cospiacc_coeff_10h -2.58068327360992909313974763563237502239644527435302734375000000000000000000000000e-02
75 
sincospiacc(double * sinpiacc_resh,double * sinpiacc_resm,double * sinpiacc_resl,double * cospiacc_resh,double * cospiacc_resm,double * cospiacc_resl,double x)76 static void sincospiacc(double *sinpiacc_resh, double *sinpiacc_resm, double *sinpiacc_resl,
77 		 double *cospiacc_resh, double *cospiacc_resm, double *cospiacc_resl,
78 		 double x) {
79   double x2h, x2m;
80   double sinpiacc_t_1_0h;
81   double sinpiacc_t_2_0h;
82   double sinpiacc_t_3_0h;
83   double sinpiacc_t_4_0h;
84   double sinpiacc_t_5_0h, sinpiacc_t_5_0m;
85   double sinpiacc_t_6_0h, sinpiacc_t_6_0m;
86   double sinpiacc_t_7_0h, sinpiacc_t_7_0m;
87   double sinpiacc_t_8_0h, sinpiacc_t_8_0m;
88   double sinpiacc_t_9_0h, sinpiacc_t_9_0m, sinpiacc_t_9_0l;
89   double sinpiacc_t_10_0h, sinpiacc_t_10_0m, sinpiacc_t_10_0l;
90 
91   double cospiacc_t_1_0h;
92   double cospiacc_t_2_0h;
93   double cospiacc_t_3_0h;
94   double cospiacc_t_4_0h;
95   double cospiacc_t_5_0h, cospiacc_t_5_0m;
96   double cospiacc_t_6_0h, cospiacc_t_6_0m;
97   double cospiacc_t_7_0h, cospiacc_t_7_0m;
98   double cospiacc_t_8_0h, cospiacc_t_8_0m;
99   double cospiacc_t_9_0h, cospiacc_t_9_0m, cospiacc_t_9_0l;
100 
101   Mul12(&x2h,&x2m,x,x);
102 
103   sinpiacc_t_1_0h = sinpiacc_coeff_11h;
104   sinpiacc_t_2_0h = sinpiacc_t_1_0h * x2h;
105   sinpiacc_t_3_0h = sinpiacc_coeff_9h + sinpiacc_t_2_0h;
106   sinpiacc_t_4_0h = sinpiacc_t_3_0h * x2h;
107   Add12(sinpiacc_t_5_0h,sinpiacc_t_5_0m,sinpiacc_coeff_7h,sinpiacc_t_4_0h);
108   MulAdd22(&sinpiacc_t_6_0h,&sinpiacc_t_6_0m,sinpiacc_coeff_5h,sinpiacc_coeff_5m,x2h,x2m,sinpiacc_t_5_0h,sinpiacc_t_5_0m);
109   MulAdd22(&sinpiacc_t_7_0h,&sinpiacc_t_7_0m,sinpiacc_coeff_3h,sinpiacc_coeff_3m,x2h,x2m,sinpiacc_t_6_0h,sinpiacc_t_6_0m);
110   Mul22(&sinpiacc_t_8_0h,&sinpiacc_t_8_0m,sinpiacc_t_7_0h,sinpiacc_t_7_0m,x2h,x2m);
111   Add233Cond(&sinpiacc_t_9_0h,&sinpiacc_t_9_0m,&sinpiacc_t_9_0l,sinpiacc_t_8_0h,sinpiacc_t_8_0m,sinpiacc_coeff_1h,sinpiacc_coeff_1m,sinpiacc_coeff_1l);
112   Mul133(&sinpiacc_t_10_0h,&sinpiacc_t_10_0m,&sinpiacc_t_10_0l,x,sinpiacc_t_9_0h,sinpiacc_t_9_0m,sinpiacc_t_9_0l);
113   Renormalize3(sinpiacc_resh,sinpiacc_resm,sinpiacc_resl,sinpiacc_t_10_0h,sinpiacc_t_10_0m,sinpiacc_t_10_0l);
114 
115 
116   cospiacc_t_1_0h = cospiacc_coeff_10h;
117   cospiacc_t_2_0h = cospiacc_t_1_0h * x2h;
118   cospiacc_t_3_0h = cospiacc_coeff_8h + cospiacc_t_2_0h;
119   cospiacc_t_4_0h = cospiacc_t_3_0h * x2h;
120   Add12(cospiacc_t_5_0h,cospiacc_t_5_0m,cospiacc_coeff_6h,cospiacc_t_4_0h);
121   MulAdd22(&cospiacc_t_6_0h,&cospiacc_t_6_0m,cospiacc_coeff_4h,cospiacc_coeff_4m,x2h,x2m,cospiacc_t_5_0h,cospiacc_t_5_0m);
122   MulAdd22(&cospiacc_t_7_0h,&cospiacc_t_7_0m,cospiacc_coeff_2h,cospiacc_coeff_2m,x2h,x2m,cospiacc_t_6_0h,cospiacc_t_6_0m);
123   Mul22(&cospiacc_t_8_0h,&cospiacc_t_8_0m,cospiacc_t_7_0h,cospiacc_t_7_0m,x2h,x2m);
124   Add123(&cospiacc_t_9_0h,&cospiacc_t_9_0m,&cospiacc_t_9_0l,cospiacc_coeff_0h,cospiacc_t_8_0h,cospiacc_t_8_0m);
125   *cospiacc_resh = cospiacc_t_9_0h; *cospiacc_resm = cospiacc_t_9_0m; *cospiacc_resl = cospiacc_t_9_0l;
126 
127 }
128 
129 
130 
131 
132 
133 
134 
135 
136 
137 
138 
139 /* Comment on comparing sa, ca, sy and cy
140    either index=0, then sa=0 and ca=1, therefore t2=0, and the Add33 will be exact
141    or index !=0, and
142 	-eps1 < sy < eps1  (but sy may be negative)
143 	sa > eps1 (sa>0)
144 	1-eps2 < cy < 1
145 	ca < 1-eps2
146 	therefore
147 	sacy = t2 >0
148 	casy = t1 may be negative
149 	abs(t1) <=  abs(t2)
150 	Unfortunately we need a stronger condition to use the Add33
151 */
152 
153 
154 
155 
sinpi_accurate(double * rh,double * rm,double * rl,double y,int index,int quadrant)156 static void sinpi_accurate(double *rh, double *rm, double *rl,
157 			   double y, int index, int quadrant)
158 {
159    double syh, sym, syl, cyh, cym, cyl, sah, sam, sal, cah, cam, cal;
160    double t1h, t1m, t1l, t2h, t2m, t2l;
161 
162    sincospiacc(&syh, &sym, &syl, &cyh, &cym, &cyl, y);
163 
164    sah=sincosTable[index].sh;
165    cah=sincosTable[index].ch;
166    sam=sincosTable[index].sm;
167    cam=sincosTable[index].cm;
168    sal=sincosTable[index].sl;
169    cal=sincosTable[index].cl;
170 
171    if(quadrant==0 || quadrant==2) {
172      /* compute sy*ca+sa*cy   :    t1 = sy*ca,     t2 =  sa*cy*/
173      Mul33(&t1h,&t1m,&t1l, syh,sym,syl, cah,cam,cal);
174      Mul33(&t2h,&t2m,&t2l, sah,sam,sal, cyh,cym,cyl);
175      Add33Cond(rh, rm, rl, t2h,t2m,t2l, t1h,t1m,t1l);
176    }
177    else {
178      /* compute cy*ca - sa*sy : t1 = cy*ca,    t2 =  sa*sy */
179      Mul33(&t1h,&t1m,&t1l, cyh,cym,cyl, cah,cam,cal);
180      Mul33(&t2h,&t2m,&t2l, sah,sam,sal, syh,sym,syl);
181      Add33Cond(rh, rm, rl, t1h,t1m,t1l, -t2h,-t2m,-t2l);
182    }
183 
184    if (quadrant>=2) {
185      *rh = -*rh;
186      *rm = -*rm;
187      *rl = -*rl;
188    }
189 
190 };
191 
192 
193 
194 
195 
196 
197 #define sinpiquick_coeff_1h 3.14159265358979311599796346854418516159057617187500000000000000000000000000000000e+00
198 #define sinpiquick_coeff_1m 1.22464971683184787123862072310058851157814368464452070561776508839102461934089661e-16
199 #define sinpiquick_coeff_3h -5.16771278004997025590228076907806098461151123046875000000000000000000000000000000e+00
200 #define sinpiquick_coeff_5h 2.55016403989992213041659852024167776107788085937500000000000000000000000000000000e+00
201 #define sinpiquick_coeff_7h -5.99263913290728922333983064163476228713989257812500000000000000000000000000000000e-01
202 #define cospiquick_coeff_0h 1.00000000000000000000000000000000000000000000000000000000000000000000000000000000e+00
203 #define cospiquick_coeff_2h -4.93480220054467899615247006295248866081237792968750000000000000000000000000000000e+00
204 #define cospiquick_coeff_4h 4.05871212632582167856298838160000741481781005859375000000000000000000000000000000e+00
205 #define cospiquick_coeff_6h -1.33525456323720947970912220625905320048332214355468750000000000000000000000000000e+00
206 
207 
208 
cospi_accurate(double * rh,double * rm,double * rl,double y,int index,int quadrant)209 static void cospi_accurate(double *rh, double *rm, double *rl,
210 			   double y, int index, int quadrant)
211 {
212    double syh, sym, syl, cyh, cym, cyl, sah, sam, sal, cah, cam, cal;
213    double t1h, t1m, t1l, t2h, t2m, t2l;
214 
215    sincospiacc(&syh, &sym, &syl, &cyh, &cym, &cyl, y);
216 
217    sah=sincosTable[index].sh;
218    cah=sincosTable[index].ch;
219    sam=sincosTable[index].sm;
220    cam=sincosTable[index].cm;
221    sal=sincosTable[index].sl;
222    cal=sincosTable[index].cl;
223 
224    if(quadrant==0 || quadrant==2) {
225      /* compute cy*ca - sa*sy : t1 = cy*ca,    t2 =  sa*sy */
226      Mul33(&t1h,&t1m,&t1l, cyh,cym,cyl, cah,cam,cal);
227      Mul33(&t2h,&t2m,&t2l, sah,sam,sal, syh,sym,syl);
228      Add33Cond(rh, rm, rl, t1h,t1m,t1l, -t2h,-t2m,-t2l);
229    }
230    else {
231      /* compute sy*ca+sa*cy   :    t1 = sy*ca,     t2 =  sa*cy*/
232      Mul33(&t1h,&t1m,&t1l, syh,sym,syl, cah,cam,cal);
233      Mul33(&t2h,&t2m,&t2l, sah,sam,sal, cyh,cym,cyl);
234      Add33Cond(rh, rm, rl, t2h,t2m,t2l, t1h,t1m,t1l);
235    }
236 
237    if (quadrant==1 || quadrant==2) {
238      *rh = -*rh;
239      *rm = -*rm;
240      *rl = -*rl;
241    }
242 
243 };
244 
245 
246 
247 
248 
249 
250 /* This one can clearly be improved. It was set up in less than one hour */
sinpiquick(double * rh,double * rm,double x,int index,int quadrant)251 void sinpiquick(double *rh, double *rm, double x, int index, int quadrant) {
252   double x2h, x2m;
253   double sinpiquick_t_1_0h;
254   double sinpiquick_t_2_0h;
255   double sinpiquick_t_3_0h;
256   double sinpiquick_t_4_0h;
257   double sinpiquick_t_5_0h, sinpiquick_t_5_0m;
258   double sinpiquick_t_6_0h, sinpiquick_t_6_0m;
259   double sinpiquick_t_7_0h, sinpiquick_t_7_0m;
260   double syh, sym;
261   double cospiquick_t_1_0h;
262   double cospiquick_t_2_0h;
263   double cospiquick_t_3_0h;
264   double cospiquick_t_4_0h;
265   double cospiquick_t_5_0h, cospiquick_t_5_0m;
266   double cospiquick_t_6_0h, cospiquick_t_6_0m;
267   double cospiquick_t_7_0h, cospiquick_t_7_0m;
268   double cyh, cym;
269   double t1h, t1m, t2h, t2m, sah, sam, cah,cam;
270 
271   Mul12(&x2h,&x2m,x,x);
272 
273   sah=sincosTable[index].sh;
274   cah=sincosTable[index].ch;
275   sam=sincosTable[index].sm;
276   cam=sincosTable[index].cm;
277 
278   sinpiquick_t_1_0h = sinpiquick_coeff_7h;
279   sinpiquick_t_2_0h = sinpiquick_t_1_0h * x2h;
280   sinpiquick_t_3_0h = sinpiquick_coeff_5h + sinpiquick_t_2_0h;
281   sinpiquick_t_4_0h = sinpiquick_t_3_0h * x2h;
282   Add12(sinpiquick_t_5_0h,sinpiquick_t_5_0m,sinpiquick_coeff_3h,sinpiquick_t_4_0h);
283   MulAdd22(&sinpiquick_t_6_0h,&sinpiquick_t_6_0m,sinpiquick_coeff_1h,sinpiquick_coeff_1m,x2h,x2m,sinpiquick_t_5_0h,sinpiquick_t_5_0m);
284   Mul122(&sinpiquick_t_7_0h,&sinpiquick_t_7_0m,x,sinpiquick_t_6_0h,sinpiquick_t_6_0m);
285   syh = sinpiquick_t_7_0h; sym = sinpiquick_t_7_0m;
286 
287   cospiquick_t_1_0h = cospiquick_coeff_6h;
288   cospiquick_t_2_0h = cospiquick_t_1_0h * x2h;
289   cospiquick_t_3_0h = cospiquick_coeff_4h + cospiquick_t_2_0h;
290   cospiquick_t_4_0h = cospiquick_t_3_0h * x2h;
291   Add12(cospiquick_t_5_0h,cospiquick_t_5_0m,cospiquick_coeff_2h,cospiquick_t_4_0h);
292   Mul22(&cospiquick_t_6_0h,&cospiquick_t_6_0m,cospiquick_t_5_0h,cospiquick_t_5_0m,x2h,x2m);
293   Add122(&cospiquick_t_7_0h,&cospiquick_t_7_0m,cospiquick_coeff_0h,cospiquick_t_6_0h,cospiquick_t_6_0m);
294   cyh = cospiquick_t_7_0h; cym = cospiquick_t_7_0m;
295 
296   /* Here comes the hand-written, unproven yet code */
297    if(quadrant==0 || quadrant==2) {
298      /* compute sy*ca+sa*cy   :    t1 = sy*ca,     t2 =  sa*cy*/
299      Mul22(&t1h,&t1m, syh,sym, cah,cam);
300      Mul22(&t2h,&t2m, sah,sam, cyh,cym);
301      Add22Cond(rh, rm, t2h,t2m, t1h,t1m);
302    }
303    else {
304      /* compute cy*ca - sa*sy : t1 = cy*ca,    t2 =  sa*sy */
305      Mul22(&t1h,&t1m, cyh,cym, cah,cam);
306      Mul22(&t2h,&t2m, sah,sam, syh,sym);
307      Add22Cond(rh, rm, t1h,t1m, -t2h,-t2m);
308    }
309 
310    if (quadrant>=2) {
311      *rh = -*rh;
312      *rm = -*rm;
313    }
314 
315 }
316 
317 
318 
319 
320 
321 
322 
sinpi_rn(double x)323  double sinpi_rn(double x){
324    double xs, y,u, rh, rm, rl, sign,absx;
325    db_number xdb, t;
326    int32_t xih, absxih, index, quadrant;
327 
328    if (x<0) absx = -x;   else absx = x;
329 
330    xdb.d = x;
331 
332    xs = x*128.0;
333 
334    /* argument reduction */
335    if(absx>  TWOTO42 ) {  /* x is very large, let us first subtract a large integer from it */
336      t.d = xs;
337      t.i[LO] =0; /* remove the low part. The point is somewhere there since x > 2^42.
338 		    So what remains in t is an FP integer almost as large as x */
339      xs = xs-t.d; /* we are going to throw away the int part anyway */
340    }
341 
342    t.d = TWOTO5251 + xs;
343    u = t.d - TWOTO5251;
344    y = xs - u;
345    index = t.i[LO] & 0x3f;
346    quadrant = (t.i[LO] & 0xff) >>6;
347 
348    /* Special case tests come late because the conversion FP to int is slow */
349    xih = xdb.i[HI];
350    absxih = xih & 0x7fffffff;
351    if (xih>>31)  sign=-1.;   else sign=1.; /* consider the sign bit */
352 
353    if(index==0 && y==0.0 && ((quadrant&1)==0)) return sign*0.0; /*signed, inspired by LIA-2 */
354 
355    y = y * INV128;
356 
357    /* SPECIAL CASES: x=(Nan, Inf) sin(pi*x)=Nan */
358    if (absxih>=0x7ff00000) {
359      xdb.l=0xfff8000000000000LL;
360      return xdb.d - xdb.d;
361    }
362 
363    if(absxih>=0x43300000) /* 2^52, which entails that x is an integer */
364      return sign*0.0; /*signed */
365 
366    if(absxih<=0x3E000000) /*2^{-31}*/ {
367      if (absxih<0x01700000) { /* 2^{-1000} :  Get rid of possible subnormals  */
368        /* in this case, SCS computation, accurate to 2^-210 which is provably enough */
369        scs_t result;
370        scs_set_d(result, x );
371        scs_mul(result, PiSCS_ptr, result);
372        scs_get_d(&rh, result);
373        return rh;
374      }
375      /* First step for Pi*x. TODO: FMA-based optimisation */
376      const double DekkerConst  = 134217729.; /* 2^27 +1 */
377      double tt, xh, xl;
378      /* Splitting of x. Both xh and xl have at least 26 consecutive LSB zeroes */
379      tt = x*DekkerConst;
380      xh = (x-tt)+tt;
381      xl = x-xh;
382      Add12(rh,rl, xh*PIHH, (xl*PIHH + xh*PIHM) + (xh*PIM + xl*PIHM) );
383      if(rh == (rh + (rl * PIX_RNCST_SIN)))
384        return rh;
385    }
386    /* Fall here either if we have a large input, or if we have a small
387       input and the rounding test fails.  */
388    sinpiquick(&rh, &rm,  y, index, quadrant);
389    if (rh==rh+1.00001*rm) /* See trigpiquick.gappa. This first step is ridiculously too accurate */
390      return rh;
391    sinpi_accurate(&rh, &rm, &rl, y, index, quadrant);
392    ReturnRoundToNearest3(rh,rm,rl);
393  }
394 
395 
396 
397 
398 
399 
400 
401 
402 
sinpi_rd(double x)403  double sinpi_rd(double x){
404    double xs, y,u, rh, rm, rl, sign,absx;
405    db_number xdb, t;
406    int32_t xih, absxih, index, quadrant;
407 
408    if (x<0) absx = -x;   else absx = x;
409 
410    xdb.d = x;
411 
412    xs = x*128.0;
413 
414    /* argument reduction */
415    if(absx>  TWOTO42 ) {  /* x is very large, let us first subtract a large integer from it */
416      t.d = xs;
417      t.i[LO] =0; /* remove the low part. The point is somewhere there since x > 2^42.
418 		    So what remains in t is an FP integer almost as large as x */
419      xs = xs-t.d; /* we are going to throw away the int part anyway */
420    }
421 
422    t.d = TWOTO5251 + xs;
423    u = t.d - TWOTO5251;
424    y = xs - u;
425    index = t.i[LO] & 0x3f;
426    quadrant = (t.i[LO] & 0xff) >>6;
427 
428    /* Special case tests come late because the conversion FP to int is slow */
429    xih = xdb.i[HI];
430    absxih = xih & 0x7fffffff;
431    if (xih>>31)  sign=-1.;   else sign=1.; /* consider the sign bit */
432 
433    if(index==0 && y==0.0 && ((quadrant&1)==0)) return -0.0; /*signed, inspired by LIA-2 */
434 
435    y = y * INV128;
436 
437    /* SPECIAL CASES: x=(Nan, Inf) sin(pi*x)=Nan */
438    if (absxih>=0x7ff00000) {
439      xdb.l=0xfff8000000000000LL;
440      return xdb.d - xdb.d;
441    }
442 
443    if(absxih>=0x43300000) /* 2^52, which entails that x is an integer */
444      return sign*0.0; /*signed */
445 
446     if(absxih<=0x3E000000) /*2^{-31}*/ {
447      if (absxih<0x01700000) { /* 2^{-1000} :  Get rid of possible subnormals  */
448        /* in this case, SCS computation, accurate to 2^-210 which is provably enough */
449        scs_t result;
450        scs_set_d(result, x );
451        scs_mul(result, PiSCS_ptr, result);
452        scs_get_d_minf(&rh, result);
453        return rh;
454      }
455      /* First step for Pi*x. TODO: FMA-based optimisation */
456      const double DekkerConst  = 134217729.; /* 2^27 +1 */
457      double tt, xh, xl;
458      /* Splitting of x. Both xh and xl have at least 26 consecutive LSB zeroes */
459      tt = x*DekkerConst;
460      xh = (x-tt)+tt;
461      xl = x-xh;
462      Add12(rh,rl, xh*PIHH, (xl*PIHH + xh*PIHM) + (xh*PIM + xl*PIHM) );
463      TEST_AND_RETURN_RD(rh,rl,PIX_EPS_SIN);
464    }
465    /* Fall here either if we have a large input, or if we have a small
466       input and the rounding test fails.  */
467    sinpi_accurate(&rh, &rm, &rl, y, index, quadrant);
468 
469    ReturnRoundDownwards3(rh,rm,rl);
470 };
471 
472 
473 
sinpi_ru(double x)474  double sinpi_ru(double x){
475    double xs, y,u, rh, rm, rl, sign,absx;
476    db_number xdb, t;
477    int32_t xih, absxih, index, quadrant;
478 
479    if (x<0) absx = -x;   else absx = x;
480 
481    xdb.d = x;
482 
483    xs = x*128.0;
484 
485    /* argument reduction */
486    if(absx>  TWOTO42 ) {  /* x is very large, let us first subtract a large integer from it */
487      t.d = xs;
488      t.i[LO] =0; /* remove the low part. The point is somewhere there since x > 2^42.
489 		    So what remains in t is an FP integer almost as large as x */
490      xs = xs-t.d; /* we are going to throw away the int part anyway */
491    }
492 
493    t.d = TWOTO5251 + xs;
494    u = t.d - TWOTO5251;
495    y = xs - u;
496    index = t.i[LO] & 0x3f;
497    quadrant = (t.i[LO] & 0xff) >>6;
498 
499    /* Special case tests come late because the conversion FP to int is slow */
500    xih = xdb.i[HI];
501    absxih = xih & 0x7fffffff;
502    if (xih>>31)  sign=-1.;   else sign=1.; /* consider the sign bit */
503 
504    if(index==0 && y==0.0 && ((quadrant&1)==0)) return +0.0; /*signed, inspired by LIA-2 */
505 
506    y = y * INV128;
507 
508    /* SPECIAL CASES: x=(Nan, Inf) sin(pi*x)=Nan */
509    if (absxih>=0x7ff00000) {
510      xdb.l=0xfff8000000000000LL;
511      return xdb.d - xdb.d;
512    }
513 
514    if(absxih>=0x43300000) /* 2^52, which entails that x is an integer */
515      return sign*0.0; /*signed */
516 
517     if(absxih<=0x3E000000) /*2^{-31}*/ {
518      if (absxih<0x01700000) { /* 2^{-1000} :  Get rid of possible subnormals  */
519        /* in this case, SCS computation, accurate to 2^-210 which is provably enough */
520        scs_t result;
521        scs_set_d(result, x );
522        scs_mul(result, PiSCS_ptr, result);
523        scs_get_d_pinf(&rh, result);
524        return rh;
525      }
526      /* First step for Pi*x. TODO: FMA-based optimisation */
527      const double DekkerConst  = 134217729.; /* 2^27 +1 */
528      double tt, xh, xl;
529      /* Splitting of x. Both xh and xl have at least 26 consecutive LSB zeroes */
530      tt = x*DekkerConst;
531      xh = (x-tt)+tt;
532      xl = x-xh;
533      Add12(rh,rl, xh*PIHH, (xl*PIHH + xh*PIHM) + (xh*PIM + xl*PIHM) );
534      TEST_AND_RETURN_RU(rh,rl,PIX_EPS_SIN);
535    }
536    /* Fall here either if we have a large input, or if we have a small
537       input and the rounding test fails.  */
538    sinpi_accurate(&rh, &rm, &rl, y, index, quadrant);
539 
540    ReturnRoundUpwards3(rh,rm,rl);
541 
542 };
543 
544 
545 
546 
sinpi_rz(double x)547  double sinpi_rz(double x){
548    double xs, y,u, rh, rm, rl, sign,absx;
549    db_number xdb, t;
550    int32_t xih, absxih, index, quadrant;
551 
552    if (x<0) absx = -x;   else absx = x;
553 
554    xdb.d = x;
555 
556    xs = x*128.0;
557 
558    /* argument reduction */
559    if(absx>  TWOTO42 ) {  /* x is very large, let us first subtract a large integer from it */
560      t.d = xs;
561      t.i[LO] =0; /* remove the low part. The point is somewhere there since x > 2^42.
562 		    So what remains in t is an FP integer almost as large as x */
563      xs = xs-t.d; /* we are going to throw away the int part anyway */
564    }
565 
566    t.d = TWOTO5251 + xs;
567    u = t.d - TWOTO5251;
568    y = xs - u;
569    index = t.i[LO] & 0x3f;
570    quadrant = (t.i[LO] & 0xff) >>6;
571 
572    /* Special case tests come late because the conversion FP to int is slow */
573    xih = xdb.i[HI];
574    absxih = xih & 0x7fffffff;
575    if (xih>>31)  sign=-1.;   else sign=1.; /* consider the sign bit */
576 
577    if(index==0 && y==0.0 && ((quadrant&1)==0)) return sign*0.0; /*signed, inspired by LIA-2 */
578 
579    y = y * INV128;
580 
581    /* SPECIAL CASES: x=(Nan, Inf) sin(pi*x)=Nan */
582    if (absxih>=0x7ff00000) {
583      xdb.l=0xfff8000000000000LL;
584      return xdb.d - xdb.d;
585    }
586 
587    if(absxih>=0x43300000) /* 2^52, which entails that x is an integer */
588      return sign*0.0; /*signed */
589 
590     if(absxih<=0x3E000000) /*2^{-31}*/ {
591      if (absxih<0x01700000) { /* 2^{-1000} :  Get rid of possible subnormals  */
592        /* in this case, SCS computation, accurate to 2^-210 which is provably enough */
593        scs_t result;
594        scs_set_d(result, x );
595        scs_mul(result, PiSCS_ptr, result);
596        scs_get_d_zero(&rh, result);
597        return rh;
598      }
599      /* First step for Pi*x. TODO: FMA-based optimisation */
600      const double DekkerConst  = 134217729.; /* 2^27 +1 */
601      double tt, xh, xl;
602      /* Splitting of x. Both xh and xl have at least 26 consecutive LSB zeroes */
603      tt = x*DekkerConst;
604      xh = (x-tt)+tt;
605      xl = x-xh;
606      Add12(rh,rl, xh*PIHH, (xl*PIHH + xh*PIHM) + (xh*PIM + xl*PIHM) );
607      TEST_AND_RETURN_RZ(rh,rl,PIX_EPS_SIN);
608    }
609    /* Fall here either if we have a large input, or if we have a small
610       input and the rounding test fails.  */
611    sinpi_accurate(&rh, &rm, &rl, y, index, quadrant);
612 
613    ReturnRoundTowardsZero3(rh,rm,rl);
614 };
615 
616 
617 
618 
619 
620 
621 
622 
623 
624  /* to nearest  */
cospi_rn(double x)625  double cospi_rn(double x){
626    double xs, y,u, rh, rm, rl, absx;
627    db_number xdb, t;
628    int32_t xih, absxih, index, quadrant;
629 
630    if (x<0) absx =-x; else absx=x;
631 
632    xdb.d=x;
633    xs = x*128.0;
634 
635    /* argument reduction.
636       We do it before the special case tests for performance,
637       it might compute garbage for inf, very large inputs, etc */
638    if(absx>  TWOTO42 ) {  /* 2^42, x is very large, let us first subtract a large integer from it */
639      t.d = xs;
640      t.i[LO] =0; /* remove the low part, in which was the coma since x > 2^42.
641 		    So what remains in t is an FP integer almost as large as x */
642      xs = xs-t.d; /* we are going to throw away the int part anyway */
643    }
644 
645    t.d = TWOTO5251 + xs;
646    u = t.d - TWOTO5251;
647    y = xs - u;
648    y = y * INV128;
649    index = t.i[LO] & 0x3f;
650    quadrant = (t.i[LO] & 0xff) >>6;
651 
652    /* Special case tests come late because the conversion FP to int is slow */
653    xih = xdb.i[HI];
654    absxih = xih & 0x7fffffff;
655 
656    /* SPECIAL CASES: x=(Nan, Inf) cos(pi*x)=Nan */
657 
658    if (absxih>=0x7ff00000) {
659      xdb.l=0xfff8000000000000LL;
660      return xdb.d - xdb.d;
661    }
662 
663    if(absxih>=0x43400000) /* 2^53, which entails that x is an even integer */
664      return 1.0;
665 
666    if(index==0 && y==0. && ((quadrant&1)==1)) return +0.;
667    /* Always +0, inpired by LIA2; We do not have cos(x+pi) == - cos(x)
668       in this case */
669 
670    if(index==0 && y==0. && quadrant==0) return 1.;
671    if(index==0 && y==0. && quadrant==2) return -1.;
672 
673    if (absxih<0x3E26A09E) /* sqrt(2^-53)/4 */
674      return 1.0;
675    /* printf("\n\nint part = %f    frac part = %f     index=%d   quadrant=%d   \n", u, y, index, quadrant);
676     */
677 
678    cospi_accurate(&rh, &rm, &rl, y, index, quadrant);
679    ReturnRoundToNearest3(rh,rm,rl);
680 };
681 
682 
683 
684 
685 
cospi_rd(double x)686  double cospi_rd(double x){
687    double xs, y,u, rh, rm, rl, absx;
688    db_number xdb, t;
689    int32_t xih, absxih, index, quadrant;
690 
691    if (x<0) absx =-x; else absx=x;
692 
693    xdb.d=x;
694    xs = x*128.0;
695 
696 
697    /* argument reduction.
698       We do it before the special case tests for performance,
699       it might compute garbage for inf, very large inputs, etc */
700    if(absx>  TWOTO42 ) {  /* 2^42, x is very large, let us first subtract a large integer from it */
701      t.d = xs;
702      t.i[LO] =0; /* remove the low part, in which was the coma since x > 2^42.
703 		    So what remains in t is an FP integer almost as large as x */
704      xs = xs-t.d; /* we are going to throw away the int part anyway */
705    }
706 
707    t.d = TWOTO5251 + xs;
708    u = t.d - TWOTO5251;
709    y = xs - u;
710    y = y * INV128;
711    index = t.i[LO] & 0x3f;
712    quadrant = (t.i[LO] & 0xff) >>6;
713 
714 
715    /* Special case tests come late because the conversion FP to int is slow */
716    xih = xdb.i[HI];
717    absxih = xih & 0x7fffffff;
718 
719    /* SPECIAL CASES: x=(Nan, Inf) cos(pi*x)=Nan */
720 
721    if (absxih>=0x7ff00000) {
722      xdb.l=0xfff8000000000000LL;
723      return xdb.d - xdb.d;
724    }
725 
726    if(absxih>=0x43400000) /* 2^53, which entails that x is an even integer */
727      return 1.0; /*signed */
728 
729    if(index==0 && y==0. && ((quadrant&1)==1)) return -0.;
730 
731    if(index==0 && y==0. && quadrant==0) return 1.;
732    if(index==0 && y==0. && quadrant==2) return -1.;
733 
734    if (absxih<0x3E200000) /* 2^-29 */
735      return 0.9999999999999998889776975374843459576368331909179687500; /* 1-2^-53 */
736    /* Always +0, inpired by LIA2; We do not have cos(x+pi) == - cos(x)
737       in this case */
738 
739    cospi_accurate(&rh, &rm, &rl, y, index, quadrant);
740    ReturnRoundDownwards3(rh,rm,rl);
741  };
742 
743 
744 
745 
cospi_ru(double x)746  double cospi_ru(double x){
747    double xs, y,u, rh, rm, rl, absx;
748    db_number xdb, t;
749    int32_t xih, absxih, index, quadrant;
750 
751    if (x<0) absx =-x; else absx=x;
752 
753    xdb.d=x;
754    xs = x*128.0;
755 
756 
757    /* argument reduction.
758       We do it before the special case tests for performance,
759       it might compute garbage for inf, very large inputs, etc */
760    if(absx>  TWOTO42 ) {  /* 2^42, x is very large, let us first subtract a large integer from it */
761      t.d = xs;
762      t.i[LO] =0; /* remove the low part, in which was the coma since x > 2^42.
763 		    So what remains in t is an FP integer almost as large as x */
764      xs = xs-t.d; /* we are going to throw away the int part anyway */
765    }
766 
767    t.d = TWOTO5251 + xs;
768    u = t.d - TWOTO5251;
769    y = xs - u;
770    y = y * INV128;
771    index = t.i[LO] & 0x3f;
772    quadrant = (t.i[LO] & 0xff) >>6;
773 
774 
775    /* Special case tests come late because the conversion FP to int is slow */
776    xih = xdb.i[HI];
777    absxih = xih & 0x7fffffff;
778 
779    /* SPECIAL CASES: x=(Nan, Inf) cos(pi*x)=Nan */
780    if (absxih>=0x7ff00000) {
781      xdb.l=0xfff8000000000000LL;
782      return xdb.d - xdb.d;
783    }
784 
785    if(absxih>=0x43400000) /* 2^53, which entails that x is an even integer */
786      return 1.0; /*signed */
787 
788    if(index==0 && y==0. && quadrant==0) return 1.;
789    if(index==0 && y==0. && quadrant==2) return -1.;
790 
791    if(index==0 && y==0. && ((quadrant&1)==1)) return +0.;
792    /* Always +0, inpired by LIA2; We do not have cos(x+pi) == - cos(x)
793       in this case */
794 
795    if (absxih<0x3E200000) /* 2^-29 */
796      return 1;
797 
798    cospi_accurate(&rh, &rm, &rl, y, index, quadrant);
799    ReturnRoundUpwards3(rh,rm,rl);
800 };
801 
802 
803 
804 
cospi_rz(double x)805 double cospi_rz(double x){
806    double xs, y,u, rh, rm, rl, absx;
807    db_number xdb, t;
808    int32_t xih, absxih, index, quadrant;
809 
810    if (x<0) absx =-x; else absx=x;
811 
812    xdb.d=x;
813    xs = x*128.0;
814 
815 
816    /* argument reduction.
817       We do it before the special case tests for performance,
818       it might compute garbage for inf, very large inputs, etc */
819    if(absx>  TWOTO42 ) {  /* 2^42, x is very large, let us first subtract a large integer from it */
820      t.d = xs;
821      t.i[LO] =0; /* remove the low part, in which was the coma since x > 2^42.
822 		    So what remains in t is an FP integer almost as large as x */
823      xs = xs-t.d; /* we are going to throw away the int part anyway */
824    }
825 
826    t.d = TWOTO5251 + xs;
827    u = t.d - TWOTO5251;
828    y = xs - u;
829    y = y * INV128;
830    index = t.i[LO] & 0x3f;
831    quadrant = (t.i[LO] & 0xff) >>6;
832 
833 
834    /* Special case tests come late because the conversion FP to int is slow */
835    xih = xdb.i[HI];
836    absxih = xih & 0x7fffffff;
837 
838    /* SPECIAL CASES: x=(Nan, Inf) cos(pi*x)=Nan */
839 
840    if (absxih>=0x7ff00000) {
841      xdb.l=0xfff8000000000000LL;
842      return xdb.d - xdb.d;
843    }
844 
845    if(absxih>=0x43400000) /* 2^53, which entails that x is an even integer */
846      return 1.0; /*signed */
847 
848    if(index==0 && y==0. && ((quadrant&1)==1)) return +0.;
849    /* Always +0, inpired by LIA2; We do not have cos(x+pi) == - cos(x)
850       in this case */
851 
852    if(index==0 && y==0. && quadrant==0) return 1.;
853    if(index==0 && y==0. && quadrant==2) return -1.;
854 
855    if (absxih<0x3E200000) /* 2^-29 */
856      return 0.9999999999999998889776975374843459576368331909179687500; /* 1-2^-53 */
857 
858    cospi_accurate(&rh, &rm, &rl, y, index, quadrant);
859    ReturnRoundTowardsZero3(rh,rm,rl);
860   };
861 
862 
863 
864 
865 
866 
867 /*  tangent of pi times x */
tanpi_rn(double x)868  double tanpi_rn(double x){
869    double xs, y,u, rh, rm, rl, ch,cm,cl, ich,icm,icl, sh,sm,sl, sign,absx;
870    db_number xdb, t;
871    int32_t xih, absxih, index, quadrant;
872 
873    if (x<0) absx = -x;   else absx = x;
874 
875    xdb.d = x;
876 
877    xs = x*128.0;
878 
879    /* argument reduction */
880    if(absx>  TWOTO42 ) {  /* x is very large, let us first subtract a large integer from it */
881      t.d = xs;
882      t.i[LO] =0; /* remove the low part. The point is somewhere there since x > 2^42.
883 		    So what remains in t is an FP integer almost as large as x */
884      xs = xs-t.d; /* we are going to throw away the int part anyway */
885    }
886 
887    t.d = TWOTO5251 + xs;
888    u = t.d - TWOTO5251;
889    y = xs - u;
890    index = t.i[LO] & 0x3f;
891    quadrant = (t.i[LO] & 0xff) >>6;
892 
893    /* Special case tests come late because the conversion FP to int is slow */
894    xih = xdb.i[HI];
895    absxih = xih & 0x7fffffff;
896    if (xih>>31)  sign=-1.;   else sign=1.; /* consider the sign bit */
897 
898    if(index==0 && y==0.0 && ((quadrant&1)==0)) return sign*0.0; /*signed, inspired by LIA-2 */
899    /* TODO ? No test for Pi/4. Such a value will lauch the accurate phase. */
900 
901    y = y * INV128;
902 
903    /* SPECIAL CASES: x=(Nan, Inf) sin(pi*x)=Nan */
904    if (absxih>=0x7ff00000) {
905      xdb.l=0xfff8000000000000LL;
906      return xdb.d - xdb.d;
907    }
908 
909    if(absxih>=0x43300000) /* 2^52, which entails that x is an integer */
910      return sign*0.0; /*signed */
911 
912    if(absxih<=0x3E000000) /*2^{-31}*/ {
913      if (absxih<0x01700000) { /* 2^{-1000} :  Get rid of possible subnormals  */
914        /* in this case, SCS computation, accurate to 2^-210 which is provably enough */
915        scs_t result;
916        scs_set_d(result, x );
917        scs_mul(result, PiSCS_ptr, result);
918        scs_get_d(&rh, result);
919        return rh;
920      }
921      /* First step for Pi*x. TODO: FMA-based optimisation */
922      const double DekkerConst  = 134217729.; /* 2^27 +1 */
923      double tt, xh, xl;
924      /* Splitting of x. Both xh and xl have at least 26 consecutive LSB zeroes */
925      tt = x*DekkerConst;
926      xh = (x-tt)+tt;
927      xl = x-xh;
928      Add12(rh,rl, xh*PIHH, (xl*PIHH + xh*PIHM) + (xh*PIM + xl*PIHM) );
929      if(rh == (rh + (rl * PIX_RNCST_TAN)))
930        return rh;
931    }
932    /* Fall here either if we have a large input, or if we have a small
933       input and the rounding test fails.  */
934    cospi_accurate(&ch, &cm, &cl, y, index, quadrant);
935    Recpr33(&ich, &icm, &icl, ch, cm, cl);
936    sinpi_accurate(&sh, &sm, &sl, y, index, quadrant);
937    Mul33(&rh,&rm,&rl, sh,sm,sl, ich,icm,icl);
938    ReturnRoundToNearest3(rh,rm,rl);
939 
940 };
941 
942 
943 
944 
tanpi_rd(double x)945  double tanpi_rd(double x){
946    double xs, y,u, rh, rm, rl, ch,cm,cl, ich,icm,icl, sh,sm,sl, sign,absx;
947    db_number xdb, t;
948    int32_t xih, absxih, index, quadrant;
949 
950    if (x<0) absx = -x;   else absx = x;
951 
952    xdb.d = x;
953 
954    xs = x*128.0;
955 
956    /* argument reduction */
957    if(absx>  TWOTO42 ) {  /* x is very large, let us first subtract a large integer from it */
958      t.d = xs;
959      t.i[LO] =0; /* remove the low part. The point is somewhere there since x > 2^42.
960 		    So what remains in t is an FP integer almost as large as x */
961      xs = xs-t.d; /* we are going to throw away the int part anyway */
962    }
963 
964    t.d = TWOTO5251 + xs;
965    u = t.d - TWOTO5251;
966    y = xs - u;
967    index = t.i[LO] & 0x3f;
968    quadrant = (t.i[LO] & 0xff) >>6;
969 
970    /* Special case tests come late because the conversion FP to int is slow */
971    xih = xdb.i[HI];
972    absxih = xih & 0x7fffffff;
973    if (xih>>31)  sign=-1.;   else sign=1.; /* consider the sign bit */
974 
975    if(index==0 && y==0.0 && ((quadrant&1)==0)) return sign*0.0; /*signed, inspired by LIA-2 */
976 
977    y = y * INV128;
978 
979    /* SPECIAL CASES: x=(Nan, Inf) sin(pi*x)=Nan */
980    if (absxih>=0x7ff00000) {
981      xdb.l=0xfff8000000000000LL;
982      return xdb.d - xdb.d;
983    }
984 
985    if(absxih>=0x43300000) /* 2^52, which entails that x is an integer */
986      return sign*0.0; /*signed */
987 
988    if(absxih<=0x3E000000) /*2^{-31}*/ {
989      if (absxih<0x01700000) { /* 2^{-1000} :  Get rid of possible subnormals  */
990        /* in this case, SCS computation, accurate to 2^-210 which is provably enough */
991        scs_t result;
992        scs_set_d(result, x );
993        scs_mul(result, PiSCS_ptr, result);
994        scs_get_d_minf(&rh, result);
995        return rh;
996      }
997      /* First step for Pi*x. TODO: FMA-based optimisation */
998      const double DekkerConst  = 134217729.; /* 2^27 +1 */
999      double tt, xh, xl;
1000      /* Splitting of x. Both xh and xl have at least 26 consecutive LSB zeroes */
1001      tt = x*DekkerConst;
1002      xh = (x-tt)+tt;
1003      xl = x-xh;
1004      Add12(rh,rl, xh*PIHH, (xl*PIHH + xh*PIHM) + (xh*PIM + xl*PIHM) );
1005      TEST_AND_RETURN_RD(rh,rl,PIX_EPS_SIN);
1006    }
1007    /* Fall here either if we have a large input, or if we have a small
1008       input and the rounding test fails.  */
1009    cospi_accurate(&ch, &cm, &cl, y, index, quadrant);
1010    Recpr33(&ich, &icm, &icl, ch, cm, cl);
1011    sinpi_accurate(&sh, &sm, &sl, y, index, quadrant);
1012    Mul33(&rh,&rm,&rl, sh,sm,sl, ich,icm,icl);
1013    ReturnRoundDownwards3(rh,rm,rl);
1014 };
1015 
1016 
1017 
1018 
1019 
1020 
1021 
1022 
1023 
1024 
tanpi_ru(double x)1025  double tanpi_ru(double x){
1026    double xs, y,u, rh, rm, rl, ch,cm,cl, ich,icm,icl, sh,sm,sl, sign,absx;
1027    db_number xdb, t;
1028    int32_t xih, absxih, index, quadrant;
1029 
1030    if (x<0) absx = -x;   else absx = x;
1031 
1032    xdb.d = x;
1033 
1034    xs = x*128.0;
1035 
1036    /* argument reduction */
1037    if(absx>  TWOTO42 ) {  /* x is very large, let us first subtract a large integer from it */
1038      t.d = xs;
1039      t.i[LO] =0; /* remove the low part. The point is somewhere there since x > 2^42.
1040 		    So what remains in t is an FP integer almost as large as x */
1041      xs = xs-t.d; /* we are going to throw away the int part anyway */
1042    }
1043 
1044    t.d = TWOTO5251 + xs;
1045    u = t.d - TWOTO5251;
1046    y = xs - u;
1047    index = t.i[LO] & 0x3f;
1048    quadrant = (t.i[LO] & 0xff) >>6;
1049 
1050    /* Special case tests come late because the conversion FP to int is slow */
1051    xih = xdb.i[HI];
1052    absxih = xih & 0x7fffffff;
1053    if (xih>>31)  sign=-1.;   else sign=1.; /* consider the sign bit */
1054 
1055    if(index==0 && y==0.0 && ((quadrant&1)==0)) return sign*0.0; /*signed, inspired by LIA-2 */
1056 
1057    y = y * INV128;
1058 
1059    /* SPECIAL CASES: x=(Nan, Inf) sin(pi*x)=Nan */
1060    if (absxih>=0x7ff00000) {
1061      xdb.l=0xfff8000000000000LL;
1062      return xdb.d - xdb.d;
1063    }
1064 
1065    if(absxih>=0x43300000) /* 2^52, which entails that x is an integer */
1066      return sign*0.0; /*signed */
1067 
1068    if(absxih<=0x3E000000) /*2^{-31}*/ {
1069      if (absxih<0x01700000) { /* 2^{-1000} :  Get rid of possible subnormals  */
1070        /* in this case, SCS computation, accurate to 2^-210 which is provably enough */
1071        scs_t result;
1072        scs_set_d(result, x );
1073        scs_mul(result, PiSCS_ptr, result);
1074        scs_get_d_pinf(&rh, result);
1075        return rh;
1076      }
1077      /* First step for Pi*x. TODO: FMA-based optimisation */
1078      const double DekkerConst  = 134217729.; /* 2^27 +1 */
1079      double tt, xh, xl;
1080      /* Splitting of x. Both xh and xl have at least 26 consecutive LSB zeroes */
1081      tt = x*DekkerConst;
1082      xh = (x-tt)+tt;
1083      xl = x-xh;
1084      Add12(rh,rl, xh*PIHH, (xl*PIHH + xh*PIHM) + (xh*PIM + xl*PIHM) );
1085      TEST_AND_RETURN_RU(rh,rl,PIX_EPS_TAN);
1086    }
1087    /* Fall here either if we have a large input, or if we have a small
1088       input and the rounding test fails.  */
1089    cospi_accurate(&ch, &cm, &cl, y, index, quadrant);
1090    Recpr33(&ich, &icm, &icl, ch, cm, cl);
1091    sinpi_accurate(&sh, &sm, &sl, y, index, quadrant);
1092    Mul33(&rh,&rm,&rl, sh,sm,sl, ich,icm,icl);
1093    ReturnRoundUpwards3(rh,rm,rl);
1094 };
1095 
1096 
1097 
tanpi_rz(double x)1098  double tanpi_rz(double x){
1099    double xs, y,u, rh, rm, rl, ch,cm,cl, ich,icm,icl, sh,sm,sl, sign,absx;
1100    db_number xdb, t;
1101    int32_t xih, absxih, index, quadrant;
1102 
1103    if (x<0) absx = -x;   else absx = x;
1104 
1105    xdb.d = x;
1106 
1107    xs = x*128.0;
1108 
1109    /* argument reduction */
1110    if(absx>  TWOTO42 ) {  /* x is very large, let us first subtract a large integer from it */
1111      t.d = xs;
1112      t.i[LO] =0; /* remove the low part. The point is somewhere there since x > 2^42.
1113 		    So what remains in t is an FP integer almost as large as x */
1114      xs = xs-t.d; /* we are going to throw away the int part anyway */
1115    }
1116 
1117    t.d = TWOTO5251 + xs;
1118    u = t.d - TWOTO5251;
1119    y = xs - u;
1120    index = t.i[LO] & 0x3f;
1121    quadrant = (t.i[LO] & 0xff) >>6;
1122 
1123    /* Special case tests come late because the conversion FP to int is slow */
1124    xih = xdb.i[HI];
1125    absxih = xih & 0x7fffffff;
1126    if (xih>>31)  sign=-1.;   else sign=1.; /* consider the sign bit */
1127 
1128    if(index==0 && y==0.0 && ((quadrant&1)==0)) return sign*0.0; /*signed, inspired by LIA-2 */
1129 
1130    y = y * INV128;
1131 
1132    /* SPECIAL CASES: x=(Nan, Inf) sin(pi*x)=Nan */
1133    if (absxih>=0x7ff00000) {
1134      xdb.l=0xfff8000000000000LL;
1135      return xdb.d - xdb.d;
1136    }
1137 
1138    if(absxih>=0x43300000) /* 2^52, which entails that x is an integer */
1139      return sign*0.0; /*signed */
1140 
1141    if(absxih<=0x3E000000) /*2^{-31}*/ {
1142      if (absxih<0x01700000) { /* 2^{-1000} :  Get rid of possible subnormals  */
1143        /* in this case, SCS computation, accurate to 2^-210 which is provably enough */
1144        scs_t result;
1145        scs_set_d(result, x );
1146        scs_mul(result, PiSCS_ptr, result);
1147        scs_get_d_zero(&rh, result);
1148        return rh;
1149      }
1150      /* First step for Pi*x. TODO: FMA-based optimisation */
1151      const double DekkerConst  = 134217729.; /* 2^27 +1 */
1152      double tt, xh, xl;
1153      /* Splitting of x. Both xh and xl have at least 26 consecutive LSB zeroes */
1154      tt = x*DekkerConst;
1155      xh = (x-tt)+tt;
1156      xl = x-xh;
1157      Add12(rh,rl, xh*PIHH, (xl*PIHH + xh*PIHM) + (xh*PIM + xl*PIHM) );
1158      TEST_AND_RETURN_RZ(rh,rl,PIX_EPS_SIN);
1159    }
1160    /* Fall here either if we have a large input, or if we have a small
1161       input and the rounding test fails.  */
1162    cospi_accurate(&ch, &cm, &cl, y, index, quadrant);
1163    Recpr33(&ich, &icm, &icl, ch, cm, cl);
1164    sinpi_accurate(&sh, &sm, &sl, y, index, quadrant);
1165    Mul33(&rh,&rm,&rl, sh,sm,sl, ich,icm,icl);
1166    ReturnRoundTowardsZero3(rh,rm,rl);
1167 };
1168 
1169 
1170